author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-13 12:18:05 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-13 12:18:05 +0000
commit     b46aad6df449445a9fc4aa7b32bd40005438e3f7 (patch)
tree       751aa858ca01f35de800164516b298887382919d /src
parent     Initial commit. (diff)
Adding upstream version 2.9.5. (tag: upstream/2.9.5)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src')
-rw-r--r--  src/acl.c  1377
-rw-r--r--  src/action.c  363
-rw-r--r--  src/activity.c  1248
-rw-r--r--  src/applet.c  501
-rw-r--r--  src/arg.c  479
-rw-r--r--  src/auth.c  316
-rw-r--r--  src/backend.c  3401
-rw-r--r--  src/base64.c  303
-rw-r--r--  src/cache.c  3014
-rw-r--r--  src/calltrace.c  286
-rw-r--r--  src/cbuf.c  59
-rw-r--r--  src/cfgcond.c  559
-rw-r--r--  src/cfgdiag.c  97
-rw-r--r--  src/cfgparse-global.c  1396
-rw-r--r--  src/cfgparse-listen.c  3073
-rw-r--r--  src/cfgparse-quic.c  292
-rw-r--r--  src/cfgparse-ssl.c  2382
-rw-r--r--  src/cfgparse-tcp.c  275
-rw-r--r--  src/cfgparse-unix.c  135
-rw-r--r--  src/cfgparse.c  4798
-rw-r--r--  src/channel.c  591
-rw-r--r--  src/check.c  2642
-rw-r--r--  src/chunk.c  311
-rw-r--r--  src/cli.c  3423
-rw-r--r--  src/clock.c  460
-rw-r--r--  src/compression.c  742
-rw-r--r--  src/connection.c  2748
-rw-r--r--  src/cpuset.c  296
-rw-r--r--  src/debug.c  2301
-rw-r--r--  src/dgram.c  79
-rw-r--r--  src/dict.c  127
-rw-r--r--  src/dns.c  1330
-rw-r--r--  src/dynbuf.c  129
-rw-r--r--  src/eb32sctree.c  472
-rw-r--r--  src/eb32tree.c  218
-rw-r--r--  src/eb64tree.c  218
-rw-r--r--  src/ebimtree.c  44
-rw-r--r--  src/ebistree.c  42
-rw-r--r--  src/ebmbtree.c  77
-rw-r--r--  src/ebpttree.c  208
-rw-r--r--  src/ebsttree.c  42
-rw-r--r--  src/ebtree.c  50
-rw-r--r--  src/errors.c  567
-rw-r--r--  src/ev_epoll.c  413
-rw-r--r--  src/ev_evports.c  441
-rw-r--r--  src/ev_kqueue.c  380
-rw-r--r--  src/ev_poll.c  348
-rw-r--r--  src/ev_select.c  335
-rw-r--r--  src/event_hdl.c  999
-rw-r--r--  src/extcheck.c  694
-rw-r--r--  src/fcgi-app.c  1133
-rw-r--r--  src/fcgi.c  294
-rw-r--r--  src/fd.c  1348
-rw-r--r--  src/filters.c  1125
-rw-r--r--  src/fix.c  264
-rw-r--r--  src/flt_bwlim.c  976
-rw-r--r--  src/flt_http_comp.c  1076
-rw-r--r--  src/flt_spoe.c  4739
-rw-r--r--  src/flt_trace.c  675
-rw-r--r--  src/freq_ctr.c  218
-rw-r--r--  src/frontend.c  339
-rw-r--r--  src/h1.c  1319
-rw-r--r--  src/h1_htx.c  1074
-rw-r--r--  src/h2.c  814
-rw-r--r--  src/h3.c  2403
-rw-r--r--  src/h3_stats.c  276
-rw-r--r--  src/haproxy.c  3962
-rw-r--r--  src/hash.c  190
-rw-r--r--  src/hlua.c  13961
-rw-r--r--  src/hlua_fcn.c  2721
-rw-r--r--  src/hpack-dec.c  475
-rw-r--r--  src/hpack-enc.c  210
-rw-r--r--  src/hpack-huff.c  861
-rw-r--r--  src/hpack-tbl.c  372
-rw-r--r--  src/hq_interop.c  174
-rw-r--r--  src/http.c  1433
-rw-r--r--  src/http_acl.c  185
-rw-r--r--  src/http_act.c  2501
-rw-r--r--  src/http_ana.c  5153
-rw-r--r--  src/http_client.c  1598
-rw-r--r--  src/http_conv.c  453
-rw-r--r--  src/http_ext.c  1881
-rw-r--r--  src/http_fetch.c  2368
-rw-r--r--  src/http_htx.c  3028
-rw-r--r--  src/http_rules.c  530
-rw-r--r--  src/htx.c  1099
-rw-r--r--  src/init.c  249
-rw-r--r--  src/jwt.c  478
-rw-r--r--  src/lb_chash.c  517
-rw-r--r--  src/lb_fas.c  348
-rw-r--r--  src/lb_fwlc.c  375
-rw-r--r--  src/lb_fwrr.c  623
-rw-r--r--  src/lb_map.c  281
-rw-r--r--  src/linuxcap.c  191
-rw-r--r--  src/listener.c  2487
-rw-r--r--  src/log.c  4659
-rw-r--r--  src/lru.c  305
-rw-r--r--  src/mailers.c  329
-rw-r--r--  src/map.c  1232
-rw-r--r--  src/mjson.c  1048
-rw-r--r--  src/mqtt.c  1281
-rw-r--r--  src/mux_fcgi.c  4268
-rw-r--r--  src/mux_h1.c  5374
-rw-r--r--  src/mux_h2.c  7598
-rw-r--r--  src/mux_pt.c  904
-rw-r--r--  src/mux_quic.c  3067
-rw-r--r--  src/mworker-prog.c  359
-rw-r--r--  src/mworker.c  821
-rw-r--r--  src/namespace.c  132
-rw-r--r--  src/ncbuf.c  986
-rw-r--r--  src/pattern.c  2683
-rw-r--r--  src/payload.c  1448
-rw-r--r--  src/peers.c  4231
-rw-r--r--  src/pipe.c  136
-rw-r--r--  src/pool.c  1539
-rw-r--r--  src/proto_quic.c  799
-rw-r--r--  src/proto_rhttp.c  464
-rw-r--r--  src/proto_sockpair.c  589
-rw-r--r--  src/proto_tcp.c  834
-rw-r--r--  src/proto_udp.c  247
-rw-r--r--  src/proto_uxdg.c  159
-rw-r--r--  src/proto_uxst.c  372
-rw-r--r--  src/protocol.c  309
-rw-r--r--  src/proxy.c  3451
-rw-r--r--  src/qmux_http.c  108
-rw-r--r--  src/qmux_trace.c  114
-rw-r--r--  src/qpack-dec.c  563
-rw-r--r--  src/qpack-enc.c  185
-rw-r--r--  src/qpack-tbl.c  415
-rw-r--r--  src/queue.c  761
-rw-r--r--  src/quic_ack.c  258
-rw-r--r--  src/quic_cc.c  49
-rw-r--r--  src/quic_cc_cubic.c  542
-rw-r--r--  src/quic_cc_newreno.c  220
-rw-r--r--  src/quic_cc_nocc.c  76
-rw-r--r--  src/quic_cid.c  286
-rw-r--r--  src/quic_cli.c  413
-rw-r--r--  src/quic_conn.c  1893
-rw-r--r--  src/quic_frame.c  1273
-rw-r--r--  src/quic_loss.c  312
-rw-r--r--  src/quic_openssl_compat.c  531
-rw-r--r--  src/quic_retransmit.c  252
-rw-r--r--  src/quic_retry.c  320
-rw-r--r--  src/quic_rx.c  2290
-rw-r--r--  src/quic_sock.c  1080
-rw-r--r--  src/quic_ssl.c  790
-rw-r--r--  src/quic_stats.c  215
-rw-r--r--  src/quic_stream.c  294
-rw-r--r--  src/quic_tls.c  1095
-rw-r--r--  src/quic_tp.c  714
-rw-r--r--  src/quic_trace.c  633
-rw-r--r--  src/quic_tx.c  2348
-rw-r--r--  src/raw_sock.c  489
-rw-r--r--  src/regex.c  459
-rw-r--r--  src/resolvers.c  3813
-rw-r--r--  src/ring.c  482
-rw-r--r--  src/sample.c  5173
-rw-r--r--  src/server.c  6765
-rw-r--r--  src/server_state.c  947
-rw-r--r--  src/session.c  528
-rw-r--r--  src/sha1.c  308
-rw-r--r--  src/shctx.c  320
-rw-r--r--  src/signal.c  284
-rw-r--r--  src/sink.c  1406
-rw-r--r--  src/slz.c  1421
-rw-r--r--  src/sock.c  1072
-rw-r--r--  src/sock_inet.c  521
-rw-r--r--  src/sock_unix.c  387
-rw-r--r--  src/ssl_ckch.c  3968
-rw-r--r--  src/ssl_crtlist.c  1577
-rw-r--r--  src/ssl_ocsp.c  1986
-rw-r--r--  src/ssl_sample.c  2389
-rw-r--r--  src/ssl_sock.c  8100
-rw-r--r--  src/ssl_utils.c  702
-rw-r--r--  src/stats.c  5521
-rw-r--r--  src/stconn.c  2050
-rw-r--r--  src/stick_table.c  5658
-rw-r--r--  src/stream.c  4045
-rw-r--r--  src/task.c  979
-rw-r--r--  src/tcp_act.c  749
-rw-r--r--  src/tcp_rules.c  1428
-rw-r--r--  src/tcp_sample.c  641
-rw-r--r--  src/tcpcheck.c  5150
-rw-r--r--  src/thread.c  1864
-rw-r--r--  src/time.c  147
-rw-r--r--  src/tools.c  6348
-rw-r--r--  src/trace.c  997
-rw-r--r--  src/uri_auth.c  318
-rw-r--r--  src/uri_normalizer.c  467
-rw-r--r--  src/vars.c  1454
-rw-r--r--  src/version.c  28
-rw-r--r--  src/wdt.c  193
-rw-r--r--  src/xprt_handshake.c  299
-rw-r--r--  src/xprt_quic.c  175
194 files changed, 261120 insertions, 0 deletions
diff --git a/src/acl.c b/src/acl.c
new file mode 100644
index 0000000..8ef2b7d
--- /dev/null
+++ b/src/acl.c
@@ -0,0 +1,1377 @@
+/*
+ * ACL management functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <import/ebsttree.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/pattern.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/sample.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/tools.h>
+#include <haproxy/cfgparse.h>
+
+/* List head of all known ACL keywords */
+static struct acl_kw_list acl_keywords = {
+ .list = LIST_HEAD_INIT(acl_keywords.list)
+};
+
+/* Converts a pattern lookup result to an ACL test result: a non-NULL pattern
+ * yields ACL_TEST_PASS, a NULL one yields ACL_TEST_FAIL.
+ */
+static inline enum acl_test_res pat2acl(struct pattern *pat)
+{
+ if (pat)
+ return ACL_TEST_PASS;
+ else
+ return ACL_TEST_FAIL;
+}
+
+/*
+ * Registers the ACL keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void acl_register_keywords(struct acl_kw_list *kwl)
+{
+ LIST_APPEND(&acl_keywords.list, &kwl->list);
+}
+
+/*
+ * Unregisters the ACL keyword list <kwl> from the list of valid keywords.
+ */
+void acl_unregister_keywords(struct acl_kw_list *kwl)
+{
+ LIST_DELETE(&kwl->list);
+ LIST_INIT(&kwl->list);
+}
+
+/* Return a pointer to the ACL <name> within the list starting at <head>, or
+ * NULL if not found.
+ */
+struct acl *find_acl_by_name(const char *name, struct list *head)
+{
+ struct acl *acl;
+ list_for_each_entry(acl, head, list) {
+ if (strcmp(acl->name, name) == 0)
+ return acl;
+ }
+ return NULL;
+}
+
+/* Return a pointer to the ACL keyword <kw>, or NULL if not found. Note that if
+ * <kw> contains an opening parenthesis or a comma, only the left part of it is
+ * checked.
+ */
+struct acl_keyword *find_acl_kw(const char *kw)
+{
+ int index;
+ const char *kwend;
+ struct acl_kw_list *kwl;
+
+ kwend = kw;
+ while (is_idchar(*kwend))
+ kwend++;
+
+ list_for_each_entry(kwl, &acl_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
+ kwl->kw[index].kw[kwend-kw] == 0)
+ return &kwl->kw[index];
+ }
+ }
+ return NULL;
+}
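+
+/* Editorial illustration (not part of the upstream source): because is_idchar()
+ * stops at '(' and ',', a lookup such as find_acl_kw("req.cook(sess)") would
+ * match a keyword registered as "req.cook", ignoring the parenthesized part.
+ */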
+
+static struct acl_expr *prune_acl_expr(struct acl_expr *expr)
+{
+ struct arg *arg;
+
+ pattern_prune(&expr->pat);
+
+ for (arg = expr->smp->arg_p; arg; arg++) {
+ if (arg->type == ARGT_STOP)
+ break;
+ if (arg->type == ARGT_STR || arg->unresolved) {
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ }
+ }
+
+ release_sample_expr(expr->smp);
+
+ return expr;
+}
+
+/* Parse an ACL expression starting at <args>[0], and return it. If <err> is
+ * not NULL, it will be filled with a pointer to an error message in case of
+ * error. This pointer must be freeable or NULL. <al> is an arg_list serving
+ * as a list head to report missing dependencies. It may be NULL if such
+ * dependencies are not allowed.
+ *
+ * Right now, the only accepted syntax is :
+ * <subject> [<value>...]
+ */
+struct acl_expr *parse_acl_expr(const char **args, char **err, struct arg_list *al,
+ const char *file, int line)
+{
+ __label__ out_return, out_free_expr;
+ struct acl_expr *expr;
+ struct acl_keyword *aclkw;
+ int refflags, patflags;
+ const char *arg;
+ struct sample_expr *smp = NULL;
+ int idx = 0;
+ char *ckw = NULL;
+ const char *endt;
+ int cur_type;
+ int nbargs;
+ int operator = STD_OP_EQ;
+ int op;
+ int contain_colon, have_dot;
+ const char *dot;
+ signed long long value, minor;
+	/* The following buffer contains two numbers, a ':' separator and the final \0. */
+ char buffer[NB_LLMAX_STR + 1 + NB_LLMAX_STR + 1];
+ int is_loaded;
+ int unique_id;
+ char *error;
+ struct pat_ref *ref;
+ struct pattern_expr *pattern_expr;
+ int load_as_map = 0;
+ int acl_conv_found = 0;
+
+ /* First, we look for an ACL keyword. And if we don't find one, then
+ * we look for a sample fetch expression starting with a sample fetch
+ * keyword.
+ */
+
+ if (al) {
+ al->ctx = ARGC_ACL; // to report errors while resolving args late
+ al->kw = *args;
+ al->conv = NULL;
+ }
+
+ aclkw = find_acl_kw(args[0]);
+ if (aclkw) {
+ /* OK we have a real ACL keyword */
+
+ /* build new sample expression for this ACL */
+ smp = calloc(1, sizeof(*smp));
+ if (!smp) {
+ memprintf(err, "out of memory when parsing ACL expression");
+ goto out_return;
+ }
+ LIST_INIT(&(smp->conv_exprs));
+ smp->fetch = aclkw->smp;
+ smp->arg_p = empty_arg_list;
+
+ /* look for the beginning of the subject arguments */
+ for (arg = args[0]; is_idchar(*arg); arg++)
+ ;
+
+ /* At this point, we have :
+ * - args[0] : beginning of the keyword
+ * - arg : end of the keyword, first character not part of keyword
+ */
+ nbargs = make_arg_list(arg, -1, smp->fetch->arg_mask, &smp->arg_p,
+ err, &endt, NULL, al);
+ if (nbargs < 0) {
+ /* note that make_arg_list will have set <err> here */
+ memprintf(err, "ACL keyword '%s' : %s", aclkw->kw, *err);
+ goto out_free_smp;
+ }
+
+ if (!smp->arg_p) {
+ smp->arg_p = empty_arg_list;
+ }
+ else if (smp->fetch->val_args && !smp->fetch->val_args(smp->arg_p, err)) {
+ /* invalid keyword argument, error must have been
+ * set by val_args().
+ */
+ memprintf(err, "in argument to '%s', %s", aclkw->kw, *err);
+ goto out_free_smp;
+ }
+
+ /* look for the beginning of the converters list. Those directly attached
+ * to the ACL keyword are found just after the comma.
+ * If we find any converter, then we don't use the ACL keyword's match
+ * anymore but the one related to the converter's output type.
+ */
+ if (!sample_parse_expr_cnv((char **)args, NULL, NULL, err, al, file, line, smp, endt)) {
+ if (err)
+ memprintf(err, "ACL keyword '%s' : %s", aclkw->kw, *err);
+ goto out_free_smp;
+ }
+ acl_conv_found = !LIST_ISEMPTY(&smp->conv_exprs);
+ }
+ else {
+ /* This is not an ACL keyword, so we hope this is a sample fetch
+ * keyword that we're going to transparently use as an ACL. If
+ * so, we retrieve a completely parsed expression with args and
+ * convs already done.
+ */
+ smp = sample_parse_expr((char **)args, &idx, file, line, err, al, NULL);
+ if (!smp) {
+ memprintf(err, "%s in ACL expression '%s'", *err, *args);
+ goto out_return;
+ }
+ }
+
+ /* get last effective output type for smp */
+ cur_type = smp_expr_output_type(smp);
+
+ expr = calloc(1, sizeof(*expr));
+ if (!expr) {
+ memprintf(err, "out of memory when parsing ACL expression");
+ goto out_free_smp;
+ }
+
+ pattern_init_head(&expr->pat);
+
+ expr->pat.expect_type = cur_type;
+ expr->smp = smp;
+ expr->kw = smp->fetch->kw;
+ smp = NULL; /* don't free it anymore */
+
+ if (aclkw && !acl_conv_found) {
+ expr->kw = aclkw->kw;
+ expr->pat.parse = aclkw->parse ? aclkw->parse : pat_parse_fcts[aclkw->match_type];
+ expr->pat.index = aclkw->index ? aclkw->index : pat_index_fcts[aclkw->match_type];
+ expr->pat.match = aclkw->match ? aclkw->match : pat_match_fcts[aclkw->match_type];
+ expr->pat.prune = aclkw->prune ? aclkw->prune : pat_prune_fcts[aclkw->match_type];
+ }
+
+ if (!expr->pat.parse) {
+ /* Parse/index/match functions depend on the expression type,
+ * so we have to map them now. Some types can be automatically
+ * converted.
+ */
+ switch (cur_type) {
+ case SMP_T_BOOL:
+ expr->pat.parse = pat_parse_fcts[PAT_MATCH_BOOL];
+ expr->pat.index = pat_index_fcts[PAT_MATCH_BOOL];
+ expr->pat.match = pat_match_fcts[PAT_MATCH_BOOL];
+ expr->pat.prune = pat_prune_fcts[PAT_MATCH_BOOL];
+ expr->pat.expect_type = pat_match_types[PAT_MATCH_BOOL];
+ break;
+ case SMP_T_SINT:
+ expr->pat.parse = pat_parse_fcts[PAT_MATCH_INT];
+ expr->pat.index = pat_index_fcts[PAT_MATCH_INT];
+ expr->pat.match = pat_match_fcts[PAT_MATCH_INT];
+ expr->pat.prune = pat_prune_fcts[PAT_MATCH_INT];
+ expr->pat.expect_type = pat_match_types[PAT_MATCH_INT];
+ break;
+ case SMP_T_ADDR:
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ expr->pat.parse = pat_parse_fcts[PAT_MATCH_IP];
+ expr->pat.index = pat_index_fcts[PAT_MATCH_IP];
+ expr->pat.match = pat_match_fcts[PAT_MATCH_IP];
+ expr->pat.prune = pat_prune_fcts[PAT_MATCH_IP];
+ expr->pat.expect_type = pat_match_types[PAT_MATCH_IP];
+ break;
+ case SMP_T_STR:
+ expr->pat.parse = pat_parse_fcts[PAT_MATCH_STR];
+ expr->pat.index = pat_index_fcts[PAT_MATCH_STR];
+ expr->pat.match = pat_match_fcts[PAT_MATCH_STR];
+ expr->pat.prune = pat_prune_fcts[PAT_MATCH_STR];
+ expr->pat.expect_type = pat_match_types[PAT_MATCH_STR];
+ break;
+ }
+ }
+
+ /* Additional check to protect against common mistakes */
+ if (expr->pat.parse && cur_type != SMP_T_BOOL && !*args[1]) {
+ ha_warning("parsing acl keyword '%s' :\n"
+		   "  no pattern to match against was provided, so this ACL will never match.\n"
+ " If this is what you intended, please add '--' to get rid of this warning.\n"
+ " If you intended to match only for existence, please use '-m found'.\n"
+ " If you wanted to force an int to match as a bool, please use '-m bool'.\n"
+ "\n",
+ args[0]);
+ }
+
+ args++;
+
+	/* check for options before patterns. Supported options are :
+	 *   -i : ignore case for all patterns by default
+	 *   -n : disable DNS resolution for parsed patterns
+	 *   -f : read patterns from those files
+	 *   -m : force matching method (must be used before -f)
+	 *   -M : load the file as map file
+	 *   -u : force the unique id of the acl
+	 *   -- : everything after this is not an option
+	 */
+ refflags = PAT_REF_ACL;
+ patflags = 0;
+ is_loaded = 0;
+ unique_id = -1;
+ while (**args == '-') {
+ if (strcmp(*args, "-i") == 0)
+ patflags |= PAT_MF_IGNORE_CASE;
+ else if (strcmp(*args, "-n") == 0)
+ patflags |= PAT_MF_NO_DNS;
+ else if (strcmp(*args, "-u") == 0) {
+ unique_id = strtol(args[1], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "the argument of -u must be an integer");
+ goto out_free_expr;
+ }
+
+ /* Check if this id is really unique. */
+ if (pat_ref_lookupid(unique_id)) {
+ memprintf(err, "the id is already used");
+ goto out_free_expr;
+ }
+
+ args++;
+ }
+ else if (strcmp(*args, "-f") == 0) {
+ if (!expr->pat.parse) {
+ memprintf(err, "matching method must be specified first (using '-m') when using a sample fetch of this type ('%s')", expr->kw);
+ goto out_free_expr;
+ }
+
+ if (!pattern_read_from_file(&expr->pat, refflags, args[1], patflags, load_as_map, err, file, line))
+ goto out_free_expr;
+ is_loaded = 1;
+ args++;
+ }
+ else if (strcmp(*args, "-m") == 0) {
+ int idx;
+
+ if (is_loaded) {
+ memprintf(err, "'-m' must only be specified before patterns and files in parsing ACL expression");
+ goto out_free_expr;
+ }
+
+ idx = pat_find_match_name(args[1]);
+ if (idx < 0) {
+ memprintf(err, "unknown matching method '%s' when parsing ACL expression", args[1]);
+ goto out_free_expr;
+ }
+
+ /* Note: -m found is always valid, bool/int are compatible, str/bin/reg/len are compatible */
+ if (idx != PAT_MATCH_FOUND && !sample_casts[cur_type][pat_match_types[idx]]) {
+ memprintf(err, "matching method '%s' cannot be used with fetch keyword '%s'", args[1], expr->kw);
+ goto out_free_expr;
+ }
+ expr->pat.parse = pat_parse_fcts[idx];
+ expr->pat.index = pat_index_fcts[idx];
+ expr->pat.match = pat_match_fcts[idx];
+ expr->pat.prune = pat_prune_fcts[idx];
+ expr->pat.expect_type = pat_match_types[idx];
+ args++;
+ }
+ else if (strcmp(*args, "-M") == 0) {
+ refflags |= PAT_REF_MAP;
+ load_as_map = 1;
+ }
+ else if (strcmp(*args, "--") == 0) {
+ args++;
+ break;
+ }
+ else {
+ memprintf(err, "'%s' is not a valid ACL option. Please use '--' before any pattern beginning with a '-'", args[0]);
+ goto out_free_expr;
+ }
+ args++;
+ }
+
+ if (!expr->pat.parse) {
+ memprintf(err, "matching method must be specified first (using '-m') when using a sample fetch of this type ('%s')", expr->kw);
+ goto out_free_expr;
+ }
+
+ /* Create displayed reference */
+ snprintf(trash.area, trash.size, "acl '%s' file '%s' line %d",
+ expr->kw, file, line);
+ trash.area[trash.size - 1] = '\0';
+
+ /* Create new pattern reference. */
+ ref = pat_ref_newid(unique_id, trash.area, PAT_REF_ACL);
+ if (!ref) {
+ memprintf(err, "memory error");
+ goto out_free_expr;
+ }
+
+ /* Create new pattern expression associated to this reference. */
+ pattern_expr = pattern_new_expr(&expr->pat, ref, patflags, err, NULL);
+ if (!pattern_expr)
+ goto out_free_expr;
+
+ /* now parse all patterns */
+ while (**args) {
+ arg = *args;
+
+		/* Compatibility layer. Each pattern parser accepts only one string per
+		 * pattern, but pat_parse_int() and pat_parse_dotted_ver() may optionally
+		 * be preceded by an operator selecting the match method: eq, le, lt, ge
+		 * or gt. Both functions support an equivalent compatibility syntax based
+		 * on ranges:
+ *
+ * pat_parse_int():
+ *
+ * "eq x" -> "x" or "x:x"
+ * "le x" -> ":x"
+ * "lt x" -> ":y" (with y = x - 1)
+ * "ge x" -> "x:"
+ * "gt x" -> "y:" (with y = x + 1)
+ *
+ * pat_parse_dotted_ver():
+ *
+ * "eq x.y" -> "x.y" or "x.y:x.y"
+ * "le x.y" -> ":x.y"
+ * "lt x.y" -> ":w.z" (with w.z = x.y - 1)
+ * "ge x.y" -> "x.y:"
+ * "gt x.y" -> "w.z:" (with w.z = x.y + 1)
+ *
+		 * If y is not present, it is assumed to be "0".
+ *
+		 * The eq, le, lt, ge and gt operators are specific to the ACL syntax.
+		 * The following block of code detects the operator and rewrites each
+		 * value as a parsable range string.
+ */
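+		/* Editorial illustration (not part of the upstream source): for a
+		 * configuration line such as "acl big req.len gt 1000", the words
+		 * "gt" and "1000" reach this loop as two separate arguments, and
+		 * the value is rewritten to the range string "1001:" before being
+		 * handed to pat_parse_int().
+		 */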
+ if (expr->pat.parse == pat_parse_int ||
+ expr->pat.parse == pat_parse_dotted_ver) {
+ /* Check for operator. If the argument is operator, memorise it and
+ * continue to the next argument.
+ */
+ op = get_std_op(arg);
+ if (op != -1) {
+ operator = op;
+ args++;
+ continue;
+ }
+
+			/* Check if the pattern contains a ':' or '-' character. */
+ contain_colon = (strchr(arg, ':') || strchr(arg, '-'));
+
+			/* If the pattern contains a ':' or '-' character, give it to the
+			 * parser as is. If it does not contain ':' and the operator is
+			 * STD_OP_EQ, also give it to the parser as is. Otherwise, try to
+			 * convert the value according to the operator.
+			 */
+ if (!contain_colon && operator != STD_OP_EQ) {
+ /* Search '.' separator. */
+ dot = strchr(arg, '.');
+ if (!dot) {
+ have_dot = 0;
+ minor = 0;
+ dot = arg + strlen(arg);
+ }
+ else
+ have_dot = 1;
+
+ /* convert the integer minor part for the pat_parse_dotted_ver() function. */
+ if (expr->pat.parse == pat_parse_dotted_ver && have_dot) {
+ if (strl2llrc(dot+1, strlen(dot+1), &minor) != 0) {
+ memprintf(err, "'%s' is neither a number nor a supported operator", arg);
+ goto out_free_expr;
+ }
+ if (minor >= 65536) {
+ memprintf(err, "'%s' contains too large a minor value", arg);
+ goto out_free_expr;
+ }
+ }
+
+ /* convert the integer value for the pat_parse_int() function, and the
+ * integer major part for the pat_parse_dotted_ver() function.
+ */
+ if (strl2llrc(arg, dot - arg, &value) != 0) {
+ memprintf(err, "'%s' is neither a number nor a supported operator", arg);
+ goto out_free_expr;
+ }
+ if (expr->pat.parse == pat_parse_dotted_ver) {
+ if (value >= 65536) {
+ memprintf(err, "'%s' contains too large a major value", arg);
+ goto out_free_expr;
+ }
+ value = (value << 16) | (minor & 0xffff);
+ }
+
+ switch (operator) {
+
+ case STD_OP_EQ: /* this case is not possible. */
+ memprintf(err, "internal error");
+ goto out_free_expr;
+
+ case STD_OP_GT:
+ value++; /* gt = ge + 1 */
+ __fallthrough;
+
+ case STD_OP_GE:
+ if (expr->pat.parse == pat_parse_int)
+ snprintf(buffer, NB_LLMAX_STR+NB_LLMAX_STR+2, "%lld:", value);
+ else
+ snprintf(buffer, NB_LLMAX_STR+NB_LLMAX_STR+2, "%lld.%lld:",
+ value >> 16, value & 0xffff);
+ arg = buffer;
+ break;
+
+ case STD_OP_LT:
+ value--; /* lt = le - 1 */
+ __fallthrough;
+
+ case STD_OP_LE:
+ if (expr->pat.parse == pat_parse_int)
+ snprintf(buffer, NB_LLMAX_STR+NB_LLMAX_STR+2, ":%lld", value);
+ else
+ snprintf(buffer, NB_LLMAX_STR+NB_LLMAX_STR+2, ":%lld.%lld",
+ value >> 16, value & 0xffff);
+ arg = buffer;
+ break;
+ }
+ }
+ }
+
+		/* Add the sample to the reference, and try to compile it for each
+		 * pattern expression using this value.
+ */
+ if (!pat_ref_add(ref, arg, NULL, err))
+ goto out_free_expr;
+ args++;
+ }
+
+ return expr;
+
+ out_free_expr:
+ prune_acl_expr(expr);
+ free(expr);
+ out_free_smp:
+ free(ckw);
+ free(smp);
+ out_return:
+ return NULL;
+}
+
+/* Purge everything in the acl <acl>, then return <acl>. */
+struct acl *prune_acl(struct acl *acl) {
+
+ struct acl_expr *expr, *exprb;
+
+ free(acl->name);
+
+ list_for_each_entry_safe(expr, exprb, &acl->expr, list) {
+ LIST_DELETE(&expr->list);
+ prune_acl_expr(expr);
+ free(expr);
+ }
+
+ return acl;
+}
+
+/* Walk the ACL tree, following nested acl() sample fetches, for no more than
+ * max_recurse evaluations. Returns -1 if a recursive loop is detected, 0 if
+ * the max_recurse was reached, otherwise the number of max_recurse left.
+ */
+static int parse_acl_recurse(struct acl *acl, struct acl_expr *expr, int max_recurse)
+{
+ struct acl_term *term;
+ struct acl_sample *sample;
+
+ if (strcmp(expr->smp->fetch->kw, "acl") != 0)
+ return max_recurse;
+
+ if (--max_recurse <= 0)
+ return 0;
+
+ sample = (struct acl_sample *)expr->smp->arg_p->data.ptr;
+ list_for_each_entry(term, &sample->suite.terms, list) {
+ if (term->acl == acl)
+ return -1;
+ list_for_each_entry(expr, &term->acl->expr, list) {
+ max_recurse = parse_acl_recurse(acl, expr, max_recurse);
+ if (max_recurse <= 0)
+ return max_recurse;
+ }
+ }
+
+ return max_recurse;
+}
+
+/* Parse an ACL with the name starting at <args>[0], and with a list of already
+ * known ACLs in <acl>. If the ACL was not in the list, it will be added.
+ * A pointer to that ACL is returned. If the ACL has an empty name, then it's
+ * an anonymous one and it won't be merged with any other one. If <err> is not
+ * NULL, it will be filled with an appropriate error. This pointer must be
+ * freeable or NULL. <al> is the arg_list serving as a head for unresolved
+ * dependencies. It may be NULL if such dependencies are not allowed.
+ *
+ * args syntax: <aclname> <acl_expr>
+ */
+struct acl *parse_acl(const char **args, struct list *known_acl, char **err, struct arg_list *al,
+ const char *file, int line)
+{
+ __label__ out_return, out_free_acl_expr, out_free_name;
+ struct acl *cur_acl;
+ struct acl_expr *acl_expr;
+ char *name;
+ const char *pos;
+
+ if (**args && (pos = invalid_char(*args))) {
+ memprintf(err, "invalid character in ACL name : '%c'", *pos);
+ goto out_return;
+ }
+
+ acl_expr = parse_acl_expr(args + 1, err, al, file, line);
+ if (!acl_expr) {
+ /* parse_acl_expr will have filled <err> here */
+ goto out_return;
+ }
+
+ /* Check for args beginning with an opening parenthesis just after the
+ * subject, as this is almost certainly a typo. Right now we can only
+ * emit a warning, so let's do so.
+ */
+ if (!strchr(args[1], '(') && *args[2] == '(')
+ ha_warning("parsing acl '%s' :\n"
+ " matching '%s' for pattern '%s' is likely a mistake and probably\n"
+ " not what you want. Maybe you need to remove the extraneous space before '('.\n"
+ " If you are really sure this is not an error, please insert '--' between the\n"
+ " match and the pattern to make this warning message disappear.\n",
+ args[0], args[1], args[2]);
+
+ if (*args[0])
+ cur_acl = find_acl_by_name(args[0], known_acl);
+ else
+ cur_acl = NULL;
+
+ if (cur_acl) {
+ int ret = parse_acl_recurse(cur_acl, acl_expr, ACL_MAX_RECURSE);
+ if (ret <= 0) {
+ if (ret < 0)
+ memprintf(err, "have a recursive loop");
+ else
+ memprintf(err, "too deep acl() tree");
+ goto out_free_acl_expr;
+ }
+ } else {
+ name = strdup(args[0]);
+ if (!name) {
+ memprintf(err, "out of memory when parsing ACL");
+ goto out_free_acl_expr;
+ }
+ cur_acl = calloc(1, sizeof(*cur_acl));
+ if (cur_acl == NULL) {
+ memprintf(err, "out of memory when parsing ACL");
+ goto out_free_name;
+ }
+
+ LIST_INIT(&cur_acl->expr);
+ LIST_APPEND(known_acl, &cur_acl->list);
+ cur_acl->name = name;
+ }
+
+ /* We want to know what features the ACL needs (typically HTTP parsing),
+ * and where it may be used. If an ACL relies on multiple matches, it is
+ * OK if at least one of them may match in the context where it is used.
+ */
+ cur_acl->use |= acl_expr->smp->fetch->use;
+ cur_acl->val |= acl_expr->smp->fetch->val;
+ LIST_APPEND(&cur_acl->expr, &acl_expr->list);
+ return cur_acl;
+
+ out_free_name:
+ free(name);
+ out_free_acl_expr:
+ prune_acl_expr(acl_expr);
+ free(acl_expr);
+ out_return:
+ return NULL;
+}
+
+/* Some useful ACLs provided by default. Only those used are allocated. */
+
+const struct {
+ const char *name;
+ const char *expr[4]; /* put enough for longest expression */
+} default_acl_list[] = {
+ { .name = "TRUE", .expr = {"always_true",""}},
+ { .name = "FALSE", .expr = {"always_false",""}},
+ { .name = "LOCALHOST", .expr = {"src","127.0.0.1/8","::1",""}},
+ { .name = "HTTP", .expr = {"req.proto_http",""}},
+ { .name = "HTTP_1.0", .expr = {"req.ver","1.0",""}},
+ { .name = "HTTP_1.1", .expr = {"req.ver","1.1",""}},
+ { .name = "HTTP_2.0", .expr = {"req.ver","2.0",""}},
+ { .name = "HTTP_3.0", .expr = {"req.ver","3.0",""}},
+ { .name = "METH_CONNECT", .expr = {"method","CONNECT",""}},
+ { .name = "METH_DELETE", .expr = {"method","DELETE",""}},
+ { .name = "METH_GET", .expr = {"method","GET","HEAD",""}},
+ { .name = "METH_HEAD", .expr = {"method","HEAD",""}},
+ { .name = "METH_OPTIONS", .expr = {"method","OPTIONS",""}},
+ { .name = "METH_POST", .expr = {"method","POST",""}},
+ { .name = "METH_PUT", .expr = {"method","PUT",""}},
+ { .name = "METH_TRACE", .expr = {"method","TRACE",""}},
+ { .name = "HTTP_URL_ABS", .expr = {"url_reg","^[^/:]*://",""}},
+ { .name = "HTTP_URL_SLASH", .expr = {"url_beg","/",""}},
+ { .name = "HTTP_URL_STAR", .expr = {"url","*",""}},
+ { .name = "HTTP_CONTENT", .expr = {"req.hdr_val(content-length)","gt","0",""}},
+ { .name = "RDP_COOKIE", .expr = {"req.rdp_cookie_cnt","gt","0",""}},
+ { .name = "REQ_CONTENT", .expr = {"req.len","gt","0",""}},
+ { .name = "WAIT_END", .expr = {"wait_end",""}},
+ { .name = NULL, .expr = {""}}
+};
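+
+/* Editorial illustration (not part of the upstream source): these default ACLs
+ * can be referenced in a configuration without being declared first, e.g.:
+ *
+ *     http-request deny if METH_TRACE
+ *     use_backend stats_be if METH_GET HTTP_URL_SLASH
+ *
+ * ("stats_be" is a hypothetical backend name.) Declaring an ACL with the same
+ * name overrides the default one, as handled by find_acl_default() below.
+ */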
+
+/* Find a default ACL from the default_acl list, compile it and return it.
+ * If the ACL is not found, NULL is returned. In theory, it cannot fail,
+ * except when default ACLs are broken, in which case it will return NULL.
+ * If <known_acl> is not NULL, the ACL will be queued at its tail. If <err> is
+ * not NULL, it will be filled with an error message if an error occurs. This
+ * pointer must be freeable or NULL. <al> is an arg_list serving as a list head
+ * to report missing dependencies. It may be NULL if such dependencies are not
+ * allowed.
+ */
+static struct acl *find_acl_default(const char *acl_name, struct list *known_acl,
+ char **err, struct arg_list *al,
+ const char *file, int line)
+{
+ __label__ out_return, out_free_acl_expr, out_free_name;
+ struct acl *cur_acl;
+ struct acl_expr *acl_expr;
+ char *name;
+ int index;
+
+ for (index = 0; default_acl_list[index].name != NULL; index++) {
+ if (strcmp(acl_name, default_acl_list[index].name) == 0)
+ break;
+ }
+
+ if (default_acl_list[index].name == NULL) {
+ memprintf(err, "no such ACL : '%s'", acl_name);
+ return NULL;
+ }
+
+ acl_expr = parse_acl_expr((const char **)default_acl_list[index].expr, err, al, file, line);
+ if (!acl_expr) {
+ /* parse_acl_expr must have filled err here */
+ goto out_return;
+ }
+
+ name = strdup(acl_name);
+ if (!name) {
+ memprintf(err, "out of memory when building default ACL '%s'", acl_name);
+ goto out_free_acl_expr;
+ }
+
+ cur_acl = calloc(1, sizeof(*cur_acl));
+ if (cur_acl == NULL) {
+ memprintf(err, "out of memory when building default ACL '%s'", acl_name);
+ goto out_free_name;
+ }
+
+ cur_acl->name = name;
+ cur_acl->use |= acl_expr->smp->fetch->use;
+ cur_acl->val |= acl_expr->smp->fetch->val;
+ LIST_INIT(&cur_acl->expr);
+ LIST_APPEND(&cur_acl->expr, &acl_expr->list);
+ if (known_acl)
+ LIST_APPEND(known_acl, &cur_acl->list);
+
+ return cur_acl;
+
+ out_free_name:
+ free(name);
+ out_free_acl_expr:
+ prune_acl_expr(acl_expr);
+ free(acl_expr);
+ out_return:
+ return NULL;
+}
+
+/* Parse an ACL condition starting at <args>[0], relying on a list of already
+ * known ACLs passed in <known_acl>. The new condition is returned (or NULL in
+ * case of low memory). Supports multiple conditions separated by "or". If
+ * <err> is not NULL, it will be filled with a pointer to an error message in
+ * case of error, that the caller is responsible for freeing. The initial
+ * location must either be freeable or NULL. The list <al> serves as a list head
+ * for unresolved dependencies. It may be NULL if such dependencies are not
+ * allowed.
+ */
+struct acl_cond *parse_acl_cond(const char **args, struct list *known_acl,
+ enum acl_cond_pol pol, char **err, struct arg_list *al,
+ const char *file, int line)
+{
+ __label__ out_return, out_free_suite, out_free_term;
+ int arg, neg;
+ const char *word;
+ struct acl *cur_acl;
+ struct acl_term *cur_term;
+ struct acl_term_suite *cur_suite;
+ struct acl_cond *cond;
+ unsigned int suite_val;
+
+ cond = calloc(1, sizeof(*cond));
+ if (cond == NULL) {
+ memprintf(err, "out of memory when parsing condition");
+ goto out_return;
+ }
+
+ LIST_INIT(&cond->list);
+ LIST_INIT(&cond->suites);
+ cond->pol = pol;
+ cond->val = 0;
+
+ cur_suite = NULL;
+ suite_val = ~0U;
+ neg = 0;
+ for (arg = 0; *args[arg]; arg++) {
+ word = args[arg];
+
+ /* remove as many exclamation marks as we can */
+ while (*word == '!') {
+ neg = !neg;
+ word++;
+ }
+
+ /* an empty word is allowed because we cannot force the user to
+ * always think about not leaving exclamation marks alone.
+ */
+ if (!*word)
+ continue;
+
+ if (strcasecmp(word, "or") == 0 || strcmp(word, "||") == 0) {
+ /* new term suite */
+ cond->val |= suite_val;
+ suite_val = ~0U;
+ cur_suite = NULL;
+ neg = 0;
+ continue;
+ }
+
+ if (strcmp(word, "{") == 0) {
+ /* we may have a complete ACL expression between two braces,
+ * find the last one.
+ */
+ int arg_end = arg + 1;
+ const char **args_new;
+
+ while (*args[arg_end] && strcmp(args[arg_end], "}") != 0)
+ arg_end++;
+
+ if (!*args[arg_end]) {
+ memprintf(err, "missing closing '}' in condition");
+ goto out_free_suite;
+ }
+
+ args_new = calloc(1, (arg_end - arg + 1) * sizeof(*args_new));
+ if (!args_new) {
+ memprintf(err, "out of memory when parsing condition");
+ goto out_free_suite;
+ }
+
+ args_new[0] = "";
+ memcpy(args_new + 1, args + arg + 1, (arg_end - arg) * sizeof(*args_new));
+ args_new[arg_end - arg] = "";
+ cur_acl = parse_acl(args_new, known_acl, err, al, file, line);
+ free(args_new);
+
+ if (!cur_acl) {
+ /* note that parse_acl() must have filled <err> here */
+ goto out_free_suite;
+ }
+ arg = arg_end;
+ }
+ else {
+ /* search for <word> in the known ACL names. If we do not find
+ * it, let's look for it in the default ACLs, and if found, add
+ * it to the list of ACLs of this proxy. This makes it possible
+ * to override them.
+ */
+ cur_acl = find_acl_by_name(word, known_acl);
+ if (cur_acl == NULL) {
+ cur_acl = find_acl_default(word, known_acl, err, al, file, line);
+ if (cur_acl == NULL) {
+ /* note that find_acl_default() must have filled <err> here */
+ goto out_free_suite;
+ }
+ }
+ }
+
+ cur_term = calloc(1, sizeof(*cur_term));
+ if (cur_term == NULL) {
+ memprintf(err, "out of memory when parsing condition");
+ goto out_free_suite;
+ }
+
+ cur_term->acl = cur_acl;
+ cur_term->neg = neg;
+
+ /* Here it is a bit complex. The acl_term_suite is a conjunction
+ * of many terms. It may only be used if all of its terms are
+ * usable at the same time. So the suite's validity domain is an
+ * AND between all ACL keywords' ones. But, the global condition
+ * is valid if at least one term suite is OK. So it's an OR between
+ * all of their validity domains. We could emit a warning as soon
+ * as suite_val is null because it means that the last ACL is not
+ * compatible with the previous ones. Let's remain simple for now.
+ */
+ cond->use |= cur_acl->use;
+ suite_val &= cur_acl->val;
+
+ if (!cur_suite) {
+ cur_suite = calloc(1, sizeof(*cur_suite));
+ if (cur_suite == NULL) {
+ memprintf(err, "out of memory when parsing condition");
+ goto out_free_term;
+ }
+ LIST_INIT(&cur_suite->terms);
+ LIST_APPEND(&cond->suites, &cur_suite->list);
+ }
+ LIST_APPEND(&cur_suite->terms, &cur_term->list);
+ neg = 0;
+ }
+
+ cond->val |= suite_val;
+ return cond;
+
+ out_free_term:
+ free(cur_term);
+ out_free_suite:
+ free_acl_cond(cond);
+ out_return:
+ return NULL;
+}
+
+/* Builds an ACL condition starting at the if/unless keyword. The complete
+ * condition is returned. NULL is returned in case of error or if the first
+ * word is neither "if" nor "unless". It automatically sets the file name and
+ * the line number in the condition for better error reporting, and sets the
+ * HTTP initialization requirements in the proxy. If <err> is not NULL, it will
+ * be filled with a pointer to an error message in case of error, that the
+ * caller is responsible for freeing. The initial location must either be
+ * freeable or NULL.
+ */
+struct acl_cond *build_acl_cond(const char *file, int line, struct list *known_acl,
+ struct proxy *px, const char **args, char **err)
+{
+ enum acl_cond_pol pol = ACL_COND_NONE;
+ struct acl_cond *cond = NULL;
+
+ if (err)
+ *err = NULL;
+
+ if (strcmp(*args, "if") == 0) {
+ pol = ACL_COND_IF;
+ args++;
+ }
+ else if (strcmp(*args, "unless") == 0) {
+ pol = ACL_COND_UNLESS;
+ args++;
+ }
+ else {
+ memprintf(err, "conditions must start with either 'if' or 'unless'");
+ return NULL;
+ }
+
+ cond = parse_acl_cond(args, known_acl, pol, err, &px->conf.args, file, line);
+ if (!cond) {
+ /* note that parse_acl_cond must have filled <err> here */
+ return NULL;
+ }
+
+ cond->file = file;
+ cond->line = line;
+ px->http_needed |= !!(cond->use & SMP_USE_HTTP_ANY);
+ return cond;
+}
+
+/* Execute condition <cond> and return either ACL_TEST_FAIL, ACL_TEST_MISS or
+ * ACL_TEST_PASS depending on the test results. ACL_TEST_MISS may only be
+ * returned if <opt> does not contain SMP_OPT_FINAL, indicating that incomplete
+ * data is being examined. The function automatically sets SMP_OPT_ITERATE. This
+ * function only computes the condition, it does not apply the polarity required
+ * by IF/UNLESS, it's up to the caller to do this using something like this :
+ *
+ * res = acl_pass(res);
+ * if (res == ACL_TEST_MISS)
+ * return 0;
+ * if (cond->pol == ACL_COND_UNLESS)
+ * res = !res;
+ */
+enum acl_test_res acl_exec_cond(struct acl_cond *cond, struct proxy *px, struct session *sess, struct stream *strm, unsigned int opt)
+{
+ __label__ fetch_next;
+ struct acl_term_suite *suite;
+ struct acl_term *term;
+ struct acl_expr *expr;
+ struct acl *acl;
+ struct sample smp;
+ enum acl_test_res acl_res, suite_res, cond_res;
+
+ /* ACLs are iterated over all values, so let's always set the flag to
+ * indicate this to the fetch functions.
+ */
+ opt |= SMP_OPT_ITERATE;
+
+ /* We're doing a logical OR between conditions so we initialize to FAIL.
+ * The MISS status is propagated down from the suites.
+ */
+ cond_res = ACL_TEST_FAIL;
+ list_for_each_entry(suite, &cond->suites, list) {
+ /* Evaluate condition suite <suite>. We stop at the first term
+ * which returns ACL_TEST_FAIL. The MISS status is still propagated
+ * in case of uncertainty in the result.
+ */
+
+ /* we're doing a logical AND between terms, so we must set the
+ * initial value to PASS.
+ */
+ suite_res = ACL_TEST_PASS;
+ list_for_each_entry(term, &suite->terms, list) {
+ acl = term->acl;
+
+ /* FIXME: use cache !
+ * check acl->cache_idx for this.
+ */
+
+ /* ACL result not cached. Let's scan all the expressions
+ * and use the first one to match.
+ */
+ acl_res = ACL_TEST_FAIL;
+ list_for_each_entry(expr, &acl->expr, list) {
+ /* we need to reset context and flags */
+ memset(&smp, 0, sizeof(smp));
+ fetch_next:
+ if (!sample_process(px, sess, strm, opt, expr->smp, &smp)) {
+ /* maybe we could not fetch because of missing data */
+ if (smp.flags & SMP_F_MAY_CHANGE && !(opt & SMP_OPT_FINAL))
+ acl_res |= ACL_TEST_MISS;
+ continue;
+ }
+
+ acl_res |= pat2acl(pattern_exec_match(&expr->pat, &smp, 0));
+ /*
+ * OK now acl_res holds the result of this expression
+ * as one of ACL_TEST_FAIL, ACL_TEST_MISS or ACL_TEST_PASS.
+ *
+ * Then if (!MISS) we can cache the result, and put
+ * (smp.flags & SMP_F_VOLATILE) in the cache flags.
+ *
+ * FIXME: implement cache.
+ *
+ */
+
+ /* we're ORing these terms, so a single PASS is enough */
+ if (acl_res == ACL_TEST_PASS)
+ break;
+
+ if (smp.flags & SMP_F_NOT_LAST)
+ goto fetch_next;
+
+ /* sometimes we know the fetched data is subject to change
+ * later and give another chance for a new match (eg: request
+ * size, time, ...)
+ */
+ if (smp.flags & SMP_F_MAY_CHANGE && !(opt & SMP_OPT_FINAL))
+ acl_res |= ACL_TEST_MISS;
+ }
+ /*
+ * Here we have the result of an ACL (cached or not).
+ * ACLs are combined, negated or not, to form conditions.
+ */
+
+ if (term->neg)
+ acl_res = acl_neg(acl_res);
+
+ suite_res &= acl_res;
+
+ /* we're ANDing these terms, so a single FAIL or MISS is enough */
+ if (suite_res != ACL_TEST_PASS)
+ break;
+ }
+ cond_res |= suite_res;
+
+ /* we're ORing these terms, so a single PASS is enough */
+ if (cond_res == ACL_TEST_PASS)
+ break;
+ }
+ return cond_res;
+}
+
+/* Returns a pointer to the first ACL conflicting with usage at place <where>
+ * which is one of the SMP_VAL_* bits indicating a check place, or NULL if
+ * no conflict is found. Only full conflicts are detected (ACL is not usable).
+ * Use the next function to check for useless keywords.
+ */
+const struct acl *acl_cond_conflicts(const struct acl_cond *cond, unsigned int where)
+{
+ struct acl_term_suite *suite;
+ struct acl_term *term;
+ struct acl *acl;
+
+ list_for_each_entry(suite, &cond->suites, list) {
+ list_for_each_entry(term, &suite->terms, list) {
+ acl = term->acl;
+ if (!(acl->val & where))
+ return acl;
+ }
+ }
+ return NULL;
+}
+
+/* Returns a pointer to the first ACL and its first keyword to conflict with
+ * usage at place <where> which is one of the SMP_VAL_* bits indicating a check
+ * place. Returns true if a conflict is found, with <acl> and <kw> set (if not
+ * null), or false if no conflict is found. The first useless keyword is
+ * returned.
+ */
+int acl_cond_kw_conflicts(const struct acl_cond *cond, unsigned int where, struct acl const **acl, char const **kw)
+{
+ struct acl_term_suite *suite;
+ struct acl_term *term;
+ struct acl_expr *expr;
+
+ list_for_each_entry(suite, &cond->suites, list) {
+ list_for_each_entry(term, &suite->terms, list) {
+ list_for_each_entry(expr, &term->acl->expr, list) {
+ if (!(expr->smp->fetch->val & where)) {
+ if (acl)
+ *acl = term->acl;
+ if (kw)
+ *kw = expr->kw;
+ return 1;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * Find targets for userlists and groups in ACLs. The function returns the
+ * number of errors encountered, or zero if everything is fine. It must only
+ * be called once sample fetch arguments have been resolved (after
+ * smp_resolve_args()).
+ */
+int acl_find_targets(struct proxy *p)
+{
+
+ struct acl *acl;
+ struct acl_expr *expr;
+ struct pattern_list *pattern;
+ int cfgerr = 0;
+ struct pattern_expr_list *pexp;
+
+ list_for_each_entry(acl, &p->acl, list) {
+ list_for_each_entry(expr, &acl->expr, list) {
+ if (strcmp(expr->kw, "http_auth_group") == 0) {
+ /* Note: the ARGT_USR argument may only have been resolved earlier
+ * by smp_resolve_args().
+ */
+ if (expr->smp->arg_p->unresolved) {
+ ha_alert("Internal bug in proxy %s: %sacl %s %s() makes use of unresolved userlist '%s'. Please report this.\n",
+ p->id, *acl->name ? "" : "anonymous ", acl->name, expr->kw,
+ expr->smp->arg_p->data.str.area);
+ cfgerr++;
+ continue;
+ }
+
+ if (LIST_ISEMPTY(&expr->pat.head)) {
+ ha_alert("proxy %s: acl %s %s(): no groups specified.\n",
+ p->id, acl->name, expr->kw);
+ cfgerr++;
+ continue;
+ }
+
+ /* For each pattern, check if the group exists. */
+ list_for_each_entry(pexp, &expr->pat.head, list) {
+ if (LIST_ISEMPTY(&pexp->expr->patterns)) {
+ ha_alert("proxy %s: acl %s %s(): no groups specified.\n",
+ p->id, acl->name, expr->kw);
+ cfgerr++;
+ continue;
+ }
+
+ list_for_each_entry(pattern, &pexp->expr->patterns, list) {
+ /* this keyword only has one argument */
+ if (!check_group(expr->smp->arg_p->data.usr, pattern->pat.ptr.str)) {
+ ha_alert("proxy %s: acl %s %s(): invalid group '%s'.\n",
+ p->id, acl->name, expr->kw, pattern->pat.ptr.str);
+ cfgerr++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return cfgerr;
+}
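+
+/* Editorial illustration (not part of the upstream source): the checks above
+ * cover configurations such as:
+ *
+ *     userlist admins
+ *         group ops users alice,bob
+ *
+ *     acl is_ops http_auth_group(admins) ops
+ */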
+
+/* initializes ACLs by resolving the sample fetch names they rely upon.
+ * Returns 0 on success, otherwise an error.
+ */
+int init_acl()
+{
+ int err = 0;
+ int index;
+ const char *name;
+ struct acl_kw_list *kwl;
+ struct sample_fetch *smp;
+
+ list_for_each_entry(kwl, &acl_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ name = kwl->kw[index].fetch_kw;
+ if (!name)
+ name = kwl->kw[index].kw;
+
+ smp = find_sample_fetch(name, strlen(name));
+ if (!smp) {
+ ha_alert("Critical internal error: ACL keyword '%s' relies on sample fetch '%s' which was not registered!\n",
+ kwl->kw[index].kw, name);
+ err++;
+ continue;
+ }
+ kwl->kw[index].smp = smp;
+ }
+ }
+ return err;
+}
+
+/* dump known ACL keywords on stdout */
+void acl_dump_kwd(void)
+{
+ struct acl_kw_list *kwl;
+ const struct acl_keyword *kwp, *kw;
+ const char *name;
+ int index;
+
+ for (kw = kwp = NULL;; kwp = kw) {
+ list_for_each_entry(kwl, &acl_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strordered(kwp ? kwp->kw : NULL,
+ kwl->kw[index].kw,
+ kw != kwp ? kw->kw : NULL))
+ kw = &kwl->kw[index];
+ }
+ }
+
+ if (kw == kwp)
+ break;
+
+ name = kw->fetch_kw;
+ if (!name)
+ name = kw->kw;
+
+ printf("%s = %s -m %s\n", kw->kw, name, pat_match_names[kw->match_type]);
+ }
+}
+
+/* Purge everything in the acl_cond <cond>, then free <cond> */
+void free_acl_cond(struct acl_cond *cond)
+{
+ struct acl_term_suite *suite, *suiteb;
+ struct acl_term *term, *termb;
+
+ if (!cond)
+ return;
+
+ list_for_each_entry_safe(suite, suiteb, &cond->suites, list) {
+ list_for_each_entry_safe(term, termb, &suite->terms, list) {
+ LIST_DELETE(&term->list);
+ free(term);
+ }
+ LIST_DELETE(&suite->list);
+ free(suite);
+ }
+
+ free(cond);
+}
+
+
+static int smp_fetch_acl(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct acl_sample *acl_sample = (struct acl_sample *)args->data.ptr;
+ enum acl_test_res ret;
+
+ ret = acl_exec_cond(&acl_sample->cond, smp->px, smp->sess, smp->strm, smp->opt);
+ if (ret == ACL_TEST_MISS)
+ return 0;
+ smp->data.u.sint = ret == ACL_TEST_PASS;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+int smp_fetch_acl_parse(struct arg *args, char **err_msg)
+{
+ struct acl_sample *acl_sample;
+ char *name;
+ int i;
+
+ for (i = 0; args[i].type != ARGT_STOP; i++)
+ ;
+ acl_sample = calloc(1, sizeof(struct acl_sample) + sizeof(struct acl_term) * i);
+ LIST_INIT(&acl_sample->suite.terms);
+ LIST_INIT(&acl_sample->cond.suites);
+ LIST_APPEND(&acl_sample->cond.suites, &acl_sample->suite.list);
+ acl_sample->cond.val = ~0U; // the keyword is valid everywhere for now.
+
+ args->data.ptr = acl_sample;
+
+ for (i = 0; args[i].type != ARGT_STOP; i++) {
+ name = args[i].data.str.area;
+ if (name[0] == '!') {
+ acl_sample->terms[i].neg = 1;
+ name++;
+ }
+
+ if (!(acl_sample->terms[i].acl = find_acl_by_name(name, &curproxy->acl))) {
+ memprintf(err_msg, "ACL '%s' not found", name);
+ goto err;
+ }
+
+ acl_sample->cond.use |= acl_sample->terms[i].acl->use;
+ acl_sample->cond.val &= acl_sample->terms[i].acl->val;
+
+ LIST_APPEND(&acl_sample->suite.terms, &acl_sample->terms[i].list);
+ }
+
+ return 1;
+
+err:
+ free(acl_sample);
+ return 0;
+}
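+
+/* Editorial illustration (not part of the upstream source): the "acl" sample
+ * fetch parsed above lets named ACLs be reused inside another expression,
+ * with '!' negating a term, e.g.:
+ *
+ *     acl is_api   path_beg /api
+ *     acl is_local src 127.0.0.0/8
+ *     http-request deny if { acl(is_api,!is_local) }
+ */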
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "acl", smp_fetch_acl, ARG12(1,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR), smp_fetch_acl_parse, SMP_T_BOOL, SMP_USE_CONST },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/action.c b/src/action.c
new file mode 100644
index 0000000..47f5f86
--- /dev/null
+++ b/src/action.c
@@ -0,0 +1,363 @@
+/*
+ * Action management functions.
+ *
+ * Copyright 2017 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+
+/* Check an action ruleset's validity. It returns the number of errors
+ * encountered, and err_code is updated if a warning is emitted.
+ */
+int check_action_rules(struct list *rules, struct proxy *px, int *err_code)
+{
+ struct act_rule *rule;
+ char *errmsg = NULL;
+ int err = 0;
+
+ list_for_each_entry(rule, rules, list) {
+ if (rule->check_ptr && !rule->check_ptr(rule, px, &errmsg)) {
+ ha_alert("Proxy '%s': %s.\n", px->id, errmsg);
+ err++;
+ }
+ *err_code |= warnif_tcp_http_cond(px, rule->cond);
+ ha_free(&errmsg);
+ }
+
+ return err;
+}
+
+/* Find and check the target table used by an action track-sc*. This
+ * function should be called during the configuration validity check.
+ *
+ * The function returns 1 on success, otherwise it returns 0 and <err> is
+ * filled.
+ */
+int check_trk_action(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct stktable *target;
+
+ if (rule->arg.trk_ctr.table.n)
+ target = stktable_find_by_name(rule->arg.trk_ctr.table.n);
+ else
+ target = px->table;
+
+ if (!target) {
+ memprintf(err, "unable to find table '%s' referenced by track-sc%d",
+ rule->arg.trk_ctr.table.n ? rule->arg.trk_ctr.table.n : px->id,
+ rule->action);
+ return 0;
+ }
+
+ if (!stktable_compatible_sample(rule->arg.trk_ctr.expr, target->type)) {
+ memprintf(err, "stick-table '%s' uses a type incompatible with the 'track-sc%d' rule",
+ rule->arg.trk_ctr.table.n ? rule->arg.trk_ctr.table.n : px->id,
+ rule->action);
+ return 0;
+ }
+ else {
+ if (!in_proxies_list(target->proxies_list, px)) {
+ px->next_stkt_ref = target->proxies_list;
+ target->proxies_list = px;
+ }
+ free(rule->arg.trk_ctr.table.n);
+ rule->arg.trk_ctr.table.t = target;
+ /* Note: if we decide to enhance the track-sc syntax, we may be
+ * able to pass a list of counters to track and allocate them
+ * right here using stktable_alloc_data_type().
+ */
+ }
+
+ if (rule->from == ACT_F_TCP_REQ_CNT && (px->cap & PR_CAP_FE)) {
+ if (!px->tcp_req.inspect_delay && !(rule->arg.trk_ctr.expr->fetch->val & SMP_VAL_FE_SES_ACC)) {
+ ha_warning("%s '%s' : a 'tcp-request content track-sc*' rule explicitly depending on request"
+ " contents without any 'tcp-request inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-request inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+
+ /* The following warning is emitted because HTTP multiplexers are able to catch errors
+ * or timeouts at the session level, before instantiating any stream.
+ * Thus the tcp-request content ruleset will not be evaluated in such case. It means,
+ * http_req and http_err counters will not be incremented as expected, even if the tracked
+ * counter does not use the request content. To track invalid requests it should be
+ * performed at the session level using a tcp-request session rule.
+ */
+ if (px->mode == PR_MODE_HTTP &&
+ !(rule->arg.trk_ctr.expr->fetch->use & (SMP_USE_L6REQ|SMP_USE_HRQHV|SMP_USE_HRQHP|SMP_USE_HRQBO)) &&
+ (!rule->cond || !(rule->cond->use & (SMP_USE_L6REQ|SMP_USE_HRQHV|SMP_USE_HRQHP|SMP_USE_HRQBO)))) {
+ ha_warning("%s '%s' : a 'tcp-request content track-sc*' rule not depending on request"
+ " contents for an HTTP frontend should be executed at the session level, using a"
+ " 'tcp-request session' rule (mandatory to track invalid HTTP requests).\n",
+ proxy_type_str(px), px->id);
+ }
+ }
+
+ return 1;
+}
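+
+/* Editorial illustration (not part of the upstream source): the first warning
+ * above is avoided by pairing content-based tracking with an inspect delay:
+ *
+ *     tcp-request inspect-delay 5s
+ *     tcp-request content track-sc0 req.payload(0,4) table per_key
+ *
+ * ("per_key" is a hypothetical stick-table name.)
+ */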
+
+/* check a capture rule. This function should be called during the configuration
+ * validity check.
+ *
+ * The function returns 1 on success, otherwise it returns 0 and <err> is
+ * filled.
+ */
+int check_capture(struct act_rule *rule, struct proxy *px, char **err)
+{
+ if (rule->from == ACT_F_TCP_REQ_CNT && (px->cap & PR_CAP_FE) && !px->tcp_req.inspect_delay &&
+ !(rule->arg.cap.expr->fetch->val & SMP_VAL_FE_SES_ACC)) {
+ ha_warning("%s '%s' : a 'tcp-request capture' rule explicitly depending on request"
+ " contents without any 'tcp-request inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-request inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+
+ return 1;
+}
+
+int act_resolution_cb(struct resolv_requester *requester, struct dns_counters *counters)
+{
+ struct stream *stream;
+
+ if (requester->resolution == NULL)
+ return 0;
+
+ stream = objt_stream(requester->owner);
+ if (stream == NULL)
+ return 0;
+
+ task_wakeup(stream->task, TASK_WOKEN_MSG);
+
+ return 0;
+}
+
+/*
+ * Resolution error management callback.
+ * returns:
+ *   0 if we can trash answer items.
+ *   1 when safely ignored and we must keep answer items.
+ */
+int act_resolution_error_cb(struct resolv_requester *requester, int error_code)
+{
+ struct stream *stream;
+
+ if (requester->resolution == NULL)
+ return 0;
+
+ stream = objt_stream(requester->owner);
+ if (stream == NULL)
+ return 0;
+
+ task_wakeup(stream->task, TASK_WOKEN_MSG);
+
+ return 0;
+}
+
+/* Parse a set-timeout rule statement. It first checks if the timeout name is
+ * valid and proxy is capable of handling it, and returns it in <rule->arg.timeout.type>.
+ * Then the timeout is parsed as a plain value and returned in <rule->arg.timeout.value>.
+ * If there is a parsing error, the value is reparsed as an expression and
+ * returned in <rule->arg.timeout.expr>.
+ *
+ * Returns -1 if the name is invalid, if neither a time nor an expression can
+ * be parsed, or if the timeout value is 0.
+ */
+int cfg_parse_rule_set_timeout(const char **args, int idx, struct act_rule *rule,
+ struct proxy *px, char **err)
+{
+ const char *res;
+ const char *timeout_name = args[idx++];
+
+ if (strcmp(timeout_name, "server") == 0) {
+ if (!(px->cap & PR_CAP_BE)) {
+ memprintf(err, "'%s' has no backend capability", px->id);
+ return -1;
+ }
+ rule->arg.timeout.type = ACT_TIMEOUT_SERVER;
+ }
+ else if (strcmp(timeout_name, "tunnel") == 0) {
+ if (!(px->cap & PR_CAP_BE)) {
+ memprintf(err, "'%s' has no backend capability", px->id);
+ return -1;
+ }
+ rule->arg.timeout.type = ACT_TIMEOUT_TUNNEL;
+ }
+ else if (strcmp(timeout_name, "client") == 0) {
+ if (!(px->cap & PR_CAP_FE)) {
+ memprintf(err, "'%s' has no frontend capability", px->id);
+ return -1;
+ }
+ rule->arg.timeout.type = ACT_TIMEOUT_CLIENT;
+ }
+ else {
+ memprintf(err,
+ "'set-timeout' rule supports 'server'/'tunnel'/'client' (got '%s')",
+ timeout_name);
+ return -1;
+ }
+
+ res = parse_time_err(args[idx], (unsigned int *)&rule->arg.timeout.value, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to rule 'set-timeout %s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[idx], timeout_name);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to rule 'set-timeout %s' (minimum value is 1 ms)",
+ args[idx], timeout_name);
+ return -1;
+ }
+ /* res not NULL, parsing error */
+ else if (res) {
+ rule->arg.timeout.expr = sample_parse_expr((char **)args, &idx, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.timeout.expr) {
+ memprintf(err, "unexpected character '%c' in rule 'set-timeout %s'", *res, timeout_name);
+ return -1;
+ }
+ }
+ /* res NULL, parsing ok but value is 0 */
+ else if (!(rule->arg.timeout.value)) {
+ memprintf(err, "null value is not valid for a 'set-timeout %s' rule",
+ timeout_name);
+ return -1;
+ }
+
+ return 0;
+}
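+
+/* Editorial illustration (not part of the upstream source): typical uses of
+ * the rule parsed above, with a plain timeout value and with an expression
+ * evaluating to a number of milliseconds ("x-timeout-ms" is a hypothetical
+ * header name):
+ *
+ *     http-request set-timeout server 5s
+ *     http-request set-timeout tunnel req.hdr(x-timeout-ms)
+ */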
+
+/* Tries to find in the list <keywords> an action similar to the one in
+ * <word>, and returns it, otherwise NULL. <word> may be NULL or empty. An
+ * optional array of extra words to compare may be passed in <extra>, but it
+ * must then be terminated by a NULL entry. If unused it may be NULL.
+ */
+const char *action_suggest(const char *word, const struct list *keywords, const char **extra)
+{
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ const struct action_kw_list *kwl;
+ const struct action_kw *best_kw = NULL;
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+ int index;
+
+ if (!word || !*word)
+ return NULL;
+
+ make_word_fingerprint(word_sig, word);
+ list_for_each_entry(kwl, keywords, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ make_word_fingerprint(list_sig, kwl->kw[index].kw);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_kw = &kwl->kw[index];
+ best_ptr = best_kw->kw;
+ }
+ }
+ }
+
+ while (extra && *extra) {
+ make_word_fingerprint(list_sig, *extra);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_kw = NULL;
+ best_ptr = *extra;
+ }
+ extra++;
+ }
+
+ /* eliminate too different ones, with more tolerance for prefixes
+ * when they're known to exist (not from extra list).
+ */
+ if (best_ptr &&
+ (best_dist > (2 + (best_kw && (best_kw->flags & KWF_MATCH_PREFIX))) * strlen(word) ||
+ best_dist > (2 + (best_kw && (best_kw->flags & KWF_MATCH_PREFIX))) * strlen(best_ptr)))
+ best_ptr = NULL;
+
+ return best_ptr;
+}
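+
+/* Editorial illustration (not part of the upstream source): given the
+ * "http-request" keyword list, a misspelled action such as "set-hdr" would
+ * typically be mapped to "set-header" by the fingerprint distance above.
+ */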
+
+/* allocates a rule for ruleset <from> (ACT_F_*), from file name <file> and
+ * line <linenum>. <file> and <linenum> may be zero if unknown. Returns the
+ * rule, otherwise NULL in case of memory allocation error.
+ */
+struct act_rule *new_act_rule(enum act_from from, const char *file, int linenum)
+{
+ struct act_rule *rule;
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ return NULL;
+ rule->from = from;
+ rule->conf.file = file ? strdup(file) : NULL;
+ rule->conf.line = linenum;
+ LIST_INIT(&rule->list);
+ return rule;
+}
+
+/* frees rule <rule> and its elements as well as the condition */
+void free_act_rule(struct act_rule *rule)
+{
+ LIST_DELETE(&rule->list);
+ free_acl_cond(rule->cond);
+ if (rule->release_ptr)
+ rule->release_ptr(rule);
+ free(rule->conf.file);
+ free(rule);
+}
+
+void free_act_rules(struct list *rules)
+{
+ struct act_rule *rule, *ruleb;
+
+ list_for_each_entry_safe(rule, ruleb, rules, list) {
+ free_act_rule(rule);
+ }
+}
+
+/* dumps all known actions registered in action rules <rules> after prefix
+ * <pfx> to stdout. The actions are alphabetically sorted. Those with the
+ * KWF_MATCH_PREFIX flag have their name suffixed with '*'.
+ */
+void dump_act_rules(const struct list *rules, const char *pfx)
+{
+ const struct action_kw *akwp, *akwn;
+ struct action_kw_list *akwl;
+ int index;
+
+ for (akwn = akwp = NULL;; akwp = akwn) {
+ list_for_each_entry(akwl, rules, list) {
+ for (index = 0; akwl->kw[index].kw != NULL; index++)
+ if (strordered(akwp ? akwp->kw : NULL,
+ akwl->kw[index].kw,
+ akwn != akwp ? akwn->kw : NULL))
+ akwn = &akwl->kw[index];
+ }
+ if (akwn == akwp)
+ break;
+ printf("%s%s%s\n", pfx ? pfx : "", akwn->kw,
+ (akwn->flags & KWF_MATCH_PREFIX) ? "*" : "");
+ }
+}
diff --git a/src/activity.c b/src/activity.c
new file mode 100644
index 0000000..07a30e6
--- /dev/null
+++ b/src/activity.c
@@ -0,0 +1,1248 @@
+/*
+ * activity measurement functions.
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/activity-t.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/clock.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/listener.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+/* CLI context for the "show profiling" command */
+struct show_prof_ctx {
+	int dump_step;  /* 0,1,2,4,5,6; see cli_io_handler_show_profiling() */
+ int linenum; /* next line to be dumped (starts at 0) */
+ int maxcnt; /* max line count per step (0=not set) */
+ int by_what; /* 0=sort by usage, 1=sort by address, 2=sort by time */
+ int aggr; /* 0=dump raw, 1=aggregate on callee */
+};
+
+/* CLI context for the "show activity" command */
+struct show_activity_ctx {
+ int thr; /* thread ID to show or -1 for all */
+ int line; /* line number being dumped */
+	int col;        /* column being dumped, 0 to nbt+1 */
+};
+
+#if defined(DEBUG_MEM_STATS)
+/* these ones are macros in bug.h when DEBUG_MEM_STATS is set, and would
+ * prevent the functions below from being defined.
+ */
+#undef calloc
+#undef malloc
+#undef realloc
+#endif
+
+/* bit field of profiling options. Beware, may be modified at runtime! */
+unsigned int profiling __read_mostly = HA_PROF_TASKS_AOFF;
+
+/* start/stop dates of profiling */
+uint64_t prof_task_start_ns = 0;
+uint64_t prof_task_stop_ns = 0;
+uint64_t prof_mem_start_ns = 0;
+uint64_t prof_mem_stop_ns = 0;
+
+/* One struct per thread containing all collected measurements */
+struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { };
+
+/* One struct per function pointer hash entry (SCHED_ACT_HASH_BUCKETS values, 0=collision) */
+struct sched_activity sched_activity[SCHED_ACT_HASH_BUCKETS] __attribute__((aligned(64))) = { };
+
+
+#ifdef USE_MEMORY_PROFILING
+
+static const char *const memprof_methods[MEMPROF_METH_METHODS] = {
+ "unknown", "malloc", "calloc", "realloc", "free", "p_alloc", "p_free",
+};
+
+/* last one is for hash collisions ("others") and has no caller address */
+struct memprof_stats memprof_stats[MEMPROF_HASH_BUCKETS + 1] = { };
+
+/* used to detect recursive calls */
+static THREAD_LOCAL int in_memprof = 0;
+
+/* These ones are used by glibc and will be called early. They are in charge of
+ * initializing the handlers with the original functions.
+ */
+static void *memprof_malloc_initial_handler(size_t size);
+static void *memprof_calloc_initial_handler(size_t nmemb, size_t size);
+static void *memprof_realloc_initial_handler(void *ptr, size_t size);
+static void memprof_free_initial_handler(void *ptr);
+
+/* Fallback handlers for the main alloc/free functions. They are preset to
+ * the initializer in order to save a test in the functions' critical path.
+ */
+static void *(*memprof_malloc_handler)(size_t size) = memprof_malloc_initial_handler;
+static void *(*memprof_calloc_handler)(size_t nmemb, size_t size) = memprof_calloc_initial_handler;
+static void *(*memprof_realloc_handler)(void *ptr, size_t size) = memprof_realloc_initial_handler;
+static void (*memprof_free_handler)(void *ptr) = memprof_free_initial_handler;
+
+/* Used to force the process to die if it's not possible to retrieve the
+ * allocation functions. We cannot even use stdio in this case.
+ */
+static __attribute__((noreturn)) void memprof_die(const char *msg)
+{
+ DISGUISE(write(2, msg, strlen(msg)));
+ exit(1);
+}
+
+/* Resolve original allocation functions and initialize all handlers.
+ * This must be called very early at boot, before the very first malloc()
+ * call, and is not thread-safe! It's not even possible to use stdio there.
+ * Worse, we have to account for the risk of reentrance from dlsym() when
+ * it tries to prepare its error messages. Here it's handled by in_memprof,
+ * which makes the allocators return NULL. dlsym() handles it gracefully. An
+ * alternate approach consists in calling aligned_alloc() from these places
+ * but that would mean not being able to intercept it later if considered
+ * useful to do so.
+ */
+static void memprof_init()
+{
+ in_memprof++;
+ memprof_malloc_handler = get_sym_next_addr("malloc");
+ if (!memprof_malloc_handler)
+ memprof_die("FATAL: malloc() function not found.\n");
+
+ memprof_calloc_handler = get_sym_next_addr("calloc");
+ if (!memprof_calloc_handler)
+ memprof_die("FATAL: calloc() function not found.\n");
+
+ memprof_realloc_handler = get_sym_next_addr("realloc");
+ if (!memprof_realloc_handler)
+ memprof_die("FATAL: realloc() function not found.\n");
+
+ memprof_free_handler = get_sym_next_addr("free");
+ if (!memprof_free_handler)
+ memprof_die("FATAL: free() function not found.\n");
+ in_memprof--;
+}
+
+/* the initial handlers will initialize all regular handlers and will call the
+ * one they correspond to. A single one of these functions will typically be
+ * called, though it's unknown which one (as any might be called before main).
+ */
+static void *memprof_malloc_initial_handler(size_t size)
+{
+ if (in_memprof) {
+ /* it's likely that dlsym() needs malloc(), let's fail */
+ return NULL;
+ }
+
+ memprof_init();
+ return memprof_malloc_handler(size);
+}
+
+static void *memprof_calloc_initial_handler(size_t nmemb, size_t size)
+{
+ if (in_memprof) {
+ /* it's likely that dlsym() needs calloc(), let's fail */
+ return NULL;
+ }
+ memprof_init();
+ return memprof_calloc_handler(nmemb, size);
+}
+
+static void *memprof_realloc_initial_handler(void *ptr, size_t size)
+{
+ if (in_memprof) {
+ /* it's likely that dlsym() needs realloc(), let's fail */
+ return NULL;
+ }
+
+ memprof_init();
+ return memprof_realloc_handler(ptr, size);
+}
+
+static void memprof_free_initial_handler(void *ptr)
+{
+ memprof_init();
+ memprof_free_handler(ptr);
+}
+
+/* Assigns a memprof_stats bin to return address <ra>. May perform a few
+ * attempts before finding the right one, but always succeeds (in the worst
+ * case, returns a default bin). The caller address is atomically set except
+ * for the default one which is never set.
+ */
+struct memprof_stats *memprof_get_bin(const void *ra, enum memprof_method meth)
+{
+ int retries = 16; // up to 16 consecutive entries may be tested.
+ const void *old;
+ unsigned int bin;
+
+ bin = ptr_hash(ra, MEMPROF_HASH_BITS);
+ for (; memprof_stats[bin].caller != ra; bin = (bin + 1) & (MEMPROF_HASH_BUCKETS - 1)) {
+ if (!--retries) {
+ bin = MEMPROF_HASH_BUCKETS;
+ break;
+ }
+
+ old = NULL;
+ if (!memprof_stats[bin].caller &&
+ HA_ATOMIC_CAS(&memprof_stats[bin].caller, &old, ra)) {
+ memprof_stats[bin].method = meth;
+ break;
+ }
+ }
+ return &memprof_stats[bin];
+}
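+
+/* Minimal usage sketch, mirroring what the allocator wrappers below do
+ * (<size> is assumed to be the allocated size being accounted for):
+ *
+ *     struct memprof_stats *bin;
+ *
+ *     bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_MALLOC);
+ *     _HA_ATOMIC_ADD(&bin->alloc_calls, 1);
+ *     _HA_ATOMIC_ADD(&bin->alloc_tot, size);
+ */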
+
+/* This is the new global malloc() function. It must optimize for the normal
+ * case (i.e. profiling disabled) hence the first test to permit a direct jump.
+ * It must remain simple to guarantee the lack of reentrance. stdio is not
+ * possible there even for debugging. The reported size is the really allocated
+ * one as returned by malloc_usable_size(), because this will allow it to be
+ * compared to the one before realloc() or free(). This is a GNU and jemalloc
+ * extension but other systems may also store this size in ptr[-1].
+ */
+void *malloc(size_t size)
+{
+ struct memprof_stats *bin;
+ void *ret;
+
+ if (likely(!(profiling & HA_PROF_MEMORY)))
+ return memprof_malloc_handler(size);
+
+ ret = memprof_malloc_handler(size);
+ size = malloc_usable_size(ret) + sizeof(void *);
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_MALLOC);
+ _HA_ATOMIC_ADD(&bin->alloc_calls, 1);
+ _HA_ATOMIC_ADD(&bin->alloc_tot, size);
+ return ret;
+}
+
+/* This is the new global calloc() function. It must optimize for the normal
+ * case (i.e. profiling disabled) hence the first test to permit a direct jump.
+ * It must remain simple to guarantee the lack of reentrance. stdio is not
+ * possible there even for debugging. The reported size is the really allocated
+ * one as returned by malloc_usable_size(), because this will allow it to be
+ * compared to the one before realloc() or free(). This is a GNU and jemalloc
+ * extension but other systems may also store this size in ptr[-1].
+ */
+void *calloc(size_t nmemb, size_t size)
+{
+ struct memprof_stats *bin;
+ void *ret;
+
+ if (likely(!(profiling & HA_PROF_MEMORY)))
+ return memprof_calloc_handler(nmemb, size);
+
+ ret = memprof_calloc_handler(nmemb, size);
+ size = malloc_usable_size(ret) + sizeof(void *);
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_CALLOC);
+ _HA_ATOMIC_ADD(&bin->alloc_calls, 1);
+ _HA_ATOMIC_ADD(&bin->alloc_tot, size);
+ return ret;
+}
+
+/* This is the new global realloc() function. It must optimize for the normal
+ * case (i.e. profiling disabled) hence the first test to permit a direct jump.
+ * It must remain simple to guarantee the lack of reentrance. stdio is not
+ * possible there even for debugging. The reported size is the really allocated
+ * one as returned by malloc_usable_size(), because this will allow it to be
+ * compared to the one before realloc() or free(). This is a GNU and jemalloc
+ * extension but other systems may also store this size in ptr[-1].
+ * Depending on the old vs new size, it's considered as an allocation or a free
+ * (or neither if the size remains the same).
+ */
+void *realloc(void *ptr, size_t size)
+{
+ struct memprof_stats *bin;
+ size_t size_before;
+ void *ret;
+
+ if (likely(!(profiling & HA_PROF_MEMORY)))
+ return memprof_realloc_handler(ptr, size);
+
+ size_before = malloc_usable_size(ptr);
+ ret = memprof_realloc_handler(ptr, size);
+ size = malloc_usable_size(ret);
+
+ /* only count the extra link for new allocations */
+ if (!ptr)
+ size += sizeof(void *);
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_REALLOC);
+ if (size > size_before) {
+ _HA_ATOMIC_ADD(&bin->alloc_calls, 1);
+ _HA_ATOMIC_ADD(&bin->alloc_tot, size - size_before);
+ } else if (size < size_before) {
+ _HA_ATOMIC_ADD(&bin->free_calls, 1);
+ _HA_ATOMIC_ADD(&bin->free_tot, size_before - size);
+ }
+ return ret;
+}
+
+/* This is the new global free() function. It must optimize for the normal
+ * case (i.e. profiling disabled) hence the first test to permit a direct jump.
+ * It must remain simple to guarantee the lack of reentrance. stdio is not
+ * possible there even for debugging. The reported size is the really allocated
+ * one as returned by malloc_usable_size(), because this will allow it to be
+ * compared to the one before realloc() or free(). This is a GNU and jemalloc
+ * extension but other systems may also store this size in ptr[-1]. Since
+ * free() is often called on NULL pointers to collect garbage at the end of
+ * many functions or during config parsing, as a special case free(NULL)
+ * doesn't update any stats.
+ */
+void free(void *ptr)
+{
+ struct memprof_stats *bin;
+ size_t size_before;
+
+ if (likely(!(profiling & HA_PROF_MEMORY) || !ptr)) {
+ memprof_free_handler(ptr);
+ return;
+ }
+
+ size_before = malloc_usable_size(ptr) + sizeof(void *);
+ memprof_free_handler(ptr);
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_FREE);
+ _HA_ATOMIC_ADD(&bin->free_calls, 1);
+ _HA_ATOMIC_ADD(&bin->free_tot, size_before);
+}
+
+#endif // USE_MEMORY_PROFILING
+
+/* Updates the current thread's statistics about stolen CPU time. The unit for
+ * <stolen> is half-milliseconds.
+ */
+void report_stolen_time(uint64_t stolen)
+{
+ activity[tid].cpust_total += stolen;
+ update_freq_ctr(&activity[tid].cpust_1s, stolen);
+ update_freq_ctr_period(&activity[tid].cpust_15s, 15000, stolen);
+}
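+
+/* For example, a caller measuring 10ms of stolen time reports <stolen>=20,
+ * since the counters are kept in half-millisecond units.
+ */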
+
+/* Update avg_loop value for the current thread and possibly decide to enable
+ * task-level profiling on the current thread based on its average run time.
+ * The <run_time> argument is the number of microseconds elapsed since the
+ * last time poll() returned.
+ */
+void activity_count_runtime(uint32_t run_time)
+{
+ uint32_t up, down;
+
+ /* 1 millisecond per loop on average over last 1024 iterations is
+ * enough to turn on profiling.
+ */
+ up = 1000;
+ down = up * 99 / 100;
+
+ run_time = swrate_add(&activity[tid].avg_loop_us, TIME_STATS_SAMPLES, run_time);
+
+ /* In automatic mode, reaching the "up" threshold on average switches
+ * profiling to "on" when automatic, and going back below the "down"
+ * threshold switches to off. The forced modes don't check the load.
+ */
+ if (!(_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING)) {
+ if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON ||
+ ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AON &&
+ swrate_avg(run_time, TIME_STATS_SAMPLES) >= up)))
+ _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING);
+ } else {
+ if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF ||
+ ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AOFF &&
+ swrate_avg(run_time, TIME_STATS_SAMPLES) <= down)))
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING);
+ }
+}
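+
+/* Numeric example: in "auto" mode, with up=1000us and down=990us, a thread
+ * averaging 1.2ms per loop over the last TIME_STATS_SAMPLES samples turns
+ * task profiling on, and only turns it back off once the average drops below
+ * 0.99ms; the 1% hysteresis avoids flapping around the threshold.
+ */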
+
+#ifdef USE_MEMORY_PROFILING
+/* config parser for global "profiling.memory", accepts "on" or "off" */
+static int cfg_parse_prof_memory(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0) {
+ profiling |= HA_PROF_MEMORY;
+ HA_ATOMIC_STORE(&prof_mem_start_ns, now_ns);
+ }
+ else if (strcmp(args[1], "off") == 0)
+ profiling &= ~HA_PROF_MEMORY;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+#endif // USE_MEMORY_PROFILING
+
+/* config parser for global "profiling.tasks", accepts "on" or "off" */
+static int cfg_parse_prof_tasks(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0) {
+ profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON;
+ HA_ATOMIC_STORE(&prof_task_start_ns, now_ns);
+ }
+ else if (strcmp(args[1], "auto") == 0) {
+ profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
+ HA_ATOMIC_STORE(&prof_task_start_ns, now_ns);
+ }
+ else if (strcmp(args[1], "off") == 0)
+ profiling = (profiling & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF;
+ else {
+ memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
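+
+/* Illustrative "global" section using the keywords registered below:
+ *
+ *     global
+ *         profiling.tasks auto      # on | auto | off
+ *         profiling.memory on       # only in USE_MEMORY_PROFILING builds
+ */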
+
+/* parse a "set profiling" command. It always returns 1. */
+static int cli_parse_set_profiling(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (strcmp(args[2], "memory") == 0) {
+#ifdef USE_MEMORY_PROFILING
+ if (strcmp(args[3], "on") == 0) {
+ unsigned int old = profiling;
+ int i;
+
+ while (!_HA_ATOMIC_CAS(&profiling, &old, old | HA_PROF_MEMORY))
+ ;
+
+ HA_ATOMIC_STORE(&prof_mem_start_ns, now_ns);
+ HA_ATOMIC_STORE(&prof_mem_stop_ns, 0);
+
+ /* also flush current profiling stats */
+ for (i = 0; i < sizeof(memprof_stats) / sizeof(memprof_stats[0]); i++) {
+ HA_ATOMIC_STORE(&memprof_stats[i].alloc_calls, 0);
+ HA_ATOMIC_STORE(&memprof_stats[i].free_calls, 0);
+ HA_ATOMIC_STORE(&memprof_stats[i].alloc_tot, 0);
+ HA_ATOMIC_STORE(&memprof_stats[i].free_tot, 0);
+ HA_ATOMIC_STORE(&memprof_stats[i].caller, NULL);
+ }
+ }
+ else if (strcmp(args[3], "off") == 0) {
+ unsigned int old = profiling;
+
+ while (!_HA_ATOMIC_CAS(&profiling, &old, old & ~HA_PROF_MEMORY))
+ ;
+
+ if (HA_ATOMIC_LOAD(&prof_mem_start_ns))
+ HA_ATOMIC_STORE(&prof_mem_stop_ns, now_ns);
+ }
+ else
+ return cli_err(appctx, "Expects either 'on' or 'off'.\n");
+ return 1;
+#else
+ return cli_err(appctx, "Memory profiling not compiled in.\n");
+#endif
+ }
+
+ if (strcmp(args[2], "tasks") != 0)
+ return cli_err(appctx, "Expects either 'tasks' or 'memory'.\n");
+
+ if (strcmp(args[3], "on") == 0) {
+ unsigned int old = profiling;
+ int i;
+
+ while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_ON))
+ ;
+
+ HA_ATOMIC_STORE(&prof_task_start_ns, now_ns);
+ HA_ATOMIC_STORE(&prof_task_stop_ns, 0);
+
+ /* also flush current profiling stats */
+ for (i = 0; i < SCHED_ACT_HASH_BUCKETS; i++) {
+ HA_ATOMIC_STORE(&sched_activity[i].calls, 0);
+ HA_ATOMIC_STORE(&sched_activity[i].cpu_time, 0);
+ HA_ATOMIC_STORE(&sched_activity[i].lat_time, 0);
+ HA_ATOMIC_STORE(&sched_activity[i].func, NULL);
+ HA_ATOMIC_STORE(&sched_activity[i].caller, NULL);
+ }
+ }
+ else if (strcmp(args[3], "auto") == 0) {
+ unsigned int old = profiling;
+ unsigned int new;
+
+ do {
+ if ((old & HA_PROF_TASKS_MASK) >= HA_PROF_TASKS_AON)
+ new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AON;
+ else
+ new = (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_AOFF;
+ } while (!_HA_ATOMIC_CAS(&profiling, &old, new));
+
+ HA_ATOMIC_STORE(&prof_task_start_ns, now_ns);
+ HA_ATOMIC_STORE(&prof_task_stop_ns, 0);
+ }
+ else if (strcmp(args[3], "off") == 0) {
+ unsigned int old = profiling;
+ while (!_HA_ATOMIC_CAS(&profiling, &old, (old & ~HA_PROF_TASKS_MASK) | HA_PROF_TASKS_OFF))
+ ;
+
+ if (HA_ATOMIC_LOAD(&prof_task_start_ns))
+ HA_ATOMIC_STORE(&prof_task_stop_ns, now_ns);
+ }
+ else
+ return cli_err(appctx, "Expects 'on', 'auto', or 'off'.\n");
+
+ return 1;
+}
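+
+/* Example CLI invocations accepted by the parser above (the socket path is
+ * an assumption; any configured stats socket works):
+ *
+ *     $ echo "set profiling tasks auto" | socat stdio /var/run/haproxy.sock
+ *     $ echo "set profiling memory on"  | socat stdio /var/run/haproxy.sock
+ */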
+
+static int cmp_sched_activity_calls(const void *a, const void *b)
+{
+ const struct sched_activity *l = (const struct sched_activity *)a;
+ const struct sched_activity *r = (const struct sched_activity *)b;
+
+ if (l->calls > r->calls)
+ return -1;
+ else if (l->calls < r->calls)
+ return 1;
+ else
+ return 0;
+}
+
+/* sort by address first, then by call count */
+static int cmp_sched_activity_addr(const void *a, const void *b)
+{
+ const struct sched_activity *l = (const struct sched_activity *)a;
+ const struct sched_activity *r = (const struct sched_activity *)b;
+
+ if (l->func > r->func)
+ return -1;
+ else if (l->func < r->func)
+ return 1;
+ else if (l->calls > r->calls)
+ return -1;
+ else if (l->calls < r->calls)
+ return 1;
+ else
+ return 0;
+}
+
+/* sort by cpu time first, then by inverse call count (to spot highest offenders) */
+static int cmp_sched_activity_cpu(const void *a, const void *b)
+{
+ const struct sched_activity *l = (const struct sched_activity *)a;
+ const struct sched_activity *r = (const struct sched_activity *)b;
+
+ if (l->cpu_time > r->cpu_time)
+ return -1;
+ else if (l->cpu_time < r->cpu_time)
+ return 1;
+ else if (l->calls < r->calls)
+ return -1;
+ else if (l->calls > r->calls)
+ return 1;
+ else
+ return 0;
+}
+
+#ifdef USE_MEMORY_PROFILING
+/* used by qsort below */
+static int cmp_memprof_stats(const void *a, const void *b)
+{
+ const struct memprof_stats *l = (const struct memprof_stats *)a;
+ const struct memprof_stats *r = (const struct memprof_stats *)b;
+
+ if (l->alloc_tot + l->free_tot > r->alloc_tot + r->free_tot)
+ return -1;
+ else if (l->alloc_tot + l->free_tot < r->alloc_tot + r->free_tot)
+ return 1;
+ else
+ return 0;
+}
+
+static int cmp_memprof_addr(const void *a, const void *b)
+{
+ const struct memprof_stats *l = (const struct memprof_stats *)a;
+ const struct memprof_stats *r = (const struct memprof_stats *)b;
+
+ if (l->caller > r->caller)
+ return -1;
+ else if (l->caller < r->caller)
+ return 1;
+ else
+ return 0;
+}
+#endif // USE_MEMORY_PROFILING
+
+/* Computes the index of function pointer <func> and caller <caller> for use
+ * with sched_activity[] or any other similar array passed in <array>, and
+ * returns a pointer to the entry after having atomically assigned it to this
+ * function pointer and caller combination. Note that in case of collision,
+ * the first entry is returned instead ("other").
+ */
+struct sched_activity *sched_activity_entry(struct sched_activity *array, const void *func, const void *caller)
+{
+ uint32_t hash = ptr2_hash(func, caller, SCHED_ACT_HASH_BITS);
+ struct sched_activity *ret;
+ const void *old;
+ int tries = 16;
+
+ for (tries = 16; tries > 0; tries--, hash++) {
+ ret = &array[hash];
+
+ while (1) {
+ if (likely(ret->func)) {
+ if (likely(ret->func == func && ret->caller == caller))
+ return ret;
+ break;
+ }
+
+ /* try to create the new entry. Func is sufficient to
+ * reserve the node.
+ */
+ old = NULL;
+ if (HA_ATOMIC_CAS(&ret->func, &old, func)) {
+ ret->caller = caller;
+ return ret;
+ }
+ /* changed in parallel, check again */
+ }
+ }
+
+ return array;
+}
+
+/* This function dumps all profiling settings. It returns 0 if the output
+ * buffer is full and it needs to be called again, otherwise non-zero.
+ * It dumps some parts depending on the following states from show_prof_ctx:
+ * dump_step:
+ * 0, 4: dump status, then jump to 1 if 0
+ * 1, 5: dump tasks, then jump to 2 if 1
+ * 2, 6: dump memory, then stop
+ * linenum:
+ * restart line for each step (starts at zero)
+ * maxcnt:
+ * may contain a configured max line count for each step (0=not set)
+ * by_what:
+ * 0: sort by usage
+ * 1: sort by address
+ * 2: sort by time
+ */
+static int cli_io_handler_show_profiling(struct appctx *appctx)
+{
+ struct show_prof_ctx *ctx = appctx->svcctx;
+ struct sched_activity tmp_activity[SCHED_ACT_HASH_BUCKETS] __attribute__((aligned(64)));
+#ifdef USE_MEMORY_PROFILING
+ struct memprof_stats tmp_memstats[MEMPROF_HASH_BUCKETS + 1];
+ unsigned long long tot_alloc_calls, tot_free_calls;
+ unsigned long long tot_alloc_bytes, tot_free_bytes;
+#endif
+ struct stconn *sc = appctx_sc(appctx);
+ struct buffer *name_buffer = get_trash_chunk();
+ const struct ha_caller *caller;
+ const char *str;
+ int max_lines;
+ int i, j, max;
+
+ /* FIXME: Don't watch the other side ! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ chunk_reset(&trash);
+
+ switch (profiling & HA_PROF_TASKS_MASK) {
+ case HA_PROF_TASKS_AOFF: str="auto-off"; break;
+ case HA_PROF_TASKS_AON: str="auto-on"; break;
+ case HA_PROF_TASKS_ON: str="on"; break;
+ default: str="off"; break;
+ }
+
+ if ((ctx->dump_step & 3) != 0)
+ goto skip_status;
+
+ chunk_printf(&trash,
+ "Per-task CPU profiling : %-8s # set profiling tasks {on|auto|off}\n"
+ "Memory usage profiling : %-8s # set profiling memory {on|off}\n",
+ str, (profiling & HA_PROF_MEMORY) ? "on" : "off");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ return 0;
+ }
+
+ ctx->linenum = 0; // reset first line to dump
+ if ((ctx->dump_step & 4) == 0)
+ ctx->dump_step++; // next step
+
+ skip_status:
+ if ((ctx->dump_step & 3) != 1)
+ goto skip_tasks;
+
+ memcpy(tmp_activity, sched_activity, sizeof(tmp_activity));
+ /* for addr sort and for callee aggregation we have to first sort by address */
+ if (ctx->aggr || ctx->by_what == 1) // sort by addr
+ qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_addr);
+
+ if (ctx->aggr) {
+ /* merge entries for the same callee and reset their count */
+ for (i = j = 0; i < SCHED_ACT_HASH_BUCKETS; i = j) {
+ for (j = i + 1; j < SCHED_ACT_HASH_BUCKETS && tmp_activity[j].func == tmp_activity[i].func; j++) {
+ tmp_activity[i].calls += tmp_activity[j].calls;
+ tmp_activity[i].cpu_time += tmp_activity[j].cpu_time;
+ tmp_activity[i].lat_time += tmp_activity[j].lat_time;
+ tmp_activity[j].calls = 0;
+ }
+ }
+ }
+
+ if (!ctx->by_what) // sort by usage
+ qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_calls);
+ else if (ctx->by_what == 2) // by cpu_tot
+ qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_cpu);
+
+ if (!ctx->linenum)
+ chunk_appendf(&trash, "Tasks activity over %.3f sec till %.3f sec ago:\n"
+ " function calls cpu_tot cpu_avg lat_tot lat_avg\n",
+ (prof_task_start_ns ? (prof_task_stop_ns ? prof_task_stop_ns : now_ns) - prof_task_start_ns : 0) / 1000000000.0,
+ (prof_task_stop_ns ? now_ns - prof_task_stop_ns : 0) / 1000000000.0);
+
+ max_lines = ctx->maxcnt;
+ if (!max_lines)
+ max_lines = SCHED_ACT_HASH_BUCKETS;
+
+ for (i = ctx->linenum; i < max_lines; i++) {
+ if (!tmp_activity[i].calls)
+ continue; // skip aggregated or empty entries
+
+ ctx->linenum = i;
+ chunk_reset(name_buffer);
+ caller = HA_ATOMIC_LOAD(&tmp_activity[i].caller);
+
+ if (!tmp_activity[i].func)
+ chunk_printf(name_buffer, "other");
+ else
+ resolve_sym_name(name_buffer, "", tmp_activity[i].func);
+
+ /* reserve 35 chars for name+' '+#calls, knowing that longer names
+ * are often used for less often called functions.
+ */
+ max = 35 - name_buffer->data;
+ if (max < 1)
+ max = 1;
+ chunk_appendf(&trash, " %s%*llu", name_buffer->area, max, (unsigned long long)tmp_activity[i].calls);
+
+ print_time_short(&trash, " ", tmp_activity[i].cpu_time, "");
+ print_time_short(&trash, " ", tmp_activity[i].cpu_time / tmp_activity[i].calls, "");
+ print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
+ print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "");
+
+ if (caller && !ctx->aggr && caller->what <= WAKEUP_TYPE_APPCTX_WAKEUP)
+ chunk_appendf(&trash, " <- %s@%s:%d %s",
+ caller->func, caller->file, caller->line,
+ task_wakeup_type_str(caller->what));
+
+ b_putchr(&trash, '\n');
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ return 0;
+ }
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ return 0;
+ }
+
+ ctx->linenum = 0; // reset first line to dump
+ if ((ctx->dump_step & 4) == 0)
+ ctx->dump_step++; // next step
+
+ skip_tasks:
+
+#ifdef USE_MEMORY_PROFILING
+ if ((ctx->dump_step & 3) != 2)
+ goto skip_mem;
+
+ memcpy(tmp_memstats, memprof_stats, sizeof(tmp_memstats));
+ if (ctx->by_what)
+ qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_addr);
+ else
+ qsort(tmp_memstats, MEMPROF_HASH_BUCKETS+1, sizeof(tmp_memstats[0]), cmp_memprof_stats);
+
+ if (!ctx->linenum)
+ chunk_appendf(&trash,
+ "Alloc/Free statistics by call place over %.3f sec till %.3f sec ago:\n"
+ " Calls | Tot Bytes | Caller and method\n"
+ "<- alloc -> <- free ->|<-- alloc ---> <-- free ---->|\n",
+ (prof_mem_start_ns ? (prof_mem_stop_ns ? prof_mem_stop_ns : now_ns) - prof_mem_start_ns : 0) / 1000000000.0,
+ (prof_mem_stop_ns ? now_ns - prof_mem_stop_ns : 0) / 1000000000.0);
+
+ max_lines = ctx->maxcnt;
+ if (!max_lines)
+ max_lines = MEMPROF_HASH_BUCKETS + 1;
+
+ for (i = ctx->linenum; i < max_lines; i++) {
+ struct memprof_stats *entry = &tmp_memstats[i];
+
+ ctx->linenum = i;
+ if (!entry->alloc_calls && !entry->free_calls)
+ continue;
+ chunk_appendf(&trash, "%11llu %11llu %14llu %14llu| %16p ",
+ entry->alloc_calls, entry->free_calls,
+ entry->alloc_tot, entry->free_tot,
+ entry->caller);
+
+ if (entry->caller)
+ resolve_sym_name(&trash, NULL, entry->caller);
+ else
+ chunk_appendf(&trash, "[other]");
+
+ chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method],
+ (long long)(entry->alloc_tot - entry->free_tot) / (long long)(entry->alloc_calls + entry->free_calls));
+
+ if (entry->alloc_tot && entry->free_tot) {
+ /* that's a realloc, show the total diff to help spot leaks */
+ chunk_appendf(&trash," [delta=%lld]", (long long)(entry->alloc_tot - entry->free_tot));
+ }
+
+ if (entry->info) {
+ /* that's a pool name */
+ const struct pool_head *pool = entry->info;
+ chunk_appendf(&trash," [pool=%s]", pool->name);
+ }
+
+ chunk_appendf(&trash, "\n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+ }
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ tot_alloc_calls = tot_free_calls = tot_alloc_bytes = tot_free_bytes = 0;
+ for (i = 0; i < max_lines; i++) {
+ tot_alloc_calls += tmp_memstats[i].alloc_calls;
+ tot_free_calls += tmp_memstats[i].free_calls;
+ tot_alloc_bytes += tmp_memstats[i].alloc_tot;
+ tot_free_bytes += tmp_memstats[i].free_tot;
+ }
+
+ chunk_appendf(&trash,
+ "-----------------------|-----------------------------|\n"
+ "%11llu %11llu %14llu %14llu| <- Total; Delta_calls=%lld; Delta_bytes=%lld\n",
+ tot_alloc_calls, tot_free_calls,
+ tot_alloc_bytes, tot_free_bytes,
+ tot_alloc_calls - tot_free_calls,
+ tot_alloc_bytes - tot_free_bytes);
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ ctx->linenum = 0; // reset first line to dump
+ if ((ctx->dump_step & 4) == 0)
+ ctx->dump_step++; // next step
+
+ skip_mem:
+#endif // USE_MEMORY_PROFILING
+
+ return 1;
+}
+
+/* parse a "show profiling" command. It returns 1 on failure, 0 if it starts to dump.
+ * - ctx->dump_step is set to the first state (0=all, 4=status, 5=tasks, 6=memory)
+ * - ctx->by_what is set to 1 to sort by address, 2 by time, 0 (default) by usage
+ * - ctx->maxcnt is set to the max number of output lines per step
+ */
+static int cli_parse_show_profiling(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_prof_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ for (arg = 2; *args[arg]; arg++) {
+ if (strcmp(args[arg], "all") == 0) {
+ ctx->dump_step = 0; // will cycle through 0,1,2; default
+ }
+ else if (strcmp(args[arg], "status") == 0) {
+ ctx->dump_step = 4; // will visit status only
+ }
+ else if (strcmp(args[arg], "tasks") == 0) {
+ ctx->dump_step = 5; // will visit tasks only
+ }
+ else if (strcmp(args[arg], "memory") == 0) {
+ ctx->dump_step = 6; // will visit memory only
+ }
+ else if (strcmp(args[arg], "byaddr") == 0) {
+ ctx->by_what = 1; // sort output by address instead of usage
+ }
+ else if (strcmp(args[arg], "bytime") == 0) {
+ ctx->by_what = 2; // sort output by total time instead of usage
+ }
+ else if (strcmp(args[arg], "aggr") == 0) {
+ ctx->aggr = 1; // aggregate output by callee
+ }
+ else if (isdigit((unsigned char)*args[arg])) {
+ ctx->maxcnt = atoi(args[arg]); // number of entries to dump
+ }
+ else
+ return cli_err(appctx, "Expects either 'all', 'status', 'tasks', 'memory', 'byaddr', 'bytime', 'aggr' or a max number of output lines.\n");
+ }
+ return 0;
+}
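+
+/* Example CLI invocation (sketch): dump the 20 top task entries sorted by
+ * total CPU time and aggregated per callee:
+ *
+ *     show profiling tasks bytime aggr 20
+ */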
+
+/* This function scans all threads' run queues and collects statistics about
+ * running tasks. It returns 0 if the output buffer is full and it needs to be
+ * called again, otherwise non-zero.
+ */
+static int cli_io_handler_show_tasks(struct appctx *appctx)
+{
+ struct sched_activity tmp_activity[SCHED_ACT_HASH_BUCKETS] __attribute__((aligned(64)));
+ struct stconn *sc = appctx_sc(appctx);
+ struct buffer *name_buffer = get_trash_chunk();
+ struct sched_activity *entry;
+ const struct tasklet *tl;
+ const struct task *t;
+ uint64_t now_ns, lat;
+ struct eb32_node *rqnode;
+ uint64_t tot_calls;
+ int thr, queue;
+ int i, max;
+
+ /* FIXME: Don't watch the other side ! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ /* It's not possible to scan queues in small chunks and yield in the
+ * middle of the dump and come back again. So what we're doing instead
+ * is to freeze all threads and inspect their queues at once as fast as
+ * possible, using a sched_activity array to collect metrics with
+ * limited collision, then we'll report statistics only. The tasks'
+ * #calls will reflect the number of occurrences, and the lat_time will
+ * reflect the latency when set. We prefer to take the time before
+ * calling thread_isolate() so that the wait time doesn't impact the
+ * measurement accuracy. However this requires taking care of negative
+ * times since tasks might be queued after we retrieved the time.
+ */
+
+ now_ns = now_mono_time();
+ memset(tmp_activity, 0, sizeof(tmp_activity));
+
+ thread_isolate();
+
+ /* 1. global run queue */
+
+#ifdef USE_THREAD
+ for (thr = 0; thr < global.nbthread; thr++) {
+ /* task run queue */
+ rqnode = eb32_first(&ha_thread_ctx[thr].rqueue_shared);
+ while (rqnode) {
+ t = eb32_entry(rqnode, struct task, rq);
+ entry = sched_activity_entry(tmp_activity, t->process, NULL);
+ if (t->wake_date) {
+ lat = now_ns - t->wake_date;
+ if ((int64_t)lat > 0)
+ entry->lat_time += lat;
+ }
+ entry->calls++;
+ rqnode = eb32_next(rqnode);
+ }
+ }
+#endif
+	/* 2. all threads' local run queues */
+ for (thr = 0; thr < global.nbthread; thr++) {
+ /* task run queue */
+ rqnode = eb32_first(&ha_thread_ctx[thr].rqueue);
+ while (rqnode) {
+ t = eb32_entry(rqnode, struct task, rq);
+ entry = sched_activity_entry(tmp_activity, t->process, NULL);
+ if (t->wake_date) {
+ lat = now_ns - t->wake_date;
+ if ((int64_t)lat > 0)
+ entry->lat_time += lat;
+ }
+ entry->calls++;
+ rqnode = eb32_next(rqnode);
+ }
+
+ /* shared tasklet list */
+ list_for_each_entry(tl, mt_list_to_list(&ha_thread_ctx[thr].shared_tasklet_list), list) {
+ t = (const struct task *)tl;
+ entry = sched_activity_entry(tmp_activity, t->process, NULL);
+ if (!TASK_IS_TASKLET(t) && t->wake_date) {
+ lat = now_ns - t->wake_date;
+ if ((int64_t)lat > 0)
+ entry->lat_time += lat;
+ }
+ entry->calls++;
+ }
+
+ /* classful tasklets */
+ for (queue = 0; queue < TL_CLASSES; queue++) {
+ list_for_each_entry(tl, &ha_thread_ctx[thr].tasklets[queue], list) {
+ t = (const struct task *)tl;
+ entry = sched_activity_entry(tmp_activity, t->process, NULL);
+ if (!TASK_IS_TASKLET(t) && t->wake_date) {
+ lat = now_ns - t->wake_date;
+ if ((int64_t)lat > 0)
+ entry->lat_time += lat;
+ }
+ entry->calls++;
+ }
+ }
+ }
+
+ /* hopefully we're done */
+ thread_release();
+
+ chunk_reset(&trash);
+
+ tot_calls = 0;
+ for (i = 0; i < SCHED_ACT_HASH_BUCKETS; i++)
+ tot_calls += tmp_activity[i].calls;
+
+ qsort(tmp_activity, SCHED_ACT_HASH_BUCKETS, sizeof(tmp_activity[0]), cmp_sched_activity_calls);
+
+ chunk_appendf(&trash, "Running tasks: %d (%d threads)\n"
+ " function places %% lat_tot lat_avg\n",
+ (int)tot_calls, global.nbthread);
+
+ for (i = 0; i < SCHED_ACT_HASH_BUCKETS && tmp_activity[i].calls; i++) {
+ chunk_reset(name_buffer);
+
+ if (!tmp_activity[i].func)
+ chunk_printf(name_buffer, "other");
+ else
+ resolve_sym_name(name_buffer, "", tmp_activity[i].func);
+
+ /* reserve 35 chars for name+' '+#calls, knowing that longer names
+ * are often used for less often called functions.
+ */
+ max = 35 - name_buffer->data;
+ if (max < 1)
+ max = 1;
+ chunk_appendf(&trash, " %s%*llu %3d.%1d",
+ name_buffer->area, max, (unsigned long long)tmp_activity[i].calls,
+ (int)(100ULL * tmp_activity[i].calls / tot_calls),
+ (int)((1000ULL * tmp_activity[i].calls / tot_calls)%10));
+ print_time_short(&trash, " ", tmp_activity[i].lat_time, "");
+ print_time_short(&trash, " ", tmp_activity[i].lat_time / tmp_activity[i].calls, "\n");
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ return 0;
+ }
+ return 1;
+}
+
+/* This function dumps some activity counters used by developers and support to
+ * rule out some hypothesis during bug reports. It returns 0 if the output
+ * buffer is full and it needs to be called again, otherwise non-zero. It dumps
+ * everything at once in the buffer and is not designed to do it in multiple
+ * passes.
+ */
+static int cli_io_handler_show_activity(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct show_activity_ctx *actctx = appctx->svcctx;
+ int tgt = actctx->thr; // target thread, -1 for all, 0 for total only
+ uint up_sec, up_usec;
+ int base_line;
+ ullong up;
+
+ /* FIXME: Don't watch the other side ! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ /* this macro is used below to dump values. The thread number is "thr",
+ * and runs from 0 to nbt-1 when values are printed using the formula.
+	 * We normally try to dump integral lines in order to keep counters
+ * consistent. If we fail once on a line, we'll detect it next time
+ * because we'll have committed actctx->col=1 thanks to the header
+ * always being dumped individually. We'll be called again thanks to
+ * the header being present, leaving some data in the buffer. In this
+ * case once we restart we'll proceed one column at a time to make sure
+ * we don't overflow the buffer again.
+ */
+#undef SHOW_VAL
+#define SHOW_VAL(header, x, formula) \
+ do { \
+ unsigned int _v[MAX_THREADS]; \
+ unsigned int _tot; \
+ const int _nbt = global.nbthread; \
+ int restarted = actctx->col > 0; \
+ int thr; \
+ _tot = thr = 0; \
+ do { \
+ _tot += _v[thr] = (x); \
+ } while (++thr < _nbt); \
+ for (thr = actctx->col - 2; thr <= _nbt; thr++) { \
+ if (thr == -2) { \
+ /* line header */ \
+ chunk_appendf(&trash, "%s", header); \
+ } \
+ else if (thr == -1) { \
+ /* aggregate value only for multi-thread: all & 0 */ \
+ if (_nbt > 1 && tgt <= 0) \
+ chunk_appendf(&trash, " %u%s", \
+ (formula), \
+ (tgt < 0) ? \
+ " [" : ""); \
+ } \
+ else if (thr < _nbt) { \
+ /* individual value only for all or exact value */ \
+ if (tgt == -1 || tgt == thr+1) \
+ chunk_appendf(&trash, " %u", \
+ _v[thr]); \
+ } \
+ else /* thr == _nbt */ { \
+ chunk_appendf(&trash, "%s\n", \
+ (_nbt > 1 && tgt < 0) ? \
+ " ]" : ""); \
+ } \
+ if (thr == -2 || restarted) { \
+ /* failed once, emit one column at a time */\
+ if (applet_putchk(appctx, &trash) == -1) \
+ break; /* main loop handles it */ \
+ chunk_reset(&trash); \
+ actctx->col = thr + 3; \
+ } \
+ } \
+ if (applet_putchk(appctx, &trash) == -1) \
+ break; /* main loop will handle it */ \
+ /* OK dump done for this line */ \
+ chunk_reset(&trash); \
+ if (thr > _nbt) \
+ actctx->col = 0; \
+ } while (0)
+
+ /* retrieve uptime */
+ up = now_ns - start_time_ns;
+ up_sec = ns_to_sec(up);
+ up_usec = (up / 1000U) % 1000000U;
+
+ /* iterate over all dump lines. It happily skips over holes so it's
+ * not a problem not to have an exact match, we just need to have
+ * stable and consistent lines during a dump.
+ */
+ base_line = __LINE__;
+ do {
+ chunk_reset(&trash);
+
+ switch (actctx->line + base_line) {
+ case __LINE__: chunk_appendf(&trash, "thread_id: %u (%u..%u)\n", tid + 1, 1, global.nbthread); break;
+ case __LINE__: chunk_appendf(&trash, "date_now: %lu.%06lu\n", (ulong)date.tv_sec, (ulong)date.tv_usec); break;
+ case __LINE__: chunk_appendf(&trash, "uptime_now: %u.%06u\n", up_sec, up_usec); break;
+ case __LINE__: SHOW_VAL("ctxsw:", activity[thr].ctxsw, _tot); break;
+ case __LINE__: SHOW_VAL("tasksw:", activity[thr].tasksw, _tot); break;
+ case __LINE__: SHOW_VAL("empty_rq:", activity[thr].empty_rq, _tot); break;
+ case __LINE__: SHOW_VAL("long_rq:", activity[thr].long_rq, _tot); break;
+ case __LINE__: SHOW_VAL("curr_rq:", _HA_ATOMIC_LOAD(&ha_thread_ctx[thr].rq_total), _tot); break;
+ case __LINE__: SHOW_VAL("loops:", activity[thr].loops, _tot); break;
+ case __LINE__: SHOW_VAL("wake_tasks:", activity[thr].wake_tasks, _tot); break;
+ case __LINE__: SHOW_VAL("wake_signal:", activity[thr].wake_signal, _tot); break;
+ case __LINE__: SHOW_VAL("poll_io:", activity[thr].poll_io, _tot); break;
+ case __LINE__: SHOW_VAL("poll_exp:", activity[thr].poll_exp, _tot); break;
+ case __LINE__: SHOW_VAL("poll_drop_fd:", activity[thr].poll_drop_fd, _tot); break;
+ case __LINE__: SHOW_VAL("poll_skip_fd:", activity[thr].poll_skip_fd, _tot); break;
+ case __LINE__: SHOW_VAL("conn_dead:", activity[thr].conn_dead, _tot); break;
+ case __LINE__: SHOW_VAL("stream_calls:", activity[thr].stream_calls, _tot); break;
+ case __LINE__: SHOW_VAL("pool_fail:", activity[thr].pool_fail, _tot); break;
+ case __LINE__: SHOW_VAL("buf_wait:", activity[thr].buf_wait, _tot); break;
+ case __LINE__: SHOW_VAL("cpust_ms_tot:", activity[thr].cpust_total / 2, _tot); break;
+ case __LINE__: SHOW_VAL("cpust_ms_1s:", read_freq_ctr(&activity[thr].cpust_1s) / 2, _tot); break;
+ case __LINE__: SHOW_VAL("cpust_ms_15s:", read_freq_ctr_period(&activity[thr].cpust_15s, 15000) / 2, _tot); break;
+ case __LINE__: SHOW_VAL("avg_cpu_pct:", (100 - ha_thread_ctx[thr].idle_pct), (_tot + _nbt/2) / _nbt); break;
+ case __LINE__: SHOW_VAL("avg_loop_us:", swrate_avg(activity[thr].avg_loop_us, TIME_STATS_SAMPLES), (_tot + _nbt/2) / _nbt); break;
+ case __LINE__: SHOW_VAL("accepted:", activity[thr].accepted, _tot); break;
+ case __LINE__: SHOW_VAL("accq_pushed:", activity[thr].accq_pushed, _tot); break;
+ case __LINE__: SHOW_VAL("accq_full:", activity[thr].accq_full, _tot); break;
+#ifdef USE_THREAD
+ case __LINE__: SHOW_VAL("accq_ring:", accept_queue_ring_len(&accept_queue_rings[thr]), _tot); break;
+ case __LINE__: SHOW_VAL("fd_takeover:", activity[thr].fd_takeover, _tot); break;
+ case __LINE__: SHOW_VAL("check_adopted:",activity[thr].check_adopted, _tot); break;
+#endif
+ case __LINE__: SHOW_VAL("check_started:",activity[thr].check_started, _tot); break;
+ case __LINE__: SHOW_VAL("check_active:", _HA_ATOMIC_LOAD(&ha_thread_ctx[thr].active_checks), _tot); break;
+ case __LINE__: SHOW_VAL("check_running:",_HA_ATOMIC_LOAD(&ha_thread_ctx[thr].running_checks), _tot); break;
+
+#if defined(DEBUG_DEV)
+ /* keep these ones at the end */
+ case __LINE__: SHOW_VAL("ctr0:", activity[thr].ctr0, _tot); break;
+ case __LINE__: SHOW_VAL("ctr1:", activity[thr].ctr1, _tot); break;
+ case __LINE__: SHOW_VAL("ctr2:", activity[thr].ctr2, _tot); break;
+#endif
+ }
+#undef SHOW_VAL
+
+ /* try to dump what was possibly not dumped yet */
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* buffer full, retry later */
+ return 0;
+ }
+ /* line was dumped, let's commit it */
+ actctx->line++;
+ } while (actctx->line + base_line < __LINE__);
+
+ /* dump complete */
+ return 1;
+}
+
+/* parse a "show activity" CLI request. Returns 0 if it needs to continue, 1 if it
+ * wants to stop here. It sets a show_activity_ctx context where, if a specific
+ * thread is requested, it puts the thread number into ->thr otherwise sets it to
+ * -1.
+ */
+static int cli_parse_show_activity(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_activity_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ ctx->thr = -1; // show all by default
+ if (*args[2])
+ ctx->thr = atoi(args[2]);
+
+ if (ctx->thr < -1 || ctx->thr > global.nbthread)
+ return cli_err(appctx, "Thread ID number must be between -1 and nbthread\n");
+
+ return 0;
+}
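+
+/* Example CLI invocations accepted by the parser above:
+ *
+ *     show activity        # all threads, with per-thread breakdown
+ *     show activity 0      # aggregated totals only
+ *     show activity 3      # thread 3 only
+ */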
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+#ifdef USE_MEMORY_PROFILING
+ { CFG_GLOBAL, "profiling.memory", cfg_parse_prof_memory },
+#endif
+ { CFG_GLOBAL, "profiling.tasks", cfg_parse_prof_tasks },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "set", "profiling", NULL }, "set profiling <what> {auto|on|off} : enable/disable resource profiling (tasks,memory)", cli_parse_set_profiling, NULL },
+ { { "show", "activity", NULL }, "show activity [-1|0|thread_num] : show per-thread activity stats (for support/developers)", cli_parse_show_activity, cli_io_handler_show_activity, NULL },
+ { { "show", "profiling", NULL }, "show profiling [<what>|<#lines>|<opts>]*: show profiling state (all,status,tasks,memory)", cli_parse_show_profiling, cli_io_handler_show_profiling, NULL },
+ { { "show", "tasks", NULL }, "show tasks : show running tasks", NULL, cli_io_handler_show_tasks, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/applet.c b/src/applet.c
new file mode 100644
index 0000000..a5b0946
--- /dev/null
+++ b/src/applet.c
@@ -0,0 +1,501 @@
+/*
+ * Functions managing applets
+ *
+ * Copyright 2000-2015 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/channel.h>
+#include <haproxy/list.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/trace.h>
+
+unsigned int nb_applets = 0;
+
+DECLARE_POOL(pool_head_appctx, "appctx", sizeof(struct appctx));
+
+
+/* trace source and events */
+static void applet_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * app - applet
+ */
+static const struct trace_event applet_trace_events[] = {
+#define APPLET_EV_NEW (1ULL << 0)
+ { .mask = APPLET_EV_NEW, .name = "app_new", .desc = "new appctx" },
+#define APPLET_EV_FREE (1ULL << 1)
+ { .mask = APPLET_EV_FREE, .name = "app_free", .desc = "free appctx" },
+#define APPLET_EV_RELEASE (1ULL << 2)
+ { .mask = APPLET_EV_RELEASE, .name = "app_release", .desc = "release appctx" },
+#define APPLET_EV_PROCESS (1ULL << 3)
+ { .mask = APPLET_EV_PROCESS, .name = "app_proc", .desc = "process appctx" },
+#define APPLET_EV_ERR (1ULL << 4)
+ { .mask = APPLET_EV_ERR, .name = "app_err", .desc = "error on appctx" },
+#define APPLET_EV_START (1ULL << 5)
+ { .mask = APPLET_EV_START, .name = "app_start", .desc = "start appctx" },
+ {}
+};
+
+static const struct name_desc applet_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the applet */ },
+ /* arg2 */ { },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc applet_trace_decoding[] = {
+#define STRM_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define STRM_VERB_MINIMAL 2
+ { .name="minimal", .desc="report info on streams and connectors" },
+#define STRM_VERB_SIMPLE 3
+ { .name="simple", .desc="add info on request and response channels" },
+#define STRM_VERB_ADVANCED 4
+ { .name="advanced", .desc="add info on channel's buffer for data and developer levels only" },
+#define STRM_VERB_COMPLETE 5
+ { .name="complete", .desc="add info on channel's buffer" },
+ { /* end */ }
+};
+
+static struct trace_source trace_applet = {
+ .name = IST("applet"),
+ .desc = "Applet endpoint",
+ .arg_def = TRC_ARG1_APPCTX, // TRACE()'s first argument is always an appctx
+ .default_cb = applet_trace,
+ .known_events = applet_trace_events,
+ .lockon_args = applet_trace_lockon_args,
+ .decoding = applet_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_applet
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+/* the applet traces always expect that arg1, if non-null, is an appctx (from
+ * which we can derive everything).
+ */
+static void applet_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct appctx *appctx = a1;
+ const struct stconn *sc = NULL, *sco = NULL;
+ const struct stream *s = NULL;
+ const struct channel *ic = NULL, *oc = NULL;
+
+ if (!appctx || src->verbosity < STRM_VERB_CLEAN)
+ return;
+
+ sc = appctx_sc(appctx);
+ if (sc) {
+ s = __sc_strm(sc);
+ sco = sc_opposite(sc);
+ ic = sc_ic(sc);
+ oc = sc_oc(sc);
+ }
+
+ /* General info about the stream (htx/tcp, id...) */
+ if (s)
+ chunk_appendf(&trace_buf, " : [%s,%s]",
+ appctx->applet->name, ((s->flags & SF_HTX) ? "HTX" : "TCP"));
+ else
+ chunk_appendf(&trace_buf, " : [%s]", appctx->applet->name);
+
+ if (sc)
+ /* local and opposite stream connector state */
+ chunk_appendf(&trace_buf, " SC=(%s,%s)",
+ sc_state_str(sc->state), sc_state_str(sco->state));
+ else
+ /* local and opposite stream connector state */
+ chunk_appendf(&trace_buf, " SC=(none,none)");
+
+ if (src->verbosity == STRM_VERB_CLEAN)
+ return;
+
+ chunk_appendf(&trace_buf, " appctx=%p .t=%p .t.exp=%d .state=%d .st0=%d .st1=%d",
+ appctx, appctx->t, tick_isset(appctx->t->expire) ? TICKS_TO_MS(appctx->t->expire - now_ms) : TICK_ETERNITY,
+ appctx->state, appctx->st0, appctx->st1);
+
+ if (!sc || src->verbosity == STRM_VERB_MINIMAL)
+ return;
+
+ chunk_appendf(&trace_buf, " - s=(%p,0x%08x,0x%x)", s, s->flags, s->conn_err_type);
+
+ chunk_appendf(&trace_buf, " sc=(%p,%d,0x%08x,0x%x) sco=(%p,%d,0x%08x,0x%x) sc.exp(r,w)=(%d,%d) sco.exp(r,w)=(%d,%d)",
+ sc, sc->state, sc->flags, sc->sedesc->flags,
+ sco, sco->state, sco->flags, sco->sedesc->flags,
+ tick_isset(sc_ep_rcv_ex(sc)) ? TICKS_TO_MS(sc_ep_rcv_ex(sc) - now_ms) : TICK_ETERNITY,
+ tick_isset(sc_ep_snd_ex(sc)) ? TICKS_TO_MS(sc_ep_snd_ex(sc) - now_ms) : TICK_ETERNITY,
+ tick_isset(sc_ep_rcv_ex(sco)) ? TICKS_TO_MS(sc_ep_rcv_ex(sco) - now_ms) : TICK_ETERNITY,
+ tick_isset(sc_ep_snd_ex(sco)) ? TICKS_TO_MS(sc_ep_snd_ex(sco) - now_ms) : TICK_ETERNITY);
+
+
+ /* If txn defined, don't display all channel info */
+ if (src->verbosity == STRM_VERB_SIMPLE) {
+ chunk_appendf(&trace_buf, " ic=(%p .fl=0x%08x .exp=%d)",
+ ic, ic->flags, tick_isset(ic->analyse_exp) ? TICKS_TO_MS(ic->analyse_exp - now_ms) : TICK_ETERNITY);
+ chunk_appendf(&trace_buf, " oc=(%p .fl=0x%08x .exp=%d)",
+ oc, oc->flags, tick_isset(oc->analyse_exp) ? TICKS_TO_MS(oc->analyse_exp - now_ms) : TICK_ETERNITY);
+ }
+ else {
+ chunk_appendf(&trace_buf, " ic=(%p .fl=0x%08x .ana=0x%08x .exp=%u .o=%lu .tot=%llu .to_fwd=%u)",
+ ic, ic->flags, ic->analysers, ic->analyse_exp,
+ (long)ic->output, ic->total, ic->to_forward);
+ chunk_appendf(&trace_buf, " oc=(%p .fl=0x%08x .ana=0x%08x .exp=%u .o=%lu .tot=%llu .to_fwd=%u)",
+ oc, oc->flags, oc->analysers, oc->analyse_exp,
+ (long)oc->output, oc->total, oc->to_forward);
+ }
+
+ if (src->verbosity == STRM_VERB_SIMPLE ||
+ (src->verbosity == STRM_VERB_ADVANCED && src->level < TRACE_LEVEL_DATA))
+ return;
+
+ /* channels' buffer info */
+ if (s->flags & SF_HTX) {
+ struct htx *ichtx = htxbuf(&ic->buf);
+ struct htx *ochtx = htxbuf(&oc->buf);
+
+ chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)",
+ ichtx->data, ichtx->size, htx_nbblks(ichtx),
+ ochtx->data, ochtx->size, htx_nbblks(ochtx));
+ }
+ else {
+ chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)",
+ (unsigned int)b_data(&ic->buf), b_orig(&ic->buf),
+ (unsigned int)b_head_ofs(&ic->buf), (unsigned int)b_size(&ic->buf),
+ (unsigned int)b_data(&oc->buf), b_orig(&oc->buf),
+ (unsigned int)b_head_ofs(&oc->buf), (unsigned int)b_size(&oc->buf));
+ }
+}
+
+/* Tries to allocate a new appctx and initialize all of its fields. The appctx
+ * is returned on success, NULL on failure. The appctx must be released using
+ * appctx_free(). <applet> is assigned as the applet, but it can be NULL. <thr>
+ * is the thread ID to start the applet on, and a negative value allows the
+ * applet to start anywhere. Backend applets may only be created on the current
+ * thread.
+ */
+struct appctx *appctx_new_on(struct applet *applet, struct sedesc *sedesc, int thr)
+{
+ struct appctx *appctx;
+
+ /* Backend appctx cannot be started on another thread than the local one */
+ BUG_ON(thr != tid && sedesc);
+
+ TRACE_ENTER(APPLET_EV_NEW);
+
+ appctx = pool_zalloc(pool_head_appctx);
+ if (unlikely(!appctx)) {
+ TRACE_ERROR("APPCTX allocation failure", APPLET_EV_NEW|APPLET_EV_ERR);
+ goto fail_appctx;
+ }
+
+ LIST_INIT(&appctx->wait_entry);
+ appctx->obj_type = OBJ_TYPE_APPCTX;
+ appctx->applet = applet;
+ appctx->sess = NULL;
+
+ appctx->t = task_new_on(thr);
+ if (unlikely(!appctx->t)) {
+ TRACE_ERROR("APPCTX task allocation failure", APPLET_EV_NEW|APPLET_EV_ERR);
+ goto fail_task;
+ }
+
+ if (!sedesc) {
+ sedesc = sedesc_new();
+ if (unlikely(!sedesc)) {
+ TRACE_ERROR("APPCTX sedesc allocation failure", APPLET_EV_NEW|APPLET_EV_ERR);
+ goto fail_endp;
+ }
+ sedesc->se = appctx;
+ se_fl_set(sedesc, SE_FL_T_APPLET | SE_FL_ORPHAN);
+ }
+
+ appctx->sedesc = sedesc;
+ appctx->t->process = task_run_applet;
+ appctx->t->context = appctx;
+
+ LIST_INIT(&appctx->buffer_wait.list);
+ appctx->buffer_wait.target = appctx;
+ appctx->buffer_wait.wakeup_cb = appctx_buf_available;
+
+ _HA_ATOMIC_INC(&nb_applets);
+
+ TRACE_LEAVE(APPLET_EV_NEW, appctx);
+ return appctx;
+
+ fail_endp:
+ task_destroy(appctx->t);
+ fail_task:
+ pool_free(pool_head_appctx, appctx);
+ fail_appctx:
+ return NULL;
+}
+
+/* Finalize the frontend appctx startup. It must not be called for a backend
+ * appctx. This function is responsible for creating the appctx's session and the
+ * frontend stream connector. By transitivity, the stream is also created.
+ *
+ * It returns 0 on success and -1 on error. In this case, it is the caller
+ * responsibility to release the appctx. However, the session is released if it
+ * was created. On success, if an error is encountered in the caller function,
+ * the stream must be released instead of the appctx. To be sure,
+ * appctx_free_on_early_error() must be called in this case.
+ */
+int appctx_finalize_startup(struct appctx *appctx, struct proxy *px, struct buffer *input)
+{
+ struct session *sess;
+
+	/* async startup is only possible for a frontend appctx, thus for an
+	 * orphan appctx, because no backend appctx can be orphan.
+	 */
+ BUG_ON(!se_fl_test(appctx->sedesc, SE_FL_ORPHAN));
+
+ TRACE_ENTER(APPLET_EV_START, appctx);
+
+ sess = session_new(px, NULL, &appctx->obj_type);
+ if (!sess) {
+ TRACE_ERROR("APPCTX session allocation failure", APPLET_EV_START|APPLET_EV_ERR, appctx);
+ return -1;
+ }
+ if (!sc_new_from_endp(appctx->sedesc, sess, input)) {
+ session_free(sess);
+ TRACE_ERROR("APPCTX sc allocation failure", APPLET_EV_START|APPLET_EV_ERR, appctx);
+ return -1;
+ }
+
+ appctx->sess = sess;
+ TRACE_LEAVE(APPLET_EV_START, appctx);
+ return 0;
+}
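+
+/* Illustrative frontend startup sequence (a sketch; the applet, proxy and
+ * error handling are assumptions, see real callers for the exact pattern):
+ *
+ *     appctx = appctx_new_on(&my_applet, NULL, -1);
+ *     if (!appctx)
+ *         goto fail;
+ *     if (appctx_finalize_startup(appctx, my_proxy, &BUF_NULL) == -1) {
+ *         appctx_free_on_early_error(appctx);
+ *         goto fail;
+ *     }
+ */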
+
+/* Release function to call when an error occurs during the init stage of a
+ * frontend appctx. For a backend appctx, it just calls appctx_free()
+ */
+void appctx_free_on_early_error(struct appctx *appctx)
+{
+ /* If a frontend appctx is attached to a stream connector, release the stream
+ * instead of the appctx.
+ */
+ if (!se_fl_test(appctx->sedesc, SE_FL_ORPHAN) && !(appctx_sc(appctx)->flags & SC_FL_ISBACK)) {
+ stream_free(appctx_strm(appctx));
+ return;
+ }
+ appctx_free(appctx);
+}
+
+void appctx_free(struct appctx *appctx)
+{
+ /* The task is supposed to be run on this thread, so we can just
+ * check if it's running already (or about to run) or not
+ */
+ if (!(appctx->t->state & (TASK_QUEUED | TASK_RUNNING))) {
+ TRACE_POINT(APPLET_EV_FREE, appctx);
+ __appctx_free(appctx);
+ }
+ else {
+ /* if it's running, or about to run, defer the freeing
+ * until the callback is called.
+ */
+ appctx->state |= APPLET_WANT_DIE;
+ task_wakeup(appctx->t, TASK_WOKEN_OTHER);
+ TRACE_DEVEL("Cannot release APPCTX now, wake it up", APPLET_EV_FREE, appctx);
+ }
+}
+
+/* reserves a command context of at least <size> bytes in the <appctx>, for
+ * use by a CLI command or any regular applet. The pointer to this context is
+ * stored in ctx.svcctx and is returned. The caller doesn't need to release
+ * it as it's allocated from reserved space. If the size is larger than
+ * APPLET_MAX_SVCCTX a crash will occur (hence that will never happen outside
+ * of development).
+ *
+ * Note that the command does *not* initialize the area, so that it can easily
+ * be used upon each entry in a function. It's left to the initialization code
+ * to do it if needed. The CLI will always zero the whole area before calling
+ * a keyword's ->parse() function.
+ */
+void *applet_reserve_svcctx(struct appctx *appctx, size_t size)
+{
+ BUG_ON(size > APPLET_MAX_SVCCTX);
+ appctx->svcctx = &appctx->svc.storage;
+ return appctx->svcctx;
+}
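+
+/* Usage sketch (illustrative; "show_foo_ctx" is a hypothetical type): a CLI
+ * keyword handler typically reserves its per-command context this way:
+ *
+ *	struct show_foo_ctx { int state; void *cursor; };
+ *	struct show_foo_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ *
+ * For CLI keywords the area is zeroed before ->parse() is called; other
+ * applets must initialize it themselves if needed.
+ */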
+
+/* This is used to reset an svcctx and the svc.storage without releasing the
+ * appctx. In fact this is only used by the CLI applet between commands.
+ */
+void applet_reset_svcctx(struct appctx *appctx)
+{
+ memset(&appctx->svc.storage, 0, APPLET_MAX_SVCCTX);
+ appctx->svcctx = NULL;
+}
+
+/* Calls the applet's release() function if any, and marks the sedesc as shut.
+ * Needs to be called upon close().
+ */
+void appctx_shut(struct appctx *appctx)
+{
+ if (se_fl_test(appctx->sedesc, SE_FL_SHR | SE_FL_SHW))
+ return;
+
+ TRACE_ENTER(APPLET_EV_RELEASE, appctx);
+ if (appctx->applet->release)
+ appctx->applet->release(appctx);
+
+ if (LIST_INLIST(&appctx->buffer_wait.list))
+ LIST_DEL_INIT(&appctx->buffer_wait.list);
+
+ se_fl_set(appctx->sedesc, SE_FL_SHRR | SE_FL_SHWN);
+ TRACE_LEAVE(APPLET_EV_RELEASE, appctx);
+}
+
+/* Callback used to wake up an applet when a buffer is available. The applet
+ * <appctx> is woken up if an input buffer was requested for the associated
+ * stream connector. In this case the buffer is immediately allocated and the
+ * function returns 1. Otherwise it returns 0. Note that this automatically
+ * covers multiple wake-up attempts by ensuring that the same buffer will not
+ * be accounted for multiple times.
+ */
+int appctx_buf_available(void *arg)
+{
+ struct appctx *appctx = arg;
+ struct stconn *sc = appctx_sc(appctx);
+
+ /* allocation requested ? */
+ if (!(sc->flags & SC_FL_NEED_BUFF))
+ return 0;
+
+ sc_have_buff(sc);
+
+ /* was already allocated another way ? if so, don't take this one */
+ if (c_size(sc_ic(sc)) || sc_ep_have_ff_data(sc_opposite(sc)))
+ return 0;
+
+ /* allocation possible now ? */
+ if (!b_alloc(&sc_ic(sc)->buf)) {
+ sc_need_buff(sc);
+ return 0;
+ }
+
+ task_wakeup(appctx->t, TASK_WOKEN_RES);
+ return 1;
+}
+
+/* Default applet handler */
+struct task *task_run_applet(struct task *t, void *context, unsigned int state)
+{
+ struct appctx *app = context;
+ struct stconn *sc, *sco;
+ unsigned int rate;
+ size_t count;
+ int did_send = 0;
+
+ TRACE_ENTER(APPLET_EV_PROCESS, app);
+
+ if (app->state & APPLET_WANT_DIE) {
+ TRACE_DEVEL("APPCTX want die, release it", APPLET_EV_FREE, app);
+ __appctx_free(app);
+ return NULL;
+ }
+
+ if (se_fl_test(app->sedesc, SE_FL_ORPHAN)) {
+		/* Finalize the init of an orphan appctx. The .init callback
+		 * function must be defined and it must finalize the appctx startup.
+ */
+ BUG_ON(!app->applet->init);
+
+ if (appctx_init(app) == -1) {
+ TRACE_DEVEL("APPCTX init failed", APPLET_EV_FREE|APPLET_EV_ERR, app);
+ appctx_free_on_early_error(app);
+ return NULL;
+ }
+ BUG_ON(!app->sess || !appctx_sc(app) || !appctx_strm(app));
+ TRACE_DEVEL("APPCTX initialized", APPLET_EV_PROCESS, app);
+ }
+
+ sc = appctx_sc(app);
+ sco = sc_opposite(sc);
+
+ /* We always pretend the applet can't get and doesn't want to
+	 * put; it's up to it to change this if needed. This ensures
+ * that one applet which ignores any event will not spin.
+ */
+ applet_need_more_data(app);
+ applet_have_no_more_data(app);
+
+ /* Now we'll try to allocate the input buffer. We wake up the applet in
+	 * all cases, so it is the applet's responsibility to check whether this
+	 * buffer was allocated or not. This leaves a chance for applets to do
+	 * some other processing if needed. The applet has nothing to do if it
+	 * needs the buffer: it will be called again upon readiness.
+ */
+ if (!sc_alloc_ibuf(sc, &app->buffer_wait))
+ applet_have_more_data(app);
+
+ count = co_data(sc_oc(sc));
+ app->applet->fct(app);
+
+ TRACE_POINT(APPLET_EV_PROCESS, app);
+
+ /* now check if the applet has released some room and forgot to
+ * notify the other side about it.
+ */
+ if (count != co_data(sc_oc(sc))) {
+ sc_oc(sc)->flags |= CF_WRITE_EVENT | CF_WROTE_DATA;
+ if (sco->room_needed < 0 || channel_recv_max(sc_oc(sc)) >= sco->room_needed)
+ sc_have_room(sco);
+ did_send = 1;
+ }
+ else {
+ if (!sco->room_needed)
+ sc_have_room(sco);
+ }
+
+ if (sc_ic(sc)->flags & CF_READ_EVENT)
+ sc_ep_report_read_activity(sc);
+
+ if (sc_waiting_room(sc) && (sc->flags & SC_FL_ABRT_DONE)) {
+ sc_ep_set(sc, SE_FL_EOS|SE_FL_ERROR);
+ }
+
+ if (!co_data(sc_oc(sc))) {
+ if (did_send)
+ sc_ep_report_send_activity(sc);
+ }
+ else
+ sc_ep_report_blocked_send(sc, did_send);
+
+ /* measure the call rate and check for anomalies when too high */
+ if (((b_size(sc_ib(sc)) && sc->flags & SC_FL_NEED_BUFF) || // asks for a buffer which is present
+ (b_size(sc_ib(sc)) && !b_data(sc_ib(sc)) && sc->flags & SC_FL_NEED_ROOM) || // asks for room in an empty buffer
+ (b_data(sc_ob(sc)) && sc_is_send_allowed(sc)) || // asks for data already present
+ (!b_data(sc_ib(sc)) && b_data(sc_ob(sc)) && // didn't return anything ...
+ (!(sc_oc(sc)->flags & CF_WRITE_EVENT) && (sc->flags & SC_FL_SHUT_WANTED))))) { // ... and left data pending after a shut
+ rate = update_freq_ctr(&app->call_rate, 1);
+ if (rate >= 100000 && app->call_rate.prev_ctr) // looped like this more than 100k times over last second
+ stream_dump_and_crash(&app->obj_type, read_freq_ctr(&app->call_rate));
+ }
+
+ sc->app_ops->wake(sc);
+ channel_release_buffer(sc_ic(sc), &app->buffer_wait);
+ TRACE_LEAVE(APPLET_EV_PROCESS, app);
+ return t;
+}
diff --git a/src/arg.c b/src/arg.c
new file mode 100644
index 0000000..2810050
--- /dev/null
+++ b/src/arg.c
@@ -0,0 +1,479 @@
+/*
+ * Functions used to parse typed argument lists
+ *
+ * Copyright 2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <haproxy/arg.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/regex.h>
+#include <haproxy/tools.h>
+
+const char *arg_type_names[ARGT_NBTYPES] = {
+ [ARGT_STOP] = "end of arguments",
+ [ARGT_SINT] = "integer",
+ [ARGT_STR] = "string",
+ [ARGT_IPV4] = "IPv4 address",
+ [ARGT_MSK4] = "IPv4 mask",
+ [ARGT_IPV6] = "IPv6 address",
+ [ARGT_MSK6] = "IPv6 mask",
+ [ARGT_TIME] = "delay",
+ [ARGT_SIZE] = "size",
+ [ARGT_FE] = "frontend",
+ [ARGT_BE] = "backend",
+ [ARGT_TAB] = "table",
+ [ARGT_SRV] = "server",
+ [ARGT_USR] = "user list",
+ [ARGT_MAP] = "map",
+ [ARGT_REG] = "regex",
+ [ARGT_VAR] = "variable",
+ [ARGT_PBUF_FNUM] = "Protocol buffers field number",
+ /* Unassigned types must never happen. Better crash during parsing if they do. */
+};
+
+/* This dummy arg list may be used by default when no arg is found; it helps
+ * parsers by removing pointer checks.
+ */
+struct arg empty_arg_list[ARGM_NBARGS] = { };
+
+/* This function clones a struct arg_list template into a new one which is
+ * returned.
+ */
+struct arg_list *arg_list_clone(const struct arg_list *orig)
+{
+ struct arg_list *new;
+
+ if ((new = calloc(1, sizeof(*new))) != NULL) {
+ /* ->list will be set by the caller when inserting the element.
+ * ->arg and ->arg_pos will be set by the caller.
+ */
+ new->ctx = orig->ctx;
+ new->kw = orig->kw;
+ new->conv = orig->conv;
+ new->file = orig->file;
+ new->line = orig->line;
+ }
+ return new;
+}
+
+/* This function clones a struct <arg_list> template into a new one which is
+ * set to point to arg <arg> at pos <pos>, and which is returned if the caller
+ * wants to apply further changes.
+ */
+struct arg_list *arg_list_add(struct arg_list *orig, struct arg *arg, int pos)
+{
+ struct arg_list *new;
+
+ new = arg_list_clone(orig);
+ if (new) {
+ new->arg = arg;
+ new->arg_pos = pos;
+ LIST_APPEND(&orig->list, &new->list);
+ }
+ return new;
+}
+
+/* This function builds an argument list from a config line, and stops at the
+ * first non-matching character, which is pointed to in <end_ptr>. A valid arg
+ * list starts with an opening parenthesis '(', contains a number of comma-
+ * delimited words, and ends with the closing parenthesis ')'. An empty list
+ * (with or without the parentheses) will lead to a valid empty argument if the
+ * keyword has a mandatory one. The function returns the number of arguments
+ * emitted, or <0 in case of any error. Everything needed is automatically
+ * allocated. A pointer to an error message might be returned in err_msg if not
+ * NULL, in which case it would be allocated and the caller will have to check
+ * it and free it. The output arg list is returned in argp which must be valid.
+ * The returned array is always terminated by an arg of type ARGT_STOP (0),
+ * unless the mask indicates that no argument is supported. Unresolved arguments
+ * are appended to arg list <al>, which also serves as a template to create new
+ * entries. <al> may be NULL if unresolved arguments are not allowed. The mask
+ * is composed of a number of mandatory arguments in its lower ARGM_BITS bits,
+ * and a concatenation of each argument type in each subsequent ARGT_BITS-bit
+ * block. If <err_msg> is not NULL, it must point to a freeable or NULL
+ * pointer. The caller is expected to restart the parsing from the new pointer
+ * set in <end_ptr>, which is the first character considered as not being part
+ * of the arg list. The input string ends on the first between <len> characters
+ * (when len is positive) or the first NUL character. Placing -1 in <len> will
+ * make it virtually unbounded (~2GB long strings).
+ */
+int make_arg_list(const char *in, int len, uint64_t mask, struct arg **argp,
+ char **err_msg, const char **end_ptr, int *err_arg,
+ struct arg_list *al)
+{
+ int nbarg;
+ int pos;
+ struct arg *arg;
+ const char *beg;
+ const char *ptr_err = NULL;
+ int min_arg;
+ int empty;
+ struct arg_list *new_al = al;
+
+ *argp = NULL;
+
+ empty = 0;
+ if (!len || *in != '(') {
+ /* it's already not for us, stop here */
+ empty = 1;
+ len = 0;
+ } else {
+ /* skip opening parenthesis */
+ len--;
+ in++;
+ }
+
+ min_arg = mask & ARGM_MASK;
+ mask >>= ARGM_BITS;
+
+ pos = 0;
+ /* find between 0 and NBARGS the max number of args supported by the mask */
+ for (nbarg = 0; nbarg < ARGM_NBARGS && ((mask >> (nbarg * ARGT_BITS)) & ARGT_MASK); nbarg++);
+
+ if (!nbarg)
+ goto end_parse;
+
+ /* Note: an empty input string contains an empty argument if this argument
+ * is marked mandatory. Otherwise we can ignore it.
+ */
+ if (empty && !min_arg)
+ goto end_parse;
+
+ arg = *argp = calloc(nbarg + 1, sizeof(**argp));
+
+ if (!arg)
+ goto alloc_err;
+
+ /* Note: empty arguments after a comma always exist. */
+ while (pos < nbarg) {
+ unsigned int uint;
+ int squote = 0, dquote = 0;
+ char *out;
+
+ chunk_reset(&trash);
+ out = trash.area;
+
+ while (len && *in && trash.data < trash.size - 1) {
+ if (*in == '"' && !squote) { /* double quote outside single quotes */
+ if (dquote)
+ dquote = 0;
+ else
+ dquote = 1;
+ in++; len--;
+ continue;
+ }
+ else if (*in == '\'' && !dquote) { /* single quote outside double quotes */
+ if (squote)
+ squote = 0;
+ else
+ squote = 1;
+ in++; len--;
+ continue;
+ }
+ else if (*in == '\\' && !squote && len != 1) {
+				/* '\', ' ', '\'' and '"' support being escaped by '\' */
+ if (in[1] == 0)
+ goto unquote_err;
+
+ if (in[1] == '\\' || in[1] == ' ' || in[1] == '"' || in[1] == '\'') {
+ in++; len--;
+ *out++ = *in;
+ }
+ else if (in[1] == 'r') {
+ in++; len--;
+ *out++ = '\r';
+ }
+ else if (in[1] == 'n') {
+ in++; len--;
+ *out++ = '\n';
+ }
+ else if (in[1] == 't') {
+ in++; len--;
+ *out++ = '\t';
+ }
+ else {
+ /* just a lone '\' */
+ *out++ = *in;
+ }
+ in++; len--;
+ }
+ else {
+ if (!squote && !dquote && (*in == ',' || *in == ')')) {
+ /* end of argument */
+ break;
+ }
+ /* verbatim copy */
+ *out++ = *in++;
+ len--;
+ }
+ trash.data = out - trash.area;
+ }
+
+ if (len && *in && *in != ',' && *in != ')')
+ goto buffer_err;
+
+ trash.area[trash.data] = 0;
+
+ arg->type = (mask >> (pos * ARGT_BITS)) & ARGT_MASK;
+
+ switch (arg->type) {
+ case ARGT_SINT:
+ if (!trash.data) // empty number
+ goto empty_err;
+ beg = trash.area;
+ arg->data.sint = read_int64(&beg, trash.area + trash.data);
+ if (beg < trash.area + trash.data)
+ goto parse_err;
+ arg->type = ARGT_SINT;
+ break;
+
+ case ARGT_FE:
+ case ARGT_BE:
+ case ARGT_TAB:
+ case ARGT_SRV:
+ case ARGT_USR:
+ case ARGT_REG:
+ /* These argument types need to be stored as strings during
+ * parsing then resolved later.
+ */
+ if (!al)
+ goto resolve_err;
+ arg->unresolved = 1;
+ new_al = arg_list_add(al, arg, pos);
+ __fallthrough;
+
+ case ARGT_STR:
+ /* all types that must be resolved are stored as strings
+ * during the parsing. The caller must at one point resolve
+ * them and free the string.
+ */
+ arg->data.str.area = my_strndup(trash.area, trash.data);
+ arg->data.str.data = trash.data;
+ arg->data.str.size = trash.data + 1;
+ break;
+
+ case ARGT_IPV4:
+ if (!trash.data) // empty address
+ goto empty_err;
+
+ if (inet_pton(AF_INET, trash.area, &arg->data.ipv4) <= 0)
+ goto parse_err;
+ break;
+
+ case ARGT_MSK4:
+ if (!trash.data) // empty mask
+ goto empty_err;
+
+ if (!str2mask(trash.area, &arg->data.ipv4))
+ goto parse_err;
+
+ arg->type = ARGT_IPV4;
+ break;
+
+ case ARGT_IPV6:
+ if (!trash.data) // empty address
+ goto empty_err;
+
+ if (inet_pton(AF_INET6, trash.area, &arg->data.ipv6) <= 0)
+ goto parse_err;
+ break;
+
+ case ARGT_MSK6:
+ if (!trash.data) // empty mask
+ goto empty_err;
+
+ if (!str2mask6(trash.area, &arg->data.ipv6))
+ goto parse_err;
+
+ arg->type = ARGT_IPV6;
+ break;
+
+ case ARGT_TIME:
+ if (!trash.data) // empty time
+ goto empty_err;
+
+ ptr_err = parse_time_err(trash.area, &uint, TIME_UNIT_MS);
+ if (ptr_err) {
+ if (ptr_err == PARSE_TIME_OVER || ptr_err == PARSE_TIME_UNDER)
+ ptr_err = trash.area;
+ goto parse_err;
+ }
+ arg->data.sint = uint;
+ arg->type = ARGT_SINT;
+ break;
+
+ case ARGT_SIZE:
+ if (!trash.data) // empty size
+ goto empty_err;
+
+ ptr_err = parse_size_err(trash.area, &uint);
+ if (ptr_err)
+ goto parse_err;
+
+ arg->data.sint = uint;
+ arg->type = ARGT_SINT;
+ break;
+
+ case ARGT_PBUF_FNUM:
+ if (!trash.data)
+ goto empty_err;
+
+ if (!parse_dotted_uints(trash.area, &arg->data.fid.ids, &arg->data.fid.sz))
+ goto parse_err;
+
+ break;
+
+ /* FIXME: other types need to be implemented here */
+ default:
+ goto not_impl;
+ }
+
+ pos++;
+ arg++;
+
+ /* don't go back to parsing if we reached end */
+ if (!len || !*in || *in == ')' || pos >= nbarg)
+ break;
+
+ /* skip comma */
+ in++; len--;
+ }
+
+ end_parse:
+ if (pos < min_arg) {
+ /* not enough arguments */
+ memprintf(err_msg,
+ "missing arguments (got %d/%d), type '%s' expected",
+ pos, min_arg, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK]);
+ goto err;
+ }
+
+ if (empty) {
+ /* nothing to do */
+ } else if (*in == ')') {
+ /* skip the expected closing parenthesis */
+ in++;
+ } else {
+ /* the caller is responsible for freeing this message */
+ char *word = (len > 0) ? my_strndup(in, len) : (char *)in;
+
+ if (*word)
+ memprintf(err_msg, "expected ')' before '%s'", word);
+ else
+ memprintf(err_msg, "expected ')'");
+
+ if (len > 0)
+ free(word);
+		/* when the right parenthesis is missing, the preceding empty
+		 * part has already created an empty arg and advanced the
+		 * position by one, so let's fix the reporting to avoid confusion.
+ */
+ if (pos > 1)
+ pos--;
+ goto err;
+ }
+
+ /* note that pos might be < nbarg and this is not an error, it's up to the
+ * caller to decide what to do with optional args.
+ */
+ if (err_arg)
+ *err_arg = pos;
+ if (end_ptr)
+ *end_ptr = in;
+ return pos;
+
+ err:
+ if (new_al == al) {
+ /* only free the arg area if we have not queued unresolved args
+ * still pointing to it.
+ */
+ free_args(*argp);
+ free(*argp);
+ }
+ *argp = NULL;
+ if (err_arg)
+ *err_arg = pos;
+ if (end_ptr)
+ *end_ptr = in;
+ return -1;
+
+ empty_err:
+ /* If we've only got an empty set of parenthesis with nothing
+ * in between, there is no arg at all.
+ */
+ if (!pos) {
+ ha_free(argp);
+ }
+
+ if (pos >= min_arg)
+ goto end_parse;
+
+ memprintf(err_msg, "expected type '%s' at position %d, but got nothing",
+ arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+
+ parse_err:
+ /* come here with the word attempted to parse in trash */
+ memprintf(err_msg, "failed to parse '%s' as type '%s' at position %d",
+ trash.area, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+
+ not_impl:
+ memprintf(err_msg, "parsing for type '%s' was not implemented, please report this bug",
+ arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK]);
+ goto err;
+
+ buffer_err:
+ memprintf(err_msg, "too small buffer size to store decoded argument %d, increase bufsize ?",
+ pos + 1);
+ goto err;
+
+ unquote_err:
+ /* come here with the parsed part in <trash.area>:<trash.data> and the
+ * unparsable part in <in>.
+ */
+ trash.area[trash.data] = 0;
+ memprintf(err_msg, "failed to parse '%s' after '%s' as type '%s' at position %d",
+ in, trash.area, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+
+alloc_err:
+ memprintf(err_msg, "out of memory");
+ goto err;
+
+ resolve_err:
+ memprintf(err_msg, "unresolved argument of type '%s' at position %d not allowed",
+ arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+}
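+
+/* Usage sketch (illustrative, error handling elided; relies on the ARG1()
+ * mask macro from arg-t.h): parsing one mandatory integer argument such as
+ * "(12)":
+ *
+ *	struct arg *args;
+ *	char *errmsg = NULL;
+ *	const char *end;
+ *	int ret = make_arg_list("(12)", -1, ARG1(1, SINT), &args,
+ *	                        &errmsg, &end, NULL, NULL);
+ *
+ * On success ret == 1, args[0].data.sint == 12 and args[1].type == ARGT_STOP;
+ * on error ret < 0 and <errmsg> must be checked and freed by the caller.
+ */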
+
+/* Free all args of an args array, taking care of unresolved arguments as well.
+ * It stops at the ARGT_STOP, which must be present. The array itself is not
+ * freed, it's up to the caller to do it. However it is returned, allowing to
+ * call free(free_args(argptr)). It is valid to call it with a NULL <args>,
+ * in which case nothing will be done.
+ */
+struct arg *free_args(struct arg *args)
+{
+ struct arg *arg;
+
+ for (arg = args; arg && arg->type != ARGT_STOP; arg++) {
+ if (arg->type == ARGT_STR || arg->unresolved)
+ chunk_destroy(&arg->data.str);
+ else if (arg->type == ARGT_REG)
+ regex_free(arg->data.reg);
+ else if (arg->type == ARGT_PBUF_FNUM)
+ ha_free(&arg->data.fid.ids);
+ }
+ return args;
+}
diff --git a/src/auth.c b/src/auth.c
new file mode 100644
index 0000000..0031300
--- /dev/null
+++ b/src/auth.c
@@ -0,0 +1,316 @@
+/*
+ * User authentication & authorization
+ *
+ * Copyright 2010 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#ifdef USE_LIBCRYPT
+/* This is to have crypt() defined on Linux */
+#define _GNU_SOURCE
+
+#ifdef USE_CRYPT_H
+/* some platforms such as Solaris need this */
+#include <crypt.h>
+#endif
+#endif /* USE_LIBCRYPT */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <haproxy/api.h>
+#include <haproxy/auth-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/sample-t.h>
+#include <haproxy/thread.h>
+
+struct userlist *userlist = NULL; /* list of all existing userlists */
+
+#ifdef USE_LIBCRYPT
+#define CRYPT_STATE_MSG "yes"
+#ifdef HA_HAVE_CRYPT_R
+/* context for crypt_r() */
+static THREAD_LOCAL struct crypt_data crypt_data = { .initialized = 0 };
+#else
+/* lock for crypt() */
+__decl_thread(static HA_SPINLOCK_T auth_lock);
+#endif
+#else /* USE_LIBCRYPT */
+#define CRYPT_STATE_MSG "no"
+#endif
+
+/* find targets for selected groups. The function returns a pointer to
+ * the userlist struct, or NULL if <name> is NULL/empty or cannot be resolved.
+ */
+
+struct userlist *
+auth_find_userlist(char *name)
+{
+ struct userlist *l;
+
+ if (!name || !*name)
+ return NULL;
+
+ for (l = userlist; l; l = l->next)
+ if (strcmp(l->name, name) == 0)
+ return l;
+
+ return NULL;
+}
+
+int check_group(struct userlist *ul, char *name)
+{
+ struct auth_groups *ag;
+
+ for (ag = ul->groups; ag; ag = ag->next)
+ if (strcmp(name, ag->name) == 0)
+ return 1;
+ return 0;
+}
+
+void
+userlist_free(struct userlist *ul)
+{
+ struct userlist *tul;
+ struct auth_users *au, *tau;
+ struct auth_groups_list *agl, *tagl;
+ struct auth_groups *ag, *tag;
+
+ while (ul) {
+ /* Free users. */
+ au = ul->users;
+ while (au) {
+ /* Free groups that own current user. */
+ agl = au->u.groups;
+ while (agl) {
+ tagl = agl;
+ agl = agl->next;
+ free(tagl);
+ }
+
+ tau = au;
+ au = au->next;
+ free(tau->user);
+ free(tau->pass);
+ free(tau);
+ }
+
+ /* Free grouplist. */
+ ag = ul->groups;
+ while (ag) {
+ tag = ag;
+ ag = ag->next;
+ free(tag->name);
+ free(tag);
+ }
+
+ tul = ul;
+ ul = ul->next;
+ free(tul->name);
+ free(tul);
+	}
+}
+
+int userlist_postinit()
+{
+ struct userlist *curuserlist = NULL;
+
+ /* Resolve usernames and groupnames. */
+ for (curuserlist = userlist; curuserlist; curuserlist = curuserlist->next) {
+ struct auth_groups *ag;
+ struct auth_users *curuser;
+ struct auth_groups_list *grl;
+
+ for (curuser = curuserlist->users; curuser; curuser = curuser->next) {
+ char *group = NULL;
+ struct auth_groups_list *groups = NULL;
+
+ if (!curuser->u.groups_names)
+ continue;
+
+ while ((group = strtok(group?NULL:curuser->u.groups_names, ","))) {
+ for (ag = curuserlist->groups; ag; ag = ag->next) {
+ if (strcmp(ag->name, group) == 0)
+ break;
+ }
+
+ if (!ag) {
+ ha_alert("userlist '%s': no such group '%s' specified in user '%s'\n",
+ curuserlist->name, group, curuser->user);
+ free(groups);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+				/* Add this group to the user's group list. */
+ grl = calloc(1, sizeof(*grl));
+ if (!grl) {
+ ha_alert("userlist '%s': no more memory when trying to allocate the user groups.\n",
+ curuserlist->name);
+ free(groups);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ grl->group = ag;
+ grl->next = groups;
+ groups = grl;
+ }
+
+ free(curuser->u.groups);
+ curuser->u.groups = groups;
+ }
+
+ for (ag = curuserlist->groups; ag; ag = ag->next) {
+ char *user = NULL;
+
+ if (!ag->groupusers)
+ continue;
+
+ while ((user = strtok(user?NULL:ag->groupusers, ","))) {
+ for (curuser = curuserlist->users; curuser; curuser = curuser->next) {
+ if (strcmp(curuser->user, user) == 0)
+ break;
+ }
+
+ if (!curuser) {
+ ha_alert("userlist '%s': no such user '%s' specified in group '%s'\n",
+ curuserlist->name, user, ag->name);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+				/* Add this group to the user's group list. */
+ grl = calloc(1, sizeof(*grl));
+ if (!grl) {
+ ha_alert("userlist '%s': no more memory when trying to allocate the user groups.\n",
+ curuserlist->name);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ grl->group = ag;
+ grl->next = curuser->u.groups;
+ curuser->u.groups = grl;
+ }
+
+ ha_free(&ag->groupusers);
+ }
+
+#ifdef DEBUG_AUTH
+ for (ag = curuserlist->groups; ag; ag = ag->next) {
+ struct auth_groups_list *agl;
+
+ fprintf(stderr, "group %s, id %p, users:", ag->name, ag);
+ for (curuser = curuserlist->users; curuser; curuser = curuser->next) {
+ for (agl = curuser->u.groups; agl; agl = agl->next) {
+ if (agl->group == ag)
+ fprintf(stderr, " %s", curuser->user);
+ }
+ }
+ fprintf(stderr, "\n");
+ }
+#endif
+ }
+
+ return ERR_NONE;
+}
+
+/*
+ * Authenticate and authorize user; return 1 if OK, 0 in case of error.
+ */
+int
+check_user(struct userlist *ul, const char *user, const char *pass)
+{
+
+ struct auth_users *u;
+#ifdef DEBUG_AUTH
+ struct auth_groups_list *agl;
+#endif
+ const char *ep;
+
+#ifdef DEBUG_AUTH
+ fprintf(stderr, "req: userlist=%s, user=%s, pass=%s\n",
+ ul->name, user, pass);
+#endif
+
+ for (u = ul->users; u; u = u->next)
+ if (strcmp(user, u->user) == 0)
+ break;
+
+ if (!u)
+ return 0;
+
+#ifdef DEBUG_AUTH
+ fprintf(stderr, "cfg: user=%s, pass=%s, flags=%X, groups=",
+ u->user, u->pass, u->flags);
+ for (agl = u->u.groups; agl; agl = agl->next)
+ fprintf(stderr, " %s", agl->group->name);
+#endif
+
+ if (!(u->flags & AU_O_INSECURE)) {
+#ifdef USE_LIBCRYPT
+#ifdef HA_HAVE_CRYPT_R
+ ep = crypt_r(pass, u->pass, &crypt_data);
+#else
+ HA_SPIN_LOCK(AUTH_LOCK, &auth_lock);
+ ep = crypt(pass, u->pass);
+ HA_SPIN_UNLOCK(AUTH_LOCK, &auth_lock);
+#endif
+#else
+ return 0;
+#endif
+ } else
+ ep = pass;
+
+#ifdef DEBUG_AUTH
+ fprintf(stderr, ", crypt=%s\n", ((ep) ? ep : ""));
+#endif
+
+ if (ep && strcmp(ep, u->pass) == 0)
+ return 1;
+ else
+ return 0;
+}
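+
+/* Illustrative configuration exercised by check_user() (hash shortened):
+ *
+ *	userlist L1
+ *		group G1 users tiger
+ *		user tiger password $6$k6y3o.eP$JlKB...
+ *
+ * check_user(ul, "tiger", "secret") crypt()s "secret" using the stored hash
+ * as the setting string and returns 1 only if the result matches. With
+ * "insecure-password", the comparison is done on the clear text instead.
+ */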
+
+struct pattern *
+pat_match_auth(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct userlist *ul = smp->ctx.a[0];
+ struct pattern_list *lst;
+ struct auth_users *u;
+ struct auth_groups_list *agl;
+ struct pattern *pattern;
+
+ /* Check if the userlist is present in the context data. */
+ if (!ul)
+ return NULL;
+
+ /* Browse the userlist for searching user. */
+ for (u = ul->users; u; u = u->next) {
+ if (strcmp(smp->data.u.str.area, u->user) == 0)
+ break;
+ }
+ if (!u)
+ return NULL;
+
+ /* Browse each pattern. */
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ /* Browse each group for searching group name that match the pattern. */
+ for (agl = u->u.groups; agl; agl = agl->next) {
+ if (strcmp(agl->group->name, pattern->ptr.str) == 0)
+ return pattern;
+ }
+ }
+ return NULL;
+}
+
+REGISTER_BUILD_OPTS("Encrypted password support via crypt(3): "CRYPT_STATE_MSG);
diff --git a/src/backend.c b/src/backend.c
new file mode 100644
index 0000000..39d2c75
--- /dev/null
+++ b/src/backend.c
@@ -0,0 +1,3401 @@
+/*
+ * Backend variables and functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/acl.h>
+#include <haproxy/activity.h>
+#include <haproxy/arg.h>
+#include <haproxy/backend.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/hash.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/lb_chash.h>
+#include <haproxy/lb_fas.h>
+#include <haproxy/lb_fwlc.h>
+#include <haproxy/lb_fwrr.h>
+#include <haproxy/lb_map.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/payload.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/queue.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/trace.h>
+
+#define TRACE_SOURCE &trace_strm
+
+int be_lastsession(const struct proxy *be)
+{
+ if (be->be_counters.last_sess)
+ return ns_to_sec(now_ns) - be->be_counters.last_sess;
+
+ return -1;
+}
+
+/* helper function to invoke the correct hash method */
+unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len)
+{
+ unsigned int hash;
+
+ switch (px->lbprm.algo & BE_LB_HASH_FUNC) {
+ case BE_LB_HFCN_DJB2:
+ hash = hash_djb2(key, len);
+ break;
+ case BE_LB_HFCN_WT6:
+ hash = hash_wt6(key, len);
+ break;
+ case BE_LB_HFCN_CRC32:
+ hash = hash_crc32(key, len);
+ break;
+ case BE_LB_HFCN_NONE:
+ /* use key as a hash */
+ {
+ const char *_key = key;
+
+ hash = read_int64(&_key, _key + len);
+ }
+ break;
+ case BE_LB_HFCN_SDBM:
+ /* this is the default hash function */
+ default:
+ hash = hash_sdbm(key, len);
+ break;
+ }
+
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ hash = full_hash(hash);
+
+ return hash;
+}
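+
+/* Illustrative sketch: with "hash-type consistent sdbm avalanche", a call
+ * such as gen_hash(px, key, len) applies hash_sdbm() to the key, then
+ * full_hash() as the avalanche stage. With "hash-type ... none", the key is
+ * expected to be a numeric string and is read directly as the hash value.
+ */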
+
+/*
+ * This function recounts the number of usable active and backup servers for
+ * proxy <p>. These numbers are returned in p->srv_act and p->srv_bck.
+ * This function also recomputes the total active and backup weights. However,
+ * it does not update tot_weight nor tot_used. Use update_backend_weight() for
+ * this.
+ * This function is designed to be called before a server's weight and state
+ * commit, so it uses the 'next' weight and state values.
+ *
+ * threads: it is the caller's responsibility to lock data. For now, this
+ * function is called from lb modules, so it should be ok. But if you need to
+ * call it from another place, be careful (and update this comment).
+ */
+void recount_servers(struct proxy *px)
+{
+ struct server *srv;
+
+ px->srv_act = px->srv_bck = 0;
+ px->lbprm.tot_wact = px->lbprm.tot_wbck = 0;
+ px->lbprm.fbck = NULL;
+ for (srv = px->srv; srv != NULL; srv = srv->next) {
+ if (!srv_willbe_usable(srv))
+ continue;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ if (!px->srv_bck &&
+ !(px->options & PR_O_USE_ALL_BK))
+ px->lbprm.fbck = srv;
+ px->srv_bck++;
+ srv->cumulative_weight = px->lbprm.tot_wbck;
+ px->lbprm.tot_wbck += srv->next_eweight;
+ } else {
+ px->srv_act++;
+ srv->cumulative_weight = px->lbprm.tot_wact;
+ px->lbprm.tot_wact += srv->next_eweight;
+ }
+ }
+}
+
+/* This function simply updates the backend's tot_weight and tot_used values
+ * after servers weights have been updated. It is designed to be used after
+ * recount_servers() or equivalent.
+ *
+ * threads: it is the caller's responsibility to lock data. For now, this
+ * function is called from lb modules, so it should be ok. But if you need to
+ * call it from another place, be careful (and update this comment).
+ */
+void update_backend_weight(struct proxy *px)
+{
+ if (px->srv_act) {
+ px->lbprm.tot_weight = px->lbprm.tot_wact;
+ px->lbprm.tot_used = px->srv_act;
+ }
+ else if (px->lbprm.fbck) {
+ /* use only the first backup server */
+ px->lbprm.tot_weight = px->lbprm.fbck->next_eweight;
+ px->lbprm.tot_used = 1;
+ }
+ else {
+ px->lbprm.tot_weight = px->lbprm.tot_wbck;
+ px->lbprm.tot_used = px->srv_bck;
+ }
+}
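+
+/* Illustrative example: with two usable active servers of effective weights
+ * 10 and 20, recount_servers() leaves tot_wact = 30, then
+ * update_backend_weight() sets tot_weight = 30 and tot_used = 2. If only
+ * backup servers remain and "option allbackups" is not set, only the first
+ * backup counts: tot_weight = fbck->next_eweight and tot_used = 1.
+ */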
+
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the source hash method. Depending on the number of active/backup servers,
+ * it will either look for active servers, or for backup servers.
+ * If any server is found, it will be returned. If no valid server is found,
+ * NULL is returned.
+ */
+static struct server *get_server_sh(struct proxy *px, const char *addr, int len, const struct server *avoid)
+{
+ unsigned int h, l;
+
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ l = h = 0;
+
+ /* note: we won't hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ while ((l + sizeof (int)) <= len) {
+ h ^= ntohl(*(unsigned int *)(&addr[l]));
+ l += sizeof (int);
+ }
+ /* FIXME: why don't we use gen_hash() here as well?
+ * -> we don't take into account hash function from "hash_type"
+ * options here..
+ */
+ if ((px->lbprm.algo & BE_LB_HASH_MOD) == BE_LB_HMOD_AVAL)
+ h = full_hash(h);
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, h, avoid);
+ else
+ return map_get_server_hash(px, h);
+}
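+
+/* Illustrative note: with "balance source", an IPv4 address is folded by
+ * XOR-ing its single 32-bit word, while an IPv6 address XORs its four 32-bit
+ * words; the result is then mapped to a server through the map or the
+ * consistent-hash tree depending on "hash-type".
+ */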
+
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the URI hash method. In order to optimize cache hits, the hash computation
+ * ends at the question mark. Depending on the number of active/backup servers,
+ * it will either look for active servers, or for backup servers.
+ * If any server is found, it will be returned. If no valid server is found,
+ * NULL is returned. The lbprm.arg_opt{1,2,3} values correspond respectively to
+ * the "whole" optional argument (boolean, bit0), the "len" argument (numeric)
+ * and the "depth" argument (numeric).
+ *
+ * This code was contributed by Guillaume Dallaire, who also selected this hash
+ * algorithm out of about ten candidates because it gave him the best results.
+ *
+ */
+static struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ int c;
+ int slashes = 0;
+ const char *start, *end;
+
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ /* note: we won't hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ if (px->lbprm.arg_opt2) // "len"
+ uri_len = MIN(uri_len, px->lbprm.arg_opt2);
+
+ start = end = uri;
+ while (uri_len--) {
+ c = *end;
+ if (c == '/') {
+ slashes++;
+ if (slashes == px->lbprm.arg_opt3) /* depth+1 */
+ break;
+ }
+ else if (c == '?' && !(px->lbprm.arg_opt1 & 1)) // "whole"
+ break;
+ end++;
+ }
+
+ hash = gen_hash(px, start, (end - start));
+
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+}
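+
+/* Illustrative example: with "balance uri", the request
+ * "GET /img/logo.png?v=3" only hashes "/img/logo.png" since hashing stops at
+ * the question mark unless "whole" is set. Adding "len 4" restricts the hash
+ * to "/img", and "depth 1" stops before the second '/', also yielding "/img"
+ * here.
+ */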
+
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the URL parameter hash method. It looks for a specific parameter in the
+ * URL and hashes it to compute the server ID. This is useful to optimize
+ * performance by avoiding bounces between servers in contexts where sessions
+ * are shared but cookies are not usable. If the parameter is not found, NULL
+ * is returned. If any server is found, it will be returned. If no valid server
+ * is found, NULL is returned.
+ */
+static struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ const char *start, *end;
+ const char *p;
+ const char *params;
+ int plen;
+
+ /* when tot_weight is 0 then so is srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ if ((p = memchr(uri, '?', uri_len)) == NULL)
+ return NULL;
+
+ p++;
+
+ uri_len -= (p - uri);
+ plen = px->lbprm.arg_len;
+ params = p;
+
+ while (uri_len > plen) {
+ /* Look for the parameter name followed by an equal symbol */
+ if (params[plen] == '=') {
+ if (memcmp(params, px->lbprm.arg_str, plen) == 0) {
+ /* OK, we have the parameter here at <params>, and
+ * the value after the equal sign, at <p>
+ * skip the equal symbol
+ */
+ p += plen + 1;
+ start = end = p;
+ uri_len -= plen + 1;
+
+ while (uri_len && *end != '&') {
+ uri_len--;
+ end++;
+ }
+ hash = gen_hash(px, start, (end - start));
+
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+ }
+ }
+ /* skip to next parameter */
+ p = memchr(params, '&', uri_len);
+ if (!p)
+ return NULL;
+ p++;
+ uri_len -= (p - params);
+ params = p;
+ }
+ return NULL;
+}
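+
+/* Illustrative example: with "balance url_param user", the request
+ * "GET /cart?lang=en&user=42" hashes the value "42". When the parameter is
+ * absent, NULL is returned and the caller falls back to round robin (or to
+ * the body lookup below for POST requests).
+ */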
+
+/*
+ * This does the same as the previous get_server_ph(), but checks the body contents.
+ */
+static struct server *get_server_ph_post(struct stream *s, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ struct channel *req = &s->req;
+ struct proxy *px = s->be;
+ struct htx *htx = htxbuf(&req->buf);
+ struct htx_blk *blk;
+ unsigned int plen = px->lbprm.arg_len;
+ unsigned long len;
+ const char *params, *p, *start, *end;
+
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ p = params = NULL;
+ len = 0;
+ for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist v;
+
+ if (type != HTX_BLK_DATA)
+ continue;
+ v = htx_get_blk_value(htx, blk);
+ p = params = v.ptr;
+ len = v.len;
+ break;
+ }
+
+ while (len > plen) {
+ /* Look for the parameter name followed by an equal symbol */
+ if (params[plen] == '=') {
+ if (memcmp(params, px->lbprm.arg_str, plen) == 0) {
+ /* OK, we have the parameter here at <params>, and
+ * the value after the equal sign, at <p>
+ * skip the equal symbol
+ */
+ p += plen + 1;
+ start = end = p;
+ len -= plen + 1;
+
+ while (len && *end != '&') {
+ if (unlikely(!HTTP_IS_TOKEN(*p))) {
+ /* if in a POST, body must be URI encoded or it's not a URI.
+ * Do not interpret any possible binary data as a parameter.
+ */
+ if (likely(HTTP_IS_LWS(*p))) /* eol, uncertain uri len */
+ break;
+ return NULL; /* oh, no; this is not uri-encoded.
+ * This body does not contain parameters.
+ */
+ }
+ len--;
+ end++;
+ /* should we break if vlen exceeds limit? */
+ }
+ hash = gen_hash(px, start, (end - start));
+
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+ }
+ }
+ /* skip to next parameter */
+ p = memchr(params, '&', len);
+ if (!p)
+ return NULL;
+ p++;
+ len -= (p - params);
+ params = p;
+ }
+ return NULL;
+}
+
+
+/*
+ * This function tries to find a running server for the proxy <px> following
+ * the Header parameter hash method. It looks for a specific parameter in the
+ * URL and hashes it to compute the server ID. This is useful to optimize
+ * performance by avoiding bounces between servers in contexts where sessions
+ * are shared but cookies are not usable. If the parameter is not found, NULL
+ * is returned. If any server is found, it will be returned. If no valid server
+ * is found, NULL is returned. When lbprm.arg_opt1 is set, the hash will only
+ * apply to the middle part of a domain name ("use_domain_only" option).
+ */
+static struct server *get_server_hh(struct stream *s, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ struct proxy *px = s->be;
+ unsigned int plen = px->lbprm.arg_len;
+ unsigned long len;
+ const char *p;
+ const char *start, *end;
+ struct htx *htx = htxbuf(&s->req.buf);
+ struct http_hdr_ctx ctx = { .blk = NULL };
+
+ /* tot_weight appears to mean srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ /* note: we won't hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ http_find_header(htx, ist2(px->lbprm.arg_str, plen), &ctx, 0);
+
+ /* if the header is not found or empty, let's fallback to round robin */
+ if (!ctx.blk || !ctx.value.len)
+ return NULL;
+
+	/* Found the param_name in the headers.
+	 * We will compute the hash based on this value ctx.value.
+ */
+ len = ctx.value.len;
+ p = ctx.value.ptr;
+
+ if (!px->lbprm.arg_opt1) {
+ hash = gen_hash(px, p, len);
+ } else {
+ int dohash = 0;
+ p += len;
+		/* special computation: use only the main domain name, not the
+		 * tld/host. Going back from the end of the string, start hashing
+		 * at the first dot and stop at the next one.
+ * This is designed to work with the 'Host' header, and requires
+ * a special option to activate this.
+ */
+ end = p;
+ while (len) {
+ if (dohash) {
+				/* Rewind the pointer until the previous char
+				 * is a dot; this allows us to set the start
+				 * position of the domain. */
+ if (*(p - 1) == '.')
+ break;
+ }
+ else if (*p == '.') {
+				/* The pointer is rewound to the dot before the
+				 * tld; we memorize the end of the domain and
+				 * can enter the domain processing. */
+ end = p;
+ dohash = 1;
+ }
+ p--;
+ len--;
+ }
+ start = p;
+ hash = gen_hash(px, start, (end - start));
+ }
+
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+}
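+
+/* Illustrative example: with "balance hdr(Host) use_domain_only", the value
+ * "www.example.com" is scanned backwards: "com" is skipped up to the first
+ * dot, then hashing stops at the next one, so only "example" is hashed and
+ * all subdomains of example.com map to the same server.
+ */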
+
+/* RDP Cookie HASH. */
+static struct server *get_server_rch(struct stream *s, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ struct proxy *px = s->be;
+ unsigned long len;
+ int ret;
+ struct sample smp;
+ int rewind;
+
+ /* tot_weight appears to mean srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ memset(&smp, 0, sizeof(smp));
+
+ rewind = co_data(&s->req);
+ c_rew(&s->req, rewind);
+
+ ret = fetch_rdp_cookie_name(s, &smp, px->lbprm.arg_str, px->lbprm.arg_len);
+ len = smp.data.u.str.data;
+
+ c_adv(&s->req, rewind);
+
+ if (ret == 0 || (smp.flags & SMP_F_MAY_CHANGE) || len == 0)
+ return NULL;
+
+ /* note: we won't hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+	/* Found the RDP cookie in the request.
+	 * We will compute the hash based on its value.
+ */
+ hash = gen_hash(px, smp.data.u.str.area, len);
+
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+}
+
+/* sample expression HASH. Returns NULL if the sample is not found or if there
+ * are no server, relying on the caller to fall back to round robin instead.
+ */
+static struct server *get_server_expr(struct stream *s, const struct server *avoid)
+{
+ struct proxy *px = s->be;
+ struct sample *smp;
+ unsigned int hash = 0;
+
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ /* note: no need to hash if there's only one server left */
+ if (px->lbprm.tot_used == 1)
+ goto hash_done;
+
+ smp = sample_fetch_as_type(px, s->sess, s, SMP_OPT_DIR_REQ | SMP_OPT_FINAL, px->lbprm.expr, SMP_T_BIN);
+ if (!smp)
+ return NULL;
+
+ /* We have the desired data. Let's hash it according to the configured
+ * options and algorithm.
+ */
+ hash = gen_hash(px, smp->data.u.str.area, smp->data.u.str.data);
+
+ hash_done:
+ if ((px->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ return chash_get_server_hash(px, hash, avoid);
+ else
+ return map_get_server_hash(px, hash);
+}
+
+/* random value */
+static struct server *get_server_rnd(struct stream *s, const struct server *avoid)
+{
+ unsigned int hash = 0;
+ struct proxy *px = s->be;
+ struct server *prev, *curr;
+ int draws = px->lbprm.arg_opt1; // number of draws
+
+ /* tot_weight appears to mean srv_count */
+ if (px->lbprm.tot_weight == 0)
+ return NULL;
+
+ curr = NULL;
+ do {
+ prev = curr;
+ hash = statistical_prng();
+ curr = chash_get_server_hash(px, hash, avoid);
+ if (!curr)
+ break;
+
+ /* compare the new server to the previous best choice and pick
+ * the one with the least currently served requests.
+ */
+ if (prev && prev != curr &&
+ curr->served * prev->cur_eweight > prev->served * curr->cur_eweight)
+ curr = prev;
+ } while (--draws > 0);
+
+ /* if the selected server is full, pretend we have none so that we reach
+ * the backend's queue instead.
+ */
+ if (curr &&
+ (curr->queue.length || (curr->maxconn && curr->served >= srv_dynamic_maxconn(curr))))
+ curr = NULL;
+
+ return curr;
+}
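+
+/* Illustrative note: "balance random(2)" sets arg_opt1 to 2, so two random
+ * draws are made on the consistent-hash ring and the server with the lowest
+ * served/weight ratio is kept: the "power of two choices" strategy. A full
+ * server is discarded so that the stream reaches the backend queue instead.
+ */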
+
+/*
+ * This function applies the load-balancing algorithm to the stream, as
+ * defined by the backend it is assigned to. The stream is then marked as
+ * 'assigned'.
+ *
+ * This function MAY NOT be called with SF_ASSIGNED already set. If the stream
+ * had a server previously assigned, it is rebalanced, trying to avoid the same
+ * server, which should still be present in target_srv(&s->target) before the call.
+ * The function tries to keep the original connection slot if it reconnects to
+ * the same server, otherwise it releases it and tries to offer it.
+ *
+ * It is illegal to call this function with a stream in a queue.
+ *
+ * It may return :
+ * SRV_STATUS_OK if everything is OK. ->srv and ->target are assigned.
+ * SRV_STATUS_NOSRV if no server is available. Stream is not ASSIGNED
+ * SRV_STATUS_FULL if all servers are saturated. Stream is not ASSIGNED
+ * SRV_STATUS_INTERNAL for other unrecoverable errors.
+ *
+ * Upon successful return, the stream flag SF_ASSIGNED is set to indicate that
+ * it does not need to be called anymore. This means that target_srv(&s->target)
+ * can be trusted in balance and direct modes.
+ *
+ */
+
+int assign_server(struct stream *s)
+{
+ struct connection *conn = NULL;
+ struct server *conn_slot;
+ struct server *srv = NULL, *prev_srv;
+ int err;
+
+ err = SRV_STATUS_INTERNAL;
+ if (unlikely(s->pend_pos || s->flags & SF_ASSIGNED))
+ goto out_err;
+
+ prev_srv = objt_server(s->target);
+ conn_slot = s->srv_conn;
+
+ /* We have to release any connection slot before applying any LB algo,
+ * otherwise we may erroneously end up with no available slot.
+ */
+ if (conn_slot)
+ sess_change_server(s, NULL);
+
+	/* We will now try to find the right server and store it into <objt_server(s->target)>.
+ * Note that <objt_server(s->target)> may be NULL in case of dispatch or proxy mode,
+ * as well as if no server is available (check error code).
+ */
+
+ srv = NULL;
+ s->target = NULL;
+
+ if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI &&
+ ((s->sess->flags & SESS_FL_PREFER_LAST) ||
+ (s->be->options & PR_O_PREF_LAST))) {
+ struct sess_srv_list *srv_list;
+ list_for_each_entry(srv_list, &s->sess->srv_list, srv_list) {
+ struct server *tmpsrv = objt_server(srv_list->target);
+
+ if (tmpsrv && tmpsrv->proxy == s->be &&
+ ((s->sess->flags & SESS_FL_PREFER_LAST) ||
+ (!s->be->max_ka_queue ||
+ server_has_room(tmpsrv) || (
+ tmpsrv->queue.length + 1 < s->be->max_ka_queue))) &&
+ srv_currently_usable(tmpsrv)) {
+ list_for_each_entry(conn, &srv_list->conn_list, session_list) {
+ if (!(conn->flags & CO_FL_WAIT_XPRT)) {
+ srv = tmpsrv;
+ s->target = &srv->obj_type;
+ if (conn->flags & CO_FL_SESS_IDLE) {
+ conn->flags &= ~CO_FL_SESS_IDLE;
+ s->sess->idle_conns--;
+ }
+ goto out_ok;
+ }
+ }
+ }
+ }
+ }
+
+ if (s->be->lbprm.algo & BE_LB_KIND) {
+ /* we must check if we have at least one server available */
+ if (!s->be->lbprm.tot_weight) {
+ err = SRV_STATUS_NOSRV;
+ goto out;
+ }
+
+ /* if there's some queue on the backend, with certain algos we
+ * know it's because all servers are full.
+ */
+ if (s->be->queue.length && s->be->queue.length != s->be->beconn &&
+ (((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_FAS)|| // first
+ ((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_RR) || // roundrobin
+ ((s->be->lbprm.algo & (BE_LB_KIND|BE_LB_NEED|BE_LB_PARM)) == BE_LB_ALGO_SRR))) { // static-rr
+ err = SRV_STATUS_FULL;
+ goto out;
+ }
+
+ /* First check whether we need to fetch some data or simply call
+ * the LB lookup function. Only the hashing functions will need
+ * some input data in fact, and will support multiple algorithms.
+ */
+ switch (s->be->lbprm.algo & BE_LB_LKUP) {
+ case BE_LB_LKUP_RRTREE:
+ srv = fwrr_get_next_server(s->be, prev_srv);
+ break;
+
+ case BE_LB_LKUP_FSTREE:
+ srv = fas_get_next_server(s->be, prev_srv);
+ break;
+
+ case BE_LB_LKUP_LCTREE:
+ srv = fwlc_get_next_server(s->be, prev_srv);
+ break;
+
+ case BE_LB_LKUP_CHTREE:
+ case BE_LB_LKUP_MAP:
+ if ((s->be->lbprm.algo & BE_LB_KIND) == BE_LB_KIND_RR) {
+ /* static-rr (map) or random (chash) */
+ if ((s->be->lbprm.algo & BE_LB_PARM) == BE_LB_RR_RANDOM)
+ srv = get_server_rnd(s, prev_srv);
+ else
+ srv = map_get_server_rr(s->be, prev_srv);
+ break;
+ }
+ else if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI) {
+ /* unknown balancing algorithm */
+ err = SRV_STATUS_INTERNAL;
+ goto out;
+ }
+
+ switch (s->be->lbprm.algo & BE_LB_PARM) {
+ const struct sockaddr_storage *src;
+
+ case BE_LB_HASH_SRC:
+ src = sc_src(s->scf);
+ if (src && src->ss_family == AF_INET) {
+ srv = get_server_sh(s->be,
+ (void *)&((struct sockaddr_in *)src)->sin_addr,
+ 4, prev_srv);
+ }
+ else if (src && src->ss_family == AF_INET6) {
+ srv = get_server_sh(s->be,
+ (void *)&((struct sockaddr_in6 *)src)->sin6_addr,
+ 16, prev_srv);
+ }
+ break;
+
+ case BE_LB_HASH_URI:
+ /* URI hashing */
+ if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY) {
+ struct ist uri;
+
+ uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
+ if (s->be->lbprm.arg_opt1 & 2) {
+ struct http_uri_parser parser =
+ http_uri_parser_init(uri);
+
+ uri = http_parse_path(&parser);
+ if (!isttest(uri))
+ uri = ist("");
+ }
+ srv = get_server_uh(s->be, uri.ptr, uri.len, prev_srv);
+ }
+ break;
+
+ case BE_LB_HASH_PRM:
+ /* URL Parameter hashing */
+ if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY) {
+ struct ist uri;
+
+ uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
+ srv = get_server_ph(s->be, uri.ptr, uri.len, prev_srv);
+
+ if (!srv && s->txn->meth == HTTP_METH_POST)
+ srv = get_server_ph_post(s, prev_srv);
+ }
+ break;
+
+ case BE_LB_HASH_HDR:
+ /* Header Parameter hashing */
+ if (IS_HTX_STRM(s) && s->txn->req.msg_state >= HTTP_MSG_BODY)
+ srv = get_server_hh(s, prev_srv);
+ break;
+
+ case BE_LB_HASH_RDP:
+ /* RDP Cookie hashing */
+ srv = get_server_rch(s, prev_srv);
+ break;
+
+ case BE_LB_HASH_SMP:
+ /* sample expression hashing */
+ srv = get_server_expr(s, prev_srv);
+ break;
+
+ default:
+ /* unknown balancing algorithm */
+ err = SRV_STATUS_INTERNAL;
+ goto out;
+ }
+
+ /* If the hashing parameter was not found, let's fall
+ * back to round robin on the map.
+ */
+ if (!srv) {
+ if ((s->be->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_CHTREE)
+ srv = chash_get_next_server(s->be, prev_srv);
+ else
+ srv = map_get_server_rr(s->be, prev_srv);
+ }
+
+ /* end of map-based LB */
+ break;
+
+ default:
+ /* unknown balancing algorithm */
+ err = SRV_STATUS_INTERNAL;
+ goto out;
+ }
+
+ if (!srv) {
+ err = SRV_STATUS_FULL;
+ goto out;
+ }
+ else if (srv != prev_srv) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cum_lbconn);
+ _HA_ATOMIC_INC(&srv->counters.cum_lbconn);
+ }
+ s->target = &srv->obj_type;
+ }
+ else if (s->be->options & (PR_O_DISPATCH | PR_O_TRANSP)) {
+ s->target = &s->be->obj_type;
+ }
+ else {
+ err = SRV_STATUS_NOSRV;
+ goto out;
+ }
+
+out_ok:
+ s->flags |= SF_ASSIGNED;
+ err = SRV_STATUS_OK;
+ out:
+
+ /* Either we take back our connection slot, or we offer it to someone
+ * else if we don't need it anymore.
+ */
+ if (conn_slot) {
+ if (conn_slot == srv) {
+ sess_change_server(s, srv);
+ } else {
+ if (may_dequeue_tasks(conn_slot, s->be))
+ process_srv_queue(conn_slot);
+ }
+ }
+
+ out_err:
+ return err;
+}
+
+/* Allocate an address for the destination endpoint
+ * The address is taken from the currently assigned server, or from the
+ * dispatch or transparent address.
+ *
+ * Returns SRV_STATUS_OK on success. Does nothing if the address was
+ * already set.
+ * On error, no address is allocated and SRV_STATUS_INTERNAL is returned.
+ */
+static int alloc_dst_address(struct sockaddr_storage **ss,
+ struct server *srv, struct stream *s)
+{
+ const struct sockaddr_storage *dst;
+
+ if (*ss)
+ return SRV_STATUS_OK;
+
+ if ((s->flags & SF_DIRECT) || (s->be->lbprm.algo & BE_LB_KIND)) {
+ /* A server is necessarily known for this stream */
+ if (!(s->flags & SF_ASSIGNED))
+ return SRV_STATUS_INTERNAL;
+
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ **ss = srv->addr;
+ set_host_port(*ss, srv->svc_port);
+ if (!is_addr(*ss)) {
+ /* if the server has no address, we use the same address
+ * the client asked, which is handy for remapping ports
+ * locally on multiple addresses at once. Nothing is done
+ * for AF_UNIX addresses.
+ */
+ dst = sc_dst(s->scf);
+ if (dst && dst->ss_family == AF_INET) {
+ ((struct sockaddr_in *)*ss)->sin_family = AF_INET;
+ ((struct sockaddr_in *)*ss)->sin_addr =
+ ((struct sockaddr_in *)dst)->sin_addr;
+ } else if (dst && dst->ss_family == AF_INET6) {
+ ((struct sockaddr_in6 *)*ss)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *)*ss)->sin6_addr =
+ ((struct sockaddr_in6 *)dst)->sin6_addr;
+ }
+ }
+
+ /* if this server remaps proxied ports, we'll use
+ * the port the client connected to with an offset. */
+ if ((srv->flags & SRV_F_MAPPORTS)) {
+ int base_port;
+
+ dst = sc_dst(s->scf);
+ if (dst) {
+ /* First, retrieve the port from the incoming connection */
+ base_port = get_host_port(dst);
+
+ /* Second, assign the outgoing connection's port */
+ base_port += get_host_port(*ss);
+ set_host_port(*ss, base_port);
+ }
+ }
+ }
+ else if (s->be->options & PR_O_DISPATCH) {
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ /* connect to the defined dispatch addr */
+ **ss = s->be->dispatch_addr;
+ }
+ else if ((s->be->options & PR_O_TRANSP)) {
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ /* in transparent mode, use the original dest addr if no dispatch specified */
+ dst = sc_dst(s->scf);
+ if (dst && (dst->ss_family == AF_INET || dst->ss_family == AF_INET6))
+ **ss = *dst;
+ }
+ else {
+ /* no server and no LB algorithm ! */
+ return SRV_STATUS_INTERNAL;
+ }
+
+ return SRV_STATUS_OK;
+}
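+
+/* Illustrative example: "server s1 192.168.0.10:+1000" sets SRV_F_MAPPORTS,
+ * so a client that connected to port 80 is relayed to 192.168.0.10:1080: the
+ * configured offset (1000) is added to the incoming destination port.
+ */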
+
+/* This function assigns a server to stream <s> if required, and can add the
+ * connection to either the assigned server's queue or to the proxy's queue.
+ * If ->srv_conn is set, the stream is first released from the server.
+ * It may also be called with SF_DIRECT and/or SF_ASSIGNED though. It will
+ * be called before any connection and after any retry or redispatch occurs.
+ *
+ * It is not allowed to call this function with a stream in a queue.
+ *
+ * Returns :
+ *
+ * SRV_STATUS_OK if everything is OK.
+ * SRV_STATUS_NOSRV if no server is available. objt_server(s->target) = NULL.
+ * SRV_STATUS_QUEUED if the connection has been queued.
+ * SRV_STATUS_FULL if the server(s) is/are saturated and the
+ * connection could not be queued at the server's,
+ * which may be NULL if we queue on the backend.
+ * SRV_STATUS_INTERNAL for other unrecoverable errors.
+ *
+ */
+int assign_server_and_queue(struct stream *s)
+{
+ struct pendconn *p;
+ struct server *srv;
+ int err;
+
+ if (s->pend_pos)
+ return SRV_STATUS_INTERNAL;
+
+ err = SRV_STATUS_OK;
+ if (!(s->flags & SF_ASSIGNED)) {
+ struct server *prev_srv = objt_server(s->target);
+
+ err = assign_server(s);
+ if (prev_srv) {
+ /* This stream was previously assigned to a server. We have to
+ * update the stream's and the server's stats :
+ * - if the server changed :
+ * - set TX_CK_DOWN if txn.flags was TX_CK_VALID
+ * - set SF_REDISP if it was successfully redispatched
+ * - increment srv->redispatches and be->redispatches
+ * - if the server remained the same : update retries.
+ */
+
+ if (prev_srv != objt_server(s->target)) {
+ if (s->txn && (s->txn->flags & TX_CK_MASK) == TX_CK_VALID) {
+ s->txn->flags &= ~TX_CK_MASK;
+ s->txn->flags |= TX_CK_DOWN;
+ }
+ s->flags |= SF_REDISP;
+ _HA_ATOMIC_INC(&prev_srv->counters.redispatches);
+ _HA_ATOMIC_INC(&s->be->be_counters.redispatches);
+ } else {
+ _HA_ATOMIC_INC(&prev_srv->counters.retries);
+ _HA_ATOMIC_INC(&s->be->be_counters.retries);
+ }
+ }
+ }
+
+ switch (err) {
+ case SRV_STATUS_OK:
+ /* we have SF_ASSIGNED set */
+ srv = objt_server(s->target);
+ if (!srv)
+ return SRV_STATUS_OK; /* dispatch or proxy mode */
+
+ /* If we already have a connection slot, no need to check any queue */
+ if (s->srv_conn == srv)
+ return SRV_STATUS_OK;
+
+ /* OK, this stream already has an assigned server, but no
+ * connection slot yet. Either it is a redispatch, or it was
+ * assigned from persistence information (direct mode).
+ */
+ if ((s->flags & SF_REDIRECTABLE) && srv->rdr_len) {
+ /* server scheduled for redirection, and already assigned. We
+ * don't want to go further nor check the queue.
+ */
+ sess_change_server(s, srv); /* not really needed in fact */
+ return SRV_STATUS_OK;
+ }
+
+ /* We might have to queue this stream if the assigned server is full.
+ * We know we have to queue it into the server's queue, so if a maxqueue
+ * is set on the server, we must also check that the server's queue is
+ * not full, in which case we have to return FULL.
+ */
+ if (srv->maxconn &&
+ (srv->queue.length || srv->served >= srv_dynamic_maxconn(srv))) {
+
+ if (srv->maxqueue > 0 && srv->queue.length >= srv->maxqueue)
+ return SRV_STATUS_FULL;
+
+ p = pendconn_add(s);
+ if (p)
+ return SRV_STATUS_QUEUED;
+ else
+ return SRV_STATUS_INTERNAL;
+ }
+
+ /* OK, we can use this server. Let's reserve our place */
+ sess_change_server(s, srv);
+ return SRV_STATUS_OK;
+
+ case SRV_STATUS_FULL:
+ /* queue this stream into the proxy's queue */
+ p = pendconn_add(s);
+ if (p)
+ return SRV_STATUS_QUEUED;
+ else
+ return SRV_STATUS_INTERNAL;
+
+ case SRV_STATUS_NOSRV:
+ return err;
+
+ case SRV_STATUS_INTERNAL:
+ return err;
+
+ default:
+ return SRV_STATUS_INTERNAL;
+ }
+}
+
+/* Allocate an address if an explicit source address must be used for a backend
+ * connection.
+ *
+ * Two parameters are taken into account to check if a specific source address is
+ * configured. The first one is <srv> which is the server instance to connect
+ * to. It may be NULL when dispatching is used. The second one <be> is the
+ * backend instance which contains the target server or dispatch.
+ *
+ * A stream instance <s> can be used to set the stream owner of the backend
+ * connection. It is a required parameter if the source address is a dynamic
+ * parameter.
+ *
+ * Returns SRV_STATUS_OK if either no specific source address specified or its
+ * allocation is done correctly. On error returns SRV_STATUS_INTERNAL.
+ */
+int alloc_bind_address(struct sockaddr_storage **ss,
+ struct server *srv, struct proxy *be,
+ struct stream *s)
+{
+#if defined(CONFIG_HAP_TRANSPARENT)
+ const struct sockaddr_storage *addr;
+ struct conn_src *src = NULL;
+ struct sockaddr_in *sin;
+ char *vptr;
+ size_t vlen;
+#endif
+
+ /* Ensure the function will not overwrite an allocated address. */
+ BUG_ON(*ss);
+
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (srv && srv->conn_src.opts & CO_SRC_BIND)
+ src = &srv->conn_src;
+ else if (be->conn_src.opts & CO_SRC_BIND)
+ src = &be->conn_src;
+
+ /* no transparent mode, no need to allocate an address, returns OK */
+ if (!src)
+ return SRV_STATUS_OK;
+
+ switch (src->opts & CO_SRC_TPROXY_MASK) {
+ case CO_SRC_TPROXY_ADDR:
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ **ss = src->tproxy_addr;
+ break;
+
+ case CO_SRC_TPROXY_CLI:
+ case CO_SRC_TPROXY_CIP:
+ BUG_ON(!s); /* Dynamic source setting requires a stream instance. */
+
+ /* FIXME: what can we do if the client connects in IPv6 or unix socket ? */
+ addr = sc_src(s->scf);
+ if (!addr)
+ return SRV_STATUS_INTERNAL;
+
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ **ss = *addr;
+ break;
+
+ case CO_SRC_TPROXY_DYN:
+ BUG_ON(!s); /* Dynamic source setting requires a stream instance. */
+
+ if (!src->bind_hdr_occ || !IS_HTX_STRM(s))
+ return SRV_STATUS_INTERNAL;
+
+ if (!sockaddr_alloc(ss, NULL, 0))
+ return SRV_STATUS_INTERNAL;
+
+ /* bind to the IP in a header */
+ sin = (struct sockaddr_in *)*ss;
+ sin->sin_family = AF_INET;
+ sin->sin_port = 0;
+ sin->sin_addr.s_addr = 0;
+ if (!http_get_htx_hdr(htxbuf(&s->req.buf),
+ ist2(src->bind_hdr_name, src->bind_hdr_len),
+ src->bind_hdr_occ, NULL, &vptr, &vlen)) {
+ sockaddr_free(ss);
+ return SRV_STATUS_INTERNAL;
+ }
+
+ sin->sin_addr.s_addr = htonl(inetaddr_host_lim(vptr, vptr + vlen));
+ break;
+
+ default:
+ ;
+ }
+#endif
+
+ return SRV_STATUS_OK;
+}
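+
+/* Editor's illustration (not part of the source): the CO_SRC_TPROXY_CLI/CIP
+ * cases above simply clone the client's source address so that the outgoing
+ * socket can later bind() to it (IP transparency). A minimal standalone
+ * sketch of that copy, using plain malloc() where the real code uses its
+ * sockaddr pool allocator:
+ */
+#if 0 /* illustrative example, never compiled */
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+
+static int ex_clone_src(struct sockaddr_storage **dst,
+                        const struct sockaddr_storage *cli_src)
+{
+	if (!cli_src)
+		return -1;
+	*dst = malloc(sizeof(**dst));
+	if (!*dst)
+		return -1;
+	memcpy(*dst, cli_src, sizeof(**dst)); /* later used as bind() source */
+	return 0;
+}
+#endif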
+
+/* Attempt to get a backend connection from the server's per-thread idle
+ * connection storage (safe or idle connections). The <is_safe> argument
+ * indicates which type of connection the caller wants.
+ */
+struct connection *conn_backend_get(struct stream *s, struct server *srv, int is_safe, int64_t hash)
+{
+ struct connection *conn = NULL;
+ int i; // thread number
+ int found = 0;
+ int stop;
+
+ /* We need to lock even if this is our own list, because another
+ * thread may be trying to migrate that connection, and we don't want
+ * to end up with two threads using the same connection.
+ */
+ i = tid;
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn = srv_lookup_conn(is_safe ? &srv->per_thr[tid].safe_conns : &srv->per_thr[tid].idle_conns, hash);
+ if (conn)
+ conn_delete_from_tree(conn);
+
+ /* If we failed to pick a connection from the idle list, let's try again with
+ * the safe list.
+ */
+ if (!conn && !is_safe && srv->curr_safe_nb > 0) {
+ conn = srv_lookup_conn(&srv->per_thr[tid].safe_conns, hash);
+ if (conn) {
+ conn_delete_from_tree(conn);
+ is_safe = 1;
+ }
+ }
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* If we found a connection in our own list, and we don't have to
+ * steal one from another thread, then we're done.
+ */
+ if (conn)
+ goto done;
+
+ /* pool sharing globally disabled ? */
+ if (!(global.tune.options & GTUNE_IDLE_POOL_SHARED))
+ goto done;
+
+ /* Are we allowed to pick from another thread ? We'll still try
+ * it if we're running low on FDs as we don't want to create
+ * extra conns in this case, otherwise we can give up if we have
+ * too few idle conns and the server protocol supports establishing
+ * connections (i.e. not a reverse-http server for example).
+ */
+ if (srv->curr_idle_conns < srv->low_idle_conns &&
+ ha_used_fds < global.tune.pool_low_count) {
+ const struct protocol *srv_proto = protocol_lookup(srv->addr.ss_family, PROTO_TYPE_STREAM, 0);
+
+ if (srv_proto && srv_proto->connect)
+ goto done;
+ }
+
+ /* Lookup all other threads for an idle connection, starting from last
+ * unvisited thread, but always staying in the same group.
+ */
+ stop = srv->per_tgrp[tgid - 1].next_takeover;
+ if (stop >= tg->count)
+ stop %= tg->count;
+
+ stop += tg->base;
+ i = stop;
+ do {
+ if (!srv->curr_idle_thr[i] || i == tid)
+ continue;
+
+ if (HA_SPIN_TRYLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock) != 0)
+ continue;
+ conn = srv_lookup_conn(is_safe ? &srv->per_thr[i].safe_conns : &srv->per_thr[i].idle_conns, hash);
+ while (conn) {
+ if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) {
+ conn_delete_from_tree(conn);
+ _HA_ATOMIC_INC(&activity[tid].fd_takeover);
+ found = 1;
+ break;
+ }
+
+ conn = srv_lookup_conn_next(conn);
+ }
+
+ if (!found && !is_safe && srv->curr_safe_nb > 0) {
+ conn = srv_lookup_conn(&srv->per_thr[i].safe_conns, hash);
+ while (conn) {
+ if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) {
+ conn_delete_from_tree(conn);
+ _HA_ATOMIC_INC(&activity[tid].fd_takeover);
+ found = 1;
+ is_safe = 1;
+ break;
+ }
+
+ conn = srv_lookup_conn_next(conn);
+ }
+ }
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+ } while (!found && (i = (i + 1 == tg->base + tg->count) ? tg->base : i + 1) != stop);
+
+ if (!found)
+ conn = NULL;
+ done:
+ if (conn) {
+ _HA_ATOMIC_STORE(&srv->per_tgrp[tgid - 1].next_takeover, (i + 1 == tg->base + tg->count) ? tg->base : i + 1);
+
+ srv_use_conn(srv, conn);
+
+ _HA_ATOMIC_DEC(&srv->curr_idle_conns);
+ _HA_ATOMIC_DEC(conn->flags & CO_FL_SAFE_LIST ? &srv->curr_safe_nb : &srv->curr_idle_nb);
+ _HA_ATOMIC_DEC(&srv->curr_idle_thr[i]);
+ conn->flags &= ~CO_FL_LIST_MASK;
+ __ha_barrier_atomic_store();
+
+ if ((s->be->options & PR_O_REUSE_MASK) == PR_O_REUSE_SAFE &&
+ conn->mux->flags & MX_FL_HOL_RISK) {
+ /* attach the connection to the session private list
+ */
+ conn->owner = s->sess;
+ session_add_conn(s->sess, conn, conn->target);
+ }
+ else {
+ srv_add_to_avail_list(srv, conn);
+ }
+ }
+ return conn;
+}
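+
+/* Editor's illustration (not part of the source): the cross-thread lookup
+ * above is a try-lock stealing loop: peers are visited in a ring starting
+ * from a rotating point, and a contended lock is skipped rather than waited
+ * for. A minimal pthread sketch of the traversal, where the hypothetical
+ * ex_steal_one() stands in for the takeover attempt:
+ */
+#if 0 /* illustrative example, never compiled */
+#include <pthread.h>
+
+#define EX_NBTHREAD 8
+static pthread_spinlock_t ex_lock[EX_NBTHREAD];
+
+static void *ex_steal(int self, int start, void *(*ex_steal_one)(int))
+{
+	void *got = NULL;
+	int i = start;
+
+	do {
+		if (i == self)
+			continue; /* own list was already checked */
+		if (pthread_spin_trylock(&ex_lock[i]) != 0)
+			continue; /* contended: move on, never block */
+		got = ex_steal_one(i);
+		pthread_spin_unlock(&ex_lock[i]);
+	} while (!got && (i = (i + 1) % EX_NBTHREAD) != start);
+	return got;
+}
+#endif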
+
+static int do_connect_server(struct stream *s, struct connection *conn)
+{
+ int ret = SF_ERR_NONE;
+ int conn_flags = 0;
+
+ if (unlikely(!conn || !conn->ctrl || !conn->ctrl->connect))
+ return SF_ERR_INTERNAL;
+
+ if (co_data(&s->res))
+ conn_flags |= CONNECT_HAS_DATA;
+ if (s->conn_retries == s->be->conn_retries)
+ conn_flags |= CONNECT_CAN_USE_TFO;
+ if (!conn_ctrl_ready(conn) || !conn_xprt_ready(conn)) {
+ ret = conn->ctrl->connect(conn, conn_flags);
+ if (ret != SF_ERR_NONE)
+ return ret;
+
+ /* we're in the process of establishing a connection */
+ s->scb->state = SC_ST_CON;
+ }
+ else {
+ /* try to reuse the existing connection, it will be
+ * confirmed once we can send on it.
+ */
+ /* Is the connection really ready ? */
+ if (conn->mux->ctl(conn, MUX_CTL_STATUS, NULL) & MUX_STATUS_READY)
+ s->scb->state = SC_ST_RDY;
+ else
+ s->scb->state = SC_ST_CON;
+ }
+
+ /* needs src ip/port for logging */
+ if (s->flags & SF_SRC_ADDR)
+ conn_get_src(conn);
+
+ return ret;
+}
+
+/*
+ * This function initiates a connection to the server assigned to this stream
+ * (s->target, s->scb->dst). It will assign a server if none
+ * is assigned yet.
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ * The server-facing stream connector is expected to hold a pre-allocated connection.
+ */
+int connect_server(struct stream *s)
+{
+ struct connection *cli_conn = objt_conn(strm_orig(s));
+ struct connection *srv_conn = NULL;
+ struct server *srv;
+ int reuse_mode = s->be->options & PR_O_REUSE_MASK;
+ int reuse = 0;
+ int init_mux = 0;
+ int err;
+#ifdef USE_OPENSSL
+ struct sample *sni_smp = NULL;
+#endif
+ struct sockaddr_storage *bind_addr = NULL;
+ int proxy_line_ret;
+ int64_t hash = 0;
+ struct conn_hash_params hash_params;
+
+ /* in standard configuration, srv will be valid
+ * it can be NULL for dispatch mode or transparent backend */
+ srv = objt_server(s->target);
+
+ /* Override reuse-mode if reverse-connect is used. */
+ if (srv && srv->flags & SRV_F_RHTTP)
+ reuse_mode = PR_O_REUSE_ALWS;
+
+ err = alloc_dst_address(&s->scb->dst, srv, s);
+ if (err != SRV_STATUS_OK)
+ return SF_ERR_INTERNAL;
+
+ err = alloc_bind_address(&bind_addr, srv, s->be, s);
+ if (err != SRV_STATUS_OK)
+ return SF_ERR_INTERNAL;
+
+#ifdef USE_OPENSSL
+ if (srv && srv->ssl_ctx.sni) {
+ sni_smp = sample_fetch_as_type(s->be, s->sess, s,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
+ srv->ssl_ctx.sni, SMP_T_STR);
+ }
+#endif
+
+ /* do not reuse if mode is not http */
+ if (!IS_HTX_STRM(s)) {
+ DBG_TRACE_STATE("skip idle connections reuse: no htx", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto skip_reuse;
+ }
+
+ /* disable reuse if websocket stream and the protocol to use is not the
+ * same as the main protocol of the server.
+ */
+ if (unlikely(s->flags & SF_WEBSOCKET) && srv) {
+ if (!srv_check_reuse_ws(srv)) {
+ DBG_TRACE_STATE("skip idle connections reuse: websocket stream", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto skip_reuse;
+ }
+ }
+
+ /* first, set unique connection parameters and then calculate hash */
+ memset(&hash_params, 0, sizeof(hash_params));
+
+ /* 1. target */
+ hash_params.target = s->target;
+
+#ifdef USE_OPENSSL
+	/* 2. sni
+	 * only test if the sample is not null, as smp_make_safe (called before
+	 * ssl_sock_set_servername) can only fail if this is not the case
+	 */
+ if (sni_smp) {
+ hash_params.sni_prehash =
+ conn_hash_prehash(sni_smp->data.u.str.area,
+ sni_smp->data.u.str.data);
+ }
+#endif /* USE_OPENSSL */
+
+ /* 3. destination address */
+ if (srv && srv_is_transparent(srv))
+ hash_params.dst_addr = s->scb->dst;
+
+ /* 4. source address */
+ hash_params.src_addr = bind_addr;
+
+ /* 5. proxy protocol */
+ if (srv && srv->pp_opts) {
+ proxy_line_ret = make_proxy_line(trash.area, trash.size, srv, cli_conn, s);
+ if (proxy_line_ret) {
+ hash_params.proxy_prehash =
+ conn_hash_prehash(trash.area, proxy_line_ret);
+ }
+ }
+
+ hash = conn_calculate_hash(&hash_params);
+
+ /* first, search for a matching connection in the session's idle conns */
+ srv_conn = session_get_conn(s->sess, s->target, hash);
+ if (srv_conn) {
+ DBG_TRACE_STATE("reuse connection from session", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ reuse = 1;
+ }
+
+ if (srv && !reuse && reuse_mode != PR_O_REUSE_NEVR) {
+ /* Below we pick connections from the safe, idle or
+ * available (which are safe too) lists based
+ * on the strategy, the fact that this is a first or second
+ * (retryable) request, with the indicated priority (1 or 2) :
+ *
+ * SAFE AGGR ALWS
+ *
+ * +-----+-----+ +-----+-----+ +-----+-----+
+ * req| 1st | 2nd | req| 1st | 2nd | req| 1st | 2nd |
+ * ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
+ * safe| - | 2 | safe| 1 | 2 | safe| 1 | 2 |
+ * ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
+ * idle| - | 1 | idle| - | 1 | idle| 2 | 1 |
+ * ----+-----+-----+ ----+-----+-----+ ----+-----+-----+
+ *
+ * Idle conns are necessarily looked up on the same thread so
+ * that there is no concurrency issues.
+ */
+ if (!eb_is_empty(&srv->per_thr[tid].avail_conns)) {
+ srv_conn = srv_lookup_conn(&srv->per_thr[tid].avail_conns, hash);
+ if (srv_conn) {
+ /* connection cannot be in idle list if used as an avail idle conn. */
+ BUG_ON(LIST_INLIST(&srv_conn->idle_list));
+
+ DBG_TRACE_STATE("reuse connection from avail", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ reuse = 1;
+ }
+ }
+
+ /* if no available connections found, search for an idle/safe */
+ if (!srv_conn && srv->max_idle_conns && srv->curr_idle_conns > 0) {
+ const int not_first_req = s->txn && s->txn->flags & TX_NOT_FIRST;
+ const int idle = srv->curr_idle_nb > 0;
+ const int safe = srv->curr_safe_nb > 0;
+ const int retry_safe = (s->be->retry_type & (PR_RE_CONN_FAILED | PR_RE_DISCONNECTED | PR_RE_TIMEOUT)) ==
+ (PR_RE_CONN_FAILED | PR_RE_DISCONNECTED | PR_RE_TIMEOUT);
+
+ /* second column of the tables above,
+ * search for an idle then safe conn */
+ if (not_first_req || retry_safe) {
+ if (idle || safe)
+ srv_conn = conn_backend_get(s, srv, 0, hash);
+ }
+ /* first column of the tables above */
+ else if (reuse_mode >= PR_O_REUSE_AGGR) {
+ /* search for a safe conn */
+ if (safe)
+ srv_conn = conn_backend_get(s, srv, 1, hash);
+
+ /* search for an idle conn if no safe conn found
+ * on always reuse mode */
+ if (!srv_conn &&
+ reuse_mode == PR_O_REUSE_ALWS && idle) {
+					/* TODO conn_backend_get should not check the
+					 * safe list in this case */
+ srv_conn = conn_backend_get(s, srv, 0, hash);
+ }
+ }
+
+ if (srv_conn) {
+ DBG_TRACE_STATE("reuse connection from idle/safe", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ reuse = 1;
+ }
+ }
+ }
+
+
+ /* here reuse might have been set above, indicating srv_conn finally
+ * is OK.
+ */
+
+ if (ha_used_fds > global.tune.pool_high_count && srv) {
+ struct connection *tokill_conn = NULL;
+		/* We can't reuse a connection, and we have more FDs than deemed
+		 * acceptable, so attempt to kill an idling connection.
+		 */
+ /* First, try from our own idle list */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (!LIST_ISEMPTY(&srv->per_thr[tid].idle_conn_list)) {
+ tokill_conn = LIST_ELEM(srv->per_thr[tid].idle_conn_list.n, struct connection *, idle_list);
+ conn_delete_from_tree(tokill_conn);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* Release the idle lock before calling mux->destroy.
+ * It will in turn call srv_release_conn through
+ * conn_free which also uses it.
+ */
+ tokill_conn->mux->destroy(tokill_conn->ctx);
+ }
+ else {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ /* If not, iterate over other thread's idling pool, and try to grab one */
+ if (!tokill_conn) {
+ int i;
+
+ for (i = tid; (i = ((i + 1 == global.nbthread) ? 0 : i + 1)) != tid;) {
+ // just silence stupid gcc which reports an absurd
+ // out-of-bounds warning for <i> which is always
+ // exactly zero without threads, but it seems to
+ // see it possibly larger.
+ ALREADY_CHECKED(i);
+
+ if (HA_SPIN_TRYLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock) != 0)
+ continue;
+
+ if (!LIST_ISEMPTY(&srv->per_thr[i].idle_conn_list)) {
+ tokill_conn = LIST_ELEM(srv->per_thr[i].idle_conn_list.n, struct connection *, idle_list);
+ conn_delete_from_tree(tokill_conn);
+ }
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+
+ if (tokill_conn) {
+					/* We got one, put it into the concerned thread's to-kill list, and wake its kill task */
+
+ MT_LIST_APPEND(&idle_conns[i].toremove_conns,
+ &tokill_conn->toremove_list);
+ task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
+ break;
+ }
+ }
+ }
+
+ }
+
+ if (reuse) {
+ if (srv_conn->mux) {
+ int avail = srv_conn->mux->avail_streams(srv_conn);
+
+ if (avail <= 1) {
+ /* No more streams available, remove it from the list */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(srv_conn);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ if (avail >= 1) {
+ if (srv_conn->mux->attach(srv_conn, s->scb->sedesc, s->sess) == -1) {
+ srv_conn = NULL;
+ if (sc_reset_endp(s->scb) < 0)
+ return SF_ERR_INTERNAL;
+ sc_ep_clr(s->scb, ~SE_FL_DETACHED);
+ }
+ }
+ else
+ srv_conn = NULL;
+ }
+ /* otherwise srv_conn is left intact */
+ }
+ else
+ srv_conn = NULL;
+
+skip_reuse:
+ /* no reuse or failed to reuse the connection above, pick a new one */
+ if (!srv_conn) {
+ if (srv && (srv->flags & SRV_F_RHTTP)) {
+ DBG_TRACE_USER("cannot open a new connection for reverse server", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ return SF_ERR_INTERNAL;
+ }
+
+ srv_conn = conn_new(s->target);
+ if (srv_conn) {
+ DBG_TRACE_STATE("alloc new be connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ srv_conn->owner = s->sess;
+
+ /* connection will be attached to the session if
+ * http-reuse mode is never or it is not targeted to a
+ * server */
+ if (reuse_mode == PR_O_REUSE_NEVR || !srv)
+ conn_set_private(srv_conn);
+
+ /* assign bind_addr to srv_conn */
+ srv_conn->src = bind_addr;
+ bind_addr = NULL;
+
+ srv_conn->hash_node->node.key = hash;
+ }
+ }
+
+ /* if bind_addr is non NULL free it */
+ sockaddr_free(&bind_addr);
+
+ /* srv_conn is still NULL only on allocation failure */
+ if (!srv_conn)
+ return SF_ERR_RESOURCE;
+
+ /* copy the target address into the connection */
+ *srv_conn->dst = *s->scb->dst;
+
+ /* Copy network namespace from client connection */
+ srv_conn->proxy_netns = cli_conn ? cli_conn->proxy_netns : NULL;
+
+ if (!srv_conn->xprt) {
+ /* set the correct protocol on the output stream connector */
+ if (srv) {
+ if (conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, 0), srv->xprt)) {
+ conn_free(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ } else if (obj_type(s->target) == OBJ_TYPE_PROXY) {
+ int ret;
+
+ /* proxies exclusively run on raw_sock right now */
+ ret = conn_prepare(srv_conn, protocol_lookup(srv_conn->dst->ss_family, PROTO_TYPE_STREAM, 0), xprt_get(XPRT_RAW));
+ if (ret < 0 || !(srv_conn->ctrl)) {
+ conn_free(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ }
+ else {
+ conn_free(srv_conn);
+ return SF_ERR_INTERNAL; /* how did we get there ? */
+ }
+
+ if (sc_attach_mux(s->scb, NULL, srv_conn) < 0) {
+ conn_free(srv_conn);
+ return SF_ERR_INTERNAL; /* how did we get there ? */
+ }
+ srv_conn->ctx = s->scb;
+
+#if defined(USE_OPENSSL) && defined(TLSEXT_TYPE_application_layer_protocol_negotiation)
+ if (!srv ||
+ (srv->use_ssl != 1 || (!(srv->ssl_ctx.alpn_str) && !(srv->ssl_ctx.npn_str)) ||
+ srv->mux_proto || !IS_HTX_STRM(s)))
+#endif
+ init_mux = 1;
+
+ /* process the case where the server requires the PROXY protocol to be sent */
+ srv_conn->send_proxy_ofs = 0;
+
+ if (srv && srv->pp_opts) {
+ srv_conn->flags |= CO_FL_SEND_PROXY;
+ srv_conn->send_proxy_ofs = 1; /* must compute size */
+ }
+
+ if (srv && (srv->flags & SRV_F_SOCKS4_PROXY)) {
+ srv_conn->send_proxy_ofs = 1;
+ srv_conn->flags |= CO_FL_SOCKS4;
+ }
+
+#if defined(USE_OPENSSL) && defined(TLSEXT_TYPE_application_layer_protocol_negotiation)
+ /* if websocket stream, try to update connection ALPN. */
+ if (unlikely(s->flags & SF_WEBSOCKET) &&
+ srv && srv->use_ssl && srv->ssl_ctx.alpn_str) {
+ char *alpn = "";
+ int force = 0;
+
+ switch (srv->ws) {
+ case SRV_WS_AUTO:
+ alpn = "\x08http/1.1";
+ force = 0;
+ break;
+ case SRV_WS_H1:
+ alpn = "\x08http/1.1";
+ force = 1;
+ break;
+ case SRV_WS_H2:
+ alpn = "\x02h2";
+ force = 1;
+ break;
+ }
+
+ if (!conn_update_alpn(srv_conn, ist(alpn), force))
+ DBG_TRACE_STATE("update alpn for websocket", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ }
+#endif
+ }
+ else {
+ s->flags |= SF_SRV_REUSED;
+
+ /* Currently there seems to be no known cases of xprt ready
+ * without the mux installed here.
+ */
+ BUG_ON(!srv_conn->mux);
+
+ if (!(srv_conn->mux->ctl(srv_conn, MUX_CTL_STATUS, NULL) & MUX_STATUS_READY))
+ s->flags |= SF_SRV_REUSED_ANTICIPATED;
+ }
+
+ /* flag for logging source ip/port */
+ if (strm_fe(s)->options2 & PR_O2_SRC_ADDR)
+ s->flags |= SF_SRC_ADDR;
+
+ /* disable lingering */
+ if (s->be->options & PR_O_TCP_NOLING)
+ s->scb->flags |= SC_FL_NOLINGER;
+
+ if (s->flags & SF_SRV_REUSED) {
+ _HA_ATOMIC_INC(&s->be->be_counters.reuse);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.reuse);
+ } else {
+ _HA_ATOMIC_INC(&s->be->be_counters.connect);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.connect);
+ }
+
+ err = do_connect_server(s, srv_conn);
+ if (err != SF_ERR_NONE)
+ return err;
+
+#ifdef USE_OPENSSL
+ if (!(s->flags & SF_SRV_REUSED)) {
+ if (smp_make_safe(sni_smp))
+ ssl_sock_set_servername(srv_conn, sni_smp->data.u.str.area);
+ }
+#endif /* USE_OPENSSL */
+
+ /* The CO_FL_SEND_PROXY flag may have been set by the connect method,
+ * if so, add our handshake pseudo-XPRT now.
+ */
+ if ((srv_conn->flags & CO_FL_HANDSHAKE)) {
+ if (xprt_add_hs(srv_conn) < 0) {
+ conn_full_close(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ }
+ conn_xprt_start(srv_conn);
+
+	/* We have to defer the mux initialization until after do_connect_server()
+	 * has been called, as we need the xprt to have been properly
+ * initialized, or any attempt to recv during the mux init may
+ * fail, and flag the connection as CO_FL_ERROR.
+ */
+ if (init_mux) {
+ const struct mux_ops *alt_mux =
+ likely(!(s->flags & SF_WEBSOCKET)) ? NULL : srv_get_ws_proto(srv);
+ if (conn_install_mux_be(srv_conn, s->scb, s->sess, alt_mux) < 0) {
+ conn_full_close(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ if (IS_HTX_STRM(s)) {
+ /* If we're doing http-reuse always, and the connection
+ * is not private with available streams (an http2
+ * connection), add it to the available list, so that
+ * others can use it right away. If the connection is
+ * private or we're doing http-reuse safe and the mux
+ * protocol supports multiplexing, add it in the
+ * session server list.
+ */
+ if (srv && reuse_mode == PR_O_REUSE_ALWS &&
+ !(srv_conn->flags & CO_FL_PRIVATE) &&
+ srv_conn->mux->avail_streams(srv_conn) > 0) {
+ srv_add_to_avail_list(srv, srv_conn);
+ }
+ else if (srv_conn->flags & CO_FL_PRIVATE ||
+ (reuse_mode == PR_O_REUSE_SAFE &&
+ srv_conn->mux->flags & MX_FL_HOL_RISK)) {
+				/* If it fails now, the same will be done by the mux->detach() callback */
+ session_add_conn(s->sess, srv_conn, srv_conn->target);
+ }
+ }
+ }
+
+#if defined(USE_OPENSSL) && (defined(OPENSSL_IS_BORINGSSL) || (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L))
+
+ if (!reuse && cli_conn && srv && srv_conn->mux &&
+ (srv->ssl_ctx.options & SRV_SSL_O_EARLY_DATA) &&
+ /* Only attempt to use early data if either the client sent
+ * early data, so that we know it can handle a 425, or if
+ * we are allowed to retry requests on early data failure, and
+ * it's our first try
+ */
+ ((cli_conn->flags & CO_FL_EARLY_DATA) ||
+ ((s->be->retry_type & PR_RE_EARLY_ERROR) && !s->conn_retries)) &&
+ co_data(sc_oc(s->scb)) &&
+ srv_conn->flags & CO_FL_SSL_WAIT_HS)
+ srv_conn->flags &= ~(CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN);
+#endif
+
+ /* set connect timeout */
+ s->conn_exp = tick_add_ifset(now_ms, s->be->timeout.connect);
+
+ if (srv) {
+ int count;
+
+ s->flags |= SF_CURR_SESS;
+ count = _HA_ATOMIC_ADD_FETCH(&srv->cur_sess, 1);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.cur_sess_max, count);
+ if (s->be->lbprm.server_take_conn)
+ s->be->lbprm.server_take_conn(srv);
+ }
+
+ /* Now handle synchronously connected sockets. We know the stream connector
+	 * is at least in state SC_ST_CON. These are typically UNIX
+	 * sockets, socket pairs, and occasionally TCP connections on the
+	 * loopback of a heavily loaded system.
+ */
+ if (srv_conn->flags & CO_FL_ERROR)
+ s->scb->flags |= SC_FL_ERROR;
+
+ /* If we had early data, and the handshake ended, then
+ * we can remove the flag, and attempt to wake the task up,
+ * in the event there's an analyser waiting for the end of
+ * the handshake.
+ */
+ if (!(srv_conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS)))
+ sc_ep_clr(s->scb, SE_FL_WAIT_FOR_HS);
+
+ if (!sc_state_in(s->scb->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO) &&
+ (srv_conn->flags & CO_FL_WAIT_XPRT) == 0) {
+ s->conn_exp = TICK_ETERNITY;
+ sc_oc(s->scb)->flags |= CF_WRITE_EVENT;
+ if (s->scb->state == SC_ST_CON)
+ s->scb->state = SC_ST_RDY;
+ }
+
+ /* Report EOI on the channel if it was reached from the mux point of
+ * view.
+ *
+ * Note: This test is only required because si_cs_process is also the SI
+ * wake callback. Otherwise si_cs_recv()/si_cs_send() already take
+ * care of it.
+ */
+ if (sc_ep_test(s->scb, SE_FL_EOI) && !(s->scb->flags & SC_FL_EOI)) {
+ s->scb->flags |= SC_FL_EOI;
+ sc_ic(s->scb)->flags |= CF_READ_EVENT;
+ }
+
+ /* catch all sync connect while the mux is not already installed */
+ if (!srv_conn->mux && !(srv_conn->flags & CO_FL_WAIT_XPRT)) {
+ if (conn_create_mux(srv_conn) < 0) {
+ conn_full_close(srv_conn);
+ return SF_ERR_INTERNAL;
+ }
+ }
+
+ return SF_ERR_NONE; /* connection is OK */
+}
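+
+/* Editor's illustration (not part of the source): connect_server() only
+ * reuses a connection whose parameter hash matches, so two streams needing,
+ * say, different SNI values never share one. A toy version of such a keyed
+ * hash, with FNV-1a standing in for the real conn_calculate_hash():
+ */
+#if 0 /* illustrative example, never compiled */
+#include <stdint.h>
+#include <string.h>
+#include <sys/socket.h>
+
+static uint64_t ex_fnv1a(uint64_t h, const void *p, size_t len)
+{
+	const unsigned char *b = p;
+
+	while (len--) {
+		h ^= *b++;
+		h *= 0x100000001b3ULL;
+	}
+	return h;
+}
+
+/* mix only the parameters that are actually set, like hash_params above */
+static uint64_t ex_conn_hash(const void *target, const char *sni,
+                             const struct sockaddr_storage *src)
+{
+	uint64_t h = 0xcbf29ce484222325ULL;
+
+	h = ex_fnv1a(h, &target, sizeof(target));
+	if (sni)
+		h = ex_fnv1a(h, sni, strlen(sni));
+	if (src)
+		h = ex_fnv1a(h, src, sizeof(*src));
+	return h;
+}
+#endif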
+
+
+/* This function performs the "redispatch" part of a connection attempt. It
+ * will assign a server if required, queue the connection if required, and
+ * handle errors that might arise at this level. It can change the server
+ * state. It will return 1 if it encounters an error, switches the server
+ * state, or has to queue a connection. Otherwise, it will return 0 indicating
+ * that the connection is ready to use.
+ */
+
+int srv_redispatch_connect(struct stream *s)
+{
+ struct server *srv;
+ int conn_err;
+
+ /* We know that we don't have any connection pending, so we will
+ * try to get a new one, and wait in this state if it's queued
+ */
+ redispatch:
+ conn_err = assign_server_and_queue(s);
+ srv = objt_server(s->target);
+
+ switch (conn_err) {
+ case SRV_STATUS_OK:
+ break;
+
+ case SRV_STATUS_FULL:
+ /* The server has reached its maxqueue limit. Either PR_O_REDISP is set
+ * and we can redispatch to another server, or it is not and we return
+ * 503. This only makes sense in DIRECT mode however, because normal LB
+ * algorithms would never select such a server, and hash algorithms
+ * would bring us on the same server again. Note that s->target is set
+ * in this case.
+ */
+ if (((s->flags & (SF_DIRECT|SF_FORCE_PRST)) == SF_DIRECT) &&
+ (s->be->options & PR_O_REDISP)) {
+ s->flags &= ~(SF_DIRECT | SF_ASSIGNED);
+ sockaddr_free(&s->scb->dst);
+ goto redispatch;
+ }
+
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_QUEUE_ERR;
+ }
+
+ _HA_ATOMIC_INC(&srv->counters.failed_conns);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+ return 1;
+
+ case SRV_STATUS_NOSRV:
+ /* note: it is guaranteed that srv == NULL here */
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ }
+
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+ return 1;
+
+ case SRV_STATUS_QUEUED:
+ s->conn_exp = tick_add_ifset(now_ms, s->be->timeout.queue);
+ s->scb->state = SC_ST_QUE;
+ /* do nothing else and do not wake any other stream up */
+ return 1;
+
+ case SRV_STATUS_INTERNAL:
+ default:
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+ }
+
+ if (srv)
+ srv_inc_sess_ctr(srv);
+ if (srv)
+ srv_set_sess_last(srv);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.failed_conns);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+
+ /* release other streams waiting for this server */
+ if (may_dequeue_tasks(srv, s->be))
+ process_srv_queue(srv);
+ return 1;
+ }
+ /* if we get here, it's because we got SRV_STATUS_OK, which also
+ * means that the connection has not been queued.
+ */
+ return 0;
+}
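+
+/* Editor's note (illustration, not part of the source): the SRV_STATUS_FULL
+ * branch above only redispatches when "option redispatch" is set and the
+ * stream was directed to a specific server, e.g. by cookie persistence. A
+ * hypothetical configuration exercising it could look like:
+ *
+ *   backend app
+ *       balance roundrobin
+ *       option redispatch
+ *       retries 3
+ *       cookie SRV insert indirect nocache
+ *       server s1 192.0.2.10:80 cookie s1 maxconn 100 maxqueue 50
+ *       server s2 192.0.2.11:80 cookie s2 maxconn 100 maxqueue 50
+ */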
+
+/* Check if the connection request is in such a state that it can be aborted. */
+static int back_may_abort_req(struct channel *req, struct stream *s)
+{
+ return ((s->scf->flags & SC_FL_ERROR) ||
+ ((s->scb->flags & (SC_FL_SHUT_WANTED|SC_FL_SHUT_DONE)) && /* empty and client aborted */
+ (!co_data(req) || (s->be->options & PR_O_ABRT_CLOSE))));
+}
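+
+/* Editor's note (illustration, not part of the source): the "empty and
+ * client aborted" condition above is where "option abortonclose" takes
+ * effect: without it, a request with data still to forward keeps the
+ * connection attempt alive even after the client closes.
+ */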
+
+/* Update back stream connector status for input states SC_ST_ASS, SC_ST_QUE,
+ * SC_ST_TAR. Other input states are simply ignored.
+ * Possible output states are SC_ST_CLO, SC_ST_TAR, SC_ST_ASS, SC_ST_REQ, SC_ST_CON
+ * and SC_ST_EST. Flags must have previously been updated for timeouts and other
+ * conditions.
+ */
+void back_try_conn_req(struct stream *s)
+{
+ struct server *srv = objt_server(s->target);
+ struct stconn *sc = s->scb;
+ struct channel *req = &s->req;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ if (sc->state == SC_ST_ASS) {
+ /* Server assigned to connection request, we have to try to connect now */
+ int conn_err;
+
+ /* Before we try to initiate the connection, see if the
+ * request may be aborted instead.
+ */
+ if (back_may_abort_req(req, s)) {
+ s->conn_err_type |= STRM_ET_CONN_ABRT;
+ DBG_TRACE_STATE("connection aborted", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto abort_connection;
+ }
+
+ conn_err = connect_server(s);
+ srv = objt_server(s->target);
+
+ if (conn_err == SF_ERR_NONE) {
+ /* state = SC_ST_CON or SC_ST_EST now */
+ if (srv)
+ srv_inc_sess_ctr(srv);
+ if (srv)
+ srv_set_sess_last(srv);
+ DBG_TRACE_STATE("connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* We have received a synchronous error. We might have to
+ * abort, retry immediately or redispatch.
+ */
+ if (conn_err == SF_ERR_INTERNAL) {
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+ }
+
+ if (srv)
+ srv_inc_sess_ctr(srv);
+ if (srv)
+ srv_set_sess_last(srv);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.failed_conns);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+
+ /* release other streams waiting for this server */
+ sess_change_server(s, NULL);
+ if (may_dequeue_tasks(srv, s->be))
+ process_srv_queue(srv);
+
+ /* Failed and not retryable. */
+ sc_abort(sc);
+ sc_shutdown(sc);
+ sc->flags |= SC_FL_ERROR;
+
+ s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
+
+ /* we may need to know the position in the queue for logging */
+ pendconn_cond_unlink(s->pend_pos);
+
+ /* no stream was ever accounted for this server */
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("internal error during connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+		/* We are facing a retryable error, but we don't want to insert a
+		 * turn-around delay, as the problem is most likely a source port
+		 * shortage, so we prefer to retry immediately.
+		 */
+ sc->state = SC_ST_CER;
+ sc->flags &= ~SC_FL_ERROR;
+ back_handle_st_cer(s);
+
+ DBG_TRACE_STATE("connection error, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ /* now sc->state is one of SC_ST_CLO, SC_ST_TAR, SC_ST_ASS, SC_ST_REQ */
+ }
+ else if (sc->state == SC_ST_QUE) {
+ /* connection request was queued, check for any update */
+ if (!pendconn_dequeue(s)) {
+ /* The connection is not in the queue anymore. Either
+ * we have a server connection slot available and we
+ * go directly to the assigned state, or we need to
+ * load-balance first and go to the INI state.
+ */
+ s->conn_exp = TICK_ETERNITY;
+ if (unlikely(!(s->flags & SF_ASSIGNED)))
+ sc->state = SC_ST_REQ;
+ else {
+ s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
+ sc->state = SC_ST_ASS;
+ }
+ DBG_TRACE_STATE("dequeue connection request", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* Connection request still in queue... */
+ if (s->flags & SF_CONN_EXP) {
+ /* ... and timeout expired */
+ s->conn_exp = TICK_ETERNITY;
+ s->flags &= ~SF_CONN_EXP;
+ s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
+
+ /* we may need to know the position in the queue for logging */
+ pendconn_cond_unlink(s->pend_pos);
+
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.failed_conns);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+ sc_abort(sc);
+ sc_shutdown(sc);
+ req->flags |= CF_WRITE_TIMEOUT;
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_QUEUE_TO;
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("connection request still queued", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* Connection remains in queue, check if we have to abort it */
+ if (back_may_abort_req(req, s)) {
+ s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
+
+ /* we may need to know the position in the queue for logging */
+ pendconn_cond_unlink(s->pend_pos);
+
+ s->conn_err_type |= STRM_ET_QUEUE_ABRT;
+ DBG_TRACE_STATE("abort queued connection request", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto abort_connection;
+ }
+
+ /* Nothing changed */
+ }
+ else if (sc->state == SC_ST_TAR) {
+ /* Connection request might be aborted */
+ if (back_may_abort_req(req, s)) {
+ s->conn_err_type |= STRM_ET_CONN_ABRT;
+ DBG_TRACE_STATE("connection aborted", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto abort_connection;
+ }
+
+ if (!(s->flags & SF_CONN_EXP))
+ return; /* still in turn-around */
+
+ s->flags &= ~SF_CONN_EXP;
+ s->conn_exp = TICK_ETERNITY;
+
+ /* we keep trying on the same server as long as the stream is
+ * marked "assigned".
+ * FIXME: Should we force a redispatch attempt when the server is down ?
+ */
+ if (s->flags & SF_ASSIGNED)
+ sc->state = SC_ST_ASS;
+ else
+ sc->state = SC_ST_REQ;
+
+ DBG_TRACE_STATE("retry connection now", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ }
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ return;
+
+abort_connection:
+ /* give up */
+ s->conn_exp = TICK_ETERNITY;
+ s->flags &= ~SF_CONN_EXP;
+ sc_abort(sc);
+ sc_shutdown(sc);
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ return;
+}
+
+/* This function initiates a server connection request on a stream connector
+ * already in SC_ST_REQ state. Upon success, the state goes to SC_ST_ASS for
+ * a real connection to a server, indicating that a server has been assigned,
+ * or SC_ST_RDY for a successful connection to an applet. The state may also
+ * be set to SC_ST_QUE, or to SC_ST_CLO upon error.
+ */
+void back_handle_st_req(struct stream *s)
+{
+ struct stconn *sc = s->scb;
+
+ if (sc->state != SC_ST_REQ)
+ return;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ if (unlikely(obj_type(s->target) == OBJ_TYPE_APPLET)) {
+ struct appctx *appctx;
+
+ /* The target is an applet but the SC is in SC_ST_REQ. Thus it
+ * means no appctx are attached to the SC. Otherwise, it will be
+ * in SC_ST_RDY state. So, try to create the appctx now.
+ */
+ BUG_ON(sc_appctx(sc));
+ appctx = sc_applet_create(sc, objt_applet(s->target));
+ if (!appctx) {
+ /* No more memory, let's immediately abort. Force the
+ * error code to ignore the ERR_LOCAL which is not a
+ * real error.
+ */
+ s->flags &= ~(SF_ERR_MASK | SF_FINST_MASK);
+
+ sc_abort(sc);
+ sc_shutdown(sc);
+ sc->flags |= SC_FL_ERROR;
+ s->conn_err_type = STRM_ET_CONN_RES;
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("failed to register applet", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ DBG_TRACE_STATE("applet registered", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* Try to assign a server */
+ if (srv_redispatch_connect(s) != 0) {
+ /* We did not get a server. Either we queued the
+ * connection request, or we encountered an error.
+ */
+ if (sc->state == SC_ST_QUE) {
+ DBG_TRACE_STATE("connection request queued", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ goto end;
+ }
+
+ /* we did not get any server, let's check the cause */
+ sc_abort(sc);
+ sc_shutdown(sc);
+ sc->flags |= SC_FL_ERROR;
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("connection request failed", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ /* The server is assigned */
+ s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
+ sc->state = SC_ST_ASS;
+ be_set_sess_last(s->be);
+ DBG_TRACE_STATE("connection request assigned to a server", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* This function is called with (sc->state == SC_ST_CON) meaning that a
+ * connection was attempted and that the file descriptor is already allocated.
+ * We must check for timeout, error and abort. Possible output states are
+ * SC_ST_CER (error), SC_ST_DIS (abort), and SC_ST_CON (no change). This only
+ * works with connection-based streams. We know that there were no I/O event
+ * when reaching this function. Timeouts and errors are *not* cleared.
+ */
+void back_handle_st_con(struct stream *s)
+{
+ struct stconn *sc = s->scb;
+ struct channel *req = &s->req;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ /* the client might want to abort */
+ if ((s->scf->flags & SC_FL_SHUT_DONE) ||
+ ((s->scb->flags & SC_FL_SHUT_WANTED) &&
+ (!co_data(req) || (s->be->options & PR_O_ABRT_CLOSE)))) {
+ sc->flags |= SC_FL_NOLINGER;
+ sc_shutdown(sc);
+ s->conn_err_type |= STRM_ET_CONN_ABRT;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ /* Note: state = SC_ST_DIS now */
+ DBG_TRACE_STATE("client abort during connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ done:
+ /* retryable error ? */
+ if ((s->flags & SF_CONN_EXP) || (sc->flags & SC_FL_ERROR)) {
+ if (!s->conn_err_type) {
+ if ((sc->flags & SC_FL_ERROR))
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ else
+ s->conn_err_type = STRM_ET_CONN_TO;
+ }
+
+ sc->state = SC_ST_CER;
+ DBG_TRACE_STATE("connection failed, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ }
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* This function is called with (sc->state == SC_ST_CER) meaning that a
+ * previous connection attempt has failed and that the file descriptor
+ * has already been released. Possible causes include asynchronous error
+ * notification and time out. Possible output states are SC_ST_CLO when
+ * retries are exhausted, SC_ST_TAR when a delay is wanted before a new
+ * connection attempt, SC_ST_ASS when it's wise to retry on the same server,
+ * and SC_ST_REQ when an immediate redispatch is wanted. The buffers are
+ * marked as in error state. Timeouts and errors are cleared before retrying.
+ */
+void back_handle_st_cer(struct stream *s)
+{
+ struct stconn *sc = s->scb;
+ int must_tar = !!(sc->flags & SC_FL_ERROR);
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ s->conn_exp = TICK_ETERNITY;
+ s->flags &= ~SF_CONN_EXP;
+
+ /* we probably have to release last stream from the server */
+ if (objt_server(s->target)) {
+ struct connection *conn = sc_conn(sc);
+
+ health_adjust(__objt_server(s->target), HANA_STATUS_L4_ERR);
+
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ _HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
+ }
+
+ if ((sc->flags & SC_FL_ERROR) &&
+ conn && conn->err_code == CO_ER_SSL_MISMATCH_SNI) {
+ /* We tried to connect to a server which is configured
+ * with "verify required" and which doesn't have the
+ * "verifyhost" directive. The server presented a wrong
+ * certificate (a certificate for an unexpected name),
+ * which implies that we have used SNI in the handshake,
+ * and that the server doesn't have the associated cert
+ * and presented a default one.
+ *
+ * This is a serious enough issue not to retry. It's
+ * especially important because this wrong name might
+ * either be the result of a configuration error, and
+ * retrying will only hammer the server, or is caused
+ * by the use of a wrong SNI value, most likely
+ * provided by the client and we don't want to let the
+ * client provoke retries.
+ */
+ s->conn_retries = s->be->conn_retries;
+ DBG_TRACE_DEVEL("Bad SSL cert, disable connection retries", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ }
+ }
+
+ /* ensure that we have enough retries left */
+ if (s->conn_retries >= s->be->conn_retries || !(s->be->retry_type & PR_RE_CONN_FAILED)) {
+ if (!s->conn_err_type) {
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ }
+
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&objt_server(s->target)->counters.failed_conns);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_conns);
+ sess_change_server(s, NULL);
+ if (may_dequeue_tasks(objt_server(s->target), s->be))
+ process_srv_queue(objt_server(s->target));
+
+ /* shutw is enough to stop a connecting socket */
+ sc_shutdown(sc);
+ sc->flags |= SC_FL_ERROR;
+
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+
+ DBG_TRACE_STATE("connection failed", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ /* At this stage, we will trigger a connection retry (with or without
+	 * redispatch). Thus we must reset the stream connector's endpoint on the
+	 * server side and close the attached connection. It is especially
+	 * important to do it now if the retry is not immediately performed, to
+	 * be sure to release resources as soon as possible and to not catch
+	 * errors from the lower layers in an unexpected state (i.e. < SC_ST_CON).
+ *
+ * Note: the stream connector will be switched to ST_REQ, ST_ASS or
+ * ST_TAR and SC_FL_ERROR and SF_CONN_EXP flags will be unset.
+ */
+ if (sc_reset_endp(sc) < 0) {
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&objt_server(s->target)->counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ sess_change_server(s, NULL);
+ if (may_dequeue_tasks(objt_server(s->target), s->be))
+ process_srv_queue(objt_server(s->target));
+
+ /* shutw is enough to stop a connecting socket */
+ sc_shutdown(sc);
+ sc->flags |= SC_FL_ERROR;
+
+ sc->state = SC_ST_CLO;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+
+ DBG_TRACE_STATE("error resetting endpoint", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ s->conn_retries++;
+ stream_choose_redispatch(s);
+
+ if (must_tar) {
+ /* The error was an asynchronous connection error, and we will
+ * likely have to retry connecting to the same server, most
+ * likely leading to the same result. To avoid this, we wait
+ * MIN(one second, connect timeout) before retrying. We don't
+ * do it when the failure happened on a reused connection
+ * though.
+ */
+
+ int delay = 1000;
+ const int reused = (s->flags & SF_SRV_REUSED) &&
+ !(s->flags & SF_SRV_REUSED_ANTICIPATED);
+
+ if (s->be->timeout.connect && s->be->timeout.connect < delay)
+ delay = s->be->timeout.connect;
+
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_ERR;
+
+ /* only wait when we're retrying on the same server */
+ if ((sc->state == SC_ST_ASS ||
+ (s->be->srv_act <= 1)) && !reused) {
+ sc->state = SC_ST_TAR;
+ s->conn_exp = tick_add(now_ms, MS_TO_TICKS(delay));
+ }
+ DBG_TRACE_STATE("retry a new connection", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ }
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
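+
+/* Editor's illustration (not part of the source): the turn-around above
+ * waits MIN(one second, backend connect timeout) before retrying the same
+ * server. The same computation in isolation:
+ */
+#if 0 /* illustrative example, never compiled */
+static int ex_turnaround_ms(int connect_timeout_ms)
+{
+	int delay = 1000;                      /* default: one second */
+
+	if (connect_timeout_ms && connect_timeout_ms < delay)
+		delay = connect_timeout_ms;    /* never exceed the timeout */
+	return delay;
+}
+#endif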
+
+/* This function is called with (sc->state == SC_ST_RDY) meaning that a
+ * connection was attempted, that the file descriptor is already allocated,
+ * and that it has succeeded. We must still check for errors and aborts.
+ * Possible output states are SC_ST_EST (established), SC_ST_CER (error),
+ * and SC_ST_DIS (abort). This only works with connection-based streams.
+ * Timeouts and errors are *not* cleared.
+ */
+void back_handle_st_rdy(struct stream *s)
+{
+ struct stconn *sc = s->scb;
+ struct channel *req = &s->req;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+
+ if (unlikely(obj_type(s->target) == OBJ_TYPE_APPLET)) {
+		/* Here the appctx must exist because the SC was set to
+ * SC_ST_RDY state when the appctx was created.
+ */
+ BUG_ON(!sc_appctx(s->scb));
+
+ if (!s->logs.request_ts)
+ s->logs.request_ts = now_ns;
+ s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
+ be_set_sess_last(s->be);
+ }
+
+ /* We know the connection at least succeeded, though it could have
+ * since met an error for any other reason. At least it didn't time out
+ * even though the timeout might have been reported right after success.
+ * We need to take care of various situations here :
+ * - everything might be OK. We have to switch to established.
+ * - an I/O error might have been reported after a successful transfer,
+ * which is not retryable and needs to be logged correctly, and needs
+ * established as well
+ * - SC_ST_CON implies !CF_WROTE_DATA but not conversely as we could
+ * have validated a connection with incoming data (e.g. TCP with a
+ * banner protocol), or just a successful connect() probe.
+ * - the client might have requested a connection abort, this needs to
+ * be checked before we decide to retry anything.
+ */
+
+ /* it's still possible to handle client aborts or connection retries
+ * before any data were sent.
+ */
+ if (!(req->flags & CF_WROTE_DATA)) {
+ /* client abort ? */
+ if ((s->scf->flags & SC_FL_SHUT_DONE) ||
+ ((s->scb->flags & SC_FL_SHUT_WANTED) &&
+ (!co_data(req) || (s->be->options & PR_O_ABRT_CLOSE)))) {
+ /* give up */
+ sc->flags |= SC_FL_NOLINGER;
+ sc_shutdown(sc);
+ s->conn_err_type |= STRM_ET_CONN_ABRT;
+ if (s->srv_error)
+ s->srv_error(s, sc);
+ DBG_TRACE_STATE("client abort during connection attempt", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+
+ /* retryable error ? */
+ if (sc->flags & SC_FL_ERROR) {
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_ERR;
+ sc->state = SC_ST_CER;
+ DBG_TRACE_STATE("connection failed, retry", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ goto end;
+ }
+ }
+
+ /* data were sent and/or we had no error, back_establish() will
+ * now take over.
+ */
+ DBG_TRACE_STATE("connection established", STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ s->conn_err_type = STRM_ET_NONE;
+ sc->state = SC_ST_EST;
+
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* sends a log message when a backend goes down, and also sets last
+ * change date.
+ */
+void set_backend_down(struct proxy *be)
+{
+ be->last_change = ns_to_sec(now_ns);
+ _HA_ATOMIC_INC(&be->down_trans);
+
+ if (!(global.mode & MODE_STARTING)) {
+ ha_alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id);
+ send_log(be, LOG_EMERG, "%s %s has no server available!\n", proxy_type_str(be), be->id);
+ }
+}
+
+/* Apply RDP cookie persistence to the current stream. For this, the function
+ * tries to extract an RDP cookie from the request buffer, and look for the
+ * matching server in the list. If the server is found, it is assigned to the
+ * stream. This always returns 1, and the analyser removes itself from the
+ * list. Nothing is performed if a server was already assigned.
+ */
+int tcp_persist_rdp_cookie(struct stream *s, struct channel *req, int an_bit)
+{
+ struct proxy *px = s->be;
+ int ret;
+ struct sample smp;
+ struct server *srv = px->srv;
+ uint16_t port;
+ uint32_t addr;
+ char *p;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+
+ if (s->flags & SF_ASSIGNED)
+ goto no_cookie;
+
+ memset(&smp, 0, sizeof(smp));
+
+ ret = fetch_rdp_cookie_name(s, &smp, s->be->rdp_cookie_name, s->be->rdp_cookie_len);
+ if (ret == 0 || (smp.flags & SMP_F_MAY_CHANGE) || smp.data.u.str.data == 0)
+ goto no_cookie;
+
+	/* An RDP cookie was found; the string may still end with <cr><lf>, at
+	 * which the numeric parsing below safely stops. The cookie format is
+	 * <ip> "." <port> where "ip" is the integer corresponding to the
+	 * server's IP address in network order, and "port" is the integer
+	 * corresponding to the server's port in network order.
+	 */
+ addr = strtoul(smp.data.u.str.area, &p, 10);
+ if (*p != '.')
+ goto no_cookie;
+ p++;
+
+ port = ntohs(strtoul(p, &p, 10));
+ if (*p != '.')
+ goto no_cookie;
+
+ s->target = NULL;
+ while (srv) {
+ if (srv->addr.ss_family == AF_INET &&
+ port == srv->svc_port &&
+ addr == ((struct sockaddr_in *)&srv->addr)->sin_addr.s_addr) {
+ if ((srv->cur_state != SRV_ST_STOPPED) || (px->options & PR_O_PERSIST)) {
+ /* we found the server and it is usable */
+ s->flags |= SF_DIRECT | SF_ASSIGNED;
+ s->target = &srv->obj_type;
+ break;
+ }
+ }
+ srv = srv->next;
+ }
+
+no_cookie:
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 1;
+}
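+
+/* Editor's illustration (not part of the source): a standalone parser for
+ * the "<ip> '.' <port> '.' ..." payload decoded above, where both fields
+ * are decimal renderings of network-order integers. The address is kept in
+ * network order (it is compared against sin_addr.s_addr), while the port is
+ * converted to host order like svc_port.
+ */
+#if 0 /* illustrative example, never compiled */
+#include <stdint.h>
+#include <stdlib.h>
+#include <arpa/inet.h>
+
+static int ex_parse_rdp_cookie(const char *str, uint32_t *addr, uint16_t *port)
+{
+	char *p;
+
+	*addr = (uint32_t)strtoul(str, &p, 10); /* network-order IPv4 address */
+	if (*p != '.')
+		return -1;
+	*port = ntohs((uint16_t)strtoul(p + 1, &p, 10));
+	if (*p != '.')
+		return -1;
+	return 0;
+}
+#endif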
+
+int be_downtime(struct proxy *px) {
+ if (px->lbprm.tot_weight && px->last_change < ns_to_sec(now_ns)) // ignore negative time
+ return px->down_time;
+
+ return ns_to_sec(now_ns) - px->last_change + px->down_time;
+}
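+
+/* Editor's illustration (not part of the source): be_downtime() reports the
+ * backend's accumulated downtime, adding the current outage only while no
+ * server is usable (tot_weight == 0). Standalone form of the arithmetic:
+ */
+#if 0 /* illustrative example, never compiled */
+static unsigned ex_downtime(unsigned now, unsigned last_change,
+                            unsigned acc_down, int tot_weight)
+{
+	if (tot_weight && last_change < now) /* currently up: stored value */
+		return acc_down;
+	return now - last_change + acc_down; /* still down: add current outage */
+}
+#endif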
+
+/*
+ * This function returns a string containing the balancing
+ * mode of the proxy in a format suitable for stats.
+ */
+
+const char *backend_lb_algo_str(int algo) {
+
+ if (algo == BE_LB_ALGO_RR)
+ return "roundrobin";
+ else if (algo == BE_LB_ALGO_SRR)
+ return "static-rr";
+ else if (algo == BE_LB_ALGO_FAS)
+ return "first";
+ else if (algo == BE_LB_ALGO_LC)
+ return "leastconn";
+ else if (algo == BE_LB_ALGO_SH)
+ return "source";
+ else if (algo == BE_LB_ALGO_UH)
+ return "uri";
+ else if (algo == BE_LB_ALGO_PH)
+ return "url_param";
+ else if (algo == BE_LB_ALGO_HH)
+ return "hdr";
+ else if (algo == BE_LB_ALGO_RCH)
+ return "rdp-cookie";
+ else if (algo == BE_LB_ALGO_SMP)
+ return "hash";
+ else if (algo == BE_LB_ALGO_NONE)
+ return "none";
+ else
+ return "unknown";
+}
+
+/* This function parses a "balance" statement in a backend section describing
+ * <curproxy>. It returns -1 if there is any error, otherwise zero. If it
+ * returns -1, it will write an error message into the <err> buffer, which is
+ * automatically allocated and must initially point to NULL. The trailing '\n'
+ * will not be written. The function must be called with <args> pointing to the
+ * first word after "balance".
+ */
+int backend_parse_balance(const char **args, char **err, struct proxy *curproxy)
+{
+ if (!*(args[0])) {
+ /* if no option is set, use round-robin by default */
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ return 0;
+ }
+
+ if (strcmp(args[0], "roundrobin") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ }
+ else if (strcmp(args[0], "static-rr") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_SRR;
+ }
+ else if (strcmp(args[0], "first") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_FAS;
+ }
+ else if (strcmp(args[0], "leastconn") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_LC;
+ }
+ else if (!strncmp(args[0], "random", 6)) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RND;
+ curproxy->lbprm.arg_opt1 = 2;
+
+ if (*(args[0] + 6) == '(' && *(args[0] + 7) != ')') { /* number of draws */
+ const char *beg;
+ char *end;
+
+ beg = args[0] + 7;
+ curproxy->lbprm.arg_opt1 = strtol(beg, &end, 0);
+
+ if (*end != ')') {
+ if (!*end)
+ memprintf(err, "random : missing closing parenthesis.");
+ else
+ memprintf(err, "random : unexpected character '%c' after argument.", *end);
+ return -1;
+ }
+
+ if (curproxy->lbprm.arg_opt1 < 1) {
+ memprintf(err, "random : number of draws must be at least 1.");
+ return -1;
+ }
+ }
+ }
+ else if (strcmp(args[0], "source") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_SH;
+ }
+ else if (strcmp(args[0], "uri") == 0) {
+ int arg = 1;
+
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_UH;
+ curproxy->lbprm.arg_opt1 = 0; // "whole", "path-only"
+ curproxy->lbprm.arg_opt2 = 0; // "len"
+ curproxy->lbprm.arg_opt3 = 0; // "depth"
+
+ while (*args[arg]) {
+ if (strcmp(args[arg], "len") == 0) {
+ if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
+ memprintf(err, "%s : '%s' expects a positive integer (got '%s').", args[0], args[arg], args[arg+1]);
+ return -1;
+ }
+ curproxy->lbprm.arg_opt2 = atoi(args[arg+1]);
+ arg += 2;
+ }
+ else if (strcmp(args[arg], "depth") == 0) {
+ if (!*args[arg+1] || (atoi(args[arg+1]) <= 0)) {
+ memprintf(err, "%s : '%s' expects a positive integer (got '%s').", args[0], args[arg], args[arg+1]);
+ return -1;
+ }
+ /* hint: we store the position of the ending '/' (depth+1) so
+ * that we avoid a comparison while computing the hash.
+ */
+ curproxy->lbprm.arg_opt3 = atoi(args[arg+1]) + 1;
+ arg += 2;
+ }
+ else if (strcmp(args[arg], "whole") == 0) {
+ curproxy->lbprm.arg_opt1 |= 1;
+ arg += 1;
+ }
+ else if (strcmp(args[arg], "path-only") == 0) {
+ curproxy->lbprm.arg_opt1 |= 2;
+ arg += 1;
+ }
+ else {
+ memprintf(err, "%s only accepts parameters 'len', 'depth', 'path-only', and 'whole' (got '%s').", args[0], args[arg]);
+ return -1;
+ }
+ }
+ }
+ else if (strcmp(args[0], "url_param") == 0) {
+ if (!*args[1]) {
+ memprintf(err, "%s requires an URL parameter name.", args[0]);
+ return -1;
+ }
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_PH;
+
+ free(curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = strdup(args[1]);
+ curproxy->lbprm.arg_len = strlen(args[1]);
+ if (*args[2]) {
+ if (strcmp(args[2], "check_post") != 0) {
+ memprintf(err, "%s only accepts 'check_post' modifier (got '%s').", args[0], args[2]);
+ return -1;
+ }
+ }
+ }
+ else if (strcmp(args[0], "hash") == 0) {
+ if (!*args[1]) {
+ memprintf(err, "%s requires a sample expression.", args[0]);
+ return -1;
+ }
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_SMP;
+
+ ha_free(&curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = strdup(args[1]);
+ curproxy->lbprm.arg_len = strlen(args[1]);
+
+ if (*args[2]) {
+ memprintf(err, "%s takes no other argument (got '%s').", args[0], args[2]);
+ return -1;
+ }
+ }
+ else if (!strncmp(args[0], "hdr(", 4)) {
+ const char *beg, *end;
+
+ beg = args[0] + 4;
+ end = strchr(beg, ')');
+
+ if (!end || end == beg) {
+ memprintf(err, "hdr requires an http header field name.");
+ return -1;
+ }
+
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_HH;
+
+ free(curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_len = end - beg;
+ curproxy->lbprm.arg_str = my_strndup(beg, end - beg);
+ curproxy->lbprm.arg_opt1 = 0;
+
+ if (*args[1]) {
+ if (strcmp(args[1], "use_domain_only") != 0) {
+ memprintf(err, "%s only accepts 'use_domain_only' modifier (got '%s').", args[0], args[1]);
+ return -1;
+ }
+ curproxy->lbprm.arg_opt1 = 1;
+ }
+ }
+ else if (!strncmp(args[0], "rdp-cookie", 10)) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RCH;
+
+ if ( *(args[0] + 10 ) == '(' ) { /* cookie name */
+ const char *beg, *end;
+
+ beg = args[0] + 11;
+ end = strchr(beg, ')');
+
+ if (!end || end == beg) {
+ memprintf(err, "rdp-cookie : missing cookie name.");
+ return -1;
+ }
+
+ free(curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = my_strndup(beg, end - beg);
+ curproxy->lbprm.arg_len = end - beg;
+ }
+ else if ( *(args[0] + 10 ) == '\0' ) { /* default cookie name 'mstshash' */
+ free(curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = strdup("mstshash");
+ curproxy->lbprm.arg_len = strlen(curproxy->lbprm.arg_str);
+ }
+ else { /* syntax */
+ memprintf(err, "rdp-cookie : missing cookie name.");
+ return -1;
+ }
+ }
+ else if (strcmp(args[0], "log-hash") == 0) {
+ if (!*args[1]) {
+ memprintf(err, "%s requires a converter list.", args[0]);
+ return -1;
+ }
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_LH;
+
+ ha_free(&curproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_str = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "sticky") == 0) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_LS;
+ }
+ else {
+ memprintf(err, "only supports 'roundrobin', 'static-rr', 'leastconn', 'source', 'uri', 'url_param', 'hash', 'hdr(name)', 'rdp-cookie(name)', 'log-hash' and 'sticky' options.");
+ return -1;
+ }
+ return 0;
+}
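+
+/* Editor's note (illustration, not part of the source): a few "balance"
+ * statements accepted by the parser above, as they would appear in a
+ * backend section (backend and header names are hypothetical):
+ *
+ *   balance roundrobin
+ *   balance random(3)               # 3 draws, keep the least loaded
+ *   balance uri len 20 depth 3      # hash at most 20 chars / 3 path levels
+ *   balance url_param userid check_post
+ *   balance hdr(host) use_domain_only
+ *   balance rdp-cookie(mstshash)
+ *   balance hash req.hdr(x-user)    # any sample expression
+ */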
+
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* set temp integer to the number of usable servers on the proxy.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_nbsrv(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+
+ smp->data.u.sint = be_usable_srv(px);
+
+ return 1;
+}
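+
+/* Editor's note (illustration, not part of the source): nbsrv is commonly
+ * paired with "monitor fail" or ACL-based switching, e.g. with a
+ * hypothetical backend named "app":
+ *
+ *   monitor-uri /up
+ *   monitor fail if { nbsrv(app) lt 2 }
+ */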
+
+/* report in smp->flags a success or failure depending on the designated
+ * server's state. There is no match function involved since there's no pattern.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_is_up(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *srv = args->data.srv;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_BOOL;
+ if (!(srv->cur_admin & SRV_ADMF_MAINT) &&
+ (!(srv->check.state & CHK_ST_CONFIGURED) || (srv->cur_state != SRV_ST_STOPPED)))
+ smp->data.u.sint = 1;
+ else
+ smp->data.u.sint = 0;
+ return 1;
+}
+
+/* set temp integer to the number of available connection slots on the proxy.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_connslots(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *iterator;
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ for (iterator = px->srv; iterator; iterator = iterator->next) {
+ if (iterator->cur_state == SRV_ST_STOPPED)
+ continue;
+
+ if (iterator->maxconn == 0 || iterator->maxqueue == 0) {
+			/* at least one server has no connection or queue limit,
+			 * so the total cannot be computed */
+			smp->data.u.sint = -1; /* FIXME: -1 is an ambiguous sentinel value */
+ return 1;
+ }
+
+ smp->data.u.sint += (iterator->maxconn - iterator->cur_sess)
+ + (iterator->maxqueue - iterator->queue.length);
+ }
+
+ return 1;
+}
+
+/* set temp integer to the id of the backend */
+static int
+smp_fetch_be_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = NULL;
+
+ if (smp->strm)
+ px = smp->strm->be;
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ px = __objt_check(smp->sess->origin)->proxy;
+ if (!px)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = px->uuid;
+ return 1;
+}
+
+/* set string to the name of the backend */
+static int
+smp_fetch_be_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = NULL;
+
+ if (smp->strm)
+ px = smp->strm->be;
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ px = __objt_check(smp->sess->origin)->proxy;
+ if (!px)
+ return 0;
+
+ smp->data.u.str.area = (char *)px->id;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+
+/* set temp integer to the id of the server */
+static int
+smp_fetch_srv_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *srv = NULL;
+
+ if (smp->strm)
+ srv = objt_server(smp->strm->target);
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ srv = __objt_check(smp->sess->origin)->server;
+ if (!srv)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = srv->puid;
+
+ return 1;
+}
+
+/* set string to the name of the server */
+static int
+smp_fetch_srv_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *srv = NULL;
+
+ if (smp->strm)
+ srv = objt_server(smp->strm->target);
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ srv = __objt_check(smp->sess->origin)->server;
+ if (!srv)
+ return 0;
+
+ smp->data.u.str.area = srv->id;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+
+/* set temp integer to the number of connections per second reaching the backend.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_be_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = read_freq_ctr(&px->be_sess_per_sec);
+ return 1;
+}
+
+/* set temp integer to the number of concurrent connections on the backend.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_be_conn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = px->beconn;
+ return 1;
+}
+
+/* set temp integer to the number of available connections across available
+ * servers on the backend.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_be_conn_free(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *iterator;
+ struct proxy *px = args->data.prx;
+ unsigned int maxconn;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ for (iterator = px->srv; iterator; iterator = iterator->next) {
+ if (iterator->cur_state == SRV_ST_STOPPED)
+ continue;
+
+ px = iterator->proxy;
+ if (!srv_currently_usable(iterator) ||
+ ((iterator->flags & SRV_F_BACKUP) &&
+ (px->srv_act || (iterator != px->lbprm.fbck && !(px->options & PR_O_USE_ALL_BK)))))
+ continue;
+
+ if (iterator->maxconn == 0) {
+ /* one active server is unlimited, return -1 */
+ smp->data.u.sint = -1;
+ return 1;
+ }
+
+ maxconn = srv_dynamic_maxconn(iterator);
+ if (maxconn > iterator->cur_sess)
+ smp->data.u.sint += maxconn - iterator->cur_sess;
+ }
+
+ return 1;
+}
+
+/* set temp integer to the total number of queued connections on the backend.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_queue_size(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = px->totpend;
+ return 1;
+}
+
+/* set temp integer to the total number of queued connections on the backend divided
+ * by the number of running servers and rounded up. If there is no running
+ * server, we return twice the total, just as if we had half a running server.
+ * This is more or less correct anyway, since we expect the last server to come
+ * back soon.
+ * Accepts exactly 1 argument. Argument is a backend, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_avg_queue_size(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+ int nbsrv;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+
+ nbsrv = be_usable_srv(px);
+
+ if (nbsrv > 0)
+ smp->data.u.sint = (px->totpend + nbsrv - 1) / nbsrv;
+ else
+ smp->data.u.sint = px->totpend * 2;
+
+ return 1;
+}
+
+/* set temp integer to the number of concurrent connections on the server in the backend.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_conn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.srv->cur_sess;
+ return 1;
+}
+
+/* set temp integer to the number of available connections on the server in the backend.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_conn_free(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int maxconn;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+
+ if (args->data.srv->maxconn == 0) {
+ /* one active server is unlimited, return -1 */
+ smp->data.u.sint = -1;
+ return 1;
+ }
+
+ maxconn = srv_dynamic_maxconn(args->data.srv);
+ if (maxconn > args->data.srv->cur_sess)
+ smp->data.u.sint = maxconn - args->data.srv->cur_sess;
+ else
+ smp->data.u.sint = 0;
+
+ return 1;
+}
+
+/* set temp integer to the number of connections pending in the server's queue.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_queue(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.srv->queue.length;
+ return 1;
+}
+
+/* set temp integer to the server's session rate over the last second.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = read_freq_ctr(&args->data.srv->sess_per_sec);
+ return 1;
+}
+
+/* set temp integer to the server weight.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_weight(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct server *srv = args->data.srv;
+ struct proxy *px = srv->proxy;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (srv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv;
+ return 1;
+}
+
+/* set temp integer to the server initial weight.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_iweight(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.srv->iweight;
+ return 1;
+}
+
+/* set temp integer to the server user-specified weight.
+ * Accepts exactly 1 argument. Argument is a server, other types will lead to
+ * undefined behaviour.
+ */
+static int
+smp_fetch_srv_uweight(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.srv->uweight;
+ return 1;
+}
+
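+/* set temp integer to the backend's "timeout server" value, in milliseconds. */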
+static int
+smp_fetch_be_server_timeout(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = NULL;
+
+ if (smp->strm)
+ px = smp->strm->be;
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ px = __objt_check(smp->sess->origin)->proxy;
+ if (!px)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = TICKS_TO_MS(px->timeout.server);
+ return 1;
+}
+
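+/* set temp integer to the backend's "timeout tunnel" value, in milliseconds. */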
+static int
+smp_fetch_be_tunnel_timeout(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = NULL;
+
+ if (smp->strm)
+ px = smp->strm->be;
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ px = __objt_check(smp->sess->origin)->proxy;
+ if (!px)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = TICKS_TO_MS(px->timeout.tunnel);
+ return 1;
+}
+
+static int sample_conv_nbsrv(const struct arg *args, struct sample *smp, void *private)
+{
+	struct proxy *px;
+
+ if (!smp_make_safe(smp))
+ return 0;
+
+ px = proxy_find_by_name(smp->data.u.str.area, PR_CAP_BE, 0);
+ if (!px)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = be_usable_srv(px);
+
+ return 1;
+}
+
+static int
+sample_conv_srv_queue(const struct arg *args, struct sample *smp, void *private)
+{
+ struct proxy *px;
+ struct server *srv;
+ char *bksep;
+
+ if (!smp_make_safe(smp))
+ return 0;
+
+ bksep = strchr(smp->data.u.str.area, '/');
+
+ if (bksep) {
+ *bksep = '\0';
+ px = proxy_find_by_name(smp->data.u.str.area, PR_CAP_BE, 0);
+ if (!px)
+ return 0;
+ smp->data.u.str.area = bksep + 1;
+ } else {
+ if (!(smp->px->cap & PR_CAP_BE))
+ return 0;
+ px = smp->px;
+ }
+
+ srv = server_find_by_name(px, smp->data.u.str.area);
+ if (!srv)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = srv->queue.length;
+ return 1;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "avg_queue", smp_fetch_avg_queue_size, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "be_conn", smp_fetch_be_conn, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "be_conn_free", smp_fetch_be_conn_free, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "be_id", smp_fetch_be_id, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "be_name", smp_fetch_be_name, 0, NULL, SMP_T_STR, SMP_USE_BKEND, },
+ { "be_server_timeout", smp_fetch_be_server_timeout, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "be_sess_rate", smp_fetch_be_sess_rate, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "be_tunnel_timeout", smp_fetch_be_tunnel_timeout, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "connslots", smp_fetch_connslots, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "nbsrv", smp_fetch_nbsrv, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "queue", smp_fetch_queue_size, ARG1(1,BE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_conn", smp_fetch_srv_conn, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_conn_free", smp_fetch_srv_conn_free, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_id", smp_fetch_srv_id, 0, NULL, SMP_T_SINT, SMP_USE_SERVR, },
+	{ "srv_is_up", smp_fetch_srv_is_up, ARG1(1,SRV), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+	{ "srv_iweight", smp_fetch_srv_iweight, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_name", smp_fetch_srv_name, 0, NULL, SMP_T_STR, SMP_USE_SERVR, },
+	{ "srv_queue", smp_fetch_srv_queue, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_sess_rate", smp_fetch_srv_sess_rate, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_uweight", smp_fetch_srv_uweight, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+	{ "srv_weight", smp_fetch_srv_weight, ARG1(1,SRV), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "nbsrv", sample_conv_nbsrv, 0, NULL, SMP_T_STR, SMP_T_SINT },
+ { "srv_queue", sample_conv_srv_queue, 0, NULL, SMP_T_STR, SMP_T_SINT },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
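+
+/* Illustrative usage sketch (assumed configuration, not from this file): the
+ * fetches and converters registered above can be combined, e.g.
+ *
+ *	acl be_degraded nbsrv(bk_web) lt 2        # fetch form, 'bk_web' hypothetical
+ *	http-request set-header X-Srv-Queue %[str(bk_web/srv1),srv_queue]
+ *
+ * where the converter form takes a "backend/server" (or plain server) name
+ * string as input, as implemented by sample_conv_srv_queue() above.
+ */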
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/base64.c b/src/base64.c
new file mode 100644
index 0000000..0601bf6
--- /dev/null
+++ b/src/base64.c
@@ -0,0 +1,303 @@
+/*
+ * ASCII <-> Base64 conversion as described in RFC1421.
+ *
+ * Copyright 2006-2010 Willy Tarreau <w@1wt.eu>
+ * Copyright 2009-2010 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/base64.h>
+
+#define B64BASE '#' /* arbitrarily chosen base value */
+#define B64CMIN '+'
+#define UB64CMIN '-'
+#define B64CMAX 'z'
+#define B64PADV 64 /* Base64 chosen special pad value */
+
+const char base64tab[65]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+const char base64rev[]="b###cXYZ[\\]^_`a###d###$%&'()*+,-./0123456789:;<=######>?@ABCDEFGHIJKLMNOPQRSTUVW";
+const char ubase64tab[65]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+const char ubase64rev[]="b##XYZ[\\]^_`a###c###$%&'()*+,-./0123456789:;<=####c#>?@ABCDEFGHIJKLMNOPQRSTUVW";
+
+/* Encodes <ilen> bytes from <in> to <out> for at most <olen> chars (including
+ * the trailing zero). Returns the number of bytes written. No check is made
+ * for <in> or <out> to be NULL. Returns negative value if <olen> is too short
+ * to accept <ilen>. 4 output bytes are produced for 1 to 3 input bytes.
+ */
+int a2base64(char *in, int ilen, char *out, int olen)
+{
+ int convlen;
+
+ convlen = ((ilen + 2) / 3) * 4;
+
+ if (convlen >= olen)
+ return -1;
+
+ /* we don't need to check olen anymore */
+ while (ilen >= 3) {
+ out[0] = base64tab[(((unsigned char)in[0]) >> 2)];
+ out[1] = base64tab[(((unsigned char)in[0] & 0x03) << 4) | (((unsigned char)in[1]) >> 4)];
+ out[2] = base64tab[(((unsigned char)in[1] & 0x0F) << 2) | (((unsigned char)in[2]) >> 6)];
+ out[3] = base64tab[(((unsigned char)in[2] & 0x3F))];
+ out += 4;
+ in += 3; ilen -= 3;
+ }
+
+ if (!ilen) {
+ out[0] = '\0';
+ } else {
+ out[0] = base64tab[((unsigned char)in[0]) >> 2];
+ if (ilen == 1) {
+ out[1] = base64tab[((unsigned char)in[0] & 0x03) << 4];
+ out[2] = '=';
+ } else {
+ out[1] = base64tab[(((unsigned char)in[0] & 0x03) << 4) |
+ (((unsigned char)in[1]) >> 4)];
+ out[2] = base64tab[((unsigned char)in[1] & 0x0F) << 2];
+ }
+ out[3] = '=';
+ out[4] = '\0';
+ }
+
+ return convlen;
+}
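+
+/* Usage sketch (illustrative, not part of the original file): the output
+ * buffer must hold ((ilen + 2) / 3) * 4 chars plus the trailing zero, so
+ * encoding the 2-byte input "ab" needs at least 5 bytes:
+ *
+ *	char in[] = "ab";
+ *	char buf[5];
+ *	int len = a2base64(in, 2, buf, sizeof(buf));
+ *	// len == 4 and buf now contains "YWI=" ('a' = 0x61, 'b' = 0x62)
+ */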
+
+/* URL-safe variant of a2base64 (base64url), which produces unpadded output */
+int a2base64url(const char *in, size_t ilen, char *out, size_t olen)
+{
+ int convlen;
+
+ convlen = ((ilen + 2) / 3) * 4;
+
+ if (convlen >= olen)
+ return -1;
+
+ /* we don't need to check olen anymore */
+ while (ilen >= 3) {
+ out[0] = ubase64tab[(((unsigned char)in[0]) >> 2)];
+ out[1] = ubase64tab[(((unsigned char)in[0] & 0x03) << 4) | (((unsigned char)in[1]) >> 4)];
+ out[2] = ubase64tab[(((unsigned char)in[1] & 0x0F) << 2) | (((unsigned char)in[2]) >> 6)];
+ out[3] = ubase64tab[(((unsigned char)in[2] & 0x3F))];
+ out += 4;
+ in += 3;
+ ilen -= 3;
+ }
+
+ if (!ilen) {
+ out[0] = '\0';
+ return convlen;
+ }
+
+ out[0] = ubase64tab[((unsigned char)in[0]) >> 2];
+ if (ilen == 1) {
+ out[1] = ubase64tab[((unsigned char)in[0] & 0x03) << 4];
+ out[2] = '\0';
+ convlen -= 2;
+ } else {
+ out[1] = ubase64tab[(((unsigned char)in[0] & 0x03) << 4) |
+ (((unsigned char)in[1]) >> 4)];
+ out[2] = ubase64tab[((unsigned char)in[1] & 0x0F) << 2];
+ out[3] = '\0';
+ convlen -= 1;
+ }
+
+ return convlen;
+}
+
+/* Decodes <ilen> bytes from <in> to <out> for at most <olen> chars.
+ * Returns the number of bytes converted. No check is made for
+ * <in> or <out> to be NULL. Returns -1 if <in> contains an invalid character
+ * or if <ilen> is not a multiple of 4, and -2 if <olen> is too short.
+ * 1 to 3 output bytes are produced for 4 input bytes.
+ */
+int base64dec(const char *in, size_t ilen, char *out, size_t olen)
+{
+	unsigned char t[4];
+ signed char b;
+ int convlen = 0, i = 0, pad = 0;
+
+ if (ilen % 4)
+ return -1;
+
+ if (olen < ((ilen / 4 * 3)
+ - (in[ilen-1] == '=' ? 1 : 0)
+ - (in[ilen-2] == '=' ? 1 : 0)))
+ return -2;
+
+ while (ilen) {
+
+ /* if (*p < B64CMIN || *p > B64CMAX) */
+ b = (signed char)*in - B64CMIN;
+ if ((unsigned char)b > (B64CMAX-B64CMIN))
+ return -1;
+
+ b = base64rev[b] - B64BASE - 1;
+
+ /* b == -1: invalid character */
+ if (b < 0)
+ return -1;
+
+ /* padding has to be continuous */
+ if (pad && b != B64PADV)
+ return -1;
+
+ /* valid padding: "XX==" or "XXX=", but never "X===" or "====" */
+ if (pad && i < 2)
+ return -1;
+
+ if (b == B64PADV)
+ pad++;
+
+ t[i++] = b;
+
+ if (i == 4) {
+ /*
+			 * WARNING: we may write a little more data than we
+			 * should, but the checks at the beginning of the
+			 * function guarantee that we can safely do so.
+ */
+
+ /* xx000000 xx001111 xx111122 xx222222 */
+ if (convlen < olen)
+ out[convlen] = ((t[0] << 2) + (t[1] >> 4));
+ if (convlen+1 < olen)
+ out[convlen+1] = ((t[1] << 4) + (t[2] >> 2));
+ if (convlen+2 < olen)
+ out[convlen+2] = ((t[2] << 6) + (t[3] >> 0));
+
+ convlen += 3-pad;
+
+ pad = i = 0;
+ }
+
+ in++;
+ ilen--;
+ }
+
+ return convlen;
+}
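+
+/* Usage sketch (illustrative, not part of the original file): decoding is the
+ * exact inverse of a2base64 for padded input:
+ *
+ *	char out[3];
+ *	int n = base64dec("YWI=", 4, out, sizeof(out));
+ *	// n == 2 and out starts with "ab"; "YWI" alone would fail with -1
+ *	// since its length is not a multiple of 4
+ */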
+
+/* URL-safe variant of base64dec (base64url), accepting unpadded input */
+/* The reverse tab used to decode base64 is generated via /dev/base64/base64rev-gen.c */
+int base64urldec(const char *in, size_t ilen, char *out, size_t olen)
+{
+ unsigned char t[4];
+ signed char b;
+ int convlen = 0, i = 0, pad = 0, padlen = 0;
+
+ switch (ilen % 4) {
+ case 0:
+ break;
+ case 2:
+ padlen = pad = 2;
+ break;
+ case 3:
+ padlen = pad = 1;
+ break;
+ default:
+ return -1;
+ }
+
+ if (olen < (((ilen + pad) / 4 * 3) - pad))
+ return -2;
+
+ while (ilen + pad) {
+ if (ilen) {
+ /* if (*p < UB64CMIN || *p > B64CMAX) */
+ b = (signed char) * in - UB64CMIN;
+ if ((unsigned char)b > (B64CMAX - UB64CMIN))
+ return -1;
+
+ b = ubase64rev[b] - B64BASE - 1;
+ /* b == -1: invalid character */
+ if (b < 0)
+ return -1;
+
+ in++;
+ ilen--;
+
+ } else {
+ b = B64PADV;
+ pad--;
+ }
+
+ t[i++] = b;
+
+ if (i == 4) {
+ /*
+			 * WARNING: we may write a little more data than we
+			 * should, but the checks at the beginning of the
+			 * function guarantee that we can safely do so.
+ */
+
+ /* xx000000 xx001111 xx111122 xx222222 */
+ if (convlen < olen)
+ out[convlen] = ((t[0] << 2) + (t[1] >> 4));
+ if (convlen+1 < olen)
+ out[convlen+1] = ((t[1] << 4) + (t[2] >> 2));
+ if (convlen+2 < olen)
+ out[convlen+2] = ((t[2] << 6) + (t[3] >> 0));
+
+ convlen += 3;
+ i = 0;
+ }
+ }
+ convlen -= padlen;
+
+ return convlen;
+}
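+
+/* Usage sketch (illustrative, not part of the original file): this variant
+ * accepts unpadded input, so "YWI" (length 3) decodes to the two bytes "ab",
+ * e.g. base64urldec("YWI", 3, out, 2) returns 2.
+ */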
+
+/* Converts the lower 30 bits of an integer to a 5-char base64 string. The
+ * caller is responsible for ensuring that the output buffer can accept 6 bytes
+ * (5 + the trailing zero). The pointer to the string is returned. The
+ * conversion is performed with MSB first and in a format that can be
+ * decoded with b64tos30(). This format is not padded and thus is not
+ * compatible with usual base64 routines.
+ */
+const char *s30tob64(int in, char *out)
+{
+ int i;
+ for (i = 0; i < 5; i++) {
+ out[i] = base64tab[(in >> 24) & 0x3F];
+ in <<= 6;
+ }
+ out[5] = '\0';
+ return out;
+}
+
+/* Converts a 5-char base64 string encoded by s30tob64() into a 30-bit integer.
+ * The caller is responsible for ensuring that the input contains at least 5
+ * chars. If any unexpected character is encountered, a negative value is
+ * returned. Otherwise the decoded value is returned.
+ */
+int b64tos30(const char *in)
+{
+ int i, out;
+ signed char b;
+
+ out = 0;
+ for (i = 0; i < 5; i++) {
+ b = (signed char)in[i] - B64CMIN;
+ if ((unsigned char)b > (B64CMAX - B64CMIN))
+ return -1; /* input character out of range */
+
+ b = base64rev[b] - B64BASE - 1;
+ if (b < 0) /* invalid character */
+ return -1;
+
+ if (b == B64PADV) /* padding not allowed */
+ return -1;
+
+ out = (out << 6) + b;
+ }
+ return out;
+}
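+
+/* Round-trip sketch (illustrative, not part of the original file):
+ *
+ *	char buf[6];
+ *	s30tob64(1, buf);        // buf == "AAAAB" (MSB first, 6 bits per char)
+ *	int v = b64tos30(buf);   // v == 1
+ */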
diff --git a/src/cache.c b/src/cache.c
new file mode 100644
index 0000000..9f12f10
--- /dev/null
+++ b/src/cache.c
@@ -0,0 +1,3014 @@
+/*
+ * Cache management
+ *
+ * Copyright 2017 HAProxy Technologies
+ * William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <import/eb32tree.h>
+#include <import/sha1.h>
+
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/hash.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/htx.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/shctx.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+#define CACHE_FLT_F_IMPLICIT_DECL 0x00000001 /* The cache filter was implicitly declared (i.e. without
+ * the filter keyword) */
+#define CACHE_FLT_INIT 0x00000002 /* Whether the cache name was freed. */
+
+static uint64_t cache_hash_seed = 0;
+
+const char *cache_store_flt_id = "cache store filter";
+
+extern struct applet http_cache_applet;
+
+struct flt_ops cache_ops;
+
+struct cache_tree {
+ struct eb_root entries; /* head of cache entries based on keys */
+ __decl_thread(HA_RWLOCK_T lock);
+
+ struct list cleanup_list;
+ __decl_thread(HA_SPINLOCK_T cleanup_lock);
+} ALIGNED(64);
+
+struct cache {
+ struct cache_tree trees[CACHE_TREE_NUM];
+ struct list list; /* cache linked list */
+ unsigned int maxage; /* max-age */
+ unsigned int maxblocks;
+ unsigned int maxobjsz; /* max-object-size (in bytes) */
+ unsigned int max_secondary_entries; /* maximum number of secondary entries with the same primary hash */
+ uint8_t vary_processing_enabled; /* boolean : manage Vary header (disabled by default) */
+ char id[33]; /* cache name */
+};
+
+/* the appctx context of a cache applet, stored in appctx->svcctx */
+struct cache_appctx {
+ struct cache_tree *cache_tree;
+ struct cache_entry *entry; /* Entry to be sent from cache. */
+ unsigned int sent; /* The number of bytes already sent for this cache entry. */
+ unsigned int offset; /* start offset of remaining data relative to beginning of the next block */
+ unsigned int rem_data; /* Remaining bytes for the last data block (HTX only, 0 means process next block) */
+ unsigned int send_notmodified:1; /* In case of conditional request, we might want to send a "304 Not Modified" response instead of the stored data. */
+ unsigned int unused:31;
+ struct shared_block *next; /* The next block of data to be sent for this cache entry. */
+};
+
+/* cache config for filters */
+struct cache_flt_conf {
+ union {
+ struct cache *cache; /* cache used by the filter */
+ char *name; /* cache name used during conf parsing */
+ } c;
+ unsigned int flags; /* CACHE_FLT_F_* */
+};
+
+/* CLI context used during "show cache" */
+struct show_cache_ctx {
+ struct cache *cache;
+ struct cache_tree *cache_tree;
+ uint next_key;
+};
+
+
+/*
+ * Vary-related structures and functions
+ */
+enum vary_header_bit {
+ VARY_ACCEPT_ENCODING = (1 << 0),
+ VARY_REFERER = (1 << 1),
+ VARY_ORIGIN = (1 << 2),
+ VARY_LAST /* should always be last */
+};
+
+/*
+ * Encoding list extracted from
+ * https://www.iana.org/assignments/http-parameters/http-parameters.xhtml
+ * and RFC7231#5.3.4.
+ */
+enum vary_encoding {
+ VARY_ENCODING_GZIP = (1 << 0),
+ VARY_ENCODING_DEFLATE = (1 << 1),
+ VARY_ENCODING_BR = (1 << 2),
+ VARY_ENCODING_COMPRESS = (1 << 3),
+ VARY_ENCODING_AES128GCM = (1 << 4),
+ VARY_ENCODING_EXI = (1 << 5),
+ VARY_ENCODING_PACK200_GZIP = (1 << 6),
+ VARY_ENCODING_ZSTD = (1 << 7),
+ VARY_ENCODING_IDENTITY = (1 << 8),
+ VARY_ENCODING_STAR = (1 << 9),
+ VARY_ENCODING_OTHER = (1 << 10)
+};
+
+struct vary_hashing_information {
+ struct ist hdr_name; /* Header name */
+ enum vary_header_bit value; /* Bit representing the header in a vary signature */
+ unsigned int hash_length; /* Size of the sub hash for this header's value */
+	int (*norm_fn)(struct htx *htx, struct ist hdr_name, char *buf, unsigned int *buf_len); /* Normalization function */
+ int(*cmp_fn)(const void *ref, const void *new, unsigned int len); /* Comparison function, should return 0 if the hashes are alike */
+};
+
+static int http_request_prebuild_full_secondary_key(struct stream *s);
+static int http_request_build_secondary_key(struct stream *s, int vary_signature);
+static int http_request_reduce_secondary_key(unsigned int vary_signature,
+ char prebuilt_key[HTTP_CACHE_SEC_KEY_LEN]);
+
+static int parse_encoding_value(struct ist value, unsigned int *encoding_value,
+ unsigned int *has_null_weight);
+
+static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
+ char *buf, unsigned int *buf_len);
+static int default_normalizer(struct htx *htx, struct ist hdr_name,
+ char *buf, unsigned int *buf_len);
+
+static int accept_encoding_bitmap_cmp(const void *ref, const void *new, unsigned int len);
+
+/* Warning: do not forget to update HTTP_CACHE_SEC_KEY_LEN when new items are
+ * added to this array. */
+const struct vary_hashing_information vary_information[] = {
+ { IST("accept-encoding"), VARY_ACCEPT_ENCODING, sizeof(uint32_t), &accept_encoding_normalizer, &accept_encoding_bitmap_cmp },
+ { IST("referer"), VARY_REFERER, sizeof(uint64_t), &default_normalizer, NULL },
+ { IST("origin"), VARY_ORIGIN, sizeof(uint64_t), &default_normalizer, NULL },
+};
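+
+/* Layout sketch of a secondary key built from the array above (assuming its
+ * current three entries): a 4-byte bitmap followed by two 8-byte hashes,
+ * i.e. 4 + 8 + 8 = 20 bytes, which is why HTTP_CACHE_SEC_KEY_LEN must be
+ * kept in sync with this array:
+ *
+ *	[0..3]   accept-encoding bitmap (uint32_t)
+ *	[4..11]  referer hash (uint64_t)
+ *	[12..19] origin hash (uint64_t)
+ */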
+
+
+static inline void cache_rdlock(struct cache_tree *cache)
+{
+ HA_RWLOCK_RDLOCK(CACHE_LOCK, &cache->lock);
+}
+
+static inline void cache_rdunlock(struct cache_tree *cache)
+{
+ HA_RWLOCK_RDUNLOCK(CACHE_LOCK, &cache->lock);
+}
+
+static inline void cache_wrlock(struct cache_tree *cache)
+{
+ HA_RWLOCK_WRLOCK(CACHE_LOCK, &cache->lock);
+}
+
+static inline void cache_wrunlock(struct cache_tree *cache)
+{
+ HA_RWLOCK_WRUNLOCK(CACHE_LOCK, &cache->lock);
+}
+
+/*
+ * cache ctx for filters
+ */
+struct cache_st {
+ struct shared_block *first_block;
+ struct list detached_head;
+};
+
+#define DEFAULT_MAX_SECONDARY_ENTRY 10
+
+struct cache_entry {
+	unsigned int complete;   /* An entry is not valid until <complete> is non-zero. */
+ unsigned int latest_validation; /* latest validation date */
+ unsigned int expire; /* expiration date (wall clock time) */
+ unsigned int age; /* Origin server "Age" header value */
+
+ int refcount;
+
+ struct eb32_node eb; /* ebtree node used to hold the cache object */
+ char hash[20];
+
+ struct list cleanup_list;/* List used between the cache_free_blocks and cache_reserve_finish calls */
+
+ char secondary_key[HTTP_CACHE_SEC_KEY_LEN]; /* Optional secondary key. */
+ unsigned int secondary_key_signature; /* Bitfield of the HTTP headers that should be used
+ * to build secondary keys for this cache entry. */
+ unsigned int secondary_entries_count; /* Should only be filled in the last entry of a list of dup entries */
+ unsigned int last_clear_ts; /* Timestamp of the last call to clear_expired_duplicates. */
+
+ unsigned int etag_length; /* Length of the ETag value (if one was found in the response). */
+ unsigned int etag_offset; /* Offset of the ETag value in the data buffer. */
+
+ time_t last_modified; /* Origin server "Last-Modified" header value converted in
+ * seconds since epoch. If no "Last-Modified"
+ * header is found, use "Date" header value,
+ * otherwise use reception time. This field will
+ * be used in case of an "If-Modified-Since"-based
+ * conditional request. */
+
+ unsigned char data[0];
+};
+
+#define CACHE_BLOCKSIZE 1024
+#define CACHE_ENTRY_MAX_AGE 2147483648U
+
+static struct list caches = LIST_HEAD_INIT(caches);
+static struct list caches_config = LIST_HEAD_INIT(caches_config); /* cache config to init */
+static struct cache *tmp_cache_config = NULL;
+
+DECLARE_STATIC_POOL(pool_head_cache_st, "cache_st", sizeof(struct cache_st));
+
+static struct eb32_node *insert_entry(struct cache *cache, struct cache_tree *tree, struct cache_entry *new_entry);
+static void delete_entry(struct cache_entry *del_entry);
+static void release_entry_locked(struct cache_tree *cache, struct cache_entry *entry);
+static void release_entry_unlocked(struct cache_tree *cache, struct cache_entry *entry);
+
+/*
+ * Find a cache_entry in the <cache>'s tree that has the hash <hash>.
+ * If <delete_expired> is 0 then the entry is left untouched if it is found but
+ * is already expired, and NULL is returned. Otherwise, the expired entry is
+ * removed from the tree and NULL is returned.
+ * Returns a valid (not expired) cache_entry pointer.
+ * The returned entry is not retained, it should be explicitly retained only
+ * when necessary.
+ *
+ * This function must be called under a cache lock, either read if
+ * delete_expired==0, write otherwise.
+ */
+struct cache_entry *get_entry(struct cache_tree *cache_tree, char *hash, int delete_expired)
+{
+ struct eb32_node *node;
+ struct cache_entry *entry;
+
+ node = eb32_lookup(&cache_tree->entries, read_u32(hash));
+ if (!node)
+ return NULL;
+
+ entry = eb32_entry(node, struct cache_entry, eb);
+
+ /* if that's not the right node */
+ if (memcmp(entry->hash, hash, sizeof(entry->hash)))
+ return NULL;
+
+ if (entry->expire > date.tv_sec) {
+ return entry;
+ } else if (delete_expired) {
+ release_entry_locked(cache_tree, entry);
+ }
+ return NULL;
+}
+
+/*
+ * Increment a cache_entry's reference counter.
+ */
+static void retain_entry(struct cache_entry *entry)
+{
+ if (entry)
+ HA_ATOMIC_INC(&entry->refcount);
+}
+
+/*
+ * Decrement a cache_entry's reference counter and remove it from the <cache>'s
+ * tree if the reference counter becomes 0.
+ * If <needs_locking> is 0 then the cache lock was already taken by the caller,
+ * otherwise it must be taken in write mode before actually deleting the entry.
+ */
+static void release_entry(struct cache_tree *cache, struct cache_entry *entry, int needs_locking)
+{
+ if (!entry)
+ return;
+
+ if (HA_ATOMIC_SUB_FETCH(&entry->refcount, 1) <= 0) {
+ if (needs_locking) {
+ cache_wrlock(cache);
+ /* The value might have changed between the last time we
+ * checked it and now, we need to recheck it just in
+ * case.
+ */
+ if (HA_ATOMIC_LOAD(&entry->refcount) > 0) {
+ cache_wrunlock(cache);
+ return;
+ }
+ }
+ delete_entry(entry);
+ if (needs_locking) {
+ cache_wrunlock(cache);
+ }
+ }
+}
+
+/*
+ * Decrement a cache_entry's reference counter and remove it from the <cache>'s
+ * tree if the reference counter becomes 0.
+ * This function must be called under the cache lock in write mode.
+ */
+static inline void release_entry_locked(struct cache_tree *cache, struct cache_entry *entry)
+{
+ release_entry(cache, entry, 0);
+}
+
+/*
+ * Decrement a cache_entry's reference counter and remove it from the <cache>'s
+ * tree if the reference counter becomes 0.
+ * This function must not be called under the cache lock or the shctx lock. The
+ * cache lock might be taken in write mode (if the entry gets deleted).
+ */
+static inline void release_entry_unlocked(struct cache_tree *cache, struct cache_entry *entry)
+{
+ release_entry(cache, entry, 1);
+}
+
+
+/*
+ * Compare a newly built secondary key to the one found in a cache_entry.
+ * Every sub-part of the key is compared to the reference through the dedicated
+ * comparison function of the sub-part (that might do more than a simple
+ * memcmp).
+ * Returns 0 if the keys are alike.
+ */
+static int secondary_key_cmp(const char *ref_key, const char *new_key)
+{
+ int retval = 0;
+ size_t idx = 0;
+ unsigned int offset = 0;
+ const struct vary_hashing_information *info;
+
+ for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && !retval; ++idx) {
+ info = &vary_information[idx];
+
+ if (info->cmp_fn)
+ retval = info->cmp_fn(&ref_key[offset], &new_key[offset], info->hash_length);
+ else
+ retval = memcmp(&ref_key[offset], &new_key[offset], info->hash_length);
+
+ offset += info->hash_length;
+ }
+
+ return retval;
+}
+
+/*
+ * There can be multiple entries with the same primary key in the ebtree so in
+ * order to get the proper one out of the list, we use a secondary_key.
+ * This function simply iterates over all the entries with the same primary_key
+ * until it finds the right one.
+ * If <delete_expired> is 0 then the entry is left untouched if it is found but
+ * is already expired, and NULL is returned. Otherwise, the expired entry is
+ * removed from the tree and NULL is returned.
+ * Returns the cache_entry in case of success, NULL otherwise.
+ *
+ * This function must be called under a cache lock, either read if
+ * delete_expired==0, write otherwise.
+ */
+struct cache_entry *get_secondary_entry(struct cache_tree *cache, struct cache_entry *entry,
+ const char *secondary_key, int delete_expired)
+{
+ struct eb32_node *node = &entry->eb;
+
+ if (!entry->secondary_key_signature)
+ return NULL;
+
+ while (entry && secondary_key_cmp(entry->secondary_key, secondary_key) != 0) {
+ node = eb32_next_dup(node);
+
+		/* Make the best use of this iteration and release expired
+		 * entries when we find them. The secondary_entry count will
+		 * be updated when we try to insert a new entry to this list. */
+ if (entry->expire <= date.tv_sec && delete_expired) {
+ release_entry_locked(cache, entry);
+ }
+
+ entry = node ? eb32_entry(node, struct cache_entry, eb) : NULL;
+ }
+
+ /* Expired entry */
+ if (entry && entry->expire <= date.tv_sec) {
+ if (delete_expired) {
+ release_entry_locked(cache, entry);
+ }
+ entry = NULL;
+ }
+
+ return entry;
+}
+
+static inline struct cache_tree *get_cache_tree_from_hash(struct cache *cache, unsigned int hash)
+{
+ if (!cache)
+ return NULL;
+
+ return &cache->trees[hash % CACHE_TREE_NUM];
+}
+
+
+/*
+ * Remove all expired entries from a list of duplicates.
+ * Return the number of alive entries in the list and sets dup_tail to the
+ * current last item of the list.
+ *
+ * This function must be called under a cache write lock.
+ */
+static unsigned int clear_expired_duplicates(struct cache_tree *cache, struct eb32_node **dup_tail)
+{
+ unsigned int entry_count = 0;
+ struct cache_entry *entry = NULL;
+ struct eb32_node *prev = *dup_tail;
+ struct eb32_node *tail = NULL;
+
+ while (prev) {
+ entry = container_of(prev, struct cache_entry, eb);
+ prev = eb32_prev_dup(prev);
+ if (entry->expire <= date.tv_sec) {
+ release_entry_locked(cache, entry);
+ }
+ else {
+ if (!tail)
+ tail = &entry->eb;
+ ++entry_count;
+ }
+ }
+
+ *dup_tail = tail;
+
+ return entry_count;
+}
+
+
+/*
+ * This function inserts a cache_entry in the cache's ebtree. In case of
+ * duplicate entries (vary), it then checks that the number of entries did not
+ * reach the max number of secondary entries. If this entry should not have been
+ * created, remove it.
+ * In the regular case (unique entries), this function does not do more than a
+ * simple insert. In case of secondary entries, it will at most cost an
+ * insertion+max_sec_entries time checks and entry deletion.
+ * Returns the newly inserted node in case of success, NULL otherwise.
+ *
+ * This function must be called under a cache write lock.
+ */
+static struct eb32_node *insert_entry(struct cache *cache, struct cache_tree *tree, struct cache_entry *new_entry)
+{
+ struct eb32_node *prev = NULL;
+ struct cache_entry *entry = NULL;
+ unsigned int entry_count = 0;
+ unsigned int last_clear_ts = date.tv_sec;
+
+ struct eb32_node *node = eb32_insert(&tree->entries, &new_entry->eb);
+
+ new_entry->refcount = 1;
+
+ /* We should not have multiple entries with the same primary key unless
+ * the entry has a non null vary signature. */
+ if (!new_entry->secondary_key_signature)
+ return node;
+
+ prev = eb32_prev_dup(node);
+ if (prev != NULL) {
+ /* The last entry of a duplicate list should contain the current
+ * number of entries in the list. */
+ entry = container_of(prev, struct cache_entry, eb);
+ entry_count = entry->secondary_entries_count;
+ last_clear_ts = entry->last_clear_ts;
+
+ if (entry_count >= cache->max_secondary_entries) {
+ /* Some entries of the duplicate list might be expired so
+ * we will iterate over all the items in order to free some
+ * space. In order to avoid going over the same list too
+ * often, we first check the timestamp of the last check
+ * performed. */
+ if (last_clear_ts == date.tv_sec) {
+ /* Too many entries for this primary key, clear the
+ * one that was inserted. */
+				release_entry_locked(tree, new_entry);
+ return NULL;
+ }
+
+ entry_count = clear_expired_duplicates(tree, &prev);
+ if (entry_count >= cache->max_secondary_entries) {
+ /* Still too many entries for this primary key, delete
+ * the newly inserted one. */
+ entry = container_of(prev, struct cache_entry, eb);
+ entry->last_clear_ts = date.tv_sec;
+				release_entry_locked(tree, new_entry);
+ return NULL;
+ }
+ }
+ }
+
+ new_entry->secondary_entries_count = entry_count + 1;
+ new_entry->last_clear_ts = last_clear_ts;
+
+ return node;
+}
+
+
+/*
+ * This function removes an entry from the ebtree. If the entry was a duplicate
+ * (in case of Vary), it updates the secondary entry counter in another
+ * duplicate entry (the last entry of the dup list).
+ *
+ * This function must be called under a cache write lock.
+ */
+static void delete_entry(struct cache_entry *del_entry)
+{
+ struct eb32_node *prev = NULL, *next = NULL;
+ struct cache_entry *entry = NULL;
+ struct eb32_node *last = NULL;
+
+ /* The entry might have been removed from the cache before. In such a
+ * case calling eb32_next_dup would crash. */
+ if (del_entry->secondary_key_signature && del_entry->eb.key != 0) {
+ next = &del_entry->eb;
+
+ /* Look for last entry of the duplicates list. */
+ while ((next = eb32_next_dup(next))) {
+ last = next;
+ }
+
+ if (last) {
+ entry = container_of(last, struct cache_entry, eb);
+ --entry->secondary_entries_count;
+ }
+ else {
+ /* The current entry is the last one, look for the
+ * previous one to update its counter. */
+ prev = eb32_prev_dup(&del_entry->eb);
+ if (prev) {
+ entry = container_of(prev, struct cache_entry, eb);
+ entry->secondary_entries_count = del_entry->secondary_entries_count - 1;
+ }
+ }
+ }
+ eb32_delete(&del_entry->eb);
+ del_entry->eb.key = 0;
+}
+
+
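+/* A cache is allocated as the <data> part of a shared_context, and a
+ * cache_entry as the <data> part of a shared_block, so converting back from
+ * one to its container is a mere offsetof() subtraction, as done below. */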
+static inline struct shared_context *shctx_ptr(struct cache *cache)
+{
+ return (struct shared_context *)((unsigned char *)cache - offsetof(struct shared_context, data));
+}
+
+static inline struct shared_block *block_ptr(struct cache_entry *entry)
+{
+ return (struct shared_block *)((unsigned char *)entry - offsetof(struct shared_block, data));
+}
+
+
+
+static int
+cache_store_init(struct proxy *px, struct flt_conf *fconf)
+{
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+static void
+cache_store_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct cache_flt_conf *cconf = fconf->conf;
+
+ if (!(cconf->flags & CACHE_FLT_INIT))
+ free(cconf->c.name);
+ free(cconf);
+}
+
+static int
+cache_store_check(struct proxy *px, struct flt_conf *fconf)
+{
+ struct cache_flt_conf *cconf = fconf->conf;
+ struct flt_conf *f;
+ struct cache *cache;
+ int comp = 0;
+
+ /* Find the cache corresponding to the name in the filter config. The
+ * cache will not be referenced now in the filter config because it is
+ * not fully allocated. This step will be performed during the cache
+ * post_check.
+ */
+ list_for_each_entry(cache, &caches_config, list) {
+ if (strcmp(cache->id, cconf->c.name) == 0)
+ goto found;
+ }
+
+ ha_alert("config: %s '%s': unable to find the cache '%s' referenced by the filter 'cache'.\n",
+ proxy_type_str(px), px->id, (char *)cconf->c.name);
+ return 1;
+
+ found:
+ /* Here <cache> points on the cache the filter must use and <cconf>
+ * points on the cache filter configuration. */
+
+ /* Check all filters for proxy <px> to know if the compression is
+ * enabled and if it is after the cache. When the compression is before
+ * the cache, an error is returned. Also check if the cache filter must
+	 * be explicitly declared or not. */
+ list_for_each_entry(f, &px->filter_configs, list) {
+ if (f == fconf) {
+ /* The compression filter must be evaluated after the cache. */
+ if (comp) {
+ ha_alert("config: %s '%s': unable to enable the compression filter before "
+ "the cache '%s'.\n", proxy_type_str(px), px->id, cache->id);
+ return 1;
+ }
+ }
+ else if (f->id == http_comp_flt_id)
+ comp = 1;
+ else if (f->id == fcgi_flt_id)
+ continue;
+ else if ((f->id != fconf->id) && (cconf->flags & CACHE_FLT_F_IMPLICIT_DECL)) {
+			/* Implicit declaration is only allowed with the
+			 * compression and fcgi. For other filters, an explicit
+			 * declaration is required. */
+ ha_alert("config: %s '%s': require an explicit filter declaration "
+ "to use the cache '%s'.\n", proxy_type_str(px), px->id, cache->id);
+ return 1;
+ }
+
+ }
+ return 0;
+}
+
+static int
+cache_store_strm_init(struct stream *s, struct filter *filter)
+{
+ struct cache_st *st;
+
+ st = pool_alloc(pool_head_cache_st);
+ if (st == NULL)
+ return -1;
+
+ st->first_block = NULL;
+ filter->ctx = st;
+
+ /* Register post-analyzer on AN_RES_WAIT_HTTP */
+ filter->post_analyzers |= AN_RES_WAIT_HTTP;
+ return 1;
+}
+
+static void
+cache_store_strm_deinit(struct stream *s, struct filter *filter)
+{
+ struct cache_st *st = filter->ctx;
+ struct cache_flt_conf *cconf = FLT_CONF(filter);
+ struct cache *cache = cconf->c.cache;
+ struct shared_context *shctx = shctx_ptr(cache);
+
+	/* Everything should be released in the http_end filter, but we need to
+	 * do it here too, in case of errors */
+ if (st && st->first_block) {
+ struct cache_entry *object = (struct cache_entry *)st->first_block->data;
+ if (!object->complete) {
+ /* The stream was closed but the 'complete' flag was not
+ * set which means that cache_store_http_end was not
+ * called. The stream must have been closed before we
+ * could store the full answer in the cache.
+ */
+ release_entry_unlocked(&cache->trees[object->eb.key % CACHE_TREE_NUM], object);
+ }
+ shctx_wrlock(shctx);
+ shctx_row_reattach(shctx, st->first_block);
+ shctx_wrunlock(shctx);
+ }
+ if (st) {
+ pool_free(pool_head_cache_st, st);
+ filter->ctx = NULL;
+ }
+}
+
+static int
+cache_store_post_analyze(struct stream *s, struct filter *filter, struct channel *chn,
+ unsigned an_bit)
+{
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct cache_st *st = filter->ctx;
+
+ if (an_bit != AN_RES_WAIT_HTTP)
+ goto end;
+
+ /* Here we need to check if any compression filter precedes the cache
+ * filter. This is only possible when the compression is configured in
+ * the frontend while the cache filter is configured on the
+ * backend. This case cannot be detected during HAProxy startup. So in
+ * such cases, the cache is disabled.
+ */
+ if (st && (msg->flags & HTTP_MSGF_COMPRESSING)) {
+ pool_free(pool_head_cache_st, st);
+ filter->ctx = NULL;
+ }
+
+ end:
+ return 1;
+}
+
+static int
+cache_store_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct cache_st *st = filter->ctx;
+
+ if (!(msg->chn->flags & CF_ISRESP) || !st)
+ return 1;
+
+ if (st->first_block)
+ register_data_filter(s, msg->chn, filter);
+ return 1;
+}
+
+static inline void disable_cache_entry(struct cache_st *st,
+ struct filter *filter, struct shared_context *shctx)
+{
+ struct cache_entry *object;
+ struct cache *cache = (struct cache*)shctx->data;
+
+ object = (struct cache_entry *)st->first_block->data;
+ filter->ctx = NULL; /* disable cache */
+ release_entry_unlocked(&cache->trees[object->eb.key % CACHE_TREE_NUM], object);
+ shctx_wrlock(shctx);
+ shctx_row_reattach(shctx, st->first_block);
+ shctx_wrunlock(shctx);
+ pool_free(pool_head_cache_st, st);
+}
+
+static int
+cache_store_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ struct cache_flt_conf *cconf = FLT_CONF(filter);
+ struct shared_context *shctx = shctx_ptr(cconf->c.cache);
+ struct cache_st *st = filter->ctx;
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_blk *blk;
+ struct shared_block *fb;
+ struct htx_ret htxret;
+ unsigned int orig_len, to_forward;
+ int ret;
+
+ if (!len)
+ return len;
+
+ if (!st->first_block) {
+ unregister_data_filter(s, msg->chn, filter);
+ return len;
+ }
+
+ chunk_reset(&trash);
+ orig_len = len;
+ to_forward = 0;
+
+ htxret = htx_find_offset(htx, offset);
+ blk = htxret.blk;
+ offset = htxret.ret;
+ for (; blk && len; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t info, sz = htx_get_blksz(blk);
+ struct ist v;
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, offset);
+ v = isttrim(v, len);
+
+ info = (type << 28) + v.len;
+ chunk_memcat(&trash, (char *)&info, sizeof(info));
+ chunk_istcat(&trash, v);
+ to_forward += v.len;
+ len -= v.len;
+ break;
+
+ default:
+ /* Here offset must always be 0 because only
+ * DATA blocks can be partially transferred. */
+ if (offset)
+ goto no_cache;
+ if (sz > len)
+ goto end;
+
+ chunk_memcat(&trash, (char *)&blk->info, sizeof(blk->info));
+ chunk_memcat(&trash, htx_get_blk_ptr(htx, blk), sz);
+ to_forward += sz;
+ len -= sz;
+ break;
+ }
+
+ offset = 0;
+ }
+
+ end:
+
+ fb = shctx_row_reserve_hot(shctx, st->first_block, trash.data);
+ if (!fb) {
+ goto no_cache;
+ }
+
+ ret = shctx_row_data_append(shctx, st->first_block,
+ (unsigned char *)b_head(&trash), b_data(&trash));
+ if (ret < 0)
+ goto no_cache;
+
+ return to_forward;
+
+ no_cache:
+ disable_cache_entry(st, filter, shctx);
+ unregister_data_filter(s, msg->chn, filter);
+ return orig_len;
+}
+
+static int
+cache_store_http_end(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct cache_st *st = filter->ctx;
+ struct cache_flt_conf *cconf = FLT_CONF(filter);
+ struct cache *cache = cconf->c.cache;
+ struct shared_context *shctx = shctx_ptr(cache);
+ struct cache_entry *object;
+
+ if (!(msg->chn->flags & CF_ISRESP))
+ return 1;
+
+ if (st && st->first_block) {
+
+ object = (struct cache_entry *)st->first_block->data;
+
+ shctx_wrlock(shctx);
+ /* The whole payload was cached, the entry can now be used. */
+ object->complete = 1;
+ /* remove from the hotlist */
+ shctx_row_reattach(shctx, st->first_block);
+ shctx_wrunlock(shctx);
+
+ }
+ if (st) {
+ pool_free(pool_head_cache_st, st);
+ filter->ctx = NULL;
+ }
+
+ return 1;
+}
+
+/*
+ * This is intended to be used when checking HTTP headers for some
+ * word=value directive. Returns a pointer to the first character of the
+ * value, or NULL if the word was not found or no value was assigned to it.
+ */
+char *directive_value(const char *sample, int slen, const char *word, int wlen)
+{
+ int st = 0;
+
+ if (slen < wlen)
+		return NULL;
+
+ while (wlen) {
+ char c = *sample ^ *word;
+ if (c && c != ('A' ^ 'a'))
+ return NULL;
+ sample++;
+ word++;
+ slen--;
+ wlen--;
+ }
+
+ while (slen) {
+ if (st == 0) {
+ if (*sample != '=')
+ return NULL;
+ sample++;
+ slen--;
+ st = 1;
+ continue;
+ } else {
+ return (char *)sample;
+ }
+ }
+
+ return NULL;
+}
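+
+/* Usage sketch (illustrative, not part of the original file): given a
+ * Cache-Control value "max-age=3600", directive_value(value, 12, "max-age", 7)
+ * returns a pointer to "3600"; the caller is expected to know where the
+ * directive's value ends (see http_calc_maxage() below).
+ */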
+
+/*
+ * Return the maxage in seconds of an HTTP response.
+ * The returned value will always take the cache's configuration into account
+ * (cache->maxage) but the actual max age of the response will be set in the
+ * true_maxage parameter. It will be used to determine if a response is already
+ * stale or not.
+ * Compute the maxage using either:
+ * - the assigned max-age of the cache
+ * - the s-maxage directive
+ * - the max-age directive
+ * - (Expires - Date) headers
+ * - the default-max-age of the cache
+ *
+ */
+int http_calc_maxage(struct stream *s, struct cache *cache, int *true_maxage)
+{
+ struct htx *htx = htxbuf(&s->res.buf);
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ long smaxage = -1;
+ long maxage = -1;
+ int expires = -1;
+ struct tm tm = {};
+ time_t expires_val = 0;
+ char *endptr = NULL;
+ int offset = 0;
+
+ /* The Cache-Control max-age and s-maxage directives should be followed by
+ * a positive numerical value (see RFC 7234#5.2.1.1). According to the
+ * specs, a sender "should not" generate a quoted-string value but we will
+ * still accept this format since it isn't strictly forbidden. */
+ while (http_find_header(htx, ist("cache-control"), &ctx, 0)) {
+ char *value;
+
+ value = directive_value(ctx.value.ptr, ctx.value.len, "s-maxage", 8);
+ if (value) {
+ struct buffer *chk = get_trash_chunk();
+
+ chunk_memcat(chk, value, ctx.value.len - 8 + 1);
+ chunk_memcat(chk, "", 1);
+ offset = (*chk->area == '"') ? 1 : 0;
+ smaxage = strtol(chk->area + offset, &endptr, 10);
+ if (unlikely(smaxage < 0 || endptr == chk->area + offset))
+ return -1;
+ }
+
+ value = directive_value(ctx.value.ptr, ctx.value.len, "max-age", 7);
+ if (value) {
+ struct buffer *chk = get_trash_chunk();
+
+ chunk_memcat(chk, value, ctx.value.len - 7 + 1);
+ chunk_memcat(chk, "", 1);
+ offset = (*chk->area == '"') ? 1 : 0;
+ maxage = strtol(chk->area + offset, &endptr, 10);
+ if (unlikely(maxage < 0 || endptr == chk->area + offset))
+ return -1;
+ }
+ }
+
+ /* Look for Expires header if no s-maxage or max-age Cache-Control data
+ * was found. */
+ if (maxage == -1 && smaxage == -1) {
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("expires"), &ctx, 1)) {
+ if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
+ expires_val = my_timegm(&tm);
+ /* A request having an expiring date earlier
+ * than the current date should be considered as
+ * stale. */
+ expires = (expires_val >= date.tv_sec) ?
+ (expires_val - date.tv_sec) : 0;
+ }
+ else {
+ /* Following RFC 7234#5.3, an invalid date
+ * format must be treated as a date in the past
+ * so the cache entry must be seen as already
+ * expired. */
+ expires = 0;
+ }
+ }
+ }
+
+
+ if (smaxage > 0) {
+ if (true_maxage)
+ *true_maxage = smaxage;
+ return MIN(smaxage, cache->maxage);
+ }
+
+ if (maxage > 0) {
+ if (true_maxage)
+ *true_maxage = maxage;
+ return MIN(maxage, cache->maxage);
+ }
+
+ if (expires >= 0) {
+ if (true_maxage)
+ *true_maxage = expires;
+ return MIN(expires, cache->maxage);
+ }
+
+ return cache->maxage;
+
+}
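+
+/* Worked example (illustrative): for a response carrying
+ * "Cache-Control: max-age=3600" handled by a cache section configured with
+ * "max-age 60", the function above returns MIN(3600, 60) = 60 while
+ * *true_maxage is set to 3600, so staleness checks still use the origin
+ * server's own value.
+ */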
+
+
+static void cache_free_blocks(struct shared_block *first, void *data)
+{
+ struct cache_entry *object = (struct cache_entry *)first->data;
+ struct cache *cache = (struct cache *)data;
+ struct cache_tree *cache_tree;
+
+ if (object->eb.key) {
+ object->complete = 0;
+ cache_tree = &cache->trees[object->eb.key % CACHE_TREE_NUM];
+ retain_entry(object);
+ HA_SPIN_LOCK(CACHE_LOCK, &cache_tree->cleanup_lock);
+ LIST_INSERT(&cache_tree->cleanup_list, &object->cleanup_list);
+ HA_SPIN_UNLOCK(CACHE_LOCK, &cache_tree->cleanup_lock);
+ }
+}
+
+static void cache_reserve_finish(struct shared_context *shctx)
+{
+ struct cache_entry *object, *back;
+ struct cache *cache = (struct cache *)shctx->data;
+ struct cache_tree *cache_tree;
+ int cache_tree_idx = 0;
+
+ for (; cache_tree_idx < CACHE_TREE_NUM; ++cache_tree_idx) {
+ cache_tree = &cache->trees[cache_tree_idx];
+
+ cache_wrlock(cache_tree);
+ HA_SPIN_LOCK(CACHE_LOCK, &cache_tree->cleanup_lock);
+
+ list_for_each_entry_safe(object, back, &cache_tree->cleanup_list, cleanup_list) {
+ LIST_DELETE(&object->cleanup_list);
+ /*
+ * At this point we locked the cache tree in write mode
+ * so no new thread could retain the current entry
+ * because the only two places where it can happen is in
+ * the cache_use case which is under cache_rdlock and
+ * the reserve_hot case which would require the
+ * corresponding block to still be in the avail list,
+ * which is impossible (we reserved it for a thread and
+ * took it out of the avail list already). The only two
+ * references are then the default one (upon cache_entry
+ * creation) and the one in this cleanup list.
+ */
+ BUG_ON(object->refcount > 2);
+ delete_entry(object);
+ }
+
+ HA_SPIN_UNLOCK(CACHE_LOCK, &cache_tree->cleanup_lock);
+ cache_wrunlock(cache_tree);
+ }
+}
+
+
+/* As per RFC 7234#4.3.2, in case of "If-Modified-Since" conditional request, the
+ * date value should be compared to a date determined in a previous response (for
+ * the same entity). This date can be the "Last-Modified" value, the "Date"
+ * value, or the response's reception time (in decreasing order of priority). */
+static time_t get_last_modified_time(struct htx *htx)
+{
+ time_t last_modified = 0;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct tm tm = {};
+
+ if (http_find_header(htx, ist("last-modified"), &ctx, 1)) {
+ if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
+ last_modified = my_timegm(&tm);
+ }
+ }
+
+ if (!last_modified) {
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("date"), &ctx, 1)) {
+ if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
+ last_modified = my_timegm(&tm);
+ }
+ }
+ }
+
+ /* Fallback on the current time if no "Last-Modified" or "Date" header
+ * was found. */
+ if (!last_modified)
+ last_modified = date.tv_sec;
+
+ return last_modified;
+}
+
+/*
+ * Checks the vary header's value. The headers on which vary should be applied
+ * must be explicitly supported in the vary_information array (see cache.c). If
+ * any other header is mentioned, we won't store the response.
+ * Returns 1 if Vary-based storage can work, 0 otherwise.
+ */
+static int http_check_vary_header(struct htx *htx, unsigned int *vary_signature)
+{
+ unsigned int vary_idx;
+ unsigned int vary_info_count;
+ const struct vary_hashing_information *vary_info;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+
+ int retval = 1;
+
+ *vary_signature = 0;
+
+ vary_info_count = sizeof(vary_information)/sizeof(*vary_information);
+ while (retval && http_find_header(htx, ist("Vary"), &ctx, 0)) {
+ for (vary_idx = 0; vary_idx < vary_info_count; ++vary_idx) {
+ vary_info = &vary_information[vary_idx];
+ if (isteqi(ctx.value, vary_info->hdr_name)) {
+ *vary_signature |= vary_info->value;
+ break;
+ }
+ }
+ retval = (vary_idx < vary_info_count);
+ }
+
+ return retval;
+}
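+
+/* Illustrative behaviour (derived from the array above): a response with
+ * "Vary: accept-encoding, referer" yields a signature of
+ * VARY_ACCEPT_ENCODING|VARY_REFERER and returns 1, while "Vary: user-agent"
+ * returns 0 because that header is not in vary_information, so the response
+ * will not be stored.
+ */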
+
+
+/*
+ * Look for the accept-encoding part of the secondary_key and replace the
+ * encoding bitmap part of the hash with the actual encoding of the response,
+ * extracted from the content-encoding header value.
+ * Responses that have an unknown encoding will not be cached if they also
+ * "vary" on the accept-encoding value.
+ * Returns 0 if we found a known encoding in the response, -1 otherwise.
+ */
+static int set_secondary_key_encoding(struct htx *htx, char *secondary_key)
+{
+ unsigned int resp_encoding_bitmap = 0;
+ const struct vary_hashing_information *info = vary_information;
+ unsigned int offset = 0;
+ unsigned int count = 0;
+ unsigned int hash_info_count = sizeof(vary_information)/sizeof(*vary_information);
+ unsigned int encoding_value;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+
+ /* Look for the accept-encoding part of the secondary_key. */
+ while (count < hash_info_count && info->value != VARY_ACCEPT_ENCODING) {
+ offset += info->hash_length;
+ ++info;
+ ++count;
+ }
+
+ if (count == hash_info_count)
+ return -1;
+
+ while (http_find_header(htx, ist("content-encoding"), &ctx, 0)) {
+ if (parse_encoding_value(ctx.value, &encoding_value, NULL))
+ return -1; /* Do not store responses with an unknown encoding */
+ resp_encoding_bitmap |= encoding_value;
+ }
+
+ if (!resp_encoding_bitmap)
+ resp_encoding_bitmap |= VARY_ENCODING_IDENTITY;
+
+ /* Rewrite the bitmap part of the hash with the new bitmap that only
+ * corresponds to the response's encoding. */
+ write_u32(secondary_key + offset, resp_encoding_bitmap);
+
+ return 0;
+}
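+
+/* Illustrative walk-through: if the stored response carries
+ * "Content-Encoding: gzip", the accept-encoding slot of the secondary key
+ * is overwritten with VARY_ENCODING_GZIP; with no "Content-Encoding"
+ * header at all it is overwritten with VARY_ENCODING_IDENTITY; and with a
+ * made-up "Content-Encoding: my-custom-enc" the function returns -1 and
+ * the response is not cached. */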
+
+
+/*
+ * This function will store the headers of the response in a buffer and then
+ * register a filter to store the data
+ */
+enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ int effective_maxage = 0;
+ int true_maxage = 0;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct filter *filter;
+ struct shared_block *first = NULL;
+ struct cache_flt_conf *cconf = rule->arg.act.p[0];
+ struct cache *cache = cconf->c.cache;
+ struct shared_context *shctx = shctx_ptr(cache);
+ struct cache_st *cache_ctx = NULL;
+ struct cache_entry *object, *old;
+ unsigned int key = read_u32(txn->cache_hash);
+ struct htx *htx;
+ struct http_hdr_ctx ctx;
+ size_t hdrs_len = 0;
+ int32_t pos;
+ unsigned int vary_signature = 0;
+ struct cache_tree *cache_tree = NULL;
+
+ /* Don't cache if the response came from a cache */
+ if ((obj_type(s->target) == OBJ_TYPE_APPLET) &&
+ s->target == &http_cache_applet.obj_type) {
+ goto out;
+ }
+
+ /* cache only HTTP/1.1 */
+ if (!(txn->req.flags & HTTP_MSGF_VER_11))
+ goto out;
+
+ cache_tree = get_cache_tree_from_hash(cache, read_u32(txn->cache_hash));
+
+ /* cache only GET method */
+ if (txn->meth != HTTP_METH_GET) {
+ /* In case of successful unsafe method on a stored resource, the
+ * cached entry must be invalidated (see RFC7234#4.4).
+ * A "non-error response" is one with a 2xx (Successful) or 3xx
+ * (Redirection) status code. */
+ if (txn->status >= 200 && txn->status < 400) {
+ switch (txn->meth) {
+ case HTTP_METH_OPTIONS:
+ case HTTP_METH_GET:
+ case HTTP_METH_HEAD:
+ case HTTP_METH_TRACE:
+ break;
+
+ default: /* Any unsafe method */
+ /* Discard any corresponding entry in case of successful
+ * unsafe request (such as PUT, POST or DELETE). */
+ cache_wrlock(cache_tree);
+
+ old = get_entry(cache_tree, txn->cache_hash, 1);
+ if (old)
+ release_entry_locked(cache_tree, old);
+ cache_wrunlock(cache_tree);
+ }
+ }
+ goto out;
+ }
+
+ /* cache key was not computed */
+ if (!key)
+ goto out;
+
+ /* cache only 200 status code */
+ if (txn->status != 200)
+ goto out;
+
+ /* Find the corresponding filter instance for the current stream */
+ list_for_each_entry(filter, &s->strm_flt.filters, list) {
+ if (FLT_ID(filter) == cache_store_flt_id && FLT_CONF(filter) == cconf) {
+ /* No filter ctx, don't cache anything */
+ if (!filter->ctx)
+ goto out;
+ cache_ctx = filter->ctx;
+ break;
+ }
+ }
+
+ /* from there, cache_ctx is always defined */
+ htx = htxbuf(&s->res.buf);
+
+ /* Do not cache too big objects. */
+ if ((msg->flags & HTTP_MSGF_CNT_LEN) && shctx->max_obj_size > 0 &&
+ htx->data + htx->extra > shctx->max_obj_size)
+ goto out;
+
+ /* Only a subset of headers are supported in our Vary implementation. If
+ * any other header is present in the Vary header value, we won't be
+ * able to use the cache. Likewise, if Vary header support is disabled,
+ * avoid caching responses that contain such a header. */
+ ctx.blk = NULL;
+ if (cache->vary_processing_enabled) {
+ if (!http_check_vary_header(htx, &vary_signature))
+ goto out;
+ if (vary_signature) {
+ /* If something went wrong during the secondary key
+ * building, do not store the response. */
+ if (!(txn->flags & TX_CACHE_HAS_SEC_KEY))
+ goto out;
+ http_request_reduce_secondary_key(vary_signature, txn->cache_secondary_hash);
+ }
+ }
+ else if (http_find_header(htx, ist("Vary"), &ctx, 0)) {
+ goto out;
+ }
+
+ http_check_response_for_cacheability(s, &s->res);
+
+ if (!(txn->flags & TX_CACHEABLE) || !(txn->flags & TX_CACHE_COOK))
+ goto out;
+
+ cache_wrlock(cache_tree);
+ old = get_entry(cache_tree, txn->cache_hash, 1);
+ if (old) {
+ if (vary_signature)
+ old = get_secondary_entry(cache_tree, old,
+ txn->cache_secondary_hash, 1);
+ if (old) {
+ if (!old->complete) {
+ /* An entry with the same primary key is already being
+ * created, we should not try to store the current
+ * response because it will waste space in the cache. */
+ cache_wrunlock(cache_tree);
+ goto out;
+ }
+ release_entry_locked(cache_tree, old);
+ }
+ }
+ cache_wrunlock(cache_tree);
+
+ first = shctx_row_reserve_hot(shctx, NULL, sizeof(struct cache_entry));
+ if (!first) {
+ goto out;
+ }
+
+ /* the received memory is not initialized, we need at least to mark
+ * the object as not indexed yet.
+ */
+ object = (struct cache_entry *)first->data;
+ memset(object, 0, sizeof(*object));
+ object->eb.key = key;
+ object->secondary_key_signature = vary_signature;
+ /* We need to temporarily set a valid expiring time until the actual one
+ * is set by the end of this function (in case of concurrent accesses to
+ * the same resource). This way the second access will find an existing
+ * but not yet usable entry in the tree and will avoid storing its data. */
+ object->expire = date.tv_sec + 2;
+
+ memcpy(object->hash, txn->cache_hash, sizeof(object->hash));
+ if (vary_signature)
+ memcpy(object->secondary_key, txn->cache_secondary_hash, HTTP_CACHE_SEC_KEY_LEN);
+
+ cache_wrlock(cache_tree);
+ /* Insert the entry in the tree even if the payload is not cached yet. */
+ if (insert_entry(cache, cache_tree, object) != &object->eb) {
+ object->eb.key = 0;
+ cache_wrunlock(cache_tree);
+ goto out;
+ }
+ cache_wrunlock(cache_tree);
+
+ /* reserve space for the cache_entry structure */
+ first->len = sizeof(struct cache_entry);
+ first->last_append = NULL;
+
+ /* Determine the entry's maximum age (taking into account the cache's
+ * configuration) as well as the response's explicit max age (extracted
+ * from cache-control directives or the expires header). */
+ effective_maxage = http_calc_maxage(s, cache, &true_maxage);
+
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Age"), &ctx, 0)) {
+ long long hdr_age;
+ if (!strl2llrc(ctx.value.ptr, ctx.value.len, &hdr_age) && hdr_age > 0) {
+ if (unlikely(hdr_age > CACHE_ENTRY_MAX_AGE))
+ hdr_age = CACHE_ENTRY_MAX_AGE;
+ /* A response with an Age value greater than its
+ * announced max age is stale and should not be stored. */
+ object->age = hdr_age;
+ if (unlikely(object->age > true_maxage))
+ goto out;
+ }
+ else
+ goto out;
+ http_remove_header(htx, &ctx);
+ }
+
+ /* Build a last-modified time that will be stored in the cache_entry and
+ * compared to a future If-Modified-Since client header. */
+ object->last_modified = get_last_modified_time(htx);
+
+ chunk_reset(&trash);
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+
+ hdrs_len += sizeof(*blk) + sz;
+ chunk_memcat(&trash, (char *)&blk->info, sizeof(blk->info));
+ chunk_memcat(&trash, htx_get_blk_ptr(htx, blk), sz);
+
+ /* Look for optional ETag header.
+ * We need to store the offset of the ETag value in order for
+ * future conditional requests to be able to perform ETag
+ * comparisons. */
+ if (type == HTX_BLK_HDR) {
+ struct ist header_name = htx_get_blk_name(htx, blk);
+ if (isteq(header_name, ist("etag"))) {
+ object->etag_length = sz - istlen(header_name);
+ object->etag_offset = sizeof(struct cache_entry) + b_data(&trash) - sz + istlen(header_name);
+ }
+ }
+ if (type == HTX_BLK_EOH)
+ break;
+ }
+
+ /* Do not cache objects if the headers are too big. */
+ if (hdrs_len > htx->size - global.tune.maxrewrite)
+ goto out;
+
+ /* If the response has a secondary_key, fill its key part related to
+ * encodings with the actual encoding of the response. This way any
+ * subsequent request having the same primary key will have its accepted
+ * encodings tested upon the cached response's one.
+ * We will not cache a response that has an unknown encoding (not
+ * explicitly supported in parse_encoding_value function). */
+ if (cache->vary_processing_enabled && vary_signature)
+ if (set_secondary_key_encoding(htx, object->secondary_key))
+ goto out;
+
+ if (!shctx_row_reserve_hot(shctx, first, trash.data)) {
+ goto out;
+ }
+
+ /* Cache the headers in an http action because it allows choosing what
+ * to cache, for example you might want to cache a response before
+ * modifying some HTTP headers, or on the contrary after modifying
+ * those headers.
+ */
+ /* does not need to be locked because it's in the "hot" list,
+ * copy the headers */
+ if (shctx_row_data_append(shctx, first, (unsigned char *)trash.area, trash.data) < 0)
+ goto out;
+
+ /* register the buffer in the filter ctx for filling it with data */
+ if (cache_ctx) {
+ cache_ctx->first_block = first;
+ LIST_INIT(&cache_ctx->detached_head);
+ /* store latest value and expiration time */
+ object->latest_validation = date.tv_sec;
+ object->expire = date.tv_sec + effective_maxage;
+ return ACT_RET_CONT;
+ }
+
+out:
+ /* if we do not cache */
+ if (first) {
+ first->len = 0;
+ if (object->eb.key) {
+ release_entry_unlocked(cache_tree, object);
+ }
+ shctx_wrlock(shctx);
+ shctx_row_reattach(shctx, first);
+ shctx_wrunlock(shctx);
+ }
+
+ return ACT_RET_CONT;
+}
+
+#define HTX_CACHE_INIT 0 /* Initial state. */
+#define HTX_CACHE_HEADER 1 /* Cache entry headers forwarding */
+#define HTX_CACHE_DATA 2 /* Cache entry data forwarding */
+#define HTX_CACHE_EOM 3 /* Cache entry completely forwarded. Finish the HTX message */
+#define HTX_CACHE_END 4 /* Cache entry treatment terminated */
+
+static void http_cache_applet_release(struct appctx *appctx)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
+ struct cache_entry *cache_ptr = ctx->entry;
+ struct cache *cache = cconf->c.cache;
+ struct shared_context *shctx = shctx_ptr(cache);
+ struct shared_block *first = block_ptr(cache_ptr);
+
+ release_entry(ctx->cache_tree, cache_ptr, 1);
+
+ shctx_wrlock(shctx);
+ shctx_row_reattach(shctx, first);
+ shctx_wrunlock(shctx);
+}
+
+
+static unsigned int htx_cache_dump_blk(struct appctx *appctx, struct htx *htx, enum htx_blk_type type,
+ uint32_t info, struct shared_block *shblk, unsigned int offset)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
+ struct shared_context *shctx = shctx_ptr(cconf->c.cache);
+ struct htx_blk *blk;
+ char *ptr;
+ unsigned int max, total;
+ uint32_t blksz;
+
+ max = htx_get_max_blksz(htx,
+ channel_htx_recv_max(sc_ic(appctx_sc(appctx)), htx));
+ if (!max)
+ return 0;
+ blksz = ((type == HTX_BLK_HDR || type == HTX_BLK_TLR)
+ ? (info & 0xff) + ((info >> 8) & 0xfffff)
+ : info & 0xfffffff);
+ if (blksz > max)
+ return 0;
+
+ blk = htx_add_blk(htx, type, blksz);
+ if (!blk)
+ return 0;
+
+ blk->info = info;
+ total = 4;
+ ptr = htx_get_blk_ptr(htx, blk);
+ while (blksz) {
+ max = MIN(blksz, shctx->block_size - offset);
+ memcpy(ptr, (const char *)shblk->data + offset, max);
+ offset += max;
+ blksz -= max;
+ total += max;
+ ptr += max;
+ if (blksz || offset == shctx->block_size) {
+ shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
+ offset = 0;
+ }
+ }
+ ctx->offset = offset;
+ ctx->next = shblk;
+ ctx->sent += total;
+ return total;
+}
+
+static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *htx,
+ uint32_t info, struct shared_block *shblk, unsigned int offset)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
+ struct shared_context *shctx = shctx_ptr(cconf->c.cache);
+ unsigned int max, total, rem_data;
+ uint32_t blksz;
+
+ max = htx_get_max_blksz(htx,
+ channel_htx_recv_max(sc_ic(appctx_sc(appctx)), htx));
+ if (!max)
+ return 0;
+
+ rem_data = 0;
+ if (ctx->rem_data) {
+ blksz = ctx->rem_data;
+ total = 0;
+ }
+ else {
+ blksz = (info & 0xfffffff);
+ total = 4;
+ }
+ if (blksz > max) {
+ rem_data = blksz - max;
+ blksz = max;
+ }
+
+ while (blksz) {
+ size_t sz;
+
+ max = MIN(blksz, shctx->block_size - offset);
+ sz = htx_add_data(htx, ist2(shblk->data + offset, max));
+ offset += sz;
+ blksz -= sz;
+ total += sz;
+ if (sz < max)
+ break;
+ if (blksz || offset == shctx->block_size) {
+ shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
+ offset = 0;
+ }
+ }
+
+ ctx->offset = offset;
+ ctx->next = shblk;
+ ctx->sent += total;
+ ctx->rem_data = rem_data + blksz;
+ return total;
+}
+
+static size_t htx_cache_dump_msg(struct appctx *appctx, struct htx *htx, unsigned int len,
+ enum htx_blk_type mark)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0];
+ struct shared_context *shctx = shctx_ptr(cconf->c.cache);
+ struct shared_block *shblk;
+ unsigned int offset, sz;
+ unsigned int ret, total = 0;
+
+ while (len) {
+ enum htx_blk_type type;
+ uint32_t info;
+
+ shblk = ctx->next;
+ offset = ctx->offset;
+ if (ctx->rem_data) {
+ type = HTX_BLK_DATA;
+ info = 0;
+ goto add_data_blk;
+ }
+
+ /* Get the info of the next HTX block. It may be split across 2 shblks */
+ sz = MIN(4, shctx->block_size - offset);
+ memcpy((char *)&info, (const char *)shblk->data + offset, sz);
+ offset += sz;
+ if (sz < 4) {
+ shblk = LIST_NEXT(&shblk->list, typeof(shblk), list);
+ memcpy(((char *)&info)+sz, (const char *)shblk->data, 4 - sz);
+ offset = (4 - sz);
+ }
+
+ /* Get payload of the next HTX block and insert it. */
+ type = (info >> 28);
+ if (type != HTX_BLK_DATA)
+ ret = htx_cache_dump_blk(appctx, htx, type, info, shblk, offset);
+ else {
+ add_data_blk:
+ ret = htx_cache_dump_data_blk(appctx, htx, info, shblk, offset);
+ }
+
+ if (!ret)
+ break;
+ total += ret;
+ len -= ret;
+
+ if (ctx->rem_data || type == mark)
+ break;
+ }
+
+ return total;
+}
+
+static int htx_cache_add_age_hdr(struct appctx *appctx, struct htx *htx)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_entry *cache_ptr = ctx->entry;
+ unsigned int age;
+ char *end;
+
+ chunk_reset(&trash);
+ age = MAX(0, (int)(date.tv_sec - cache_ptr->latest_validation)) + cache_ptr->age;
+ if (unlikely(age > CACHE_ENTRY_MAX_AGE))
+ age = CACHE_ENTRY_MAX_AGE;
+ end = ultoa_o(age, b_head(&trash), b_size(&trash));
+ b_set_data(&trash, end - b_head(&trash));
+ if (!http_add_header(htx, ist("Age"), ist2(b_head(&trash), b_data(&trash))))
+ return 0;
+ return 1;
+}
+
+static void http_cache_io_handler(struct appctx *appctx)
+{
+ struct cache_appctx *ctx = appctx->svcctx;
+ struct cache_entry *cache_ptr = ctx->entry;
+ struct shared_block *first = block_ptr(cache_ptr);
+ struct stconn *sc = appctx_sc(appctx);
+ struct channel *req = sc_oc(sc);
+ struct channel *res = sc_ic(sc);
+ struct htx *req_htx, *res_htx;
+ struct buffer *errmsg;
+ unsigned int len;
+ size_t ret, total = 0;
+
+ res_htx = htx_from_buf(&res->buf);
+ total = res_htx->data;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW))))
+ goto out;
+
+ /* Check if the input buffer is available. */
+ if (!b_size(&res->buf)) {
+ sc_need_room(sc, 0);
+ goto out;
+ }
+
+ if (appctx->st0 == HTX_CACHE_INIT) {
+ ctx->next = block_ptr(cache_ptr);
+ ctx->offset = sizeof(*cache_ptr);
+ ctx->sent = 0;
+ ctx->rem_data = 0;
+ appctx->st0 = HTX_CACHE_HEADER;
+ }
+
+ if (appctx->st0 == HTX_CACHE_HEADER) {
+ /* Headers must be dumped at once. Otherwise it is an error. */
+ len = first->len - sizeof(*cache_ptr) - ctx->sent;
+ ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_EOH);
+ if (!ret || (htx_get_tail_type(res_htx) != HTX_BLK_EOH) ||
+ !htx_cache_add_age_hdr(appctx, res_htx))
+ goto error;
+
+ /* In case of a conditional request, we might want to send a
+ * "304 Not Modified" response instead of the stored data. */
+ if (ctx->send_notmodified) {
+ if (!http_replace_res_status(res_htx, ist("304"), ist("Not Modified"))) {
+ /* If replacing the status code fails we need to send the full response. */
+ ctx->send_notmodified = 0;
+ }
+ }
+
+ /* Skip response body for HEAD requests or in case of "304 Not
+ * Modified" response. */
+ if (__sc_strm(sc)->txn->meth == HTTP_METH_HEAD || ctx->send_notmodified)
+ appctx->st0 = HTX_CACHE_EOM;
+ else
+ appctx->st0 = HTX_CACHE_DATA;
+ }
+
+ if (appctx->st0 == HTX_CACHE_DATA) {
+ len = first->len - sizeof(*cache_ptr) - ctx->sent;
+ if (len) {
+ ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_UNUSED);
+ if (ret < len) {
+ sc_need_room(sc, channel_htx_recv_max(res, res_htx) + 1);
+ goto out;
+ }
+ }
+ appctx->st0 = HTX_CACHE_EOM;
+ }
+
+ if (appctx->st0 == HTX_CACHE_EOM) {
+ /* no more data are expected. */
+ res_htx->flags |= HTX_FL_EOM;
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+
+ appctx->st0 = HTX_CACHE_END;
+ }
+
+ end:
+ if (appctx->st0 == HTX_CACHE_END)
+ se_fl_set(appctx->sedesc, SE_FL_EOS);
+
+ out:
+ total = res_htx->data - total;
+ if (total)
+ channel_add_input(res, total);
+ htx_to_buf(res_htx, &res->buf);
+
+ /* eat the whole request */
+ if (co_data(req)) {
+ req_htx = htx_from_buf(&req->buf);
+ co_htx_skip(req, req_htx, co_data(req));
+ htx_to_buf(req_htx, &req->buf);
+ }
+ return;
+
+ error:
+ /* Send an HTTP error 500 */
+ b_reset(&res->buf);
+ errmsg = &http_err_chunks[HTTP_ERR_500];
+ res->buf.data = b_data(errmsg);
+ memcpy(res->buf.area, b_head(errmsg), b_data(errmsg));
+ res_htx = htx_from_buf(&res->buf);
+
+ total = 0;
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ appctx->st0 = HTX_CACHE_END;
+ goto end;
+}
+
+
+static int parse_cache_rule(struct proxy *proxy, const char *name, struct act_rule *rule, char **err)
+{
+ struct flt_conf *fconf;
+ struct cache_flt_conf *cconf = NULL;
+
+ if (!*name || strcmp(name, "if") == 0 || strcmp(name, "unless") == 0) {
+ memprintf(err, "expects a cache name");
+ goto err;
+ }
+
+ /* Check if a cache filter was already registered with this cache
+ * name; if that's the case, we must use it. */
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->id == cache_store_flt_id) {
+ cconf = fconf->conf;
+ if (cconf && strcmp((char *)cconf->c.name, name) == 0) {
+ rule->arg.act.p[0] = cconf;
+ return 1;
+ }
+ }
+ }
+
+ /* Create the filter cache config */
+ cconf = calloc(1, sizeof(*cconf));
+ if (!cconf) {
+ memprintf(err, "out of memory\n");
+ goto err;
+ }
+ cconf->flags = CACHE_FLT_F_IMPLICIT_DECL;
+ cconf->c.name = strdup(name);
+ if (!cconf->c.name) {
+ memprintf(err, "out of memory\n");
+ goto err;
+ }
+
+ /* register a filter to fill the cache buffer */
+ fconf = calloc(1, sizeof(*fconf));
+ if (!fconf) {
+ memprintf(err, "out of memory\n");
+ goto err;
+ }
+ fconf->id = cache_store_flt_id;
+ fconf->conf = cconf;
+ fconf->ops = &cache_ops;
+ LIST_APPEND(&proxy->filter_configs, &fconf->list);
+
+ rule->arg.act.p[0] = cconf;
+ return 1;
+
+ err:
+ free(cconf);
+ return 0;
+}
+
+enum act_parse_ret parse_cache_store(const char **args, int *orig_arg, struct proxy *proxy,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_store_cache;
+
+ if (!parse_cache_rule(proxy, args[*orig_arg], rule, err))
+ return ACT_RET_PRS_ERR;
+
+ (*orig_arg)++;
+ return ACT_RET_PRS_OK;
+}
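+
+/* A minimal configuration sketch using these actions (names are
+ * illustrative, "mycache" being a cache section defined elsewhere):
+ *
+ *   http-request  cache-use   mycache
+ *   http-response cache-store mycache
+ *
+ * "cache-store" runs on the response path, so it can be placed before or
+ * after header-modifying actions to choose what gets cached. */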
+
+/* This produces a SHA1 hash of the request URI. When the URI carries no
+ * authority part (origin form), "https://" followed by the value of the
+ * first occurrence of the Host header is prepended before hashing.
+ * Returns 1 on success, 0 when no URI or no Host header is found. */
+int sha1_hosturi(struct stream *s)
+{
+ struct http_txn *txn = s->txn;
+ struct htx *htx = htxbuf(&s->req.buf);
+ struct htx_sl *sl;
+ struct http_hdr_ctx ctx;
+ struct ist uri;
+ blk_SHA_CTX sha1_ctx;
+ struct buffer *trash;
+
+ trash = get_trash_chunk();
+ ctx.blk = NULL;
+
+ sl = http_get_stline(htx);
+ uri = htx_sl_req_uri(sl); // whole uri
+ if (!uri.len)
+ return 0;
+
+ /* In HTTP/1, most URIs are seen in origin form ('/path/to/resource'),
+ * unless haproxy is deployed in front of an outbound cache. In HTTP/2,
+ * URIs are almost always sent in absolute form with their scheme. In
+ * this case, the scheme is almost always "https". In order to support
+ * sharing of cache objects between H1 and H2, we'll hash the absolute
+ * URI whenever known, or prepend "https://" + the Host header for
+ * relative URIs. The difference will only appear on absolute HTTP/1
+ * requests sent to an origin server, which is practically never met in
+ * the real world, so we don't care about the ability to share the same
+ * key here. URIs are normalized from the absolute form to an origin form
+ * as well.
+ */
+ if (!(sl->flags & HTX_SL_F_HAS_AUTHORITY)) {
+ chunk_istcat(trash, ist("https://"));
+ if (!http_find_header(htx, ist("Host"), &ctx, 0))
+ return 0;
+ chunk_istcat(trash, ctx.value);
+ }
+
+ chunk_istcat(trash, uri);
+
+ /* hash everything */
+ blk_SHA1_Init(&sha1_ctx);
+ blk_SHA1_Update(&sha1_ctx, trash->area, trash->data);
+ blk_SHA1_Final((unsigned char *)txn->cache_hash, &sha1_ctx);
+
+ return 1;
+}
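+
+/* Example (hypothetical request): for "GET /img/logo.png HTTP/1.1" with
+ * "Host: www.example.com", the function hashes the string
+ * "https://www.example.com/img/logo.png", which is also what an
+ * equivalent HTTP/2 request in absolute form would produce, hence the
+ * H1/H2 cache key sharing described above. */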
+
+/* Looks for "If-None-Match" headers in the request and compares their value
+ * with the one that might have been stored in the cache_entry. If any of them
+ * matches, a "304 Not Modified" response should be sent instead of the cached
+ * data.
+ * Although unlikely in a GET/HEAD request, the "If-None-Match: *" syntax is
+ * valid and should receive a "304 Not Modified" response (RFC 7234#4.3.2).
+ *
+ * If no "If-None-Match" header was found, look for an "If-Modified-Since"
+ * header and compare its value (date) to the one stored in the cache_entry.
+ * If the request's date is equal to or later than the cached one, we also
+ * send a "304 Not Modified" response (see RFCs 7232#3.3 and 7234#4.3.2).
+ *
+ * Returns 1 if "304 Not Modified" should be sent, 0 otherwise.
+ */
+static int should_send_notmodified_response(struct cache *cache, struct htx *htx,
+ struct cache_entry *entry)
+{
+ int retval = 0;
+
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct ist cache_entry_etag = IST_NULL;
+ struct buffer *etag_buffer = NULL;
+ int if_none_match_found = 0;
+
+ struct tm tm = {};
+ time_t if_modified_since = 0;
+
+ /* If we find a "If-None-Match" header in the request, rebuild the
+ * cache_entry's ETag in order to perform comparisons.
+ * There could be multiple "if-none-match" header lines. */
+ while (http_find_header(htx, ist("if-none-match"), &ctx, 0)) {
+ if_none_match_found = 1;
+
+ /* A '*' matches everything. */
+ if (isteq(ctx.value, ist("*")) != 0) {
+ retval = 1;
+ break;
+ }
+
+ /* No need to rebuild an etag if none was stored in the cache. */
+ if (entry->etag_length == 0)
+ break;
+
+ /* Rebuild the stored ETag. */
+ if (etag_buffer == NULL) {
+ etag_buffer = get_trash_chunk();
+
+ if (shctx_row_data_get(shctx_ptr(cache), block_ptr(entry),
+ (unsigned char*)b_orig(etag_buffer),
+ entry->etag_offset, entry->etag_length) == 0) {
+ cache_entry_etag = ist2(b_orig(etag_buffer), entry->etag_length);
+ } else {
+ /* We could not rebuild the ETag in one go, we
+ * won't send a "304 Not Modified" response. */
+ break;
+ }
+ }
+
+ if (http_compare_etags(cache_entry_etag, ctx.value) == 1) {
+ retval = 1;
+ break;
+ }
+ }
+
+ /* If the request did not contain an "If-None-Match" header, we look for
+ * an "If-Modified-Since" header (see RFC 7232#3.3). */
+ if (retval == 0 && if_none_match_found == 0) {
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("if-modified-since"), &ctx, 1)) {
+ if (parse_http_date(istptr(ctx.value), istlen(ctx.value), &tm)) {
+ if_modified_since = my_timegm(&tm);
+
+ /* We send a "304 Not Modified" response if the
+ * entry's last modified date is earlier than
+ * the one found in the "If-Modified-Since"
+ * header. */
+ retval = (entry->last_modified <= if_modified_since);
+ }
+ }
+ }
+
+ return retval;
+}
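+
+/* Example (hypothetical values): if the cached entry stored an
+ * 'ETag: "abc"' header, a request carrying 'If-None-Match: "abc"' (or
+ * "If-None-Match: *") yields 1, while 'If-None-Match: "xyz"' alone
+ * yields 0 and the full cached response is served. */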
+
+enum act_return http_action_req_cache_use(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+
+ struct http_txn *txn = s->txn;
+ struct cache_entry *res, *sec_entry = NULL;
+ struct cache_flt_conf *cconf = rule->arg.act.p[0];
+ struct cache *cache = cconf->c.cache;
+ struct shared_context *shctx = shctx_ptr(cache);
+ struct shared_block *entry_block;
+
+ struct cache_tree *cache_tree = NULL;
+
+ /* Ignore cache for HTTP/1.0 requests and for requests other than GET
+ * and HEAD */
+ if (!(txn->req.flags & HTTP_MSGF_VER_11) ||
+ (txn->meth != HTTP_METH_GET && txn->meth != HTTP_METH_HEAD))
+ txn->flags |= TX_CACHE_IGNORE;
+
+ http_check_request_for_cacheability(s, &s->req);
+
+ /* The request's hash has to be calculated for all requests, even POSTs
+ * or PUTs for instance because RFC7234 specifies that a successful
+ * "unsafe" method on a stored resource must invalidate it
+ * (see RFC7234#4.4). */
+ if (!sha1_hosturi(s))
+ return ACT_RET_CONT;
+
+ if (s->txn->flags & TX_CACHE_IGNORE)
+ return ACT_RET_CONT;
+
+ if (px == strm_fe(s))
+ _HA_ATOMIC_INC(&px->fe_counters.p.http.cache_lookups);
+ else
+ _HA_ATOMIC_INC(&px->be_counters.p.http.cache_lookups);
+
+ cache_tree = get_cache_tree_from_hash(cache, read_u32(s->txn->cache_hash));
+
+ if (!cache_tree)
+ return ACT_RET_CONT;
+
+ cache_rdlock(cache_tree);
+ res = get_entry(cache_tree, s->txn->cache_hash, 0);
+ /* We must not use an entry that is not complete but the check will be
+ * performed after we look for a potential secondary entry (in case of
+ * Vary). */
+ if (res) {
+ struct appctx *appctx;
+ int detached = 0;
+
+ retain_entry(res);
+
+ entry_block = block_ptr(res);
+ shctx_wrlock(shctx);
+ if (res->complete) {
+ shctx_row_detach(shctx, entry_block);
+ detached = 1;
+ } else {
+ release_entry(cache_tree, res, 0);
+ res = NULL;
+ }
+ shctx_wrunlock(shctx);
+ cache_rdunlock(cache_tree);
+
+ /* In case of Vary, we could have multiple entries with the same
+ * primary hash. We need to calculate the secondary hash in order
+ * to find the actual entry we want (if it exists). */
+ if (res && res->secondary_key_signature) {
+ if (!http_request_build_secondary_key(s, res->secondary_key_signature)) {
+ cache_rdlock(cache_tree);
+ sec_entry = get_secondary_entry(cache_tree, res,
+ s->txn->cache_secondary_hash, 0);
+ if (sec_entry && sec_entry != res) {
+ /* The wrong row was added to the hot list. */
+ release_entry(cache_tree, res, 0);
+ retain_entry(sec_entry);
+ shctx_wrlock(shctx);
+ if (detached)
+ shctx_row_reattach(shctx, entry_block);
+ entry_block = block_ptr(sec_entry);
+ shctx_row_detach(shctx, entry_block);
+ shctx_wrunlock(shctx);
+ }
+ res = sec_entry;
+ cache_rdunlock(cache_tree);
+ }
+ else {
+ release_entry(cache_tree, res, 1);
+
+ res = NULL;
+ shctx_wrlock(shctx);
+ shctx_row_reattach(shctx, entry_block);
+ shctx_wrunlock(shctx);
+ }
+ }
+
+ /* We either looked for a valid secondary entry and could not
+ * find one, or the entry we want to use is not complete. We
+ * can't use the cache's entry and must forward the request to
+ * the server. */
+ if (!res) {
+ return ACT_RET_CONT;
+ } else if (!res->complete) {
+ release_entry(cache_tree, res, 1);
+ return ACT_RET_CONT;
+ }
+
+ s->target = &http_cache_applet.obj_type;
+ if ((appctx = sc_applet_create(s->scb, objt_applet(s->target)))) {
+ struct cache_appctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ appctx->st0 = HTX_CACHE_INIT;
+ appctx->rule = rule;
+ ctx->cache_tree = cache_tree;
+ ctx->entry = res;
+ ctx->next = NULL;
+ ctx->sent = 0;
+ ctx->send_notmodified =
+ should_send_notmodified_response(cache, htxbuf(&s->req.buf), res);
+
+ if (px == strm_fe(s))
+ _HA_ATOMIC_INC(&px->fe_counters.p.http.cache_hits);
+ else
+ _HA_ATOMIC_INC(&px->be_counters.p.http.cache_hits);
+ return ACT_RET_CONT;
+ } else {
+ s->target = NULL;
+ release_entry(cache_tree, res, 1);
+ shctx_wrlock(shctx);
+ shctx_row_reattach(shctx, entry_block);
+ shctx_wrunlock(shctx);
+ return ACT_RET_CONT;
+ }
+ }
+ cache_rdunlock(cache_tree);
+
+ /* Shared context does not need to be locked while we calculate the
+ * secondary hash. */
+ if (!res && cache->vary_processing_enabled) {
+ /* Build a complete secondary hash until the server response
+ * tells us which fields should be kept (if any). */
+ http_request_prebuild_full_secondary_key(s);
+ }
+ return ACT_RET_CONT;
+}
+
+
+enum act_parse_ret parse_cache_use(const char **args, int *orig_arg, struct proxy *proxy,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_req_cache_use;
+
+ if (!parse_cache_rule(proxy, args[*orig_arg], rule, err))
+ return ACT_RET_PRS_ERR;
+
+ (*orig_arg)++;
+ return ACT_RET_PRS_OK;
+}
+
+int cfg_parse_cache(const char *file, int linenum, char **args, int kwm)
+{
+ int err_code = 0;
+
+ if (strcmp(args[0], "cache") == 0) { /* new cache section */
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects a <name> argument\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (tmp_cache_config == NULL) {
+ struct cache *cache_config;
+
+ tmp_cache_config = calloc(1, sizeof(*tmp_cache_config));
+ if (!tmp_cache_config) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ strlcpy2(tmp_cache_config->id, args[1], 33);
+ if (strlen(args[1]) > 32) {
+ ha_warning("parsing [%s:%d]: cache name is limited to 32 characters, truncate to '%s'.\n",
+ file, linenum, tmp_cache_config->id);
+ err_code |= ERR_WARN;
+ }
+
+ list_for_each_entry(cache_config, &caches_config, list) {
+ if (strcmp(tmp_cache_config->id, cache_config->id) == 0) {
+ ha_alert("parsing [%s:%d]: Duplicate cache name '%s'.\n",
+ file, linenum, tmp_cache_config->id);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+
+ tmp_cache_config->maxage = 60;
+ tmp_cache_config->maxblocks = 0;
+ tmp_cache_config->maxobjsz = 0;
+ tmp_cache_config->max_secondary_entries = DEFAULT_MAX_SECONDARY_ENTRY;
+ }
+ } else if (strcmp(args[0], "total-max-size") == 0) {
+ unsigned long int maxsize;
+ char *err;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ maxsize = strtoul(args[1], &err, 10);
+ if (err == args[1] || *err != '\0') {
+ ha_warning("parsing [%s:%d]: total-max-size wrong value '%s'\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (maxsize > (UINT_MAX >> 20)) {
+ ha_warning("parsing [%s:%d]: \"total-max-size\" (%s) must not be greater than %u\n",
+ file, linenum, args[1], UINT_MAX >> 20);
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ /* size in megabytes */
+ maxsize *= 1024 * 1024 / CACHE_BLOCKSIZE;
+ tmp_cache_config->maxblocks = maxsize;
+ } else if (strcmp(args[0], "max-age") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_warning("parsing [%s:%d]: '%s' expects an age parameter in seconds.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ tmp_cache_config->maxage = atoi(args[1]);
+ } else if (strcmp(args[0], "max-object-size") == 0) {
+ unsigned int maxobjsz;
+ char *err;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_warning("parsing [%s:%d]: '%s' expects a maximum file size parameter in bytes.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ maxobjsz = strtoul(args[1], &err, 10);
+ if (err == args[1] || *err != '\0') {
+ ha_warning("parsing [%s:%d]: max-object-size wrong value '%s'\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+ tmp_cache_config->maxobjsz = maxobjsz;
+ } else if (strcmp(args[0], "process-vary") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_warning("parsing [%s:%d]: '%s' expects \"on\" or \"off\" (enable or disable vary processing).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+ if (strcmp(args[1], "on") == 0)
+ tmp_cache_config->vary_processing_enabled = 1;
+ else if (strcmp(args[1], "off") == 0)
+ tmp_cache_config->vary_processing_enabled = 0;
+ else {
+ ha_warning("parsing [%s:%d]: '%s' expects \"on\" or \"off\" (enable or disable vary processing).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+ } else if (strcmp(args[0], "max-secondary-entries") == 0) {
+ unsigned int max_sec_entries;
+ char *err;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (!*args[1]) {
+ ha_warning("parsing [%s:%d]: '%s' expects a strictly positive number.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ max_sec_entries = strtoul(args[1], &err, 10);
+ if (err == args[1] || *err != '\0' || max_sec_entries == 0) {
+ ha_warning("parsing [%s:%d]: max-secondary-entries wrong value '%s'\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+ tmp_cache_config->max_secondary_entries = max_sec_entries;
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in 'cache' section\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+out:
+ return err_code;
+}
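+
+/* A minimal "cache" section accepted by the parser above (all values are
+ * illustrative):
+ *
+ *   cache mycache
+ *       total-max-size 64          # shared memory size, in megabytes
+ *       max-object-size 65536      # per-object limit, in bytes
+ *       max-age 240                # default maximum age, in seconds
+ *       process-vary on            # enable Vary header processing
+ */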
+
+/* once the cache section is parsed */
+
+int cfg_post_parse_section_cache()
+{
+ int err_code = 0;
+
+ if (tmp_cache_config) {
+
+ if (tmp_cache_config->maxblocks <= 0) {
+ ha_alert("Size not specified for cache '%s'\n", tmp_cache_config->id);
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+
+ if (!tmp_cache_config->maxobjsz) {
+ /* Default max. file size is a 256th of the cache size. */
+ tmp_cache_config->maxobjsz =
+ (tmp_cache_config->maxblocks * CACHE_BLOCKSIZE) >> 8;
+ }
+ else if (tmp_cache_config->maxobjsz > tmp_cache_config->maxblocks * CACHE_BLOCKSIZE / 2) {
+ ha_alert("\"max-object-size\" is limited to an half of \"total-max-size\" => %u\n", tmp_cache_config->maxblocks * CACHE_BLOCKSIZE / 2);
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+
+ /* add to the list of caches to init and reset tmp_cache_config
+ * for the next cache section, if any.
+ */
+ LIST_APPEND(&caches_config, &tmp_cache_config->list);
+ tmp_cache_config = NULL;
+ return err_code;
+ }
+out:
+ ha_free(&tmp_cache_config);
+ return err_code;
+
+}
+
+int post_check_cache()
+{
+ struct proxy *px;
+ struct cache *back, *cache_config, *cache;
+ struct shared_context *shctx;
+ int ret_shctx;
+ int err_code = ERR_NONE;
+ int i;
+
+ list_for_each_entry_safe(cache_config, back, &caches_config, list) {
+
+ ret_shctx = shctx_init(&shctx, cache_config->maxblocks, CACHE_BLOCKSIZE,
+ cache_config->maxobjsz, sizeof(struct cache));
+
+ if (ret_shctx <= 0) {
+ if (ret_shctx == SHCTX_E_INIT_LOCK)
+ ha_alert("Unable to initialize the lock for the cache.\n");
+ else
+ ha_alert("Unable to allocate cache.\n");
+
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+ shctx->free_block = cache_free_blocks;
+ shctx->reserve_finish = cache_reserve_finish;
+ shctx->cb_data = (void*)shctx->data;
+ /* the cache structure is stored in the shctx and added to the
+ * caches list, we can remove the entry from the caches_config
+ * list */
+ memcpy(shctx->data, cache_config, sizeof(struct cache));
+ cache = (struct cache *)shctx->data;
+ LIST_APPEND(&caches, &cache->list);
+ LIST_DELETE(&cache_config->list);
+ free(cache_config);
+ for (i = 0; i < CACHE_TREE_NUM; ++i) {
+ cache->trees[i].entries = EB_ROOT;
+ HA_RWLOCK_INIT(&cache->trees[i].lock);
+
+ LIST_INIT(&cache->trees[i].cleanup_list);
+ HA_SPIN_INIT(&cache->trees[i].cleanup_lock);
+ }
+
+ /* Find all references for this cache in the existing filters
+ * (over all proxies) and reference it in matching filters.
+ */
+ for (px = proxies_list; px; px = px->next) {
+ struct flt_conf *fconf;
+ struct cache_flt_conf *cconf;
+
+ list_for_each_entry(fconf, &px->filter_configs, list) {
+ if (fconf->id != cache_store_flt_id)
+ continue;
+
+ cconf = fconf->conf;
+ if (strcmp(cache->id, cconf->c.name) == 0) {
+ free(cconf->c.name);
+ cconf->flags |= CACHE_FLT_INIT;
+ cconf->c.cache = cache;
+ break;
+ }
+ }
+ }
+ }
+
+out:
+ return err_code;
+
+}
+
+struct flt_ops cache_ops = {
+ .init = cache_store_init,
+ .check = cache_store_check,
+ .deinit = cache_store_deinit,
+
+ /* Handle stream init/deinit */
+ .attach = cache_store_strm_init,
+ .detach = cache_store_strm_deinit,
+
+ /* Handle channels activity */
+ .channel_post_analyze = cache_store_post_analyze,
+
+ /* Filter HTTP requests and responses */
+ .http_headers = cache_store_http_headers,
+ .http_payload = cache_store_http_payload,
+ .http_end = cache_store_http_end,
+};
+
+
+#define CHECK_ENCODING(str, encoding_name, encoding_value) \
+ ({ \
+ int retval = 0; \
+ if (istmatch(str, (struct ist){ .ptr = encoding_name+1, .len = sizeof(encoding_name) - 2 })) { \
+ retval = encoding_value; \
+ encoding = istadv(encoding, sizeof(encoding_name) - 2); \
+ } \
+ (retval); \
+ })
+
+/*
+ * Parse the encoding <encoding> and try to match the encoding part upon an
+ * encoding list of explicitly supported encodings (which all have a specific
+ * bit in an encoding bitmap). If a weight is included in the value, find out if
+ * it is null or not. The bit value will be set in the <encoding_value>
+ * parameter and <has_null_weight> will be set to 1 if the weight is
+ * strictly 0, and to 0 otherwise.
+ * The encodings list is extracted from
+ * https://www.iana.org/assignments/http-parameters/http-parameters.xhtml.
+ * Returns 0 in case of success and -1 in case of error.
+ */
+static int parse_encoding_value(struct ist encoding, unsigned int *encoding_value,
+ unsigned int *has_null_weight)
+{
+ int retval = 0;
+
+ if (!encoding_value)
+ return -1;
+
+ if (!istlen(encoding))
+ return -1; /* Invalid encoding */
+
+ *encoding_value = 0;
+ if (has_null_weight)
+ *has_null_weight = 0;
+
+ switch (*encoding.ptr) {
+ case 'a':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "aes128gcm", VARY_ENCODING_AES128GCM);
+ break;
+ case 'b':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "br", VARY_ENCODING_BR);
+ break;
+ case 'c':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "compress", VARY_ENCODING_COMPRESS);
+ break;
+ case 'd':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "deflate", VARY_ENCODING_DEFLATE);
+ break;
+ case 'e':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "exi", VARY_ENCODING_EXI);
+ break;
+ case 'g':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "gzip", VARY_ENCODING_GZIP);
+ break;
+ case 'i':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "identity", VARY_ENCODING_IDENTITY);
+ break;
+ case 'p':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "pack200-gzip", VARY_ENCODING_PACK200_GZIP);
+ break;
+ case 'x':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "x-gzip", VARY_ENCODING_GZIP);
+ if (!*encoding_value)
+ *encoding_value = CHECK_ENCODING(encoding, "x-compress", VARY_ENCODING_COMPRESS);
+ break;
+ case 'z':
+ encoding = istnext(encoding);
+ *encoding_value = CHECK_ENCODING(encoding, "zstd", VARY_ENCODING_ZSTD);
+ break;
+ case '*':
+ encoding = istnext(encoding);
+ *encoding_value = VARY_ENCODING_STAR;
+ break;
+ default:
+ retval = -1; /* Unmanaged encoding */
+ break;
+ }
+
+ /* Process the optional weight part of the encoding. */
+ if (*encoding_value) {
+ encoding = http_trim_leading_spht(encoding);
+ if (istlen(encoding)) {
+ if (*encoding.ptr != ';')
+ return -1;
+
+ if (has_null_weight) {
+ encoding = istnext(encoding);
+
+ encoding = http_trim_leading_spht(encoding);
+
+ *has_null_weight = isteq(encoding, ist("q=0"));
+ }
+ }
+ }
+
+ return retval;
+}
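+
+/* For instance (illustrative values):
+ *   parse_encoding_value(ist("gzip"), &val, &nullw)     -> 0, val = VARY_ENCODING_GZIP, nullw = 0
+ *   parse_encoding_value(ist("gzip;q=0"), &val, &nullw) -> 0, val = VARY_ENCODING_GZIP, nullw = 1
+ *   parse_encoding_value(ist("foo"), &val, &nullw)      -> -1 (unmanaged encoding)
+ */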
+
+#define ACCEPT_ENCODING_MAX_ENTRIES 16
+/*
+ * Build a bitmap of the accept-encoding header.
+ *
+ * The bitmap is built by matching every sub-part of the accept-encoding value
+ * with a subset of explicitly supported encodings, which all have their own bit
+ * in the bitmap. This bitmap will be used to determine if a response can be
+ * served to a client (that is if it has an encoding that is accepted by the
+ * client). Any unknown encodings will be indicated by the VARY_ENCODING_OTHER
+ * bit.
+ *
+ * Returns 0 in case of success and -1 in case of error.
+ */
+static int accept_encoding_normalizer(struct htx *htx, struct ist hdr_name,
+ char *buf, unsigned int *buf_len)
+{
+ size_t count = 0;
+ uint32_t encoding_bitmap = 0;
+ unsigned int encoding_bmp_bl = -1;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ unsigned int encoding_value;
+ unsigned int rejected_encoding;
+
+ /* A user agent always accepts an unencoded value unless it explicitly
+ * refuses it through an "identity;q=0" accept-encoding value. */
+ encoding_bitmap |= VARY_ENCODING_IDENTITY;
+
+ /* Iterate over the first ACCEPT_ENCODING_MAX_ENTRIES accept-encoding
+ * values, which might span across multiple accept-encoding headers. */
+ while (http_find_header(htx, hdr_name, &ctx, 0) && count < ACCEPT_ENCODING_MAX_ENTRIES) {
+ count++;
+
+ /* As per RFC7231#5.3.4, "An Accept-Encoding header field with a
+ * combined field-value that is empty implies that the user agent
+ * does not want any content-coding in response."
+ *
+ * We must (and did) count the existence of this empty header to not
+ * hit the `count == 0` case below, but must ignore the value to not
+ * include VARY_ENCODING_OTHER into the final bitmap.
+ */
+ if (istlen(ctx.value) == 0)
+ continue;
+
+ /* Turn accept-encoding value to lower case */
+ ist2bin_lc(istptr(ctx.value), ctx.value);
+
+ /* Try to identify a known encoding and to manage null weights. */
+ if (!parse_encoding_value(ctx.value, &encoding_value, &rejected_encoding)) {
+ if (rejected_encoding)
+ encoding_bmp_bl &= ~encoding_value;
+ else
+ encoding_bitmap |= encoding_value;
+ }
+ else {
+ /* Unknown encoding */
+ encoding_bitmap |= VARY_ENCODING_OTHER;
+ }
+ }
+
+ /* If a "*" was found in the accepted encodings (without a null weight),
+ * all the encodings are accepted except the ones explicitly rejected. */
+ if (encoding_bitmap & VARY_ENCODING_STAR) {
+ encoding_bitmap = ~0;
+ }
+
+ /* Clear explicitly rejected encodings from the bitmap */
+ encoding_bitmap &= encoding_bmp_bl;
+
+ /* As per RFC7231#5.3.4, "If no Accept-Encoding field is in the request,
+ * any content-coding is considered acceptable by the user agent". */
+ if (count == 0)
+ encoding_bitmap = ~0;
+
+ /* A request with more than ACCEPT_ENCODING_MAX_ENTRIES accepted
+ * encodings might be illegitimate so we will not use it. */
+ if (count == ACCEPT_ENCODING_MAX_ENTRIES)
+ return -1;
+
+ write_u32(buf, encoding_bitmap);
+ *buf_len = sizeof(encoding_bitmap);
+
+ /* This function fills the hash buffer correctly even if no header was
+ * found, hence the 0 return value (success). */
+ return 0;
+}
+#undef ACCEPT_ENCODING_MAX_ENTRIES
+
+/*
+ * Normalizer used by default for the Referer and Origin headers. It only
+ * calculates a hash of the whole value using xxhash algorithm.
+ * Only the first occurrence of the header will be taken into account in the
+ * hash.
+ * Returns 0 in case of success, 1 if the hash buffer should be filled with 0s
+ * and -1 in case of error.
+ */
+static int default_normalizer(struct htx *htx, struct ist hdr_name,
+ char *buf, unsigned int *buf_len)
+{
+ int retval = 1;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+
+ if (http_find_header(htx, hdr_name, &ctx, 1)) {
+ retval = 0;
+ write_u64(buf, XXH3(istptr(ctx.value), istlen(ctx.value), cache_hash_seed));
+ *buf_len = sizeof(uint64_t);
+ }
+
+ return retval;
+}
+
+/*
+ * Accept-Encoding bitmap comparison function.
+ * Returns 0 if the bitmaps are compatible.
+ */
+static int accept_encoding_bitmap_cmp(const void *ref, const void *new, unsigned int len)
+{
+ uint32_t ref_bitmap = read_u32(ref);
+ uint32_t new_bitmap = read_u32(new);
+
+ if (!(ref_bitmap & VARY_ENCODING_OTHER)) {
+ /* All the bits set in the reference bitmap correspond to the
+ * stored response's encoding and should all be set in the new
+ * encoding bitmap in order for the client to be able to manage
+ * the response.
+ *
+ * If this is the case the cached response has encodings that
+ * are accepted by the client. It can be served directly by
+ * the cache (as far as the accept-encoding part is concerned).
+ */
+
+ return (ref_bitmap & new_bitmap) != ref_bitmap;
+ }
+ else {
+ return 1;
+ }
+}
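+
+/* For instance (bit names as defined for the vary bitmap): a stored
+ * response hashed with ref = VARY_ENCODING_GZIP is compatible (returns 0)
+ * with a request whose bitmap contains VARY_ENCODING_GZIP |
+ * VARY_ENCODING_BR, but not with one containing only VARY_ENCODING_BR.
+ * A reference containing VARY_ENCODING_OTHER never matches. */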
+
+
+/*
+ * Pre-calculate the hashes of all the supported headers (in our Vary
+ * implementation) of a given request. We have to calculate all the hashes
+ * in advance because the actual Vary signature won't be known until the first
+ * response.
+ * Only the first occurrence of every header will be taken into account in the
+ * hash.
+ * If the header is not present, the hash portion of the given header will be
+ * filled with zeros.
+ * Returns 0 in case of success.
+ */
+static int http_request_prebuild_full_secondary_key(struct stream *s)
+{
+ /* The fake signature (second parameter) will ensure that every part of the
+ * secondary key is calculated. */
+ return http_request_build_secondary_key(s, ~0);
+}
+
+
+/*
+ * Calculate the secondary key for a request for which we already have a known
+ * vary signature. The key is made by aggregating hashes calculated for every
+ * header mentioned in the vary signature.
+ * Only the first occurrence of every header will be taken into account in the
+ * hash.
+ * If the header is not present, the hash portion of the given header will be
+ * filled with zeros.
+ * Returns 0 in case of success.
+ */
+static int http_request_build_secondary_key(struct stream *s, int vary_signature)
+{
+ struct http_txn *txn = s->txn;
+ struct htx *htx = htxbuf(&s->req.buf);
+
+ unsigned int idx;
+ const struct vary_hashing_information *info = NULL;
+ unsigned int hash_length = 0;
+ int retval = 0;
+ int offset = 0;
+
+ for (idx = 0; idx < sizeof(vary_information)/sizeof(*vary_information) && retval >= 0; ++idx) {
+ info = &vary_information[idx];
+
+ /* The normalizing functions will be in charge of getting the
+ * header values from the htx. This way they can manage multiple
+ * occurrences of their processed header. */
+ if ((vary_signature & info->value) && info->norm_fn != NULL &&
+ !(retval = info->norm_fn(htx, info->hdr_name, &txn->cache_secondary_hash[offset], &hash_length))) {
+ offset += hash_length;
+ }
+ else {
+ /* Fill hash with 0s. */
+ hash_length = info->hash_length;
+ memset(&txn->cache_secondary_hash[offset], 0, hash_length);
+ offset += hash_length;
+ }
+ }
+
+ if (retval >= 0)
+ txn->flags |= TX_CACHE_HAS_SEC_KEY;
+
+ return (retval < 0);
+}
+
+/*
+ * Build the actual secondary key of a given request out of the prebuilt key and
+ * the actual vary signature (extracted from the response).
+ * Returns 0 in case of success.
+ */
+static int http_request_reduce_secondary_key(unsigned int vary_signature,
+ char prebuilt_key[HTTP_CACHE_SEC_KEY_LEN])
+{
+ int offset = 0;
+ int global_offset = 0;
+ int vary_info_count = 0;
+ int keep = 0;
+ unsigned int vary_idx;
+ const struct vary_hashing_information *vary_info;
+
+ vary_info_count = sizeof(vary_information)/sizeof(*vary_information);
+ for (vary_idx = 0; vary_idx < vary_info_count; ++vary_idx) {
+ vary_info = &vary_information[vary_idx];
+ keep = (vary_signature & vary_info->value) ? 0xff : 0;
+
+ for (offset = 0; offset < vary_info->hash_length; ++offset,++global_offset) {
+ prebuilt_key[global_offset] &= keep;
+ }
+ }
+
+ return 0;
+}
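+
+/* Sketch of the reduction, assuming the vary_information layout used in
+ * this file (a 4-byte accept-encoding bitmap followed by 8-byte hashes
+ * for the other headers): with a vary signature containing only
+ * VARY_ACCEPT_ENCODING, the first 4 bytes of the prebuilt key are kept
+ * and every remaining byte is masked to zero. */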
+
+
+
+static int
+parse_cache_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct flt_conf *f, *back;
+ struct cache_flt_conf *cconf = NULL;
+ char *name = NULL;
+ int pos = *cur_arg;
+
+ /* Get the cache filter name. <pos> points to the "cache" keyword */
+ if (!*args[pos + 1]) {
+ memprintf(err, "%s : expects a <name> argument", args[pos]);
+ goto error;
+ }
+ name = strdup(args[pos + 1]);
+ if (!name) {
+ memprintf(err, "%s '%s' : out of memory", args[pos], args[pos + 1]);
+ goto error;
+ }
+ pos += 2;
+
+ /* Check if an implicit filter with the same name already exists. If so,
+ * we remove the implicit filter to use the explicit one. */
+ list_for_each_entry_safe(f, back, &px->filter_configs, list) {
+ if (f->id != cache_store_flt_id)
+ continue;
+
+ cconf = f->conf;
+ if (strcmp(name, cconf->c.name) != 0) {
+ cconf = NULL;
+ continue;
+ }
+
+ if (!(cconf->flags & CACHE_FLT_F_IMPLICIT_DECL)) {
+ cconf = NULL;
+ memprintf(err, "%s: multiple explicit declarations of the cache filter '%s'",
+ px->id, name);
+ goto error;
+ }
+
+ /* Remove the implicit filter. <cconf> is kept for the explicit one */
+ LIST_DELETE(&f->list);
+ free(f);
+ free(name);
+ break;
+ }
+
+ /* No implicit cache filter found, create configuration for the explicit one */
+ if (!cconf) {
+ cconf = calloc(1, sizeof(*cconf));
+ if (!cconf) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto error;
+ }
+ cconf->c.name = name;
+ }
+
+ cconf->flags = 0;
+ fconf->id = cache_store_flt_id;
+ fconf->conf = cconf;
+ fconf->ops = &cache_ops;
+
+ *cur_arg = pos;
+ return 0;
+
+ error:
+ free(name);
+ free(cconf);
+ return -1;
+}
+
+/* It reserves a struct show_cache_ctx for the local variables */
+static int cli_parse_show_cache(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_cache_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ctx->cache = LIST_ELEM((caches).n, typeof(struct cache *), list);
+ return 0;
+}
+
+/* It uses a struct show_cache_ctx for the local variables */
+static int cli_io_handler_show_cache(struct appctx *appctx)
+{
+ struct show_cache_ctx *ctx = appctx->svcctx;
+ struct cache* cache = ctx->cache;
+ struct buffer *buf = alloc_trash_chunk();
+
+ if (buf == NULL)
+ return 1;
+
+ list_for_each_entry_from(cache, &caches, list) {
+ struct eb32_node *node = NULL;
+ unsigned int next_key;
+ struct cache_entry *entry;
+ unsigned int i;
+ struct shared_context *shctx = shctx_ptr(cache);
+ int cache_tree_index = 0;
+ struct cache_tree *cache_tree = NULL;
+
+ next_key = ctx->next_key;
+ if (!next_key) {
+ shctx_rdlock(shctx);
+ chunk_printf(buf, "%p: %s (shctx:%p, available blocks:%d)\n", cache, cache->id, shctx_ptr(cache), shctx_ptr(cache)->nbav);
+ shctx_rdunlock(shctx);
+ if (applet_putchk(appctx, buf) == -1) {
+ goto yield;
+ }
+ }
+
+ ctx->cache = cache;
+
+ if (ctx->cache_tree)
+ cache_tree_index = (ctx->cache_tree - ctx->cache->trees);
+
+ for (;cache_tree_index < CACHE_TREE_NUM; ++cache_tree_index) {
+
+ ctx->cache_tree = cache_tree = &ctx->cache->trees[cache_tree_index];
+
+ cache_rdlock(cache_tree);
+
+ while (1) {
+ node = eb32_lookup_ge(&cache_tree->entries, next_key);
+ if (!node) {
+ ctx->next_key = 0;
+ break;
+ }
+
+ entry = container_of(node, struct cache_entry, eb);
+ next_key = node->key + 1;
+
+ if (entry->expire > date.tv_sec) {
+ chunk_printf(buf, "%p hash:%u vary:0x", entry, read_u32(entry->hash));
+ for (i = 0; i < HTTP_CACHE_SEC_KEY_LEN; ++i)
+ chunk_appendf(buf, "%02x", (unsigned char)entry->secondary_key[i]);
+ chunk_appendf(buf, " size:%u (%u blocks), refcount:%u, expire:%d\n",
+ block_ptr(entry)->len, block_ptr(entry)->block_count,
+ block_ptr(entry)->refcount, entry->expire - (int)date.tv_sec);
+ }
+
+ ctx->next_key = next_key;
+
+ if (applet_putchk(appctx, buf) == -1) {
+ cache_rdunlock(cache_tree);
+ goto yield;
+ }
+ }
+ cache_rdunlock(cache_tree);
+ }
+ }
+
+ free_trash_chunk(buf);
+ return 1;
+
+yield:
+ free_trash_chunk(buf);
+ return 0;
+}
+
+
+/*
+ * boolean, returns true if response was built out of a cache entry.
+ */
+static int
+smp_fetch_res_cache_hit(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = (smp->strm ? (smp->strm->target == &http_cache_applet.obj_type) : 0);
+
+ return 1;
+}
+
+/*
+ * string, returns cache name (if response came from a cache).
+ */
+static int
+smp_fetch_res_cache_name(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct appctx *appctx = NULL;
+
+ struct cache_flt_conf *cconf = NULL;
+ struct cache *cache = NULL;
+
+ if (!smp->strm || smp->strm->target != &http_cache_applet.obj_type)
+ return 0;
+
+ /* Get appctx from the stream connector. */
+ appctx = sc_appctx(smp->strm->scb);
+ if (appctx && appctx->rule) {
+ cconf = appctx->rule->arg.act.p[0];
+ if (cconf) {
+ cache = cconf->c.cache;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = cache->id;
+ smp->data.u.str.data = strlen(cache->id);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+
+/* early boot initialization */
+static void cache_init()
+{
+ cache_hash_seed = ha_random64();
+}
+
+INITCALL0(STG_PREPARE, cache_init);
+
+/* Declare the filter parser for "cache" keyword */
+static struct flt_kw_list filter_kws = { "CACHE", { }, {
+ { "cache", parse_cache_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);
+
+static struct cli_kw_list cli_kws = {{},{
+ { { "show", "cache", NULL }, "show cache : show cache status", cli_parse_show_cache, cli_io_handler_show_cache, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+static struct action_kw_list http_res_actions = {
+ .kw = {
+ { "cache-store", parse_cache_store },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
+
+static struct action_kw_list http_req_actions = {
+ .kw = {
+ { "cache-use", parse_cache_use },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+
+struct applet http_cache_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<CACHE>", /* used for logging */
+ .fct = http_cache_io_handler,
+ .release = http_cache_applet_release,
+};
+
+/* config parsers for this section */
+REGISTER_CONFIG_SECTION("cache", cfg_parse_cache, cfg_post_parse_section_cache);
+REGISTER_POST_CHECK(post_check_cache);
+
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "res.cache_hit", smp_fetch_res_cache_hit, 0, NULL, SMP_T_BOOL, SMP_USE_HRSHP, SMP_VAL_RESPONSE },
+ { "res.cache_name", smp_fetch_res_cache_name, 0, NULL, SMP_T_STR, SMP_USE_HRSHP, SMP_VAL_RESPONSE },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
diff --git a/src/calltrace.c b/src/calltrace.c
new file mode 100644
index 0000000..3946b28
--- /dev/null
+++ b/src/calltrace.c
@@ -0,0 +1,286 @@
+/*
+ * Function call tracing for gcc >= 2.95
+ * WARNING! THIS CODE IS NOT THREAD-SAFE!
+ *
+ * Copyright 2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * gcc is able to call a specific function when entering and leaving any
+ * function when compiled with -finstrument-functions. This code must not
+ * be built with this argument. The performance impact is huge, so this
+ * feature should only be used when debugging.
+ *
+ * The entry and exits of all functions will be dumped into a file designated
+ * by the HAPROXY_TRACE environment variable, or by default "trace.out". If the
+ * trace file name is empty or "/dev/null", then traces are disabled. If
+ * opening the trace file fails, then stderr is used. If HAPROXY_TRACE_FAST is
+ * used, then the time is taken from the global <now> variable. Last, if
+ * HAPROXY_TRACE_TSC is used, then the machine's TSC is used instead of the
+ * real time (almost twice as fast).
+ *
+ * The output format is :
+ *
+ * <sec.usec> <level> <caller_ptr> <dir> <callee_ptr>
+ * or :
+ * <tsc> <level> <caller_ptr> <dir> <callee_ptr>
+ *
+ * where <dir> is '>' when entering a function and '<' when leaving.
+ *
+ * It is also possible to emit comments using the calltrace() function which uses
+ * the printf() format. Such comments are then inserted by replacing the caller
+ * pointer with a sharp ('#') like this :
+ *
+ * <sec.usec> <level> # <comment>
+ * or :
+ * <tsc> <level> # <comment>
+ *
+ * The article below is a nice explanation of how this works :
+ * http://balau82.wordpress.com/2010/10/06/trace-and-profile-function-calls-with-gcc/
+ */
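+
+/* Illustration only (not part of this file): following the description above,
+ * one would typically build every unit except this one with instrumentation,
+ * then select the trace destination through the environment. The file names
+ * and addresses below are made up:
+ *
+ *   $ gcc -finstrument-functions -g -c some_unit.c
+ *   $ gcc -c calltrace.c                 # this file, without the flag
+ *   $ HAPROXY_TRACE=/tmp/trace.out ./haproxy -f test.cfg
+ *
+ * which could produce trace lines such as:
+ *
+ *   1700000000.123456 2 0x4a1b20 > 0x4a0f60
+ *   1700000000.123460 2 0x4a1b20 < 0x4a0f60 0x0
+ */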
+
+#include <sys/time.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/tools.h>
+
+static FILE *log;
+static int level;
+static int disabled;
+static int fast_time;
+static int use_tsc;
+static struct timeval trace_now;
+static struct timeval *now_ptr;
+static char line[128]; /* more than enough for a message (9+1+6+1+3+1+18+1+1+18+1+1) */
+
+static int open_trace()
+{
+ const char *output = getenv("HAPROXY_TRACE");
+
+ if (!output)
+ output = "trace.out";
+
+ if (!*output || strcmp(output, "/dev/null") == 0) {
+ disabled = 1;
+ return 0;
+ }
+
+ log = fopen(output, "w");
+ if (!log)
+ log = stderr;
+
+ now_ptr = &date;
+ if (getenv("HAPROXY_TRACE_FAST") != NULL) {
+ fast_time = 1;
+ now_ptr = &trace_now;
+ }
+ if (getenv("HAPROXY_TRACE_TSC") != NULL) {
+ fast_time = 1;
+ use_tsc = 1;
+ }
+ return 1;
+}
+
+/* This function first divides the number by 100M then iteratively multiplies it
+ * by 100 (using adds and shifts). The trick is that dividing by 100M is equivalent
+ * to multiplying by 1/100M, which approximates to 1441151881/2^57. All local
+ * variables fit in registers on x86. This version outputs two digits per round.
+ * <min_pairs> indicates the minimum number of pairs of digits that have to be
+ * emitted, which might be left-padded with zeroes.
+ * It returns the pointer to the ending '\0'.
+ */
+static char *ultoad2(unsigned int x, char *out, int min_pairs)
+{
+ unsigned int q;
+ char *p = out;
+ int pos = 4;
+ unsigned long long y;
+
+ static const unsigned short bcd[100] = {
+ 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
+ 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931,
+ 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932,
+ 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933,
+ 0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934,
+ 0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935,
+ 0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936,
+ 0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937,
+ 0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
+ 0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 };
+
+ y = x * 1441151881ULL; /* y>>57 will be the integer part of x/100M */
+ while (1) {
+ q = y >> 57;
+ /* Q is composed of the first digit in the lower byte and the second
+ * digit in the higher byte.
+ */
+ if (p != out || q > 9 || pos < min_pairs) {
+#if defined(__i386__) || defined(__x86_64__)
+ /* unaligned accesses are fast on x86 */
+ *(unsigned short *)p = bcd[q];
+ p += 2;
+#else
+ *(p++) = bcd[q];
+ *(p++) = bcd[q] >> 8;
+#endif
+ }
+ else if (q || !pos) {
+ /* only at most one digit */
+ *(p++) = bcd[q] >> 8;
+ }
+ if (--pos < 0)
+ break;
+
+ y &= 0x1FFFFFFFFFFFFFFULL; // remainder
+
+ if (sizeof(long) >= sizeof(long long)) {
+ /* shifting is preferred on 64-bit archs, while mult is faster on 32-bit.
+ * We multiply by 100 by doing *5, *5 and *4, all of which are trivial.
+ */
+ y += (y << 2);
+ y += (y << 2);
+ y <<= 2;
+ }
+ else
+ y *= 100;
+ }
+
+ *p = '\0';
+ return p;
+}
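+
+/* A few illustrative results (not compiled in), following the rules above:
+ *
+ *   char buf[16];
+ *   ultoad2(5, buf, 0);     // buf = "5"      (at most one digit, no padding)
+ *   ultoad2(1234, buf, 0);  // buf = "1234"
+ *   ultoad2(42, buf, 3);    // buf = "000042" (padded to 3 pairs, as done for
+ *                           //                 the microseconds in make_line())
+ */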
+
+/* Send <h> as hex into <out>. Returns the pointer to the ending '\0'. */
+static char *emit_hex(unsigned long h, char *out)
+{
+ static unsigned char hextab[16] = "0123456789abcdef";
+ int shift = sizeof(h) * 8 - 4;
+ unsigned int idx;
+
+ do {
+ idx = (h >> shift);
+ if (idx || !shift)
+ *out++ = hextab[idx & 15];
+ shift -= 4;
+ } while (shift >= 0);
+ *out = '\0';
+ return out;
+}
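+
+/* Illustration (not compiled in): leading zeroes are skipped, except for the
+ * value zero itself:
+ *
+ *   char buf[2 * sizeof(long) + 1];
+ *   emit_hex(0x1a2b, buf);  // buf = "1a2b"
+ *   emit_hex(0, buf);       // buf = "0"
+ */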
+
+static void make_line(void *from, void *to, int level, char dir, long ret)
+{
+ char *p = line;
+
+ if (unlikely(!log) && !open_trace())
+ return;
+
+ if (unlikely(!fast_time))
+ gettimeofday(now_ptr, NULL);
+
+#ifdef USE_SLOW_FPRINTF
+ if (!use_tsc)
+ fprintf(log, "%u.%06u %d %p %c %p\n",
+ (unsigned int)now_ptr->tv_sec,
+ (unsigned int)now_ptr->tv_usec,
+ level, from, dir, to);
+ else
+ fprintf(log, "%llx %d %p %c %p\n",
+ rdtsc(), level, from, dir, to);
+ return;
+#endif
+
+ if (unlikely(!use_tsc)) {
+ /* "%u.06u", tv_sec, tv_usec */
+ p = ultoad2(now_ptr->tv_sec, p, 0);
+ *p++ = '.';
+ p = ultoad2(now_ptr->tv_usec, p, 3);
+ } else {
+ /* "%08x%08x", high, low */
+ unsigned long long t = rdtsc();
+ if (sizeof(long) < sizeof(long long))
+ p = emit_hex((unsigned long)(t >> 32U), p);
+ p = emit_hex((unsigned long)(t), p);
+ }
+
+ /* " %u", level */
+ *p++ = ' ';
+ p = ultoad2(level, p, 0);
+
+ /* " %p", from */
+ *p++ = ' '; *p++ = '0'; *p++ = 'x';
+ p = emit_hex((unsigned long)from, p);
+
+ /* " %c", dir */
+ *p++ = ' '; *p++ = dir;
+
+ /* " %p", to */
+ *p++ = ' '; *p++ = '0'; *p++ = 'x';
+ p = emit_hex((unsigned long)to, p);
+
+ if (dir == '<') {
+ /* " %x", ret */
+ *p++ = ' '; *p++ = '0'; *p++ = 'x';
+ p = emit_hex(ret, p);
+ }
+
+ *p++ = '\n';
+
+ fwrite(line, p - line, 1, log);
+}
+
+/* These are the functions GCC calls */
+void __cyg_profile_func_enter(void *to, void *from)
+{
+ if (!disabled)
+ return make_line(from, to, ++level, '>', 0);
+}
+
+void __cyg_profile_func_exit(void *to, void *from)
+{
+ long ret = 0;
+
+#if defined(__x86_64__)
+	/* on x86_64, the return value (rax) is temporarily stored in rbx
+ * during the call to __cyg_profile_func_exit() so we can snoop it.
+ */
+ asm volatile("mov %%rbx, %0" : "=r"(ret));
+#endif
+ if (!disabled)
+ return make_line(from, to, level--, '<', ret);
+}
+
+/* This one adds comments to the trace above. The output format is :
+ * <timestamp> <level> # <string>
+ */
+__attribute__((format(printf, 1, 2)))
+void calltrace(char *fmt, ...)
+{
+ va_list ap;
+
+ if (unlikely(!log) && !open_trace())
+ return;
+
+ if (unlikely(!fast_time))
+ gettimeofday(now_ptr, NULL);
+
+ if (!use_tsc)
+ fprintf(log, "%u.%06u %d # ",
+ (unsigned int)now_ptr->tv_sec,
+ (unsigned int)now_ptr->tv_usec,
+ level + 1);
+ else
+ fprintf(log, "%llx %d # ",
+ rdtsc(), level + 1);
+
+ va_start(ap, fmt);
+ vfprintf(log, fmt, ap);
+ va_end(ap);
+ fputc('\n', log);
+ fflush(log);
+}
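+
+/* Illustration (hypothetical call site): an instrumented function could
+ * annotate the dump with:
+ *
+ *   calltrace("entering state %d for fd %d", state, fd);
+ *
+ * which would emit a line such as:
+ *
+ *   1700000000.123470 3 # entering state 2 for fd 12
+ *
+ * between the regular enter/leave records.
+ */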
diff --git a/src/cbuf.c b/src/cbuf.c
new file mode 100644
index 0000000..b36bbeb
--- /dev/null
+++ b/src/cbuf.c
@@ -0,0 +1,59 @@
+/*
+ * Circular buffer management
+ *
+ * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaill@haproxy.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <haproxy/list.h>
+#include <haproxy/pool.h>
+#include <haproxy/cbuf-t.h>
+
+DECLARE_POOL(pool_head_cbuf, "cbuf", sizeof(struct cbuf));
+
+/* Allocate and return a new circular buffer using <buf> as its <sz>-byte
+ * internal buffer, or NULL on allocation failure.
+ */
+struct cbuf *cbuf_new(unsigned char *buf, size_t sz)
+{
+ struct cbuf *cbuf;
+
+ cbuf = pool_alloc(pool_head_cbuf);
+ if (cbuf) {
+ cbuf->sz = sz;
+ cbuf->buf = buf;
+ cbuf->wr = 0;
+ cbuf->rd = 0;
+ }
+
+ return cbuf;
+}
+
+/* Free circular buffer <cbuf> (the caller-provided storage is not released) */
+void cbuf_free(struct cbuf *cbuf)
+{
+ if (!cbuf)
+ return;
+
+ pool_free(pool_head_cbuf, cbuf);
+}
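+
+/* Minimal usage sketch (illustrative only): the caller provides, and keeps
+ * ownership of, the storage wrapped by cbuf_new():
+ *
+ *   unsigned char area[4096];
+ *   struct cbuf *ring = cbuf_new(area, sizeof(area));
+ *
+ *   if (ring) {
+ *           // ... produce at <wr>, consume at <rd> ...
+ *           cbuf_free(ring);  // releases the struct, not <area>
+ *   }
+ */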
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/cfgcond.c b/src/cfgcond.c
new file mode 100644
index 0000000..117cf6c
--- /dev/null
+++ b/src/cfgcond.c
@@ -0,0 +1,559 @@
+/*
+ * Configuration condition preprocessor
+ *
+ * Copyright 2000-2021 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgcond.h>
+#include <haproxy/global.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/tools.h>
+
+/* supported condition predicates */
+const struct cond_pred_kw cond_predicates[] = {
+ { "defined", CFG_PRED_DEFINED, ARG1(1, STR) },
+ { "feature", CFG_PRED_FEATURE, ARG1(1, STR) },
+ { "streq", CFG_PRED_STREQ, ARG2(2, STR, STR) },
+ { "strneq", CFG_PRED_STRNEQ, ARG2(2, STR, STR) },
+ { "strstr", CFG_PRED_STRSTR, ARG2(2, STR, STR) },
+ { "version_atleast", CFG_PRED_VERSION_ATLEAST, ARG1(1, STR) },
+ { "version_before", CFG_PRED_VERSION_BEFORE, ARG1(1, STR) },
+ { "openssl_version_atleast", CFG_PRED_OSSL_VERSION_ATLEAST, ARG1(1, STR) },
+ { "openssl_version_before", CFG_PRED_OSSL_VERSION_BEFORE, ARG1(1, STR) },
+ { "ssllib_name_startswith", CFG_PRED_SSLLIB_NAME_STARTSWITH, ARG1(1, STR) },
+ { "enabled", CFG_PRED_ENABLED, ARG1(1, STR) },
+ { NULL, CFG_PRED_NONE, 0 }
+};
+
+/* looks up a cond predicate matching the keyword in <str>, possibly followed
+ * by a parenthesis. Returns a pointer to it or NULL if not found.
+ */
+const struct cond_pred_kw *cfg_lookup_cond_pred(const char *str)
+{
+ const struct cond_pred_kw *ret;
+ int len = strcspn(str, " (");
+
+ for (ret = &cond_predicates[0]; ret->word; ret++) {
+ if (len != strlen(ret->word))
+ continue;
+ if (strncmp(str, ret->word, len) != 0)
+ continue;
+ return ret;
+ }
+ return NULL;
+}
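+
+/* For example (illustrative): with the table above,
+ * cfg_lookup_cond_pred("defined(FOO)") returns the "defined" entry because
+ * the comparison stops at the '(', while cfg_lookup_cond_pred("definedx")
+ * returns NULL since no predicate has that length.
+ */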
+
+/* Frees <term> and its args. NULL is supported and does nothing. */
+void cfg_free_cond_term(struct cfg_cond_term *term)
+{
+ if (!term)
+ return;
+
+ if (term->type == CCTT_PAREN) {
+ cfg_free_cond_expr(term->expr);
+ term->expr = NULL;
+ }
+
+ free_args(term->args);
+ free(term->args);
+ free(term);
+}
+
+/* Parse an indirect input text as a possible config condition term.
+ * Returns <0 on parsing error, 0 if the parser is desynchronized, or >0 on
+ * success. <term> is allocated and filled with the parsed info, and <text>
+ * is updated on success to point to the first unparsed character, or is left
+ * untouched on failure. On success, the caller must free <term> using
+ * cfg_free_cond_term(). An error will be set in <err> on error, and only
+ * in this case. In this case the first bad character will be reported in
+ * <errptr>. <maxdepth> corresponds to the maximum recursion depth permitted,
+ * it is decremented on each recursive call and the parsing will fail once
+ * it reaches <= 0.
+ */
+int cfg_parse_cond_term(const char **text, struct cfg_cond_term **term, char **err, const char **errptr, int maxdepth)
+{
+ struct cfg_cond_term *t;
+ const char *in = *text;
+ const char *end_ptr;
+ int err_arg;
+ int nbargs;
+ char *end;
+ long val;
+
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ if (!*in) /* empty term does not parse */
+ return 0;
+
+ *term = NULL;
+ if (maxdepth <= 0)
+ goto fail0;
+
+ t = *term = calloc(1, sizeof(**term));
+ if (!t) {
+ memprintf(err, "memory allocation error while parsing conditional expression '%s'", *text);
+ goto fail1;
+ }
+
+ t->type = CCTT_NONE;
+ t->args = NULL;
+ t->neg = 0;
+
+ /* !<term> negates the term. White spaces permitted */
+ while (*in == '!') {
+ t->neg = !t->neg;
+ do { in++; } while (*in == ' ' || *in == '\t');
+ }
+
+ val = strtol(in, &end, 0);
+ if (end != in) {
+ t->type = val ? CCTT_TRUE : CCTT_FALSE;
+ *text = end;
+ return 1;
+ }
+
+ /* Try to parse '(' EXPR ')' */
+ if (*in == '(') {
+ int ret;
+
+ t->type = CCTT_PAREN;
+ t->args = NULL;
+
+ do { in++; } while (*in == ' ' || *in == '\t');
+ ret = cfg_parse_cond_expr(&in, &t->expr, err, errptr, maxdepth - 1);
+ if (ret == -1)
+ goto fail2;
+ if (ret == 0)
+ goto fail0;
+
+ /* find the closing ')' */
+ while (*in == ' ' || *in == '\t')
+ in++;
+ if (*in != ')') {
+ memprintf(err, "expected ')' after conditional expression '%s'", *text);
+ goto fail1;
+ }
+ do { in++; } while (*in == ' ' || *in == '\t');
+ *text = in;
+ return 1;
+ }
+
+	/* below we'll likely call make_arg_list() so we must return only via
+	 * the <fail> labels, which free the term and its arg list.
+ */
+ t->pred = cfg_lookup_cond_pred(in);
+ if (t->pred) {
+ t->type = CCTT_PRED;
+ nbargs = make_arg_list(in + strlen(t->pred->word), -1,
+ t->pred->arg_mask, &t->args, err,
+ &end_ptr, &err_arg, NULL);
+ if (nbargs < 0) {
+ memprintf(err, "%s in argument %d of predicate '%s' used in conditional expression", *err, err_arg, t->pred->word);
+ if (errptr)
+ *errptr = end_ptr;
+ goto fail2;
+ }
+ *text = end_ptr;
+ return 1;
+ }
+
+ fail0:
+ memprintf(err, "unparsable conditional expression '%s'", *text);
+ fail1:
+ if (errptr)
+ *errptr = *text;
+ fail2:
+ cfg_free_cond_term(*term);
+ *term = NULL;
+ return -1;
+}
+
+/* evaluate a "enabled" expression. Only a subset of options are matched. It
+ * returns 1 if the option is enabled. 0 is returned is the option is not
+ * enabled or if it is not recognized.
+ */
+static int cfg_eval_cond_enabled(const char *str)
+{
+ if (strcmp(str, "POLL") == 0)
+ return !!(global.tune.options & GTUNE_USE_POLL);
+ else if (strcmp(str, "EPOLL") == 0)
+ return !!(global.tune.options & GTUNE_USE_EPOLL);
+ else if (strcmp(str, "KQUEUE") == 0)
+		return !!(global.tune.options & GTUNE_USE_KQUEUE);
+ else if (strcmp(str, "EVPORTS") == 0)
+ return !!(global.tune.options & GTUNE_USE_EVPORTS);
+ else if (strcmp(str, "SPLICE") == 0)
+ return !!(global.tune.options & GTUNE_USE_SPLICE);
+ else if (strcmp(str, "GETADDRINFO") == 0)
+ return !!(global.tune.options & GTUNE_USE_GAI);
+ else if (strcmp(str, "REUSEPORT") == 0)
+ return !!(proto_tcpv4.flags & PROTO_F_REUSEPORT_SUPPORTED);
+ else if (strcmp(str, "FAST-FORWARD") == 0)
+ return !!(global.tune.options & GTUNE_USE_FAST_FWD);
+ else if (strcmp(str, "SERVER-SSL-VERIFY-NONE") == 0)
+ return !!(global.ssl_server_verify == SSL_SERVER_VERIFY_NONE);
+ return 0;
+}
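+
+/* For example (illustrative configuration snippet), this predicate allows
+ * conditional blocks in the configuration file such as:
+ *
+ *   .if enabled(SPLICE)
+ *       # splice-specific settings here
+ *   .endif
+ */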
+
+/* evaluate a condition term on a .if/.elif line. The condition was already
+ * parsed in <term>. Returns -1 on error (in which case err is filled with a
+ * message, and only in this case), 0 if the condition is false, 1 if it's
+ * true.
+ */
+int cfg_eval_cond_term(const struct cfg_cond_term *term, char **err)
+{
+ int ret = -1;
+
+ if (term->type == CCTT_FALSE)
+ ret = 0;
+ else if (term->type == CCTT_TRUE)
+ ret = 1;
+ else if (term->type == CCTT_PRED) {
+ /* here we know we have a valid predicate with valid arguments
+ * placed in term->args (which the caller will free).
+ */
+ switch (term->pred->prd) {
+ case CFG_PRED_DEFINED: // checks if arg exists as an environment variable
+ ret = getenv(term->args[0].data.str.area) != NULL;
+ break;
+
+ case CFG_PRED_FEATURE: { // checks if the arg matches an enabled feature
+ const char *p;
+
+ ret = 0; // assume feature not found
+ for (p = build_features; (p = strstr(p, term->args[0].data.str.area)); p++) {
+ if (p > build_features &&
+ (p[term->args[0].data.str.data] == ' ' ||
+ p[term->args[0].data.str.data] == 0)) {
+ if (*(p-1) == '+') { // e.g. "+OPENSSL"
+ ret = 1;
+ break;
+ }
+ else if (*(p-1) == '-') { // e.g. "-OPENSSL"
+ ret = 0;
+ break;
+ }
+ /* it was a sub-word, let's restart from next place */
+ }
+ }
+ break;
+ }
+ case CFG_PRED_STREQ: // checks if the two arg are equal
+ ret = strcmp(term->args[0].data.str.area, term->args[1].data.str.area) == 0;
+ break;
+
+ case CFG_PRED_STRNEQ: // checks if the two arg are different
+ ret = strcmp(term->args[0].data.str.area, term->args[1].data.str.area) != 0;
+ break;
+
+ case CFG_PRED_STRSTR: // checks if the 2nd arg is found in the first one
+ ret = strstr(term->args[0].data.str.area, term->args[1].data.str.area) != NULL;
+ break;
+
+ case CFG_PRED_VERSION_ATLEAST: // checks if the current version is at least this one
+ ret = compare_current_version(term->args[0].data.str.area) <= 0;
+ break;
+
+ case CFG_PRED_VERSION_BEFORE: // checks if the current version is older than this one
+ ret = compare_current_version(term->args[0].data.str.area) > 0;
+ break;
+
+ case CFG_PRED_OSSL_VERSION_ATLEAST: { // checks if the current openssl version is at least this one
+ int opensslret = openssl_compare_current_version(term->args[0].data.str.area);
+
+			if (opensslret < -1) { /* can't parse the string or no openssl available */
+				memprintf(err, "'%s' in predicate '%s' is not a valid OpenSSL version string",
+				          term->args[0].data.str.area, term->pred->word);
+				ret = -1;
+			}
+			else
+				ret = opensslret <= 0;
+ break;
+ }
+ case CFG_PRED_OSSL_VERSION_BEFORE: { // checks if the current openssl version is older than this one
+ int opensslret = openssl_compare_current_version(term->args[0].data.str.area);
+
+			if (opensslret < -1) { /* can't parse the string or no openssl available */
+				memprintf(err, "'%s' in predicate '%s' is not a valid OpenSSL version string",
+				          term->args[0].data.str.area, term->pred->word);
+				ret = -1;
+			}
+			else
+				ret = opensslret > 0;
+ break;
+ }
+ case CFG_PRED_SSLLIB_NAME_STARTSWITH: { // checks if the current SSL library's name starts with a specified string (can be used to distinguish OpenSSL from LibreSSL or BoringSSL)
+ ret = openssl_compare_current_name(term->args[0].data.str.area) == 0;
+ break;
+ }
+ case CFG_PRED_ENABLED: { // checks if the arg matches on a subset of enabled options
+ ret = cfg_eval_cond_enabled(term->args[0].data.str.area) != 0;
+ break;
+ }
+ default:
+ memprintf(err, "internal error: unhandled conditional expression predicate '%s'", term->pred->word);
+ break;
+ }
+ }
+ else if (term->type == CCTT_PAREN) {
+ ret = cfg_eval_cond_expr(term->expr, err);
+ }
+ else {
+ memprintf(err, "internal error: unhandled condition term type %d", (int)term->type);
+ }
+
+ if (ret >= 0 && term->neg)
+ ret = !ret;
+ return ret;
+}
+
+
+/* Frees <expr> and its terms and args. NULL is supported and does nothing. */
+void cfg_free_cond_and(struct cfg_cond_and *expr)
+{
+ struct cfg_cond_and *prev;
+
+ while (expr) {
+ cfg_free_cond_term(expr->left);
+ prev = expr;
+ expr = expr->right;
+ free(prev);
+ }
+}
+
+/* Frees <expr> and its terms and args. NULL is supported and does nothing. */
+void cfg_free_cond_expr(struct cfg_cond_expr *expr)
+{
+ struct cfg_cond_expr *prev;
+
+ while (expr) {
+ cfg_free_cond_and(expr->left);
+ prev = expr;
+ expr = expr->right;
+ free(prev);
+ }
+}
+
+/* Parse an indirect input text as a possible config condition sub-expr.
+ * Returns <0 on parsing error, 0 if the parser is desynchronized, or >0 on
+ * success. <expr> is filled with the parsed info, and <text> is updated on
+ * success to point to the first unparsed character, or is left untouched
+ * on failure. On success, the caller will have to free all lower-level
+ * allocated structs using cfg_free_cond_expr(). An error will be set in
+ * <err> on error, and only in this case. In this case the first bad
+ * character will be reported in <errptr>. <maxdepth> corresponds to the
+ * maximum recursion depth permitted, it is decremented on each recursive
+ * call and the parsing will fail once it reaches <= 0.
+ */
+int cfg_parse_cond_and(const char **text, struct cfg_cond_and **expr, char **err, const char **errptr, int maxdepth)
+{
+ struct cfg_cond_and *e;
+ const char *in = *text;
+ int ret = -1;
+
+ if (!*in) /* empty expr does not parse */
+ return 0;
+
+ *expr = NULL;
+ if (maxdepth <= 0) {
+ memprintf(err, "unparsable conditional sub-expression '%s'", in);
+ if (errptr)
+ *errptr = in;
+ goto done;
+ }
+
+ e = *expr = calloc(1, sizeof(**expr));
+ if (!e) {
+ memprintf(err, "memory allocation error while parsing conditional expression '%s'", *text);
+ goto done;
+ }
+
+ ret = cfg_parse_cond_term(&in, &e->left, err, errptr, maxdepth - 1);
+ if (ret == -1) // parse error, error already reported
+ goto done;
+
+ if (ret == 0) {
+ /* ret == 0, no other way to parse this */
+ memprintf(err, "unparsable conditional sub-expression '%s'", in);
+ if (errptr)
+ *errptr = in;
+ ret = -1;
+ goto done;
+ }
+
+	/* ret=1, we have a term on the left-hand side */
+
+ /* find an optional '&&' */
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ *text = in;
+ if (in[0] != '&' || in[1] != '&')
+ goto done;
+
+	/* we have a '&&', let's parse the right-hand side's sub-expression */
+ in += 2;
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ ret = cfg_parse_cond_and(&in, &e->right, err, errptr, maxdepth - 1);
+ if (ret > 0)
+ *text = in;
+ done:
+ if (ret < 0) {
+ cfg_free_cond_and(*expr);
+ *expr = NULL;
+ }
+ return ret;
+}
+
+/* Parse an indirect input text as a possible config condition expression.
+ * Returns <0 on parsing error, 0 if the parser is desynchronized, or >0 on
+ * success. <expr> is filled with the parsed info, and <text> is updated on
+ * success to point to the first unparsed character, or is left untouched
+ * on failure. On success, the caller will have to free all lower-level
+ * allocated structs using cfg_free_cond_expr(). An error will be set in
+ * <err> on error, and only in this case. In this case the first bad
+ * character will be reported in <errptr>. <maxdepth> corresponds to the
+ * maximum recursion depth permitted, it is decremented on each recursive call
+ * and the parsing will fail once it reaches <= 0.
+ */
+int cfg_parse_cond_expr(const char **text, struct cfg_cond_expr **expr, char **err, const char **errptr, int maxdepth)
+{
+ struct cfg_cond_expr *e;
+ const char *in = *text;
+ int ret = -1;
+
+ if (!*in) /* empty expr does not parse */
+ return 0;
+
+ *expr = NULL;
+ if (maxdepth <= 0) {
+ memprintf(err, "unparsable conditional expression '%s'", in);
+ if (errptr)
+ *errptr = in;
+ goto done;
+ }
+
+ e = *expr = calloc(1, sizeof(**expr));
+ if (!e) {
+ memprintf(err, "memory allocation error while parsing conditional expression '%s'", *text);
+ goto done;
+ }
+
+ ret = cfg_parse_cond_and(&in, &e->left, err, errptr, maxdepth - 1);
+ if (ret == -1) // parse error, error already reported
+ goto done;
+
+ if (ret == 0) {
+ /* ret == 0, no other way to parse this */
+ memprintf(err, "unparsable conditional expression '%s'", in);
+ if (errptr)
+ *errptr = in;
+ ret = -1;
+ goto done;
+ }
+
+	/* ret=1, we have a sub-expression on the left-hand side */
+
+ /* find an optional '||' */
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ *text = in;
+ if (in[0] != '|' || in[1] != '|')
+ goto done;
+
+	/* we have a '||', let's parse the right-hand side's sub-expression */
+ in += 2;
+ while (*in == ' ' || *in == '\t')
+ in++;
+
+ ret = cfg_parse_cond_expr(&in, &e->right, err, errptr, maxdepth - 1);
+ if (ret > 0)
+ *text = in;
+ done:
+ if (ret < 0) {
+ cfg_free_cond_expr(*expr);
+ *expr = NULL;
+ }
+ return ret;
+}
+
+/* evaluate a sub-expression on a .if/.elif line. The expression is valid and
+ * was already parsed in <expr>. Returns -1 on error (in which case err is
+ * filled with a message, and only in this case), 0 if the condition is false,
+ * 1 if it's true.
+ */
+int cfg_eval_cond_and(struct cfg_cond_and *expr, char **err)
+{
+ int ret;
+
+ /* AND: loop on terms and sub-exp's terms as long as they're TRUE
+ * (stop on FALSE and ERROR).
+ */
+ while ((ret = cfg_eval_cond_term(expr->left, err)) > 0 && expr->right)
+ expr = expr->right;
+ return ret;
+}
+
+/* evaluate an expression on a .if/.elif line. The expression is valid and was
+ * already parsed in <expr>. Returns -1 on error (in which case err is filled
+ * with a message, and only in this case), 0 if the condition is false, 1 if
+ * it's true.
+ */
+int cfg_eval_cond_expr(struct cfg_cond_expr *expr, char **err)
+{
+ int ret;
+
+ /* OR: loop on sub-exps as long as they're FALSE (stop on TRUE and ERROR) */
+ while ((ret = cfg_eval_cond_and(expr->left, err)) == 0 && expr->right)
+ expr = expr->right;
+ return ret;
+}
+
+/* evaluate a condition on a .if/.elif line. The condition is already tokenized
+ * in <args>. Returns -1 on error (in which case err is filled with a message,
+ * and only in this case), 0 if the condition is false, 1 if it's true. If
+ * <errptr> is not NULL, it's set to the first invalid character on error.
+ */
+int cfg_eval_condition(char **args, char **err, const char **errptr)
+{
+ struct cfg_cond_expr *expr = NULL;
+ const char *text = args[0];
+ int ret = -1;
+
+ if (!*text) /* note: empty = false */
+ return 0;
+
+ ret = cfg_parse_cond_expr(&text, &expr, err, errptr, MAX_CFG_RECURSION);
+ if (ret != 0) {
+ if (ret == -1) // parse error, error already reported
+ goto done;
+ while (*text == ' ' || *text == '\t')
+ text++;
+
+ if (*text) {
+ ret = -1;
+ memprintf(err, "unexpected character '%c' at the end of conditional expression '%s'",
+ *text, args[0]);
+ goto fail;
+ }
+
+ ret = cfg_eval_cond_expr(expr, err);
+ goto done;
+ }
+
+ /* ret == 0, no other way to parse this */
+ ret = -1;
+ memprintf(err, "unparsable conditional expression '%s'", args[0]);
+ fail:
+ if (errptr)
+ *errptr = text;
+ done:
+ cfg_free_cond_expr(expr);
+ return ret;
+}
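+
+/* Illustration: with the grammar above, a configuration line such as
+ *
+ *   .if defined(MY_VAR) && version_atleast(2.9) || feature(OPENSSL)
+ *
+ * (where MY_VAR is a made-up variable) parses as an OR of AND chains:
+ * cfg_eval_cond_and() stops on the first false term and
+ * cfg_eval_cond_expr() stops on the first true chain.
+ */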
diff --git a/src/cfgdiag.c b/src/cfgdiag.c
new file mode 100644
index 0000000..f8e4a9e
--- /dev/null
+++ b/src/cfgdiag.c
@@ -0,0 +1,97 @@
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include <import/ebistree.h>
+
+#include <haproxy/cfgdiag.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/server.h>
+
+/* Use this function to emit a diagnostic warning.
+ * As a shortcut, it also sets the value pointed to by <ret> to 1 at the
+ * same time.
+ */
+static inline void diag_warning(int *ret, char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ *ret = 1;
+ _ha_vdiag_warning(fmt, argp);
+ va_end(argp);
+}
+
+/* Use this for dynamic allocation in diagnostics.
+ * In case of allocation failure, this will immediately terminate haproxy.
+ */
+static inline void *diag_alloc(size_t size)
+{
+ void *out = NULL;
+
+ if (!(out = malloc(size))) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+
+ return out;
+}
+
+/* Checks that two servers from the same backend do not share the same cookie
+ * value. Backup servers are not taken into account as it can be quite common to
+ * share cookie values in this case.
+ */
+static void check_server_cookies(int *ret)
+{
+ struct cookie_entry {
+ struct ebpt_node node;
+ };
+
+ struct proxy *px;
+ struct server *srv;
+
+ struct eb_root cookies_tree = EB_ROOT_UNIQUE;
+ struct ebpt_node *cookie_node;
+ struct cookie_entry *cookie_entry;
+ struct ebpt_node *node;
+
+ for (px = proxies_list; px; px = px->next) {
+ for (srv = px->srv; srv; srv = srv->next) {
+ /* do not take into account backup servers */
+ if (!srv->cookie || (srv->flags & SRV_F_BACKUP))
+ continue;
+
+ cookie_node = ebis_lookup(&cookies_tree, srv->cookie);
+ if (cookie_node) {
+ diag_warning(ret, "parsing [%s:%d] : 'server %s' : same cookie value is set for a previous non-backup server in the same backend, it may break connection persistence\n",
+ srv->conf.file, srv->conf.line, srv->id);
+ continue;
+ }
+
+ cookie_entry = diag_alloc(sizeof(*cookie_entry));
+ cookie_entry->node.key = srv->cookie;
+ ebis_insert(&cookies_tree, &cookie_entry->node);
+ }
+
+ /* clear the tree and free its entries */
+ while ((node = ebpt_first(&cookies_tree))) {
+ cookie_entry = ebpt_entry(node, struct cookie_entry, node);
+ eb_delete(&node->node);
+ free(cookie_entry);
+ }
+ }
+}
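+
+/* For example (illustrative), the following backend would trigger the
+ * warning above for srv2 because it reuses srv1's cookie value:
+ *
+ *   backend app
+ *       server srv1 192.0.2.1:80 cookie c1
+ *       server srv2 192.0.2.2:80 cookie c1
+ */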
+
+/* Placeholder to execute various diagnostic checks after the configuration file
+ * has been fully parsed. It will output a warning for each diagnostic found.
+ *
+ * Returns 0 if no diagnostic message has been found else 1.
+ */
+int cfg_run_diagnostics()
+{
+ int ret = 0;
+
+ check_server_cookies(&ret);
+
+ return ret;
+}
diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c
new file mode 100644
index 0000000..f31e7a0
--- /dev/null
+++ b/src/cfgparse-global.c
@@ -0,0 +1,1396 @@
+#define _GNU_SOURCE /* for cpu_set_t from haproxy/cpuset.h */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <grp.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <import/sha1.h>
+
+#include <haproxy/buf.h>
+#include <haproxy/cfgparse.h>
+#ifdef USE_CPU_AFFINITY
+#include <haproxy/cpuset.h>
+#endif
+#include <haproxy/compression.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/peers.h>
+#include <haproxy/protocol.h>
+#include <haproxy/tools.h>
+
+int cluster_secret_isset;
+
+/* some keywords that are still being parsed using strcmp() and are not
+ * registered anywhere. They are used as suggestions for mistyped words.
+ */
+static const char *common_kw_list[] = {
+ "global", "daemon", "master-worker", "noepoll", "nokqueue",
+ "noevports", "nopoll", "busy-polling", "set-dumpable",
+ "insecure-fork-wanted", "insecure-setuid-wanted", "nosplice",
+ "nogetaddrinfo", "noreuseport", "quiet", "zero-warning",
+ "tune.runqueue-depth", "tune.maxpollevents", "tune.maxaccept",
+ "tune.recv_enough", "tune.buffers.limit",
+ "tune.buffers.reserve", "tune.bufsize", "tune.maxrewrite",
+ "tune.idletimer", "tune.rcvbuf.client", "tune.rcvbuf.server",
+ "tune.sndbuf.client", "tune.sndbuf.server", "tune.pipesize",
+ "tune.http.cookielen", "tune.http.logurilen", "tune.http.maxhdr",
+ "tune.comp.maxlevel", "tune.pattern.cache-size",
+ "tune.fast-forward", "uid", "gid",
+ "external-check", "user", "group", "nbproc", "maxconn",
+ "ssl-server-verify", "maxconnrate", "maxsessrate", "maxsslrate",
+ "maxcomprate", "maxpipes", "maxzlibmem", "maxcompcpuusage", "ulimit-n",
+ "chroot", "description", "node", "pidfile", "unix-bind", "log",
+ "log-send-hostname", "server-state-base", "server-state-file",
+ "log-tag", "spread-checks", "max-spread-checks", "cpu-map", "setenv",
+ "presetenv", "unsetenv", "resetenv", "strict-limits", "localpeer",
+ "numa-cpu-mapping", "defaults", "listen", "frontend", "backend",
+ "peers", "resolvers", "cluster-secret", "no-quic", "limited-quic",
+ NULL /* must be last */
+};
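+
+/* Illustration (hypothetical snippet): a minimal global section handled by
+ * the parser below could look like:
+ *
+ *   global
+ *       daemon
+ *       maxconn 4096
+ *       user haproxy
+ *       pidfile /var/run/haproxy.pid
+ */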
+
+/*
+ * parse a line in a <global> section. Returns the error code, 0 if OK, or
+ * any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the first two can stop processing, the other two are just
+ * indicators.
+ */
+int cfg_parse_global(const char *file, int linenum, char **args, int kwm)
+{
+ int err_code = 0;
+ char *errmsg = NULL;
+
+ if (strcmp(args[0], "global") == 0) { /* new section */
+ /* no option, nothing special to do */
+ alertif_too_many_args(0, file, linenum, args, &err_code);
+ goto out;
+ }
+ else if (strcmp(args[0], "expose-experimental-directives") == 0) {
+ experimental_directives_allowed = 1;
+ }
+ else if (strcmp(args[0], "daemon") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.mode |= MODE_DAEMON;
+ }
+ else if (strcmp(args[0], "master-worker") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*args[1]) {
+ if (strcmp(args[1], "no-exit-on-failure") == 0) {
+ global.tune.options |= GTUNE_NOEXIT_ONFAILURE;
+ } else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'no-exit-on-failure' option.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ global.mode |= MODE_MWORKER;
+ }
+ else if (strcmp(args[0], "noepoll") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_EPOLL;
+ }
+ else if (strcmp(args[0], "nokqueue") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_KQUEUE;
+ }
+ else if (strcmp(args[0], "noevports") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_EVPORTS;
+ }
+ else if (strcmp(args[0], "nopoll") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_POLL;
+ }
+ else if (strcmp(args[0], "limited-quic") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+
+ global.tune.options |= GTUNE_LIMITED_QUIC;
+ }
+ else if (strcmp(args[0], "no-quic") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+
+ global.tune.options |= GTUNE_NO_QUIC;
+ }
+ else if (strcmp(args[0], "busy-polling") == 0) { /* "no busy-polling" or "busy-polling" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_BUSY_POLLING;
+ else
+ global.tune.options |= GTUNE_BUSY_POLLING;
+ }
+ else if (strcmp(args[0], "set-dumpable") == 0) { /* "no set-dumpable" or "set-dumpable" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_SET_DUMPABLE;
+ else
+ global.tune.options |= GTUNE_SET_DUMPABLE;
+ }
+ else if (strcmp(args[0], "h2-workaround-bogus-websocket-clients") == 0) { /* "no h2-workaround-bogus-websocket-clients" or "h2-workaround-bogus-websocket-clients" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_DISABLE_H2_WEBSOCKET;
+ else
+ global.tune.options |= GTUNE_DISABLE_H2_WEBSOCKET;
+ }
+ else if (strcmp(args[0], "insecure-fork-wanted") == 0) { /* "no insecure-fork-wanted" or "insecure-fork-wanted" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_INSECURE_FORK;
+ else
+ global.tune.options |= GTUNE_INSECURE_FORK;
+ }
+ else if (strcmp(args[0], "insecure-setuid-wanted") == 0) { /* "no insecure-setuid-wanted" or "insecure-setuid-wanted" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_INSECURE_SETUID;
+ else
+ global.tune.options |= GTUNE_INSECURE_SETUID;
+ }
+ else if (strcmp(args[0], "nosplice") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_SPLICE;
+ }
+ else if (strcmp(args[0], "nogetaddrinfo") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_GAI;
+ }
+ else if (strcmp(args[0], "noreuseport") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ protocol_clrf_all(PROTO_F_REUSEPORT_SUPPORTED);
+ }
+ else if (strcmp(args[0], "quiet") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.mode |= MODE_QUIET;
+ }
+ else if (strcmp(args[0], "zero-warning") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.mode |= MODE_ZERO_WARNING;
+ }
+ else if (strcmp(args[0], "tune.runqueue-depth") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.runqueue_depth != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.runqueue_depth = atol(args[1]);
+
+ }
+ else if (strcmp(args[0], "tune.maxpollevents") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.maxpollevents != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.maxpollevents = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.maxaccept") == 0) {
+ long max;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.maxaccept != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ max = atol(args[1]);
+ if (/*max < -1 || */max > INT_MAX) {
+ ha_alert("parsing [%s:%d] : '%s' expects -1 or an integer from 0 to INT_MAX.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.maxaccept = max;
+ }
+ else if (strcmp(args[0], "tune.chksize") == 0) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported any more (tune.bufsize is used instead).\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "tune.recv_enough") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.recv_enough = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.buffers.limit") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.buf_limit = atol(args[1]);
+ if (global.tune.buf_limit) {
+ if (global.tune.buf_limit < 3)
+ global.tune.buf_limit = 3;
+ if (global.tune.buf_limit <= global.tune.reserved_bufs)
+ global.tune.buf_limit = global.tune.reserved_bufs + 1;
+ }
+ }
+ else if (strcmp(args[0], "tune.buffers.reserve") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.reserved_bufs = atol(args[1]);
+ if (global.tune.reserved_bufs < 2)
+ global.tune.reserved_bufs = 2;
+ if (global.tune.buf_limit && global.tune.buf_limit <= global.tune.reserved_bufs)
+ global.tune.buf_limit = global.tune.reserved_bufs + 1;
+ }
+ else if (strcmp(args[0], "tune.bufsize") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.bufsize = atol(args[1]);
+ /* round it up to support a two-pointer alignment at the end */
+ global.tune.bufsize = (global.tune.bufsize + 2 * sizeof(void *) - 1) & -(2 * sizeof(void *));
+ if (global.tune.bufsize <= 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.maxrewrite") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.maxrewrite = atol(args[1]);
+ if (global.tune.maxrewrite < 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.idletimer") == 0) {
+ unsigned int idle;
+ const char *res;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a timer value between 0 and 65535 ms.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ res = parse_time_err(args[1], &idle, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 65535 ms.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s>.\n",
+ file, linenum, *res, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (idle > 65535) {
+ ha_alert("parsing [%s:%d] : '%s' expects a timer value between 0 and 65535 ms.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.idle_timer = idle;
+ }
+ else if (strcmp(args[0], "tune.rcvbuf.client") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.client_rcvbuf != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.client_rcvbuf = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.rcvbuf.server") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.server_rcvbuf != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.server_rcvbuf = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.sndbuf.client") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.client_sndbuf != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.client_sndbuf = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.sndbuf.server") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.tune.server_sndbuf != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.server_sndbuf = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.pipesize") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.pipesize = atol(args[1]);
+ }
+ else if (strcmp(args[0], "tune.http.cookielen") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.cookie_len = atol(args[1]) + 1;
+ }
+ else if (strcmp(args[0], "tune.http.logurilen") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.requri_len = atol(args[1]) + 1;
+ }
+ else if (strcmp(args[0], "tune.http.maxhdr") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.tune.max_http_hdr = atoi(args[1]);
+ if (global.tune.max_http_hdr < 1 || global.tune.max_http_hdr > 32767) {
+ ha_alert("parsing [%s:%d] : '%s' expects a numeric value between 1 and 32767\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.comp.maxlevel") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*args[1]) {
+ global.tune.comp_maxlevel = atoi(args[1]);
+ if (global.tune.comp_maxlevel < 1 || global.tune.comp_maxlevel > 9) {
+ ha_alert("parsing [%s:%d] : '%s' expects a numeric value between 1 and 9\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } else {
+ ha_alert("parsing [%s:%d] : '%s' expects a numeric value between 1 and 9\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.pattern.cache-size") == 0) {
+ if (*args[1]) {
+ global.tune.pattern_cache = atoi(args[1]);
+ if (global.tune.pattern_cache < 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive numeric value\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } else {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive numeric value\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "tune.disable-fast-forward") == 0) {
+ if (!experimental_directives_allowed) {
+ ha_alert("parsing [%s:%d] : '%s' directive is experimental, must be allowed via a global 'expose-experimental-directives'",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options &= ~GTUNE_USE_FAST_FWD;
+ }
+ else if (strcmp(args[0], "tune.disable-zero-copy-forwarding") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD;
+ }
+ else if (strcmp(args[0], "cluster-secret") == 0) {
+ blk_SHA_CTX sha1_ctx;
+ unsigned char sha1_out[20];
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*args[1] == 0) {
+ ha_alert("parsing [%s:%d] : expects an ASCII string argument.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (cluster_secret_isset) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ blk_SHA1_Init(&sha1_ctx);
+ blk_SHA1_Update(&sha1_ctx, args[1], strlen(args[1]));
+ blk_SHA1_Final(sha1_out, &sha1_ctx);
+ BUG_ON(sizeof sha1_out < sizeof global.cluster_secret);
+ memcpy(global.cluster_secret, sha1_out, sizeof global.cluster_secret);
+ cluster_secret_isset = 1;
+ }
+ else if (strcmp(args[0], "uid") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.uid != 0) {
+ ha_alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strl2irc(args[1], strlen(args[1]), &global.uid) != 0) {
+ ha_warning("parsing [%s:%d] : uid: string '%s' is not a number.\n | You might want to use the 'user' parameter to use a system user name.\n", file, linenum, args[1]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ }
+ else if (strcmp(args[0], "gid") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.gid != 0) {
+ ha_alert("parsing [%s:%d] : group/gid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strl2irc(args[1], strlen(args[1]), &global.gid) != 0) {
+ ha_warning("parsing [%s:%d] : gid: string '%s' is not a number.\n | You might want to use the 'group' parameter to use a system group name.\n", file, linenum, args[1]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "external-check") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ global.external_check = 1;
+ if (strcmp(args[1], "preserve-env") == 0) {
+ global.external_check = 2;
+ } else if (*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'preserve-env' as an argument, found '%s'.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ /* user/group name handling */
+ else if (strcmp(args[0], "user") == 0) {
+ struct passwd *ha_user;
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.uid != 0) {
+ ha_alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ errno = 0;
+ ha_user = getpwnam(args[1]);
+ if (ha_user != NULL) {
+ global.uid = (int)ha_user->pw_uid;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : cannot find user id for '%s' (%d:%s)\n", file, linenum, args[1], errno, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "group") == 0) {
+ struct group *ha_group;
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.gid != 0) {
+ ha_alert("parsing [%s:%d] : gid/group was already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ errno = 0;
+ ha_group = getgrnam(args[1]);
+ if (ha_group != NULL) {
+ global.gid = (int)ha_group->gr_gid;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : cannot find group id for '%s' (%d:%s)\n", file, linenum, args[1], errno, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ /* end of user/group name handling*/
+ else if (strcmp(args[0], "nbproc") == 0) {
+ ha_alert("parsing [%s:%d] : nbproc is not supported any more since HAProxy 2.5. Threads will automatically be used on multi-processor machines if available.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "maxconn") == 0) {
+ char *stop;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.maxconn != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.maxconn = strtol(args[1], &stop, 10);
+ if (*stop != '\0') {
+ ha_alert("parsing [%s:%d] : cannot parse '%s' value '%s', an integer is expected.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+#ifdef SYSTEM_MAXCONN
+ if (global.maxconn > SYSTEM_MAXCONN && cfg_maxconn <= SYSTEM_MAXCONN) {
+ ha_alert("parsing [%s:%d] : maxconn value %d too high for this system.\nLimiting to %d. Please use '-n' to force the value.\n", file, linenum, global.maxconn, SYSTEM_MAXCONN);
+ global.maxconn = SYSTEM_MAXCONN;
+ err_code |= ERR_ALERT;
+ }
+#endif /* SYSTEM_MAXCONN */
+ }
+ else if (strcmp(args[0], "ssl-server-verify") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strcmp(args[1],"none") == 0)
+ global.ssl_server_verify = SSL_SERVER_VERIFY_NONE;
+ else if (strcmp(args[1],"required") == 0)
+ global.ssl_server_verify = SSL_SERVER_VERIFY_REQUIRED;
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'none' or 'required' as argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "maxconnrate") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.cps_lim != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.cps_lim = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxsessrate") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.sps_lim != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.sps_lim = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxsslrate") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.ssl_lim != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.ssl_lim = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxcomprate") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument in kb/s.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.comp_rate_lim = atoi(args[1]) * 1024;
+ }
+ else if (strcmp(args[0], "maxpipes") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.maxpipes != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.maxpipes = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxzlibmem") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.maxzlibmem = atol(args[1]) * 1024L * 1024L;
+ }
+ else if (strcmp(args[0], "maxcompcpuusage") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument between 0 and 100.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ compress_min_idle = 100 - atoi(args[1]);
+ if (compress_min_idle > 100) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument between 0 and 100.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "fd-hard-limit") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.fd_hard_limit != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.fd_hard_limit = atol(args[1]);
+ }
+ else if (strcmp(args[0], "ulimit-n") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.rlimit_nofile != 0) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.rlimit_nofile = atol(args[1]);
+ }
+ else if (strcmp(args[0], "chroot") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.chroot != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a directory as an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.chroot = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "description") == 0) {
+ int i, len=0;
+ char *d;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects a string argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (i = 1; *args[i]; i++)
+ len += strlen(args[i]) + 1;
+
+ if (global.desc)
+ free(global.desc);
+
+		global.desc = d = calloc(1, len);
+		if (!d) {
+			ha_alert("parsing [%s:%d]: cannot allocate memory for '%s'.\n", file, linenum, args[0]);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
+
+ d += snprintf(d, global.desc + len - d, "%s", args[1]);
+ for (i = 2; *args[i]; i++)
+ d += snprintf(d, global.desc + len - d, " %s", args[i]);
+ }
+ else if (strcmp(args[0], "node") == 0) {
+ int i;
+ char c;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ for (i=0; args[1][i]; i++) {
+ c = args[1][i];
+ if (!isupper((unsigned char)c) && !islower((unsigned char)c) &&
+ !isdigit((unsigned char)c) && c != '_' && c != '-' && c != '.')
+ break;
+ }
+
+ if (!i || args[1][i]) {
+			ha_alert("parsing [%s:%d]: '%s' requires a valid node name - a non-empty string"
+				 " with digits (0-9), letters (A-Z, a-z), dots (.), hyphens (-) or underscores (_).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (global.node)
+ free(global.node);
+
+ global.node = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "pidfile") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.pidfile != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a file name as an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.pidfile = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "unix-bind") == 0) {
+ int cur_arg = 1;
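+		/* A minimal sketch of a configuration line using these options
+		 * (all values hypothetical):
+		 *   unix-bind prefix /tmp/sockets mode 600 user hap group hap
+		 */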
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "prefix") == 0) {
+ if (global.unix_bind.prefix != NULL) {
+ ha_alert("parsing [%s:%d] : unix-bind '%s' already specified. Continuing.\n", file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT;
+ cur_arg += 2;
+ continue;
+ }
+
+ if (*(args[cur_arg+1]) == 0) {
+					ha_alert("parsing [%s:%d] : unix-bind '%s' expects a path as an argument.\n", file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.unix_bind.prefix = strdup(args[cur_arg+1]);
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "mode") == 0) {
+
+ global.unix_bind.ux.mode = strtol(args[cur_arg + 1], NULL, 8);
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "uid") == 0) {
+
+ global.unix_bind.ux.uid = atol(args[cur_arg + 1 ]);
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "gid") == 0) {
+
+ global.unix_bind.ux.gid = atol(args[cur_arg + 1 ]);
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "user") == 0) {
+ struct passwd *user;
+
+ user = getpwnam(args[cur_arg + 1]);
+ if (!user) {
+ ha_alert("parsing [%s:%d] : '%s' : '%s' unknown user.\n",
+ file, linenum, args[0], args[cur_arg + 1 ]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ global.unix_bind.ux.uid = user->pw_uid;
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "group") == 0) {
+ struct group *group;
+
+ group = getgrnam(args[cur_arg + 1]);
+ if (!group) {
+ ha_alert("parsing [%s:%d] : '%s' : '%s' unknown group.\n",
+ file, linenum, args[0], args[cur_arg + 1 ]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ global.unix_bind.ux.gid = group->gr_gid;
+ cur_arg += 2;
+ continue;
+ }
+
+ ha_alert("parsing [%s:%d] : '%s' only supports the 'prefix', 'mode', 'uid', 'gid', 'user' and 'group' options.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "log") == 0) { /* "no log" or "log ..." */
+ if (!parse_logger(args, &global.loggers, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "log-send-hostname") == 0) { /* set the hostname in syslog header */
+ char *name;
+
+ if (global.log_send_hostname != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ if (*(args[1]))
+ name = args[1];
+ else
+ name = hostname;
+
+ free(global.log_send_hostname);
+ global.log_send_hostname = strdup(name);
+ }
+ else if (strcmp(args[0], "server-state-base") == 0) { /* path base where HAProxy can find server state files */
+ if (global.server_state_base != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects one argument: a directory path.\n", file, linenum, args[0]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ global.server_state_base = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "server-state-file") == 0) { /* path to the file where HAProxy can load the server states */
+ if (global.server_state_file != NULL) {
+ ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ if (!*(args[1])) {
+			ha_alert("parsing [%s:%d] : '%s' expects one argument: a file path.\n", file, linenum, args[0]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ global.server_state_file = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "log-tag") == 0) { /* tag to report to syslog */
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a tag for use in syslog.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ chunk_destroy(&global.log_tag);
+ chunk_initlen(&global.log_tag, strdup(args[1]), strlen(args[1]), strlen(args[1]));
+ if (b_orig(&global.log_tag) == NULL) {
+ chunk_destroy(&global.log_tag);
+ ha_alert("parsing [%s:%d]: cannot allocate memory for '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "spread-checks") == 0) { /* random time between checks (0-50) */
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (global.spread_checks != 0) {
+ ha_alert("parsing [%s:%d]: spread-checks already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' expects an integer argument (0..50).\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ global.spread_checks = atol(args[1]);
+ if (global.spread_checks < 0 || global.spread_checks > 50) {
+			ha_alert("parsing [%s:%d]: 'spread-checks' expects an integer in the range 0..50.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "max-spread-checks") == 0) { /* maximum time between first and last check */
+ const char *err;
+ unsigned int val;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (*(args[1]) == 0) {
+			ha_alert("parsing [%s:%d]: '%s' expects a delay as argument (in milliseconds by default).\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = parse_time_err(args[1], &val, TIME_UNIT_MS);
+		if (err == PARSE_TIME_OVER) {
+			ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 ms (~24.8 days).\n",
+				 file, linenum, args[1], args[0]);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
+		else if (err == PARSE_TIME_UNDER) {
+			ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms.\n",
+				 file, linenum, args[1], args[0]);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
+		else if (err) {
+			ha_alert("parsing [%s:%d]: unsupported character '%c' in argument <%s> to <%s> (wants an integer delay).\n",
+				 file, linenum, *err, args[1], args[0]);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
+		global.max_spread_checks = val;
+ }
+ else if (strcmp(args[0], "cpu-map") == 0) {
+ /* map a process list to a CPU set */
+#ifdef USE_CPU_AFFINITY
+ char *slash;
+ unsigned long tgroup = 0, thread = 0;
+ int g, j, n, autoinc;
+ struct hap_cpuset cpus, cpus_copy;
+
+ if (!*args[1] || !*args[2]) {
+		ha_alert("parsing [%s:%d] : %s expects a thread group number"
+			 " ('all', 'odd', 'even', a number from 1 to %d or a range),"
+			 " followed by a list of CPU ranges with numbers from 0 to %d.\n",
+			 file, linenum, args[0], LONGBITS, LONGBITS - 1);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((slash = strchr(args[1], '/')) != NULL)
+ *slash = 0;
+
+ /* note: we silently ignore thread group numbers over MAX_TGROUPS
+ * and threads over MAX_THREADS so as not to make configurations a
+ * pain to maintain.
+ */
+ if (parse_process_number(args[1], &tgroup, LONGBITS, &autoinc, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (slash) {
+ if (parse_process_number(slash+1, &thread, LONGBITS, NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ *slash = '/';
+ } else
+ thread = ~0UL; /* missing '/' = 'all' */
+
+	/* from now on, the thread mask can no longer be zero */
+
+ if (parse_cpu_set((const char **)args+2, &cpus, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (autoinc &&
+ my_popcountl(tgroup) != ha_cpuset_count(&cpus) &&
+ my_popcountl(thread) != ha_cpuset_count(&cpus)) {
+ ha_alert("parsing [%s:%d] : %s : TGROUP/THREAD range and CPU sets "
+ "must have the same size to be automatically bound\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* we now have to deal with 3 real cases :
+ * cpu-map P-Q => mapping for whole tgroups, numbers P to Q
+ * cpu-map P-Q/1 => mapping of first thread of groups P to Q
+ * cpu-map P/T-U => mapping of threads T to U of tgroup P
+ */
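+		/* Illustrative lines for each case (all values hypothetical):
+		 *   cpu-map 1-2   0-15   -> all threads of tgroups 1 and 2 run on CPUs 0-15
+		 *   cpu-map 1-2/1 0      -> first thread of tgroups 1 and 2 bound to CPU 0
+		 *   cpu-map 1/1-4 0-3    -> threads 1 to 4 of tgroup 1 share CPUs 0-3
+		 */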
+ /* first tgroup, iterate on threads. E.g. cpu-map 1/1-4 0-3 */
+ for (g = 0; g < MAX_TGROUPS; g++) {
+ /* No mapping for this tgroup */
+ if (!(tgroup & (1UL << g)))
+ continue;
+
+ ha_cpuset_assign(&cpus_copy, &cpus);
+
+ /* a thread set is specified, apply the
+ * CPU set to these threads.
+ */
+ for (j = n = 0; j < MAX_THREADS_PER_GROUP; j++) {
+ /* No mapping for this thread */
+ if (!(thread & (1UL << j)))
+ continue;
+
+ if (!autoinc)
+ ha_cpuset_assign(&cpu_map[g].thread[j], &cpus);
+ else {
+ ha_cpuset_zero(&cpu_map[g].thread[j]);
+ n = ha_cpuset_ffs(&cpus_copy) - 1;
+ ha_cpuset_clr(&cpus_copy, n);
+ ha_cpuset_set(&cpu_map[g].thread[j], n);
+ }
+ }
+ }
+#else
+ ha_alert("parsing [%s:%d] : '%s' is not enabled, please check build options for USE_CPU_AFFINITY.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+#endif /* ! USE_CPU_AFFINITY */
+ }
+ else if (strcmp(args[0], "setenv") == 0 || strcmp(args[0], "presetenv") == 0) {
+ if (alertif_too_many_args(3, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' expects a name and a value.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* "setenv" overwrites, "presetenv" only sets if not yet set */
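+		/* e.g., with hypothetical values:
+		 *   setenv    MYVAR foo  -> MYVAR is always (re)set to "foo"
+		 *   presetenv MYVAR foo  -> MYVAR is set to "foo" only if it was unset
+		 */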
+ if (setenv(args[1], args[2], (args[0][0] == 's')) != 0) {
+ ha_alert("parsing [%s:%d]: '%s' failed on variable '%s' : %s.\n", file, linenum, args[0], args[1], strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "unsetenv") == 0) {
+ int arg;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' expects at least one variable name.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (arg = 1; *args[arg]; arg++) {
+ if (unsetenv(args[arg]) != 0) {
+ ha_alert("parsing [%s:%d]: '%s' failed on variable '%s' : %s.\n", file, linenum, args[0], args[arg], strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ }
+ else if (strcmp(args[0], "resetenv") == 0) {
+ extern char **environ;
+ char **env = environ;
+
+ /* args contain variable names to keep, one per argument */
+ while (*env) {
+ int arg;
+
+ /* look for current variable in among all those we want to keep */
+ for (arg = 1; *args[arg]; arg++) {
+ if (strncmp(*env, args[arg], strlen(args[arg])) == 0 &&
+ (*env)[strlen(args[arg])] == '=')
+ break;
+ }
+
+ /* delete this variable */
+ if (!*args[arg]) {
+ char *delim = strchr(*env, '=');
+
+ if (!delim || delim - *env >= trash.size) {
+ ha_alert("parsing [%s:%d]: '%s' failed to unset invalid variable '%s'.\n", file, linenum, args[0], *env);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ memcpy(trash.area, *env, delim - *env);
+ trash.area[delim - *env] = 0;
+
+ if (unsetenv(trash.area) != 0) {
+ ha_alert("parsing [%s:%d]: '%s' failed to unset variable '%s' : %s.\n", file, linenum, args[0], *env, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
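+			/* note: on success unsetenv() removes the entry from environ
+			 * and shifts the following ones, so the current position
+			 * already points to the next variable; only advance when
+			 * the entry is kept.
+			 */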
+ else
+ env++;
+ }
+ }
+ else if (strcmp(args[0], "quick-exit") == 0) {
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ global.tune.options |= GTUNE_QUICK_EXIT;
+ }
+ else if (strcmp(args[0], "strict-limits") == 0) { /* "no strict-limits" or "strict-limits" */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_NO)
+ global.tune.options &= ~GTUNE_STRICT_LIMITS;
+ }
+ else if (strcmp(args[0], "localpeer") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a name as an argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (global.localpeer_cmdline != 0) {
+ ha_warning("parsing [%s:%d] : '%s' ignored since it is already set by using the '-L' "
+ "command line argument.\n", file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ if (cfg_peers) {
+ ha_warning("parsing [%s:%d] : '%s' ignored since it is used after 'peers' section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ free(localpeer);
+ if ((localpeer = strdup(args[1])) == NULL) {
+ ha_alert("parsing [%s:%d]: cannot allocate memory for '%s'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ setenv("HAPROXY_LOCALPEER", localpeer, 1);
+ }
+ else if (strcmp(args[0], "numa-cpu-mapping") == 0) {
+ global.numa_cpu_mapping = (kwm == KWM_NO) ? 0 : 1;
+ }
+ else if (strcmp(args[0], "anonkey") == 0) {
+ long long tmp = 0;
+
+ if (*args[1] == 0) {
+ ha_alert("parsing [%s:%d]: a key is expected after '%s'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (HA_ATOMIC_LOAD(&global.anon_key) == 0) {
+ tmp = atoll(args[1]);
+ if (tmp < 0 || tmp > UINT_MAX) {
+ ha_alert("parsing [%s:%d]: '%s' value must be within range %u-%u (was '%s').\n",
+ file, linenum, args[0], 0, UINT_MAX, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ HA_ATOMIC_STORE(&global.anon_key, tmp);
+ }
+ }
+ else {
+ struct cfg_kw_list *kwl;
+ const char *best;
+ int index;
+ int rc;
+
+ list_for_each_entry(kwl, &cfg_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].section != CFG_GLOBAL)
+ continue;
+ if (strcmp(kwl->kw[index].kw, args[0]) == 0) {
+ if (check_kw_experimental(&kwl->kw[index], file, linenum, &errmsg)) {
+ ha_alert("%s\n", errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ rc = kwl->kw[index].parse(args, CFG_GLOBAL, NULL, NULL, file, linenum, &errmsg);
+ if (rc < 0) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ else if (rc > 0) {
+ ha_warning("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+ goto out;
+ }
+ }
+ }
+
+ best = cfg_find_best_match(args[0], &cfg_keywords.list, CFG_GLOBAL, common_kw_list);
+ if (best)
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section; did you mean '%s' maybe ?\n", file, linenum, args[0], cursection, best);
+ else
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], "global");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ out:
+ free(errmsg);
+ return err_code;
+}
+
+static int cfg_parse_prealloc_fd(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+
+ global.prealloc_fd = 1;
+
+ return 0;
+}
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "prealloc-fd", cfg_parse_prealloc_fd },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/cfgparse-listen.c b/src/cfgparse-listen.c
new file mode 100644
index 0000000..4f88b77
--- /dev/null
+++ b/src/cfgparse-listen.c
@@ -0,0 +1,3073 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <grp.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/buf.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/compression-t.h>
+#include <haproxy/connection.h>
+#include <haproxy/extcheck.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_ext.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/peers.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth.h>
+
+/* some keywords that are still being parsed using strcmp() and are not
+ * registered anywhere. They are used as suggestions for mistyped words.
+ */
+static const char *common_kw_list[] = {
+ "listen", "frontend", "backend", "defaults", "server",
+ "default-server", "server-template", "bind", "monitor-net",
+ "monitor-uri", "mode", "id", "description", "disabled", "enabled",
+ "acl", "dynamic-cookie-key", "cookie", "email-alert",
+ "persist", "appsession", "load-server-state-from-file",
+ "server-state-file-name", "max-session-srv-conns", "capture",
+ "retries", "http-request", "http-response", "http-after-response",
+ "http-send-name-header", "block", "redirect", "use_backend",
+	"use-server", "force-persist", "ignore-persist",
+ "stick-table", "stick", "stats", "option", "default_backend",
+ "http-reuse", "monitor", "transparent", "maxconn", "backlog",
+ "fullconn", "dispatch", "balance", "log-balance", "hash-type",
+ "hash-balance-factor", "unique-id-format", "unique-id-header",
+ "log-format", "log-format-sd", "log-tag", "log", "source", "usesrc",
+ "error-log-format",
+ NULL /* must be last */
+};
+
+static const char *common_options[] = {
+ "httpclose", "http-server-close", "http-keep-alive",
+ "redispatch", "httplog", "tcplog", "tcpka", "httpchk",
+ "ssl-hello-chk", "smtpchk", "pgsql-check", "redis-check",
+ "mysql-check", "ldap-check", "spop-check", "tcp-check",
+ "external-check", "forwardfor", "original-to", "forwarded",
+ NULL /* must be last */
+};
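+/* For instance, a configuration containing the hypothetical typo "bakend"
+ * would be reported with a "did you mean 'backend' maybe ?" suggestion
+ * built from the lists above.
+ */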
+
+/* Report a warning if a rule is placed after a 'tcp-request session' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_tcp_sess(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->tcp_req.l5_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'tcp-request session' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a 'tcp-request content' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_tcp_cont(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->tcp_req.inspect_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'tcp-request content' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a 'monitor fail' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_monitor(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->mon_fail_cond)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'monitor fail' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after an 'http_request' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_http_req(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->http_req_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after an 'http-request' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a redirect rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_redirect(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->redirect_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'redirect' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a 'use_backend' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_use_backend(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->switching_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'use_backend' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* Report a warning if a rule is placed after a 'use-server' rule.
+ * Return 1 if the warning has been emitted, otherwise 0.
+ */
+int warnif_rule_after_use_server(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ if (!LIST_ISEMPTY(&proxy->server_rules)) {
+ ha_warning("parsing [%s:%d] : a '%s' rule placed after a 'use-server' rule will still be processed before.\n",
+ file, line, arg);
+ return 1;
+ }
+ return 0;
+}
+
+/* report a warning if a redirect rule is dangerously placed */
+int warnif_misplaced_redirect(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_use_backend(proxy, file, line, arg) ||
+ warnif_rule_after_use_server(proxy, file, line, arg);
+}
+
+/* report a warning if an http-request rule is dangerously placed */
+int warnif_misplaced_http_req(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_redirect(proxy, file, line, arg) ||
+ warnif_misplaced_redirect(proxy, file, line, arg);
+}
+
+/* report a warning if a 'monitor fail' rule is dangerously placed */
+int warnif_misplaced_monitor(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_http_req(proxy, file, line, arg) ||
+ warnif_misplaced_http_req(proxy, file, line, arg);
+}
+
+/* report a warning if a "tcp request content" rule is dangerously placed */
+int warnif_misplaced_tcp_cont(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_monitor(proxy, file, line, arg) ||
+ warnif_misplaced_monitor(proxy, file, line, arg);
+}
+
+/* report a warning if a "tcp request session" rule is dangerously placed */
+int warnif_misplaced_tcp_sess(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_tcp_cont(proxy, file, line, arg) ||
+ warnif_misplaced_tcp_cont(proxy, file, line, arg);
+}
+
+/* report a warning if a "tcp request connection" rule is dangerously placed */
+int warnif_misplaced_tcp_conn(struct proxy *proxy, const char *file, int line, const char *arg)
+{
+ return warnif_rule_after_tcp_sess(proxy, file, line, arg) ||
+ warnif_misplaced_tcp_sess(proxy, file, line, arg);
+}
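+/* The checks above chain up following the evaluation order of the rule sets:
+ * 'tcp-request connection' -> 'tcp-request session' -> 'tcp-request content'
+ * -> 'monitor fail' -> 'http-request' -> 'redirect' -> 'use_backend'/'use-server'.
+ * A rule placed after a later stage in the configuration is still evaluated
+ * before it, which is what these warnings report.
+ */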
+
+int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
+{
+ static struct proxy *curr_defproxy = NULL;
+ static struct proxy *last_defproxy = NULL;
+ const char *err;
+ int rc;
+ int err_code = 0;
+ struct acl_cond *cond = NULL;
+ char *errmsg = NULL;
+ struct bind_conf *bind_conf;
+
+ if (!last_defproxy) {
+ /* we need a default proxy and none was created yet */
+ last_defproxy = alloc_new_proxy("", PR_CAP_DEF|PR_CAP_LISTEN, &errmsg);
+
+ curr_defproxy = last_defproxy;
+ if (!last_defproxy) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+
+ if (strcmp(args[0], "listen") == 0)
+ rc = PR_CAP_LISTEN | PR_CAP_LB;
+ else if (strcmp(args[0], "frontend") == 0)
+ rc = PR_CAP_FE | PR_CAP_LB;
+ else if (strcmp(args[0], "backend") == 0)
+ rc = PR_CAP_BE | PR_CAP_LB;
+ else if (strcmp(args[0], "defaults") == 0) {
+ /* "defaults" must first delete the last no-name defaults if any */
+ curr_defproxy = NULL;
+ rc = PR_CAP_DEF | PR_CAP_LISTEN;
+ }
+ else
+ rc = PR_CAP_NONE;
+
+ if ((rc & PR_CAP_LISTEN) && !(rc & PR_CAP_DEF)) { /* new proxy */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an <id> argument\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ curproxy = (rc & PR_CAP_FE) ? proxy_fe_by_name(args[1]) : proxy_be_by_name(args[1]);
+ if (curproxy) {
+ ha_alert("Parsing [%s:%d]: %s '%s' has the same name as %s '%s' declared at %s:%d.\n",
+ file, linenum, proxy_cap_str(rc), args[1], proxy_type_str(curproxy),
+ curproxy->id, curproxy->conf.file, curproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ curproxy = log_forward_by_name(args[1]);
+ if (curproxy) {
+ ha_alert("Parsing [%s:%d]: %s '%s' has the same name as log forward section '%s' declared at %s:%d.\n",
+ file, linenum, proxy_cap_str(rc), args[1],
+ curproxy->id, curproxy->conf.file, curproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[2] && (!*args[3] || strcmp(args[2], "from") != 0)) ||
+ alertif_too_many_args(3, file, linenum, args, &err_code)) {
+ if (rc & PR_CAP_FE)
+ ha_alert("parsing [%s:%d] : please use the 'bind' keyword for listening addresses.\n", file, linenum);
+ goto out;
+ }
+ }
+
+ if (rc & PR_CAP_LISTEN) { /* new proxy or defaults section */
+ const char *name = args[1];
+ int arg = 2;
+
+ if (rc & PR_CAP_DEF && strcmp(args[1], "from") == 0 && *args[2] && !*args[3]) {
+ // also support "defaults from blah" (no name then)
+ arg = 1;
+ name = "";
+ }
+
+ /* only regular proxies inherit from the previous defaults section */
+ if (!(rc & PR_CAP_DEF))
+ curr_defproxy = last_defproxy;
+
+ if (strcmp(args[arg], "from") == 0) {
+ struct ebpt_node *next_by_name;
+
+ curr_defproxy = proxy_find_by_name(args[arg+1], PR_CAP_DEF, 0);
+
+ if (!curr_defproxy) {
+ ha_alert("parsing [%s:%d] : defaults section '%s' not found for %s '%s'.\n", file, linenum, args[arg+1], proxy_cap_str(rc), name);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if ((next_by_name = ebpt_next_dup(&curr_defproxy->conf.by_name))) {
+ struct proxy *px2 = container_of(next_by_name, struct proxy, conf.by_name);
+
+ ha_alert("parsing [%s:%d] : ambiguous defaults section name '%s' referenced by %s '%s' exists at least at %s:%d and %s:%d.\n",
+ file, linenum, args[arg+1], proxy_cap_str(rc), name,
+ curr_defproxy->conf.file, curr_defproxy->conf.line, px2->conf.file, px2->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ err = invalid_char(args[arg+1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in defaults section name '%s' when designated by its name (section found at %s:%d).\n",
+ file, linenum, *err, args[arg+1], curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ curr_defproxy->flags |= PR_FL_EXPLICIT_REF;
+ }
+ else if (curr_defproxy)
+ curr_defproxy->flags |= PR_FL_IMPLICIT_REF;
+
+ if (curr_defproxy && (curr_defproxy->flags & (PR_FL_EXPLICIT_REF|PR_FL_IMPLICIT_REF)) == (PR_FL_EXPLICIT_REF|PR_FL_IMPLICIT_REF)) {
+ ha_warning("parsing [%s:%d] : defaults section '%s' (declared at %s:%d) is explicitly referenced by another proxy and implicitly used here."
+				   " To avoid any ambiguity, don't mix both usages: add a final defaults section that is never explicitly referenced, or always use explicit references.\n",
+ file, linenum, curr_defproxy->id, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_WARN;
+ }
+
+ curproxy = parse_new_proxy(name, rc, file, linenum, curr_defproxy);
+ if (!curproxy) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curr_defproxy && (!LIST_ISEMPTY(&curr_defproxy->http_req_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->http_res_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->http_after_res_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_req.l4_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_req.l5_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_req.inspect_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_rep.inspect_rules))) {
+ /* If the current default proxy defines TCP/HTTP rules, the
+ * current proxy will keep a reference on it. But some sanity
+ * checks are performed first:
+ *
+ * - It cannot be used to init a defaults section
+ * - It cannot be used to init a listen section
+ * - It cannot be used to init backend and frontend sections at
+ * same time. It can be used to init several sections of the
+ * same type only.
+ * - It cannot define L4/L5 TCP rules if it is used to init
+ * backend sections.
+ * - It cannot define 'tcp-response content' rules if it
+ * is used to init frontend sections.
+ *
+ * If no error is found, refcount of the default proxy is incremented.
+ */
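+			/* For example, with hypothetical section names, the
+			 * following would be rejected below because a listen
+			 * section inherits from a rule-defining defaults section:
+			 *   defaults tcp-rules
+			 *       tcp-request content accept
+			 *   listen l1 from tcp-rules
+			 */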
+
+			/* Note: add tcpcheck_rules too if unresolved args become allowed in defaults sections */
+ if (rc & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: a defaults section cannot inherit from a defaults section defining TCP/HTTP rules (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else if ((rc & PR_CAP_LISTEN) == PR_CAP_LISTEN) {
+ ha_alert("parsing [%s:%d]: a listen section cannot inherit from a defaults section defining TCP/HTTP rules.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else {
+ char defcap = (curr_defproxy->cap & PR_CAP_LISTEN);
+
+ if ((defcap == PR_CAP_BE || defcap == PR_CAP_FE) && (rc & PR_CAP_LISTEN) != defcap) {
+ ha_alert("parsing [%s:%d]: frontends and backends cannot inherit from the same defaults section"
+ " if it defines TCP/HTTP rules (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else if (!(rc & PR_CAP_FE) && (!LIST_ISEMPTY(&curr_defproxy->tcp_req.l4_rules) ||
+ !LIST_ISEMPTY(&curr_defproxy->tcp_req.l5_rules))) {
+ ha_alert("parsing [%s:%d]: a backend section cannot inherit from a defaults section defining"
+ " 'tcp-request connection' or 'tcp-request session' rules (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else if (!(rc & PR_CAP_BE) && !LIST_ISEMPTY(&curr_defproxy->tcp_rep.inspect_rules)) {
+ ha_alert("parsing [%s:%d]: a frontend section cannot inherit from a defaults section defining"
+ " 'tcp-response content' rules (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ else {
+ curr_defproxy->cap = (curr_defproxy->cap & ~PR_CAP_LISTEN) | (rc & PR_CAP_LISTEN);
+ proxy_ref_defaults(curproxy, curr_defproxy);
+ }
+ }
+ }
+
+ if (curr_defproxy && (curr_defproxy->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) &&
+ (curproxy->cap & PR_CAP_LISTEN) == PR_CAP_BE) {
+ /* If the current default proxy defines tcpcheck rules, the
+			 * current proxy will keep a reference on it, but only if the
+ * current proxy has the backend capability.
+ */
+ proxy_ref_defaults(curproxy, curr_defproxy);
+ }
+
+ if ((rc & PR_CAP_BE) && curr_defproxy && (curr_defproxy->nb_req_cap || curr_defproxy->nb_rsp_cap)) {
+			ha_alert("parsing [%s:%d]: backend or defaults sections cannot inherit from a defaults section defining"
+				 " captures (defaults section at %s:%d).\n",
+ file, linenum, curr_defproxy->conf.file, curr_defproxy->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+
+ if (rc & PR_CAP_DEF) {
+ /* last and current proxies must be updated to this one */
+ curr_defproxy = last_defproxy = curproxy;
+ } else {
+ /* regular proxies are in a list */
+ curproxy->next = proxies_list;
+ proxies_list = curproxy;
+ }
+ goto out;
+ }
+ else if (curproxy == NULL) {
+ ha_alert("parsing [%s:%d] : 'listen' or 'defaults' expected.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* update the current file and line being parsed */
+ curproxy->conf.args.file = curproxy->conf.file;
+ curproxy->conf.args.line = linenum;
+
+ /* Now let's parse the proxy-specific keywords */
+ if ((strcmp(args[0], "server") == 0)) {
+ err_code |= parse_server(file, linenum, args,
+ curproxy, curr_defproxy,
+ SRV_PARSE_PARSE_ADDR);
+
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[0], "default-server") == 0) {
+ err_code |= parse_server(file, linenum, args,
+ curproxy, curr_defproxy,
+ SRV_PARSE_DEFAULT_SERVER);
+
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[0], "server-template") == 0) {
+ err_code |= parse_server(file, linenum, args,
+ curproxy, curr_defproxy,
+ SRV_PARSE_TEMPLATE|SRV_PARSE_PARSE_ADDR);
+
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[0], "bind") == 0) { /* new listen addresses */
+ struct listener *l;
+ int cur_arg;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects {<path>|[addr1]:port1[-end1]}{,[addr]:port[-end]}... as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ bind_conf = bind_conf_alloc(curproxy, file, linenum, args[1], xprt_get(XPRT_RAW));
+ if (!bind_conf)
+ goto alloc_error;
+
+ /* use default settings for unix sockets */
+ bind_conf->settings.ux.uid = global.unix_bind.ux.uid;
+ bind_conf->settings.ux.gid = global.unix_bind.ux.gid;
+ bind_conf->settings.ux.mode = global.unix_bind.ux.mode;
+
+ /* NOTE: the following line might create several listeners if there
+ * are comma-separated IPs or port ranges. So all further processing
+ * will have to be applied to all listeners created after last_listen.
+ */
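+		/* e.g., hypothetical lines such as "bind 10.0.0.1:80,10.0.0.2:80"
+		 * or "bind :8000-8009" create several listeners at once.
+		 */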
+ if (!str2listener(args[1], curproxy, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s' : %s\n", file, linenum, args[0], errmsg);
+ }
+ else
+ ha_alert("parsing [%s:%d] : '%s' : error encountered while parsing listening address '%s'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+			/* account for one more socket per created listener in the global limit */
+ global.maxsock++;
+ }
+
+ cur_arg = 2;
+ err_code |= bind_parse_args_list(bind_conf, args, cur_arg, cursection, file, linenum);
+ goto out;
+ }
+ else if (strcmp(args[0], "monitor-net") == 0) { /* set the range of IPs to ignore */
+ ha_alert("parsing [%s:%d] : 'monitor-net' doesn't exist anymore. Please use 'http-request return status 200 if { src %s }' instead.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "monitor-uri") == 0) { /* set the URI to intercept */
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (!*args[1]) {
+			ha_alert("parsing [%s:%d] : '%s' expects a URI.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ istfree(&curproxy->monitor_uri);
+ curproxy->monitor_uri = istdup(ist(args[1]));
+ if (!isttest(curproxy->monitor_uri))
+ goto alloc_error;
+
+ goto out;
+ }
+ else if (strcmp(args[0], "mode") == 0) { /* sets the proxy mode */
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (strcmp(args[1], "http") == 0) curproxy->mode = PR_MODE_HTTP;
+ else if (strcmp(args[1], "tcp") == 0) curproxy->mode = PR_MODE_TCP;
+ else if (strcmp(args[1], "log") == 0 && (curproxy->cap & PR_CAP_BE)) curproxy->mode = PR_MODE_SYSLOG;
+ else if (strcmp(args[1], "health") == 0) {
+ ha_alert("parsing [%s:%d] : 'mode health' doesn't exist anymore. Please use 'http-request return status 200' instead.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown proxy mode '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "id") == 0) {
+ struct eb32_node *node;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: '%s' not allowed in 'defaults' section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->uuid = atol(args[1]);
+ curproxy->conf.id.key = curproxy->uuid;
+ curproxy->options |= PR_O_FORCED_ID;
+
+ if (curproxy->uuid <= 0) {
+ ha_alert("parsing [%s:%d]: custom id has to be > 0.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ node = eb32_lookup(&used_proxy_id, curproxy->uuid);
+ if (node) {
+ struct proxy *target = container_of(node, struct proxy, conf.id);
+ ha_alert("parsing [%s:%d]: %s %s reuses same custom id as %s %s (declared at %s:%d).\n",
+ file, linenum, proxy_type_str(curproxy), curproxy->id,
+ proxy_type_str(target), target->id, target->conf.file, target->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ eb32_insert(&used_proxy_id, &curproxy->conf.id);
+ }
+ else if (strcmp(args[0], "description") == 0) {
+ int i, len=0;
+ char *d;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: '%s' not allowed in 'defaults' section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+		if (!*args[1]) {
+			ha_alert("parsing [%s:%d]: '%s' expects a string argument.\n",
+				 file, linenum, args[0]);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
+
+ for (i = 1; *args[i]; i++)
+ len += strlen(args[i]) + 1;
+
+ d = calloc(1, len);
+ if (!d)
+ goto alloc_error;
+ curproxy->desc = d;
+
+ d += snprintf(d, curproxy->desc + len - d, "%s", args[1]);
+ for (i = 2; *args[i]; i++)
+ d += snprintf(d, curproxy->desc + len - d, " %s", args[i]);
+
+ }
+ else if (strcmp(args[0], "disabled") == 0) { /* disables this proxy */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ curproxy->flags |= PR_FL_DISABLED;
+ }
+ else if (strcmp(args[0], "enabled") == 0) { /* enables this proxy (used to revert a disabled default) */
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ curproxy->flags &= ~PR_FL_DISABLED;
+ }
+ else if (strcmp(args[0], "bind-process") == 0) { /* enable this proxy only on some processes */
+ ha_alert("parsing [%s:%d]: '%s' is not supported anymore.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ else if (strcmp(args[0], "acl") == 0) { /* add an ACL */
+ if ((curproxy->cap & PR_CAP_DEF) && strlen(curproxy->id) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in anonymous 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in acl name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcasecmp(args[1], "or") == 0) {
+ ha_alert("parsing [%s:%d] : acl name '%s' will never match. 'or' is used to express a "
+ "logical disjunction within a condition.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (parse_acl((const char **)args + 1, &curproxy->acl, &errmsg, &curproxy->conf.args, file, linenum) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing ACL '%s' : %s.\n",
+ file, linenum, args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "dynamic-cookie-key") == 0) { /* Dynamic cookies secret key */
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects <secret_key> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->dyncookie_key);
+ curproxy->dyncookie_key = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "cookie") == 0) { /* cookie name */
+ int cur_arg;
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects <cookie_name> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->ck_opts = 0;
+ curproxy->cookie_maxidle = curproxy->cookie_maxlife = 0;
+ ha_free(&curproxy->cookie_domain);
+ free(curproxy->cookie_name);
+ curproxy->cookie_name = strdup(args[1]);
+ if (!curproxy->cookie_name)
+ goto alloc_error;
+ curproxy->cookie_len = strlen(curproxy->cookie_name);
+
+ cur_arg = 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "rewrite") == 0) {
+ curproxy->ck_opts |= PR_CK_RW;
+ }
+ else if (strcmp(args[cur_arg], "indirect") == 0) {
+ curproxy->ck_opts |= PR_CK_IND;
+ }
+ else if (strcmp(args[cur_arg], "insert") == 0) {
+ curproxy->ck_opts |= PR_CK_INS;
+ }
+ else if (strcmp(args[cur_arg], "nocache") == 0) {
+ curproxy->ck_opts |= PR_CK_NOC;
+ }
+ else if (strcmp(args[cur_arg], "postonly") == 0) {
+ curproxy->ck_opts |= PR_CK_POST;
+ }
+ else if (strcmp(args[cur_arg], "preserve") == 0) {
+ curproxy->ck_opts |= PR_CK_PSV;
+ }
+ else if (strcmp(args[cur_arg], "prefix") == 0) {
+ curproxy->ck_opts |= PR_CK_PFX;
+ }
+ else if (strcmp(args[cur_arg], "httponly") == 0) {
+ curproxy->ck_opts |= PR_CK_HTTPONLY;
+ }
+ else if (strcmp(args[cur_arg], "secure") == 0) {
+ curproxy->ck_opts |= PR_CK_SECURE;
+ }
+ else if (strcmp(args[cur_arg], "domain") == 0) {
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <domain> as argument.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!strchr(args[cur_arg + 1], '.')) {
+ /* rfc6265, 5.2.3 The Domain Attribute */
+ ha_warning("parsing [%s:%d]: domain '%s' contains no embedded dot,"
+ " this configuration may not work properly (see RFC6265#5.2.3).\n",
+ file, linenum, args[cur_arg + 1]);
+ err_code |= ERR_WARN;
+ }
+
+ err = invalid_domainchar(args[cur_arg + 1]);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in domain name '%s'.\n",
+ file, linenum, *err, args[cur_arg + 1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!curproxy->cookie_domain) {
+ curproxy->cookie_domain = strdup(args[cur_arg + 1]);
+ } else {
+ /* one domain was already specified, add another one by
+ * building the string which will be returned along with
+ * the cookie.
+ */
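+					/* e.g., hypothetical domains 'a.tld' then 'b.tld'
+					 * yield the string "a.tld; domain=b.tld".
+					 */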
+ memprintf(&curproxy->cookie_domain, "%s; domain=%s", curproxy->cookie_domain, args[cur_arg+1]);
+ }
+
+ if (!curproxy->cookie_domain)
+ goto alloc_error;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "maxidle") == 0) {
+ unsigned int maxidle;
+ const char *res;
+
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <idletime> in seconds as argument.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ res = parse_time_err(args[cur_arg + 1], &maxidle, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 s (~68 years).\n",
+ file, linenum, args[cur_arg+1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 s.\n",
+ file, linenum, args[cur_arg+1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s>.\n",
+ file, linenum, *res, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->cookie_maxidle = maxidle;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "maxlife") == 0) {
+ unsigned int maxlife;
+ const char *res;
+
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <lifetime> in seconds as argument.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+
+ res = parse_time_err(args[cur_arg + 1], &maxlife, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 s (~68 years).\n",
+ file, linenum, args[cur_arg+1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 s.\n",
+ file, linenum, args[cur_arg+1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s>.\n",
+ file, linenum, *res, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->cookie_maxlife = maxlife;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "dynamic") == 0) { /* Dynamic persistent cookies secret key */
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[cur_arg], NULL))
+ err_code |= ERR_WARN;
+ curproxy->ck_opts |= PR_CK_DYNAMIC;
+ }
+ else if (strcmp(args[cur_arg], "attr") == 0) {
+ char *val;
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <value> as argument.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ val = args[cur_arg + 1];
+ while (*val) {
+ if (iscntrl((unsigned char)*val) || *val == ';') {
+							ha_alert("parsing [%s:%d]: character '0x%02X' is not permitted in attribute value.\n",
+								 file, linenum, (unsigned char)*val);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ val++;
+ }
+ /* don't add ';' for the first attribute */
+ if (!curproxy->cookie_attrs)
+ curproxy->cookie_attrs = strdup(args[cur_arg + 1]);
+ else
+ memprintf(&curproxy->cookie_attrs, "%s; %s", curproxy->cookie_attrs, args[cur_arg + 1]);
+
+ if (!curproxy->cookie_attrs)
+ goto alloc_error;
+ cur_arg++;
+ }
+
+ else {
+				ha_alert("parsing [%s:%d] : '%s' supports 'rewrite', 'insert', 'prefix', 'indirect', 'nocache', 'postonly', 'preserve', 'httponly', 'secure', 'domain', 'maxidle', 'maxlife', 'dynamic' and 'attr' options.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cur_arg++;
+ }
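+		/* POWEROF2() also holds for 0, so each test below only triggers
+		 * when at least two of the masked modes are set together.
+		 */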
+ if (!POWEROF2(curproxy->ck_opts & (PR_CK_RW|PR_CK_IND))) {
+ ha_alert("parsing [%s:%d] : cookie 'rewrite' and 'indirect' modes are incompatible.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!POWEROF2(curproxy->ck_opts & (PR_CK_RW|PR_CK_INS|PR_CK_PFX))) {
+ ha_alert("parsing [%s:%d] : cookie 'rewrite', 'insert' and 'prefix' modes are incompatible.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((curproxy->ck_opts & (PR_CK_PSV | PR_CK_INS | PR_CK_IND)) == PR_CK_PSV) {
+ ha_alert("parsing [%s:%d] : cookie 'preserve' requires at least 'insert' or 'indirect'.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }/* end else if (!strcmp(args[0], "cookie")) */
+ else if (strcmp(args[0], "email-alert") == 0) {
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
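+		/* e.g., hypothetical settings: "email-alert from ha@example.com",
+		 * "email-alert to ops@example.com", "email-alert mailers mymailers",
+		 * "email-alert level notice"
+		 */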
+ if (strcmp(args[1], "from") == 0) {
+			if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->email_alert.from);
+ curproxy->email_alert.from = strdup(args[2]);
+ if (!curproxy->email_alert.from)
+ goto alloc_error;
+ }
+ else if (strcmp(args[1], "mailers") == 0) {
+			if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->email_alert.mailers.name);
+ curproxy->email_alert.mailers.name = strdup(args[2]);
+ if (!curproxy->email_alert.mailers.name)
+ goto alloc_error;
+ }
+ else if (strcmp(args[1], "myhostname") == 0) {
+			if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->email_alert.myhostname);
+ curproxy->email_alert.myhostname = strdup(args[2]);
+ if (!curproxy->email_alert.myhostname)
+ goto alloc_error;
+ }
+ else if (strcmp(args[1], "level") == 0) {
+ curproxy->email_alert.level = get_log_level(args[2]);
+ if (curproxy->email_alert.level < 0) {
+				ha_alert("parsing [%s:%d] : unknown log level '%s' after '%s'\n",
+					 file, linenum, args[2], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[1], "to") == 0) {
+			if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing argument after '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->email_alert.to);
+ curproxy->email_alert.to = strdup(args[2]);
+ if (!curproxy->email_alert.to)
+ goto alloc_error;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : email-alert: unknown argument '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ /* Indicate that the email_alert is at least partially configured */
+ curproxy->email_alert.set = 1;
+ }/* end else if (!strcmp(args[0], "email-alert")) */
+ else if (strcmp(args[0], "persist") == 0) { /* persist */
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : missing persist method.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!strncmp(args[1], "rdp-cookie", 10)) {
+ curproxy->options2 |= PR_O2_RDPC_PRST;
+
+ if (*(args[1] + 10) == '(') { /* cookie name */
+ const char *beg, *end;
+
+ beg = args[1] + 11;
+ end = strchr(beg, ')');
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (!end || end == beg) {
+					ha_alert("parsing [%s:%d] : 'persist rdp-cookie(name)' requires an rdp cookie name.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ free(curproxy->rdp_cookie_name);
+ curproxy->rdp_cookie_name = my_strndup(beg, end - beg);
+ if (!curproxy->rdp_cookie_name)
+ goto alloc_error;
+ curproxy->rdp_cookie_len = end-beg;
+ }
+ else if (*(args[1] + 10) == '\0') { /* default cookie name 'msts' */
+ free(curproxy->rdp_cookie_name);
+ curproxy->rdp_cookie_name = strdup("msts");
+ if (!curproxy->rdp_cookie_name)
+ goto alloc_error;
+ curproxy->rdp_cookie_len = strlen(curproxy->rdp_cookie_name);
+ }
+ else { /* syntax */
+				ha_alert("parsing [%s:%d] : 'persist rdp-cookie(name)' requires an rdp cookie name.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown persist method.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "appsession") == 0) { /* cookie name */
+ ha_alert("parsing [%s:%d] : '%s' is not supported anymore since HAProxy 1.6.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "load-server-state-from-file") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+ if (strcmp(args[1], "global") == 0) { /* use the file pointed to by global server-state-file directive */
+ curproxy->load_server_state_from_file = PR_SRV_STATE_FILE_GLOBAL;
+ }
+ else if (strcmp(args[1], "local") == 0) { /* use the server-state-file-name variable to locate the server-state file */
+ curproxy->load_server_state_from_file = PR_SRV_STATE_FILE_LOCAL;
+ }
+ else if (strcmp(args[1], "none") == 0) { /* don't use server-state-file directive for this backend */
+ curproxy->load_server_state_from_file = PR_SRV_STATE_FILE_NONE;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'global', 'local' or 'none'. Got '%s'\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "server-state-file-name") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ ha_free(&curproxy->server_state_file_name);
+
+ if (*(args[1]) == 0 || strcmp(args[1], "use-backend-name") == 0)
+ curproxy->server_state_file_name = strdup(curproxy->id);
+ else
+ curproxy->server_state_file_name = strdup(args[1]);
+
+ if (!curproxy->server_state_file_name)
+ goto alloc_error;
+ }
+ else if (strcmp(args[0], "max-session-srv-conns") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+ if (*(args[1]) == 0) {
+			ha_alert("parsing [%s:%d] : '%s' expects a number, got no argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->max_out_conns = atoi(args[1]);
+ }
+ else if (strcmp(args[0], "capture") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "cookie") == 0) { /* name of a cookie to capture */
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s %s' not allowed in 'defaults' section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args_idx(4, 1, file, linenum, args, &err_code))
+ goto out;
+
+			if (strcmp(args[3], "len") != 0 || *(args[4]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects 'cookie' <cookie_name> 'len' <len>.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->capture_name);
+ curproxy->capture_name = strdup(args[2]);
+ if (!curproxy->capture_name)
+ goto alloc_error;
+ curproxy->capture_namelen = strlen(curproxy->capture_name);
+ curproxy->capture_len = atol(args[4]);
+ curproxy->to_log |= LW_COOKIE;
+ }
+ else if (strcmp(args[1], "request") == 0 && strcmp(args[2], "header") == 0) {
+ struct cap_hdr *hdr;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s %s' not allowed in 'defaults' section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args_idx(4, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[3]) == 0 || strcmp(args[4], "len") != 0 || *(args[5]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects 'header' <header_name> 'len' <len>.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr)
+ goto req_caphdr_alloc_error;
+ hdr->next = curproxy->req_cap;
+ hdr->name = strdup(args[3]);
+ if (!hdr->name)
+ goto req_caphdr_alloc_error;
+ hdr->namelen = strlen(args[3]);
+ hdr->len = atol(args[5]);
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+ if (!hdr->pool) {
+ req_caphdr_alloc_error:
+ if (hdr)
+ ha_free(&hdr->name);
+ ha_free(&hdr);
+ goto alloc_error;
+ }
+ hdr->index = curproxy->nb_req_cap++;
+ curproxy->req_cap = hdr;
+ curproxy->to_log |= LW_REQHDR;
+ }
+ else if (strcmp(args[1], "response") == 0 && strcmp(args[2], "header") == 0) {
+ struct cap_hdr *hdr;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s %s' not allowed in 'defaults' section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args_idx(4, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[3]) == 0 || strcmp(args[4], "len") != 0 || *(args[5]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects 'header' <header_name> 'len' <len>.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr)
+ goto res_caphdr_alloc_error;
+ hdr->next = curproxy->rsp_cap;
+ hdr->name = strdup(args[3]);
+ if (!hdr->name)
+ goto res_caphdr_alloc_error;
+ hdr->namelen = strlen(args[3]);
+ hdr->len = atol(args[5]);
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+ if (!hdr->pool) {
+ res_caphdr_alloc_error:
+ if (hdr)
+ ha_free(&hdr->name);
+ ha_free(&hdr);
+ goto alloc_error;
+ }
+ hdr->index = curproxy->nb_rsp_cap++;
+ curproxy->rsp_cap = hdr;
+ curproxy->to_log |= LW_RSPHDR;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'cookie' or 'request header' or 'response header'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
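+ /* Illustrative frontend captures matching the three forms parsed
+ * above (cookie and header names are hypothetical):
+ * capture cookie JSESSIONID len 32
+ * capture request header Host len 64
+ * capture response header Content-Length len 10
+ */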
+ else if (strcmp(args[0], "retries") == 0) { /* connection retries */
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument (dispatch counts for one).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->conn_retries = atol(args[1]);
+ }
+ else if (strcmp(args[0], "http-request") == 0) { /* request access control: allow/deny/auth */
+ struct act_rule *rule;
+ int where = 0;
+
+ if ((curproxy->cap & PR_CAP_DEF) && strlen(curproxy->id) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in anonymous 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_req_rules) &&
+ !LIST_PREV(&curproxy->http_req_rules, struct act_rule *, list)->cond &&
+ (LIST_PREV(&curproxy->http_req_rules, struct act_rule *, list)->flags & ACT_FLAG_FINAL)) {
+ ha_warning("parsing [%s:%d]: previous '%s' action is final and has no condition attached, further entries are NOOP.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ rule = parse_http_req_cond((const char **)args + 1, file, linenum, curproxy);
+
+ if (!rule) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err_code |= warnif_misplaced_http_req(curproxy, file, linenum, args[0]);
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+
+ LIST_APPEND(&curproxy->http_req_rules, &rule->list);
+ }
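+ /* Illustrative rules for this keyword and the two related ones
+ * parsed below (ACL and header names are hypothetical):
+ * http-request deny if bad_src
+ * http-response set-header X-Served-By %[srv_name]
+ * http-after-response set-header X-Request-Id %[unique-id]
+ */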
+ else if (strcmp(args[0], "http-response") == 0) { /* response access control */
+ struct act_rule *rule;
+ int where = 0;
+
+ if ((curproxy->cap & PR_CAP_DEF) && strlen(curproxy->id) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in anonymous 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_res_rules) &&
+ !LIST_PREV(&curproxy->http_res_rules, struct act_rule *, list)->cond &&
+ (LIST_PREV(&curproxy->http_res_rules, struct act_rule *, list)->flags & ACT_FLAG_FINAL)) {
+ ha_warning("parsing [%s:%d]: previous '%s' action is final and has no condition attached, further entries are NOOP.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ rule = parse_http_res_cond((const char **)args + 1, file, linenum, curproxy);
+
+ if (!rule) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRS_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRS_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+
+ LIST_APPEND(&curproxy->http_res_rules, &rule->list);
+ }
+ else if (strcmp(args[0], "http-after-response") == 0) {
+ struct act_rule *rule;
+ int where = 0;
+ if ((curproxy->cap & PR_CAP_DEF) && strlen(curproxy->id) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in anonymous 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_after_res_rules) &&
+ !LIST_PREV(&curproxy->http_after_res_rules, struct act_rule *, list)->cond &&
+ (LIST_PREV(&curproxy->http_after_res_rules, struct act_rule *, list)->flags & ACT_FLAG_FINAL)) {
+ ha_warning("parsing [%s:%d]: previous '%s' action is final and has no condition attached, further entries are NOOP.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ rule = parse_http_after_res_cond((const char **)args + 1, file, linenum, curproxy);
+
+ if (!rule) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRS_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRS_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+
+ LIST_APPEND(&curproxy->http_after_res_rules, &rule->list);
+ }
+ else if (strcmp(args[0], "http-send-name-header") == 0) { /* send server name in request header */
+ /* set the header name and length into the proxy structure */
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' requires a header string.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* set the desired header name, in lower case */
+ istfree(&curproxy->server_id_hdr_name);
+ curproxy->server_id_hdr_name = istdup(ist(args[1]));
+ if (!isttest(curproxy->server_id_hdr_name))
+ goto alloc_error;
+ ist2bin_lc(istptr(curproxy->server_id_hdr_name), curproxy->server_id_hdr_name);
+ }
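+ /* Illustrative (header name hypothetical): 'http-send-name-header
+ * x-srv' makes each request carry the target server's name in the
+ * "x-srv" header, stored lower-cased by the conversion above.
+ */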
+ else if (strcmp(args[0], "block") == 0) {
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. Use 'http-request deny' which uses the exact same syntax.\n", file, linenum, args[0]);
+
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "redirect") == 0) {
+ struct redirect_rule *rule;
+ int where = 0;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((rule = http_parse_redirect_rule(file, linenum, curproxy, (const char **)args + 1, &errmsg, 0, 0)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected in %s '%s' while parsing redirect rule : %s.\n",
+ file, linenum, proxy_type_str(curproxy), curproxy->id, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ LIST_APPEND(&curproxy->redirect_rules, &rule->list);
+ err_code |= warnif_misplaced_redirect(curproxy, file, linenum, args[0]);
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+ }
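+ /* Illustrative redirect rules (location and ACL are hypothetical):
+ * redirect scheme https code 301 if !{ ssl_fc }
+ * redirect location /maintenance.html if site_down
+ */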
+ else if (strcmp(args[0], "use_backend") == 0) {
+ struct switching_rule *rule;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a backend name.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[2], "if") == 0 || strcmp(args[2], "unless") == 0) {
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 2, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing switching rule : %s.\n",
+ file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_FE_SET_BCK, file, linenum);
+ }
+ else if (*args[2]) {
+ ha_alert("parsing [%s:%d] : unexpected keyword '%s' after switching rule, only 'if' and 'unless' are allowed.\n",
+ file, linenum, args[2]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ goto use_backend_alloc_error;
+ rule->cond = cond;
+ rule->be.name = strdup(args[1]);
+ if (!rule->be.name)
+ goto use_backend_alloc_error;
+ rule->line = linenum;
+ rule->file = strdup(file);
+ if (!rule->file) {
+ use_backend_alloc_error:
+ free_acl_cond(cond);
+ if (rule)
+ ha_free(&(rule->be.name));
+ ha_free(&rule);
+ goto alloc_error;
+ }
+ LIST_INIT(&rule->list);
+ LIST_APPEND(&curproxy->switching_rules, &rule->list);
+ }
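+ /* Illustrative switching rules (backend and ACL names hypothetical):
+ * use_backend static if { path_beg /static }
+ * use_backend api if is_api
+ */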
+ else if (strcmp(args[0], "use-server") == 0) {
+ struct server_rule *rule;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a server name.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[2], "if") != 0 && strcmp(args[2], "unless") != 0) {
+ ha_alert("parsing [%s:%d] : '%s' requires either 'if' or 'unless' followed by a condition.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 2, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing switching rule : %s.\n",
+ file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_BE_SET_SRV, file, linenum);
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ goto use_server_alloc_error;
+ rule->cond = cond;
+ rule->srv.name = strdup(args[1]);
+ if (!rule->srv.name)
+ goto use_server_alloc_error;
+ rule->line = linenum;
+ rule->file = strdup(file);
+ if (!rule->file) {
+ use_server_alloc_error:
+ free_acl_cond(cond);
+ if (rule)
+ ha_free(&(rule->srv.name));
+ ha_free(&rule);
+ goto alloc_error;
+ }
+ LIST_INIT(&rule->list);
+ LIST_APPEND(&curproxy->server_rules, &rule->list);
+ curproxy->be_req_ana |= AN_REQ_SRV_RULES;
+ }
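+ /* Illustrative, with hypothetical names; unlike use_backend, a
+ * condition is mandatory here:
+ * use-server srv1 if { req.cook(sticky) -m found }
+ */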
+ else if ((strcmp(args[0], "force-persist") == 0) ||
+ (strcmp(args[0], "ignore-persist") == 0)) {
+ struct persist_rule *rule;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "if") != 0 && strcmp(args[1], "unless") != 0) {
+ ha_alert("parsing [%s:%d] : '%s' requires either 'if' or 'unless' followed by a condition.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 1, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing a '%s' rule : %s.\n",
+ file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* note: BE_REQ_CNT is the first one after FE_SET_BCK, which is
+ * where force-persist is applied.
+ */
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_BE_REQ_CNT, file, linenum);
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ free_acl_cond(cond);
+ goto alloc_error;
+ }
+ rule->cond = cond;
+ if (strcmp(args[0], "force-persist") == 0) {
+ rule->type = PERSIST_TYPE_FORCE;
+ } else {
+ rule->type = PERSIST_TYPE_IGNORE;
+ }
+ LIST_INIT(&rule->list);
+ LIST_APPEND(&curproxy->persist_rules, &rule->list);
+ }
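+ /* Illustrative persistence overrides (ACLs hypothetical):
+ * force-persist if is_admin
+ * ignore-persist if { path_beg /static }
+ */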
+ else if (strcmp(args[0], "stick-table") == 0) {
+ struct stktable *other;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : 'stick-table' is not supported in 'defaults' section.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ other = stktable_find_by_name(curproxy->id);
+ if (other) {
+ ha_alert("parsing [%s:%d] : stick-table name '%s' conflicts with table declared in %s '%s' at %s:%d.\n",
+ file, linenum, curproxy->id,
+ other->proxy ? proxy_cap_str(other->proxy->cap) : "peers",
+ other->proxy ? other->id : other->peers.p->id,
+ other->conf.file, other->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->table = calloc(1, sizeof *curproxy->table);
+ if (!curproxy->table) {
+ ha_alert("parsing [%s:%d]: '%s %s' : memory allocation failed\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= parse_stick_table(file, linenum, args, curproxy->table,
+ curproxy->id, curproxy->id, NULL);
+ if (err_code & ERR_FATAL) {
+ ha_free(&curproxy->table);
+ goto out;
+ }
+
+ /* Store the proxy in the stick-table. */
+ curproxy->table->proxy = curproxy;
+
+ stktable_store_name(curproxy->table);
+ curproxy->table->next = stktables_list;
+ stktables_list = curproxy->table;
+
+ /* Add this proxy to the list of proxies which refer to its stick-table. */
+ if (curproxy->table->proxies_list != curproxy) {
+ curproxy->next_stkt_ref = curproxy->table->proxies_list;
+ curproxy->table->proxies_list = curproxy;
+ }
+ }
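+ /* Minimal sketch of a declaration accepted by parse_stick_table()
+ * (sizes and duration are arbitrary):
+ * stick-table type ip size 200k expire 30m store conn_cur
+ */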
+ else if (strcmp(args[0], "stick") == 0) {
+ struct sticking_rule *rule;
+ struct sample_expr *expr;
+ int myidx = 0;
+ const char *name = NULL;
+ int flags;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL)) {
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ myidx++;
+ if ((strcmp(args[myidx], "store") == 0) ||
+ (strcmp(args[myidx], "store-request") == 0)) {
+ myidx++;
+ flags = STK_IS_STORE;
+ }
+ else if (strcmp(args[myidx], "store-response") == 0) {
+ myidx++;
+ flags = STK_IS_STORE | STK_ON_RSP;
+ }
+ else if (strcmp(args[myidx], "match") == 0) {
+ myidx++;
+ flags = STK_IS_MATCH;
+ }
+ else if (strcmp(args[myidx], "on") == 0) {
+ myidx++;
+ flags = STK_IS_MATCH | STK_IS_STORE;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'on', 'match', or 'store'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (*(args[myidx]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a fetch method.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->conf.args.ctx = ARGC_STK;
+ expr = sample_parse_expr(args, &myidx, file, linenum, &errmsg, &curproxy->conf.args, NULL);
+ if (!expr) {
+ ha_alert("parsing [%s:%d] : '%s': %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (flags & STK_ON_RSP) {
+ if (!(expr->fetch->val & SMP_VAL_BE_STO_RUL)) {
+ ha_alert("parsing [%s:%d] : '%s': fetch method '%s' extracts information from '%s', none of which is available for 'store-response'.\n",
+ file, linenum, args[0], expr->fetch->kw, sample_src_names(expr->fetch->use));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(expr);
+ goto out;
+ }
+ } else {
+ if (!(expr->fetch->val & SMP_VAL_BE_SET_SRV)) {
+ ha_alert("parsing [%s:%d] : '%s': fetch method '%s' extracts information from '%s', none of which is available during request.\n",
+ file, linenum, args[0], expr->fetch->kw, sample_src_names(expr->fetch->use));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(expr);
+ goto out;
+ }
+ }
+
+ /* check if we need to allocate an http_txn struct for HTTP parsing */
+ curproxy->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+
+ if (strcmp(args[myidx], "table") == 0) {
+ myidx++;
+ name = args[myidx++];
+ }
+
+ if (strcmp(args[myidx], "if") == 0 || strcmp(args[myidx], "unless") == 0) {
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + myidx, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : '%s': error detected while parsing sticking condition : %s.\n",
+ file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(expr);
+ goto out;
+ }
+ }
+ else if (*(args[myidx])) {
+ ha_alert("parsing [%s:%d] : '%s': unknown keyword '%s'.\n",
+ file, linenum, args[0], args[myidx]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(expr);
+ goto out;
+ }
+ if (flags & STK_ON_RSP)
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_BE_STO_RUL, file, linenum);
+ else
+ err_code |= warnif_cond_conflicts(cond, SMP_VAL_BE_SET_SRV, file, linenum);
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ free_acl_cond(cond);
+ goto alloc_error;
+ }
+ rule->cond = cond;
+ rule->expr = expr;
+ rule->flags = flags;
+ rule->table.name = name ? strdup(name) : NULL;
+ LIST_INIT(&rule->list);
+ if (flags & STK_ON_RSP)
+ LIST_APPEND(&curproxy->storersp_rules, &rule->list);
+ else
+ LIST_APPEND(&curproxy->sticking_rules, &rule->list);
+ }
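+ /* Illustrative sticking rules covering the parsed forms (table and
+ * ACL names hypothetical):
+ * stick on src
+ * stick match src table sessions if !localhost
+ * stick store-response res.cook(SRVID)
+ */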
+ else if (strcmp(args[0], "stats") == 0) {
+ if (!(curproxy->cap & PR_CAP_DEF) && curproxy->uri_auth == curr_defproxy->uri_auth)
+ curproxy->uri_auth = NULL; /* we must detach from the default config */
+
+ if (!*args[1]) {
+ goto stats_error_parsing;
+ } else if (strcmp(args[1], "admin") == 0) {
+ struct stats_admin_rule *rule;
+ int where = 0;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: '%s %s' not allowed in 'defaults' section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!stats_check_init_uri_auth(&curproxy->uri_auth))
+ goto alloc_error;
+
+ if (strcmp(args[2], "if") != 0 && strcmp(args[2], "unless") != 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' requires either 'if' or 'unless' followed by a condition.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 2, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing a '%s %s' rule : %s.\n",
+ file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ err_code |= warnif_cond_conflicts(cond, where, file, linenum);
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ free_acl_cond(cond);
+ goto alloc_error;
+ }
+ rule->cond = cond;
+ LIST_INIT(&rule->list);
+ LIST_APPEND(&curproxy->uri_auth->admin_rules, &rule->list);
+ } else if (strcmp(args[1], "uri") == 0) {
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : 'uri' needs an URI prefix.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_set_uri(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "realm") == 0) {
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : 'realm' needs an realm name.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_set_realm(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "refresh") == 0) {
+ unsigned interval;
+
+ err = parse_time_err(args[2], &interval, TIME_UNIT_S);
+ if (err == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to stats refresh interval, maximum value is 2147483647 s (~68 years).\n",
+ file, linenum, args[2]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to stats refresh interval, minimum non-null value is 1 s.\n",
+ file, linenum, args[2]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to stats refresh interval.\n",
+ file, linenum, *err);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_set_refresh(&curproxy->uri_auth, interval))
+ goto alloc_error;
+ } else if (strcmp(args[1], "http-request") == 0) { /* request access control: allow/deny/auth */
+ struct act_rule *rule;
+ int where = 0;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d]: '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!stats_check_init_uri_auth(&curproxy->uri_auth))
+ goto alloc_error;
+
+ if (!LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules) &&
+ !LIST_PREV(&curproxy->uri_auth->http_req_rules, struct act_rule *, list)->cond) {
+ ha_warning("parsing [%s:%d]: previous '%s' action has no condition attached, further entries are NOOP.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_WARN;
+ }
+
+ rule = parse_http_req_cond((const char **)args + 2, file, linenum, curproxy);
+
+ if (!rule) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ err_code |= warnif_cond_conflicts(rule->cond, where, file, linenum);
+ LIST_APPEND(&curproxy->uri_auth->http_req_rules, &rule->list);
+
+ } else if (strcmp(args[1], "auth") == 0) {
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : 'auth' needs a user:password account.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_add_auth(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "scope") == 0) {
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : 'scope' needs a proxy name.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ } else if (!stats_add_scope(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "enable") == 0) {
+ if (!stats_check_init_uri_auth(&curproxy->uri_auth))
+ goto alloc_error;
+ } else if (strcmp(args[1], "hide-version") == 0) {
+ if (!stats_set_flag(&curproxy->uri_auth, STAT_HIDEVER))
+ goto alloc_error;
+ } else if (strcmp(args[1], "show-legends") == 0) {
+ if (!stats_set_flag(&curproxy->uri_auth, STAT_SHLGNDS))
+ goto alloc_error;
+ } else if (strcmp(args[1], "show-modules") == 0) {
+ if (!stats_set_flag(&curproxy->uri_auth, STAT_SHMODULES))
+ goto alloc_error;
+ } else if (strcmp(args[1], "show-node") == 0) {
+
+ if (*args[2]) {
+ int i;
+ char c;
+
+ for (i=0; args[2][i]; i++) {
+ c = args[2][i];
+ if (!isupper((unsigned char)c) && !islower((unsigned char)c) &&
+ !isdigit((unsigned char)c) && c != '_' && c != '-' && c != '.')
+ break;
+ }
+
+ if (!i || args[2][i]) {
+ ha_alert("parsing [%s:%d]: '%s %s' invalid node name - should be a string"
+ "with digits(0-9), letters(A-Z, a-z), hyphen(-) or underscode(_).\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if (!stats_set_node(&curproxy->uri_auth, args[2]))
+ goto alloc_error;
+ } else if (strcmp(args[1], "show-desc") == 0) {
+ char *desc = NULL;
+
+ if (*args[2]) {
+ int i, len=0;
+ char *d;
+
+ for (i = 2; *args[i]; i++)
+ len += strlen(args[i]) + 1;
+
+ desc = d = calloc(1, len);
+ if (!desc)
+ goto alloc_error;
+
+ d += snprintf(d, desc + len - d, "%s", args[2]);
+ for (i = 3; *args[i]; i++)
+ d += snprintf(d, desc + len - d, " %s", args[i]);
+ }
+
+ if (!*args[2] && !global.desc)
+ ha_warning("parsing [%s:%d]: '%s' requires a parameter or 'desc' to be set in the global section.\n",
+ file, linenum, args[1]);
+ else {
+ if (!stats_set_desc(&curproxy->uri_auth, desc)) {
+ free(desc);
+ goto alloc_error;
+ }
+ free(desc);
+ }
+ } else {
+stats_error_parsing:
+ ha_alert("parsing [%s:%d]: %s '%s', expects 'admin', 'uri', 'realm', 'auth', 'scope', 'enable', 'hide-version', 'show-node', 'show-desc' or 'show-legends'.\n",
+ file, linenum, *args[1]?"unknown stats parameter":"missing keyword in", args[*args[1]?1:0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
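+ /* Typical stats block assembled from the keywords above (uri, realm
+ * and credentials are placeholders):
+ * stats enable
+ * stats uri /haproxy?stats
+ * stats realm Restricted
+ * stats auth admin:password
+ * stats refresh 10s
+ */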
+ else if (strcmp(args[0], "option") == 0) {
+ int optnum;
+
+ if (*(args[1]) == '\0') {
+ ha_alert("parsing [%s:%d]: '%s' expects an option name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (optnum = 0; cfg_opts[optnum].name; optnum++) {
+ if (strcmp(args[1], cfg_opts[optnum].name) == 0) {
+ if (cfg_opts[optnum].cap == PR_CAP_NONE) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported due to build options.\n",
+ file, linenum, cfg_opts[optnum].name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (warnifnotcap(curproxy, cfg_opts[optnum].cap, file, linenum, args[1], NULL)) {
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ curproxy->no_options &= ~cfg_opts[optnum].val;
+ curproxy->options &= ~cfg_opts[optnum].val;
+
+ switch (kwm) {
+ case KWM_STD:
+ curproxy->options |= cfg_opts[optnum].val;
+ break;
+ case KWM_NO:
+ curproxy->no_options |= cfg_opts[optnum].val;
+ break;
+ case KWM_DEF: /* already cleared */
+ break;
+ }
+
+ goto out;
+ }
+ }
+
+ for (optnum = 0; cfg_opts2[optnum].name; optnum++) {
+ if (strcmp(args[1], cfg_opts2[optnum].name) == 0) {
+ if (cfg_opts2[optnum].cap == PR_CAP_NONE) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported due to build options.\n",
+ file, linenum, cfg_opts2[optnum].name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (warnifnotcap(curproxy, cfg_opts2[optnum].cap, file, linenum, args[1], NULL)) {
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ curproxy->no_options2 &= ~cfg_opts2[optnum].val;
+ curproxy->options2 &= ~cfg_opts2[optnum].val;
+
+ switch (kwm) {
+ case KWM_STD:
+ curproxy->options2 |= cfg_opts2[optnum].val;
+ break;
+ case KWM_NO:
+ curproxy->no_options2 |= cfg_opts2[optnum].val;
+ break;
+ case KWM_DEF: /* already cleared */
+ break;
+ }
+ goto out;
+ }
+ }
+
+ /* HTTP options override each other. They can be cancelled using
+ * "no option xxx" which only switches to default mode if the mode
+ * was this one (useful for cancelling options set in defaults
+ * sections).
+ */
+ if (strcmp(args[1], "forceclose") == 0) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported any more since HAProxy 2.0, please just remove it, or use 'option httpclose' if absolutely needed.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[1], "httpclose") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD) {
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ curproxy->options |= PR_O_HTTP_CLO;
+ goto out;
+ }
+ else if (kwm == KWM_NO) {
+ if ((curproxy->options & PR_O_HTTP_MODE) == PR_O_HTTP_CLO)
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ goto out;
+ }
+ }
+ else if (strcmp(args[1], "http-server-close") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD) {
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ curproxy->options |= PR_O_HTTP_SCL;
+ goto out;
+ }
+ else if (kwm == KWM_NO) {
+ if ((curproxy->options & PR_O_HTTP_MODE) == PR_O_HTTP_SCL)
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ goto out;
+ }
+ }
+ else if (strcmp(args[1], "http-keep-alive") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD) {
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ curproxy->options |= PR_O_HTTP_KAL;
+ goto out;
+ }
+ else if (kwm == KWM_NO) {
+ if ((curproxy->options & PR_O_HTTP_MODE) == PR_O_HTTP_KAL)
+ curproxy->options &= ~PR_O_HTTP_MODE;
+ goto out;
+ }
+ }
+ else if (strcmp(args[1], "http-tunnel") == 0) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported any more since HAProxy 2.1, please just remove it, it shouldn't be needed.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[1], "forwarded") == 0) {
+ if (kwm == KWM_STD) {
+ err_code |= proxy_http_parse_7239(args, 0, curproxy, curr_defproxy, file, linenum);
+ goto out;
+ }
+ else if (kwm == KWM_NO) {
+ if (curproxy->http_ext)
+ http_ext_7239_clean(curproxy);
+ goto out;
+ }
+ }
+
+ /* Redispatch can take an integer argument that controls when the
+ * redispatch occurs. All values are relative to the retries option.
+ * This can be cancelled using "no option xxx".
+ */
+ if (strcmp(args[1], "redispatch") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[1], NULL)) {
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ curproxy->no_options &= ~PR_O_REDISP;
+ curproxy->options &= ~PR_O_REDISP;
+
+ switch (kwm) {
+ case KWM_STD:
+ curproxy->options |= PR_O_REDISP;
+ curproxy->redispatch_after = -1;
+ if(*args[2]) {
+ curproxy->redispatch_after = atol(args[2]);
+ }
+ break;
+ case KWM_NO:
+ curproxy->no_options |= PR_O_REDISP;
+ curproxy->redispatch_after = 0;
+ break;
+ case KWM_DEF: /* already cleared */
+ break;
+ }
+ goto out;
+ }
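+ /* Illustrative; the count is relative to 'retries' as noted above:
+ * option redispatch 2
+ * no option redispatch
+ */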
+
+ if (strcmp(args[1], "http_proxy") == 0) {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported any more since HAProxy 2.5. This option stopped working in HAProxy 1.9 and usually had nasty side effects. It can be more reliably implemented with combinations of 'http-request set-dst' and 'http-request set-uri', and even 'http-request do-resolve' if DNS resolution is desired.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (kwm != KWM_STD) {
+ ha_alert("parsing [%s:%d]: negation/default is not supported for option '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[1], "httplog") == 0) {
+ char *logformat;
+ /* generate a complete HTTP log */
+ logformat = default_http_log_format;
+ if (*(args[2]) != '\0') {
+ if (strcmp(args[2], "clf") == 0) {
+ curproxy->options2 |= PR_O2_CLFLOG;
+ logformat = clf_http_log_format;
+ } else {
+ ha_alert("parsing [%s:%d] : keyword '%s' only supports option 'clf'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(1, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) {
+ char *oldlogformat = "log-format";
+ char *clflogformat = "";
+
+ if (curproxy->conf.logformat_string == default_http_log_format)
+ oldlogformat = "option httplog";
+ else if (curproxy->conf.logformat_string == default_tcp_log_format)
+ oldlogformat = "option tcplog";
+ else if (curproxy->conf.logformat_string == clf_http_log_format)
+ oldlogformat = "option httplog clf";
+ else if (curproxy->conf.logformat_string == default_https_log_format)
+ oldlogformat = "option httpslog";
+ if (logformat == clf_http_log_format)
+ clflogformat = " clf";
+ ha_warning("parsing [%s:%d]: 'option httplog%s' overrides previous '%s' in 'defaults' section.\n",
+ file, linenum, clflogformat, oldlogformat);
+ }
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = logformat;
+
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'option httplog' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[1], "tcplog") == 0) {
+ if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) {
+ char *oldlogformat = "log-format";
+
+ if (curproxy->conf.logformat_string == default_http_log_format)
+ oldlogformat = "option httplog";
+ else if (curproxy->conf.logformat_string == default_tcp_log_format)
+ oldlogformat = "option tcplog";
+ else if (curproxy->conf.logformat_string == clf_http_log_format)
+ oldlogformat = "option httplog clf";
+ else if (curproxy->conf.logformat_string == default_https_log_format)
+ oldlogformat = "option httpslog";
+ ha_warning("parsing [%s:%d]: 'option tcplog' overrides previous '%s' in 'defaults' section.\n",
+ file, linenum, oldlogformat);
+ }
+ /* generate a detailed TCP log */
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = default_tcp_log_format;
+
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'option tcplog' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[1], "httpslog") == 0) {
+ char *logformat;
+ /* generate a complete HTTPS log */
+ logformat = default_https_log_format;
+ if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) {
+ char *oldlogformat = "log-format";
+
+ if (curproxy->conf.logformat_string == default_http_log_format)
+ oldlogformat = "option httplog";
+ else if (curproxy->conf.logformat_string == default_tcp_log_format)
+ oldlogformat = "option tcplog";
+ else if (curproxy->conf.logformat_string == clf_http_log_format)
+ oldlogformat = "option httplog clf";
+ else if (curproxy->conf.logformat_string == default_https_log_format)
+ oldlogformat = "option httpslog";
+ ha_warning("parsing [%s:%d]: 'option httplog' overrides previous '%s' in 'defaults' section.\n",
+ file, linenum, oldlogformat);
+ }
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = logformat;
+
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'option httpslog' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[1], "tcpka") == 0) {
+ /* enable TCP keep-alives on client and server streams */
+ if (warnifnotcap(curproxy, PR_CAP_BE | PR_CAP_FE, file, linenum, args[1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+
+ if (curproxy->cap & PR_CAP_FE)
+ curproxy->options |= PR_O_TCP_CLI_KA;
+ if (curproxy->cap & PR_CAP_BE)
+ curproxy->options |= PR_O_TCP_SRV_KA;
+ }
+ else if (strcmp(args[1], "httpchk") == 0) {
+ err_code |= proxy_parse_httpchk_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "ssl-hello-chk") == 0) {
+ err_code |= proxy_parse_ssl_hello_chk_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "smtpchk") == 0) {
+ err_code |= proxy_parse_smtpchk_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "pgsql-check") == 0) {
+ err_code |= proxy_parse_pgsql_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "redis-check") == 0) {
+ err_code |= proxy_parse_redis_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "mysql-check") == 0) {
+ err_code |= proxy_parse_mysql_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "ldap-check") == 0) {
+ err_code |= proxy_parse_ldap_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "spop-check") == 0) {
+ err_code |= proxy_parse_spop_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "tcp-check") == 0) {
+ err_code |= proxy_parse_tcp_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "external-check") == 0) {
+ err_code |= proxy_parse_external_check_opt(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "forwardfor") == 0) {
+ err_code |= proxy_http_parse_xff(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "originalto") == 0) {
+ err_code |= proxy_http_parse_xot(args, 0, curproxy, curr_defproxy, file, linenum);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ else if (strcmp(args[1], "http-restrict-req-hdr-names") == 0) {
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : missing parameter. option '%s' expects 'preserve', 'reject' or 'delete' option.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->options2 &= ~PR_O2_RSTRICT_REQ_HDR_NAMES_MASK;
+ if (strcmp(args[2], "preserve") == 0)
+ curproxy->options2 |= PR_O2_RSTRICT_REQ_HDR_NAMES_NOOP;
+ else if (strcmp(args[2], "reject") == 0)
+ curproxy->options2 |= PR_O2_RSTRICT_REQ_HDR_NAMES_BLK;
+ else if (strcmp(args[2], "delete") == 0)
+ curproxy->options2 |= PR_O2_RSTRICT_REQ_HDR_NAMES_DEL;
+ else {
+ ha_alert("parsing [%s:%d] : invalid parameter '%s'. option '%s' expects 'preserve', 'reject' or 'delete' option.\n",
+ file, linenum, args[2], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else {
+ const char *best = proxy_find_best_option(args[1], common_options);
+
+ if (best)
+ ha_alert("parsing [%s:%d] : unknown option '%s'; did you mean '%s' maybe ?\n", file, linenum, args[1], best);
+ else
+ ha_alert("parsing [%s:%d] : unknown option '%s'.\n", file, linenum, args[1]);
+
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ goto out;
+ }
+ else if (strcmp(args[0], "default_backend") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a backend name.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->defbe.name);
+ curproxy->defbe.name = strdup(args[1]);
+ if (!curproxy->defbe.name)
+ goto alloc_error;
+
+ if (alertif_too_many_args_idx(1, 0, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "redispatch") == 0 || strcmp(args[0], "redisp") == 0) {
+ ha_alert("parsing [%s:%d] : keyword '%s' directive is not supported anymore since HAProxy 2.1. Use 'option redispatch'.\n", file, linenum, args[0]);
+
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "http-reuse") == 0) {
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "never") == 0) {
+ /* never reuse a server-side connection across requests */
+ curproxy->options &= ~PR_O_REUSE_MASK;
+ curproxy->options |= PR_O_REUSE_NEVR;
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[1], "safe") == 0) {
+ /* reuse idle connections only when it is considered safe */
+ curproxy->options &= ~PR_O_REUSE_MASK;
+ curproxy->options |= PR_O_REUSE_SAFE;
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[1], "aggressive") == 0) {
+ curproxy->options &= ~PR_O_REUSE_MASK;
+ curproxy->options |= PR_O_REUSE_AGGR;
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[1], "always") == 0) {
+ /* always reuse any available idle server connection */
+ curproxy->options &= ~PR_O_REUSE_MASK;
+ curproxy->options |= PR_O_REUSE_ALWS;
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'never', 'safe', 'aggressive', 'always'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
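+ /* Illustrative: a backend may relax or forbid reuse, e.g.
+ * http-reuse always
+ * http-reuse never
+ * with the trade-offs described in the configuration manual.
+ */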
+ else if (strcmp(args[0], "monitor") == 0) {
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "fail") == 0) {
+ /* add a condition to fail monitor requests */
+ if (strcmp(args[2], "if") != 0 && strcmp(args[2], "unless") != 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' requires either 'if' or 'unless' followed by a condition.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= warnif_misplaced_monitor(curproxy, file, linenum, "monitor fail");
+ if ((cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + 2, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing a '%s %s' condition : %s.\n",
+ file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ LIST_APPEND(&curproxy->mon_fail_cond, &cond->list);
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'fail'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
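+ /* Illustrative monitor setup ('monitor-uri' is parsed elsewhere;
+ * backend name hypothetical):
+ * monitor-uri /health
+ * monitor fail if { nbsrv(app) lt 1 }
+ */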
+#ifdef USE_TPROXY
+ else if (strcmp(args[0], "transparent") == 0) {
+ /* enable transparent proxy connections */
+ curproxy->options |= PR_O_TRANSP;
+ if (alertif_too_many_args(0, file, linenum, args, &err_code))
+ goto out;
+ }
+#endif
+ else if (strcmp(args[0], "maxconn") == 0) { /* maxconn */
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], " Maybe you want 'fullconn' instead ?"))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->maxconn = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "backlog") == 0) { /* backlog */
+ if (warnifnotcap(curproxy, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->backlog = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "fullconn") == 0) { /* fullconn */
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], " Maybe you want 'maxconn' instead ?"))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->fullconn = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "grace") == 0) { /* grace time (ms) */
+ ha_alert("parsing [%s:%d]: the '%s' keyword is not supported any more since HAProxy version 2.5.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "dispatch") == 0) { /* dispatch address */
+ struct sockaddr_storage *sk;
+ int port1, port2;
+
+ if (curproxy->cap & PR_CAP_DEF) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ sk = str2sa_range(args[1], NULL, &port1, &port2, NULL, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s' : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ curproxy->dispatch_addr = *sk;
+ curproxy->options |= PR_O_DISPATCH;
+ }
+ else if (strcmp(args[0], "balance") == 0) { /* set balancing with optional algorithm */
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (backend_parse_balance((const char **)args + 1, &errmsg, curproxy) < 0) {
+ ha_alert("parsing [%s:%d] : %s %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "hash-type") == 0) { /* set hashing method */
+ /**
+ * The syntax for hash-type config element is
+ * hash-type {map-based|consistent} [[<algo>] avalanche]
+ *
+ * The default hash function is sdbm for map-based and sdbm+avalanche for consistent.
+ */
+ curproxy->lbprm.algo &= ~(BE_LB_HASH_TYPE | BE_LB_HASH_FUNC | BE_LB_HASH_MOD);
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (strcmp(args[1], "consistent") == 0) { /* use consistent hashing */
+ curproxy->lbprm.algo |= BE_LB_HASH_CONS;
+ }
+ else if (strcmp(args[1], "map-based") == 0) { /* use map-based hashing */
+ curproxy->lbprm.algo |= BE_LB_HASH_MAP;
+ }
+ else if (strcmp(args[1], "avalanche") == 0) {
+ ha_alert("parsing [%s:%d] : experimental feature '%s %s' is not supported anymore, please use '%s map-based sdbm avalanche' instead.\n", file, linenum, args[0], args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'consistent' and 'map-based'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* set the hash function to use */
+ if (!*args[2]) {
+ /* the default algo is sdbm */
+ curproxy->lbprm.algo |= BE_LB_HFCN_SDBM;
+
+ /* if consistent with no argument, then avalanche modifier is also applied */
+ if ((curproxy->lbprm.algo & BE_LB_HASH_TYPE) == BE_LB_HASH_CONS)
+ curproxy->lbprm.algo |= BE_LB_HMOD_AVAL;
+ } else {
+ /* set the hash function */
+ if (strcmp(args[2], "sdbm") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_SDBM;
+ }
+ else if (strcmp(args[2], "djb2") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_DJB2;
+ }
+ else if (strcmp(args[2], "wt6") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_WT6;
+ }
+ else if (strcmp(args[2], "crc32") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_CRC32;
+ }
+ else if (strcmp(args[2], "none") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HFCN_NONE;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'sdbm', 'djb2', 'crc32', or 'wt6' hash functions.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* set the hash modifier */
+ if (strcmp(args[3], "avalanche") == 0) {
+ curproxy->lbprm.algo |= BE_LB_HMOD_AVAL;
+ }
+ else if (*args[3]) {
+ ha_alert("parsing [%s:%d] : '%s' only supports 'avalanche' as a modifier for hash functions.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ }
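+ /* Example combinations accepted above:
+ * hash-type consistent (implies sdbm + avalanche)
+ * hash-type map-based djb2
+ * hash-type consistent wt6 avalanche
+ */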
+ else if (strcmp(args[0], "hash-balance-factor") == 0) {
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->lbprm.hash_balance_factor = atol(args[1]);
+ if (curproxy->lbprm.hash_balance_factor != 0 && curproxy->lbprm.hash_balance_factor <= 100) {
+ ha_alert("parsing [%s:%d] : '%s' must be 0 or greater than 100.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "unique-id-format") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*(args[2])) {
+ ha_alert("parsing [%s:%d] : %s expects only one argument, don't forget to escape spaces!\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->conf.uniqueid_format_string);
+ curproxy->conf.uniqueid_format_string = strdup(args[1]);
+ if (!curproxy->conf.uniqueid_format_string)
+ goto alloc_error;
+
+ free(curproxy->conf.uif_file);
+ curproxy->conf.uif_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.uif_line = curproxy->conf.args.line;
+ }
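+ /* Illustrative pairing with 'unique-id-header' parsed just below
+ * (format taken from the documentation example, header hypothetical):
+ * unique-id-format %{+X}o\ %ci:%cp_%fi:%fp_%Ts_%rt:%pid
+ * unique-id-header X-Unique-ID
+ */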
+
+ else if (strcmp(args[0], "unique-id-header") == 0) {
+ char *copy;
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ copy = strdup(args[1]);
+ if (copy == NULL) {
+ ha_alert("parsing [%s:%d] : failed to allocate memory for unique-id-header\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ istfree(&curproxy->header_unique_id);
+ curproxy->header_unique_id = ist(copy);
+ }
+
+ else if (strcmp(args[0], "log-format") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*(args[2])) {
+ ha_alert("parsing [%s:%d] : %s expects only one argument, don't forget to escape spaces!\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) {
+ char *oldlogformat = "log-format";
+
+ if (curproxy->conf.logformat_string == default_http_log_format)
+ oldlogformat = "option httplog";
+ else if (curproxy->conf.logformat_string == default_tcp_log_format)
+ oldlogformat = "option tcplog";
+ else if (curproxy->conf.logformat_string == clf_http_log_format)
+ oldlogformat = "option httplog clf";
+ else if (curproxy->conf.logformat_string == default_https_log_format)
+ oldlogformat = "option httpslog";
+ ha_warning("parsing [%s:%d]: 'log-format' overrides previous '%s' in 'defaults' section.\n",
+ file, linenum, oldlogformat);
+ }
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = strdup(args[1]);
+ if (!curproxy->conf.logformat_string)
+ goto alloc_error;
+
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+
+ /* get a chance to improve log-format error reporting by
+ * reporting the correct line-number when possible.
+ */
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'log-format' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[0], "log-format-sd") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*(args[2])) {
+ ha_alert("parsing [%s:%d] : %s expects only one argument, don't forget to escape spaces!\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ free(curproxy->conf.logformat_sd_string);
+ curproxy->conf.logformat_sd_string = strdup(args[1]);
+ if (!curproxy->conf.logformat_sd_string)
+ goto alloc_error;
+
+ free(curproxy->conf.lfsd_file);
+ curproxy->conf.lfsd_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfsd_line = curproxy->conf.args.line;
+
+ /* get a chance to improve log-format-sd error reporting by
+ * reporting the correct line-number when possible.
+ */
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'log-format-sd' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[0], "error-log-format") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*(args[2])) {
+ ha_alert("parsing [%s:%d] : %s expects only one argument, don't forget to escape spaces!\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (curproxy->conf.error_logformat_string && curproxy->cap & PR_CAP_DEF) {
+ ha_warning("parsing [%s:%d]: 'error-log-format' overrides previous 'error-log-format' in 'defaults' section.\n",
+ file, linenum);
+ }
+ free(curproxy->conf.error_logformat_string);
+ curproxy->conf.error_logformat_string = strdup(args[1]);
+ if (!curproxy->conf.error_logformat_string)
+ goto alloc_error;
+
+ free(curproxy->conf.elfs_file);
+ curproxy->conf.elfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.elfs_line = curproxy->conf.args.line;
+
+ /* get a chance to improve log-format error reporting by
+ * reporting the correct line-number when possible.
+ */
+ if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) {
+ ha_warning("parsing [%s:%d] : backend '%s' : 'error-log-format' directive is ignored in backends.\n",
+ file, linenum, curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (strcmp(args[0], "log-tag") == 0) { /* tag to report to syslog */
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects a tag for use in syslog.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ chunk_destroy(&curproxy->log_tag);
+ chunk_initlen(&curproxy->log_tag, strdup(args[1]), strlen(args[1]), strlen(args[1]));
+ if (b_orig(&curproxy->log_tag) == NULL) {
+ chunk_destroy(&curproxy->log_tag);
+ ha_alert("parsing [%s:%d]: cannot allocate memory for '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "log") == 0) { /* "no log" or "log ..." */
+ if (!parse_logger(args, &curproxy->loggers, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
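+ /* Illustrative log targets (address hypothetical); 'no log' clears
+ * inherited loggers via the KWM_NO path above:
+ * log 127.0.0.1:514 local0 notice
+ * no log
+ */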
+ else if (strcmp(args[0], "source") == 0) { /* address to which we bind when connecting */
+ int cur_arg;
+ int port1, port2;
+ struct sockaddr_storage *sk;
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <addr>[:<port>], and optionally '%s' <addr>, and '%s' <name>.\n",
+ file, linenum, "source", "usesrc", "interface");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* we must first clear any optional default setting */
+ curproxy->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ ha_free(&curproxy->conn_src.iface_name);
+ curproxy->conn_src.iface_len = 0;
+
+ sk = str2sa_range(args[1], NULL, &port1, &port2, NULL, NULL, NULL,
+ &errmsg, NULL, NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n",
+ file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->conn_src.source_addr = *sk;
+ curproxy->conn_src.opts |= CO_SRC_BIND;
+
+ cur_arg = 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "usesrc") == 0) { /* address to use outside */
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (!*args[cur_arg + 1]) {
+					ha_alert("parsing [%s:%d] : '%s' expects <addr>[:<port>], 'client', 'clientip' or 'hdr_ip(name[,occ])' as argument.\n",
+ file, linenum, "usesrc");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[cur_arg + 1], "client") == 0) {
+ curproxy->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ curproxy->conn_src.opts |= CO_SRC_TPROXY_CLI;
+ } else if (strcmp(args[cur_arg + 1], "clientip") == 0) {
+ curproxy->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ curproxy->conn_src.opts |= CO_SRC_TPROXY_CIP;
+ } else if (!strncmp(args[cur_arg + 1], "hdr_ip(", 7)) {
+ char *name, *end;
+
+ name = args[cur_arg+1] + 7;
+ while (isspace((unsigned char)*name))
+ name++;
+
+ end = name;
+ while (*end && !isspace((unsigned char)*end) && *end != ',' && *end != ')')
+ end++;
+
+ curproxy->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ curproxy->conn_src.opts |= CO_SRC_TPROXY_DYN;
+ free(curproxy->conn_src.bind_hdr_name);
+ curproxy->conn_src.bind_hdr_name = calloc(1, end - name + 1);
+ if (!curproxy->conn_src.bind_hdr_name)
+ goto alloc_error;
+ curproxy->conn_src.bind_hdr_len = end - name;
+ memcpy(curproxy->conn_src.bind_hdr_name, name, end - name);
+ curproxy->conn_src.bind_hdr_name[end-name] = '\0';
+ curproxy->conn_src.bind_hdr_occ = -1;
+
+ /* now look for an occurrence number */
+ while (isspace((unsigned char)*end))
+ end++;
+ if (*end == ',') {
+ end++;
+ name = end;
+ if (*end == '-')
+ end++;
+ while (isdigit((unsigned char)*end))
+ end++;
+ curproxy->conn_src.bind_hdr_occ = strl2ic(name, end-name);
+ }
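+					/* e.g. (illustrative): "usesrc hdr_ip(x-forwarded-for,-1)"
+					 * uses the address found in the last occurrence of the
+					 * header; negative occurrence numbers count from the end.
+					 */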
+
+ if (curproxy->conn_src.bind_hdr_occ < -MAX_HDR_HISTORY) {
+						ha_alert("parsing [%s:%d] : usesrc hdr_ip(name,num) does not support"
+							 " occurrence values smaller than -%d.\n",
+							 file, linenum, MAX_HDR_HISTORY);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } else {
+ struct sockaddr_storage *sk;
+
+ sk = str2sa_range(args[cur_arg + 1], NULL, &port1, &port2, NULL, NULL, NULL,
+ &errmsg, NULL, NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n",
+ file, linenum, args[cur_arg], args[cur_arg+1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curproxy->conn_src.tproxy_addr = *sk;
+ curproxy->conn_src.opts |= CO_SRC_TPROXY_ADDR;
+ }
+ global.last_checks |= LSTCHK_NETADM;
+#else /* no TPROXY support */
+ ha_alert("parsing [%s:%d] : '%s' not allowed here because support for TPROXY was not compiled in.\n",
+ file, linenum, "usesrc");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+#endif
+ cur_arg += 2;
+ continue;
+ }
+
+ if (strcmp(args[cur_arg], "interface") == 0) { /* specifically bind to this interface */
+#ifdef SO_BINDTODEVICE
+ if (!*args[cur_arg + 1]) {
+ ha_alert("parsing [%s:%d] : '%s' : missing interface name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ free(curproxy->conn_src.iface_name);
+ curproxy->conn_src.iface_name = strdup(args[cur_arg + 1]);
+ if (!curproxy->conn_src.iface_name)
+ goto alloc_error;
+ curproxy->conn_src.iface_len = strlen(curproxy->conn_src.iface_name);
+ global.last_checks |= LSTCHK_NETADM;
+#else
+ ha_alert("parsing [%s:%d] : '%s' : '%s' option not implemented.\n",
+ file, linenum, args[0], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+#endif
+ cur_arg += 2;
+ continue;
+ }
+ ha_alert("parsing [%s:%d] : '%s' only supports optional keywords '%s' and '%s'.\n",
+ file, linenum, args[0], "interface", "usesrc");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "usesrc") == 0) { /* address to use outside: needs "source" first */
+ ha_alert("parsing [%s:%d] : '%s' only allowed after a '%s' statement.\n",
+ file, linenum, "usesrc", "source");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "cliexp") == 0 || strcmp(args[0], "reqrep") == 0) { /* replace request header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request replace-path', 'http-request replace-uri' or 'http-request replace-header' instead.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqdel") == 0) { /* delete request header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request del-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqdeny") == 0) { /* deny a request if a header matches this regex */
+		ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request deny' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqpass") == 0) { /* pass this header without allowing or denying the request */
+		ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqallow") == 0) { /* allow a request if a header matches this regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request allow' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqtarpit") == 0) { /* tarpit a request if a header matches this regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request tarpit' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqirep") == 0) { /* replace request header from a regex, ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request replace-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqidel") == 0) { /* delete request header from a regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request del-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqideny") == 0) { /* deny a request if a header matches this regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request deny' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqipass") == 0) { /* pass this header without allowing or denying the request */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqiallow") == 0) { /* allow a request if a header matches this regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request allow' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqitarpit") == 0) { /* tarpit a request if a header matches this regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request tarpit' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "reqadd") == 0) { /* add request header */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-request add-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "srvexp") == 0 || strcmp(args[0], "rsprep") == 0) { /* replace response header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response replace-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspdel") == 0) { /* delete response header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+			 "Use 'http-response del-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspdeny") == 0) { /* block response header from a regex */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response deny' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspirep") == 0) { /* replace response header from a regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response replace-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspidel") == 0) { /* delete response header from a regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response del-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspideny") == 0) { /* block response header from a regex ignoring case */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response deny' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "rspadd") == 0) { /* add response header */
+ ha_alert("parsing [%s:%d] : The '%s' directive is not supported anymore since HAProxy 2.1. "
+ "Use 'http-response add-header' instead.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else {
+ struct cfg_kw_list *kwl;
+ const char *best;
+ int index;
+
+ list_for_each_entry(kwl, &cfg_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].section != CFG_LISTEN)
+ continue;
+ if (strcmp(kwl->kw[index].kw, args[0]) == 0) {
+ if (check_kw_experimental(&kwl->kw[index], file, linenum, &errmsg)) {
+ ha_alert("%s\n", errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* prepare error message just in case */
+ rc = kwl->kw[index].parse(args, CFG_LISTEN, curproxy, curr_defproxy, file, linenum, &errmsg);
+ if (rc < 0) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (rc > 0) {
+ ha_warning("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+ goto out;
+ }
+ }
+ }
+
+ best = cfg_find_best_match(args[0], &cfg_keywords.list, CFG_LISTEN, common_kw_list);
+ if (best)
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section; did you mean '%s' maybe ?\n", file, linenum, args[0], cursection, best);
+ else
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ out:
+ free(errmsg);
+ return err_code;
+
+ alloc_error:
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+}
diff --git a/src/cfgparse-quic.c b/src/cfgparse-quic.c
new file mode 100644
index 0000000..3b38efa
--- /dev/null
+++ b/src/cfgparse-quic.c
@@ -0,0 +1,292 @@
+#include <errno.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/listener.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/quic_cc-t.h>
+#include <haproxy/tools.h>
+
+#define QUIC_CC_NEWRENO_STR "newreno"
+#define QUIC_CC_CUBIC_STR "cubic"
+#define QUIC_CC_NO_CC_STR "nocc"
+
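+/* parse the "quic-force-retry" bind keyword; it unconditionally enables
+ * the QUIC Retry mechanism on this listener, e.g. (illustrative):
+ *   bind quic4@:443 ssl crt site.pem alpn h3 quic-force-retry
+ */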
+static int bind_parse_quic_force_retry(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->options |= BC_O_QUIC_FORCE_RETRY;
+ return 0;
+}
+
+/* parse "quic-cc-algo" bind keyword */
+static int bind_parse_quic_cc_algo(char **args, int cur_arg, struct proxy *px,
+ struct bind_conf *conf, char **err)
+{
+ struct quic_cc_algo *cc_algo;
+ const char *algo = NULL;
+ char *arg;
+
+ if (!*args[cur_arg + 1]) {
+		memprintf(err, "'%s' : missing congestion control algorithm", args[cur_arg]);
+ goto fail;
+ }
+
+ arg = args[cur_arg + 1];
+ if (strncmp(arg, QUIC_CC_NEWRENO_STR, strlen(QUIC_CC_NEWRENO_STR)) == 0) {
+ /* newreno */
+ algo = QUIC_CC_NEWRENO_STR;
+ cc_algo = &quic_cc_algo_nr;
+ arg += strlen(QUIC_CC_NEWRENO_STR);
+ }
+ else if (strncmp(arg, QUIC_CC_CUBIC_STR, strlen(QUIC_CC_CUBIC_STR)) == 0) {
+ /* cubic */
+ algo = QUIC_CC_CUBIC_STR;
+ cc_algo = &quic_cc_algo_cubic;
+ arg += strlen(QUIC_CC_CUBIC_STR);
+ }
+ else if (strncmp(arg, QUIC_CC_NO_CC_STR, strlen(QUIC_CC_NO_CC_STR)) == 0) {
+ /* nocc */
+ if (!experimental_directives_allowed) {
+			ha_alert("'%s' algo is experimental; it must be enabled with a global "
+				 "'expose-experimental-directives' directive\n", arg);
+ goto fail;
+ }
+
+ algo = QUIC_CC_NO_CC_STR;
+ cc_algo = &quic_cc_algo_nocc;
+ arg += strlen(QUIC_CC_NO_CC_STR);
+ }
+ else {
+		memprintf(err, "'%s' : unknown congestion control algorithm", args[cur_arg + 1]);
+ goto fail;
+ }
+
+ if (*arg++ == '(') {
+ unsigned long cwnd;
+ char *end_opt;
+
+ errno = 0;
+ cwnd = strtoul(arg, &end_opt, 0);
+ if (end_opt == arg || errno != 0) {
+ memprintf(err, "'%s' : could not parse congestion window value", args[cur_arg + 1]);
+ goto fail;
+ }
+
+ if (*end_opt == 'k') {
+ cwnd <<= 10;
+ end_opt++;
+ }
+ else if (*end_opt == 'm') {
+ cwnd <<= 20;
+ end_opt++;
+ }
+ else if (*end_opt == 'g') {
+ cwnd <<= 30;
+ end_opt++;
+ }
+
+ if (*end_opt != ')') {
+ memprintf(err, "'%s' : expects %s(<max window>)", args[cur_arg + 1], algo);
+ goto fail;
+ }
+
+ if (cwnd < 10240 || cwnd > (4UL << 30)) {
+			memprintf(err, "'%s' : maximum window must be between 10k and 4g", args[cur_arg + 1]);
+ goto fail;
+ }
+
+ conf->max_cwnd = cwnd;
+ }
+
+ conf->quic_cc_algo = cc_algo;
+ return 0;
+
+ fail:
+ return ERR_ALERT | ERR_FATAL;
+}
+
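+/* parse the "quic-socket" bind keyword; it selects whether a dedicated
+ * UDP socket is allocated per connection or shared by the listener,
+ * e.g. (illustrative):
+ *   bind quic4@:443 quic-socket connection
+ */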
+static int bind_parse_quic_socket(char **args, int cur_arg, struct proxy *px,
+ struct bind_conf *conf, char **err)
+{
+ char *arg;
+ if (!*args[cur_arg + 1]) {
+		memprintf(err, "'%s' : missing argument, use either 'connection' or 'listener'.", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ arg = args[cur_arg + 1];
+ if (strcmp(arg, "connection") == 0) {
+ conf->quic_mode = QUIC_SOCK_MODE_CONN;
+ }
+ else if (strcmp(arg, "listener") == 0) {
+ conf->quic_mode = QUIC_SOCK_MODE_LSTNR;
+ }
+ else {
+		memprintf(err, "'%s' : unknown argument '%s', use either 'connection' or 'listener'.", args[cur_arg], arg);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+static struct bind_kw_list bind_kws = { "QUIC", { }, {
+ { "quic-force-retry", bind_parse_quic_force_retry, 0 },
+ { "quic-cc-algo", bind_parse_quic_cc_algo, 1 },
+ { "quic-socket", bind_parse_quic_socket, 1 },
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+/* parse "tune.quic.socket-owner", accepts "listener" or "connection" */
+static int cfg_parse_quic_tune_socket_owner(char **args, int section_type,
+ struct proxy *curpx,
+ const struct proxy *defpx,
+ const char *file, int line, char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "connection") == 0) {
+ global.tune.options |= GTUNE_QUIC_SOCK_PER_CONN;
+ }
+ else if (strcmp(args[1], "listener") == 0) {
+ global.tune.options &= ~GTUNE_QUIC_SOCK_PER_CONN;
+ }
+ else {
+ memprintf(err, "'%s' expects either 'listener' or 'connection' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Parse any tune.quic.* setting which takes a time as its value.
+ * Returns -1 on alert, or 0 on success.
+ */
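+/* e.g. (illustrative): "tune.quic.frontend.max-idle-timeout 30s" */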
+static int cfg_parse_quic_time(char **args, int section_type,
+ struct proxy *curpx,
+ const struct proxy *defpx,
+ const char *file, int line, char **err)
+{
+ unsigned int time;
+ const char *res, *name, *value;
+ int prefix_len = strlen("tune.quic.");
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ name = args[0];
+ value = args[1];
+ res = parse_time_err(value, &time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' "
+ "(maximum value is 2147483647 ms or ~24.8 days)", value, name);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' "
+ "(minimum non-null value is 1 ms)", value, name);
+ return -1;
+ }
+ else if (res) {
+		memprintf(err, "unexpected character '%c' in argument '%s' to '%s'", *res, value, name);
+ return -1;
+ }
+
+ if (strcmp(name + prefix_len, "frontend.max-idle-timeout") == 0)
+ global.tune.quic_frontend_max_idle_timeout = time;
+ else if (strcmp(name + prefix_len, "backend.max-idle-timeout") == 0)
+ global.tune.quic_backend_max_idle_timeout = time;
+ else {
+		memprintf(err, "'%s' keyword not handled (please report this bug).", args[0]);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Parse any tune.quic.* setting with strictly positive integer values.
+ * Returns -1 on alert, or 0 on success.
+ */
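+/* e.g. (illustrative): "tune.quic.retry-threshold 100" */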
+static int cfg_parse_quic_tune_setting(char **args, int section_type,
+ struct proxy *curpx,
+ const struct proxy *defpx,
+ const char *file, int line, char **err)
+{
+	int arg = 0; /* signed so that negative values are caught by the check below */
+ int prefix_len = strlen("tune.quic.");
+ const char *suffix;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) != 0)
+ arg = atoi(args[1]);
+
+ if (arg < 1) {
+ memprintf(err, "'%s' expects a positive integer.", args[0]);
+ return -1;
+ }
+
+ suffix = args[0] + prefix_len;
+ if (strcmp(suffix, "frontend.conn-tx-buffers.limit") == 0)
+ global.tune.quic_streams_buf = arg;
+ else if (strcmp(suffix, "frontend.max-streams-bidi") == 0)
+ global.tune.quic_frontend_max_streams_bidi = arg;
+ else if (strcmp(suffix, "max-frame-loss") == 0)
+ global.tune.quic_max_frame_loss = arg;
+ else if (strcmp(suffix, "reorder-ratio") == 0) {
+ if (arg > 100) {
+ memprintf(err, "'%s' expects an integer argument between 0 and 100.", args[0]);
+ return -1;
+ }
+
+ global.tune.quic_reorder_ratio = arg;
+ }
+ else if (strcmp(suffix, "retry-threshold") == 0)
+ global.tune.quic_retry_threshold = arg;
+ else {
+		memprintf(err, "'%s' keyword not handled (please report this bug).", args[0]);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* config parser for global "tune.quic.zero-copy-fwd-send" */
+static int cfg_parse_quic_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND;
+ else {
+ memprintf(err, "'%s' expects 'on' or 'off'.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.quic.socket-owner", cfg_parse_quic_tune_socket_owner },
+	{ CFG_GLOBAL, "tune.quic.backend.max-idle-timeout", cfg_parse_quic_time },
+ { CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting },
+ { CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting },
+ { CFG_GLOBAL, "tune.quic.frontend.max-idle-timeout", cfg_parse_quic_time },
+ { CFG_GLOBAL, "tune.quic.max-frame-loss", cfg_parse_quic_tune_setting },
+ { CFG_GLOBAL, "tune.quic.reorder-ratio", cfg_parse_quic_tune_setting },
+ { CFG_GLOBAL, "tune.quic.retry-threshold", cfg_parse_quic_tune_setting },
+ { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_zero_copy_fwd_snd },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/cfgparse-ssl.c b/src/cfgparse-ssl.c
new file mode 100644
index 0000000..5666336
--- /dev/null
+++ b/src/cfgparse-ssl.c
@@ -0,0 +1,2382 @@
+/*
+ *
+ * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Configuration parsing for SSL.
+ * This file is split in 3 parts:
+ * - global section parsing
+ * - bind keyword parsing
+ * - server keyword parsing
+ *
+ * Please insert the new keywords at the right place
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <haproxy/api.h>
+#include <haproxy/base64.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/listener.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/tools.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_ocsp.h>
+
+
+/****************** Global Section Parsing ********************************************/
+
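+/* parse the global directive taking a directory of issuer certificates
+ * (presumably registered as "issuers-chain-path" later in this file),
+ * e.g. (illustrative):
+ *   issuers-chain-path /etc/haproxy/issuers
+ */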
+static int ssl_load_global_issuers_from_path(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *path;
+ struct dirent **de_list;
+ int i, n;
+ struct stat buf;
+ char *end;
+ char fp[MAXPATHLEN+1];
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ path = args[1];
+ if (*path == 0 || stat(path, &buf)) {
+ memprintf(err, "%sglobal statement '%s' expects a directory path as an argument.\n",
+ err && *err ? *err : "", args[0]);
+ return -1;
+ }
+ if (S_ISDIR(buf.st_mode) == 0) {
+ memprintf(err, "%sglobal statement '%s': %s is not a directory.\n",
+ err && *err ? *err : "", args[0], path);
+ return -1;
+ }
+
+ /* strip trailing slashes, including first one */
+ for (end = path + strlen(path) - 1; end >= path && *end == '/'; end--)
+ *end = 0;
+ /* path already parsed? */
+ if (global_ssl.issuers_chain_path && strcmp(global_ssl.issuers_chain_path, path) == 0)
+ return 0;
+ /* overwrite old issuers_chain_path */
+ free(global_ssl.issuers_chain_path);
+ global_ssl.issuers_chain_path = strdup(path);
+ ssl_free_global_issuers();
+
+ n = scandir(path, &de_list, 0, alphasort);
+ if (n < 0) {
+ memprintf(err, "%sglobal statement '%s': unable to scan directory '%s' : %s.\n",
+ err && *err ? *err : "", args[0], path, strerror(errno));
+ return -1;
+ }
+ for (i = 0; i < n; i++) {
+ struct dirent *de = de_list[i];
+ BIO *in = NULL;
+ char *warn = NULL;
+
+ snprintf(fp, sizeof(fp), "%s/%s", path, de->d_name);
+ free(de);
+ if (stat(fp, &buf) != 0) {
+ ha_warning("unable to stat certificate from file '%s' : %s.\n", fp, strerror(errno));
+ goto next;
+ }
+ if (!S_ISREG(buf.st_mode))
+ goto next;
+
+ in = BIO_new(BIO_s_file());
+ if (in == NULL)
+ goto next;
+ if (BIO_read_filename(in, fp) <= 0)
+ goto next;
+ ssl_load_global_issuer_from_BIO(in, fp, &warn);
+ if (warn) {
+ ha_warning("%s", warn);
+ ha_free(&warn);
+ }
+ next:
+ if (in)
+ BIO_free(in);
+ }
+ free(de_list);
+
+ return 0;
+}
+
+/* parse the "ssl-mode-async" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_async(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+#ifdef SSL_MODE_ASYNC
+ global_ssl.async = 1;
+ global.ssl_used_async_engines = nb_engines;
+ return 0;
+#else
+ memprintf(err, "'%s': openssl library does not support async mode", args[0]);
+ return -1;
+#endif
+}
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+/* parse the "ssl-engine" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_engine(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *algo;
+ int ret = -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a valid engine name as an argument.", args[0]);
+ return ret;
+ }
+
+ if (*(args[2]) == 0) {
+ /* if no list of algorithms is given, it defaults to ALL */
+ algo = strdup("ALL");
+ goto add_engine;
+ }
+
+ /* otherwise the expected format is ssl-engine <engine_name> algo <list of algo> */
+ if (strcmp(args[2], "algo") != 0) {
+		memprintf(err, "global statement '%s' expects the 'algo' keyword.", args[0]);
+ return ret;
+ }
+
+ if (*(args[3]) == 0) {
+ memprintf(err, "global statement '%s' expects algorithm names as an argument.", args[0]);
+ return ret;
+ }
+ algo = strdup(args[3]);
+
+add_engine:
+ if (ssl_init_single_engine(args[1], algo)==0) {
+ openssl_engines_initialized++;
+ ret = 0;
+ }
+ free(algo);
+ return ret;
+}
+#endif
+
+#ifdef HAVE_SSL_PROVIDERS
+/* parse the "ssl-propquery" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_propquery(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int ret = -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a property string as an argument.", args[0]);
+ return ret;
+ }
+
+ if (EVP_set_default_properties(NULL, args[1]))
+ ret = 0;
+
+ return ret;
+}
+
+/* parse the "ssl-provider" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_provider(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int ret = -1;
+
+ if (*(args[1]) == 0) {
+		memprintf(err, "global statement '%s' expects a valid provider name as an argument.", args[0]);
+ return ret;
+ }
+
+ if (ssl_init_provider(args[1]) == 0)
+ ret = 0;
+
+ return ret;
+}
+
+/* parse the "ssl-provider-path" keyword in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ssl_provider_path(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a directory path as an argument.", args[0]);
+ return -1;
+ }
+
+ OSSL_PROVIDER_set_default_search_path(NULL, args[1]);
+
+ return 0;
+}
+#endif
+
+/* parse the "ssl-default-bind-ciphers" / "ssl-default-server-ciphers" keywords
+ * in global section. Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ciphers(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+
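+	/* args[0][12] is the first character after "ssl-default-": 'b' for
+	 * the "bind" variant, 's' for the "server" variant.
+	 */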
+ target = (args[0][12] == 'b') ? &global_ssl.listen_default_ciphers : &global_ssl.connect_default_ciphers;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a cipher suite as an argument.", args[0]);
+ return -1;
+ }
+
+ free(*target);
+ *target = strdup(args[1]);
+ return 0;
+}
+
+/* parse the "ssl-default-bind-ciphersuites" / "ssl-default-server-ciphersuites" keywords
+ * in global section. Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ciphersuites(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ char **target;
+
+ target = (args[0][12] == 'b') ? &global_ssl.listen_default_ciphersuites : &global_ssl.connect_default_ciphersuites;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a cipher suite as an argument.", args[0]);
+ return -1;
+ }
+
+ free(*target);
+ *target = strdup(args[1]);
+ return 0;
+#else /* ! HAVE_SSL_CTX_SET_CIPHERSUITES */
+ memprintf(err, "'%s' not supported for your SSL library (%s).", args[0], OPENSSL_VERSION_TEXT);
+ return -1;
+
+#endif
+}
+
+#if defined(SSL_CTX_set1_curves_list)
+/*
+ * parse the "ssl-default-bind-curves" keyword in a global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_curves(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+ target = (args[0][12] == 'b') ? &global_ssl.listen_default_curves : &global_ssl.connect_default_curves;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+		memprintf(err, "global statement '%s' expects a curves suite as an argument.", args[0]);
+ return -1;
+ }
+
+ free(*target);
+ *target = strdup(args[1]);
+ return 0;
+}
+#endif
+
+#if defined(SSL_CTX_set1_sigalgs_list)
+/*
+ * parse the "ssl-default-bind-sigalgs" and "ssl-default-server-sigalgs" keyword in a global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_sigalgs(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+
+ target = (args[0][12] == 'b') ? &global_ssl.listen_default_sigalgs : &global_ssl.connect_default_sigalgs;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+		memprintf(err, "global statement '%s' expects a signature algorithm list as an argument.", args[0]);
+ return -1;
+ }
+
+ free(*target);
+ *target = strdup(args[1]);
+ return 0;
+}
+#endif
+
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+/*
+ * parse the "ssl-default-bind-client-sigalgs" keyword in a global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_client_sigalgs(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+
+ target = (args[0][12] == 'b') ? &global_ssl.listen_default_client_sigalgs : &global_ssl.connect_default_client_sigalgs;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+		memprintf(err, "global statement '%s' expects signature algorithms as an argument.", args[0]);
+ return -1;
+ }
+
+ free(*target);
+ *target = strdup(args[1]);
+ return 0;
+}
+#endif
+
+/* parse various global tune.ssl settings consisting in positive integers.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_int(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int *target;
+
+ if (strcmp(args[0], "tune.ssl.cachesize") == 0)
+ target = &global.tune.sslcachesize;
+ else if (strcmp(args[0], "tune.ssl.maxrecord") == 0)
+ target = (int *)&global_ssl.max_record;
+ else if (strcmp(args[0], "tune.ssl.hard-maxrecord") == 0)
+ target = (int *)&global_ssl.hard_max_record;
+ else if (strcmp(args[0], "tune.ssl.ssl-ctx-cache-size") == 0)
+ target = &global_ssl.ctx_cache;
+ else if (strcmp(args[0], "maxsslconn") == 0)
+ target = &global.maxsslconn;
+ else if (strcmp(args[0], "tune.ssl.capture-buffer-size") == 0)
+ target = &global_ssl.capture_buffer_size;
+ else if (strcmp(args[0], "tune.ssl.capture-cipherlist-size") == 0) {
+ target = &global_ssl.capture_buffer_size;
+		ha_warning("parsing [%s:%d]: '%s' is deprecated. Please use 'tune.ssl.capture-buffer-size' instead.\n",
+ file, line, args[0]);
+ }
+ else {
+		memprintf(err, "'%s' keyword not handled (please report this bug).", args[0]);
+ return -1;
+ }
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects an integer argument.", args[0]);
+ return -1;
+ }
+
+ *target = atoi(args[1]);
+ if (*target < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static int ssl_parse_global_capture_buffer(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int ret;
+
+ ret = ssl_parse_global_int(args, section_type, curpx, defpx, file, line, err);
+ if (ret != 0)
+ return ret;
+
+ if (pool_head_ssl_capture) {
+ memprintf(err, "'%s' is already configured.", args[0]);
+ return -1;
+ }
+
+ pool_head_ssl_capture = create_pool("ssl-capture", sizeof(struct ssl_capture) + global_ssl.capture_buffer_size, MEM_F_SHARED);
+ if (!pool_head_ssl_capture) {
+ memprintf(err, "Out of memory error.");
+ return -1;
+ }
+ return 0;
+}
+
+/* init the SSLKEYLOGFILE pool */
+#ifdef HAVE_SSL_KEYLOG
+static int ssl_parse_global_keylog(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global_ssl.keylog = 1;
+ else if (strcmp(args[1], "off") == 0)
+ global_ssl.keylog = 0;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+
+ if (pool_head_ssl_keylog) /* already configured */
+ return 0;
+
+ pool_head_ssl_keylog = create_pool("ssl-keylogfile", sizeof(struct ssl_keylog), MEM_F_SHARED);
+ if (!pool_head_ssl_keylog) {
+ memprintf(err, "Out of memory error.");
+ return -1;
+ }
+
+ pool_head_ssl_keylog_str = create_pool("ssl-keylogfile-str", sizeof(char) * SSL_KEYLOG_MAX_SECRET_SIZE, MEM_F_SHARED);
+ if (!pool_head_ssl_keylog_str) {
+ memprintf(err, "Out of memory error.");
+ return -1;
+ }
+
+ return 0;
+}
+#else
+static int ssl_parse_global_keylog(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ memprintf(err, "'%s' requires at least OpenSSL 1.1.1.", args[0]);
+ return -1;
+}
+#endif
+
+/* parse "ssl.force-private-cache".
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_private_cache(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+
+ global_ssl.private_cache = 1;
+ return 0;
+}
+
+/* parse "ssl.lifetime".
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_lifetime(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+		memprintf(err, "'%s' expects an SSL session <lifetime> in seconds as an argument.", args[0]);
+ return -1;
+ }
+
+ res = parse_time_err(args[1], &global_ssl.life_time, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to <%s> (maximum value is 2147483647 s or ~68 years).",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to <%s> (minimum non-null value is 1 s).",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.", *res, args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+#ifndef OPENSSL_NO_DH
+/* parse "ssl-dh-param-file".
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_dh_param_file(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a file path as an argument.", args[0]);
+ return -1;
+ }
+
+ if (ssl_sock_load_global_dh_param_from_file(args[1])) {
+ memprintf(err, "'%s': unable to load DH parameters from file <%s>.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+/* parse "ssl.default-dh-param".
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_default_dh(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects an integer argument.", args[0]);
+ return -1;
+ }
+
+ global_ssl.default_dh_param = atoi(args[1]);
+ if (global_ssl.default_dh_param < 1024) {
+ memprintf(err, "'%s' expects a value >= 1024.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+#endif
+
+
+/*
+ * parse "ssl-load-extra-files".
+ * multiple arguments are allowed: "bundle", "sctl", "ocsp", "issuer", "all", "none"
+ */
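+/* e.g. (illustrative): "ssl-load-extra-files issuer ocsp" enables only
+ * these two extra lookups, while "all" and "none" must be used alone.
+ */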
+static int ssl_parse_global_extra_files(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int i;
+ int gf = SSL_GF_NONE;
+
+ if (*(args[1]) == 0)
+ goto err_arg;
+
+ for (i = 1; *args[i]; i++) {
+
+ if (strcmp("bundle", args[i]) == 0) {
+ gf |= SSL_GF_BUNDLE;
+
+ } else if (strcmp("sctl", args[i]) == 0) {
+ gf |= SSL_GF_SCTL;
+
+ } else if (strcmp("ocsp", args[i]) == 0){
+ gf |= SSL_GF_OCSP;
+
+ } else if (strcmp("issuer", args[i]) == 0){
+ gf |= SSL_GF_OCSP_ISSUER;
+
+ } else if (strcmp("key", args[i]) == 0) {
+ gf |= SSL_GF_KEY;
+
+ } else if (strcmp("none", args[i]) == 0) {
+ if (gf != SSL_GF_NONE)
+ goto err_alone;
+ gf = SSL_GF_NONE;
+ i++;
+ break;
+
+ } else if (strcmp("all", args[i]) == 0) {
+ if (gf != SSL_GF_NONE)
+ goto err_alone;
+ gf = SSL_GF_ALL;
+ i++;
+ break;
+ } else {
+ goto err_arg;
+ }
+ }
+ /* break from loop but there are still arguments */
+ if (*args[i])
+ goto err_alone;
+
+ global_ssl.extra_files = gf;
+
+ return 0;
+
+err_alone:
+	memprintf(err, "'%s' : 'none' and 'all' can only be used alone", args[0]);
+ return -1;
+
+err_arg:
+	memprintf(err, "'%s' expects one or multiple arguments (none, all, bundle, sctl, ocsp, issuer, key).", args[0]);
+ return -1;
+}
+
+
+/* parse 'ssl-load-extra-del-ext' */
+static int ssl_parse_global_extra_noext(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ global_ssl.extra_files_noext = 1;
+ return 0;
+}
+
+
+/***************************** Bind keyword Parsing ********************************************/
+
+/* for ca-file and ca-verify-file */
+static int ssl_bind_parse_ca_file_common(char **args, int cur_arg, char **ca_file_p, int from_cli, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CAfile path", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[cur_arg + 1] != '/') && (*args[cur_arg + 1] != '@') && global_ssl.ca_base)
+ memprintf(ca_file_p, "%s/%s", global_ssl.ca_base, args[cur_arg + 1]);
+ else
+ memprintf(ca_file_p, "%s", args[cur_arg + 1]);
+
+ if (!ssl_store_load_locations_file(*ca_file_p, !from_cli, CAFILE_CERT)) {
+ memprintf(err, "'%s' : unable to load %s", args[cur_arg], *ca_file_p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+/* parse the "ca-file" bind keyword */
+static int ssl_bind_parse_ca_file(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ return ssl_bind_parse_ca_file_common(args, cur_arg, &conf->ca_file, from_cli, err);
+}
+static int bind_parse_ca_file(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ca_file(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "ca-verify-file" bind keyword */
+static int ssl_bind_parse_ca_verify_file(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ return ssl_bind_parse_ca_file_common(args, cur_arg, &conf->ca_verify_file, from_cli, err);
+}
+static int bind_parse_ca_verify_file(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ca_verify_file(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "ca-sign-file" bind keyword */
+static int bind_parse_ca_sign_file(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CAfile path", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[cur_arg + 1] != '/') && (*args[cur_arg + 1] != '@') && global_ssl.ca_base)
+ memprintf(&conf->ca_sign_file, "%s/%s", global_ssl.ca_base, args[cur_arg + 1]);
+ else
+ memprintf(&conf->ca_sign_file, "%s", args[cur_arg + 1]);
+
+ return 0;
+}
+
+/* parse the "ca-sign-pass" bind keyword */
+static int bind_parse_ca_sign_pass(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CAkey password", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ memprintf(&conf->ca_sign_pass, "%s", args[cur_arg + 1]);
+ return 0;
+}
+
+/* parse the "ciphers" bind keyword */
+static int ssl_bind_parse_ciphers(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing cipher suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(conf->ciphers);
+ conf->ciphers = strdup(args[cur_arg + 1]);
+ return 0;
+}
+static int bind_parse_ciphers(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ciphers(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "ciphersuites" bind keyword */
+static int ssl_bind_parse_ciphersuites(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing cipher suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(conf->ciphersuites);
+ conf->ciphersuites = strdup(args[cur_arg + 1]);
+ return 0;
+#else
+ memprintf(err, "'%s' keyword not supported for this SSL library version (%s).", args[cur_arg], OPENSSL_VERSION_TEXT);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+static int bind_parse_ciphersuites(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ciphersuites(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "crt" bind keyword. Returns a set of ERR_* flags possibly with an error in <err>. */
+static int bind_parse_crt(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ char path[MAXPATHLEN];
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing certificate location", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[cur_arg + 1] != '/' ) && global_ssl.crt_base) {
+ if ((strlen(global_ssl.crt_base) + 1 + strlen(args[cur_arg + 1]) + 1) > sizeof(path) ||
+ snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, args[cur_arg + 1]) > sizeof(path)) {
+ memprintf(err, "'%s' : path too long", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return ssl_sock_load_cert(path, conf, err);
+ }
+
+ return ssl_sock_load_cert(args[cur_arg + 1], conf, err);
+}
+
+/* parse the "crt-list" bind keyword. Returns a set of ERR_* flags possibly with an error in <err>. */
+static int bind_parse_crt_list(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int err_code;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing certificate location", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ err_code = ssl_sock_load_cert_list_file(args[cur_arg + 1], 0, conf, px, err);
+ if (err_code)
+ memprintf(err, "'%s' : %s", args[cur_arg], *err);
+
+ return err_code;
+}
+
+/* parse the "crl-file" bind keyword */
+static int ssl_bind_parse_crl_file(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#ifndef X509_V_FLAG_CRL_CHECK
+ memprintf(err, "'%s' : library does not support CRL verify", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CRLfile path", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[cur_arg + 1] != '/') && (*args[cur_arg + 1] != '@') && global_ssl.ca_base)
+ memprintf(&conf->crl_file, "%s/%s", global_ssl.ca_base, args[cur_arg + 1]);
+ else
+ memprintf(&conf->crl_file, "%s", args[cur_arg + 1]);
+
+ if (!ssl_store_load_locations_file(conf->crl_file, !from_cli, CAFILE_CRL)) {
+ memprintf(err, "'%s' : unable to load %s", args[cur_arg], conf->crl_file);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+#endif
+}
+static int bind_parse_crl_file(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_crl_file(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "curves" bind keyword keyword */
+static int ssl_bind_parse_curves(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#if defined(SSL_CTX_set1_curves_list)
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing curve suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ conf->curves = strdup(args[cur_arg + 1]);
+ return 0;
+#else
+ memprintf(err, "'%s' : library does not support curve suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+static int bind_parse_curves(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_curves(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "sigalgs" bind keyword */
+static int ssl_bind_parse_sigalgs(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#if defined(SSL_CTX_set1_sigalgs_list)
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing signature algorithm list", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ conf->sigalgs = strdup(args[cur_arg + 1]);
+ return 0;
+#else
+ memprintf(err, "'%s' : library does not support setting signature algorithms", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+static int bind_parse_sigalgs(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_sigalgs(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "client-sigalgs" bind keyword */
+static int ssl_bind_parse_client_sigalgs(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing signature algorithm list", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ conf->client_sigalgs = strdup(args[cur_arg + 1]);
+ return 0;
+#else
+ memprintf(err, "'%s' : library does not support setting signature algorithms", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+static int bind_parse_client_sigalgs(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_client_sigalgs(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+
+/* parse the "ecdhe" bind keyword keyword */
+static int ssl_bind_parse_ecdhe(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#if !defined(SSL_CTX_set_tmp_ecdh)
+ memprintf(err, "'%s' : library does not support elliptic curve Diffie-Hellman (too old)", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#elif defined(OPENSSL_NO_ECDH)
+ memprintf(err, "'%s' : library does not support elliptic curve Diffie-Hellman (disabled via OPENSSL_NO_ECDH)", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing named curve", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->ecdhe = strdup(args[cur_arg + 1]);
+
+ return 0;
+#endif
+}
+static int bind_parse_ecdhe(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_ecdhe(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "crt-ignore-err" and "ca-ignore-err" bind keywords */
+static int bind_parse_ignore_err(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int code;
+ char *s1 = NULL, *s2 = NULL;
+ char *token = NULL;
+ char *p = args[cur_arg + 1];
+ char *str;
+ unsigned long long *ignerr = conf->crt_ignerr_bitfield;
+
+ if (!*p) {
+ memprintf(err, "'%s' : missing error IDs list", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[cur_arg], "ca-ignore-err") == 0)
+ ignerr = conf->ca_ignerr_bitfield;
+
+ if (strcmp(p, "all") == 0) {
+ cert_ignerr_bitfield_set_all(ignerr);
+ return 0;
+ }
+
+ /* copy the string to be able to dump the complete one in case of
+ * error, because strtok_r is writing \0 inside. */
+ str = strdup(p);
+ if (!str) {
+ memprintf(err, "'%s' : Could not allocate memory", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ s1 = str;
+ while ((token = strtok_r(s1, ",", &s2))) {
+ s1 = NULL;
+ if (isdigit((int)*token)) {
+ code = atoi(token);
+ if ((code <= 0) || (code > SSL_MAX_VFY_ERROR_CODE)) {
+ memprintf(err, "'%s' : ID '%d' out of range (1..%d) in error IDs list '%s'",
+ args[cur_arg], code, SSL_MAX_VFY_ERROR_CODE, args[cur_arg + 1]);
+ free(str);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ } else {
+ code = x509_v_err_str_to_int(token);
+ if (code < 0) {
+ memprintf(err, "'%s' : error constant '%s' unknown in error IDs list '%s'",
+ args[cur_arg], token, args[cur_arg + 1]);
+ free(str);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+ cert_ignerr_bitfield_set(ignerr, code);
+ }
+
+ free(str);
+ return 0;
+}
+
+/* parse tls_method_options "no-xxx" and "force-xxx" */
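+/* e.g. (illustrative): "no-tlsv10" sets the corresponding exclusion flag
+ * while "force-tlsv12" pins both the minimum and maximum versions.
+ */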
+static int parse_tls_method_options(char *arg, struct tls_version_filter *methods, char **err)
+{
+ uint16_t v;
+ char *p;
+ p = strchr(arg, '-');
+ if (!p)
+ goto fail;
+ p++;
+ if (strcmp(p, "sslv3") == 0)
+ v = CONF_SSLV3;
+ else if (strcmp(p, "tlsv10") == 0)
+ v = CONF_TLSV10;
+ else if (strcmp(p, "tlsv11") == 0)
+ v = CONF_TLSV11;
+ else if (strcmp(p, "tlsv12") == 0)
+ v = CONF_TLSV12;
+ else if (strcmp(p, "tlsv13") == 0)
+ v = CONF_TLSV13;
+ else
+ goto fail;
+ if (!strncmp(arg, "no-", 3))
+ methods->flags |= methodVersions[v].flag;
+ else if (!strncmp(arg, "force-", 6))
+ methods->min = methods->max = v;
+ else
+ goto fail;
+ return 0;
+ fail:
+ memprintf(err, "'%s' : option not implemented", arg);
+ return ERR_ALERT | ERR_FATAL;
+}
+
+static int bind_parse_tls_method_options(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return parse_tls_method_options(args[cur_arg], &conf->ssl_conf.ssl_methods, err);
+}
+
+static int srv_parse_tls_method_options(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ return parse_tls_method_options(args[*cur_arg], &newsrv->ssl_ctx.methods, err);
+}
+
+/* parse tls_method min/max: "ssl-min-ver" and "ssl-max-ver" */
+static int parse_tls_method_minmax(char **args, int cur_arg, struct tls_version_filter *methods, char **err)
+{
+ uint16_t i, v = 0;
+ char *argv = args[cur_arg + 1];
+ if (!*argv) {
+ memprintf(err, "'%s' : missing the ssl/tls version", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ if (strcmp(argv, methodVersions[i].name) == 0)
+ v = i;
+ if (!v) {
+ memprintf(err, "'%s' : unknown ssl/tls version", args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (strcmp("ssl-min-ver", args[cur_arg]) == 0)
+ methods->min = v;
+ else if (strcmp("ssl-max-ver", args[cur_arg]) == 0)
+ methods->max = v;
+ else {
+ memprintf(err, "'%s' : option not implemented", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+static int ssl_bind_parse_tls_method_minmax(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ int ret;
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x10101000L) && !defined(OPENSSL_IS_BORINGSSL)
+	ha_warning("crt-list: ssl-min-ver and ssl-max-ver are not supported with this OpenSSL version (skipped).\n");
+#endif
+ ret = parse_tls_method_minmax(args, cur_arg, &conf->ssl_methods_cfg, err);
+ if (ret != ERR_NONE)
+ return ret;
+
+ conf->ssl_methods.min = conf->ssl_methods_cfg.min;
+ conf->ssl_methods.max = conf->ssl_methods_cfg.max;
+
+ return ret;
+}
+static int bind_parse_tls_method_minmax(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return parse_tls_method_minmax(args, cur_arg, &conf->ssl_conf.ssl_methods, err);
+}
+
+static int srv_parse_tls_method_minmax(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ return parse_tls_method_minmax(args, *cur_arg, &newsrv->ssl_ctx.methods, err);
+}
+
+/* parse the "no-tls-tickets" bind keyword */
+static int bind_parse_no_tls_tickets(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->ssl_options |= BC_SSL_O_NO_TLS_TICKETS;
+ return 0;
+}
+
+/* parse the "allow-0rtt" bind keyword */
+static int ssl_bind_parse_allow_0rtt(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ conf->early_data = 1;
+ return 0;
+}
+
+static int bind_parse_allow_0rtt(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->ssl_conf.early_data = 1;
+ return 0;
+}
+
+/* parse the "npn" bind keyword */
+static int ssl_bind_parse_npn(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ char *p1, *p2;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing the comma-delimited NPN protocol suite", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(conf->npn_str);
+
+ /* the NPN string is built as a suite of (<len> <name>)*,
+ * so we reuse each comma to store the next <len> and need
+ * one more for the end of the string.
+ */
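+	/* e.g. (illustrative): "spdy/2,http/1.1" is encoded as
+	 * "\x06spdy/2\x08http/1.1".
+	 */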
+ conf->npn_len = strlen(args[cur_arg + 1]) + 1;
+ conf->npn_str = calloc(1, conf->npn_len + 1);
+ if (!conf->npn_str) {
+ memprintf(err, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ memcpy(conf->npn_str + 1, args[cur_arg + 1], conf->npn_len);
+
+ /* replace commas with the name length */
+ p1 = conf->npn_str;
+ p2 = p1 + 1;
+ while (1) {
+ p2 = memchr(p1 + 1, ',', conf->npn_str + conf->npn_len - (p1 + 1));
+ if (!p2)
+ p2 = p1 + 1 + strlen(p1 + 1);
+
+ if (p2 - (p1 + 1) > 255) {
+ *p2 = '\0';
+ memprintf(err, "'%s' : NPN protocol name too long : '%s'", args[cur_arg], p1 + 1);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ *p1 = p2 - (p1 + 1);
+ p1 = p2;
+
+ if (!*p2)
+ break;
+
+ *(p2++) = '\0';
+ }
+ return 0;
+#else
+ memprintf(err, "'%s' : library does not support TLS NPN extension", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+static int bind_parse_npn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_npn(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+
+/* Parses a alpn string and converts it to the right format for the SSL api */
+int ssl_sock_parse_alpn(char *arg, char **alpn_str, int *alpn_len, char **err)
+{
+ char *p1, *p2, *alpn = NULL;
+ int len, ret = 0;
+
+ *alpn_str = NULL;
+ *alpn_len = 0;
+
+ if (!*arg) {
+ memprintf(err, "missing the comma-delimited ALPN protocol suite");
+ goto error;
+ }
+
+ /* the ALPN string is built as a suite of (<len> <name>)*,
+ * so we reuse each comma to store the next <len> and need
+ * one more for the end of the string.
+ */
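+	/* e.g. (illustrative): "h2,http/1.1" is encoded as
+	 * "\x02h2\x08http/1.1".
+	 */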
+ len = strlen(arg) + 1;
+ alpn = calloc(1, len+1);
+ if (!alpn) {
+ memprintf(err, "'%s' : out of memory", arg);
+ goto error;
+ }
+ memcpy(alpn+1, arg, len);
+
+ /* replace commas with the name length */
+ p1 = alpn;
+ p2 = p1 + 1;
+ while (1) {
+ p2 = memchr(p1 + 1, ',', alpn + len - (p1 + 1));
+ if (!p2)
+ p2 = p1 + 1 + strlen(p1 + 1);
+
+ if (p2 - (p1 + 1) > 255) {
+ *p2 = '\0';
+ memprintf(err, "ALPN protocol name too long : '%s'", p1 + 1);
+ goto error;
+ }
+
+ *p1 = p2 - (p1 + 1);
+ p1 = p2;
+
+ if (!*p2)
+ break;
+
+ *(p2++) = '\0';
+ }
+
+ *alpn_str = alpn;
+ *alpn_len = len;
+
+ out:
+ return ret;
+
+ error:
+ free(alpn);
+ ret = ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* parse the "alpn" bind keyword */
+static int ssl_bind_parse_alpn(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ int ret;
+
+ free(conf->alpn_str);
+
+ ret = ssl_sock_parse_alpn(args[cur_arg + 1], &conf->alpn_str, &conf->alpn_len, err);
+ if (ret)
+ memprintf(err, "'%s' : %s", args[cur_arg], *err);
+ return ret;
+#else
+ memprintf(err, "'%s' : library does not support TLS ALPN extension", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+static int bind_parse_alpn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_alpn(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "ssl" bind keyword */
+static int bind_parse_ssl(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->options |= BC_O_USE_SSL;
+
+ if (global_ssl.listen_default_ciphers && !conf->ssl_conf.ciphers)
+ conf->ssl_conf.ciphers = strdup(global_ssl.listen_default_ciphers);
+#if defined(SSL_CTX_set1_curves_list)
+ if (global_ssl.listen_default_curves && !conf->ssl_conf.curves)
+ conf->ssl_conf.curves = strdup(global_ssl.listen_default_curves);
+#endif
+#if defined(SSL_CTX_set1_sigalgs_list)
+ if (global_ssl.listen_default_sigalgs && !conf->ssl_conf.sigalgs)
+ conf->ssl_conf.sigalgs = strdup(global_ssl.listen_default_sigalgs);
+#endif
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (global_ssl.listen_default_ciphersuites && !conf->ssl_conf.ciphersuites)
+ conf->ssl_conf.ciphersuites = strdup(global_ssl.listen_default_ciphersuites);
+#endif
+ conf->ssl_options |= global_ssl.listen_default_ssloptions;
+ conf->ssl_conf.ssl_methods.flags |= global_ssl.listen_default_sslmethods.flags;
+ if (!conf->ssl_conf.ssl_methods.min)
+ conf->ssl_conf.ssl_methods.min = global_ssl.listen_default_sslmethods.min;
+ if (!conf->ssl_conf.ssl_methods.max)
+ conf->ssl_conf.ssl_methods.max = global_ssl.listen_default_sslmethods.max;
+
+ return 0;
+}
+
+/* parse the "prefer-client-ciphers" bind keyword */
+static int bind_parse_pcc(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->ssl_options |= BC_SSL_O_PREF_CLIE_CIPH;
+ return 0;
+}
+
+/* parse the "generate-certificates" bind keyword */
+static int bind_parse_generate_certs(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES)
+ conf->options |= BC_O_GENERATE_CERTS;
+#else
+ memprintf(err, "%sthis version of openssl cannot generate SSL certificates.\n",
+ err && *err ? *err : "");
+#endif
+ return 0;
+}
+
+/* parse the "strict-sni" bind keyword */
+static int bind_parse_strict_sni(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->strict_sni = 1;
+ return 0;
+}
+
+/* parse the "tls-ticket-keys" bind keyword */
+static int bind_parse_tls_ticket_keys(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+ FILE *f = NULL;
+ int i = 0;
+ char thisline[LINESIZE];
+ struct tls_keys_ref *keys_ref = NULL;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing TLS ticket keys file path", args[cur_arg]);
+ goto fail;
+ }
+
+ keys_ref = tlskeys_ref_lookup(args[cur_arg + 1]);
+ if (keys_ref) {
+ keys_ref->refcount++;
+ conf->keys_ref = keys_ref;
+ return 0;
+ }
+
+ keys_ref = calloc(1, sizeof(*keys_ref));
+ if (!keys_ref) {
+ memprintf(err, "'%s' : allocation error", args[cur_arg+1]);
+ goto fail;
+ }
+
+ keys_ref->tlskeys = malloc(TLS_TICKETS_NO * sizeof(union tls_sess_key));
+ if (!keys_ref->tlskeys) {
+ memprintf(err, "'%s' : allocation error", args[cur_arg+1]);
+ goto fail;
+ }
+
+ if ((f = fopen(args[cur_arg + 1], "r")) == NULL) {
+		memprintf(err, "'%s' : unable to load the TLS ticket keys file", args[cur_arg+1]);
+ goto fail;
+ }
+
+ keys_ref->filename = strdup(args[cur_arg + 1]);
+ if (!keys_ref->filename) {
+ memprintf(err, "'%s' : allocation error", args[cur_arg+1]);
+ goto fail;
+ }
+
+ keys_ref->key_size_bits = 0;
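+	/* the file is expected to contain one base64-encoded key per line;
+	 * all keys must decode to the same size (the 128-bit or the 256-bit
+	 * key structure), which is detected from the first key read.
+	 */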
+ while (fgets(thisline, sizeof(thisline), f) != NULL) {
+ int len = strlen(thisline);
+ int dec_size;
+
+ /* Strip newline characters from the end */
+		if (len && thisline[len - 1] == '\n')
+			thisline[--len] = 0;
+
+		if (len && thisline[len - 1] == '\r')
+			thisline[--len] = 0;
+
+ dec_size = base64dec(thisline, len, (char *) (keys_ref->tlskeys + i % TLS_TICKETS_NO), sizeof(union tls_sess_key));
+ if (dec_size < 0) {
+ memprintf(err, "'%s' : unable to decode base64 key on line %d", args[cur_arg+1], i + 1);
+ goto fail;
+ }
+ else if (!keys_ref->key_size_bits && (dec_size == sizeof(struct tls_sess_key_128))) {
+ keys_ref->key_size_bits = 128;
+ }
+ else if (!keys_ref->key_size_bits && (dec_size == sizeof(struct tls_sess_key_256))) {
+ keys_ref->key_size_bits = 256;
+ }
+ else if (((dec_size != sizeof(struct tls_sess_key_128)) && (dec_size != sizeof(struct tls_sess_key_256)))
+ || ((dec_size == sizeof(struct tls_sess_key_128) && (keys_ref->key_size_bits != 128)))
+ || ((dec_size == sizeof(struct tls_sess_key_256) && (keys_ref->key_size_bits != 256)))) {
+ memprintf(err, "'%s' : wrong sized key on line %d", args[cur_arg+1], i + 1);
+ goto fail;
+ }
+ i++;
+ }
+
+ if (i < TLS_TICKETS_NO) {
+		memprintf(err, "'%s' : please supply at least %d keys in the TLS ticket keys file", args[cur_arg+1], TLS_TICKETS_NO);
+ goto fail;
+ }
+
+ fclose(f);
+
+	/* Use the penultimate key for encryption; this also handles TLS_TICKETS_NO = 1 */
+ i -= 2;
+ keys_ref->tls_ticket_enc_index = i < 0 ? 0 : i % TLS_TICKETS_NO;
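+	/* e.g. with TLS_TICKETS_NO == 3 and three keys loaded, key #1 is used
+	 * to encrypt new tickets while keys #0 and #2 remain usable for
+	 * decryption.
+	 */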
+ keys_ref->unique_id = -1;
+ keys_ref->refcount = 1;
+ HA_RWLOCK_INIT(&keys_ref->lock);
+ conf->keys_ref = keys_ref;
+
+ LIST_INSERT(&tlskeys_reference, &keys_ref->list);
+
+ return 0;
+
+ fail:
+ if (f)
+ fclose(f);
+ if (keys_ref) {
+ free(keys_ref->filename);
+ free(keys_ref->tlskeys);
+ free(keys_ref);
+ }
+ return ERR_ALERT | ERR_FATAL;
+
+#else
+ memprintf(err, "'%s' : TLS ticket callback extension not supported", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif /* SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB */
+}
+
+/* parse the "verify" bind keyword */
+static int ssl_bind_parse_verify(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing verify method", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[cur_arg + 1], "none") == 0)
+ conf->verify = SSL_SOCK_VERIFY_NONE;
+ else if (strcmp(args[cur_arg + 1], "optional") == 0)
+ conf->verify = SSL_SOCK_VERIFY_OPTIONAL;
+ else if (strcmp(args[cur_arg + 1], "required") == 0)
+ conf->verify = SSL_SOCK_VERIFY_REQUIRED;
+ else {
+ memprintf(err, "'%s' : unknown verify method '%s', only 'none', 'optional', and 'required' are supported\n",
+ args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+static int bind_parse_verify(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_verify(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+/* parse the "no-alpn" ssl-bind keyword, storing an empty ALPN string */
+static int ssl_bind_parse_no_alpn(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ free(conf->alpn_str);
+ conf->alpn_len = 0;
+ conf->alpn_str = strdup("");
+
+ if (!conf->alpn_str) {
+ memprintf(err, "'%s' : out of memory", *args);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+/* parse the "no-alpn" bind keyword, storing an empty ALPN string */
+static int bind_parse_no_alpn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_no_alpn(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+
+/* parse the "no-ca-names" bind keyword */
+static int ssl_bind_parse_no_ca_names(char **args, int cur_arg, struct proxy *px, struct ssl_bind_conf *conf, int from_cli, char **err)
+{
+ conf->no_ca_names = 1;
+ return 0;
+}
+
+static int bind_parse_no_ca_names(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ return ssl_bind_parse_no_ca_names(args, cur_arg, px, &conf->ssl_conf, 0, err);
+}
+
+
+static int ssl_bind_parse_ocsp_update(char **args, int cur_arg, struct proxy *px,
+ struct ssl_bind_conf *ssl_conf, int from_cli, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : expecting <on|off>", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+		memprintf(err, "'%s' : expects an MSS with an absolute value between 1 and 65535", args[cur_arg]);
+
+ if (strcmp(args[cur_arg + 1], "on") == 0)
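+	/* note that negative values pass the check above on purpose: per the
+	 * documented "mss" behaviour they are meant to lower the advertised
+	 * MSS rather than to set it.
+	 */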
+ ssl_conf->ocsp_update = SSL_SOCK_OCSP_UPDATE_ON;
+ else if (strcmp(args[cur_arg + 1], "off") == 0)
+ ssl_conf->ocsp_update = SSL_SOCK_OCSP_UPDATE_OFF;
+ else {
+ memprintf(err, "'%s' : expecting <on|off>", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (ssl_conf->ocsp_update == SSL_SOCK_OCSP_UPDATE_ON) {
+ /* We might need to create the main ocsp update task */
+ int ret = ssl_create_ocsp_update_task(err);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+
+/***************************** "server" keywords Parsing ********************************************/
+
+/* parse the "npn" bind keyword */
+static int srv_parse_npn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ char *p1, *p2;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing the comma-delimited NPN protocol suite", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->ssl_ctx.npn_str);
+
+ /* the NPN string is built as a suite of (<len> <name>)*,
+ * so we reuse each comma to store the next <len> and need
+ * one more for the end of the string.
+ */
+ newsrv->ssl_ctx.npn_len = strlen(args[*cur_arg + 1]) + 1;
+ newsrv->ssl_ctx.npn_str = calloc(1, newsrv->ssl_ctx.npn_len + 1);
+ if (!newsrv->ssl_ctx.npn_str) {
+ memprintf(err, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ memcpy(newsrv->ssl_ctx.npn_str + 1, args[*cur_arg + 1],
+ newsrv->ssl_ctx.npn_len);
+
+ /* replace commas with the name length */
+ p1 = newsrv->ssl_ctx.npn_str;
+ p2 = p1 + 1;
+ while (1) {
+ p2 = memchr(p1 + 1, ',', newsrv->ssl_ctx.npn_str +
+ newsrv->ssl_ctx.npn_len - (p1 + 1));
+ if (!p2)
+ p2 = p1 + 1 + strlen(p1 + 1);
+
+ if (p2 - (p1 + 1) > 255) {
+ *p2 = '\0';
+ memprintf(err, "'%s' : NPN protocol name too long : '%s'", args[*cur_arg], p1 + 1);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ *p1 = p2 - (p1 + 1);
+ p1 = p2;
+
+ if (!*p2)
+ break;
+
+ *(p2++) = '\0';
+ }
+ return 0;
+#else
+ memprintf(err, "'%s' : library does not support TLS NPN extension", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+static int parse_alpn(char *alpn, char **out_alpn_str, int *out_alpn_len, char **err)
+{
+ free(*out_alpn_str);
+ return ssl_sock_parse_alpn(alpn, out_alpn_str, out_alpn_len, err);
+}
+#endif
+
+/* parse the "alpn" server keyword */
+static int srv_parse_alpn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ int ret = parse_alpn(args[*cur_arg + 1],
+ &newsrv->ssl_ctx.alpn_str,
+ &newsrv->ssl_ctx.alpn_len, err);
+ if (ret)
+ memprintf(err, "'%s' : %s", args[*cur_arg], *err);
+ return ret;
+#else
+ memprintf(err, "'%s' : library does not support TLS ALPN extension", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+/* parse the "check-alpn" server keyword */
+static int srv_parse_check_alpn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ int ret = parse_alpn(args[*cur_arg + 1],
+ &newsrv->check.alpn_str,
+ &newsrv->check.alpn_len, err);
+ if (ret)
+ memprintf(err, "'%s' : %s", args[*cur_arg], *err);
+ return ret;
+#else
+ memprintf(err, "'%s' : library does not support TLS ALPN extension", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+/* parse the "ca-file" server keyword */
+static int srv_parse_ca_file(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ const int create_if_none = newsrv->flags & SRV_F_DYNAMIC ? 0 : 1;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CAfile path", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[*cur_arg + 1] != '/') && (*args[*cur_arg + 1] != '@') && global_ssl.ca_base)
+ memprintf(&newsrv->ssl_ctx.ca_file, "%s/%s", global_ssl.ca_base, args[*cur_arg + 1]);
+ else
+ memprintf(&newsrv->ssl_ctx.ca_file, "%s", args[*cur_arg + 1]);
+
+ if (!ssl_store_load_locations_file(newsrv->ssl_ctx.ca_file, create_if_none, CAFILE_CERT)) {
+ memprintf(err, "'%s' : unable to load %s", args[*cur_arg], newsrv->ssl_ctx.ca_file);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "check-sni" server keyword */
+static int srv_parse_check_sni(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing SNI", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->check.sni = strdup(args[*cur_arg + 1]);
+ if (!newsrv->check.sni) {
+ memprintf(err, "'%s' : failed to allocate memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+	return 0;
+}
+
+/* common function to init a server's ssl_ctx from the global defaults;
+ * returns 0 on success and 1 on allocation failure.
+ */
+static int ssl_sock_init_srv(struct server *s)
+{
+	if (global_ssl.connect_default_ciphers && !s->ssl_ctx.ciphers) {
+		s->ssl_ctx.ciphers = strdup(global_ssl.connect_default_ciphers);
+		if (!s->ssl_ctx.ciphers)
+			return 1;
+	}
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (global_ssl.connect_default_ciphersuites && !s->ssl_ctx.ciphersuites) {
+ s->ssl_ctx.ciphersuites = strdup(global_ssl.connect_default_ciphersuites);
+ if (!s->ssl_ctx.ciphersuites)
+ return 1;
+ }
+#endif
+ s->ssl_ctx.options |= global_ssl.connect_default_ssloptions;
+ s->ssl_ctx.methods.flags |= global_ssl.connect_default_sslmethods.flags;
+
+ if (!s->ssl_ctx.methods.min)
+ s->ssl_ctx.methods.min = global_ssl.connect_default_sslmethods.min;
+
+ if (!s->ssl_ctx.methods.max)
+ s->ssl_ctx.methods.max = global_ssl.connect_default_sslmethods.max;
+
+#if defined(SSL_CTX_set1_sigalgs_list)
+ if (global_ssl.connect_default_sigalgs && !s->ssl_ctx.sigalgs) {
+ s->ssl_ctx.sigalgs = strdup(global_ssl.connect_default_sigalgs);
+ if (!s->ssl_ctx.sigalgs)
+ return 1;
+ }
+#endif
+
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ if (global_ssl.connect_default_client_sigalgs && !s->ssl_ctx.client_sigalgs) {
+ s->ssl_ctx.client_sigalgs = strdup(global_ssl.connect_default_client_sigalgs);
+ if (!s->ssl_ctx.client_sigalgs)
+ return 1;
+ }
+#endif
+
+#if defined(SSL_CTX_set1_curves_list)
+ if (global_ssl.connect_default_curves && !s->ssl_ctx.curves) {
+ s->ssl_ctx.curves = strdup(global_ssl.connect_default_curves);
+ if (!s->ssl_ctx.curves)
+ return 1;
+ }
+#endif
+
+ return 0;
+}
+
+/* parse the "check-ssl" server keyword */
+static int srv_parse_check_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->check.use_ssl = 1;
+ if (ssl_sock_init_srv(newsrv)) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "ciphers" server keyword */
+static int srv_parse_ciphers(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing cipher suite", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->ssl_ctx.ciphers);
+ newsrv->ssl_ctx.ciphers = strdup(args[*cur_arg + 1]);
+
+ if (!newsrv->ssl_ctx.ciphers) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "ciphersuites" server keyword */
+static int srv_parse_ciphersuites(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing cipher suite", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->ssl_ctx.ciphersuites);
+ newsrv->ssl_ctx.ciphersuites = strdup(args[*cur_arg + 1]);
+
+ if (!newsrv->ssl_ctx.ciphersuites) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+#else /* ! HAVE_SSL_CTX_SET_CIPHERSUITES */
+ memprintf(err, "'%s' not supported for your SSL library (%s).", args[*cur_arg], OPENSSL_VERSION_TEXT);
+ return ERR_ALERT | ERR_FATAL;
+
+#endif
+}
+
+/* parse the "client-sigalgs" server keyword */
+static int srv_parse_client_sigalgs(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifndef SSL_CTX_set1_client_sigalgs_list
+ memprintf(err, "'%s' : library does not support setting signature algorithms", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' : missing signature algorithm list", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->ssl_ctx.client_sigalgs = strdup(arg);
+ if (!newsrv->ssl_ctx.client_sigalgs) {
+ memprintf(err, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+#endif
+}
+
+
+/* parse the "crl-file" server keyword */
+static int srv_parse_crl_file(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifndef X509_V_FLAG_CRL_CHECK
+ memprintf(err, "'%s' : library does not support CRL verify", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ const int create_if_none = newsrv->flags & SRV_F_DYNAMIC ? 0 : 1;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing CRLfile path", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[*cur_arg + 1] != '/') && (*args[*cur_arg + 1] != '@') && global_ssl.ca_base)
+ memprintf(&newsrv->ssl_ctx.crl_file, "%s/%s", global_ssl.ca_base, args[*cur_arg + 1]);
+ else
+ memprintf(&newsrv->ssl_ctx.crl_file, "%s", args[*cur_arg + 1]);
+
+ if (!ssl_store_load_locations_file(newsrv->ssl_ctx.crl_file, create_if_none, CAFILE_CRL)) {
+ memprintf(err, "'%s' : unable to load %s", args[*cur_arg], newsrv->ssl_ctx.crl_file);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+#endif
+}
+
+/* parse the "curves" server keyword */
+static int srv_parse_curves(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifndef SSL_CTX_set1_curves_list
+ memprintf(err, "'%s' : library does not support setting curves list", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' : missing curves list", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->ssl_ctx.curves = strdup(arg);
+ if (!newsrv->ssl_ctx.curves) {
+ memprintf(err, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+#endif
+}
+
+/* parse the "crt" server keyword */
+static int srv_parse_crt(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing certificate file path", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((*args[*cur_arg + 1] != '/') && global_ssl.crt_base)
+ memprintf(&newsrv->ssl_ctx.client_crt, "%s/%s", global_ssl.crt_base, args[*cur_arg + 1]);
+ else
+ memprintf(&newsrv->ssl_ctx.client_crt, "%s", args[*cur_arg + 1]);
+
+ return 0;
+}
+
+/* parse the "no-check-ssl" server keyword */
+static int srv_parse_no_check_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->check.use_ssl = -1;
+ ha_free(&newsrv->ssl_ctx.ciphers);
+ newsrv->ssl_ctx.options &= ~global_ssl.connect_default_ssloptions;
+ return 0;
+}
+
+/* parse the "no-send-proxy-v2-ssl" server keyword */
+static int srv_parse_no_send_proxy_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->pp_opts &= ~SRV_PP_V2;
+ newsrv->pp_opts &= ~SRV_PP_V2_SSL;
+ return 0;
+}
+
+/* parse the "no-send-proxy-v2-ssl-cn" server keyword */
+static int srv_parse_no_send_proxy_cn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->pp_opts &= ~SRV_PP_V2;
+ newsrv->pp_opts &= ~SRV_PP_V2_SSL;
+ newsrv->pp_opts &= ~SRV_PP_V2_SSL_CN;
+ return 0;
+}
+
+/* parse the "no-ssl" server keyword */
+static int srv_parse_no_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+	/* if the default-server has use_ssl set, prepare the ssl settings */
+ if (newsrv->use_ssl == 1) {
+ if (ssl_sock_init_srv(newsrv)) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else {
+ ha_free(&newsrv->ssl_ctx.ciphers);
+ }
+ newsrv->use_ssl = -1;
+ return 0;
+}
+
+/* parse the "allow-0rtt" server keyword */
+static int srv_parse_allow_0rtt(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options |= SRV_SSL_O_EARLY_DATA;
+ return 0;
+}
+
+/* parse the "no-ssl-reuse" server keyword */
+static int srv_parse_no_ssl_reuse(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options |= SRV_SSL_O_NO_REUSE;
+ return 0;
+}
+
+/* parse the "no-tls-tickets" server keyword */
+static int srv_parse_no_tls_tickets(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options |= SRV_SSL_O_NO_TLS_TICKETS;
+ return 0;
+}
+
+/* parse the "send-proxy-v2-ssl" server keyword */
+static int srv_parse_send_proxy_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->pp_opts |= SRV_PP_V2;
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ return 0;
+}
+
+/* parse the "send-proxy-v2-ssl-cn" server keyword */
+static int srv_parse_send_proxy_cn(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->pp_opts |= SRV_PP_V2;
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_CN;
+ return 0;
+}
+
+/* parse the "sigalgs" server keyword */
+static int srv_parse_sigalgs(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifndef SSL_CTX_set1_sigalgs_list
+ memprintf(err, "'%s' : library does not support setting signature algorithms", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' : missing signature algorithm list", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->ssl_ctx.sigalgs = strdup(arg);
+ if (!newsrv->ssl_ctx.sigalgs) {
+ memprintf(err, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+#endif
+}
+
+/* parse the "sni" server keyword */
+static int srv_parse_sni(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+#ifndef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ memprintf(err, "'%s' : the current SSL library doesn't support the SNI TLS extension", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#else
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' : missing sni expression", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->sni_expr);
+ newsrv->sni_expr = strdup(arg);
+ if (!newsrv->sni_expr) {
+ memprintf(err, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+#endif
+}
+
+/* parse the "ssl" server keyword */
+static int srv_parse_ssl(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->use_ssl = 1;
+ if (ssl_sock_init_srv(newsrv)) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "ssl-reuse" server keyword */
+static int srv_parse_ssl_reuse(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options &= ~SRV_SSL_O_NO_REUSE;
+ return 0;
+}
+
+/* parse the "tls-tickets" server keyword */
+static int srv_parse_tls_tickets(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->ssl_ctx.options &= ~SRV_SSL_O_NO_TLS_TICKETS;
+ return 0;
+}
+
+/* parse the "verify" server keyword */
+static int srv_parse_verify(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing verify method", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[*cur_arg + 1], "none") == 0)
+ newsrv->ssl_ctx.verify = SSL_SOCK_VERIFY_NONE;
+ else if (strcmp(args[*cur_arg + 1], "required") == 0)
+ newsrv->ssl_ctx.verify = SSL_SOCK_VERIFY_REQUIRED;
+ else {
+ memprintf(err, "'%s' : unknown verify method '%s', only 'none' and 'required' are supported\n",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "verifyhost" server keyword */
+static int srv_parse_verifyhost(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing hostname to verify against", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->ssl_ctx.verify_host);
+ newsrv->ssl_ctx.verify_host = strdup(args[*cur_arg + 1]);
+
+ if (!newsrv->ssl_ctx.verify_host) {
+ memprintf(err, "'%s' : not enough memory", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "ssl-default-bind-options" keyword in global section */
+static int ssl_parse_default_bind_options(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err) {
+ int i = 1;
+
+ if (*(args[i]) == 0) {
+ memprintf(err, "global statement '%s' expects an option as an argument.", args[0]);
+ return -1;
+ }
+ while (*(args[i])) {
+ if (strcmp(args[i], "no-tls-tickets") == 0)
+ global_ssl.listen_default_ssloptions |= BC_SSL_O_NO_TLS_TICKETS;
+ else if (strcmp(args[i], "prefer-client-ciphers") == 0)
+ global_ssl.listen_default_ssloptions |= BC_SSL_O_PREF_CLIE_CIPH;
+ else if (strcmp(args[i], "ssl-min-ver") == 0 || strcmp(args[i], "ssl-max-ver") == 0) {
+ if (!parse_tls_method_minmax(args, i, &global_ssl.listen_default_sslmethods, err))
+ i++;
+ else {
+ memprintf(err, "%s on global statement '%s'.", *err, args[0]);
+ return -1;
+ }
+ }
+ else if (parse_tls_method_options(args[i], &global_ssl.listen_default_sslmethods, err)) {
+ memprintf(err, "unknown option '%s' on global statement '%s'.", args[i], args[0]);
+ return -1;
+ }
+ i++;
+ }
+ return 0;
+}
+
+/* parse the "ssl-default-server-options" keyword in global section */
+static int ssl_parse_default_server_options(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err) {
+ int i = 1;
+
+ if (*(args[i]) == 0) {
+ memprintf(err, "global statement '%s' expects an option as an argument.", args[0]);
+ return -1;
+ }
+ while (*(args[i])) {
+ if (strcmp(args[i], "no-tls-tickets") == 0)
+ global_ssl.connect_default_ssloptions |= SRV_SSL_O_NO_TLS_TICKETS;
+ else if (strcmp(args[i], "ssl-min-ver") == 0 || strcmp(args[i], "ssl-max-ver") == 0) {
+ if (!parse_tls_method_minmax(args, i, &global_ssl.connect_default_sslmethods, err))
+ i++;
+ else {
+ memprintf(err, "%s on global statement '%s'.", *err, args[0]);
+ return -1;
+ }
+ }
+ else if (parse_tls_method_options(args[i], &global_ssl.connect_default_sslmethods, err)) {
+ memprintf(err, "unknown option '%s' on global statement '%s'.", args[i], args[0]);
+ return -1;
+ }
+ i++;
+ }
+ return 0;
+}
+
+/* parse the "ca-base" / "crt-base" keywords in global section.
+ * Returns <0 on alert, >0 on warning, 0 on success.
+ */
+static int ssl_parse_global_ca_crt_base(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char **target;
+
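+	/* args[0] is either "ca-base" or "crt-base", so its second character
+	 * is enough to tell which keyword is being parsed.
+	 */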
+ target = (args[0][1] == 'a') ? &global_ssl.ca_base : &global_ssl.crt_base;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*target) {
+ memprintf(err, "'%s' already specified.", args[0]);
+ return -1;
+ }
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "global statement '%s' expects a directory path as an argument.", args[0]);
+ return -1;
+ }
+ *target = strdup(args[1]);
+ return 0;
+}
+
+/* parse the "ssl-skip-self-issued-ca" keyword in global section. */
+static int ssl_parse_skip_self_issued_ca(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+#ifdef SSL_CTX_build_cert_chain
+ global_ssl.skip_self_issued_ca = 1;
+ return 0;
+#else
+ memprintf(err, "global statement '%s' requires at least OpenSSL 1.0.2.", args[0]);
+ return -1;
+#endif
+}
+
+
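+/* parse the "tune.ssl.ocsp-update.maxdelay" keyword in global section */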
+static int ssl_parse_global_ocsp_maxdelay(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int value = 0;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects an integer argument.", args[0]);
+ return -1;
+ }
+
+ value = atoi(args[1]);
+ if (value < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+
+ if (global_ssl.ocsp_update.delay_min > value) {
+ memprintf(err, "'%s' can not be lower than tune.ssl.ocsp-update.mindelay.", args[0]);
+ return -1;
+ }
+
+ global_ssl.ocsp_update.delay_max = value;
+
+ return 0;
+}
+
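+/* parse the "tune.ssl.ocsp-update.mindelay" keyword in global section */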
+static int ssl_parse_global_ocsp_mindelay(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int value = 0;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects an integer argument.", args[0]);
+ return -1;
+ }
+
+ value = atoi(args[1]);
+ if (value < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+
+ if (value > global_ssl.ocsp_update.delay_max) {
+ memprintf(err, "'%s' can not be higher than tune.ssl.ocsp-update.maxdelay.", args[0]);
+ return -1;
+ }
+
+ global_ssl.ocsp_update.delay_min = value;
+
+ return 0;
+}
+
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+
+/* the <ssl_crtlist_kws> keywords are used for crt-list parsing, they *MUST* be safe
+ * with their proxy argument NULL and must only fill the ssl_bind_conf */
+struct ssl_crtlist_kw ssl_crtlist_kws[] = {
+ { "allow-0rtt", ssl_bind_parse_allow_0rtt, 0 }, /* allow 0-RTT */
+ { "alpn", ssl_bind_parse_alpn, 1 }, /* set ALPN supported protocols */
+ { "ca-file", ssl_bind_parse_ca_file, 1 }, /* set CAfile to process ca-names and verify on client cert */
+ { "ca-verify-file", ssl_bind_parse_ca_verify_file, 1 }, /* set CAverify file to process verify on client cert */
+ { "ciphers", ssl_bind_parse_ciphers, 1 }, /* set SSL cipher suite */
+ { "ciphersuites", ssl_bind_parse_ciphersuites, 1 }, /* set TLS 1.3 cipher suite */
+ { "client-sigalgs", ssl_bind_parse_client_sigalgs, 1 }, /* set SSL client signature algorithms */
+ { "crl-file", ssl_bind_parse_crl_file, 1 }, /* set certificate revocation list file use on client cert verify */
+ { "curves", ssl_bind_parse_curves, 1 }, /* set SSL curve suite */
+ { "ecdhe", ssl_bind_parse_ecdhe, 1 }, /* defines named curve for elliptic curve Diffie-Hellman */
+ { "no-alpn", ssl_bind_parse_no_alpn, 0 }, /* disable sending ALPN */
+ { "no-ca-names", ssl_bind_parse_no_ca_names, 0 }, /* do not send ca names to clients (ca_file related) */
+ { "npn", ssl_bind_parse_npn, 1 }, /* set NPN supported protocols */
+	{ "ocsp-update", ssl_bind_parse_ocsp_update, 1 }, /* ocsp update mode (on or off) */
+	{ "sigalgs", ssl_bind_parse_sigalgs, 1 }, /* set SSL signature algorithms */
+	{ "ssl-max-ver", ssl_bind_parse_tls_method_minmax, 1 }, /* maximum version */
+	{ "ssl-min-ver", ssl_bind_parse_tls_method_minmax, 1 }, /* minimum version */
+	{ "verify", ssl_bind_parse_verify, 1 }, /* set SSL verify method */
+ { NULL, NULL, 0 },
+};
+
+/* no initcall for ssl_bind_kws, these ones are parsed in the parser loop */
+
+static struct bind_kw_list bind_kws = { "SSL", { }, {
+ { "allow-0rtt", bind_parse_allow_0rtt, 0 }, /* Allow 0RTT */
+ { "alpn", bind_parse_alpn, 1 }, /* set ALPN supported protocols */
+ { "ca-file", bind_parse_ca_file, 1 }, /* set CAfile to process ca-names and verify on client cert */
+ { "ca-verify-file", bind_parse_ca_verify_file, 1 }, /* set CAverify file to process verify on client cert */
+ { "ca-ignore-err", bind_parse_ignore_err, 1 }, /* set error IDs to ignore on verify depth > 0 */
+ { "ca-sign-file", bind_parse_ca_sign_file, 1 }, /* set CAFile used to generate and sign server certs */
+ { "ca-sign-pass", bind_parse_ca_sign_pass, 1 }, /* set CAKey passphrase */
+ { "ciphers", bind_parse_ciphers, 1 }, /* set SSL cipher suite */
+ { "ciphersuites", bind_parse_ciphersuites, 1 }, /* set TLS 1.3 cipher suite */
+ { "client-sigalgs", bind_parse_client_sigalgs, 1 }, /* set SSL client signature algorithms */
+ { "crl-file", bind_parse_crl_file, 1 }, /* set certificate revocation list file use on client cert verify */
+ { "crt", bind_parse_crt, 1 }, /* load SSL certificates from this location */
+ { "crt-ignore-err", bind_parse_ignore_err, 1 }, /* set error IDs to ignore on verify depth == 0 */
+ { "crt-list", bind_parse_crt_list, 1 }, /* load a list of crt from this location */
+ { "curves", bind_parse_curves, 1 }, /* set SSL curve suite */
+ { "ecdhe", bind_parse_ecdhe, 1 }, /* defines named curve for elliptic curve Diffie-Hellman */
+ { "force-sslv3", bind_parse_tls_method_options, 0 }, /* force SSLv3 */
+ { "force-tlsv10", bind_parse_tls_method_options, 0 }, /* force TLSv10 */
+ { "force-tlsv11", bind_parse_tls_method_options, 0 }, /* force TLSv11 */
+ { "force-tlsv12", bind_parse_tls_method_options, 0 }, /* force TLSv12 */
+ { "force-tlsv13", bind_parse_tls_method_options, 0 }, /* force TLSv13 */
+ { "generate-certificates", bind_parse_generate_certs, 0 }, /* enable the server certificates generation */
+ { "no-alpn", bind_parse_no_alpn, 0 }, /* disable sending ALPN */
+ { "no-ca-names", bind_parse_no_ca_names, 0 }, /* do not send ca names to clients (ca_file related) */
+ { "no-sslv3", bind_parse_tls_method_options, 0 }, /* disable SSLv3 */
+ { "no-tlsv10", bind_parse_tls_method_options, 0 }, /* disable TLSv10 */
+ { "no-tlsv11", bind_parse_tls_method_options, 0 }, /* disable TLSv11 */
+ { "no-tlsv12", bind_parse_tls_method_options, 0 }, /* disable TLSv12 */
+ { "no-tlsv13", bind_parse_tls_method_options, 0 }, /* disable TLSv13 */
+ { "no-tls-tickets", bind_parse_no_tls_tickets, 0 }, /* disable session resumption tickets */
+ { "sigalgs", bind_parse_sigalgs, 1 }, /* set SSL signature algorithms */
+ { "ssl", bind_parse_ssl, 0 }, /* enable SSL processing */
+ { "ssl-min-ver", bind_parse_tls_method_minmax, 1 }, /* minimum version */
+ { "ssl-max-ver", bind_parse_tls_method_minmax, 1 }, /* maximum version */
+ { "strict-sni", bind_parse_strict_sni, 0 }, /* refuse negotiation if sni doesn't match a certificate */
+ { "tls-ticket-keys", bind_parse_tls_ticket_keys, 1 }, /* set file to load TLS ticket keys from */
+ { "verify", bind_parse_verify, 1 }, /* set SSL verify method */
+ { "npn", bind_parse_npn, 1 }, /* set NPN supported protocols */
+ { "prefer-client-ciphers", bind_parse_pcc, 0 }, /* prefer client ciphers */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+static struct srv_kw_list srv_kws = { "SSL", { }, {
+ { "allow-0rtt", srv_parse_allow_0rtt, 0, 1, 1 }, /* Allow using early data on this server */
+ { "alpn", srv_parse_alpn, 1, 1, 1 }, /* Set ALPN supported protocols */
+ { "ca-file", srv_parse_ca_file, 1, 1, 1 }, /* set CAfile to process verify server cert */
+ { "check-alpn", srv_parse_check_alpn, 1, 1, 1 }, /* Set ALPN used for checks */
+ { "check-sni", srv_parse_check_sni, 1, 1, 1 }, /* set SNI */
+ { "check-ssl", srv_parse_check_ssl, 0, 1, 1 }, /* enable SSL for health checks */
+ { "ciphers", srv_parse_ciphers, 1, 1, 1 }, /* select the cipher suite */
+ { "ciphersuites", srv_parse_ciphersuites, 1, 1, 1 }, /* select the cipher suite */
+ { "client-sigalgs", srv_parse_client_sigalgs, 1, 1, 1 }, /* signature algorithms */
+ { "crl-file", srv_parse_crl_file, 1, 1, 1 }, /* set certificate revocation list file use on server cert verify */
+	{ "crt", srv_parse_crt, 1, 1, 1 }, /* set client certificate */
+	{ "curves", srv_parse_curves, 1, 1, 1 }, /* set TLS curves list */
+ { "force-sslv3", srv_parse_tls_method_options, 0, 1, 1 }, /* force SSLv3 */
+ { "force-tlsv10", srv_parse_tls_method_options, 0, 1, 1 }, /* force TLSv10 */
+ { "force-tlsv11", srv_parse_tls_method_options, 0, 1, 1 }, /* force TLSv11 */
+ { "force-tlsv12", srv_parse_tls_method_options, 0, 1, 1 }, /* force TLSv12 */
+ { "force-tlsv13", srv_parse_tls_method_options, 0, 1, 1 }, /* force TLSv13 */
+ { "no-check-ssl", srv_parse_no_check_ssl, 0, 1, 0 }, /* disable SSL for health checks */
+ { "no-send-proxy-v2-ssl", srv_parse_no_send_proxy_ssl, 0, 1, 0 }, /* do not send PROXY protocol header v2 with SSL info */
+ { "no-send-proxy-v2-ssl-cn", srv_parse_no_send_proxy_cn, 0, 1, 0 }, /* do not send PROXY protocol header v2 with CN */
+ { "no-ssl", srv_parse_no_ssl, 0, 1, 0 }, /* disable SSL processing */
+ { "no-ssl-reuse", srv_parse_no_ssl_reuse, 0, 1, 1 }, /* disable session reuse */
+ { "no-sslv3", srv_parse_tls_method_options, 0, 0, 1 }, /* disable SSLv3 */
+ { "no-tlsv10", srv_parse_tls_method_options, 0, 0, 1 }, /* disable TLSv10 */
+ { "no-tlsv11", srv_parse_tls_method_options, 0, 0, 1 }, /* disable TLSv11 */
+ { "no-tlsv12", srv_parse_tls_method_options, 0, 0, 1 }, /* disable TLSv12 */
+ { "no-tlsv13", srv_parse_tls_method_options, 0, 0, 1 }, /* disable TLSv13 */
+ { "no-tls-tickets", srv_parse_no_tls_tickets, 0, 1, 1 }, /* disable session resumption tickets */
+ { "npn", srv_parse_npn, 1, 1, 1 }, /* Set NPN supported protocols */
+ { "send-proxy-v2-ssl", srv_parse_send_proxy_ssl, 0, 1, 1 }, /* send PROXY protocol header v2 with SSL info */
+ { "send-proxy-v2-ssl-cn", srv_parse_send_proxy_cn, 0, 1, 1 }, /* send PROXY protocol header v2 with CN */
+ { "sigalgs", srv_parse_sigalgs, 1, 1, 1 }, /* signature algorithms */
+ { "sni", srv_parse_sni, 1, 1, 1 }, /* send SNI extension */
+ { "ssl", srv_parse_ssl, 0, 1, 1 }, /* enable SSL processing */
+ { "ssl-min-ver", srv_parse_tls_method_minmax, 1, 1, 1 }, /* minimum version */
+ { "ssl-max-ver", srv_parse_tls_method_minmax, 1, 1, 1 }, /* maximum version */
+ { "ssl-reuse", srv_parse_ssl_reuse, 0, 1, 0 }, /* enable session reuse */
+ { "tls-tickets", srv_parse_tls_tickets, 0, 1, 1 }, /* enable session resumption tickets */
+ { "verify", srv_parse_verify, 1, 1, 1 }, /* set SSL verify method */
+ { "verifyhost", srv_parse_verifyhost, 1, 1, 1 }, /* require that SSL cert verifies for hostname */
+ { NULL, NULL, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "ca-base", ssl_parse_global_ca_crt_base },
+ { CFG_GLOBAL, "crt-base", ssl_parse_global_ca_crt_base },
+ { CFG_GLOBAL, "issuers-chain-path", ssl_load_global_issuers_from_path },
+ { CFG_GLOBAL, "maxsslconn", ssl_parse_global_int },
+ { CFG_GLOBAL, "ssl-default-bind-options", ssl_parse_default_bind_options },
+ { CFG_GLOBAL, "ssl-default-server-options", ssl_parse_default_server_options },
+#ifndef OPENSSL_NO_DH
+ { CFG_GLOBAL, "ssl-dh-param-file", ssl_parse_global_dh_param_file },
+#endif
+ { CFG_GLOBAL, "ssl-mode-async", ssl_parse_global_ssl_async },
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+ { CFG_GLOBAL, "ssl-engine", ssl_parse_global_ssl_engine },
+#endif
+#ifdef HAVE_SSL_PROVIDERS
+ { CFG_GLOBAL, "ssl-propquery", ssl_parse_global_ssl_propquery },
+ { CFG_GLOBAL, "ssl-provider", ssl_parse_global_ssl_provider },
+ { CFG_GLOBAL, "ssl-provider-path", ssl_parse_global_ssl_provider_path },
+#endif
+ { CFG_GLOBAL, "ssl-skip-self-issued-ca", ssl_parse_skip_self_issued_ca },
+ { CFG_GLOBAL, "tune.ssl.cachesize", ssl_parse_global_int },
+#ifndef OPENSSL_NO_DH
+ { CFG_GLOBAL, "tune.ssl.default-dh-param", ssl_parse_global_default_dh },
+#endif
+ { CFG_GLOBAL, "tune.ssl.force-private-cache", ssl_parse_global_private_cache },
+ { CFG_GLOBAL, "tune.ssl.lifetime", ssl_parse_global_lifetime },
+ { CFG_GLOBAL, "tune.ssl.maxrecord", ssl_parse_global_int },
+ { CFG_GLOBAL, "tune.ssl.hard-maxrecord", ssl_parse_global_int },
+ { CFG_GLOBAL, "tune.ssl.ssl-ctx-cache-size", ssl_parse_global_int },
+ { CFG_GLOBAL, "tune.ssl.capture-cipherlist-size", ssl_parse_global_capture_buffer },
+ { CFG_GLOBAL, "tune.ssl.capture-buffer-size", ssl_parse_global_capture_buffer },
+ { CFG_GLOBAL, "tune.ssl.keylog", ssl_parse_global_keylog },
+ { CFG_GLOBAL, "ssl-default-bind-ciphers", ssl_parse_global_ciphers },
+ { CFG_GLOBAL, "ssl-default-server-ciphers", ssl_parse_global_ciphers },
+#if defined(SSL_CTX_set1_curves_list)
+ { CFG_GLOBAL, "ssl-default-bind-curves", ssl_parse_global_curves },
+ { CFG_GLOBAL, "ssl-default-server-curves", ssl_parse_global_curves },
+#endif
+#if defined(SSL_CTX_set1_sigalgs_list)
+ { CFG_GLOBAL, "ssl-default-bind-sigalgs", ssl_parse_global_sigalgs },
+ { CFG_GLOBAL, "ssl-default-server-sigalgs", ssl_parse_global_sigalgs },
+#endif
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ { CFG_GLOBAL, "ssl-default-bind-client-sigalgs", ssl_parse_global_client_sigalgs },
+ { CFG_GLOBAL, "ssl-default-server-client-sigalgs", ssl_parse_global_client_sigalgs },
+#endif
+ { CFG_GLOBAL, "ssl-default-bind-ciphersuites", ssl_parse_global_ciphersuites },
+ { CFG_GLOBAL, "ssl-default-server-ciphersuites", ssl_parse_global_ciphersuites },
+ { CFG_GLOBAL, "ssl-load-extra-files", ssl_parse_global_extra_files },
+ { CFG_GLOBAL, "ssl-load-extra-del-ext", ssl_parse_global_extra_noext },
+#ifndef OPENSSL_NO_OCSP
+ { CFG_GLOBAL, "tune.ssl.ocsp-update.maxdelay", ssl_parse_global_ocsp_maxdelay },
+ { CFG_GLOBAL, "tune.ssl.ocsp-update.mindelay", ssl_parse_global_ocsp_mindelay },
+#endif
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/cfgparse-tcp.c b/src/cfgparse-tcp.c
new file mode 100644
index 0000000..a4f6f29
--- /dev/null
+++ b/src/cfgparse-tcp.c
@@ -0,0 +1,275 @@
+/*
+ * Configuration parsing for TCP (bind and server keywords)
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/server.h>
+#include <haproxy/tools.h>
+
+
+#ifdef IPV6_V6ONLY
+/* parse the "v4v6" bind keyword */
+static int bind_parse_v4v6(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->settings.options |= RX_O_V4V6;
+ return 0;
+}
+
+/* parse the "v6only" bind keyword */
+static int bind_parse_v6only(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->settings.options |= RX_O_V6ONLY;
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_HAP_TRANSPARENT
+/* parse the "transparent" bind keyword */
+static int bind_parse_transparent(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->settings.options |= RX_O_FOREIGN;
+ return 0;
+}
+#endif
+
+#if defined(TCP_DEFER_ACCEPT) || defined(SO_ACCEPTFILTER)
+/* parse the "defer-accept" bind keyword */
+static int bind_parse_defer_accept(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->options |= BC_O_DEF_ACCEPT;
+ return 0;
+}
+#endif
+
+#ifdef TCP_FASTOPEN
+/* parse the "tfo" bind keyword */
+static int bind_parse_tfo(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->options |= BC_O_TCP_FO;
+ return 0;
+}
+#endif
+
+#ifdef TCP_MAXSEG
+/* parse the "mss" bind keyword */
+static int bind_parse_mss(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int mss;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing MSS value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ mss = atoi(args[cur_arg + 1]);
+ if (!mss || abs(mss) > 65535) {
+ memprintf(err, "'%s' : expects an MSS with and absolute value between 1 and 65535", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->maxseg = mss;
+ return 0;
+}
+#endif
+
+#ifdef TCP_USER_TIMEOUT
+/* parse the "tcp-ut" bind keyword */
+static int bind_parse_tcp_ut(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ const char *ptr = NULL;
+ unsigned int timeout;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing TCP User Timeout value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ ptr = parse_time_err(args[cur_arg + 1], &timeout, TIME_UNIT_MS);
+ if (ptr == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[cur_arg+1], args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (ptr == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[cur_arg+1], args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (ptr) {
+ memprintf(err, "'%s' : expects a positive delay in milliseconds", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->tcp_ut = timeout;
+ return 0;
+}
+#endif
+
+#ifdef SO_BINDTODEVICE
+/* parse the "interface" bind keyword */
+static int bind_parse_interface(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing interface name", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ ha_free(&conf->settings.interface);
+ conf->settings.interface = strdup(args[cur_arg + 1]);
+ return 0;
+}
+#endif
+
+#ifdef USE_NS
+/* parse the "namespace" bind keyword */
+static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ char *namespace = NULL;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing namespace id", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ namespace = args[cur_arg + 1];
+
+ conf->settings.netns = netns_store_lookup(namespace, strlen(namespace));
+
+ if (conf->settings.netns == NULL)
+ conf->settings.netns = netns_store_insert(namespace);
+
+ if (conf->settings.netns == NULL) {
+ ha_alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+#endif
+
+#ifdef TCP_USER_TIMEOUT
+/* parse the "tcp-ut" server keyword */
+static int srv_parse_tcp_ut(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ const char *ptr = NULL;
+ unsigned int timeout;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing TCP User Timeout value", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ ptr = parse_time_err(args[*cur_arg + 1], &timeout, TIME_UNIT_MS);
+ if (ptr == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[*cur_arg+1], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (ptr == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[*cur_arg+1], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (ptr) {
+ memprintf(err, "'%s' : expects a positive delay in milliseconds", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
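+	/* TCP_USER_TIMEOUT only makes sense on TCP (IPv4/IPv6) addresses; the
+	 * setting is silently ignored for other families such as UNIX sockets.
+	 */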
+ if (newsrv->addr.ss_family == AF_INET || newsrv->addr.ss_family == AF_INET6)
+ newsrv->tcp_ut = timeout;
+
+ return 0;
+}
+#endif
+
+
+/************************************************************************/
+/* All supported bind keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+static struct bind_kw_list bind_kws = { "TCP", { }, {
+#if defined(TCP_DEFER_ACCEPT) || defined(SO_ACCEPTFILTER)
+ { "defer-accept", bind_parse_defer_accept, 0 }, /* wait for some data for 1 second max before doing accept */
+#endif
+#ifdef SO_BINDTODEVICE
+ { "interface", bind_parse_interface, 1 }, /* specifically bind to this interface */
+#endif
+#ifdef TCP_MAXSEG
+ { "mss", bind_parse_mss, 1 }, /* set MSS of listening socket */
+#endif
+#ifdef TCP_USER_TIMEOUT
+ { "tcp-ut", bind_parse_tcp_ut, 1 }, /* set User Timeout on listening socket */
+#endif
+#ifdef TCP_FASTOPEN
+ { "tfo", bind_parse_tfo, 0 }, /* enable TCP_FASTOPEN of listening socket */
+#endif
+#ifdef CONFIG_HAP_TRANSPARENT
+ { "transparent", bind_parse_transparent, 0 }, /* transparently bind to the specified addresses */
+#endif
+#ifdef IPV6_V6ONLY
+ { "v4v6", bind_parse_v4v6, 0 }, /* force socket to bind to IPv4+IPv6 */
+ { "v6only", bind_parse_v6only, 0 }, /* force socket to bind to IPv6 only */
+#endif
+#ifdef USE_NS
+ { "namespace", bind_parse_namespace, 1 },
+#endif
+	/* the versions with the NULL parse function */
+ { "defer-accept", NULL, 0 },
+ { "interface", NULL, 1 },
+ { "mss", NULL, 1 },
+ { "transparent", NULL, 0 },
+ { "v4v6", NULL, 0 },
+ { "v6only", NULL, 0 },
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+static struct srv_kw_list srv_kws = { "TCP", { }, {
+#ifdef TCP_USER_TIMEOUT
+ { "tcp-ut", srv_parse_tcp_ut, 1, 1, 0 }, /* set TCP user timeout on server */
+#endif
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/cfgparse-unix.c b/src/cfgparse-unix.c
new file mode 100644
index 0000000..b1fb1e2
--- /dev/null
+++ b/src/cfgparse-unix.c
@@ -0,0 +1,135 @@
+/*
+ * Configuration parsing for UNIX sockets (bind and server keywords)
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/server.h>
+#include <haproxy/tools.h>
+
+/* parse the "mode" bind keyword */
+static int bind_parse_mode(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ char *endptr;
+
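+	/* the argument is an octal mode, e.g. "600" or "0644" */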
+ conf->settings.ux.mode = strtol(args[cur_arg + 1], &endptr, 8);
+
+ if (!*args[cur_arg + 1] || *endptr) {
+ memprintf(err, "'%s' : missing or invalid mode '%s' (octal integer expected)", args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "gid" bind keyword */
+static int bind_parse_gid(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.ux.gid = atol(args[cur_arg + 1]);
+ return 0;
+}
+
+/* parse the "group" bind keyword */
+static int bind_parse_group(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct group *group;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing group name", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ group = getgrnam(args[cur_arg + 1]);
+ if (!group) {
+ memprintf(err, "'%s' : unknown group name '%s'", args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.ux.gid = group->gr_gid;
+ return 0;
+}
+
+/* parse the "uid" bind keyword */
+static int bind_parse_uid(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.ux.uid = atol(args[cur_arg + 1]);
+ return 0;
+}
+
+/* parse the "user" bind keyword */
+static int bind_parse_user(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct passwd *user;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing user name", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ user = getpwnam(args[cur_arg + 1]);
+ if (!user) {
+ memprintf(err, "'%s' : unknown user name '%s'", args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->settings.ux.uid = user->pw_uid;
+ return 0;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+static struct bind_kw_list bind_kws = { "UNIX", { }, {
+ { "gid", bind_parse_gid, 1 }, /* set the socket's gid */
+ { "group", bind_parse_group, 1 }, /* set the socket's gid from the group name */
+	{ "mode", bind_parse_mode, 1 }, /* set the socket's mode (eg: 0644) */
+ { "uid", bind_parse_uid, 1 }, /* set the socket's uid */
+ { "user", bind_parse_user, 1 }, /* set the socket's uid from the user name */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
diff --git a/src/cfgparse.c b/src/cfgparse.c
new file mode 100644
index 0000000..2744f97
--- /dev/null
+++ b/src/cfgparse.c
@@ -0,0 +1,4798 @@
+/*
+ * Configuration parser
+ *
+ * Copyright 2000-2011 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* This is to have crypt() and sched_setaffinity() defined on Linux */
+#define _GNU_SOURCE
+
+#ifdef USE_LIBCRYPT
+#ifdef USE_CRYPT_H
+/* some platforms such as Solaris need this */
+#include <crypt.h>
+#endif
+#endif /* USE_LIBCRYPT */
+
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <grp.h>
+#include <errno.h>
+#ifdef USE_CPU_AFFINITY
+#include <sched.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/backend.h>
+#include <haproxy/capture.h>
+#include <haproxy/cfgcond.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/chunk.h>
+#include <haproxy/clock.h>
+#ifdef USE_CPU_AFFINITY
+#include <haproxy/cpuset.h>
+#endif
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/lb_chash.h>
+#include <haproxy/lb_fas.h>
+#include <haproxy/lb_fwlc.h>
+#include <haproxy/lb_fwrr.h>
+#include <haproxy/lb_map.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/sink.h>
+#include <haproxy/mailers.h>
+#include <haproxy/namespace.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/obj_type-t.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/peers-t.h>
+#include <haproxy/peers.h>
+#include <haproxy/pool.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+
+
+/* Used to chain configuration sections definitions. This list
+ * stores struct cfg_section
+ */
+struct list sections = LIST_HEAD_INIT(sections);
+
+struct list postparsers = LIST_HEAD_INIT(postparsers);
+
+extern struct proxy *mworker_proxy;
+
+/* curproxy is only valid during parsing and will be NULL afterwards. */
+struct proxy *curproxy = NULL;
+
+char *cursection = NULL;
+int cfg_maxpconn = 0; /* # of simultaneous connections per proxy (-N) */
+int cfg_maxconn = 0; /* # of simultaneous connections, (-n) */
+char *cfg_scope = NULL; /* the current scope during the configuration parsing */
+int non_global_section_parsed = 0;
+
+/* how to handle default paths */
+static enum default_path_mode {
+ DEFAULT_PATH_CURRENT = 0, /* "current": paths are relative to CWD (this is the default) */
+ DEFAULT_PATH_CONFIG, /* "config": paths are relative to config file */
+ DEFAULT_PATH_PARENT, /* "parent": paths are relative to config file's ".." */
+ DEFAULT_PATH_ORIGIN, /* "origin": paths are relative to default_path_origin */
+} default_path_mode;
+
+static char initial_cwd[PATH_MAX];
+static char current_cwd[PATH_MAX];
+
+/* List head of all known configuration keywords */
+struct cfg_kw_list cfg_keywords = {
+ .list = LIST_HEAD_INIT(cfg_keywords.list)
+};
+
+/*
+ * converts <str> to a list of listeners which are dynamically allocated.
+ * The format is "{addr|'*'}:port[-end][,{addr|'*'}:port[-end]]*", where :
+ * - <addr> can be empty or "*" to indicate INADDR_ANY ;
+ * - <port> is a numerical port from 1 to 65535 ;
+ * - <end> indicates to use the range from <port> to <end> instead (inclusive).
+ * This can be repeated as many times as necessary, separated by a comma.
+ * The function returns 1 on success or 0 on error. In case of errors, if <err> is
+ * not NULL, it must be a valid pointer to either NULL or a freeable area that
+ * will be replaced with an error message.
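+ * Example (illustrative): "127.0.0.1:8000-8005,*:8443" creates six listeners
+ * on the loopback port range plus one wildcard listener on port 8443.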
+ */
+int str2listener(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, const char *file, int line, char **err)
+{
+ struct protocol *proto;
+ char *next, *dupstr;
+ int port, end;
+
+ next = dupstr = strdup(str);
+
+ while (next && *next) {
+ struct sockaddr_storage *ss2;
+ int fd = -1;
+
+ str = next;
+ /* 1) look for the end of the first address */
+ if ((next = strchr(str, ',')) != NULL) {
+ *next++ = 0;
+ }
+
+ ss2 = str2sa_range(str, NULL, &port, &end, &fd, &proto, NULL, err,
+ (curproxy == global.cli_fe || curproxy == mworker_proxy) ? NULL : global.unix_bind.prefix,
+ NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_PORT_RANGE |
+ PA_O_SOCKET_FD | PA_O_STREAM | PA_O_XPRT);
+ if (!ss2)
+ goto fail;
+
+ if (ss2->ss_family == AF_CUST_RHTTP_SRV) {
+			/* Check if a previous non reverse HTTP address is
+			 * already defined. If DGRAM or STREAM is set, this
+			 * indicates that we are currently parsing the second
+			 * address or beyond.
+ */
+ if (bind_conf->options & (BC_O_USE_SOCK_DGRAM|BC_O_USE_SOCK_STREAM) &&
+ !(bind_conf->options & BC_O_REVERSE_HTTP)) {
+ memprintf(err, "Cannot mix reverse HTTP bind with others.\n");
+ goto fail;
+ }
+
+ bind_conf->rhttp_srvname = strdup(str + strlen("rhttp@"));
+ if (!bind_conf->rhttp_srvname) {
+ memprintf(err, "Cannot allocate reverse HTTP bind.\n");
+ goto fail;
+ }
+
+ bind_conf->options |= BC_O_REVERSE_HTTP;
+ }
+ else if (bind_conf->options & BC_O_REVERSE_HTTP) {
+ /* Standard address mixed with a previous reverse HTTP one. */
+ memprintf(err, "Cannot mix reverse HTTP bind with others.\n");
+ goto fail;
+ }
+
+ /* OK the address looks correct */
+ if (proto->proto_type == PROTO_TYPE_DGRAM)
+ bind_conf->options |= BC_O_USE_SOCK_DGRAM;
+ else
+ bind_conf->options |= BC_O_USE_SOCK_STREAM;
+
+ if (proto->xprt_type == PROTO_TYPE_DGRAM)
+ bind_conf->options |= BC_O_USE_XPRT_DGRAM;
+ else
+ bind_conf->options |= BC_O_USE_XPRT_STREAM;
+
+ if (!create_listeners(bind_conf, ss2, port, end, fd, proto, err)) {
+ memprintf(err, "%s for address '%s'.\n", *err, str);
+ goto fail;
+ }
+ } /* end while(next) */
+ free(dupstr);
+ return 1;
+ fail:
+ free(dupstr);
+ return 0;
+}
+
+/*
+ * converts <str> to a list of datagram-oriented listeners which are dynamically
+ * allocated.
+ * The format is "{addr|'*'}:port[-end][,{addr|'*'}:port[-end]]*", where :
+ * - <addr> can be empty or "*" to indicate INADDR_ANY ;
+ * - <port> is a numerical port from 1 to 65535 ;
+ * - <end> indicates to use the range from <port> to <end> instead (inclusive).
+ * This can be repeated as many times as necessary, separated by a comma.
+ * The function returns 1 on success or 0 on error. In case of errors, if <err> is
+ * not NULL, it must be a valid pointer to either NULL or a freeable area that
+ * will be replaced with an error message.
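+ * Example (illustrative): "0.0.0.0:514" creates a single datagram (UDP)
+ * receiver listening on port 514 on all local addresses.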
+ */
+int str2receiver(char *str, struct proxy *curproxy, struct bind_conf *bind_conf, const char *file, int line, char **err)
+{
+ struct protocol *proto;
+ char *next, *dupstr;
+ int port, end;
+
+ next = dupstr = strdup(str);
+
+ while (next && *next) {
+ struct sockaddr_storage *ss2;
+ int fd = -1;
+
+ str = next;
+ /* 1) look for the end of the first address */
+ if ((next = strchr(str, ',')) != NULL) {
+ *next++ = 0;
+ }
+
+ ss2 = str2sa_range(str, NULL, &port, &end, &fd, &proto, NULL, err,
+ curproxy == global.cli_fe ? NULL : global.unix_bind.prefix,
+ NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_PORT_RANGE |
+ PA_O_SOCKET_FD | PA_O_DGRAM | PA_O_XPRT);
+ if (!ss2)
+ goto fail;
+
+ /* OK the address looks correct */
+ if (!create_listeners(bind_conf, ss2, port, end, fd, proto, err)) {
+ memprintf(err, "%s for address '%s'.\n", *err, str);
+ goto fail;
+ }
+ } /* end while(next) */
+ free(dupstr);
+ return 1;
+ fail:
+ free(dupstr);
+ return 0;
+}
+
+/*
+ * Sends a warning if proxy <proxy> does not have at least one of the
+ * capabilities in <cap>. An optional <hint> may be added at the end
+ * of the warning to help the user. Returns 1 if a warning was emitted
+ * or 0 if the condition is valid.
+ */
+int warnifnotcap(struct proxy *proxy, int cap, const char *file, int line, const char *arg, const char *hint)
+{
+ char *msg;
+
+ switch (cap) {
+ case PR_CAP_BE: msg = "no backend"; break;
+ case PR_CAP_FE: msg = "no frontend"; break;
+ case PR_CAP_BE|PR_CAP_FE: msg = "neither frontend nor backend"; break;
+ default: msg = "not enough"; break;
+ }
+
+ if (!(proxy->cap & cap)) {
+ ha_warning("parsing [%s:%d] : '%s' ignored because %s '%s' has %s capability.%s\n",
+ file, line, arg, proxy_type_str(proxy), proxy->id, msg, hint ? hint : "");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Sends an alert if proxy <proxy> does not have at least one of the
+ * capabilities in <cap>. An optional <hint> may be added at the end
+ * of the alert to help the user. Returns 1 if an alert was emitted
+ * or 0 if the condition is valid.
+ */
+int failifnotcap(struct proxy *proxy, int cap, const char *file, int line, const char *arg, const char *hint)
+{
+ char *msg;
+
+ switch (cap) {
+ case PR_CAP_BE: msg = "no backend"; break;
+ case PR_CAP_FE: msg = "no frontend"; break;
+ case PR_CAP_BE|PR_CAP_FE: msg = "neither frontend nor backend"; break;
+ default: msg = "not enough"; break;
+ }
+
+ if (!(proxy->cap & cap)) {
+ ha_alert("parsing [%s:%d] : '%s' not allowed because %s '%s' has %s capability.%s\n",
+ file, line, arg, proxy_type_str(proxy), proxy->id, msg, hint ? hint : "");
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Report an error in <msg> when there are too many arguments. This version is
+ * intended to be used by keyword parsers so that the message will be included
+ * in the general error message. The index is the current keyword in args.
+ * Returns 0 if the number of arguments is correct, otherwise builds a message and
+ * returns 1. Fills err_code with ERR_ALERT and ERR_FATAL if not NULL. The
+ * message may also be NULL, in which case it is simply not produced (useful to check only).
+ * <msg> and <err_code> are only affected on error.
+ */
+int too_many_args_idx(int maxarg, int index, char **args, char **msg, int *err_code)
+{
+ int i;
+
+ if (!*args[index + maxarg + 1])
+ return 0;
+
+ if (msg) {
+ *msg = NULL;
+ memprintf(msg, "%s", args[0]);
+ for (i = 1; i <= index; i++)
+ memprintf(msg, "%s %s", *msg, args[i]);
+
+ memprintf(msg, "'%s' cannot handle unexpected argument '%s'.", *msg, args[index + maxarg + 1]);
+ }
+ if (err_code)
+ *err_code |= ERR_ALERT | ERR_FATAL;
+
+ return 1;
+}
+
+/*
+ * same as too_many_args_idx with a 0 index
+ */
+int too_many_args(int maxarg, char **args, char **msg, int *err_code)
+{
+ return too_many_args_idx(maxarg, 0, args, msg, err_code);
+}
+
+/*
+ * Report a fatal alert when there are too many arguments.
+ * The index is the current keyword in args.
+ * Returns 0 if the number of arguments is correct, otherwise emits an alert and returns 1.
+ * Fills err_code with ERR_ALERT and ERR_FATAL.
+ */
+int alertif_too_many_args_idx(int maxarg, int index, const char *file, int linenum, char **args, int *err_code)
+{
+ char *kw = NULL;
+ int i;
+
+ if (!*args[index + maxarg + 1])
+ return 0;
+
+ memprintf(&kw, "%s", args[0]);
+ for (i = 1; i <= index; i++) {
+ memprintf(&kw, "%s %s", kw, args[i]);
+ }
+
+ ha_alert("parsing [%s:%d] : '%s' cannot handle unexpected argument '%s'.\n", file, linenum, kw, args[index + maxarg + 1]);
+ free(kw);
+ *err_code |= ERR_ALERT | ERR_FATAL;
+ return 1;
+}
+
+/*
+ * same as alertif_too_many_args_idx with a 0 index
+ */
+int alertif_too_many_args(int maxarg, const char *file, int linenum, char **args, int *err_code)
+{
+ return alertif_too_many_args_idx(maxarg, 0, file, linenum, args, err_code);
+}
+
+
+/* Emit a warning if a request ACL condition uses keywords that are incompatible
+ * with the place where the ACL is used. It returns either 0 or ERR_WARN so that
+ * its result can be or'ed with err_code. Note that <cond> may be NULL and then
+ * will be ignored.
+ */
+int warnif_cond_conflicts(const struct acl_cond *cond, unsigned int where, const char *file, int line)
+{
+ const struct acl *acl;
+ const char *kw;
+
+ if (!cond)
+ return 0;
+
+ acl = acl_cond_conflicts(cond, where);
+ if (acl) {
+ if (acl->name && *acl->name)
+ ha_warning("parsing [%s:%d] : acl '%s' will never match because it only involves keywords that are incompatible with '%s'\n",
+ file, line, acl->name, sample_ckp_names(where));
+ else
+ ha_warning("parsing [%s:%d] : anonymous acl will never match because it uses keyword '%s' which is incompatible with '%s'\n",
+ file, line, LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw, sample_ckp_names(where));
+ return ERR_WARN;
+ }
+ if (!acl_cond_kw_conflicts(cond, where, &acl, &kw))
+ return 0;
+
+ if (acl->name && *acl->name)
+ ha_warning("parsing [%s:%d] : acl '%s' involves keywords '%s' which is incompatible with '%s'\n",
+ file, line, acl->name, kw, sample_ckp_names(where));
+ else
+ ha_warning("parsing [%s:%d] : anonymous acl involves keyword '%s' which is incompatible with '%s'\n",
+ file, line, kw, sample_ckp_names(where));
+ return ERR_WARN;
+}
+
+/* Emit a warning if an ACL uses an L6 sample fetch from an HTTP proxy. It returns
+ * either 0 or ERR_WARN so that its result can be or'ed with err_code. Note that
+ * <cond> may be NULL and then will be ignored.
+ */
+int warnif_tcp_http_cond(const struct proxy *px, const struct acl_cond *cond)
+{
+ if (!cond || px->mode != PR_MODE_HTTP)
+ return 0;
+
+ if (cond->use & (SMP_USE_L6REQ|SMP_USE_L6RES)) {
+ ha_warning("Proxy '%s': L6 sample fetches ignored on HTTP proxies (declared at %s:%d).\n",
+ px->id, cond->file, cond->line);
+ return ERR_WARN;
+ }
+ return 0;
+}
+
+/* try to find in <list> the word that looks closest to <word> by counting
+ * transitions between letters, digits and other characters. Will return the
+ * best matching word if found, otherwise NULL. An optional array of extra
+ * words to compare may be passed in <extra>, but it must then be terminated
+ * by a NULL entry. If unused it may be NULL.
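+ * Example (illustrative): a mistyped "maxcon" in the global section would
+ * typically be matched to the known keyword "maxconn".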
+ */
+const char *cfg_find_best_match(const char *word, const struct list *list, int section, const char **extra)
+{
+ uint8_t word_sig[1024]; // 0..25=letter, 26=digit, 27=other, 28=begin, 29=end
+ uint8_t list_sig[1024];
+ const struct cfg_kw_list *kwl;
+ int index;
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+
+ make_word_fingerprint(word_sig, word);
+ list_for_each_entry(kwl, list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].section != section)
+ continue;
+
+ make_word_fingerprint(list_sig, kwl->kw[index].kw);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = kwl->kw[index].kw;
+ }
+ }
+ }
+
+ while (extra && *extra) {
+ make_word_fingerprint(list_sig, *extra);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = *extra;
+ }
+ extra++;
+ }
+
+ if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
+ best_ptr = NULL;
+ return best_ptr;
+}
+
+/* Parse a string representing a process number or a set of processes. It must
+ * be "all", "odd", "even", a number between 1 and <max> or a range with
+ * two such numbers delimited by a dash ('-'). On success, it returns
+ * 0, otherwise it returns a non-zero value with an error message in <err>.
+ *
+ * Note: this function can also be used to parse a thread number or a set of
+ * threads.
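+ * Example (illustrative): with max=8, "odd" yields mask 0x55, and "auto:2-4"
+ * sets bits 2 to 4 and sets *autoinc to 1.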
+ */
+int parse_process_number(const char *arg, unsigned long *proc, int max, int *autoinc, char **err)
+{
+ if (autoinc) {
+ *autoinc = 0;
+ if (strncmp(arg, "auto:", 5) == 0) {
+ arg += 5;
+ *autoinc = 1;
+ }
+ }
+
+ if (strcmp(arg, "all") == 0)
+ *proc |= ~0UL;
+ else if (strcmp(arg, "odd") == 0)
+ *proc |= ~0UL/3UL; /* 0x555....555 */
+ else if (strcmp(arg, "even") == 0)
+ *proc |= (~0UL/3UL) << 1; /* 0xAAA...AAA */
+ else {
+ const char *p, *dash = NULL;
+ unsigned int low, high;
+
+ for (p = arg; *p; p++) {
+ if (*p == '-' && !dash)
+ dash = p;
+ else if (!isdigit((unsigned char)*p)) {
+ memprintf(err, "'%s' is not a valid number/range.", arg);
+ return -1;
+ }
+ }
+
+ low = high = str2uic(arg);
+ if (dash)
+ high = ((!*(dash+1)) ? max : str2uic(dash + 1));
+
+ if (high < low) {
+ unsigned int swap = low;
+ low = high;
+ high = swap;
+ }
+
+ if (low < 1 || low > max || high > max) {
+ memprintf(err, "'%s' is not a valid number/range."
+ " It supports numbers from 1 to %d.\n",
+ arg, max);
+ return 1;
+ }
+
+ for (;low <= high; low++)
+ *proc |= 1UL << (low-1);
+ }
+ *proc &= ~0UL >> (LONGBITS - max);
+
+ return 0;
+}
+
+/* Allocate and initialize the frontend of a "peers" section found in
+ * file <file> at line <linenum> with <id> as ID.
+ * Returns 0 on success, -1 otherwise.
+ * Note that this function may be called from "default-server"
+ * or "peer" lines.
+ */
+static int init_peers_frontend(const char *file, int linenum,
+ const char *id, struct peers *peers)
+{
+ struct proxy *p;
+
+ if (peers->peers_fe) {
+ p = peers->peers_fe;
+ goto out;
+ }
+
+ p = calloc(1, sizeof *p);
+ if (!p) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ return -1;
+ }
+
+ init_new_proxy(p);
+ peers_setup_frontend(p);
+ p->parent = peers;
+ /* Finally store this frontend. */
+ peers->peers_fe = p;
+
+ out:
+ if (id && !p->id)
+ p->id = strdup(id);
+ free(p->conf.file);
+ p->conf.args.file = p->conf.file = strdup(file);
+ if (linenum != -1)
+ p->conf.args.line = p->conf.line = linenum;
+
+ return 0;
+}
+
+/* Return the first bind_conf of proxy <p> if one already exists, only
+ * updating its ->arg member; otherwise allocate a new one. ->file and ->line
+ * are kept unchanged so they remain available for error messages.
+ */
+static struct bind_conf *bind_conf_uniq_alloc(struct proxy *p,
+ const char *file, int line,
+ const char *arg, struct xprt_ops *xprt)
+{
+ struct bind_conf *bind_conf;
+
+ if (!LIST_ISEMPTY(&p->conf.bind)) {
+ bind_conf = LIST_ELEM((&p->conf.bind)->n, typeof(bind_conf), by_fe);
+ /*
+ * We keep bind_conf->file and bind_conf->line unchanged
+ * to make them available for error messages
+ */
+ if (arg) {
+ free(bind_conf->arg);
+ bind_conf->arg = strdup(arg);
+ }
+ }
+ else {
+ bind_conf = bind_conf_alloc(p, file, line, arg, xprt);
+ }
+
+ return bind_conf;
+}
+
+/*
+ * Allocate a new struct peer parsed at line <linenum> in file <file>
+ * to be added to <peers>.
+ * Returns the newly allocated structure on success, NULL otherwise.
+ */
+static struct peer *cfg_peers_add_peer(struct peers *peers,
+ const char *file, int linenum,
+ const char *id, int local)
+{
+ struct peer *p;
+
+ p = calloc(1, sizeof *p);
+ if (!p) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ return NULL;
+ }
+
+ /* the peers are linked backwards first */
+ peers->count++;
+ p->peers = peers;
+ p->next = peers->remote;
+ peers->remote = p;
+ p->conf.file = strdup(file);
+ p->conf.line = linenum;
+ p->last_change = ns_to_sec(now_ns);
+ p->xprt = xprt_get(XPRT_RAW);
+ p->sock_init_arg = NULL;
+ HA_SPIN_INIT(&p->lock);
+ if (id)
+ p->id = strdup(id);
+ if (local) {
+ p->local = 1;
+ peers->local = p;
+ }
+
+ return p;
+}
+
+/*
+ * Parse a line in a "peers" section.
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the two first ones can stop processing, the two others are just
+ * indicators.
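+ * Example (illustrative) of a section handled by this parser:
+ *   peers mypeers
+ *     peer hap1 192.168.0.1:1024
+ *     peer hap2 192.168.0.2:1024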
+ */
+int cfg_parse_peers(const char *file, int linenum, char **args, int kwm)
+{
+ static struct peers *curpeers = NULL;
+ static int nb_shards = 0;
+ struct peer *newpeer = NULL;
+ const char *err;
+ struct bind_conf *bind_conf;
+ int err_code = 0;
+ char *errmsg = NULL;
+ static int bind_line, peer_line;
+
+ if (strcmp(args[0], "bind") == 0 || strcmp(args[0], "default-bind") == 0) {
+ int cur_arg;
+ struct bind_conf *bind_conf;
+ int ret;
+
+ cur_arg = 1;
+
+ if (init_peers_frontend(file, linenum, NULL, curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ bind_conf = bind_conf_uniq_alloc(curpeers->peers_fe, file, linenum,
+ args[1], xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ ha_alert("parsing [%s:%d] : '%s %s' : cannot allocate memory.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ bind_conf->maxaccept = 1;
+ bind_conf->accept = session_accept_fd;
+ bind_conf->options |= BC_O_UNLIMITED; /* don't make the peers subject to global limits */
+
+ if (*args[0] == 'b') {
+ struct listener *l;
+
+ if (peer_line) {
+ ha_alert("parsing [%s:%d] : mixing \"peer\" and \"bind\" line is forbidden\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&bind_conf->listeners)) {
+ ha_alert("parsing [%s:%d] : One listener per \"peers\" section is authorized but another is already configured at [%s:%d].\n", file, linenum, bind_conf->file, bind_conf->line);
+ err_code |= ERR_FATAL;
+ }
+
+ if (!str2listener(args[1], curpeers->peers_fe, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ }
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' : error encountered while parsing listening address %s.\n",
+ file, linenum, args[0], args[1], args[1]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ /* Only one listener supported. Compare first listener
+ * against the last one. It must be the same one.
+ */
+ if (bind_conf->listeners.n != bind_conf->listeners.p) {
+ ha_alert("parsing [%s:%d] : Only one listener per \"peers\" section is authorized. Multiple listening addresses or port range are not supported.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ /*
+ * Newly allocated listener is at the end of the list
+ */
+ l = LIST_ELEM(bind_conf->listeners.p, typeof(l), by_bind);
+
+ global.maxsock++; /* for the listening socket */
+
+ bind_line = 1;
+ if (cfg_peers->local) {
+ newpeer = cfg_peers->local;
+ }
+ else {
+ /* This peer is local.
+				 * Note that we do not set the peer ID. The latter is initialized
+				 * when parsing a "peer" or "server" line.
+ */
+ newpeer = cfg_peers_add_peer(curpeers, file, linenum, NULL, 1);
+ if (!newpeer) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+ newpeer->addr = l->rx.addr;
+ newpeer->proto = l->rx.proto;
+ cur_arg++;
+ }
+
+ ret = bind_parse_args_list(bind_conf, args, cur_arg, cursection, file, linenum);
+ err_code |= ret;
+ if (ret != 0)
+ goto out;
+ }
+ else if (strcmp(args[0], "default-server") == 0) {
+ if (init_peers_frontend(file, -1, NULL, curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ err_code |= parse_server(file, linenum, args, curpeers->peers_fe, NULL,
+ SRV_PARSE_DEFAULT_SERVER|SRV_PARSE_IN_PEER_SECTION|SRV_PARSE_INITIAL_RESOLVE);
+ }
+ else if (strcmp(args[0], "log") == 0) {
+ if (init_peers_frontend(file, linenum, NULL, curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if (!parse_logger(args, &curpeers->peers_fe->loggers, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "peers") == 0) { /* new peers section */
+ /* Initialize these static variables when entering a new "peers" section*/
+ bind_line = peer_line = 0;
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for peers section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ for (curpeers = cfg_peers; curpeers != NULL; curpeers = curpeers->next) {
+			/* Reject a new peers section reusing the name of an existing one. */
+ if (strcmp(curpeers->id, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: peers section '%s' has the same name as another peers section declared at %s:%d.\n",
+ file, linenum, args[1], curpeers->conf.file, curpeers->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ if ((curpeers = calloc(1, sizeof(*curpeers))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curpeers->next = cfg_peers;
+ cfg_peers = curpeers;
+ curpeers->conf.file = strdup(file);
+ curpeers->conf.line = linenum;
+ curpeers->last_change = ns_to_sec(now_ns);
+ curpeers->id = strdup(args[1]);
+ curpeers->disabled = 0;
+ }
+ else if (strcmp(args[0], "peer") == 0 ||
+ strcmp(args[0], "server") == 0) { /* peer or server definition */
+ int local_peer, peer;
+ int parse_addr = 0;
+
+ peer = *args[0] == 'p';
+ local_peer = strcmp(args[1], localpeer) == 0;
+ /* The local peer may have already partially been parsed on a "bind" line. */
+ if (*args[0] == 'p') {
+ if (bind_line) {
+ ha_alert("parsing [%s:%d] : mixing \"peer\" and \"bind\" line is forbidden\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ peer_line = 1;
+ }
+ if (cfg_peers->local && !cfg_peers->local->id && local_peer) {
+ /* The local peer has already been initialized on a "bind" line.
+ * Let's use it and store its ID.
+ */
+ newpeer = cfg_peers->local;
+ newpeer->id = strdup(localpeer);
+ }
+ else {
+ if (local_peer && cfg_peers->local) {
+ ha_alert("parsing [%s:%d] : '%s %s' : local peer name already referenced at %s:%d. %s\n",
+ file, linenum, args[0], args[1],
+ curpeers->peers_fe->conf.file, curpeers->peers_fe->conf.line, cfg_peers->local->id);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+ newpeer = cfg_peers_add_peer(curpeers, file, linenum, args[1], local_peer);
+ if (!newpeer) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+
+ /* Line number and peer ID are updated only if this peer is the local one. */
+ if (init_peers_frontend(file,
+ newpeer->local ? linenum: -1,
+ newpeer->local ? newpeer->id : NULL,
+ curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* This initializes curpeer->peers->peers_fe->srv.
+ * The server address is parsed only if we are parsing a "peer" line,
+ * or if we are parsing a "server" line and the current peer is not the local one.
+ */
+ parse_addr = (peer || !local_peer) ? SRV_PARSE_PARSE_ADDR : 0;
+ err_code |= parse_server(file, linenum, args, curpeers->peers_fe, NULL,
+ SRV_PARSE_IN_PEER_SECTION|parse_addr|SRV_PARSE_INITIAL_RESOLVE);
+ if (!curpeers->peers_fe->srv) {
+ /* Remove the newly allocated peer. */
+ if (newpeer != curpeers->local) {
+ struct peer *p;
+
+ p = curpeers->remote;
+ curpeers->remote = curpeers->remote->next;
+ free(p->id);
+ free(p);
+ }
+ goto out;
+ }
+
+ if (nb_shards && curpeers->peers_fe->srv->shard > nb_shards) {
+ ha_warning("parsing [%s:%d] : '%s %s' : %d peer shard greater value than %d shards value is ignored.\n",
+ file, linenum, args[0], args[1], curpeers->peers_fe->srv->shard, nb_shards);
+ curpeers->peers_fe->srv->shard = 0;
+ err_code |= ERR_WARN;
+ }
+
+ if (curpeers->peers_fe->srv->init_addr_methods || curpeers->peers_fe->srv->resolvers_id ||
+ curpeers->peers_fe->srv->do_check || curpeers->peers_fe->srv->do_agent) {
+ ha_warning("parsing [%s:%d] : '%s %s' : init_addr, resolvers, check and agent are ignored for peers.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_WARN;
+ }
+
+ /* If the peer address has just been parsed, let's copy it to <newpeer>
+		 * and initialize ->proto.
+ */
+ if (peer || !local_peer) {
+ newpeer->addr = curpeers->peers_fe->srv->addr;
+ newpeer->proto = protocol_lookup(newpeer->addr.ss_family, PROTO_TYPE_STREAM, 0);
+ }
+
+ newpeer->xprt = xprt_get(XPRT_RAW);
+ newpeer->sock_init_arg = NULL;
+ HA_SPIN_INIT(&newpeer->lock);
+
+ newpeer->srv = curpeers->peers_fe->srv;
+ if (!newpeer->local)
+ goto out;
+
+ /* The lines above are reserved to "peer" lines. */
+ if (*args[0] == 's')
+ goto out;
+
+ bind_conf = bind_conf_uniq_alloc(curpeers->peers_fe, file, linenum, args[2], xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ ha_alert("parsing [%s:%d] : '%s %s' : Cannot allocate memory.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ bind_conf->maxaccept = 1;
+ bind_conf->accept = session_accept_fd;
+ bind_conf->options |= BC_O_UNLIMITED; /* don't make the peers subject to global limits */
+
+ if (!LIST_ISEMPTY(&bind_conf->listeners)) {
+ ha_alert("parsing [%s:%d] : One listener per \"peers\" section is authorized but another is already configured at [%s:%d].\n", file, linenum, bind_conf->file, bind_conf->line);
+ err_code |= ERR_FATAL;
+ }
+
+ if (!str2listener(args[2], curpeers->peers_fe, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ }
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' : error encountered while parsing listening address %s.\n",
+ file, linenum, args[0], args[1], args[2]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ global.maxsock++; /* for the listening socket */
+ }
+ else if (strcmp(args[0], "shards") == 0) {
+ char *endptr;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' : missing value\n", file, linenum, args[0]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ curpeers->nb_shards = strtol(args[1], &endptr, 10);
+ if (*endptr != '\0') {
+ ha_alert("parsing [%s:%d] : '%s' : expects an integer argument, found '%s'\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ if (!curpeers->nb_shards) {
+ ha_alert("parsing [%s:%d] : '%s' : expects a strictly positive integer argument\n",
+ file, linenum, args[0]);
+ err_code |= ERR_FATAL;
+ goto out;
+ }
+
+ nb_shards = curpeers->nb_shards;
+ }
+ else if (strcmp(args[0], "table") == 0) {
+ struct stktable *t, *other;
+ char *id;
+ size_t prefix_len;
+
+ /* Line number and peer ID are updated only if this peer is the local one. */
+ if (init_peers_frontend(file, -1, NULL, curpeers) != 0) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* Build the stick-table name, concatenating the "peers" section name
+ * followed by a '/' character and the table name argument.
+ */
+ chunk_reset(&trash);
+ if (!chunk_strcpy(&trash, curpeers->id)) {
+ ha_alert("parsing [%s:%d]: '%s %s' : stick-table name too long.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ prefix_len = trash.data;
+ if (!chunk_memcat(&trash, "/", 1) || !chunk_strcat(&trash, args[1])) {
+ ha_alert("parsing [%s:%d]: '%s %s' : stick-table name too long.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ t = calloc(1, sizeof *t);
+ id = strdup(trash.area);
+ if (!t || !id) {
+ ha_alert("parsing [%s:%d]: '%s %s' : memory allocation failed\n",
+ file, linenum, args[0], args[1]);
+ free(t);
+ free(id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ other = stktable_find_by_name(trash.area);
+ if (other) {
+ ha_alert("parsing [%s:%d] : stick-table name '%s' conflicts with table declared in %s '%s' at %s:%d.\n",
+ file, linenum, args[1],
+ other->proxy ? proxy_cap_str(other->proxy->cap) : "peers",
+ other->proxy ? other->id : other->peers.p->id,
+ other->conf.file, other->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err_code |= parse_stick_table(file, linenum, args, t, id, id + prefix_len, curpeers);
+ if (err_code & ERR_FATAL) {
+ free(t);
+ free(id);
+ goto out;
+ }
+
+ stktable_store_name(t);
+ t->next = stktables_list;
+ stktables_list = t;
+ }
+ else if (strcmp(args[0], "disabled") == 0) { /* disables this peers section */
+ curpeers->disabled |= PR_FL_DISABLED;
+ }
+ else if (strcmp(args[0], "enabled") == 0) { /* enables this peers section (used to revert a disabled default) */
+ curpeers->disabled = 0;
+ }
+ else if (*args[0] != 0) {
+ struct peers_kw_list *pkwl;
+ int index;
+ int rc = -1;
+
+ list_for_each_entry(pkwl, &peers_keywords.list, list) {
+ for (index = 0; pkwl->kw[index].kw != NULL; index++) {
+ if (strcmp(pkwl->kw[index].kw, args[0]) == 0) {
+ rc = pkwl->kw[index].parse(args, curpeers, file, linenum, &errmsg);
+ if (rc < 0) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (rc > 0) {
+ ha_warning("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+ goto out;
+ }
+ }
+ }
+
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ free(errmsg);
+ return err_code;
+}
+
+/*
+ * Parse a line in a "mailers" section.
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the two first ones can stop processing, the two others are just
+ * indicators.
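+ * Example (illustrative) of a section handled by this parser:
+ *   mailers mymailers
+ *     mailer smtp1 192.168.0.1:587
+ *     timeout mail 20s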
+ */
+int cfg_parse_mailers(const char *file, int linenum, char **args, int kwm)
+{
+ static struct mailers *curmailers = NULL;
+ struct mailer *newmailer = NULL;
+ const char *err;
+ int err_code = 0;
+ char *errmsg = NULL;
+
+ if (strcmp(args[0], "mailers") == 0) { /* new mailers section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for mailers section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ for (curmailers = mailers; curmailers != NULL; curmailers = curmailers->next) {
+			/* Reject a new mailers section reusing the name of an existing one. */
+ if (strcmp(curmailers->id, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: mailers section '%s' has the same name as another mailers section declared at %s:%d.\n",
+ file, linenum, args[1], curmailers->conf.file, curmailers->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ if ((curmailers = calloc(1, sizeof(*curmailers))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curmailers->next = mailers;
+ mailers = curmailers;
+ curmailers->conf.file = strdup(file);
+ curmailers->conf.line = linenum;
+ curmailers->id = strdup(args[1]);
+		curmailers->timeout.mail = DEF_MAILALERTTIME;/* XXX: Would like to skip to the next alert, if any, ASAP.
+		                                              * But we need enough time so that timeouts don't occur
+		                                              * during tcp processing. For now just use an arbitrary default. */
+ }
+ else if (strcmp(args[0], "mailer") == 0) { /* mailer definition */
+ struct sockaddr_storage *sk;
+ int port1, port2;
+ struct protocol *proto;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> and <addr>[:<port>] as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in server name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((newmailer = calloc(1, sizeof(*newmailer))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* the mailers are linked backwards first */
+ curmailers->count++;
+ newmailer->next = curmailers->mailer_list;
+ curmailers->mailer_list = newmailer;
+ newmailer->mailers = curmailers;
+ newmailer->conf.file = strdup(file);
+ newmailer->conf.line = linenum;
+
+ newmailer->id = strdup(args[1]);
+
+ sk = str2sa_range(args[2], NULL, &port1, &port2, NULL, &proto, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (proto->sock_prot != IPPROTO_TCP) {
+ ha_alert("parsing [%s:%d] : '%s %s' : TCP not supported for this address family.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ newmailer->addr = *sk;
+ newmailer->proto = proto;
+ newmailer->xprt = xprt_get(XPRT_RAW);
+ newmailer->sock_init_arg = NULL;
+ }
+ else if (strcmp(args[0], "timeout") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects 'mail' and <time> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[1], "mail") == 0) {
+ const char *res;
+ unsigned int timeout_mail;
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects <time> as argument.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &timeout_mail, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s %s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s %s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s %s>.\n",
+ file, linenum, *res, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curmailers->timeout.mail = timeout_mail;
+ } else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'mail' and <time> as arguments got '%s'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ free(errmsg);
+ return err_code;
+}
+
+void free_email_alert(struct proxy *p)
+{
+ ha_free(&p->email_alert.mailers.name);
+ ha_free(&p->email_alert.from);
+ ha_free(&p->email_alert.to);
+ ha_free(&p->email_alert.myhostname);
+}
+
+
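+/* Parse a line in a "namespace_list" section: each "namespace" keyword may be
+ * followed by one or more namespace names, which are checked for validity,
+ * looked up to detect duplicates, and inserted into the netns store. Only
+ * available when built with USE_NS, otherwise an alert is emitted.
+ * Returns 0 on success, or ERR_ALERT | ERR_FATAL on error.
+ */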
+int
+cfg_parse_netns(const char *file, int linenum, char **args, int kwm)
+{
+#ifdef USE_NS
+ const char *err;
+ const char *item = args[0];
+
+ if (strcmp(item, "namespace_list") == 0) {
+ return 0;
+ }
+ else if (strcmp(item, "namespace") == 0) {
+ size_t idx = 1;
+ const char *current;
+ while (*(current = args[idx++])) {
+ err = invalid_char(current);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, item, current);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (netns_store_lookup(current, strlen(current))) {
+ ha_alert("parsing [%s:%d]: Namespace '%s' is already added.\n",
+ file, linenum, current);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (!netns_store_insert(current)) {
+ ha_alert("parsing [%s:%d]: Cannot open namespace '%s'.\n",
+ file, linenum, current);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+ }
+
+ return 0;
+#else
+ ha_alert("parsing [%s:%d]: namespace support is not compiled in.",
+ file, linenum);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
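+/* Parse a line in a "userlist" section: handles the "userlist", "group" and
+ * "user" keywords, building the lists of groups and users (with optional
+ * "password", "insecure-password" and "groups" options) attached to the
+ * current userlist. Returns a combination of ERR_* codes, 0 if OK.
+ */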
+int
+cfg_parse_users(const char *file, int linenum, char **args, int kwm)
+{
+	int err_code = 0;
+ const char *err;
+
+ if (strcmp(args[0], "userlist") == 0) { /* new userlist */
+ struct userlist *newul;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (newul = userlist; newul; newul = newul->next)
+ if (strcmp(newul->name, args[1]) == 0) {
+ ha_warning("parsing [%s:%d]: ignoring duplicated userlist '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ newul = calloc(1, sizeof(*newul));
+ if (!newul) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ newul->name = strdup(args[1]);
+ if (!newul->name) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ free(newul);
+ goto out;
+ }
+
+ newul->next = userlist;
+ userlist = newul;
+
+ } else if (strcmp(args[0], "group") == 0) { /* new group */
+ int cur_arg;
+ const char *err;
+ struct auth_groups *ag;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (!userlist)
+ goto out;
+
+ for (ag = userlist->groups; ag; ag = ag->next)
+ if (strcmp(ag->name, args[1]) == 0) {
+ ha_warning("parsing [%s:%d]: ignoring duplicated group '%s' in userlist '%s'.\n",
+ file, linenum, args[1], userlist->name);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ ag = calloc(1, sizeof(*ag));
+ if (!ag) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ ag->name = strdup(args[1]);
+ if (!ag->name) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ free(ag);
+ goto out;
+ }
+
+ cur_arg = 2;
+
+ while (*args[cur_arg]) {
+ if (strcmp(args[cur_arg], "users") == 0) {
+ ag->groupusers = strdup(args[cur_arg + 1]);
+ cur_arg += 2;
+ continue;
+ } else {
+ ha_alert("parsing [%s:%d]: '%s' only supports 'users' option.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(ag->groupusers);
+ free(ag->name);
+ free(ag);
+ goto out;
+ }
+ }
+
+ ag->next = userlist->groups;
+ userlist->groups = ag;
+
+ } else if (strcmp(args[0], "user") == 0) { /* new user */
+ struct auth_users *newuser;
+ int cur_arg;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (!userlist)
+ goto out;
+
+ for (newuser = userlist->users; newuser; newuser = newuser->next)
+ if (strcmp(newuser->user, args[1]) == 0) {
+ ha_warning("parsing [%s:%d]: ignoring duplicated user '%s' in userlist '%s'.\n",
+ file, linenum, args[1], userlist->name);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ newuser = calloc(1, sizeof(*newuser));
+ if (!newuser) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ newuser->user = strdup(args[1]);
+
+ newuser->next = userlist->users;
+ userlist->users = newuser;
+
+ cur_arg = 2;
+
+ while (*args[cur_arg]) {
+ if (strcmp(args[cur_arg], "password") == 0) {
+#ifdef USE_LIBCRYPT
+ if (!crypt("", args[cur_arg + 1])) {
+ ha_alert("parsing [%s:%d]: the encrypted password used for user '%s' is not supported by crypt(3).\n",
+ file, linenum, newuser->user);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+#else
+ ha_warning("parsing [%s:%d]: no crypt(3) support compiled, encrypted passwords will not work.\n",
+ file, linenum);
+ err_code |= ERR_ALERT;
+#endif
+ newuser->pass = strdup(args[cur_arg + 1]);
+ cur_arg += 2;
+ continue;
+ } else if (strcmp(args[cur_arg], "insecure-password") == 0) {
+ newuser->pass = strdup(args[cur_arg + 1]);
+ newuser->flags |= AU_O_INSECURE;
+ cur_arg += 2;
+ continue;
+ } else if (strcmp(args[cur_arg], "groups") == 0) {
+ newuser->u.groups_names = strdup(args[cur_arg + 1]);
+ cur_arg += 2;
+ continue;
+ } else {
+ ha_alert("parsing [%s:%d]: '%s' only supports 'password', 'insecure-password' and 'groups' options.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ } else {
+ ha_alert("parsing [%s:%d]: unknown keyword '%s' in '%s' section\n", file, linenum, args[0], "users");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+out:
+ return err_code;
+}
+
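+/* Parse a scope declaration of the form "[name]" found alone on a config
+ * line, with <line> pointing to the opening bracket. On success the scope
+ * name is stored into cfg_scope, replacing any previous one. Returns a
+ * combination of ERR_* codes, 0 if OK.
+ */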
+int
+cfg_parse_scope(const char *file, int linenum, char *line)
+{
+ char *beg, *end, *scope = NULL;
+ int err_code = 0;
+ const char *err;
+
+ beg = line + 1;
+ end = strchr(beg, ']');
+
+ /* Detect end of scope declaration */
+ if (!end || end == beg) {
+ ha_alert("parsing [%s:%d] : empty scope name is forbidden.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* Get scope name and check its validity */
+ scope = my_strndup(beg, end-beg);
+ err = invalid_char(scope);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in a scope name.\n",
+ file, linenum, *err);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* Be sure to have a scope declaration alone on its line */
+ line = end+1;
+ while (isspace((unsigned char)*line))
+ line++;
+ if (*line && *line != '#' && *line != '\n' && *line != '\r') {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted after scope declaration.\n",
+ file, linenum, *line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* We have a valid scope declaration, save it */
+ free(cfg_scope);
+ cfg_scope = scope;
+ scope = NULL;
+
+ out:
+ free(scope);
+ return err_code;
+}
+
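+/* Parse the track-sc number starting at <arg> and ending before <end>, and
+ * check it against the number of configured stick-counters
+ * (global.tune.nb_stk_ctr). On success the number is stored into
+ * <track_sc_num> and 0 is returned, otherwise -1 is returned with an error
+ * message in <errmsg>.
+ */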
+int
+cfg_parse_track_sc_num(unsigned int *track_sc_num,
+ const char *arg, const char *end, char **errmsg)
+{
+ const char *p;
+ unsigned int num;
+
+ p = arg;
+ num = read_uint64(&arg, end);
+
+ if (arg != end) {
+ memprintf(errmsg, "Wrong track-sc number '%s'", p);
+ return -1;
+ }
+
+ if (num >= global.tune.nb_stk_ctr) {
+ if (!global.tune.nb_stk_ctr)
+ memprintf(errmsg, "%u track-sc number not usable, stick-counters "
+ "are disabled by tune.stick-counters", num);
+ else
+ memprintf(errmsg, "%u track-sc number exceeding "
+ "%d (tune.stick-counters-1) value", num, global.tune.nb_stk_ctr - 1);
+ return -1;
+ }
+
+ *track_sc_num = num;
+ return 0;
+}
+
+/*
+ * Detect a global section after a non-global one and output a diagnostic
+ * warning.
+ */
+static void check_section_position(char *section_name, const char *file, int linenum)
+{
+ if (strcmp(section_name, "global") == 0) {
+ if ((global.mode & MODE_DIAG) && non_global_section_parsed == 1)
+ _ha_diag_warning("parsing [%s:%d] : global section detected after a non-global one, the prevalence of their statements is unspecified\n", file, linenum);
+ }
+ else if (non_global_section_parsed == 0) {
+ non_global_section_parsed = 1;
+ }
+}
+
+/* apply the current default_path setting for config file <file>, and
+ * optionally replace the current path to <origin> if not NULL while the
+ * default-path mode is set to "origin". Errors are returned into an
+ * allocated string passed to <err> if it's not NULL. Returns 0 on failure
+ * or non-zero on success.
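+ * Example (illustrative): with "default-path config" and a config file
+ * located at "/etc/haproxy/haproxy.cfg", relative paths encountered while
+ * parsing are resolved from "/etc/haproxy".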
+ */
+static int cfg_apply_default_path(const char *file, const char *origin, char **err)
+{
+ const char *beg, *end;
+
+ /* make path start at <beg> and end before <end>, and switch it to ""
+ * if no slash was passed.
+ */
+ beg = file;
+ end = strrchr(beg, '/');
+ if (!end)
+ end = beg;
+
+ if (!*initial_cwd) {
+ if (getcwd(initial_cwd, sizeof(initial_cwd)) == NULL) {
+ if (err)
+ memprintf(err, "Impossible to retrieve startup directory name: %s", strerror(errno));
+ return 0;
+ }
+ }
+ else if (chdir(initial_cwd) == -1) {
+ if (err)
+ memprintf(err, "Impossible to get back to initial directory '%s': %s", initial_cwd, strerror(errno));
+ return 0;
+ }
+
+ /* OK now we're (back) to initial_cwd */
+
+ switch (default_path_mode) {
+ case DEFAULT_PATH_CURRENT:
+ /* current_cwd never set, nothing to do */
+ return 1;
+
+ case DEFAULT_PATH_ORIGIN:
+ /* current_cwd set in the config */
+ if (origin &&
+		    snprintf(current_cwd, sizeof(current_cwd), "%s", origin) >= sizeof(current_cwd)) {
+ if (err)
+ memprintf(err, "Absolute path too long: '%s'", origin);
+ return 0;
+ }
+ break;
+
+ case DEFAULT_PATH_CONFIG:
+ if (end - beg >= sizeof(current_cwd)) {
+ if (err)
+ memprintf(err, "Config file path too long, cannot use for relative paths: '%s'", file);
+ return 0;
+ }
+ memcpy(current_cwd, beg, end - beg);
+ current_cwd[end - beg] = 0;
+ break;
+
+ case DEFAULT_PATH_PARENT:
+ if (end - beg + 3 >= sizeof(current_cwd)) {
+ if (err)
+ memprintf(err, "Config file path too long, cannot use for relative paths: '%s'", file);
+ return 0;
+ }
+ memcpy(current_cwd, beg, end - beg);
+ if (end > beg)
+ memcpy(current_cwd + (end - beg), "/..\0", 4);
+ else
+ memcpy(current_cwd + (end - beg), "..\0", 3);
+ break;
+ }
+
+ if (*current_cwd && chdir(current_cwd) == -1) {
+ if (err)
+ memprintf(err, "Impossible to get back to directory '%s': %s", initial_cwd, strerror(errno));
+ return 0;
+ }
+
+ return 1;
+}
+
+/* parses a global "default-path" directive. */
+static int cfg_parse_global_def_path(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int ret = -1;
+
+ /* "current", "config", "parent", "origin <path>" */
+
+ if (strcmp(args[1], "current") == 0)
+ default_path_mode = DEFAULT_PATH_CURRENT;
+ else if (strcmp(args[1], "config") == 0)
+ default_path_mode = DEFAULT_PATH_CONFIG;
+ else if (strcmp(args[1], "parent") == 0)
+ default_path_mode = DEFAULT_PATH_PARENT;
+ else if (strcmp(args[1], "origin") == 0)
+ default_path_mode = DEFAULT_PATH_ORIGIN;
+ else {
+ memprintf(err, "%s default-path mode '%s' for '%s', supported modes include 'current', 'config', 'parent', and 'origin'.", *args[1] ? "unsupported" : "missing", args[1], args[0]);
+ goto end;
+ }
+
+ if (default_path_mode == DEFAULT_PATH_ORIGIN) {
+ if (!*args[2]) {
+ memprintf(err, "'%s %s' expects a directory as an argument.", args[0], args[1]);
+ goto end;
+ }
+ if (!cfg_apply_default_path(file, args[2], err)) {
+ memprintf(err, "couldn't set '%s' to origin '%s': %s.", args[0], args[2], *err);
+ goto end;
+ }
+ }
+ else if (!cfg_apply_default_path(file, NULL, err)) {
+ memprintf(err, "couldn't set '%s' to '%s': %s.", args[0], args[1], *err);
+ goto end;
+ }
+
+ /* note that once applied, the path is immediately updated */
+
+ ret = 0;
+ end:
+ return ret;
+}
+
+/*
+ * This function reads and parses the configuration file given in the argument.
+ * Returns the error code, 0 if OK, -1 if the config file couldn't be opened,
+ * or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the two first ones can stop processing, the two others are just
+ * indicators.
+ */
+int readcfgfile(const char *file)
+{
+ char *thisline = NULL;
+ int linesize = LINESIZE;
+ FILE *f = NULL;
+ int linenum = 0;
+ int err_code = 0;
+ struct cfg_section *cs = NULL, *pcs = NULL;
+ struct cfg_section *ics;
+ int readbytes = 0;
+ char *outline = NULL;
+ size_t outlen = 0;
+ size_t outlinesize = 0;
+ int fatal = 0;
+ int missing_lf = -1;
+ int nested_cond_lvl = 0;
+ enum nested_cond_state nested_conds[MAXNESTEDCONDS];
+ char *errmsg = NULL;
+
+ global.cfg_curr_line = 0;
+ global.cfg_curr_file = file;
+
+ if ((thisline = malloc(sizeof(*thisline) * linesize)) == NULL) {
+ ha_alert("Out of memory trying to allocate a buffer for a configuration line.\n");
+ err_code = -1;
+ goto err;
+ }
+
+ if ((f = fopen(file,"r")) == NULL) {
+ err_code = -1;
+ goto err;
+ }
+
+ /* change to the new dir if required */
+ if (!cfg_apply_default_path(file, NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d]: failed to apply default-path: %s.\n", file, linenum, errmsg);
+ free(errmsg);
+ err_code = -1;
+ goto err;
+ }
+
+next_line:
+ while (fgets(thisline + readbytes, linesize - readbytes, f) != NULL) {
+ int arg, kwm = KWM_STD;
+ char *end;
+ char *args[MAX_LINE_ARGS + 1];
+ char *line = thisline;
+
+ if (missing_lf != -1) {
+ ha_alert("parsing [%s:%d]: Stray NUL character at position %d.\n",
+ file, linenum, (missing_lf + 1));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ missing_lf = -1;
+ break;
+ }
+
+ linenum++;
+ global.cfg_curr_line = linenum;
+
+ if (fatal >= 50) {
+ ha_alert("parsing [%s:%d]: too many fatal errors (%d), stopping now.\n", file, linenum, fatal);
+ break;
+ }
+
+ end = line + strlen(line);
+
+ if (end-line == linesize-1 && *(end-1) != '\n') {
+ /* Check if we reached the limit and the last char is not \n.
+ * Watch out for the last line without the terminating '\n'!
+ */
+ char *newline;
+ int newlinesize = linesize * 2;
+
+ newline = realloc(thisline, sizeof(*thisline) * newlinesize);
+ if (newline == NULL) {
+ ha_alert("parsing [%s:%d]: line too long, cannot allocate memory.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ linenum--;
+ continue;
+ }
+
+ readbytes = linesize - 1;
+ linesize = newlinesize;
+ thisline = newline;
+ linenum--;
+ continue;
+ }
+
+ readbytes = 0;
+
+ if (end > line && *(end-1) == '\n') {
+ /* kill trailing LF */
+ *(end - 1) = 0;
+ }
+ else {
+ /* mark this line as truncated */
+ missing_lf = end - line;
+ }
+
+ /* skip leading spaces */
+ while (isspace((unsigned char)*line))
+ line++;
+
+		if (*line == '[') { /* This is the beginning of a scope */
+ err_code |= cfg_parse_scope(file, linenum, line);
+ goto next_line;
+ }
+
+ while (1) {
+ uint32_t err;
+ const char *errptr;
+
+ arg = sizeof(args) / sizeof(*args);
+ outlen = outlinesize;
+ err = parse_line(line, outline, &outlen, args, &arg,
+ PARSE_OPT_ENV | PARSE_OPT_DQUOTE | PARSE_OPT_SQUOTE |
+ PARSE_OPT_BKSLASH | PARSE_OPT_SHARP | PARSE_OPT_WORD_EXPAND,
+ &errptr);
+
+ if (err & PARSE_ERR_QUOTE) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: unmatched quote at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & PARSE_ERR_BRACE) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: unmatched brace in environment variable name at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & PARSE_ERR_VARNAME) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: forbidden first char in environment variable name at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & PARSE_ERR_HEX) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: truncated or invalid hexadecimal sequence at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & PARSE_ERR_WRONG_EXPAND) {
+ size_t newpos = sanitize_for_printing(line, errptr - line, 80);
+
+ ha_alert("parsing [%s:%d]: truncated or invalid word expansion sequence at position %d:\n"
+ " %s\n %*s\n", file, linenum, (int)(errptr-thisline+1), line, (int)(newpos+1), "^");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ if (err & (PARSE_ERR_TOOLARGE|PARSE_ERR_OVERLAP)) {
+ outlinesize = (outlen + 1023) & -1024;
+ outline = my_realloc2(outline, outlinesize);
+ if (outline == NULL) {
+ ha_alert("parsing [%s:%d]: line too long, cannot allocate memory.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ fatal++;
+ outlinesize = 0;
+ goto err;
+ }
+ /* try again */
+ continue;
+ }
+
+ if (err & PARSE_ERR_TOOMANY) {
+ /* only check this *after* being sure the output is allocated */
+ ha_alert("parsing [%s:%d]: too many words, truncating after word %d, position %ld: <%s>.\n",
+ file, linenum, MAX_LINE_ARGS, (long)(args[MAX_LINE_ARGS-1] - outline + 1), args[MAX_LINE_ARGS-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ goto next_line;
+ }
+
+ /* everything's OK */
+ break;
+ }
+
+ /* dump cfg */
+ if (global.mode & MODE_DUMP_CFG) {
+ if (args[0] != NULL) {
+ struct cfg_section *sect;
+ int is_sect = 0;
+ int i = 0;
+ uint32_t g_key = HA_ATOMIC_LOAD(&global.anon_key);
+
+ if (global.mode & MODE_DUMP_NB_L)
+ qfprintf(stdout, "%d\t", linenum);
+
+ /* if a word is in sections list, is_sect = 1 */
+ list_for_each_entry(sect, &sections, list) {
+ if (strcmp(args[0], sect->section_name) == 0) {
+ is_sect = 1;
+ break;
+ }
+ }
+
+ if (g_key == 0) {
+ /* no anonymizing needed, dump the config as-is (but without comments).
+					 * Note: tabs were lost during tokenizing, so we reinsert one for non-section
+ * keywords.
+ */
+ if (!is_sect)
+ qfprintf(stdout, "\t");
+
+ for (i = 0; i < arg; i++) {
+ qfprintf(stdout, "%s ", args[i]);
+ }
+ qfprintf(stdout, "\n");
+ continue;
+ }
+
+ /* We're anonymizing */
+
+ if (is_sect) {
+ /* new sections are optionally followed by an identifier */
+ if (arg >= 2) {
+ qfprintf(stdout, "%s %s\n", args[0], HA_ANON_ID(g_key, args[1]));
+ }
+ else {
+ qfprintf(stdout, "%s\n", args[0]);
+ }
+ continue;
+ }
+
+ /* non-section keywords start indented */
+ qfprintf(stdout, "\t");
+
+ /* some keywords deserve special treatment */
+ if (!*args[0]) {
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "anonkey") == 0) {
+ qfprintf(stdout, "%s [...]\n", args[0]);
+ }
+
+ else if (strcmp(args[0], "maxconn") == 0) {
+ qfprintf(stdout, "%s %s\n", args[0], args[1]);
+ }
+
+ else if (strcmp(args[0], "stats") == 0 &&
+ (strcmp(args[1], "timeout") == 0 || strcmp(args[1], "maxconn") == 0)) {
+ qfprintf(stdout, "%s %s %s\n", args[0], args[1], args[2]);
+ }
+
+ else if (strcmp(args[0], "stats") == 0 && strcmp(args[1], "socket") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], args[1]);
+
+ if (arg > 2) {
+ qfprintf(stdout, "%s ", hash_ipanon(g_key, args[2], 1));
+
+ if (arg > 3) {
+ qfprintf(stdout, "[...]\n");
+ }
+ else {
+ qfprintf(stdout, "\n");
+ }
+ }
+ else {
+ qfprintf(stdout, "\n");
+ }
+ }
+
+ else if (strcmp(args[0], "timeout") == 0) {
+ qfprintf(stdout, "%s %s %s\n", args[0], args[1], args[2]);
+ }
+
+ else if (strcmp(args[0], "mode") == 0) {
+ qfprintf(stdout, "%s %s\n", args[0], args[1]);
+ }
+
+ /* It concerns user in global section and in userlist */
+ else if (strcmp(args[0], "user") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], HA_ANON_ID(g_key, args[1]));
+
+ if (arg > 2) {
+ qfprintf(stdout, "[...]\n");
+ }
+ else {
+ qfprintf(stdout, "\n");
+ }
+ }
+
+ else if (strcmp(args[0], "bind") == 0) {
+ qfprintf(stdout, "%s ", args[0]);
+ qfprintf(stdout, "%s ", hash_ipanon(g_key, args[1], 1));
+ if (arg > 2) {
+ qfprintf(stdout, "[...]\n");
+ }
+ else {
+ qfprintf(stdout, "\n");
+ }
+ }
+
+ else if (strcmp(args[0], "server") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], HA_ANON_ID(g_key, args[1]));
+
+ if (arg > 2) {
+ qfprintf(stdout, "%s ", hash_ipanon(g_key, args[2], 1));
+ }
+ if (arg > 3) {
+ qfprintf(stdout, "[...]\n");
+ }
+ else {
+ qfprintf(stdout, "\n");
+ }
+ }
+
+ else if (strcmp(args[0], "redirect") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], args[1]);
+
+ if (strcmp(args[1], "prefix") == 0 || strcmp(args[1], "location") == 0) {
+ qfprintf(stdout, "%s ", HA_ANON_PATH(g_key, args[2]));
+ }
+ else {
+ qfprintf(stdout, "%s ", args[2]);
+ }
+ if (arg > 3) {
+ qfprintf(stdout, "[...]");
+ }
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "acl") == 0) {
+ qfprintf(stdout, "%s %s %s ", args[0], HA_ANON_ID(g_key, args[1]), args[2]);
+
+ if (arg > 3) {
+ qfprintf(stdout, "[...]");
+ }
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "log") == 0) {
+ qfprintf(stdout, "log ");
+
+ if (strcmp(args[1], "global") == 0) {
+ qfprintf(stdout, "%s ", args[1]);
+ }
+ else {
+ qfprintf(stdout, "%s ", hash_ipanon(g_key, args[1], 1));
+ }
+ if (arg > 2) {
+ qfprintf(stdout, "[...]");
+ }
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "peer") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], HA_ANON_ID(g_key, args[1]));
+ qfprintf(stdout, "%s ", hash_ipanon(g_key, args[2], 1));
+
+ if (arg > 3) {
+ qfprintf(stdout, "[...]");
+ }
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "use_backend") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], HA_ANON_ID(g_key, args[1]));
+
+ if (arg > 2) {
+ qfprintf(stdout, "[...]");
+ }
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "default_backend") == 0) {
+ qfprintf(stdout, "%s %s\n", args[0], HA_ANON_ID(g_key, args[1]));
+ }
+
+ else if (strcmp(args[0], "source") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], hash_ipanon(g_key, args[1], 1));
+
+ if (arg > 2) {
+ qfprintf(stdout, "[...]");
+ }
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "nameserver") == 0) {
+ qfprintf(stdout, "%s %s %s ", args[0],
+ HA_ANON_ID(g_key, args[1]), hash_ipanon(g_key, args[2], 1));
+ if (arg > 3) {
+ qfprintf(stdout, "[...]");
+ }
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "http-request") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], args[1]);
+ if (arg > 2)
+ qfprintf(stdout, "[...]");
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "http-response") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], args[1]);
+ if (arg > 2)
+ qfprintf(stdout, "[...]");
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "http-after-response") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], args[1]);
+ if (arg > 2)
+ qfprintf(stdout, "[...]");
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "filter") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], args[1]);
+ if (arg > 2)
+ qfprintf(stdout, "[...]");
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "errorfile") == 0) {
+ qfprintf(stdout, "%s %s %s\n", args[0], args[1], HA_ANON_PATH(g_key, args[2]));
+ }
+
+ else if (strcmp(args[0], "cookie") == 0) {
+ qfprintf(stdout, "%s %s ", args[0], HA_ANON_ID(g_key, args[1]));
+ if (arg > 2)
+ qfprintf(stdout, "%s ", args[2]);
+ if (arg > 3)
+ qfprintf(stdout, "[...]");
+ qfprintf(stdout, "\n");
+ }
+
+ else if (strcmp(args[0], "stats") == 0 && strcmp(args[1], "auth") == 0) {
+ qfprintf(stdout, "%s %s %s\n", args[0], args[1], HA_ANON_STR(g_key, args[2]));
+ }
+
+ else {
+ /* display up to 3 words and mask the rest which might be confidential */
+ for (i = 0; i < MIN(arg, 3); i++) {
+ qfprintf(stdout, "%s ", args[i]);
+ }
+ if (arg > 3) {
+ qfprintf(stdout, "[...]");
+ }
+ qfprintf(stdout, "\n");
+ }
+ }
+ continue;
+ }
+ /* end of config dump */
+
+ /* empty line */
+ if (!**args)
+ continue;
+
+ /* check for config macros */
+ if (*args[0] == '.') {
+ if (strcmp(args[0], ".if") == 0) {
+ const char *errptr = NULL;
+ char *errmsg = NULL;
+ int cond;
+ char *w;
+
+ /* remerge all words into a single expression */
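+ /* (the tokenizer split the line by writing NUL bytes into outline,
+ * so turning each inner NUL back into a space rebuilds the whole
+ * condition as a single string reachable from args[1])
+ */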
+ for (w = *args; (w += strlen(w)) < outline + outlen - 1; *w = ' ')
+ ;
+
+ nested_cond_lvl++;
+ if (nested_cond_lvl >= MAXNESTEDCONDS) {
+ ha_alert("parsing [%s:%d]: too many nested '.if', max is %d.\n", file, linenum, MAXNESTEDCONDS);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
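+ /* if any enclosing block is already dropped or skipped, this whole
+ * '.if' is skipped as well (SKIP means "not even evaluated", as
+ * opposed to DROP which means the condition evaluated to false).
+ */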
+ if (nested_cond_lvl > 1 &&
+ (nested_conds[nested_cond_lvl - 1] == NESTED_COND_IF_DROP ||
+ nested_conds[nested_cond_lvl - 1] == NESTED_COND_IF_SKIP ||
+ nested_conds[nested_cond_lvl - 1] == NESTED_COND_ELIF_DROP ||
+ nested_conds[nested_cond_lvl - 1] == NESTED_COND_ELIF_SKIP ||
+ nested_conds[nested_cond_lvl - 1] == NESTED_COND_ELSE_DROP)) {
+ nested_conds[nested_cond_lvl] = NESTED_COND_IF_SKIP;
+ goto next_line;
+ }
+
+ cond = cfg_eval_condition(args + 1, &errmsg, &errptr);
+ if (cond < 0) {
+ size_t newpos = sanitize_for_printing(args[1], errptr - args[1], 76);
+
+ ha_alert("parsing [%s:%d]: %s in '.if' at position %d:\n .if %s\n %*s\n",
+ file, linenum, errmsg,
+ (int)(errptr-args[1]+1), args[1], (int)(newpos+5), "^");
+
+ free(errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (cond)
+ nested_conds[nested_cond_lvl] = NESTED_COND_IF_TAKE;
+ else
+ nested_conds[nested_cond_lvl] = NESTED_COND_IF_DROP;
+
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".elif") == 0) {
+ const char *errptr = NULL;
+ char *errmsg = NULL;
+ int cond;
+ char *w;
+
+ /* remerge all words into a single expression */
+ for (w = *args; (w += strlen(w)) < outline + outlen - 1; *w = ' ')
+ ;
+
+ if (!nested_cond_lvl) {
+ ha_alert("parsing [%s:%d]: lone '.elif' with no matching '.if'.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_DROP) {
+ ha_alert("parsing [%s:%d]: '.elif' after '.else' is not permitted.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_conds[nested_cond_lvl] == NESTED_COND_IF_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_IF_SKIP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_SKIP) {
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELIF_SKIP;
+ goto next_line;
+ }
+
+ cond = cfg_eval_condition(args + 1, &errmsg, &errptr);
+ if (cond < 0) {
+ size_t newpos = sanitize_for_printing(args[1], errptr - args[1], 74);
+
+ ha_alert("parsing [%s:%d]: %s in '.elif' at position %d:\n .elif %s\n %*s\n",
+ file, linenum, errmsg,
+ (int)(errptr-args[1]+1), args[1], (int)(newpos+7), "^");
+
+ free(errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (cond)
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELIF_TAKE;
+ else
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELIF_DROP;
+
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".else") == 0) {
+ if (*args[1]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ break;
+ }
+
+ if (!nested_cond_lvl) {
+ ha_alert("parsing [%s:%d]: lone '.else' with no matching '.if'.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_DROP) {
+ ha_alert("parsing [%s:%d]: '.else' after '.else' is not permitted.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+
+ if (nested_conds[nested_cond_lvl] == NESTED_COND_IF_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_IF_SKIP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_TAKE ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_SKIP) {
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELSE_DROP;
+ } else {
+ /* otherwise we take the "else" */
+ nested_conds[nested_cond_lvl] = NESTED_COND_ELSE_TAKE;
+ }
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".endif") == 0) {
+ if (*args[1]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ break;
+ }
+
+ if (!nested_cond_lvl) {
+ ha_alert("parsing [%s:%d]: lone '.endif' with no matching '.if'.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ break;
+ }
+ nested_cond_lvl--;
+ goto next_line;
+ }
+ }
+
+ if (nested_cond_lvl &&
+ (nested_conds[nested_cond_lvl] == NESTED_COND_IF_DROP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_IF_SKIP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_DROP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELIF_SKIP ||
+ nested_conds[nested_cond_lvl] == NESTED_COND_ELSE_DROP)) {
+ /* The current block is masked out by the conditions */
+ goto next_line;
+ }
+
+ /* .alert/.warning/.notice/.diag */
+ if (*args[0] == '.') {
+ if (strcmp(args[0], ".alert") == 0) {
+ if (*args[2]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'. Use quotes if the message should contain spaces.\n",
+ file, linenum, args[2], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto next_line;
+ }
+
+ ha_alert("parsing [%s:%d]: '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ goto err;
+ }
+ else if (strcmp(args[0], ".warning") == 0) {
+ if (*args[2]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'. Use quotes if the message should contain spaces.\n",
+ file, linenum, args[2], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto next_line;
+ }
+
+ ha_warning("parsing [%s:%d]: '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_WARN;
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".notice") == 0) {
+ if (*args[2]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'. Use quotes if the message should contain spaces.\n",
+ file, linenum, args[2], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto next_line;
+ }
+
+ ha_notice("parsing [%s:%d]: '%s'.\n", file, linenum, args[1]);
+ goto next_line;
+ }
+ else if (strcmp(args[0], ".diag") == 0) {
+ if (*args[2]) {
+ ha_alert("parsing [%s:%d]: Unexpected argument '%s' for '%s'. Use quotes if the message should contain spaces.\n",
+ file, linenum, args[2], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto next_line;
+ }
+
+ ha_diag_warning("parsing [%s:%d]: '%s'.\n", file, linenum, args[1]);
+ goto next_line;
+ }
+ else {
+ ha_alert("parsing [%s:%d]: unknown directive '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ break;
+ }
+ }
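+ /* Example of the constructs handled above, as they would appear in a
+ * configuration file (FOO and ENV_TYPE are placeholder names used
+ * purely for illustration):
+ *
+ *   .if defined(FOO)
+ *       .notice "FOO is set"
+ *   .elif streq("$ENV_TYPE",prod)
+ *       .warning "FOO is not set in prod"
+ *   .else
+ *       .diag "FOO not configured"
+ *   .endif
+ */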
+
+ /* check for keyword modifiers "no" and "default" */
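+ /* (both modifiers are consumed here: the args are shifted one slot
+ * to the left; for "no", the former first word is also emptied and
+ * parked at the end so the list stays properly terminated)
+ */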
+ if (strcmp(args[0], "no") == 0) {
+ char *tmp;
+
+ kwm = KWM_NO;
+ tmp = args[0];
+ for (arg=0; *args[arg+1]; arg++)
+ args[arg] = args[arg+1]; // shift args after inversion
+ *tmp = '\0'; // empty the former first word before parking it as last arg
+ args[arg] = tmp;
+ }
+ else if (strcmp(args[0], "default") == 0) {
+ kwm = KWM_DEF;
+ for (arg=0; *args[arg+1]; arg++)
+ args[arg] = args[arg+1]; // shift args after inversion
+ }
+
+ if (kwm != KWM_STD && strcmp(args[0], "option") != 0 &&
+ strcmp(args[0], "log") != 0 && strcmp(args[0], "busy-polling") != 0 &&
+ strcmp(args[0], "set-dumpable") != 0 && strcmp(args[0], "strict-limits") != 0 &&
+ strcmp(args[0], "insecure-fork-wanted") != 0 &&
+ strcmp(args[0], "numa-cpu-mapping") != 0) {
+ ha_alert("parsing [%s:%d]: negation/default currently "
+ "supported only for options, log, busy-polling, "
+ "set-dumpable, strict-limits, insecure-fork-wanted "
+ "and numa-cpu-mapping.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ }
+
+ /* detect section start */
+ list_for_each_entry(ics, &sections, list) {
+ if (strcmp(args[0], ics->section_name) == 0) {
+ cursection = ics->section_name;
+ pcs = cs;
+ cs = ics;
+ free(global.cfg_curr_section);
+ global.cfg_curr_section = strdup(*args[1] ? args[1] : args[0]);
+ check_section_position(args[0], file, linenum);
+ break;
+ }
+ }
+
+ if (pcs && pcs->post_section_parser) {
+ int status;
+
+ status = pcs->post_section_parser();
+ err_code |= status;
+ if (status & ERR_FATAL)
+ fatal++;
+
+ if (err_code & ERR_ABORT)
+ goto err;
+ }
+ pcs = NULL;
+
+ if (!cs) {
+ ha_alert("parsing [%s:%d]: unknown keyword '%s' out of section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ fatal++;
+ } else {
+ int status;
+
+ status = cs->section_parser(file, linenum, args, kwm);
+ err_code |= status;
+ if (status & ERR_FATAL)
+ fatal++;
+
+ if (err_code & ERR_ABORT)
+ goto err;
+ }
+ }
+
+ if (missing_lf != -1) {
+ ha_alert("parsing [%s:%d]: Missing LF on last line, file might have been truncated at position %d.\n",
+ file, linenum, (missing_lf + 1));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ ha_free(&global.cfg_curr_section);
+ if (cs && cs->post_section_parser)
+ err_code |= cs->post_section_parser();
+
+ if (nested_cond_lvl) {
+ ha_alert("parsing [%s:%d]: non-terminated '.if' block.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT;
+ }
+
+ if (*initial_cwd && chdir(initial_cwd) == -1) {
+ ha_alert("Impossible to get back to initial directory '%s' : %s\n", initial_cwd, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+err:
+ ha_free(&cfg_scope);
+ cursection = NULL;
+ free(thisline);
+ free(outline);
+ global.cfg_curr_line = 0;
+ global.cfg_curr_file = NULL;
+
+ if (f)
+ fclose(f);
+
+ return err_code;
+}
+
+#if defined(USE_THREAD) && defined USE_CPU_AFFINITY
+#if defined(__linux__)
+
+/* filter directory name of the pattern node<X> */
+static int numa_filter(const struct dirent *dir)
+{
+ char *endptr;
+
+ /* dir name must start with "node" prefix */
+ if (strncmp(dir->d_name, "node", 4))
+ return 0;
+
+ /* dir name must be at least 5 characters long */
+ if (!dir->d_name[4])
+ return 0;
+
+ /* dir name must end with a numeric id */
+ if (strtol(&dir->d_name[4], &endptr, 10) < 0 || *endptr)
+ return 0;
+
+ /* all tests succeeded */
+ return 1;
+}
+
+/* Inspect the cpu topology of the machine on startup. If a multi-socket
+ * machine is detected, try to bind on the first node with active cpus. This is
+ * done to prevent an impact on the overall performance when the topology of
+ * the machine is unknown. This function is not called if one of the conditions
+ * is met :
+ * - a non-null nbthread directive is active
+ * - a restrictive cpu-map directive is active
+ * - a restrictive affinity is already applied, for example via taskset
+ *
+ * Returns the count of cpus selected. If no automatic binding was required or
+ * an error occurred and the topology is unknown, 0 is returned.
+ */
+static int numa_detect_topology()
+{
+ struct dirent **node_dirlist;
+ int node_dirlist_size;
+
+ struct hap_cpuset active_cpus, node_cpu_set;
+ const char *parse_cpu_set_args[2];
+ char *err = NULL;
+ int grp, thr;
+
+ /* node_cpu_set count is used as return value */
+ ha_cpuset_zero(&node_cpu_set);
+
+ /* 1. count the sysfs node<X> directories */
+ node_dirlist = NULL;
+ node_dirlist_size = scandir(NUMA_DETECT_SYSTEM_SYSFS_PATH"/node", &node_dirlist, numa_filter, alphasort);
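+ /* fewer than two nodes (or a scandir error) means there is no
+ * NUMA refinement to do */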
+ if (node_dirlist_size <= 1)
+ goto free_scandir_entries;
+
+ /* 2. read and parse the list of currently online cpus */
+ if (read_line_to_trash("%s/cpu/online", NUMA_DETECT_SYSTEM_SYSFS_PATH) < 0) {
+ ha_notice("Cannot read online CPUs list, will not try to refine binding\n");
+ goto free_scandir_entries;
+ }
+
+ parse_cpu_set_args[0] = trash.area;
+ parse_cpu_set_args[1] = "\0";
+ if (parse_cpu_set(parse_cpu_set_args, &active_cpus, &err) != 0) {
+ ha_notice("Cannot read online CPUs list: '%s'. Will not try to refine binding\n", err);
+ free(err);
+ goto free_scandir_entries;
+ }
+
+ /* 3. loop through the node dirs and find the first one with active cpus */
+ while (node_dirlist_size--) {
+ const char *node = node_dirlist[node_dirlist_size]->d_name;
+ ha_cpuset_zero(&node_cpu_set);
+
+ if (read_line_to_trash("%s/node/%s/cpumap", NUMA_DETECT_SYSTEM_SYSFS_PATH, node) < 0) {
+ ha_notice("Cannot read CPUs list of '%s', will not select them to refine binding\n", node);
+ free(node_dirlist[node_dirlist_size]);
+ continue;
+ }
+
+ parse_cpumap(trash.area, &node_cpu_set);
+ ha_cpuset_and(&node_cpu_set, &active_cpus);
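+ /* restrict this node's cpu map to the cpus currently online */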
+
+ /* 4. set affinity on the first node found with active cpus */
+ if (!ha_cpuset_count(&node_cpu_set)) {
+ free(node_dirlist[node_dirlist_size]);
+ continue;
+ }
+
+ ha_diag_warning("Multi-socket cpu detected, automatically binding on active CPUs of '%s' (%u active cpu(s))\n", node, ha_cpuset_count(&node_cpu_set));
+ for (grp = 0; grp < MAX_TGROUPS; grp++)
+ for (thr = 0; thr < MAX_THREADS_PER_GROUP; thr++)
+ ha_cpuset_assign(&cpu_map[grp].thread[thr], &node_cpu_set);
+
+ free(node_dirlist[node_dirlist_size]);
+ break;
+ }
+
+ free_scandir_entries:
+ while (node_dirlist_size-- > 0)
+ free(node_dirlist[node_dirlist_size]);
+ free(node_dirlist);
+
+ return ha_cpuset_count(&node_cpu_set);
+}
+
+#elif defined(__FreeBSD__)
+static int numa_detect_topology()
+{
+ struct hap_cpuset node_cpu_set;
+ int ndomains = 0, i;
+ size_t len = sizeof(ndomains);
+ int grp, thr;
+
+ if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) {
+ ha_notice("Cannot assess the number of CPUs domains\n");
+ return 0;
+ }
+
+ BUG_ON(ndomains > MAXMEMDOM);
+ ha_cpuset_zero(&node_cpu_set);
+
+ if (ndomains < 2)
+ goto leave;
+
+ /*
+ * We retrieve the first valid CPU domain with active
+ * cpus, bind to it, and return the number of cpus
+ * in that domain.
+ */
+ for (i = 0; i < ndomains; i ++) {
+ struct hap_cpuset dom;
+ ha_cpuset_zero(&dom);
+ if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_DOMAIN, i, sizeof(dom.cpuset), &dom.cpuset) == -1)
+ continue;
+
+ if (!ha_cpuset_count(&dom))
+ continue;
+
+ ha_cpuset_assign(&node_cpu_set, &dom);
+
+ ha_diag_warning("Multi-socket cpu detected, automatically binding on active CPUs of '%d' (%u active cpu(s))\n", i, ha_cpuset_count(&node_cpu_set));
+ for (grp = 0; grp < MAX_TGROUPS; grp++)
+ for (thr = 0; thr < MAX_THREADS_PER_GROUP; thr++)
+ ha_cpuset_assign(&cpu_map[grp].thread[thr], &node_cpu_set);
+ break;
+ }
+ leave:
+ return ha_cpuset_count(&node_cpu_set);
+}
+
+#else
+static int numa_detect_topology()
+{
+ return 0;
+}
+
+#endif
+#endif /* USE_THREAD && USE_CPU_AFFINITY */
+
+/*
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the two first ones can stop processing, the two others are just
+ * indicators.
+ */
+int check_config_validity()
+{
+ int cfgerr = 0;
+ struct proxy *curproxy = NULL;
+ struct proxy *init_proxies_list = NULL;
+ struct stktable *t;
+ struct server *newsrv = NULL;
+ int err_code = 0;
+ unsigned int next_pxid = 1;
+ struct bind_conf *bind_conf;
+ char *err;
+ struct cfg_postparser *postparser;
+ struct resolvers *curr_resolvers = NULL;
+ int i;
+
+ bind_conf = NULL;
+ /*
+ * Now, check for the integrity of all that we have collected.
+ */
+
+ if (!global.tune.max_http_hdr)
+ global.tune.max_http_hdr = MAX_HTTP_HDR;
+
+ if (!global.tune.cookie_len)
+ global.tune.cookie_len = CAPTURE_LEN;
+
+ if (!global.tune.requri_len)
+ global.tune.requri_len = REQURI_LEN;
+
+ if (!global.nbthread) {
+ /* nbthread not set, thus automatic. In this case, and only if
+ * running on a single process, we enable the same number of
+ * threads as the number of CPUs the process is bound to. This
+ * allows the number of threads to be easily controlled with taskset.
+ */
+ global.nbthread = 1;
+
+#if defined(USE_THREAD)
+ {
+ int numa_cores = 0;
+#if defined(USE_CPU_AFFINITY)
+ if (global.numa_cpu_mapping && !thread_cpu_mask_forced() && !cpu_map_configured())
+ numa_cores = numa_detect_topology();
+#endif
+ global.nbthread = numa_cores ? numa_cores :
+ thread_cpus_enabled_at_boot;
+
+ /* Note that we cannot have more than 32 or 64 threads per group */
+ if (!global.nbtgroups)
+ global.nbtgroups = 1;
+
+ if (global.nbthread > MAX_THREADS_PER_GROUP * global.nbtgroups) {
+ ha_diag_warning("nbthread not set, found %d CPUs, limiting to %d threads (maximum is %d per thread group). Please set nbthreads and/or increase thread-groups in the global section to silence this warning.\n",
+ global.nbthread, MAX_THREADS_PER_GROUP * global.nbtgroups, MAX_THREADS_PER_GROUP);
+ global.nbthread = MAX_THREADS_PER_GROUP * global.nbtgroups;
+ }
+ }
+#endif
+ }
+
+ if (!global.nbtgroups)
+ global.nbtgroups = 1;
+
+ if (thread_map_to_groups() < 0) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ pool_head_requri = create_pool("requri", global.tune.requri_len , MEM_F_SHARED);
+
+ pool_head_capture = create_pool("capture", global.tune.cookie_len, MEM_F_SHARED);
+
+ /* Post initialisation of the users and groups lists. */
+ err_code = userlist_postinit();
+ if (err_code != ERR_NONE)
+ goto out;
+
+ /* first, we will invert the proxy list order */
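+ /* (proxies were prepended while parsing, so reversing the list
+ * restores the declaration order of the configuration file)
+ */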
+ curproxy = NULL;
+ while (proxies_list) {
+ struct proxy *next;
+
+ next = proxies_list->next;
+ proxies_list->next = curproxy;
+ curproxy = proxies_list;
+ if (!next)
+ break;
+ proxies_list = next;
+ }
+
+ /* starting to initialize the main proxies list */
+ init_proxies_list = proxies_list;
+
+init_proxies_list_stage1:
+ for (curproxy = init_proxies_list; curproxy; curproxy = curproxy->next) {
+ struct switching_rule *rule;
+ struct server_rule *srule;
+ struct sticking_rule *mrule;
+ struct logger *tmplogger;
+ unsigned int next_id;
+
+ if (!(curproxy->cap & PR_CAP_INT) && curproxy->uuid < 0) {
+ /* proxy ID not set, use automatic numbering with first
+ * spare entry starting with next_pxid. We don't assign
+ * numbers for internal proxies as they may depend on
+ * build or config options and we don't want them to
+ * possibly reuse existing IDs.
+ */
+ next_pxid = get_next_id(&used_proxy_id, next_pxid);
+ curproxy->conf.id.key = curproxy->uuid = next_pxid;
+ eb32_insert(&used_proxy_id, &curproxy->conf.id);
+ }
+
+ if (curproxy->mode == PR_MODE_HTTP && global.tune.bufsize >= (256 << 20) && ONLY_ONCE()) {
+ ha_alert("global.tune.bufsize must be below 256 MB when HTTP is in use (current value = %d).\n",
+ global.tune.bufsize);
+ cfgerr++;
+ }
+
+ /* next IDs are shifted even if the proxy is disabled, this
+ * guarantees that a proxy that is temporarily disabled in the
+ * configuration doesn't cause a renumbering. Internal proxies
+ * that are not assigned a static ID must never shift the IDs
+ * either since they may appear in any order (Lua, logs, etc).
+ * The GLOBAL proxy that carries the stats socket has its ID
+ * forced to zero.
+ */
+ if (curproxy->uuid >= 0)
+ next_pxid++;
+
+ if (curproxy->flags & PR_FL_DISABLED) {
+ /* ensure we don't keep listeners uselessly bound. We
+ * can't disable their listeners yet (fdtab not
+ * allocated yet) but let's skip them.
+ */
+ if (curproxy->table) {
+ ha_free(&curproxy->table->peers.name);
+ curproxy->table->peers.p = NULL;
+ }
+ continue;
+ }
+
+ /* The current proxy is referencing a default proxy. We must
+ * finalize its config, but only once. If the default proxy is
+ * ready (PR_FL_READY) it means it was already fully configured.
+ */
+ if (curproxy->defpx) {
+ if (!(curproxy->defpx->flags & PR_FL_READY)) {
+ /* check validity for 'tcp-request' layer 4/5/6/7 rules */
+ cfgerr += check_action_rules(&curproxy->defpx->tcp_req.l4_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->tcp_req.l5_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->tcp_req.inspect_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->tcp_rep.inspect_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->http_req_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->http_res_rules, curproxy->defpx, &err_code);
+ cfgerr += check_action_rules(&curproxy->defpx->http_after_res_rules, curproxy->defpx, &err_code);
+
+ err = NULL;
+ i = smp_resolve_args(curproxy->defpx, &err);
+ cfgerr += i;
+ if (i) {
+ indent_msg(&err, 8);
+ ha_alert("%s%s\n", i > 1 ? "multiple argument resolution errors:" : "", err);
+ ha_free(&err);
+ }
+ else
+ cfgerr += acl_find_targets(curproxy->defpx);
+
+ /* default proxy is now ready. Set the right FE/BE capabilities */
+ curproxy->defpx->flags |= PR_FL_READY;
+ }
+ }
+
+ /* check and reduce the bind-proc of each listener */
+ list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+ int ret;
+
+ /* HTTP frontends with "h2" as ALPN/NPN will work in
+ * HTTP/2 and absolutely require buffers 16kB or larger.
+ */
+#ifdef USE_OPENSSL
+ /* no-alpn ? If so, it's the right moment to remove it */
+ if (bind_conf->ssl_conf.alpn_str && !bind_conf->ssl_conf.alpn_len) {
+ free(bind_conf->ssl_conf.alpn_str);
+ bind_conf->ssl_conf.alpn_str = NULL;
+ }
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ else if (!bind_conf->ssl_conf.alpn_str && !bind_conf->ssl_conf.npn_str &&
+ ((bind_conf->options & BC_O_USE_SSL) || bind_conf->xprt == xprt_get(XPRT_QUIC)) &&
+ curproxy->mode == PR_MODE_HTTP && global.tune.bufsize >= 16384) {
+
+ /* Neither ALPN nor NPN were explicitly set nor disabled, we're
+ * in HTTP mode with an SSL or QUIC listener, we can enable ALPN.
+ * Note that it's in binary form.
+ */
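+ /* (binary form = RFC 7301 protocol list: each protocol name is
+ * preceded by its one-byte length, so "\002h2\010http/1.1"
+ * advertises "h2" then "http/1.1")
+ */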
+ if (bind_conf->xprt == xprt_get(XPRT_QUIC))
+ bind_conf->ssl_conf.alpn_str = strdup("\002h3");
+ else
+ bind_conf->ssl_conf.alpn_str = strdup("\002h2\010http/1.1");
+
+ if (!bind_conf->ssl_conf.alpn_str) {
+ ha_alert("Proxy '%s': out of memory while trying to allocate a default alpn string in 'bind %s' at [%s:%d].\n",
+ curproxy->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr++;
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+ bind_conf->ssl_conf.alpn_len = strlen(bind_conf->ssl_conf.alpn_str);
+ }
+#endif
+
+ if (curproxy->mode == PR_MODE_HTTP && global.tune.bufsize < 16384) {
+#ifdef OPENSSL_NPN_NEGOTIATED
+ /* check NPN */
+ if (bind_conf->ssl_conf.npn_str && strstr(bind_conf->ssl_conf.npn_str, "\002h2")) {
+ ha_alert("HTTP frontend '%s' enables HTTP/2 via NPN at [%s:%d], so global.tune.bufsize must be at least 16384 bytes (%d now).\n",
+ curproxy->id, bind_conf->file, bind_conf->line, global.tune.bufsize);
+ cfgerr++;
+ }
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ /* check ALPN */
+ if (bind_conf->ssl_conf.alpn_str && strstr(bind_conf->ssl_conf.alpn_str, "\002h2")) {
+ ha_alert("HTTP frontend '%s' enables HTTP/2 via ALPN at [%s:%d], so global.tune.bufsize must be at least 16384 bytes (%d now).\n",
+ curproxy->id, bind_conf->file, bind_conf->line, global.tune.bufsize);
+ cfgerr++;
+ }
+#endif
+ } /* HTTP && bufsize < 16384 */
+#endif
+
+ /* finish the bind setup */
+ ret = bind_complete_thread_setup(bind_conf, &err_code);
+ if (ret != 0) {
+ cfgerr += ret;
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+ }
+
+ switch (curproxy->mode) {
+ case PR_MODE_TCP:
+ cfgerr += proxy_cfg_ensure_no_http(curproxy);
+ cfgerr += proxy_cfg_ensure_no_log(curproxy);
+ break;
+
+ case PR_MODE_HTTP:
+ cfgerr += proxy_cfg_ensure_no_log(curproxy);
+ curproxy->http_needed = 1;
+ break;
+
+ case PR_MODE_CLI:
+ cfgerr += proxy_cfg_ensure_no_http(curproxy);
+ cfgerr += proxy_cfg_ensure_no_log(curproxy);
+ break;
+
+ case PR_MODE_SYSLOG:
+ /* this mode is initialized as the classic tcp proxy */
+ cfgerr += proxy_cfg_ensure_no_http(curproxy);
+ break;
+
+ case PR_MODE_PEERS:
+ case PR_MODES:
+ /* should not happen, but gcc warns about missing switch cases otherwise */
+ ha_alert("%s '%s' cannot initialize this proxy mode (peers) in this way. NOTE: PLEASE REPORT THIS TO DEVELOPERS AS YOU'RE NOT SUPPOSED TO BE ABLE TO CREATE A CONFIGURATION TRIGGERING THIS!\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!(curproxy->cap & PR_CAP_INT) && (curproxy->cap & PR_CAP_FE) && LIST_ISEMPTY(&curproxy->conf.listeners)) {
+ ha_warning("%s '%s' has no 'bind' directive. Please declare it as a backend if this was intended.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (curproxy->cap & PR_CAP_BE) {
+ if (curproxy->lbprm.algo & BE_LB_KIND) {
+ if (curproxy->options & PR_O_TRANSP) {
+ ha_alert("%s '%s' cannot use both transparent and balance mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+#ifdef WE_DONT_SUPPORT_SERVERLESS_LISTENERS
+ else if (curproxy->srv == NULL) {
+ ha_alert("%s '%s' needs at least 1 server in balance mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+#endif
+ else if (curproxy->options & PR_O_DISPATCH) {
+ ha_warning("dispatch address of %s '%s' will be ignored in balance mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ else if (!(curproxy->options & (PR_O_TRANSP | PR_O_DISPATCH))) {
+ /* If no LB algo is set in a backend, and we're not in
+ * transparent mode, dispatch mode nor proxy mode, we
+ * want to use balance roundrobin by default.
+ */
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ }
+ }
+
+ if (curproxy->options & PR_O_DISPATCH)
+ curproxy->options &= ~PR_O_TRANSP;
+ else if (curproxy->options & PR_O_TRANSP)
+ curproxy->options &= ~PR_O_DISPATCH;
+
+ if ((curproxy->tcpcheck_rules.flags & TCPCHK_RULES_UNUSED_HTTP_RS)) {
+ ha_warning("%s '%s' uses http-check rules without 'option httpchk', so the rules are ignored.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if ((curproxy->options2 & PR_O2_CHK_ANY) == PR_O2_TCPCHK_CHK &&
+ (curproxy->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) != TCPCHK_RULES_HTTP_CHK) {
+ if (curproxy->options & PR_O_DISABLE404) {
+ ha_warning("'%s' will be ignored for %s '%s' (requires 'option httpchk').\n",
+ "disable-on-404", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options &= ~PR_O_DISABLE404;
+ }
+ if (curproxy->options2 & PR_O2_CHK_SNDST) {
+ ha_warning("'%s' will be ignored for %s '%s' (requires 'option httpchk').\n",
+ "send-state", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options &= ~PR_O2_CHK_SNDST;
+ }
+ }
+
+ if ((curproxy->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
+ if (!global.external_check) {
+ ha_alert("Proxy '%s' : '%s' unable to find required 'global.external-check'.\n",
+ curproxy->id, "option external-check");
+ cfgerr++;
+ }
+ if (!curproxy->check_command) {
+ ha_alert("Proxy '%s' : '%s' unable to find required 'external-check command'.\n",
+ curproxy->id, "option external-check");
+ cfgerr++;
+ }
+ if (!(global.tune.options & GTUNE_INSECURE_FORK)) {
+ ha_warning("Proxy '%s' : 'insecure-fork-wanted' not enabled in the global section, '%s' will likely fail.\n",
+ curproxy->id, "option external-check");
+ err_code |= ERR_WARN;
+ }
+ }
+
+ if (curproxy->email_alert.set) {
+ if (!(curproxy->email_alert.mailers.name && curproxy->email_alert.from && curproxy->email_alert.to)) {
+ ha_warning("'email-alert' will be ignored for %s '%s' (the presence any of "
+ "'email-alert from', 'email-alert level' 'email-alert mailers', "
+ "'email-alert myhostname', or 'email-alert to' "
+ "requires each of 'email-alert from', 'email-alert mailers' and 'email-alert to' "
+ "to be present).\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ free_email_alert(curproxy);
+ }
+ if (!curproxy->email_alert.myhostname)
+ curproxy->email_alert.myhostname = strdup(hostname);
+ }
+
+ if (curproxy->check_command) {
+ int clear = 0;
+ if ((curproxy->options2 & PR_O2_CHK_ANY) != PR_O2_EXT_CHK) {
+ ha_warning("'%s' will be ignored for %s '%s' (requires 'option external-check').\n",
+ "external-check command", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ clear = 1;
+ }
+ if (curproxy->check_command[0] != '/' && !curproxy->check_path) {
+ ha_alert("Proxy '%s': '%s' does not have a leading '/' and 'external-check path' is not set.\n",
+ curproxy->id, "external-check command");
+ cfgerr++;
+ }
+ if (clear) {
+ ha_free(&curproxy->check_command);
+ }
+ }
+
+ if (curproxy->check_path) {
+ if ((curproxy->options2 & PR_O2_CHK_ANY) != PR_O2_EXT_CHK) {
+ ha_warning("'%s' will be ignored for %s '%s' (requires 'option external-check').\n",
+ "external-check path", proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ ha_free(&curproxy->check_path);
+ }
+ }
+
+ /* if a default backend was specified, let's find it */
+ if (curproxy->defbe.name) {
+ struct proxy *target;
+
+ target = proxy_be_by_name(curproxy->defbe.name);
+ if (!target) {
+ ha_alert("Proxy '%s': unable to find required default_backend: '%s'.\n",
+ curproxy->id, curproxy->defbe.name);
+ cfgerr++;
+ } else if (target == curproxy) {
+ ha_alert("Proxy '%s': loop detected for default_backend: '%s'.\n",
+ curproxy->id, curproxy->defbe.name);
+ cfgerr++;
+ } else if (target->mode != curproxy->mode &&
+ !(curproxy->mode == PR_MODE_TCP && target->mode == PR_MODE_HTTP)) {
+
+ ha_alert("%s %s '%s' (%s:%d) tries to use incompatible %s %s '%s' (%s:%d) as its default backend (see 'mode').\n",
+ proxy_mode_str(curproxy->mode), proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ proxy_mode_str(target->mode), proxy_type_str(target), target->id,
+ target->conf.file, target->conf.line);
+ cfgerr++;
+ } else {
+ free(curproxy->defbe.name);
+ curproxy->defbe.be = target;
+ /* Emit a warning if this proxy also has some servers */
+ if (curproxy->srv) {
+ ha_warning("In proxy '%s', the 'default_backend' rule always has precedence over the servers, which will never be used.\n",
+ curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ }
+ }
+
+ /* find the target proxy for 'use_backend' rules */
+ list_for_each_entry(rule, &curproxy->switching_rules, list) {
+ struct proxy *target;
+ struct logformat_node *node;
+ char *pxname;
+
+ /* Try to parse the string as a log format expression. If the result
+ * of the parsing is only one entry containing a simple string, then
+ * it's a standard string corresponding to a static rule, thus the
+ * parsing is cancelled and be.name is restored to be resolved.
+ */
+ pxname = rule->be.name;
+ LIST_INIT(&rule->be.expr);
+ curproxy->conf.args.ctx = ARGC_UBK;
+ curproxy->conf.args.file = rule->file;
+ curproxy->conf.args.line = rule->line;
+ err = NULL;
+ if (!parse_logformat_string(pxname, curproxy, &rule->be.expr, 0, SMP_VAL_FE_HRQ_HDR, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse use_backend rule '%s' : %s.\n",
+ rule->file, rule->line, pxname, err);
+ free(err);
+ cfgerr++;
+ continue;
+ }
+ node = LIST_NEXT(&rule->be.expr, struct logformat_node *, list);
+
+ if (!LIST_ISEMPTY(&rule->be.expr)) {
+ if (node->type != LOG_FMT_TEXT || node->list.n != &rule->be.expr) {
+ rule->dynamic = 1;
+ free(pxname);
+ continue;
+ }
+ /* Only one element in the list, a simple string: free the expression and
+ * fall back to static rule
+ */
+ LIST_DELETE(&node->list);
+ free(node->arg);
+ free(node);
+ }
+
+ rule->dynamic = 0;
+ rule->be.name = pxname;
+
+ target = proxy_be_by_name(rule->be.name);
+ if (!target) {
+ ha_alert("Proxy '%s': unable to find required use_backend: '%s'.\n",
+ curproxy->id, rule->be.name);
+ cfgerr++;
+ } else if (target == curproxy) {
+ ha_alert("Proxy '%s': loop detected for use_backend: '%s'.\n",
+ curproxy->id, rule->be.name);
+ cfgerr++;
+ } else if (target->mode != curproxy->mode &&
+ !(curproxy->mode == PR_MODE_TCP && target->mode == PR_MODE_HTTP)) {
+
+ ha_alert("%s %s '%s' (%s:%d) tries to use incompatible %s %s '%s' (%s:%d) in a 'use_backend' rule (see 'mode').\n",
+ proxy_mode_str(curproxy->mode), proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ proxy_mode_str(target->mode), proxy_type_str(target), target->id,
+ target->conf.file, target->conf.line);
+ cfgerr++;
+ } else {
+ ha_free(&rule->be.name);
+ rule->be.backend = target;
+ }
+ err_code |= warnif_tcp_http_cond(curproxy, rule->cond);
+ }
+
+ /* find the target server for 'use_server' rules */
+ list_for_each_entry(srule, &curproxy->server_rules, list) {
+ struct server *target;
+ struct logformat_node *node;
+ char *server_name;
+
+ /* We try to parse the string as a log format expression. If the result of the parsing
+ * is only one entry containing a single string, then it's a standard string corresponding
+ * to a static rule, thus the parsing is cancelled and we fall back to setting srv.ptr.
+ */
+ server_name = srule->srv.name;
+ LIST_INIT(&srule->expr);
+ curproxy->conf.args.ctx = ARGC_USRV;
+ err = NULL;
+ if (!parse_logformat_string(server_name, curproxy, &srule->expr, 0, SMP_VAL_FE_HRQ_HDR, &err)) {
+ ha_alert("Parsing [%s:%d]; use-server rule failed to parse log-format '%s' : %s.\n",
+ srule->file, srule->line, server_name, err);
+ free(err);
+ cfgerr++;
+ continue;
+ }
+ node = LIST_NEXT(&srule->expr, struct logformat_node *, list);
+
+ if (!LIST_ISEMPTY(&srule->expr)) {
+ if (node->type != LOG_FMT_TEXT || node->list.n != &srule->expr) {
+ srule->dynamic = 1;
+ free(server_name);
+ continue;
+ }
+ /* Only one element in the list, a simple string: free the expression and
+ * fall back to static rule
+ */
+ LIST_DELETE(&node->list);
+ free(node->arg);
+ free(node);
+ }
+
+ srule->dynamic = 0;
+ srule->srv.name = server_name;
+ target = findserver(curproxy, srule->srv.name);
+ err_code |= warnif_tcp_http_cond(curproxy, srule->cond);
+
+ if (!target) {
+ ha_alert("%s '%s' : unable to find server '%s' referenced in a 'use-server' rule.\n",
+ proxy_type_str(curproxy), curproxy->id, srule->srv.name);
+ cfgerr++;
+ continue;
+ }
+ ha_free(&srule->srv.name);
+ srule->srv.ptr = target;
+ target->flags |= SRV_F_NON_PURGEABLE;
+ }
+
+ /* find the target table for 'stick' rules */
+ list_for_each_entry(mrule, &curproxy->sticking_rules, list) {
+ curproxy->be_req_ana |= AN_REQ_STICKING_RULES;
+ if (mrule->flags & STK_IS_STORE)
+ curproxy->be_rsp_ana |= AN_RES_STORE_RULES;
+
+ if (!resolve_stick_rule(curproxy, mrule))
+ cfgerr++;
+
+ err_code |= warnif_tcp_http_cond(curproxy, mrule->cond);
+ }
+
+ /* find the target table for 'store response' rules */
+ list_for_each_entry(mrule, &curproxy->storersp_rules, list) {
+ curproxy->be_rsp_ana |= AN_RES_STORE_RULES;
+
+ if (!resolve_stick_rule(curproxy, mrule))
+ cfgerr++;
+ }
+
+ /* check validity for 'tcp-request' layer 4/5/6/7 rules */
+ cfgerr += check_action_rules(&curproxy->tcp_req.l4_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->tcp_req.l5_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->tcp_req.inspect_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->tcp_rep.inspect_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->http_req_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->http_res_rules, curproxy, &err_code);
+ cfgerr += check_action_rules(&curproxy->http_after_res_rules, curproxy, &err_code);
+
+ /* Warn if 'switch-mode http' is used on a TCP listener with servers but no backend */
+ if (!curproxy->defbe.name && LIST_ISEMPTY(&curproxy->switching_rules) && curproxy->srv) {
+ if ((curproxy->options & PR_O_HTTP_UPG) && curproxy->mode == PR_MODE_TCP)
+ ha_warning("Proxy '%s' : 'switch-mode http' configured for a %s %s with no backend. "
+ "Incoming connections upgraded to HTTP cannot be routed to TCP servers\n",
+ curproxy->id, proxy_mode_str(curproxy->mode), proxy_type_str(curproxy));
+ }
+
+ if (curproxy->table && curproxy->table->peers.name) {
+ struct peers *curpeers;
+
+ for (curpeers = cfg_peers; curpeers; curpeers = curpeers->next) {
+ if (strcmp(curpeers->id, curproxy->table->peers.name) == 0) {
+ ha_free(&curproxy->table->peers.name);
+ curproxy->table->peers.p = curpeers;
+ break;
+ }
+ }
+
+ if (!curpeers) {
+ ha_alert("Proxy '%s': unable to find sync peers '%s'.\n",
+ curproxy->id, curproxy->table->peers.name);
+ ha_free(&curproxy->table->peers.name);
+ curproxy->table->peers.p = NULL;
+ cfgerr++;
+ }
+ else if (curpeers->disabled) {
+ /* silently disable this peers section */
+ curproxy->table->peers.p = NULL;
+ }
+ else if (!curpeers->peers_fe) {
+ ha_alert("Proxy '%s': unable to find local peer '%s' in peers section '%s'.\n",
+ curproxy->id, localpeer, curpeers->id);
+ curproxy->table->peers.p = NULL;
+ cfgerr++;
+ }
+ }
+
+
+ if (curproxy->email_alert.mailers.name) {
+ struct mailers *curmailers = mailers;
+
+ for (curmailers = mailers; curmailers; curmailers = curmailers->next) {
+ if (strcmp(curmailers->id, curproxy->email_alert.mailers.name) == 0)
+ break;
+ }
+ if (!curmailers) {
+ ha_alert("Proxy '%s': unable to find mailers '%s'.\n",
+ curproxy->id, curproxy->email_alert.mailers.name);
+ free_email_alert(curproxy);
+ cfgerr++;
+ }
+ else {
+ err = NULL;
+ if (init_email_alert(curmailers, curproxy, &err)) {
+ ha_alert("Proxy '%s': %s.\n", curproxy->id, err);
+ free(err);
+ cfgerr++;
+ }
+ }
+ }
+
+ if (curproxy->uri_auth && !(curproxy->uri_auth->flags & STAT_CONVDONE) &&
+ !LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules) &&
+ (curproxy->uri_auth->userlist || curproxy->uri_auth->auth_realm )) {
+ ha_alert("%s '%s': stats 'auth'/'realm' and 'http-request' can't be used at the same time.\n",
+ "proxy", curproxy->id);
+ cfgerr++;
+ goto out_uri_auth_compat;
+ }
+
+ if (curproxy->uri_auth && curproxy->uri_auth->userlist &&
+ (!(curproxy->uri_auth->flags & STAT_CONVDONE) ||
+ LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules))) {
+ const char *uri_auth_compat_req[10];
+ struct act_rule *rule;
+ i = 0;
+
+ /* build the ACL condition from scratch. We're relying on anonymous ACLs for that */
+ uri_auth_compat_req[i++] = "auth";
+
+ if (curproxy->uri_auth->auth_realm) {
+ uri_auth_compat_req[i++] = "realm";
+ uri_auth_compat_req[i++] = curproxy->uri_auth->auth_realm;
+ }
+
+ uri_auth_compat_req[i++] = "unless";
+ uri_auth_compat_req[i++] = "{";
+ uri_auth_compat_req[i++] = "http_auth(.internal-stats-userlist)";
+ uri_auth_compat_req[i++] = "}";
+ uri_auth_compat_req[i++] = "";
+
+ rule = parse_http_req_cond(uri_auth_compat_req, "internal-stats-auth-compat", 0, curproxy);
+ if (!rule) {
+ cfgerr++;
+ break;
+ }
+
+ LIST_APPEND(&curproxy->uri_auth->http_req_rules, &rule->list);
+
+ if (curproxy->uri_auth->auth_realm) {
+ ha_free(&curproxy->uri_auth->auth_realm);
+ }
+ curproxy->uri_auth->flags |= STAT_CONVDONE;
+ }
+out_uri_auth_compat:
+
+ /* check whether we have a logger that uses RFC5424 log format */
+ list_for_each_entry(tmplogger, &curproxy->loggers, list) {
+ if (tmplogger->format == LOG_FORMAT_RFC5424) {
+ if (!curproxy->conf.logformat_sd_string) {
+ /* set the default logformat_sd_string */
+ curproxy->conf.logformat_sd_string = default_rfc5424_sd_log_format;
+ }
+ break;
+ }
+ }
+
+ /* compile the log format */
+ if (!(curproxy->cap & PR_CAP_FE)) {
+ if (curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format)
+ free(curproxy->conf.logformat_string);
+ curproxy->conf.logformat_string = NULL;
+ ha_free(&curproxy->conf.lfs_file);
+ curproxy->conf.lfs_line = 0;
+
+ if (curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ free(curproxy->conf.logformat_sd_string);
+ curproxy->conf.logformat_sd_string = NULL;
+ ha_free(&curproxy->conf.lfsd_file);
+ curproxy->conf.lfsd_line = 0;
+ }
+
+ if (curproxy->conf.logformat_string) {
+ curproxy->conf.args.ctx = ARGC_LOG;
+ curproxy->conf.args.file = curproxy->conf.lfs_file;
+ curproxy->conf.args.line = curproxy->conf.lfs_line;
+ err = NULL;
+ if (!parse_logformat_string(curproxy->conf.logformat_string, curproxy, &curproxy->logformat,
+ LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES,
+ SMP_VAL_FE_LOG_END, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse log-format : %s.\n",
+ curproxy->conf.lfs_file, curproxy->conf.lfs_line, err);
+ free(err);
+ cfgerr++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+ if (curproxy->conf.logformat_sd_string) {
+ curproxy->conf.args.ctx = ARGC_LOGSD;
+ curproxy->conf.args.file = curproxy->conf.lfsd_file;
+ curproxy->conf.args.line = curproxy->conf.lfsd_line;
+ err = NULL;
+ if (!parse_logformat_string(curproxy->conf.logformat_sd_string, curproxy, &curproxy->logformat_sd,
+ LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES,
+ SMP_VAL_FE_LOG_END, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse log-format-sd : %s.\n",
+ curproxy->conf.lfsd_file, curproxy->conf.lfsd_line, err);
+ free(err);
+ cfgerr++;
+ } else if (!add_to_logformat_list(NULL, NULL, LF_SEPARATOR, &curproxy->logformat_sd, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse log-format-sd : %s.\n",
+ curproxy->conf.lfsd_file, curproxy->conf.lfsd_line, err);
+ free(err);
+ cfgerr++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+ if (curproxy->conf.uniqueid_format_string) {
+ int where = 0;
+
+ curproxy->conf.args.ctx = ARGC_UIF;
+ curproxy->conf.args.file = curproxy->conf.uif_file;
+ curproxy->conf.args.line = curproxy->conf.uif_line;
+ err = NULL;
+ if (curproxy->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (curproxy->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ if (!parse_logformat_string(curproxy->conf.uniqueid_format_string, curproxy, &curproxy->format_unique_id,
+ LOG_OPT_HTTP|LOG_OPT_MERGE_SPACES, where, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse unique-id : %s.\n",
+ curproxy->conf.uif_file, curproxy->conf.uif_line, err);
+ free(err);
+ cfgerr++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+ if (curproxy->conf.error_logformat_string) {
+ curproxy->conf.args.ctx = ARGC_LOG;
+ curproxy->conf.args.file = curproxy->conf.elfs_file;
+ curproxy->conf.args.line = curproxy->conf.elfs_line;
+ err = NULL;
+ if (!parse_logformat_string(curproxy->conf.error_logformat_string, curproxy, &curproxy->logformat_error,
+ LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES,
+ SMP_VAL_FE_LOG_END, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to parse error-log-format : %s.\n",
+ curproxy->conf.elfs_file, curproxy->conf.elfs_line, err);
+ free(err);
+ cfgerr++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+ /* "balance hash" needs to compile its expression
+ * (log backends will handle this in proxy log postcheck)
+ */
+ if (curproxy->mode != PR_MODE_SYSLOG &&
+ (curproxy->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_SMP) {
+ int idx = 0;
+ const char *args[] = {
+ curproxy->lbprm.arg_str,
+ NULL,
+ };
+
+ err = NULL;
+ curproxy->conf.args.ctx = ARGC_USRV; // same context as use_server.
+ curproxy->lbprm.expr =
+ sample_parse_expr((char **)args, &idx,
+ curproxy->conf.file, curproxy->conf.line,
+ &err, &curproxy->conf.args, NULL);
+
+ if (!curproxy->lbprm.expr) {
+ ha_alert("%s '%s' [%s:%d]: failed to parse 'balance hash' expression '%s' in : %s.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ curproxy->lbprm.arg_str, err);
+ ha_free(&err);
+ cfgerr++;
+ }
+ else if (!(curproxy->lbprm.expr->fetch->val & SMP_VAL_BE_SET_SRV)) {
+ ha_alert("%s '%s' [%s:%d]: error detected while parsing 'balance hash' expression '%s' "
+ "which requires information from %s, which is not available here.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ curproxy->lbprm.arg_str, sample_src_names(curproxy->lbprm.expr->fetch->use));
+ cfgerr++;
+ }
+ else if (curproxy->mode == PR_MODE_HTTP && (curproxy->lbprm.expr->fetch->use & SMP_USE_L6REQ)) {
+ ha_warning("%s '%s' [%s:%d]: L6 sample fetch <%s> will be ignored in 'balance hash' expression in HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ curproxy->conf.file, curproxy->conf.line,
+ curproxy->lbprm.arg_str);
+ }
+ else
+ curproxy->http_needed |= !!(curproxy->lbprm.expr->fetch->use & SMP_USE_HTTP_ANY);
+ }
+
+ /* only now we can check if some args remain unresolved.
+ * This must be done after the users and groups resolution.
+ */
+ err = NULL;
+ i = smp_resolve_args(curproxy, &err);
+ cfgerr += i;
+ if (i) {
+ indent_msg(&err, 8);
+ ha_alert("%s%s\n", i > 1 ? "multiple argument resolution errors:" : "", err);
+ ha_free(&err);
+ } else
+ cfgerr += acl_find_targets(curproxy);
+
+ if (!(curproxy->cap & PR_CAP_INT) && (curproxy->mode == PR_MODE_TCP || curproxy->mode == PR_MODE_HTTP) &&
+ (((curproxy->cap & PR_CAP_FE) && !curproxy->timeout.client) ||
+ ((curproxy->cap & PR_CAP_BE) && (curproxy->srv) &&
+ (!curproxy->timeout.connect ||
+ (!curproxy->timeout.server && (curproxy->mode == PR_MODE_HTTP || !curproxy->timeout.tunnel)))))) {
+ ha_warning("missing timeouts for %s '%s'.\n"
+ " | While not properly invalid, you will certainly encounter various problems\n"
+ " | with such a configuration. To fix this, please ensure that all following\n"
+ " | timeouts are set to a non-zero value: 'client', 'connect', 'server'.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ /* Historically, the tarpit and queue timeouts were inherited from contimeout.
+ * We must still support older configurations, so let's find out whether those
+ * parameters have been set or must be copied from contimeout.
+ */
+ if (!curproxy->timeout.tarpit)
+ curproxy->timeout.tarpit = curproxy->timeout.connect;
+ if ((curproxy->cap & PR_CAP_BE) && !curproxy->timeout.queue)
+ curproxy->timeout.queue = curproxy->timeout.connect;
+
+ if ((curproxy->tcpcheck_rules.flags & TCPCHK_RULES_UNUSED_TCP_RS)) {
+ ha_warning("%s '%s' uses tcp-check rules without 'option tcp-check', so the rules are ignored.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ /* ensure that cookie capture length is not too large */
+ if (curproxy->capture_len >= global.tune.cookie_len) {
+ ha_warning("truncating capture length to %d bytes for %s '%s'.\n",
+ global.tune.cookie_len - 1, proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->capture_len = global.tune.cookie_len - 1;
+ }
+
+ /* The small pools required for the capture lists */
+ if (curproxy->nb_req_cap) {
+ curproxy->req_cap_pool = create_pool("ptrcap",
+ curproxy->nb_req_cap * sizeof(char *),
+ MEM_F_SHARED);
+ }
+
+ if (curproxy->nb_rsp_cap) {
+ curproxy->rsp_cap_pool = create_pool("ptrcap",
+ curproxy->nb_rsp_cap * sizeof(char *),
+ MEM_F_SHARED);
+ }
+
+ switch (curproxy->load_server_state_from_file) {
+ case PR_SRV_STATE_FILE_UNSPEC:
+ curproxy->load_server_state_from_file = PR_SRV_STATE_FILE_NONE;
+ break;
+ case PR_SRV_STATE_FILE_GLOBAL:
+ if (!global.server_state_file) {
+ ha_warning("backend '%s' configured to load server state file from global section 'server-state-file' directive. Unfortunately, 'server-state-file' is not set!\n",
+ curproxy->id);
+ err_code |= ERR_WARN;
+ }
+ break;
+ }
+
+ /* first, we will invert the servers list order */
+ newsrv = NULL;
+ while (curproxy->srv) {
+ struct server *next;
+
+ next = curproxy->srv->next;
+ curproxy->srv->next = newsrv;
+ newsrv = curproxy->srv;
+ if (!next)
+ break;
+ curproxy->srv = next;
+ }
+
+ /* Check that server names do not conflict. Conflicts cause trouble in the stats.
+ * We only emit a warning for the first conflict affecting each server,
+ * in order to avoid combinatory explosion if all servers have the same
+ * name. We do that only for servers which do not have an explicit ID,
+ * because these IDs were made also for distinguishing them and we don't
+ * want to annoy people who correctly manage them.
+ */
+ for (newsrv = curproxy->srv; newsrv; newsrv = newsrv->next) {
+ struct server *other_srv;
+
+ if (newsrv->puid)
+ continue;
+
+ for (other_srv = curproxy->srv; other_srv && other_srv != newsrv; other_srv = other_srv->next) {
+ if (!other_srv->puid && strcmp(other_srv->id, newsrv->id) == 0) {
+ ha_alert("parsing [%s:%d] : %s '%s', another server named '%s' was already defined at line %d, please use distinct names.\n",
+ newsrv->conf.file, newsrv->conf.line,
+ proxy_type_str(curproxy), curproxy->id,
+ newsrv->id, other_srv->conf.line);
+ cfgerr++;
+ break;
+ }
+ }
+ }
+
+ /* assign automatic UIDs to servers which don't have one yet */
+ next_id = 1;
+ newsrv = curproxy->srv;
+ while (newsrv != NULL) {
+ if (!newsrv->puid) {
+ /* server ID not set, use automatic numbering with first
+ * spare entry starting with next_svid.
+ */
+ next_id = get_next_id(&curproxy->conf.used_server_id, next_id);
+ newsrv->conf.id.key = newsrv->puid = next_id;
+ eb32_insert(&curproxy->conf.used_server_id, &newsrv->conf.id);
+ }
+ newsrv->conf.name.key = newsrv->id;
+ ebis_insert(&curproxy->conf.used_server_name, &newsrv->conf.name);
+
+ next_id++;
+ newsrv = newsrv->next;
+ }
+
+ curproxy->lbprm.wmult = 1; /* default weight multiplier */
+ curproxy->lbprm.wdiv = 1; /* default weight divider */
+
+ /*
+ * If this server supports a maxconn parameter, it needs a dedicated
+ * task to fill the emptied slots when a connection leaves.
+ * Also, resolve deferred tracking dependency if needed.
+ */
+ newsrv = curproxy->srv;
+ while (newsrv != NULL) {
+ set_usermsgs_ctx(newsrv->conf.file, newsrv->conf.line, &newsrv->obj_type);
+
+ srv_minmax_conn_apply(newsrv);
+
+ /* this will also properly set the transport layer for
+ * production traffic and checks.
+ * If the default-server has use_ssl, prepare the ssl init
+ * without activating it */
+ if (newsrv->use_ssl == 1 || newsrv->check.use_ssl == 1 ||
+ (newsrv->proxy->options & PR_O_TCPCHK_SSL) ||
+ ((newsrv->flags & SRV_F_DEFSRV_USE_SSL) && newsrv->use_ssl != 1)) {
+ if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv)
+ cfgerr += xprt_get(XPRT_SSL)->prepare_srv(newsrv);
+ }
+
+ if ((newsrv->flags & SRV_F_FASTOPEN) &&
+ ((curproxy->retry_type & (PR_RE_DISCONNECTED | PR_RE_TIMEOUT)) !=
+ (PR_RE_DISCONNECTED | PR_RE_TIMEOUT)))
+ ha_warning("server has tfo activated, the backend should be configured with at least 'conn-failure', 'empty-response' and 'response-timeout' or we wouldn't be able to retry the connection on failure.\n");
+
+ if (newsrv->trackit) {
+ if (srv_apply_track(newsrv, curproxy)) {
+ ++cfgerr;
+ goto next_srv;
+ }
+ }
+
+ next_srv:
+ reset_usermsgs_ctx();
+ newsrv = newsrv->next;
+ }
+
+ /*
+ * Try to generate dynamic cookies for servers now.
+ * It couldn't be done earlier, since at the time we parsed
+ * the server line, we may not have known yet that we
+ * should use dynamic cookies, or the secret key may not
+ * have been provided yet.
+ */
+ if (curproxy->ck_opts & PR_CK_DYNAMIC) {
+ newsrv = curproxy->srv;
+ while (newsrv != NULL) {
+ srv_set_dyncookie(newsrv);
+ newsrv = newsrv->next;
+ }
+
+ }
+ /* We have to initialize the server lookup mechanism depending
+ * on what LB algorithm was chosen.
+ */
+
+ if (curproxy->mode == PR_MODE_SYSLOG) {
+ /* log load-balancing requires special init that is performed
+ * during log-postparsing step
+ */
+ goto skip_server_lb_init;
+ }
+ curproxy->lbprm.algo &= ~(BE_LB_LKUP | BE_LB_PROP_DYN);
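+ /* static algorithms (static-rr and map-based hash) use a precomputed
+ * server map; all other algorithms use ebtree lookups and keep
+ * BE_LB_PROP_DYN so that server weights may change at runtime.
+ */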
+ switch (curproxy->lbprm.algo & BE_LB_KIND) {
+ case BE_LB_KIND_RR:
+ if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_RR_STATIC) {
+ curproxy->lbprm.algo |= BE_LB_LKUP_MAP;
+ init_server_map(curproxy);
+ } else if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_RR_RANDOM) {
+ curproxy->lbprm.algo |= BE_LB_LKUP_CHTREE | BE_LB_PROP_DYN;
+ if (chash_init_server_tree(curproxy) < 0) {
+ cfgerr++;
+ }
+ } else {
+ curproxy->lbprm.algo |= BE_LB_LKUP_RRTREE | BE_LB_PROP_DYN;
+ fwrr_init_server_groups(curproxy);
+ }
+ break;
+
+ case BE_LB_KIND_CB:
+ if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_CB_LC) {
+ curproxy->lbprm.algo |= BE_LB_LKUP_LCTREE | BE_LB_PROP_DYN;
+ fwlc_init_server_tree(curproxy);
+ } else {
+ curproxy->lbprm.algo |= BE_LB_LKUP_FSTREE | BE_LB_PROP_DYN;
+ fas_init_server_tree(curproxy);
+ }
+ break;
+
+ case BE_LB_KIND_HI:
+ if ((curproxy->lbprm.algo & BE_LB_HASH_TYPE) == BE_LB_HASH_CONS) {
+ curproxy->lbprm.algo |= BE_LB_LKUP_CHTREE | BE_LB_PROP_DYN;
+ if (chash_init_server_tree(curproxy) < 0) {
+ cfgerr++;
+ }
+ } else {
+ curproxy->lbprm.algo |= BE_LB_LKUP_MAP;
+ init_server_map(curproxy);
+ }
+ break;
+ }
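+	/* Summary of the mapping set up above:
+	 *   static round-robin   -> lookup map  (init_server_map)
+	 *   random               -> chash tree  (chash_init_server_tree)
+	 *   dynamic round-robin  -> rr tree     (fwrr_init_server_groups)
+	 *   leastconn            -> lc tree     (fwlc_init_server_tree)
+	 *   first                -> fas tree    (fas_init_server_tree)
+	 *   consistent hashing   -> chash tree  (chash_init_server_tree)
+	 *   map-based hashing    -> lookup map  (init_server_map)
+	 * Only the tree-based methods advertise BE_LB_PROP_DYN, i.e. they
+	 * support dynamic weight changes at run time.
+	 */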
+ skip_server_lb_init:
+ HA_RWLOCK_INIT(&curproxy->lbprm.lock);
+
+ if (curproxy->options & PR_O_LOGASAP)
+ curproxy->to_log &= ~LW_BYTES;
+
+ if (!(curproxy->cap & PR_CAP_INT) && (curproxy->mode == PR_MODE_TCP || curproxy->mode == PR_MODE_HTTP) &&
+ (curproxy->cap & PR_CAP_FE) && LIST_ISEMPTY(&curproxy->loggers) &&
+ (!LIST_ISEMPTY(&curproxy->logformat) || !LIST_ISEMPTY(&curproxy->logformat_sd))) {
+ ha_warning("log format ignored for %s '%s' since it has no log address.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (curproxy->mode != PR_MODE_HTTP && !(curproxy->options & PR_O_HTTP_UPG)) {
+ int optnum;
+
+ if (curproxy->uri_auth) {
+ ha_warning("'stats' statement ignored for %s '%s' as it requires HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->uri_auth = NULL;
+ }
+
+ if (curproxy->capture_name) {
+ ha_warning("'capture' statement ignored for %s '%s' as it requires HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_req_rules)) {
+ ha_warning("'http-request' rules ignored for %s '%s' as they require HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_res_rules)) {
+ ha_warning("'http-response' rules ignored for %s '%s' as they require HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->http_after_res_rules)) {
+ ha_warning("'http-after-response' rules ignored for %s '%s' as they require HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ if (!LIST_ISEMPTY(&curproxy->redirect_rules)) {
+ ha_warning("'redirect' rules ignored for %s '%s' as they require HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ }
+
+ for (optnum = 0; cfg_opts[optnum].name; optnum++) {
+ if (cfg_opts[optnum].mode == PR_MODE_HTTP &&
+ (curproxy->cap & cfg_opts[optnum].cap) &&
+ (curproxy->options & cfg_opts[optnum].val)) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ cfg_opts[optnum].name, proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options &= ~cfg_opts[optnum].val;
+ }
+ }
+
+ for (optnum = 0; cfg_opts2[optnum].name; optnum++) {
+ if (cfg_opts2[optnum].mode == PR_MODE_HTTP &&
+ (curproxy->cap & cfg_opts2[optnum].cap) &&
+ (curproxy->options2 & cfg_opts2[optnum].val)) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ cfg_opts2[optnum].name, proxy_type_str(curproxy), curproxy->id);
+ err_code |= ERR_WARN;
+ curproxy->options2 &= ~cfg_opts2[optnum].val;
+ }
+ }
+
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (curproxy->conn_src.bind_hdr_occ) {
+ curproxy->conn_src.bind_hdr_occ = 0;
+ ha_warning("%s '%s' : ignoring use of header %s as source IP in non-HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id, curproxy->conn_src.bind_hdr_name);
+ err_code |= ERR_WARN;
+ }
+#endif
+ }
+
+ /*
+ * ensure that we're not cross-dressing a TCP server into HTTP.
+ */
+ newsrv = curproxy->srv;
+ while (newsrv != NULL) {
+ if ((curproxy->mode != PR_MODE_HTTP) && newsrv->rdr_len) {
+ ha_alert("%s '%s' : server cannot have cookie or redirect prefix in non-HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+
+ if ((curproxy->mode != PR_MODE_HTTP) && newsrv->cklen) {
+ ha_warning("%s '%s' : ignoring cookie for server '%s' as HTTP mode is disabled.\n",
+ proxy_type_str(curproxy), curproxy->id, newsrv->id);
+ err_code |= ERR_WARN;
+ }
+
+ if ((newsrv->flags & SRV_F_MAPPORTS) && (curproxy->options2 & PR_O2_RDPC_PRST)) {
+ ha_warning("%s '%s' : RDP cookie persistence will not work for server '%s' because it lacks an explicit port number.\n",
+ proxy_type_str(curproxy), curproxy->id, newsrv->id);
+ err_code |= ERR_WARN;
+ }
+
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (curproxy->mode != PR_MODE_HTTP && newsrv->conn_src.bind_hdr_occ) {
+ newsrv->conn_src.bind_hdr_occ = 0;
+ ha_warning("%s '%s' : server %s cannot use header %s as source IP in non-HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id, newsrv->id, newsrv->conn_src.bind_hdr_name);
+ err_code |= ERR_WARN;
+ }
+#endif
+
+ if ((curproxy->mode != PR_MODE_HTTP) && (curproxy->options & PR_O_REUSE_MASK) != PR_O_REUSE_NEVR)
+ curproxy->options &= ~PR_O_REUSE_MASK;
+
+ if ((curproxy->mode != PR_MODE_HTTP) && newsrv->flags & SRV_F_RHTTP) {
+ ha_alert("%s '%s' : server %s uses reverse HTTP addressing which can only be used with HTTP mode.\n",
+ proxy_type_str(curproxy), curproxy->id, newsrv->id);
+ cfgerr++;
+ err_code |= ERR_FATAL | ERR_ALERT;
+ goto out;
+ }
+
+ newsrv = newsrv->next;
+ }
+
+ /* Check filter configuration, if any */
+ cfgerr += flt_check(curproxy);
+
+ if (curproxy->cap & PR_CAP_FE) {
+ if (!curproxy->accept)
+ curproxy->accept = frontend_accept;
+
+ if (!LIST_ISEMPTY(&curproxy->tcp_req.inspect_rules) ||
+ (curproxy->defpx && !LIST_ISEMPTY(&curproxy->defpx->tcp_req.inspect_rules)))
+ curproxy->fe_req_ana |= AN_REQ_INSPECT_FE;
+
+ if (curproxy->mode == PR_MODE_HTTP) {
+ curproxy->fe_req_ana |= AN_REQ_WAIT_HTTP | AN_REQ_HTTP_PROCESS_FE;
+ curproxy->fe_rsp_ana |= AN_RES_WAIT_HTTP | AN_RES_HTTP_PROCESS_FE;
+ }
+
+ if (curproxy->mode == PR_MODE_CLI) {
+ curproxy->fe_req_ana |= AN_REQ_WAIT_CLI;
+ curproxy->fe_rsp_ana |= AN_RES_WAIT_CLI;
+ }
+
+ /* both TCP and HTTP must check switching rules */
+ curproxy->fe_req_ana |= AN_REQ_SWITCHING_RULES;
+
+ /* Add filters analyzers if needed */
+ if (!LIST_ISEMPTY(&curproxy->filter_configs)) {
+ curproxy->fe_req_ana |= AN_REQ_FLT_START_FE | AN_REQ_FLT_XFER_DATA | AN_REQ_FLT_END;
+ curproxy->fe_rsp_ana |= AN_RES_FLT_START_FE | AN_RES_FLT_XFER_DATA | AN_RES_FLT_END;
+ }
+ }
+
+ if (curproxy->cap & PR_CAP_BE) {
+ if (!LIST_ISEMPTY(&curproxy->tcp_req.inspect_rules) ||
+ (curproxy->defpx && !LIST_ISEMPTY(&curproxy->defpx->tcp_req.inspect_rules)))
+ curproxy->be_req_ana |= AN_REQ_INSPECT_BE;
+
+ if (!LIST_ISEMPTY(&curproxy->tcp_rep.inspect_rules) ||
+ (curproxy->defpx && !LIST_ISEMPTY(&curproxy->defpx->tcp_rep.inspect_rules)))
+ curproxy->be_rsp_ana |= AN_RES_INSPECT;
+
+ if (curproxy->mode == PR_MODE_HTTP) {
+ curproxy->be_req_ana |= AN_REQ_WAIT_HTTP | AN_REQ_HTTP_INNER | AN_REQ_HTTP_PROCESS_BE;
+ curproxy->be_rsp_ana |= AN_RES_WAIT_HTTP | AN_RES_HTTP_PROCESS_BE;
+ }
+
+		/* If the backend requires RDP cookie persistence, we have to
+ * enable the corresponding analyser.
+ */
+ if (curproxy->options2 & PR_O2_RDPC_PRST)
+ curproxy->be_req_ana |= AN_REQ_PRST_RDP_COOKIE;
+
+ /* Add filters analyzers if needed */
+ if (!LIST_ISEMPTY(&curproxy->filter_configs)) {
+ curproxy->be_req_ana |= AN_REQ_FLT_START_BE | AN_REQ_FLT_XFER_DATA | AN_REQ_FLT_END;
+ curproxy->be_rsp_ana |= AN_RES_FLT_START_BE | AN_RES_FLT_XFER_DATA | AN_RES_FLT_END;
+ }
+ }
+
+ /* Check the mux protocols, if any, for each listener and server
+ * attached to the current proxy */
+ list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+ int mode = conn_pr_mode_to_proto_mode(curproxy->mode);
+ const struct mux_proto_list *mux_ent;
+
+ if (!bind_conf->mux_proto) {
+ /* No protocol was specified. If we're using QUIC at the transport
+ * layer, we'll instantiate it as a mux as well. If QUIC is not
+ * compiled in, this will remain NULL.
+ */
+ if (bind_conf->xprt && bind_conf->xprt == xprt_get(XPRT_QUIC))
+ bind_conf->mux_proto = get_mux_proto(ist("quic"));
+ }
+
+ if (!bind_conf->mux_proto)
+ continue;
+
+ /* it is possible that an incorrect mux was referenced
+ * due to the proxy's mode not being taken into account
+ * on first pass. Let's adjust it now.
+ */
+ mux_ent = conn_get_best_mux_entry(bind_conf->mux_proto->token, PROTO_SIDE_FE, mode);
+
+ if (!mux_ent || !isteq(mux_ent->token, bind_conf->mux_proto->token)) {
+ ha_alert("%s '%s' : MUX protocol '%.*s' is not usable for 'bind %s' at [%s:%d].\n",
+ proxy_type_str(curproxy), curproxy->id,
+ (int)bind_conf->mux_proto->token.len,
+ bind_conf->mux_proto->token.ptr,
+ bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr++;
+ } else {
+ if ((mux_ent->mux->flags & MX_FL_FRAMED) && !(bind_conf->options & BC_O_USE_SOCK_DGRAM)) {
+ ha_alert("%s '%s' : frame-based MUX protocol '%.*s' is incompatible with stream transport of 'bind %s' at [%s:%d].\n",
+ proxy_type_str(curproxy), curproxy->id,
+ (int)bind_conf->mux_proto->token.len,
+ bind_conf->mux_proto->token.ptr,
+ bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr++;
+ }
+ else if (!(mux_ent->mux->flags & MX_FL_FRAMED) && !(bind_conf->options & BC_O_USE_SOCK_STREAM)) {
+ ha_alert("%s '%s' : stream-based MUX protocol '%.*s' is incompatible with framed transport of 'bind %s' at [%s:%d].\n",
+ proxy_type_str(curproxy), curproxy->id,
+ (int)bind_conf->mux_proto->token.len,
+ bind_conf->mux_proto->token.ptr,
+ bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr++;
+ }
+ }
+
+ /* update the mux */
+ bind_conf->mux_proto = mux_ent;
+ }
+ for (newsrv = curproxy->srv; newsrv; newsrv = newsrv->next) {
+ int mode = conn_pr_mode_to_proto_mode(curproxy->mode);
+ const struct mux_proto_list *mux_ent;
+
+ if (!newsrv->mux_proto)
+ continue;
+
+ /* it is possible that an incorrect mux was referenced
+ * due to the proxy's mode not being taken into account
+ * on first pass. Let's adjust it now.
+ */
+ mux_ent = conn_get_best_mux_entry(newsrv->mux_proto->token, PROTO_SIDE_BE, mode);
+
+ if (!mux_ent || !isteq(mux_ent->token, newsrv->mux_proto->token)) {
+ ha_alert("%s '%s' : MUX protocol '%.*s' is not usable for server '%s' at [%s:%d].\n",
+ proxy_type_str(curproxy), curproxy->id,
+ (int)newsrv->mux_proto->token.len,
+ newsrv->mux_proto->token.ptr,
+ newsrv->id, newsrv->conf.file, newsrv->conf.line);
+ cfgerr++;
+ }
+
+ /* update the mux */
+ newsrv->mux_proto = mux_ent;
+ }
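+	/* As a hypothetical example of what the check above catches: a
+	 * "server ... proto h2" line in a "mode tcp" backend yields no usable
+	 * mux entry on this second pass, so the configuration is rejected
+	 * instead of silently running with an unsuitable multiplexer.
+	 */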
+
+ /* Allocate default tcp-check rules for proxies without
+ * explicit rules.
+ */
+ if (curproxy->cap & PR_CAP_BE) {
+ if (!(curproxy->options2 & PR_O2_CHK_ANY)) {
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curproxy->tcpcheck_rules;
+
+ curproxy->options2 |= PR_O2_TCPCHK_CHK;
+
+ rs = find_tcpcheck_ruleset("*tcp-check");
+ if (!rs) {
+ rs = create_tcpcheck_ruleset("*tcp-check");
+ if (rs == NULL) {
+ ha_alert("config: %s '%s': out of memory.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+ }
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = &rs->rules;
+ rules->flags = 0;
+ }
+ }
+ }
+
+ /*
+	 * We have just initialized the main proxies list;
+	 * we must also configure the log-forward proxies list.
+ */
+ if (init_proxies_list == proxies_list) {
+ init_proxies_list = cfg_log_forward;
+ /* check if list is not null to avoid infinite loop */
+ if (init_proxies_list)
+ goto init_proxies_list_stage1;
+ }
+
+ if (init_proxies_list == cfg_log_forward) {
+ init_proxies_list = sink_proxies_list;
+ /* check if list is not null to avoid infinite loop */
+ if (init_proxies_list)
+ goto init_proxies_list_stage1;
+ }
+
+ /***********************************************************/
+ /* At this point, target names have already been resolved. */
+ /***********************************************************/
+
+	/* we must finish initializing certain things on the servers */
+
+ list_for_each_entry(newsrv, &servers_list, global_list) {
+ /* initialize idle conns lists */
+ if (srv_init_per_thr(newsrv) == -1) {
+ ha_alert("parsing [%s:%d] : failed to allocate per-thread lists for server '%s'.\n",
+ newsrv->conf.file, newsrv->conf.line, newsrv->id);
+ cfgerr++;
+ continue;
+ }
+
+ if (newsrv->max_idle_conns != 0) {
+ newsrv->curr_idle_thr = calloc(global.nbthread, sizeof(*newsrv->curr_idle_thr));
+ if (!newsrv->curr_idle_thr) {
+ ha_alert("parsing [%s:%d] : failed to allocate idle connection tasks for server '%s'.\n",
+ newsrv->conf.file, newsrv->conf.line, newsrv->id);
+ cfgerr++;
+ continue;
+ }
+
+ }
+ }
+
+ idle_conn_task = task_new_anywhere();
+ if (!idle_conn_task) {
+ ha_alert("parsing : failed to allocate global idle connection task.\n");
+ cfgerr++;
+ }
+ else {
+ idle_conn_task->process = srv_cleanup_idle_conns;
+ idle_conn_task->context = NULL;
+
+ for (i = 0; i < global.nbthread; i++) {
+ idle_conns[i].cleanup_task = task_new_on(i);
+ if (!idle_conns[i].cleanup_task) {
+ ha_alert("parsing : failed to allocate idle connection tasks for thread '%d'.\n", i);
+ cfgerr++;
+ break;
+ }
+
+ idle_conns[i].cleanup_task->process = srv_cleanup_toremove_conns;
+ idle_conns[i].cleanup_task->context = NULL;
+ HA_SPIN_INIT(&idle_conns[i].idle_conns_lock);
+ MT_LIST_INIT(&idle_conns[i].toremove_conns);
+ }
+ }
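+	/* Design note: a single global task (idle_conn_task) periodically
+	 * purges excess idle server connections, while each thread owns a
+	 * dedicated cleanup task that only kills connections moved to its
+	 * local "toremove" list, so no thread ever frees another thread's
+	 * connections directly.
+	 */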
+
+ /* perform the final checks before creating tasks */
+
+ /* starting to initialize the main proxies list */
+ init_proxies_list = proxies_list;
+
+init_proxies_list_stage2:
+ for (curproxy = init_proxies_list; curproxy; curproxy = curproxy->next) {
+ struct listener *listener;
+ unsigned int next_id;
+
+ /* Configure SSL for each bind line.
+ * Note: if configuration fails at some point, the ->ctx member
+ * remains NULL so that listeners can later detach.
+ */
+ list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+ if (bind_conf->xprt->prepare_bind_conf &&
+ bind_conf->xprt->prepare_bind_conf(bind_conf) < 0)
+ cfgerr++;
+ bind_conf->analysers |= curproxy->fe_req_ana;
+ if (!bind_conf->maxaccept)
+ bind_conf->maxaccept = global.tune.maxaccept ? global.tune.maxaccept : MAX_ACCEPT;
+ bind_conf->accept = session_accept_fd;
+ if (curproxy->options & PR_O_TCP_NOLING)
+ bind_conf->options |= BC_O_NOLINGER;
+
+ /* smart accept mode is automatic in HTTP mode */
+ if ((curproxy->options2 & PR_O2_SMARTACC) ||
+ ((curproxy->mode == PR_MODE_HTTP || (bind_conf->options & BC_O_USE_SSL)) &&
+ !(curproxy->no_options2 & PR_O2_SMARTACC)))
+ bind_conf->options |= BC_O_NOQUICKACK;
+ }
+
+ /* adjust this proxy's listeners */
+ bind_conf = NULL;
+ next_id = 1;
+ list_for_each_entry(listener, &curproxy->conf.listeners, by_fe) {
+ if (!listener->luid) {
+ /* listener ID not set, use automatic numbering with first
+				 * spare entry starting with next_id.
+ */
+ next_id = get_next_id(&curproxy->conf.used_listener_id, next_id);
+ listener->conf.id.key = listener->luid = next_id;
+ eb32_insert(&curproxy->conf.used_listener_id, &listener->conf.id);
+ }
+ next_id++;
+
+ /* enable separate counters */
+ if (curproxy->options2 & PR_O2_SOCKSTAT) {
+ listener->counters = calloc(1, sizeof(*listener->counters));
+ if (!listener->name)
+ memprintf(&listener->name, "sock-%d", listener->luid);
+ }
+
+#ifdef USE_QUIC
+ if (listener->bind_conf->xprt == xprt_get(XPRT_QUIC)) {
+ /* quic_conn are counted against maxconn. */
+ listener->bind_conf->options |= BC_O_XPRT_MAXCONN;
+ listener->rx.quic_curr_handshake = 0;
+ listener->rx.quic_curr_accept = 0;
+
+# ifdef USE_QUIC_OPENSSL_COMPAT
+ /* store the last checked bind_conf in bind_conf */
+ if (!(global.tune.options & GTUNE_NO_QUIC) &&
+ !(global.tune.options & GTUNE_LIMITED_QUIC) &&
+ listener->bind_conf != bind_conf) {
+ bind_conf = listener->bind_conf;
+ ha_alert("Binding [%s:%d] for %s %s: this SSL library does not support the "
+ "QUIC protocol. A limited compatibility layer may be enabled using "
+ "the \"limited-quic\" global option if desired.\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(curproxy), curproxy->id);
+ cfgerr++;
+ }
+# endif
+
+ li_init_per_thr(listener);
+ }
+#endif
+ }
+
+ /* Release unused SSL configs */
+ list_for_each_entry(bind_conf, &curproxy->conf.bind, by_fe) {
+ if (!(bind_conf->options & BC_O_USE_SSL) && bind_conf->xprt->destroy_bind_conf)
+ bind_conf->xprt->destroy_bind_conf(bind_conf);
+ }
+
+ /* create the task associated with the proxy */
+ curproxy->task = task_new_anywhere();
+ if (curproxy->task) {
+ curproxy->task->context = curproxy;
+ curproxy->task->process = manage_proxy;
+ curproxy->flags |= PR_FL_READY;
+ } else {
+ ha_alert("Proxy '%s': no more memory when trying to allocate the management task\n",
+ curproxy->id);
+ cfgerr++;
+ }
+ }
+
+ /*
+	 * We have just initialized the main proxies list;
+	 * we must also configure the log-forward proxies list.
+ */
+ if (init_proxies_list == proxies_list) {
+ init_proxies_list = cfg_log_forward;
+ /* check if list is not null to avoid infinite loop */
+ if (init_proxies_list)
+ goto init_proxies_list_stage2;
+ }
+
+ /*
+ * Recount currently required checks.
+ */
+
+ for (curproxy=proxies_list; curproxy; curproxy=curproxy->next) {
+ int optnum;
+
+ for (optnum = 0; cfg_opts[optnum].name; optnum++)
+ if (curproxy->options & cfg_opts[optnum].val)
+ global.last_checks |= cfg_opts[optnum].checks;
+
+ for (optnum = 0; cfg_opts2[optnum].name; optnum++)
+ if (curproxy->options2 & cfg_opts2[optnum].val)
+ global.last_checks |= cfg_opts2[optnum].checks;
+ }
+
+ if (cfg_peers) {
+ struct peers *curpeers = cfg_peers, **last;
+ struct peer *p, *pb;
+
+ /* Remove all peers sections which don't have a valid listener,
+ * which are not used by any table, or which are bound to more
+ * than one process.
+ */
+ last = &cfg_peers;
+ while (*last) {
+ struct peer *peer;
+ struct stktable *t;
+ curpeers = *last;
+
+ if (curpeers->disabled) {
+ /* the "disabled" keyword was present */
+ if (curpeers->peers_fe)
+ stop_proxy(curpeers->peers_fe);
+ curpeers->peers_fe = NULL;
+ }
+ else if (!curpeers->peers_fe || !curpeers->peers_fe->id) {
+ ha_warning("Removing incomplete section 'peers %s' (no peer named '%s').\n",
+ curpeers->id, localpeer);
+ if (curpeers->peers_fe)
+ stop_proxy(curpeers->peers_fe);
+ curpeers->peers_fe = NULL;
+ }
+ else {
+				/* Initialize the transport layer of the server part of all the peers
+				 * belonging to the <curpeers> section if required.
+ * Note that ->srv is used by the local peer of a new process to connect to the local peer
+ * of an old process.
+ */
+ curpeers->peers_fe->flags |= PR_FL_READY;
+ p = curpeers->remote;
+ while (p) {
+ struct peer *other_peer;
+
+ for (other_peer = curpeers->remote; other_peer && other_peer != p; other_peer = other_peer->next) {
+ if (strcmp(other_peer->id, p->id) == 0) {
+ ha_alert("Peer section '%s' [%s:%d]: another peer named '%s' was already defined at line %s:%d, please use distinct names.\n",
+ curpeers->peers_fe->id,
+ p->conf.file, p->conf.line,
+ other_peer->id, other_peer->conf.file, other_peer->conf.line);
+ cfgerr++;
+ break;
+ }
+ }
+
+ if (p->srv) {
+ if (p->srv->use_ssl == 1 && xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv)
+ cfgerr += xprt_get(XPRT_SSL)->prepare_srv(p->srv);
+ }
+ p = p->next;
+ }
+ /* Configure the SSL bindings of the local peer if required. */
+ if (!LIST_ISEMPTY(&curpeers->peers_fe->conf.bind)) {
+ struct list *l;
+ struct bind_conf *bind_conf;
+ int ret;
+
+ l = &curpeers->peers_fe->conf.bind;
+ bind_conf = LIST_ELEM(l->n, typeof(bind_conf), by_fe);
+
+ if (curpeers->local->srv) {
+ if (curpeers->local->srv->use_ssl == 1 && !(bind_conf->options & BC_O_USE_SSL)) {
+							ha_warning("Peers section '%s': the local peer has a non-SSL listener and an SSL server configured at line %s:%d.\n",
+ curpeers->peers_fe->id, curpeers->local->conf.file, curpeers->local->conf.line);
+ }
+ else if (curpeers->local->srv->use_ssl != 1 && (bind_conf->options & BC_O_USE_SSL)) {
+							ha_warning("Peers section '%s': the local peer has an SSL listener and a non-SSL server configured at line %s:%d.\n",
+ curpeers->peers_fe->id, curpeers->local->conf.file, curpeers->local->conf.line);
+ }
+ }
+
+ /* finish the bind setup */
+ ret = bind_complete_thread_setup(bind_conf, &err_code);
+ if (ret != 0) {
+ cfgerr += ret;
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+
+ if (bind_conf->xprt->prepare_bind_conf &&
+ bind_conf->xprt->prepare_bind_conf(bind_conf) < 0)
+ cfgerr++;
+ }
+ if (!peers_init_sync(curpeers) || !peers_alloc_dcache(curpeers)) {
+ ha_alert("Peers section '%s': out of memory, giving up on peers.\n",
+ curpeers->id);
+ cfgerr++;
+ break;
+ }
+ last = &curpeers->next;
+
+			/* Ignore any peer shard value greater than the number of shards
+			 * for this section. Also ignore the shard of the local peer.
+ */
+ for (peer = curpeers->remote; peer; peer = peer->next) {
+ if (peer == curpeers->local) {
+ if (peer->srv->shard) {
+ ha_warning("Peers section '%s': shard ignored for '%s' local peer\n",
+ curpeers->id, peer->id);
+ peer->srv->shard = 0;
+ }
+ }
+ else if (peer->srv->shard > curpeers->nb_shards) {
+					ha_warning("Peers section '%s': shard ignored for '%s' peer because "
+						   "its shard value (%d) is greater than the number of shards of the section (%d)\n",
+ curpeers->id, peer->id, peer->srv->shard, curpeers->nb_shards);
+ peer->srv->shard = 0;
+ }
+ }
+
+ continue;
+ }
+
+ /* clean what has been detected above */
+ p = curpeers->remote;
+ while (p) {
+ pb = p->next;
+ free(p->id);
+ free(p);
+ p = pb;
+ }
+
+ /* Destroy and unlink this curpeers section.
+ * Note: curpeers is backed up into *last.
+ */
+ free(curpeers->id);
+ curpeers = curpeers->next;
+			/* Reset any reference to this peers section in the list of stick-tables */
+ for (t = stktables_list; t; t = t->next) {
+ if (t->peers.p && t->peers.p == *last)
+ t->peers.p = NULL;
+ }
+ free(*last);
+ *last = curpeers;
+ }
+ }
+
+ for (t = stktables_list; t; t = t->next) {
+ if (t->proxy)
+ continue;
+ err = NULL;
+ if (!stktable_init(t, &err)) {
+ ha_alert("Parsing [%s:%d]: failed to initialize '%s' stick-table: %s.\n", t->conf.file, t->conf.line, t->id, err);
+ ha_free(&err);
+ cfgerr++;
+ }
+ }
+
+ /* initialize stick-tables on backend capable proxies. This must not
+ * be done earlier because the data size may be discovered while parsing
+ * other proxies.
+ */
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ if ((curproxy->flags & PR_FL_DISABLED) || !curproxy->table)
+ continue;
+
+ err = NULL;
+ if (!stktable_init(curproxy->table, &err)) {
+ ha_alert("Proxy '%s': failed to initialize stick-table: %s.\n", curproxy->id, err);
+ ha_free(&err);
+ cfgerr++;
+ }
+ }
+
+ if (mailers) {
+ struct mailers *curmailers = mailers, **last;
+ struct mailer *m, *mb;
+
+ /* Remove all mailers sections which don't have a valid listener.
+ * This can happen when a mailers section is never referenced.
+ */
+ last = &mailers;
+ while (*last) {
+ curmailers = *last;
+ if (curmailers->users) {
+ last = &curmailers->next;
+ continue;
+ }
+
+ ha_warning("Removing incomplete section 'mailers %s'.\n",
+ curmailers->id);
+
+ m = curmailers->mailer_list;
+ while (m) {
+ mb = m->next;
+ free(m->id);
+ free(m);
+ m = mb;
+ }
+
+ /* Destroy and unlink this curmailers section.
+ * Note: curmailers is backed up into *last.
+ */
+ free(curmailers->id);
+ curmailers = curmailers->next;
+ free(*last);
+ *last = curmailers;
+ }
+ }
+
+	/* Set server_state_file_name to the backend name if the backend is
+	 * supposed to use a locally defined server-state file and none has
+	 * been provided */
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ if (curproxy->load_server_state_from_file == PR_SRV_STATE_FILE_LOCAL &&
+ curproxy->server_state_file_name == NULL)
+ curproxy->server_state_file_name = strdup(curproxy->id);
+ }
+
+ list_for_each_entry(curr_resolvers, &sec_resolvers, list) {
+ if (LIST_ISEMPTY(&curr_resolvers->nameservers)) {
+ ha_warning("resolvers '%s' [%s:%d] has no nameservers configured!\n",
+ curr_resolvers->id, curr_resolvers->conf.file,
+ curr_resolvers->conf.line);
+ err_code |= ERR_WARN;
+ }
+ }
+
+ list_for_each_entry(postparser, &postparsers, list) {
+ if (postparser->func)
+ cfgerr += postparser->func();
+ }
+
+ if (cfgerr > 0)
+ err_code |= ERR_ALERT | ERR_FATAL;
+ out:
+ return err_code;
+}
+
+/*
+ * Registers the CFG keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void cfg_register_keywords(struct cfg_kw_list *kwl)
+{
+ LIST_APPEND(&cfg_keywords.list, &kwl->list);
+}
+
+/*
+ * Unregisters the CFG keyword list <kwl> from the list of valid keywords.
+ */
+void cfg_unregister_keywords(struct cfg_kw_list *kwl)
+{
+ LIST_DELETE(&kwl->list);
+ LIST_INIT(&kwl->list);
+}
+
+/* This function registers a new section in the haproxy configuration file.
+ * <section_name> is the name of this new section and <section_parser>
+ * is the parser called for it. If two section declarations have the same
+ * name, only the first one declared is used.
+ */
+int cfg_register_section(char *section_name,
+ int (*section_parser)(const char *, int, char **, int),
+ int (*post_section_parser)())
+{
+ struct cfg_section *cs;
+
+ list_for_each_entry(cs, &sections, list) {
+ if (strcmp(cs->section_name, section_name) == 0) {
+ ha_alert("register section '%s': already registered.\n", section_name);
+ return 0;
+ }
+ }
+
+ cs = calloc(1, sizeof(*cs));
+ if (!cs) {
+ ha_alert("register section '%s': out of memory.\n", section_name);
+ return 0;
+ }
+
+ cs->section_name = section_name;
+ cs->section_parser = section_parser;
+ cs->post_section_parser = post_section_parser;
+
+ LIST_APPEND(&sections, &cs->list);
+
+ return 1;
+}
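+
+/* Note: sections are normally registered at init time through the
+ * REGISTER_CONFIG_SECTION() initcall macro rather than by calling this
+ * function directly; see the default registrations at the end of this file.
+ */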
+
+/* This function registers a new function which will be called once the haproxy
+ * configuration file has been parsed. It's useful to check dependencies
+ * between sections or to resolve items once everything is parsed.
+ */
+int cfg_register_postparser(char *name, int (*func)())
+{
+ struct cfg_postparser *cp;
+
+ cp = calloc(1, sizeof(*cp));
+ if (!cp) {
+ ha_alert("register postparser '%s': out of memory.\n", name);
+ return 0;
+ }
+ cp->name = name;
+ cp->func = func;
+
+ LIST_APPEND(&postparsers, &cp->list);
+
+ return 1;
+}
+
+/*
+ * free all config section entries
+ */
+void cfg_unregister_sections(void)
+{
+ struct cfg_section *cs, *ics;
+
+ list_for_each_entry_safe(cs, ics, &sections, list) {
+ LIST_DELETE(&cs->list);
+ free(cs);
+ }
+}
+
+void cfg_backup_sections(struct list *backup_sections)
+{
+ struct cfg_section *cs, *ics;
+
+ list_for_each_entry_safe(cs, ics, &sections, list) {
+ LIST_DELETE(&cs->list);
+ LIST_APPEND(backup_sections, &cs->list);
+ }
+}
+
+void cfg_restore_sections(struct list *backup_sections)
+{
+ struct cfg_section *cs, *ics;
+
+ list_for_each_entry_safe(cs, ics, backup_sections, list) {
+ LIST_DELETE(&cs->list);
+ LIST_APPEND(&sections, &cs->list);
+ }
+}
+
+/* dumps all registered keywords by section on stdout */
+void cfg_dump_registered_keywords()
+{
+ /* CFG_GLOBAL, CFG_LISTEN, CFG_USERLIST, CFG_PEERS, CFG_CRTLIST */
+ const char* sect_names[] = { "", "global", "listen", "userlist", "peers", "crt-list", 0 };
+ int section;
+ int index;
+
+ for (section = 1; sect_names[section]; section++) {
+ struct cfg_kw_list *kwl;
+ const struct cfg_keyword *kwp, *kwn;
+
+ printf("%s\n", sect_names[section]);
+
+ for (kwn = kwp = NULL;; kwp = kwn) {
+ list_for_each_entry(kwl, &cfg_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++)
+ if (kwl->kw[index].section == section &&
+ strordered(kwp ? kwp->kw : NULL, kwl->kw[index].kw, kwn != kwp ? kwn->kw : NULL))
+ kwn = &kwl->kw[index];
+ }
+ if (kwn == kwp)
+ break;
+ printf("\t%s\n", kwn->kw);
+ }
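+		/* The loop above (and the similar ones below) is an in-place
+		 * selection sort: each pass rescans the keyword lists and uses
+		 * strordered() to pick the smallest name strictly greater than
+		 * the previously printed one, producing an alphabetical dump
+		 * without allocating a sorted copy. It is O(n^2), which is fine
+		 * for a debug dump.
+		 */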
+
+ if (section == CFG_LISTEN) {
+ /* there are plenty of other keywords there */
+ extern struct list tcp_req_conn_keywords, tcp_req_sess_keywords,
+ tcp_req_cont_keywords, tcp_res_cont_keywords;
+ extern struct bind_kw_list bind_keywords;
+ extern struct srv_kw_list srv_keywords;
+ struct bind_kw_list *bkwl;
+ struct srv_kw_list *skwl;
+ const struct bind_kw *bkwp, *bkwn;
+ const struct srv_kw *skwp, *skwn;
+ const struct cfg_opt *coptp, *coptn;
+
+ /* display the non-ssl keywords */
+ for (bkwn = bkwp = NULL;; bkwp = bkwn) {
+ list_for_each_entry(bkwl, &bind_keywords.list, list) {
+ if (strcmp(bkwl->scope, "SSL") == 0) /* skip SSL keywords */
+ continue;
+ for (index = 0; bkwl->kw[index].kw != NULL; index++) {
+ if (strordered(bkwp ? bkwp->kw : NULL,
+ bkwl->kw[index].kw,
+ bkwn != bkwp ? bkwn->kw : NULL))
+ bkwn = &bkwl->kw[index];
+ }
+ }
+ if (bkwn == bkwp)
+ break;
+
+ if (!bkwn->skip)
+ printf("\tbind <addr> %s\n", bkwn->kw);
+ else
+ printf("\tbind <addr> %s +%d\n", bkwn->kw, bkwn->skip);
+ }
+#if defined(USE_OPENSSL)
+ /* displays the "ssl" keywords */
+ for (bkwn = bkwp = NULL;; bkwp = bkwn) {
+ list_for_each_entry(bkwl, &bind_keywords.list, list) {
+ if (strcmp(bkwl->scope, "SSL") != 0) /* skip non-SSL keywords */
+ continue;
+ for (index = 0; bkwl->kw[index].kw != NULL; index++) {
+ if (strordered(bkwp ? bkwp->kw : NULL,
+ bkwl->kw[index].kw,
+ bkwn != bkwp ? bkwn->kw : NULL))
+ bkwn = &bkwl->kw[index];
+ }
+ }
+ if (bkwn == bkwp)
+ break;
+
+ if (strcmp(bkwn->kw, "ssl") == 0) /* skip "bind <addr> ssl ssl" */
+ continue;
+
+ if (!bkwn->skip)
+ printf("\tbind <addr> ssl %s\n", bkwn->kw);
+ else
+ printf("\tbind <addr> ssl %s +%d\n", bkwn->kw, bkwn->skip);
+ }
+#endif
+ for (skwn = skwp = NULL;; skwp = skwn) {
+ list_for_each_entry(skwl, &srv_keywords.list, list) {
+ for (index = 0; skwl->kw[index].kw != NULL; index++)
+ if (strordered(skwp ? skwp->kw : NULL,
+ skwl->kw[index].kw,
+ skwn != skwp ? skwn->kw : NULL))
+ skwn = &skwl->kw[index];
+ }
+ if (skwn == skwp)
+ break;
+
+ if (!skwn->skip)
+ printf("\tserver <name> <addr> %s\n", skwn->kw);
+ else
+ printf("\tserver <name> <addr> %s +%d\n", skwn->kw, skwn->skip);
+ }
+
+ for (coptn = coptp = NULL;; coptp = coptn) {
+ for (index = 0; cfg_opts[index].name; index++)
+ if (strordered(coptp ? coptp->name : NULL,
+ cfg_opts[index].name,
+ coptn != coptp ? coptn->name : NULL))
+ coptn = &cfg_opts[index];
+
+ for (index = 0; cfg_opts2[index].name; index++)
+ if (strordered(coptp ? coptp->name : NULL,
+ cfg_opts2[index].name,
+ coptn != coptp ? coptn->name : NULL))
+ coptn = &cfg_opts2[index];
+ if (coptn == coptp)
+ break;
+
+ printf("\toption %s [ ", coptn->name);
+ if (coptn->cap & PR_CAP_FE)
+ printf("FE ");
+ if (coptn->cap & PR_CAP_BE)
+ printf("BE ");
+ if (coptn->mode == PR_MODE_HTTP)
+ printf("HTTP ");
+ printf("]\n");
+ }
+
+ dump_act_rules(&tcp_req_conn_keywords, "\ttcp-request connection ");
+ dump_act_rules(&tcp_req_sess_keywords, "\ttcp-request session ");
+ dump_act_rules(&tcp_req_cont_keywords, "\ttcp-request content ");
+ dump_act_rules(&tcp_res_cont_keywords, "\ttcp-response content ");
+ dump_act_rules(&http_req_keywords.list, "\thttp-request ");
+ dump_act_rules(&http_res_keywords.list, "\thttp-response ");
+ dump_act_rules(&http_after_res_keywords.list, "\thttp-after-response ");
+ }
+ if (section == CFG_PEERS) {
+ struct peers_kw_list *pkwl;
+ const struct peers_keyword *pkwp, *pkwn;
+ for (pkwn = pkwp = NULL;; pkwp = pkwn) {
+ list_for_each_entry(pkwl, &peers_keywords.list, list) {
+ for (index = 0; pkwl->kw[index].kw != NULL; index++) {
+ if (strordered(pkwp ? pkwp->kw : NULL,
+ pkwl->kw[index].kw,
+ pkwn != pkwp ? pkwn->kw : NULL))
+ pkwn = &pkwl->kw[index];
+ }
+ }
+ if (pkwn == pkwp)
+ break;
+ printf("\t%s\n", pkwn->kw);
+ }
+ }
+ if (section == CFG_CRTLIST) {
+			/* displays the keywords available for the crt-lists */
+ extern struct ssl_crtlist_kw ssl_crtlist_kws[] __maybe_unused;
+ const struct ssl_crtlist_kw *sbkwp __maybe_unused, *sbkwn __maybe_unused;
+
+#if defined(USE_OPENSSL)
+ for (sbkwn = sbkwp = NULL;; sbkwp = sbkwn) {
+ for (index = 0; ssl_crtlist_kws[index].kw != NULL; index++) {
+ if (strordered(sbkwp ? sbkwp->kw : NULL,
+ ssl_crtlist_kws[index].kw,
+ sbkwn != sbkwp ? sbkwn->kw : NULL))
+ sbkwn = &ssl_crtlist_kws[index];
+ }
+ if (sbkwn == sbkwp)
+ break;
+ if (!sbkwn->skip)
+ printf("\t%s\n", sbkwn->kw);
+ else
+ printf("\t%s +%d\n", sbkwn->kw, sbkwn->skip);
+ }
+#endif
+
+ }
+ }
+}
+
+/* these are the config sections handled by default */
+REGISTER_CONFIG_SECTION("listen", cfg_parse_listen, NULL);
+REGISTER_CONFIG_SECTION("frontend", cfg_parse_listen, NULL);
+REGISTER_CONFIG_SECTION("backend", cfg_parse_listen, NULL);
+REGISTER_CONFIG_SECTION("defaults", cfg_parse_listen, NULL);
+REGISTER_CONFIG_SECTION("global", cfg_parse_global, NULL);
+REGISTER_CONFIG_SECTION("userlist", cfg_parse_users, NULL);
+REGISTER_CONFIG_SECTION("peers", cfg_parse_peers, NULL);
+REGISTER_CONFIG_SECTION("mailers", cfg_parse_mailers, NULL);
+REGISTER_CONFIG_SECTION("namespace_list", cfg_parse_netns, NULL);
+
+static struct cfg_kw_list cfg_kws = {{ },{
+ { CFG_GLOBAL, "default-path", cfg_parse_global_def_path },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/channel.c b/src/channel.c
new file mode 100644
index 0000000..0b6389d
--- /dev/null
+++ b/src/channel.c
@@ -0,0 +1,591 @@
+/*
+ * Channel management functions.
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/channel.h>
+
+
+/* Schedule up to <bytes> more bytes to be forwarded via the channel without
+ * notifying the owner task. Any data pending in the buffer are scheduled to be
+ * sent as well, within the limit of the number of bytes to forward. This must
+ * be the only method to use to schedule bytes to be forwarded. If the requested
+ * number is too large, it is automatically adjusted. The number of bytes taken
+ * into account is returned. Directly touching ->to_forward will cause lockups
+ * when buf->o goes down to zero if nobody is ready to push the remaining data.
+ */
+unsigned long long __channel_forward(struct channel *chn, unsigned long long bytes)
+{
+ unsigned int budget;
+ unsigned int forwarded;
+
+ /* This is more of a safety measure as it's not supposed to happen in
+ * regular code paths.
+ */
+ if (unlikely(chn->to_forward == CHN_INFINITE_FORWARD)) {
+ c_adv(chn, ci_data(chn));
+ return bytes;
+ }
+
+ /* Bound the transferred size to a 32-bit count since all our values
+ * are 32-bit, and we don't want to reach CHN_INFINITE_FORWARD.
+ */
+ budget = MIN(bytes, CHN_INFINITE_FORWARD - 1);
+
+ /* transfer as much as we can of buf->i */
+ forwarded = MIN(ci_data(chn), budget);
+ c_adv(chn, forwarded);
+ budget -= forwarded;
+
+ if (!budget)
+ return forwarded;
+
+	/* Now we must ensure chn->to_forward stays below CHN_INFINITE_FORWARD,
+	 * which also implies it won't overflow. It's fewer operations in 64-bit.
+ */
+ bytes = (unsigned long long)chn->to_forward + budget;
+ if (bytes >= CHN_INFINITE_FORWARD)
+ bytes = CHN_INFINITE_FORWARD - 1;
+ budget = bytes - chn->to_forward;
+
+ chn->to_forward += budget;
+ forwarded += budget;
+ return forwarded;
+}
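+
+/* A simplified sketch of the channel_forward() inline wrapper (declared in
+ * haproxy/channel.h), which implements the fast path and only falls back to
+ * the function above when the requested amount exceeds the pending input:
+ *
+ *	if (bytes <= ci_data(chn)) {
+ *		c_adv(chn, bytes);             // data already there: advance
+ *		return bytes;
+ *	}
+ *	return __channel_forward(chn, bytes);  // arm ->to_forward for the rest
+ */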
+
+/* writes <len> bytes from message <msg> to the channel's buffer. Returns -1 in
+ * case of success, -2 if the message is larger than the buffer size, or the
+ * number of bytes available otherwise. The send limit is automatically
+ * adjusted to the amount of data written. FIXME-20060521: handle unaligned
+ * data. Note: this function appends data to the buffer's output and possibly
+ * overwrites any pending input data which are assumed not to exist.
+ */
+int co_inject(struct channel *chn, const char *msg, int len)
+{
+ int max;
+
+ if (len == 0)
+ return -1;
+
+ if (len < 0 || len > c_size(chn)) {
+ /* we can't write this chunk and will never be able to, because
+ * it is larger than the buffer. This must be reported as an
+ * error. Then we return -2 so that writers that don't care can
+ * ignore it and go on, and others can check for this value.
+ */
+ return -2;
+ }
+
+ c_realign_if_empty(chn);
+ max = b_contig_space(&chn->buf);
+ if (len > max)
+ return max;
+
+ memcpy(co_tail(chn), msg, len);
+ b_add(&chn->buf, len);
+ c_adv(chn, len);
+ chn->total += len;
+ return -1;
+}
+
+/* Tries to copy character <c> into the channel's buffer after some length
+ * controls. The chn->o and to_forward pointers are updated. If the channel
+ * input is closed, -2 is returned. If there is not enough room left in the
+ * buffer, -1 is returned. Otherwise the number of bytes copied is returned
+ * (1). Channel flag CF_READ_EVENT is updated if some data can be transferred.
+ */
+int ci_putchr(struct channel *chn, char c)
+{
+ if (unlikely(channel_input_closed(chn)))
+ return -2;
+
+ if (!channel_may_recv(chn))
+ return -1;
+
+ *ci_tail(chn) = c;
+
+ b_add(&chn->buf, 1);
+ chn->flags |= CF_READ_EVENT;
+
+ if (chn->to_forward >= 1) {
+ if (chn->to_forward != CHN_INFINITE_FORWARD)
+ chn->to_forward--;
+ c_adv(chn, 1);
+ }
+
+ chn->total++;
+ return 1;
+}
+
+/* Tries to copy block <blk> at once into the channel's buffer after length
+ * controls. The chn->o and to_forward pointers are updated. If the channel
+ * input is closed, -2 is returned. If the block is too large for this buffer,
+ * -3 is returned. If there is not enough room left in the buffer, -1 is
+ * returned. Otherwise the number of bytes copied is returned (0 being a valid
+ * number). Channel flag CF_READ_EVENT is updated if some data can be
+ * transferred.
+ */
+int ci_putblk(struct channel *chn, const char *blk, int len)
+{
+ int max;
+
+ if (unlikely(channel_input_closed(chn)))
+ return -2;
+
+ if (len < 0)
+ return -3;
+
+ max = channel_recv_limit(chn);
+ if (unlikely(len > max - c_data(chn))) {
+ /* we can't write this chunk right now because the buffer is
+ * almost full or because the block is too large. Returns
+ * -3 if block is too large for this buffer. Or -1 if the
+ * room left is not large enough.
+ */
+ if (len > max)
+ return -3;
+
+ return -1;
+ }
+
+ if (unlikely(len == 0))
+ return 0;
+
+ /* OK so the data fits in the buffer in one or two blocks */
+ max = b_contig_space(&chn->buf);
+ memcpy(ci_tail(chn), blk, MIN(len, max));
+ if (len > max)
+ memcpy(c_orig(chn), blk + max, len - max);
+
+ b_add(&chn->buf, len);
+ channel_add_input(chn, len);
+ return len;
+}
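+
+/* Note on the two-step copy above: b_contig_space() returns the contiguous
+ * room between the tail of the data and the wrapping point. When <len>
+ * exceeds it, the first memcpy() fills up to the wrapping point and the
+ * second one continues from the buffer's origin:
+ *
+ *      orig                          tail               wrap
+ *       |<-- 2nd memcpy ---->|        |<--- 1st memcpy --->|
+ *       [ blk+max, len-max   ]..data..[ blk, max bytes     ]
+ */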
+
+/* Locates the longest part of the channel's output buffer that is composed
+ * exclusively of characters not in the <delim> set, and delimited by one of
+ * these characters, and returns the initial part and the first of such
+ * delimiters. A single escape character in <escape> may be specified so that
+ * when not 0 and found, the character that follows it is never taken as a
+ * delimiter. Note that <delim> cannot contain the zero byte, hence this
+ * function is not usable with byte zero as a delimiter.
+ *
+ * Return values :
+ * >0 : number of bytes read. Includes the sep if present before len or end.
+ * =0 : no sep before end found. <str> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it. One of the delimiters is waited for as long as neither the buffer
+ * nor the output are full. If either of them is full, the string may be
+ * returned as is, without the delimiter.
+ */
+int co_getdelim(const struct channel *chn, char *str, int len, const char *delim, char escape)
+{
+ uchar delim_map[256 / 8];
+ int found, escaped;
+ uint pos, bit;
+ int ret, max;
+ uchar b;
+ char *p;
+
+ ret = 0;
+ max = len;
+
+ /* closed or empty + imminent close = -1; empty = 0 */
+ if (unlikely((chn_cons(chn)->flags & SC_FL_SHUT_DONE) || !co_data(chn))) {
+ if (chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))
+ ret = -1;
+ goto out;
+ }
+
+ p = co_head(chn);
+
+ if (max > co_data(chn)) {
+ max = co_data(chn);
+ str[max-1] = 0;
+ }
+
+ /* create the byte map */
+ memset(delim_map, 0, sizeof(delim_map));
+ while ((b = *delim)) {
+ pos = b >> 3;
+ bit = b & 7;
+ delim_map[pos] |= 1 << bit;
+ delim++;
+ }
+
+ found = escaped = 0;
+ while (max) {
+ *str++ = b = *p;
+ ret++;
+ max--;
+
+ if (escape && (escaped || *p == escape)) {
+ escaped = !escaped;
+ goto skip;
+ }
+
+ pos = b >> 3;
+ bit = b & 7;
+ if (delim_map[pos] & (1 << bit)) {
+ found = 1;
+ break;
+ }
+ skip:
+ p = b_next(&chn->buf, p);
+ }
+
+ if (ret > 0 && ret < len &&
+ (ret < co_data(chn) || channel_may_recv(chn)) &&
+ !found &&
+ !(chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)))
+ ret = 0;
+ out:
+ if (max)
+ *str = 0;
+ return ret;
+}
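+
+/* Illustrative use (hypothetical caller): read one parameter from the output
+ * buffer, stopping on ';' or ',' and honoring backslash as escape, then
+ * consume what was read:
+ *
+ *	char word[64];
+ *	int ret = co_getdelim(chn, word, sizeof(word), ";,", '\\');
+ *
+ *	if (ret > 0)
+ *		co_skip(chn, ret);
+ */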
+
+/* Gets one text word out of a channel's buffer from a stream connector.
+ * Return values :
+ * >0 : number of bytes read. Includes the sep if present before len or end.
+ * =0 : no sep before end found. <str> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it. The line separator is waited for as long as neither the buffer
+ * nor the output are full. If either of them is full, the string may be
+ * returned as is, without the line separator.
+ */
+int co_getword(const struct channel *chn, char *str, int len, char sep)
+{
+ int ret, max;
+ char *p;
+
+ ret = 0;
+ max = len;
+
+ /* closed or empty + imminent close = -1; empty = 0 */
+ if (unlikely((chn_cons(chn)->flags & SC_FL_SHUT_DONE) || !co_data(chn))) {
+ if (chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))
+ ret = -1;
+ goto out;
+ }
+
+ p = co_head(chn);
+
+ if (max > co_data(chn)) {
+ max = co_data(chn);
+ str[max-1] = 0;
+ }
+ while (max) {
+ *str++ = *p;
+ ret++;
+ max--;
+
+ if (*p == sep)
+ break;
+ p = b_next(&chn->buf, p);
+ }
+ if (ret > 0 && ret < len &&
+ (ret < co_data(chn) || channel_may_recv(chn)) &&
+ *(str-1) != sep &&
+ !(chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)))
+ ret = 0;
+ out:
+ if (max)
+ *str = 0;
+ return ret;
+}
+
+/* Gets one text line out of a channel's buffer from a stream connector.
+ * Return values :
+ * >0 : number of bytes read. Includes the \n if present before len or end.
+ * =0 : no '\n' before end found. <str> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it. The '\n' is waited for as long as neither the buffer nor the
+ * output are full. If either of them is full, the string may be returned
+ * as is, without the '\n'.
+ */
+int co_getline(const struct channel *chn, char *str, int len)
+{
+ int ret, max;
+ char *p;
+
+ ret = 0;
+ max = len;
+
+ /* closed or empty + imminent close = -1; empty = 0 */
+ if (unlikely((chn_cons(chn)->flags & SC_FL_SHUT_DONE) || !co_data(chn))) {
+ if (chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))
+ ret = -1;
+ goto out;
+ }
+
+ p = co_head(chn);
+
+ if (max > co_data(chn)) {
+ max = co_data(chn);
+ str[max-1] = 0;
+ }
+ while (max) {
+ *str++ = *p;
+ ret++;
+ max--;
+
+ if (*p == '\n')
+ break;
+ p = b_next(&chn->buf, p);
+ }
+ if (ret > 0 && ret < len &&
+ (ret < co_data(chn) || channel_may_recv(chn)) &&
+ *(str-1) != '\n' &&
+ !(chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)))
+ ret = 0;
+ out:
+ if (max)
+ *str = 0;
+ return ret;
+}
+
+/* Gets one char of data from a channel's buffer.
+ * Return values :
+ * 1 : number of bytes read, equal to requested size.
+ * =0 : not enough data available. <c> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it.
+ */
+int co_getchar(const struct channel *chn, char *c)
+{
+ if (chn_cons(chn)->flags & SC_FL_SHUT_DONE)
+ return -1;
+
+ if (unlikely(co_data(chn) == 0)) {
+ if (chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))
+ return -1;
+ return 0;
+ }
+
+ *c = *(co_head(chn));
+ return 1;
+}
+
+/* Gets one full block of data at once from a channel's buffer, optionally from
+ * a specific offset. Return values :
+ * >0 : number of bytes read, equal to requested size.
+ * =0 : not enough data available. <blk> is left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it.
+ */
+int co_getblk(const struct channel *chn, char *blk, int len, int offset)
+{
+ if (chn_cons(chn)->flags & SC_FL_SHUT_DONE)
+ return -1;
+
+ if (len + offset > co_data(chn) || co_data(chn) == 0) {
+ if (chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))
+ return -1;
+ return 0;
+ }
+
+ return b_getblk(&chn->buf, blk, len, offset);
+}
+
+/* Gets one or two blocks of data at once from a channel's output buffer.
+ * Return values :
+ * >0 : number of blocks filled (1 or 2). blk1 is always filled before blk2.
+ * =0 : not enough data available. <blk*> are left undefined.
+ * <0 : no more bytes readable because output is shut.
+ * The channel status is not changed. The caller must call co_skip() to
+ * update it. Unused buffers are left in an undefined state.
+ */
+int co_getblk_nc(const struct channel *chn, const char **blk1, size_t *len1, const char **blk2, size_t *len2)
+{
+ if (unlikely(co_data(chn) == 0)) {
+ if (chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))
+ return -1;
+ return 0;
+ }
+
+ return b_getblk_nc(&chn->buf, blk1, len1, blk2, len2, 0, co_data(chn));
+}
+
+/* Gets one text line out of a channel's output buffer from a stream connector.
+ * Return values :
+ * >0 : number of blocks returned (1 or 2). blk1 is always filled before blk2.
+ * =0 : not enough data available.
+ * <0 : no more bytes readable because output is shut.
+ * The '\n' is waited for as long as neither the buffer nor the output are
+ * full. If either of them is full, the string may be returned as is, without
+ * the '\n'. Unused buffers are left in an undefined state.
+ */
+int co_getline_nc(const struct channel *chn,
+ const char **blk1, size_t *len1,
+ const char **blk2, size_t *len2)
+{
+ int retcode;
+ int l;
+
+ retcode = co_getblk_nc(chn, blk1, len1, blk2, len2);
+ if (unlikely(retcode <= 0))
+ return retcode;
+
+ for (l = 0; l < *len1 && (*blk1)[l] != '\n'; l++);
+ if (l < *len1 && (*blk1)[l] == '\n') {
+ *len1 = l + 1;
+ return 1;
+ }
+
+ if (retcode >= 2) {
+ for (l = 0; l < *len2 && (*blk2)[l] != '\n'; l++);
+ if (l < *len2 && (*blk2)[l] == '\n') {
+ *len2 = l + 1;
+ return 2;
+ }
+ }
+
+ if (chn_cons(chn)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) {
+ /* If we have found no LF and the buffer is shut, then
+ * the resulting string is made of the concatenation of
+ * the pending blocks (1 or 2).
+ */
+ return retcode;
+ }
+
+ /* No LF yet and not shut yet */
+ return 0;
+}
+
+/* Gets one full block of data at once from a channel's input buffer.
+ * This function can return the data split into one or two blocks.
+ * Return values :
+ * >0 : number of blocks returned (1 or 2). blk1 is always filled before blk2.
+ * =0 : not enough data available.
+ * <0 : no more bytes readable because input is shut.
+ */
+int ci_getblk_nc(const struct channel *chn,
+ char **blk1, size_t *len1,
+ char **blk2, size_t *len2)
+{
+ if (unlikely(ci_data(chn) == 0)) {
+ if (chn_prod(chn)->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ return -1;
+ return 0;
+ }
+
+ if (unlikely(ci_head(chn) + ci_data(chn) > c_wrap(chn))) {
+ *blk1 = ci_head(chn);
+ *len1 = c_wrap(chn) - ci_head(chn);
+ *blk2 = c_orig(chn);
+ *len2 = ci_data(chn) - *len1;
+ return 2;
+ }
+
+ *blk1 = ci_head(chn);
+ *len1 = ci_data(chn);
+ return 1;
+}
+
+/* Gets one text line out of a channel's input buffer from a stream connector.
+ * Return values :
+ * >0 : number of blocks returned (1 or 2). blk1 is always filled before blk2.
+ * =0 : not enough data available.
+ * <0 : no more bytes readable because output is shut.
+ * The '\n' is waited for as long as neither the buffer nor the input are
+ * full. If either of them is full, the string may be returned as is, without
+ * the '\n'. Unused buffers are left in an undefined state.
+ */
+int ci_getline_nc(const struct channel *chn,
+ char **blk1, size_t *len1,
+ char **blk2, size_t *len2)
+{
+ int retcode;
+ int l;
+
+ retcode = ci_getblk_nc(chn, blk1, len1, blk2, len2);
+ if (unlikely(retcode <= 0))
+ return retcode;
+
+ for (l = 0; l < *len1 && (*blk1)[l] != '\n'; l++);
+ if (l < *len1 && (*blk1)[l] == '\n') {
+ *len1 = l + 1;
+ return 1;
+ }
+
+ if (retcode >= 2) {
+ for (l = 0; l < *len2 && (*blk2)[l] != '\n'; l++);
+ if (l < *len2 && (*blk2)[l] == '\n') {
+ *len2 = l + 1;
+ return 2;
+ }
+ }
+
+ if (chn_cons(chn)->flags & SC_FL_SHUT_DONE) {
+ /* If we have found no LF and the buffer is shut, then
+ * the resulting string is made of the concatenation of
+ * the pending blocks (1 or 2).
+ */
+ return retcode;
+ }
+
+ /* No LF yet and not shut yet */
+ return 0;
+}
+
+/* Inserts <str> followed by "\r\n" at position <pos> relative to channel <c>'s
+ * input head. The <len> argument informs about the length of string <str> so
+ * that we don't have to measure it. <str> must be a valid pointer and must not
+ * include the trailing "\r\n".
+ *
+ * The number of bytes added is returned on success. 0 is returned on failure.
+ */
+int ci_insert_line2(struct channel *c, int pos, const char *str, int len)
+{
+ struct buffer *b = &c->buf;
+ char *dst = c_ptr(c, pos);
+ int delta;
+
+ delta = len + 2;
+
+ if (__b_tail(b) + delta >= b_wrap(b))
+ return 0; /* no space left */
+
+ if (b_data(b) &&
+ b_tail(b) + delta > b_head(b) &&
+ b_head(b) >= b_tail(b))
+ return 0; /* no space left before wrapping data */
+
+ /* first, protect the end of the buffer */
+ memmove(dst + delta, dst, b_tail(b) - dst);
+
+ /* now, copy str over dst */
+ memcpy(dst, str, len);
+ dst[len] = '\r';
+ dst[len + 1] = '\n';
+
+ b_add(b, delta);
+ return delta;
+}
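+
+/* Illustrative use (hypothetical): prepend a header-style line at the
+ * channel's input head; the trailing CRLF is added by the function itself:
+ *
+ *	const char *hdr = "X-Example: 1";    // no trailing CRLF here
+ *	if (!ci_insert_line2(chn, 0, hdr, strlen(hdr)))
+ *		return 0;                    // no room yet, retry later
+ */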
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/check.c b/src/check.c
new file mode 100644
index 0000000..2753c93
--- /dev/null
+++ b/src/check.c
@@ -0,0 +1,2642 @@
+/*
+ * Health-checks functions.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/chunk.h>
+#include <haproxy/dgram.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/extcheck.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/http.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/mailers.h>
+#include <haproxy/port_range.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/queue.h>
+#include <haproxy/regex.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+#include <haproxy/vars.h>
+
+/* trace source and events */
+static void check_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * check - check
+ *
+ * CHECK_EV_* macros are defined in <haproxy/check.h>
+ */
+static const struct trace_event check_trace_events[] = {
+ { .mask = CHK_EV_TASK_WAKE, .name = "task_wake", .desc = "Check task woken up" },
+ { .mask = CHK_EV_HCHK_START, .name = "hchck_start", .desc = "Health-check started" },
+ { .mask = CHK_EV_HCHK_WAKE, .name = "hchck_wake", .desc = "Health-check woken up" },
+ { .mask = CHK_EV_HCHK_RUN, .name = "hchck_run", .desc = "Health-check running" },
+ { .mask = CHK_EV_HCHK_END, .name = "hchck_end", .desc = "Health-check terminated" },
+ { .mask = CHK_EV_HCHK_SUCC, .name = "hchck_succ", .desc = "Health-check success" },
+ { .mask = CHK_EV_HCHK_ERR, .name = "hchck_err", .desc = "Health-check failure" },
+
+ { .mask = CHK_EV_TCPCHK_EVAL, .name = "tcp_check_eval", .desc = "tcp-check rules evaluation" },
+ { .mask = CHK_EV_TCPCHK_ERR, .name = "tcp_check_err", .desc = "tcp-check evaluation error" },
+ { .mask = CHK_EV_TCPCHK_CONN, .name = "tcp_check_conn", .desc = "tcp-check connection rule" },
+ { .mask = CHK_EV_TCPCHK_SND, .name = "tcp_check_send", .desc = "tcp-check send rule" },
+ { .mask = CHK_EV_TCPCHK_EXP, .name = "tcp_check_expect", .desc = "tcp-check expect rule" },
+ { .mask = CHK_EV_TCPCHK_ACT, .name = "tcp_check_action", .desc = "tcp-check action rule" },
+
+ { .mask = CHK_EV_RX_DATA, .name = "rx_data", .desc = "receipt of data" },
+ { .mask = CHK_EV_RX_BLK, .name = "rx_blk", .desc = "receipt blocked" },
+ { .mask = CHK_EV_RX_ERR, .name = "rx_err", .desc = "receipt error" },
+
+ { .mask = CHK_EV_TX_DATA, .name = "tx_data", .desc = "transmission of data" },
+ { .mask = CHK_EV_TX_BLK, .name = "tx_blk", .desc = "transmission blocked" },
+ { .mask = CHK_EV_TX_ERR, .name = "tx_err", .desc = "transmission error" },
+
+ {}
+};
+
+static const struct name_desc check_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the check */ },
+ /* arg2 */ { },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc check_trace_decoding[] = {
+#define CHK_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define CHK_VERB_MINIMAL 2
+ { .name="minimal", .desc="report info on streams and connectors" },
+#define CHK_VERB_SIMPLE 3
+ { .name="simple", .desc="add info on request and response channels" },
+#define CHK_VERB_ADVANCED 4
+ { .name="advanced", .desc="add info on channel's buffer for data and developer levels only" },
+#define CHK_VERB_COMPLETE 5
+ { .name="complete", .desc="add info on channel's buffer" },
+ { /* end */ }
+};
+
+struct trace_source trace_check = {
+ .name = IST("check"),
+ .desc = "Health-check",
+	.arg_def = TRC_ARG1_CHK,  // TRACE()'s first argument is always a check
+ .default_cb = check_trace,
+ .known_events = check_trace_events,
+ .lockon_args = check_trace_lockon_args,
+ .decoding = check_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_check
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+
+/* Dummy frontend used to create all check sessions. */
+struct proxy checks_fe;
+
+
+static inline void check_trace_buf(const struct buffer *buf, size_t ofs, size_t len)
+{
+ size_t block1, block2;
+ int line, ptr, newptr;
+
+ block1 = b_contig_data(buf, ofs);
+ block2 = 0;
+ if (block1 > len)
+ block1 = len;
+ block2 = len - block1;
+
+ ofs = b_peek_ofs(buf, ofs);
+
+ line = 0;
+ ptr = ofs;
+ while (ptr < ofs + block1) {
+ newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), ofs + block1, &line, ptr);
+ if (newptr == ptr)
+ break;
+ ptr = newptr;
+ }
+
+ line = ptr = 0;
+ while (ptr < block2) {
+ newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), block2, &line, ptr);
+ if (newptr == ptr)
+ break;
+ ptr = newptr;
+ }
+}
+
+/* trace source and events */
+static void check_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct check *check = a1;
+ const struct server *srv = (check ? check->server : NULL);
+ const size_t *val = a4;
+ const char *res;
+
+ if (!check || src->verbosity < CHK_VERB_CLEAN)
+ return;
+
+ if (srv) {
+ chunk_appendf(&trace_buf, " : [%c] SRV=%s",
+ ((check->type == PR_O2_EXT_CHK) ? 'E' : (check->state & CHK_ST_AGENT ? 'A' : 'H')),
+ srv->id);
+
+ chunk_appendf(&trace_buf, " status=%d/%d %s",
+ (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
+ (check->health >= check->rise) ? check->fall : check->rise,
+ (check->health >= check->rise) ? (srv->uweight ? "UP" : "DRAIN") : "DOWN");
+ }
+ else
+ chunk_appendf(&trace_buf, " : [EMAIL]");
+
+ switch (check->result) {
+ case CHK_RES_NEUTRAL: res = "-"; break;
+ case CHK_RES_FAILED: res = "FAIL"; break;
+ case CHK_RES_PASSED: res = "PASS"; break;
+ case CHK_RES_CONDPASS: res = "COND"; break;
+ default: res = "UNK"; break;
+ }
+
+ if (src->verbosity == CHK_VERB_CLEAN)
+ return;
+
+ chunk_appendf(&trace_buf, " - last=%s(%d)/%s(%d)",
+ get_check_status_info(check->status), check->status,
+ res, check->result);
+
+ /* Display the value of the 4th argument (level > STATE) */
+ if (src->level > TRACE_LEVEL_STATE && val)
+ chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
+
+ chunk_appendf(&trace_buf, " check=%p(0x%08x)", check, check->state);
+
+ if (src->verbosity == CHK_VERB_MINIMAL)
+ return;
+
+
+ if (check->sc) {
+ struct connection *conn = sc_conn(check->sc);
+
+ chunk_appendf(&trace_buf, " - conn=%p(0x%08x)", conn, conn ? conn->flags : 0);
+ chunk_appendf(&trace_buf, " sc=%p(0x%08x)", check->sc, check->sc->flags);
+ }
+
+ if (mask & CHK_EV_TCPCHK) {
+ const char *type;
+
+ switch (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) {
+ case TCPCHK_RULES_PGSQL_CHK: type = "PGSQL"; break;
+ case TCPCHK_RULES_REDIS_CHK: type = "REDIS"; break;
+ case TCPCHK_RULES_SMTP_CHK: type = "SMTP"; break;
+ case TCPCHK_RULES_HTTP_CHK: type = "HTTP"; break;
+ case TCPCHK_RULES_MYSQL_CHK: type = "MYSQL"; break;
+ case TCPCHK_RULES_LDAP_CHK: type = "LDAP"; break;
+ case TCPCHK_RULES_SSL3_CHK: type = "SSL3"; break;
+ case TCPCHK_RULES_AGENT_CHK: type = "AGENT"; break;
+ case TCPCHK_RULES_SPOP_CHK: type = "SPOP"; break;
+ case TCPCHK_RULES_TCP_CHK: type = "TCP"; break;
+ default: type = "???"; break;
+ }
+ if (check->current_step)
+ chunk_appendf(&trace_buf, " - tcp-check=(%s,%d)", type, tcpcheck_get_step_id(check, NULL));
+ else
+ chunk_appendf(&trace_buf, " - tcp-check=(%s,-)", type);
+ }
+
+ /* Display bi and bo buffer info (level > USER & verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_USER) {
+ const struct buffer *buf = NULL;
+
+ chunk_appendf(&trace_buf, " bi=%u@%p+%u/%u",
+ (unsigned int)b_data(&check->bi), b_orig(&check->bi),
+ (unsigned int)b_head_ofs(&check->bi), (unsigned int)b_size(&check->bi));
+ chunk_appendf(&trace_buf, " bo=%u@%p+%u/%u",
+ (unsigned int)b_data(&check->bo), b_orig(&check->bo),
+ (unsigned int)b_head_ofs(&check->bo), (unsigned int)b_size(&check->bo));
+
+ if (src->verbosity >= CHK_VERB_ADVANCED && (mask & (CHK_EV_RX)))
+ buf = (b_is_null(&check->bi) ? NULL : &check->bi);
+ else if (src->verbosity >= CHK_VERB_ADVANCED && (mask & (CHK_EV_TX)))
+ buf = (b_is_null(&check->bo) ? NULL : &check->bo);
+
+ if (buf) {
+ if ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK) {
+ int full = (src->verbosity == CHK_VERB_COMPLETE);
+
+ chunk_memcat(&trace_buf, "\n\t", 2);
+ htx_dump(&trace_buf, htxbuf(buf), full);
+ }
+ else {
+ int max = ((src->verbosity == CHK_VERB_COMPLETE) ? 1024 : 256);
+
+ chunk_memcat(&trace_buf, "\n", 1);
+ if (b_data(buf) > max) {
+ check_trace_buf(buf, 0, max);
+ chunk_memcat(&trace_buf, " ...\n", 6);
+ }
+ else
+ check_trace_buf(buf, 0, b_data(buf));
+ }
+
+ }
+ }
+
+}
+
+
+/**************************************************************************/
+/************************ Handle check results ****************************/
+/**************************************************************************/
+struct check_status {
+ short result; /* one of SRV_CHK_* */
+ char *info; /* human readable short info */
+ char *desc; /* long description */
+};
+
+struct analyze_status {
+ char *desc; /* description */
+ unsigned char lr[HANA_OBS_SIZE]; /* result for l4/l7: 0 = ignore, 1 = error, 2 = OK */
+};
+
+static const struct check_status check_statuses[HCHK_STATUS_SIZE] = {
+ [HCHK_STATUS_UNKNOWN] = { CHK_RES_UNKNOWN, "UNK", "Unknown" },
+ [HCHK_STATUS_INI] = { CHK_RES_UNKNOWN, "INI", "Initializing" },
+ [HCHK_STATUS_START] = { /* SPECIAL STATUS */ },
+
+ /* Below we have finished checks */
+ [HCHK_STATUS_CHECKED] = { CHK_RES_NEUTRAL, "CHECKED", "No status change" },
+ [HCHK_STATUS_HANA] = { CHK_RES_FAILED, "HANA", "Health analyze" },
+
+ [HCHK_STATUS_SOCKERR] = { CHK_RES_FAILED, "SOCKERR", "Socket error" },
+
+ [HCHK_STATUS_L4OK] = { CHK_RES_PASSED, "L4OK", "Layer4 check passed" },
+ [HCHK_STATUS_L4TOUT] = { CHK_RES_FAILED, "L4TOUT", "Layer4 timeout" },
+ [HCHK_STATUS_L4CON] = { CHK_RES_FAILED, "L4CON", "Layer4 connection problem" },
+
+ [HCHK_STATUS_L6OK] = { CHK_RES_PASSED, "L6OK", "Layer6 check passed" },
+ [HCHK_STATUS_L6TOUT] = { CHK_RES_FAILED, "L6TOUT", "Layer6 timeout" },
+ [HCHK_STATUS_L6RSP] = { CHK_RES_FAILED, "L6RSP", "Layer6 invalid response" },
+
+ [HCHK_STATUS_L7TOUT] = { CHK_RES_FAILED, "L7TOUT", "Layer7 timeout" },
+ [HCHK_STATUS_L7RSP] = { CHK_RES_FAILED, "L7RSP", "Layer7 invalid response" },
+
+ [HCHK_STATUS_L57DATA] = { /* DUMMY STATUS */ },
+
+ [HCHK_STATUS_L7OKD] = { CHK_RES_PASSED, "L7OK", "Layer7 check passed" },
+ [HCHK_STATUS_L7OKCD] = { CHK_RES_CONDPASS, "L7OKC", "Layer7 check conditionally passed" },
+ [HCHK_STATUS_L7STS] = { CHK_RES_FAILED, "L7STS", "Layer7 wrong status" },
+
+ [HCHK_STATUS_PROCERR] = { CHK_RES_FAILED, "PROCERR", "External check error" },
+ [HCHK_STATUS_PROCTOUT] = { CHK_RES_FAILED, "PROCTOUT", "External check timeout" },
+ [HCHK_STATUS_PROCOK] = { CHK_RES_PASSED, "PROCOK", "External check passed" },
+};
+
+static const struct analyze_status analyze_statuses[HANA_STATUS_SIZE] = { /* 0: ignore, 1: error, 2: OK */
+ [HANA_STATUS_UNKNOWN] = { "Unknown", { 0, 0 }},
+
+ [HANA_STATUS_L4_OK] = { "L4 successful connection", { 2, 0 }},
+ [HANA_STATUS_L4_ERR] = { "L4 unsuccessful connection", { 1, 1 }},
+
+ [HANA_STATUS_HTTP_OK] = { "Correct http response", { 0, 2 }},
+ [HANA_STATUS_HTTP_STS] = { "Wrong http response", { 0, 1 }},
+ [HANA_STATUS_HTTP_HDRRSP] = { "Invalid http response (headers)", { 0, 1 }},
+ [HANA_STATUS_HTTP_RSP] = { "Invalid http response", { 0, 1 }},
+
+ [HANA_STATUS_HTTP_READ_ERROR] = { "Read error (http)", { 0, 1 }},
+ [HANA_STATUS_HTTP_READ_TIMEOUT] = { "Read timeout (http)", { 0, 1 }},
+ [HANA_STATUS_HTTP_BROKEN_PIPE] = { "Close from server (http)", { 0, 1 }},
+};
+
+/* Checks whether <err> is a real errno error or one that can safely be
+ * ignored. Returns 0 for the ignorable ones, and <err> for real ones.
+ */
+static inline int unclean_errno(int err)
+{
+ if (err == EAGAIN || err == EWOULDBLOCK || err == EINPROGRESS ||
+ err == EISCONN || err == EALREADY)
+ return 0;
+ return err;
+}
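+
+/* Illustrative sketch (not compiled): how unclean_errno() is typically used
+ * after a connect() attempt. In-progress codes collapse to 0 ("no error
+ * yet") while genuine failures pass through unchanged:
+ *
+ *   if (connect(fd, (struct sockaddr *)&addr, len) < 0) {
+ *       int err = unclean_errno(errno);
+ *
+ *       if (!err) {
+ *           // transient (e.g. EINPROGRESS): wait for the poller
+ *       } else {
+ *           // real failure (e.g. ECONNREFUSED): report it
+ *       }
+ *   }
+ */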
+
+/* Converts check_status code to result code */
+short get_check_status_result(short check_status)
+{
+ if (check_status < HCHK_STATUS_SIZE)
+ return check_statuses[check_status].result;
+ else
+ return check_statuses[HCHK_STATUS_UNKNOWN].result;
+}
+
+/* Converts check_status code to description */
+const char *get_check_status_description(short check_status) {
+
+ const char *desc;
+
+ if (check_status < HCHK_STATUS_SIZE)
+ desc = check_statuses[check_status].desc;
+ else
+ desc = NULL;
+
+ if (desc && *desc)
+ return desc;
+ else
+ return check_statuses[HCHK_STATUS_UNKNOWN].desc;
+}
+
+/* Converts check_status code to short info */
+const char *get_check_status_info(short check_status)
+{
+ const char *info;
+
+ if (check_status < HCHK_STATUS_SIZE)
+ info = check_statuses[check_status].info;
+ else
+ info = NULL;
+
+ if (info && *info)
+ return info;
+ else
+ return check_statuses[HCHK_STATUS_UNKNOWN].info;
+}
+
+/* Convert analyze_status to description */
+const char *get_analyze_status(short analyze_status) {
+
+ const char *desc;
+
+ if (analyze_status < HANA_STATUS_SIZE)
+ desc = analyze_statuses[analyze_status].desc;
+ else
+ desc = NULL;
+
+ if (desc && *desc)
+ return desc;
+ else
+ return analyze_statuses[HANA_STATUS_UNKNOWN].desc;
+}
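+
+/* Illustrative sketch: the lookup helpers above always fall back to the
+ * HCHK_STATUS_UNKNOWN / HANA_STATUS_UNKNOWN entry for out-of-range codes or
+ * empty slots:
+ *
+ *   get_check_status_info(HCHK_STATUS_L4TOUT);        // -> "L4TOUT"
+ *   get_check_status_description(HCHK_STATUS_L4TOUT); // -> "Layer4 timeout"
+ *   get_check_status_info(HCHK_STATUS_SIZE + 1);      // -> "UNK"
+ */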
+
+/* append check info to buffer msg */
+void check_append_info(struct buffer *msg, struct check *check)
+{
+ if (!check)
+ return;
+ chunk_appendf(msg, ", reason: %s", get_check_status_description(check->status));
+
+ if (check->status >= HCHK_STATUS_L57DATA)
+ chunk_appendf(msg, ", code: %d", check->code);
+
+ if (check->desc[0]) {
+ struct buffer src;
+
+ chunk_appendf(msg, ", info: \"");
+
+ chunk_initlen(&src, check->desc, 0, strlen(check->desc));
+ chunk_asciiencode(msg, &src, '"');
+
+ chunk_appendf(msg, "\"");
+ }
+
+ if (check->duration >= 0)
+ chunk_appendf(msg, ", check duration: %ldms", check->duration);
+}
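+
+/* Illustrative output sketch: for a failed layer4 check that lasted 3ms and
+ * carries no description, the function above appends something like:
+ *
+ *   ", reason: Layer4 timeout, check duration: 3ms"
+ *
+ * and a status >= HCHK_STATUS_L57DATA would additionally insert
+ * ", code: <n>" right after the reason.
+ */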
+
+/* Sets check->status, updates check->duration and fills check->result with an
+ * adequate CHK_RES_* value. The new check->health is computed based on the
+ * result.
+ *
+ * Logs information about a failed health check if the server is UP, or about
+ * a successful health check if the server is DOWN.
+ */
+void set_server_check_status(struct check *check, short status, const char *desc)
+{
+ struct server *s = check->server;
+ short prev_status = check->status;
+ int report = (status != prev_status) ? 1 : 0;
+
+ TRACE_POINT(CHK_EV_HCHK_RUN, check);
+
+ if (status == HCHK_STATUS_START) {
+ check->result = CHK_RES_UNKNOWN; /* no result yet */
+ check->desc[0] = '\0';
+ check->start = now_ns;
+ return;
+ }
+
+ if (!check->status)
+ return;
+
+ if (desc && *desc) {
+ strncpy(check->desc, desc, HCHK_DESC_LEN-1);
+ check->desc[HCHK_DESC_LEN-1] = '\0';
+ } else
+ check->desc[0] = '\0';
+
+ check->status = status;
+ if (check_statuses[status].result)
+ check->result = check_statuses[status].result;
+
+ if (status == HCHK_STATUS_HANA)
+ check->duration = -1;
+ else if (check->start) {
+ /* set_server_check_status() may be called more than once */
+ check->duration = ns_to_ms(now_ns - check->start);
+ check->start = 0;
+ }
+
+ /* no change is expected if no state change occurred */
+ if (check->result == CHK_RES_NEUTRAL)
+ return;
+
+ /* If the check was really just sending a mail, it won't have an
+ * associated server, so we're done now.
+ */
+ if (!s)
+ return;
+
+ switch (check->result) {
+ case CHK_RES_FAILED:
+ /* Failure to connect to the agent as a secondary check should not
+ * cause the server to be marked down.
+ */
+ if ((!(check->state & CHK_ST_AGENT) ||
+ (check->status >= HCHK_STATUS_L57DATA)) &&
+ (check->health > 0)) {
+ _HA_ATOMIC_INC(&s->counters.failed_checks);
+ report = 1;
+ check->health--;
+ if (check->health < check->rise)
+ check->health = 0;
+ }
+ break;
+
+ case CHK_RES_PASSED:
+ case CHK_RES_CONDPASS:
+ if (check->health < check->rise + check->fall - 1) {
+ report = 1;
+ check->health++;
+
+ if (check->health >= check->rise)
+ check->health = check->rise + check->fall - 1; /* OK now */
+ }
+
+ /* clear consecutive_errors if observing is enabled */
+ if (s->onerror)
+ HA_ATOMIC_STORE(&s->consecutive_errors, 0);
+ break;
+
+ default:
+ break;
+ }
+
+ if (report)
+ srv_event_hdl_publish_check(s, check);
+
+ if (s->proxy->options2 & PR_O2_LOGHCHKS && report) {
+ chunk_printf(&trash,
+ "%s check for %sserver %s/%s %s%s",
+ (check->state & CHK_ST_AGENT) ? "Agent" : "Health",
+ s->flags & SRV_F_BACKUP ? "backup " : "",
+ s->proxy->id, s->id,
+ (check->result == CHK_RES_CONDPASS) ? "conditionally ":"",
+ (check->result >= CHK_RES_PASSED) ? "succeeded" : "failed");
+
+ check_append_info(&trash, check);
+
+ chunk_appendf(&trash, ", status: %d/%d %s",
+ (check->health >= check->rise) ? check->health - check->rise + 1 : check->health,
+ (check->health >= check->rise) ? check->fall : check->rise,
+ (check->health >= check->rise) ? (s->uweight ? "UP" : "DRAIN") : "DOWN");
+
+ ha_warning("%s.\n", trash.area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area);
+ send_email_alert(s, LOG_INFO, "%s", trash.area);
+ }
+}
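+
+/* Worked example of the health accounting above, assuming rise=2 and fall=3:
+ * health spans [0 .. rise+fall-1] = [0 .. 4]. An UP server at health=4 needs
+ * fall=3 consecutive failures (4 -> 3 -> 2 -> clamped to 0 once below rise)
+ * before check_notify_failure() may mark it DOWN, and a DOWN server at
+ * health=0 needs rise=2 consecutive successes (0 -> 1 -> clamped up to 4)
+ * before check_notify_success() may bring it back UP.
+ */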
+
+static inline enum srv_op_st_chg_cause check_notify_cause(struct check *check)
+{
+ struct server *s = check->server;
+
+ /* We only report a cause for the check if we did not do so previously */
+ if (!s->track && !(s->proxy->options2 & PR_O2_LOGHCHKS))
+ return (check->state & CHK_ST_AGENT) ? SRV_OP_STCHGC_AGENT : SRV_OP_STCHGC_HEALTH;
+ return SRV_OP_STCHGC_NONE;
+}
+
+/* Marks the check <check>'s server down if the current check has already failed
+ * and the server is neither down yet nor in maintenance.
+ */
+void check_notify_failure(struct check *check)
+{
+ struct server *s = check->server;
+
+ /* The agent secondary check should only cause a server to be marked
+ * as down if check->status is HCHK_STATUS_L7STS, which indicates
+ * that the agent returned "fail", "stopped" or "down".
+ * The implication here is that failure to connect to the agent
+ * as a secondary check should not cause the server to be marked
+ * down. */
+ if ((check->state & CHK_ST_AGENT) && check->status != HCHK_STATUS_L7STS)
+ return;
+
+ if (check->health > 0)
+ return;
+
+ TRACE_STATE("health-check failed, set server DOWN", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ srv_set_stopped(s, check_notify_cause(check));
+}
+
+/* Marks the check <check> as valid and tries to set its server up, provided
+ * it isn't in maintenance, it is not tracking a down server and other checks
+ * comply. The rule is simple: by default, a server is up, unless any of the
+ * following conditions is true:
+ * - health check failed (check->health < rise)
+ * - agent check failed (agent->health < rise)
+ * - the server tracks a down server (track && track->state == STOPPED)
+ * Note that if the server has a slowstart, it will switch to STARTING instead
+ * of RUNNING. Also, only the health checks support the nolb mode, so the
+ * agent's success may not take the server out of this mode.
+ */
+void check_notify_success(struct check *check)
+{
+ struct server *s = check->server;
+
+ if (s->next_admin & SRV_ADMF_MAINT)
+ return;
+
+ if (s->track && s->track->next_state == SRV_ST_STOPPED)
+ return;
+
+ if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
+ return;
+
+ if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
+ return;
+
+ if ((check->state & CHK_ST_AGENT) && s->next_state == SRV_ST_STOPPING)
+ return;
+
+ TRACE_STATE("health-check succeeded, set server RUNNING", CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+ srv_set_running(s, check_notify_cause(check));
+}
+
+/* Marks the check <check> as valid and tries to set its server into stopping mode
+ * if it was running or starting, and provided it isn't in maintenance and other
+ * checks comply. The conditions for the server to be marked in stopping mode are
+ * the same as for it to be turned up. Also, only the health checks support the
+ * nolb mode.
+ */
+void check_notify_stopping(struct check *check)
+{
+ struct server *s = check->server;
+
+ if (s->next_admin & SRV_ADMF_MAINT)
+ return;
+
+ if (check->state & CHK_ST_AGENT)
+ return;
+
+ if (s->track && s->track->next_state == SRV_ST_STOPPED)
+ return;
+
+ if ((s->check.state & CHK_ST_ENABLED) && (s->check.health < s->check.rise))
+ return;
+
+ if ((s->agent.state & CHK_ST_ENABLED) && (s->agent.health < s->agent.rise))
+ return;
+
+ TRACE_STATE("health-check conditionally succeeded, set server STOPPING", CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+ srv_set_stopping(s, check_notify_cause(check));
+}
+
+/* note: use health_adjust() only, which first checks that the observe mode is
+ * enabled. This will take the server lock if needed.
+ */
+void __health_adjust(struct server *s, short status)
+{
+ int failed;
+
+ if (s->observe >= HANA_OBS_SIZE)
+ return;
+
+ if (status >= HANA_STATUS_SIZE || !analyze_statuses[status].desc)
+ return;
+
+ switch (analyze_statuses[status].lr[s->observe - 1]) {
+ case 1:
+ failed = 1;
+ break;
+
+ case 2:
+ failed = 0;
+ break;
+
+ default:
+ return;
+ }
+
+ if (!failed) {
+ /* good: clear consecutive_errors */
+ HA_ATOMIC_STORE(&s->consecutive_errors, 0);
+ return;
+ }
+
+ if (HA_ATOMIC_ADD_FETCH(&s->consecutive_errors, 1) < s->consecutive_errors_limit)
+ return;
+
+ chunk_printf(&trash, "Detected %d consecutive errors, last one was: %s",
+ HA_ATOMIC_LOAD(&s->consecutive_errors), get_analyze_status(status));
+
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+
+ /* force fastinter for upcoming check
+ * (does nothing if fastinter is not enabled)
+ */
+ s->check.state |= CHK_ST_FASTINTER;
+
+ switch (s->onerror) {
+ case HANA_ONERR_FASTINTER:
+ /* force fastinter - nothing to do here as all modes force it */
+ break;
+
+ case HANA_ONERR_SUDDTH:
+ /* simulate a pre-fatal failed health check */
+ if (s->check.health > s->check.rise)
+ s->check.health = s->check.rise + 1;
+
+ __fallthrough;
+
+ case HANA_ONERR_FAILCHK:
+ /* simulate a failed health check */
+ set_server_check_status(&s->check, HCHK_STATUS_HANA,
+ trash.area);
+ check_notify_failure(&s->check);
+ break;
+
+ case HANA_ONERR_MARKDWN:
+ /* mark server down */
+ s->check.health = s->check.rise;
+ set_server_check_status(&s->check, HCHK_STATUS_HANA,
+ trash.area);
+ check_notify_failure(&s->check);
+ break;
+
+ default:
+ /* write a warning? */
+ break;
+ }
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+
+ HA_ATOMIC_STORE(&s->consecutive_errors, 0);
+ _HA_ATOMIC_INC(&s->counters.failed_hana);
+
+ if (s->check.fastinter) {
+ /* timer might need to be advanced, it might also already be
+ * running in another thread. Let's just wake the task up, it
+ * will automatically adjust its timer.
+ */
+ task_wakeup(s->check.task, TASK_WOKEN_MSG);
+ }
+}
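+
+/* Illustrative configuration sketch (keyword names assumed from the server
+ * settings documentation): the observe/on-error path above is driven by
+ * lines such as:
+ *
+ *   server s1 192.0.2.10:80 check observe layer7 error-limit 10 on-error fastinter
+ *
+ * where 10 consecutive layer7 errors on live traffic force fastinter, while
+ * "on-error mark-down" would directly mark the server DOWN.
+ */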
+
+/* Checks the connection. If an error has already been reported or the socket is
+ * closed, keep errno intact as it is supposed to contain the valid error code.
+ * If no error is reported, check the socket's error queue using getsockopt().
+ * Warning, this must be done only once when returning from poll, and never
+ * after an I/O error was attempted, otherwise the error queue might contain
+ * inconsistent errors. If an error is detected, the CO_FL_ERROR flag is set on the
+ * socket. Returns non-zero if an error was reported, zero if everything is
+ * clean (including a properly closed socket).
+ */
+static int retrieve_errno_from_socket(struct connection *conn)
+{
+ int skerr;
+ socklen_t lskerr = sizeof(skerr);
+
+ if (conn->flags & CO_FL_ERROR && (unclean_errno(errno) || !conn->ctrl))
+ return 1;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (getsockopt(conn->handle.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) == 0)
+ errno = skerr;
+
+ errno = unclean_errno(errno);
+
+ if (!errno) {
+ /* we could not retrieve an error, that does not mean there is
+ * none. Just don't change anything and only report the prior
+ * error if any.
+ */
+ if (conn->flags & CO_FL_ERROR)
+ return 1;
+ else
+ return 0;
+ }
+
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
+ return 1;
+}
+
+/* Tries to collect as much information as possible on the connection status,
+ * and adjust the server status accordingly. It may make use of <errno_bck>
+ * if non-null when the caller is absolutely certain of its validity (eg:
+ * checked just after a syscall). If the caller doesn't have a valid errno,
+ * it can pass zero, and retrieve_errno_from_socket() will be called to try
+ * to extract errno from the socket. If no error is reported, it will consider
+ * the <expired> flag. This is intended to be used when a connection error was
+ * reported in conn->flags or when a timeout was reported in <expired>. The
+ * function takes care of not updating a server status which was already set.
+ * All situations where at least one of <expired> or CO_FL_ERROR are set
+ * produce a status.
+ */
+void chk_report_conn_err(struct check *check, int errno_bck, int expired)
+{
+ struct stconn *sc = check->sc;
+ struct connection *conn = sc_conn(sc);
+ const char *err_msg;
+ struct buffer *chk;
+ int step;
+
+ if (check->result != CHK_RES_UNKNOWN) {
+ return;
+ }
+
+ errno = unclean_errno(errno_bck);
+ if (conn && errno)
+ retrieve_errno_from_socket(conn);
+
+ if (conn && !(conn->flags & CO_FL_ERROR) && !sc_ep_test(sc, SE_FL_ERROR) && !expired)
+ return;
+
+ TRACE_ENTER(CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check, 0, 0, (size_t[]){expired});
+
+ /* we'll try to build a meaningful error message depending on the
+ * context of the error possibly present in conn->err_code, and the
+ * socket error possibly collected above. This is useful to know the
+ * exact step of the L6 layer (eg: SSL handshake).
+ */
+ chk = get_trash_chunk();
+
+ if (check->type == PR_O2_TCPCHK_CHK &&
+ (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK) {
+ step = tcpcheck_get_step_id(check, NULL);
+ if (!step) {
+ TRACE_DEVEL("initial connection failure", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ chunk_printf(chk, " at initial connection step of tcp-check");
+ }
+ else {
+ chunk_printf(chk, " at step %d of tcp-check", step);
+ /* detail which rule (connect/expect/send) was in progress */
+ if (check->current_step && check->current_step->action == TCPCHK_ACT_CONNECT) {
+ if (check->current_step->connect.port)
+ chunk_appendf(chk, " (connect port %d)", check->current_step->connect.port);
+ else
+ chunk_appendf(chk, " (connect)");
+ TRACE_DEVEL("connection failure", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ }
+ else if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT) {
+ struct tcpcheck_expect *expect = &check->current_step->expect;
+
+ switch (expect->type) {
+ case TCPCHK_EXPECT_STRING:
+ chunk_appendf(chk, " (expect string '%.*s')", (unsigned int)istlen(expect->data), istptr(expect->data));
+ break;
+ case TCPCHK_EXPECT_BINARY:
+ chunk_appendf(chk, " (expect binary '");
+ dump_binary(chk, istptr(expect->data), (int)istlen(expect->data));
+ chunk_appendf(chk, "')");
+ break;
+ case TCPCHK_EXPECT_STRING_REGEX:
+ chunk_appendf(chk, " (expect regex)");
+ break;
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ chunk_appendf(chk, " (expect binary regex)");
+ break;
+ case TCPCHK_EXPECT_STRING_LF:
+ chunk_appendf(chk, " (expect log-format string)");
+ break;
+ case TCPCHK_EXPECT_BINARY_LF:
+ chunk_appendf(chk, " (expect log-format binary)");
+ break;
+ case TCPCHK_EXPECT_HTTP_STATUS:
+ chunk_appendf(chk, " (expect HTTP status codes)");
+ break;
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ chunk_appendf(chk, " (expect HTTP status regex)");
+ break;
+ case TCPCHK_EXPECT_HTTP_HEADER:
+ chunk_appendf(chk, " (expect HTTP header pattern)");
+ break;
+ case TCPCHK_EXPECT_HTTP_BODY:
+ chunk_appendf(chk, " (expect HTTP body content '%.*s')", (unsigned int)istlen(expect->data), istptr(expect->data));
+ break;
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ chunk_appendf(chk, " (expect HTTP body regex)");
+ break;
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ chunk_appendf(chk, " (expect log-format HTTP body)");
+ break;
+ case TCPCHK_EXPECT_CUSTOM:
+ chunk_appendf(chk, " (expect custom function)");
+ break;
+ case TCPCHK_EXPECT_UNDEF:
+ chunk_appendf(chk, " (undefined expect!)");
+ break;
+ }
+ TRACE_DEVEL("expect rule failed", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ }
+ else if (check->current_step && check->current_step->action == TCPCHK_ACT_SEND) {
+ chunk_appendf(chk, " (send)");
+ TRACE_DEVEL("send rule failed", CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ }
+
+ if (check->current_step && check->current_step->comment)
+ chunk_appendf(chk, " comment: '%s'", check->current_step->comment);
+ }
+ }
+
+ if (conn && conn->err_code) {
+ if (unclean_errno(errno))
+ chunk_printf(&trash, "%s (%s)%s", conn_err_code_str(conn), strerror(errno),
+ chk->area);
+ else
+ chunk_printf(&trash, "%s%s", conn_err_code_str(conn),
+ chk->area);
+ err_msg = trash.area;
+ }
+ else {
+ if (unclean_errno(errno)) {
+ chunk_printf(&trash, "%s%s", strerror(errno),
+ chk->area);
+ err_msg = trash.area;
+ }
+ else {
+ err_msg = chk->area;
+ }
+ }
+
+ if (check->state & CHK_ST_PORT_MISS) {
+ /* NOTE: this is reported after <fall> tries */
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
+ }
+
+ if (!conn || !conn->ctrl) {
+ /* error before any connection attempt (connection allocation error or no control layer) */
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
+ }
+ else if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ /* L4 not established (yet) */
+ if (conn->flags & CO_FL_ERROR || sc_ep_test(sc, SE_FL_ERROR))
+ set_server_check_status(check, HCHK_STATUS_L4CON, err_msg);
+ else if (expired)
+ set_server_check_status(check, HCHK_STATUS_L4TOUT, err_msg);
+
+ /*
+ * might be due to a server IP change.
+ * Let's trigger a DNS resolution if none are currently running.
+ */
+ if (check->server)
+ resolv_trigger_resolution(check->server->resolv_requester);
+
+ }
+ else if (conn->flags & CO_FL_WAIT_L6_CONN) {
+ /* L6 not established (yet) */
+ if (conn->flags & CO_FL_ERROR || sc_ep_test(sc, SE_FL_ERROR))
+ set_server_check_status(check, HCHK_STATUS_L6RSP, err_msg);
+ else if (expired)
+ set_server_check_status(check, HCHK_STATUS_L6TOUT, err_msg);
+ }
+ else if (conn->flags & CO_FL_ERROR || sc_ep_test(sc, SE_FL_ERROR)) {
+ /* I/O error after connection was established and before we could diagnose */
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, err_msg);
+ }
+ else if (expired) {
+ enum healthcheck_status tout = HCHK_STATUS_L7TOUT;
+
+ /* connection established but expired check */
+ if (check->current_step && check->current_step->action == TCPCHK_ACT_EXPECT &&
+ check->current_step->expect.tout_status != HCHK_STATUS_UNKNOWN)
+ tout = check->current_step->expect.tout_status;
+ set_server_check_status(check, tout, err_msg);
+ }
+
+ TRACE_LEAVE(CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ return;
+}
+
+
+/* Builds the server state header used by HTTP health-checks */
+int httpchk_build_status_header(struct server *s, struct buffer *buf)
+{
+ int sv_state;
+ int ratio;
+ char addr[46];
+ char port[6];
+ const char *srv_hlt_st[7] = { "DOWN", "DOWN %d/%d",
+ "UP %d/%d", "UP",
+ "NOLB %d/%d", "NOLB",
+ "no check" };
+
+ if (!(s->check.state & CHK_ST_ENABLED))
+ sv_state = 6;
+ else if (s->cur_state != SRV_ST_STOPPED) {
+ if (s->check.health == s->check.rise + s->check.fall - 1)
+ sv_state = 3; /* UP */
+ else
+ sv_state = 2; /* going down */
+
+ if (s->cur_state == SRV_ST_STOPPING)
+ sv_state += 2;
+ } else {
+ if (s->check.health)
+ sv_state = 1; /* going up */
+ else
+ sv_state = 0; /* DOWN */
+ }
+
+ chunk_appendf(buf, srv_hlt_st[sv_state],
+ (s->cur_state != SRV_ST_STOPPED) ? (s->check.health - s->check.rise + 1) : (s->check.health),
+ (s->cur_state != SRV_ST_STOPPED) ? (s->check.fall) : (s->check.rise));
+
+ addr_to_str(&s->addr, addr, sizeof(addr));
+ if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
+ snprintf(port, sizeof(port), "%u", s->svc_port);
+ else
+ *port = 0;
+
+ chunk_appendf(buf, "; address=%s; port=%s; name=%s/%s; node=%s; weight=%d/%d; scur=%d/%d; qcur=%d",
+ addr, port, s->proxy->id, s->id,
+ global.node,
+ (s->cur_eweight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
+ (s->proxy->lbprm.tot_weight * s->proxy->lbprm.wmult + s->proxy->lbprm.wdiv - 1) / s->proxy->lbprm.wdiv,
+ s->cur_sess, s->proxy->beconn - s->proxy->queue.length,
+ s->queue.length);
+
+ if ((s->cur_state == SRV_ST_STARTING) &&
+ ns_to_sec(now_ns) < s->last_change + s->slowstart &&
+ ns_to_sec(now_ns) >= s->last_change) {
+ ratio = MAX(1, 100 * (ns_to_sec(now_ns) - s->last_change) / s->slowstart);
+ chunk_appendf(buf, "; throttle=%d%%", ratio);
+ }
+
+ return b_data(buf);
+}
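+
+/* Illustrative output sketch (header name assumed from the documentation):
+ * with "http-check send-state" enabled, the value built above is sent in an
+ * X-Haproxy-Server-State header looking like (single line):
+ *
+ *   UP 2/3; address=192.0.2.10; port=80; name=bk/s1; node=lb1; weight=1/2;
+ *   scur=13/22; qcur=0
+ *
+ * with an optional "; throttle=40%" suffix appended during slowstart.
+ */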
+
+/**************************************************************************/
+/***************** Health-checks based on connections *********************/
+/**************************************************************************/
+/* This function is used only for server health-checks. It handles connection
+ * status updates including errors. If necessary, it wakes the check task up.
+ * It returns 0 on normal cases, <0 if at least one close() has happened on the
+ * connection (eg: reconnect). It relies on tcpcheck_main().
+ */
+int wake_srv_chk(struct stconn *sc)
+{
+ struct connection *conn;
+ struct check *check = __sc_check(sc);
+ struct email_alertq *q = container_of(check, typeof(*q), check);
+ int ret = 0;
+
+ TRACE_ENTER(CHK_EV_HCHK_WAKE, check);
+ if (check->result != CHK_RES_UNKNOWN)
+ goto end;
+
+ if (check->server)
+ HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
+ else
+ HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
+
+ /* we may have to make progress on the TCP checks */
+ ret = tcpcheck_main(check);
+
+ sc = check->sc;
+ conn = sc_conn(sc);
+
+ if (unlikely(!conn || conn->flags & CO_FL_ERROR || sc_ep_test(sc, SE_FL_ERROR))) {
+ /* We may get error reports bypassing the I/O handlers, typically
+ * the case when sending a pure TCP check which fails, then the I/O
+ * handlers above are not called. This is completely handled by the
+ * main processing task so let's simply wake it up. If we get here,
+ * we expect errno to still be valid.
+ */
+ TRACE_ERROR("report connection error", CHK_EV_HCHK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ chk_report_conn_err(check, errno, 0);
+ task_wakeup(check->task, TASK_WOKEN_IO);
+ }
+
+ if (check->result != CHK_RES_UNKNOWN || ret == -1) {
+ /* Check complete or aborted. Wake the check task up to be sure
+ * the result is handled ASAP. */
+ ret = -1;
+ task_wakeup(check->task, TASK_WOKEN_IO);
+ }
+
+ if (check->server)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
+ else
+ HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
+
+ end:
+ TRACE_LEAVE(CHK_EV_HCHK_WAKE, check);
+ return ret;
+}
+
+/* This function checks if any I/O is wanted, and if so, attempts to do so */
+struct task *srv_chk_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct stconn *sc = ctx;
+
+ wake_srv_chk(sc);
+ return NULL;
+}
+
+/* returns <0, 0, >0 if check thread 1 is respectively less loaded than,
+ * equally as, or more loaded than thread 2. This is made to decide on
+ * migrations so a margin is applied in either direction. For ease of
+ * remembering the direction, consider this returns load1 - load2.
+ */
+static inline int check_thread_cmp_load(int thr1, int thr2)
+{
+ uint t1_load = _HA_ATOMIC_LOAD(&ha_thread_ctx[thr1].rq_total);
+ uint t1_act = _HA_ATOMIC_LOAD(&ha_thread_ctx[thr1].active_checks);
+ uint t2_load = _HA_ATOMIC_LOAD(&ha_thread_ctx[thr2].rq_total);
+ uint t2_act = _HA_ATOMIC_LOAD(&ha_thread_ctx[thr2].active_checks);
+
+ /* twice as many active checks is a significant difference */
+ if (t1_act * 2 < t2_act)
+ return -1;
+
+ if (t2_act * 2 < t1_act)
+ return 1;
+
+ /* twice as much rqload with more checks is also a significant
+ * difference.
+ */
+ if (t1_act <= t2_act && t1_load * 2 < t2_load)
+ return -1;
+
+ if (t2_act <= t1_act && t2_load * 2 < t1_load)
+ return 1;
+
+ /* otherwise they're roughly equal */
+ return 0;
+}
+
+/* returns <0, 0, >0 if check thread 1's active checks count is respectively
+ * higher than, equal, or lower than thread 2's. This is made to decide on
+ * forced migrations upon overload, so only a very little margin is applied
+ * here (~1%). For ease of remembering the direction, consider this returns
+ * active1 - active2.
+ */
+static inline int check_thread_cmp_active(int thr1, int thr2)
+{
+ uint t1_act = _HA_ATOMIC_LOAD(&ha_thread_ctx[thr1].active_checks);
+ uint t2_act = _HA_ATOMIC_LOAD(&ha_thread_ctx[thr2].active_checks);
+
+ if (t1_act * 128 >= t2_act * 129)
+ return 1;
+ if (t2_act * 128 >= t1_act * 129)
+ return -1;
+ return 0;
+}
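+
+/* Worked example of the ~1% margin above: with t1_act=130 and t2_act=128,
+ * 130*128 = 16640 >= 128*129 = 16512 so thread 1 is reported as more loaded
+ * (returns 1), whereas equal counts fail both tests and return 0.
+ */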
+
+
+/* manages a server health-check that uses a connection. Returns
+ * the time the task is willing to wait, or TIME_ETERNITY for infinity.
+ *
+ * Please do NOT place any return statement in this function and only leave
+ * via the out_unlock label.
+ */
+struct task *process_chk_conn(struct task *t, void *context, unsigned int state)
+{
+ struct check *check = context;
+ struct proxy *proxy = check->proxy;
+ struct stconn *sc;
+ struct connection *conn;
+ int rv;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(CHK_EV_TASK_WAKE, check);
+
+ if (check->state & CHK_ST_SLEEPING) {
+ /* This check just restarted. It's still time to verify if
+ * we're on an overloaded thread or if a more suitable one is
+ * available. This helps spread the load over the available
+ * threads, without migrating too often. For this we'll check
+ * our load, and pick a random thread, check if it has less
+ * than half of the current thread's load, and if so we'll
+ * bounce the task there. It's possible because it's not yet
+ * tied to the current thread. The other thread will not bounce
+ * the task again because we're setting CHK_ST_READY indicating
+ * a migration.
+ */
+ uint run_checks = _HA_ATOMIC_LOAD(&th_ctx->running_checks);
+ uint my_load = HA_ATOMIC_LOAD(&th_ctx->rq_total);
+ uint attempts = MIN(global.nbthread, 3);
+
+ if (check->state & CHK_ST_READY) {
+ /* check was migrated, active already counted */
+ activity[tid].check_adopted++;
+ }
+ else {
+ /* first wakeup, let's check if another thread is less loaded
+ * than this one in order to smooth the load. If the current
+ * thread is not yet overloaded, we attempt an opportunistic
+ * migration to another thread that is not full and that is
+ * significantly less loaded. And if the current thread is
+ * already overloaded, we attempt a forced migration to a
+ * thread with fewer active checks. We try at most 3 other
+ * random threads.
+ */
+ while (attempts-- > 0 &&
+ (!LIST_ISEMPTY(&th_ctx->queued_checks) || my_load >= 3) &&
+ _HA_ATOMIC_LOAD(&th_ctx->active_checks) >= 3) {
+ uint new_tid = statistical_prng_range(global.nbthread);
+
+ if (new_tid == tid)
+ continue;
+
+ ALREADY_CHECKED(new_tid);
+
+ if (check_thread_cmp_active(tid, new_tid) > 0 &&
+ (run_checks >= global.tune.max_checks_per_thread ||
+ check_thread_cmp_load(tid, new_tid) > 0)) {
+ /* Found one. Let's migrate the task over there. We have to
+ * remove it from the WQ first and kill its expire time
+ * otherwise the scheduler will reinsert it and trigger a
+ * BUG_ON() as we're not allowed to call task_queue() for a
+ * foreign thread. The recipient will restore the expiration.
+ */
+ check->state |= CHK_ST_READY;
+ HA_ATOMIC_INC(&ha_thread_ctx[new_tid].active_checks);
+ task_unlink_wq(t);
+ t->expire = TICK_ETERNITY;
+ task_set_thread(t, new_tid);
+ task_wakeup(t, TASK_WOKEN_MSG);
+ TRACE_LEAVE(CHK_EV_TASK_WAKE, check);
+ return t;
+ }
+ }
+ /* check just woke up, count it as active */
+ _HA_ATOMIC_INC(&th_ctx->active_checks);
+ }
+
+ /* OK we're keeping it so this check is ours now */
+ task_set_thread(t, tid);
+ check->state &= ~CHK_ST_SLEEPING;
+
+ /* if we just woke up and the thread is full of running, or
+ * already has others waiting, we might have to wait in queue
+ * (for health checks only). This means !SLEEPING && !READY.
+ */
+ if (check->server &&
+ (!LIST_ISEMPTY(&th_ctx->queued_checks) ||
+ (global.tune.max_checks_per_thread &&
+ _HA_ATOMIC_LOAD(&th_ctx->running_checks) >= global.tune.max_checks_per_thread))) {
+ TRACE_DEVEL("health-check queued", CHK_EV_TASK_WAKE, check);
+ t->expire = TICK_ETERNITY;
+ LIST_APPEND(&th_ctx->queued_checks, &check->check_queue);
+
+ /* reset fastinter flag (if set) so that srv_getinter()
+ * only returns fastinter if server health is degraded
+ */
+ check->state &= ~CHK_ST_FASTINTER;
+ goto out_leave;
+ }
+
+ /* OK let's run, now we cannot roll back anymore */
+ check->state |= CHK_ST_READY;
+ activity[tid].check_started++;
+ _HA_ATOMIC_INC(&th_ctx->running_checks);
+ }
+
+ /* at this point, CHK_ST_SLEEPING = 0 and CHK_ST_READY = 1 */
+
+ if (check->server)
+ HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
+
+ if (!(check->state & (CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC))) {
+ /* This task might have bounced from another overloaded thread, it
+ * needs an expiration timer that was supposed to be now, but that
+ * was erased during the bounce.
+ */
+ if (!tick_isset(t->expire)) {
+ t->expire = now_ms;
+ expired = 0;
+ }
+ }
+
+ if (unlikely(check->state & CHK_ST_PURGE)) {
+ TRACE_STATE("health-check state to purge", CHK_EV_TASK_WAKE, check);
+ }
+ else if (!(check->state & (CHK_ST_INPROGRESS))) {
+ /* no check currently running, but we might have been woken up
+ * before the timer's expiration to update it according to a
+ * new state (e.g. fastinter), in which case we'll reprogram
+ * the new timer.
+ */
+ if (!tick_is_expired(t->expire, now_ms)) { /* woke up too early */
+ if (check->server) {
+ int new_exp = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check)));
+
+ if (tick_is_expired(new_exp, t->expire)) {
+ TRACE_STATE("health-check was advanced", CHK_EV_TASK_WAKE, check);
+ goto update_timer;
+ }
+ }
+
+ TRACE_STATE("health-check wake up too early", CHK_EV_TASK_WAKE, check);
+ goto out_unlock;
+ }
+
+ /* we don't send any health-checks when the proxy is
+ * stopped, when the server should not be checked, or when
+ * the check is disabled.
+ */
+ if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
+ (proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ TRACE_STATE("health-check paused or disabled", CHK_EV_TASK_WAKE, check);
+ goto reschedule;
+ }
+
+ /* we'll initiate a new check */
+ set_server_check_status(check, HCHK_STATUS_START, NULL);
+
+ check->state |= CHK_ST_INPROGRESS;
+ TRACE_STATE("init new health-check", CHK_EV_TASK_WAKE|CHK_EV_HCHK_START, check);
+
+ check->current_step = NULL;
+
+ check->sc = sc_new_from_check(check, SC_FL_NONE);
+ if (!check->sc) {
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, NULL);
+ goto end;
+ }
+ tcpcheck_main(check);
+ expired = 0;
+ }
+
+ /* there was a test running.
+ * First, let's check whether there was an uncaught error,
+ * which can happen on connect timeout or error.
+ */
+ if (check->result == CHK_RES_UNKNOWN && likely(!(check->state & CHK_ST_PURGE))) {
+ sc = check->sc;
+ conn = sc_conn(sc);
+
+ /* Here the connection must be defined. Otherwise the
+ * error would have already been detected
+ */
+ if ((conn && ((conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR))) || expired) {
+ TRACE_ERROR("report connection error", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ chk_report_conn_err(check, 0, expired);
+ }
+ else {
+ if (check->state & CHK_ST_CLOSE_CONN) {
+ TRACE_DEVEL("closing current connection", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
+ check->state &= ~CHK_ST_CLOSE_CONN;
+ if (!sc_reset_endp(check->sc)) {
+ /* error will be handled by tcpcheck_main().
+ * On success, remove all flags except SE_FL_DETACHED
+ */
+ sc_ep_clr(check->sc, ~SE_FL_DETACHED);
+ }
+ tcpcheck_main(check);
+ }
+ if (check->result == CHK_RES_UNKNOWN) {
+ TRACE_DEVEL("health-check not expired", CHK_EV_TASK_WAKE|CHK_EV_HCHK_RUN, check);
+ goto out_unlock; /* timeout not reached, wait again */
+ }
+ }
+ }
+
+ /* check complete or aborted */
+ TRACE_STATE("health-check complete or aborted", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END, check);
+
+ /* check->sc may be NULL when the healthcheck is purged */
+ check->current_step = NULL;
+ sc = check->sc;
+ conn = (sc ? sc_conn(sc) : NULL);
+
+ if (conn && conn->xprt) {
+ /* The check was aborted and the connection was not yet closed.
+ * This can happen upon timeout, or when an external event such
+ * as a failed response coupled with "observe layer7" caused the
+ * server state to be suddenly changed.
+ */
+ sc_conn_drain_and_shut(sc);
+ }
+
+ if (sc) {
+ sc_destroy(sc);
+ check->sc = NULL;
+ }
+
+ if (check->sess != NULL) {
+ vars_prune(&check->vars, check->sess, NULL);
+ session_free(check->sess);
+ check->sess = NULL;
+ }
+
+ end:
+ if (check->server && likely(!(check->state & CHK_ST_PURGE))) {
+ if (check->result == CHK_RES_FAILED) {
+ /* a failure or timeout detected */
+ TRACE_DEVEL("report failure", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_ERR, check);
+ check_notify_failure(check);
+ }
+ else if (check->result == CHK_RES_CONDPASS) {
+ /* check is OK but asks for stopping mode */
+ TRACE_DEVEL("report conditional success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+ check_notify_stopping(check);
+ }
+ else if (check->result == CHK_RES_PASSED) {
+ /* a success was detected */
+ TRACE_DEVEL("report success", CHK_EV_TASK_WAKE|CHK_EV_HCHK_END|CHK_EV_HCHK_SUCC, check);
+ check_notify_success(check);
+ }
+ }
+
+ if (LIST_INLIST(&check->buf_wait.list))
+ LIST_DEL_INIT(&check->buf_wait.list);
+
+ check_release_buf(check, &check->bi);
+ check_release_buf(check, &check->bo);
+ _HA_ATOMIC_DEC(&th_ctx->running_checks);
+ _HA_ATOMIC_DEC(&th_ctx->active_checks);
+ check->state &= ~(CHK_ST_INPROGRESS|CHK_ST_IN_ALLOC|CHK_ST_OUT_ALLOC);
+ check->state &= ~CHK_ST_READY;
+ check->state |= CHK_ST_SLEEPING;
+
+ update_timer:
+ /* when going to sleep, we need to check if other checks are waiting
+ * for a slot. If so we pick them out of the queue and wake them up.
+ */
+ if (check->server && (check->state & CHK_ST_SLEEPING)) {
+ if (!LIST_ISEMPTY(&th_ctx->queued_checks) &&
+ _HA_ATOMIC_LOAD(&th_ctx->running_checks) < global.tune.max_checks_per_thread) {
+ struct check *next_chk = LIST_ELEM(th_ctx->queued_checks.n, struct check *, check_queue);
+
+ /* wake up pending task */
+ LIST_DEL_INIT(&next_chk->check_queue);
+
+ activity[tid].check_started++;
+ _HA_ATOMIC_INC(&th_ctx->running_checks);
+ next_chk->state |= CHK_ST_READY;
+ /* now running */
+ task_wakeup(next_chk->task, TASK_WOKEN_RES);
+ }
+ }
+
+ if (check->server) {
+ rv = 0;
+ if (global.spread_checks > 0) {
+ rv = srv_getinter(check) * global.spread_checks / 100;
+ rv -= (int) (2 * rv * (statistical_prng() / 4294967295.0));
+ }
+ t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
+ /* reset fastinter flag (if set) so that srv_getinter()
+ * only returns fastinter if server health is degraded
+ */
+ check->state &= ~CHK_ST_FASTINTER;
+ }
+
+ reschedule:
+ if (proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ t->expire = TICK_ETERNITY;
+ else {
+ while (tick_is_expired(t->expire, now_ms))
+ t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
+ }
+
+ out_unlock:
+ if (check->server)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
+
+ out_leave:
+ TRACE_LEAVE(CHK_EV_TASK_WAKE, check);
+
+ /* Free the check if set to PURGE. After this, the check instance may be
+ * freed via the srv_drop invocation, so it must not be accessed after
+ * this point.
+ */
+ if (unlikely(check->state & CHK_ST_PURGE)) {
+ free_check(check);
+ if (check->server)
+ srv_drop(check->server);
+
+ t = NULL;
+ }
+
+ return t;
+}
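+
+/* Worked example of the spread-checks jitter computed above: with
+ * srv_getinter() = 2000ms and global.spread_checks = 50, rv starts at
+ * 2000*50/100 = 1000 and the random term maps it to [-1000, +1000], so the
+ * next wakeup lands anywhere in [now+1000ms, now+3000ms].
+ */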
+
+
+/**************************************************************************/
+/************************** Init/deinit checks ****************************/
+/**************************************************************************/
+/*
+ * Tries to grab a buffer and to re-enable processing on check <target>. The
+ * check flags are used to figure out which buffer was requested. It returns 1 if the
+ * allocation succeeds, in which case the I/O tasklet is woken up, or 0 if it's
+ * impossible to wake up and we prefer to be woken up later.
+ */
+int check_buf_available(void *target)
+{
+ struct check *check = target;
+
+ BUG_ON(!check->sc);
+
+ if ((check->state & CHK_ST_IN_ALLOC) && b_alloc(&check->bi)) {
+ TRACE_STATE("unblocking check, input buffer allocated", CHK_EV_TCPCHK_EXP|CHK_EV_RX_BLK, check);
+ check->state &= ~CHK_ST_IN_ALLOC;
+ tasklet_wakeup(check->sc->wait_event.tasklet);
+ return 1;
+ }
+ if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc(&check->bo)) {
+ TRACE_STATE("unblocking check, output buffer allocated", CHK_EV_TCPCHK_SND|CHK_EV_TX_BLK, check);
+ check->state &= ~CHK_ST_OUT_ALLOC;
+ tasklet_wakeup(check->sc->wait_event.tasklet);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Allocate a buffer. If it fails, it adds the check to the buffer wait queue.
+ */
+struct buffer *check_get_buf(struct check *check, struct buffer *bptr)
+{
+ struct buffer *buf = NULL;
+
+ if (likely(!LIST_INLIST(&check->buf_wait.list)) &&
+ unlikely((buf = b_alloc(bptr)) == NULL)) {
+ check->buf_wait.target = check;
+ check->buf_wait.wakeup_cb = check_buf_available;
+ LIST_APPEND(&th_ctx->buffer_wq, &check->buf_wait.list);
+ }
+ return buf;
+}
+
+/*
+ * Release a buffer, if any, and try to wake up entities waiting in the buffer
+ * wait queue.
+ */
+void check_release_buf(struct check *check, struct buffer *bptr)
+{
+ if (bptr->size) {
+ b_free(bptr);
+ offer_buffers(check->buf_wait.target, 1);
+ }
+}
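+
+/* Illustrative caller sketch (assumed pattern): an allocation failure parks
+ * the check in the thread's buffer wait queue, and the caller records which
+ * buffer it wanted so check_buf_available() can resume it later:
+ *
+ *   if (!check_get_buf(check, &check->bi)) {
+ *       check->state |= CHK_ST_IN_ALLOC;
+ *       // stop here; check_buf_available() will wake the tasklet up
+ *   }
+ */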
+
+const char *init_check(struct check *check, int type)
+{
+ check->type = type;
+
+ check->bi = BUF_NULL;
+ check->bo = BUF_NULL;
+ LIST_INIT(&check->buf_wait.list);
+ LIST_INIT(&check->check_queue);
+ return NULL;
+}
+
+/* Releases the resources allocated for a check.
+ *
+ * This function must only be run by the thread owning the check.
+ */
+void free_check(struct check *check)
+{
+ /* For agent-check, free the rules / vars from the server. This is not
+ * done for health-check : the proxy is the owner of the rules / vars
+ * in this case.
+ */
+ if (check->state & CHK_ST_AGENT) {
+ free_tcpcheck_vars(&check->tcpcheck_rules->preset_vars);
+ ha_free(&check->tcpcheck_rules);
+ }
+
+ task_destroy(check->task);
+
+ check_release_buf(check, &check->bi);
+ check_release_buf(check, &check->bo);
+ if (check->sc) {
+ sc_destroy(check->sc);
+ check->sc = NULL;
+ }
+}
+
+/* This function must be used in order to free a started check. The check will
+ * be scheduled for a next execution in order to properly close and free all
+ * check elements.
+ *
+ * Non thread-safe.
+ */
+void check_purge(struct check *check)
+{
+ check->state |= CHK_ST_PURGE;
+ task_wakeup(check->task, TASK_WOKEN_OTHER);
+}
+
+/* manages a server health-check. Returns the time the task is willing to
+ * wait, or TIME_ETERNITY for infinity.
+ */
+struct task *process_chk(struct task *t, void *context, unsigned int state)
+{
+ struct check *check = context;
+
+ if (check->type == PR_O2_EXT_CHK)
+ return process_chk_proc(t, context, state);
+ return process_chk_conn(t, context, state);
+}
+
+
+int start_check_task(struct check *check, int mininter,
+ int nbcheck, int srvpos)
+{
+ struct task *t;
+
+ /* task for the check. Process-based checks exclusively run on thread 1. */
+ if (check->type == PR_O2_EXT_CHK)
+ t = task_new_on(0);
+ else
+ t = task_new_anywhere();
+
+ if (!t)
+ goto fail_alloc_task;
+
+ check->task = t;
+ t->process = process_chk;
+ t->context = check;
+
+ if (mininter < srv_getinter(check))
+ mininter = srv_getinter(check);
+
+ if (global.spread_checks > 0) {
+ int rnd;
+
+ rnd = srv_getinter(check) * global.spread_checks / 100;
+ rnd -= (int) (2 * rnd * (ha_random32() / 4294967295.0));
+ mininter += rnd;
+ }
+
+ if (global.max_spread_checks && mininter > global.max_spread_checks)
+ mininter = global.max_spread_checks;
+
+ /* stagger this check's first wakeup across the start-up window */
+ t->expire = tick_add(now_ms, MS_TO_TICKS(mininter * srvpos / nbcheck));
+ check->start = now_ns;
+ task_queue(t);
+
+ return 1;
+
+ fail_alloc_task:
+ ha_alert("Starting [%s:%s] check: out of memory.\n",
+ check->server->proxy->id, check->server->id);
+ return 0;
+}
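+
+/* Worked example of the start-up staggering above: with nbcheck=10 checks
+ * and mininter=2000ms (and spread-checks disabled), the check at srvpos=3
+ * first fires at now + 2000*3/10 = 600ms, so the initial checks are evenly
+ * spread across the 2s window instead of all firing at once.
+ */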
+
+/*
+ * Starts all configured health-checks.
+ * Returns ERR_NONE if OK, or ERR_ALERT | ERR_FATAL on error, printing the
+ * error in this case.
+ */
+static int start_checks()
+{
+
+ struct proxy *px;
+ struct server *s;
+ int nbcheck=0, mininter=0, srvpos=0;
+
+ /* 0- init the dummy frontend used to create all checks sessions */
+ init_new_proxy(&checks_fe);
+ checks_fe.id = strdup("CHECKS-FE");
+ checks_fe.cap = PR_CAP_FE | PR_CAP_BE;
+ checks_fe.mode = PR_MODE_TCP;
+ checks_fe.maxconn = 0;
+ checks_fe.conn_retries = CONN_RETRIES;
+ checks_fe.options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON | PR_O2_SMARTACC;
+ checks_fe.timeout.client = TICK_ETERNITY;
+
+ /* 1- count the checkers to run simultaneously.
+ * We also determine the minimum interval among all of those which
+ * have an interval larger than SRV_CHK_INTER_THRES. This interval
+ * will be used to spread their start-up date. Those which have
+ * a shorter interval will start independently and will not dictate
+ * too short an interval for all others.
+ */
+ for (px = proxies_list; px; px = px->next) {
+ for (s = px->srv; s; s = s->next) {
+ if (s->check.state & CHK_ST_CONFIGURED) {
+ nbcheck++;
+ if ((srv_getinter(&s->check) >= SRV_CHK_INTER_THRES) &&
+ (!mininter || mininter > srv_getinter(&s->check)))
+ mininter = srv_getinter(&s->check);
+ }
+
+ if (s->agent.state & CHK_ST_CONFIGURED) {
+ nbcheck++;
+ if ((srv_getinter(&s->agent) >= SRV_CHK_INTER_THRES) &&
+ (!mininter || mininter > srv_getinter(&s->agent)))
+ mininter = srv_getinter(&s->agent);
+ }
+ }
+ }
+
+ if (!nbcheck)
+ return ERR_NONE;
+
+ srand((unsigned)time(NULL));
+
+ /* 2- start them as far as possible from each other. For this, we will
+ * start them after their interval is set to the min interval divided
+ * by the number of servers, weighted by the server's position in the
+ * list.
+ */
+ for (px = proxies_list; px; px = px->next) {
+ if ((px->options2 & PR_O2_CHK_ANY) == PR_O2_EXT_CHK) {
+ if (init_pid_list()) {
+ ha_alert("Starting [%s] check: out of memory.\n", px->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ for (s = px->srv; s; s = s->next) {
+ /* A task for the main check */
+ if (s->check.state & CHK_ST_CONFIGURED) {
+ if (s->check.type == PR_O2_EXT_CHK) {
+ if (!prepare_external_check(&s->check))
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (!start_check_task(&s->check, mininter, nbcheck, srvpos))
+ return ERR_ALERT | ERR_FATAL;
+ srvpos++;
+ }
+
+ /* A task for an auxiliary agent check */
+ if (s->agent.state & CHK_ST_CONFIGURED) {
+ if (!start_check_task(&s->agent, mininter, nbcheck, srvpos)) {
+ return ERR_ALERT | ERR_FATAL;
+ }
+ srvpos++;
+ }
+ }
+ }
+ return ERR_NONE;
+}
+
+
+/*
+ * Return value:
+ * the port to be used for the health check
+ * 0 in case no port could be found for the check
+ */
+static int srv_check_healthcheck_port(struct check *chk)
+{
+ int i = 0;
+ struct server *srv = NULL;
+
+ srv = chk->server;
+
+ /* by default, we use the health check port configured */
+ if (chk->port > 0)
+ return chk->port;
+
+ /* try to get the port from check.addr if check.port is not set */
+ i = get_host_port(&chk->addr);
+ if (i > 0)
+ return i;
+
+ /* try to get the port from server address */
+ /* prevent MAPPORTS from working at this point, since checks could
+ * not be performed in such a case (MAPPORTS imposes relative ports
+ * based on live traffic)
+ */
+ if (srv->flags & SRV_F_MAPPORTS)
+ return 0;
+
+ i = srv->svc_port; /* by default */
+ if (i > 0)
+ return i;
+
+ return 0;
+}
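+
+/* Illustrative configuration sketch (addresses and keyword layout assumed):
+ * the lookup order above maps to server lines such as:
+ *
+ *   server s1 192.0.2.10:80 check port 8080             # -> 8080 (explicit)
+ *   server s2 192.0.2.11:80 addr 192.0.2.99:8443 check  # -> 8443 (check addr)
+ *   server s3 192.0.2.12:80 check                       # -> 80 (service port)
+ *   server s4 192.0.2.13:+100 check                     # -> 0 (mapped ports)
+ */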
+
+/* Initializes a health-check attached to the server <srv>. Non-zero is returned
+ * if an error occurred.
+ */
+int init_srv_check(struct server *srv)
+{
+ const char *err;
+ struct tcpcheck_rule *r;
+ int ret = ERR_NONE;
+ int check_type;
+
+ if (!srv->do_check || !(srv->proxy->cap & PR_CAP_BE))
+ goto out;
+
+ check_type = srv->check.tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK;
+
+ if (!(srv->flags & SRV_F_DYNAMIC)) {
+ /* If neither a port nor an addr was specified and no check
+ * transport layer is forced, then the transport layer used by
+ * the checks is the same as for the production traffic.
+ * Otherwise we use raw_sock by default, unless one is
+ * specified.
+ */
+ if (!srv->check.port && !is_addr(&srv->check.addr)) {
+ if (!srv->check.use_ssl && srv->use_ssl != -1) {
+ srv->check.use_ssl = srv->use_ssl;
+ srv->check.xprt = srv->xprt;
+ }
+ else if (srv->check.use_ssl == 1)
+ srv->check.xprt = xprt_get(XPRT_SSL);
+ srv->check.send_proxy |= (srv->pp_opts);
+ }
+ else if (srv->check.use_ssl == 1)
+ srv->check.xprt = xprt_get(XPRT_SSL);
+ }
+ else {
+ /* For dynamic servers, check-ssl and check-send-proxy must be
+ * explicitly defined even if the check port was not
+ * overridden.
+ */
+ if (srv->check.use_ssl == 1)
+ srv->check.xprt = xprt_get(XPRT_SSL);
+ }
+
+ /* Inherit the mux protocol from the server if not already defined for
+ * the check
+ */
+ if (srv->mux_proto && !srv->check.mux_proto &&
+ ((srv->mux_proto->mode == PROTO_MODE_HTTP && check_type == TCPCHK_RULES_HTTP_CHK) ||
+ (srv->mux_proto->mode == PROTO_MODE_TCP && check_type != TCPCHK_RULES_HTTP_CHK))) {
+ srv->check.mux_proto = srv->mux_proto;
+ }
+ /* test that check proto is valid if explicitly defined */
+ else if (srv->check.mux_proto &&
+ ((srv->check.mux_proto->mode == PROTO_MODE_HTTP && check_type != TCPCHK_RULES_HTTP_CHK) ||
+ (srv->check.mux_proto->mode == PROTO_MODE_TCP && check_type == TCPCHK_RULES_HTTP_CHK))) {
+ ha_alert("config: %s '%s': server '%s' uses an incompatible MUX protocol for the selected check type\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* validate <srv> server health-check settings */
+
+ /* We need at least a service port or a check port, or else the first
+ * tcp-check rule must be a 'connect' one when checking an IPv4/IPv6
+ * server.
+ */
+ if ((srv_check_healthcheck_port(&srv->check) != 0) ||
+ (!is_inet_addr(&srv->check.addr) && (is_addr(&srv->check.addr) || !is_inet_addr(&srv->addr))))
+ goto init;
+
+ if (!srv->proxy->tcpcheck_rules.list || LIST_ISEMPTY(srv->proxy->tcpcheck_rules.list)) {
+ ha_alert("config: %s '%s': server '%s' has neither service port nor check port.\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* search the first action (connect / send / expect) in the list */
+ r = get_first_tcpcheck_rule(&srv->proxy->tcpcheck_rules);
+ if (!r || (r->action != TCPCHK_ACT_CONNECT) || (!r->connect.port && !get_host_port(&r->connect.addr))) {
+ ha_alert("config: %s '%s': server '%s' has neither service port nor check port "
+ "nor tcp_check rule 'connect' with port information.\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* scan the tcp-check ruleset to ensure a port has been configured */
+ list_for_each_entry(r, srv->proxy->tcpcheck_rules.list, list) {
+ if ((r->action == TCPCHK_ACT_CONNECT) && (!r->connect.port && !get_host_port(&r->connect.addr))) {
+ ha_alert("config: %s '%s': server '%s' has neither service port nor check port, "
+ "and a tcp_check rule 'connect' with no port information.\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ }
+
+ init:
+ err = init_check(&srv->check, srv->proxy->options2 & PR_O2_CHK_ANY);
+ if (err) {
+ ha_alert("config: %s '%s': unable to init check for server '%s' (%s).\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id, err);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ srv->check.state |= CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_SLEEPING;
+ srv_take(srv);
+
+ /* Only increment maxsock for servers from the configuration. Dynamic
+ * servers at the moment are not taken into account for the estimation
+ * of the resources limits.
+ */
+ if (global.mode & MODE_STARTING)
+ global.maxsock++;
+
+ out:
+ return ret;
+}
+
+/* Initializes an agent-check attached to the server <srv>. Non-zero is returned
+ * if an error occurred.
+ */
+int init_srv_agent_check(struct server *srv)
+{
+ struct tcpcheck_rule *chk;
+ const char *err;
+ int ret = ERR_NONE;
+
+ if (!srv->do_agent || !(srv->proxy->cap & PR_CAP_BE))
+ goto out;
+
+ /* If there is no connect rule preceding all send / expect rules, an
+ * implicit one is inserted before all others.
+ */
+ chk = get_first_tcpcheck_rule(srv->agent.tcpcheck_rules);
+ if (!chk || chk->action != TCPCHK_ACT_CONNECT) {
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ ha_alert("%s '%s': unable to add implicit tcp-check connect rule"
+ " to agent-check for server '%s' (out of memory).\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ chk->action = TCPCHK_ACT_CONNECT;
+ chk->connect.options = (TCPCHK_OPT_DEFAULT_CONNECT|TCPCHK_OPT_IMPLICIT);
+ LIST_INSERT(srv->agent.tcpcheck_rules->list, &chk->list);
+ }
+
+ /* <chk> is always defined here and it is a CONNECT action. If there is
+ * a preset variable, it means there is an agent string defined and data
+ * will be sent after the connect.
+ */
+ if (!LIST_ISEMPTY(&srv->agent.tcpcheck_rules->preset_vars))
+ chk->connect.options |= TCPCHK_OPT_HAS_DATA;
+
+
+ err = init_check(&srv->agent, PR_O2_TCPCHK_CHK);
+ if (err) {
+ ha_alert("config: %s '%s': unable to init agent-check for server '%s' (%s).\n",
+ proxy_type_str(srv->proxy), srv->proxy->id, srv->id, err);
+ ret |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (!srv->agent.inter)
+ srv->agent.inter = srv->check.inter;
+
+ srv->agent.state |= CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_SLEEPING | CHK_ST_AGENT;
+ srv_take(srv);
+
+ /* Only increment maxsock for servers from the configuration. Dynamic
+ * servers at the moment are not taken into account for the estimation
+ * of the resources limits.
+ */
+ if (global.mode & MODE_STARTING)
+ global.maxsock++;
+
+ out:
+ return ret;
+}
+
+static void deinit_srv_check(struct server *srv)
+{
+ if (srv->check.state & CHK_ST_CONFIGURED) {
+ free_check(&srv->check);
+ /* it is safe to drop now since the main server reference is still held by the proxy */
+ srv_drop(srv);
+ }
+ srv->check.state &= ~CHK_ST_CONFIGURED & ~CHK_ST_ENABLED;
+ srv->do_check = 0;
+}
+
+
+static void deinit_srv_agent_check(struct server *srv)
+{
+ if (srv->agent.state & CHK_ST_CONFIGURED) {
+ free_check(&srv->agent);
+ /* it is safe to drop now since the main server reference is still held by the proxy */
+ srv_drop(srv);
+ }
+
+ srv->agent.state &= ~CHK_ST_CONFIGURED & ~CHK_ST_ENABLED & ~CHK_ST_AGENT;
+ srv->do_agent = 0;
+}
+
+REGISTER_POST_SERVER_CHECK(init_srv_check);
+REGISTER_POST_SERVER_CHECK(init_srv_agent_check);
+REGISTER_POST_CHECK(start_checks);
+
+REGISTER_SERVER_DEINIT(deinit_srv_check);
+REGISTER_SERVER_DEINIT(deinit_srv_agent_check);
+
+/* perform minimal initializations */
+static void init_checks()
+{
+ int i;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ LIST_INIT(&ha_thread_ctx[i].queued_checks);
+}
+
+INITCALL0(STG_PREPARE, init_checks);
+
+/**************************************************************************/
+/************************** Check sample fetches **************************/
+/**************************************************************************/
+
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+
+/**************************************************************************/
+/************************ Check's parsing functions ***********************/
+/**************************************************************************/
+/* Parse the "addr" server keyword */
+static int srv_parse_addr(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ struct sockaddr_storage *sk;
+ int port1, port2, err_code = 0;
+
+
+ if (!*args[*cur_arg+1]) {
+ memprintf(errmsg, "'%s' expects <ipv4|ipv6> as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ sk = str2sa_range(args[*cur_arg+1], NULL, &port1, &port2, NULL, NULL, NULL, errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ memprintf(errmsg, "'%s' : %s", args[*cur_arg], *errmsg);
+ goto error;
+ }
+
+ srv->check.addr = *sk;
+ /* if agentaddr was never set, we can use addr */
+ if (!(srv->flags & SRV_F_AGENTADDR))
+ srv->agent.addr = *sk;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "agent-addr" server keyword */
+static int srv_parse_agent_addr(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ struct sockaddr_storage sk;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects an address as argument.", args[*cur_arg]);
+ goto error;
+ }
+ memset(&sk, 0, sizeof(sk));
+ if (str2ip(args[*cur_arg + 1], &sk) == NULL) {
+ memprintf(errmsg, "parsing agent-addr failed. Check if '%s' is correct address.", args[*cur_arg+1]);
+ goto error;
+ }
+ set_srv_agent_addr(srv, &sk);
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "agent-check" server keyword */
+static int srv_parse_agent_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ int err_code = 0;
+
+ if (srv->do_agent)
+ goto out;
+
+ if (!(curpx->cap & PR_CAP_BE)) {
+ memprintf(errmsg, "'%s' ignored because %s '%s' has no backend capability",
+ args[*cur_arg], proxy_type_str(curpx), curpx->id);
+ return ERR_WARN;
+ }
+
+ if (!rules) {
+ rules = calloc(1, sizeof(*rules));
+ if (!rules) {
+ memprintf(errmsg, "out of memory.");
+ goto error;
+ }
+ LIST_INIT(&rules->preset_vars);
+ srv->agent.tcpcheck_rules = rules;
+ }
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*agent-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*agent-check");
+ if (rs == NULL) {
+ memprintf(errmsg, "out of memory.");
+ goto error;
+ }
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-lf", "%[var(check.agent_string)]", ""},
+ 1, curpx, &rs->rules, srv->conf.file, srv->conf.line, errmsg);
+ if (!chk) {
+ memprintf(errmsg, "'%s': %s", args[*cur_arg], *errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_AGENT_CHK,
+ srv->conf.file, srv->conf.line, errmsg);
+ if (!chk) {
+ memprintf(errmsg, "'%s': %s", args[*cur_arg], *errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_agent_expect_reply;
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_AGENT_CHK;
+ srv->do_agent = 1;
+
+ out:
+ return err_code;
+
+ error:
+ deinit_srv_agent_check(srv);
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
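+
+/* Illustrative configuration (addresses and values are examples only): once
+ * "agent-check" is parsed, the server shares the "*agent-check" ruleset built
+ * above:
+ *
+ *   backend app
+ *       server srv1 192.0.2.10:80 check agent-check agent-port 10000 agent-inter 5s
+ */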
+
+/* Parse the "agent-inter" server keyword */
+static int srv_parse_agent_inter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ const char *err = NULL;
+ unsigned int delay;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err) {
+ memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
+ *err, srv->id);
+ goto error;
+ }
+ if (!delay) {
+ memprintf(errmsg, "invalid value %u for argument '%s' of server %s.",
+ delay, args[*cur_arg], srv->id);
+ goto error;
+ }
+ srv->agent.inter = delay;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "agent-port" server keyword */
+static int srv_parse_agent_port(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a port number as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ /* Only increment maxsock for servers from the configuration. Dynamic
+ * servers at the moment are not taken into account for the estimation
+ * of the resources limits.
+ */
+ if (global.mode & MODE_STARTING)
+ global.maxsock++;
+
+ set_srv_agent_port(srv, atol(args[*cur_arg + 1]));
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+int set_srv_agent_send(struct server *srv, const char *send)
+{
+ struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
+ struct tcpcheck_var *var = NULL;
+ char *str;
+
+ str = strdup(send);
+ var = create_tcpcheck_var(ist("check.agent_string"));
+ if (str == NULL || var == NULL)
+ goto error;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = str;
+ var->data.u.str.data = strlen(str);
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+
+ return 1;
+
+ error:
+ free(str);
+ free(var);
+ return 0;
+}
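+
+/* Usage sketch (hypothetical caller): this presets the "check.agent_string"
+ * variable which the "*agent-check" ruleset sends via its "send-lf" rule:
+ *
+ *   if (!set_srv_agent_send(srv, "ready\n"))
+ *       ha_alert("out of memory\n");
+ */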
+
+/* Parse the "agent-send" server keyword */
+static int srv_parse_agent_send(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ struct tcpcheck_rules *rules = srv->agent.tcpcheck_rules;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ if (!rules) {
+ rules = calloc(1, sizeof(*rules));
+ if (!rules) {
+ memprintf(errmsg, "out of memory.");
+ goto error;
+ }
+ LIST_INIT(&rules->preset_vars);
+ srv->agent.tcpcheck_rules = rules;
+ }
+
+ if (!set_srv_agent_send(srv, args[*cur_arg+1])) {
+ memprintf(errmsg, "out of memory.");
+ goto error;
+ }
+
+ out:
+ return err_code;
+
+ error:
+ deinit_srv_agent_check(srv);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "no-agent-send" server keyword */
+static int srv_parse_no_agent_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ deinit_srv_agent_check(srv);
+ return 0;
+}
+
+/* Parse the "check" server keyword */
+static int srv_parse_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ if (!(curpx->cap & PR_CAP_BE)) {
+ memprintf(errmsg, "'%s' ignored because %s '%s' has no backend capability",
+ args[*cur_arg], proxy_type_str(curpx), curpx->id);
+ return ERR_WARN;
+ }
+
+ srv->do_check = 1;
+ return 0;
+}
+
+/* Parse the "check-send-proxy" server keyword */
+static int srv_parse_check_send_proxy(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ srv->check.send_proxy = 1;
+ return 0;
+}
+
+/* Parse the "check-via-socks4" server keyword */
+static int srv_parse_check_via_socks4(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ srv->check.via_socks4 = 1;
+ return 0;
+}
+
+/* Parse the "no-check" server keyword */
+static int srv_parse_no_check(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ deinit_srv_check(srv);
+ return 0;
+}
+
+/* Parse the "no-check-send-proxy" server keyword */
+static int srv_parse_no_check_send_proxy(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ srv->check.send_proxy = 0;
+ return 0;
+}
+
+/* parse the "check-proto" server keyword */
+static int srv_parse_check_proto(char **args, int *cur_arg,
+ struct proxy *px, struct server *newsrv, char **err)
+{
+ int err_code = 0;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[*cur_arg]);
+ goto error;
+ }
+ newsrv->check.mux_proto = get_mux_proto(ist(args[*cur_arg + 1]));
+ if (!newsrv->check.mux_proto) {
+ memprintf(err, "'%s' : unknown MUX protocol '%s'", args[*cur_arg], args[*cur_arg+1]);
+ goto error;
+ }
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parse the "rise" server keyword */
+static int srv_parse_check_rise(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ int err_code = 0;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(errmsg, "'%s' expects an integer argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ srv->check.rise = atol(args[*cur_arg+1]);
+ if (srv->check.rise <= 0) {
+ memprintf(errmsg, "'%s' has to be > 0.", args[*cur_arg]);
+ goto error;
+ }
+
+ if (srv->check.health)
+ srv->check.health = srv->check.rise;
+
+ out:
+ return err_code;
+
+ error:
+ deinit_srv_agent_check(srv);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "fall" server keyword */
+static int srv_parse_check_fall(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ int err_code = 0;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(errmsg, "'%s' expects an integer argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ srv->check.fall = atol(args[*cur_arg+1]);
+ if (srv->check.fall <= 0) {
+ memprintf(errmsg, "'%s' has to be > 0.", args[*cur_arg]);
+ goto error;
+ }
+
+ out:
+ return err_code;
+
+ error:
+ deinit_srv_agent_check(srv);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parse the "inter" server keyword */
+static int srv_parse_check_inter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ const char *err = NULL;
+ unsigned int delay;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err) {
+ memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
+ *err, srv->id);
+ goto error;
+ }
+ if (!delay) {
+ memprintf(errmsg, "invalid value %u for argument '%s' of server %s.",
+ delay, args[*cur_arg], srv->id);
+ goto error;
+ }
+ srv->check.inter = delay;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parse the "fastinter" server keyword */
+static int srv_parse_check_fastinter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ const char *err = NULL;
+ unsigned int delay;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err) {
+ memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
+ *err, srv->id);
+ goto error;
+ }
+ if (!delay) {
+ memprintf(errmsg, "invalid value %u for argument '%s' of server %s.",
+ delay, args[*cur_arg], srv->id);
+ goto error;
+ }
+ srv->check.fastinter = delay;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parse the "downinter" server keyword */
+static int srv_parse_check_downinter(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ const char *err = NULL;
+ unsigned int delay;
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a delay as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ err = parse_time_err(args[*cur_arg+1], &delay, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ memprintf(errmsg, "timer overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ memprintf(errmsg, "timer underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], srv->id);
+ goto error;
+ }
+ else if (err) {
+ memprintf(errmsg, "unexpected character '%c' in 'agent-inter' argument of server %s.",
+ *err, srv->id);
+ goto error;
+ }
+ if (!delay) {
+ memprintf(errmsg, "invalid value %u for argument '%s' of server %s.",
+ delay, args[*cur_arg], srv->id);
+ goto error;
+ }
+ srv->check.downinter = delay;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
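+
+/* Illustrative server line (values are examples only) combining the timing
+ * keywords parsed above; "fastinter" is used instead of "inter" during state
+ * transitions and "downinter" while the server is down:
+ *
+ *   server srv1 192.0.2.10:80 check inter 2s fastinter 500ms downinter 10s rise 2 fall 3
+ */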
+
+/* Parse the "port" server keyword */
+static int srv_parse_check_port(char **args, int *cur_arg, struct proxy *curpx, struct server *srv,
+ char **errmsg)
+{
+ int err_code = 0;
+
+ if (!*(args[*cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a port number as argument.", args[*cur_arg]);
+ goto error;
+ }
+
+ /* Only increment maxsock for servers from the configuration. Dynamic
+ * servers at the moment are not taken into account for the estimation
+ * of the resources limits.
+ */
+ if (global.mode & MODE_STARTING)
+ global.maxsock++;
+
+ srv->check.port = atol(args[*cur_arg+1]);
+ /* if agentport was never set, we can use port */
+ if (!(srv->flags & SRV_F_AGENTPORT))
+ srv->agent.port = srv->check.port;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* config parser for global "tune.max-checks-per-thread" */
+static int check_parse_global_max_checks(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+ global.tune.max_checks_per_thread = atoi(args[1]);
+ return 0;
+}
+
+/* register "global" section keywords */
+static struct cfg_kw_list chk_cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.max-checks-per-thread", check_parse_global_max_checks },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &chk_cfg_kws);
+
+/* register "server" line keywords */
+static struct srv_kw_list srv_kws = { "CHK", { }, {
+ { "addr", srv_parse_addr, 1, 1, 1 }, /* IP address to send health to or to probe from agent-check */
+ { "agent-addr", srv_parse_agent_addr, 1, 1, 1 }, /* Enable an auxiliary agent check */
+ { "agent-check", srv_parse_agent_check, 0, 1, 1 }, /* Enable agent checks */
+ { "agent-inter", srv_parse_agent_inter, 1, 1, 1 }, /* Set the interval between two agent checks */
+ { "agent-port", srv_parse_agent_port, 1, 1, 1 }, /* Set the TCP port used for agent checks. */
+ { "agent-send", srv_parse_agent_send, 1, 1, 1 }, /* Set string to send to agent. */
+ { "check", srv_parse_check, 0, 1, 1 }, /* Enable health checks */
+ { "check-proto", srv_parse_check_proto, 1, 1, 1 }, /* Set the mux protocol for health checks */
+ { "check-send-proxy", srv_parse_check_send_proxy, 0, 1, 1 }, /* Enable PROXY protocol for health checks */
+ { "check-via-socks4", srv_parse_check_via_socks4, 0, 1, 1 }, /* Enable socks4 proxy for health checks */
+ { "no-agent-check", srv_parse_no_agent_check, 0, 1, 0 }, /* Do not enable any auxiliary agent check */
+ { "no-check", srv_parse_no_check, 0, 1, 0 }, /* Disable health checks */
+ { "no-check-send-proxy", srv_parse_no_check_send_proxy, 0, 1, 0 }, /* Disable PROXY protocol for health checks */
+ { "rise", srv_parse_check_rise, 1, 1, 1 }, /* Set rise value for health checks */
+ { "fall", srv_parse_check_fall, 1, 1, 1 }, /* Set fall value for health checks */
+ { "inter", srv_parse_check_inter, 1, 1, 1 }, /* Set inter value for health checks */
+ { "fastinter", srv_parse_check_fastinter, 1, 1, 1 }, /* Set fastinter value for health checks */
+ { "downinter", srv_parse_check_downinter, 1, 1, 1 }, /* Set downinter value for health checks */
+ { "port", srv_parse_check_port, 1, 1, 1 }, /* Set the TCP port used for health checks. */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/chunk.c b/src/chunk.c
new file mode 100644
index 0000000..c5b74fc
--- /dev/null
+++ b/src/chunk.c
@@ -0,0 +1,311 @@
+/*
+ * Chunk management functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/tools.h>
+
+/* trash chunks used for various conversions */
+static THREAD_LOCAL struct buffer *trash_chunk;
+static THREAD_LOCAL struct buffer trash_chunk1;
+static THREAD_LOCAL struct buffer trash_chunk2;
+
+/* trash buffers used for various conversions */
+static int trash_size __read_mostly;
+static THREAD_LOCAL char *trash_buf1;
+static THREAD_LOCAL char *trash_buf2;
+
+/* the trash pool for reentrant allocations */
+struct pool_head *pool_head_trash __read_mostly = NULL;
+
+/* this is used to drain data, and as a temporary buffer for sprintf()... */
+THREAD_LOCAL struct buffer trash = { };
+
+/*
+ * Returns a pre-allocated and initialized trash chunk that can be used for any
+ * type of conversion. Two chunks and their respective buffers are alternately
+ * returned so that it is always possible to iterate data transformations without
+ * losing the data being transformed. The blocks are initialized to the size of
+ * a standard buffer, so they should be enough for everything. For convenience,
+ * a zero is always emitted at the beginning of the string so that it may be
+ * used as an empty string as well.
+ */
+struct buffer *get_trash_chunk(void)
+{
+ char *trash_buf;
+
+ if (trash_chunk == &trash_chunk1) {
+ trash_chunk = &trash_chunk2;
+ trash_buf = trash_buf2;
+ }
+ else {
+ trash_chunk = &trash_chunk1;
+ trash_buf = trash_buf1;
+ }
+ *trash_buf = 0;
+ chunk_init(trash_chunk, trash_buf, trash_size);
+ return trash_chunk;
+}
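+
+/* Usage sketch (illustrative): since two chunks alternate, the result of a
+ * first transformation may still be read while producing a second one:
+ *
+ *   struct buffer *t1 = get_trash_chunk();
+ *   chunk_printf(t1, "%d", 42);
+ *   struct buffer *t2 = get_trash_chunk();   // returns the other chunk
+ *   chunk_printf(t2, "value=%s", t1->area);  // <t1> is still intact here
+ */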
+
+/* (re)allocates the trash buffers. Returns 0 in case of failure. It is
+ * possible to call this function multiple times if the trash size changes.
+ */
+static int alloc_trash_buffers(int bufsize)
+{
+ chunk_init(&trash, my_realloc2(trash.area, bufsize), bufsize);
+ trash_size = bufsize;
+ trash_buf1 = (char *)my_realloc2(trash_buf1, bufsize);
+ trash_buf2 = (char *)my_realloc2(trash_buf2, bufsize);
+ return trash.area && trash_buf1 && trash_buf2;
+}
+
+static int alloc_trash_buffers_per_thread()
+{
+ return alloc_trash_buffers(global.tune.bufsize);
+}
+
+static void free_trash_buffers_per_thread()
+{
+ chunk_destroy(&trash);
+ ha_free(&trash_buf2);
+ ha_free(&trash_buf1);
+}
+
+/* Initialize the trash buffers. It returns 0 if an error occurred. */
+int init_trash_buffers(int first)
+{
+ pool_destroy(pool_head_trash);
+ pool_head_trash = create_pool("trash",
+ sizeof(struct buffer) + global.tune.bufsize,
+ MEM_F_EXACT);
+ if (!pool_head_trash || !alloc_trash_buffers(global.tune.bufsize))
+ return 0;
+ return 1;
+}
+
+/* This is called during STG_POOL to allocate trash buffers early. They will
+ * be reallocated later once their final size is known. It returns 0 if an
+ * error occurred.
+ */
+static int alloc_early_trash(void)
+{
+ return init_trash_buffers(1);
+}
+
+/*
+ * Does an snprintf() at the beginning of chunk <chk>, respecting the limit of
+ * at most chk->size chars. If the output does not fit, nothing is kept. Returns
+ * the new chunk size, or < 0 in case of failure.
+ */
+int chunk_printf(struct buffer *chk, const char *fmt, ...)
+{
+ va_list argp;
+ int ret;
+
+ if (!chk->area || !chk->size)
+ return 0;
+
+ va_start(argp, fmt);
+ ret = vsnprintf(chk->area, chk->size, fmt, argp);
+ va_end(argp);
+
+ if (ret >= chk->size)
+ return -1;
+
+ chk->data = ret;
+ return chk->data;
+}
+
+/*
+ * Does an snprintf() at the end of chunk <chk>, respecting the limit of
+ * at most chk->size chars. If the output does not fit, nothing is added. Returns
+ * the new chunk size.
+ */
+int chunk_appendf(struct buffer *chk, const char *fmt, ...)
+{
+ va_list argp;
+ size_t room;
+ int ret;
+
+ if (!chk->area || !chk->size)
+ return 0;
+
+ room = chk->size - chk->data;
+ if (!room)
+ return chk->data;
+
+ va_start(argp, fmt);
+ ret = vsnprintf(chk->area + chk->data, room, fmt, argp);
+ if (ret >= room)
+ /* do not copy anything in case of truncation */
+ chk->area[chk->data] = 0;
+ else
+ chk->data += ret;
+ va_end(argp);
+ return chk->data;
+}
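+
+/* Usage sketch (illustrative): chunk_printf() rewrites the chunk from the
+ * beginning while chunk_appendf() extends it, so messages are typically
+ * built incrementally:
+ *
+ *   struct buffer *out = get_trash_chunk();
+ *   chunk_printf(out, "status:");
+ *   chunk_appendf(out, " up=%d", 3);   // <out> now contains "status: up=3"
+ */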
+
+/*
+ * Encode chunk <src> into chunk <dst>, respecting the limit of at most
+ * dst->size chars. Replace non-printable or special characters with "&#%u;".
+ * If the output does not fit, <dst> is reset to its original length and
+ * returned as-is. Returns the new chunk size.
+ */
+int chunk_htmlencode(struct buffer *dst, struct buffer *src)
+{
+ int i, l;
+ int olen, free;
+ char c;
+
+ olen = dst->data;
+
+ for (i = 0; i < src->data; i++) {
+ free = dst->size - dst->data;
+
+ if (!free) {
+ dst->data = olen;
+ return dst->data;
+ }
+
+ c = src->area[i];
+
+ if (!isascii((unsigned char)c) || !isprint((unsigned char)c) || c == '&' || c == '"' || c == '\'' || c == '<' || c == '>') {
+ l = snprintf(dst->area + dst->data, free, "&#%u;",
+ (unsigned char)c);
+
+ if (free < l) {
+ dst->data = olen;
+ return dst->data;
+ }
+
+ dst->data += l;
+ } else {
+ dst->area[dst->data] = c;
+ dst->data++;
+ }
+ }
+
+ return dst->data;
+}
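+
+/* Example (illustrative): encoding the input <b>"x"</b> would produce
+ * &#60;b&#62;&#34;x&#34;&#60;/b&#62; since '<', '>' and '"' are among the
+ * characters replaced by their decimal code.
+ */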
+
+/*
+ * Encode chunk <src> into chunk <dst>, respecting the limit of at most
+ * dst->size chars. Replace non-printable characters and the char passed in
+ * <qc> with "<%02X>". If the output does not fit, <dst> is reset to its
+ * original length and returned as-is. Returns the new chunk size.
+ */
+int chunk_asciiencode(struct buffer *dst, struct buffer *src, char qc)
+{
+ int i, l;
+ int olen, free;
+ char c;
+
+ olen = dst->data;
+
+ for (i = 0; i < src->data; i++) {
+ free = dst->size - dst->data;
+
+ if (!free) {
+ dst->data = olen;
+ return dst->data;
+ }
+
+ c = src->area[i];
+
+ if (!isascii((unsigned char)c) || !isprint((unsigned char)c) || c == '<' || c == '>' || c == qc) {
+ l = snprintf(dst->area + dst->data, free, "<%02X>",
+ (unsigned char)c);
+
+ if (free < l) {
+ dst->data = olen;
+ return dst->data;
+ }
+
+ dst->data += l;
+ } else {
+ dst->area[dst->data] = c;
+ dst->data++;
+ }
+ }
+
+ return dst->data;
+}
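+
+/* Example (illustrative): with qc set to '"', the input ab"c<d> would be
+ * encoded as ab<22>c<3C>d<3E>, the quote and the '<'/'>' characters being
+ * replaced by their hexadecimal code.
+ */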
+
+/* Compares the string in chunk <chk> with the string in <str> which must be
+ * zero-terminated. Return is the same as with strcmp(). Neither is allowed
+ * to be null.
+ */
+int chunk_strcmp(const struct buffer *chk, const char *str)
+{
+ const char *s1 = chk->area;
+ int len = chk->data;
+ int diff = 0;
+
+ do {
+ if (--len < 0) {
+ diff = (unsigned char)0 - (unsigned char)*str;
+ break;
+ }
+ diff = (unsigned char)*(s1++) - (unsigned char)*(str++);
+ } while (!diff);
+ return diff;
+}
+
+/* Case-insensitively compares the string in chunk <chk> with the string in
+ * <str> which must be zero-terminated. Return is the same as with strcmp().
+ * Neither is allowed to be null.
+ */
+int chunk_strcasecmp(const struct buffer *chk, const char *str)
+{
+ const char *s1 = chk->area;
+ int len = chk->data;
+ int diff = 0;
+
+ do {
+ if (--len < 0) {
+ diff = (unsigned char)0 - (unsigned char)*str;
+ break;
+ }
+ diff = (unsigned char)*s1 - (unsigned char)*str;
+ if (unlikely(diff)) {
+ unsigned int l = (unsigned char)*s1;
+ unsigned int r = (unsigned char)*str;
+
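+ /* fold both characters to upper case: after subtracting 'a',
+ * values 0..25 identify lowercase letters, which are then
+ * shifted down by 'a' - 'A'
+ */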
+ l -= 'a';
+ r -= 'a';
+
+ if (likely(l <= (unsigned char)'z' - 'a'))
+ l -= 'a' - 'A';
+ if (likely(r <= (unsigned char)'z' - 'a'))
+ r -= 'a' - 'A';
+ diff = l - r;
+ }
+ s1++; str++;
+ } while (!diff);
+ return diff;
+}
+
+INITCALL0(STG_POOL, alloc_early_trash);
+REGISTER_PER_THREAD_ALLOC(alloc_trash_buffers_per_thread);
+REGISTER_PER_THREAD_FREE(free_trash_buffers_per_thread);
+REGISTER_POST_DEINIT(free_trash_buffers_per_thread);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/cli.c b/src/cli.c
new file mode 100644
index 0000000..d0435f7
--- /dev/null
+++ b/src/cli.c
@@ -0,0 +1,3423 @@
+/*
+ * Functions dedicated to statistics output and the stats socket
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pwd.h>
+#include <grp.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <net/if.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/base64.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/compression.h>
+#include <haproxy/dns-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/mworker.h>
+#include <haproxy/mworker-t.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/peers.h>
+#include <haproxy/pipe.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/sample-t.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/sock.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+#define PAYLOAD_PATTERN "<<"
+
+static struct applet cli_applet;
+static struct applet mcli_applet;
+
+static const char cli_permission_denied_msg[] =
+ "Permission denied\n"
+ "";
+
+
+static THREAD_LOCAL char *dynamic_usage_msg = NULL;
+
+/* List head of cli keywords */
+static struct cli_kw_list cli_keywords = {
+ .list = LIST_HEAD_INIT(cli_keywords.list)
+};
+
+extern const char *stat_status_codes[];
+
+struct proxy *mworker_proxy; /* CLI proxy of the master */
+struct bind_conf *mcli_reload_bind_conf;
+
+/* CLI context for the "show env" command */
+struct show_env_ctx {
+ char **var; /* first variable to show */
+ int show_one; /* stop after showing the first one */
+};
+
+/* CLI context for the "show fd" command */
+/* flags for show_fd_ctx->show_mask */
+#define CLI_SHOWFD_F_PI 0x00000001 /* pipes */
+#define CLI_SHOWFD_F_LI 0x00000002 /* listeners */
+#define CLI_SHOWFD_F_FE 0x00000004 /* frontend conns */
+#define CLI_SHOWFD_F_SV 0x00000010 /* server-only conns */
+#define CLI_SHOWFD_F_PX 0x00000020 /* proxy-only conns */
+#define CLI_SHOWFD_F_BE 0x00000030 /* backend: srv+px */
+#define CLI_SHOWFD_F_CO 0x00000034 /* conn: be+fe */
+#define CLI_SHOWFD_F_ANY 0x0000003f /* any type */
+
+struct show_fd_ctx {
+ int fd; /* first FD to show */
+ int show_one; /* stop after showing one FD */
+ uint show_mask; /* CLI_SHOWFD_F_xxx */
+};
+
+/* CLI context for the "show cli sockets" command */
+struct show_sock_ctx {
+ struct bind_conf *bind_conf;
+ struct listener *listener;
+};
+
+static int cmp_kw_entries(const void *a, const void *b)
+{
+ const struct cli_kw *l = *(const struct cli_kw **)a;
+ const struct cli_kw *r = *(const struct cli_kw **)b;
+
+ return strcmp(l->usage ? l->usage : "", r->usage ? r->usage : "");
+}
+
+/* This will show the help message and list the commands supported at the
+ * current level that match all of the first words of <args> if <args> is not
+ * NULL, or all of the commands if none matches or if <args> is NULL.
+ */
+static char *cli_gen_usage_msg(struct appctx *appctx, char * const *args)
+{
+ struct cli_kw *entries[CLI_MAX_HELP_ENTRIES];
+ struct cli_kw_list *kw_list;
+ struct cli_kw *kw;
+ struct buffer *tmp = get_trash_chunk();
+ struct buffer out;
+ struct { struct cli_kw *kw; int dist; } matches[CLI_MAX_MATCHES], swp;
+ int idx;
+ int ishelp = 0;
+ int length = 0;
+ int help_entries = 0;
+
+ ha_free(&dynamic_usage_msg);
+
+ if (args && *args && strcmp(*args, "help") == 0) {
+ args++;
+ ishelp = 1;
+ }
+
+ /* first, let's measure the longest match */
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ for (kw = &kw_list->kw[0]; kw->str_kw[0]; kw++) {
+ if (kw->level & ~appctx->cli_level & (ACCESS_MASTER_ONLY|ACCESS_EXPERT|ACCESS_EXPERIMENTAL))
+ continue;
+ if (!(appctx->cli_level & ACCESS_MCLI_DEBUG) &&
+ (appctx->cli_level & ~kw->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER)) ==
+ (ACCESS_MASTER_ONLY|ACCESS_MASTER))
+ continue;
+
+ /* OK this command is visible */
+ for (idx = 0; idx < CLI_PREFIX_KW_NB; idx++) {
+ if (!kw->str_kw[idx])
+ break; // end of keyword
+ if (!args || !args[idx] || !*args[idx])
+ break; // end of command line
+ if (strcmp(kw->str_kw[idx], args[idx]) != 0)
+ break;
+ if (idx + 1 > length)
+ length = idx + 1;
+ }
+ }
+ }
+
+ /* now <length> equals the number of exactly matching words */
+ chunk_reset(tmp);
+ if (ishelp) // this is the help message.
+ chunk_strcat(tmp, "The following commands are valid at this level:\n");
+ else {
+ chunk_strcat(tmp, "Unknown command: '");
+ if (args && *args)
+ chunk_strcat(tmp, *args);
+ chunk_strcat(tmp, "'");
+
+ if (!length && (!args || !*args || !**args)) // no match
+ chunk_strcat(tmp, ". Please enter one of the following commands only:\n");
+ else // partial match
+ chunk_strcat(tmp, ", but maybe one of the following ones is a better match:\n");
+ }
+
+ for (idx = 0; idx < CLI_MAX_MATCHES; idx++) {
+ matches[idx].kw = NULL;
+ matches[idx].dist = INT_MAX;
+ }
+
+ /* In case of partial match we'll look for the best matching entries
+ * starting from position <length>
+ */
+ if (args && args[length] && *args[length]) {
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ for (kw = &kw_list->kw[0]; kw->str_kw[0]; kw++) {
+ if (kw->level & ~appctx->cli_level & (ACCESS_MASTER_ONLY|ACCESS_EXPERT|ACCESS_EXPERIMENTAL))
+ continue;
+ if (!(appctx->cli_level & ACCESS_MCLI_DEBUG) &&
+ ((appctx->cli_level & ~kw->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER)) ==
+ (ACCESS_MASTER_ONLY|ACCESS_MASTER)))
+ continue;
+
+ for (idx = 0; idx < length; idx++) {
+ if (!kw->str_kw[idx])
+ break; // end of keyword
+ if (!args || !args[idx] || !*args[idx])
+ break; // end of command line
+ if (strcmp(kw->str_kw[idx], args[idx]) != 0)
+ break;
+ }
+
+ /* extra non-matching words are fuzzy-matched */
+ if (kw->usage && idx == length && args[idx] && *args[idx]) {
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ int dist = 0;
+ int totlen = 0;
+ int i;
+
+ /* this one matches, let's compute the distance between the two
+ * on the remaining words. For this we're computing the signature
+ * of everything that remains and the cumulated length of the
+ * strings.
+ */
+ memset(word_sig, 0, sizeof(word_sig));
+ for (i = idx; i < CLI_PREFIX_KW_NB && args[i] && *args[i]; i++) {
+ update_word_fingerprint(word_sig, args[i]);
+ totlen += strlen(args[i]);
+ }
+
+ memset(list_sig, 0, sizeof(list_sig));
+ for (i = idx; i < CLI_PREFIX_KW_NB && kw->str_kw[i]; i++) {
+ update_word_fingerprint(list_sig, kw->str_kw[i]);
+ totlen += strlen(kw->str_kw[i]);
+ }
+
+ dist = word_fingerprint_distance(word_sig, list_sig);
+
+ /* insert this one at its place if relevant, in order to keep only
+ * the best matches.
+ */
+ swp.kw = kw; swp.dist = dist;
+ if (dist < 5*totlen/2 && dist < matches[CLI_MAX_MATCHES-1].dist) {
+ matches[CLI_MAX_MATCHES-1] = swp;
+ for (idx = CLI_MAX_MATCHES - 1; --idx >= 0;) {
+ if (matches[idx+1].dist >= matches[idx].dist)
+ break;
+ matches[idx+1] = matches[idx];
+ matches[idx] = swp;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (matches[0].kw) {
+ /* we have fuzzy matches, let's propose them */
+ for (idx = 0; idx < CLI_MAX_MATCHES; idx++) {
+ kw = matches[idx].kw;
+ if (!kw)
+ break;
+
+ /* stop the dump if some words look very unlikely candidates */
+ if (matches[idx].dist > 5*matches[0].dist/2)
+ break;
+
+ if (help_entries < CLI_MAX_HELP_ENTRIES)
+ entries[help_entries++] = kw;
+ }
+ }
+
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ /* no full dump if we've already found nice candidates */
+ if (matches[0].kw)
+ break;
+
+ for (kw = &kw_list->kw[0]; kw->str_kw[0]; kw++) {
+
+ /* in a worker or normal process, don't display master-only commands
+ * nor expert/experimental mode commands if not in this mode.
+ */
+ if (kw->level & ~appctx->cli_level & (ACCESS_MASTER_ONLY|ACCESS_EXPERT|ACCESS_EXPERIMENTAL))
+ continue;
+
+ /* in master, if the CLI doesn't have the
+ * ACCESS_MCLI_DEBUG level, don't display commands that
+ * have neither the master bit nor the master-only bit.
+ */
+ if (!(appctx->cli_level & ACCESS_MCLI_DEBUG) &&
+ ((appctx->cli_level & ~kw->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER)) ==
+ (ACCESS_MASTER_ONLY|ACCESS_MASTER)))
+ continue;
+
+ for (idx = 0; idx < length; idx++) {
+ if (!kw->str_kw[idx])
+ break; // end of keyword
+ if (!args || !args[idx] || !*args[idx])
+ break; // end of command line
+ if (strcmp(kw->str_kw[idx], args[idx]) != 0)
+ break;
+ }
+
+ if (kw->usage && idx == length && help_entries < CLI_MAX_HELP_ENTRIES)
+ entries[help_entries++] = kw;
+ }
+ }
+
+ qsort(entries, help_entries, sizeof(*entries), cmp_kw_entries);
+
+ for (idx = 0; idx < help_entries; idx++)
+ chunk_appendf(tmp, " %s\n", entries[idx]->usage);
+
+ /* always show the prompt/help/quit commands */
+ chunk_strcat(tmp,
+ " help [<command>] : list matching or all commands\n"
+ " prompt [timed] : toggle interactive mode with prompt\n"
+ " quit : disconnect\n");
+
+ chunk_init(&out, NULL, 0);
+ chunk_dup(&out, tmp);
+ dynamic_usage_msg = out.area;
+
+ cli_msg(appctx, LOG_INFO, dynamic_usage_msg);
+ return dynamic_usage_msg;
+}
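+
+/* Example (illustrative): a mistyped command such as "show tabel" would be
+ * answered with "Unknown command: 'show', but maybe one of the following
+ * ones is a better match:" followed by the closest entries found by the
+ * fuzzy matching above, e.g. "show table".
+ */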
+
+struct cli_kw* cli_find_kw(char **args)
+{
+ struct cli_kw_list *kw_list;
+ struct cli_kw *kw; /* current cli_kw */
+ char **tmp_args;
+ const char **tmp_str_kw;
+ int found = 0;
+
+ if (LIST_ISEMPTY(&cli_keywords.list))
+ return NULL;
+
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ kw = &kw_list->kw[0];
+ while (*kw->str_kw) {
+ tmp_args = args;
+ tmp_str_kw = kw->str_kw;
+ while (*tmp_str_kw) {
+ if (strcmp(*tmp_str_kw, *tmp_args) == 0) {
+ found = 1;
+ } else {
+ found = 0;
+ break;
+ }
+ tmp_args++;
+ tmp_str_kw++;
+ }
+ if (found)
+ return (kw);
+ kw++;
+ }
+ }
+ return NULL;
+}
+
+struct cli_kw* cli_find_kw_exact(char **args)
+{
+ struct cli_kw_list *kw_list;
+ int found = 0;
+ int i;
+ int j;
+
+ if (LIST_ISEMPTY(&cli_keywords.list))
+ return NULL;
+
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ for (i = 0; kw_list->kw[i].str_kw[0]; i++) {
+ found = 1;
+ for (j = 0; j < CLI_PREFIX_KW_NB; j++) {
+ if (args[j] == NULL && kw_list->kw[i].str_kw[j] == NULL) {
+ break;
+ }
+ if (args[j] == NULL || kw_list->kw[i].str_kw[j] == NULL) {
+ found = 0;
+ break;
+ }
+ if (strcmp(args[j], kw_list->kw[i].str_kw[j]) != 0) {
+ found = 0;
+ break;
+ }
+ }
+ if (found)
+ return &kw_list->kw[i];
+ }
+ }
+ return NULL;
+}
+
+void cli_register_kw(struct cli_kw_list *kw_list)
+{
+ LIST_APPEND(&cli_keywords.list, &kw_list->list);
+}
+
+/* list all known keywords on stdout, one per line */
+void cli_list_keywords(void)
+{
+ struct cli_kw_list *kw_list;
+ struct cli_kw *kwp, *kwn, *kw;
+ int idx;
+
+ for (kwn = kwp = NULL;; kwp = kwn) {
+ list_for_each_entry(kw_list, &cli_keywords.list, list) {
+ /* note: we sort based on the usage message when available,
+ * otherwise we fall back to the first keyword.
+ */
+ for (kw = &kw_list->kw[0]; kw->str_kw[0]; kw++) {
+ if (strordered(kwp ? kwp->usage ? kwp->usage : kwp->str_kw[0] : NULL,
+ kw->usage ? kw->usage : kw->str_kw[0],
+ kwn != kwp ? kwn->usage ? kwn->usage : kwn->str_kw[0] : NULL))
+ kwn = kw;
+ }
+ }
+
+ if (kwn == kwp)
+ break;
+
+ for (idx = 0; kwn->str_kw[idx]; idx++) {
+ printf("%s ", kwn->str_kw[idx]);
+ }
+ if (kwn->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER))
+ printf("[MASTER] ");
+ if (!(kwn->level & ACCESS_MASTER_ONLY))
+ printf("[WORKER] ");
+ if (kwn->level & ACCESS_EXPERT)
+ printf("[EXPERT] ");
+ if (kwn->level & ACCESS_EXPERIMENTAL)
+ printf("[EXPERIM] ");
+ printf("\n");
+ }
+}
+
+/* allocate a new stats frontend named <name>, and return it
+ * (or NULL in case of lack of memory).
+ */
+static struct proxy *cli_alloc_fe(const char *name, const char *file, int line)
+{
+ struct proxy *fe;
+
+ fe = calloc(1, sizeof(*fe));
+ if (!fe)
+ return NULL;
+
+ init_new_proxy(fe);
+ fe->next = proxies_list;
+ proxies_list = fe;
+ fe->last_change = ns_to_sec(now_ns);
+ fe->id = strdup("GLOBAL");
+ fe->cap = PR_CAP_FE|PR_CAP_INT;
+ fe->maxconn = 10; /* default to 10 concurrent connections */
+ fe->timeout.client = MS_TO_TICKS(10000); /* default timeout of 10 seconds */
+ fe->conf.file = strdup(file);
+ fe->conf.line = line;
+ fe->accept = frontend_accept;
+ fe->default_target = &cli_applet.obj_type;
+
+ /* the stats frontend is the only one able to assign ID #0 */
+ fe->conf.id.key = fe->uuid = 0;
+ eb32_insert(&used_proxy_id, &fe->conf.id);
+ return fe;
+}
+
+/* This function parses a "stats" statement in the "global" section. It returns
+ * -1 if there is any error, otherwise zero. If it returns -1, it will write an
+ * error message into the <err> buffer which will be preallocated. The trailing
+ * '\n' must not be written. The function must be called with <args> pointing to
+ * the first word after "stats".
+ */
+static int cli_parse_global(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct bind_conf *bind_conf;
+ struct listener *l;
+
+ if (strcmp(args[1], "socket") == 0) {
+ int cur_arg;
+
+ if (*args[2] == 0) {
+ memprintf(err, "'%s %s' in global section expects an address or a path to a UNIX socket", args[0], args[1]);
+ return -1;
+ }
+
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", file, line)) == NULL) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a frontend", args[0], args[1]);
+ return -1;
+ }
+ }
+
+ bind_conf = bind_conf_alloc(global.cli_fe, file, line, args[2], xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a bind_conf", args[0], args[1]);
+ return -1;
+ }
+ bind_conf->level &= ~ACCESS_LVL_MASK;
+ bind_conf->level |= ACCESS_LVL_OPER; /* default access level */
+
+ if (!str2listener(args[2], global.cli_fe, bind_conf, file, line, err)) {
+ memprintf(err, "parsing [%s:%d] : '%s %s' : %s\n",
+ file, line, args[0], args[1], err && *err ? *err : "error");
+ return -1;
+ }
+
+ cur_arg = 3;
+ while (*args[cur_arg]) {
+ struct bind_kw *kw;
+ const char *best;
+ int code;
+
+ kw = bind_find_kw(args[cur_arg]);
+ if (kw) {
+ if (!kw->parse) {
+ memprintf(err, "'%s %s' : '%s' option is not implemented in this version (check build options).",
+ args[0], args[1], args[cur_arg]);
+ return -1;
+ }
+
+ code = kw->parse(args, cur_arg, global.cli_fe, bind_conf, err);
+
+ /* FIXME: this is ugly, we don't have a way to collect warnings,
+ * yet some important bind keywords may report warnings that we
+ * must display.
+ */
+ if (((code & (ERR_WARN|ERR_FATAL|ERR_ALERT)) == ERR_WARN) && err && *err) {
+ indent_msg(err, 2);
+ ha_warning("parsing [%s:%d] : '%s %s' : %s\n", file, line, args[0], args[1], *err);
+ ha_free(err);
+ }
+
+ if (code & ~ERR_WARN) {
+ if (err && *err)
+ memprintf(err, "'%s %s' : '%s'", args[0], args[1], *err);
+ else
+ memprintf(err, "'%s %s' : error encountered while processing '%s'",
+ args[0], args[1], args[cur_arg]);
+ return -1;
+ }
+
+ cur_arg += 1 + kw->skip;
+ continue;
+ }
+
+ best = bind_find_best_kw(args[cur_arg]);
+ if (best)
+ memprintf(err, "'%s %s' : unknown keyword '%s'. Did you mean '%s' maybe ?",
+ args[0], args[1], args[cur_arg], best);
+ else
+ memprintf(err, "'%s %s' : unknown keyword '%s'.",
+ args[0], args[1], args[cur_arg]);
+ return -1;
+ }
+
+ bind_conf->accept = session_accept_fd;
+ bind_conf->nice = -64; /* we want to boost priority for local stats */
+ bind_conf->options |= BC_O_UNLIMITED; /* don't make the stats socket subject to global limits */
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ global.maxsock++; /* for the listening socket */
+ }
+ }
+ else if (strcmp(args[1], "timeout") == 0) {
+ unsigned timeout;
+ const char *res = parse_time_err(args[2], &timeout, TIME_UNIT_MS);
+
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s %s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[2], args[0], args[1]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s %s' (minimum non-null value is 1 ms)",
+ args[2], args[0], args[1]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "'%s %s' : unexpected character '%c'", args[0], args[1], *res);
+ return -1;
+ }
+
+ if (!timeout) {
+ memprintf(err, "'%s %s' expects a positive value", args[0], args[1]);
+ return -1;
+ }
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", file, line)) == NULL) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a frontend", args[0], args[1]);
+ return -1;
+ }
+ }
+ global.cli_fe->timeout.client = MS_TO_TICKS(timeout);
+ }
+ else if (strcmp(args[1], "maxconn") == 0) {
+ int maxconn = atol(args[2]);
+
+ if (maxconn <= 0) {
+ memprintf(err, "'%s %s' expects a positive value", args[0], args[1]);
+ return -1;
+ }
+
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", file, line)) == NULL) {
+ memprintf(err, "'%s %s' : out of memory trying to allocate a frontend", args[0], args[1]);
+ return -1;
+ }
+ }
+ global.cli_fe->maxconn = maxconn;
+ }
+ else if (strcmp(args[1], "bind-process") == 0) {
+ memprintf(err, "'%s %s' is not supported anymore.", args[0], args[1]);
+ return -1;
+ }
+ else {
+ memprintf(err, "'%s' only supports 'socket', 'maxconn', 'bind-process' and 'timeout' (got '%s')", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
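+
+/* Illustrative "global" section (paths and values are examples only) covered
+ * by the parser above:
+ *
+ *   global
+ *       stats socket /var/run/haproxy.sock level admin
+ *       stats timeout 30s
+ *       stats maxconn 10
+ */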
+
+/*
+ * This function exports the bound addresses of a <frontend> in the environment
+ * variable <varname>. Those addresses are separated by semicolons and prefixed
+ * with their type (abns@, unix@, sockpair@, etc.).
+ * Returns -1 upon error, 0 otherwise.
+ */
+int listeners_setenv(struct proxy *frontend, const char *varname)
+{
+ struct buffer *trash = get_trash_chunk();
+ struct bind_conf *bind_conf;
+
+ if (frontend) {
+ list_for_each_entry(bind_conf, &frontend->conf.bind, by_fe) {
+ struct listener *l;
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ char addr[46];
+ char port[6];
+
+ /* separate listener by semicolons */
+ if (trash->data)
+ chunk_appendf(trash, ";");
+
+ if (l->rx.addr.ss_family == AF_UNIX) {
+ const struct sockaddr_un *un;
+
+ un = (struct sockaddr_un *)&l->rx.addr;
+ if (un->sun_path[0] == '\0') {
+ chunk_appendf(trash, "abns@%s", un->sun_path+1);
+ } else {
+ chunk_appendf(trash, "unix@%s", un->sun_path);
+ }
+ } else if (l->rx.addr.ss_family == AF_INET) {
+ addr_to_str(&l->rx.addr, addr, sizeof(addr));
+ port_to_str(&l->rx.addr, port, sizeof(port));
+ chunk_appendf(trash, "ipv4@%s:%s", addr, port);
+ } else if (l->rx.addr.ss_family == AF_INET6) {
+ addr_to_str(&l->rx.addr, addr, sizeof(addr));
+ port_to_str(&l->rx.addr, port, sizeof(port));
+ chunk_appendf(trash, "ipv6@[%s]:%s", addr, port);
+ } else if (l->rx.addr.ss_family == AF_CUST_SOCKPAIR) {
+ chunk_appendf(trash, "sockpair@%d", ((struct sockaddr_in *)&l->rx.addr)->sin_addr.s_addr);
+ }
+ }
+ }
+ trash->area[trash->data++] = '\0';
+ if (setenv(varname, trash->area, 1) < 0)
+ return -1;
+ }
+
+ return 0;
+}
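+
+/* Example (illustrative): with a CLI bound to a unix socket and an IPv4
+ * address, the exported variable could look like:
+ *
+ *   HAPROXY_CLI=unix@/var/run/haproxy.sock;ipv4@127.0.0.1:9999
+ */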
+
+int cli_socket_setenv()
+{
+ if (listeners_setenv(global.cli_fe, "HAPROXY_CLI") < 0)
+ return -1;
+ if (listeners_setenv(mworker_proxy, "HAPROXY_MASTER_CLI") < 0)
+ return -1;
+
+ return 0;
+}
+
+REGISTER_CONFIG_POSTPARSER("cli", cli_socket_setenv);
+
+/* Verifies that the CLI has a level at least as high as <level>
+ * (typically ACCESS_LVL_ADMIN). Returns 1 if OK, otherwise 0. In case of
+ * failure, an error message is prepared and the appctx's state is adjusted
+ * to print it so that a return 1 is enough to abort any processing.
+ */
+int cli_has_level(struct appctx *appctx, int level)
+{
+
+ if ((appctx->cli_level & ACCESS_LVL_MASK) < level) {
+ cli_err(appctx, cli_permission_denied_msg);
+ return 0;
+ }
+ return 1;
+}
+
+/* same as cli_has_level but for the CLI proxy and without error message */
+int pcli_has_level(struct stream *s, int level)
+{
+ if ((s->pcli_flags & ACCESS_LVL_MASK) < level) {
+ return 0;
+ }
+ return 1;
+}
+
+/* Returns severity_output for the current session if set, or the default for the socket */
+static int cli_get_severity_output(struct appctx *appctx)
+{
+ if (appctx->cli_severity_output)
+ return appctx->cli_severity_output;
+ return strm_li(appctx_strm(appctx))->bind_conf->severity_output;
+}
+
+/* Processes the CLI interpreter on the stats socket. This function is called
+ * from the CLI's IO handler running in an appctx context. The function returns
+ * 1 if the request was understood, otherwise zero (in which case an error
+ * message will be displayed). It is called with appctx->st0
+ * set to CLI_ST_GETREQ and presets ->st2 to 0 so that parsers don't have to do
+ * it. It will possibly leave st0 set to CLI_ST_CALLBACK if the keyword needs to
+ * have its own I/O handler called again. Most of the time, parsers will only
+ * set st0 to CLI_ST_PRINT and put their message to be displayed into cli.msg.
+ * If a keyword parser is NULL and an I/O handler is declared, the I/O handler
+ * will automatically be used.
+ */
+static int cli_parse_request(struct appctx *appctx)
+{
+ char *args[MAX_CLI_ARGS + 1], *p, *end, *payload = NULL;
+ int i = 0;
+ struct cli_kw *kw;
+
+ p = appctx->chunk->area;
+ end = p + appctx->chunk->data;
+
+ /*
+ * Get pointers to the words.
+ * One extra slot is reserved to store a pointer to a null byte.
+ */
+ while (i < MAX_CLI_ARGS && p < end) {
+ int j, k;
+
+ /* skip leading spaces/tabs */
+ p += strspn(p, " \t");
+ if (!*p)
+ break;
+
+ /* first check if the '<<' is present, but this is not enough
+ * because we don't know if this is the end of the string */
+ if (strncmp(p, PAYLOAD_PATTERN, strlen(PAYLOAD_PATTERN)) == 0) {
+ int pat_len = strlen(appctx->cli_payload_pat);
+
+ /* then if the customized pattern is empty, check if the next character is '\0' */
+ if (pat_len == 0 && p[strlen(PAYLOAD_PATTERN)] == '\0') {
+ payload = p + strlen(PAYLOAD_PATTERN) + 1;
+ break;
+ }
+
+ /* else if we found the customized pattern at the end of the string */
+ if (strcmp(p + strlen(PAYLOAD_PATTERN), appctx->cli_payload_pat) == 0) {
+ payload = p + strlen(PAYLOAD_PATTERN) + pat_len + 1;
+ break;
+ }
+ }
+
+ args[i] = p;
+ while (1) {
+ p += strcspn(p, " \t\\");
+ /* escaped chars using backslashes (\) */
+ if (*p == '\\') {
+ if (!*++p)
+ break;
+ if (!*++p)
+ break;
+ } else {
+ break;
+ }
+ }
+ *p++ = 0;
+
+ /* unescape backslashes (\) */
+ for (j = 0, k = 0; args[i][k]; k++) {
+ if (args[i][k] == '\\') {
+ if (args[i][k + 1] == '\\')
+ k++;
+ else
+ continue;
+ }
+ args[i][j] = args[i][k];
+ j++;
+ }
+ args[i][j] = 0;
+
+ i++;
+ }
+ /* fill unused slots */
+ p = appctx->chunk->area + appctx->chunk->data;
+ for (; i < MAX_CLI_ARGS + 1; i++)
+ args[i] = p;
+
+ if (!**args)
+ return 0;
+
+ kw = cli_find_kw(args);
+ if (!kw ||
+ (kw->level & ~appctx->cli_level & ACCESS_MASTER_ONLY) ||
+ (!(appctx->cli_level & ACCESS_MCLI_DEBUG) &&
+ (appctx->cli_level & ~kw->level & (ACCESS_MASTER_ONLY|ACCESS_MASTER)) == (ACCESS_MASTER_ONLY|ACCESS_MASTER))) {
+ /* keyword not found in this mode */
+ cli_gen_usage_msg(appctx, args);
+ return 0;
+ }
+
+ /* don't handle expert mode commands if not in this mode. */
+ if (kw->level & ~appctx->cli_level & ACCESS_EXPERT) {
+ cli_err(appctx, "This command is restricted to expert mode only.\n");
+ return 0;
+ }
+
+ if (kw->level & ~appctx->cli_level & ACCESS_EXPERIMENTAL) {
+ cli_err(appctx, "This command is restricted to experimental mode only.\n");
+ return 0;
+ }
+
+ if (kw->level == ACCESS_EXPERT)
+ mark_tainted(TAINTED_CLI_EXPERT_MODE);
+ else if (kw->level == ACCESS_EXPERIMENTAL)
+ mark_tainted(TAINTED_CLI_EXPERIMENTAL_MODE);
+
+ appctx->io_handler = kw->io_handler;
+ appctx->io_release = kw->io_release;
+
+ if (kw->parse && kw->parse(args, payload, appctx, kw->private) != 0)
+ goto fail;
+
+ /* kw->parse could set its own io_handler or io_release handler */
+ if (!appctx->io_handler)
+ goto fail;
+
+ appctx->st0 = CLI_ST_CALLBACK;
+ return 1;
+fail:
+ appctx->io_handler = NULL;
+ appctx->io_release = NULL;
+ return 1;
+}
+
+/* prepends then outputs the argument msg with a syslog-type severity depending on severity_output value */
+static int cli_output_msg(struct appctx *appctx, const char *msg, int severity, int severity_output)
+{
+ struct buffer *tmp;
+ struct ist imsg;
+
+ tmp = get_trash_chunk();
+ chunk_reset(tmp);
+
+ if (likely(severity_output == CLI_SEVERITY_NONE))
+ goto send_it;
+
+ if (severity < 0 || severity > 7) {
+ ha_warning("socket command feedback with invalid severity %d", severity);
+ chunk_printf(tmp, "[%d]: ", severity);
+ }
+ else {
+ switch (severity_output) {
+ case CLI_SEVERITY_NUMBER:
+ chunk_printf(tmp, "[%d]: ", severity);
+ break;
+ case CLI_SEVERITY_STRING:
+ chunk_printf(tmp, "[%s]: ", log_levels[severity]);
+ break;
+ default:
+ ha_warning("Unrecognized severity output %d", severity_output);
+ }
+ }
+ send_it:
+ /* the vast majority of messages have their trailing LF but a few are
+ * still missing it, and very rare ones might even have two. For this
+ * reason, we'll first delete the trailing LFs if present, then
+ * systematically append one.
+ */
+ for (imsg = ist(msg); imsg.len > 0 && imsg.ptr[imsg.len - 1] == '\n'; imsg.len--)
+ ;
+
+ chunk_istcat(tmp, imsg);
+ chunk_istcat(tmp, ist("\n"));
+
+ return applet_putchk(appctx, tmp);
+}
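+
+/* Example (illustrative): with "severity-output number" set on the bind line,
+ * a message emitted at the LOG_INFO level would be prefixed as follows:
+ *
+ *   [6]: some informational message
+ */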
+
+/* This I/O handler runs as an applet embedded in a stream connector. It is
+ * used to process I/O from/to the stats unix socket. The system relies on a
+ * state machine handling requests and various responses. We read a request,
+ * then we process it and send the response, and we possibly display a prompt.
+ * Then we can read again. The state is stored in appctx->st0 and is one of the
+ * CLI_ST_* constants. appctx->st1 is used to indicate whether the prompt is
+ * enabled or not.
+ */
+static void cli_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct channel *req = sc_oc(sc);
+ struct channel *res = sc_ic(sc);
+ struct bind_conf *bind_conf = strm_li(__sc_strm(sc))->bind_conf;
+ int reql;
+ int len;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ goto out;
+ }
+
+ /* Check if the input buffer is available. */
+ if (!b_size(&res->buf)) {
+ sc_need_room(sc, 0);
+ goto out;
+ }
+
+ while (1) {
+ if (appctx->st0 == CLI_ST_INIT) {
+ /* reset severity to default at init */
+ appctx->cli_severity_output = bind_conf->severity_output;
+ applet_reset_svcctx(appctx);
+ appctx->st0 = CLI_ST_GETREQ;
+ appctx->cli_level = bind_conf->level;
+ }
+ else if (appctx->st0 == CLI_ST_END) {
+ se_fl_set(appctx->sedesc, SE_FL_EOS);
+ free_trash_chunk(appctx->chunk);
+ appctx->chunk = NULL;
+ break;
+ }
+ else if (appctx->st0 == CLI_ST_GETREQ) {
+ char *str;
+
+ /* use a trash chunk to store received data */
+ if (!appctx->chunk) {
+ appctx->chunk = alloc_trash_chunk();
+ if (!appctx->chunk) {
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ appctx->st0 = CLI_ST_END;
+ continue;
+ }
+ }
+
+ str = appctx->chunk->area + appctx->chunk->data;
+
+ /* ensure we have some output room left in the event we
+ * would want to return some info right after parsing.
+ */
+ if (buffer_almost_full(sc_ib(sc))) {
+ sc_need_room(sc, b_size(&res->buf) / 2);
+ break;
+ }
+
+ /* payload doesn't take escapes nor does it end on semi-colons, so
+ * we use the regular getline. Normal mode however must stop on
+ * LFs and semi-colons that are not prefixed by a backslash. Note
+ * that we reserve one byte at the end to insert a trailing nul byte.
+ */
+
+ if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)
+ reql = co_getline(sc_oc(sc), str,
+ appctx->chunk->size - appctx->chunk->data - 1);
+ else
+ reql = co_getdelim(sc_oc(sc), str,
+ appctx->chunk->size - appctx->chunk->data - 1,
+ "\n;", '\\');
+
+ if (reql <= 0) { /* closed or EOL not found */
+ if (reql == 0)
+ break;
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ appctx->st0 = CLI_ST_END;
+ continue;
+ }
+
+ if (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)) {
+ /* seek for a possible unescaped semi-colon. If we find
+ * one, we replace it with an LF and skip only this part.
+ */
+ for (len = 0; len < reql; len++) {
+ if (str[len] == '\\') {
+ len++;
+ continue;
+ }
+ if (str[len] == ';') {
+ str[len] = '\n';
+ reql = len + 1;
+ break;
+ }
+ }
+ }
+
+ /* now it is time to check that we have a full line,
+ * remove the trailing \n and possibly \r, then cut the
+ * line.
+ */
+ len = reql - 1;
+ if (str[len] != '\n') {
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ appctx->st0 = CLI_ST_END;
+ continue;
+ }
+
+ if (len && str[len-1] == '\r')
+ len--;
+
+ str[len] = '\0';
+ appctx->chunk->data += len;
+
+ if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) {
+ appctx->chunk->area[appctx->chunk->data] = '\n';
+ appctx->chunk->area[appctx->chunk->data + 1] = 0;
+ appctx->chunk->data++;
+ }
+
+ appctx->st0 = CLI_ST_PROMPT;
+
+ if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) {
+ /* look for a pattern */
+ if (len == strlen(appctx->cli_payload_pat)) {
+ /* here use 'len' because str still contains the \n */
+ if (strncmp(str, appctx->cli_payload_pat, len) == 0) {
+ /* remove the last two \n */
+ appctx->chunk->data -= strlen(appctx->cli_payload_pat) + 2;
+ appctx->chunk->area[appctx->chunk->data] = 0;
+ cli_parse_request(appctx);
+ chunk_reset(appctx->chunk);
+ /* NB: cli_parse_request() may have put
+ * another CLI_ST_* state into appctx->st0.
+ */
+
+ appctx->st1 &= ~APPCTX_CLI_ST1_PAYLOAD;
+ }
+ }
+ }
+ else {
+ char *last_arg;
+ /*
+ * Look for the "payload start" pattern at the end of a line
+ * Its location is not remembered here, this is just to switch
+ * to a gathering mode.
+ * The pattern must start by << followed by 0
+ * to 7 characters, and finished by the end of
+ * the command (\n or ;).
+ */
+				/* look for the first space starting from the end of the line */
+ for (last_arg = appctx->chunk->area + appctx->chunk->data; last_arg != appctx->chunk->area; last_arg--) {
+ if (*last_arg == ' ' || *last_arg == '\t') {
+ last_arg++;
+ break;
+ }
+ }
+ if (strncmp(last_arg, PAYLOAD_PATTERN, strlen(PAYLOAD_PATTERN)) == 0) {
+ ssize_t pat_len = strlen(last_arg + strlen(PAYLOAD_PATTERN));
+
+					/* A customized pattern can't be more than 7 characters;
+					 * if it's longer, don't treat it as a payload marker.
+					 */
+ if (pat_len < sizeof(appctx->cli_payload_pat)) {
+ appctx->st1 |= APPCTX_CLI_ST1_PAYLOAD;
+ /* copy the customized pattern, don't store the << */
+ strncpy(appctx->cli_payload_pat, last_arg + strlen(PAYLOAD_PATTERN), sizeof(appctx->cli_payload_pat)-1);
+ appctx->cli_payload_pat[sizeof(appctx->cli_payload_pat)-1] = '\0';
+ appctx->chunk->data++; // keep the trailing \0 after the pattern
+ }
+ }
+ else {
+ /* no payload, the command is complete: parse the request */
+ cli_parse_request(appctx);
+ chunk_reset(appctx->chunk);
+ }
+ }
+
+ /* re-adjust req buffer */
+ co_skip(sc_oc(sc), reql);
+ sc_opposite(sc)->flags |= SC_FL_RCV_ONCE; /* we plan to read small requests */
+ }
+ else { /* output functions */
+ struct cli_print_ctx *ctx;
+ const char *msg;
+ int sev;
+
+ switch (appctx->st0) {
+ case CLI_ST_PROMPT:
+ break;
+ case CLI_ST_PRINT: /* print const message in msg */
+ case CLI_ST_PRINT_ERR: /* print const error in msg */
+ case CLI_ST_PRINT_DYN: /* print dyn message in msg, free */
+ case CLI_ST_PRINT_DYNERR: /* print dyn error in err, free */
+ case CLI_ST_PRINT_UMSG: /* print usermsgs_ctx and reset it */
+ case CLI_ST_PRINT_UMSGERR: /* print usermsgs_ctx as error and reset it */
+ /* the message is in the svcctx */
+ ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ if (appctx->st0 == CLI_ST_PRINT || appctx->st0 == CLI_ST_PRINT_ERR) {
+ sev = appctx->st0 == CLI_ST_PRINT_ERR ?
+ LOG_ERR : ctx->severity;
+ msg = ctx->msg;
+ }
+ else if (appctx->st0 == CLI_ST_PRINT_DYN || appctx->st0 == CLI_ST_PRINT_DYNERR) {
+ sev = appctx->st0 == CLI_ST_PRINT_DYNERR ?
+ LOG_ERR : ctx->severity;
+ msg = ctx->err;
+ if (!msg) {
+ sev = LOG_ERR;
+ msg = "Out of memory.\n";
+ }
+ }
+ else if (appctx->st0 == CLI_ST_PRINT_UMSG ||
+ appctx->st0 == CLI_ST_PRINT_UMSGERR) {
+ sev = appctx->st0 == CLI_ST_PRINT_UMSGERR ?
+ LOG_ERR : ctx->severity;
+ msg = usermsgs_str();
+ }
+ else {
+ sev = LOG_ERR;
+ msg = "Internal error.\n";
+ }
+
+ if (cli_output_msg(appctx, msg, sev, cli_get_severity_output(appctx)) != -1) {
+ if (appctx->st0 == CLI_ST_PRINT_DYN ||
+ appctx->st0 == CLI_ST_PRINT_DYNERR) {
+ ha_free(&ctx->err);
+ }
+ else if (appctx->st0 == CLI_ST_PRINT_UMSG ||
+ appctx->st0 == CLI_ST_PRINT_UMSGERR) {
+ usermsgs_clr(NULL);
+ }
+ appctx->st0 = CLI_ST_PROMPT;
+ }
+ break;
+
+ case CLI_ST_CALLBACK: /* use custom pointer */
+ if (appctx->io_handler)
+ if (appctx->io_handler(appctx)) {
+ appctx->st0 = CLI_ST_PROMPT;
+ if (appctx->io_release) {
+ appctx->io_release(appctx);
+ appctx->io_release = NULL;
+ }
+ }
+ break;
+ default: /* abnormal state */
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ break;
+ }
+
+ /* The post-command prompt is either LF alone or LF + '> ' in interactive mode */
+ if (appctx->st0 == CLI_ST_PROMPT) {
+ char prompt_buf[20];
+ const char *prompt = "";
+
+ if (appctx->st1 & APPCTX_CLI_ST1_PROMPT) {
+ /*
+ * when entering a payload with interactive mode, change the prompt
+ * to emphasize that more data can still be sent
+ */
+ if (appctx->chunk->data && appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)
+ prompt = "+ ";
+ else if (appctx->st1 & APPCTX_CLI_ST1_TIMED) {
+ uint up = ns_to_sec(now_ns - start_time_ns);
+ snprintf(prompt_buf, sizeof(prompt_buf),
+ "\n[%u:%02u:%02u:%02u]> ",
+ (up / 86400), (up / 3600) % 24, (up / 60) % 60, up % 60);
+ prompt = prompt_buf;
+ }
+ else
+ prompt = "\n> ";
+ }
+ else {
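+				/* non-interactive mode: only emit a trailing LF after the
+				 * response, unless gathering a payload or LF is suppressed.
+				 */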
+ if (!(appctx->st1 & (APPCTX_CLI_ST1_PAYLOAD|APPCTX_CLI_ST1_NOLF)))
+ prompt = "\n";
+ }
+
+ if (applet_putstr(appctx, prompt) != -1) {
+ applet_reset_svcctx(appctx);
+ appctx->st0 = CLI_ST_GETREQ;
+ }
+ }
+
+ /* If the output functions are still there, it means they require more room. */
+ if (appctx->st0 >= CLI_ST_OUTPUT) {
+ applet_wont_consume(appctx);
+ break;
+ }
+
+ /* Now we close the output if we're not in interactive
+ * mode and the request buffer is empty. This still
+ * allows pipelined requests to be sent in
+ * non-interactive mode.
+ */
+ if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && !co_data(req) && (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD))) {
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+ appctx->st0 = CLI_ST_END;
+ continue;
+ }
+
+ /* switch state back to GETREQ to read next requests */
+ applet_reset_svcctx(appctx);
+ appctx->st0 = CLI_ST_GETREQ;
+ applet_will_consume(appctx);
+ applet_expect_data(appctx);
+
+ /* reactivate the \n at the end of the response for the next command */
+ appctx->st1 &= ~APPCTX_CLI_ST1_NOLF;
+
+ /* this forces us to yield between pipelined commands and
+ * avoid extremely long latencies (e.g. "del map" etc). In
+ * addition this increases the likelihood that the stream
+ * refills the buffer with new bytes in non-interactive
+ * mode, avoiding to close on apparently empty commands.
+ */
+ if (co_data(sc_oc(sc))) {
+ appctx_wakeup(appctx);
+ goto out;
+ }
+ }
+ }
+
+ out:
+ return;
+}
+
+/* This is called when the stream connector is closed. For instance, upon an
+ * external abort, we won't call the i/o handler anymore so we may need to
+ * remove back references to the stream currently being dumped.
+ */
+static void cli_release_handler(struct appctx *appctx)
+{
+ free_trash_chunk(appctx->chunk);
+ appctx->chunk = NULL;
+
+ if (appctx->io_release) {
+ appctx->io_release(appctx);
+ appctx->io_release = NULL;
+ }
+ else if (appctx->st0 == CLI_ST_PRINT_DYN || appctx->st0 == CLI_ST_PRINT_DYNERR) {
+ struct cli_print_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ ha_free(&ctx->err);
+ }
+ else if (appctx->st0 == CLI_ST_PRINT_UMSG || appctx->st0 == CLI_ST_PRINT_UMSGERR) {
+ usermsgs_clr(NULL);
+ }
+}
+
+/* This function dumps all environment variables to the buffer. It returns 0
+ * if the output buffer is full and it needs to be called again, otherwise
+ * non-zero. It takes its context from the show_env_ctx in svcctx, and will
+ * start from ->var and dump only one variable if ->show_one is set.
+ */
+static int cli_io_handler_show_env(struct appctx *appctx)
+{
+ struct show_env_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ char **var = ctx->var;
+
+	/* FIXME: Don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ chunk_reset(&trash);
+
+	/* loop over the variables, stopping as soon as the output
+	 * buffer is full.
+	 */
+ while (*var) {
+ chunk_printf(&trash, "%s\n", *var);
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ if (ctx->show_one)
+ break;
+ var++;
+ ctx->var = var;
+ }
+
+ /* dump complete */
+ return 1;
+}
+
+/* This function dumps all file descriptors states (or the requested one) to
+ * the buffer. It returns 0 if the output buffer is full and it needs to be
+ * called again, otherwise non-zero. It takes its context from the show_fd_ctx
+ * in svcctx, only dumps one entry if ->show_one is non-zero, and (re)starts
+ * from ->fd.
+ */
+static int cli_io_handler_show_fd(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct show_fd_ctx *fdctx = appctx->svcctx;
+ uint match = fdctx->show_mask;
+ int fd = fdctx->fd;
+ int ret = 1;
+
+	/* FIXME: Don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ goto end;
+
+ chunk_reset(&trash);
+
+ /* isolate the threads once per round. We're limited to a buffer worth
+ * of output anyway, it cannot last very long.
+ */
+ thread_isolate();
+
+	/* loop over all file descriptors, stopping as soon as the output
+	 * buffer is full.
+	 */
+ while (fd >= 0 && fd < global.maxsock) {
+ struct fdtab fdt;
+ const struct listener *li = NULL;
+ const struct server *sv = NULL;
+ const struct proxy *px = NULL;
+ const struct connection *conn = NULL;
+ const struct mux_ops *mux = NULL;
+ const struct xprt_ops *xprt = NULL;
+ const void *ctx = NULL;
+ const void *xprt_ctx = NULL;
+ const struct quic_conn *qc = NULL;
+ uint32_t conn_flags = 0;
+ uint8_t conn_err = 0;
+ int is_back = 0;
+ int suspicious = 0;
+
+ fdt = fdtab[fd];
+
+ /* When DEBUG_FD is set, we also report closed FDs that have a
+ * non-null event count to detect stuck ones.
+ */
+ if (!fdt.owner) {
+#ifdef DEBUG_FD
+ if (!fdt.event_count)
+#endif
+ goto skip; // closed
+ }
+ else if (fdt.iocb == sock_conn_iocb) {
+ conn = (const struct connection *)fdt.owner;
+ conn_flags = conn->flags;
+ conn_err = conn->err_code;
+ mux = conn->mux;
+ ctx = conn->ctx;
+ xprt = conn->xprt;
+ xprt_ctx = conn->xprt_ctx;
+ li = objt_listener(conn->target);
+ sv = objt_server(conn->target);
+ px = objt_proxy(conn->target);
+ is_back = conn_is_back(conn);
+ if (atleast2(fdt.thread_mask))
+ suspicious = 1;
+ if (conn->handle.fd != fd)
+ suspicious = 1;
+ }
+#if defined(USE_QUIC)
+ else if (fdt.iocb == quic_conn_sock_fd_iocb) {
+ qc = fdtab[fd].owner;
+ li = qc ? qc->li : NULL;
+ xprt_ctx = qc ? qc->xprt_ctx : NULL;
+ conn = qc ? qc->conn : NULL;
+ xprt = conn ? conn->xprt : NULL; // in fact it's &ssl_quic
+ mux = conn ? conn->mux : NULL;
+ /* quic_conns don't always have a connection but they
+ * always have an xprt_ctx.
+ */
+ }
+ else if (fdt.iocb == quic_lstnr_sock_fd_iocb) {
+ li = objt_listener(fdtab[fd].owner);
+ }
+#endif
+ else if (fdt.iocb == sock_accept_iocb)
+ li = fdt.owner;
+
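+		/* apply the type filters: FDs attached to a connection (or an
+		 * xprt_ctx) are matched against the fe/sv/px flags, the other
+		 * ones against the listener/pipe flags.
+		 */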
+ if (!(((conn || xprt_ctx) &&
+ ((match & CLI_SHOWFD_F_SV && sv) ||
+ (match & CLI_SHOWFD_F_PX && px) ||
+ (match & CLI_SHOWFD_F_FE && li))) ||
+ (!conn &&
+ ((match & CLI_SHOWFD_F_LI && li) ||
+ (match & CLI_SHOWFD_F_PI && !li /* only pipes match this */))))) {
+ /* not a desired type */
+ goto skip;
+ }
+
+ if (!fdt.thread_mask)
+ suspicious = 1;
+
+ chunk_printf(&trash,
+ " %5d : st=0x%06x(%c%c %c%c%c%c%c W:%c%c%c R:%c%c%c) ref=%#x gid=%d tmask=0x%lx umask=0x%lx prmsk=0x%lx pwmsk=0x%lx owner=%p iocb=%p(",
+ fd,
+ fdt.state,
+ (fdt.state & FD_CLONED) ? 'C' : 'c',
+ (fdt.state & FD_LINGER_RISK) ? 'L' : 'l',
+ (fdt.state & FD_POLL_HUP) ? 'H' : 'h',
+ (fdt.state & FD_POLL_ERR) ? 'E' : 'e',
+ (fdt.state & FD_POLL_OUT) ? 'O' : 'o',
+ (fdt.state & FD_POLL_PRI) ? 'P' : 'p',
+ (fdt.state & FD_POLL_IN) ? 'I' : 'i',
+ (fdt.state & FD_EV_SHUT_W) ? 'S' : 's',
+ (fdt.state & FD_EV_READY_W) ? 'R' : 'r',
+ (fdt.state & FD_EV_ACTIVE_W) ? 'A' : 'a',
+ (fdt.state & FD_EV_SHUT_R) ? 'S' : 's',
+ (fdt.state & FD_EV_READY_R) ? 'R' : 'r',
+ (fdt.state & FD_EV_ACTIVE_R) ? 'A' : 'a',
+ (fdt.refc_tgid >> 4) & 0xffff,
+ (fdt.refc_tgid) & 0xffff,
+ fdt.thread_mask, fdt.update_mask,
+ polled_mask[fd].poll_recv,
+ polled_mask[fd].poll_send,
+ fdt.owner,
+ fdt.iocb);
+ resolve_sym_name(&trash, NULL, fdt.iocb);
+
+ if (!fdt.owner) {
+ chunk_appendf(&trash, ")");
+ }
+ else if (conn) {
+ chunk_appendf(&trash, ") back=%d cflg=0x%08x cerr=%d", is_back, conn_flags, conn_err);
+
+ if (!(conn->flags & CO_FL_FDLESS) && conn->handle.fd != fd) {
+ chunk_appendf(&trash, " fd=%d(BOGUS)", conn->handle.fd);
+ suspicious = 1;
+ } else if ((conn->flags & CO_FL_FDLESS) && (qc != conn->handle.qc)) {
+ chunk_appendf(&trash, " qc=%p(BOGUS)", conn->handle.qc);
+ suspicious = 1;
+ } else {
+ struct sockaddr_storage sa;
+ socklen_t salen;
+
+ salen = sizeof(sa);
+ if (getsockname(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ chunk_appendf(&trash, " fam=ipv4 lport=%d", ntohs(((const struct sockaddr_in *)&sa)->sin_port));
+ else if (sa.ss_family == AF_INET6)
+ chunk_appendf(&trash, " fam=ipv6 lport=%d", ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port));
+ else if (sa.ss_family == AF_UNIX)
+ chunk_appendf(&trash, " fam=unix");
+ }
+
+ salen = sizeof(sa);
+ if (getpeername(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ chunk_appendf(&trash, " rport=%d", ntohs(((const struct sockaddr_in *)&sa)->sin_port));
+ else if (sa.ss_family == AF_INET6)
+ chunk_appendf(&trash, " rport=%d", ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port));
+ }
+ }
+
+ if (px)
+ chunk_appendf(&trash, " px=%s", px->id);
+ else if (sv)
+ chunk_appendf(&trash, " sv=%s/%s", sv->proxy->id, sv->id);
+ else if (li)
+ chunk_appendf(&trash, " fe=%s", li->bind_conf->frontend->id);
+
+ if (mux) {
+ chunk_appendf(&trash, " mux=%s ctx=%p", mux->name, ctx);
+ if (!ctx && !qc)
+ suspicious = 1;
+ if (mux->show_fd)
+ suspicious |= mux->show_fd(&trash, fdt.owner);
+ }
+ else
+ chunk_appendf(&trash, " nomux");
+
+ chunk_appendf(&trash, " xprt=%s", xprt ? xprt->name : "");
+ if (xprt) {
+ if (xprt_ctx || xprt->show_fd)
+ chunk_appendf(&trash, " xprt_ctx=%p", xprt_ctx);
+ if (xprt->show_fd)
+ suspicious |= xprt->show_fd(&trash, conn, xprt_ctx);
+ }
+ }
+ else if (li && !xprt_ctx) {
+ struct sockaddr_storage sa;
+ socklen_t salen;
+
+ chunk_appendf(&trash, ") l.st=%s fe=%s",
+ listener_state_str(li),
+ li->bind_conf->frontend->id);
+
+ salen = sizeof(sa);
+ if (getsockname(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ chunk_appendf(&trash, " fam=ipv4 lport=%d", ntohs(((const struct sockaddr_in *)&sa)->sin_port));
+ else if (sa.ss_family == AF_INET6)
+ chunk_appendf(&trash, " fam=ipv6 lport=%d", ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port));
+ else if (sa.ss_family == AF_UNIX)
+ chunk_appendf(&trash, " fam=unix");
+ }
+ }
+ else
+ chunk_appendf(&trash, ")");
+
+#ifdef DEBUG_FD
+ chunk_appendf(&trash, " evcnt=%u", fdtab[fd].event_count);
+ if (fdtab[fd].event_count >= 1000000)
+ suspicious = 1;
+#endif
+ chunk_appendf(&trash, "%s\n", suspicious ? " !" : "");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ fdctx->fd = fd;
+ ret = 0;
+ break;
+ }
+ skip:
+ if (fdctx->show_one)
+ break;
+
+ fd++;
+ }
+
+ end:
+ /* dump complete */
+
+ thread_release();
+ return ret;
+}
+
+/*
+ * CLI IO handler for `show cli sockets`.
+ * Uses the svcctx as a show_sock_ctx to store/retrieve the bind_conf and the
+ * listener pointers.
+ */
+static int cli_io_handler_show_cli_sock(struct appctx *appctx)
+{
+ struct show_sock_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct bind_conf *bind_conf = ctx->bind_conf;
+
+ if (!global.cli_fe)
+ goto done;
+
+ chunk_reset(&trash);
+
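+	/* on the first call, start from the first bind_conf of the CLI
+	 * frontend; subsequent calls resume from the context saved when
+	 * the buffer was full.
+	 */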
+ if (!bind_conf) {
+ /* first call */
+ if (applet_putstr(appctx, "# socket lvl processes\n") == -1)
+ goto full;
+ bind_conf = LIST_ELEM(global.cli_fe->conf.bind.n, typeof(bind_conf), by_fe);
+ }
+
+ list_for_each_entry_from(bind_conf, &global.cli_fe->conf.bind, by_fe) {
+ struct listener *l = ctx->listener;
+
+ if (!l)
+ l = LIST_ELEM(bind_conf->listeners.n, typeof(l), by_bind);
+
+ list_for_each_entry_from(l, &bind_conf->listeners, by_bind) {
+ char addr[46];
+ char port[6];
+
+ if (l->rx.addr.ss_family == AF_UNIX) {
+ const struct sockaddr_un *un;
+
+ un = (struct sockaddr_un *)&l->rx.addr;
+ if (un->sun_path[0] == '\0') {
+ chunk_appendf(&trash, "abns@%s ", un->sun_path+1);
+ } else {
+ chunk_appendf(&trash, "unix@%s ", un->sun_path);
+ }
+ } else if (l->rx.addr.ss_family == AF_INET) {
+ addr_to_str(&l->rx.addr, addr, sizeof(addr));
+ port_to_str(&l->rx.addr, port, sizeof(port));
+ chunk_appendf(&trash, "ipv4@%s:%s ", addr, port);
+ } else if (l->rx.addr.ss_family == AF_INET6) {
+ addr_to_str(&l->rx.addr, addr, sizeof(addr));
+ port_to_str(&l->rx.addr, port, sizeof(port));
+ chunk_appendf(&trash, "ipv6@[%s]:%s ", addr, port);
+ } else if (l->rx.addr.ss_family == AF_CUST_SOCKPAIR) {
+ chunk_appendf(&trash, "sockpair@%d ", ((struct sockaddr_in *)&l->rx.addr)->sin_addr.s_addr);
+ } else
+ chunk_appendf(&trash, "unknown ");
+
+ if ((bind_conf->level & ACCESS_LVL_MASK) == ACCESS_LVL_ADMIN)
+ chunk_appendf(&trash, "admin ");
+ else if ((bind_conf->level & ACCESS_LVL_MASK) == ACCESS_LVL_OPER)
+ chunk_appendf(&trash, "operator ");
+ else if ((bind_conf->level & ACCESS_LVL_MASK) == ACCESS_LVL_USER)
+ chunk_appendf(&trash, "user ");
+ else
+ chunk_appendf(&trash, " ");
+
+ chunk_appendf(&trash, "all\n");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ctx->bind_conf = bind_conf;
+ ctx->listener = l;
+ goto full;
+ }
+ }
+ }
+ done:
+ return 1;
+ full:
+ return 0;
+}
+
+
+/* parse a "show env" CLI request. Returns 0 if it needs to continue, 1 if it
+ * wants to stop here. It reserves a show_env_ctx where it puts the variable to
+ * be dumped as well as a flag if a single variable is requested, otherwise puts
+ * environ there.
+ */
+static int cli_parse_show_env(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_env_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ extern char **environ;
+ char **var;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ var = environ;
+
+ if (*args[2]) {
+ int len = strlen(args[2]);
+
+ for (; *var; var++) {
+ if (strncmp(*var, args[2], len) == 0 &&
+ (*var)[len] == '=')
+ break;
+ }
+ if (!*var)
+ return cli_err(appctx, "Variable not found\n");
+
+ ctx->show_one = 1;
+ }
+ ctx->var = var;
+ return 0;
+}
+
+/* parse a "show fd" CLI request. Returns 0 if it needs to continue, 1 if it
+ * wants to stop here. It sets a show_fd_ctx context where, if a specific fd is
+ * requested, it puts the FD number into ->fd and sets ->show_one to 1.
+ */
+static int cli_parse_show_fd(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_fd_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ const char *c;
+ int arg;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ arg = 2;
+
+ /* when starting with an inversion we preset every flag */
+ if (*args[arg] == '!' || *args[arg] == '-')
+ ctx->show_mask = CLI_SHOWFD_F_ANY;
+
+ while (*args[arg] && !isdigit((uchar)*args[arg])) {
+ uint flag = 0, inv = 0;
+ c = args[arg];
+ while (*c) {
+ switch (*c) {
+ case '!': inv = !inv; break;
+ case '-': inv = !inv; break;
+ case 'p': flag = CLI_SHOWFD_F_PI; break;
+ case 'l': flag = CLI_SHOWFD_F_LI; break;
+ case 'c': flag = CLI_SHOWFD_F_CO; break;
+ case 'f': flag = CLI_SHOWFD_F_FE; break;
+ case 'b': flag = CLI_SHOWFD_F_BE; break;
+ case 's': flag = CLI_SHOWFD_F_SV; break;
+ case 'd': flag = CLI_SHOWFD_F_PX; break;
+ default: return cli_err(appctx, "Invalid FD type\n");
+ }
+ c++;
+ if (!inv)
+ ctx->show_mask |= flag;
+ else
+ ctx->show_mask &= ~flag;
+ }
+ arg++;
+ }
+
+ /* default mask is to show everything */
+ if (!ctx->show_mask)
+ ctx->show_mask = CLI_SHOWFD_F_ANY;
+
+ if (*args[arg]) {
+		ctx->fd = atoi(args[arg]); /* use args[arg], flags may have shifted the position */
+ ctx->show_one = 1;
+ }
+
+ return 0;
+}
+
+/* parse a "set timeout" CLI request. It always returns 1. */
+static int cli_parse_set_timeout(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct stream *s = appctx_strm(appctx);
+
+ if (strcmp(args[2], "cli") == 0) {
+ unsigned timeout;
+ const char *res;
+
+ if (!*args[3])
+ return cli_err(appctx, "Expects an integer value.\n");
+
+ res = parse_time_err(args[3], &timeout, TIME_UNIT_S);
+ if (res || timeout < 1)
+ return cli_err(appctx, "Invalid timeout value.\n");
+
+ s->scf->ioto = 1 + MS_TO_TICKS(timeout*1000);
+ task_wakeup(s->task, TASK_WOKEN_MSG); // recompute timeouts
+ return 1;
+ }
+
+ return cli_err(appctx, "'set timeout' only supports 'cli'.\n");
+}
+
+/* parse a "set maxconn global" command. It always returns 1. */
+static int cli_parse_set_maxconn_global(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int v;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Expects an integer value.\n");
+
+ v = atoi(args[3]);
+ if (v > global.hardmaxconn)
+ return cli_err(appctx, "Value out of range.\n");
+
+ /* check for unlimited values */
+ if (v <= 0)
+ v = global.hardmaxconn;
+
+ global.maxconn = v;
+
+ /* Dequeues all of the listeners waiting for a resource */
+ dequeue_all_listeners();
+
+ return 1;
+}
+
+static int set_severity_output(int *target, char *argument)
+{
+ if (strcmp(argument, "none") == 0) {
+ *target = CLI_SEVERITY_NONE;
+ return 1;
+ }
+ else if (strcmp(argument, "number") == 0) {
+ *target = CLI_SEVERITY_NUMBER;
+ return 1;
+ }
+ else if (strcmp(argument, "string") == 0) {
+ *target = CLI_SEVERITY_STRING;
+ return 1;
+ }
+ return 0;
+}
+
+/* parse a "set severity-output" command. */
+static int cli_parse_set_severity_output(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ /* this will ask the applet to not output a \n after the command */
+ if (strcmp(args[3], "-") == 0)
+ appctx->st1 |= APPCTX_CLI_ST1_NOLF;
+
+ if (*args[2] && set_severity_output(&appctx->cli_severity_output, args[2]))
+ return 0;
+
+ return cli_err(appctx, "one of 'none', 'number', 'string' is a required argument\n");
+}
+
+
+/* show the level of the current CLI session */
+static int cli_parse_show_lvl(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if ((appctx->cli_level & ACCESS_LVL_MASK) == ACCESS_LVL_ADMIN)
+ return cli_msg(appctx, LOG_INFO, "admin\n");
+ else if ((appctx->cli_level & ACCESS_LVL_MASK) == ACCESS_LVL_OPER)
+ return cli_msg(appctx, LOG_INFO, "operator\n");
+ else if ((appctx->cli_level & ACCESS_LVL_MASK) == ACCESS_LVL_USER)
+ return cli_msg(appctx, LOG_INFO, "user\n");
+ else
+ return cli_msg(appctx, LOG_INFO, "unknown\n");
+}
+
+/* parse and set the CLI level dynamically */
+static int cli_parse_set_lvl(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ /* this will ask the applet to not output a \n after the command */
+ if (strcmp(args[1], "-") == 0)
+ appctx->st1 |= APPCTX_CLI_ST1_NOLF;
+
+ if (strcmp(args[0], "operator") == 0) {
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER)) {
+ return 1;
+ }
+ appctx->cli_level &= ~ACCESS_LVL_MASK;
+ appctx->cli_level |= ACCESS_LVL_OPER;
+
+ } else if (strcmp(args[0], "user") == 0) {
+ if (!cli_has_level(appctx, ACCESS_LVL_USER)) {
+ return 1;
+ }
+ appctx->cli_level &= ~ACCESS_LVL_MASK;
+ appctx->cli_level |= ACCESS_LVL_USER;
+ }
+ appctx->cli_level &= ~(ACCESS_EXPERT|ACCESS_EXPERIMENTAL);
+ return 1;
+}
+
+
+/* parse and set the CLI expert/experimental-mode dynamically */
+static int cli_parse_expert_experimental_mode(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int level;
+ char *level_str;
+ char *output = NULL;
+
+ /* this will ask the applet to not output a \n after the command */
+ if (*args[1] && *args[2] && strcmp(args[2], "-") == 0)
+ appctx->st1 |= APPCTX_CLI_ST1_NOLF;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (strcmp(args[0], "expert-mode") == 0) {
+ level = ACCESS_EXPERT;
+ level_str = "expert-mode";
+ }
+ else if (strcmp(args[0], "experimental-mode") == 0) {
+ level = ACCESS_EXPERIMENTAL;
+ level_str = "experimental-mode";
+ }
+ else if (strcmp(args[0], "mcli-debug-mode") == 0) {
+ level = ACCESS_MCLI_DEBUG;
+ level_str = "mcli-debug-mode";
+ }
+ else {
+ return 1;
+ }
+
+ if (!*args[1]) {
+ memprintf(&output, "%s is %s\n", level_str,
+ (appctx->cli_level & level) ? "ON" : "OFF");
+ return cli_dynmsg(appctx, LOG_INFO, output);
+ }
+
+ appctx->cli_level &= ~level;
+ if (strcmp(args[1], "on") == 0)
+ appctx->cli_level |= level;
+ return 1;
+}
+
+/* shows HAProxy version */
+static int cli_parse_show_version(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *msg = NULL;
+
+ return cli_dynmsg(appctx, LOG_INFO, memprintf(&msg, "%s\n", haproxy_version));
+}
+
+int cli_parse_default(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ return 0;
+}
+
+/* Enable or disable the anonymized mode. It returns 1 on success, or displays an error message otherwise. */
+static int cli_parse_set_anon(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ uint32_t tmp;
+ long long key;
+
+ if (strcmp(args[2], "on") == 0) {
+
+ if (*args[3]) {
+ key = atoll(args[3]);
+ if (key < 1 || key > UINT_MAX)
+ return cli_err(appctx, "Value out of range (1 to 4294967295 expected).\n");
+ appctx->cli_anon_key = key;
+ }
+ else {
+ tmp = HA_ATOMIC_LOAD(&global.anon_key);
+ if (tmp != 0)
+ appctx->cli_anon_key = tmp;
+ else
+ appctx->cli_anon_key = ha_random32();
+ }
+ }
+ else if (strcmp(args[2], "off") == 0) {
+
+ if (*args[3]) {
+ return cli_err(appctx, "Key can't be added while disabling anonymized mode\n");
+ }
+ else {
+ appctx->cli_anon_key = 0;
+ }
+ }
+ else {
+ return cli_err(appctx,
+ "'set anon' only supports :\n"
+ " - 'on' [key] to enable the anonymized mode\n"
+ " - 'off' to disable the anonymized mode");
+ }
+ return 1;
+}
+
+/* This function sets the global anonymizing key; restricted to level 'admin' */
+static int cli_parse_set_global_key(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ long long key;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return cli_err(appctx, "Permission denied\n");
+ if (!*args[2])
+ return cli_err(appctx, "Expects an integer value.\n");
+
+ key = atoll(args[2]);
+ if (key < 0 || key > UINT_MAX)
+ return cli_err(appctx, "Value out of range (0 to 4294967295 expected).\n");
+
+ HA_ATOMIC_STORE(&global.anon_key, key);
+ return 1;
+}
+
+/* Shows the anonymized mode state to everyone, and the key to operators and above. It always returns 1. */
+static int cli_parse_show_anon(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *msg = NULL;
+ char *anon_mode = NULL;
+ uint32_t c_key = appctx->cli_anon_key;
+
+ if (!c_key)
+ anon_mode = "Anonymized mode disabled";
+ else
+ anon_mode = "Anonymized mode enabled";
+
+	if ((appctx->cli_level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER && c_key != 0) {
+ cli_dynmsg(appctx, LOG_INFO, memprintf(&msg, "%s\nKey : %u\n", anon_mode, c_key));
+ }
+ else {
+ cli_dynmsg(appctx, LOG_INFO, memprintf(&msg, "%s\n", anon_mode));
+ }
+
+ return 1;
+}
+
+/* parse a "set rate-limit" command. It always returns 1. */
+static int cli_parse_set_ratelimit(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int v;
+ int *res;
+ int mul = 1;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (strcmp(args[2], "connections") == 0 && strcmp(args[3], "global") == 0)
+ res = &global.cps_lim;
+ else if (strcmp(args[2], "sessions") == 0 && strcmp(args[3], "global") == 0)
+ res = &global.sps_lim;
+#ifdef USE_OPENSSL
+ else if (strcmp(args[2], "ssl-sessions") == 0 && strcmp(args[3], "global") == 0)
+ res = &global.ssl_lim;
+#endif
+ else if (strcmp(args[2], "http-compression") == 0 && strcmp(args[3], "global") == 0) {
+ res = &global.comp_rate_lim;
+ mul = 1024;
+ }
+ else {
+ return cli_err(appctx,
+ "'set rate-limit' only supports :\n"
+ " - 'connections global' to set the per-process maximum connection rate\n"
+ " - 'sessions global' to set the per-process maximum session rate\n"
+#ifdef USE_OPENSSL
+ " - 'ssl-sessions global' to set the per-process maximum SSL session rate\n"
+#endif
+ " - 'http-compression global' to set the per-process maximum compression speed in kB/s\n");
+ }
+
+ if (!*args[4])
+ return cli_err(appctx, "Expects an integer value.\n");
+
+ v = atoi(args[4]);
+ if (v < 0)
+ return cli_err(appctx, "Value out of range.\n");
+
+ *res = v * mul;
+
+ /* Dequeues all of the listeners waiting for a resource */
+ dequeue_all_listeners();
+
+ return 1;
+}
+
+/* parse the "expose-fd" argument on the bind lines */
+static int bind_parse_expose_fd(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing fd type", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (strcmp(args[cur_arg + 1], "listeners") == 0) {
+ conf->level |= ACCESS_FD_LISTENERS;
+ } else {
+ memprintf(err, "'%s' only supports 'listeners' (got '%s')",
+ args[cur_arg], args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "level" argument on the bind lines */
+static int bind_parse_level(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing level", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[cur_arg + 1], "user") == 0) {
+ conf->level &= ~ACCESS_LVL_MASK;
+ conf->level |= ACCESS_LVL_USER;
+ } else if (strcmp(args[cur_arg + 1], "operator") == 0) {
+ conf->level &= ~ACCESS_LVL_MASK;
+ conf->level |= ACCESS_LVL_OPER;
+ } else if (strcmp(args[cur_arg + 1], "admin") == 0) {
+ conf->level &= ~ACCESS_LVL_MASK;
+ conf->level |= ACCESS_LVL_ADMIN;
+ } else {
+ memprintf(err, "'%s' only supports 'user', 'operator', and 'admin' (got '%s')",
+ args[cur_arg], args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+static int bind_parse_severity_output(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing severity format", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (set_severity_output(&conf->severity_output, args[cur_arg+1]))
+ return 0;
+ else {
+ memprintf(err, "'%s' only supports 'none', 'number', and 'string' (got '%s')",
+ args[cur_arg], args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+}
+
+/* Send all the bound sockets, always returns 1 */
+static int _getsocks(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ static int already_sent = 0;
+ char *cmsgbuf = NULL;
+ unsigned char *tmpbuf = NULL;
+ struct cmsghdr *cmsg;
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct connection *remote = sc_conn(sc_opposite(sc));
+ struct msghdr msghdr;
+ struct iovec iov;
+ struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
+ const char *ns_name, *if_name;
+ unsigned char ns_nlen, if_nlen;
+ int nb_queued;
+ int cur_fd = 0;
+ int *tmpfd;
+ int tot_fd_nb = 0;
+ int fd = -1;
+ int curoff = 0;
+ int old_fcntl = -1;
+ int ret;
+
+ if (!remote) {
+ ha_warning("Only works on real connections\n");
+ goto out;
+ }
+
+ fd = remote->handle.fd;
+
+	/* Temporarily set the FD to blocking mode, that will make our life easier */
+ old_fcntl = fcntl(fd, F_GETFL);
+ if (old_fcntl < 0) {
+ ha_warning("Couldn't get the flags for the unix socket\n");
+ goto out;
+ }
+ cmsgbuf = malloc(CMSG_SPACE(sizeof(int) * MAX_SEND_FD));
+ if (!cmsgbuf) {
+ ha_warning("Failed to allocate memory to send sockets\n");
+ goto out;
+ }
+ if (fcntl(fd, F_SETFL, old_fcntl &~ O_NONBLOCK) == -1) {
+ ha_warning("Cannot make the unix socket blocking\n");
+ goto out;
+ }
+ setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv));
+ iov.iov_base = &tot_fd_nb;
+ iov.iov_len = sizeof(tot_fd_nb);
+ if (!(strm_li(s)->bind_conf->level & ACCESS_FD_LISTENERS))
+ goto out;
+ memset(&msghdr, 0, sizeof(msghdr));
+ /*
+ * First, calculates the total number of FD, so that we can let
+ * the caller know how much it should expect.
+ */
+ for (cur_fd = 0;cur_fd < global.maxsock; cur_fd++)
+ tot_fd_nb += !!(fdtab[cur_fd].state & FD_EXPORTED);
+
+ if (tot_fd_nb == 0) {
+ if (already_sent)
+ ha_warning("_getsocks: attempt to get sockets but they were already sent and closed in this process!\n");
+ goto out;
+ }
+
+ /* First send the total number of file descriptors, so that the
+ * receiving end knows what to expect.
+ */
+ msghdr.msg_iov = &iov;
+ msghdr.msg_iovlen = 1;
+ ret = sendmsg(fd, &msghdr, 0);
+ if (ret != sizeof(tot_fd_nb)) {
+ ha_warning("Failed to send the number of sockets to send\n");
+ goto out;
+ }
+
+ /* Now send the fds */
+ msghdr.msg_control = cmsgbuf;
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int) * MAX_SEND_FD);
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_len = CMSG_LEN(MAX_SEND_FD * sizeof(int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ tmpfd = (int *)CMSG_DATA(cmsg);
+
+	/* For each socket, a message is sent, containing the following:
+ * Size of the namespace name (or 0 if none), as an unsigned char.
+ * The namespace name, if any
+ * Size of the interface name (or 0 if none), as an unsigned char
+ * The interface name, if any
+ * 32 bits of zeroes (used to be listener options).
+ */
+	/* We will send sockets in batches of MAX_SEND_FD; allocate a
+	 * buffer big enough to store the socket information.
+ */
+ tmpbuf = malloc(MAX_SEND_FD * (1 + MAXPATHLEN + 1 + IFNAMSIZ + sizeof(int)));
+ if (tmpbuf == NULL) {
+ ha_warning("Failed to allocate memory to transfer socket information\n");
+ goto out;
+ }
+
+ nb_queued = 0;
+ iov.iov_base = tmpbuf;
+ for (cur_fd = 0; cur_fd < global.maxsock; cur_fd++) {
+ if (!(fdtab[cur_fd].state & FD_EXPORTED))
+ continue;
+
+ ns_name = if_name = "";
+ ns_nlen = if_nlen = 0;
+
+ /* for now we can only retrieve namespaces and interfaces from
+ * pure listeners.
+ */
+ if (fdtab[cur_fd].iocb == sock_accept_iocb) {
+ const struct listener *l = fdtab[cur_fd].owner;
+
+ if (l->rx.settings->interface) {
+ if_name = l->rx.settings->interface;
+ if_nlen = strlen(if_name);
+ }
+
+#ifdef USE_NS
+ if (l->rx.settings->netns) {
+ ns_name = l->rx.settings->netns->node.key;
+ ns_nlen = l->rx.settings->netns->name_len;
+ }
+#endif
+ }
+
+ /* put the FD into the CMSG_DATA */
+ tmpfd[nb_queued++] = cur_fd;
+
+ /* first block is <ns_name_len> <ns_name> */
+ tmpbuf[curoff++] = ns_nlen;
+ if (ns_nlen)
+ memcpy(tmpbuf + curoff, ns_name, ns_nlen);
+ curoff += ns_nlen;
+
+ /* second block is <if_name_len> <if_name> */
+ tmpbuf[curoff++] = if_nlen;
+ if (if_nlen)
+ memcpy(tmpbuf + curoff, if_name, if_nlen);
+ curoff += if_nlen;
+
+ /* we used to send the listener options here before 2.3 */
+ memset(tmpbuf + curoff, 0, sizeof(int));
+ curoff += sizeof(int);
+
+ /* there's a limit to how many FDs may be sent at once */
+ if (nb_queued == MAX_SEND_FD) {
+ iov.iov_len = curoff;
+ if (sendmsg(fd, &msghdr, 0) != curoff) {
+ ha_warning("Failed to transfer sockets\n");
+				goto out; /* restore the FD flags and free the buffers instead of leaking them */
+ }
+
+ /* Wait for an ack */
+ do {
+ ret = recv(fd, &tot_fd_nb, sizeof(tot_fd_nb), 0);
+ } while (ret == -1 && errno == EINTR);
+
+ if (ret <= 0) {
+ ha_warning("Unexpected error while transferring sockets\n");
+				goto out; /* restore the FD flags and free the buffers instead of leaking them */
+ }
+ curoff = 0;
+ nb_queued = 0;
+ }
+ }
+
+ already_sent = 1;
+
+ /* flush pending stuff */
+ if (nb_queued) {
+ iov.iov_len = curoff;
+ cmsg->cmsg_len = CMSG_LEN(nb_queued * sizeof(int));
+ msghdr.msg_controllen = CMSG_SPACE(nb_queued * sizeof(int));
+ if (sendmsg(fd, &msghdr, 0) != curoff) {
+ ha_warning("Failed to transfer sockets\n");
+ goto out;
+ }
+ }
+
+out:
+	if (fd >= 0 && old_fcntl >= 0 && fcntl(fd, F_SETFL, old_fcntl) == -1) {
+		ha_warning("Cannot make the unix socket non-blocking\n");
+		/* do not jump back to "out" here, it would loop on the same failing call */
+	}
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+ appctx->st0 = CLI_ST_END;
+ free(cmsgbuf);
+ free(tmpbuf);
+ return 1;
+}
+
+static int cli_parse_simple(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (*args[0] == 'h')
+ /* help */
+ cli_gen_usage_msg(appctx, args);
+	else if (*args[0] == 'p') {
+		/* prompt */
+		if (strcmp(args[1], "timed") == 0) {
+			appctx->st1 |= APPCTX_CLI_ST1_PROMPT;
+			appctx->st1 ^= APPCTX_CLI_ST1_TIMED;
+		}
+		else
+			appctx->st1 ^= APPCTX_CLI_ST1_PROMPT;
+	}
+ else if (*args[0] == 'q') {
+ /* quit */
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+ appctx->st0 = CLI_ST_END;
+ }
+
+ return 1;
+}
+
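+/* Write the master CLI prompt to the response channel when PCLI_F_PROMPT is
+ * set: "+ " while gathering a payload, otherwise the master or worker prompt,
+ * optionally prefixed with the uptime in timed mode and suffixed with the
+ * active mode flags (x/e/d).
+ */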
+void pcli_write_prompt(struct stream *s)
+{
+ struct buffer *msg = get_trash_chunk();
+ struct channel *oc = sc_oc(s->scf);
+
+ if (!(s->pcli_flags & PCLI_F_PROMPT))
+ return;
+
+ if (s->pcli_flags & PCLI_F_PAYLOAD) {
+ chunk_appendf(msg, "+ ");
+ } else {
+ if (s->pcli_next_pid == 0) {
+ /* master's prompt */
+ if (s->pcli_flags & PCLI_F_TIMED) {
+ uint up = ns_to_sec(now_ns - start_time_ns);
+ chunk_appendf(msg, "[%u:%02u:%02u:%02u] ",
+ (up / 86400), (up / 3600) % 24, (up / 60) % 60, up % 60);
+ }
+
+ chunk_appendf(msg, "master%s",
+ (proc_self->failedreloads > 0) ? "[ReloadFailed]" : "");
+ }
+ else {
+ /* worker's prompt */
+ if (s->pcli_flags & PCLI_F_TIMED) {
+ const struct mworker_proc *tmp, *proc;
+ uint up;
+
+ /* set proc to the worker corresponding to pcli_next_pid or NULL */
+ proc = NULL;
+ list_for_each_entry(tmp, &proc_list, list) {
+ if (!(tmp->options & PROC_O_TYPE_WORKER))
+ continue;
+ if (tmp->pid == s->pcli_next_pid) {
+ proc = tmp;
+ break;
+ }
+ }
+
+ if (!proc)
+ chunk_appendf(msg, "[gone] ");
+ else {
+ up = date.tv_sec - proc->timestamp;
+					if ((int)up < 0) /* could be negative because of clock drift, never report it */
+ up = 0;
+ chunk_appendf(msg, "[%u:%02u:%02u:%02u] ",
+ (up / 86400), (up / 3600) % 24, (up / 60) % 60, up % 60);
+ }
+ }
+ chunk_appendf(msg, "%d", s->pcli_next_pid);
+ }
+
+ if (s->pcli_flags & (ACCESS_EXPERIMENTAL|ACCESS_EXPERT|ACCESS_MCLI_DEBUG)) {
+ chunk_appendf(msg, "(");
+
+ if (s->pcli_flags & ACCESS_EXPERIMENTAL)
+ chunk_appendf(msg, "x");
+
+ if (s->pcli_flags & ACCESS_EXPERT)
+ chunk_appendf(msg, "e");
+
+ if (s->pcli_flags & ACCESS_MCLI_DEBUG)
+ chunk_appendf(msg, "d");
+
+ chunk_appendf(msg, ")");
+ }
+
+ chunk_appendf(msg, "> ");
+
+ }
+ co_inject(oc, msg->area, msg->data);
+}
+
+/* The pcli_* functions are used for the CLI proxy in the master */
+
+
+/* flush the input buffer and output an error */
+void pcli_error(struct stream *s, const char *msg)
+{
+ struct buffer *buf = get_trash_chunk();
+ struct channel *oc = &s->res;
+ struct channel *ic = &s->req;
+
+ chunk_initstr(buf, msg);
+
+ if (likely(buf && buf->data))
+ co_inject(oc, buf->area, buf->data);
+
+ channel_erase(ic);
+
+}
+
+/* flush the input buffer, output the error and close */
+void pcli_reply_and_close(struct stream *s, const char *msg)
+{
+ struct buffer *buf = get_trash_chunk();
+
+ chunk_initstr(buf, msg);
+ stream_retnclose(s, buf);
+}
+
+static enum obj_type *pcli_pid_to_server(int proc_pid)
+{
+ struct mworker_proc *child;
+
+ /* return the mCLI applet of the master */
+ if (proc_pid == 0)
+ return &mcli_applet.obj_type;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->pid == proc_pid){
+ return &child->srv->obj_type;
+ }
+ }
+ return NULL;
+}
+
+/* Take a CLI prefix as argument (e.g. @!1234, @master, @1)
+ * Return:
+ * 0: master
+ * > 0: pid of a worker
+ * < 0: didn't find a worker
+ */
+static int pcli_prefix_to_pid(const char *prefix)
+{
+ int proc_pid;
+ struct mworker_proc *child;
+ char *errtol = NULL;
+
+ if (*prefix != '@') /* not a prefix, should not happen */
+ return -1;
+
+ prefix++;
+ if (!*prefix) /* sent @ alone, return the master */
+ return 0;
+
+ if (strcmp("master", prefix) == 0) {
+ return 0;
+ } else if (*prefix == '!') {
+ prefix++;
+ if (!*prefix)
+ return -1;
+
+ proc_pid = strtol(prefix, &errtol, 10);
+ if (*errtol != '\0')
+ return -1;
+ list_for_each_entry(child, &proc_list, list) {
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+ if (child->pid == proc_pid){
+ return child->pid;
+ }
+ }
+ } else {
+ struct mworker_proc *chosen = NULL;
+ /* this is a relative pid */
+
+ proc_pid = strtol(prefix, &errtol, 10);
+ if (*errtol != '\0')
+ return -1;
+
+ if (proc_pid == 0) /* return the master */
+ return 0;
+
+ if (proc_pid != 1) /* only the "@1" relative PID is supported */
+ return -1;
+
+		/* choose the right process: the current one is the one with the
+		   least number of reloads */
+ list_for_each_entry(child, &proc_list, list) {
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+ if (child->reloads == 0)
+ return child->pid;
+ else if (chosen == NULL || child->reloads < chosen->reloads)
+ chosen = child;
+ }
+ if (chosen)
+ return chosen->pid;
+ }
+ return -1;
+}
+
+/* Return:
+ * >= 0 : number of words to trim
+ * = -1 : error
+ */
+int pcli_find_and_exec_kw(struct stream *s, char **args, int argl, char **errmsg, int *next_pid)
+{
+ if (argl < 1)
+ return 0;
+
+ /* there is a prefix */
+ if (args[0][0] == '@') {
+ int target_pid = pcli_prefix_to_pid(args[0]);
+
+ if (target_pid == -1) {
+ memprintf(errmsg, "Can't find the target PID matching the prefix '%s'\n", args[0]);
+ return -1;
+ }
+
+ /* if the prefix is alone, define a default target */
+ if (argl == 1)
+ s->pcli_next_pid = target_pid;
+ else
+ *next_pid = target_pid;
+ return 1;
+ } else if (strcmp("prompt", args[0]) == 0) {
+ if (argl >= 2 && strcmp(args[1], "timed") == 0) {
+ s->pcli_flags |= PCLI_F_PROMPT;
+ s->pcli_flags ^= PCLI_F_TIMED;
+ }
+ else
+ s->pcli_flags ^= PCLI_F_PROMPT;
+ return argl; /* return the number of elements in the array */
+ } else if (strcmp("quit", args[0]) == 0) {
+ sc_schedule_abort(s->scf);
+ sc_schedule_shutdown(s->scf);
+ return argl; /* return the number of elements in the array */
+ } else if (strcmp(args[0], "operator") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_OPER)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+ s->pcli_flags &= ~ACCESS_LVL_MASK;
+ s->pcli_flags |= ACCESS_LVL_OPER;
+ return argl;
+
+ } else if (strcmp(args[0], "user") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_USER)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+ s->pcli_flags &= ~ACCESS_LVL_MASK;
+ s->pcli_flags |= ACCESS_LVL_USER;
+ return argl;
+
+ } else if (strcmp(args[0], "expert-mode") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_ADMIN)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+
+ s->pcli_flags &= ~ACCESS_EXPERT;
+ if ((argl > 1) && (strcmp(args[1], "on") == 0))
+ s->pcli_flags |= ACCESS_EXPERT;
+ return argl;
+
+ } else if (strcmp(args[0], "experimental-mode") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_ADMIN)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+ s->pcli_flags &= ~ACCESS_EXPERIMENTAL;
+ if ((argl > 1) && (strcmp(args[1], "on") == 0))
+ s->pcli_flags |= ACCESS_EXPERIMENTAL;
+ return argl;
+ } else if (strcmp(args[0], "mcli-debug-mode") == 0) {
+ if (!pcli_has_level(s, ACCESS_LVL_ADMIN)) {
+ memprintf(errmsg, "Permission denied!\n");
+ return -1;
+ }
+ s->pcli_flags &= ~ACCESS_MCLI_DEBUG;
+ if ((argl > 1) && (strcmp(args[1], "on") == 0))
+ s->pcli_flags |= ACCESS_MCLI_DEBUG;
+ return argl;
+ } else if (strcmp(args[0], "set") == 0) {
+ if ((argl > 1) && (strcmp(args[1], "severity-output") == 0)) {
+ if ((argl > 2) &&strcmp(args[2], "none") == 0) {
+ s->pcli_flags &= ~(ACCESS_MCLI_SEVERITY_NB|ACCESS_MCLI_SEVERITY_STR);
+ } else if ((argl > 2) && strcmp(args[2], "string") == 0) {
+ s->pcli_flags |= ACCESS_MCLI_SEVERITY_STR;
+ } else if ((argl > 2) && strcmp(args[2], "number") == 0) {
+ s->pcli_flags |= ACCESS_MCLI_SEVERITY_NB;
+ } else {
+ memprintf(errmsg, "one of 'none', 'number', 'string' is a required argument\n");
+ return -1;
+ }
+ /* only skip argl if we have "set severity-output" not only "set" */
+ return argl;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Parse the CLI request:
+ * - It basically does the same as cli_io_handler, but as a proxy
+ * - It can exec a command and strip non-forwardable commands
+ *
+ * Return:
+ * - the number of characters to forward, or
+ * - -1 if there is an error or not enough data
+ */
+int pcli_parse_request(struct stream *s, struct channel *req, char **errmsg, int *next_pid)
+{
+ char *str;
+ char *end;
+ char *args[MAX_CLI_ARGS + 1]; /* +1 for storing a NULL */
+ int argl; /* number of args */
+ char *p;
+ char *trim = NULL;
+ int wtrim = 0; /* number of words to trim */
+ int reql = 0;
+ int ret;
+ int i = 0;
+
+ /* we cannot deal with a wrapping buffer, so let's take care of this
+ * first.
+ */
+ if (b_head(&req->buf) + b_data(&req->buf) > b_wrap(&req->buf))
+ b_slow_realign(&req->buf, trash.area, co_data(req));
+
+ str = (char *)ci_head(req);
+ end = (char *)ci_stop(req);
+
+ p = str;
+
+ if (!(s->pcli_flags & PCLI_F_PAYLOAD)) {
+
+ /* Looks for the end of one command */
+ while (p+reql < end) {
+ /* handle escaping */
+ if (p[reql] == '\\') {
+ reql+=2;
+ continue;
+ }
+ if (p[reql] == ';' || p[reql] == '\n') {
+ /* found the end of the command */
+ p[reql] = '\n';
+ reql++;
+ break;
+ }
+ reql++;
+ }
+ } else {
+ while (p+reql < end) {
+ if (p[reql] == '\n') {
+ /* found the end of the line */
+ reql++;
+ break;
+ }
+ reql++;
+ }
+ }
+
+ /* set end to first byte after the end of the command */
+ end = p + reql;
+
+	/* no end of command found yet, need more data to parse! */
+ if (!reql || *(end-1) != '\n') {
+ ret = -1;
+ goto end;
+ }
+
+ /* in payload mode, skip the whole parsing/exec and just look for a pattern */
+ if (s->pcli_flags & PCLI_F_PAYLOAD) {
+ if (reql-1 == strlen(s->pcli_payload_pat)) {
+ /* the custom pattern len can be 0 (empty line) */
+ if (strncmp(str, s->pcli_payload_pat, strlen(s->pcli_payload_pat)) == 0) {
+ s->pcli_flags &= ~PCLI_F_PAYLOAD;
+ }
+ }
+ ret = reql;
+ goto end;
+ }
+
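+	/* temporarily replace the trailing LF with a NUL so the command can be
+	 * split as a string; it is restored before forwarding.
+	 */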
+ *(end-1) = '\0';
+
+ /* splits the command in words */
+ while (i < MAX_CLI_ARGS && p < end) {
+ /* skip leading spaces/tabs */
+ p += strspn(p, " \t");
+ if (!*p)
+ break;
+
+ args[i] = p;
+ while (1) {
+ p += strcspn(p, " \t\\");
+			/* chars escaped with backslashes (\) */
+ if (*p == '\\') {
+ if (!*++p)
+ break;
+ if (!*++p)
+ break;
+ } else {
+ break;
+ }
+ }
+ *p++ = 0;
+ i++;
+ }
+ argl = i;
+
+ /* first look for '<<' at the beginning of the last argument */
+ if (argl && strncmp(args[argl-1], PAYLOAD_PATTERN, strlen(PAYLOAD_PATTERN)) == 0) {
+ size_t pat_len = strlen(args[argl-1] + strlen(PAYLOAD_PATTERN));
+
+ /*
+ * A customized pattern can't be more than 7 characters
+ * if it's more, don't make it a payload
+ */
+ if (pat_len < sizeof(s->pcli_payload_pat)) {
+ s->pcli_flags |= PCLI_F_PAYLOAD;
+ /* copy the customized pattern, don't store the << */
+ strncpy(s->pcli_payload_pat, args[argl-1] + strlen(PAYLOAD_PATTERN), sizeof(s->pcli_payload_pat)-1);
+ s->pcli_payload_pat[sizeof(s->pcli_payload_pat)-1] = '\0';
+ }
+ }
+
+ for (; i < MAX_CLI_ARGS + 1; i++)
+ args[i] = NULL;
+
+ wtrim = pcli_find_and_exec_kw(s, args, argl, errmsg, next_pid);
+
+	/* Words are terminated by \0; we need to replace those \0s with spaces
+	   before forwarding them */
+ p = str;
+ while (p < end-1) {
+ if (*p == '\0')
+ *p = ' ';
+ p++;
+ }
+
+ *(end-1) = '\n';
+
+ if (wtrim > 0) {
+ trim = &args[wtrim][0];
+ if (trim == NULL) /* if this was the last word in the table */
+ trim = end;
+
+ b_del(&req->buf, trim - str);
+
+ ret = end - trim;
+ } else if (wtrim < 0) {
+ /* parsing error */
+ ret = -1;
+ goto end;
+ } else {
+ /* the whole string */
+ ret = end - str;
+ }
+
+ if (ret > 1) {
+
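+		/* re-inject the session's sticky modes as prefix commands before the
+		 * one to forward; the trailing '-' asks the target applet not to
+		 * emit the extra LF.
+		 */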
+ /* the mcli-debug-mode is only sent to the applet of the master */
+ if ((s->pcli_flags & ACCESS_MCLI_DEBUG) && *next_pid <= 0) {
+ ci_insert_line2(req, 0, "mcli-debug-mode on -", strlen("mcli-debug-mode on -"));
+ ret += strlen("mcli-debug-mode on -") + 2;
+ }
+ if (s->pcli_flags & ACCESS_EXPERIMENTAL) {
+ ci_insert_line2(req, 0, "experimental-mode on -", strlen("experimental-mode on -"));
+ ret += strlen("experimental-mode on -") + 2;
+ }
+ if (s->pcli_flags & ACCESS_EXPERT) {
+ ci_insert_line2(req, 0, "expert-mode on -", strlen("expert-mode on -"));
+ ret += strlen("expert-mode on -") + 2;
+ }
+ if (s->pcli_flags & ACCESS_MCLI_SEVERITY_STR) {
+ const char *cmd = "set severity-output string -";
+ ci_insert_line2(req, 0, cmd, strlen(cmd));
+ ret += strlen(cmd) + 2;
+ }
+ if (s->pcli_flags & ACCESS_MCLI_SEVERITY_NB) {
+ const char *cmd = "set severity-output number -";
+ ci_insert_line2(req, 0, cmd, strlen(cmd));
+ ret += strlen(cmd) + 2;
+ }
+
+ if (pcli_has_level(s, ACCESS_LVL_ADMIN)) {
+ goto end;
+ } else if (pcli_has_level(s, ACCESS_LVL_OPER)) {
+ ci_insert_line2(req, 0, "operator -", strlen("operator -"));
+ ret += strlen("operator -") + 2;
+ } else if (pcli_has_level(s, ACCESS_LVL_USER)) {
+ ci_insert_line2(req, 0, "user -", strlen("user -"));
+ ret += strlen("user -") + 2;
+ }
+ }
+end:
+
+ return ret;
+}
+
+int pcli_wait_for_request(struct stream *s, struct channel *req, int an_bit)
+{
+ int next_pid = -1;
+ int to_forward;
+ char *errmsg = NULL;
+
+ /* Don't read the next command if still processing the response of the
+ * current one. Just wait. At this stage, errors should be handled by
+ * the response analyzer.
+ */
+ if (s->res.analysers & AN_RES_WAIT_CLI)
+ return 0;
+
+ if ((s->pcli_flags & ACCESS_LVL_MASK) == ACCESS_LVL_NONE)
+ s->pcli_flags |= strm_li(s)->bind_conf->level & ACCESS_LVL_MASK;
+
+	/* a stream that comes from the reload listener only reports the reload
+	 * status and quits */
+ if (!(s->pcli_flags & PCLI_F_RELOAD)
+ && strm_li(s)->bind_conf == mcli_reload_bind_conf)
+ goto send_status;
+
+
+read_again:
+ /* if the channel is closed for read, we won't receive any more data
+ from the client, but we don't want to forward this close to the
+ server */
+ channel_dont_close(req);
+
+ /* We don't know yet to which server we will connect */
+ channel_dont_connect(req);
+
+ s->scf->flags |= SC_FL_RCV_ONCE;
+
+ /* need more data */
+ if (!ci_data(req))
+ goto missing_data;
+
+ /* If there is data available for analysis, log the end of the idle time. */
+ if (c_data(req) && s->logs.t_idle == -1)
+ s->logs.t_idle = ns_to_ms(now_ns - s->logs.accept_ts) - s->logs.t_handshake;
+
+ to_forward = pcli_parse_request(s, req, &errmsg, &next_pid);
+ if (to_forward > 0) {
+ int target_pid;
+ /* enough data */
+
+ /* forward only 1 command */
+ channel_forward(req, to_forward);
+
+ if (!(s->pcli_flags & PCLI_F_PAYLOAD)) {
+ /* we send only 1 command per request, and we write close after it */
+ sc_schedule_shutdown(s->scb);
+ } else {
+ pcli_write_prompt(s);
+ }
+
+ s->res.flags |= CF_WAKE_ONCE; /* need to be called again */
+ s->res.analysers |= AN_RES_WAIT_CLI;
+
+ if (!(s->flags & SF_ASSIGNED)) {
+ if (next_pid > -1)
+ target_pid = next_pid;
+ else
+ target_pid = s->pcli_next_pid;
+ /* we can connect now */
+ s->target = pcli_pid_to_server(target_pid);
+
+ if (!s->target)
+ goto server_disconnect;
+
+ s->flags |= (SF_DIRECT | SF_ASSIGNED);
+ channel_auto_connect(req);
+ }
+
+ } else if (to_forward == 0) {
+ /* we trimmed things but we might have other commands to consume */
+ pcli_write_prompt(s);
+ goto read_again;
+ } else if (to_forward == -1) {
+ if (!errmsg) /* no error means missing data */
+ goto missing_data;
+
+ /* there was an error during the parsing */
+ pcli_error(s, errmsg);
+ pcli_write_prompt(s);
+ }
+
+ return 0;
+
+send_help:
+ b_reset(&req->buf);
+ b_putblk(&req->buf, "help\n", 5);
+ goto read_again;
+
+send_status:
+ s->pcli_flags |= PCLI_F_RELOAD;
+ /* don't use ci_putblk here because SHUT_DONE could have been sent */
+ b_reset(&req->buf);
+ b_putblk(&req->buf, "_loadstatus;quit\n", 17);
+ goto read_again;
+
+missing_data:
+ if (s->scf->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) {
+ /* There is no more request or a only a partial one and we
+ * receive a close from the client, we can leave */
+ sc_schedule_shutdown(s->scf);
+ s->req.analysers &= ~AN_REQ_WAIT_CLI;
+ return 1;
+ }
+ else if (channel_full(req, global.tune.maxrewrite)) {
+ /* buffer is full and we didn't catch the end of a command */
+ goto send_help;
+ }
+ return 0;
+
+server_disconnect:
+ pcli_reply_and_close(s, "Can't connect to the target CLI!\n");
+ return 0;
+}
+
+int pcli_wait_for_response(struct stream *s, struct channel *rep, int an_bit)
+{
+ struct proxy *fe = strm_fe(s);
+ struct proxy *be = s->be;
+
+ if ((s->scb->flags & SC_FL_ERROR) || (rep->flags & (CF_READ_TIMEOUT|CF_WRITE_TIMEOUT)) ||
+ ((s->scf->flags & SC_FL_SHUT_DONE) && (rep->to_forward || co_data(rep)))) {
+ pcli_reply_and_close(s, "Can't connect to the target CLI!\n");
+ s->req.analysers &= ~AN_REQ_WAIT_CLI;
+ s->res.analysers &= ~AN_RES_WAIT_CLI;
+ return 0;
+ }
+ s->scb->flags |= SC_FL_RCV_ONCE; /* try to get back here ASAP */
+ s->scf->flags |= SC_FL_SND_NEVERWAIT;
+
+ /* don't forward the close */
+ channel_dont_close(&s->res);
+ channel_dont_close(&s->req);
+
+ if (s->pcli_flags & PCLI_F_PAYLOAD) {
+ s->res.analysers &= ~AN_RES_WAIT_CLI;
+ s->req.flags |= CF_WAKE_ONCE; /* need to be called again if there is some command left in the request */
+ return 0;
+ }
+
+ /* forward the data */
+ if (ci_data(rep)) {
+ c_adv(rep, ci_data(rep));
+ return 0;
+ }
+
+ if (s->scb->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) {
+ /* stream cleanup */
+
+ pcli_write_prompt(s);
+
+ s->scb->flags |= SC_FL_NOLINGER | SC_FL_NOHALF;
+ sc_abort(s->scb);
+ sc_shutdown(s->scb);
+
+ /*
+		 * Starting from here, this is the same code as
+		 * http_end_txn_clean_session().
+		 *
+		 * It allows frontend keep-alive while reconnecting to a
+		 * new server for each request.
+ */
+
+ if (s->flags & SF_BE_ASSIGNED) {
+ HA_ATOMIC_DEC(&be->beconn);
+ if (unlikely(s->srv_conn))
+ sess_change_server(s, NULL);
+ }
+
+ s->logs.t_close = ns_to_ms(now_ns - s->logs.accept_ts);
+ stream_process_counters(s);
+
+ /* don't count other requests' data */
+ s->logs.bytes_in -= ci_data(&s->req);
+ s->logs.bytes_out -= ci_data(&s->res);
+
+ /* we may need to know the position in the queue */
+ pendconn_free(s);
+
+ /* let's do a final log if we need it */
+ if (!LIST_ISEMPTY(&fe->logformat) && s->logs.logwait &&
+ !(s->flags & SF_MONITOR) &&
+ (!(fe->options & PR_O_NULLNOLOG) || s->req.total)) {
+ s->do_log(s);
+ }
+
+ /* stop tracking content-based counters */
+ stream_stop_content_counters(s);
+ stream_update_time_stats(s);
+
+ s->logs.accept_date = date; /* user-visible date for logging */
+ s->logs.accept_ts = now_ns; /* corrected date for internal use */
+		s->logs.t_handshake = 0; /* there is no handshake in a keep-alive connection */
+ s->logs.t_idle = -1;
+ s->logs.request_ts = 0;
+ s->logs.t_queue = -1;
+ s->logs.t_connect = -1;
+ s->logs.t_data = -1;
+ s->logs.t_close = 0;
+ s->logs.prx_queue_pos = 0; /* we get the number of pending conns before us */
+ s->logs.srv_queue_pos = 0; /* we will get this number soon */
+
+ s->logs.bytes_in = s->req.total = ci_data(&s->req);
+ s->logs.bytes_out = s->res.total = ci_data(&s->res);
+
+ stream_del_srv_conn(s);
+ if (objt_server(s->target)) {
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
+ }
+ if (may_dequeue_tasks(__objt_server(s->target), be))
+ process_srv_queue(__objt_server(s->target));
+ }
+
+ s->target = NULL;
+
+ /* only release our endpoint if we don't intend to reuse the
+ * connection.
+ */
+ if (!sc_conn_ready(s->scb)) {
+ s->srv_conn = NULL;
+ if (sc_reset_endp(s->scb) < 0) {
+ if (!s->conn_err_type)
+ s->conn_err_type = STRM_ET_CONN_OTHER;
+ if (s->srv_error)
+ s->srv_error(s, s->scb);
+ return 1;
+ }
+ se_fl_clr(s->scb->sedesc, ~SE_FL_DETACHED);
+ }
+
+ sockaddr_free(&s->scb->dst);
+
+ sc_set_state(s->scb, SC_ST_INI);
+ s->scb->flags &= ~(SC_FL_ERROR|SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED);
+ s->scb->flags &= SC_FL_ISBACK | SC_FL_DONT_WAKE; /* we're in the context of process_stream */
+
+ s->req.flags &= ~(CF_AUTO_CONNECT|CF_STREAMER|CF_STREAMER_FAST|CF_WROTE_DATA);
+ s->res.flags &= ~(CF_STREAMER|CF_STREAMER_FAST|CF_WRITE_EVENT|CF_WROTE_DATA|CF_READ_EVENT);
+ s->flags &= ~(SF_DIRECT|SF_ASSIGNED|SF_BE_ASSIGNED|SF_FORCE_PRST|SF_IGNORE_PRST);
+ s->flags &= ~(SF_CURR_SESS|SF_REDIRECTABLE|SF_SRV_REUSED);
+ s->flags &= ~(SF_ERR_MASK|SF_FINST_MASK|SF_REDISP);
+ s->conn_retries = 0; /* used for logging too */
+ s->conn_exp = TICK_ETERNITY;
+ s->conn_err_type = STRM_ET_NONE;
+ /* reinitialise the current rule list pointer to NULL. We are sure that
+ * any rulelist match the NULL pointer.
+ */
+ s->current_rule_list = NULL;
+
+ s->be = strm_fe(s);
+ s->logs.logwait = strm_fe(s)->to_log;
+ s->logs.level = 0;
+ stream_del_srv_conn(s);
+ s->target = NULL;
+ /* re-init store persistence */
+ s->store_count = 0;
+ s->uniq_id = global.req_count++;
+
+ s->scf->flags &= ~(SC_FL_EOS|SC_FL_ERROR|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED);
+ s->scf->flags &= ~SC_FL_SND_NEVERWAIT;
+ s->scf->flags |= SC_FL_RCV_ONCE; /* one read is usually enough */
+
+ s->req.flags |= CF_WAKE_ONCE; /* need to be called again if there is some command left in the request */
+
+ s->res.analysers &= ~AN_RES_WAIT_CLI;
+
+ /* We must trim any excess data from the response buffer, because we
+ * may have blocked an invalid response from a server that we don't
+ * want to accidentally forward once we disable the analysers, nor do
+		 * we want that data to come along with the next response. A typical
+ * example of such data would be from a buggy server responding to
+ * a HEAD with some data, or sending more than the advertised
+ * content-length.
+ */
+ if (unlikely(ci_data(&s->res)))
+ b_set_data(&s->res.buf, co_data(&s->res));
+
+ /* Now we can realign the response buffer */
+ c_realign_if_empty(&s->res);
+
+ s->scf->ioto = strm_fe(s)->timeout.client;
+ s->scb->ioto = TICK_ETERNITY;
+
+ s->req.analyse_exp = TICK_ETERNITY;
+ s->res.analyse_exp = TICK_ETERNITY;
+
+ /* we're removing the analysers, we MUST re-enable events detection.
+ * We don't enable close on the response channel since it's either
+ * already closed, or in keep-alive with an idle connection handler.
+ */
+ channel_auto_read(&s->req);
+ channel_auto_close(&s->req);
+ channel_auto_read(&s->res);
+
+
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * The mworker functions are used to initialize the CLI in the master process
+ */
+
+/*
+ * Stop the mworker proxy
+ */
+void mworker_cli_proxy_stop()
+{
+ if (mworker_proxy)
+ stop_proxy(mworker_proxy);
+}
+
+/*
+ * Create the mworker CLI proxy
+ */
+int mworker_cli_proxy_create()
+{
+ struct mworker_proc *child;
+ char *msg = NULL;
+ char *errmsg = NULL;
+
+ mworker_proxy = alloc_new_proxy("MASTER", PR_CAP_LISTEN|PR_CAP_INT, &errmsg);
+ if (!mworker_proxy)
+ goto error_proxy;
+
+ mworker_proxy->mode = PR_MODE_CLI;
+ mworker_proxy->maxconn = 10; /* default to 10 concurrent connections */
+ mworker_proxy->timeout.client = 0; /* no timeout */
+ mworker_proxy->conf.file = strdup("MASTER");
+ mworker_proxy->conf.line = 0;
+ mworker_proxy->accept = frontend_accept;
+ mworker_proxy->lbprm.algo = BE_LB_ALGO_NONE;
+
+ /* The default target is not set to the CLI applet here; this must be
+ * done in the request parsing code */
+ mworker_proxy->default_target = NULL;
+
+ /* create all servers using the mworker_proc list */
+ list_for_each_entry(child, &proc_list, list) {
+ struct server *newsrv = NULL;
+ struct sockaddr_storage *sk;
+ int port1, port2, port;
+ struct protocol *proto;
+
+ /* only the workers support the master CLI */
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+
+ newsrv = new_server(mworker_proxy);
+ if (!newsrv)
+ goto error;
+
+ /* we don't know the new pid yet */
+ if (child->pid == -1)
+ memprintf(&msg, "cur-%d", 1);
+ else
+ memprintf(&msg, "old-%d", child->pid);
+
+ newsrv->next = mworker_proxy->srv;
+ mworker_proxy->srv = newsrv;
+ newsrv->conf.file = strdup(msg);
+ newsrv->id = strdup(msg);
+ newsrv->conf.line = 0;
+
+ memprintf(&msg, "sockpair@%d", child->ipc_fd[0]);
+ if ((sk = str2sa_range(msg, &port, &port1, &port2, NULL, &proto, NULL,
+ &errmsg, NULL, NULL, PA_O_STREAM)) == 0) {
+ goto error;
+ }
+ ha_free(&msg);
+
+ if (!proto->connect) {
+ goto error;
+ }
+
+ /* no port specified */
+ newsrv->flags |= SRV_F_MAPPORTS;
+ newsrv->addr = *sk;
+ /* don't let the server participate in load balancing */
+ newsrv->iweight = 0;
+ newsrv->uweight = 0;
+ srv_lb_commit_status(newsrv);
+
+ child->srv = newsrv;
+ }
+
+ mworker_proxy->next = proxies_list;
+ proxies_list = mworker_proxy;
+
+ return 0;
+
+error:
+
+ list_for_each_entry(child, &proc_list, list) {
+ free((char *)child->srv->conf.file); /* cast because of const char * */
+ free(child->srv->id);
+ ha_free(&child->srv);
+ }
+ free_proxy(mworker_proxy);
+ free(msg);
+
+error_proxy:
+ ha_alert("%s\n", errmsg);
+ free(errmsg);
+
+ return -1;
+}
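+
+/* For illustration only (not part of the upstream source): the in-memory
+ * proxy built above is roughly what the following hypothetical textual
+ * configuration would produce, assuming one freshly forked worker with
+ * ipc_fd[0]=4 and one old worker with pid 1234 and ipc_fd[0]=5:
+ *
+ *     listen MASTER
+ *         mode cli
+ *         maxconn 10
+ *         server cur-1 sockpair@4      # new worker, pid unknown yet
+ *         server old-1234 sockpair@5   # previous worker, still draining
+ */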
+
+/*
+ * Create a new listener for the master CLI proxy
+ */
+struct bind_conf *mworker_cli_proxy_new_listener(char *line)
+{
+ struct bind_conf *bind_conf;
+ struct listener *l;
+ char *err = NULL;
+ char *args[MAX_LINE_ARGS + 1];
+ int arg;
+ int cur_arg;
+
+ arg = 1;
+ args[0] = line;
+
+ /* args is a bind configuration with spaces replaced by commas */
+ while (*line && arg < MAX_LINE_ARGS) {
+
+ if (*line == ',') {
+ *line++ = '\0';
+ while (*line == ',')
+ line++;
+ args[arg++] = line;
+ }
+ line++;
+ }
+
+ args[arg] = "\0";
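+
+ /* Worked example (illustrative): the line "sockpair@4,uid,1000" is
+ * split in place into args[0]="sockpair@4", args[1]="uid",
+ * args[2]="1000", with args[3] pointing to an empty string that
+ * terminates the keyword loop below.
+ */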
+
+ bind_conf = bind_conf_alloc(mworker_proxy, "master-socket", 0, "", xprt_get(XPRT_RAW));
+ if (!bind_conf)
+ goto err;
+
+ bind_conf->level &= ~ACCESS_LVL_MASK;
+ bind_conf->level |= ACCESS_LVL_ADMIN;
+ bind_conf->level |= ACCESS_MASTER | ACCESS_MASTER_ONLY;
+
+ if (!str2listener(args[0], mworker_proxy, bind_conf, "master-socket", 0, &err)) {
+ ha_alert("Cannot create the listener of the master CLI\n");
+ goto err;
+ }
+
+ cur_arg = 1;
+
+ while (*args[cur_arg]) {
+ struct bind_kw *kw;
+ const char *best;
+
+ kw = bind_find_kw(args[cur_arg]);
+ if (kw) {
+ if (!kw->parse) {
+ memprintf(&err, "'%s %s' : '%s' option is not implemented in this version (check build options).",
+ args[0], args[1], args[cur_arg]);
+ goto err;
+ }
+
+ if (kw->parse(args, cur_arg, global.cli_fe, bind_conf, &err) != 0) {
+ if (err)
+ memprintf(&err, "'%s %s' : '%s'", args[0], args[1], err);
+ else
+ memprintf(&err, "'%s %s' : error encountered while processing '%s'",
+ args[0], args[1], args[cur_arg]);
+ goto err;
+ }
+
+ cur_arg += 1 + kw->skip;
+ continue;
+ }
+
+ best = bind_find_best_kw(args[cur_arg]);
+ if (best)
+ memprintf(&err, "'%s %s' : unknown keyword '%s'. Did you mean '%s' maybe ?",
+ args[0], args[1], args[cur_arg], best);
+ else
+ memprintf(&err, "'%s %s' : unknown keyword '%s'.",
+ args[0], args[1], args[cur_arg]);
+ goto err;
+ }
+
+ bind_conf->accept = session_accept_fd;
+ bind_conf->nice = -64; /* we want to boost priority for local stats */
+ bind_conf->options |= BC_O_UNLIMITED; /* don't make this socket subject to global limits */
+
+ /* Pin master CLI on the first thread of the first group only */
+ thread_set_pin_grp1(&bind_conf->thread_set, 1);
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ l->rx.flags |= RX_F_MWORKER; /* we are keeping this FD in the master */
+ global.maxsock++; /* for the listening socket */
+ }
+ global.maxsock += mworker_proxy->maxconn;
+
+ return bind_conf;
+
+err:
+ ha_alert("%s\n", err);
+ free(err);
+ free(bind_conf);
+ return NULL;
+
+}
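+
+/* Hypothetical usage sketch (not part of this file): the master could turn
+ * a "-S" command-line bind specification into a listener as follows. The
+ * line is parsed in place, so it must be writable:
+ *
+ *     char *line = strdup("/var/run/master.sock,uid,0,gid,0");
+ *     if (!mworker_cli_proxy_new_listener(line))
+ *         exit(EXIT_FAILURE);
+ */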
+
+/*
+ * Create a new CLI socket using a socketpair for a worker process
+ * <mworker_proc> is the process structure, and <proc> is the process number
+ */
+int mworker_cli_sockpair_new(struct mworker_proc *mworker_proc, int proc)
+{
+ struct bind_conf *bind_conf;
+ struct listener *l;
+ char *path = NULL;
+ char *err = NULL;
+
+ /* master socketpair, used to check that the master is still alive */
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, mworker_proc->ipc_fd) < 0) {
+ ha_alert("Cannot create worker socketpair.\n");
+ return -1;
+ }
+
+ /* XXX: we might want to use a separate frontend at some point */
+ if (!global.cli_fe) {
+ if ((global.cli_fe = cli_alloc_fe("GLOBAL", "master-socket", 0)) == NULL) {
+ ha_alert("out of memory trying to allocate the stats frontend");
+ goto error;
+ }
+ }
+
+ bind_conf = bind_conf_alloc(global.cli_fe, "master-socket", 0, "", xprt_get(XPRT_RAW));
+ if (!bind_conf)
+ goto error;
+
+ bind_conf->level &= ~ACCESS_LVL_MASK;
+ bind_conf->level |= ACCESS_LVL_ADMIN; /* TODO: need to lower the rights with a CLI keyword */
+ bind_conf->level |= ACCESS_FD_LISTENERS;
+
+ if (!memprintf(&path, "sockpair@%d", mworker_proc->ipc_fd[1])) {
+ ha_alert("Cannot allocate listener.\n");
+ goto error;
+ }
+
+ if (!str2listener(path, global.cli_fe, bind_conf, "master-socket", 0, &err)) {
+ free(path);
+ ha_alert("Cannot create a CLI sockpair listener for process #%d\n", proc);
+ goto error;
+ }
+ ha_free(&path);
+
+ bind_conf->accept = session_accept_fd;
+ bind_conf->nice = -64; /* we want to boost priority for local stats */
+ bind_conf->options |= BC_O_UNLIMITED | BC_O_NOSTOP;
+
+ /* Pin master CLI on the first thread of the first group only */
+ thread_set_pin_grp1(&bind_conf->thread_set, 1);
+
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ HA_ATOMIC_INC(&unstoppable_jobs);
+ /* it's a sockpair but we don't want to keep the fd in the master */
+ l->rx.flags &= ~RX_F_INHERITED;
+ global.maxsock++; /* for the listening socket */
+ }
+
+ return 0;
+
+error:
+ close(mworker_proc->ipc_fd[0]);
+ close(mworker_proc->ipc_fd[1]);
+ free(err);
+
+ return -1;
+}
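+
+/* Illustrative note (not in the original source): assuming the socketpair
+ * above yields ipc_fd[0]=3 and ipc_fd[1]=4, the master keeps fd 3 and later
+ * exposes it as a "sockpair@3" server of the MASTER proxy (see
+ * mworker_cli_proxy_create() in this file), while the worker inherits fd 4,
+ * bound here as a "sockpair@4" listener of the GLOBAL stats frontend. Per
+ * the comment above, the pair also serves to detect that the master is
+ * still alive.
+ */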
+
+static struct applet cli_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<CLI>", /* used for logging */
+ .fct = cli_io_handler,
+ .release = cli_release_handler,
+};
+
+/* master CLI */
+static struct applet mcli_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<MCLI>", /* used for logging */
+ .fct = cli_io_handler,
+ .release = cli_release_handler,
+};
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "help", NULL }, NULL, cli_parse_simple, NULL, NULL, NULL, ACCESS_MASTER },
+ { { "prompt", NULL }, NULL, cli_parse_simple, NULL, NULL, NULL, ACCESS_MASTER },
+ { { "quit", NULL }, NULL, cli_parse_simple, NULL, NULL, NULL, ACCESS_MASTER },
+ { { "_getsocks", NULL }, NULL, _getsocks, NULL },
+ { { "expert-mode", NULL }, NULL, cli_parse_expert_experimental_mode, NULL, NULL, NULL, ACCESS_MASTER }, // not listed
+ { { "experimental-mode", NULL }, NULL, cli_parse_expert_experimental_mode, NULL, NULL, NULL, ACCESS_MASTER }, // not listed
+ { { "mcli-debug-mode", NULL }, NULL, cli_parse_expert_experimental_mode, NULL, NULL, NULL, ACCESS_MASTER_ONLY }, // not listed
+ { { "set", "anon", "on" }, "set anon on [value] : activate the anonymized mode", cli_parse_set_anon, NULL, NULL },
+ { { "set", "anon", "off" }, "set anon off : deactivate the anonymized mode", cli_parse_set_anon, NULL, NULL },
+ { { "set", "anon", "global-key", NULL }, "set anon global-key <value> : change the global anonymizing key", cli_parse_set_global_key, NULL, NULL },
+ { { "set", "maxconn", "global", NULL }, "set maxconn global <value> : change the per-process maxconn setting", cli_parse_set_maxconn_global, NULL },
+ { { "set", "rate-limit", NULL }, "set rate-limit <setting> <value> : change a rate limiting value", cli_parse_set_ratelimit, NULL },
+ { { "set", "severity-output", NULL }, "set severity-output [none|number|string]: set presence of severity level in feedback information", cli_parse_set_severity_output, NULL, NULL },
+ { { "set", "timeout", NULL }, "set timeout [cli] <delay> : change a timeout setting", cli_parse_set_timeout, NULL, NULL },
+ { { "show", "anon", NULL }, "show anon : display the current state of anonymized mode", cli_parse_show_anon, NULL },
+ { { "show", "env", NULL }, "show env [var] : dump environment variables known to the process", cli_parse_show_env, cli_io_handler_show_env, NULL },
+ { { "show", "cli", "sockets", NULL }, "show cli sockets : dump list of cli sockets", cli_parse_default, cli_io_handler_show_cli_sock, NULL, NULL, ACCESS_MASTER },
+ { { "show", "cli", "level", NULL }, "show cli level : display the level of the current CLI session", cli_parse_show_lvl, NULL, NULL, NULL, ACCESS_MASTER},
+ { { "show", "fd", NULL }, "show fd [-!plcfbsd]* [num] : dump list of file descriptors in use or a specific one", cli_parse_show_fd, cli_io_handler_show_fd, NULL },
+ { { "show", "version", NULL }, "show version : show version of the current process", cli_parse_show_version, NULL, NULL, NULL, ACCESS_MASTER },
+ { { "operator", NULL }, "operator : lower the level of the current CLI session to operator", cli_parse_set_lvl, NULL, NULL, NULL, ACCESS_MASTER},
+ { { "user", NULL }, "user : lower the level of the current CLI session to user", cli_parse_set_lvl, NULL, NULL, NULL, ACCESS_MASTER},
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "stats", cli_parse_global },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+static struct bind_kw_list bind_kws = { "STAT", { }, {
+ { "level", bind_parse_level, 1 }, /* set the unix socket admin level */
+ { "expose-fd", bind_parse_expose_fd, 1 }, /* set the unix socket expose fd rights */
+ { "severity-output", bind_parse_severity_output, 1 }, /* set the severity output format */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/clock.c b/src/clock.c
new file mode 100644
index 0000000..ec2133c
--- /dev/null
+++ b/src/clock.c
@@ -0,0 +1,460 @@
+/*
+ * General time-keeping code and variables
+ *
+ * Copyright 2000-2021 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/time.h>
+#include <signal.h>
+#include <time.h>
+
+#ifdef USE_THREAD
+#include <pthread.h>
+#endif
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/clock.h>
+#include <haproxy/signal-t.h>
+#include <haproxy/time.h>
+#include <haproxy/tinfo-t.h>
+#include <haproxy/tools.h>
+
+struct timeval start_date; /* the process's start date in wall-clock time */
+struct timeval ready_date; /* date when the process was considered ready */
+ullong start_time_ns; /* the process's start date in internal monotonic time (ns) */
+volatile ullong global_now_ns; /* common monotonic date between all threads, in ns (wraps every 585 yr) */
+volatile uint global_now_ms; /* common monotonic date in milliseconds (may wrap) */
+
+THREAD_ALIGNED(64) static llong now_offset; /* global offset between system time and global time in ns */
+
+THREAD_LOCAL ullong now_ns; /* internal monotonic date derived from real clock, in ns (wraps every 585 yr) */
+THREAD_LOCAL uint now_ms; /* internal monotonic date in milliseconds (may wrap) */
+THREAD_LOCAL struct timeval date; /* the real current date (wall-clock time) */
+
+static THREAD_LOCAL struct timeval before_poll; /* system date before calling poll() */
+static THREAD_LOCAL struct timeval after_poll; /* system date after leaving poll() */
+static THREAD_LOCAL unsigned int samp_time; /* total elapsed time over current sample */
+static THREAD_LOCAL unsigned int idle_time; /* total idle time over current sample */
+static THREAD_LOCAL unsigned int iso_time_sec; /* last iso time value for this thread */
+static THREAD_LOCAL char iso_time_str[34]; /* ISO time representation of gettimeofday() */
+
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+static clockid_t per_thread_clock_id[MAX_THREADS];
+#endif
+
+/* returns the system's monotonic time in nanoseconds if supported, otherwise zero */
+uint64_t now_mono_time(void)
+{
+ uint64_t ret = 0;
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_MONOTONIC_CLOCK)
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+#endif
+ return ret;
+}
+
+/* Returns the system's monotonic time in nanoseconds.
+ * Uses the coarse clock source if supported (for fast but
+ * less precise queries with limited resource usage).
+ * Falls back to now_mono_time() if the coarse source is not supported,
+ * which may itself return 0 if not supported either.
+ */
+uint64_t now_mono_time_fast(void)
+{
+#if defined(CLOCK_MONOTONIC_COARSE)
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
+ return (ts.tv_sec * 1000000000ULL + ts.tv_nsec);
+#else
+ /* fallback to regular mono time,
+ * returns 0 if not supported
+ */
+ return now_mono_time();
+#endif
+}
+
+/* returns the current thread's cumulated CPU time in nanoseconds if supported, otherwise zero */
+uint64_t now_cpu_time(void)
+{
+ uint64_t ret = 0;
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+ struct timespec ts;
+ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
+ ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+#endif
+ return ret;
+}
+
+/* Returns the current thread's cumulated CPU time in nanoseconds.
+ *
+ * thread_local timer is cached so that call is less precise but also less
+ * expensive if heavily used.
+ * We use the mono time as a cache expiration hint since now_cpu_time() is
+ * known to be much more expensive than now_mono_time_fast() on systems
+ * supporting the COARSE clock source.
+ *
+ * Returns 0 if either now_mono_time_fast() or now_cpu_time() are not
+ * supported.
+ */
+uint64_t now_cpu_time_fast(void)
+{
+ static THREAD_LOCAL uint64_t mono_cache = 0;
+ static THREAD_LOCAL uint64_t cpu_cache = 0;
+ uint64_t mono_cur;
+
+ mono_cur = now_mono_time_fast();
+ if (unlikely(mono_cur != mono_cache)) {
+ /* global mono clock was updated: local cache is outdated */
+ cpu_cache = now_cpu_time();
+ mono_cache = mono_cur;
+ }
+ return cpu_cache;
+}
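+
+/* Worked example (illustrative, kernel-dependent): with a typical
+ * CLOCK_MONOTONIC_COARSE resolution of 4 ms, the cache above bounds the
+ * cost to at most ~250 clock_gettime(CLOCK_THREAD_CPUTIME_ID) calls per
+ * second per thread, no matter how often now_cpu_time_fast() is called.
+ */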
+
+/* returns another thread's cumulated CPU time in nanoseconds if supported, otherwise zero */
+uint64_t now_cpu_time_thread(int thr)
+{
+ uint64_t ret = 0;
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+ struct timespec ts;
+ clock_gettime(per_thread_clock_id[thr], &ts);
+ ret = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+#endif
+ return ret;
+}
+
+/* set the clock source for the local thread */
+void clock_set_local_source(void)
+{
+#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+#ifdef USE_THREAD
+ pthread_getcpuclockid(pthread_self(), &per_thread_clock_id[tid]);
+#else
+ per_thread_clock_id[tid] = CLOCK_THREAD_CPUTIME_ID;
+#endif
+#endif
+}
+
+/* registers a timer <tmr> of type timer_t delivering signal <sig> with value
+ * <val>. It tries on the current thread's clock ID first and falls back to
+ * CLOCK_REALTIME. Returns non-zero on success, zero on failure.
+ */
+int clock_setup_signal_timer(void *tmr, int sig, int val)
+{
+ int ret = 0;
+
+#if defined(USE_RT) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+ struct sigevent sev = { };
+ timer_t *timer = tmr;
+ sigset_t set;
+
+ /* unblock the WDTSIG signal we intend to use */
+ sigemptyset(&set);
+ sigaddset(&set, WDTSIG);
+ ha_sigmask(SIG_UNBLOCK, &set, NULL);
+
+ /* this timer will signal WDTSIG when it fires, with tid in the si_int
+ * field (important since any thread will receive the signal).
+ */
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = sig;
+ sev.sigev_value.sival_int = val;
+ if (timer_create(per_thread_clock_id[tid], &sev, timer) != -1 ||
+ timer_create(CLOCK_REALTIME, &sev, timer) != -1)
+ ret = 1;
+#endif
+ return ret;
+}
+
+/* clock_update_date: sets <date> to system time, and sets <now_ns> to something
+ * as close as possible to real time, following a monotonic function. The main
+ * principle consists in detecting backwards and forwards time jumps and adjust
+ * an offset to correct them. This function should be called once after each
+ * poll, and never farther apart than MAX_DELAY_MS*2. The poll's timeout should
+ * be passed in <max_wait>, and the return value in <interrupted> (a non-zero
+ * value means that we have not expired the timeout).
+ *
+ * clock_init_process_date() must have been called once first, and
+ * clock_init_thread_date() must also have been called once for each thread.
+ *
+ * An offset is used to adjust the current time (date), to derive a monotonic
+ * local time (now_ns). The offset is not critical, as it is only updated after
+ * a clock jump is detected. From this point all threads will apply it to their
+ * locally measured time, and will then agree around a common monotonic
+ * global_now_ns value that serves to further refine their local time. Both
+ * now_ns and global_now_ns are 64-bit integers counting nanoseconds since a
+ * vague reference (it starts roughly 20s before the next wrap-around of the
+ * millisecond counter after boot). The offset is also an integral number of
+ * nanoseconds, but it's signed so that the clock can be adjusted in the two
+ * directions.
+ */
+void clock_update_local_date(int max_wait, int interrupted)
+{
+ struct timeval min_deadline, max_deadline;
+
+ gettimeofday(&date, NULL);
+
+ /* compute the minimum and maximum local date we may have reached based
+ * on our past date and the associated timeout. There are three possible
+ * extremities:
+ * - the new date cannot be older than before_poll
+ * - if not interrupted, the new date cannot be older than
+ * before_poll+max_wait
+ * - in any case the new date cannot be newer than
+ * before_poll+max_wait+some margin (100ms used here).
+ * In case of violation, we'll ignore the current date and instead
+ * restart from the last date we knew.
+ */
+ _tv_ms_add(&min_deadline, &before_poll, max_wait);
+ _tv_ms_add(&max_deadline, &before_poll, max_wait + 100);
+
+ if (unlikely(__tv_islt(&date, &before_poll) || // big jump backwards
+ (!interrupted && __tv_islt(&date, &min_deadline)) || // small jump backwards
+ __tv_islt(&max_deadline, &date))) { // big jump forwards
+ if (!interrupted)
+ now_ns += ms_to_ns(max_wait);
+ } else {
+ /* The date is still within expectations. Let's apply the
+ * now_offset to the system date. Note: the offset is made of
+ * two independent signed ints.
+ */
+ now_ns = tv_to_ns(&date) + HA_ATOMIC_LOAD(&now_offset);
+ }
+ now_ms = ns_to_ms(now_ns);
+}
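+
+/* Worked example (illustrative): with max_wait=60 ms and before_poll=T, an
+ * uninterrupted poll() is expected to return at a date within
+ * [T+60ms, T+160ms]. A measured date of T+2s (e.g. after an NTP step or a
+ * VM pause) exceeds max_deadline, so the system date is ignored and now_ns
+ * simply advances by the 60 ms timeout instead.
+ */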
+
+void clock_update_global_date(void)
+{
+ ullong old_now_ns;
+ uint old_now_ms;
+
+ /* now that we have bounded the local time, let's check if it's
+ * realistic regarding the global date, which only moves forward,
+ * otherwise catch up.
+ */
+ old_now_ns = _HA_ATOMIC_LOAD(&global_now_ns);
+ old_now_ms = global_now_ms;
+
+ do {
+ if (now_ns < old_now_ns)
+ now_ns = old_now_ns;
+
+ /* now <now_ns> is expected to be the most accurate date,
+ * equal to <global_now_ns> or newer. Updating the global
+ * date too often causes extreme contention and is not
+ * needed: it's only used to help threads run at the
+ * same date in case of local drift, and the global date,
+ * which changes, is only used by freq counters (a choice
+ * which is debatable by the way since it changes under us).
+ * Tests have shown that the contention can be reduced from
+ * 37% in this function to almost 0% by keeping clocks
+ * synchronized no closer than 32 microseconds, so that's
+ * what we're doing here.
+ */
+ now_ms = ns_to_ms(now_ns);
+
+ if (!((now_ns ^ old_now_ns) & ~0x7FFFULL))
+ return;
+
+ /* let's try to update the global_now_ns (both in nanoseconds
+ * and ms forms) or loop again.
+ */
+ } while ((!_HA_ATOMIC_CAS(&global_now_ns, &old_now_ns, now_ns) ||
+ (now_ms != old_now_ms && !_HA_ATOMIC_CAS(&global_now_ms, &old_now_ms, now_ms))) &&
+ __ha_cpu_relax());
+
+ /* <now_ns> and <now_ms> are now updated to the last value of
+ * global_now_ns and global_now_ms, which were also monotonically
+ * updated. We can compute the latest offset, we don't care who writes
+ * it last, the variations will not break the monotonic property.
+ */
+ HA_ATOMIC_STORE(&now_offset, now_ns - tv_to_ns(&date));
+}
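+
+/* Worked example (illustrative): the early-return test above masks the low
+ * 15 bits of now_ns (0x7FFF = 32767 ns), so two updates falling into the
+ * same 32768 ns window (~32 us) skip the CAS entirely. This is what brings
+ * the contention mentioned above from 37% down to almost nothing.
+ */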
+
+/* must be called once at boot to initialize some global variables */
+void clock_init_process_date(void)
+{
+ now_offset = 0;
+ gettimeofday(&date, NULL);
+ after_poll = before_poll = date;
+ now_ns = global_now_ns = tv_to_ns(&date);
+ global_now_ms = ns_to_ms(now_ns);
+
+ /* force time to wrap 20s after boot: we first compute the time offset
+ * that once applied to the wall-clock date will make the local time
+ * wrap in BOOT_TIME_WRAP_SEC (20) seconds. This offset is applied to
+ * the process-wide time, and will be used to recompute the local time,
+ * both of which will match and continue from this shifted date.
+ */
+ now_offset = sec_to_ns((uint)((uint)(-global_now_ms) / 1000U - BOOT_TIME_WRAP_SEC));
+ global_now_ns += now_offset;
+ now_ns = global_now_ns;
+ now_ms = global_now_ms = ns_to_ms(now_ns);
+
+ th_ctx->idle_pct = 100;
+ clock_update_date(0, 1);
+}
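+
+/* Worked arithmetic (illustrative): (uint)(-global_now_ms) is the number of
+ * milliseconds left before the 32-bit millisecond counter wraps; divided by
+ * 1000 it gives the seconds left. Subtracting BOOT_TIME_WRAP_SEC (20) and
+ * shifting all clocks by that many seconds leaves global_now_ms roughly
+ * 20000 ms short of 2^32, so the wrap occurs ~20 s after boot and any
+ * wrap-related bug shows up immediately rather than after 49.7 days.
+ */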
+
+void clock_adjust_now_offset(void)
+{
+ HA_ATOMIC_STORE(&now_offset, now_ns - tv_to_ns(&date));
+}
+
+/* must be called once per thread to initialize their thread-local variables.
+ * Note that other threads might also be initializing and running in parallel.
+ */
+void clock_init_thread_date(void)
+{
+ gettimeofday(&date, NULL);
+ after_poll = before_poll = date;
+
+ now_ns = _HA_ATOMIC_LOAD(&global_now_ns);
+ th_ctx->idle_pct = 100;
+ th_ctx->prev_cpu_time = now_cpu_time();
+ clock_update_date(0, 1);
+}
+
+/* report the average CPU idle percentage over all running threads, between 0 and 100 */
+uint clock_report_idle(void)
+{
+ uint total = 0;
+ uint rthr = 0;
+ uint thr;
+
+ for (thr = 0; thr < MAX_THREADS; thr++) {
+ if (!ha_thread_info[thr].tg ||
+ !(ha_thread_info[thr].tg->threads_enabled & ha_thread_info[thr].ltid_bit))
+ continue;
+ total += HA_ATOMIC_LOAD(&ha_thread_ctx[thr].idle_pct);
+ rthr++;
+ }
+ return rthr ? total / rthr : 0;
+}
+
+/* Update the idle time value twice a second, to be called after
+ * clock_update_date() when called after poll(), and currently called only by
+ * clock_leaving_poll() below. It relies on <before_poll> to be updated to
+ * the system time before calling poll().
+ */
+static inline void clock_measure_idle(void)
+{
+ /* Let's compute the idle to work ratio. We worked between after_poll
+ * and before_poll, and slept between before_poll and date. The idle_pct
+ * is updated at most twice every second. Note that the current second
+ * rarely changes so we avoid a multiply when not needed.
+ */
+ int delta;
+
+ if ((delta = date.tv_sec - before_poll.tv_sec))
+ delta *= 1000000;
+ idle_time += delta + (date.tv_usec - before_poll.tv_usec);
+
+ if ((delta = date.tv_sec - after_poll.tv_sec))
+ delta *= 1000000;
+ samp_time += delta + (date.tv_usec - after_poll.tv_usec);
+
+ after_poll.tv_sec = date.tv_sec; after_poll.tv_usec = date.tv_usec;
+ if (samp_time < 500000)
+ return;
+
+ HA_ATOMIC_STORE(&th_ctx->idle_pct, (100ULL * idle_time + samp_time / 2) / samp_time);
+ idle_time = samp_time = 0;
+}
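+
+/* Worked example (illustrative): over a 500000 us sample during which the
+ * thread spent 300000 us sleeping in poll() (idle_time=300000,
+ * samp_time=500000), the store above publishes
+ * idle_pct = (100 * 300000 + 250000) / 500000 = 60.
+ */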
+
+/* Collect date and time information after leaving poll(). <timeout> must be
+ * set to the maximum sleep time passed to poll (in milliseconds), and
+ * <interrupted> must be zero if the poller reached the timeout or non-zero
+ * otherwise, which generally is provided by the poller's return value.
+ */
+void clock_leaving_poll(int timeout, int interrupted)
+{
+ clock_measure_idle();
+ th_ctx->prev_cpu_time = now_cpu_time();
+ th_ctx->prev_mono_time = now_mono_time();
+}
+
+/* Collect date and time information before calling poll(). This will be used
+ * to count the run time of the past loop and the sleep time of the next poll.
+ * It also compares the elapsed and cpu times during the activity period to
+ * estimate the amount of stolen time, which is reported if higher than half
+ * a millisecond.
+ */
+void clock_entering_poll(void)
+{
+ uint64_t new_mono_time;
+ uint64_t new_cpu_time;
+ uint32_t run_time;
+ int64_t stolen;
+
+ gettimeofday(&before_poll, NULL);
+
+ run_time = (before_poll.tv_sec - after_poll.tv_sec) * 1000000U + (before_poll.tv_usec - after_poll.tv_usec);
+
+ new_cpu_time = now_cpu_time();
+ new_mono_time = now_mono_time();
+
+ if (th_ctx->prev_cpu_time && th_ctx->prev_mono_time) {
+ new_cpu_time -= th_ctx->prev_cpu_time;
+ new_mono_time -= th_ctx->prev_mono_time;
+ stolen = new_mono_time - new_cpu_time;
+ if (unlikely(stolen >= 500000)) {
+ stolen /= 500000;
+ /* more than half a millisecond difference might
+ * indicate an undesired preemption.
+ */
+ report_stolen_time(stolen);
+ }
+ }
+
+ /* update the average runtime */
+ activity_count_runtime(run_time);
+}
+
+/* returns the current date as returned by gettimeofday() in ISO+microsecond
+ * format. It uses a thread-local static variable that the reader can consume
+ * for as long as it wants until next call. Thus, do not call it from a signal
+ * handler. If <pad> is non-0, a trailing space will be added. It will always
+ * return exactly 32 or 33 characters (depending on padding) and will always be
+ * zero-terminated, thus it will always fit into a 34 bytes buffer.
+ * This also always include the local timezone (in +/-HH:mm format) .
+ */
+char *timeofday_as_iso_us(int pad)
+{
+ struct timeval new_date;
+ struct tm tm;
+ const char *offset;
+ char c;
+
+ gettimeofday(&new_date, NULL);
+ if (new_date.tv_sec != iso_time_sec || !new_date.tv_sec) {
+ get_localtime(new_date.tv_sec, &tm);
+ offset = get_gmt_offset(new_date.tv_sec, &tm);
+ if (unlikely(strftime(iso_time_str, sizeof(iso_time_str), "%Y-%m-%dT%H:%M:%S.000000+00:00", &tm) != 32))
+ strlcpy2(iso_time_str, "YYYY-mm-ddTHH:MM:SS.000000-00:00", sizeof(iso_time_str)); // make the failure visible but respect format.
+ iso_time_str[26] = offset[0];
+ iso_time_str[27] = offset[1];
+ iso_time_str[28] = offset[2];
+ iso_time_str[30] = offset[3];
+ iso_time_str[31] = offset[4];
+ iso_time_sec = new_date.tv_sec;
+ }
+
+ /* utoa_pad adds a trailing 0 so we save the char for restore */
+ c = iso_time_str[26];
+ utoa_pad(new_date.tv_usec, iso_time_str + 20, 7);
+ iso_time_str[26] = c;
+ if (pad) {
+ iso_time_str[32] = ' ';
+ iso_time_str[33] = 0;
+ }
+ return iso_time_str;
+}
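+
+/* Hypothetical usage (illustrative, not from this file): the result is
+ * typically embedded directly into a log line, e.g.:
+ *
+ *     chunk_appendf(&trash, "%sthread %u", timeofday_as_iso_us(1), tid);
+ *
+ * which might yield "2024-04-13T12:18:05.123456+00:00 thread 1": 32 chars
+ * of date, the trailing space added by pad=1, then the caller's text.
+ */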
diff --git a/src/compression.c b/src/compression.c
new file mode 100644
index 0000000..7b75461
--- /dev/null
+++ b/src/compression.c
@@ -0,0 +1,742 @@
+/*
+ * HTTP compression.
+ *
+ * Copyright 2012 Exceliance, David Du Colombier <dducolombier@exceliance.fr>
+ * William Lallemand <wlallemand@exceliance.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+
+#if defined(USE_ZLIB)
+/* Note: the crappy zlib and openssl libs both define the "free_func" type.
+ * That's a very clever idea to use such a generic name in general purpose
+ * libraries, really... The zlib one is easier to redefine than openssl's,
+ * so let's only fix this one.
+ */
+#define free_func zlib_free_func
+#include <zlib.h>
+#undef free_func
+#endif /* USE_ZLIB */
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/compression-t.h>
+#include <haproxy/compression.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/global.h>
+#include <haproxy/pool.h>
+#include <haproxy/stream.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+
+#if defined(USE_ZLIB)
+__decl_spinlock(comp_pool_lock);
+#endif
+
+#ifdef USE_ZLIB
+
+static void *alloc_zlib(void *opaque, unsigned int items, unsigned int size);
+static void free_zlib(void *opaque, void *ptr);
+
+/* zlib allocation */
+static struct pool_head *zlib_pool_deflate_state __read_mostly = NULL;
+static struct pool_head *zlib_pool_window __read_mostly = NULL;
+static struct pool_head *zlib_pool_prev __read_mostly = NULL;
+static struct pool_head *zlib_pool_head __read_mostly = NULL;
+static struct pool_head *zlib_pool_pending_buf __read_mostly = NULL;
+
+long zlib_used_memory = 0;
+
+static int global_tune_zlibmemlevel = 8; /* zlib memlevel */
+static int global_tune_zlibwindowsize = MAX_WBITS; /* zlib window size */
+
+#endif
+
+unsigned int compress_min_idle = 0;
+
+static int identity_init(struct comp_ctx **comp_ctx, int level);
+static int identity_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out);
+static int identity_flush(struct comp_ctx *comp_ctx, struct buffer *out);
+static int identity_finish(struct comp_ctx *comp_ctx, struct buffer *out);
+static int identity_end(struct comp_ctx **comp_ctx);
+
+#if defined(USE_SLZ)
+
+static int rfc1950_init(struct comp_ctx **comp_ctx, int level);
+static int rfc1951_init(struct comp_ctx **comp_ctx, int level);
+static int rfc1952_init(struct comp_ctx **comp_ctx, int level);
+static int rfc195x_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out);
+static int rfc195x_flush(struct comp_ctx *comp_ctx, struct buffer *out);
+static int rfc195x_finish(struct comp_ctx *comp_ctx, struct buffer *out);
+static int rfc195x_end(struct comp_ctx **comp_ctx);
+
+#elif defined(USE_ZLIB)
+
+static int gzip_init(struct comp_ctx **comp_ctx, int level);
+static int raw_def_init(struct comp_ctx **comp_ctx, int level);
+static int deflate_init(struct comp_ctx **comp_ctx, int level);
+static int deflate_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out);
+static int deflate_flush(struct comp_ctx *comp_ctx, struct buffer *out);
+static int deflate_finish(struct comp_ctx *comp_ctx, struct buffer *out);
+static int deflate_end(struct comp_ctx **comp_ctx);
+
+#endif /* USE_ZLIB */
+
+
+const struct comp_algo comp_algos[] =
+{
+ { "identity", 8, "identity", 8, identity_init, identity_add_data, identity_flush, identity_finish, identity_end },
+#if defined(USE_SLZ)
+ { "deflate", 7, "deflate", 7, rfc1950_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
+ { "raw-deflate", 11, "deflate", 7, rfc1951_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
+ { "gzip", 4, "gzip", 4, rfc1952_init, rfc195x_add_data, rfc195x_flush, rfc195x_finish, rfc195x_end },
+#elif defined(USE_ZLIB)
+ { "deflate", 7, "deflate", 7, deflate_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
+ { "raw-deflate", 11, "deflate", 7, raw_def_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
+ { "gzip", 4, "gzip", 4, gzip_init, deflate_add_data, deflate_flush, deflate_finish, deflate_end },
+#endif /* USE_ZLIB */
+ { NULL, 0, NULL, 0, NULL , NULL, NULL, NULL, NULL }
+};
+
+/*
+ * Add a content type to the configuration.
+ * Returns 0 in case of success, 1 in case of allocation failure.
+ */
+int comp_append_type(struct comp_type **types, const char *type)
+{
+ struct comp_type *comp_type;
+
+ comp_type = calloc(1, sizeof(*comp_type));
+ if (!comp_type)
+ return 1;
+ comp_type->name_len = strlen(type);
+ comp_type->name = strdup(type);
+ comp_type->next = *types;
+ *types = comp_type;
+ return 0;
+}
+
+/*
+ * Add an algorithm to the configuration.
+ * Returns 0 in case of success, -1 if <algo> is not supported, 1 in case of
+ * allocation failure.
+ */
+int comp_append_algo(struct comp_algo **algos, const char *algo)
+{
+ struct comp_algo *comp_algo;
+ int i;
+
+ for (i = 0; comp_algos[i].cfg_name; i++) {
+ if (strcmp(algo, comp_algos[i].cfg_name) == 0) {
+ comp_algo = calloc(1, sizeof(*comp_algo));
+ if (!comp_algo)
+ return 1;
+ memmove(comp_algo, &comp_algos[i], sizeof(struct comp_algo));
+ comp_algo->next = *algos;
+ *algos = comp_algo;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+#if defined(USE_ZLIB) || defined(USE_SLZ)
+DECLARE_STATIC_POOL(pool_comp_ctx, "comp_ctx", sizeof(struct comp_ctx));
+
+/*
+ * Alloc the comp_ctx
+ */
+static inline int init_comp_ctx(struct comp_ctx **comp_ctx)
+{
+#ifdef USE_ZLIB
+ z_stream *strm;
+
+ if (global.maxzlibmem > 0 && (global.maxzlibmem - zlib_used_memory) < sizeof(struct comp_ctx))
+ return -1;
+#endif
+
+ *comp_ctx = pool_alloc(pool_comp_ctx);
+ if (*comp_ctx == NULL)
+ return -1;
+#if defined(USE_SLZ)
+ (*comp_ctx)->direct_ptr = NULL;
+ (*comp_ctx)->direct_len = 0;
+ (*comp_ctx)->queued = BUF_NULL;
+#elif defined(USE_ZLIB)
+ _HA_ATOMIC_ADD(&zlib_used_memory, sizeof(struct comp_ctx));
+ __ha_barrier_atomic_store();
+
+ strm = &(*comp_ctx)->strm;
+ strm->zalloc = alloc_zlib;
+ strm->zfree = free_zlib;
+ strm->opaque = *comp_ctx;
+#endif
+ return 0;
+}
+
+/*
+ * Dealloc the comp_ctx
+ */
+static inline int deinit_comp_ctx(struct comp_ctx **comp_ctx)
+{
+ if (!*comp_ctx)
+ return 0;
+
+ pool_free(pool_comp_ctx, *comp_ctx);
+ *comp_ctx = NULL;
+
+#ifdef USE_ZLIB
+ _HA_ATOMIC_SUB(&zlib_used_memory, sizeof(struct comp_ctx));
+ __ha_barrier_atomic_store();
+#endif
+ return 0;
+}
+#endif
+
+
+/****************************
+ **** Identity algorithm ****
+ ****************************/
+
+/*
+ * Init the identity algorithm
+ */
+static int identity_init(struct comp_ctx **comp_ctx, int level)
+{
+ return 0;
+}
+
+/*
+ * Process data
+ * Return size of consumed data or -1 on error
+ */
+static int identity_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out)
+{
+ char *out_data = b_tail(out);
+ int out_len = b_room(out);
+
+ if (out_len < in_len)
+ return -1;
+
+ memcpy(out_data, in_data, in_len);
+
+ b_add(out, in_len);
+
+ return in_len;
+}
+
+static int identity_flush(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return 0;
+}
+
+static int identity_finish(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return 0;
+}
+
+/*
+ * Deinit the algorithm
+ */
+static int identity_end(struct comp_ctx **comp_ctx)
+{
+ return 0;
+}
+
+
+#ifdef USE_SLZ
+
+/* SLZ's gzip format (RFC1952). Returns < 0 on error. */
+static int rfc1952_init(struct comp_ctx **comp_ctx, int level)
+{
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ (*comp_ctx)->cur_lvl = !!level;
+ return slz_rfc1952_init(&(*comp_ctx)->strm, !!level);
+}
+
+/* SLZ's raw deflate format (RFC1951). Returns < 0 on error. */
+static int rfc1951_init(struct comp_ctx **comp_ctx, int level)
+{
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ (*comp_ctx)->cur_lvl = !!level;
+ return slz_rfc1951_init(&(*comp_ctx)->strm, !!level);
+}
+
+/* SLZ's zlib format (RFC1950). Returns < 0 on error. */
+static int rfc1950_init(struct comp_ctx **comp_ctx, int level)
+{
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ (*comp_ctx)->cur_lvl = !!level;
+ return slz_rfc1950_init(&(*comp_ctx)->strm, !!level);
+}
+
+/* Return the size of consumed data or -1. The output buffer is unused at this
+ * point, we only keep a reference to the input data or a copy of them if the
+ * reference is already used.
+ */
+static int rfc195x_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out)
+{
+ static THREAD_LOCAL struct buffer tmpbuf = BUF_NULL;
+
+ if (in_len <= 0)
+ return 0;
+
+ if (comp_ctx->direct_ptr && b_is_null(&comp_ctx->queued)) {
+ /* data already being pointed to, we're in front of fragmented
+ * data and need a buffer now. We reuse the same buffer, as it's
+ * not used outside the scope of a series of add_data() calls followed by end().
+ */
+ if (b_alloc(&tmpbuf) == NULL)
+ return -1; /* no memory */
+ b_reset(&tmpbuf);
+ memcpy(b_tail(&tmpbuf), comp_ctx->direct_ptr, comp_ctx->direct_len);
+ b_add(&tmpbuf, comp_ctx->direct_len);
+ comp_ctx->direct_ptr = NULL;
+ comp_ctx->direct_len = 0;
+ comp_ctx->queued = tmpbuf;
+ /* fall through buffer copy */
+ }
+
+ if (!b_is_null(&comp_ctx->queued)) {
+ /* data already pending */
+ memcpy(b_tail(&comp_ctx->queued), in_data, in_len);
+ b_add(&comp_ctx->queued, in_len);
+ return in_len;
+ }
+
+ comp_ctx->direct_ptr = in_data;
+ comp_ctx->direct_len = in_len;
+ return in_len;
+}
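+
+/* Worked example (illustrative): for a response delivered in three chunks
+ * A, B and C, add_data(A) only records a zero-copy reference
+ * (direct_ptr/direct_len). add_data(B) detects the fragmentation, copies A
+ * into the thread-local buffer, queues it and appends B; add_data(C) then
+ * simply appends to the queued buffer. Nothing is compressed before the
+ * flush/finish step below.
+ */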
+
+/* Compresses the data accumulated using add_data(), and optionally sends the
+ * format-specific trailer if <finish> is non-null. <out> is expected to have a
+ * large enough free non-wrapping space as verified by http_comp_buffer_init().
+ * The number of bytes emitted is reported.
+ */
+static int rfc195x_flush_or_finish(struct comp_ctx *comp_ctx, struct buffer *out, int finish)
+{
+ struct slz_stream *strm = &comp_ctx->strm;
+ const char *in_ptr;
+ int in_len;
+ int out_len;
+
+ in_ptr = comp_ctx->direct_ptr;
+ in_len = comp_ctx->direct_len;
+
+ if (!b_is_null(&comp_ctx->queued)) {
+ in_ptr = b_head(&comp_ctx->queued);
+ in_len = b_data(&comp_ctx->queued);
+ }
+
+ out_len = b_data(out);
+
+ if (in_ptr)
+ b_add(out, slz_encode(strm, b_tail(out), in_ptr, in_len, !finish));
+
+ if (finish)
+ b_add(out, slz_finish(strm, b_tail(out)));
+ else
+ b_add(out, slz_flush(strm, b_tail(out)));
+
+ out_len = b_data(out) - out_len;
+
+ /* very important, we must wipe the data we've just flushed */
+ comp_ctx->direct_len = 0;
+ comp_ctx->direct_ptr = NULL;
+ comp_ctx->queued = BUF_NULL;
+
+ /* Verify compression rate limiting and CPU usage */
+ if ((global.comp_rate_lim > 0 && (read_freq_ctr(&global.comp_bps_out) > global.comp_rate_lim)) || /* rate */
+ (th_ctx->idle_pct < compress_min_idle)) { /* idle */
+ if (comp_ctx->cur_lvl > 0)
+ strm->level = --comp_ctx->cur_lvl;
+ }
+ else if (comp_ctx->cur_lvl < global.tune.comp_maxlevel && comp_ctx->cur_lvl < 1) {
+ strm->level = ++comp_ctx->cur_lvl;
+ }
+
+ /* and that's all */
+ return out_len;
+}
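+
+/* Behaviour note (illustrative, not from the original source): with SLZ the
+ * level is effectively boolean, so the adaptation above toggles between 1
+ * (compress) and 0 (pass through): whenever the output rate exceeds
+ * comp_rate_lim or idle_pct falls below compress_min_idle, cur_lvl drops
+ * towards 0, and the "cur_lvl < 1" guard caps the recovery at level 1.
+ */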
+
+static int rfc195x_flush(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return rfc195x_flush_or_finish(comp_ctx, out, 0);
+}
+
+static int rfc195x_finish(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return rfc195x_flush_or_finish(comp_ctx, out, 1);
+}
+
+/* we just need to free the comp_ctx here, nothing was allocated */
+static int rfc195x_end(struct comp_ctx **comp_ctx)
+{
+ deinit_comp_ctx(comp_ctx);
+ return 0;
+}
+
+#elif defined(USE_ZLIB) /* ! USE_SLZ */
+
+/*
+ * This is a tricky allocation function using the zlib.
+ * This is based on the allocation order in deflateInit2.
+ */
+static void *alloc_zlib(void *opaque, unsigned int items, unsigned int size)
+{
+ struct comp_ctx *ctx = opaque;
+ static THREAD_LOCAL char round = 0; /* order in deflateInit2 */
+ void *buf = NULL;
+ struct pool_head *pool = NULL;
+
+ if (global.maxzlibmem > 0 && (global.maxzlibmem - zlib_used_memory) < (long)(items * size))
+ goto end;
+
+ switch (round) {
+ case 0:
+ if (zlib_pool_deflate_state == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_deflate_state == NULL)
+ zlib_pool_deflate_state = create_pool("zlib_state", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_deflate_state;
+ ctx->zlib_deflate_state = buf = pool_alloc(pool);
+ break;
+
+ case 1:
+ if (zlib_pool_window == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_window == NULL)
+ zlib_pool_window = create_pool("zlib_window", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_window;
+ ctx->zlib_window = buf = pool_alloc(pool);
+ break;
+
+ case 2:
+ if (zlib_pool_prev == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_prev == NULL)
+ zlib_pool_prev = create_pool("zlib_prev", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_prev;
+ ctx->zlib_prev = buf = pool_alloc(pool);
+ break;
+
+ case 3:
+ if (zlib_pool_head == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_head == NULL)
+ zlib_pool_head = create_pool("zlib_head", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_head;
+ ctx->zlib_head = buf = pool_alloc(pool);
+ break;
+
+ case 4:
+ if (zlib_pool_pending_buf == NULL) {
+ HA_SPIN_LOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ if (zlib_pool_pending_buf == NULL)
+ zlib_pool_pending_buf = create_pool("zlib_pending_buf", size * items, MEM_F_SHARED);
+ HA_SPIN_UNLOCK(COMP_POOL_LOCK, &comp_pool_lock);
+ }
+ pool = zlib_pool_pending_buf;
+ ctx->zlib_pending_buf = buf = pool_alloc(pool);
+ break;
+ }
+ if (buf != NULL) {
+ _HA_ATOMIC_ADD(&zlib_used_memory, pool->size);
+ __ha_barrier_atomic_store();
+ }
+
+end:
+
+ /* deflateInit2() first allocates and checks the deflate_state, then if
+ * it succeeds, it allocates the 4 other areas at once and checks them
+ * at the end. So we want to correctly count the rounds depending on when
+ * zlib is supposed to abort.
+ */
+ if (buf || round)
+ round = (round + 1) % 5;
+ return buf;
+}
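+
+/* Illustrative note (assumption based on zlib's deflateInit2_() internals):
+ * the <round> counter above relies on zlib performing exactly five
+ * allocations in a fixed order:
+ *
+ *     round 0: deflate_state
+ *     round 1: window
+ *     round 2: prev (hash chain table)
+ *     round 3: head (hash head table)
+ *     round 4: pending_buf
+ *
+ * hence the "buf || round" test: a failed first allocation makes zlib abort
+ * immediately, so the counter must not advance in that case.
+ */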
+
+static void free_zlib(void *opaque, void *ptr)
+{
+ struct comp_ctx *ctx = opaque;
+ struct pool_head *pool = NULL;
+
+ if (ptr == ctx->zlib_window)
+ pool = zlib_pool_window;
+ else if (ptr == ctx->zlib_deflate_state)
+ pool = zlib_pool_deflate_state;
+ else if (ptr == ctx->zlib_prev)
+ pool = zlib_pool_prev;
+ else if (ptr == ctx->zlib_head)
+ pool = zlib_pool_head;
+ else if (ptr == ctx->zlib_pending_buf)
+ pool = zlib_pool_pending_buf;
+ else {
+ // never matched, just to silence gcc
+ ABORT_NOW();
+ return;
+ }
+
+ pool_free(pool, ptr);
+ _HA_ATOMIC_SUB(&zlib_used_memory, pool->size);
+ __ha_barrier_atomic_store();
+}
+
+/**************************
+****  gzip algorithm   ****
+***************************/
+static int gzip_init(struct comp_ctx **comp_ctx, int level)
+{
+ z_stream *strm;
+
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ strm = &(*comp_ctx)->strm;
+
+ if (deflateInit2(strm, level, Z_DEFLATED, global_tune_zlibwindowsize + 16, global_tune_zlibmemlevel, Z_DEFAULT_STRATEGY) != Z_OK) {
+ deinit_comp_ctx(comp_ctx);
+ return -1;
+ }
+
+ (*comp_ctx)->cur_lvl = level;
+
+ return 0;
+}
+
+/* Raw deflate algorithm */
+static int raw_def_init(struct comp_ctx **comp_ctx, int level)
+{
+ z_stream *strm;
+
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ strm = &(*comp_ctx)->strm;
+
+ if (deflateInit2(strm, level, Z_DEFLATED, -global_tune_zlibwindowsize, global_tune_zlibmemlevel, Z_DEFAULT_STRATEGY) != Z_OK) {
+ deinit_comp_ctx(comp_ctx);
+ return -1;
+ }
+
+ (*comp_ctx)->cur_lvl = level;
+ return 0;
+}
+
+/**************************
+**** Deflate algorithm ****
+***************************/
+
+static int deflate_init(struct comp_ctx **comp_ctx, int level)
+{
+ z_stream *strm;
+
+ if (init_comp_ctx(comp_ctx) < 0)
+ return -1;
+
+ strm = &(*comp_ctx)->strm;
+
+ if (deflateInit2(strm, level, Z_DEFLATED, global_tune_zlibwindowsize, global_tune_zlibmemlevel, Z_DEFAULT_STRATEGY) != Z_OK) {
+ deinit_comp_ctx(comp_ctx);
+ return -1;
+ }
+
+ (*comp_ctx)->cur_lvl = level;
+
+ return 0;
+}
+
+/* Return the size of consumed data or -1 */
+static int deflate_add_data(struct comp_ctx *comp_ctx, const char *in_data, int in_len, struct buffer *out)
+{
+ int ret;
+ z_stream *strm = &comp_ctx->strm;
+ char *out_data = b_tail(out);
+ int out_len = b_room(out);
+
+ if (in_len <= 0)
+ return 0;
+
+ if (out_len <= 0)
+ return -1;
+
+ strm->next_in = (unsigned char *)in_data;
+ strm->avail_in = in_len;
+ strm->next_out = (unsigned char *)out_data;
+ strm->avail_out = out_len;
+
+ ret = deflate(strm, Z_NO_FLUSH);
+ if (ret != Z_OK)
+ return -1;
+
+ /* deflate updated the available output room; account for the emitted bytes */
+ b_add(out, out_len - strm->avail_out);
+
+ return in_len - strm->avail_in;
+}
+
+static int deflate_flush_or_finish(struct comp_ctx *comp_ctx, struct buffer *out, int flag)
+{
+ int ret;
+ int out_len = 0;
+ z_stream *strm = &comp_ctx->strm;
+
+ strm->next_in = NULL;
+ strm->avail_in = 0;
+ strm->next_out = (unsigned char *)b_tail(out);
+ strm->avail_out = b_room(out);
+
+ ret = deflate(strm, flag);
+ if (ret != Z_OK && ret != Z_STREAM_END)
+ return -1;
+
+ out_len = b_room(out) - strm->avail_out;
+ b_add(out, out_len);
+
+ /* compression limit */
+ if ((global.comp_rate_lim > 0 && (read_freq_ctr(&global.comp_bps_out) > global.comp_rate_lim)) || /* rate */
+ (th_ctx->idle_pct < compress_min_idle)) { /* idle */
+ /* decrease level */
+ if (comp_ctx->cur_lvl > 0) {
+ comp_ctx->cur_lvl--;
+ deflateParams(&comp_ctx->strm, comp_ctx->cur_lvl, Z_DEFAULT_STRATEGY);
+ }
+
+ } else if (comp_ctx->cur_lvl < global.tune.comp_maxlevel) {
+ /* increase level */
+ comp_ctx->cur_lvl++;
+ deflateParams(&comp_ctx->strm, comp_ctx->cur_lvl, Z_DEFAULT_STRATEGY);
+ }
+
+ return out_len;
+}
+
+static int deflate_flush(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return deflate_flush_or_finish(comp_ctx, out, Z_SYNC_FLUSH);
+}
+
+static int deflate_finish(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+ return deflate_flush_or_finish(comp_ctx, out, Z_FINISH);
+}
+
+static int deflate_end(struct comp_ctx **comp_ctx)
+{
+ z_stream *strm = &(*comp_ctx)->strm;
+ int ret;
+
+ ret = deflateEnd(strm);
+
+ deinit_comp_ctx(comp_ctx);
+
+ return ret;
+}
+
+/* config parser for global "tune.zlibmemlevel" */
+static int zlib_parse_global_memlevel(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a numeric value between 1 and 9.", args[0]);
+ return -1;
+ }
+
+ global_tune_zlibmemlevel = atoi(args[1]);
+ if (global_tune_zlibmemlevel < 1 || global_tune_zlibmemlevel > 9) {
+ memprintf(err, "'%s' expects a numeric value between 1 and 9.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+
+/* config parser for global "tune.zlibwindowsize" */
+static int zlib_parse_global_windowsize(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a numeric value between 8 and 15.", args[0]);
+ return -1;
+ }
+
+ global_tune_zlibwindowsize = atoi(args[1]);
+ if (global_tune_zlibwindowsize < 8 || global_tune_zlibwindowsize > 15) {
+ memprintf(err, "'%s' expects a numeric value between 8 and 15.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+#endif /* USE_ZLIB */
+
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+#ifdef USE_ZLIB
+ { CFG_GLOBAL, "tune.zlib.memlevel", zlib_parse_global_memlevel },
+ { CFG_GLOBAL, "tune.zlib.windowsize", zlib_parse_global_windowsize },
+#endif
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+static void comp_register_build_opts(void)
+{
+ char *ptr = NULL;
+ int i;
+
+#ifdef USE_ZLIB
+ memprintf(&ptr, "Built with zlib version : " ZLIB_VERSION);
+ memprintf(&ptr, "%s\nRunning on zlib version : %s", ptr, zlibVersion());
+#elif defined(USE_SLZ)
+ memprintf(&ptr, "Built with libslz for stateless compression.");
+#else
+ memprintf(&ptr, "Built without compression support (neither USE_ZLIB nor USE_SLZ are set).");
+#endif
+ memprintf(&ptr, "%s\nCompression algorithms supported :", ptr);
+
+ for (i = 0; comp_algos[i].cfg_name; i++)
+ memprintf(&ptr, "%s%s %s(\"%s\")", ptr, (i == 0 ? "" : ","), comp_algos[i].cfg_name, comp_algos[i].ua_name);
+
+ if (i == 0)
+ memprintf(&ptr, "%s none", ptr);
+
+ hap_register_build_opts(ptr, 1);
+}
+
+INITCALL0(STG_REGISTER, comp_register_build_opts);
diff --git a/src/connection.c b/src/connection.c
new file mode 100644
index 0000000..7930cc4
--- /dev/null
+++ b/src/connection.c
@@ -0,0 +1,2748 @@
+/*
+ * Connection management functions
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/fd.h>
+#include <haproxy/frontend.h>
+#include <haproxy/hash.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proto_rhttp.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+
+DECLARE_POOL(pool_head_connection, "connection", sizeof(struct connection));
+DECLARE_POOL(pool_head_conn_hash_node, "conn_hash_node", sizeof(struct conn_hash_node));
+DECLARE_POOL(pool_head_sockaddr, "sockaddr", sizeof(struct sockaddr_storage));
+DECLARE_POOL(pool_head_pp_tlv_128, "pp_tlv_128", sizeof(struct conn_tlv_list) + HA_PP2_TLV_VALUE_128);
+DECLARE_POOL(pool_head_pp_tlv_256, "pp_tlv_256", sizeof(struct conn_tlv_list) + HA_PP2_TLV_VALUE_256);
+
+struct idle_conns idle_conns[MAX_THREADS] = { };
+struct xprt_ops *registered_xprt[XPRT_ENTRIES] = { NULL, };
+
+/* List head of all known muxes for PROTO */
+struct mux_proto_list mux_proto_list = {
+ .list = LIST_HEAD_INIT(mux_proto_list.list)
+};
+
+struct mux_stopping_data mux_stopping_data[MAX_THREADS];
+
+/* disables sending of proxy-protocol-v2's LOCAL command */
+static int pp2_never_send_local;
+
+/* find the value of a received TLV for a given type */
+struct conn_tlv_list *conn_get_tlv(struct connection *conn, int type)
+{
+ struct conn_tlv_list *tlv = NULL;
+
+ if (!conn)
+ return NULL;
+
+ list_for_each_entry(tlv, &conn->tlv_list, list) {
+ if (tlv->type == type)
+ return tlv;
+ }
+
+ return NULL;
+}
+
+/* Remove <conn> idle connection from its attached tree (idle, safe or avail).
+ * If also present in the secondary server idle list, conn is removed from it.
+ *
+ * Must be called with idle_conns_lock held.
+ */
+void conn_delete_from_tree(struct connection *conn)
+{
+ LIST_DEL_INIT(&conn->idle_list);
+ eb64_delete(&conn->hash_node->node);
+}
+
+int conn_create_mux(struct connection *conn)
+{
+ if (conn_is_back(conn)) {
+ struct server *srv;
+ struct stconn *sc = conn->ctx;
+ struct session *sess = conn->owner;
+
+ if (conn->flags & CO_FL_ERROR)
+ goto fail;
+
+ if (sess && obj_type(sess->origin) == OBJ_TYPE_CHECK) {
+ if (conn_install_mux_chk(conn, conn->ctx, sess) < 0)
+ goto fail;
+ }
+ else if (conn_install_mux_be(conn, conn->ctx, sess, NULL) < 0)
+ goto fail;
+ srv = objt_server(conn->target);
+
+ /* If we're doing http-reuse always, and the connection is not
+ * private with available streams (an http2 connection), add it
+ * to the available list, so that others can use it right
+ * away. If the connection is private, add it in the session
+ * server list.
+ */
+ if (srv && ((srv->proxy->options & PR_O_REUSE_MASK) == PR_O_REUSE_ALWS) &&
+ !(conn->flags & CO_FL_PRIVATE) && conn->mux->avail_streams(conn) > 0) {
+ srv_add_to_avail_list(srv, conn);
+ }
+ else if (conn->flags & CO_FL_PRIVATE) {
+ /* If it fails now, the same will be done in the mux->detach() callback */
+ session_add_conn(sess, conn, conn->target);
+ }
+ return 0;
+fail:
+ /* let the upper layer know the connection failed */
+ if (sc) {
+ sc->app_ops->wake(sc);
+ }
+ else if (conn_reverse_in_preconnect(conn)) {
+ struct listener *l = conn_active_reverse_listener(conn);
+
+ /* If mux init failed, consider connection on error.
+ * This is necessary to ensure connection is freed by
+ * proto-rhttp receiver task.
+ */
+ if (!conn->mux)
+ conn->flags |= CO_FL_ERROR;
+
+ /* If connection is interrupted without CO_FL_ERROR, receiver task won't free it. */
+ BUG_ON(!(conn->flags & CO_FL_ERROR));
+
+ task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY);
+ }
+ return -1;
+ } else
+ return conn_complete_session(conn);
+}
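+
+/* Illustrative example (not in the original source): with "http-reuse
+ * always" on the backend, a fresh h2 backend connection that still has
+ * spare streams is published in the server's avail list right after mux
+ * creation, so another stream can multiplex onto it immediately. A
+ * CO_FL_PRIVATE connection (e.g. one carrying NTLM authentication) is
+ * instead attached to its session only.
+ */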
+
+/* This is used at the end of the socket IOCB to possibly create the mux if it
+ * was not done yet, or wake it up if flags changed compared to old_flags or if
+ * need_wake insists on this. It returns <0 if the connection was destroyed and
+ * must not be used, >=0 otherwise.
+ */
+int conn_notify_mux(struct connection *conn, int old_flags, int forced_wake)
+{
+ int ret = 0;
+
+ /* If we don't yet have a mux, that means we were waiting for
+ * information to create one, typically from the ALPN. If we're
+ * done with the handshake, attempt to create one.
+ */
+ if (unlikely(!conn->mux) && !(conn->flags & CO_FL_WAIT_XPRT)) {
+ ret = conn_create_mux(conn);
+ if (ret < 0)
+ goto done;
+ }
+
+ /* The wake callback is normally used to notify the data layer about
+ * data layer activity (successful send/recv), connection establishment,
+ * shutdown and fatal errors. We need to consider the following
+ * situations to wake up the data layer :
+ * - change among the CO_FL_NOTIFY_DONE flags :
+ * SOCK_{RD,WR}_SH, ERROR,
+ * - absence of any of {L4,L6}_CONN and CONNECTED, indicating the
+ * end of handshake and transition to CONNECTED
+ * - raise of CONNECTED with HANDSHAKE down
+ * - end of HANDSHAKE with CONNECTED set
+ * - regular data layer activity
+ *
+ * One tricky case is the wake up on read0 or error on an idle
+ * backend connection, that can happen on a connection that is still
+ * polled while at the same moment another thread is about to perform a
+ * takeover. The solution against this is to remove the connection from
+ * the idle list if it was in it, and possibly reinsert it at the end
+ * if the connection remains valid. The cost is non-zero (locked tree
+ * removal) but remains low given that this is extremely rarely called.
+ * In any case it's guaranteed by the FD's thread_mask that we're
+ * called from the same thread the connection is queued in.
+ *
+ * Note that the wake callback is allowed to release the connection and
+ * the fd (and return < 0 in this case).
+ */
+ if ((forced_wake ||
+ ((conn->flags ^ old_flags) & CO_FL_NOTIFY_DONE) ||
+ ((old_flags & CO_FL_WAIT_XPRT) && !(conn->flags & CO_FL_WAIT_XPRT))) &&
+ conn->mux && conn->mux->wake) {
+ uint conn_in_list = conn->flags & CO_FL_LIST_MASK;
+ struct server *srv = objt_server(conn->target);
+
+ if (conn_in_list) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(conn);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ ret = conn->mux->wake(conn);
+ if (ret < 0)
+ goto done;
+
+ if (conn_in_list) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ _srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ }
+ done:
+ return ret;
+}
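+
+/* Illustrative sequence (not in the original source) of the idle-list dance
+ * above, for a read0 arriving on an idle backend connection that another
+ * thread may concurrently try to take over:
+ *
+ *     T1: lock; conn_delete_from_tree(conn); unlock    -> conn invisible
+ *     T1: conn->mux->wake(conn)                        -> conn still valid
+ *     T1: lock; _srv_add_idle(srv, conn, safe); unlock -> visible again
+ *
+ * A takeover from another thread can thus only observe the connection
+ * outside of the wake() call, never while the mux is handling the event.
+ */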
+
+/* Change the mux for the connection.
+ * The caller should make sure he's not subscribed to the underlying XPRT.
+ */
+int conn_upgrade_mux_fe(struct connection *conn, void *ctx, struct buffer *buf,
+ struct ist mux_proto, int mode)
+{
+ struct bind_conf *bind_conf = __objt_listener(conn->target)->bind_conf;
+ const struct mux_ops *old_mux, *new_mux;
+ void *old_mux_ctx;
+ const char *alpn_str = NULL;
+ int alpn_len = 0;
+
+ if (!mux_proto.len) {
+ conn_get_alpn(conn, &alpn_str, &alpn_len);
+ mux_proto = ist2(alpn_str, alpn_len);
+ }
+ new_mux = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_FE, mode);
+ old_mux = conn->mux;
+
+ /* No mux found */
+ if (!new_mux)
+ return -1;
+
+ /* Same mux, nothing to do */
+ if (old_mux == new_mux)
+ return 0;
+
+ old_mux_ctx = conn->ctx;
+ conn->mux = new_mux;
+ conn->ctx = ctx;
+ if (new_mux->init(conn, bind_conf->frontend, conn->owner, buf) == -1) {
+ /* The mux upgrade failed, so restore the old mux */
+ conn->ctx = old_mux_ctx;
+ conn->mux = old_mux;
+ return -1;
+ }
+
+ /* The mux was upgraded, destroy the old one */
+ *buf = BUF_NULL;
+ old_mux->destroy(old_mux_ctx);
+ return 0;
+}
+
+/* installs the best mux for incoming connection <conn> using the upper context
+ * <ctx>. If the mux protocol is forced, we use it to find the best
+ * mux. Otherwise we use the ALPN name, if any. Returns < 0 on error.
+ */
+int conn_install_mux_fe(struct connection *conn, void *ctx)
+{
+ struct bind_conf *bind_conf = __objt_listener(conn->target)->bind_conf;
+ const struct mux_ops *mux_ops;
+
+ if (bind_conf->mux_proto)
+ mux_ops = bind_conf->mux_proto->mux;
+ else {
+ struct ist mux_proto;
+ const char *alpn_str = NULL;
+ int alpn_len = 0;
+ int mode;
+
+ if (bind_conf->frontend->mode == PR_MODE_HTTP)
+ mode = PROTO_MODE_HTTP;
+ else
+ mode = PROTO_MODE_TCP;
+
+ conn_get_alpn(conn, &alpn_str, &alpn_len);
+ mux_proto = ist2(alpn_str, alpn_len);
+ mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_FE, mode);
+ if (!mux_ops)
+ return -1;
+ }
+
+ /* Ensure a valid protocol is selected if connection is targeted by a
+ * tcp-request session attach-srv rule.
+ */
+ if (conn->reverse.target && !(mux_ops->flags & MX_FL_REVERSABLE)) {
+ conn->err_code = CO_ER_REVERSE;
+ return -1;
+ }
+
+ return conn_install_mux(conn, mux_ops, ctx, bind_conf->frontend, conn->owner);
+}
+
+/* installs the best mux for outgoing connection <conn> using the upper context
+ * <ctx>. If the server mux protocol is forced, we use it to find the best mux.
+ * It's also possible to specify an alternative mux protocol <force_mux_ops>,
+ * in which case it will be used instead of the default server mux protocol.
+ *
+ * Returns < 0 on error.
+ */
+int conn_install_mux_be(struct connection *conn, void *ctx, struct session *sess,
+ const struct mux_ops *force_mux_ops)
+{
+ struct server *srv = objt_server(conn->target);
+ struct proxy *prx = objt_proxy(conn->target);
+ const struct mux_ops *mux_ops;
+
+ if (srv)
+ prx = srv->proxy;
+
+ if (!prx) // target must be either proxy or server
+ return -1;
+
+ if (srv && srv->mux_proto && likely(!force_mux_ops)) {
+ mux_ops = srv->mux_proto->mux;
+ }
+ else if (srv && unlikely(force_mux_ops)) {
+ mux_ops = force_mux_ops;
+ }
+ else {
+ struct ist mux_proto;
+ const char *alpn_str = NULL;
+ int alpn_len = 0;
+ int mode;
+
+ if (prx->mode == PR_MODE_HTTP)
+ mode = PROTO_MODE_HTTP;
+ else
+ mode = PROTO_MODE_TCP;
+
+ conn_get_alpn(conn, &alpn_str, &alpn_len);
+ mux_proto = ist2(alpn_str, alpn_len);
+
+ mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_BE, mode);
+ if (!mux_ops)
+ return -1;
+ }
+ return conn_install_mux(conn, mux_ops, ctx, prx, sess);
+}
+
+/* installs the best mux for outgoing connection <conn> for a check using the
+ * upper context <ctx>. If the mux protocol is forced by the check, we use it to
+ * find the best mux. Returns < 0 on error.
+ */
+int conn_install_mux_chk(struct connection *conn, void *ctx, struct session *sess)
+{
+ struct check *check = objt_check(sess->origin);
+ struct server *srv = objt_server(conn->target);
+ struct proxy *prx = objt_proxy(conn->target);
+ const struct mux_ops *mux_ops;
+
+ if (!check) // Check must be defined
+ return -1;
+
+ if (srv)
+ prx = srv->proxy;
+
+ if (!prx) // target must be either proxy or server
+ return -1;
+
+ if (check->mux_proto)
+ mux_ops = check->mux_proto->mux;
+ else {
+ struct ist mux_proto;
+ const char *alpn_str = NULL;
+ int alpn_len = 0;
+ int mode;
+
+ if ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK)
+ mode = PROTO_MODE_HTTP;
+ else
+ mode = PROTO_MODE_TCP;
+
+ conn_get_alpn(conn, &alpn_str, &alpn_len);
+ mux_proto = ist2(alpn_str, alpn_len);
+
+ mux_ops = conn_get_best_mux(conn, mux_proto, PROTO_SIDE_BE, mode);
+ if (!mux_ops)
+ return -1;
+ }
+ return conn_install_mux(conn, mux_ops, ctx, prx, sess);
+}
+
+/* Set the ALPN of connection <conn> to <alpn>. If force is false, <alpn> must
+ * be a subset or identical to the registered protos for the parent SSL_CTX.
+ * In this case <alpn> must be a single protocol value, not a list.
+ *
+ * Returns 0 if ALPN is updated else -1.
+ */
+int conn_update_alpn(struct connection *conn, const struct ist alpn, int force)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ size_t alpn_len = istlen(alpn);
+ char *ctx_alpn_str = NULL;
+ int ctx_alpn_len = 0, found = 0;
+
+ /* if not force, first search if alpn is a subset or identical to the
+ * parent SSL_CTX.
+ */
+ if (!force) {
+ /* retrieve the SSL_CTX according to the connection side. */
+ if (conn_is_back(conn)) {
+ if (obj_type(conn->target) == OBJ_TYPE_SERVER) {
+ struct server *srv = __objt_server(conn->target);
+ ctx_alpn_str = srv->ssl_ctx.alpn_str;
+ ctx_alpn_len = srv->ssl_ctx.alpn_len;
+ }
+ }
+ else {
+ struct session *sess = conn->owner;
+ struct listener *li = sess->listener;
+
+ if (li->bind_conf && li->bind_conf->options & BC_O_USE_SSL) {
+ ctx_alpn_str = li->bind_conf->ssl_conf.alpn_str;
+ ctx_alpn_len = li->bind_conf->ssl_conf.alpn_len;
+ }
+ }
+
+ if (ctx_alpn_str) {
+ /* search if ALPN is present in SSL_CTX ALPN before
+ * using it.
+ */
+ while (ctx_alpn_len) {
+ /* skip entries whose length does not match <alpn>'s */
+ if (*ctx_alpn_str != alpn_len - 1) {
+ ctx_alpn_len -= *ctx_alpn_str + 1;
+ }
+ else {
+ if (isteqi(ist2(ctx_alpn_str, alpn_len), alpn)) {
+ found = 1;
+ break;
+ }
+ }
+ ctx_alpn_str += *ctx_alpn_str + 1;
+
+ /* This indicates an invalidly formatted
+ * ALPN string and should never happen. */
+ BUG_ON(ctx_alpn_len < 0);
+ }
+ }
+ }
+
+ if (found || force) {
+ ssl_sock_set_alpn(conn, (const uchar *)istptr(alpn), istlen(alpn));
+ return 0;
+ }
+
+#endif
+ return -1;
+}
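+
+/* Illustrative, standalone sketch: the registered ALPN list uses the TLS wire
+ * format, i.e. a sequence of length-prefixed protocol names, and the lookup
+ * above assumes <alpn> carries the same one-byte length prefix (hence the
+ * comparison against alpn_len - 1). A minimal equivalent of that lookup could
+ * look like the code below, except that the loop above compares
+ * case-insensitively via isteqi().
+ */
+#if 0
+#include <string.h>
+
+/* returns 1 if the length-prefixed <proto> (e.g. "\x02h2", 3 bytes) appears
+ * in the length-prefixed <list> of <list_len> bytes, 0 otherwise.
+ */
+static int alpn_list_contains(const char *list, int list_len,
+ const char *proto, int proto_len)
+{
+ while (list_len > 0) {
+ int entry_len = (unsigned char)list[0] + 1; /* length byte + name */
+
+ if (entry_len == proto_len && memcmp(list, proto, proto_len) == 0)
+ return 1;
+ list += entry_len;
+ list_len -= entry_len;
+ }
+ return 0;
+}
+/* e.g. alpn_list_contains("\x02h2\x08http/1.1", 12, "\x02h2", 3) == 1 */
+#endif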
+
+/* Initializes all required fields for a new connection. Note that it does the
+ * minimum acceptable initialization for a connection that already exists and
+ * is about to be reused. It also leaves the addresses untouched, which makes
+ * it usable across connection retries to reset a connection to a known state.
+ */
+void conn_init(struct connection *conn, void *target)
+{
+ conn->obj_type = OBJ_TYPE_CONN;
+ conn->flags = CO_FL_NONE;
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ conn->owner = NULL;
+ conn->send_proxy_ofs = 0;
+ conn->handle.fd = DEAD_FD_MAGIC;
+ conn->err_code = CO_ER_NONE;
+ conn->target = target;
+ conn->destroy_cb = NULL;
+ conn->proxy_netns = NULL;
+ MT_LIST_INIT(&conn->toremove_list);
+ if (conn_is_back(conn))
+ LIST_INIT(&conn->session_list);
+ else
+ LIST_INIT(&conn->stopping_list);
+ LIST_INIT(&conn->tlv_list);
+ conn->subs = NULL;
+ conn->src = NULL;
+ conn->dst = NULL;
+ conn->hash_node = NULL;
+ conn->xprt = NULL;
+ conn->reverse.target = NULL;
+ conn->reverse.name = BUF_NULL;
+}
+
+/* Initialize members used for backend connections.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int conn_backend_init(struct connection *conn)
+{
+ if (!sockaddr_alloc(&conn->dst, 0, 0))
+ return 1;
+
+ conn->hash_node = conn_alloc_hash_node(conn);
+ if (unlikely(!conn->hash_node))
+ return 1;
+
+ return 0;
+}
+
+/* Release connection elements reserved for backend side usage. It also takes
+ * care to detach it if linked to a session or a server instance.
+ *
+ * This function is useful when freeing a connection or reversing it to the
+ * frontend side.
+ */
+static void conn_backend_deinit(struct connection *conn)
+{
+ /* If the connection is owned by the session, remove it from its list
+ */
+ if (conn_is_back(conn) && LIST_INLIST(&conn->session_list)) {
+ session_unown_conn(conn->owner, conn);
+ }
+ else if (!(conn->flags & CO_FL_PRIVATE)) {
+ if (obj_type(conn->target) == OBJ_TYPE_SERVER)
+ srv_release_conn(__objt_server(conn->target), conn);
+ }
+
+ /* Make sure the connection is not left in the idle connection tree */
+ if (conn->hash_node != NULL)
+ BUG_ON(conn->hash_node->node.node.leaf_p != NULL);
+
+ pool_free(pool_head_conn_hash_node, conn->hash_node);
+ conn->hash_node = NULL;
+
+}
+
+/* Tries to allocate a new connection and initializes its main fields. The
+ * connection is returned on success, NULL on failure. The connection must
+ * be released using pool_free() or conn_free().
+ */
+struct connection *conn_new(void *target)
+{
+ struct connection *conn;
+
+ conn = pool_alloc(pool_head_connection);
+ if (unlikely(!conn))
+ return NULL;
+
+ conn_init(conn, target);
+
+ if (conn_is_back(conn)) {
+ if (obj_type(target) == OBJ_TYPE_SERVER)
+ srv_use_conn(__objt_server(target), conn);
+
+ if (conn_backend_init(conn)) {
+ conn_free(conn);
+ return NULL;
+ }
+ }
+
+ return conn;
+}
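+
+/* Illustrative usage sketch (hypothetical fragment, assuming <srv> points to
+ * a valid struct server): a backend connection is typically obtained from and
+ * returned to the pool as follows.
+ */
+#if 0
+ struct connection *conn = conn_new(&srv->obj_type);
+
+ if (!conn)
+ goto fail; /* allocation failure */
+ /* ... install a mux and use the connection ... */
+ conn_free(conn);
+#endif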
+
+/* Releases a connection previously allocated by conn_new() */
+void conn_free(struct connection *conn)
+{
+ struct conn_tlv_list *tlv, *tlv_back = NULL;
+
+ if (conn_is_back(conn))
+ conn_backend_deinit(conn);
+
+ /* Remove the conn from toremove_list.
+ *
+ * This is needed to prevent a double-free in case the connection was
+ * already scheduled for cleaning but is freed earlier via another
+ * call.
+ */
+ MT_LIST_DELETE(&conn->toremove_list);
+
+ sockaddr_free(&conn->src);
+ sockaddr_free(&conn->dst);
+
+ /* Free all previously allocated TLVs */
+ list_for_each_entry_safe(tlv, tlv_back, &conn->tlv_list, list) {
+ LIST_DELETE(&tlv->list);
+ if (tlv->len > HA_PP2_TLV_VALUE_256)
+ free(tlv);
+ else if (tlv->len <= HA_PP2_TLV_VALUE_128)
+ pool_free(pool_head_pp_tlv_128, tlv);
+ else
+ pool_free(pool_head_pp_tlv_256, tlv);
+ }
+
+ ha_free(&conn->reverse.name.area);
+
+ if (conn_reverse_in_preconnect(conn)) {
+ struct listener *l = conn_active_reverse_listener(conn);
+ rhttp_notify_preconn_err(l);
+ HA_ATOMIC_DEC(&th_ctx->nb_rhttp_conns);
+ }
+ else if (conn->flags & CO_FL_REVERSED) {
+ HA_ATOMIC_DEC(&th_ctx->nb_rhttp_conns);
+ }
+
+ conn_force_unsubscribe(conn);
+ pool_free(pool_head_connection, conn);
+}
+
+struct conn_hash_node *conn_alloc_hash_node(struct connection *conn)
+{
+ struct conn_hash_node *hash_node = NULL;
+
+ hash_node = pool_zalloc(pool_head_conn_hash_node);
+ if (unlikely(!hash_node))
+ return NULL;
+
+ hash_node->conn = conn;
+
+ return hash_node;
+}
+
+/* Allocates a struct sockaddr from the pool if needed, assigns it to *sap and
+ * returns it. If <sap> is NULL, the address is always allocated and returned.
+ * If <sap> is non-null, an address will only be allocated if it points to a
+ * NULL pointer; in this case the allocated address will be assigned there.
+ * If <orig> is non-null and <len> positive, the address in <orig> will be
+ * copied into the allocated address. In both situations the new pointer is
+ * returned.
+ */
+struct sockaddr_storage *sockaddr_alloc(struct sockaddr_storage **sap, const struct sockaddr_storage *orig, socklen_t len)
+{
+ struct sockaddr_storage *sa;
+
+ if (sap && *sap)
+ return *sap;
+
+ sa = pool_alloc(pool_head_sockaddr);
+ if (sa && orig && len > 0)
+ memcpy(sa, orig, len);
+ if (sap)
+ *sap = sa;
+ return sa;
+}
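+
+/* Illustrative usage sketch of the calling patterns documented above
+ * (hypothetical fragment):
+ */
+#if 0
+ struct sockaddr_storage *sa, *cached = NULL;
+
+ sa = sockaddr_alloc(NULL, NULL, 0); /* always allocates */
+ sockaddr_alloc(&cached, NULL, 0); /* <cached> is NULL: allocates and assigns it */
+ sockaddr_alloc(&cached, NULL, 0); /* <cached> already set: returned as-is */
+ sockaddr_free(&cached); /* releases it and resets <cached> to NULL */
+#endif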
+
+/* Releases the struct sockaddr potentially pointed to by <sap> to the pool. It
+ * may be NULL or may point to NULL. If <sap> is not NULL, a NULL is placed
+ * there.
+ */
+void sockaddr_free(struct sockaddr_storage **sap)
+{
+ if (!sap)
+ return;
+ pool_free(pool_head_sockaddr, *sap);
+ *sap = NULL;
+}
+
+/* Try to add a handshake pseudo-XPRT. If the connection's first XPRT is
+ * raw_sock, then just use the new XPRT as the connection XPRT, otherwise
+ * call the xprt's add_xprt() method.
+ * Returns 0 on success, or -1 on failure.
+ */
+int xprt_add_hs(struct connection *conn)
+{
+ void *xprt_ctx = NULL;
+ const struct xprt_ops *ops = xprt_get(XPRT_HANDSHAKE);
+ void *nextxprt_ctx = NULL;
+ const struct xprt_ops *nextxprt_ops = NULL;
+
+ if (conn->flags & CO_FL_ERROR)
+ return -1;
+ if (ops->init(conn, &xprt_ctx) < 0)
+ return -1;
+ if (conn->xprt == xprt_get(XPRT_RAW)) {
+ nextxprt_ctx = conn->xprt_ctx;
+ nextxprt_ops = conn->xprt;
+ conn->xprt_ctx = xprt_ctx;
+ conn->xprt = ops;
+ } else {
+ if (conn->xprt->add_xprt(conn, conn->xprt_ctx, xprt_ctx, ops,
+ &nextxprt_ctx, &nextxprt_ops) != 0) {
+ ops->close(conn, xprt_ctx);
+ return -1;
+ }
+ }
+ if (ops->add_xprt(conn, xprt_ctx, nextxprt_ctx, nextxprt_ops, NULL, NULL) != 0) {
+ ops->close(conn, xprt_ctx);
+ return -1;
+ }
+ return 0;
+}
+
+/* returns a human-readable message for conn->err_code, or NULL if the code
+ * is unknown.
+ */
+const char *conn_err_code_str(struct connection *c)
+{
+ switch (c->err_code) {
+ case CO_ER_NONE: return "Success";
+
+ case CO_ER_CONF_FDLIM: return "Reached configured maxconn value";
+ case CO_ER_PROC_FDLIM: return "Too many sockets on the process";
+ case CO_ER_SYS_FDLIM: return "Too many sockets on the system";
+ case CO_ER_SYS_MEMLIM: return "Out of system buffers";
+ case CO_ER_NOPROTO: return "Protocol or address family not supported";
+ case CO_ER_SOCK_ERR: return "General socket error";
+ case CO_ER_PORT_RANGE: return "Source port range exhausted";
+ case CO_ER_CANT_BIND: return "Can't bind to source address";
+ case CO_ER_FREE_PORTS: return "Out of local source ports on the system";
+ case CO_ER_ADDR_INUSE: return "Local source address already in use";
+
+ case CO_ER_PRX_EMPTY: return "Connection closed while waiting for PROXY protocol header";
+ case CO_ER_PRX_ABORT: return "Connection error while waiting for PROXY protocol header";
+ case CO_ER_PRX_TIMEOUT: return "Timeout while waiting for PROXY protocol header";
+ case CO_ER_PRX_TRUNCATED: return "Truncated PROXY protocol header received";
+ case CO_ER_PRX_NOT_HDR: return "Received something which does not look like a PROXY protocol header";
+ case CO_ER_PRX_BAD_HDR: return "Received an invalid PROXY protocol header";
+ case CO_ER_PRX_BAD_PROTO: return "Received an unhandled protocol in the PROXY protocol header";
+
+ case CO_ER_CIP_EMPTY: return "Connection closed while waiting for NetScaler Client IP header";
+ case CO_ER_CIP_ABORT: return "Connection error while waiting for NetScaler Client IP header";
+ case CO_ER_CIP_TIMEOUT: return "Timeout while waiting for a NetScaler Client IP header";
+ case CO_ER_CIP_TRUNCATED: return "Truncated NetScaler Client IP header received";
+ case CO_ER_CIP_BAD_MAGIC: return "Received an invalid NetScaler Client IP magic number";
+ case CO_ER_CIP_BAD_PROTO: return "Received an unhandled protocol in the NetScaler Client IP header";
+
+ case CO_ER_SSL_EMPTY: return "Connection closed during SSL handshake";
+ case CO_ER_SSL_ABORT: return "Connection error during SSL handshake";
+ case CO_ER_SSL_TIMEOUT: return "Timeout during SSL handshake";
+ case CO_ER_SSL_TOO_MANY: return "Too many SSL connections";
+ case CO_ER_SSL_NO_MEM: return "Out of memory when initializing an SSL connection";
+ case CO_ER_SSL_RENEG: return "Rejected a client-initiated SSL renegotiation attempt";
+ case CO_ER_SSL_CA_FAIL: return "SSL client CA chain cannot be verified";
+ case CO_ER_SSL_CRT_FAIL: return "SSL client certificate not trusted";
+ case CO_ER_SSL_MISMATCH: return "Server presented an SSL certificate different from the configured one";
+ case CO_ER_SSL_MISMATCH_SNI: return "Server presented an SSL certificate different from the expected one";
+ case CO_ER_SSL_HANDSHAKE: return "SSL handshake failure";
+ case CO_ER_SSL_HANDSHAKE_HB: return "SSL handshake failure after heartbeat";
+ case CO_ER_SSL_KILLED_HB: return "Stopped a TLSv1 heartbeat attack (CVE-2014-0160)";
+ case CO_ER_SSL_NO_TARGET: return "Attempt to use SSL on an unknown target (internal error)";
+ case CO_ER_SSL_EARLY_FAILED: return "Server refused early data";
+
+ case CO_ER_SOCKS4_SEND: return "SOCKS4 Proxy write error during handshake";
+ case CO_ER_SOCKS4_RECV: return "SOCKS4 Proxy read error during handshake";
+ case CO_ER_SOCKS4_DENY: return "SOCKS4 Proxy denied the request";
+ case CO_ER_SOCKS4_ABORT: return "SOCKS4 Proxy handshake aborted by server";
+
+ case CO_ERR_SSL_FATAL: return "SSL fatal error";
+
+ case CO_ER_REVERSE: return "Reverse connect failure";
+ }
+ return NULL;
+}
+
+/* Send a message over an established connection. It makes use of send() and
+ * returns the same return code and errno. If the socket layer is not ready yet
+ * then -1 is returned and ENOTSOCK is set into errno. If the fd is not marked
+ * as ready, or if EAGAIN or ENOTCONN is returned, then we return 0. It returns
+ * -1 with EMSGSIZE set into errno if called with a zero-length message. The
+ * purpose is to simplify some rare attempts to directly write on the socket
+ * from above the connection (typically send_proxy). In case of EAGAIN, the fd
+ * is marked as "cant_send". It automatically retries on EINTR. Other errors
+ * cause the connection to be marked as in error state. It takes similar
+ * arguments as send() except the first one which is the connection instead of
+ * the file descriptor. <flags> only supports CO_SFL_MSG_MORE.
+ */
+int conn_ctrl_send(struct connection *conn, const void *buf, int len, int flags)
+{
+ const struct buffer buffer = b_make((char*)buf, len, 0, len);
+ const struct xprt_ops *xprt = xprt_get(XPRT_RAW);
+ int ret;
+
+ ret = -1;
+ errno = ENOTSOCK;
+
+ if (conn->flags & CO_FL_SOCK_WR_SH)
+ goto fail;
+
+ if (!conn_ctrl_ready(conn))
+ goto fail;
+
+ errno = EMSGSIZE;
+ if (!len)
+ goto fail;
+
+ /* snd_buf() already takes care of updating conn->flags and handling
+ * the FD polling status.
+ */
+ ret = xprt->snd_buf(conn, NULL, &buffer, buffer.data, flags);
+ if (conn->flags & CO_FL_ERROR)
+ ret = -1;
+ return ret;
+ fail:
+ conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH | CO_FL_ERROR;
+ return ret;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The event subscriber <es> is not allowed to change from a previous call as
+ * long as at least one event is still subscribed. The <event_type> must only
+ * be a combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+int conn_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(conn->subs && conn->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ conn->subs = NULL;
+
+ if (conn_ctrl_ready(conn) && conn->ctrl->ignore_events)
+ conn->ctrl->ignore_events(conn, event_type);
+
+ return 0;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>.
+ * The <es> struct is not allowed to differ from the one passed during a
+ * previous call to subscribe(). If the connection's ctrl layer is ready,
+ * the wait_event is immediately woken up and the subscription is cancelled.
+ * It always returns zero.
+ */
+int conn_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ int ret = 0;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(conn->subs && conn->subs != es);
+
+ if (conn->subs && (conn->subs->events & event_type) == event_type)
+ return 0;
+
+ if (conn_ctrl_ready(conn) && conn->ctrl->check_events) {
+ ret = conn->ctrl->check_events(conn, event_type);
+ if (ret)
+ tasklet_wakeup(es->tasklet);
+ }
+
+ es->events = (es->events | event_type) & ~ret;
+ conn->subs = es->events ? es : NULL;
+ return 0;
+}
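+
+/* Illustrative usage sketch (hypothetical fragment, <es> being the caller's
+ * wait_event): a data layer that could not send everything subscribes for a
+ * send retry, and unsubscribes once it no longer cares about the event.
+ */
+#if 0
+ if (!(es->events & SUB_RETRY_SEND))
+ conn_subscribe(conn, NULL, SUB_RETRY_SEND, es);
+ /* ... once the send side is no longer of interest ... */
+ conn_unsubscribe(conn, NULL, SUB_RETRY_SEND, es);
+#endif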
+
+/* Drains possibly pending incoming data on the connection and updates the flags
+ * accordingly. This is used to know whether we need to disable lingering on
+ * close. Returns non-zero if it is safe to close without disabling lingering,
+ * otherwise zero. The CO_FL_SOCK_RD_SH flag may also be updated if the incoming
+ * shutdown was reported by the ->drain() function.
+ */
+int conn_ctrl_drain(struct connection *conn)
+{
+ int ret = 0;
+
+ if (!conn_ctrl_ready(conn) || conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH))
+ ret = 1;
+ else if (conn->ctrl->drain) {
+ ret = conn->ctrl->drain(conn);
+ if (ret)
+ conn->flags |= CO_FL_SOCK_RD_SH;
+ }
+ return ret;
+}
+
+/*
+ * Returns the data length encoded in TLV <src>, i.e. the 16-bit big-endian
+ * value split across length_hi and length_lo.
+ */
+static inline size_t get_tlv_length(const struct tlv *src)
+{
+ return (src->length_hi << 8) | src->length_lo;
+}
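+
+/* For example, a TLV advertising 258 (0x0102) bytes of data carries
+ * length_hi = 0x01 and length_lo = 0x02. make_tlv() further below performs
+ * the inverse split when building a TLV.
+ */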
+
+/* This handshake handler waits for a PROXY protocol header at the beginning
+ * of the raw data stream. The header looks like this :
+ *
+ * "PROXY" <SP> PROTO <SP> SRC3 <SP> DST3 <SP> SRC4 <SP> DST4 "\r\n"
+ *
+ * There must be exactly one space between each field. Fields are :
+ * - PROTO : layer 4 protocol, which must be "TCP4" or "TCP6".
+ * - SRC3 : layer 3 (eg: IP) source address in standard text form
+ * - DST3 : layer 3 (eg: IP) destination address in standard text form
+ * - SRC4 : layer 4 (eg: TCP port) source address in standard text form
+ * - DST4 : layer 4 (eg: TCP port) destination address in standard text form
+ *
+ * This line MUST be at the beginning of the buffer and MUST NOT wrap.
+ *
+ * The header line is small and in all cases smaller than the smallest normal
+ * TCP MSS. So it MUST always be delivered as one segment, which ensures we
+ * can safely use MSG_PEEK and avoid buffering.
+ *
+ * Once the data is fetched, the values are set in the connection's address
+ * fields, and data are removed from the socket's buffer. The function returns
+ * zero if it needs to wait for more data or if it fails, or 1 if it completed
+ * and removed itself.
+ */
+int conn_recv_proxy(struct connection *conn, int flag)
+{
+ struct session *sess = conn->owner;
+ char *line, *end;
+ struct proxy_hdr_v2 *hdr_v2;
+ const char v2sig[] = PP2_SIGNATURE;
+ size_t total_v2_len;
+ size_t tlv_offset = 0;
+ int ret;
+
+ if (!conn_ctrl_ready(conn))
+ goto fail;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ goto not_ready;
+
+ while (1) {
+ ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(conn->handle.fd);
+ goto not_ready;
+ }
+ goto recv_abort;
+ }
+ trash.data = ret;
+ break;
+ }
+
+ if (!trash.data) {
+ /* client shutdown */
+ conn->err_code = CO_ER_PRX_EMPTY;
+ goto fail;
+ }
+
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ if (trash.data < 6)
+ goto missing;
+
+ line = trash.area;
+ end = trash.area + trash.data;
+
+ /* Decode a possible proxy request, fail early if it does not match */
+ if (strncmp(line, "PROXY ", 6) != 0)
+ goto not_v1;
+
+ line += 6;
+ if (trash.data < 9) /* shortest possible line */
+ goto missing;
+
+ if (memcmp(line, "TCP4 ", 5) == 0) {
+ u32 src3, dst3, sport, dport;
+
+ line += 5;
+
+ src3 = inetaddr_host_lim_ret(line, end, &line);
+ if (line == end)
+ goto missing;
+ if (*line++ != ' ')
+ goto bad_header;
+
+ dst3 = inetaddr_host_lim_ret(line, end, &line);
+ if (line == end)
+ goto missing;
+ if (*line++ != ' ')
+ goto bad_header;
+
+ sport = read_uint((const char **)&line, end);
+ if (line == end)
+ goto missing;
+ if (*line++ != ' ')
+ goto bad_header;
+
+ dport = read_uint((const char **)&line, end);
+ if (line > end - 2)
+ goto missing;
+ if (*line++ != '\r')
+ goto bad_header;
+ if (*line++ != '\n')
+ goto bad_header;
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ /* update the session's addresses and mark them set */
+ ((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->src)->sin_addr.s_addr = htonl(src3);
+ ((struct sockaddr_in *)sess->src)->sin_port = htons(sport);
+
+ ((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = htonl(dst3);
+ ((struct sockaddr_in *)sess->dst)->sin_port = htons(dport);
+ }
+ else if (memcmp(line, "TCP6 ", 5) == 0) {
+ u32 sport, dport;
+ char *src_s;
+ char *dst_s, *sport_s, *dport_s;
+ struct in6_addr src3, dst3;
+
+ line += 5;
+
+ src_s = line;
+ dst_s = sport_s = dport_s = NULL;
+ while (1) {
+ if (line > end - 2) {
+ goto missing;
+ }
+ else if (*line == '\r') {
+ *line = 0;
+ line++;
+ if (*line++ != '\n')
+ goto bad_header;
+ break;
+ }
+
+ if (*line == ' ') {
+ *line = 0;
+ if (!dst_s)
+ dst_s = line + 1;
+ else if (!sport_s)
+ sport_s = line + 1;
+ else if (!dport_s)
+ dport_s = line + 1;
+ }
+ line++;
+ }
+
+ if (!dst_s || !sport_s || !dport_s)
+ goto bad_header;
+
+ sport = read_uint((const char **)&sport_s,dport_s - 1);
+ if (*sport_s != 0)
+ goto bad_header;
+
+ dport = read_uint((const char **)&dport_s,line - 2);
+ if (*dport_s != 0)
+ goto bad_header;
+
+ if (inet_pton(AF_INET6, src_s, (void *)&src3) != 1)
+ goto bad_header;
+
+ if (inet_pton(AF_INET6, dst_s, (void *)&dst3) != 1)
+ goto bad_header;
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ /* update the session's addresses and mark them set */
+ ((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)sess->src)->sin6_addr, &src3, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)sess->src)->sin6_port = htons(sport);
+
+ ((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)sess->dst)->sin6_addr, &dst3, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)sess->dst)->sin6_port = htons(dport);
+ }
+ else if (memcmp(line, "UNKNOWN\r\n", 9) == 0) {
+ /* This can be a UNIX socket forwarded by an upstream haproxy */
+ line += 9;
+ }
+ else {
+ /* The protocol does not match something known (TCP4/TCP6/UNKNOWN) */
+ conn->err_code = CO_ER_PRX_BAD_PROTO;
+ goto fail;
+ }
+
+ trash.data = line - trash.area;
+ goto eat_header;
+
+ not_v1:
+ /* try PPv2 */
+ if (trash.data < PP2_HEADER_LEN)
+ goto missing;
+
+ hdr_v2 = (struct proxy_hdr_v2 *) trash.area;
+
+ if (memcmp(hdr_v2->sig, v2sig, PP2_SIGNATURE_LEN) != 0 ||
+ (hdr_v2->ver_cmd & PP2_VERSION_MASK) != PP2_VERSION) {
+ conn->err_code = CO_ER_PRX_NOT_HDR;
+ goto fail;
+ }
+
+ total_v2_len = PP2_HEADER_LEN + ntohs(hdr_v2->len);
+ if (trash.data < total_v2_len)
+ goto missing;
+
+ switch (hdr_v2->ver_cmd & PP2_CMD_MASK) {
+ case 0x01: /* PROXY command */
+ switch (hdr_v2->fam) {
+ case 0x11: /* TCPv4 */
+ if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET)
+ goto bad_header;
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ ((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->src)->sin_addr.s_addr = hdr_v2->addr.ip4.src_addr;
+ ((struct sockaddr_in *)sess->src)->sin_port = hdr_v2->addr.ip4.src_port;
+ ((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = hdr_v2->addr.ip4.dst_addr;
+ ((struct sockaddr_in *)sess->dst)->sin_port = hdr_v2->addr.ip4.dst_port;
+ tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET;
+ break;
+ case 0x21: /* TCPv6 */
+ if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET6)
+ goto bad_header;
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ ((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)sess->src)->sin6_addr, hdr_v2->addr.ip6.src_addr, 16);
+ ((struct sockaddr_in6 *)sess->src)->sin6_port = hdr_v2->addr.ip6.src_port;
+ ((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)sess->dst)->sin6_addr, hdr_v2->addr.ip6.dst_addr, 16);
+ ((struct sockaddr_in6 *)sess->dst)->sin6_port = hdr_v2->addr.ip6.dst_port;
+ tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET6;
+ break;
+ }
+
+ /* TLV parsing */
+ while (tlv_offset < total_v2_len) {
+ struct ist tlv;
+ struct tlv *tlv_packet = NULL;
+ struct conn_tlv_list *new_tlv = NULL;
+ size_t data_len = 0;
+
+ /* Verify that we have at least TLV_HEADER_SIZE bytes left */
+ if (tlv_offset + TLV_HEADER_SIZE > total_v2_len)
+ goto bad_header;
+
+ tlv_packet = (struct tlv *) &trash.area[tlv_offset];
+ tlv = ist2((const char *)tlv_packet->value, get_tlv_length(tlv_packet));
+ tlv_offset += istlen(tlv) + TLV_HEADER_SIZE;
+
+ /* Verify that the TLV length does not exceed the total PROXYv2 length */
+ if (tlv_offset > total_v2_len)
+ goto bad_header;
+
+ /* Prepare known TLV types */
+ switch (tlv_packet->type) {
+ case PP2_TYPE_CRC32C: {
+ uint32_t n_crc32c;
+
+ /* Verify that this TLV is exactly 4 bytes long */
+ if (istlen(tlv) != PP2_CRC32C_LEN)
+ goto bad_header;
+
+ n_crc32c = read_n32(istptr(tlv));
+ write_n32(istptr(tlv), 0); // compute with CRC==0
+
+ if (hash_crc32c(trash.area, total_v2_len) != n_crc32c)
+ goto bad_header;
+ break;
+ }
+#ifdef USE_NS
+ case PP2_TYPE_NETNS: {
+ const struct netns_entry *ns;
+
+ ns = netns_store_lookup(istptr(tlv), istlen(tlv));
+ if (ns)
+ conn->proxy_netns = ns;
+ break;
+ }
+#endif
+ case PP2_TYPE_AUTHORITY: {
+ /* For now, keep HAProxy's own length restriction */
+ if (istlen(tlv) > HA_PP2_AUTHORITY_MAX)
+ goto bad_header;
+
+ break;
+ }
+ case PP2_TYPE_UNIQUE_ID: {
+ if (istlen(tlv) > UNIQUEID_LEN)
+ goto bad_header;
+ break;
+ }
+ default:
+ break;
+ }
+
+ /* If we did not find a known TLV type that we can optimize for, we generically allocate it */
+ data_len = get_tlv_length(tlv_packet);
+
+ /* Prevent attackers from allocating too much memory */
+ if (unlikely(data_len > HA_PP2_MAX_ALLOC))
+ goto fail;
+
+ /* Alloc memory based on data_len */
+ if (data_len > HA_PP2_TLV_VALUE_256)
+ new_tlv = malloc(get_tlv_length(tlv_packet) + sizeof(struct conn_tlv_list));
+ else if (data_len <= HA_PP2_TLV_VALUE_128)
+ new_tlv = pool_alloc(pool_head_pp_tlv_128);
+ else
+ new_tlv = pool_alloc(pool_head_pp_tlv_256);
+
+ if (unlikely(!new_tlv))
+ goto fail;
+
+ new_tlv->type = tlv_packet->type;
+
+ /* Save TLV to make it accessible via sample fetch */
+ memcpy(new_tlv->value, tlv.ptr, data_len);
+ new_tlv->len = data_len;
+
+ LIST_APPEND(&conn->tlv_list, &new_tlv->list);
+ }
+
+ /* Verify that the PROXYv2 header ends at a TLV boundary.
+ * This must always hold, because the TLV parsing above
+ * already verified that no TLV exceeds the total length
+ * and that there is space for each TLV header.
+ */
+ BUG_ON(tlv_offset != total_v2_len);
+
+ /* unsupported protocol, keep local connection address */
+ break;
+ case 0x00: /* LOCAL command */
+ /* keep local connection address for LOCAL */
+ break;
+ default:
+ goto bad_header; /* not a supported command */
+ }
+
+ trash.data = total_v2_len;
+ goto eat_header;
+
+ eat_header:
+ /* remove the PROXY line from the request. For this we re-read the
+ * exact line at once. If we don't get the exact same result, we
+ * fail.
+ */
+ while (1) {
+ ssize_t len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
+
+ if (len2 < 0 && errno == EINTR)
+ continue;
+ if (len2 != trash.data)
+ goto recv_abort;
+ break;
+ }
+
+ conn->flags &= ~flag;
+ conn->flags |= CO_FL_RCVD_PROXY;
+ return 1;
+
+ not_ready:
+ return 0;
+
+ missing:
+ /* Missing data. Since we're using MSG_PEEK, we can only poll again if
+ * we have not read anything. Otherwise we need to fail because we won't
+ * be able to poll anymore.
+ */
+ conn->err_code = CO_ER_PRX_TRUNCATED;
+ goto fail;
+
+ bad_header:
+ /* This is not a valid proxy protocol header */
+ conn->err_code = CO_ER_PRX_BAD_HDR;
+ goto fail;
+
+ recv_abort:
+ conn->err_code = CO_ER_PRX_ABORT;
+ conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ goto fail;
+
+ fail:
+ conn->flags |= CO_FL_ERROR;
+ return 0;
+}
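+
+/* Illustrative, standalone sketch of the sending side (with a hypothetical
+ * connected socket <fd>): a well-formed PROXY v1 line as accepted by the
+ * parser above fits in one segment and can be emitted in a single write.
+ */
+#if 0
+#include <unistd.h>
+
+static void send_proxy_v1_example(int fd)
+{
+ /* "PROXY" PROTO SRC3 DST3 SRC4 DST4 "\r\n" */
+ static const char line[] = "PROXY TCP4 192.168.0.1 192.168.0.11 56324 443\r\n";
+
+ (void)write(fd, line, sizeof(line) - 1);
+}
+#endif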
+
+/* This callback is used to send a valid PROXY protocol line to a socket being
+ * established. It returns 0 if it fails in a fatal way or needs to poll to go
+ * further, otherwise it returns non-zero and removes itself from the connection's
+ * flags (the bit is provided in <flag> by the caller). It is designed to be
+ * called by the connection handler and relies on it to commit polling changes.
+ * Note that it can emit a PROXY line by relying on the other end's address
+ * when the connection is attached to a stream connector, or by resolving the
+ * local address otherwise (also called a LOCAL line).
+ */
+int conn_send_proxy(struct connection *conn, unsigned int flag)
+{
+ if (!conn_ctrl_ready(conn))
+ goto out_error;
+
+ /* If we have a PROXY line to send, we'll use this to validate the
+ * connection, in which case the connection is validated only once
+ * we've sent the whole proxy line. Otherwise we use connect().
+ */
+ if (conn->send_proxy_ofs) {
+ struct stconn *sc;
+ int ret;
+
+ /* If there is no mux attached to the connection, it means the
+ * connection context is a stream connector.
+ */
+ sc = conn->mux ? conn_get_first_sc(conn) : conn->ctx;
+
+ /* The target server expects a PROXY line to be sent first.
+ * If the send_proxy_ofs is negative, it corresponds to the
+ * offset to start sending from the end of the proxy string
+ * (which is recomputed every time since it's constant). If
+ * it is positive, it means we have to send from the start.
+ * We can only send a "normal" PROXY line when the connection
+ * is attached to a stream connector. Otherwise we can only
+ * send a LOCAL line (eg: for use with health checks).
+ */
+
+ if (sc && sc_strm(sc)) {
+ ret = make_proxy_line(trash.area, trash.size,
+ objt_server(conn->target),
+ sc_conn(sc_opposite(sc)),
+ __sc_strm(sc));
+ }
+ else {
+ /* The target server expects a LOCAL line to be sent first. Retrieving
+ * local or remote addresses may fail until the connection is established.
+ */
+ if (!conn_get_src(conn) || !conn_get_dst(conn))
+ goto out_wait;
+
+ ret = make_proxy_line(trash.area, trash.size,
+ objt_server(conn->target), conn,
+ NULL);
+ }
+
+ if (!ret)
+ goto out_error;
+
+ if (conn->send_proxy_ofs > 0)
+ conn->send_proxy_ofs = -ret; /* first call */
+
+ /* we have to send trash from (ret+sp for -sp bytes). If the
+ * data layer has a pending write, we'll also set MSG_MORE.
+ */
+ ret = conn_ctrl_send(conn,
+ trash.area + ret + conn->send_proxy_ofs,
+ -conn->send_proxy_ofs,
+ (conn->subs && conn->subs->events & SUB_RETRY_SEND) ? CO_SFL_MSG_MORE : 0);
+
+ if (ret < 0)
+ goto out_error;
+
+ conn->send_proxy_ofs += ret; /* becomes zero once complete */
+ if (conn->send_proxy_ofs != 0)
+ goto out_wait;
+
+ /* OK we've sent the whole line, we're connected */
+ }
+
+ /* The connection is ready now, simply return and let the connection
+ * handler notify upper layers if needed.
+ */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ conn->flags &= ~flag;
+ return 1;
+
+ out_error:
+ /* Write error on the file descriptor */
+ conn->flags |= CO_FL_ERROR;
+ return 0;
+
+ out_wait:
+ return 0;
+}
+
+/* This handshake handler waits for a NetScaler Client IP insertion header
+ * at the beginning of the raw data stream. The header format is
+ * described in doc/netscaler-client-ip-insertion-protocol.txt
+ *
+ * This line MUST be at the beginning of the buffer and MUST NOT be
+ * fragmented.
+ *
+ * The header line is small and in all cases smaller than the smallest normal
+ * TCP MSS. So it MUST always be delivered as one segment, which ensures we
+ * can safely use MSG_PEEK and avoid buffering.
+ *
+ * Once the data is fetched, the values are set in the connection's address
+ * fields, and data are removed from the socket's buffer. The function returns
+ * zero if it needs to wait for more data or if it fails, or 1 if it completed
+ * and removed itself.
+ */
+int conn_recv_netscaler_cip(struct connection *conn, int flag)
+{
+ struct session *sess = conn->owner;
+ char *line;
+ uint32_t hdr_len;
+ uint8_t ip_ver;
+ int ret;
+
+ if (!conn_ctrl_ready(conn))
+ goto fail;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ goto not_ready;
+
+ while (1) {
+ ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(conn->handle.fd);
+ goto not_ready;
+ }
+ goto recv_abort;
+ }
+ trash.data = ret;
+ break;
+ }
+
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ if (!trash.data) {
+ /* client shutdown */
+ conn->err_code = CO_ER_CIP_EMPTY;
+ goto fail;
+ }
+
+ /* Fail if the buffer is not large enough to contain either
+ * <CIP magic, header length> or
+ * <CIP magic, CIP length, CIP type, header length> */
+ if (trash.data < 12)
+ goto missing;
+
+ line = trash.area;
+
+ /* Decode a possible NetScaler Client IP request, fail early if
+ * it does not match */
+ if (ntohl(read_u32(line)) != __objt_listener(conn->target)->bind_conf->ns_cip_magic)
+ goto bad_magic;
+
+ /* Legacy CIP protocol */
+ if ((trash.area[8] & 0xD0) == 0x40) {
+ hdr_len = ntohl(read_u32((line+4)));
+ line += 8;
+ }
+ /* Standard CIP protocol */
+ else if (trash.area[8] == 0x00) {
+ hdr_len = ntohs(read_u32((line+10)));
+ line += 12;
+ }
+ /* Unknown CIP protocol */
+ else {
+ conn->err_code = CO_ER_CIP_BAD_PROTO;
+ goto fail;
+ }
+
+ /* Fail if buffer length is not large enough to contain
+ * a minimal IP header */
+ if (trash.data < 20)
+ goto missing;
+
+ /* Get IP version from the first four bits */
+ ip_ver = (*line & 0xf0) >> 4;
+
+ if (ip_ver == 4) {
+ struct ip *hdr_ip4;
+ struct my_tcphdr *hdr_tcp;
+
+ hdr_ip4 = (struct ip *)line;
+
+ if (trash.data < 40 || trash.data < hdr_len) {
+ /* Fail if buffer length is not large enough to contain
+ * IPv4 header, TCP header */
+ goto missing;
+ }
+ else if (hdr_ip4->ip_p != IPPROTO_TCP) {
+ /* The protocol does not include a TCP header */
+ conn->err_code = CO_ER_CIP_BAD_PROTO;
+ goto fail;
+ }
+
+ hdr_tcp = (struct my_tcphdr *)(line + (hdr_ip4->ip_hl * 4));
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ /* update the session's addresses and mark them set */
+ ((struct sockaddr_in *)sess->src)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->src)->sin_addr.s_addr = hdr_ip4->ip_src.s_addr;
+ ((struct sockaddr_in *)sess->src)->sin_port = hdr_tcp->source;
+
+ ((struct sockaddr_in *)sess->dst)->sin_family = AF_INET;
+ ((struct sockaddr_in *)sess->dst)->sin_addr.s_addr = hdr_ip4->ip_dst.s_addr;
+ ((struct sockaddr_in *)sess->dst)->sin_port = hdr_tcp->dest;
+ }
+ else if (ip_ver == 6) {
+ struct ip6_hdr *hdr_ip6;
+ struct my_tcphdr *hdr_tcp;
+
+ hdr_ip6 = (struct ip6_hdr *)line;
+
+ if (trash.data < 60 || trash.data < hdr_len) {
+ /* Fail if buffer length is not large enough to contain
+ * IPv6 header, TCP header */
+ goto missing;
+ }
+ else if (hdr_ip6->ip6_nxt != IPPROTO_TCP) {
+ /* The protocol does not include a TCP header */
+ conn->err_code = CO_ER_CIP_BAD_PROTO;
+ goto fail;
+ }
+
+ hdr_tcp = (struct my_tcphdr *)(line + sizeof(struct ip6_hdr));
+
+ if (!sess || !sockaddr_alloc(&sess->src, NULL, 0) || !sockaddr_alloc(&sess->dst, NULL, 0))
+ goto fail;
+
+ /* update the session's addresses and mark them set */
+ ((struct sockaddr_in6 *)sess->src)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *)sess->src)->sin6_addr = hdr_ip6->ip6_src;
+ ((struct sockaddr_in6 *)sess->src)->sin6_port = hdr_tcp->source;
+
+ ((struct sockaddr_in6 *)sess->dst)->sin6_family = AF_INET6;
+ ((struct sockaddr_in6 *)sess->dst)->sin6_addr = hdr_ip6->ip6_dst;
+ ((struct sockaddr_in6 *)sess->dst)->sin6_port = hdr_tcp->dest;
+ }
+ else {
+ /* The protocol does not match something known (IPv4/IPv6) */
+ conn->err_code = CO_ER_CIP_BAD_PROTO;
+ goto fail;
+ }
+
+ line += hdr_len;
+ trash.data = line - trash.area;
+
+ /* remove the NetScaler Client IP header from the request. For this
+ * we re-read the exact line at once. If we don't get the exact same
+ * result, we fail.
+ */
+ while (1) {
+ int len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
+ if (len2 < 0 && errno == EINTR)
+ continue;
+ if (len2 != trash.data)
+ goto recv_abort;
+ break;
+ }
+
+ conn->flags &= ~flag;
+ return 1;
+
+ not_ready:
+ return 0;
+
+ missing:
+ /* Missing data. Since we're using MSG_PEEK, we can only poll again if
+ * we have not read anything. Otherwise we need to fail because we won't
+ * be able to poll anymore.
+ */
+ conn->err_code = CO_ER_CIP_TRUNCATED;
+ goto fail;
+
+ bad_magic:
+ conn->err_code = CO_ER_CIP_BAD_MAGIC;
+ goto fail;
+
+ recv_abort:
+ conn->err_code = CO_ER_CIP_ABORT;
+ conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ goto fail;
+
+ fail:
+ conn->flags |= CO_FL_ERROR;
+ return 0;
+}
+
+
+int conn_send_socks4_proxy_request(struct connection *conn)
+{
+ struct socks4_request req_line;
+
+ if (!conn_ctrl_ready(conn))
+ goto out_error;
+
+ if (!conn_get_dst(conn))
+ goto out_error;
+
+ req_line.version = 0x04;
+ req_line.command = 0x01;
+ req_line.port = get_net_port(conn->dst);
+ req_line.ip = is_inet_addr(conn->dst);
+ memcpy(req_line.user_id, "HAProxy\0", 8);
+
+ if (conn->send_proxy_ofs > 0) {
+ /*
+ * This is the first call to send the request
+ */
+ conn->send_proxy_ofs = -(int)sizeof(req_line);
+ }
+
+ if (conn->send_proxy_ofs < 0) {
+ int ret = 0;
+
+ /* we are sending the socks4_req_line here. If the data layer
+ * has a pending write, we'll also set MSG_MORE.
+ */
+ ret = conn_ctrl_send(
+ conn,
+ ((char *)(&req_line)) + (sizeof(req_line)+conn->send_proxy_ofs),
+ -conn->send_proxy_ofs,
+ (conn->subs && conn->subs->events & SUB_RETRY_SEND) ? CO_SFL_MSG_MORE : 0);
+
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Before send remain is [%d], sent [%d]\n",
+ conn_fd(conn), -conn->send_proxy_ofs, ret);
+
+ if (ret < 0) {
+ goto out_error;
+ }
+
+ conn->send_proxy_ofs += ret; /* becomes zero once complete */
+ if (conn->send_proxy_ofs != 0) {
+ goto out_wait;
+ }
+ }
+
+ /* OK we've sent the whole request */
+ conn->flags &= ~CO_FL_SOCKS4_SEND;
+
+ /* The connection is ready now, simply return and let the connection
+ * handler notify upper layers if needed.
+ */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ if (conn->flags & CO_FL_SEND_PROXY) {
+ /*
+ * Reset send_proxy_ofs for the upcoming send_proxy step: we are
+ * reusing "send_proxy_ofs", and the SOCKS4 handshake must complete
+ * before the PROXY protocol header is sent.
+ */
+ conn->send_proxy_ofs = 1;
+ }
+ return 1;
+
+ out_error:
+ /* Write error on the file descriptor */
+ conn->flags |= CO_FL_ERROR;
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_SEND;
+ }
+ return 0;
+
+ out_wait:
+ return 0;
+}
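+
+/* For reference, the 16-byte SOCKS4 CONNECT request built above is laid out
+ * as follows on the wire, the user id being the fixed 8-byte "HAProxy\0"
+ * string:
+ *
+ *   +----+----+---------+---------+------------------+
+ *   | VN | CD | DSTPORT |  DSTIP  |      USERID      |
+ *   +----+----+---------+---------+------------------+
+ *    0x04 0x01  2 bytes   4 bytes   8 bytes (w/ NUL)
+ */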
+
+int conn_recv_socks4_proxy_response(struct connection *conn)
+{
+ char line[SOCKS4_HS_RSP_LEN];
+ int ret;
+
+ if (!conn_ctrl_ready(conn))
+ goto fail;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ goto not_ready;
+
+ while (1) {
+ /* The SOCKS4 proxy will respond with 8 bytes: 0x00 | 0x5A | 0x00 0x00 | 0x00 0x00 0x00 0x00.
+ * Try to peek at it before all 8 bytes are ready.
+ */
+ ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, MSG_PEEK);
+
+ if (ret == 0) {
+ /* the socket has been closed or shutdown for send */
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d], looks like the socket has been closed or shutdown for send\n",
+ conn->handle.fd, ret, errno);
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_RECV;
+ }
+ goto fail;
+ }
+
+ if (ret > 0) {
+ if (ret == SOCKS4_HS_RSP_LEN) {
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received 8 bytes, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
+ conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
+ } else {
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], first byte is [%02X], last byte is [%02X]\n", conn->handle.fd, ret, line[0], line[ret-1]);
+ }
+ } else {
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d]\n", conn->handle.fd, ret, errno);
+ }
+
+ if (ret < 0) {
+ if (errno == EINTR) {
+ continue;
+ }
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(conn->handle.fd);
+ goto not_ready;
+ }
+ goto recv_abort;
+ }
+ break;
+ }
+
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ if (ret < SOCKS4_HS_RSP_LEN) {
+ /* Missing data. Since we're using MSG_PEEK, we can only poll again if
+ * we are not able to read enough data.
+ */
+ goto not_ready;
+ }
+
+ /*
+ * Based on the SOCKS4 protocol:
+ *
+ * +----+----+----+----+----+----+----+----+
+ * | VN | CD | DSTPORT | DSTIP |
+ * +----+----+----+----+----+----+----+----+
+ * # of bytes: 1 1 2 4
+ * VN is the version of the reply code and should be 0. CD is the result
+ * code with one of the following values:
+ * 90: request granted
+ * 91: request rejected or failed
+ * 92: request rejected because SOCKS server cannot connect to identd on the client
+ * 93: request rejected because the client program and identd report different user-ids
+ * The remaining fields are ignored.
+ */
+ if (line[1] != 90) {
+ conn->flags &= ~CO_FL_SOCKS4_RECV;
+
+ DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: FAIL, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
+ conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_DENY;
+ }
+ goto fail;
+ }
+
+ /* remove the 8 bytes response from the stream */
+ while (1) {
+ ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, 0);
+ if (ret < 0 && errno == EINTR) {
+ continue;
+ }
+ if (ret != SOCKS4_HS_RSP_LEN) {
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_RECV;
+ }
+ goto fail;
+ }
+ break;
+ }
+
+ conn->flags &= ~CO_FL_SOCKS4_RECV;
+ return 1;
+
+ not_ready:
+ return 0;
+
+ recv_abort:
+ if (conn->err_code == CO_ER_NONE) {
+ conn->err_code = CO_ER_SOCKS4_ABORT;
+ }
+ conn->flags |= (CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH);
+ goto fail;
+
+ fail:
+ conn->flags |= CO_FL_ERROR;
+ return 0;
+}
+
+/* registers proto mux list <list>. Modifies the list element! */
+void register_mux_proto(struct mux_proto_list *list)
+{
+ LIST_APPEND(&mux_proto_list.list, &list->list);
+}
+
+/* Lists the known proto mux on <out>. This function is used by "haproxy -vv"
+ * and is suitable for early boot just after the "REGISTER" stage because it
+ * doesn't depend on anything to be already allocated.
+ */
+void list_mux_proto(FILE *out)
+{
+ struct mux_proto_list *item;
+ struct ist proto;
+ char *mode, *side;
+ int done;
+
+ fprintf(out, "Available multiplexer protocols :\n"
+ "(protocols marked as <default> cannot be specified using 'proto' keyword)\n");
+ list_for_each_entry(item, &mux_proto_list.list, list) {
+ proto = item->token;
+
+ if (item->mode == PROTO_MODE_ANY)
+ mode = "TCP|HTTP";
+ else if (item->mode == PROTO_MODE_TCP)
+ mode = "TCP";
+ else if (item->mode == PROTO_MODE_HTTP)
+ mode = "HTTP";
+ else
+ mode = "NONE";
+
+ if (item->side == PROTO_SIDE_BOTH)
+ side = "FE|BE";
+ else if (item->side == PROTO_SIDE_FE)
+ side = "FE";
+ else if (item->side == PROTO_SIDE_BE)
+ side = "BE";
+ else
+ side = "NONE";
+
+ fprintf(out, " %10s : mode=%-5s side=%-6s mux=%-5s flags=",
+ (proto.len ? proto.ptr : "<default>"), mode, side, item->mux->name);
+
+ done = 0;
+
+ /* note: the block below could be simplified using macros but for only
+ * 4 flags it's not worth it.
+ */
+ if (item->mux->flags & MX_FL_HTX)
+ done |= fprintf(out, "%sHTX", done ? "|" : "");
+
+ if (item->mux->flags & MX_FL_HOL_RISK)
+ done |= fprintf(out, "%sHOL_RISK", done ? "|" : "");
+
+ if (item->mux->flags & MX_FL_NO_UPG)
+ done |= fprintf(out, "%sNO_UPG", done ? "|" : "");
+
+ if (item->mux->flags & MX_FL_FRAMED)
+ done |= fprintf(out, "%sFRAMED", done ? "|" : "");
+
+ fprintf(out, "\n");
+ }
+}
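+
+/* For illustration, a line produced by the loop above in the "haproxy -vv"
+ * output may look like:
+ *
+ *         h2 : mode=HTTP  side=FE|BE  mux=H2    flags=HTX|HOL_RISK|NO_UPG
+ */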
+
+/* Makes a PROXY protocol line from the two addresses. The output is sent to
+ * buffer <buf> for a maximum size of <buf_len> (including the trailing zero).
+ * It returns the number of bytes composing this line (including the trailing
+ * LF), or zero in case of failure (eg: not enough space). It supports TCP4,
+ * TCP6 and "UNKNOWN" formats. If any of <src> or <dst> is null, UNKNOWN is
+ * emitted as well.
+ */
+static int make_proxy_line_v1(char *buf, int buf_len, const struct sockaddr_storage *src, const struct sockaddr_storage *dst)
+{
+ int ret = 0;
+ char * protocol;
+ char src_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+ char dst_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+ in_port_t src_port;
+ in_port_t dst_port;
+
+ if ( !src
+ || !dst
+ || (src->ss_family != AF_INET && src->ss_family != AF_INET6)
+ || (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
+ /* unknown family combination */
+ ret = snprintf(buf, buf_len, "PROXY UNKNOWN\r\n");
+ if (ret >= buf_len)
+ return 0;
+
+ return ret;
+ }
+
+ /* IPv4 for both src and dst */
+ if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
+ protocol = "TCP4";
+ if (!inet_ntop(AF_INET, &((struct sockaddr_in *)src)->sin_addr, src_str, sizeof(src_str)))
+ return 0;
+ src_port = ((struct sockaddr_in *)src)->sin_port;
+ if (!inet_ntop(AF_INET, &((struct sockaddr_in *)dst)->sin_addr, dst_str, sizeof(dst_str)))
+ return 0;
+ dst_port = ((struct sockaddr_in *)dst)->sin_port;
+ }
+ /* IPv6 for at least one of src and dst */
+ else {
+ struct in6_addr tmp;
+
+ protocol = "TCP6";
+
+ if (src->ss_family == AF_INET) {
+ /* Convert src to IPv6 */
+ v4tov6(&tmp, &((struct sockaddr_in *)src)->sin_addr);
+ src_port = ((struct sockaddr_in *)src)->sin_port;
+ }
+ else {
+ tmp = ((struct sockaddr_in6 *)src)->sin6_addr;
+ src_port = ((struct sockaddr_in6 *)src)->sin6_port;
+ }
+
+ if (!inet_ntop(AF_INET6, &tmp, src_str, sizeof(src_str)))
+ return 0;
+
+ if (dst->ss_family == AF_INET) {
+ /* Convert dst to IPv6 */
+ v4tov6(&tmp, &((struct sockaddr_in *)dst)->sin_addr);
+ dst_port = ((struct sockaddr_in *)dst)->sin_port;
+ }
+ else {
+ tmp = ((struct sockaddr_in6 *)dst)->sin6_addr;
+ dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
+ }
+
+ if (!inet_ntop(AF_INET6, &tmp, dst_str, sizeof(dst_str)))
+ return 0;
+ }
+
+ ret = snprintf(buf, buf_len, "PROXY %s %s %s %u %u\r\n", protocol, src_str, dst_str, ntohs(src_port), ntohs(dst_port));
+ if (ret >= buf_len)
+ return 0;
+
+ return ret;
+}
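+
+/* For example, with src 192.168.0.1:56324 and dst 192.168.0.11:443, the
+ * function above emits:
+ *
+ *   "PROXY TCP4 192.168.0.1 192.168.0.11 56324 443\r\n"
+ *
+ * and returns 47, the length of the line including the trailing CRLF.
+ */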
+
+static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const char *value)
+{
+ struct tlv *tlv;
+
+ if (!dest || (length + sizeof(*tlv) > dest_len))
+ return 0;
+
+ tlv = (struct tlv *)dest;
+
+ tlv->type = type;
+ tlv->length_hi = length >> 8;
+ tlv->length_lo = length & 0x00ff;
+ memcpy(tlv->value, value, length);
+ return length + sizeof(*tlv);
+}
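+
+/* Illustrative usage sketch (hypothetical fragment, with <buf>, <buf_len> and
+ * <ret> as used by make_proxy_line_v2() below): appending an ALPN TLV
+ * advertising "h2" boils down to:
+ */
+#if 0
+ ret += make_tlv(&buf[ret], buf_len - ret, PP2_TYPE_ALPN, 2, "h2");
+#endif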
+
+/* Note: <remote> is explicitly allowed to be NULL */
+static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm)
+{
+ const char pp2_signature[] = PP2_SIGNATURE;
+ void *tlv_crc32c_p = NULL;
+ int ret = 0;
+ struct proxy_hdr_v2 *hdr = (struct proxy_hdr_v2 *)buf;
+ struct sockaddr_storage null_addr = { .ss_family = 0 };
+ struct srv_pp_tlv_list *srv_tlv = NULL;
+ const struct sockaddr_storage *src = &null_addr;
+ const struct sockaddr_storage *dst = &null_addr;
+ const char *value = "";
+ int value_len = 0;
+
+ if (buf_len < PP2_HEADER_LEN)
+ return 0;
+ memcpy(hdr->sig, pp2_signature, PP2_SIGNATURE_LEN);
+
+ if (strm) {
+ src = sc_src(strm->scf);
+ dst = sc_dst(strm->scf);
+ }
+ else if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
+ src = conn_src(remote);
+ dst = conn_dst(remote);
+ }
+
+ /* Emit a LOCAL command if src or dst is missing or not AF_INET/AF_INET6,
+ * or if the connection was initiated locally (unless pp2-never-send-local) */
+ if ( !src
+ || !dst
+ || (!pp2_never_send_local && conn_is_back(remote)) // locally initiated connection
+ || (src->ss_family != AF_INET && src->ss_family != AF_INET6)
+ || (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
+ if (buf_len < PP2_HDR_LEN_UNSPEC)
+ return 0;
+ hdr->ver_cmd = PP2_VERSION | PP2_CMD_LOCAL;
+ hdr->fam = PP2_FAM_UNSPEC | PP2_TRANS_UNSPEC;
+ ret = PP2_HDR_LEN_UNSPEC;
+ }
+ else {
+ hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;
+ /* IPv4 for both src and dst */
+ if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
+ if (buf_len < PP2_HDR_LEN_INET)
+ return 0;
+ hdr->fam = PP2_FAM_INET | PP2_TRANS_STREAM;
+ hdr->addr.ip4.src_addr = ((struct sockaddr_in *)src)->sin_addr.s_addr;
+ hdr->addr.ip4.src_port = ((struct sockaddr_in *)src)->sin_port;
+ hdr->addr.ip4.dst_addr = ((struct sockaddr_in *)dst)->sin_addr.s_addr;
+ hdr->addr.ip4.dst_port = ((struct sockaddr_in *)dst)->sin_port;
+ ret = PP2_HDR_LEN_INET;
+ }
+ /* IPv6 for at least one of src and dst */
+ else {
+ struct in6_addr tmp;
+
+ if (buf_len < PP2_HDR_LEN_INET6)
+ return 0;
+ hdr->fam = PP2_FAM_INET6 | PP2_TRANS_STREAM;
+ if (src->ss_family == AF_INET) {
+ v4tov6(&tmp, &((struct sockaddr_in *)src)->sin_addr);
+ memcpy(hdr->addr.ip6.src_addr, &tmp, 16);
+ hdr->addr.ip6.src_port = ((struct sockaddr_in *)src)->sin_port;
+ }
+ else {
+ memcpy(hdr->addr.ip6.src_addr, &((struct sockaddr_in6 *)src)->sin6_addr, 16);
+ hdr->addr.ip6.src_port = ((struct sockaddr_in6 *)src)->sin6_port;
+ }
+ if (dst->ss_family == AF_INET) {
+ v4tov6(&tmp, &((struct sockaddr_in *)dst)->sin_addr);
+ memcpy(hdr->addr.ip6.dst_addr, &tmp, 16);
+ hdr->addr.ip6.dst_port = ((struct sockaddr_in *)dst)->sin_port;
+ }
+ else {
+ memcpy(hdr->addr.ip6.dst_addr, &((struct sockaddr_in6 *)dst)->sin6_addr, 16);
+ hdr->addr.ip6.dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
+ }
+
+ ret = PP2_HDR_LEN_INET6;
+ }
+ }
+
+ if (strm) {
+ struct buffer *replace = NULL;
+
+ list_for_each_entry(srv_tlv, &srv->pp_tlvs, list) {
+ replace = NULL;
+
+ /* Users always need to provide a value; in case of forwarding, they
+ * should use fc_pp_tlv for generic types. Otherwise, we will send an
+ * empty TLV.
+ */
+ if (!LIST_ISEMPTY(&srv_tlv->fmt)) {
+ replace = alloc_trash_chunk();
+ if (unlikely(!replace))
+ return 0;
+
+ replace->data = build_logline(strm, replace->area, replace->size, &srv_tlv->fmt);
+
+ if (unlikely((buf_len - ret) < sizeof(struct tlv))) {
+ free_trash_chunk(replace);
+ return 0;
+ }
+ ret += make_tlv(&buf[ret], (buf_len - ret), srv_tlv->type, replace->data, replace->area);
+ free_trash_chunk(replace);
+ }
+ else {
+ /* Create empty TLV as no value was specified */
+ ret += make_tlv(&buf[ret], (buf_len - ret), srv_tlv->type, 0, NULL);
+ }
+ }
+ }
+
+ /* Handle predefined TLVs as usual */
+ if (srv->pp_opts & SRV_PP_V2_CRC32C) {
+ uint32_t zero_crc32c = 0;
+
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ tlv_crc32c_p = (void *)((struct tlv *)&buf[ret])->value;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_CRC32C, sizeof(zero_crc32c), (const char *)&zero_crc32c);
+ }
+
+ if (remote && conn_get_alpn(remote, &value, &value_len)) {
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_ALPN, value_len, value);
+ }
+
+ if (srv->pp_opts & SRV_PP_V2_AUTHORITY) {
+ struct conn_tlv_list *tlv = conn_get_tlv(remote, PP2_TYPE_AUTHORITY);
+
+ value = NULL;
+ if (tlv) {
+ value_len = tlv->len;
+ value = tlv->value;
+ }
+#ifdef USE_OPENSSL
+ else {
+ if ((value = ssl_sock_get_sni(remote)))
+ value_len = strlen(value);
+ }
+#endif
+ if (value) {
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_AUTHORITY, value_len, value);
+ }
+ }
+
+ if (strm && (srv->pp_opts & SRV_PP_V2_UNIQUE_ID)) {
+ struct session* sess = strm_sess(strm);
+ struct ist unique_id = stream_generate_unique_id(strm, &sess->fe->format_unique_id);
+
+ value = unique_id.ptr;
+ value_len = unique_id.len;
+
+ if (value_len >= 0) {
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_UNIQUE_ID, value_len, value);
+ }
+ }
+
+#ifdef USE_OPENSSL
+ if (srv->pp_opts & SRV_PP_V2_SSL) {
+ struct tlv_ssl *tlv;
+ int ssl_tlv_len = 0;
+
+ if ((buf_len - ret) < sizeof(struct tlv_ssl))
+ return 0;
+ tlv = (struct tlv_ssl *)&buf[ret];
+ memset(tlv, 0, sizeof(struct tlv_ssl));
+ ssl_tlv_len += sizeof(struct tlv_ssl);
+ tlv->tlv.type = PP2_TYPE_SSL;
+ if (conn_is_ssl(remote)) {
+ tlv->client |= PP2_CLIENT_SSL;
+ value = ssl_sock_get_proto_version(remote);
+ if (value) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len-ret-ssl_tlv_len), PP2_SUBTYPE_SSL_VERSION, strlen(value), value);
+ }
+ if (ssl_sock_get_cert_used_sess(remote)) {
+ tlv->client |= PP2_CLIENT_CERT_SESS;
+ tlv->verify = htonl(ssl_sock_get_verify_result(remote));
+ if (ssl_sock_get_cert_used_conn(remote))
+ tlv->client |= PP2_CLIENT_CERT_CONN;
+ }
+ if (srv->pp_opts & SRV_PP_V2_SSL_CN) {
+ struct buffer *cn_trash = get_trash_chunk();
+ if (ssl_sock_get_remote_common_name(remote, cn_trash) > 0) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CN,
+ cn_trash->data,
+ cn_trash->area);
+ }
+ }
+ if (srv->pp_opts & SRV_PP_V2_SSL_KEY_ALG) {
+ struct buffer *pkey_trash = get_trash_chunk();
+ if (ssl_sock_get_pkey_algo(remote, pkey_trash) > 0) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_KEY_ALG,
+ pkey_trash->data,
+ pkey_trash->area);
+ }
+ }
+ if (srv->pp_opts & SRV_PP_V2_SSL_SIG_ALG) {
+ value = ssl_sock_get_cert_sig(remote);
+ if (value) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_SIG_ALG, strlen(value), value);
+ }
+ }
+ if (srv->pp_opts & SRV_PP_V2_SSL_CIPHER) {
+ value = ssl_sock_get_cipher_name(remote);
+ if (value) {
+ ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CIPHER, strlen(value), value);
+ }
+ }
+ }
+ tlv->tlv.length_hi = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) >> 8;
+ tlv->tlv.length_lo = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) & 0x00ff;
+ ret += ssl_tlv_len;
+ }
+#endif
+
+#ifdef USE_NS
+ if (remote && (remote->proxy_netns)) {
+ if ((buf_len - ret) < sizeof(struct tlv))
+ return 0;
+ ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_NETNS, remote->proxy_netns->name_len, remote->proxy_netns->node.key);
+ }
+#endif
+
+ hdr->len = htons((uint16_t)(ret - PP2_HEADER_LEN));
+
+ if (tlv_crc32c_p) {
+ write_u32(tlv_crc32c_p, htonl(hash_crc32c(buf, ret)));
+ }
+
+ return ret;
+}
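+
+/* A minimal sketch of how a receiver might verify the CRC32C TLV emitted
+ * above (assuming the same hash_crc32c() helper): save the received value,
+ * zero the four CRC bytes in place, recompute over the whole header, then
+ * compare:
+ *
+ *   uint32_t rcvd = read_u32(crc_p);
+ *   write_u32(crc_p, 0);
+ *   ok = (htonl(hash_crc32c(buf, len)) == rcvd);
+ */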
+
+/* Note: <remote> is explicitly allowed to be NULL */
+int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm)
+{
+ int ret = 0;
+
+ if (srv && (srv->pp_opts & SRV_PP_V2)) {
+ ret = make_proxy_line_v2(buf, buf_len, srv, remote, strm);
+ }
+ else {
+ const struct sockaddr_storage *src = NULL;
+ const struct sockaddr_storage *dst = NULL;
+
+ if (strm) {
+ src = sc_src(strm->scf);
+ dst = sc_dst(strm->scf);
+ }
+ else if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
+ src = conn_src(remote);
+ dst = conn_dst(remote);
+ }
+
+ if (src && dst)
+ ret = make_proxy_line_v1(buf, buf_len, src, dst);
+ else
+ ret = make_proxy_line_v1(buf, buf_len, NULL, NULL);
+ }
+
+ return ret;
+}
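+
+/* For reference, the v1 fallback above emits a human-readable line such as
+ * (example addresses):
+ *   "PROXY TCP4 192.168.0.1 192.168.0.11 56324 443\r\n"
+ * or "PROXY UNKNOWN\r\n" when no usable addresses are available, while the
+ * v2 form built by make_proxy_line_v2() is binary.
+ */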
+
+/* returns 0 on success */
+static int cfg_parse_pp2_never_send_local(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+ pp2_never_send_local = 1;
+ return 0;
+}
+
+/* extracts some info from the connection and appends it to buffer <buf>. The
+ * connection's pointer, its direction, target (fe/be/srv), xprt/ctrl, and its
+ * source and destination addresses when set, are printed in a compact
+ * human-readable format fitting on a single line. This is handy to complete
+ * traces or debug output.
+ * It is permitted to pass a NULL conn pointer. The number of characters emitted
+ * is returned. A prefix <pfx> might be prepended before the first field if not
+ * NULL.
+ */
+int conn_append_debug_info(struct buffer *buf, const struct connection *conn, const char *pfx)
+{
+ const struct listener *li;
+ const struct server *sv;
+ const struct proxy *px;
+ char addr[40];
+ int old_len = buf->data;
+
+ if (!conn)
+ return 0;
+
+ chunk_appendf(buf, "%sconn=%p(%s)", pfx ? pfx : "", conn, conn_is_back(conn) ? "OUT" : "IN");
+
+ if ((li = objt_listener(conn->target)))
+ chunk_appendf(buf, " fe=%s", li->bind_conf->frontend->id);
+ else if ((sv = objt_server(conn->target)))
+ chunk_appendf(buf, " sv=%s/%s", sv->proxy->id, sv->id);
+ else if ((px = objt_proxy(conn->target)))
+ chunk_appendf(buf, " be=%s", px->id);
+
+ chunk_appendf(buf, " %s/%s", conn_get_xprt_name(conn), conn_get_ctrl_name(conn));
+
+ if (conn->src && addr_to_str(conn->src, addr, sizeof(addr)))
+ chunk_appendf(buf, " src=%s:%d", addr, get_host_port(conn->src));
+
+ if (conn->dst && addr_to_str(conn->dst, addr, sizeof(addr)))
+ chunk_appendf(buf, " dst=%s:%d", addr, get_host_port(conn->dst));
+
+ return buf->data - old_len;
+}
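+
+/* For illustration, a line emitted by the function above might look like
+ * this (made-up values):
+ *   conn=0x7f3c5c02bd50(IN) fe=front-http RAW/tcpv4 src=192.0.2.1:42318 dst=192.0.2.10:8080
+ */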
+
+/* return the major HTTP version as 1, 2 or 3 depending on how the request
+ * arrived before being processed.
+ *
+ * WARNING: Should be updated if a new major HTTP version is added.
+ */
+static int
+smp_fetch_fc_http_major(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = NULL;
+ const char *mux_name = NULL;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+	/* No connection or a connection with a RAW mux */
+ if (!conn || (conn->mux && !(conn->mux->flags & MX_FL_HTX)))
+ return 0;
+
+	/* No mux installed yet, this may change */
+ if (!conn->mux) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ mux_name = conn_get_mux_name(conn);
+
+ smp->data.type = SMP_T_SINT;
+ if (strcmp(mux_name, "QUIC") == 0)
+ smp->data.u.sint = 3;
+ else if (strcmp(mux_name, "H2") == 0)
+ smp->data.u.sint = 2;
+ else
+ smp->data.u.sint = 1;
+
+ return 1;
+}
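+
+/* Usage sketch (illustrative, "be_h2" is a made-up backend name): a frontend
+ * could route based on the incoming mux with:
+ *   use_backend be_h2 if { fc_http_major 2 }
+ */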
+
+/* fetch if the received connection used a PROXY protocol header */
+int smp_fetch_fc_rcvd_proxy(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+
+ conn = objt_conn(smp->sess->origin);
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = (conn->flags & CO_FL_RCVD_PROXY) ? 1 : 0;
+
+ return 1;
+}
+
+/*
+ * This function checks the TLV type argument of a sample fetch such as
+ * fc_pp_tlv. It expects the TLV type either as a well-known constant name
+ * (e.g. "AUTHORITY") or as a string representing the number. args[0] will be
+ * turned into the numerical value of the TLV type string.
+ */
+static int smp_check_tlv_type(struct arg *args, char **err)
+{
+ int type;
+ char *endp;
+ struct ist input = ist2(args[0].data.str.area, args[0].data.str.data);
+
+ if (isteqi(input, ist("ALPN")) != 0)
+ type = PP2_TYPE_ALPN;
+ else if (isteqi(input, ist("AUTHORITY")) != 0)
+ type = PP2_TYPE_AUTHORITY;
+ else if (isteqi(input, ist("CRC32C")) != 0)
+ type = PP2_TYPE_CRC32C;
+ else if (isteqi(input, ist("NOOP")) != 0)
+ type = PP2_TYPE_NOOP;
+ else if (isteqi(input, ist("UNIQUE_ID")) != 0)
+ type = PP2_TYPE_UNIQUE_ID;
+ else if (isteqi(input, ist("SSL")) != 0)
+ type = PP2_TYPE_SSL;
+ else if (isteqi(input, ist("SSL_VERSION")) != 0)
+ type = PP2_SUBTYPE_SSL_VERSION;
+ else if (isteqi(input, ist("SSL_CN")) != 0)
+ type = PP2_SUBTYPE_SSL_CN;
+ else if (isteqi(input, ist("SSL_CIPHER")) != 0)
+ type = PP2_SUBTYPE_SSL_CIPHER;
+ else if (isteqi(input, ist("SSL_SIG_ALG")) != 0)
+ type = PP2_SUBTYPE_SSL_SIG_ALG;
+ else if (isteqi(input, ist("SSL_KEY_ALG")) != 0)
+ type = PP2_SUBTYPE_SSL_KEY_ALG;
+ else if (isteqi(input, ist("NETNS")) != 0)
+ type = PP2_TYPE_NETNS;
+ else {
+ type = strtoul(input.ptr, &endp, 0);
+ if (endp && *endp != '\0') {
+ memprintf(err, "Could not convert type '%s'", input.ptr);
+ return 0;
+ }
+ }
+
+ if (type < 0 || type > 255) {
+ memprintf(err, "Invalid TLV Type '%s'", input.ptr);
+ return 0;
+ }
+
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = type;
+
+ return 1;
+}
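+
+/* Example (illustrative): since the check above resolves constant names at
+ * parse time, these two fetches are expected to be equivalent, AUTHORITY
+ * being type 0x02 in the PROXY v2 specification:
+ *   http-request set-var(txn.auth) fc_pp_tlv(AUTHORITY)
+ *   http-request set-var(txn.auth) fc_pp_tlv(0x02)
+ */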
+
+/* fetch an arbitrary TLV from a PROXY protocol v2 header */
+int smp_fetch_fc_pp_tlv(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int idx;
+ struct connection *conn = NULL;
+ struct conn_tlv_list *conn_tlv = NULL;
+
+ conn = objt_conn(smp->sess->origin);
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (args[0].type != ARGT_SINT)
+ return 0;
+
+ idx = args[0].data.sint;
+ conn_tlv = smp->ctx.p ? smp->ctx.p : LIST_ELEM(conn->tlv_list.n, struct conn_tlv_list *, list);
+ list_for_each_entry_from(conn_tlv, &conn->tlv_list, list) {
+ if (conn_tlv->type == idx) {
+ smp->flags |= SMP_F_NOT_LAST;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = conn_tlv->value;
+ smp->data.u.str.data = conn_tlv->len;
+ smp->ctx.p = conn_tlv;
+
+ return 1;
+ }
+ }
+
+ smp->flags &= ~SMP_F_NOT_LAST;
+
+ return 0;
+}
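+
+/* Note on iteration (illustration): when the peer sent several TLVs of the
+ * same type, each call above returns the next occurrence, resuming from the
+ * element saved in smp->ctx.p and keeping SMP_F_NOT_LAST set until the list
+ * is exhausted.
+ */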
+
+/* fetch the authority TLV from a PROXY protocol header */
+int smp_fetch_fc_pp_authority(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct arg tlv_arg;
+ int ret;
+
+ set_tlv_arg(PP2_TYPE_AUTHORITY, &tlv_arg);
+ ret = smp_fetch_fc_pp_tlv(&tlv_arg, smp, kw, private);
+ smp->flags &= ~SMP_F_NOT_LAST; // return only the first authority
+ return ret;
+}
+
+/* fetch the unique ID TLV from a PROXY protocol header */
+int smp_fetch_fc_pp_unique_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct arg tlv_arg;
+ int ret;
+
+ set_tlv_arg(PP2_TYPE_UNIQUE_ID, &tlv_arg);
+ ret = smp_fetch_fc_pp_tlv(&tlv_arg, smp, kw, private);
+ smp->flags &= ~SMP_F_NOT_LAST; // return only the first unique ID
+ return ret;
+}
+
+/* fetch the error code of a connection */
+int smp_fetch_fc_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (unsigned long long int)conn->err_code;
+
+ return 1;
+}
+
+/* fetch a string representation of the error code of a connection */
+int smp_fetch_fc_err_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ const char *err_code_str;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ err_code_str = conn_err_code_str(conn);
+
+ if (!err_code_str)
+ return 0;
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = (char*)err_code_str;
+ smp->data.u.str.data = strlen(err_code_str);
+
+ return 1;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types should be declared using the
+ * appropriate pseudo-type. If not available it must be declared as the lowest
+ * common denominator, the type that can be casted into all other ones.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "bc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
+ { "bc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4SRV },
+ { "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
+ { "fc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+ { "fc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
+ { "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+ { "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
+ { "fc_pp_unique_id", smp_fetch_fc_pp_unique_id, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
+ { "fc_pp_tlv", smp_fetch_fc_pp_tlv, ARG1(1, STR), smp_check_tlv_type, SMP_T_STR, SMP_USE_L4CLI },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "pp2-never-send-local", cfg_parse_pp2_never_send_local },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* private function to handle sockaddr as input for connection hash */
+static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss,
+ char *buf, size_t *idx,
+ enum conn_hash_params_t *hash_flags,
+ enum conn_hash_params_t param_type_addr,
+ enum conn_hash_params_t param_type_port)
+{
+ struct sockaddr_in *addr;
+ struct sockaddr_in6 *addr6;
+
+ switch (ss->ss_family) {
+ case AF_INET:
+ addr = (struct sockaddr_in *)ss;
+
+ conn_hash_update(buf, idx,
+ &addr->sin_addr, sizeof(addr->sin_addr),
+ hash_flags, param_type_addr);
+
+ if (addr->sin_port) {
+ conn_hash_update(buf, idx,
+ &addr->sin_port, sizeof(addr->sin_port),
+ hash_flags, param_type_port);
+ }
+
+ break;
+
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)ss;
+
+ conn_hash_update(buf, idx,
+ &addr6->sin6_addr, sizeof(addr6->sin6_addr),
+ hash_flags, param_type_addr);
+
+ if (addr6->sin6_port) {
+ conn_hash_update(buf, idx,
+ &addr6->sin6_port, sizeof(addr6->sin6_port),
+ hash_flags, param_type_port);
+ }
+
+ break;
+ }
+}
+
+/* Compute the prehash of <size> bytes at <buf>. Such prehashes are stored in
+ * the connection hash params (e.g. for the SNI or the PROXY line) and folded
+ * into the final digest by conn_calculate_hash(), where each non-null field
+ * of the params is taken into account.
+ */
+uint64_t conn_hash_prehash(char *buf, size_t size)
+{
+ return XXH64(buf, size, 0);
+}
+
+/* Append <data> into <buf> at <idx> offset in preparation for the connection
+ * hash calculation. <idx> is incremented beyond data <size>. At the same
+ * time, <flags> are updated with <type> for the hash header.
+ */
+void conn_hash_update(char *buf, size_t *idx,
+ const void *data, size_t size,
+ enum conn_hash_params_t *flags,
+ enum conn_hash_params_t type)
+{
+ memcpy(&buf[*idx], data, size);
+ *idx += size;
+ *flags |= type;
+}
+
+uint64_t conn_hash_digest(char *buf, size_t bufsize,
+ enum conn_hash_params_t flags)
+{
+ const uint64_t flags_u64 = (uint64_t)flags;
+ const uint64_t hash = XXH64(buf, bufsize, 0);
+
+ return (flags_u64 << CONN_HASH_PAYLOAD_LEN) | CONN_HASH_GET_PAYLOAD(hash);
+}
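+
+/* Sketch of the resulting 64-bit layout, assuming CONN_HASH_PAYLOAD_LEN
+ * leaves the upper bits free for the flags:
+ *   [ param-type flags ][ CONN_HASH_PAYLOAD_LEN low bits of XXH64 ]
+ * so two digests can only be equal when they were computed from the same set
+ * of parameter types.
+ */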
+
+uint64_t conn_calculate_hash(const struct conn_hash_params *params)
+{
+ char *buf;
+ size_t idx = 0;
+ uint64_t hash = 0;
+ enum conn_hash_params_t hash_flags = 0;
+
+ buf = trash.area;
+
+ conn_hash_update(buf, &idx, &params->target, sizeof(params->target), &hash_flags, 0);
+
+ if (params->sni_prehash) {
+ conn_hash_update(buf, &idx,
+ &params->sni_prehash, sizeof(params->sni_prehash),
+ &hash_flags, CONN_HASH_PARAMS_TYPE_SNI);
+ }
+
+ if (params->dst_addr) {
+ conn_calculate_hash_sockaddr(params->dst_addr,
+ buf, &idx, &hash_flags,
+ CONN_HASH_PARAMS_TYPE_DST_ADDR,
+ CONN_HASH_PARAMS_TYPE_DST_PORT);
+ }
+
+ if (params->src_addr) {
+ conn_calculate_hash_sockaddr(params->src_addr,
+ buf, &idx, &hash_flags,
+ CONN_HASH_PARAMS_TYPE_SRC_ADDR,
+ CONN_HASH_PARAMS_TYPE_SRC_PORT);
+ }
+
+ if (params->proxy_prehash) {
+ conn_hash_update(buf, &idx,
+ &params->proxy_prehash, sizeof(params->proxy_prehash),
+ &hash_flags, CONN_HASH_PARAMS_TYPE_PROXY);
+ }
+
+ hash = conn_hash_digest(buf, idx, hash_flags);
+ return hash;
+}
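+
+/* Worked example (hypothetical values): for a lookup with only the target
+ * server and a destination address set, the scratch buffer receives the
+ * server pointer, then the IPv4 address and, if non-zero, the port, and the
+ * digest carries the DST_ADDR (and possibly DST_PORT) type flags in its high
+ * bits.
+ */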
+
+/* Reverse a <conn> connection instance. This effectively moves the connection
+ * from frontend to backend side or vice-versa depending on its initial status.
+ *
+ * For active reversal, 'reverse' member points to the listener used as the new
+ * connection target. Once transition is completed, the connection needs to be
+ * accepted on the listener to instantiate its parent session before using
+ * streams.
+ *
+ * For passive reversal, 'reverse' member points to the server used as the new
+ * connection target. Once transition is completed, the connection appears as a
+ * normal backend connection.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int conn_reverse(struct connection *conn)
+{
+ struct conn_hash_params hash_params;
+ int64_t hash = 0;
+ struct session *sess = conn->owner;
+
+ if (!conn_is_back(conn)) {
+ /* srv must have been set by a previous 'attach-srv' rule. */
+ struct server *srv = objt_server(conn->reverse.target);
+ BUG_ON(!srv);
+
+ if (conn_backend_init(conn))
+ return 1;
+
+ /* Initialize hash value for usage as idle conns. */
+ memset(&hash_params, 0, sizeof(hash_params));
+ hash_params.target = srv;
+
+ if (b_data(&conn->reverse.name)) {
+ /* data cannot wrap else prehash usage is incorrect */
+ BUG_ON(b_data(&conn->reverse.name) != b_contig_data(&conn->reverse.name, 0));
+
+ hash_params.sni_prehash =
+ conn_hash_prehash(b_head(&conn->reverse.name),
+ b_data(&conn->reverse.name));
+ }
+
+ hash = conn_calculate_hash(&hash_params);
+ conn->hash_node->node.key = hash;
+
+ conn->target = &srv->obj_type;
+ srv_use_conn(srv, conn);
+
+ /* Free the session after detaching the connection from it. */
+ session_unown_conn(sess, conn);
+ sess->origin = NULL;
+ session_free(sess);
+ conn_set_owner(conn, NULL, NULL);
+
+ conn->flags |= CO_FL_REVERSED;
+ }
+ else {
+ /* Wake up receiver to proceed to connection accept. */
+ struct listener *l = __objt_listener(conn->reverse.target);
+
+ conn_backend_deinit(conn);
+
+ conn->target = &l->obj_type;
+ conn->flags |= CO_FL_ACT_REVERSING;
+ task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY);
+ }
+
+ /* Invert source and destination addresses if already set. */
+ SWAP(conn->src, conn->dst);
+
+ conn->reverse.target = NULL;
+ ha_free(&conn->reverse.name.area);
+ conn->reverse.name = BUF_NULL;
+
+ return 0;
+}
+
+/* Handler for the mux_stopping_data task.
+ * Called on soft-stop.
+ */
+static struct task *mux_stopping_process(struct task *t, void *ctx, unsigned int state)
+{
+ struct connection *conn, *back;
+
+ list_for_each_entry_safe(conn, back, &mux_stopping_data[tid].list, stopping_list) {
+ if (conn->mux && conn->mux->wake)
+ conn->mux->wake(conn);
+ }
+
+ return t;
+}
+
+static int allocate_mux_cleanup(void)
+{
+	/* allocates the thread-bound mux_stopping_data task */
+ mux_stopping_data[tid].task = task_new_here();
+ if (!mux_stopping_data[tid].task) {
+ ha_alert("Failed to allocate the task for connection cleanup on thread %d.\n", tid);
+ return 0;
+ }
+
+ mux_stopping_data[tid].task->process = mux_stopping_process;
+ LIST_INIT(&mux_stopping_data[tid].list);
+
+ return 1;
+}
+REGISTER_PER_THREAD_ALLOC(allocate_mux_cleanup);
+
+static int deallocate_mux_cleanup(void)
+{
+ task_destroy(mux_stopping_data[tid].task);
+ return 1;
+}
+REGISTER_PER_THREAD_FREE(deallocate_mux_cleanup);
+
+static void deinit_idle_conns(void)
+{
+ int i;
+
+ for (i = 0; i < global.nbthread; i++) {
+ task_destroy(idle_conns[i].cleanup_task);
+ }
+}
+REGISTER_POST_DEINIT(deinit_idle_conns);
diff --git a/src/cpuset.c b/src/cpuset.c
new file mode 100644
index 0000000..82e350f
--- /dev/null
+++ b/src/cpuset.c
@@ -0,0 +1,296 @@
+#define _GNU_SOURCE
+#include <sched.h>
+#include <ctype.h>
+
+#include <haproxy/compat.h>
+#include <haproxy/cpuset.h>
+#include <haproxy/intops.h>
+#include <haproxy/tools.h>
+
+struct cpu_map *cpu_map;
+
+void ha_cpuset_zero(struct hap_cpuset *set)
+{
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_ZERO(&set->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ set->cpuset = 0;
+#endif
+}
+
+int ha_cpuset_set(struct hap_cpuset *set, int cpu)
+{
+ if (cpu >= ha_cpuset_size())
+ return 1;
+
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_SET(cpu, &set->cpuset);
+ return 0;
+
+#elif defined(CPUSET_USE_ULONG)
+	set->cpuset |= (0x1UL << cpu);
+ return 0;
+#endif
+}
+
+int ha_cpuset_clr(struct hap_cpuset *set, int cpu)
+{
+ if (cpu >= ha_cpuset_size())
+ return 1;
+
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_CLR(cpu, &set->cpuset);
+ return 0;
+
+#elif defined(CPUSET_USE_ULONG)
+	set->cpuset &= ~(0x1UL << cpu);
+ return 0;
+#endif
+}
+
+void ha_cpuset_and(struct hap_cpuset *dst, struct hap_cpuset *src)
+{
+#if defined(CPUSET_USE_CPUSET)
+ CPU_AND(&dst->cpuset, &dst->cpuset, &src->cpuset);
+
+#elif defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_AND(&dst->cpuset, &src->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ dst->cpuset &= src->cpuset;
+#endif
+}
+
+void ha_cpuset_or(struct hap_cpuset *dst, struct hap_cpuset *src)
+{
+#if defined(CPUSET_USE_CPUSET)
+ CPU_OR(&dst->cpuset, &dst->cpuset, &src->cpuset);
+
+#elif defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_OR(&dst->cpuset, &src->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ dst->cpuset |= src->cpuset;
+#endif
+}
+
+int ha_cpuset_isset(const struct hap_cpuset *set, int cpu)
+{
+ if (cpu >= ha_cpuset_size())
+ return 0;
+
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ return CPU_ISSET(cpu, &set->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+	return !!(set->cpuset & (0x1UL << cpu));
+#else
+ return 0;
+#endif
+}
+
+int ha_cpuset_count(const struct hap_cpuset *set)
+{
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ return CPU_COUNT(&set->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ return my_popcountl(set->cpuset);
+#endif
+}
+
+int ha_cpuset_ffs(const struct hap_cpuset *set)
+{
+#if defined(CPUSET_USE_CPUSET)
+ int n;
+
+ if (!CPU_COUNT(&set->cpuset))
+ return 0;
+
+ for (n = 0; !CPU_ISSET(n, &set->cpuset); ++n)
+ ;
+
+ return n + 1;
+
+#elif defined(CPUSET_USE_FREEBSD_CPUSET)
+ return CPU_FFS(&set->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ if (!set->cpuset)
+ return 0;
+
+ return my_ffsl(set->cpuset);
+#endif
+}
+
+void ha_cpuset_assign(struct hap_cpuset *dst, struct hap_cpuset *src)
+{
+#if defined(CPUSET_USE_CPUSET)
+ CPU_ZERO(&dst->cpuset);
+ CPU_OR(&dst->cpuset, &dst->cpuset, &src->cpuset);
+
+#elif defined(CPUSET_USE_FREEBSD_CPUSET)
+ CPU_COPY(&src->cpuset, &dst->cpuset);
+
+#elif defined(CPUSET_USE_ULONG)
+ dst->cpuset = src->cpuset;
+#endif
+}
+
+int ha_cpuset_size()
+{
+#if defined(CPUSET_USE_CPUSET) || defined(CPUSET_USE_FREEBSD_CPUSET)
+ return CPU_SETSIZE;
+
+#elif defined(CPUSET_USE_ULONG)
+ return LONGBITS;
+
+#endif
+}
+
+/* Detects CPUs that are bound to the current process. Returns the number of
+ * CPUs detected or 0 if the detection failed.
+ */
+int ha_cpuset_detect_bound(struct hap_cpuset *set)
+{
+ ha_cpuset_zero(set);
+
+ /* detect bound CPUs depending on the OS's API */
+ if (0
+#if defined(__linux__)
+ || sched_getaffinity(0, sizeof(set->cpuset), &set->cpuset) != 0
+#elif defined(__FreeBSD__)
+ || cpuset_getaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(set->cpuset), &set->cpuset) != 0
+#else
+ || 1 // unhandled platform
+#endif
+ ) {
+ /* detection failed */
+ return 0;
+ }
+
+ return ha_cpuset_count(set);
+}
+
+/* Parse cpu sets. Each CPU set is a comma-separated list of unique numbers
+ * between 0 and ha_cpuset_size() - 1, or of ranges made of two such numbers
+ * delimited by a dash ('-'). It is also possible to specify multiple cpu
+ * numbers or ranges in distinct arguments in <args>. On success, it returns
+ * 0, otherwise it returns 1, optionally with an error message in <err> if
+ * <err> is not NULL.
+ */
+int parse_cpu_set(const char **args, struct hap_cpuset *cpu_set, char **err)
+{
+ int cur_arg = 0;
+ const char *arg;
+
+ ha_cpuset_zero(cpu_set);
+
+ arg = args[cur_arg];
+ while (*arg) {
+ const char *dash, *comma;
+ unsigned int low, high;
+
+		if (!isdigit((unsigned char)*arg)) {
+ memprintf(err, "'%s' is not a CPU range.", arg);
+ return 1;
+ }
+
+ low = high = str2uic(arg);
+
+ comma = strchr(arg, ',');
+ dash = strchr(arg, '-');
+
+ if (dash && (!comma || dash < comma))
+ high = *(dash+1) ? str2uic(dash + 1) : ha_cpuset_size() - 1;
+
+ if (high < low) {
+ unsigned int swap = low;
+ low = high;
+ high = swap;
+ }
+
+ if (high >= ha_cpuset_size()) {
+ memprintf(err, "supports CPU numbers from 0 to %d.",
+ ha_cpuset_size() - 1);
+ return 1;
+ }
+
+ while (low <= high)
+ ha_cpuset_set(cpu_set, low++);
+
+ /* if a comma is present, parse the rest of the arg, else
+ * skip to the next arg */
+ arg = comma ? comma + 1 : args[++cur_arg];
+ }
+ return 0;
+}
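+
+/* Examples (illustrative): "0-3,8" or the two arguments "0-3" "8" both
+ * select CPUs 0 to 3 plus CPU 8, and an open range such as "4-" extends up
+ * to ha_cpuset_size() - 1.
+ */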
+
+/* Parse a Linux cpu map string into a numeric cpu mask.
+ * The cpu map string is a list of 4-byte hex words separated by commas, with
+ * the most significant word first, one bit per cpu number.
+ */
+void parse_cpumap(char *cpumap_str, struct hap_cpuset *cpu_set)
+{
+ unsigned long cpumap;
+ char *start, *endptr, *comma;
+ int i, j;
+
+ ha_cpuset_zero(cpu_set);
+
+ i = 0;
+ do {
+ /* reverse-search for a comma, parse the string after the comma
+ * or at the beginning if no comma found
+ */
+ comma = strrchr(cpumap_str, ',');
+ start = comma ? comma + 1 : cpumap_str;
+
+ cpumap = strtoul(start, &endptr, 16);
+ for (j = 0; cpumap; cpumap >>= 1, ++j) {
+ if (cpumap & 0x1)
+ ha_cpuset_set(cpu_set, j + i * 32);
+ }
+
+ if (comma)
+ *comma = '\0';
+ ++i;
+ } while (comma);
+}
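+
+/* Example (illustrative): the map string "00000001,00000003" selects CPUs 0
+ * and 1 (low word "00000003") plus CPU 32 (high word "00000001"), each
+ * comma-separated word covering 32 CPUs, least significant word last.
+ */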
+
+/* Returns true if at least one cpu-map directive was configured, otherwise
+ * false.
+ */
+int cpu_map_configured(void)
+{
+ int grp, thr;
+
+ for (grp = 0; grp < MAX_TGROUPS; grp++) {
+ for (thr = 0; thr < MAX_THREADS_PER_GROUP; thr++)
+ if (ha_cpuset_count(&cpu_map[grp].thread[thr]))
+ return 1;
+ }
+ return 0;
+}
+
+/* Allocates everything needed to store CPU information at boot.
+ * Returns non-zero on success, zero on failure.
+ */
+static int cpuset_alloc(void)
+{
+ /* allocate the structures used to store CPU topology info */
+ cpu_map = (struct cpu_map*)calloc(MAX_TGROUPS, sizeof(*cpu_map));
+ if (!cpu_map)
+ return 0;
+
+ return 1;
+}
+
+static void cpuset_deinit(void)
+{
+ ha_free(&cpu_map);
+}
+
+INITCALL0(STG_ALLOC, cpuset_alloc);
+REGISTER_POST_DEINIT(cpuset_deinit);
diff --git a/src/debug.c b/src/debug.c
new file mode 100644
index 0000000..fbaad80
--- /dev/null
+++ b/src/debug.c
@@ -0,0 +1,2301 @@
+/*
+ * Process debugging functions.
+ *
+ * Copyright 2000-2019 Willy Tarreau <willy@haproxy.org>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <syslog.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#ifdef USE_EPOLL
+#include <sys/epoll.h>
+#endif
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/buf.h>
+#include <haproxy/cli.h>
+#include <haproxy/clock.h>
+#include <haproxy/debug.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/hlua.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <import/ist.h>
+
+
+/* The dump state is made of:
+ * - num_thread on the lowest 15 bits
+ * - a SYNC flag on bit 15 (waiting for sync start)
+ * - number of participating threads on bits 16-30
+ * Initiating a dump consists in setting it to SYNC and incrementing the
+ * num_thread part when entering the function. The first thread periodically
+ * recounts active threads and compares it to the ready ones, and clears SYNC
+ * and sets the number of participants to the value found, which serves as a
+ * start signal. A thread finished dumping looks up the TID of the next active
+ * thread after it and writes it in the lowest part. If there's none, it sets
+ * the thread counter to the number of participants and resets that part,
+ * which serves as an end-of-dump signal. All threads decrement the num_thread
+ * part. Then all threads wait for the value to reach zero. Only used when
+ * USE_THREAD_DUMP is set.
+ */
+#define THREAD_DUMP_TMASK 0x00007FFFU
+#define THREAD_DUMP_FSYNC 0x00008000U
+#define THREAD_DUMP_PMASK 0x7FFF0000U
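+
+/* Illustration of the layout described above (bit 31 is unused):
+ *   [31][30 .. 16 ][15  ][14 .. 0   ]
+ *   [ - ][ parts  ][SYNC][num_thread]
+ */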
+
+/* Description of a component with name, version, path, build options etc. E.g.
+ * one of them is haproxy. Others might be some clearly identified shared libs.
+ * They're intentionally self-contained and to be placed into an array to make
+ * it easier to find them in a core. The important fields (name and version)
+ * are locally allocated, other ones are dynamic.
+ */
+struct post_mortem_component {
+ char name[32]; // symbolic short name
+ char version[32]; // exact version
+ char *toolchain; // compiler and version (e.g. gcc-11.4.0)
+ char *toolchain_opts; // optims, arch-specific options (e.g. CFLAGS)
+ char *build_settings; // build options (e.g. USE_*, TARGET, etc)
+ char *path; // path if known.
+};
+
+/* This is a collection of information centralized to help with core dump
+ * analysis. It must be used with a public variable and gather as many
+ * elements as possible without dereferences, so that as much as possible can
+ * be recovered even from a barely exploitable core file. It's aligned to 256
+ * so that it's easy to spot, and being that large, its size will not change
+ * much.
+ */
+struct post_mortem {
+ /* platform-specific information */
+ struct {
+ struct utsname utsname; // OS name+ver+arch+hostname
+ char hw_vendor[64]; // hardware/hypervisor vendor when known
+ char hw_family[64]; // hardware/hypervisor product family when known
+ char hw_model[64]; // hardware/hypervisor product/model when known
+ char brd_vendor[64]; // mainboard vendor when known
+ char brd_model[64]; // mainboard model when known
+ char soc_vendor[64]; // SoC/CPU vendor from cpuinfo
+ char soc_model[64]; // SoC model when known and relevant
+ char cpu_model[64]; // CPU model when different from SoC
+ char virt_techno[16]; // when provided by cpuid
+ char cont_techno[16]; // empty, "no", "yes", "docker" or others
+ } platform;
+
+ /* process-specific information */
+ struct {
+ pid_t pid;
+ uid_t boot_uid;
+ gid_t boot_gid;
+ struct rlimit limit_fd; // RLIMIT_NOFILE
+ struct rlimit limit_ram; // RLIMIT_AS or RLIMIT_DATA
+
+#if defined(USE_THREAD)
+ struct {
+ ullong pth_id; // pthread_t cast to a ullong
+ void *stack_top; // top of the stack
+ } thread_info[MAX_THREADS];
+#endif
+ } process;
+
+#if defined(HA_HAVE_DUMP_LIBS)
+ /* information about dynamic shared libraries involved */
+ char *libs; // dump of one addr / path per line, or NULL
+#endif
+
+ /* info about identified distinct components (executable, shared libs, etc).
+ * These can be all listed at once in gdb using:
+ * p *post_mortem.components@post_mortem.nb_components
+ */
+ uint nb_components; // # of components below
+ struct post_mortem_component *components; // NULL or array
+} post_mortem ALIGNED(256) = { };
+
+/* Points to a copy of the buffer where the dump functions should write, when
+ * non-null. It's only used by debuggers for core dump analysis.
+ */
+struct buffer *thread_dump_buffer = NULL;
+unsigned int debug_commands_issued = 0;
+
+/* dumps a backtrace of the current thread that is appended to buffer <buf>.
+ * Lines are prefixed with the string <prefix> which may be empty (used for
+ * indenting). It is recommended to use this at a function's tail so that
+ * the function does not appear in the call stack. The <dump> argument
+ * indicates what dump state to start from, and should usually be zero. It
+ * may be among the following values:
+ * - 0: search usual callers before step 1, or directly jump to 2
+ * - 1: skip usual callers before step 2
+ * - 2: dump until polling loop, scheduler, or main() (excluded)
+ * - 3: end
+ * - 4-7: like 0 but stops *after* main.
+ */
+void ha_dump_backtrace(struct buffer *buf, const char *prefix, int dump)
+{
+ struct buffer bak;
+ char pfx2[100];
+ void *callers[100];
+ int j, nptrs;
+ const void *addr;
+
+ nptrs = my_backtrace(callers, sizeof(callers)/sizeof(*callers));
+ if (!nptrs)
+ return;
+
+	if (snprintf(pfx2, sizeof(pfx2), "%s| ", prefix) >= sizeof(pfx2))
+ pfx2[0] = 0;
+
+	/* A call to backtrace_symbols_fd(callers, nptrs, STDOUT_FILENO) would
+	 * produce similar output to the following:
+ */
+ chunk_appendf(buf, "%scall trace(%d):\n", prefix, nptrs);
+ for (j = 0; (j < nptrs || (dump & 3) < 2); j++) {
+ if (j == nptrs && !(dump & 3)) {
+ /* we failed to spot the starting point of the
+ * dump, let's start over dumping everything we
+ * have.
+ */
+ dump += 2;
+ j = 0;
+ }
+ bak = *buf;
+ dump_addr_and_bytes(buf, pfx2, callers[j], 8);
+ addr = resolve_sym_name(buf, ": ", callers[j]);
+ if ((dump & 3) == 0) {
+ /* dump not started, will start *after* ha_thread_dump_one(),
+ * ha_panic and ha_backtrace_to_stderr
+ */
+ if (addr == ha_panic ||
+ addr == ha_backtrace_to_stderr || addr == ha_thread_dump_one)
+ dump++;
+ *buf = bak;
+ continue;
+ }
+
+ if ((dump & 3) == 1) {
+ /* starting */
+ if (addr == ha_panic ||
+ addr == ha_backtrace_to_stderr || addr == ha_thread_dump_one) {
+ *buf = bak;
+ continue;
+ }
+ dump++;
+ }
+
+ if ((dump & 3) == 2) {
+ /* still dumping */
+ if (dump == 6) {
+ /* we only stop *after* main and we must send the LF */
+ if (addr == main) {
+ j = nptrs;
+ dump++;
+ }
+ }
+ else if (addr == run_poll_loop || addr == main || addr == run_tasks_from_lists) {
+ dump++;
+ *buf = bak;
+ break;
+ }
+ }
+ /* OK, line dumped */
+ chunk_appendf(buf, "\n");
+ }
+}
+
+/* dump a backtrace of current thread's stack to stderr. */
+void ha_backtrace_to_stderr(void)
+{
+ char area[2048];
+ struct buffer b = b_make(area, sizeof(area), 0, 0);
+
+ ha_dump_backtrace(&b, " ", 4);
+ if (b.data)
+ DISGUISE(write(2, b.area, b.data));
+}
+
+/* Dumps to the thread's buffer some known information for the desired thread,
+ * and optionally extra info when it's safe to do so (current thread or
+ * isolated). The dump will be appended to the buffer, so the caller is
+ * responsible for initializing it beforehand. The <from_signal> argument
+ * indicates whether the function is called from the debug signal handler,
+ * meaning the thread was dumped upon request from another one; otherwise, if
+ * the thread is the current one, a star ('*') is displayed in front of the
+ * thread to indicate the requesting one. Any stuck thread is also prefixed
+ * with a '>'. The caller is responsible for atomically setting up the
+ * thread's dump buffer to point to a valid buffer with enough room. Output
+ * will be truncated if it does not fit. When the dump is complete, the dump
+ * buffer will be switched to (void*)0x1, which the caller must turn back to
+ * 0x0 once the contents are collected.
+ */
+void ha_thread_dump_one(int thr, int from_signal)
+{
+ struct buffer *buf = HA_ATOMIC_LOAD(&ha_thread_ctx[thr].thread_dump_buffer);
+ unsigned long __maybe_unused thr_bit = ha_thread_info[thr].ltid_bit;
+ int __maybe_unused tgrp = ha_thread_info[thr].tgid;
+ unsigned long long p = ha_thread_ctx[thr].prev_cpu_time;
+ unsigned long long n = now_cpu_time_thread(thr);
+ int stuck = !!(ha_thread_ctx[thr].flags & TH_FL_STUCK);
+
+ chunk_appendf(buf,
+ "%c%cThread %-2u: id=0x%llx act=%d glob=%d wq=%d rq=%d tl=%d tlsz=%d rqsz=%d\n"
+ " %2u/%-2u stuck=%d prof=%d",
+ (thr == tid && !from_signal) ? '*' : ' ', stuck ? '>' : ' ', thr + 1,
+ ha_get_pthread_id(thr),
+ thread_has_tasks(),
+ !eb_is_empty(&ha_thread_ctx[thr].rqueue_shared),
+ !eb_is_empty(&ha_thread_ctx[thr].timers),
+ !eb_is_empty(&ha_thread_ctx[thr].rqueue),
+ !(LIST_ISEMPTY(&ha_thread_ctx[thr].tasklets[TL_URGENT]) &&
+ LIST_ISEMPTY(&ha_thread_ctx[thr].tasklets[TL_NORMAL]) &&
+ LIST_ISEMPTY(&ha_thread_ctx[thr].tasklets[TL_BULK]) &&
+ MT_LIST_ISEMPTY(&ha_thread_ctx[thr].shared_tasklet_list)),
+ ha_thread_ctx[thr].tasks_in_list,
+ ha_thread_ctx[thr].rq_total,
+ ha_thread_info[thr].tgid, ha_thread_info[thr].ltid + 1,
+ stuck,
+ !!(ha_thread_ctx[thr].flags & TH_FL_TASK_PROFILING));
+
+#if defined(USE_THREAD)
+ chunk_appendf(buf,
+ " harmless=%d isolated=%d",
+ !!(_HA_ATOMIC_LOAD(&ha_tgroup_ctx[tgrp-1].threads_harmless) & thr_bit),
+ isolated_thread == thr);
+#endif
+
+ chunk_appendf(buf, "\n");
+ chunk_appendf(buf, " cpu_ns: poll=%llu now=%llu diff=%llu\n", p, n, n-p);
+
+ /* this is the end of what we can dump from outside the current thread */
+
+ if (thr != tid && !thread_isolated())
+ goto leave;
+
+ chunk_appendf(buf, " curr_task=");
+ ha_task_dump(buf, th_ctx->current, " ");
+
+ if (stuck && thr == tid) {
+#ifdef USE_LUA
+ if (th_ctx->current &&
+ th_ctx->current->process == process_stream && th_ctx->current->context) {
+ const struct stream *s = (const struct stream *)th_ctx->current->context;
+ struct hlua *hlua = s ? s->hlua : NULL;
+
+ if (hlua && hlua->T) {
+ mark_tainted(TAINTED_LUA_STUCK);
+ if (hlua->state_id == 0)
+ mark_tainted(TAINTED_LUA_STUCK_SHARED);
+ }
+ }
+#endif
+
+ if (HA_ATOMIC_LOAD(&pool_trim_in_progress))
+ mark_tainted(TAINTED_MEM_TRIMMING_STUCK);
+
+ /* We only emit the backtrace for stuck threads in order not to
+ * waste precious output buffer space with non-interesting data.
+ * Please leave this as the last instruction in this function
+ * so that the compiler uses tail merging and the current
+ * function does not appear in the stack.
+ */
+ ha_dump_backtrace(buf, " ", 0);
+ }
+ leave:
+ /* end of dump, setting the buffer to 0x1 will tell the caller we're done */
+ HA_ATOMIC_STORE(&ha_thread_ctx[thr].thread_dump_buffer, (void*)0x1UL);
+}
+
+/* Triggers a thread dump from thread <thr>, either directly if it's the
+ * current thread or if thread dump signals are not implemented, or by sending
+ * a signal if it's a remote one and the feature is supported. The buffer <buf>
+ * will get the dump appended, and the caller is responsible for making sure
+ * there is enough room otherwise some contents will be truncated.
+ */
+void ha_thread_dump(struct buffer *buf, int thr)
+{
+ struct buffer *old = NULL;
+
+ /* try to impose our dump buffer and to reserve the target thread's
+ * next dump for us.
+ */
+ do {
+ if (old)
+ ha_thread_relax();
+ old = NULL;
+ } while (!HA_ATOMIC_CAS(&ha_thread_ctx[thr].thread_dump_buffer, &old, buf));
+
+#ifdef USE_THREAD_DUMP
+	/* asking the remote thread to dump itself allows us to get more details
+ * including a backtrace.
+ */
+ if (thr != tid)
+ ha_tkill(thr, DEBUGSIG);
+ else
+#endif
+ ha_thread_dump_one(thr, thr != tid);
+
+ /* now wait for the dump to be done, and release it */
+ do {
+ if (old)
+ ha_thread_relax();
+ old = (void*)0x01;
+ } while (!HA_ATOMIC_CAS(&ha_thread_ctx[thr].thread_dump_buffer, &old, 0));
+}
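+
+/* The dump buffer pointer therefore cycles through three states (sketch):
+ *   NULL --CAS--> <buf> (dump requested) --dumper--> 0x1 (done) --CAS--> NULL
+ */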
+
+/* dumps into the buffer some information related to task <task> (which may
+ * either be a task or a tasklet), and prepends each line except the first one
+ * with <pfx>. The buffer is only appended to and the first output starts with
+ * the pointer itself. The caller is responsible for making sure the task is not
+ * going to vanish during the dump.
+ */
+void ha_task_dump(struct buffer *buf, const struct task *task, const char *pfx)
+{
+ const struct stream *s = NULL;
+ const struct appctx __maybe_unused *appctx = NULL;
+ struct hlua __maybe_unused *hlua = NULL;
+ const struct stconn *sc;
+
+ if (!task) {
+ chunk_appendf(buf, "0\n");
+ return;
+ }
+
+ if (TASK_IS_TASKLET(task))
+ chunk_appendf(buf,
+ "%p (tasklet) calls=%u\n",
+ task,
+ task->calls);
+ else
+ chunk_appendf(buf,
+ "%p (task) calls=%u last=%llu%s\n",
+ task,
+ task->calls,
+ task->wake_date ? (unsigned long long)(now_mono_time() - task->wake_date) : 0,
+ task->wake_date ? " ns ago" : "");
+
+ chunk_appendf(buf, "%s fct=%p(", pfx, task->process);
+ resolve_sym_name(buf, NULL, task->process);
+ chunk_appendf(buf,") ctx=%p", task->context);
+
+ if (task->process == task_run_applet && (appctx = task->context))
+ chunk_appendf(buf, "(%s)\n", appctx->applet->name);
+ else
+ chunk_appendf(buf, "\n");
+
+ if (task->process == process_stream && task->context)
+ s = (struct stream *)task->context;
+ else if (task->process == task_run_applet && task->context && (sc = appctx_sc((struct appctx *)task->context)))
+ s = sc_strm(sc);
+ else if (task->process == sc_conn_io_cb && task->context)
+ s = sc_strm(((struct stconn *)task->context));
+
+ if (s) {
+ chunk_appendf(buf, "%sstream=", pfx);
+ strm_dump_to_buffer(buf, s, pfx, HA_ATOMIC_LOAD(&global.anon_key));
+ }
+
+#ifdef USE_LUA
+ hlua = NULL;
+ if (s && (hlua = s->hlua)) {
+ chunk_appendf(buf, "%sCurrent executing Lua from a stream analyser -- ", pfx);
+ }
+ else if (task->process == hlua_process_task && (hlua = task->context)) {
+ chunk_appendf(buf, "%sCurrent executing a Lua task -- ", pfx);
+ }
+ else if (task->process == task_run_applet && (appctx = task->context) &&
+ (appctx->applet->fct == hlua_applet_tcp_fct)) {
+ chunk_appendf(buf, "%sCurrent executing a Lua TCP service -- ", pfx);
+ }
+ else if (task->process == task_run_applet && (appctx = task->context) &&
+ (appctx->applet->fct == hlua_applet_http_fct)) {
+ chunk_appendf(buf, "%sCurrent executing a Lua HTTP service -- ", pfx);
+ }
+
+ if (hlua && hlua->T) {
+ chunk_appendf(buf, "stack traceback:\n ");
+ append_prefixed_str(buf, hlua_traceback(hlua->T, "\n "), pfx, '\n', 0);
+ }
+
+ /* we may need to terminate the current line */
+ if (*b_peek(buf, b_data(buf)-1) != '\n')
+ b_putchr(buf, '\n');
+#endif
+}
+
+/* This function dumps the state of all threads. It returns 0 if the output
+ * buffer is full and it needs to be called again, otherwise non-zero.
+ */
+static int cli_io_handler_show_threads(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int thr;
+
+ /* FIXME: Don't watch the other side !*/
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ if (appctx->st0)
+ thr = appctx->st1;
+ else
+ thr = 0;
+
+ do {
+ chunk_reset(&trash);
+ ha_thread_dump(&trash, thr);
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* failed, try again */
+ appctx->st1 = thr;
+ return 0;
+ }
+ thr++;
+ } while (thr < global.nbthread);
+
+ return 1;
+}
+
+#if defined(HA_HAVE_DUMP_LIBS)
+/* parse a "show libs" command. It returns 1 if it emits anything otherwise zero. */
+static int debug_parse_cli_show_libs(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ chunk_reset(&trash);
+ if (dump_libs(&trash, 1))
+ return cli_msg(appctx, LOG_INFO, trash.area);
+ else
+ return 0;
+}
+#endif
+
+/* parse a "show dev" command. It returns 1 if it emits anything otherwise zero. */
+static int debug_parse_cli_show_dev(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ const char **build_opt;
+
+ if (*args[2])
+ return cli_err(appctx, "This command takes no argument.\n");
+
+ chunk_reset(&trash);
+
+ chunk_appendf(&trash, "Features\n %s\n", build_features);
+
+ chunk_appendf(&trash, "Build options\n");
+ for (build_opt = NULL; (build_opt = hap_get_next_build_opt(build_opt)); )
+ if (append_prefixed_str(&trash, *build_opt, " ", '\n', 0) == 0)
+ chunk_strcat(&trash, "\n");
+
+ chunk_appendf(&trash, "Platform info\n");
+ if (*post_mortem.platform.hw_vendor)
+ chunk_appendf(&trash, " machine vendor: %s\n", post_mortem.platform.hw_vendor);
+ if (*post_mortem.platform.hw_family)
+ chunk_appendf(&trash, " machine family: %s\n", post_mortem.platform.hw_family);
+ if (*post_mortem.platform.hw_model)
+ chunk_appendf(&trash, " machine model: %s\n", post_mortem.platform.hw_model);
+ if (*post_mortem.platform.brd_vendor)
+ chunk_appendf(&trash, " board vendor: %s\n", post_mortem.platform.brd_vendor);
+ if (*post_mortem.platform.brd_model)
+ chunk_appendf(&trash, " board model: %s\n", post_mortem.platform.brd_model);
+ if (*post_mortem.platform.soc_vendor)
+ chunk_appendf(&trash, " soc vendor: %s\n", post_mortem.platform.soc_vendor);
+ if (*post_mortem.platform.soc_model)
+ chunk_appendf(&trash, " soc model: %s\n", post_mortem.platform.soc_model);
+ if (*post_mortem.platform.cpu_model)
+ chunk_appendf(&trash, " cpu model: %s\n", post_mortem.platform.cpu_model);
+ if (*post_mortem.platform.virt_techno)
+ chunk_appendf(&trash, " virtual machine: %s\n", post_mortem.platform.virt_techno);
+ if (*post_mortem.platform.cont_techno)
+ chunk_appendf(&trash, " container: %s\n", post_mortem.platform.cont_techno);
+ if (*post_mortem.platform.utsname.sysname)
+ chunk_appendf(&trash, " OS name: %s\n", post_mortem.platform.utsname.sysname);
+ if (*post_mortem.platform.utsname.release)
+ chunk_appendf(&trash, " OS release: %s\n", post_mortem.platform.utsname.release);
+ if (*post_mortem.platform.utsname.version)
+ chunk_appendf(&trash, " OS version: %s\n", post_mortem.platform.utsname.version);
+ if (*post_mortem.platform.utsname.machine)
+ chunk_appendf(&trash, " OS architecture: %s\n", post_mortem.platform.utsname.machine);
+ if (*post_mortem.platform.utsname.nodename)
+ chunk_appendf(&trash, " node name: %s\n", HA_ANON_CLI(post_mortem.platform.utsname.nodename));
+
+ chunk_appendf(&trash, "Process info\n");
+ chunk_appendf(&trash, " pid: %d\n", post_mortem.process.pid);
+ chunk_appendf(&trash, " boot uid: %d\n", post_mortem.process.boot_uid);
+ chunk_appendf(&trash, " boot gid: %d\n", post_mortem.process.boot_gid);
+
+ if ((ulong)post_mortem.process.limit_fd.rlim_cur != RLIM_INFINITY)
+ chunk_appendf(&trash, " fd limit (soft): %lu\n", (ulong)post_mortem.process.limit_fd.rlim_cur);
+ if ((ulong)post_mortem.process.limit_fd.rlim_max != RLIM_INFINITY)
+ chunk_appendf(&trash, " fd limit (hard): %lu\n", (ulong)post_mortem.process.limit_fd.rlim_max);
+ if ((ulong)post_mortem.process.limit_ram.rlim_cur != RLIM_INFINITY)
+ chunk_appendf(&trash, " ram limit (soft): %lu\n", (ulong)post_mortem.process.limit_ram.rlim_cur);
+ if ((ulong)post_mortem.process.limit_ram.rlim_max != RLIM_INFINITY)
+ chunk_appendf(&trash, " ram limit (hard): %lu\n", (ulong)post_mortem.process.limit_ram.rlim_max);
+
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* Dumps the state of all threads into the trash and onto fd #2, then aborts.
+ * A copy will be put into a trash chunk that's assigned to thread_dump_buffer
+ * so that the debugger can easily find it. This buffer might be truncated if
+ * too many threads are being dumped, but at least we'll dump them all on stderr.
+ * If thread_dump_buffer is set, it means that a panic has already begun.
+ */
+void ha_panic()
+{
+ struct buffer *old;
+ unsigned int thr;
+
+ mark_tainted(TAINTED_PANIC);
+
+ old = NULL;
+ if (!HA_ATOMIC_CAS(&thread_dump_buffer, &old, get_trash_chunk())) {
+ /* a panic dump is already in progress, let's not disturb it,
+ * we'll be called via signal DEBUGSIG. By returning we may be
+ * able to leave a current signal handler (e.g. WDT) so that
+ * this will ensure more reliable signal delivery.
+ */
+ return;
+ }
+
+ chunk_reset(&trash);
+ chunk_appendf(&trash, "Thread %u is about to kill the process.\n", tid + 1);
+
+ for (thr = 0; thr < global.nbthread; thr++) {
+ ha_thread_dump(&trash, thr);
+ DISGUISE(write(2, trash.area, trash.data));
+ b_force_xfer(thread_dump_buffer, &trash, b_room(thread_dump_buffer));
+ chunk_reset(&trash);
+ }
+
+#ifdef USE_LUA
+ if (get_tainted() & TAINTED_LUA_STUCK_SHARED && global.nbthread > 1) {
+ chunk_printf(&trash,
+ "### Note: at least one thread was stuck in a Lua context loaded using the\n"
+ " 'lua-load' directive, which is known for causing heavy contention\n"
+ " when used with threads. Please consider using 'lua-load-per-thread'\n"
+ " instead if your code is safe to run in parallel on multiple threads.\n");
+ DISGUISE(write(2, trash.area, trash.data));
+ }
+ else if (get_tainted() & TAINTED_LUA_STUCK) {
+ chunk_printf(&trash,
+ "### Note: at least one thread was stuck in a Lua context in a way that suggests\n"
+ " heavy processing inside a dependency or a long loop that can't yield.\n"
+ " Please make sure any external code you may rely on is safe for use in\n"
+ " an event-driven engine.\n");
+ DISGUISE(write(2, trash.area, trash.data));
+ }
+#endif
+ if (get_tainted() & TAINTED_MEM_TRIMMING_STUCK) {
+ chunk_printf(&trash,
+ "### Note: one thread was found stuck under malloc_trim(), which can run for a\n"
+		             " very long time on large memory systems. You may want to disable this\n"
+ " memory reclaiming feature by setting 'no-memory-trimming' in the\n"
+ " 'global' section of your configuration to avoid this in the future.\n");
+ DISGUISE(write(2, trash.area, trash.data));
+ }
+
+ for (;;)
+ abort();
+}
+
+/* Complain with message <msg> on stderr. If <counter> is not NULL, it is
+ * atomically incremented, and the message is only printed when the counter
+ * was zero, so that the message is only printed once. <taint> is only checked
+ * on bit 1, and will taint the process either for a bug (2) or warn (0).
+ */
+void complain(int *counter, const char *msg, int taint)
+{
+ if (counter && _HA_ATOMIC_FETCH_ADD(counter, 1))
+ return;
+ DISGUISE(write(2, msg, strlen(msg)));
+ if (taint & 2)
+ mark_tainted(TAINTED_BUG);
+ else
+ mark_tainted(TAINTED_WARN);
+}
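+
+/* Typical use (illustrative, <warned> is a made-up name), emitting a
+ * one-shot warning from a hot path without flooding stderr:
+ *   static int warned;
+ *   complain(&warned, "BUG: foo called with bar unset\n", 2);
+ */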
+
+/* parse a "debug dev exit" command. It always returns 1, though it should never return. */
+static int debug_parse_cli_exit(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int code = atoi(args[3]);
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ exit(code);
+ return 1;
+}
+
+/* parse a "debug dev bug" command. It always returns 1, though it should never return.
+ * Note: we make sure not to make the function static so that it appears in the trace.
+ */
+int debug_parse_cli_bug(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ BUG_ON(one > zero);
+ return 1;
+}
+
+/* parse a "debug dev warn" command. It always returns 1.
+ * Note: we make sure not to make the function static so that it appears in the trace.
+ */
+int debug_parse_cli_warn(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ WARN_ON(one > zero);
+ return 1;
+}
+
+/* parse a "debug dev check" command. It always returns 1.
+ * Note: we make sure not to make the function static so that it appears in the trace.
+ */
+int debug_parse_cli_check(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ CHECK_IF(one > zero);
+ return 1;
+}
+
+/* parse a "debug dev close" command. It always returns 1. */
+static int debug_parse_cli_close(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int fd;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing file descriptor number.\n");
+
+ fd = atoi(args[3]);
+ if (fd < 0 || fd >= global.maxsock)
+ return cli_err(appctx, "File descriptor out of range.\n");
+
+ if (!fdtab[fd].owner)
+ return cli_msg(appctx, LOG_INFO, "File descriptor was already closed.\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ fd_delete(fd);
+ return 1;
+}
+
+/* this is meant to cause a deadlock when more than one task is running it or when run twice */
+static struct task *debug_run_cli_deadlock(struct task *task, void *ctx, unsigned int state)
+{
+ static HA_SPINLOCK_T lock __maybe_unused;
+
+ HA_SPIN_LOCK(OTHER_LOCK, &lock);
+ return NULL;
+}
+
+/* parse a "debug dev deadlock" command. It always returns 1. */
+static int debug_parse_cli_deadlock(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int tasks;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ for (tasks = atoi(args[3]); tasks > 0; tasks--) {
+ struct task *t = task_new_on(tasks % global.nbthread);
+ if (!t)
+ continue;
+ t->process = debug_run_cli_deadlock;
+ t->context = NULL;
+ task_wakeup(t, TASK_WOKEN_INIT);
+ }
+
+ return 1;
+}
+
+/* parse a "debug dev delay" command. It always returns 1. */
+static int debug_parse_cli_delay(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int delay = atoi(args[3]);
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ usleep((long)delay * 1000);
+ return 1;
+}
+
+/* parse a "debug dev log" command. It always returns 1. */
+static int debug_parse_cli_log(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int arg;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ chunk_reset(&trash);
+ for (arg = 3; *args[arg]; arg++) {
+ if (arg > 3)
+ chunk_strcat(&trash, " ");
+ chunk_strcat(&trash, args[arg]);
+ }
+
+ send_log(NULL, LOG_INFO, "%s\n", trash.area);
+ return 1;
+}
+
+/* parse a "debug dev loop" command. It always returns 1. */
+static int debug_parse_cli_loop(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct timeval deadline, curr;
+ int loop = atoi(args[3]);
+ int isolate;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ isolate = strcmp(args[4], "isolated") == 0;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ gettimeofday(&curr, NULL);
+ tv_ms_add(&deadline, &curr, loop);
+
+ if (isolate)
+ thread_isolate();
+
+ while (tv_ms_cmp(&curr, &deadline) < 0)
+ gettimeofday(&curr, NULL);
+
+ if (isolate)
+ thread_release();
+
+ return 1;
+}
+
+/* parse a "debug dev panic" command. It always returns 1, though it should never return. */
+static int debug_parse_cli_panic(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ ha_panic();
+ return 1;
+}
+
+/* parse a "debug dev exec" command. It always returns 1. */
+#if defined(DEBUG_DEV)
+static int debug_parse_cli_exec(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int pipefd[2];
+ int arg;
+ int pid;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ chunk_reset(&trash);
+ for (arg = 3; *args[arg]; arg++) {
+ if (arg > 3)
+ chunk_strcat(&trash, " ");
+ chunk_strcat(&trash, args[arg]);
+ }
+
+ thread_isolate();
+ if (pipe(pipefd) < 0)
+ goto fail_pipe;
+
+ if (fd_set_cloexec(pipefd[0]) == -1)
+ goto fail_fcntl;
+
+ if (fd_set_cloexec(pipefd[1]) == -1)
+ goto fail_fcntl;
+
+ pid = fork();
+
+ if (pid < 0)
+ goto fail_fork;
+ else if (pid == 0) {
+ /* child */
+ char *cmd[4] = { "/bin/sh", "-c", 0, 0 };
+
+ close(0);
+ dup2(pipefd[1], 1);
+ dup2(pipefd[1], 2);
+
+ cmd[2] = trash.area;
+ execvp(cmd[0], cmd);
+ printf("execvp() failed\n");
+ exit(1);
+ }
+
+ /* parent */
+ thread_release();
+ close(pipefd[1]);
+ chunk_reset(&trash);
+ while (1) {
+		ssize_t ret = read(pipefd[0], trash.area + trash.data, trash.size - 20 - trash.data);
+ if (ret <= 0)
+ break;
+ trash.data += ret;
+ if (trash.data + 20 == trash.size) {
+ chunk_strcat(&trash, "\n[[[TRUNCATED]]]\n");
+ break;
+ }
+ }
+ close(pipefd[0]);
+ waitpid(pid, NULL, WNOHANG);
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_INFO, trash.area);
+
+ fail_fork:
+ fail_fcntl:
+ close(pipefd[0]);
+ close(pipefd[1]);
+ fail_pipe:
+ thread_release();
+ return cli_err(appctx, "Failed to execute command.\n");
+}
+
+/* handles SIGRTMAX to inject random delays on the receiving thread in order
+ * to try to increase the likelihood of reproducing inter-thread races. The
+ * signal is periodically sent by a task initiated by "debug dev delay-inj".
+ */
+void debug_delay_inj_sighandler(int sig, siginfo_t *si, void *arg)
+{
+ volatile int i = statistical_prng_range(10000);
+
+ while (i--)
+ __ha_cpu_relax();
+}
+#endif
+
+/* parse a "debug dev hex" command. It always returns 1. */
+static int debug_parse_cli_hex(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ unsigned long start, len;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing memory address to dump from.\n");
+
+ start = strtoul(args[3], NULL, 0);
+ if (!start)
+ return cli_err(appctx, "Will not dump from NULL address.\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+
+ /* by default, dump ~128 till next block of 16 */
+ len = strtoul(args[4], NULL, 0);
+ if (!len)
+ len = ((start + 128) & -16) - start;
+
+ chunk_reset(&trash);
+ dump_hex(&trash, " ", (const void *)start, len, 1);
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* parse a "debug dev sym <addr>" command. It always returns 1. */
+static int debug_parse_cli_sym(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ unsigned long addr;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing memory address to be resolved.\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+
+ addr = strtoul(args[3], NULL, 0);
+ chunk_printf(&trash, "%#lx resolves to ", addr);
+ resolve_sym_name(&trash, NULL, (const void *)addr);
+ chunk_appendf(&trash, "\n");
+
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* parse a "debug dev tkill" command. It always returns 1. */
+static int debug_parse_cli_tkill(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int thr = 0;
+ int sig = SIGABRT;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (*args[3])
+ thr = atoi(args[3]);
+
+ if (thr < 0 || thr > global.nbthread)
+ return cli_err(appctx, "Thread number out of range (use 0 for current).\n");
+
+ if (*args[4])
+ sig = atoi(args[4]);
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ if (thr)
+ ha_tkill(thr - 1, sig);
+ else
+ raise(sig);
+ return 1;
+}
+
+/* parse a "debug dev hash" command: it returns the argument hashed if
+ * anonymization is enabled. It always returns 1.
+ */
+static int debug_parse_cli_hash(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *msg = NULL;
+
+ cli_dynmsg(appctx, LOG_INFO, memprintf(&msg, "%s\n", HA_ANON_CLI(args[3])));
+ return 1;
+}
+
+/* parse a "debug dev write" command. It always returns 1. */
+static int debug_parse_cli_write(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ unsigned long len;
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing output size.\n");
+
+ len = strtoul(args[3], NULL, 0);
+ if (len >= trash.size)
+ return cli_err(appctx, "Output too large, must be <tune.bufsize.\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+
+ chunk_reset(&trash);
+ trash.data = len;
+ memset(trash.area, '.', trash.data);
+ trash.area[trash.data] = 0;
+ for (len = 64; len < trash.data; len += 64)
+ trash.area[len] = '\n';
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* parse a "debug dev stream" command */
+/*
+ *   debug dev stream [strm=<ptr>] [strm.f[{+-=}<flags>]] [strm.x[=<exp>]]  \
+ *                    [txn.f[{+-=}<flags>]] [req.f[{+-=}<flags>]]           \
+ *                    [res.f[{+-=}<flags>]] [scf.s[=<state>]]               \
+ *                    [scb.s[=<state>]] [wake]
+ */
+static int debug_parse_cli_stream(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct stream *s = appctx_strm(appctx);
+ int arg;
+ void *ptr;
+ int size;
+ const char *word, *end;
+ struct ist name;
+ char *msg = NULL;
+ char *endarg;
+ unsigned long long old, new;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ptr = NULL; size = 0;
+
+ if (!*args[3]) {
+ return cli_err(appctx,
+ "Usage: debug dev stream [ strm=<ptr> ] { <obj> <op> <value> | wake }*\n"
+ " <obj> = { strm.f | strm.x | scf.s | scb.s | txn.f | req.f | res.f }\n"
+ " <op> = {'' (show) | '=' (assign) | '^' (xor) | '+' (or) | '-' (andnot)}\n"
+ " <value> = 'now' | 64-bit dec/hex integer (0x prefix supported)\n"
+ " 'wake' wakes the stream assigned to 'strm' (default: current)\n"
+ );
+ }
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ for (arg = 3; *args[arg]; arg++) {
+ old = 0;
+ end = word = args[arg];
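+		/* split "name<op>value", e.g. "req.f=0x800", "strm.x=now" or
+		 * "scf.s" alone (show); <op> is one of '=', '^', '+', '-'.
+		 */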
+ while (*end && *end != '=' && *end != '^' && *end != '+' && *end != '-')
+ end++;
+ name = ist2(word, end - word);
+ if (isteq(name, ist("strm"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s; size = sizeof(s);
+ } else if (isteq(name, ist("strm.f"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->flags; size = sizeof(s->flags);
+ } else if (isteq(name, ist("strm.x"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->conn_exp; size = sizeof(s->conn_exp);
+ } else if (isteq(name, ist("txn.f"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->txn->flags; size = sizeof(s->txn->flags);
+ } else if (isteq(name, ist("req.f"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->req.flags; size = sizeof(s->req.flags);
+ } else if (isteq(name, ist("res.f"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->res.flags; size = sizeof(s->res.flags);
+ } else if (isteq(name, ist("scf.s"))) {
+ ptr = (!s || !may_access(s)) ? NULL : &s->scf->state; size = sizeof(s->scf->state);
+ } else if (isteq(name, ist("scb.s"))) {
+			ptr = (!s || !may_access(s)) ? NULL : &s->scb->state; size = sizeof(s->scb->state);
+ } else if (isteq(name, ist("wake"))) {
+ if (s && may_access(s) && may_access((void *)s + sizeof(*s) - 1))
+ task_wakeup(s->task, TASK_WOKEN_TIMER|TASK_WOKEN_IO|TASK_WOKEN_MSG);
+ continue;
+ } else
+ return cli_dynerr(appctx, memprintf(&msg, "Unsupported field name: '%s'.\n", word));
+
+ /* read previous value */
+ if ((s || ptr == &s) && ptr && may_access(ptr) && may_access(ptr + size - 1)) {
+ if (size == 8)
+ old = read_u64(ptr);
+ else if (size == 4)
+ old = read_u32(ptr);
+ else if (size == 2)
+ old = read_u16(ptr);
+ else
+ old = *(const uint8_t *)ptr;
+ } else {
+ memprintf(&msg,
+ "%sSkipping inaccessible pointer %p for field '%.*s'.\n",
+ msg ? msg : "", ptr, (int)(end - word), word);
+ continue;
+ }
+
+		/* parse the new value. */
+ new = strtoll(end + 1, &endarg, 0);
+ if (end[1] && *endarg) {
+ if (strcmp(end + 1, "now") == 0)
+ new = now_ms;
+ else {
+ memprintf(&msg,
+ "%sIgnoring unparsable value '%s' for field '%.*s'.\n",
+ msg ? msg : "", end + 1, (int)(end - word), word);
+ continue;
+ }
+ }
+
+ switch (*end) {
+ case '\0': /* show */
+ memprintf(&msg, "%s%.*s=%#llx ", msg ? msg : "", (int)(end - word), word, old);
+ new = old; // do not change the value
+ break;
+
+ case '=': /* set */
+ break;
+
+ case '^': /* XOR */
+ new = old ^ new;
+ break;
+
+ case '+': /* OR */
+ new = old | new;
+ break;
+
+ case '-': /* AND NOT */
+ new = old & ~new;
+ break;
+
+ default:
+ break;
+ }
+
+ /* write the new value */
+ if (new != old) {
+ if (size == 8)
+ write_u64(ptr, new);
+ else if (size == 4)
+ write_u32(ptr, new);
+ else if (size == 2)
+ write_u16(ptr, new);
+ else
+ *(uint8_t *)ptr = new;
+ }
+ }
+
+ if (msg && *msg)
+ return cli_dynmsg(appctx, LOG_INFO, msg);
+ return 1;
+}
+
+/* parse a "debug dev task" command */
+/*
+ * debug dev task <ptr> [ "wake" | "expire" | "kill" ]
+ * Show/change status of a task/tasklet
+ */
+static int debug_parse_cli_task(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ const struct ha_caller *caller;
+ struct task *t;
+ char *endarg;
+ char *msg;
+ void *ptr;
+ int ret = 1;
+ int task_ok;
+ int arg;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ /* parse the pointer value */
+ ptr = (void *)strtoul(args[3], &endarg, 0);
+ if (!*args[3] || *endarg)
+ goto usage;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+
+ /* everything below must run under thread isolation till reaching label "leave" */
+ thread_isolate();
+
+ /* struct tasklet is smaller than struct task and is sufficient to check
+ * the TASK_COMMON part.
+ */
+ if (!may_access(ptr) || !may_access(ptr + sizeof(struct tasklet) - 1) ||
+ ((const struct tasklet *)ptr)->tid < -1 ||
+ ((const struct tasklet *)ptr)->tid >= (int)MAX_THREADS) {
+ ret = cli_err(appctx, "The designated memory area doesn't look like a valid task/tasklet\n");
+ goto leave;
+ }
+
+ t = ptr;
+ caller = t->caller;
+ msg = NULL;
+ task_ok = may_access(ptr + sizeof(*t) - 1);
+
+ chunk_reset(&trash);
+ resolve_sym_name(&trash, NULL, (const void *)t->process);
+
+ /* we need to be careful here because we may dump a freed task that's
+ * still in the pool cache, containing garbage in pointers.
+ */
+ if (!*args[4]) {
+ memprintf(&msg, "%s%p: %s state=%#x tid=%d process=%s ctx=%p calls=%d last=%s:%d intl=%d",
+ msg ? msg : "", t, (t->state & TASK_F_TASKLET) ? "tasklet" : "task",
+ t->state, t->tid, trash.area, t->context, t->calls,
+ caller && may_access(caller) && may_access(caller->func) && isalnum((uchar)*caller->func) ? caller->func : "0",
+ caller ? t->caller->line : 0,
+ (t->state & TASK_F_TASKLET) ? LIST_INLIST(&((const struct tasklet *)t)->list) : 0);
+
+ if (task_ok && !(t->state & TASK_F_TASKLET))
+ memprintf(&msg, "%s inrq=%d inwq=%d exp=%d nice=%d",
+ msg ? msg : "", task_in_rq(t), task_in_wq(t), t->expire, t->nice);
+
+ memprintf(&msg, "%s\n", msg ? msg : "");
+ }
+
+ for (arg = 4; *args[arg]; arg++) {
+ if (strcmp(args[arg], "expire") == 0) {
+ if (t->state & TASK_F_TASKLET) {
+ /* do nothing for tasklets */
+ }
+ else if (task_ok) {
+ /* unlink task and wake with timer flag */
+ __task_unlink_wq(t);
+ t->expire = now_ms;
+ task_wakeup(t, TASK_WOKEN_TIMER);
+ }
+ } else if (strcmp(args[arg], "wake") == 0) {
+ /* wake with all flags but init / timer */
+ if (t->state & TASK_F_TASKLET)
+ tasklet_wakeup((struct tasklet *)t);
+ else if (task_ok)
+ task_wakeup(t, TASK_WOKEN_ANY & ~(TASK_WOKEN_INIT|TASK_WOKEN_TIMER));
+ } else if (strcmp(args[arg], "kill") == 0) {
+ /* Kill the task. This is not idempotent! */
+ if (!(t->state & TASK_KILLED)) {
+ if (t->state & TASK_F_TASKLET)
+ tasklet_kill((struct tasklet *)t);
+ else if (task_ok)
+ task_kill(t);
+ }
+ } else {
+ thread_release();
+ goto usage;
+ }
+ }
+
+ if (msg && *msg)
+ ret = cli_dynmsg(appctx, LOG_INFO, msg);
+ leave:
+ thread_release();
+ return ret;
+ usage:
+ return cli_err(appctx,
+ "Usage: debug dev task <ptr> [ wake | expire | kill ]\n"
+ " By default, dumps some info on task/tasklet <ptr>. 'wake' will wake it up\n"
+ " with all conditions flags but init/exp. 'expire' will expire the entry, and\n"
+	       "  'kill' will kill it (warning: may crash since it is not idempotent!). All\n"
+ " changes may crash the process if performed on a wrong object!\n"
+ );
+}
+
+#if defined(DEBUG_DEV)
+static struct task *debug_delay_inj_task(struct task *t, void *ctx, unsigned int state)
+{
+ unsigned long *tctx = ctx; // [0] = interval, [1] = nbwakeups
+ unsigned long inter = tctx[0];
+ unsigned long count = tctx[1];
+ unsigned long rnd;
+
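+	/* either re-arm the timer when an interval is set, or immediately
+	 * self-wake so the task runs again at full speed.
+	 */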
+ if (inter)
+ t->expire = tick_add(now_ms, inter);
+ else
+ task_wakeup(t, TASK_WOKEN_MSG);
+
+ /* wake a random thread */
+ while (count--) {
+ rnd = statistical_prng_range(global.nbthread);
+ ha_tkill(rnd, SIGRTMAX);
+ }
+ return t;
+}
+
+/* parse a "debug dev delay-inj" command
+ * debug dev delay-inj <inter> <count>
+ */
+static int debug_parse_delay_inj(char **args, char *payload, struct appctx *appctx, void *private)
+{
+	unsigned long *tctx; // [0] = inter, [1] = count
+ struct task *task;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[4])
+		return cli_err(appctx, "Usage: debug dev delay-inj <inter_ms> <count>\n");
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+
+ tctx = calloc(2, sizeof(*tctx));
+ if (!tctx)
+ goto fail;
+
+ tctx[0] = atoi(args[3]);
+ tctx[1] = atoi(args[4]);
+
+ task = task_new_here/*anywhere*/();
+ if (!task)
+ goto fail;
+
+ task->process = debug_delay_inj_task;
+ task->context = tctx;
+ task_wakeup(task, TASK_WOKEN_INIT);
+ return 1;
+
+ fail:
+ free(tctx);
+ return cli_err(appctx, "Not enough memory");
+}
+#endif // DEBUG_DEV
+
+static struct task *debug_task_handler(struct task *t, void *ctx, unsigned int state)
+{
+ unsigned long *tctx = ctx; // [0] = #tasks, [1] = inter, [2+] = { tl | (tsk+1) }
+ unsigned long inter = tctx[1];
+ unsigned long rnd;
+
+ t->expire = tick_add(now_ms, inter);
+
+ /* half of the calls will wake up another entry */
+ rnd = statistical_prng();
+ if (rnd & 1) {
+ rnd >>= 1;
+ rnd %= tctx[0];
+ rnd = tctx[rnd + 2];
+
+ if (rnd & 1)
+ task_wakeup((struct task *)(rnd - 1), TASK_WOKEN_MSG);
+ else
+ tasklet_wakeup((struct tasklet *)rnd);
+ }
+ return t;
+}
+
+static struct task *debug_tasklet_handler(struct task *t, void *ctx, unsigned int state)
+{
+ unsigned long *tctx = ctx; // [0] = #tasks, [1] = inter, [2+] = { tl | (tsk+1) }
+ unsigned long rnd;
+ int i;
+
+ /* wake up two random entries */
+ for (i = 0; i < 2; i++) {
+ rnd = statistical_prng() % tctx[0];
+ rnd = tctx[rnd + 2];
+
+ if (rnd & 1)
+ task_wakeup((struct task *)(rnd - 1), TASK_WOKEN_MSG);
+ else
+ tasklet_wakeup((struct tasklet *)rnd);
+ }
+ return t;
+}
+
+/* parse a "debug dev sched" command
+ * debug dev sched {task|tasklet} [count=<count>] [tid=<tid>] [inter=<inter>]
+ */
+static int debug_parse_cli_sched(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ int arg;
+ void *ptr;
+ int size;
+ const char *word, *end;
+ struct ist name;
+ char *msg = NULL;
+ char *endarg;
+ unsigned long long new;
+ unsigned long count = 0;
+ unsigned long thrid = tid;
+ unsigned int inter = 0;
+ unsigned long i;
+ int mode = 0; // 0 = tasklet; 1 = task
+ unsigned long *tctx; // [0] = #tasks, [1] = inter, [2+] = { tl | (tsk+1) }
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ptr = NULL; size = 0;
+
+ if (strcmp(args[3], "task") != 0 && strcmp(args[3], "tasklet") != 0) {
+ return cli_err(appctx,
+ "Usage: debug dev sched {task|tasklet} { <obj> = <value> }*\n"
+ " <obj> = {count | tid | inter }\n"
+ " <value> = 64-bit dec/hex integer (0x prefix supported)\n"
+ );
+ }
+
+ mode = strcmp(args[3], "task") == 0;
+
+ _HA_ATOMIC_INC(&debug_commands_issued);
+ for (arg = 4; *args[arg]; arg++) {
+ end = word = args[arg];
+ while (*end && *end != '=' && *end != '^' && *end != '+' && *end != '-')
+ end++;
+ name = ist2(word, end - word);
+ if (isteq(name, ist("count"))) {
+ ptr = &count; size = sizeof(count);
+ } else if (isteq(name, ist("tid"))) {
+ ptr = &thrid; size = sizeof(thrid);
+ } else if (isteq(name, ist("inter"))) {
+ ptr = &inter; size = sizeof(inter);
+ } else
+ return cli_dynerr(appctx, memprintf(&msg, "Unsupported setting: '%s'.\n", word));
+
+		/* parse the new value. */
+ new = strtoll(end + 1, &endarg, 0);
+ if (end[1] && *endarg) {
+ memprintf(&msg,
+ "%sIgnoring unparsable value '%s' for field '%.*s'.\n",
+ msg ? msg : "", end + 1, (int)(end - word), word);
+ continue;
+ }
+
+ /* write the new value */
+ if (size == 8)
+ write_u64(ptr, new);
+ else if (size == 4)
+ write_u32(ptr, new);
+ else if (size == 2)
+ write_u16(ptr, new);
+ else
+ *(uint8_t *)ptr = new;
+ }
+
+ tctx = calloc(count + 2, sizeof(*tctx));
+ if (!tctx)
+ goto fail;
+
+ tctx[0] = (unsigned long)count;
+ tctx[1] = (unsigned long)inter;
+
+ if (thrid >= global.nbthread)
+ thrid = tid;
+
+ for (i = 0; i < count; i++) {
+		/* tctx[] entries are tagged pointers: tasklets are stored
+		 * as-is while tasks have bit 0 set, which is safe since
+		 * both are at least 2-byte aligned.
+		 */
+ if (mode == 0) {
+ struct tasklet *tl = tasklet_new();
+
+ if (!tl)
+ goto fail;
+
+ tl->tid = thrid;
+ tl->process = debug_tasklet_handler;
+ tl->context = tctx;
+ tctx[i + 2] = (unsigned long)tl;
+ } else {
+ struct task *task = task_new_on(thrid);
+
+ if (!task)
+ goto fail;
+
+ task->process = debug_task_handler;
+ task->context = tctx;
+ tctx[i + 2] = (unsigned long)task + 1;
+ }
+ }
+
+ /* start the tasks and tasklets */
+ for (i = 0; i < count; i++) {
+ unsigned long ctx = tctx[i + 2];
+
+ if (ctx & 1)
+ task_wakeup((struct task *)(ctx - 1), TASK_WOKEN_INIT);
+ else
+ tasklet_wakeup((struct tasklet *)ctx);
+ }
+
+ if (msg && *msg)
+ return cli_dynmsg(appctx, LOG_INFO, msg);
+ return 1;
+
+ fail:
+ /* free partially allocated entries */
+ for (i = 0; tctx && i < count; i++) {
+ unsigned long ctx = tctx[i + 2];
+
+ if (!ctx)
+ break;
+
+ if (ctx & 1)
+ task_destroy((struct task *)(ctx - 1));
+ else
+ tasklet_free((struct tasklet *)ctx);
+ }
+
+ free(tctx);
+ return cli_err(appctx, "Not enough memory");
+}
+
+/* CLI state for "debug dev fd" */
+struct dev_fd_ctx {
+ int start_fd;
+};
+
+/* CLI parser for the "debug dev fd" command. The current FD to restart from is
+ * stored in a struct dev_fd_ctx pointed to by svcctx.
+ */
+static int debug_parse_cli_fd(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct dev_fd_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ /* start at fd #0 */
+ ctx->start_fd = 0;
+ return 0;
+}
+
+/* CLI I/O handler for the "debug dev fd" command. Dumps all FDs that are
+ * accessible from the process but not known from fdtab. The FD number to
+ * restart from is stored in a struct dev_fd_ctx pointed to by svcctx.
+ */
+static int debug_iohandler_fd(struct appctx *appctx)
+{
+ struct dev_fd_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct sockaddr_storage sa;
+ struct stat statbuf;
+ socklen_t salen, vlen;
+ int ret1, ret2, port;
+ char *addrstr;
+ int ret = 1;
+ int i, fd;
+
+	/* FIXME: don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ goto end;
+
+ chunk_reset(&trash);
+
+ thread_isolate();
+
+	/* scan all possible FDs up to global.maxsock, resuming from the last
+	 * position saved in ctx->start_fd whenever the output buffer fills up.
+	 */
+ for (fd = ctx->start_fd; fd < global.maxsock; fd++) {
+ /* check for FD's existence */
+ ret1 = fcntl(fd, F_GETFD, 0);
+ if (ret1 == -1)
+ continue; // not known to the process
+ if (fdtab[fd].owner)
+ continue; // well-known
+
+		/* OK, we're seeing an orphan; let's try to retrieve as much
+		 * information as possible about it.
+		 */
+ chunk_printf(&trash, "%5d", fd);
+
+ if (fstat(fd, &statbuf) != -1) {
+ chunk_appendf(&trash, " type=%s mod=%04o dev=%#llx siz=%#llx uid=%lld gid=%lld fs=%#llx ino=%#llx",
+ isatty(fd) ? "tty.":
+ S_ISREG(statbuf.st_mode) ? "file":
+ S_ISDIR(statbuf.st_mode) ? "dir.":
+ S_ISCHR(statbuf.st_mode) ? "chr.":
+ S_ISBLK(statbuf.st_mode) ? "blk.":
+ S_ISFIFO(statbuf.st_mode) ? "pipe":
+ S_ISLNK(statbuf.st_mode) ? "link":
+ S_ISSOCK(statbuf.st_mode) ? "sock":
+#ifdef USE_EPOLL
+ /* trick: epoll_ctl() will return -ENOENT when trying
+ * to remove from a valid epoll FD an FD that was not
+ * registered against it. But we don't want to risk
+ * disabling a random FD. Instead we'll create a new
+ * one by duplicating 0 (it should be valid since
+ * pointing to a terminal or /dev/null), and try to
+ * remove it.
+ */
+ ({
+ int fd2 = dup(0);
+ int ret = fd2;
+ if (ret >= 0) {
+ ret = epoll_ctl(fd, EPOLL_CTL_DEL, fd2, NULL);
+ if (ret == -1 && errno == ENOENT)
+ ret = 0; // that's a real epoll
+ else
+ ret = -1; // it's something else
+ close(fd2);
+ }
+ ret;
+ }) == 0 ? "epol" :
+#endif
+ "????",
+ (uint)statbuf.st_mode & 07777,
+
+ (ullong)statbuf.st_rdev,
+ (ullong)statbuf.st_size,
+ (ullong)statbuf.st_uid,
+ (ullong)statbuf.st_gid,
+
+ (ullong)statbuf.st_dev,
+ (ullong)statbuf.st_ino);
+ }
+
+ chunk_appendf(&trash, " getfd=%s+%#x",
+ (ret1 & FD_CLOEXEC) ? "cloex" : "",
+ ret1 &~ FD_CLOEXEC);
+
+ /* FD options */
+ ret2 = fcntl(fd, F_GETFL, 0);
+		if (ret2 != -1) {
+			chunk_appendf(&trash, " getfl=%s",
+				      (ret2 & 3) >= 2 ? "O_RDWR" :
+				      (ret2 & 1) ? "O_WRONLY" : "O_RDONLY");
+
+ for (i = 2; i < 32; i++) {
+ if (!(ret2 & (1UL << i)))
+ continue;
+ switch (1UL << i) {
+ case O_CREAT: chunk_appendf(&trash, ",O_CREAT"); break;
+ case O_EXCL: chunk_appendf(&trash, ",O_EXCL"); break;
+ case O_NOCTTY: chunk_appendf(&trash, ",O_NOCTTY"); break;
+ case O_TRUNC: chunk_appendf(&trash, ",O_TRUNC"); break;
+ case O_APPEND: chunk_appendf(&trash, ",O_APPEND"); break;
+#ifdef O_ASYNC
+ case O_ASYNC: chunk_appendf(&trash, ",O_ASYNC"); break;
+#endif
+#ifdef O_DIRECT
+ case O_DIRECT: chunk_appendf(&trash, ",O_DIRECT"); break;
+#endif
+#ifdef O_NOATIME
+ case O_NOATIME: chunk_appendf(&trash, ",O_NOATIME"); break;
+#endif
+ }
+ }
+ }
+
+ vlen = sizeof(ret2);
+ ret1 = getsockopt(fd, SOL_SOCKET, SO_TYPE, &ret2, &vlen);
+ if (ret1 != -1)
+ chunk_appendf(&trash, " so_type=%d", ret2);
+
+ vlen = sizeof(ret2);
+ ret1 = getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ret2, &vlen);
+ if (ret1 != -1)
+ chunk_appendf(&trash, " so_accept=%d", ret2);
+
+ vlen = sizeof(ret2);
+ ret1 = getsockopt(fd, SOL_SOCKET, SO_ERROR, &ret2, &vlen);
+ if (ret1 != -1)
+ chunk_appendf(&trash, " so_error=%d", ret2);
+
+ salen = sizeof(sa);
+ if (getsockname(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ port = ntohs(((const struct sockaddr_in *)&sa)->sin_port);
+ else if (sa.ss_family == AF_INET6)
+ port = ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port);
+ else
+ port = 0;
+ addrstr = sa2str(&sa, port, 0);
+ chunk_appendf(&trash, " laddr=%s", addrstr);
+ free(addrstr);
+ }
+
+ salen = sizeof(sa);
+ if (getpeername(fd, (struct sockaddr *)&sa, &salen) != -1) {
+ if (sa.ss_family == AF_INET)
+ port = ntohs(((const struct sockaddr_in *)&sa)->sin_port);
+ else if (sa.ss_family == AF_INET6)
+ port = ntohs(((const struct sockaddr_in6 *)&sa)->sin6_port);
+ else
+ port = 0;
+ addrstr = sa2str(&sa, port, 0);
+ chunk_appendf(&trash, " raddr=%s", addrstr);
+ free(addrstr);
+ }
+
+ chunk_appendf(&trash, "\n");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ctx->start_fd = fd;
+ ret = 0;
+ break;
+ }
+ }
+
+ thread_release();
+ end:
+ return ret;
+}
+
+#if defined(DEBUG_MEM_STATS)
+
+/* CLI state for "debug dev memstats" */
+struct dev_mem_ctx {
+ struct mem_stats *start, *stop; /* begin/end of dump */
+ char *match; /* non-null if a name prefix is specified */
+ int show_all; /* show all entries if non-null */
+ int width; /* 1st column width */
+ long tot_size; /* sum of alloc-free */
+ ulong tot_calls; /* sum of calls */
+};
+
+/* CLI parser for the "debug dev memstats" command. Sets a dev_mem_ctx shown above. */
+static int debug_parse_cli_memstats(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct dev_mem_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg;
+
+ extern __attribute__((__weak__)) struct mem_stats __start_mem_stats;
+ extern __attribute__((__weak__)) struct mem_stats __stop_mem_stats;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ for (arg = 3; *args[arg]; arg++) {
+ if (strcmp(args[arg], "reset") == 0) {
+ struct mem_stats *ptr;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ for (ptr = &__start_mem_stats; ptr < &__stop_mem_stats; ptr++) {
+ _HA_ATOMIC_STORE(&ptr->calls, 0);
+ _HA_ATOMIC_STORE(&ptr->size, 0);
+ }
+ return 1;
+ }
+ else if (strcmp(args[arg], "all") == 0) {
+ ctx->show_all = 1;
+ continue;
+ }
+ else if (strcmp(args[arg], "match") == 0 && *args[arg + 1]) {
+ ha_free(&ctx->match);
+ ctx->match = strdup(args[arg + 1]);
+ arg++;
+ continue;
+ }
+ else
+ return cli_err(appctx, "Expects either 'reset', 'all', or 'match <pfx>'.\n");
+ }
+
+	/* otherwise proceed with the dump from ->start to ->stop */
+ ctx->start = &__start_mem_stats;
+ ctx->stop = &__stop_mem_stats;
+ ctx->width = 0;
+ return 0;
+}
+
+/* CLI I/O handler for the "debug dev memstats" command using a dev_mem_ctx
+ * found in appctx->svcctx. Dumps all mem_stats structs referenced by pointers
+ * located between ->start and ->stop. Dumps all entries if ->show_all != 0,
+ * otherwise only non-zero calls.
+ */
+static int debug_iohandler_memstats(struct appctx *appctx)
+{
+ struct dev_mem_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct mem_stats *ptr;
+ const char *pfx = ctx->match;
+ int ret = 1;
+
+	/* FIXME: don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ goto end;
+
+ if (!ctx->width) {
+ /* we don't know the first column's width, let's compute it
+ * now based on a first pass on printable entries and their
+ * expected width (approximated).
+ */
+ for (ptr = ctx->start; ptr != ctx->stop; ptr++) {
+ const char *p, *name;
+ int w = 0;
+ char tmp;
+
+ if (!ptr->size && !ptr->calls && !ctx->show_all)
+ continue;
+
+ for (p = name = ptr->caller.file; *p; p++) {
+ if (*p == '/')
+ name = p + 1;
+ }
+
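+			/* snprintf() with a zero size writes nothing but still
+			 * returns the length the output would have taken,
+			 * which gives this entry's column width.
+			 */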
+ if (ctx->show_all)
+ w = snprintf(&tmp, 0, "%s(%s:%d) ", ptr->caller.func, name, ptr->caller.line);
+ else
+ w = snprintf(&tmp, 0, "%s:%d ", name, ptr->caller.line);
+
+ if (w > ctx->width)
+ ctx->width = w;
+ }
+ }
+
+	/* dump all mem_stats entries between ->start and ->stop, resuming from
+	 * the last saved position whenever the output buffer fills up.
+	 */
+ for (ptr = ctx->start; ptr != ctx->stop; ptr++) {
+ const char *type;
+ const char *name;
+ const char *p;
+ const char *info = NULL;
+ const char *func = NULL;
+ int direction = 0; // neither alloc nor free (e.g. realloc)
+
+ if (!ptr->size && !ptr->calls && !ctx->show_all)
+ continue;
+
+ /* basename only */
+ for (p = name = ptr->caller.file; *p; p++) {
+ if (*p == '/')
+ name = p + 1;
+ }
+
+ func = ptr->caller.func;
+
+ switch (ptr->caller.what) {
+ case MEM_STATS_TYPE_CALLOC: type = "CALLOC"; direction = 1; break;
+ case MEM_STATS_TYPE_FREE: type = "FREE"; direction = -1; break;
+ case MEM_STATS_TYPE_MALLOC: type = "MALLOC"; direction = 1; break;
+ case MEM_STATS_TYPE_REALLOC: type = "REALLOC"; break;
+ case MEM_STATS_TYPE_STRDUP: type = "STRDUP"; direction = 1; break;
+ case MEM_STATS_TYPE_P_ALLOC: type = "P_ALLOC"; direction = 1; if (ptr->extra) info = ((const struct pool_head *)ptr->extra)->name; break;
+ case MEM_STATS_TYPE_P_FREE: type = "P_FREE"; direction = -1; if (ptr->extra) info = ((const struct pool_head *)ptr->extra)->name; break;
+ default: type = "UNSET"; break;
+ }
+
+ //chunk_printf(&trash,
+ // "%20s:%-5d %7s size: %12lu calls: %9lu size/call: %6lu\n",
+ // name, ptr->line, type,
+ // (unsigned long)ptr->size, (unsigned long)ptr->calls,
+ // (unsigned long)(ptr->calls ? (ptr->size / ptr->calls) : 0));
+
+ /* only match requested prefixes */
+ if (pfx && (!info || strncmp(info, pfx, strlen(pfx)) != 0))
+ continue;
+
+ chunk_reset(&trash);
+ if (ctx->show_all)
+ chunk_appendf(&trash, "%s(", func);
+
+ chunk_appendf(&trash, "%s:%d", name, ptr->caller.line);
+
+ if (ctx->show_all)
+ chunk_appendf(&trash, ")");
+
+ while (trash.data < ctx->width)
+ trash.area[trash.data++] = ' ';
+
+ chunk_appendf(&trash, "%7s size: %12lu calls: %9lu size/call: %6lu %s\n",
+ type,
+ (unsigned long)ptr->size, (unsigned long)ptr->calls,
+ (unsigned long)(ptr->calls ? (ptr->size / ptr->calls) : 0),
+ info ? info : "");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ctx->start = ptr;
+ ret = 0;
+ goto end;
+ }
+ if (direction > 0) {
+ ctx->tot_size += (ulong)ptr->size;
+ ctx->tot_calls += (ulong)ptr->calls;
+ }
+ else if (direction < 0) {
+ ctx->tot_size -= (ulong)ptr->size;
+ ctx->tot_calls += (ulong)ptr->calls;
+ }
+ }
+
+ /* now dump a summary */
+ chunk_reset(&trash);
+ chunk_appendf(&trash, "Total");
+ while (trash.data < ctx->width)
+ trash.area[trash.data++] = ' ';
+
+ chunk_appendf(&trash, "%7s size: %12ld calls: %9lu size/call: %6ld %s\n",
+ "BALANCE",
+ ctx->tot_size, ctx->tot_calls,
+ (long)(ctx->tot_calls ? (ctx->tot_size / ctx->tot_calls) : 0),
+ "(excl. realloc)");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ctx->start = ptr;
+ ret = 0;
+ goto end;
+ }
+ end:
+ return ret;
+}
+
+/* release the "debug dev memstats" context */
+static void debug_release_memstats(struct appctx *appctx)
+{
+ struct dev_mem_ctx *ctx = appctx->svcctx;
+
+ ha_free(&ctx->match);
+}
+#endif
+
+#ifdef USE_THREAD_DUMP
+
+/* handles DEBUGSIG to dump the state of the thread it's working on. This is
+ * appended at the end of thread_dump_buffer which must be protected against
+ * reentrance from different threads (a thread-local buffer works fine).
+ */
+void debug_handler(int sig, siginfo_t *si, void *arg)
+{
+ struct buffer *buf = HA_ATOMIC_LOAD(&th_ctx->thread_dump_buffer);
+ int harmless = is_thread_harmless();
+
+ /* first, let's check it's really for us and that we didn't just get
+ * a spurious DEBUGSIG.
+ */
+ if (!buf || buf == (void*)(0x1UL))
+ return;
+
+ /* now dump the current state into the designated buffer, and indicate
+ * we come from a sig handler.
+ */
+ ha_thread_dump_one(tid, 1);
+
+ /* mark the current thread as stuck to detect it upon next invocation
+ * if it didn't move.
+ */
+ if (!harmless &&
+ !(_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_SLEEPING))
+ _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_STUCK);
+}
+
+static int init_debug_per_thread()
+{
+ sigset_t set;
+
+ /* unblock the DEBUGSIG signal we intend to use */
+ sigemptyset(&set);
+ sigaddset(&set, DEBUGSIG);
+#if defined(DEBUG_DEV)
+ sigaddset(&set, SIGRTMAX);
+#endif
+ ha_sigmask(SIG_UNBLOCK, &set, NULL);
+ return 1;
+}
+
+static int init_debug()
+{
+ struct sigaction sa;
+ void *callers[1];
+
+ /* calling backtrace() will access libgcc at runtime. We don't want to
+ * do it after the chroot, so let's perform a first call to have it
+ * ready in memory for later use.
+ */
+ my_backtrace(callers, sizeof(callers)/sizeof(*callers));
+ sa.sa_handler = NULL;
+ sa.sa_sigaction = debug_handler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ sigaction(DEBUGSIG, &sa, NULL);
+
+#if defined(DEBUG_DEV)
+ sa.sa_handler = NULL;
+ sa.sa_sigaction = debug_delay_inj_sighandler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ sigaction(SIGRTMAX, &sa, NULL);
+#endif
+ return ERR_NONE;
+}
+
+REGISTER_POST_CHECK(init_debug);
+REGISTER_PER_THREAD_INIT(init_debug_per_thread);
+
+#endif /* USE_THREAD_DUMP */
+
+
+static void feed_post_mortem_linux()
+{
+#if defined(__linux__)
+ struct stat statbuf;
+ FILE *file;
+
+	/* DMI reports either the HW or the hypervisor, which allows most VMs
+	 * to be detected. On ARM the device-tree is often more precise for the
+	 * model. Since many boards leave "to be filled by OEM" or similar in
+	 * many fields, we deduplicate them as much as possible.
+	 */
+ if (read_line_to_trash("/sys/class/dmi/id/sys_vendor") > 0)
+ strlcpy2(post_mortem.platform.hw_vendor, trash.area, sizeof(post_mortem.platform.hw_vendor));
+
+ if (read_line_to_trash("/sys/class/dmi/id/product_family") > 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_vendor) != 0)
+ strlcpy2(post_mortem.platform.hw_family, trash.area, sizeof(post_mortem.platform.hw_family));
+
+ if ((read_line_to_trash("/sys/class/dmi/id/product_name") > 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_vendor) != 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_family) != 0))
+ strlcpy2(post_mortem.platform.hw_model, trash.area, sizeof(post_mortem.platform.hw_model));
+
+ if ((read_line_to_trash("/sys/class/dmi/id/board_vendor") > 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_vendor) != 0))
+ strlcpy2(post_mortem.platform.brd_vendor, trash.area, sizeof(post_mortem.platform.brd_vendor));
+
+ if ((read_line_to_trash("/sys/firmware/devicetree/base/model") > 0 &&
+ strcmp(trash.area, post_mortem.platform.brd_vendor) != 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_vendor) != 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_family) != 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_model) != 0) ||
+ (read_line_to_trash("/sys/class/dmi/id/board_name") > 0 &&
+ strcmp(trash.area, post_mortem.platform.brd_vendor) != 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_vendor) != 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_family) != 0 &&
+ strcmp(trash.area, post_mortem.platform.hw_model) != 0))
+ strlcpy2(post_mortem.platform.brd_model, trash.area, sizeof(post_mortem.platform.brd_model));
+
+	/* Check for containers: in a container on Linux, pid 2 is neither keventd (2.4) nor kthreadd (2.6+) */
+ if (read_line_to_trash("/proc/2/status") <= 0 ||
+ (strcmp(trash.area, "Name:\tkthreadd") != 0 &&
+ strcmp(trash.area, "Name:\tkeventd") != 0)) {
+ /* OK we're in a container. Docker often has /.dockerenv */
+ const char *tech = "yes";
+
+ if (stat("/.dockerenv", &statbuf) == 0)
+ tech = "docker";
+ strlcpy2(post_mortem.platform.cont_techno, tech, sizeof(post_mortem.platform.cont_techno));
+ }
+ else {
+ strlcpy2(post_mortem.platform.cont_techno, "no", sizeof(post_mortem.platform.cont_techno));
+ }
+
+ file = fopen("/proc/cpuinfo", "r");
+ if (file) {
+ uint cpu_implem = 0, cpu_arch = 0, cpu_variant = 0, cpu_part = 0, cpu_rev = 0; // arm
+ uint cpu_family = 0, model = 0, stepping = 0; // x86
+ char vendor_id[64] = "", model_name[64] = ""; // x86
+ char machine[64] = "", system_type[64] = "", cpu_model[64] = ""; // mips
+ const char *virt = "no";
+ char *p, *e, *v, *lf;
+
+ /* let's figure what CPU we're working with */
+ while ((p = fgets(trash.area, trash.size, file)) != NULL) {
+ lf = strchr(p, '\n');
+ if (lf)
+ *lf = 0;
+
+ /* stop at first line break */
+ if (!*p)
+ break;
+
+ /* skip colon and spaces and trim spaces after name */
+ v = e = strchr(p, ':');
+ if (!e)
+ continue;
+
+ do { *e-- = 0; } while (e >= p && (*e == ' ' || *e == '\t'));
+
+ /* locate value after colon */
+ do { v++; } while (*v == ' ' || *v == '\t');
+
+ /* ARM */
+ if (strcmp(p, "CPU implementer") == 0)
+ cpu_implem = strtoul(v, NULL, 0);
+ else if (strcmp(p, "CPU architecture") == 0)
+ cpu_arch = strtoul(v, NULL, 0);
+ else if (strcmp(p, "CPU variant") == 0)
+ cpu_variant = strtoul(v, NULL, 0);
+ else if (strcmp(p, "CPU part") == 0)
+ cpu_part = strtoul(v, NULL, 0);
+ else if (strcmp(p, "CPU revision") == 0)
+ cpu_rev = strtoul(v, NULL, 0);
+
+ /* x86 */
+ else if (strcmp(p, "cpu family") == 0)
+ cpu_family = strtoul(v, NULL, 0);
+ else if (strcmp(p, "model") == 0)
+ model = strtoul(v, NULL, 0);
+ else if (strcmp(p, "stepping") == 0)
+ stepping = strtoul(v, NULL, 0);
+ else if (strcmp(p, "vendor_id") == 0)
+ strlcpy2(vendor_id, v, sizeof(vendor_id));
+ else if (strcmp(p, "model name") == 0)
+ strlcpy2(model_name, v, sizeof(model_name));
+ else if (strcmp(p, "flags") == 0) {
+ if (strstr(v, "hypervisor")) {
+ if (strncmp(post_mortem.platform.hw_vendor, "QEMU", 4) == 0)
+ virt = "qemu";
+ else if (strncmp(post_mortem.platform.hw_vendor, "VMware", 6) == 0)
+ virt = "vmware";
+ else
+ virt = "yes";
+ }
+ }
+
+ /* MIPS */
+ else if (strcmp(p, "system type") == 0)
+ strlcpy2(system_type, v, sizeof(system_type));
+ else if (strcmp(p, "machine") == 0)
+ strlcpy2(machine, v, sizeof(machine));
+ else if (strcmp(p, "cpu model") == 0)
+ strlcpy2(cpu_model, v, sizeof(cpu_model));
+ }
+ fclose(file);
+
+		/* Machine may replace hw_model on MIPS */
+ if (!*post_mortem.platform.hw_model)
+ strlcpy2(post_mortem.platform.hw_model, machine, sizeof(post_mortem.platform.hw_model));
+
+ /* SoC vendor */
+ strlcpy2(post_mortem.platform.soc_vendor, vendor_id, sizeof(post_mortem.platform.soc_vendor));
+
+ /* SoC model */
+ if (*system_type) {
+ /* MIPS */
+ strlcpy2(post_mortem.platform.soc_model, system_type, sizeof(post_mortem.platform.soc_model));
+ *system_type = 0;
+ } else if (*model_name) {
+ /* x86 */
+ strlcpy2(post_mortem.platform.soc_model, model_name, sizeof(post_mortem.platform.soc_model));
+ *model_name = 0;
+ }
+
+ /* Create a CPU model name based on available IDs */
+ if (cpu_implem) // arm
+ snprintf(cpu_model + strlen(cpu_model),
+ sizeof(cpu_model) - strlen(cpu_model),
+ "%sImpl %#02x", *cpu_model ? " " : "", cpu_implem);
+
+ if (cpu_family) // x86
+ snprintf(cpu_model + strlen(cpu_model),
+ sizeof(cpu_model) - strlen(cpu_model),
+ "%sFam %u", *cpu_model ? " " : "", cpu_family);
+
+ if (model) // x86
+ snprintf(cpu_model + strlen(cpu_model),
+ sizeof(cpu_model) - strlen(cpu_model),
+ "%sModel %u", *cpu_model ? " " : "", model);
+
+ if (stepping) // x86
+ snprintf(cpu_model + strlen(cpu_model),
+ sizeof(cpu_model) - strlen(cpu_model),
+ "%sStep %u", *cpu_model ? " " : "", stepping);
+
+ if (cpu_arch) // arm
+ snprintf(cpu_model + strlen(cpu_model),
+ sizeof(cpu_model) - strlen(cpu_model),
+ "%sArch %u", *cpu_model ? " " : "", cpu_arch);
+
+ if (cpu_part) // arm
+ snprintf(cpu_model + strlen(cpu_model),
+ sizeof(cpu_model) - strlen(cpu_model),
+ "%sPart %#03x", *cpu_model ? " " : "", cpu_part);
+
+ if (cpu_variant || cpu_rev) // arm
+ snprintf(cpu_model + strlen(cpu_model),
+ sizeof(cpu_model) - strlen(cpu_model),
+ "%sr%up%u", *cpu_model ? " " : "", cpu_variant, cpu_rev);
+
+ strlcpy2(post_mortem.platform.cpu_model, cpu_model, sizeof(post_mortem.platform.cpu_model));
+
+ if (*virt)
+ strlcpy2(post_mortem.platform.virt_techno, virt, sizeof(post_mortem.platform.virt_techno));
+ }
+#endif // __linux__
+}
+
+static int feed_post_mortem()
+{
+ /* kernel type, version and arch */
+ uname(&post_mortem.platform.utsname);
+
+ /* some boot-time info related to the process */
+ post_mortem.process.pid = getpid();
+ post_mortem.process.boot_uid = geteuid();
+ post_mortem.process.boot_gid = getegid();
+
+ getrlimit(RLIMIT_NOFILE, &post_mortem.process.limit_fd);
+#if defined(RLIMIT_AS)
+ getrlimit(RLIMIT_AS, &post_mortem.process.limit_ram);
+#elif defined(RLIMIT_DATA)
+ getrlimit(RLIMIT_DATA, &post_mortem.process.limit_ram);
+#endif
+
+ if (strcmp(post_mortem.platform.utsname.sysname, "Linux") == 0)
+ feed_post_mortem_linux();
+
+#if defined(HA_HAVE_DUMP_LIBS)
+ chunk_reset(&trash);
+ if (dump_libs(&trash, 1))
+ post_mortem.libs = strdup(trash.area);
+#endif
+
+ return ERR_NONE;
+}
+
+REGISTER_POST_CHECK(feed_post_mortem);
+
+static void deinit_post_mortem(void)
+{
+ int comp;
+
+#if defined(HA_HAVE_DUMP_LIBS)
+ ha_free(&post_mortem.libs);
+#endif
+ for (comp = 0; comp < post_mortem.nb_components; comp++) {
+ free(post_mortem.components[comp].toolchain);
+ free(post_mortem.components[comp].toolchain_opts);
+ free(post_mortem.components[comp].build_settings);
+ free(post_mortem.components[comp].path);
+ }
+ ha_free(&post_mortem.components);
+}
+
+REGISTER_POST_DEINIT(deinit_post_mortem);
+
+/* Appends a component to the list of post_mortem info. May silently fail
+ * on allocation errors, but we don't care since the goal is to provide the
+ * info we have in case it helps.
+ */
+void post_mortem_add_component(const char *name, const char *version,
+ const char *toolchain, const char *toolchain_opts,
+ const char *build_settings, const char *path)
+{
+ struct post_mortem_component *comp;
+ int nbcomp = post_mortem.nb_components;
+
+ comp = realloc(post_mortem.components, (nbcomp + 1) * sizeof(*comp));
+ if (!comp)
+ return;
+
+ memset(&comp[nbcomp], 0, sizeof(*comp));
+ strlcpy2(comp[nbcomp].name, name, sizeof(comp[nbcomp].name));
+ strlcpy2(comp[nbcomp].version, version, sizeof(comp[nbcomp].version));
+ comp[nbcomp].toolchain = strdup(toolchain);
+ comp[nbcomp].toolchain_opts = strdup(toolchain_opts);
+ comp[nbcomp].build_settings = strdup(build_settings);
+ comp[nbcomp].path = strdup(path);
+
+ post_mortem.nb_components++;
+ post_mortem.components = comp;
+}
+
+#ifdef USE_THREAD
+/* init code is called one thread at a time, so let's collect all per-thread
+ * info on the last starting thread. This info is not critical anyway and
+ * there's no problem if we get it slightly late.
+ */
+static int feed_post_mortem_late()
+{
+ static int per_thread_info_collected;
+
+ if (HA_ATOMIC_ADD_FETCH(&per_thread_info_collected, 1) == global.nbthread) {
+ int i;
+ for (i = 0; i < global.nbthread; i++) {
+ post_mortem.process.thread_info[i].pth_id = ha_thread_info[i].pth_id;
+ post_mortem.process.thread_info[i].stack_top = ha_thread_info[i].stack_top;
+ }
+ }
+ return 1;
+}
+
+REGISTER_PER_THREAD_INIT(feed_post_mortem_late);
+#endif
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ {{ "debug", "dev", "bug", NULL }, "debug dev bug : call BUG_ON() and crash", debug_parse_cli_bug, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "check", NULL }, "debug dev check : call CHECK_IF() and possibly crash", debug_parse_cli_check, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "close", NULL }, "debug dev close <fd> : close this file descriptor", debug_parse_cli_close, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "deadlock", NULL }, "debug dev deadlock [nbtask] : deadlock between this number of tasks", debug_parse_cli_deadlock, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "delay", NULL }, "debug dev delay [ms] : sleep this long", debug_parse_cli_delay, NULL, NULL, NULL, ACCESS_EXPERT },
+#if defined(DEBUG_DEV)
+ {{ "debug", "dev", "delay-inj", NULL },"debug dev delay-inj <inter> <count> : inject random delays into threads", debug_parse_delay_inj, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "exec", NULL }, "debug dev exec [cmd] ... : show this command's output", debug_parse_cli_exec, NULL, NULL, NULL, ACCESS_EXPERT },
+#endif
+ {{ "debug", "dev", "fd", NULL }, "debug dev fd : scan for rogue/unhandled FDs", debug_parse_cli_fd, debug_iohandler_fd, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "exit", NULL }, "debug dev exit [code] : immediately exit the process", debug_parse_cli_exit, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "hash", NULL }, "debug dev hash [msg] : return msg hashed if anon is set", debug_parse_cli_hash, NULL, NULL, NULL, 0 },
+ {{ "debug", "dev", "hex", NULL }, "debug dev hex <addr> [len] : dump a memory area", debug_parse_cli_hex, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "log", NULL }, "debug dev log [msg] ... : send this msg to global logs", debug_parse_cli_log, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "loop", NULL }, "debug dev loop <ms> [isolated] : loop this long, possibly isolated", debug_parse_cli_loop, NULL, NULL, NULL, ACCESS_EXPERT },
+#if defined(DEBUG_MEM_STATS)
+ {{ "debug", "dev", "memstats", NULL }, "debug dev memstats [reset|all|match ...]: dump/reset memory statistics", debug_parse_cli_memstats, debug_iohandler_memstats, debug_release_memstats, NULL, 0 },
+#endif
+ {{ "debug", "dev", "panic", NULL }, "debug dev panic : immediately trigger a panic", debug_parse_cli_panic, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "sched", NULL }, "debug dev sched {task|tasklet} [k=v]* : stress the scheduler", debug_parse_cli_sched, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "stream",NULL }, "debug dev stream [k=v]* : show/manipulate stream flags", debug_parse_cli_stream,NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "sym", NULL }, "debug dev sym <addr> : resolve symbol address", debug_parse_cli_sym, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "task", NULL }, "debug dev task <ptr> [wake|expire|kill] : show/wake/expire/kill task/tasklet", debug_parse_cli_task, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "tkill", NULL }, "debug dev tkill [thr] [sig] : send signal to thread", debug_parse_cli_tkill, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "warn", NULL }, "debug dev warn : call WARN_ON() and possibly crash", debug_parse_cli_warn, NULL, NULL, NULL, ACCESS_EXPERT },
+ {{ "debug", "dev", "write", NULL }, "debug dev write [size] : write that many bytes in return", debug_parse_cli_write, NULL, NULL, NULL, ACCESS_EXPERT },
+
+ {{ "show", "dev", NULL, NULL }, "show dev : show debug info for developers", debug_parse_cli_show_dev, NULL, NULL },
+#if defined(HA_HAVE_DUMP_LIBS)
+ {{ "show", "libs", NULL, NULL }, "show libs : show loaded object files and libraries", debug_parse_cli_show_libs, NULL, NULL },
+#endif
+ {{ "show", "threads", NULL, NULL }, "show threads : show some threads debugging information", NULL, cli_io_handler_show_threads, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/dgram.c b/src/dgram.c
new file mode 100644
index 0000000..c983c03
--- /dev/null
+++ b/src/dgram.c
@@ -0,0 +1,79 @@
+/*
+ * Datagram processing functions
+ *
+ * Copyright 2014 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/fd.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/dgram.h>
+#include <haproxy/errors.h>
+#include <haproxy/tools.h>
+
+/* datagram handler callback */
+void dgram_fd_handler(int fd)
+{
+ struct dgram_conn *dgram = fdtab[fd].owner;
+
+ if (unlikely(!dgram))
+ return;
+
+ if (fd_recv_ready(fd))
+ dgram->data->recv(dgram);
+ if (fd_send_ready(fd))
+ dgram->data->send(dgram);
+
+ return;
+}
+
+/* config parser for global "tune.{rcv,snd}buf.{frontend,backend}" */
+static int dgram_parse_tune_bufs(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int *valptr;
+ int val;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* "tune.rcvbuf.frontend", "tune.rcvbuf.backend",
+ * "tune.sndbuf.frontend", "tune.sndbuf.backend"
+ */
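+	/* two characters discriminate between the four keywords above:
+	 * args[0][5] is 'r' or 's' (rcvbuf/sndbuf) and args[0][12] is
+	 * 'f' or 'b' (frontend/backend).
+	 */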
+ valptr = (args[0][5] == 'r' && args[0][12] == 'f') ? &global.tune.frontend_rcvbuf :
+ (args[0][5] == 'r' && args[0][12] == 'b') ? &global.tune.backend_rcvbuf :
+ (args[0][5] == 's' && args[0][12] == 'f') ? &global.tune.frontend_sndbuf :
+ &global.tune.backend_sndbuf;
+
+ if (*valptr != 0) {
+ memprintf(err, "parsing [%s:%d] : ignoring '%s' which was already specified.\n", file, line, args[0]);
+ return 1;
+ }
+
+ val = atoi(args[1]);
+
+ if (*(args[1]) == 0 || val <= 0) {
+ memprintf(err, "parsing [%s:%d] : '%s' expects a strictly positive integer argument.\n", file, line, args[0]);
+ return -1;
+ }
+
+ *valptr = val;
+ return 0;
+}
+
+/* register "global" section keywords */
+static struct cfg_kw_list dgram_cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.rcvbuf.backend", dgram_parse_tune_bufs },
+ { CFG_GLOBAL, "tune.rcvbuf.frontend", dgram_parse_tune_bufs },
+ { CFG_GLOBAL, "tune.sndbuf.backend", dgram_parse_tune_bufs },
+ { CFG_GLOBAL, "tune.sndbuf.frontend", dgram_parse_tune_bufs },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &dgram_cfg_kws);
diff --git a/src/dict.c b/src/dict.c
new file mode 100644
index 0000000..a225081
--- /dev/null
+++ b/src/dict.c
@@ -0,0 +1,127 @@
+#include <string.h>
+
+#include <import/eb32tree.h>
+#include <import/ebistree.h>
+#include <haproxy/dict.h>
+#include <haproxy/thread.h>
+
+struct dict *new_dict(const char *name)
+{
+ struct dict *dict;
+
+ dict = malloc(sizeof *dict);
+ if (!dict)
+ return NULL;
+
+ dict->name = name;
+ dict->values = EB_ROOT_UNIQUE;
+ HA_RWLOCK_INIT(&dict->rwlock);
+
+ return dict;
+}
+
+/*
+ * Allocates a new dictionary entry whose string value is a strdup() of <s>.
+ * Returns the newly allocated entry on success, NULL otherwise.
+ */
+static struct dict_entry *new_dict_entry(char *s)
+{
+ struct dict_entry *de;
+
+ de = calloc(1, sizeof *de);
+ if (!de)
+ return NULL;
+
+ de->value.key = strdup(s);
+ if (!de->value.key)
+ goto err;
+
+ de->len = strlen(s);
+ de->refcount = 1;
+
+ return de;
+
+ err:
+ ha_free(&de->value.key);
+ de->len = 0;
+ free(de);
+ return NULL;
+}
+
+/*
+ * Release the memory allocated for <de> dictionary entry.
+ */
+static void free_dict_entry(struct dict_entry *de)
+{
+ de->refcount = 0;
+ ha_free(&de->value.key);
+ free(de);
+}
+
+/*
+ * Simple function to look up dictionary entries with <s> as value.
+ */
+static struct dict_entry *__dict_lookup(struct dict *d, const char *s)
+{
+ struct dict_entry *de;
+ struct ebpt_node *node;
+
+ de = NULL;
+ node = ebis_lookup(&d->values, s);
+ if (node)
+ de = container_of(node, struct dict_entry, value);
+
+ return de;
+}
+
+/*
+ * Inserts an entry with <s> as value in <d> dictionary, reusing any existing
+ * entry holding the same value.
+ */
+struct dict_entry *dict_insert(struct dict *d, char *s)
+{
+ struct dict_entry *de;
+ struct ebpt_node *n;
+
+ HA_RWLOCK_RDLOCK(DICT_LOCK, &d->rwlock);
+ de = __dict_lookup(d, s);
+ HA_RWLOCK_RDUNLOCK(DICT_LOCK, &d->rwlock);
+ if (de) {
+ HA_ATOMIC_INC(&de->refcount);
+ return de;
+ }
+
+ de = new_dict_entry(s);
+ if (!de)
+ return NULL;
+
+ HA_RWLOCK_WRLOCK(DICT_LOCK, &d->rwlock);
+ n = ebis_insert(&d->values, &de->value);
+ HA_RWLOCK_WRUNLOCK(DICT_LOCK, &d->rwlock);
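+	/* if another thread inserted the same value between our unlocked
+	 * lookup and this insert, ebis_insert() returned the existing node:
+	 * drop our copy and use that entry instead.
+	 */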
+ if (n != &de->value) {
+ free_dict_entry(de);
+ de = container_of(n, struct dict_entry, value);
+ }
+
+ return de;
+}
+
+
+/*
+ * Unreference a dict entry previously acquired with <dict_insert>.
+ * If this is the last live reference to the entry, it is
+ * removed from the dictionary.
+ */
+void dict_entry_unref(struct dict *d, struct dict_entry *de)
+{
+ if (!de)
+ return;
+
+ if (HA_ATOMIC_SUB_FETCH(&de->refcount, 1) != 0)
+ return;
+
+ HA_RWLOCK_WRLOCK(DICT_LOCK, &d->rwlock);
+ ebpt_delete(&de->value);
+ HA_RWLOCK_WRUNLOCK(DICT_LOCK, &d->rwlock);
+
+ free_dict_entry(de);
+}
diff --git a/src/dns.c b/src/dns.c
new file mode 100644
index 0000000..23e9d9d
--- /dev/null
+++ b/src/dns.c
@@ -0,0 +1,1330 @@
+/*
+ * Name server resolution
+ *
+ * Copyright 2020 HAProxy Technologies
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/dgram.h>
+#include <haproxy/dns.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/log.h>
+#include <haproxy/ring.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+
+static THREAD_LOCAL char *dns_msg_trash;
+
+DECLARE_STATIC_POOL(dns_session_pool, "dns_session", sizeof(struct dns_session));
+DECLARE_STATIC_POOL(dns_query_pool, "dns_query", sizeof(struct dns_query));
+DECLARE_STATIC_POOL(dns_msg_buf, "dns_msg_buf", DNS_TCP_MSG_RING_MAX_SIZE);
+
+/* Opens a UDP socket on the nameserver's IP/Port, if required. Returns 0 on
+ * success, -1 otherwise. ns->dgram must be defined.
+ */
+static int dns_connect_nameserver(struct dns_nameserver *ns)
+{
+ struct dgram_conn *dgram = &ns->dgram->conn;
+ int fd;
+
+ /* Already connected */
+ if (dgram->t.sock.fd != -1)
+ return 0;
+
+	/* Create a UDP socket and connect it to the nameserver's IP/Port */
+ if ((fd = socket(dgram->addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
+ send_log(NULL, LOG_WARNING,
+ "DNS : section '%s': can't create socket for nameserver '%s'.\n",
+ ns->counters->pid, ns->id);
+ return -1;
+ }
+ if (connect(fd, (struct sockaddr*)&dgram->addr.to, get_addr_len(&dgram->addr.to)) == -1) {
+ send_log(NULL, LOG_WARNING,
+ "DNS : section '%s': can't connect socket for nameserver '%s'.\n",
+			 ns->counters->pid, ns->id);
+ close(fd);
+ return -1;
+ }
+
+ /* Make the socket non blocking */
+ fd_set_nonblock(fd);
+
+ /* Add the fd in the fd list and update its parameters */
+ dgram->t.sock.fd = fd;
+ fd_insert(fd, dgram, dgram_fd_handler, tgid, tg->threads_enabled);
+ fd_want_recv(fd);
+ return 0;
+}
+
+/* Sends a message to a name server.
+ * It returns the message length on success, -1 on error, and 0 if the
+ * output ring buffer is full.
+ */
+int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (ns->dgram) {
+ struct dgram_conn *dgram = &ns->dgram->conn;
+ int fd;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
+ fd = dgram->t.sock.fd;
+ if (fd == -1) {
+ if (dns_connect_nameserver(ns) == -1) {
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+ fd = dgram->t.sock.fd;
+ }
+
+ ret = send(fd, buf, len, 0);
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
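+			/* the socket is saturated: queue the message into
+			 * the request ring so that dns_resolve_send() can
+			 * flush it once the FD becomes writable again.
+			 */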
+ struct ist myist;
+
+ myist = ist2(buf, len);
+ ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
+ if (!ret) {
+ ns->counters->snd_error++;
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+ fd_cant_send(fd);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return ret;
+ }
+ ns->counters->snd_error++;
+ fd_delete(fd);
+ dgram->t.sock.fd = -1;
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+ ns->counters->sent++;
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ }
+ else if (ns->stream) {
+ struct ist myist;
+
+ myist = ist2(buf, len);
+ ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
+ if (!ret) {
+ ns->counters->snd_error++;
+ return -1;
+ }
+ task_wakeup(ns->stream->task_req, TASK_WOKEN_MSG);
+ return ret;
+ }
+
+ return ret;
+}
+
+void dns_session_free(struct dns_session *);
+
+/* Receives a dns message.
+ * Returns the message length on success, 0 if no more messages are
+ * available, or -1 on error.
+ */
+ssize_t dns_recv_nameserver(struct dns_nameserver *ns, void *data, size_t size)
+{
+ ssize_t ret = -1;
+
+ if (ns->dgram) {
+ struct dgram_conn *dgram = &ns->dgram->conn;
+ int fd;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
+ fd = dgram->t.sock.fd;
+ if (fd == -1) {
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+
+ if ((ret = recv(fd, data, size, 0)) < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(fd);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return 0;
+ }
+ fd_delete(fd);
+ dgram->t.sock.fd = -1;
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return -1;
+ }
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ }
+ else if (ns->stream) {
+ struct dns_stream_server *dss = ns->stream;
+ struct dns_session *ds;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
+
+ if (!LIST_ISEMPTY(&dss->wait_sess)) {
+ ds = LIST_NEXT(&dss->wait_sess, struct dns_session *, waiter);
+ ret = ds->rx_msg.len < size ? ds->rx_msg.len : size;
+ memcpy(data, ds->rx_msg.area, ret);
+
+ ds->rx_msg.len = 0;
+
+			/* This barrier ensures that all data is stored before
+			 * the appctx detects that the elem is out of the
+			 * list.
+			 */
+ __ha_barrier_store();
+
+ LIST_DEL_INIT(&ds->waiter);
+
+ if (ds->appctx) {
+				/* This second barrier ensures that the woken
+				 * up appctx won't miss that the elem was
+				 * removed from the list.
+				 */
+ __ha_barrier_store();
+
+				/* wake the appctx up because it may have more
+				 * messages to receive
+				 */
+ appctx_wakeup(ds->appctx);
+
+				/* the dns_session may already be in the free_sess
+				 * list, so remove it first */
+ LIST_DEL_INIT(&ds->list);
+
+ /* decrease nb_queries to free a slot for a new query on that sess */
+ ds->nb_queries--;
+ if (ds->nb_queries) {
+					/* pipelined unanswered requests remain
+					 * in this session, but we just decreased
+					 * the counter so the session is no
+					 * longer full of pipelined requests;
+					 * add it back to the free_sess list so
+					 * it can receive a new request
+					 */
+ LIST_INSERT(&ds->dss->free_sess, &ds->list);
+ }
+ else {
+					/* there are no more pipelined requests
+					 * in this session, so move it to the
+					 * idle_sess list */
+ LIST_INSERT(&ds->dss->idle_sess, &ds->list);
+
+ /* update the counter of idle sessions */
+ ds->dss->idle_conns++;
+
+					/* Note: no need to update
+					 * max_active_conns here since we
+					 * increase the idle count */
+ }
+ }
+ else {
+				/* there is no appctx left for this session,
+				 * which means it is ready to die
+				 */
+ dns_session_free(ds);
+ }
+
+
+ }
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ }
+
+ return ret;
+}
+
+static void dns_resolve_recv(struct dgram_conn *dgram)
+{
+ struct dns_nameserver *ns;
+ int fd;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
+
+ fd = dgram->t.sock.fd;
+
+ /* check if ready for reading */
+ if ((fd == -1) || !fd_recv_ready(fd)) {
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return;
+ }
+
+ /* no need to go further if we can't retrieve the nameserver */
+ if ((ns = dgram->owner) == NULL) {
+ _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
+ fd_stop_recv(fd);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return;
+ }
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+
+ ns->process_responses(ns);
+}
+
+/* Called when a dns network socket is ready to send data */
+static void dns_resolve_send(struct dgram_conn *dgram)
+{
+ int fd;
+ struct dns_nameserver *ns;
+ struct ring *ring;
+ struct buffer *buf;
+ uint64_t msg_len;
+ size_t len, cnt, ofs;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dgram->lock);
+
+ fd = dgram->t.sock.fd;
+
+ /* check if ready for sending */
+ if ((fd == -1) || !fd_send_ready(fd)) {
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return;
+ }
+
+ /* no need to go further if we can't retrieve the nameserver */
+ if ((ns = dgram->owner) == NULL) {
+ _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
+ fd_stop_send(fd);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+ return;
+ }
+
+ ring = ns->dgram->ring_req;
+ buf = &ring->buf;
+
+ HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ns->dgram->ofs_req == ~0)) {
+ ns->dgram->ofs_req = b_peek_ofs(buf, 0);
+ HA_ATOMIC_INC(b_orig(buf) + ns->dgram->ofs_req);
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs = ns->dgram->ofs_req - b_head_ofs(buf);
+ if (ns->dgram->ofs_req < b_head_ofs(buf))
+ ofs += b_size(buf);
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
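+	/* each queued message is preceded by a one-byte readers count: we
+	 * just dropped our reference on the current position and will take
+	 * a new one at the final position before leaving (see "out:" below).
+	 */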
+
+ while (ofs + 1 < b_data(buf)) {
+ int ret;
+
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+ if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
+
+ ret = send(fd, dns_msg_trash, len, 0);
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_send(fd);
+ goto out;
+ }
+ ns->counters->snd_error++;
+ fd_delete(fd);
+ fd = dgram->t.sock.fd = -1;
+ goto out;
+ }
+ ns->counters->sent++;
+
+ ofs += cnt + len;
+ }
+
+	/* we don't want/need to be woken up any more for sending
+	 * because all the ring content has been sent */
+ fd_stop_send(fd);
+
+out:
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ns->dgram->ofs_req = b_peek_ofs(buf, ofs);
+ HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock);
+}
+
+/* proto_udp callback functions for a DNS resolution */
+struct dgram_data_cb dns_dgram_cb = {
+ .recv = dns_resolve_recv,
+ .send = dns_resolve_send,
+};
+
+int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk)
+{
+ struct dns_dgram_server *dgram;
+
+ if ((dgram = calloc(1, sizeof(*dgram))) == NULL)
+ return -1;
+
+ /* Leave dgram partially initialized, no FD attached for
+ * now. */
+ dgram->conn.owner = ns;
+ dgram->conn.data = &dns_dgram_cb;
+ dgram->conn.t.sock.fd = -1;
+ dgram->conn.addr.to = *sk;
+ HA_SPIN_INIT(&dgram->conn.lock);
+ ns->dgram = dgram;
+
+ dgram->ofs_req = ~0; /* init ring offset */
+ dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
+ if (!dgram->ring_req) {
+ ha_alert("memory allocation error initializing the ring for nameserver.\n");
+ goto out;
+ }
+
+ /* attach the task as reader */
+ if (!ring_attach(dgram->ring_req)) {
+ /* should never happen: a ring cannot have more than 255 watchers */
+ ha_alert("nameserver has too many watchers (> 255) on its ring. This is a bug and should not happen.\n");
+ goto out;
+ }
+ return 0;
+out:
+ ring_free(dgram->ring_req);
+
+ free(dgram);
+
+ return -1;
+}
+
+/*
+ * I/O handler handling message push to the DNS TCP server.
+ * It takes its context from appctx->svcctx.
+ */
+static void dns_session_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct dns_session *ds = appctx->svcctx;
+ struct ring *ring = &ds->ring;
+ struct buffer *buf = &ring->buf;
+ uint64_t msg_len;
+ int available_room;
+ size_t len, cnt, ofs;
+ int ret = 0;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ goto out;
+ }
+
+ /* if stopping was requested, close immediately */
+ if (unlikely(stopping))
+ goto close;
+
+ /* we want to be sure not to miss that we have been woken up for a shutdown */
+ __ha_barrier_load();
+
+ /* this means the connection was requested to shut down,
+ * for instance on idle expiration */
+ if (ds->shutdown)
+ goto close;
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ goto out;
+ }
+
+ HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
+
+ HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ds->ofs == ~0)) {
+ ds->ofs = b_peek_ofs(buf, 0);
+ HA_ATOMIC_INC(b_orig(buf) + ds->ofs);
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs = ds->ofs - b_head_ofs(buf);
+ if (ds->ofs < b_head_ofs(buf))
+ ofs += b_size(buf);
+
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ /* in following loop, ofs always points to the counter byte that
+ * precedes the message so that we can take our reference there if we
+ * have to stop before the end (ret=0).
+ */
+ ret = 1;
+ while (ofs + 1 < b_data(buf)) {
+ struct dns_query *query;
+ uint16_t original_qid;
+ uint16_t new_qid;
+
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+
+ /* retrieve available room on output channel */
+ available_room = channel_recv_max(sc_ic(sc));
+
+ /* tx_msg_offset null means we are at the start of a new message */
+ if (!ds->tx_msg_offset) {
+ uint16_t slen;
+
+ /* check if there is enough room to put message len and query id */
+ if (available_room < sizeof(slen) + sizeof(new_qid)) {
+ sc_need_room(sc, sizeof(slen) + sizeof(new_qid));
+ ret = 0;
+ break;
+ }
+
+ /* put the message length into the channel */
+ slen = (uint16_t)msg_len;
+ slen = htons(slen);
+ applet_putblk(appctx, (char *)&slen, sizeof(slen));
+ available_room -= sizeof(slen);
+
+ /* backup original query id */
+ len = b_getblk(buf, (char *)&original_qid, sizeof(original_qid), ofs + cnt);
+ if (!len) {
+ /* should never happen since messages are atomically
+ * written into the ring
+ */
+ ret = 0;
+ break;
+ }
+
+ /* generate a new query id */
+ new_qid = ++ds->query_counter;
+ new_qid = htons(new_qid);
+
+ /* put new query id into the channel */
+ applet_putblk(appctx, (char *)&new_qid, sizeof(new_qid));
+ available_room -= sizeof(new_qid);
+
+ /* keep query id mapping */
+
+ query = pool_alloc(dns_query_pool);
+ if (query) {
+ query->qid.key = new_qid;
+ query->original_qid = original_qid;
+ query->expire = tick_add(now_ms, 5000);
+ LIST_INIT(&query->list);
+ if (LIST_ISEMPTY(&ds->queries)) {
+ /* enable task to handle expire */
+ ds->task_exp->expire = query->expire;
+ /* ensure this will be executed by the same
+ * thread as dns_session_release, so that
+ * the release callback remains free
+ * to destroy the task */
+ task_queue(ds->task_exp);
+ }
+ LIST_APPEND(&ds->queries, &query->list);
+ eb32_insert(&ds->query_ids, &query->qid);
+ ds->onfly_queries++;
+ }
+
+ /* set the offset past the query id already sent; the rest of the message may be forwarded in several chunks */
+ ds->tx_msg_offset = sizeof(original_qid);
+
+ }
+
+ /* check if any room remains available on the output channel */
+ if (unlikely(!available_room)) {
+ sc_need_room(sc, 1);
+ ret = 0;
+ break;
+ }
+
+ chunk_reset(&trash);
+ if ((msg_len - ds->tx_msg_offset) > available_room) {
+ /* remaining msg data is too large to be written to the output channel at one time */
+
+ len = b_getblk(buf, trash.area, available_room, ofs + cnt + ds->tx_msg_offset);
+
+ /* update the offset to complete message forwarding later */
+ ds->tx_msg_offset += len;
+ }
+ else {
+ /* remaining msg data can be written to the output channel at one time */
+ len = b_getblk(buf, trash.area, msg_len - ds->tx_msg_offset, ofs + cnt + ds->tx_msg_offset);
+
+ /* reset tx_msg_offset to mark forward fully processed */
+ ds->tx_msg_offset = 0;
+ }
+ trash.data += len;
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* should never happen since we
+ * checked above that available_room
+ * is large enough.
+ */
+ ret = 0;
+ break;
+ }
+
+ if (ds->tx_msg_offset) {
+ /* msg was not fully processed, we must be woken up again to drain the pending data */
+ sc_need_room(sc, 0);
+ ret = 0;
+ break;
+ }
+ /* switch to next message */
+ ofs += cnt + msg_len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ ds->ofs = b_peek_ofs(buf, ofs);
+
+ HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
+
+ if (ret) {
+ /* let's be woken up once a new request to write arrives */
+ HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock);
+ BUG_ON(LIST_INLIST(&appctx->wait_entry));
+ LIST_APPEND(&ring->waiters, &appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock);
+ applet_have_no_more_data(appctx);
+ }
+
+ /* if the session is not a waiter it means there is no committed
+ * message in rx_buf and we are free to use it.
+ * Note: we need a load barrier here so as not to miss the
+ * deletion from the list.
+ */
+ __ha_barrier_load();
+ if (!LIST_INLIST_ATOMIC(&ds->waiter)) {
+ while (1) {
+ uint16_t query_id;
+ struct eb32_node *eb;
+ struct dns_query *query;
+
+ if (!ds->rx_msg.len) {
+ /* retrieve message len */
+ ret = co_getblk(sc_oc(sc), (char *)&msg_len, 2, 0);
+ if (ret <= 0) {
+ if (ret == -1)
+ goto error;
+ applet_need_more_data(appctx);
+ break;
+ }
+
+ /* mark as consumed */
+ co_skip(sc_oc(sc), 2);
+
+ /* store message len */
+ ds->rx_msg.len = ntohs(msg_len);
+ if (!ds->rx_msg.len)
+ continue;
+ }
+
+ if (co_data(sc_oc(sc)) + ds->rx_msg.offset < ds->rx_msg.len) {
+ /* message only partially available */
+
+ /* read available data */
+ ret = co_getblk(sc_oc(sc), ds->rx_msg.area + ds->rx_msg.offset, co_data(sc_oc(sc)), 0);
+ if (ret <= 0) {
+ if (ret == -1)
+ goto error;
+ applet_need_more_data(appctx);
+ break;
+ }
+
+ /* update message offset */
+ ds->rx_msg.offset += co_data(sc_oc(sc));
+
+ /* consume all pending data from the channel */
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+
+ /* we need to wait for more data */
+ applet_need_more_data(appctx);
+ break;
+ }
+
+ /* enough data is available in the channel to read the message until the end */
+
+ /* read from the channel until the end of the message */
+ ret = co_getblk(sc_oc(sc), ds->rx_msg.area + ds->rx_msg.offset, ds->rx_msg.len - ds->rx_msg.offset, 0);
+ if (ret <= 0) {
+ if (ret == -1)
+ goto error;
+ applet_need_more_data(appctx);
+ break;
+ }
+
+ /* consume all data until the end of the message from the channel */
+ co_skip(sc_oc(sc), ds->rx_msg.len - ds->rx_msg.offset);
+
+ /* reset the reader offset to 0 for the next message read */
+ ds->rx_msg.offset = 0;
+
+ /* try remap query id to original */
+ memcpy(&query_id, ds->rx_msg.area, sizeof(query_id));
+ eb = eb32_lookup(&ds->query_ids, query_id);
+ if (!eb) {
+ /* an unknown query id means there is no matching pending
+ * request, perhaps because of a server bug or because the
+ * query reached its timeout
+ */
+ ds->rx_msg.len = 0;
+ continue;
+ }
+
+ /* re-map the original query id set by the requester */
+ query = eb32_entry(eb, struct dns_query, qid);
+ memcpy(ds->rx_msg.area, &query->original_qid, sizeof(query->original_qid));
+
+ /* remove query ids mapping from pending queries list/tree */
+ eb32_delete(&query->qid);
+ LIST_DELETE(&query->list);
+ pool_free(dns_query_pool, query);
+ ds->onfly_queries--;
+
+ /* the dns_session is also queued into the
+ * wait_sess list, from which the task processing
+ * responses will pop the available ones
+ */
+ HA_SPIN_LOCK(DNS_LOCK, &ds->dss->lock);
+
+ BUG_ON(LIST_INLIST(&ds->waiter));
+ LIST_APPEND(&ds->dss->wait_sess, &ds->waiter);
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &ds->dss->lock);
+
+ /* awake the task processing the responses */
+ task_wakeup(ds->dss->task_rsp, TASK_WOKEN_INIT);
+
+ break;
+ }
+ }
+
+out:
+ return;
+
+close:
+ se_fl_set(appctx->sedesc, SE_FL_EOS|SE_FL_EOI);
+ goto out;
+
+error:
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ goto out;
+}
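+
+/* For illustration, the framing emitted above on the TCP channel follows the
+ * usual DNS-over-TCP convention (two-byte network-order length prefix), with
+ * the query id rewritten to avoid collisions between pipelined requesters:
+ *
+ *   bytes 0-1: htons(msg_len)          message length
+ *   bytes 2-3: htons(++query_counter)  new query id replacing the original
+ *   bytes 4..: the rest of the original DNS message (msg_len - 2 bytes)
+ *
+ * The original id is kept in a dns_query node indexed by the new id, and the
+ * reverse substitution is applied to the response before it is handed back.
+ */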
+
+void dns_queries_flush(struct dns_session *ds)
+{
+ struct dns_query *query, *queryb;
+
+ list_for_each_entry_safe(query, queryb, &ds->queries, list) {
+ eb32_delete(&query->qid);
+ LIST_DELETE(&query->list);
+ pool_free(dns_query_pool, query);
+ }
+}
+
+void dns_session_free(struct dns_session *ds)
+{
+ pool_free(dns_msg_buf, ds->rx_msg.area);
+ pool_free(dns_msg_buf, ds->tx_ring_area);
+ task_destroy(ds->task_exp);
+
+ dns_queries_flush(ds);
+
+ /* Make sure to remove this session from external lists.
+ * Note: we are under the lock of the dns_stream_server
+ * which owns the heads of those lists.
+ */
+ LIST_DEL_INIT(&ds->waiter);
+ LIST_DEL_INIT(&ds->list);
+
+ ds->dss->cur_conns--;
+ /* Note: it is useless to update
+ * max_active_conns here because
+ * we are decreasing the value
+ */
+
+ BUG_ON(!LIST_ISEMPTY(&ds->list));
+ BUG_ON(!LIST_ISEMPTY(&ds->waiter));
+ BUG_ON(!LIST_ISEMPTY(&ds->queries));
+ BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters));
+ BUG_ON(!eb_is_empty(&ds->query_ids));
+ pool_free(dns_session_pool, ds);
+}
+
+static struct appctx *dns_session_create(struct dns_session *ds);
+
+static int dns_session_init(struct appctx *appctx)
+{
+ struct dns_session *ds = appctx->svcctx;
+ struct stream *s;
+ struct sockaddr_storage *addr = NULL;
+
+ if (!sockaddr_alloc(&addr, &ds->dss->srv->addr, sizeof(ds->dss->srv->addr)))
+ goto error;
+
+ if (appctx_finalize_startup(appctx, ds->dss->srv->proxy, &BUF_NULL) == -1)
+ goto error;
+
+ s = appctx_strm(appctx);
+ s->scb->dst = addr;
+ s->scb->flags |= (SC_FL_RCV_ONCE|SC_FL_NOLINGER);
+ s->target = &ds->dss->srv->obj_type;
+ s->flags = SF_ASSIGNED;
+
+ s->do_log = NULL;
+ s->uniq_id = 0;
+
+ applet_expect_no_data(appctx);
+ ds->appctx = appctx;
+ return 0;
+
+ error:
+ return -1;
+}
+
+/*
+ * Function to release a DNS TCP session.
+ */
+static void dns_session_release(struct appctx *appctx)
+{
+ struct dns_session *ds = appctx->svcctx;
+ struct dns_stream_server *dss __maybe_unused;
+
+ if (!ds)
+ return;
+
+ /* We do not call ring_appctx_detach here
+ * because we want to keep the reader counters
+ * so as to retry the conn with a different appctx.
+ */
+ HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock);
+
+ dss = ds->dss;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
+ LIST_DEL_INIT(&ds->list);
+
+ if (stopping) {
+ dns_session_free(ds);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return;
+ }
+
+ if (!ds->nb_queries) {
+ /* this is an idle session */
+ /* Note: it is useless to update max_active_sess
+ * here because we decrease idle_conns while
+ * dns_session_free decreases cur_conns
+ */
+
+ ds->dss->idle_conns--;
+ dns_session_free(ds);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return;
+ }
+
+ if (ds->onfly_queries == ds->nb_queries) {
+ /* the session can be released because
+ * it means that all queries AND
+ * responses are in flight */
+ dns_session_free(ds);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return;
+ }
+
+ /* if there is no pending complete response
+ * message, make sure to reset the
+ * message offsets in case the session
+ * was closed with an incomplete pending response
+ */
+ if (!LIST_INLIST(&ds->waiter))
+ ds->rx_msg.len = ds->rx_msg.offset = 0;
+
+ /* we flush pending sent queries because we will
+ * never get their responses
+ */
+ ds->nb_queries -= ds->onfly_queries;
+ dns_queries_flush(ds);
+
+ /* reset offset to be sure to start from message start */
+ ds->tx_msg_offset = 0;
+
+ /* here the ofs and the attached counter
+ * are kept unchanged
+ */
+
+ /* Create a new appctx. We hope we can
+ * safely do so from the release callback! */
+ ds->appctx = dns_session_create(ds);
+ if (!ds->appctx) {
+ dns_session_free(ds);
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return;
+ }
+
+ if (ds->nb_queries < DNS_STREAM_MAX_PIPELINED_REQ)
+ LIST_INSERT(&ds->dss->free_sess, &ds->list);
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+}
+
+/* DNS TCP session applet */
+static struct applet dns_session_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<STRMDNS>", /* used for logging */
+ .fct = dns_session_io_handler,
+ .init = dns_session_init,
+ .release = dns_session_release,
+};
+
+/*
+ * Function used to create an appctx for a DNS session
+ * It sets its context into appctx->svcctx.
+ */
+static struct appctx *dns_session_create(struct dns_session *ds)
+{
+ struct appctx *appctx;
+
+ appctx = appctx_new_here(&dns_session_applet, NULL);
+ if (!appctx)
+ goto out_close;
+ appctx->svcctx = (void *)ds;
+
+ if (appctx_init(appctx) == -1) {
+ ha_alert("out of memory in dns_session_create().\n");
+ goto out_free_appctx;
+ }
+
+ return appctx;
+
+ /* Error unrolling */
+ out_free_appctx:
+ appctx_free_on_early_error(appctx);
+ out_close:
+ return NULL;
+}
+
+/* Task processing the expiration of unanswered queries; this one is supposed
+ * to stick to the same thread as the appctx handler
+ */
+static struct task *dns_process_query_exp(struct task *t, void *context, unsigned int state)
+{
+ struct dns_session *ds = (struct dns_session *)context;
+ struct dns_query *query, *queryb;
+
+ t->expire = TICK_ETERNITY;
+
+ list_for_each_entry_safe(query, queryb, &ds->queries, list) {
+ if (tick_is_expired(query->expire, now_ms)) {
+ eb32_delete(&query->qid);
+ LIST_DELETE(&query->list);
+ pool_free(dns_query_pool, query);
+ ds->onfly_queries--;
+ }
+ else {
+ t->expire = query->expire;
+ break;
+ }
+ }
+
+ return t;
+}
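+
+/* Note that queries are appended to ds->queries in submission order and all
+ * share the same fixed 5s timeout, so the list is naturally sorted by
+ * expiration date: the walk above can stop at the first non-expired entry
+ * and re-arm the task on that entry's expire date.
+ */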
+
+/* Task processing expiration of idle sessions */
+static struct task *dns_process_idle_exp(struct task *t, void *context, unsigned int state)
+{
+ struct dns_stream_server *dss = (struct dns_stream_server *)context;
+ struct dns_session *ds, *dsb;
+ int target = 0;
+ int cur_active_conns;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
+
+ cur_active_conns = dss->cur_conns - dss->idle_conns;
+ if (cur_active_conns > dss->max_active_conns)
+ dss->max_active_conns = cur_active_conns;
+
+ target = (dss->max_active_conns - cur_active_conns) / 2;
+ list_for_each_entry_safe(ds, dsb, &dss->idle_sess, list) {
+ if (!stopping && !target)
+ break;
+
+ /* remove the conn from the pending list to ensure it won't be reused */
+ LIST_DEL_INIT(&ds->list);
+
+ /* force session shutdown */
+ ds->shutdown = 1;
+
+ /* to be sure that the appctx won't miss shutdown */
+ __ha_barrier_store();
+
+ /* wake the appctx up to perform the shutdown */
+ appctx_wakeup(ds->appctx);
+
+ /* one more idle session closed towards the target */
+ if (target)
+ target--;
+ }
+
+ /* reset max to current active conns */
+ dss->max_active_conns = cur_active_conns;
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+
+ t->expire = tick_add(now_ms, 5000);
+
+ return t;
+}
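+
+/* A worked example of the shrinking heuristic above: with a recorded peak of
+ * max_active_conns=10 and cur_active_conns=4, target=(10-4)/2=3 idle
+ * sessions are candidates for shutdown this round, and the peak is then
+ * reset to 4. The idle pool thus decays by half of the unused capacity
+ * every 5 seconds instead of being torn down abruptly.
+ */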
+
+struct dns_session *dns_session_new(struct dns_stream_server *dss)
+{
+ struct dns_session *ds;
+
+ if (dss->maxconn && (dss->maxconn <= dss->cur_conns))
+ return NULL;
+
+ ds = pool_zalloc(dns_session_pool);
+ if (!ds)
+ return NULL;
+
+ ds->ofs = ~0;
+ ds->dss = dss;
+ LIST_INIT(&ds->list);
+ LIST_INIT(&ds->queries);
+ LIST_INIT(&ds->waiter);
+ ds->rx_msg.offset = ds->rx_msg.len = 0;
+ ds->rx_msg.area = NULL;
+ ds->tx_ring_area = NULL;
+ ds->task_exp = NULL;
+ ds->appctx = NULL;
+ ds->shutdown = 0;
+ ds->nb_queries = 0;
+ ds->query_ids = EB_ROOT_UNIQUE;
+ ds->rx_msg.area = pool_alloc(dns_msg_buf);
+ if (!ds->rx_msg.area)
+ goto error;
+
+ ds->tx_ring_area = pool_alloc(dns_msg_buf);
+ if (!ds->tx_ring_area)
+ goto error;
+
+ ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE);
+ /* never fail because it is the first watcher attached to the ring */
+ DISGUISE(ring_attach(&ds->ring));
+
+ if ((ds->task_exp = task_new_here()) == NULL)
+ goto error;
+
+ ds->task_exp->process = dns_process_query_exp;
+ ds->task_exp->context = ds;
+
+ ds->appctx = dns_session_create(ds);
+ if (!ds->appctx)
+ goto error;
+
+ dss->cur_conns++;
+
+ return ds;
+
+error:
+ task_destroy(ds->task_exp);
+ pool_free(dns_msg_buf, ds->rx_msg.area);
+ pool_free(dns_msg_buf, ds->tx_ring_area);
+
+ pool_free(dns_session_pool, ds);
+
+ return NULL;
+}
+
+/*
+ * Task used to consume pending messages from the nameserver ring
+ * and forward them to a dns_session ring.
+ * Note: if no free slot is found, a new dns_session is allocated
+ */
+static struct task *dns_process_req(struct task *t, void *context, unsigned int state)
+{
+ struct dns_nameserver *ns = (struct dns_nameserver *)context;
+ struct dns_stream_server *dss = ns->stream;
+ struct ring *ring = dss->ring_req;
+ struct buffer *buf = &ring->buf;
+ uint64_t msg_len;
+ size_t len, cnt, ofs;
+ struct dns_session *ds, *ads;
+
+ HA_SPIN_LOCK(DNS_LOCK, &dss->lock);
+
+ HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(dss->ofs_req == ~0)) {
+ dss->ofs_req = b_peek_ofs(buf, 0);
+ HA_ATOMIC_INC(b_orig(buf) + dss->ofs_req);
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs = dss->ofs_req - b_head_ofs(buf);
+ if (dss->ofs_req < b_head_ofs(buf))
+ ofs += b_size(buf);
+
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ while (ofs + 1 < b_data(buf)) {
+ struct ist myist;
+
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+ if (unlikely(msg_len > DNS_TCP_MSG_MAX_SIZE)) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ len = b_getblk(buf, dns_msg_trash, msg_len, ofs + cnt);
+
+ myist = ist2(dns_msg_trash, len);
+
+ ads = NULL;
+ /* try to push the request into an active sess with a free slot */
+ if (!LIST_ISEMPTY(&dss->free_sess)) {
+ ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list);
+
+ if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) {
+ ds->nb_queries++;
+ if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ)
+ LIST_DEL_INIT(&ds->list);
+ ads = ds;
+ }
+ else {
+ /* it means we were unable to put a request in this slot,
+ * it may be close to full so we put it at the end
+ * of the free conn list */
+ LIST_DEL_INIT(&ds->list);
+ LIST_APPEND(&dss->free_sess, &ds->list);
+ }
+ }
+
+ if (!ads) {
+ /* try to push the request into an idle session, which should have enough free space */
+ if (!LIST_ISEMPTY(&dss->idle_sess)) {
+ ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list);
+
+ /* ring is empty so this ring_write should never fail */
+ ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
+ ds->nb_queries++;
+ LIST_DEL_INIT(&ds->list);
+
+ ds->dss->idle_conns--;
+
+ /* we may have to update the max_active_conns */
+ if (ds->dss->max_active_conns < ds->dss->cur_conns - ds->dss->idle_conns)
+ ds->dss->max_active_conns = ds->dss->cur_conns - ds->dss->idle_conns;
+
+ /* since we were unable to find a session with a free
+ * slot for this request, the request may be large and
+ * fill the ring buffer, so we prefer to put this session
+ * at the end of the free list. */
+ LIST_APPEND(&dss->free_sess, &ds->list);
+ ads = ds;
+ }
+ }
+
+ /* we didn't find an available session with enough room */
+ if (!ads) {
+ /* allocate a new session */
+ ads = dns_session_new(dss);
+ if (ads) {
+ /* ring is empty so this ring_write should never fail */
+ ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1);
+ ads->nb_queries++;
+ LIST_INSERT(&dss->free_sess, &ads->list);
+ }
+ else
+ ns->counters->snd_error++;
+ }
+
+ if (ads)
+ ns->counters->sent++;
+
+ ofs += cnt + len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ dss->ofs_req = b_peek_ofs(buf, ofs);
+ HA_RWLOCK_RDUNLOCK(DNS_LOCK, &ring->lock);
+
+ HA_SPIN_UNLOCK(DNS_LOCK, &dss->lock);
+ return t;
+}
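+
+/* Summary of the session selection above, in decreasing order of preference:
+ *   1. a session from free_sess with a free pipeline slot (the ring_write
+ *      may still fail if its tx ring is almost full);
+ *   2. an idle session, whose empty ring is guaranteed to accept the write;
+ *   3. a brand new session, a send error being accounted when even that
+ *      allocation fails.
+ */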
+
+/*
+ * Task used to consume responses.
+ * Note: the upper layer callback is called
+ */
+static struct task *dns_process_rsp(struct task *t, void *context, unsigned int state)
+{
+ struct dns_nameserver *ns = (struct dns_nameserver *)context;
+
+ ns->process_responses(ns);
+
+ return t;
+}
+
+/* Function used to initialize a TCP nameserver */
+int dns_stream_init(struct dns_nameserver *ns, struct server *srv)
+{
+ struct dns_stream_server *dss = NULL;
+
+ dss = calloc(1, sizeof(*dss));
+ if (!dss) {
+ ha_alert("memory allocation error initializing dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+
+ dss->srv = srv;
+ dss->maxconn = srv->maxconn;
+
+ dss->ofs_req = ~0; /* init ring offset */
+ dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE);
+ if (!dss->ring_req) {
+ ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+ /* Create the task handling the requests sent to the resolver target */
+ if ((dss->task_req = task_new_anywhere()) == NULL) {
+ ha_alert("memory allocation error initializing the request task for dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+
+ /* Update task's parameters */
+ dss->task_req->process = dns_process_req;
+ dss->task_req->context = ns;
+
+ /* attach the task as reader */
+ if (!ring_attach(dss->ring_req)) {
+ /* should never happen: a ring cannot have more than 255 watchers */
+ ha_alert("server '%s': too many watchers for the ring. This should never happen.\n", srv->id);
+ goto out;
+ }
+
+ /* Create the task handling the responses from the resolver target */
+ if ((dss->task_rsp = task_new_anywhere()) == NULL) {
+ ha_alert("memory allocation error initializing the response task for dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+
+ /* Update task's parameters */
+ dss->task_rsp->process = dns_process_rsp;
+ dss->task_rsp->context = ns;
+
+ /* Create the task handling idle connection expiration */
+ if ((dss->task_idle = task_new_anywhere()) == NULL) {
+ ha_alert("memory allocation error initializing the idle task for dns tcp server '%s'.\n", srv->id);
+ goto out;
+ }
+
+ /* Update task's parameters */
+ dss->task_idle->process = dns_process_idle_exp;
+ dss->task_idle->context = dss;
+ dss->task_idle->expire = tick_add(now_ms, 5000);
+
+ /* start the task immediately so that idle conns can be freed right away */
+ task_queue(dss->task_idle);
+
+ LIST_INIT(&dss->free_sess);
+ LIST_INIT(&dss->idle_sess);
+ LIST_INIT(&dss->wait_sess);
+ HA_SPIN_INIT(&dss->lock);
+ ns->stream = dss;
+ return 0;
+out:
+ if (dss && dss->task_rsp)
+ task_destroy(dss->task_rsp);
+ if (dss && dss->task_req)
+ task_destroy(dss->task_req);
+ if (dss && dss->ring_req)
+ ring_free(dss->ring_req);
+
+ free(dss);
+ return -1;
+}
+
+int init_dns_buffers()
+{
+ dns_msg_trash = malloc(DNS_TCP_MSG_MAX_SIZE);
+ if (!dns_msg_trash)
+ return 0;
+
+ return 1;
+}
+
+void deinit_dns_buffers()
+{
+ ha_free(&dns_msg_trash);
+}
+
+REGISTER_PER_THREAD_ALLOC(init_dns_buffers);
+REGISTER_PER_THREAD_FREE(deinit_dns_buffers);
diff --git a/src/dynbuf.c b/src/dynbuf.c
new file mode 100644
index 0000000..712e334
--- /dev/null
+++ b/src/dynbuf.c
@@ -0,0 +1,129 @@
+/*
+ * Buffer management functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/pool.h>
+
+struct pool_head *pool_head_buffer __read_mostly;
+
+/* perform minimal initializations, report 0 in case of error, 1 if OK. */
+int init_buffer()
+{
+ void *buffer;
+ int thr;
+ int done;
+
+ pool_head_buffer = create_pool("buffer", global.tune.bufsize, MEM_F_SHARED|MEM_F_EXACT);
+ if (!pool_head_buffer)
+ return 0;
+
+ for (thr = 0; thr < MAX_THREADS; thr++)
+ LIST_INIT(&ha_thread_ctx[thr].buffer_wq);
+
+ /* The reserved buffer is what we leave behind us. Thus we always need
+ * at least one extra buffer in minavail otherwise we'll end up waking
+ * up tasks with no memory available, causing a lot of useless wakeups.
+ * That means that we always want to have at least 3 buffers available
+ * (2 for current session, one for next session that might be needed to
+ * release a server connection).
+ */
+ pool_head_buffer->minavail = MAX(global.tune.reserved_bufs, 3);
+ if (global.tune.buf_limit)
+ pool_head_buffer->limit = global.tune.buf_limit;
+
+ for (done = 0; done < pool_head_buffer->minavail - 1; done++) {
+ buffer = pool_alloc_nocache(pool_head_buffer, init_buffer);
+ if (!buffer)
+ return 0;
+ pool_free(pool_head_buffer, buffer);
+ }
+ return 1;
+}
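+
+/* For instance, assuming the common case where tune.reserved_bufs is smaller
+ * than 3, minavail ends up at 3 and the loop above allocates then releases
+ * minavail-1 buffers, so the pool cache is already primed when the first
+ * streams start allocating.
+ */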
+
+/*
+ * Dumps part or all of a buffer.
+ */
+void buffer_dump(FILE *o, struct buffer *b, int from, int to)
+{
+ fprintf(o, "Dumping buffer %p\n", b);
+ fprintf(o, " orig=%p size=%u head=%u tail=%u data=%u\n",
+ b_orig(b), (unsigned int)b_size(b), (unsigned int)b_head_ofs(b), (unsigned int)b_tail_ofs(b), (unsigned int)b_data(b));
+
+ fprintf(o, "Dumping contents from byte %d to byte %d\n", from, to);
+ fprintf(o, " 0 1 2 3 4 5 6 7 8 9 a b c d e f\n");
+ /* dump hexa */
+ while (from < to) {
+ int i;
+
+ fprintf(o, " %04x: ", from);
+ for (i = 0; ((from + i) < to) && (i < 16) ; i++) {
+ fprintf(o, "%02x ", (unsigned char)b_orig(b)[from + i]);
+ if (i == 7)
+ fprintf(o, "- ");
+ }
+ if (to - from < 16) {
+ int j = 0;
+
+ for (j = 0; j < from + 16 - to; j++)
+ fprintf(o, " ");
+ if (j > 8)
+ fprintf(o, " ");
+ }
+ fprintf(o, " ");
+ for (i = 0; (from + i < to) && (i < 16) ; i++) {
+ fprintf(o, "%c", isprint((unsigned char)b_orig(b)[from + i]) ? b_orig(b)[from + i] : '.') ;
+ if ((i == 15) && ((from + i) != to-1))
+ fprintf(o, "\n");
+ }
+ from += i;
+ }
+ fprintf(o, "\n--\n");
+ fflush(o);
+}
+
+/* see offer_buffers() for details */
+void __offer_buffers(void *from, unsigned int count)
+{
+ struct buffer_wait *wait, *wait_back;
+
+ /* For now, we consider that all objects need 1 buffer, so we can stop
+ * waking them up once we have enough of them to eat all the available
+ * buffers. Note that we don't really know if they are streams or just
+ * other tasks, but that's a rough estimate. Similarly, for each cached
+ * event we'll need 1 buffer.
+ */
+ list_for_each_entry_safe(wait, wait_back, &th_ctx->buffer_wq, list) {
+ if (!count)
+ break;
+
+ if (wait->target == from || !wait->wakeup_cb(wait->target))
+ continue;
+
+ LIST_DEL_INIT(&wait->list);
+ count--;
+ }
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/eb32sctree.c b/src/eb32sctree.c
new file mode 100644
index 0000000..af6a539
--- /dev/null
+++ b/src/eb32sctree.c
@@ -0,0 +1,472 @@
+/*
+ * Elastic Binary Trees - exported functions for operations on 32bit nodes.
+ * Version 6.0.6 with backports from v7-dev
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult eb32sctree.h for more details about those functions */
+
+#include <import/eb32sctree.h>
+
+
+/* This function is used to build a tree of duplicates by adding a new node to
+ * a subtree of at least 2 entries.
+ */
+struct eb32sc_node *eb32sc_insert_dup(struct eb_node *sub, struct eb_node *new, unsigned long scope)
+{
+ struct eb32sc_node *eb32;
+ struct eb_node *head = sub;
+ eb_troot_t *new_left = eb_dotag(&new->branches, EB_LEFT);
+ eb_troot_t *new_rght = eb_dotag(&new->branches, EB_RGHT);
+ eb_troot_t *new_leaf = eb_dotag(&new->branches, EB_LEAF);
+
+ /* first, identify the deepest hole on the right branch */
+ while (eb_gettag(head->branches.b[EB_RGHT]) != EB_LEAF) {
+ struct eb_node *last = head;
+
+ head = container_of(eb_untag(head->branches.b[EB_RGHT], EB_NODE),
+ struct eb_node, branches);
+
+ if (unlikely(head->bit > last->bit + 1)) {
+ /* there's a hole here, we must assign the top of the
+ * following sub-tree to <sub> and mark all intermediate
+ * nodes with the scope mask.
+ */
+ do {
+ eb32 = container_of(sub, struct eb32sc_node, node);
+ if (!(eb32->node_s & scope))
+ eb32->node_s |= scope;
+
+ sub = container_of(eb_untag(sub->branches.b[EB_RGHT], EB_NODE),
+ struct eb_node, branches);
+ } while (sub != head);
+ }
+
+ eb32 = container_of(head, struct eb32sc_node, node);
+ if (!(eb32->node_s & scope))
+ eb32->node_s |= scope;
+ }
+
+ /* Here we have a leaf attached to (head)->b[EB_RGHT] */
+ if (head->bit < -1) {
+ /* A hole exists just before the leaf, we insert there */
+ new->bit = -1;
+ sub = container_of(eb_untag(head->branches.b[EB_RGHT], EB_LEAF),
+ struct eb_node, branches);
+ head->branches.b[EB_RGHT] = eb_dotag(&new->branches, EB_NODE);
+
+ new->node_p = sub->leaf_p;
+ new->leaf_p = new_rght;
+ sub->leaf_p = new_left;
+ new->branches.b[EB_LEFT] = eb_dotag(&sub->branches, EB_LEAF);
+ new->branches.b[EB_RGHT] = new_leaf;
+ eb32 = container_of(new, struct eb32sc_node, node);
+ eb32->node_s = container_of(sub, struct eb32sc_node, node)->leaf_s | scope;
+ return eb32;
+ } else {
+ int side;
+ /* No hole was found before a leaf. We have to insert above
+ * <sub>. Note that we cannot be certain that <sub> is attached
+ * to the right of its parent, as this is only true if <sub>
+ * is inside the dup tree, not at the head.
+ */
+ new->bit = sub->bit - 1; /* install at the lowest level */
+ side = eb_gettag(sub->node_p);
+ head = container_of(eb_untag(sub->node_p, side), struct eb_node, branches);
+ head->branches.b[side] = eb_dotag(&new->branches, EB_NODE);
+
+ new->node_p = sub->node_p;
+ new->leaf_p = new_rght;
+ sub->node_p = new_left;
+ new->branches.b[EB_LEFT] = eb_dotag(&sub->branches, EB_NODE);
+ new->branches.b[EB_RGHT] = new_leaf;
+ eb32 = container_of(new, struct eb32sc_node, node);
+ eb32->node_s = container_of(sub, struct eb32sc_node, node)->node_s | scope;
+ return eb32;
+ }
+}
+
+/* Insert eb32sc_node <new> into subtree starting at node root <root>. Only
+ * new->key needs to be set with the key. The eb32sc_node is returned. This
+ * implementation does NOT support unique trees.
+ */
+struct eb32sc_node *eb32sc_insert(struct eb_root *root, struct eb32sc_node *new, unsigned long scope)
+{
+ struct eb32sc_node *old;
+ unsigned int side;
+ eb_troot_t *troot, **up_ptr;
+ u32 newkey; /* caching the key saves approximately one cycle */
+ eb_troot_t *new_left, *new_rght;
+ eb_troot_t *new_leaf;
+ int old_node_bit;
+ unsigned long old_scope;
+
+ side = EB_LEFT;
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL)) {
+ /* Tree is empty, insert the leaf part below the left branch */
+ root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
+ new->node.leaf_p = eb_dotag(root, EB_LEFT);
+ new->node.node_p = NULL; /* node part unused */
+ new->node_s = scope;
+ new->leaf_s = scope;
+ return new;
+ }
+
+ /* The tree descent is fairly easy :
+ * - first, check if we have reached a leaf node
+ * - second, check if we have gone too far
+ * - third, reiterate
+ * Everywhere, we use <new> for the node we are inserting, <root>
+ * for the node we attach it to, and <old> for the node we are
+ * displacing below <new>. <troot> will always point to the future node
+ * (tagged with its type). <side> carries the side the node <new> is
+ * attached to below its parent, which is also where the previous node
+ * was attached. <newkey> carries the key being inserted.
+ */
+ newkey = new->key;
+
+ while (1) {
+ if (eb_gettag(troot) == EB_LEAF) {
+ /* insert above a leaf */
+ old = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32sc_node, node.branches);
+ new->node.node_p = old->node.leaf_p;
+ up_ptr = &old->node.leaf_p;
+ old_scope = old->leaf_s;
+ break;
+ }
+
+ /* OK we're walking down this link */
+ old = container_of(eb_untag(troot, EB_NODE),
+ struct eb32sc_node, node.branches);
+ old_node_bit = old->node.bit;
+
+ /* our new node will be found through this one, we must mark it */
+ if ((old->node_s | scope) != old->node_s)
+ old->node_s |= scope;
+
+ /* Stop going down when we don't have common bits anymore. We
+ * also stop in front of a duplicates tree because it means we
+ * have to insert above.
+ */
+
+ if ((old_node_bit < 0) || /* we're above a duplicate tree, stop here */
+ (((new->key ^ old->key) >> old_node_bit) >= EB_NODE_BRANCHES)) {
+ /* The tree did not contain the key, so we insert <new> before the node
+ * <old>, and set ->bit to designate the lowest bit position in <new>
+ * which applies to ->branches.b[].
+ */
+ new->node.node_p = old->node.node_p;
+ up_ptr = &old->node.node_p;
+ old_scope = old->node_s;
+ break;
+ }
+
+ /* walk down */
+ root = &old->node.branches;
+ side = (newkey >> old_node_bit) & EB_NODE_BRANCH_MASK;
+ troot = root->b[side];
+ }
+
+ new_left = eb_dotag(&new->node.branches, EB_LEFT);
+ new_rght = eb_dotag(&new->node.branches, EB_RGHT);
+ new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
+
+ /* We need the common higher bits between new->key and old->key.
+ * What differences are there between new->key and the node here ?
+ * NOTE that bit(new) is always < bit(root) because highest
+ * bit of new->key and old->key are identical here (otherwise they
+ * would sit on different branches).
+ */
+
+ // note that if EB_NODE_BITS > 1, we should check that it's still >= 0
+ new->node.bit = flsnz(new->key ^ old->key) - EB_NODE_BITS;
+ new->leaf_s = scope;
+ new->node_s = old_scope | scope;
+
+ if (new->key == old->key) {
+ new->node.bit = -1; /* mark as new dup tree, just in case */
+
+ if (eb_gettag(troot) != EB_LEAF) {
+ /* there was already a dup tree below */
+ return eb32sc_insert_dup(&old->node, &new->node, scope);
+ }
+ /* otherwise fall through */
+ }
+
+ if (new->key >= old->key) {
+ new->node.branches.b[EB_LEFT] = troot;
+ new->node.branches.b[EB_RGHT] = new_leaf;
+ new->node.leaf_p = new_rght;
+ *up_ptr = new_left;
+ }
+ else {
+ new->node.branches.b[EB_LEFT] = new_leaf;
+ new->node.branches.b[EB_RGHT] = troot;
+ new->node.leaf_p = new_left;
+ *up_ptr = new_rght;
+ }
+
+ /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
+ * parent is already set to <new>, and the <root>'s branch is still in
+ * <side>. Update the root's leaf till we have it. Note that we can also
+ * find the side by checking the side of new->node.node_p.
+ */
+
+ root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
+ return new;
+}
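+
+/* Minimal usage sketch (the container type and the scope bit <tid> are
+ * hypothetical, for illustration only):
+ *
+ *   struct job {
+ *       struct eb32sc_node node;
+ *       // payload...
+ *   };
+ *
+ *   struct eb_root root = EB_ROOT;  // unique trees are not supported here
+ *   struct job *j = calloc(1, sizeof(*j));
+ *
+ *   j->node.key = 42;                            // only the key must be set
+ *   eb32sc_insert(&root, &j->node, 1UL << tid);  // tag with scope bit <tid>
+ *   ...
+ *   eb32sc_delete(&j->node);                     // unlink before freeing
+ */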
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root>, which is
+ * equal to or greater than <x>. NULL is returned is no key matches.
+ */
+struct eb32sc_node *eb32sc_lookup_ge(struct eb_root *root, u32 x, unsigned long scope)
+{
+ struct eb32sc_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32sc_node, node.branches);
+ if ((node->leaf_s & scope) && node->key >= x)
+ return node;
+ /* return next */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb32sc_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if ((node->node_s & scope) && node->key >= x)
+ troot = eb_dotag(&node->node.branches, EB_LEFT);
+ else
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if ((node->node_s & scope) && (node->key >> node->node.bit) > (x >> node->node.bit))
+ troot = eb_dotag(&node->node.branches, EB_LEFT);
+ else
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ return eb32sc_next_with_parent(troot, scope);
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root> which is
+ * equal to or greater than <x>, matching scope <scope>. If not found, it loops
+ * back to the beginning of the tree. NULL is returned if no key matches.
+ */
+struct eb32sc_node *eb32sc_lookup_ge_or_first(struct eb_root *root, u32 x, unsigned long scope)
+{
+ struct eb32sc_node *eb32;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ eb32 = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32sc_node, node.branches);
+ if ((eb32->leaf_s & scope) && eb32->key >= x)
+ return eb32;
+ /* return next */
+ troot = eb32->node.leaf_p;
+ break;
+ }
+ eb32 = container_of(eb_untag(troot, EB_NODE),
+ struct eb32sc_node, node.branches);
+
+ if (eb32->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if ((eb32->node_s & scope) && eb32->key >= x)
+ troot = eb_dotag(&eb32->node.branches, EB_LEFT);
+ else
+ troot = eb32->node.node_p;
+ break;
+ }
+
+ if (((x ^ eb32->key) >> eb32->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if ((eb32->node_s & scope) && (eb32->key >> eb32->node.bit) > (x >> eb32->node.bit))
+ troot = eb_dotag(&eb32->node.branches, EB_LEFT);
+ else
+ troot = eb32->node.node_p;
+ break;
+ }
+ troot = eb32->node.branches.b[(x >> eb32->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ eb32 = eb32sc_next_with_parent(troot, scope);
+ if (!eb32)
+ eb32 = eb32sc_walk_down_left(root->b[EB_LEFT], scope);
+
+ return eb32;
+}
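+
+/* Example of the wrap-around behavior, assuming keys 2, 5 and 9 all carry
+ * the requested scope bit:
+ *
+ *   eb32sc_lookup_ge_or_first(&root, 7, scope);   // returns the key 9 node
+ *   eb32sc_lookup_ge_or_first(&root, 10, scope);  // wraps, returns key 2
+ *
+ * which is what makes it suitable for round-robin walks over a ring of
+ * scoped keys.
+ */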
+
+/* Removes a leaf node from the tree if it was still in it. Marks the node
+ * as unlinked.
+ */
+void eb32sc_delete(struct eb32sc_node *eb32)
+{
+ struct eb_node *node = &eb32->node;
+ unsigned int pside, gpside, sibtype;
+ struct eb_node *parent;
+ struct eb_root *gparent;
+ unsigned long scope;
+
+ if (!node->leaf_p)
+ return;
+
+ /* we need the parent, our side, and the grand parent */
+ pside = eb_gettag(node->leaf_p);
+ parent = eb_root_to_node(eb_untag(node->leaf_p, pside));
+
+ /* We likely have to release the parent link, unless it's the root,
+ * in which case we only set our branch to NULL. Note that we can
+ * only be attached to the root by its left branch.
+ */
+
+ if (eb_clrtag(parent->branches.b[EB_RGHT]) == NULL) {
+ /* we're just below the root, it's trivial. */
+ parent->branches.b[EB_LEFT] = NULL;
+ goto delete_unlink;
+ }
+
+ /* To release our parent, we have to identify our sibling, and reparent
+ * it directly to/from the grand parent. Note that the sibling can
+ * either be a link or a leaf.
+ */
+
+ gpside = eb_gettag(parent->node_p);
+ gparent = eb_untag(parent->node_p, gpside);
+
+ gparent->b[gpside] = parent->branches.b[!pside];
+ sibtype = eb_gettag(gparent->b[gpside]);
+
+ if (sibtype == EB_LEAF) {
+ eb_root_to_node(eb_untag(gparent->b[gpside], EB_LEAF))->leaf_p =
+ eb_dotag(gparent, gpside);
+ } else {
+ eb_root_to_node(eb_untag(gparent->b[gpside], EB_NODE))->node_p =
+ eb_dotag(gparent, gpside);
+ }
+ /* Mark the parent unused. Note that we do not check if the parent is
+ * our own node, but that's not a problem because if it is, it will be
+ * marked unused at the same time, which we'll use below to know we can
+ * safely remove it.
+ */
+ parent->node_p = NULL;
+
+ /* The parent node has been detached, and is currently unused. It may
+ * belong to another node, so we cannot remove it that way. Also, our
+ * own node part might still be used. so we can use this spare node
+ * to replace ours if needed.
+ */
+
+ /* If our link part is unused, we can safely exit now */
+ if (!node->node_p)
+ goto delete_unlink;
+
+ /* From now on, <node> and <parent> are necessarily different, and the
+ * <node>'s node part is in use. By definition, <parent> is at least
+ * below <node>, so keeping its key for the bit string is OK. However
+ * its scope must be enlarged to cover the new branch it absorbs.
+ */
+
+ parent->node_p = node->node_p;
+ parent->branches = node->branches;
+ parent->bit = node->bit;
+
+ /* We must now update the new node's parent... */
+ gpside = eb_gettag(parent->node_p);
+ gparent = eb_untag(parent->node_p, gpside);
+ gparent->b[gpside] = eb_dotag(&parent->branches, EB_NODE);
+
+ /* ... and its branches */
+ scope = 0;
+ for (pside = 0; pside <= 1; pside++) {
+ if (eb_gettag(parent->branches.b[pside]) == EB_NODE) {
+ eb_root_to_node(eb_untag(parent->branches.b[pside], EB_NODE))->node_p =
+ eb_dotag(&parent->branches, pside);
+ scope |= container_of(eb_untag(parent->branches.b[pside], EB_NODE), struct eb32sc_node, node.branches)->node_s;
+ } else {
+ eb_root_to_node(eb_untag(parent->branches.b[pside], EB_LEAF))->leaf_p =
+ eb_dotag(&parent->branches, pside);
+ scope |= container_of(eb_untag(parent->branches.b[pside], EB_LEAF), struct eb32sc_node, node.branches)->leaf_s;
+ }
+ }
+ container_of(parent, struct eb32sc_node, node)->node_s = scope;
+
+ delete_unlink:
+ /* Now the node has been completely unlinked */
+ node->leaf_p = NULL;
+ return; /* tree is not empty yet */
+}
diff --git a/src/eb32tree.c b/src/eb32tree.c
new file mode 100644
index 0000000..38ddab0
--- /dev/null
+++ b/src/eb32tree.c
@@ -0,0 +1,218 @@
+/*
+ * Elastic Binary Trees - exported functions for operations on 32bit nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult eb32tree.h for more details about those functions */
+
+#include <import/eb32tree.h>
+
+struct eb32_node *eb32_insert(struct eb_root *root, struct eb32_node *new)
+{
+ return __eb32_insert(root, new);
+}
+
+struct eb32_node *eb32i_insert(struct eb_root *root, struct eb32_node *new)
+{
+ return __eb32i_insert(root, new);
+}
+
+struct eb32_node *eb32_lookup(struct eb_root *root, u32 x)
+{
+ return __eb32_lookup(root, x);
+}
+
+struct eb32_node *eb32i_lookup(struct eb_root *root, s32 x)
+{
+ return __eb32i_lookup(root, x);
+}
+
+/*
+ * Find the last occurrence of the highest key in the tree <root>, which is
+ * equal to or less than <x>. NULL is returned if no key matches.
+ */
+struct eb32_node *eb32_lookup_le(struct eb_root *root, u32 x)
+{
+ struct eb32_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32_node, node.branches);
+ if (node->key <= x)
+ return node;
+ /* return prev */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb32_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the rightmost node, or
+ * we don't and we skip the whole subtree to return the
+ * prev node before the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * prev node without first trying to escape from the
+ * tree.
+ */
+ if (node->key <= x) {
+ troot = node->node.branches.b[EB_RGHT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_RGHT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct eb32_node, node.branches);
+ }
+ /* return prev */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * small and we need to get its highest value, or it is
+ * too large, and we need to get the prev value.
+ */
+ if ((node->key >> node->node.bit) < (x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_RGHT];
+ return eb32_entry(eb_walk_down(troot, EB_RGHT), struct eb32_node, node);
+ }
+
+ /* Further values will be too high here, so return the prev
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report previous node before the
+ * current one which is not above. <troot> is already initialised to
+ * the parent's branches.
+ */
+ while (eb_gettag(troot) == EB_LEFT) {
+ /* Walking up from left branch. We must ensure that we never
+ * walk beyond root.
+ */
+ if (unlikely(eb_clrtag((eb_untag(troot, EB_LEFT))->b[EB_RGHT]) == NULL))
+ return NULL;
+ troot = (eb_root_to_node(eb_untag(troot, EB_LEFT)))->node_p;
+ }
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_RGHT))->b[EB_LEFT];
+ node = eb32_entry(eb_walk_down(troot, EB_RGHT), struct eb32_node, node);
+ return node;
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root>, which is
+ * equal to or greater than <x>. NULL is returned if no key matches.
+ */
+struct eb32_node *eb32_lookup_ge(struct eb_root *root, u32 x)
+{
+ struct eb32_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb32_node, node.branches);
+ if (node->key >= x)
+ return node;
+ /* return next */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb32_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if (node->key >= x) {
+ troot = node->node.branches.b[EB_LEFT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct eb32_node, node.branches);
+ }
+ /* return next */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if ((node->key >> node->node.bit) > (x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_LEFT];
+ return eb32_entry(eb_walk_down(troot, EB_LEFT), struct eb32_node, node);
+ }
+
+ /* Further values will be too low here, so return the next
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ while (eb_gettag(troot) != EB_LEFT)
+ /* Walking up from right branch, so we cannot be below root */
+ troot = (eb_root_to_node(eb_untag(troot, EB_RGHT)))->node_p;
+
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_LEFT))->b[EB_RGHT];
+ if (eb_clrtag(troot) == NULL)
+ return NULL;
+
+ node = eb32_entry(eb_walk_down(troot, EB_LEFT), struct eb32_node, node);
+ return node;
+}
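+
+/* Quick reference for the two bounded lookups above, with keys 10, 20, 30
+ * in the tree:
+ *
+ *   eb32_lookup_le(&root, 25);  // -> node with key 20
+ *   eb32_lookup_le(&root, 5);   // -> NULL, no key <= 5
+ *   eb32_lookup_ge(&root, 25);  // -> node with key 30
+ *   eb32_lookup_ge(&root, 35);  // -> NULL, no key >= 35
+ */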
diff --git a/src/eb64tree.c b/src/eb64tree.c
new file mode 100644
index 0000000..b908d4d
--- /dev/null
+++ b/src/eb64tree.c
@@ -0,0 +1,218 @@
+/*
+ * Elastic Binary Trees - exported functions for operations on 64bit nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult eb64tree.h for more details about those functions */
+
+#include <import/eb64tree.h>
+
+struct eb64_node *eb64_insert(struct eb_root *root, struct eb64_node *new)
+{
+ return __eb64_insert(root, new);
+}
+
+struct eb64_node *eb64i_insert(struct eb_root *root, struct eb64_node *new)
+{
+ return __eb64i_insert(root, new);
+}
+
+struct eb64_node *eb64_lookup(struct eb_root *root, u64 x)
+{
+ return __eb64_lookup(root, x);
+}
+
+struct eb64_node *eb64i_lookup(struct eb_root *root, s64 x)
+{
+ return __eb64i_lookup(root, x);
+}
+
+/*
+ * Find the last occurrence of the highest key in the tree <root>, which is
+ * equal to or less than <x>. NULL is returned if no key matches.
+ */
+struct eb64_node *eb64_lookup_le(struct eb_root *root, u64 x)
+{
+ struct eb64_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb64_node, node.branches);
+ if (node->key <= x)
+ return node;
+ /* return prev */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb64_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the rightmost node, or
+ * we don't and we skip the whole subtree to return the
+ * prev node before the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * prev node without first trying to escape from the
+ * tree.
+ */
+ if (node->key <= x) {
+ troot = node->node.branches.b[EB_RGHT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_RGHT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct eb64_node, node.branches);
+ }
+ /* return prev */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * small and we need to get its highest value, or it is
+ * too large, and we need to get the prev value.
+ */
+ if ((node->key >> node->node.bit) < (x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_RGHT];
+ return eb64_entry(eb_walk_down(troot, EB_RGHT), struct eb64_node, node);
+ }
+
+ /* Further values will be too high here, so return the prev
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report previous node before the
+ * current one which is not above. <troot> is already initialised to
+ * the parent's branches.
+ */
+ while (eb_gettag(troot) == EB_LEFT) {
+ /* Walking up from left branch. We must ensure that we never
+ * walk beyond root.
+ */
+ if (unlikely(eb_clrtag((eb_untag(troot, EB_LEFT))->b[EB_RGHT]) == NULL))
+ return NULL;
+ troot = (eb_root_to_node(eb_untag(troot, EB_LEFT)))->node_p;
+ }
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_RGHT))->b[EB_LEFT];
+ node = eb64_entry(eb_walk_down(troot, EB_RGHT), struct eb64_node, node);
+ return node;
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root>, which is
+ * equal to or greater than <x>. NULL is returned if no key matches.
+ */
+struct eb64_node *eb64_lookup_ge(struct eb_root *root, u64 x)
+{
+ struct eb64_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct eb64_node, node.branches);
+ if (node->key >= x)
+ return node;
+ /* return next */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct eb64_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if (node->key >= x) {
+ troot = node->node.branches.b[EB_LEFT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct eb64_node, node.branches);
+ }
+ /* return next */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if (((x ^ node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if ((node->key >> node->node.bit) > (x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_LEFT];
+ return eb64_entry(eb_walk_down(troot, EB_LEFT), struct eb64_node, node);
+ }
+
+ /* Further values will be too low here, so return the next
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[(x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ while (eb_gettag(troot) != EB_LEFT)
+ /* Walking up from right branch, so we cannot be below root */
+ troot = (eb_root_to_node(eb_untag(troot, EB_RGHT)))->node_p;
+
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_LEFT))->b[EB_RGHT];
+ if (eb_clrtag(troot) == NULL)
+ return NULL;
+
+ node = eb64_entry(eb_walk_down(troot, EB_LEFT), struct eb64_node, node);
+ return node;
+}
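+
+/* Usage sketch (illustrative only, not part of the exported API above):
+ * nodes are embedded in the caller's structures and inserted by key, then
+ * the range lookups return the closest match in the requested direction.
+ * Assuming a tree allowing duplicate keys:
+ *
+ *	struct eb_root root = EB_ROOT;
+ *	struct eb64_node n1 = { .key = 10 };
+ *	struct eb64_node n2 = { .key = 20 };
+ *
+ *	eb64_insert(&root, &n1);
+ *	eb64_insert(&root, &n2);
+ *	eb64_lookup_le(&root, 15);	// returns &n1 (highest key <= 15)
+ *	eb64_lookup_ge(&root, 15);	// returns &n2 (lowest key >= 15)
+ */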
diff --git a/src/ebimtree.c b/src/ebimtree.c
new file mode 100644
index 0000000..1ac444a
--- /dev/null
+++ b/src/ebimtree.c
@@ -0,0 +1,44 @@
+/*
+ * Elastic Binary Trees - exported functions for Indirect Multi-Byte data nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebimtree.h for more details about those functions */
+
+#include <import/ebimtree.h>
+#include <import/ebpttree.h>
+
+/* Find the first occurrence of a key of <len> bytes in the tree <root>.
+ * If none can be found, return NULL.
+ */
+struct ebpt_node *
+ebim_lookup(struct eb_root *root, const void *x, unsigned int len)
+{
+ return __ebim_lookup(root, x, len);
+}
+
+/* Insert ebpt_node <new> into subtree starting at node root <root>.
+ * Only new->key needs be set with the key. The ebpt_node is returned.
+ * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * len is specified in bytes.
+ */
+struct ebpt_node *
+ebim_insert(struct eb_root *root, struct ebpt_node *new, unsigned int len)
+{
+ return __ebim_insert(root, new, len);
+}
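+
+/* Usage sketch (illustrative only): with indirect multi-byte nodes the key
+ * is not stored in the node itself, node->key merely points to a <len>-byte
+ * block owned by the caller, which must remain valid as long as the node
+ * stays in the tree:
+ *
+ *	struct eb_root root = EB_ROOT;
+ *	static const char blob[8] = "abcdefg";
+ *	struct ebpt_node n = { .key = (void *)blob };
+ *
+ *	ebim_insert(&root, &n, 8);
+ *	ebim_lookup(&root, "abcdefg", 8);	// returns &n
+ */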
diff --git a/src/ebistree.c b/src/ebistree.c
new file mode 100644
index 0000000..193950d
--- /dev/null
+++ b/src/ebistree.c
@@ -0,0 +1,42 @@
+/*
+ * Elastic Binary Trees - exported functions for Indirect String data nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebistree.h for more details about those functions */
+
+#include <import/ebistree.h>
+
+/* Find the first occurrence of a zero-terminated string <x> in the tree <root>.
+ * It's the caller's responsibility to use this function only on trees which
+ * only contain zero-terminated strings. If none can be found, return NULL.
+ */
+struct ebpt_node *ebis_lookup(struct eb_root *root, const char *x)
+{
+ return __ebis_lookup(root, x);
+}
+
+/* Insert ebpt_node <new> into subtree starting at node root <root>. Only
+ * new->key needs be set with the zero-terminated string key. The ebpt_node is
+ * returned. If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * caller is responsible for properly terminating the key with a zero.
+ */
+struct ebpt_node *ebis_insert(struct eb_root *root, struct ebpt_node *new)
+{
+ return __ebis_insert(root, new);
+}
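+
+/* Usage sketch (illustrative only): the node only references the string
+ * through node->key, so the string must outlive the node's presence in the
+ * tree:
+ *
+ *	struct eb_root root = EB_ROOT;
+ *	struct ebpt_node n = { .key = "example" };
+
+ *	ebis_insert(&root, &n);
+ *	ebis_lookup(&root, "example");	// returns &n
+ */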
diff --git a/src/ebmbtree.c b/src/ebmbtree.c
new file mode 100644
index 0000000..a3de9a1
--- /dev/null
+++ b/src/ebmbtree.c
@@ -0,0 +1,77 @@
+/*
+ * Elastic Binary Trees - exported functions for Multi-Byte data nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebmbtree.h for more details about those functions */
+
+#include <import/ebmbtree.h>
+
+/* Find the first occurrence of a key of <len> bytes in the tree <root>.
+ * If none can be found, return NULL.
+ */
+struct ebmb_node *
+ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
+{
+ return __ebmb_lookup(root, x, len);
+}
+
+/* Insert ebmb_node <new> into subtree starting at node root <root>.
+ * Only new->key needs be set with the key. The ebmb_node is returned.
+ * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * len is specified in bytes.
+ */
+struct ebmb_node *
+ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
+{
+ return __ebmb_insert(root, new, len);
+}
+
+/* Find the first occurrence of the longest prefix matching a key <x> in the
+ * tree <root>. It's the caller's responsibility to ensure that key <x> is at
+ * least as long as the keys in the tree. If none can be found, return NULL.
+ */
+struct ebmb_node *
+ebmb_lookup_longest(struct eb_root *root, const void *x)
+{
+ return __ebmb_lookup_longest(root, x);
+}
+
+/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
+ * tree <root>. If none can be found, return NULL.
+ */
+struct ebmb_node *
+ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
+{
+ return __ebmb_lookup_prefix(root, x, pfx);
+}
+
+/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
+ * Only new->key and new->pfx need be set with the key and its prefix length.
+ * Note that bits between <pfx> and <len> are theoretically ignored and should be
+ * zero, as it is not certain yet that they will always be ignored everywhere
+ * (eg in bit compare functions).
+ * The ebmb_node is returned.
+ * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * len is specified in bytes.
+ */
+struct ebmb_node *
+ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
+{
+ return __ebmb_insert_prefix(root, new, len);
+}
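+
+/* Usage sketch (illustrative only): prefix trees are typically used for
+ * longest-prefix matching on network addresses. The key bytes live in the
+ * node's flexible key[] array, so enough room must be allocated behind the
+ * node, and node.pfx holds the prefix length in bits:
+ *
+ *	struct eb_root root = EB_ROOT;
+ *	struct ebmb_node *n = calloc(1, sizeof(*n) + 4);
+ *	uint32_t net = htonl(0x0a000000);	// 10.0.0.0/8
+ *
+ *	memcpy(n->key, &net, 4);
+ *	n->node.pfx = 8;
+ *	ebmb_insert_prefix(&root, n, 4);
+ *	// a full-length address in network byte order then matches via:
+ *	// ebmb_lookup_longest(&root, &addr)
+ */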
diff --git a/src/ebpttree.c b/src/ebpttree.c
new file mode 100644
index 0000000..558d334
--- /dev/null
+++ b/src/ebpttree.c
@@ -0,0 +1,208 @@
+/*
+ * Elastic Binary Trees - exported functions for operations on pointer nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebpttree.h for more details about those functions */
+
+#include <import/ebpttree.h>
+
+struct ebpt_node *ebpt_insert(struct eb_root *root, struct ebpt_node *new)
+{
+ return __ebpt_insert(root, new);
+}
+
+struct ebpt_node *ebpt_lookup(struct eb_root *root, void *x)
+{
+ return __ebpt_lookup(root, x);
+}
+
+/*
+ * Find the last occurrence of the highest key in the tree <root>, which is
+ * equal to or less than <x>. NULL is returned if no key matches.
+ */
+struct ebpt_node *ebpt_lookup_le(struct eb_root *root, void *x)
+{
+ struct ebpt_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+			/* We reached a leaf, which means that the whole upper
+			 * parts were common. We will return either the current
+			 * node or its prev one if the former is too large.
+			 */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct ebpt_node, node.branches);
+ if (node->key <= x)
+ return node;
+ /* return prev */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct ebpt_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the rightmost node, or
+ * we don't and we skip the whole subtree to return the
+ * prev node before the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * prev node without first trying to escape from the
+ * tree.
+ */
+ if (node->key <= x) {
+ troot = node->node.branches.b[EB_RGHT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_RGHT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct ebpt_node, node.branches);
+ }
+ /* return prev */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if ((((ptr_t)x ^ (ptr_t)node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * small and we need to get its highest value, or it is
+ * too large, and we need to get the prev value.
+ */
+ if (((ptr_t)node->key >> node->node.bit) < ((ptr_t)x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_RGHT];
+ return ebpt_entry(eb_walk_down(troot, EB_RGHT), struct ebpt_node, node);
+ }
+
+ /* Further values will be too high here, so return the prev
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[((ptr_t)x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report previous node before the
+ * current one which is not above. <troot> is already initialised to
+ * the parent's branches.
+ */
+ while (eb_gettag(troot) == EB_LEFT) {
+ /* Walking up from left branch. We must ensure that we never
+ * walk beyond root.
+ */
+ if (unlikely(eb_clrtag((eb_untag(troot, EB_LEFT))->b[EB_RGHT]) == NULL))
+ return NULL;
+ troot = (eb_root_to_node(eb_untag(troot, EB_LEFT)))->node_p;
+ }
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_RGHT))->b[EB_LEFT];
+ node = ebpt_entry(eb_walk_down(troot, EB_RGHT), struct ebpt_node, node);
+ return node;
+}
+
+/*
+ * Find the first occurrence of the lowest key in the tree <root>, which is
+ * equal to or greater than <x>. NULL is returned if no key matches.
+ */
+struct ebpt_node *ebpt_lookup_ge(struct eb_root *root, void *x)
+{
+ struct ebpt_node *node;
+ eb_troot_t *troot;
+
+ troot = root->b[EB_LEFT];
+ if (unlikely(troot == NULL))
+ return NULL;
+
+ while (1) {
+ if ((eb_gettag(troot) == EB_LEAF)) {
+ /* We reached a leaf, which means that the whole upper
+ * parts were common. We will return either the current
+ * node or its next one if the former is too small.
+ */
+ node = container_of(eb_untag(troot, EB_LEAF),
+ struct ebpt_node, node.branches);
+ if (node->key >= x)
+ return node;
+ /* return next */
+ troot = node->node.leaf_p;
+ break;
+ }
+ node = container_of(eb_untag(troot, EB_NODE),
+ struct ebpt_node, node.branches);
+
+ if (node->node.bit < 0) {
+ /* We're at the top of a dup tree. Either we got a
+ * matching value and we return the leftmost node, or
+ * we don't and we skip the whole subtree to return the
+ * next node after the subtree. Note that since we're
+ * at the top of the dup tree, we can simply return the
+ * next node without first trying to escape from the
+ * tree.
+ */
+ if (node->key >= x) {
+ troot = node->node.branches.b[EB_LEFT];
+ while (eb_gettag(troot) != EB_LEAF)
+ troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
+ return container_of(eb_untag(troot, EB_LEAF),
+ struct ebpt_node, node.branches);
+ }
+ /* return next */
+ troot = node->node.node_p;
+ break;
+ }
+
+ if ((((ptr_t)x ^ (ptr_t)node->key) >> node->node.bit) >= EB_NODE_BRANCHES) {
+ /* No more common bits at all. Either this node is too
+ * large and we need to get its lowest value, or it is too
+ * small, and we need to get the next value.
+ */
+ if (((ptr_t)node->key >> node->node.bit) > ((ptr_t)x >> node->node.bit)) {
+ troot = node->node.branches.b[EB_LEFT];
+ return ebpt_entry(eb_walk_down(troot, EB_LEFT), struct ebpt_node, node);
+ }
+
+ /* Further values will be too low here, so return the next
+ * unique node (if it exists).
+ */
+ troot = node->node.node_p;
+ break;
+ }
+ troot = node->node.branches.b[((ptr_t)x >> node->node.bit) & EB_NODE_BRANCH_MASK];
+ }
+
+ /* If we get here, it means we want to report next node after the
+ * current one which is not below. <troot> is already initialised
+ * to the parent's branches.
+ */
+ while (eb_gettag(troot) != EB_LEFT)
+ /* Walking up from right branch, so we cannot be below root */
+ troot = (eb_root_to_node(eb_untag(troot, EB_RGHT)))->node_p;
+
+ /* Note that <troot> cannot be NULL at this stage */
+ troot = (eb_untag(troot, EB_LEFT))->b[EB_RGHT];
+ if (eb_clrtag(troot) == NULL)
+ return NULL;
+
+ node = ebpt_entry(eb_walk_down(troot, EB_LEFT), struct ebpt_node, node);
+ return node;
+}
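+
+/* Note (illustrative): keys are compared here as untyped pointer values
+ * (ptr_t), so ebpt_lookup_le()/ebpt_lookup_ge() order nodes by numeric
+ * address; e.g. ebpt_lookup_ge(&root, (void *)0x1000) returns the node
+ * whose key is the lowest address >= 0x1000.
+ */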
diff --git a/src/ebsttree.c b/src/ebsttree.c
new file mode 100644
index 0000000..a4fbe33
--- /dev/null
+++ b/src/ebsttree.c
@@ -0,0 +1,42 @@
+/*
+ * Elastic Binary Trees - exported functions for String data nodes.
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Consult ebsttree.h for more details about those functions */
+
+#include <import/ebsttree.h>
+
+/* Find the first occurrence of a zero-terminated string <x> in the tree <root>.
+ * It's the caller's responsibility to use this function only on trees which
+ * only contain zero-terminated strings. If none can be found, return NULL.
+ */
+struct ebmb_node *ebst_lookup(struct eb_root *root, const char *x)
+{
+ return __ebst_lookup(root, x);
+}
+
+/* Insert ebmb_node <new> into subtree starting at node root <root>. Only
+ * new->key needs be set with the zero-terminated string key. The ebmb_node is
+ * returned. If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
+ * caller is responsible for properly terminating the key with a zero.
+ */
+struct ebmb_node *ebst_insert(struct eb_root *root, struct ebmb_node *new)
+{
+ return __ebst_insert(root, new);
+}
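+
+/* Usage sketch (illustrative only): unlike the indirect variant, the string
+ * is stored in the node's flexible key[] array, so the node must be
+ * allocated with enough room behind it:
+ *
+ *	struct eb_root root = EB_ROOT;
+ *	const char *s = "example";
+ *	struct ebmb_node *n = calloc(1, sizeof(*n) + strlen(s) + 1);
+ *
+ *	memcpy(n->key, s, strlen(s) + 1);
+ *	ebst_insert(&root, n);
+ *	ebst_lookup(&root, "example");	// returns n
+ */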
diff --git a/src/ebtree.c b/src/ebtree.c
new file mode 100644
index 0000000..db27875
--- /dev/null
+++ b/src/ebtree.c
@@ -0,0 +1,50 @@
+/*
+ * Elastic Binary Trees - exported generic functions
+ * Version 6.0.6
+ * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <import/ebtree.h>
+
+void eb_delete(struct eb_node *node)
+{
+ __eb_delete(node);
+}
+
+/* used by insertion primitives */
+struct eb_node *eb_insert_dup(struct eb_node *sub, struct eb_node *new)
+{
+ return __eb_insert_dup(sub, new);
+}
+
+/* compares memory blocks m1 and m2 for up to <len> bytes. Immediately stops at
+ * the first non-matching byte. It returns 0 on full match, non-zero otherwise.
+ * One byte will always be checked so this must not be called with len==0. It
+ * takes 2+5cy/B on x86_64 and is ~29 bytes long.
+ */
+int eb_memcmp(const void *m1, const void *m2, size_t len)
+{
+ const char *p1 = (const char *)m1 + len;
+ const char *p2 = (const char *)m2 + len;
+ ssize_t ofs = -len;
+ char diff;
+
+ do {
+ diff = p1[ofs] - p2[ofs];
+ } while (!diff && ++ofs);
+ return diff;
+}
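+
+/* For example (illustrative): eb_memcmp("abc", "abd", 2) returns 0 since
+ * the first two bytes match, while eb_memcmp("abc", "abd", 3) returns the
+ * non-zero difference 'c' - 'd' observed at the first mismatching byte.
+ */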
diff --git a/src/errors.c b/src/errors.c
new file mode 100644
index 0000000..7a2d14a
--- /dev/null
+++ b/src/errors.c
@@ -0,0 +1,567 @@
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <syslog.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet-t.h>
+#include <haproxy/buf.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/ring.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+/* A global buffer used to store all startup alerts/warnings. It can then be
+ * retrieved from the CLI. */
+struct ring *startup_logs = NULL;
+uint tot_warnings = 0;
+#ifdef USE_SHM_OPEN
+static struct ring *shm_startup_logs = NULL;
+#endif
+
+/* A thread local buffer used to store all alerts/warnings. It can be used to
+ * retrieve them for CLI commands after startup.
+ */
+#define USER_MESSAGES_BUFSIZE 1024
+static THREAD_LOCAL struct buffer usermsgs_buf = BUF_NULL;
+
+/* A thread local context used for stderr output via ha_alert/warning/notice/diag.
+ */
+#define USERMSGS_CTX_BUFSIZE PATH_MAX
+static THREAD_LOCAL struct usermsgs_ctx usermsgs_ctx = { .str = BUF_NULL, };
+
+#ifdef USE_SHM_OPEN
+
+/* initialise an SHM for the startup logs and return its fd */
+static int startup_logs_new_shm()
+{
+ char *path = NULL;
+ int fd = -1;
+ int flags;
+
+ /* create a unique path per PID so we don't collide with another
+ process */
+ memprintf(&path, "/haproxy_startup_logs_%d", getpid());
+ fd = shm_open(path, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+ if (fd == -1)
+ goto error;
+ shm_unlink(path);
+ ha_free(&path);
+
+ if (ftruncate(fd, STARTUP_LOG_SIZE) == -1)
+ goto error;
+
+ flags = fcntl(fd, F_GETFD);
+ if (flags == -1)
+ goto error;
+ flags &= ~FD_CLOEXEC;
+ flags = fcntl(fd, F_SETFD, flags);
+ if (flags == -1)
+ goto error;
+
+ return fd;
+error:
+ if (fd != -1) {
+ close(fd);
+ fd = -1;
+ }
+ return fd;
+}
+
+/* mmap a startup-logs ring from <fd>.
+ * If <new> is set to one, initialize the buffer.
+ * Returns the ring, or NULL on failure.
+ */
+static struct ring *startup_logs_from_fd(int fd, int new)
+{
+ char *area;
+ struct ring *r = NULL;
+
+ if (fd == -1)
+ goto error;
+
+ area = mmap(NULL, STARTUP_LOG_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (area == MAP_FAILED || area == NULL)
+ goto error;
+
+ if (new)
+ r = ring_make_from_area(area, STARTUP_LOG_SIZE);
+ else
+ r = ring_cast_from_area(area);
+
+ if (r == NULL)
+ goto error;
+
+ shm_startup_logs = r; /* save the ptr so we can unmap later */
+
+ return r;
+error:
+ return NULL;
+}
+
+/*
+ * Use a shm across reexec of the master.
+ *
+ * During the startup of the master, a shm_open must be done and the FD saved
+ * into the HAPROXY_STARTUPLOGS_FD environment variable.
+ *
+ * When forking workers, the child must use a copy of the shm, not the shm itself.
+ *
+ * Once in wait mode, the shm must be copied and closed.
+ *
+ */
+void startup_logs_init()
+{
+ struct ring *r = NULL;
+ char *str_fd, *endptr;
+ int fd = -1;
+
+ str_fd = getenv("HAPROXY_STARTUPLOGS_FD");
+ if (str_fd) {
+ fd = strtol(str_fd, &endptr, 10);
+ if (*endptr != '\0')
+ goto error;
+ unsetenv("HAPROXY_STARTUPLOGS_FD");
+ }
+
+ /* during startup, or just after a reload.
+ * Note: the WAIT_ONLY env variable must be
+	 * checked in case of an early call */
+ if (!(global.mode & MODE_MWORKER_WAIT) &&
+ getenv("HAPROXY_MWORKER_WAIT_ONLY") == NULL) {
+ if (fd != -1)
+ close(fd);
+
+ fd = startup_logs_new_shm();
+ if (fd == -1)
+ goto error;
+
+ r = startup_logs_from_fd(fd, 1);
+ if (!r)
+ goto error;
+
+ str_fd = NULL;
+ memprintf(&str_fd, "%d", fd);
+ setenv("HAPROXY_STARTUPLOGS_FD", str_fd, 1);
+ ha_free(&str_fd);
+
+ } else {
+ /* in wait mode, copy the shm to an allocated buffer */
+ struct ring *prev = NULL;
+
+ if (fd == -1)
+ goto error;
+
+ prev = startup_logs_from_fd(fd, 0);
+ if (!prev)
+ goto error;
+
+ r = startup_logs_dup(prev);
+ if (!r)
+ goto error;
+ startup_logs_free(prev);
+ close(fd);
+ }
+
+ startup_logs = r;
+
+ return;
+error:
+ if (fd != -1)
+ close(fd);
+ /* couldn't get a mmap to work */
+ startup_logs = ring_new(STARTUP_LOG_SIZE);
+
+}
+
+#else /* ! USE_SHM_OPEN */
+
+void startup_logs_init()
+{
+ startup_logs = ring_new(STARTUP_LOG_SIZE);
+}
+
+#endif
+
+/* free the startup logs, and unmap it if it was a shm */
+void startup_logs_free(struct ring *r)
+{
+#ifdef USE_SHM_OPEN
+ if (r == shm_startup_logs)
+ munmap(r, STARTUP_LOG_SIZE);
+ else
+#endif /* USE_SHM_OPEN */
+ ring_free(r);
+}
+
+/* duplicate a startup-logs ring which was previously allocated in a shm */
+struct ring *startup_logs_dup(struct ring *src)
+{
+ struct ring *dst = NULL;
+
+ /* must use the size of the previous buffer */
+ dst = ring_new(b_size(&src->buf));
+ if (!dst)
+ goto error;
+
+ b_reset(&dst->buf);
+ b_ncat(&dst->buf, &src->buf, b_data(&src->buf));
+error:
+ return dst;
+}
+
+/* Put msg in usermsgs_buf.
+ *
+ * The message should not be terminated by a newline because this function
+ * manually inserts it.
+ *
+ * If there is not enough room in the buffer, the message is silently discarded.
+ * Do not forget to frequently clear the buffer.
+ */
+static void usermsgs_put(const struct ist *msg)
+{
+ /* Allocate the buffer if not already done. */
+ if (unlikely(b_is_null(&usermsgs_buf))) {
+ usermsgs_buf.area = malloc(USER_MESSAGES_BUFSIZE * sizeof(char));
+ if (usermsgs_buf.area)
+ usermsgs_buf.size = USER_MESSAGES_BUFSIZE;
+ }
+
+ if (likely(!b_is_null(&usermsgs_buf))) {
+ if (b_room(&usermsgs_buf) >= msg->len + 2) {
+ /* Insert the message + newline. */
+ b_putblk(&usermsgs_buf, msg->ptr, msg->len);
+ b_putchr(&usermsgs_buf, '\n');
+ /* Insert NUL outside of the buffer. */
+ *b_tail(&usermsgs_buf) = '\0';
+ }
+ }
+}
+
+/* Clear the user messages log buffer.
+ *
+ * <prefix> sets the thread-local context prefix prepended to every output
+ * following this call. It can be NULL if not necessary.
+ */
+void usermsgs_clr(const char *prefix)
+{
+ if (likely(!b_is_null(&usermsgs_buf))) {
+ b_reset(&usermsgs_buf);
+ usermsgs_buf.area[0] = '\0';
+ }
+
+ usermsgs_ctx.prefix = prefix;
+}
+
+/* Check if the user messages buffer is empty. */
+int usermsgs_empty(void)
+{
+ return !!(b_is_null(&usermsgs_buf) || !b_data(&usermsgs_buf));
+}
+
+/* Return the messages log buffer content. */
+const char *usermsgs_str(void)
+{
+ if (unlikely(b_is_null(&usermsgs_buf)))
+ return "";
+
+ return b_head(&usermsgs_buf);
+}
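+
+/* Typical lifecycle (illustrative sketch): a CLI handler clears the buffer
+ * before running an action, lets ha_warning()/ha_alert() accumulate
+ * messages into it (outside of MODE_STARTING), then reports the result:
+ *
+ *	usermsgs_clr("config");
+ *	// ... run the action, which may call ha_warning()/ha_alert() ...
+ *	if (!usermsgs_empty())
+ *		return cli_err(appctx, usermsgs_str());
+ */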
+
+/* Set thread-local context info to prefix forthcoming stderr output during
+ * configuration parsing.
+ *
+ * <file> and <line> specify the location of the parsed configuration.
+ *
+ * <obj> can be of various types. If not NULL, the string prefix generated will
+ * depend on its type.
+ */
+void set_usermsgs_ctx(const char *file, int line, enum obj_type *obj)
+{
+ usermsgs_ctx.file = file;
+ usermsgs_ctx.line = line;
+ usermsgs_ctx.obj = obj;
+}
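+
+/* For instance (illustrative), after set_usermsgs_ctx("haproxy.cfg", 42, NULL),
+ * messages emitted on stderr will be prefixed with "[haproxy.cfg:42] : ".
+ */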
+
+/* Set thread-local context info to prefix forthcoming stderr output. It will
+ * be set as a complement to possibly already defined file/line.
+ *
+ * <obj> can be of various types. If not NULL, the string prefix generated will
+ * depend on its type.
+ */
+void register_parsing_obj(enum obj_type *obj)
+{
+ usermsgs_ctx.obj = obj;
+}
+
+/* Reset thread-local context info for stderr output. */
+void reset_usermsgs_ctx(void)
+{
+ usermsgs_ctx.file = NULL;
+ usermsgs_ctx.line = 0;
+ usermsgs_ctx.obj = NULL;
+}
+
+static void generate_usermsgs_ctx_str(void)
+{
+ struct usermsgs_ctx *ctx = &usermsgs_ctx;
+ void *area;
+ int ret;
+
+ if (unlikely(b_is_null(&ctx->str))) {
+ area = calloc(USERMSGS_CTX_BUFSIZE, sizeof(*area));
+ if (area)
+ ctx->str = b_make(area, USERMSGS_CTX_BUFSIZE, 0, 0);
+ }
+
+ if (likely(!b_is_null(&ctx->str))) {
+ b_reset(&ctx->str);
+
+ if (ctx->prefix) {
+ ret = snprintf(b_tail(&ctx->str), b_room(&ctx->str),
+ "%s : ", ctx->prefix);
+ b_add(&ctx->str, MIN(ret, b_room(&ctx->str)));
+ }
+
+ if (ctx->file) {
+ ret = snprintf(b_tail(&ctx->str), b_room(&ctx->str),
+ "[%s:%d] : ", ctx->file, ctx->line);
+ b_add(&ctx->str, MIN(ret, b_room(&ctx->str)));
+ }
+
+ switch (obj_type(ctx->obj)) {
+ case OBJ_TYPE_SERVER:
+ ret = snprintf(b_tail(&ctx->str), b_room(&ctx->str),
+ "'server %s/%s' : ",
+ __objt_server(ctx->obj)->proxy->id,
+ __objt_server(ctx->obj)->id);
+ b_add(&ctx->str, MIN(ret, b_room(&ctx->str)));
+ break;
+
+ case OBJ_TYPE_NONE:
+ default:
+ break;
+ }
+
+ if (!b_data(&ctx->str))
+ snprintf(b_tail(&ctx->str), b_room(&ctx->str), "%s", "");
+ }
+}
+
+/* Generic function to display messages prefixed by a label */
+static void print_message(int use_usermsgs_ctx, const char *label, const char *fmt, va_list argp)
+{
+ struct ist msg_ist = IST_NULL;
+ char *head, *parsing_str, *msg;
+ char prefix[11]; // '[' + 8 chars + ']' + 0.
+
+ *prefix = '[';
+ strncpy(prefix + 1, label, sizeof(prefix) - 2);
+ msg = prefix + strlen(prefix);
+ *msg++ = ']';
+ while (msg < prefix + sizeof(prefix) - 1)
+ *msg++ = ' ';
+ *msg = 0;
+
+ head = parsing_str = msg = NULL;
+ memprintf(&head, "%s (%u) : ", prefix, (uint)getpid());
+ memvprintf(&msg, fmt, argp);
+
+ /* trim the trailing '\n' */
+ msg_ist = ist(msg);
+ if (msg_ist.len > 0 && msg_ist.ptr[msg_ist.len - 1] == '\n')
+ msg_ist.len--;
+
+ if (use_usermsgs_ctx) {
+ generate_usermsgs_ctx_str();
+ parsing_str = b_head(&usermsgs_ctx.str);
+ reset_usermsgs_ctx();
+ }
+ else {
+ parsing_str = "";
+ }
+
+ if (global.mode & MODE_STARTING) {
+ if (unlikely(!startup_logs))
+ startup_logs_init();
+
+ if (likely(startup_logs)) {
+ struct ist m[3];
+
+ m[0] = ist(head);
+ m[1] = ist(parsing_str);
+ m[2] = msg_ist;
+
+ ring_write(startup_logs, ~0, 0, 0, m, 3);
+ }
+ }
+ else {
+ usermsgs_put(&msg_ist);
+ }
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
+ fprintf(stderr, "%s%s%s", head, parsing_str, msg);
+ fflush(stderr);
+ }
+
+ free(head);
+ free(msg);
+}
+
+static void print_message_args(int use_usermsgs_ctx, const char *label, const char *fmt, ...)
+{
+ va_list argp;
+ va_start(argp, fmt);
+ print_message(use_usermsgs_ctx, label, fmt, argp);
+ va_end(argp);
+}
+
+/*
+ * Display a notice with the haproxy version and executable path when the
+ * first message is emitted in starting mode.
+ */
+static void warn_exec_path()
+{
+ if (!(warned & WARN_EXEC_PATH) && (global.mode & MODE_STARTING)) {
+ const char *path = get_exec_path();
+
+ warned |= WARN_EXEC_PATH;
+ print_message_args(0, "NOTICE", "haproxy version is %s\n", haproxy_version);
+ if (path)
+ print_message_args(0, "NOTICE", "path to executable is %s\n", path);
+ }
+}
+
+/*
+ * Displays the message on stderr with the pid.
+ */
+void ha_alert(const char *fmt, ...)
+{
+ va_list argp;
+
+ warn_exec_path();
+ va_start(argp, fmt);
+ print_message(1, "ALERT", fmt, argp);
+ va_end(argp);
+}
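+
+/* As an illustration, ha_alert("config : parsing error\n") emits a line of
+ * the form:
+ *
+ *	[ALERT]    (1234) : config : parsing error
+ *
+ * where the bracketed label is padded to a fixed width, (1234) is the pid,
+ * and the parsing context prefix, when set, is inserted before the message.
+ */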
+
+/*
+ * Displays the message on stderr with the pid.
+ */
+void ha_warning(const char *fmt, ...)
+{
+ va_list argp;
+
+ warned |= WARN_ANY;
+ HA_ATOMIC_INC(&tot_warnings);
+
+ warn_exec_path();
+ va_start(argp, fmt);
+ print_message(1, "WARNING", fmt, argp);
+ va_end(argp);
+}
+
+/*
+ * Variant of _ha_diag_warning with va_list.
+ * Use it only if MODE_DIAG has been previously checked.
+ */
+void _ha_vdiag_warning(const char *fmt, va_list argp)
+{
+ warned |= WARN_ANY;
+ HA_ATOMIC_INC(&tot_warnings);
+
+ warn_exec_path();
+ print_message(1, "DIAG", fmt, argp);
+}
+
+/*
+ * Output a diagnostic warning.
+ * Use it only if MODE_DIAG has been previously checked.
+ */
+void _ha_diag_warning(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ _ha_vdiag_warning(fmt, argp);
+ va_end(argp);
+}
+
+/*
+ * Output a diagnostic warning. Do nothing if MODE_DIAG is not on.
+ */
+void ha_diag_warning(const char *fmt, ...)
+{
+ va_list argp;
+
+ if (global.mode & MODE_DIAG) {
+ va_start(argp, fmt);
+ _ha_vdiag_warning(fmt, argp);
+ va_end(argp);
+ }
+}
+
+/*
+ * Displays the message on stderr with the pid.
+ */
+void ha_notice(const char *fmt, ...)
+{
+ va_list argp;
+
+ va_start(argp, fmt);
+ print_message(1, "NOTICE", fmt, argp);
+ va_end(argp);
+}
+
+/*
+ * Displays the message on <out> only if quiet mode is not set.
+ */
+void qfprintf(FILE *out, const char *fmt, ...)
+{
+ va_list argp;
+
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
+ va_start(argp, fmt);
+ vfprintf(out, fmt, argp);
+ fflush(out);
+ va_end(argp);
+ }
+}
+
+
+/* parse the "show startup-logs" command, returns 1 if a message is returned, otherwise zero */
+static int cli_parse_show_startup_logs(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (!startup_logs)
+ return cli_msg(appctx, LOG_INFO, "\n"); // nothing to print
+
+ return ring_attach_cli(startup_logs, appctx, 0);
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "show", "startup-logs", NULL }, "show startup-logs : report logs emitted during HAProxy startup", cli_parse_show_startup_logs, NULL, NULL, NULL, ACCESS_MASTER },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+
+static void deinit_errors_buffers()
+{
+ ring_free(_HA_ATOMIC_XCHG(&startup_logs, NULL));
+ ha_free(&usermsgs_buf.area);
+ ha_free(&usermsgs_ctx.str.area);
+}
+
+/* errors might be used in threads and even before forking, hence the two deinit hooks */
+REGISTER_PER_THREAD_FREE(deinit_errors_buffers);
+REGISTER_POST_DEINIT(deinit_errors_buffers);
diff --git a/src/ev_epoll.c b/src/ev_epoll.c
new file mode 100644
index 0000000..c42cf2e
--- /dev/null
+++ b/src/ev_epoll.c
@@ -0,0 +1,413 @@
+/*
+ * FD polling functions for Linux epoll
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <sys/epoll.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/signal.h>
+#include <haproxy/ticks.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+
+/* private data */
+static THREAD_LOCAL struct epoll_event *epoll_events = NULL;
+static int epoll_fd[MAX_THREADS] __read_mostly; // per-thread epoll_fd
+
+#ifndef EPOLLRDHUP
+/* EPOLLRDHUP was defined late in libc, and it appeared in kernel 2.6.17 */
+#define EPOLLRDHUP 0x2000
+#endif
+
+/*
+ * Immediately remove file descriptor from epoll set upon close.
+ * Since we forked, some fds share inodes with the other process, and epoll may
+ * send us events even though this process closed the fd (see man 7 epoll,
+ * "Questions and answers", Q 6).
+ */
+static void __fd_clo(int fd)
+{
+ if (unlikely(fdtab[fd].state & FD_CLONED)) {
+ unsigned long m = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_recv) | _HA_ATOMIC_LOAD(&polled_mask[fd].poll_send);
+ int tgrp = fd_tgid(fd);
+ struct epoll_event ev;
+ int i;
+
+ if (!m)
+ return;
+
+ /* since FDs may only be shared per group and are only closed
+ * once entirely reset, it should never happen that we have to
+ * close an FD for another group, unless we're stopping from the
+ * wrong thread or during startup, which is what we're checking
+ * for. Regardless, it is not a problem to do so.
+ */
+ if (unlikely(!(global.mode & MODE_STARTING))) {
+ CHECK_IF(tgid != tgrp && !thread_isolated());
+ }
+
+ for (i = ha_tgroup_info[tgrp-1].base; i < ha_tgroup_info[tgrp-1].base + ha_tgroup_info[tgrp-1].count; i++)
+ if (m & ha_thread_info[i].ltid_bit)
+ epoll_ctl(epoll_fd[i], EPOLL_CTL_DEL, fd, &ev);
+ }
+}
+
+static void _update_fd(int fd)
+{
+ int en, opcode;
+ struct epoll_event ev = { };
+ ulong pr, ps;
+
+ en = fdtab[fd].state;
+ pr = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_recv);
+ ps = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_send);
+
+ /* Try to force EPOLLET on FDs that support it */
+ if (fdtab[fd].state & FD_ET_POSSIBLE) {
+ /* already done ? */
+ if (pr & ps & ti->ltid_bit)
+ return;
+
+ /* enable ET polling in both directions */
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, ti->ltid_bit);
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, ti->ltid_bit);
+ opcode = EPOLL_CTL_ADD;
+ ev.events = EPOLLIN | EPOLLRDHUP | EPOLLOUT | EPOLLET;
+ goto done;
+ }
+
+ /* if we're already polling or are going to poll for this FD and it's
+ * neither active nor ready, force it to be active so that we don't
+ * needlessly unsubscribe then re-subscribe it.
+ */
+ if (!(en & (FD_EV_READY_R | FD_EV_SHUT_R | FD_EV_ERR_RW | FD_POLL_ERR)) &&
+ ((en & FD_EV_ACTIVE_W) || ((ps | pr) & ti->ltid_bit)))
+ en |= FD_EV_ACTIVE_R;
+
+ if ((ps | pr) & ti->ltid_bit) {
+ if (!(fdtab[fd].thread_mask & ti->ltid_bit) || !(en & FD_EV_ACTIVE_RW)) {
+ /* fd removed from poll list */
+ opcode = EPOLL_CTL_DEL;
+ if (pr & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~ti->ltid_bit);
+ if (ps & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~ti->ltid_bit);
+ }
+ else {
+ if (((en & FD_EV_ACTIVE_R) != 0) == ((pr & ti->ltid_bit) != 0) &&
+ ((en & FD_EV_ACTIVE_W) != 0) == ((ps & ti->ltid_bit) != 0))
+ return;
+ if (en & FD_EV_ACTIVE_R) {
+ if (!(pr & ti->ltid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, ti->ltid_bit);
+ } else {
+ if (pr & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~ti->ltid_bit);
+ }
+ if (en & FD_EV_ACTIVE_W) {
+ if (!(ps & ti->ltid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, ti->ltid_bit);
+ } else {
+ if (ps & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~ti->ltid_bit);
+ }
+ /* fd status changed */
+ opcode = EPOLL_CTL_MOD;
+ }
+ }
+ else if ((fdtab[fd].thread_mask & ti->ltid_bit) && (en & FD_EV_ACTIVE_RW)) {
+ /* new fd in the poll list */
+ opcode = EPOLL_CTL_ADD;
+ if (en & FD_EV_ACTIVE_R)
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, ti->ltid_bit);
+ if (en & FD_EV_ACTIVE_W)
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, ti->ltid_bit);
+ }
+ else {
+ return;
+ }
+
+ /* construct the epoll events based on new state */
+ if (en & FD_EV_ACTIVE_R)
+ ev.events |= EPOLLIN | EPOLLRDHUP;
+
+ if (en & FD_EV_ACTIVE_W)
+ ev.events |= EPOLLOUT;
+
+ done:
+ ev.data.fd = fd;
+ epoll_ctl(epoll_fd[tid], opcode, fd, &ev);
+}
+
+/*
+ * Linux epoll() poller
+ */
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int status;
+ int fd;
+ int count;
+ int updt_idx;
+ int wait_time;
+ int old_fd;
+
+ /* first, scan the update list to find polling changes */
+ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
+ fd = fd_updt[updt_idx];
+
+ if (!fd_grab_tgid(fd, tgid)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~ti->ltid_bit);
+
+ if (fdtab[fd].owner)
+ _update_fd(fd);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+ fd_nbupdt = 0;
+
+ /* Scan the shared update list */
+ for (old_fd = fd = update_list[tgid - 1].first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+
+ if (!fd_grab_tgid(fd, tgid)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ if (!(fdtab[fd].update_mask & ti->ltid_bit)) {
+ fd_drop_tgid(fd);
+ continue;
+ }
+
+ done_update_polling(fd);
+
+ if (fdtab[fd].owner)
+ _update_fd(fd);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ /* Now let's wait for polled events. */
+ wait_time = wake ? 0 : compute_poll_timeout(exp);
+ clock_entering_poll();
+
+ do {
+ int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
+
+ status = epoll_wait(epoll_fd[tid], epoll_events, global.tune.maxpollevents, timeout);
+ clock_update_local_date(timeout, status);
+
+ if (status) {
+ activity[tid].poll_io++;
+ break;
+ }
+ if (timeout || !wait_time)
+ break;
+ if (tick_isset(exp) && tick_is_expired(exp, now_ms))
+ break;
+ } while (1);
+
+ clock_update_global_date();
+ fd_leaving_poll(wait_time, status);
+
+ /* process polled events */
+
+ for (count = 0; count < status; count++) {
+ unsigned int n, e;
+
+ e = epoll_events[count].events;
+ fd = epoll_events[count].data.fd;
+
+ if ((e & EPOLLRDHUP) && !(cur_poller.flags & HAP_POLL_F_RDHUP))
+ _HA_ATOMIC_OR(&cur_poller.flags, HAP_POLL_F_RDHUP);
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+ n = ((e & EPOLLIN) ? FD_EV_READY_R : 0) |
+ ((e & EPOLLOUT) ? FD_EV_READY_W : 0) |
+ ((e & EPOLLRDHUP) ? FD_EV_SHUT_R : 0) |
+ ((e & EPOLLHUP) ? FD_EV_SHUT_RW : 0) |
+ ((e & EPOLLERR) ? FD_EV_ERR_RW : 0);
+
+ fd_update_events(fd, n);
+ }
+ /* the caller will take care of cached events */
+}
+
+static int init_epoll_per_thread()
+{
+ epoll_events = calloc(1, sizeof(struct epoll_event) * global.tune.maxpollevents);
+ if (epoll_events == NULL)
+ goto fail_alloc;
+
+ if (MAX_THREADS > 1 && tid) {
+ epoll_fd[tid] = epoll_create(global.maxsock + 1);
+ if (epoll_fd[tid] < 0)
+ goto fail_fd;
+ }
+
+ /* we may have to unregister some events initially registered on the
+ * original fd when it was alone, and/or to register events on the new
+ * fd for this thread. Let's just mark them as updated, the poller will
+ * do the rest.
+ */
+ fd_reregister_all(tgid, ti->ltid_bit);
+
+ return 1;
+ fail_fd:
+ free(epoll_events);
+ fail_alloc:
+ return 0;
+}
+
+static void deinit_epoll_per_thread()
+{
+ if (MAX_THREADS > 1 && tid)
+ close(epoll_fd[tid]);
+
+ ha_free(&epoll_events);
+}
+
+/*
+ * Initialization of the epoll() poller.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * disables the poller by setting its pref to 0.
+ */
+static int _do_init(struct poller *p)
+{
+ p->private = NULL;
+
+ epoll_fd[tid] = epoll_create(global.maxsock + 1);
+ if (epoll_fd[tid] < 0)
+ goto fail_fd;
+
+ hap_register_per_thread_init(init_epoll_per_thread);
+ hap_register_per_thread_deinit(deinit_epoll_per_thread);
+
+ return 1;
+
+ fail_fd:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the epoll() poller.
+ * Memory is released and the poller is marked as unselectable.
+ */
+static void _do_term(struct poller *p)
+{
+ if (epoll_fd[tid] >= 0) {
+ close(epoll_fd[tid]);
+ epoll_fd[tid] = -1;
+ }
+
+ p->private = NULL;
+ p->pref = 0;
+}
+
+/*
+ * Check that the poller works.
+ * Returns 1 if OK, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ int fd;
+
+ fd = epoll_create(global.maxsock + 1);
+ if (fd < 0)
+ return 0;
+ close(fd);
+ return 1;
+}
+
+/*
+ * Recreate the epoll file descriptor after a fork(). Returns 1 if OK,
+ * otherwise 0. It will ensure that all processes will not share their
+ * epoll_fd. Some side effects were encountered because of this, such
+ * as epoll_wait() returning an FD which was previously deleted.
+ */
+static int _do_fork(struct poller *p)
+{
+ if (epoll_fd[tid] >= 0)
+ close(epoll_fd[tid]);
+ epoll_fd[tid] = epoll_create(global.maxsock + 1);
+ if (epoll_fd[tid] < 0)
+ return 0;
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+ int i;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ epoll_fd[i] = -1;
+
+ p = &pollers[nbpollers++];
+
+ p->name = "epoll";
+ p->pref = 300;
+ p->flags = HAP_POLL_F_ERRHUP; // note: RDHUP might be dynamically added
+ p->private = NULL;
+
+ p->clo = __fd_clo;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+ p->fork = _do_fork;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ev_evports.c b/src/ev_evports.c
new file mode 100644
index 0000000..07676e6
--- /dev/null
+++ b/src/ev_evports.c
@@ -0,0 +1,441 @@
+/*
+ * FD polling functions for SunOS event ports.
+ *
+ * Copyright 2018 Joyent, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <poll.h>
+#include <port.h>
+#include <errno.h>
+#include <syslog.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/signal.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+
+/*
+ * Private data:
+ */
+static int evports_fd[MAX_THREADS]; // per-thread evports_fd
+static THREAD_LOCAL port_event_t *evports_evlist = NULL;
+static THREAD_LOCAL int evports_evlist_max = 0;
+
+/*
+ * Convert the "state" member of "fdtab" into an event ports event mask.
+ */
+static inline int evports_state_to_events(int state)
+{
+ int events = 0;
+
+ if (state & FD_EV_ACTIVE_W)
+ events |= POLLOUT;
+ if (state & FD_EV_ACTIVE_R)
+ events |= POLLIN;
+
+ return (events);
+}
+
+/*
+ * Associate or dissociate this file descriptor with the event port, using the
+ * specified event mask.
+ */
+static inline void evports_resync_fd(int fd, int events)
+{
+ if (events == 0)
+ port_dissociate(evports_fd[tid], PORT_SOURCE_FD, fd);
+ else
+ port_associate(evports_fd[tid], PORT_SOURCE_FD, fd, events, NULL);
+}
+
+static void _update_fd(int fd)
+{
+ int en;
+ int events;
+ ulong pr, ps;
+
+ en = fdtab[fd].state;
+ pr = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_recv);
+ ps = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_send);
+
+ if (!(fdtab[fd].thread_mask & ti->ltid_bit) || !(en & FD_EV_ACTIVE_RW)) {
+ if (!((pr | ps) & ti->ltid_bit)) {
+ /* fd was not watched, it's still not */
+ return;
+ }
+ /* fd totally removed from poll list */
+ events = 0;
+ if (pr & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~ti->ltid_bit);
+ if (ps & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~ti->ltid_bit);
+ }
+ else {
+ /* OK fd has to be monitored, it was either added or changed */
+ events = evports_state_to_events(en);
+ if (en & FD_EV_ACTIVE_R) {
+ if (!(pr & ti->ltid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, ti->ltid_bit);
+ } else {
+ if (pr & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~ti->ltid_bit);
+ }
+ if (en & FD_EV_ACTIVE_W) {
+ if (!(ps & ti->ltid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, ti->ltid_bit);
+ } else {
+ if (ps & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~ti->ltid_bit);
+ }
+
+ }
+ evports_resync_fd(fd, events);
+}
+
+/*
+ * Event Ports poller. This routine interacts with the file descriptor
+ * management data structures and routines; see the large block comment in
+ * "src/fd.c" for more information.
+ */
+
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int i;
+ int wait_time;
+ struct timespec timeout_ts;
+ unsigned int nevlist;
+ int fd, old_fd;
+ int status;
+
+ /*
+ * Scan the list of file descriptors with an updated status:
+ */
+ for (i = 0; i < fd_nbupdt; i++) {
+ fd = fd_updt[i];
+
+ if (!fd_grab_tgid(fd, tgid)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~ti->ltid_bit);
+
+ if (fdtab[fd].owner)
+ _update_fd(fd);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+ fd_nbupdt = 0;
+
+ /* Scan the shared update list */
+ for (old_fd = fd = update_list[tgid - 1].first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+
+ if (!fd_grab_tgid(fd, tgid)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ if (!(fdtab[fd].update_mask & ti->ltid_bit)) {
+ fd_drop_tgid(fd);
+ continue;
+ }
+
+ done_update_polling(fd);
+
+ if (fdtab[fd].owner)
+ _update_fd(fd);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ /* Now let's wait for polled events. */
+ wait_time = wake ? 0 : compute_poll_timeout(exp);
+ clock_entering_poll();
+
+ do {
+ int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
+ int interrupted = 0;
+ nevlist = 1; /* desired number of events to be retrieved */
+ timeout_ts.tv_sec = (timeout / 1000);
+ timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
+
+ status = port_getn(evports_fd[tid],
+ evports_evlist,
+ evports_evlist_max,
+ &nevlist, /* updated to the number of events retrieved */
+ &timeout_ts);
+ if (status != 0) {
+ int e = errno;
+ switch (e) {
+ case ETIME:
+ /*
+ * Though the manual page has not historically made it
+ * clear, port_getn() can return -1 with an errno of
+ * ETIME and still have returned some number of events.
+ */
+ /* nevlist >= 0 */
+ break;
+ default:
+ nevlist = 0;
+ interrupted = 1;
+ break;
+ }
+ }
+ clock_update_local_date(timeout, nevlist);
+
+ if (nevlist || interrupted)
+ break;
+ if (timeout || !wait_time)
+ break;
+ if (tick_isset(exp) && tick_is_expired(exp, now_ms))
+ break;
+ } while(1);
+
+ clock_update_global_date();
+ fd_leaving_poll(wait_time, nevlist);
+
+ if (nevlist > 0)
+ activity[tid].poll_io++;
+
+ for (i = 0; i < nevlist; i++) {
+ unsigned int n = 0;
+ int events, rebind_events;
+ int ret;
+
+ fd = evports_evlist[i].portev_object;
+ events = evports_evlist[i].portev_events;
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+ /*
+ * By virtue of receiving an event for this file descriptor, it
+ * is no longer associated with the port in question. Store
+ * the previous event mask so that we may reassociate after
+ * processing is complete.
+ */
+ rebind_events = evports_state_to_events(fdtab[fd].state);
+ /* rebind_events != 0 */
+
+ /*
+ * Set bits based on the events we received from the port:
+ */
+ n = ((events & POLLIN) ? FD_EV_READY_R : 0) |
+ ((events & POLLOUT) ? FD_EV_READY_W : 0) |
+ ((events & POLLHUP) ? FD_EV_SHUT_RW : 0) |
+ ((events & POLLERR) ? FD_EV_ERR_RW : 0);
+
+ /*
+ * Call connection processing callbacks. Note that it's
+ * possible for this processing to alter the required event
+ * port association; i.e., the "state" member of the "fdtab"
+ * entry. If it changes, the fd will be placed on the updated
+ * list for processing the next time we are called.
+ */
+ ret = fd_update_events(fd, n);
+
+ /* polling will be on this instance if the FD was migrated */
+ if (ret == FD_UPDT_MIGRATED)
+ continue;
+
+ /*
+ * This file descriptor was closed during the processing of
+ * polled events. No need to reassociate.
+ */
+ if (ret == FD_UPDT_CLOSED)
+ continue;
+
+ /*
+ * Reassociate with the port, using the same event mask as
+ * before. This call will not result in a dissociation as we
+ * asserted that _some_ events needed to be rebound above.
+ *
+ * Reassociating with the same mask allows us to mimic the
+ * level-triggered behaviour of poll(2). In the event that we
+ * are interested in the same events on the next turn of the
+ * loop, this represents no extra work.
+ *
+ * If this additional port_associate(3C) call becomes a
+ * performance problem, we would need to verify that we can
+ * correctly interact with the file descriptor cache and update
+ * list (see "src/fd.c") to avoid reassociating here, or to use
+ * a different events mask.
+ */
+ evports_resync_fd(fd, rebind_events);
+ }
+}
+
+static int init_evports_per_thread()
+{
+ evports_evlist_max = global.tune.maxpollevents;
+ evports_evlist = calloc(evports_evlist_max, sizeof(*evports_evlist));
+ if (evports_evlist == NULL) {
+ goto fail_alloc;
+ }
+
+ if (MAX_THREADS > 1 && tid) {
+ if ((evports_fd[tid] = port_create()) == -1) {
+ goto fail_fd;
+ }
+ }
+
+ /* we may have to unregister some events initially registered on the
+ * original fd when it was alone, and/or to register events on the new
+ * fd for this thread. Let's just mark them as updated, the poller will
+ * do the rest.
+ */
+ fd_reregister_all(tgid, ti->ltid_bit);
+
+ return 1;
+
+ fail_fd:
+ ha_free(&evports_evlist);
+ evports_evlist_max = 0;
+ fail_alloc:
+ return 0;
+}
+
+static void deinit_evports_per_thread()
+{
+ if (MAX_THREADS > 1 && tid)
+ close(evports_fd[tid]);
+
+ ha_free(&evports_evlist);
+ evports_evlist_max = 0;
+}
+
+/*
+ * Initialisation of the event ports poller.
+ * Returns 0 in case of failure, non-zero in case of success.
+ */
+static int _do_init(struct poller *p)
+{
+ p->private = NULL;
+
+ if ((evports_fd[tid] = port_create()) == -1) {
+ goto fail;
+ }
+
+ hap_register_per_thread_init(init_evports_per_thread);
+ hap_register_per_thread_deinit(deinit_evports_per_thread);
+
+ return 1;
+
+fail:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the event ports poller.
+ * All resources are released and the poller is marked as inoperative.
+ */
+static void _do_term(struct poller *p)
+{
+ if (evports_fd[tid] != -1) {
+ close(evports_fd[tid]);
+ evports_fd[tid] = -1;
+ }
+
+ p->private = NULL;
+ p->pref = 0;
+
+ ha_free(&evports_evlist);
+ evports_evlist_max = 0;
+}
+
+/*
+ * Run-time check to make sure we can allocate the resources needed for
+ * the poller to function correctly.
+ * Returns 1 on success, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ int fd;
+
+ if ((fd = port_create()) == -1) {
+ return 0;
+ }
+
+ close(fd);
+ return 1;
+}
+
+/*
+ * Close and recreate the event port after fork(). Returns 1 on success,
+ * otherwise 0. If this function fails, "_do_term()" must be called to
+ * clean up the poller.
+ */
+static int _do_fork(struct poller *p)
+{
+ if (evports_fd[tid] != -1) {
+ close(evports_fd[tid]);
+ }
+
+ if ((evports_fd[tid] = port_create()) == -1) {
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+ int i;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ evports_fd[i] = -1;
+
+ p = &pollers[nbpollers++];
+
+ p->name = "evports";
+ p->pref = 300;
+ p->flags = HAP_POLL_F_ERRHUP;
+ p->private = NULL;
+
+ p->clo = NULL;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+ p->fork = _do_fork;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
diff --git a/src/ev_kqueue.c b/src/ev_kqueue.c
new file mode 100644
index 0000000..f123e7b
--- /dev/null
+++ b/src/ev_kqueue.c
@@ -0,0 +1,380 @@
+/*
+ * FD polling functions for FreeBSD kqueue()
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <sys/event.h>
+#include <sys/time.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/signal.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+
+
+/* private data */
+static int kqueue_fd[MAX_THREADS] __read_mostly; // per-thread kqueue_fd
+static THREAD_LOCAL struct kevent *kev = NULL;
+static struct kevent *kev_out = NULL; // Trash buffer for kevent() to write the eventlist in
+
+static int _update_fd(int fd, int start)
+{
+ int en;
+ int changes = start;
+ ulong pr, ps;
+
+ en = fdtab[fd].state;
+ pr = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_recv);
+ ps = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_send);
+
+ if (!(fdtab[fd].thread_mask & ti->ltid_bit) || !(en & FD_EV_ACTIVE_RW)) {
+ if (!((pr | ps) & ti->ltid_bit)) {
+ /* fd was not watched, it's still not */
+ return changes;
+ }
+ /* fd totally removed from poll list */
+ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
+ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+ if (pr & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~ti->ltid_bit);
+ if (ps & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~ti->ltid_bit);
+ }
+ else {
+ /* OK fd has to be monitored, it was either added or changed */
+
+ if (en & FD_EV_ACTIVE_R) {
+ if (!(pr & ti->ltid_bit)) {
+ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, ti->ltid_bit);
+ }
+ }
+ else if (pr & ti->ltid_bit) {
+ EV_SET(&kev[changes++], fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
+ HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~ti->ltid_bit);
+ }
+
+ if (en & FD_EV_ACTIVE_W) {
+ if (!(ps & ti->ltid_bit)) {
+ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_ADD, 0, 0, NULL);
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, ti->ltid_bit);
+ }
+ }
+ else if (ps & ti->ltid_bit) {
+ EV_SET(&kev[changes++], fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~ti->ltid_bit);
+ }
+
+ }
+ return changes;
+}
+
+/*
+ * kqueue() poller
+ */
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int status;
+ int count, fd, wait_time;
+ struct timespec timeout_ts;
+ int updt_idx;
+ int changes = 0;
+ int old_fd;
+
+ timeout_ts.tv_sec = 0;
+ timeout_ts.tv_nsec = 0;
+ /* first, scan the update list to find changes */
+ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
+ fd = fd_updt[updt_idx];
+
+ if (!fd_grab_tgid(fd, tgid)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~ti->ltid_bit);
+
+ if (fdtab[fd].owner)
+ changes = _update_fd(fd, changes);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+ /* Scan the global update list */
+ for (old_fd = fd = update_list[tgid - 1].first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+
+ if (!fd_grab_tgid(fd, tgid)) {
+ /* was reassigned */
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+
+ if (!(fdtab[fd].update_mask & ti->ltid_bit)) {
+ fd_drop_tgid(fd);
+ continue;
+ }
+
+ done_update_polling(fd);
+
+ if (fdtab[fd].owner)
+ changes = _update_fd(fd, changes);
+ else
+ activity[tid].poll_drop_fd++;
+
+ fd_drop_tgid(fd);
+ }
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ if (changes) {
+#ifdef EV_RECEIPT
+ kev[0].flags |= EV_RECEIPT;
+#else
+ /* If EV_RECEIPT isn't defined, just add an invalid entry,
+ * so that we get an error and kevent() stops before scanning
+ * the kqueue.
+ */
+ EV_SET(&kev[changes++], -1, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
+#endif
+ kevent(kqueue_fd[tid], kev, changes, kev_out, changes, &timeout_ts);
+ }
+ fd_nbupdt = 0;
+
+ /* Now let's wait for polled events. */
+ wait_time = wake ? 0 : compute_poll_timeout(exp);
+ fd = global.tune.maxpollevents;
+ clock_entering_poll();
+
+ do {
+ int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time;
+
+ timeout_ts.tv_sec = (timeout / 1000);
+ timeout_ts.tv_nsec = (timeout % 1000) * 1000000;
+
+ status = kevent(kqueue_fd[tid], // int kq
+ NULL, // const struct kevent *changelist
+ 0, // int nchanges
+ kev, // struct kevent *eventlist
+ fd, // int nevents
+ &timeout_ts); // const struct timespec *timeout
+ clock_update_local_date(timeout, status);
+
+ if (status) {
+ activity[tid].poll_io++;
+ break;
+ }
+ if (timeout || !wait_time)
+ break;
+ if (tick_isset(exp) && tick_is_expired(exp, now_ms))
+ break;
+ } while (1);
+
+ clock_update_global_date();
+ fd_leaving_poll(wait_time, status);
+
+ for (count = 0; count < status; count++) {
+ unsigned int n = 0;
+
+ fd = kev[count].ident;
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+ if (kev[count].filter == EVFILT_READ) {
+ if (kev[count].data || !(kev[count].flags & EV_EOF))
+ n |= FD_EV_READY_R;
+ if (kev[count].flags & EV_EOF)
+ n |= FD_EV_SHUT_R;
+ }
+ else if (kev[count].filter == EVFILT_WRITE) {
+ n |= FD_EV_READY_W;
+ if (kev[count].flags & EV_EOF)
+ n |= FD_EV_ERR_RW;
+ }
+
+ fd_update_events(fd, n);
+ }
+}
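+
+/* About the EV_RECEIPT trick used above: when EV_RECEIPT is set on a change,
+ * kevent() writes one receipt per change into the eventlist (flagged
+ * EV_ERROR, with data == 0 on success) and does not return pending I/O
+ * events, so the changelist can be flushed without draining the queue.
+ * Hedged sketch of how such receipts could be checked (ts_zero is a
+ * hypothetical zero timespec):
+ *
+ *   int i, n = kevent(kq, kev, changes, kev_out, changes, &ts_zero);
+ *
+ *   for (i = 0; i < n; i++)
+ *       if ((kev_out[i].flags & EV_ERROR) && kev_out[i].data != 0)
+ *           ; // change <i> failed, kev_out[i].data holds the errno
+ */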
+
+
+static int init_kqueue_per_thread()
+{
+ /* we can have up to two events per fd, so allocate enough to store
+ * 2*maxsock events, plus an extra one in case EV_RECEIPT isn't
+ * defined, so that we can add an invalid entry and get an error,
+ * to avoid scanning the kqueue uselessly.
+ */
+ kev = calloc(1, sizeof(struct kevent) * (2 * global.maxsock + 1));
+ if (kev == NULL)
+ goto fail_alloc;
+
+ if (MAX_THREADS > 1 && tid) {
+ kqueue_fd[tid] = kqueue();
+ if (kqueue_fd[tid] < 0)
+ goto fail_fd;
+ }
+
+ /* we may have to unregister some events initially registered on the
+ * original fd when it was alone, and/or to register events on the new
+ * fd for this thread. Let's just mark them as updated, the poller will
+ * do the rest.
+ */
+ fd_reregister_all(tgid, ti->ltid_bit);
+
+ return 1;
+ fail_fd:
+ free(kev);
+ fail_alloc:
+ return 0;
+}
+
+static void deinit_kqueue_per_thread()
+{
+ if (MAX_THREADS > 1 && tid)
+ close(kqueue_fd[tid]);
+
+ ha_free(&kev);
+}
+
+/*
+ * Initialization of the kqueue() poller.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * disables the poller by setting its pref to 0.
+ */
+static int _do_init(struct poller *p)
+{
+ p->private = NULL;
+
+ /* we can have up to two events per fd, so allocate enough to store
+ * 2*maxsock events, plus an extra one in case EV_RECEIPT isn't
+ * defined, so that we can add an invalid entry and get an error,
+ * to avoid scanning the kqueue uselessly.
+ */
+ kev_out = calloc(1, sizeof(struct kevent) * (2 * global.maxsock + 1));
+ if (!kev_out)
+ goto fail_alloc;
+
+ kqueue_fd[tid] = kqueue();
+ if (kqueue_fd[tid] < 0)
+ goto fail_fd;
+
+ hap_register_per_thread_init(init_kqueue_per_thread);
+ hap_register_per_thread_deinit(deinit_kqueue_per_thread);
+ return 1;
+
+ fail_fd:
+ ha_free(&kev_out);
+fail_alloc:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the kqueue() poller.
+ * Memory is released and the poller is marked as unselectable.
+ */
+static void _do_term(struct poller *p)
+{
+ if (kqueue_fd[tid] >= 0) {
+ close(kqueue_fd[tid]);
+ kqueue_fd[tid] = -1;
+ }
+
+ p->private = NULL;
+ p->pref = 0;
+ if (kev_out) {
+ ha_free(&kev_out);
+ }
+}
+
+/*
+ * Check that the poller works.
+ * Returns 1 if OK, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ int fd;
+
+ fd = kqueue();
+ if (fd < 0)
+ return 0;
+ close(fd);
+ return 1;
+}
+
+/*
+ * Recreate the kqueue file descriptor after a fork(). Returns 1 if OK,
+ * otherwise 0. Note that some pollers need to be reopened after a fork()
+ * (such as kqueue), and some others may fail to do so in a chroot.
+ */
+static int _do_fork(struct poller *p)
+{
+ kqueue_fd[tid] = kqueue();
+ if (kqueue_fd[tid] < 0)
+ return 0;
+ return 1;
+}
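+
+/* kqueue descriptors are not inherited by children created with fork()
+ * (see kqueue(2)), hence the recreation above. Simplified post-fork pattern
+ * (illustrative only):
+ *
+ *   if (fork() == 0)
+ *       kq = kqueue();  // the parent's kq is unusable in the child
+ */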
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+ int i;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ kqueue_fd[i] = -1;
+
+ p = &pollers[nbpollers++];
+
+ p->name = "kqueue";
+ p->pref = 300;
+ p->flags = HAP_POLL_F_RDHUP | HAP_POLL_F_ERRHUP;
+ p->private = NULL;
+
+ p->clo = NULL;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+ p->fork = _do_fork;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ev_poll.c b/src/ev_poll.c
new file mode 100644
index 0000000..e98630c
--- /dev/null
+++ b/src/ev_poll.c
@@ -0,0 +1,348 @@
+/*
+ * FD polling functions for generic poll()
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE // for POLLRDHUP on Linux
+
+#include <unistd.h>
+#include <poll.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/signal.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+
+
+#ifndef POLLRDHUP
+/* POLLRDHUP was defined late in libc, and it appeared in kernel 2.6.17 */
+#define POLLRDHUP 0
+#endif
+
+static int maxfd; /* # of the highest fd + 1 */
+static unsigned int *fd_evts[2];
+
+/* private data */
+static THREAD_LOCAL int nbfd = 0;
+static THREAD_LOCAL struct pollfd *poll_events = NULL;
+
+static void __fd_clo(int fd)
+{
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+}
+
+static void _update_fd(int fd, int *max_add_fd)
+{
+ int en;
+ ulong pr, ps;
+
+ en = fdtab[fd].state;
+ pr = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_recv);
+ ps = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_send);
+
+ /* we have a single state for all threads, which is why we
+ * don't check the tid_bit. The first thread to see the update
+ * takes it for every other one.
+ */
+ if (!(en & FD_EV_ACTIVE_RW)) {
+ if (!(pr | ps)) {
+ /* fd was not watched, it's still not */
+ return;
+ }
+ /* fd totally removed from poll list */
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, 0);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, 0);
+ }
+ else {
+ /* OK fd has to be monitored, it was either added or changed */
+ if (!(en & FD_EV_ACTIVE_R)) {
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ if (pr & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~ti->ltid_bit);
+ } else {
+ hap_fd_set(fd, fd_evts[DIR_RD]);
+ if (!(pr & ti->ltid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, ti->ltid_bit);
+ }
+
+ if (!(en & FD_EV_ACTIVE_W)) {
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+ if (ps & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~ti->ltid_bit);
+ } else {
+ hap_fd_set(fd, fd_evts[DIR_WR]);
+ if (!(ps & ti->ltid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, ti->ltid_bit);
+ }
+
+ if (fd > *max_add_fd)
+ *max_add_fd = fd;
+ }
+}
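+
+/* hap_fd_set()/hap_fd_clr() used above maintain one bit per fd in the
+ * fd_evts[] arrays. Assuming the usual bit-array layout (a sketch; the real
+ * helpers live in haproxy's fd.h and operate atomically), setting and
+ * clearing boil down to:
+ *
+ *   evts[fd / (8 * sizeof(*evts))] |=  1U << (fd % (8 * sizeof(*evts)));
+ *   evts[fd / (8 * sizeof(*evts))] &= ~(1U << (fd % (8 * sizeof(*evts))));
+ */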
+
+/*
+ * Poll() poller
+ */
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int status;
+ int fd;
+ int wait_time;
+ int updt_idx;
+ int fds, count;
+ int sr, sw;
+ int old_maxfd, new_maxfd, max_add_fd;
+ unsigned rn, wn; /* read new, write new */
+ int old_fd;
+
+ max_add_fd = -1;
+
+ /* first, scan the update list to find changes */
+ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
+ fd = fd_updt[updt_idx];
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~ti->ltid_bit);
+ if (!fdtab[fd].owner) {
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+ _update_fd(fd, &max_add_fd);
+ }
+
+ /* Now scan the global update list */
+ for (old_fd = fd = update_list[tgid - 1].first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+ if (fdtab[fd].update_mask & ti->ltid_bit) {
+ /* Cheat a bit: as the state is global to all pollers,
+ * we don't need every thread to take care of the
+ * update.
+ */
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tg->threads_enabled);
+ done_update_polling(fd);
+ } else
+ continue;
+ if (!fdtab[fd].owner)
+ continue;
+ _update_fd(fd, &max_add_fd);
+ }
+
+ /* maybe we added at least one fd larger than maxfd */
+ for (old_maxfd = maxfd; old_maxfd <= max_add_fd; ) {
+ if (_HA_ATOMIC_CAS(&maxfd, &old_maxfd, max_add_fd + 1))
+ break;
+ }
+
+ /* maxfd doesn't need to be precise but it needs to cover *all* active
+ * FDs. Thus we only shrink it if we have such an opportunity. The algo
+ * is simple: look for the previously used place, try to update maxfd to
+ * point to it, and abort if maxfd changed in the meantime.
+ */
+ old_maxfd = maxfd;
+ do {
+ new_maxfd = old_maxfd;
+ while (new_maxfd - 1 >= 0 && !fdtab[new_maxfd - 1].owner)
+ new_maxfd--;
+ if (new_maxfd >= old_maxfd)
+ break;
+ } while (!_HA_ATOMIC_CAS(&maxfd, &old_maxfd, new_maxfd));
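+ /* note: on failure the CAS reloads old_maxfd with the value currently
+ * observed (compare-and-exchange semantics), so each retry recomputes
+ * the shrink target from fresh data.
+ */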
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ fd_nbupdt = 0;
+
+ nbfd = 0;
+ for (fds = 0; (fds * 8*sizeof(**fd_evts)) < maxfd; fds++) {
+ rn = fd_evts[DIR_RD][fds];
+ wn = fd_evts[DIR_WR][fds];
+
+ if (!(rn|wn))
+ continue;
+
+ for (count = 0, fd = fds * 8*sizeof(**fd_evts); count < 8*sizeof(**fd_evts) && fd < maxfd; count++, fd++) {
+ sr = (rn >> count) & 1;
+ sw = (wn >> count) & 1;
+ if ((sr|sw)) {
+ if (!fdtab[fd].owner) {
+ /* should normally not happen here except
+ * due to rare thread concurrency
+ */
+ continue;
+ }
+
+ if (!(fdtab[fd].thread_mask & ti->ltid_bit)) {
+ continue;
+ }
+
+ poll_events[nbfd].fd = fd;
+ poll_events[nbfd].events = (sr ? (POLLIN | POLLRDHUP) : 0) | (sw ? POLLOUT : 0);
+ nbfd++;
+ }
+ }
+ }
+
+ /* Now let's wait for polled events. */
+ wait_time = wake ? 0 : compute_poll_timeout(exp);
+ clock_entering_poll();
+ status = poll(poll_events, nbfd, wait_time);
+ clock_update_date(wait_time, status);
+
+ fd_leaving_poll(wait_time, status);
+
+ if (status > 0)
+ activity[tid].poll_io++;
+
+ for (count = 0; status > 0 && count < nbfd; count++) {
+ unsigned int n;
+ int e = poll_events[count].revents;
+
+ fd = poll_events[count].fd;
+
+ if ((e & POLLRDHUP) && !(cur_poller.flags & HAP_POLL_F_RDHUP))
+ _HA_ATOMIC_OR(&cur_poller.flags, HAP_POLL_F_RDHUP);
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+ if (!(e & ( POLLOUT | POLLIN | POLLERR | POLLHUP | POLLRDHUP )))
+ continue;
+
+ /* ok, we found one active fd */
+ status--;
+
+ n = ((e & POLLIN) ? FD_EV_READY_R : 0) |
+ ((e & POLLOUT) ? FD_EV_READY_W : 0) |
+ ((e & POLLRDHUP) ? FD_EV_SHUT_R : 0) |
+ ((e & POLLHUP) ? FD_EV_SHUT_RW : 0) |
+ ((e & POLLERR) ? FD_EV_ERR_RW : 0);
+
+ fd_update_events(fd, n);
+ }
+}
+
+
+static int init_poll_per_thread()
+{
+ poll_events = calloc(1, sizeof(struct pollfd) * global.maxsock);
+ if (poll_events == NULL)
+ return 0;
+ return 1;
+}
+
+static void deinit_poll_per_thread()
+{
+ ha_free(&poll_events);
+}
+
+/*
+ * Initialization of the poll() poller.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * disables the poller by setting its pref to 0.
+ */
+static int _do_init(struct poller *p)
+{
+ __label__ fail_swevt, fail_srevt;
+ int fd_evts_bytes;
+
+ p->private = NULL;
+
+ /* this old poller uses a process-wide FD list that cannot work with
+ * groups.
+ */
+ if (global.nbtgroups > 1)
+ goto fail_srevt;
+
+ fd_evts_bytes = (global.maxsock + sizeof(**fd_evts) * 8 - 1) / (sizeof(**fd_evts) * 8) * sizeof(**fd_evts);
+
+ if ((fd_evts[DIR_RD] = calloc(1, fd_evts_bytes)) == NULL)
+ goto fail_srevt;
+ if ((fd_evts[DIR_WR] = calloc(1, fd_evts_bytes)) == NULL)
+ goto fail_swevt;
+
+ hap_register_per_thread_init(init_poll_per_thread);
+ hap_register_per_thread_deinit(deinit_poll_per_thread);
+
+ return 1;
+
+ fail_swevt:
+ free(fd_evts[DIR_RD]);
+ fail_srevt:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the poll() poller.
+ * Memory is released and the poller is marked as unselectable.
+ */
+static void _do_term(struct poller *p)
+{
+ free(fd_evts[DIR_WR]);
+ free(fd_evts[DIR_RD]);
+ p->private = NULL;
+ p->pref = 0;
+}
+
+/*
+ * Check that the poller works.
+ * Returns 1 if OK, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+ p = &pollers[nbpollers++];
+
+ p->name = "poll";
+ p->pref = 200;
+ p->flags = HAP_POLL_F_ERRHUP;
+ p->private = NULL;
+
+ p->clo = __fd_clo;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ev_select.c b/src/ev_select.c
new file mode 100644
index 0000000..eadd588
--- /dev/null
+++ b/src/ev_select.c
@@ -0,0 +1,335 @@
+/*
+ * FD polling functions for generic select()
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+
+
+/* private data */
+static int maxfd; /* # of the highest fd + 1 */
+static unsigned int *fd_evts[2];
+static THREAD_LOCAL fd_set *tmp_evts[2];
+
+/* Immediately remove the entry upon close() */
+static void __fd_clo(int fd)
+{
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+}
+
+static void _update_fd(int fd, int *max_add_fd)
+{
+ int en;
+ ulong pr, ps;
+
+ en = fdtab[fd].state;
+ pr = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_recv);
+ ps = _HA_ATOMIC_LOAD(&polled_mask[fd].poll_send);
+
+ /* we have a single state for all threads, which is why we
+ * don't check the tid_bit. The first thread to see the update
+ * takes it for every other one.
+ */
+ if (!(en & FD_EV_ACTIVE_RW)) {
+ if (!(pr | ps)) {
+ /* fd was not watched, it's still not */
+ return;
+ }
+ /* fd totally removed from poll list */
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, 0);
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, 0);
+ }
+ else {
+ /* OK fd has to be monitored, it was either added or changed */
+ if (!(en & FD_EV_ACTIVE_R)) {
+ hap_fd_clr(fd, fd_evts[DIR_RD]);
+ if (pr & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_recv, ~ti->ltid_bit);
+ } else {
+ hap_fd_set(fd, fd_evts[DIR_RD]);
+ if (!(pr & ti->ltid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_recv, ti->ltid_bit);
+ }
+
+ if (!(en & FD_EV_ACTIVE_W)) {
+ hap_fd_clr(fd, fd_evts[DIR_WR]);
+ if (ps & ti->ltid_bit)
+ _HA_ATOMIC_AND(&polled_mask[fd].poll_send, ~ti->ltid_bit);
+ } else {
+ hap_fd_set(fd, fd_evts[DIR_WR]);
+ if (!(ps & ti->ltid_bit))
+ _HA_ATOMIC_OR(&polled_mask[fd].poll_send, ti->ltid_bit);
+ }
+
+ if (fd > *max_add_fd)
+ *max_add_fd = fd;
+ }
+}
+
+/*
+ * Select() poller
+ */
+static void _do_poll(struct poller *p, int exp, int wake)
+{
+ int status;
+ int fd, i;
+ struct timeval delta;
+ int delta_ms;
+ int fds;
+ int updt_idx;
+ char count;
+ int readnotnull, writenotnull;
+ int old_maxfd, new_maxfd, max_add_fd;
+ int old_fd;
+
+ max_add_fd = -1;
+
+ /* first, scan the update list to find changes */
+ for (updt_idx = 0; updt_idx < fd_nbupdt; updt_idx++) {
+ fd = fd_updt[updt_idx];
+
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~ti->ltid_bit);
+ if (!fdtab[fd].owner) {
+ activity[tid].poll_drop_fd++;
+ continue;
+ }
+ _update_fd(fd, &max_add_fd);
+ }
+ /* Now scan the global update list */
+ for (old_fd = fd = update_list[tgid - 1].first; fd != -1; fd = fdtab[fd].update.next) {
+ if (fd == -2) {
+ fd = old_fd;
+ continue;
+ }
+ else if (fd <= -3)
+ fd = -fd -4;
+ if (fd == -1)
+ break;
+ if (fdtab[fd].update_mask & ti->ltid_bit) {
+ /* Cheat a bit: as the state is global to all pollers,
+ * we don't need every thread to take care of the
+ * update.
+ */
+ _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tg->threads_enabled);
+ done_update_polling(fd);
+ } else
+ continue;
+ if (!fdtab[fd].owner)
+ continue;
+ _update_fd(fd, &max_add_fd);
+ }
+
+
+ /* maybe we added at least one fd larger than maxfd */
+ for (old_maxfd = maxfd; old_maxfd <= max_add_fd; ) {
+ if (_HA_ATOMIC_CAS(&maxfd, &old_maxfd, max_add_fd + 1))
+ break;
+ }
+
+ /* maxfd doesn't need to be precise but it needs to cover *all* active
+ * FDs. Thus we only shrink it if we have such an opportunity. The algo
+ * is simple: look for the previously used place, try to update maxfd to
+ * point to it, and abort if maxfd changed in the meantime.
+ */
+ old_maxfd = maxfd;
+ do {
+ new_maxfd = old_maxfd;
+ while (new_maxfd - 1 >= 0 && !fdtab[new_maxfd - 1].owner)
+ new_maxfd--;
+ if (new_maxfd >= old_maxfd)
+ break;
+ } while (!_HA_ATOMIC_CAS(&maxfd, &old_maxfd, new_maxfd));
+
+ thread_idle_now();
+ thread_harmless_now();
+
+ fd_nbupdt = 0;
+
+ /* let's restore fdset state */
+ readnotnull = 0; writenotnull = 0;
+ for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) {
+ readnotnull |= (*(((int*)tmp_evts[DIR_RD])+i) = *(((int*)fd_evts[DIR_RD])+i)) != 0;
+ writenotnull |= (*(((int*)tmp_evts[DIR_WR])+i) = *(((int*)fd_evts[DIR_WR])+i)) != 0;
+ }
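+ /* note: select() overwrites the fd_sets it is given, which is why the
+ * shared fd_evts[] bitmaps are copied into the per-thread tmp_evts[]
+ * above before every call.
+ */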
+
+ /* now let's wait for events */
+ delta_ms = wake ? 0 : compute_poll_timeout(exp);
+ delta.tv_sec = (delta_ms / 1000);
+ delta.tv_usec = (delta_ms % 1000) * 1000;
+ clock_entering_poll();
+ status = select(maxfd,
+ readnotnull ? tmp_evts[DIR_RD] : NULL,
+ writenotnull ? tmp_evts[DIR_WR] : NULL,
+ NULL,
+ &delta);
+ clock_update_date(delta_ms, status);
+ fd_leaving_poll(delta_ms, status);
+
+ if (status <= 0)
+ return;
+
+ activity[tid].poll_io++;
+
+ for (fds = 0; (fds * BITS_PER_INT) < maxfd; fds++) {
+ if ((((int *)(tmp_evts[DIR_RD]))[fds] | ((int *)(tmp_evts[DIR_WR]))[fds]) == 0)
+ continue;
+
+ for (count = BITS_PER_INT, fd = fds * BITS_PER_INT; count && fd < maxfd; count--, fd++) {
+ unsigned int n = 0;
+
+ if (FD_ISSET(fd, tmp_evts[DIR_RD]))
+ n |= FD_EV_READY_R;
+
+ if (FD_ISSET(fd, tmp_evts[DIR_WR]))
+ n |= FD_EV_READY_W;
+
+ if (!n)
+ continue;
+
+#ifdef DEBUG_FD
+ _HA_ATOMIC_INC(&fdtab[fd].event_count);
+#endif
+
+ fd_update_events(fd, n);
+ }
+ }
+}
+
+static int init_select_per_thread()
+{
+ int fd_set_bytes;
+
+ fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
+ tmp_evts[DIR_RD] = calloc(1, fd_set_bytes);
+ if (tmp_evts[DIR_RD] == NULL)
+ goto fail;
+ tmp_evts[DIR_WR] = calloc(1, fd_set_bytes);
+ if (tmp_evts[DIR_WR] == NULL)
+ goto fail;
+ return 1;
+ fail:
+ free(tmp_evts[DIR_RD]);
+ free(tmp_evts[DIR_WR]);
+ return 0;
+}
+
+static void deinit_select_per_thread()
+{
+ ha_free(&tmp_evts[DIR_WR]);
+ ha_free(&tmp_evts[DIR_RD]);
+}
+
+/*
+ * Initialization of the select() poller.
+ * Returns 0 in case of failure, non-zero in case of success. If it fails, it
+ * disables the poller by setting its pref to 0.
+ */
+static int _do_init(struct poller *p)
+{
+ int fd_set_bytes;
+
+ p->private = NULL;
+
+ /* this old poller uses a process-wide FD list that cannot work with
+ * groups.
+ */
+ if (global.nbtgroups > 1)
+ goto fail_srevt;
+
+ if (global.maxsock > FD_SETSIZE)
+ goto fail_srevt;
+
+ fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE;
+
+ if ((fd_evts[DIR_RD] = calloc(1, fd_set_bytes)) == NULL)
+ goto fail_srevt;
+ if ((fd_evts[DIR_WR] = calloc(1, fd_set_bytes)) == NULL)
+ goto fail_swevt;
+
+ hap_register_per_thread_init(init_select_per_thread);
+ hap_register_per_thread_deinit(deinit_select_per_thread);
+
+ return 1;
+
+ fail_swevt:
+ free(fd_evts[DIR_RD]);
+ fail_srevt:
+ p->pref = 0;
+ return 0;
+}
+
+/*
+ * Termination of the select() poller.
+ * Memory is released and the poller is marked as unselectable.
+ */
+static void _do_term(struct poller *p)
+{
+ free(fd_evts[DIR_WR]);
+ free(fd_evts[DIR_RD]);
+ p->private = NULL;
+ p->pref = 0;
+}
+
+/*
+ * Check that the poller works.
+ * Returns 1 if OK, otherwise 0.
+ */
+static int _do_test(struct poller *p)
+{
+ if (global.maxsock > FD_SETSIZE)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Registers the poller.
+ */
+static void _do_register(void)
+{
+ struct poller *p;
+
+ if (nbpollers >= MAX_POLLERS)
+ return;
+ p = &pollers[nbpollers++];
+
+ p->name = "select";
+ p->pref = 150;
+ p->flags = 0;
+ p->private = NULL;
+
+ p->clo = __fd_clo;
+ p->test = _do_test;
+ p->init = _do_init;
+ p->term = _do_term;
+ p->poll = _do_poll;
+}
+
+INITCALL0(STG_REGISTER, _do_register);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/event_hdl.c b/src/event_hdl.c
new file mode 100644
index 0000000..aeb4d24
--- /dev/null
+++ b/src/event_hdl.c
@@ -0,0 +1,999 @@
+/*
+ * general purpose event handlers management
+ *
+ * Copyright 2022 HAProxy Technologies
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <string.h>
+#include <haproxy/event_hdl.h>
+#include <haproxy/compiler.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+#include <haproxy/errors.h>
+#include <haproxy/signal.h>
+#include <haproxy/xxhash.h>
+#include <haproxy/cfgparse.h>
+
+/* event type changes in the event_hdl-t.h file should be reflected in the
+ * map below to allow string-to-type and type-to-string conversions
+ */
+static struct event_hdl_sub_type_map event_hdl_sub_type_map[] = {
+ {"NONE", EVENT_HDL_SUB_NONE},
+ {"SERVER", EVENT_HDL_SUB_SERVER},
+ {"SERVER_ADD", EVENT_HDL_SUB_SERVER_ADD},
+ {"SERVER_DEL", EVENT_HDL_SUB_SERVER_DEL},
+ {"SERVER_UP", EVENT_HDL_SUB_SERVER_UP},
+ {"SERVER_DOWN", EVENT_HDL_SUB_SERVER_DOWN},
+ {"SERVER_STATE", EVENT_HDL_SUB_SERVER_STATE},
+ {"SERVER_ADMIN", EVENT_HDL_SUB_SERVER_ADMIN},
+ {"SERVER_CHECK", EVENT_HDL_SUB_SERVER_CHECK},
+ {"SERVER_INETADDR", EVENT_HDL_SUB_SERVER_INETADDR},
+};
+
+/* internal types (only used in this file) */
+struct event_hdl_async_task_default_ctx
+{
+ event_hdl_async_equeue e_queue; /* event queue list */
+ event_hdl_cb_async func; /* event handling func */
+};
+
+/* memory pools declarations */
+DECLARE_STATIC_POOL(pool_head_sub, "ehdl_sub", sizeof(struct event_hdl_sub));
+DECLARE_STATIC_POOL(pool_head_sub_event, "ehdl_sub_e", sizeof(struct event_hdl_async_event));
+DECLARE_STATIC_POOL(pool_head_sub_event_data, "ehdl_sub_ed", sizeof(struct event_hdl_async_event_data));
+DECLARE_STATIC_POOL(pool_head_sub_taskctx, "ehdl_sub_tctx", sizeof(struct event_hdl_async_task_default_ctx));
+
+/* global event_hdl tunables (public variable) */
+struct event_hdl_tune event_hdl_tune;
+
+/* global subscription list (implicit where NULL is used as sublist argument) */
+static event_hdl_sub_list global_event_hdl_sub_list;
+
+/* all known subscription lists are tracked in this list (including the global one) */
+static struct mt_list known_event_hdl_sub_list = MT_LIST_HEAD_INIT(known_event_hdl_sub_list);
+
+static void _event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list);
+
+static void event_hdl_deinit(struct sig_handler *sh)
+{
+ event_hdl_sub_list *cur_list;
+ struct mt_list *elt1, elt2;
+
+ /* destroy all known subscription lists */
+ mt_list_for_each_entry_safe(cur_list, &known_event_hdl_sub_list, known, elt1, elt2) {
+ /* remove cur elem from list */
+ MT_LIST_DELETE_SAFE(elt1);
+ /* then destroy it */
+ _event_hdl_sub_list_destroy(cur_list);
+ }
+}
+
+static void event_hdl_init(void)
+{
+ /* initialize global subscription list */
+ event_hdl_sub_list_init(&global_event_hdl_sub_list);
+ /* register the deinit function; it will be called on soft-stop */
+ signal_register_fct(0, event_hdl_deinit, 0);
+
+ /* set some default values */
+ event_hdl_tune.max_events_at_once = EVENT_HDL_MAX_AT_ONCE;
+}
+
+/* general purpose hashing function for computing
+ * an ID based on <scope> x <name>.
+ * It is your responsibility to make sure <scope> is not used
+ * elsewhere in the code (or that you are fine with sharing
+ * the scope).
+ */
+inline uint64_t event_hdl_id(const char *scope, const char *name)
+{
+ XXH64_state_t state;
+
+ XXH64_reset(&state, 0);
+ XXH64_update(&state, scope, strlen(scope));
+ XXH64_update(&state, name, strlen(name));
+ return XXH64_digest(&state);
+}
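+
+/* Usage sketch (hypothetical scope/name strings): two call sites computing
+ * event_hdl_id("my_module", "srv_watch") obtain the same 64-bit id and can
+ * thus address the same subscription later, e.g.:
+ *
+ *   uint64_t id = event_hdl_id("my_module", "srv_watch");
+ *
+ *   event_hdl_lookup_unsubscribe(NULL, id);  // NULL = global sub list
+ */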
+
+/* takes a sub_type as input and returns the corresponding
+ * printable string, or "N/A" if not found.
+ * If not found, an error is reported to stderr so the developers
+ * know that a sub_type is missing its associated string in event_hdl-t.h
+ */
+const char *event_hdl_sub_type_to_string(struct event_hdl_sub_type sub_type)
+{
+ int it;
+
+ for (it = 0; it < (int)(sizeof(event_hdl_sub_type_map) / sizeof(event_hdl_sub_type_map[0])); it++) {
+ if (sub_type.family == event_hdl_sub_type_map[it].type.family &&
+ sub_type.subtype == event_hdl_sub_type_map[it].type.subtype)
+ return event_hdl_sub_type_map[it].name;
+ }
+ ha_alert("event_hdl-t.h: missing sub_type string representation.\n"
+ "Please reflect any changes in event_hdl_sub_type_map.\n");
+ return "N/A";
+}
+
+/* returns the internal sub_type corresponding
+ * to the printable representation <name>
+ * or EVENT_HDL_SUB_NONE if no such event exists
+ * (see event_hdl-t.h for the complete list of supported types)
+ */
+struct event_hdl_sub_type event_hdl_string_to_sub_type(const char *name)
+{
+ int it;
+
+ for (it = 0; it < (int)(sizeof(event_hdl_sub_type_map) / sizeof(event_hdl_sub_type_map[0])); it++) {
+ if (!strcmp(name, event_hdl_sub_type_map[it].name))
+ return event_hdl_sub_type_map[it].type;
+ }
+ return EVENT_HDL_SUB_NONE;
+}
+
+/* Takes <subscriptions> sub list as input and returns a printable string
+ * containing every sub_type contained in <subscriptions>,
+ * separated by the '|' char.
+ * Returns NULL if no sub_types are found in <subscriptions>.
+ * This function leverages memprintf, thus it is up to the
+ * caller to free the returned value (if != NULL) when it is no
+ * longer used.
+ */
+char *event_hdl_sub_type_print(struct event_hdl_sub_type subscriptions)
+{
+ char *out = NULL;
+ int it;
+ uint8_t first = 1;
+
+ for (it = 0; it < (int)(sizeof(event_hdl_sub_type_map) / sizeof(event_hdl_sub_type_map[0])); it++) {
+ if (subscriptions.family == event_hdl_sub_type_map[it].type.family &&
+ ((subscriptions.subtype & event_hdl_sub_type_map[it].type.subtype) ==
+ event_hdl_sub_type_map[it].type.subtype)) {
+ if (first) {
+ memprintf(&out, "%s", event_hdl_sub_type_map[it].name);
+ first--;
+ }
+ else
+ memprintf(&out, "%s%s%s", out, "|", event_hdl_sub_type_map[it].name);
+ }
+ }
+
+ return out;
+}
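+
+/* Usage sketch: the returned string is memprintf-allocated and must be
+ * freed by the caller (hypothetical subscription value):
+ *
+ *   char *s = event_hdl_sub_type_print(EVENT_HDL_SUB_SERVER);
+ *
+ *   if (s) {
+ *       // e.g. "SERVER_ADD|SERVER_DEL|..." depending on the subtype mask
+ *       ha_free(&s);
+ *   }
+ */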
+
+/* event_hdl debug/reporting function */
+typedef void (*event_hdl_report_hdl_state_func)(const char *fmt, ...);
+static void event_hdl_report_hdl_state(event_hdl_report_hdl_state_func report_func,
+ const struct event_hdl *hdl, const char *what, const char *state)
+{
+ report_func("[event_hdl]:%s (%s)'#%llu@%s': %s\n",
+ what,
+ (hdl->async) ? "ASYNC" : "SYNC",
+ (long long unsigned int)hdl->id,
+ hdl->dorigin,
+ state);
+}
+
+static inline void _event_hdl_async_data_drop(struct event_hdl_async_event_data *data)
+{
+ if (HA_ATOMIC_SUB_FETCH(&data->refcount, 1) == 0) {
+ /* we were the last one holding a reference to event data - free required */
+ if (data->mfree) {
+ /* Some event data members are dynamically allocated and thus
+ * require specific cleanup using a user-provided function.
+ * We directly pass a pointer to the internal data storage, but
+ * the cleanup function is only expected to cast it to the
+ * relevant data type and clean up the data members; it must not
+ * free the data pointer itself since it is our internal buffer :)
+ */
+ data->mfree(&data->data);
+ }
+ pool_free(pool_head_sub_event_data, data);
+ }
+}
+
+void event_hdl_async_free_event(struct event_hdl_async_event *e)
+{
+ if (unlikely(event_hdl_sub_type_equal(e->type, EVENT_HDL_SUB_END))) {
+ /* last event for hdl, special case */
+ /* free subscription entry as we're the last one still using it
+ * (it is already removed from mt_list, no race can occur)
+ */
+ event_hdl_drop(e->sub_mgmt.this);
+ HA_ATOMIC_DEC(&jobs);
+ }
+ else if (e->_data)
+ _event_hdl_async_data_drop(e->_data); /* data wrapper */
+ pool_free(pool_head_sub_event, e);
+}
+
+/* wakeup the task depending on its type:
+ * normal async mode internally uses tasklets but advanced async mode
+ * allows both tasks and tasklets.
+ * While tasks and tasklets may be easily cast, we need to use the proper
+ * API to wake them up (the waiting queues are exclusive).
+ */
+static void event_hdl_task_wakeup(struct tasklet *task)
+{
+ if (TASK_IS_TASKLET(task))
+ tasklet_wakeup(task);
+ else
+ task_wakeup((struct task *)task, TASK_WOKEN_OTHER); /* TODO: switch to TASK_WOKEN_EVENT? */
+}
+
+/* task handler used for normal async subscription mode;
+ * if you use advanced async subscription mode, you can use this
+ * as an example to implement your own task wrapper
+ */
+static struct task *event_hdl_async_task_default(struct task *task, void *ctx, unsigned int state)
+{
+ struct tasklet *tl = (struct tasklet *)task;
+ struct event_hdl_async_task_default_ctx *task_ctx = ctx;
+ struct event_hdl_async_event *event;
+ int max_notif_at_once_it = 0;
+ uint8_t done = 0;
+
+ /* run through the e_queue and call func() for each event;
+ * if we read the END event, it indicates we must stop:
+ * no more events will come (the handler is unregistered),
+ * so we must free task_ctx and stop the task
+ */
+ while (max_notif_at_once_it < event_hdl_tune.max_events_at_once &&
+ (event = event_hdl_async_equeue_pop(&task_ctx->e_queue)))
+ {
+ if (event_hdl_sub_type_equal(event->type, EVENT_HDL_SUB_END)) {
+ done = 1;
+ event_hdl_async_free_event(event);
+ /* the break is normally not even required: EVENT_HDL_SUB_END
+ * is guaranteed to be the last event of the e_queue
+ * (because in normal mode one sub == one e_queue)
+ */
+ break;
+ }
+ else {
+ struct event_hdl_cb cb;
+
+ cb.e_type = event->type;
+ cb.e_data = event->data;
+ cb.sub_mgmt = &event->sub_mgmt;
+ cb._sync = 0;
+
+ /* call user function */
+ task_ctx->func(&cb, event->private);
+ max_notif_at_once_it++;
+ }
+ event_hdl_async_free_event(event);
+ }
+
+ if (done) {
+ /* our job is done, subscription is over: no more events to come */
+ pool_free(pool_head_sub_taskctx, task_ctx);
+ tasklet_free(tl);
+ return NULL;
+ }
+ return task;
+}
+
+/* internal subscription mgmt functions */
+static inline struct event_hdl_sub_type _event_hdl_getsub(struct event_hdl_sub *cur_sub)
+{
+ return cur_sub->sub;
+}
+
+static inline struct event_hdl_sub_type _event_hdl_getsub_async(struct event_hdl_sub *cur_sub)
+{
+ struct mt_list lock;
+ struct event_hdl_sub_type type = EVENT_HDL_SUB_NONE;
+
+ lock = MT_LIST_LOCK_ELT(&cur_sub->mt_list);
+ if (lock.next != &cur_sub->mt_list)
+ type = _event_hdl_getsub(cur_sub);
+ // else already removed
+ MT_LIST_UNLOCK_ELT(&cur_sub->mt_list, lock);
+ return type;
+}
+
+static inline int _event_hdl_resub(struct event_hdl_sub *cur_sub, struct event_hdl_sub_type type)
+{
+ if (!event_hdl_sub_family_equal(cur_sub->sub, type))
+ return 0; /* family types differ, do nothing */
+ cur_sub->sub.subtype = type.subtype; /* new subtype assignment */
+ return 1;
+}
+
+static inline int _event_hdl_resub_async(struct event_hdl_sub *cur_sub, struct event_hdl_sub_type type)
+{
+ int status = 0;
+ struct mt_list lock;
+
+ lock = MT_LIST_LOCK_ELT(&cur_sub->mt_list);
+ if (lock.next != &cur_sub->mt_list)
+ status = _event_hdl_resub(cur_sub, type);
+ // else already removed
+ MT_LIST_UNLOCK_ELT(&cur_sub->mt_list, lock);
+ return status;
+}
+
+static inline void _event_hdl_unsubscribe(struct event_hdl_sub *del_sub)
+{
+ struct mt_list lock;
+
+ if (del_sub->hdl.async) {
+ /* ASYNC SUB MODE */
+ /* push EVENT_HDL_SUB_END (to notify the task that the subscription is dead) */
+
+ /* push the END EVENT in busy state so we can safely wake up
+ * the task before releasing it.
+ * Not doing so would expose us to a race where the task could have
+ * already consumed the END event before the wakeup; some tasks
+ * kill themselves (ie: normal async mode) when they receive such
+ * an event.
+ */
+ HA_ATOMIC_INC(&del_sub->hdl.async_equeue->size);
+ lock = MT_LIST_APPEND_LOCKED(&del_sub->hdl.async_equeue->head, &del_sub->async_end->mt_list);
+
+ /* wake up the task */
+ event_hdl_task_wakeup(del_sub->hdl.async_task);
+
+ /* unlock END EVENT (we're done, the task is now free to consume it) */
+ MT_LIST_UNLOCK_ELT(&del_sub->async_end->mt_list, lock);
+
+ /* we don't free the sub here:
+ * freeing will be performed by the async task, so it can safely
+ * rely on the pointer until it notices the END event
+ */
+ } else {
+ /* SYNC SUB MODE */
+
+ /* we can directly free the subscription:
+ * no other thread can access it since we successfully
+ * removed it from the list
+ */
+ event_hdl_drop(del_sub);
+ }
+}
+
+static inline void _event_hdl_unsubscribe_async(struct event_hdl_sub *del_sub)
+{
+ if (!MT_LIST_DELETE(&del_sub->mt_list))
+ return; /* already removed (but may be pending in e_queues) */
+ _event_hdl_unsubscribe(del_sub);
+}
+
+/* sub_mgmt function pointers (for handlers) */
+static struct event_hdl_sub_type event_hdl_getsub_sync(const struct event_hdl_sub_mgmt *mgmt)
+{
+ if (!mgmt)
+ return EVENT_HDL_SUB_NONE;
+
+ if (!mgmt->this)
+ return EVENT_HDL_SUB_NONE; /* already removed from sync ctx */
+ return _event_hdl_getsub(mgmt->this);
+}
+
+static struct event_hdl_sub_type event_hdl_getsub_async(const struct event_hdl_sub_mgmt *mgmt)
+{
+ if (!mgmt)
+ return EVENT_HDL_SUB_NONE;
+
+ return _event_hdl_getsub_async(mgmt->this);
+}
+
+static int event_hdl_resub_sync(const struct event_hdl_sub_mgmt *mgmt, struct event_hdl_sub_type type)
+{
+ if (!mgmt)
+ return 0;
+
+ if (!mgmt->this)
+ return 0; /* already removed from sync ctx */
+ return _event_hdl_resub(mgmt->this, type);
+}
+
+static int event_hdl_resub_async(const struct event_hdl_sub_mgmt *mgmt, struct event_hdl_sub_type type)
+{
+ if (!mgmt)
+ return 0;
+
+ return _event_hdl_resub_async(mgmt->this, type);
+}
+
+static void event_hdl_unsubscribe_sync(const struct event_hdl_sub_mgmt *mgmt)
+{
+ if (!mgmt)
+ return;
+
+ if (!mgmt->this)
+ return; /* already removed from sync ctx */
+
+ /* assuming that publish sync code will notice that mgmt->this is NULL
+ * and will perform the list removal using MT_LIST_DELETE_SAFE and
+ * _event_hdl_unsubscribe()
+ * while still owning the lock
+ */
+ ((struct event_hdl_sub_mgmt *)mgmt)->this = NULL;
+}
+
+static void event_hdl_unsubscribe_async(const struct event_hdl_sub_mgmt *mgmt)
+{
+ if (!mgmt)
+ return;
+
+ _event_hdl_unsubscribe_async(mgmt->this);
+}
+
+#define EVENT_HDL_SUB_MGMT_ASYNC(_sub) (struct event_hdl_sub_mgmt){ .this = _sub, \
+ .getsub = event_hdl_getsub_async, \
+ .resub = event_hdl_resub_async, \
+ .unsub = event_hdl_unsubscribe_async}
+#define EVENT_HDL_SUB_MGMT_SYNC(_sub) (struct event_hdl_sub_mgmt){ .this = _sub, \
+ .getsub = event_hdl_getsub_sync, \
+ .resub = event_hdl_resub_sync, \
+ .unsub = event_hdl_unsubscribe_sync}
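+
+/* From a handler's point of view, the sub_mgmt received in the callback
+ * allows self-management. Hedged sketch of a one-shot sync handler
+ * (hypothetical function; the exact callback typedef is in event_hdl-t.h):
+ *
+ *   void my_sync_hdl(const struct event_hdl_cb *cb, void *private)
+ *   {
+ *       // handle cb->e_type / cb->e_data ...
+ *       cb->sub_mgmt->unsub(cb->sub_mgmt);  // then unsubscribe ourself
+ *   }
+ */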
+
+struct event_hdl_sub *event_hdl_subscribe_ptr(event_hdl_sub_list *sub_list,
+ struct event_hdl_sub_type e_type, struct event_hdl hdl)
+{
+ struct event_hdl_sub *new_sub = NULL;
+ struct mt_list *elt1, elt2;
+ struct event_hdl_async_task_default_ctx *task_ctx = NULL;
+ struct mt_list lock;
+
+ if (!sub_list)
+ sub_list = &global_event_hdl_sub_list; /* fall back to global list */
+
+ /* hdl API consistency check */
+ /*FIXME: do we need to ensure that if private is set, private_free should be set as well? */
+ BUG_ON((!hdl.async && !hdl.sync_ptr) ||
+ (hdl.async == EVENT_HDL_ASYNC_MODE_NORMAL && !hdl.async_ptr) ||
+ (hdl.async == EVENT_HDL_ASYNC_MODE_ADVANCED &&
+ (!hdl.async_equeue || !hdl.async_task)));
+
+ new_sub = pool_alloc(pool_head_sub);
+ if (new_sub == NULL) {
+ goto memory_error;
+ }
+
+ /* assignments */
+ new_sub->sub.family = e_type.family;
+ new_sub->sub.subtype = e_type.subtype;
+ new_sub->flags = 0;
+ new_sub->hdl = hdl;
+
+ if (hdl.async) {
+ /* async END event pre-allocation */
+ new_sub->async_end = pool_alloc(pool_head_sub_event);
+ if (!new_sub->async_end) {
+ /* memory error */
+ goto memory_error;
+ }
+ if (hdl.async == EVENT_HDL_ASYNC_MODE_NORMAL) {
+ /* normal mode: no task provided, we must initialize it */
+
+ /* initialize task context */
+ task_ctx = pool_alloc(pool_head_sub_taskctx);
+
+ if (!task_ctx) {
+ /* memory error */
+ goto memory_error;
+ }
+ event_hdl_async_equeue_init(&task_ctx->e_queue);
+ task_ctx->func = new_sub->hdl.async_ptr;
+
+ new_sub->hdl.async_equeue = &task_ctx->e_queue;
+ new_sub->hdl.async_task = tasklet_new();
+
+ if (!new_sub->hdl.async_task) {
+ /* memory error */
+ goto memory_error;
+ }
+ new_sub->hdl.async_task->context = task_ctx;
+ new_sub->hdl.async_task->process = event_hdl_async_task_default;
+ }
+ /* initialize END event (used to notify about subscription ending)
+ * used by both normal and advanced mode:
+ * - to safely terminate the task in normal mode
+ * - to safely free subscription and
+ * keep track of active subscriptions in advanced mode
+ */
+ new_sub->async_end->type = EVENT_HDL_SUB_END;
+ new_sub->async_end->sub_mgmt = EVENT_HDL_SUB_MGMT_ASYNC(new_sub);
+ new_sub->async_end->private = new_sub->hdl.private;
+ new_sub->async_end->_data = NULL;
+ MT_LIST_INIT(&new_sub->async_end->mt_list);
+ }
+ /* set refcount to 2:
+ * 1 for the handler (because the handler can manage the subscription itself)
+ * 1 for the caller (dropped automatically if the caller uses the non-ptr version)
+ */
+ new_sub->refcount = 2;
+
+ /* ready for registration */
+ MT_LIST_INIT(&new_sub->mt_list);
+
+ lock = MT_LIST_LOCK_ELT(&sub_list->known);
+
+ /* check if such identified hdl is not already registered */
+ if (hdl.id) {
+ struct event_hdl_sub *cur_sub;
+ uint8_t found = 0;
+
+ mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
+ if (hdl.id == cur_sub->hdl.id) {
+ /* we found matching registered hdl */
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ /* error already registered */
+ MT_LIST_UNLOCK_ELT(&sub_list->known, lock);
+ event_hdl_report_hdl_state(ha_alert, &hdl, "SUB", "could not subscribe: subscription with this id already exists");
+ goto cleanup;
+ }
+ }
+
+ if (lock.next == &sub_list->known) {
+ /* this is an expected corner case on the de-init path: a subscribe
+ * attempt was made but the subscription list is already destroyed.
+ * We pretend it is a memory/IO error since it should not be long
+ * before haproxy enters the deinit() function anyway
+ */
+ MT_LIST_UNLOCK_ELT(&sub_list->known, lock);
+ goto cleanup;
+ }
+
+ /* Add to the list (global or the user-specified list).
+ * For now, sync handlers are appended and async handlers are
+ * inserted at the head, so that async handlers are executed first
+ */
+ if (hdl.async) {
+ /* Prevent the task from being aborted on soft-stop: let's wait
+ * until the END event is acknowledged by the task.
+ * (decrease is performed in event_hdl_async_free_event())
+ *
+ * If we don't do this, the event_hdl API will leak and we won't
+ * give the event-handling task a chance to perform its cleanup
+ */
+ HA_ATOMIC_INC(&jobs);
+ /* async mode, insert at the beginning of the list */
+ MT_LIST_INSERT(&sub_list->head, &new_sub->mt_list);
+ } else {
+ /* sync mode, append at the end of the list */
+ MT_LIST_APPEND(&sub_list->head, &new_sub->mt_list);
+ }
+
+ MT_LIST_UNLOCK_ELT(&sub_list->known, lock);
+
+ return new_sub;
+
+ cleanup:
+ if (new_sub) {
+ if (hdl.async == EVENT_HDL_ASYNC_MODE_NORMAL) {
+ tasklet_free(new_sub->hdl.async_task);
+ pool_free(pool_head_sub_taskctx, task_ctx);
+ }
+ if (hdl.async)
+ pool_free(pool_head_sub_event, new_sub->async_end);
+ pool_free(pool_head_sub, new_sub);
+ }
+
+ return NULL;
+
+ memory_error:
+ event_hdl_report_hdl_state(ha_warning, &hdl, "SUB", "could not register subscription due to memory error");
+ goto cleanup;
+}
+
+void event_hdl_take(struct event_hdl_sub *sub)
+{
+ HA_ATOMIC_INC(&sub->refcount);
+}
+
+void event_hdl_drop(struct event_hdl_sub *sub)
+{
+ if (HA_ATOMIC_SUB_FETCH(&sub->refcount, 1) != 0)
+ return;
+
+ /* we were the last one holding a reference to event sub - free required */
+ if (sub->hdl.private_free) {
+ /* free private data if specified upon registration */
+ sub->hdl.private_free(sub->hdl.private);
+ }
+ pool_free(pool_head_sub, sub);
+}
+
+int event_hdl_resubscribe(struct event_hdl_sub *cur_sub, struct event_hdl_sub_type type)
+{
+ return _event_hdl_resub_async(cur_sub, type);
+}
+
+void _event_hdl_pause(struct event_hdl_sub *cur_sub)
+{
+ cur_sub->flags |= EHDL_SUB_F_PAUSED;
+}
+
+void event_hdl_pause(struct event_hdl_sub *cur_sub)
+{
+ struct mt_list lock;
+
+ lock = MT_LIST_LOCK_ELT(&cur_sub->mt_list);
+ if (lock.next != &cur_sub->mt_list)
+ _event_hdl_pause(cur_sub);
+ // else already removed
+ MT_LIST_UNLOCK_ELT(&cur_sub->mt_list, lock);
+}
+
+void _event_hdl_resume(struct event_hdl_sub *cur_sub)
+{
+ cur_sub->flags &= ~EHDL_SUB_F_PAUSED;
+}
+
+void event_hdl_resume(struct event_hdl_sub *cur_sub)
+{
+ struct mt_list lock;
+
+ lock = MT_LIST_LOCK_ELT(&cur_sub->mt_list);
+ if (lock.next != &cur_sub->mt_list)
+ _event_hdl_resume(cur_sub);
+ // else already removed
+ MT_LIST_UNLOCK_ELT(&cur_sub->mt_list, lock);
+}
+
+void event_hdl_unsubscribe(struct event_hdl_sub *del_sub)
+{
+ _event_hdl_unsubscribe_async(del_sub);
+ /* drop refcount, assuming caller no longer use ptr */
+ event_hdl_drop(del_sub);
+}
+
+int event_hdl_subscribe(event_hdl_sub_list *sub_list, struct event_hdl_sub_type e_type, struct event_hdl hdl)
+{
+ struct event_hdl_sub *sub;
+
+ sub = event_hdl_subscribe_ptr(sub_list, e_type, hdl);
+ if (sub) {
+ /* drop refcount because the user is not willing to hold a reference */
+ event_hdl_drop(sub);
+ return 1;
+ }
+ return 0;
+}
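+
+/* Subscription sketch (hedged: the hdl struct is normally built with helper
+ * macros from event_hdl.h; plain assignments are shown here only to
+ * illustrate the fields this file relies on, with my_sync_hdl being the
+ * hypothetical handler sketched earlier):
+ *
+ *   struct event_hdl hdl = { 0 };
+ *
+ *   hdl.id = event_hdl_id("my_module", "srv_watch"); // optional unique id
+ *   hdl.sync_ptr = my_sync_hdl;                      // sync mode (async == 0)
+ *   hdl.dorigin = "my_module";
+ *   event_hdl_subscribe(NULL, EVENT_HDL_SUB_SERVER, hdl);
+ */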
+
+/* Subscription external lookup functions
+ */
+int event_hdl_lookup_unsubscribe(event_hdl_sub_list *sub_list,
+ uint64_t lookup_id)
+{
+ struct event_hdl_sub *del_sub = NULL;
+ struct mt_list *elt1, elt2;
+ int found = 0;
+
+ if (!sub_list)
+ sub_list = &global_event_hdl_sub_list; /* fall back to global list */
+
+ mt_list_for_each_entry_safe(del_sub, &sub_list->head, mt_list, elt1, elt2) {
+ if (lookup_id == del_sub->hdl.id) {
+ /* we found matching registered hdl */
+ MT_LIST_DELETE_SAFE(elt1);
+ _event_hdl_unsubscribe(del_sub);
+ found = 1;
+ break; /* id is unique, stop searching */
+ }
+ }
+ return found;
+}
+
+int event_hdl_lookup_resubscribe(event_hdl_sub_list *sub_list,
+ uint64_t lookup_id, struct event_hdl_sub_type type)
+{
+ struct event_hdl_sub *cur_sub = NULL;
+ struct mt_list *elt1, elt2;
+ int status = 0;
+
+ if (!sub_list)
+ sub_list = &global_event_hdl_sub_list; /* fall back to global list */
+
+ mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
+ if (lookup_id == cur_sub->hdl.id) {
+ /* we found matching registered hdl */
+ status = _event_hdl_resub(cur_sub, type);
+ break; /* id is unique, stop searching */
+ }
+ }
+ return status;
+}
+
+int event_hdl_lookup_pause(event_hdl_sub_list *sub_list,
+ uint64_t lookup_id)
+{
+ struct event_hdl_sub *cur_sub = NULL;
+ struct mt_list *elt1, elt2;
+ int found = 0;
+
+ if (!sub_list)
+ sub_list = &global_event_hdl_sub_list; /* fall back to global list */
+
+ mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
+ if (lookup_id == cur_sub->hdl.id) {
+ /* we found matching registered hdl */
+ _event_hdl_pause(cur_sub);
+ found = 1;
+ break; /* id is unique, stop searching */
+ }
+ }
+ return found;
+}
+
+int event_hdl_lookup_resume(event_hdl_sub_list *sub_list,
+ uint64_t lookup_id)
+{
+ struct event_hdl_sub *cur_sub = NULL;
+ struct mt_list *elt1, elt2;
+ int found = 0;
+
+ if (!sub_list)
+ sub_list = &global_event_hdl_sub_list; /* fall back to global list */
+
+ mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
+ if (lookup_id == cur_sub->hdl.id) {
+ /* we found matching registered hdl */
+ _event_hdl_resume(cur_sub);
+ found = 1;
+ break; /* id is unique, stop searching */
+ }
+ }
+ return found;
+}
+
+struct event_hdl_sub *event_hdl_lookup_take(event_hdl_sub_list *sub_list,
+ uint64_t lookup_id)
+{
+ struct event_hdl_sub *cur_sub = NULL;
+ struct mt_list *elt1, elt2;
+ uint8_t found = 0;
+
+ if (!sub_list)
+ sub_list = &global_event_hdl_sub_list; /* fall back to global list */
+
+ mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
+ if (lookup_id == cur_sub->hdl.id) {
+ /* we found matching registered hdl */
+ event_hdl_take(cur_sub);
+ found = 1;
+ break; /* id is unique, stop searching */
+ }
+ }
+ if (found)
+ return cur_sub;
+ return NULL;
+}
+
+/* event publishing functions
+ */
+static int _event_hdl_publish(event_hdl_sub_list *sub_list, struct event_hdl_sub_type e_type,
+ const struct event_hdl_cb_data *data)
+{
+ struct event_hdl_sub *cur_sub;
+ struct mt_list *elt1, elt2;
+ struct event_hdl_async_event_data *async_data = NULL; /* reuse async data for multiple async hdls */
+ int error = 0;
+
+ mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
+ /* notify each function that has subscribed to sub_family.type, unless paused */
+ if ((cur_sub->sub.family == e_type.family) &&
+ ((cur_sub->sub.subtype & e_type.subtype) == e_type.subtype) &&
+ !(cur_sub->flags & EHDL_SUB_F_PAUSED)) {
+ /* hdl should be notified */
+ if (!cur_sub->hdl.async) {
+ /* sync mode: simply call the cb pointer;
+ * it is up to the callee to schedule a task if needed or
+ * take specific precautions in order to return as fast as possible
+ * and not use locks that are already held by the caller
+ */
+ struct event_hdl_cb cb;
+ struct event_hdl_sub_mgmt sub_mgmt;
+
+ sub_mgmt = EVENT_HDL_SUB_MGMT_SYNC(cur_sub);
+ cb.e_type = e_type;
+ if (data)
+ cb.e_data = data->_ptr;
+ else
+ cb.e_data = NULL;
+ cb.sub_mgmt = &sub_mgmt;
+ cb._sync = 1;
+
+ /* call user function */
+ cur_sub->hdl.sync_ptr(&cb, cur_sub->hdl.private);
+
+ if (!sub_mgmt.this) {
+ /* user has performed hdl unsub
+ * we must remove it from the list
+ */
+ MT_LIST_DELETE_SAFE(elt1);
+ /* then free it */
+ _event_hdl_unsubscribe(cur_sub);
+ }
+ } else {
+ /* async mode: here we need to prepare the event data
+ * and push it to the event_queue of the task(s)
+ * responsible for consuming the events of the current
+ * subscription.
+ * Once the event is pushed, we wake up the associated task.
+ * This feature depends on <haproxy/task>, which itself
+ * depends on <haproxy/pool>:
+ * if STG_PREPARE+STG_POOL is not performed prior to publishing
+ * to an async handler, the program may crash.
+ * Fortunately, STG_PREPARE+STG_POOL is done early in the
+ * HAProxy startup sequence.
+ */
+ struct event_hdl_async_event *new_event;
+
+ new_event = pool_alloc(pool_head_sub_event);
+ if (!new_event) {
+ error = 1;
+ break; /* stop on error */
+ }
+ new_event->type = e_type;
+ new_event->private = cur_sub->hdl.private;
+ new_event->when = date;
+ new_event->sub_mgmt = EVENT_HDL_SUB_MGMT_ASYNC(cur_sub);
+ if (data) {
+ /* if this fails, please adjust EVENT_HDL_ASYNC_EVENT_DATA in
+ * event_hdl-t.h file or consider providing dynamic struct members
+ * to reduce overall struct size
+ */
+ BUG_ON(data->_size > sizeof(async_data->data));
+ if (!async_data) {
+ /* first async hdl reached - preparing async_data cache */
+ async_data = pool_alloc(pool_head_sub_event_data);
+ if (!async_data) {
+ error = 1;
+ pool_free(pool_head_sub_event, new_event);
+ break; /* stop on error */
+ }
+
+ /* async data assignment */
+ memcpy(async_data->data, data->_ptr, data->_size);
+ async_data->mfree = data->_mfree;
+ /* Initialize refcount, we start at 1 to prevent async
+ * data from being freed by an async handler while we
+ * still use it. We will drop the reference when the
+ * publish is over.
+ *
+ * (first use, atomic operation not required)
+ */
+ async_data->refcount = 1;
+ }
+ new_event->_data = async_data;
+ new_event->data = async_data->data;
+ /* increment refcount because multiple hdls could
+ * use the same async_data
+ */
+ HA_ATOMIC_INC(&async_data->refcount);
+ } else
+ new_event->data = NULL;
+
+ /* appending new event to event hdl queue */
+ MT_LIST_INIT(&new_event->mt_list);
+ HA_ATOMIC_INC(&cur_sub->hdl.async_equeue->size);
+ MT_LIST_APPEND(&cur_sub->hdl.async_equeue->head, &new_event->mt_list);
+
+ /* wake up the task */
+ event_hdl_task_wakeup(cur_sub->hdl.async_task);
+ } /* end async mode */
+ } /* end hdl should be notified */
+ } /* end mt_list */
+ if (async_data) {
+ /* we finished publishing, drop the reference on async data */
+ _event_hdl_async_data_drop(async_data);
+ } else {
+ /* no async subscribers, we are responsible for calling the data
+ * member freeing function if it was provided
+ */
+ if (data && data->_mfree)
+ data->_mfree(data->_ptr);
+ }
+ if (error) {
+ event_hdl_report_hdl_state(ha_warning, &cur_sub->hdl, "PUBLISH", "memory error");
+ return 0;
+ }
+ return 1;
+}
+
+/* The publish function should not be used from high-call-rate or
+ * time-sensitive places for now, because the list lookup based on e_type is
+ * not optimized at all!
+ * Returns 1 in case of SUCCESS:
+ * Subscribed handlers were notified successfully.
+ * Returns 0 in case of FAILURE:
+ * FAILURE means a memory error occurred while handling the very first async
+ * handler from the subscription list.
+ * As async handlers are executed first within the list, when such a failure
+ * occurs you can safely assume that no events were published for the current
+ * call.
+ */
+int event_hdl_publish(event_hdl_sub_list *sub_list,
+ struct event_hdl_sub_type e_type, const struct event_hdl_cb_data *data)
+{
+ if (!e_type.family) {
+ /* do nothing, these types are reserved for internal use only
+ * (ie: unregistering) */
+ return 0;
+ }
+ if (sub_list) {
+ /* if sublist is provided, first publish event to list subscribers */
+ return _event_hdl_publish(sub_list, e_type, data);
+ } else {
+ /* publish to global list */
+ return _event_hdl_publish(&global_event_hdl_sub_list, e_type, data);
+ }
+}
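+
+/* Publish sketch (no event data attached; attaching data goes through the
+ * event_hdl_cb_data wrapper built by macros in event_hdl.h):
+ *
+ *   event_hdl_publish(NULL, EVENT_HDL_SUB_SERVER_UP, NULL);
+ */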
+
+void event_hdl_sub_list_init(event_hdl_sub_list *sub_list)
+{
+ BUG_ON(!sub_list); /* unexpected, global sublist is managed internally */
+ MT_LIST_INIT(&sub_list->head);
+ MT_LIST_APPEND(&known_event_hdl_sub_list, &sub_list->known);
+}
+
+/* internal function, assumes that sub_list ptr is always valid */
+static void _event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list)
+{
+ struct event_hdl_sub *cur_sub;
+ struct mt_list *elt1, elt2;
+
+ mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
+ /* remove cur elem from list */
+ MT_LIST_DELETE_SAFE(elt1);
+ /* then free it */
+ _event_hdl_unsubscribe(cur_sub);
+ }
+}
+
+/* when a subscription list is no longer used, call this
+ * to do the cleanup and make sure all related subscriptions are
+ * safely ended according to their types
+ */
+void event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list)
+{
+ BUG_ON(!sub_list); /* unexpected, global sublist is managed internally */
+ if (!MT_LIST_DELETE(&sub_list->known))
+ return; /* already destroyed */
+ _event_hdl_sub_list_destroy(sub_list);
+}
+
+/* config parser for global "tune.events.max-events-at-once" */
+static int event_hdl_parse_max_events_at_once(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int arg = -1;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) != 0)
+ arg = atoi(args[1]);
+
+ if (arg < 1 || arg > 10000) {
+ memprintf(err, "'%s' expects an integer argument between 1 and 10000.", args[0]);
+ return -1;
+ }
+
+ event_hdl_tune.max_events_at_once = arg;
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.events.max-events-at-once", event_hdl_parse_max_events_at_once },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+INITCALL0(STG_INIT, event_hdl_init);
diff --git a/src/extcheck.c b/src/extcheck.c
new file mode 100644
index 0000000..c667b16
--- /dev/null
+++ b/src/extcheck.c
@@ -0,0 +1,694 @@
+/*
+ * External health-checks functions.
+ *
+ * Copyright 2000-2009,2020 Willy Tarreau <w@1wt.eu>
+ * Copyright 2014 Horms Solutions Ltd, Simon Horman <horms@verge.net.au>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/proxy.h>
+#include <haproxy/server.h>
+#include <haproxy/signal.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+
+static struct list pid_list = LIST_HEAD_INIT(pid_list);
+static struct pool_head *pool_head_pid_list __read_mostly;
+__decl_spinlock(pid_list_lock);
+
+struct extcheck_env {
+ char *name; /* environment variable name */
+ int vmaxlen; /* value maximum length, used to determine the required memory allocation */
+};
+
+/* environment variables memory requirement for different types of data */
+#define EXTCHK_SIZE_EVAL_INIT 0 /* size determined during the init phase;
+ * such environment variables are not updatable. */
+#define EXTCHK_SIZE_ULONG 20 /* max string length for an unsigned long value */
+#define EXTCHK_SIZE_UINT 11 /* max string length for an unsigned int value */
+#define EXTCHK_SIZE_ADDR 256 /* max string length for an IPv4/IPv6/UNIX address */
+
+/* external checks environment variables */
+enum {
+ EXTCHK_PATH = 0,
+
+ /* Proxy specific environment variables */
+ EXTCHK_HAPROXY_PROXY_NAME, /* the backend name */
+ EXTCHK_HAPROXY_PROXY_ID, /* the backend id */
+ EXTCHK_HAPROXY_PROXY_ADDR, /* the first bind address if available (or empty) */
+ EXTCHK_HAPROXY_PROXY_PORT, /* the first bind port if available (or empty) */
+
+ /* Server specific environment variables */
+ EXTCHK_HAPROXY_SERVER_NAME, /* the server name */
+ EXTCHK_HAPROXY_SERVER_ID, /* the server id */
+ EXTCHK_HAPROXY_SERVER_ADDR, /* the server address */
+ EXTCHK_HAPROXY_SERVER_PORT, /* the server port if available (or empty) */
+ EXTCHK_HAPROXY_SERVER_MAXCONN, /* the server max connections */
+ EXTCHK_HAPROXY_SERVER_CURCONN, /* the current number of connections on the server */
+ EXTCHK_HAPROXY_SERVER_SSL, /* "1" if the server supports SSL, otherwise zero */
+ EXTCHK_HAPROXY_SERVER_PROTO, /* the server's configured proto, if any */
+
+ EXTCHK_SIZE
+};
+
+const struct extcheck_env extcheck_envs[EXTCHK_SIZE] = {
+ [EXTCHK_PATH] = { "PATH", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_PROXY_NAME] = { "HAPROXY_PROXY_NAME", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_PROXY_ID] = { "HAPROXY_PROXY_ID", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_PROXY_ADDR] = { "HAPROXY_PROXY_ADDR", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_PROXY_PORT] = { "HAPROXY_PROXY_PORT", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_SERVER_NAME] = { "HAPROXY_SERVER_NAME", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_SERVER_ID] = { "HAPROXY_SERVER_ID", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_SERVER_ADDR] = { "HAPROXY_SERVER_ADDR", EXTCHK_SIZE_ADDR },
+ [EXTCHK_HAPROXY_SERVER_PORT] = { "HAPROXY_SERVER_PORT", EXTCHK_SIZE_UINT },
+ [EXTCHK_HAPROXY_SERVER_MAXCONN] = { "HAPROXY_SERVER_MAXCONN", EXTCHK_SIZE_EVAL_INIT },
+ [EXTCHK_HAPROXY_SERVER_CURCONN] = { "HAPROXY_SERVER_CURCONN", EXTCHK_SIZE_ULONG },
+ [EXTCHK_HAPROXY_SERVER_SSL] = { "HAPROXY_SERVER_SSL", EXTCHK_SIZE_UINT },
+ [EXTCHK_HAPROXY_SERVER_PROTO] = { "HAPROXY_SERVER_PROTO", EXTCHK_SIZE_EVAL_INIT },
+};
+
+void block_sigchld(void)
+{
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGCHLD);
+ assert(ha_sigmask(SIG_BLOCK, &set, NULL) == 0);
+}
+
+void unblock_sigchld(void)
+{
+ sigset_t set;
+ sigemptyset(&set);
+ sigaddset(&set, SIGCHLD);
+ assert(ha_sigmask(SIG_UNBLOCK, &set, NULL) == 0);
+}
+
+static struct pid_list *pid_list_add(pid_t pid, struct task *t)
+{
+ struct pid_list *elem;
+ struct check *check = t->context;
+
+ elem = pool_alloc(pool_head_pid_list);
+ if (!elem)
+ return NULL;
+ elem->pid = pid;
+ elem->t = t;
+ elem->exited = 0;
+ check->curpid = elem;
+ LIST_INIT(&elem->list);
+
+ HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
+ LIST_INSERT(&pid_list, &elem->list);
+ HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
+
+ return elem;
+}
+
+static void pid_list_del(struct pid_list *elem)
+{
+ struct check *check;
+
+ if (!elem)
+ return;
+
+ HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
+ LIST_DELETE(&elem->list);
+ HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
+
+ if (!elem->exited)
+ kill(elem->pid, SIGTERM);
+
+ check = elem->t->context;
+ check->curpid = NULL;
+ pool_free(pool_head_pid_list, elem);
+}
+
+/* Called from inside SIGCHLD handler, SIGCHLD is blocked */
+static void pid_list_expire(pid_t pid, int status)
+{
+ struct pid_list *elem;
+
+ HA_SPIN_LOCK(PID_LIST_LOCK, &pid_list_lock);
+ list_for_each_entry(elem, &pid_list, list) {
+ if (elem->pid == pid) {
+ elem->t->expire = now_ms;
+ elem->status = status;
+ elem->exited = 1;
+ task_wakeup(elem->t, TASK_WOKEN_IO);
+ break;
+ }
+ }
+ HA_SPIN_UNLOCK(PID_LIST_LOCK, &pid_list_lock);
+}
+
+static void sigchld_handler(struct sig_handler *sh)
+{
+ pid_t pid;
+ int status;
+
+ while ((pid = waitpid(0, &status, WNOHANG)) > 0)
+ pid_list_expire(pid, status);
+}
+
+int init_pid_list(void)
+{
+ if (pool_head_pid_list != NULL)
+ /* Nothing to do */
+ return 0;
+
+ if (!signal_register_fct(SIGCHLD, sigchld_handler, SIGCHLD)) {
+ ha_alert("Failed to set signal handler for external health checks: %s. Aborting.\n",
+ strerror(errno));
+ return 1;
+ }
+
+ pool_head_pid_list = create_pool("pid_list", sizeof(struct pid_list), MEM_F_SHARED);
+ if (pool_head_pid_list == NULL) {
+ ha_alert("Failed to allocate memory pool for external health checks: %s. Aborting.\n",
+ strerror(errno));
+ return 1;
+ }
+
+ return 0;
+}
+
+/* helper macro to set an environment variable and jump to a specific label on failure. */
+#define EXTCHK_SETENV(check, envidx, value, fail) { if (extchk_setenv(check, envidx, value)) goto fail; }
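+
+/* For example, EXTCHK_SETENV(check, EXTCHK_PATH, path, err) expands to:
+ *
+ * { if (extchk_setenv(check, EXTCHK_PATH, path)) goto err; }
+ */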
+
+/*
+ * Helper function to allocate enough memory to store an environment variable.
+ * It also checks that the environment variable is updatable, and silently
+ * skips the update if it is not.
+ */
+static int extchk_setenv(struct check *check, int idx, const char *value)
+{
+ int len, ret;
+ char *envname;
+ int vmaxlen;
+
+ if (idx < 0 || idx >= EXTCHK_SIZE) {
+ ha_alert("Illegal environment variable index %d. Aborting.\n", idx);
+ return 1;
+ }
+
+ envname = extcheck_envs[idx].name;
+ vmaxlen = extcheck_envs[idx].vmaxlen;
+
+ /* Check if the environment variable is already set, and silently reject
+ * the update if this one is not updatable. */
+ if ((vmaxlen == EXTCHK_SIZE_EVAL_INIT) && (check->envp[idx]))
+ return 0;
+
+ /* Instead of sending NOT_USED, sending an empty value is preferable */
+ if (strcmp(value, "NOT_USED") == 0) {
+ value = "";
+ }
+
+ len = strlen(envname) + 1;
+ if (vmaxlen == EXTCHK_SIZE_EVAL_INIT)
+ len += strlen(value);
+ else
+ len += vmaxlen;
+
+ if (!check->envp[idx])
+ check->envp[idx] = malloc(len + 1);
+
+ if (!check->envp[idx]) {
+ ha_alert("Failed to allocate memory for the environment variable '%s'. Aborting.\n", envname);
+ return 1;
+ }
+ ret = snprintf(check->envp[idx], len + 1, "%s=%s", envname, value);
+ if (ret < 0) {
+ ha_alert("Failed to store the environment variable '%s'. Reason : %s. Aborting.\n", envname, strerror(errno));
+ return 1;
+ }
+ else if (ret > len) {
+ ha_alert("Environment variable '%s' was truncated. Aborting.\n", envname);
+ return 1;
+ }
+ return 0;
+}
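+
+/* Illustrative sketch (not part of the original code): because updatable
+ * variables are allocated for their worst-case length up front, later updates
+ * may rewrite the same slot in place without reallocating, e.g.:
+ *
+ * extchk_setenv(check, EXTCHK_HAPROXY_SERVER_CURCONN, "42");
+ * ... later, once the connection count has changed ...
+ * extchk_setenv(check, EXTCHK_HAPROXY_SERVER_CURCONN, "137");
+ *
+ * Both calls reuse the same check->envp[idx] buffer.
+ */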
+
+int prepare_external_check(struct check *check)
+{
+ struct server *s = check->server;
+ struct proxy *px = s->proxy;
+ struct listener *listener = NULL, *l;
+ int i;
+ const char *path = px->check_path ? px->check_path : DEF_CHECK_PATH;
+ char buf[256];
+ const char *svmode = NULL;
+
+ list_for_each_entry(l, &px->conf.listeners, by_fe)
+ /* Use the first INET, INET6 or UNIX listener */
+ if (l->rx.addr.ss_family == AF_INET ||
+ l->rx.addr.ss_family == AF_INET6 ||
+ l->rx.addr.ss_family == AF_UNIX) {
+ listener = l;
+ break;
+ }
+
+ check->curpid = NULL;
+ check->envp = calloc((EXTCHK_SIZE + 1), sizeof(*check->envp));
+ if (!check->envp) {
+ ha_alert("Failed to allocate memory for environment variables. Aborting\n");
+ goto err;
+ }
+
+ check->argv = calloc(6, sizeof(*check->argv));
+ if (!check->argv) {
+ ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
+ goto err;
+ }
+
+ check->argv[0] = px->check_command;
+
+ if (!listener) {
+ check->argv[1] = strdup("NOT_USED");
+ check->argv[2] = strdup("NOT_USED");
+ }
+ else if (listener->rx.addr.ss_family == AF_INET ||
+ listener->rx.addr.ss_family == AF_INET6) {
+ addr_to_str(&listener->rx.addr, buf, sizeof(buf));
+ check->argv[1] = strdup(buf);
+ port_to_str(&listener->rx.addr, buf, sizeof(buf));
+ check->argv[2] = strdup(buf);
+ }
+ else if (listener->rx.addr.ss_family == AF_UNIX) {
+ const struct sockaddr_un *un;
+
+ un = (struct sockaddr_un *)&listener->rx.addr;
+ check->argv[1] = strdup(un->sun_path);
+ check->argv[2] = strdup("NOT_USED");
+ }
+ else {
+ ha_alert("Starting [%s:%s] check: unsupported address family.\n", px->id, s->id);
+ goto err;
+ }
+
+ /* args 3 and 4 are the address, they're replaced on each check */
+ check->argv[3] = calloc(EXTCHK_SIZE_ADDR, sizeof(*check->argv[3]));
+ check->argv[4] = calloc(EXTCHK_SIZE_UINT, sizeof(*check->argv[4]));
+
+ for (i = 0; i < 5; i++) {
+ if (!check->argv[i]) {
+ ha_alert("Starting [%s:%s] check: out of memory.\n", px->id, s->id);
+ goto err;
+ }
+ }
+
+ EXTCHK_SETENV(check, EXTCHK_PATH, path, err);
+ /* Add proxy environment variables */
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_NAME, px->id, err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ID, ultoa_r(px->uuid, buf, sizeof(buf)), err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_ADDR, check->argv[1], err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_PROXY_PORT, check->argv[2], err);
+ /* Add server environment variables */
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_NAME, s->id, err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ID, ultoa_r(s->puid, buf, sizeof(buf)), err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ADDR, check->argv[3], err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PORT, check->argv[4], err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_MAXCONN, ultoa_r(s->maxconn, buf, sizeof(buf)), err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)), err);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_SSL, s->use_ssl ? "1" : "0", err);
+
+ switch (px->mode) {
+ case PR_MODE_CLI: svmode = "cli"; break;
+ case PR_MODE_SYSLOG: svmode = "syslog"; break;
+ case PR_MODE_PEERS: svmode = "peers"; break;
+ case PR_MODE_HTTP: svmode = (s->mux_proto) ? s->mux_proto->token.ptr : "h1"; break;
+ case PR_MODE_TCP: svmode = "tcp"; break;
+ /* all valid cases must be enumerated above, below is to avoid a warning */
+ case PR_MODES: svmode = "?"; break;
+ }
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PROTO, svmode, err);
+
+ /* Ensure that we don't leave any hole in check->envp */
+ for (i = 0; i < EXTCHK_SIZE; i++)
+ if (!check->envp[i])
+ EXTCHK_SETENV(check, i, "", err);
+
+ return 1;
+err:
+ if (check->envp) {
+ for (i = 0; i < EXTCHK_SIZE; i++)
+ free(check->envp[i]);
+ ha_free(&check->envp);
+ }
+
+ if (check->argv) {
+ for (i = 1; i < 5; i++)
+ free(check->argv[i]);
+ ha_free(&check->argv);
+ }
+ return 0;
+}
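+
+/* Illustrative configuration (an assumption for documentation, not part of
+ * this file): a backend relying on the variables prepared above could look
+ * like:
+ *
+ * global
+ * external-check
+ *
+ * backend app
+ * option external-check
+ * external-check command /usr/local/bin/check.sh
+ * server srv1 192.0.2.10:80 check
+ *
+ * The command receives HAPROXY_PROXY_*, HAPROXY_SERVER_ADDR,
+ * HAPROXY_SERVER_PORT, etc. in its environment, plus <proxy_addr>
+ * <proxy_port> <server_addr> <server_port> as argv[1] to argv[4].
+ */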
+
+/*
+ * establish a server health-check that makes use of a process.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * Blocks and then unblocks SIGCHLD
+ */
+static int connect_proc_chk(struct task *t)
+{
+ char buf[256];
+ struct check *check = t->context;
+ struct server *s = check->server;
+ struct proxy *px = s->proxy;
+ int status;
+ pid_t pid;
+
+ status = SF_ERR_RESOURCE;
+
+ block_sigchld();
+
+ pid = fork();
+ if (pid < 0) {
+ ha_alert("Failed to fork process for external health check%s: %s. Aborting.\n",
+ (global.tune.options & GTUNE_INSECURE_FORK) ?
+ "" : " (likely caused by missing 'insecure-fork-wanted')",
+ strerror(errno));
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
+ goto out;
+ }
+ if (pid == 0) {
+ /* Child */
+ extern char **environ;
+ struct rlimit limit;
+ int fd;
+
+ /* close all FDs. Keep stdin/stdout/stderr in verbose mode */
+ fd = (global.mode & (MODE_QUIET|MODE_VERBOSE)) == MODE_QUIET ? 0 : 3;
+
+ my_closefrom(fd);
+
+ /* restore the initial FD limits */
+ limit.rlim_cur = rlim_fd_cur_at_boot;
+ limit.rlim_max = rlim_fd_max_at_boot;
+ if (raise_rlim_nofile(NULL, &limit) != 0) {
+ getrlimit(RLIMIT_NOFILE, &limit);
+ ha_warning("External check: failed to restore initial FD limits (cur=%u max=%u), using cur=%u max=%u\n",
+ rlim_fd_cur_at_boot, rlim_fd_max_at_boot,
+ (unsigned int)limit.rlim_cur, (unsigned int)limit.rlim_max);
+ }
+
+ if (global.external_check < 2) {
+ /* fresh new env for each check */
+ environ = check->envp;
+ }
+
+ /* Update some environment variables and command args: curconn, server addr and server port */
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_CURCONN, ultoa_r(s->cur_sess, buf, sizeof(buf)), fail);
+
+ if (s->addr.ss_family == AF_UNIX) {
+ const struct sockaddr_un *un = (struct sockaddr_un *)&s->addr;
+ strlcpy2(check->argv[3], un->sun_path, EXTCHK_SIZE_ADDR);
+ memcpy(check->argv[4], "NOT_USED", 9);
+ } else {
+ addr_to_str(&s->addr, check->argv[3], EXTCHK_SIZE_ADDR);
+ *check->argv[4] = 0; // just in case the address family changed
+ if (s->addr.ss_family == AF_INET || s->addr.ss_family == AF_INET6)
+ snprintf(check->argv[4], EXTCHK_SIZE_UINT, "%u", s->svc_port);
+ }
+
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_ADDR, check->argv[3], fail);
+ EXTCHK_SETENV(check, EXTCHK_HAPROXY_SERVER_PORT, check->argv[4], fail);
+
+ if (global.external_check >= 2) {
+ /* environment is preserved, let's merge new vars */
+ int i;
+
+ for (i = 0; check->envp[i] && *check->envp[i]; i++) {
+ char *delim = strchr(check->envp[i], '=');
+ if (!delim)
+ continue;
+ *(delim++) = 0;
+ if (setenv(check->envp[i], delim, 1) != 0)
+ goto fail;
+ }
+ }
+ haproxy_unblock_signals();
+ execvp(px->check_command, check->argv);
+ ha_alert("Failed to exec process for external health check: %s. Aborting.\n",
+ strerror(errno));
+ fail:
+ exit(-1);
+ }
+
+ /* Parent */
+ if (check->result == CHK_RES_UNKNOWN) {
+ if (pid_list_add(pid, t) != NULL) {
+ t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
+
+ if (px->timeout.check && px->timeout.connect) {
+ int t_con = tick_add(now_ms, px->timeout.connect);
+ t->expire = tick_first(t->expire, t_con);
+ }
+ status = SF_ERR_NONE;
+ goto out;
+ }
+ else {
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
+ }
+ kill(pid, SIGTERM); /* process creation error */
+ }
+ else
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, strerror(errno));
+
+out:
+ unblock_sigchld();
+ return status;
+}
+
+/*
+ * manages a server health-check that uses an external process. Returns the
+ * task, after having updated its expiration date.
+ *
+ * Please do NOT place any return statement in this function and only leave
+ * via the out_unlock label.
+ */
+struct task *process_chk_proc(struct task *t, void *context, unsigned int state)
+{
+ struct check *check = context;
+ struct server *s = check->server;
+ int rv;
+ int ret;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ HA_SPIN_LOCK(SERVER_LOCK, &check->server->lock);
+ if (!(check->state & CHK_ST_INPROGRESS)) {
+ /* no check currently running */
+ if (!expired) /* woke up too early */
+ goto out_unlock;
+
+ /* we don't send any health-checks when the proxy is
+ * stopped, when the server should not be checked or when
+ * the check is disabled.
+ */
+ if (((check->state & (CHK_ST_ENABLED | CHK_ST_PAUSED)) != CHK_ST_ENABLED) ||
+ (s->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))
+ goto reschedule;
+
+ /* we'll initiate a new check */
+ set_server_check_status(check, HCHK_STATUS_START, NULL);
+
+ check->state |= CHK_ST_INPROGRESS;
+
+ ret = connect_proc_chk(t);
+ if (ret == SF_ERR_NONE) {
+ /* the process was forked, we allow up to min(inter,
+ * timeout.connect) for it to report its status, but
+ * only when timeout.check is set as it may be too short
+ * for a full check otherwise.
+ */
+ t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
+
+ if (s->proxy->timeout.check && s->proxy->timeout.connect) {
+ int t_con = tick_add(now_ms, s->proxy->timeout.connect);
+ t->expire = tick_first(t->expire, t_con);
+ }
+ task_set_thread(t, tid);
+ goto reschedule;
+ }
+
+ /* here, we failed to start the check */
+
+ check->state &= ~CHK_ST_INPROGRESS;
+ check_notify_failure(check);
+
+ /* we allow up to min(inter, timeout.connect) for a connection
+ * to establish but only when timeout.check is set
+ * as it may be too short for a full check otherwise
+ */
+ while (tick_is_expired(t->expire, now_ms)) {
+ int t_con;
+
+ t_con = tick_add(t->expire, s->proxy->timeout.connect);
+ t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
+
+ if (s->proxy->timeout.check)
+ t->expire = tick_first(t->expire, t_con);
+ }
+ }
+ else {
+ /* there was a test running.
+ * First, let's check whether there was an uncaught error,
+ * which can happen on connect timeout or error.
+ */
+ if (check->result == CHK_RES_UNKNOWN) {
+ /* no result was reported yet, rely on the process exit status */
+ struct pid_list *elem = check->curpid;
+ int status = HCHK_STATUS_UNKNOWN;
+
+ if (elem->exited) {
+ status = elem->status; /* Save in case the process exits between use below */
+ if (!WIFEXITED(status))
+ check->code = -1;
+ else
+ check->code = WEXITSTATUS(status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ status = HCHK_STATUS_PROCERR;
+ else
+ status = HCHK_STATUS_PROCOK;
+ } else if (expired) {
+ status = HCHK_STATUS_PROCTOUT;
+ ha_warning("kill %d\n", (int)elem->pid);
+ kill(elem->pid, SIGTERM);
+ }
+ set_server_check_status(check, status, NULL);
+ }
+
+ if (check->result == CHK_RES_FAILED) {
+ /* a failure or timeout detected */
+ check_notify_failure(check);
+ }
+ else if (check->result == CHK_RES_CONDPASS) {
+ /* check is OK but asks for stopping mode */
+ check_notify_stopping(check);
+ }
+ else if (check->result == CHK_RES_PASSED) {
+ /* a success was detected */
+ check_notify_success(check);
+ }
+ task_set_thread(t, 0);
+ check->state &= ~CHK_ST_INPROGRESS;
+
+ pid_list_del(check->curpid);
+
+ rv = 0;
+ if (global.spread_checks > 0) {
+ rv = srv_getinter(check) * global.spread_checks / 100;
+ rv -= (int) (2 * rv * (statistical_prng() / 4294967295.0));
+ }
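+ /* e.g. with "spread-checks 50" and a 2000ms interval, rv
+ * starts at 1000 and is mapped to a random value in
+ * [-1000,1000], so the next check fires between 1 and 3
+ * seconds from now.
+ */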
+ t->expire = tick_add(now_ms, MS_TO_TICKS(srv_getinter(check) + rv));
+ }
+
+ reschedule:
+ while (tick_is_expired(t->expire, now_ms))
+ t->expire = tick_add(t->expire, MS_TO_TICKS(check->inter));
+
+ out_unlock:
+ HA_SPIN_UNLOCK(SERVER_LOCK, &check->server->lock);
+ return t;
+}
+
+/* Parses the "external-check" proxy keyword */
+int proxy_parse_extcheck(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ int cur_arg, ret = 0;
+
+ cur_arg = 1;
+ if (!*(args[cur_arg])) {
+ memprintf(errmsg, "missing argument after '%s'.\n", args[0]);
+ goto error;
+ }
+
+ if (strcmp(args[cur_arg], "command") == 0) {
+ if (too_many_args(2, args, errmsg, NULL))
+ goto error;
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "missing argument after '%s'.", args[cur_arg]);
+ goto error;
+ }
+ free(curpx->check_command);
+ curpx->check_command = strdup(args[cur_arg+1]);
+ }
+ else if (strcmp(args[cur_arg], "path") == 0) {
+ if (too_many_args(2, args, errmsg, NULL))
+ goto error;
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "missing argument after '%s'.", args[cur_arg]);
+ goto error;
+ }
+ free(curpx->check_path);
+ curpx->check_path = strdup(args[cur_arg+1]);
+ }
+ else {
+ memprintf(errmsg, "'%s' only supports 'command' and 'path'. but got '%s'.",
+ args[0], args[1]);
+ goto error;
+ }
+
+ ret = (*errmsg != NULL); /* Handle warning */
+ return ret;
+
+error:
+ return -1;
+}
+
+int proxy_parse_external_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ int err_code = 0;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_EXT_CHK;
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ out:
+ return err_code;
+}
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "external-check", proxy_parse_extcheck },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/fcgi-app.c b/src/fcgi-app.c
new file mode 100644
index 0000000..00562f8
--- /dev/null
+++ b/src/fcgi-app.c
@@ -0,0 +1,1133 @@
+/*
+ * Functions about FCGI applications and filters.
+ *
+ * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/chunk.h>
+#include <haproxy/errors.h>
+#include <haproxy/fcgi-app.h>
+#include <haproxy/filters.h>
+#include <haproxy/http_fetch.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/server-t.h>
+#include <haproxy/session.h>
+#include <haproxy/sink.h>
+#include <haproxy/tools.h>
+
+
+/* Global list of all FCGI applications */
+static struct fcgi_app *fcgi_apps = NULL;
+
+struct flt_ops fcgi_flt_ops;
+const char *fcgi_flt_id = "FCGI filter";
+
+DECLARE_STATIC_POOL(pool_head_fcgi_flt_ctx, "fcgi_flt_ctx", sizeof(struct fcgi_flt_ctx));
+DECLARE_STATIC_POOL(pool_head_fcgi_param_rule, "fcgi_param_rule", sizeof(struct fcgi_param_rule));
+DECLARE_STATIC_POOL(pool_head_fcgi_hdr_rule, "fcgi_hdr_rule", sizeof(struct fcgi_hdr_rule));
+
+/**************************************************************************/
+/***************************** Utils **************************************/
+/**************************************************************************/
+/* Makes a fcgi parameter name (prefixed by ':fcgi-') with <name> (in
+ * lowercase). All non-alphanumeric characters are replaced by an underscore
+ * ('_'). The result is copied into <dst> and the corresponding ist is returned.
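+ * For example, the name "X-Script-Name" yields ":fcgi-x_script_name".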
+ */
+static struct ist fcgi_param_name(char *dst, const struct ist name)
+{
+ size_t ofs1, ofs2;
+
+ memcpy(dst, ":fcgi-", 6);
+ ofs1 = 6;
+ for (ofs2 = 0; ofs2 < name.len; ofs2++) {
+ if (isalnum((unsigned char)name.ptr[ofs2]))
+ dst[ofs1++] = ist_lc[(unsigned char)name.ptr[ofs2]];
+ else
+ dst[ofs1++] = '_';
+ }
+ return ist2(dst, ofs1);
+}
+
+/* Returns a pointer to the FCGI application matching the name <name>. NULL is
+ * returned if no match is found.
+ */
+struct fcgi_app *fcgi_app_find_by_name(const char *name)
+{
+ struct fcgi_app *app;
+
+ for (app = fcgi_apps; app != NULL; app = app->next) {
+ if (strcmp(app->name, name) == 0)
+ return app;
+ }
+
+ return NULL;
+}
+
+struct fcgi_flt_conf *find_px_fcgi_conf(struct proxy *px)
+{
+ struct flt_conf *fconf;
+
+ list_for_each_entry(fconf, &px->filter_configs, list) {
+ if (fconf->id == fcgi_flt_id)
+ return fconf->conf;
+ }
+ return NULL;
+}
+
+struct fcgi_flt_ctx *find_strm_fcgi_ctx(struct stream *s)
+{
+ struct filter *filter;
+
+ if (!s)
+ return NULL;
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_ID(filter) == fcgi_flt_id)
+ return FLT_CONF(filter);
+ }
+ return NULL;
+}
+
+struct fcgi_app *get_px_fcgi_app(struct proxy *px)
+{
+ struct fcgi_flt_conf *fcgi_conf = find_px_fcgi_conf(px);
+
+ if (fcgi_conf)
+ return fcgi_conf->app;
+ return NULL;
+}
+
+struct fcgi_app *get_strm_fcgi_app(struct stream *s)
+{
+ struct fcgi_flt_ctx *fcgi_ctx = find_strm_fcgi_ctx(s);
+
+ if (fcgi_ctx)
+ return fcgi_ctx->app;
+ return NULL;
+}
+
+static void fcgi_release_rule_conf(struct fcgi_rule_conf *rule)
+{
+ if (!rule)
+ return;
+ free(rule->name);
+ free(rule->value);
+ free_acl_cond(rule->cond);
+ free(rule);
+}
+
+static void fcgi_release_rule(struct fcgi_rule *rule)
+{
+ if (!rule)
+ return;
+
+ if (!LIST_ISEMPTY(&rule->value)) {
+ struct logformat_node *lf, *lfb;
+
+ list_for_each_entry_safe(lf, lfb, &rule->value, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ }
+ /* ->cond and ->name are not owned by the rule */
+ free(rule);
+}
+
+/**************************************************************************/
+/*********************** FCGI Sample fetches ******************************/
+/**************************************************************************/
+
+static int smp_fetch_fcgi_docroot(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct fcgi_app *app = get_strm_fcgi_app(smp->strm);
+
+ if (!app)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = app->docroot.ptr;
+ smp->data.u.str.data = app->docroot.len;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_fcgi_index(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct fcgi_app *app = get_strm_fcgi_app(smp->strm);
+
+ if (!app || !istlen(app->index))
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = app->index.ptr;
+ smp->data.u.str.data = app->index.len;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
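+
+/* Illustrative usage (an assumption, not from this file): once a stream is
+ * attached to an fcgi-app, these fetches can be used from the configuration,
+ * e.g.:
+ *
+ * http-request set-header X-Docroot %[fcgi.docroot]
+ */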
+
+/**************************************************************************/
+/************************** FCGI filter ***********************************/
+/**************************************************************************/
+static int fcgi_flt_init(struct proxy *px, struct flt_conf *fconf)
+{
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+static void fcgi_flt_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct fcgi_flt_conf *fcgi_conf = fconf->conf;
+ struct fcgi_rule *rule, *back;
+
+ if (!fcgi_conf)
+ return;
+
+ free(fcgi_conf->name);
+
+ list_for_each_entry_safe(rule, back, &fcgi_conf->param_rules, list) {
+ LIST_DELETE(&rule->list);
+ fcgi_release_rule(rule);
+ }
+
+ list_for_each_entry_safe(rule, back, &fcgi_conf->hdr_rules, list) {
+ LIST_DELETE(&rule->list);
+ fcgi_release_rule(rule);
+ }
+
+ free(fcgi_conf);
+}
+
+static int fcgi_flt_check(struct proxy *px, struct flt_conf *fconf)
+{
+ struct fcgi_flt_conf *fcgi_conf = fconf->conf;
+ struct fcgi_rule_conf *crule, *back;
+ struct fcgi_rule *rule = NULL;
+ struct flt_conf *f;
+ char *errmsg = NULL;
+
+ fcgi_conf->app = fcgi_app_find_by_name(fcgi_conf->name);
+ if (!fcgi_conf->app) {
+ ha_alert("proxy '%s' : fcgi-app '%s' not found.\n",
+ px->id, fcgi_conf->name);
+ goto err;
+ }
+
+ list_for_each_entry(f, &px->filter_configs, list) {
+ if (f->id == http_comp_flt_id || f->id == cache_store_flt_id)
+ continue;
+ else if ((f->id == fconf->id) && f->conf != fcgi_conf) {
+ ha_alert("proxy '%s' : only one fcgi-app supported per backend.\n",
+ px->id);
+ goto err;
+ }
+ else if (f->id != fconf->id) {
+ /* Implicit declaration is only allowed for the
+ * compression and cache filters. For other filters, an
+ * explicit declaration is required. */
+ ha_alert("config: proxy '%s': require an explicit filter declaration "
+ "to use the fcgi-app '%s'.\n", px->id, fcgi_conf->name);
+ goto err;
+ }
+ }
+
+ list_for_each_entry_safe(crule, back, &fcgi_conf->app->conf.rules, list) {
+ rule = calloc(1, sizeof(*rule));
+ if (!rule) {
+ ha_alert("proxy '%s' : out of memory.\n", px->id);
+ goto err;
+ }
+ rule->type = crule->type;
+ rule->name = ist(crule->name);
+ rule->cond = crule->cond;
+ LIST_INIT(&rule->value);
+
+ if (crule->value) {
+ if (!parse_logformat_string(crule->value, px, &rule->value, LOG_OPT_HTTP,
+ SMP_VAL_BE_HRQ_HDR, &errmsg)) {
+ ha_alert("proxy '%s' : %s.\n", px->id, errmsg);
+ goto err;
+ }
+ }
+
+ if (rule->type == FCGI_RULE_SET_PARAM || rule->type == FCGI_RULE_UNSET_PARAM)
+ LIST_APPEND(&fcgi_conf->param_rules, &rule->list);
+ else /* FCGI_RULE_PASS_HDR/FCGI_RULE_HIDE_HDR */
+ LIST_APPEND(&fcgi_conf->hdr_rules, &rule->list);
+ }
+ return 0;
+
+ err:
+ free(errmsg);
+ free(rule);
+ return 1;
+}
+
+static int fcgi_flt_start(struct stream *s, struct filter *filter)
+{
+ struct fcgi_flt_conf *fcgi_conf = FLT_CONF(filter);
+ struct fcgi_flt_ctx *fcgi_ctx;
+
+ fcgi_ctx = pool_alloc(pool_head_fcgi_flt_ctx);
+ if (fcgi_ctx == NULL) {
+ // FIXME: send a warning
+ return 0;
+ }
+ fcgi_ctx->filter = filter;
+ fcgi_ctx->app = fcgi_conf->app;
+ filter->ctx = fcgi_ctx;
+
+ s->req.analysers |= AN_REQ_HTTP_BODY;
+ return 1;
+}
+
+static void fcgi_flt_stop(struct stream *s, struct filter *filter)
+{
+ struct fcgi_flt_ctx *fcgi_ctx = filter->ctx;
+
+ if (!fcgi_ctx)
+ return;
+ pool_free(pool_head_fcgi_flt_ctx, fcgi_ctx);
+ filter->ctx = NULL;
+}
+
+static int fcgi_flt_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct session *sess = strm_sess(s);
+ struct buffer *value;
+ struct fcgi_flt_conf *fcgi_conf = FLT_CONF(filter);
+ struct fcgi_rule *rule;
+ struct fcgi_param_rule *param_rule;
+ struct fcgi_hdr_rule *hdr_rule;
+ struct ebpt_node *node, *next;
+ struct eb_root param_rules = EB_ROOT;
+ struct eb_root hdr_rules = EB_ROOT;
+ struct htx *htx;
+ struct http_hdr_ctx ctx;
+ int ret;
+
+ htx = htxbuf(&msg->chn->buf);
+
+ if (msg->chn->flags & CF_ISRESP) {
+ struct htx_sl *sl;
+
+ /* Remove the header "Status:" from the response */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("status"), &ctx, 1))
+ http_remove_header(htx, &ctx);
+
+ /* Add the header "Date:" if not found */
+ ctx.blk = NULL;
+ if (!http_find_header(htx, ist("date"), &ctx, 1)) {
+ struct tm tm;
+
+ get_gmtime(date.tv_sec, &tm);
+ trash.data = strftime(trash.area, trash.size, "%a, %d %b %Y %T %Z", &tm);
+ if (trash.data)
+ http_add_header(htx, ist("date"), ist2(trash.area, trash.data));
+ }
+
+ /* Add the header "Content-Length:" if possible */
+ sl = http_get_stline(htx);
+ if (s->txn->meth != HTTP_METH_HEAD && sl &&
+ (msg->flags & (HTTP_MSGF_XFER_LEN|HTTP_MSGF_CNT_LEN|HTTP_MSGF_TE_CHNK)) == HTTP_MSGF_XFER_LEN &&
+ (htx->flags & HTX_FL_EOM)) {
+ struct htx_blk * blk;
+ char *end;
+ size_t len = 0;
+
+ for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+ end = ultoa_o(len, trash.area, trash.size);
+ if (http_add_header(htx, ist("content-length"), ist2(trash.area, end-trash.area))) {
+ sl->flags |= HTX_SL_F_CLEN;
+ msg->flags |= HTTP_MSGF_CNT_LEN;
+ }
+ }
+
+ return 1;
+ }
+
+ /* Analyze the request's headers */
+
+ value = alloc_trash_chunk();
+ if (!value)
+ goto end;
+
+ list_for_each_entry(rule, &fcgi_conf->param_rules, list) {
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ /* the rule does not match */
+ if (!ret)
+ continue;
+ }
+
+ param_rule = NULL;
+ node = ebis_lookup_len(&param_rules, rule->name.ptr, rule->name.len);
+ if (node) {
+ param_rule = container_of(node, struct fcgi_param_rule, node);
+ ebpt_delete(node);
+ }
+ else {
+ param_rule = pool_alloc(pool_head_fcgi_param_rule);
+ if (param_rule == NULL)
+ goto param_rule_err;
+ }
+
+ param_rule->node.key = rule->name.ptr;
+ param_rule->name = rule->name;
+ param_rule->value = &rule->value;
+ ebis_insert(&param_rules, &param_rule->node);
+ }
+
+ list_for_each_entry(rule, &fcgi_conf->hdr_rules, list) {
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ /* the rule does not match */
+ if (!ret)
+ continue;
+ }
+
+ hdr_rule = NULL;
+ node = ebis_lookup_len(&hdr_rules, rule->name.ptr, rule->name.len);
+ if (node) {
+ hdr_rule = container_of(node, struct fcgi_hdr_rule, node);
+ ebpt_delete(node);
+ }
+ else {
+ hdr_rule = pool_alloc(pool_head_fcgi_hdr_rule);
+ if (hdr_rule == NULL)
+ goto hdr_rule_err;
+ }
+
+ hdr_rule->node.key = rule->name.ptr;
+ hdr_rule->name = rule->name;
+ hdr_rule->pass = (rule->type == FCGI_RULE_PASS_HDR);
+ ebis_insert(&hdr_rules, &hdr_rule->node);
+ }
+
+ node = ebpt_first(&param_rules);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ param_rule = container_of(node, struct fcgi_param_rule, node);
+ node = next;
+
+ b_reset(value);
+ value->data = build_logline(s, value->area, value->size, param_rule->value);
+ if (!value->data) {
+ pool_free(pool_head_fcgi_param_rule, param_rule);
+ continue;
+ }
+ if (!http_add_header(htx, param_rule->name, ist2(value->area, value->data)))
+ goto rewrite_err;
+ pool_free(pool_head_fcgi_param_rule, param_rule);
+ }
+
+ node = ebpt_first(&hdr_rules);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ hdr_rule = container_of(node, struct fcgi_hdr_rule, node);
+ node = next;
+
+ if (!hdr_rule->pass) {
+ ctx.blk = NULL;
+ while (http_find_header(htx, hdr_rule->name, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ }
+ pool_free(pool_head_fcgi_hdr_rule, hdr_rule);
+ }
+
+ goto end;
+
+ rewrite_err:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+ hdr_rule_err:
+ node = ebpt_first(&hdr_rules);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ hdr_rule = container_of(node, struct fcgi_hdr_rule, node);
+ node = next;
+ pool_free(pool_head_fcgi_hdr_rule, hdr_rule);
+ }
+ param_rule_err:
+ node = ebpt_first(&param_rules);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ param_rule = container_of(node, struct fcgi_param_rule, node);
+ node = next;
+ pool_free(pool_head_fcgi_param_rule, param_rule);
+ }
+ end:
+ free_trash_chunk(value);
+ return 1;
+}
+
+struct flt_ops fcgi_flt_ops = {
+ .init = fcgi_flt_init,
+ .check = fcgi_flt_check,
+ .deinit = fcgi_flt_deinit,
+
+ .attach = fcgi_flt_start,
+ .detach = fcgi_flt_stop,
+
+ .http_headers = fcgi_flt_http_headers,
+};
+
+/**************************************************************************/
+/*********************** FCGI Config parsing ******************************/
+/**************************************************************************/
+static int
+parse_fcgi_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct flt_conf *f, *back;
+ struct fcgi_flt_conf *fcgi_conf = NULL;
+ char *name = NULL;
+ int pos = *cur_arg;
+
+ /* Get the fcgi-app name */
+ if (!*args[pos + 1]) {
+ memprintf(err, "%s : expects a <name> argument", args[pos]);
+ goto err;
+ }
+ name = strdup(args[pos + 1]);
+ if (!name) {
+ memprintf(err, "%s '%s' : out of memory", args[pos], args[pos + 1]);
+ goto err;
+ }
+ pos += 2;
+
+ /* Check if an fcgi-app filter with the same name already exists */
+ list_for_each_entry_safe(f, back, &px->filter_configs, list) {
+ if (f->id != fcgi_flt_id)
+ continue;
+ fcgi_conf = f->conf;
+ if (strcmp(name, fcgi_conf->name) != 0) {
+ fcgi_conf = NULL;
+ continue;
+ }
+
+ /* Place the filter at its right position */
+ LIST_DELETE(&f->list);
+ free(f);
+ ha_free(&name);
+ break;
+ }
+
+ /* No other fcgi-app filter found, create configuration for the explicit one */
+ if (!fcgi_conf) {
+ fcgi_conf = calloc(1, sizeof(*fcgi_conf));
+ if (!fcgi_conf) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto err;
+ }
+ fcgi_conf->name = name;
+ LIST_INIT(&fcgi_conf->param_rules);
+ LIST_INIT(&fcgi_conf->hdr_rules);
+ }
+
+ fconf->id = fcgi_flt_id;
+ fconf->conf = fcgi_conf;
+ fconf->ops = &fcgi_flt_ops;
+
+ *cur_arg = pos;
+ return 0;
+ err:
+ free(name);
+ return -1;
+}
+
+/* Parses the "use-fcgi-app" proxy keyword */
+static int proxy_parse_use_fcgi_app(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct flt_conf *fconf = NULL;
+ struct fcgi_flt_conf *fcgi_conf = NULL;
+ int retval = 0;
+
+ if ((curpx->cap & PR_CAP_DEF) || !(curpx->cap & PR_CAP_BE)) {
+ memprintf(err, "'%s' only available in backend or listen section", args[0]);
+ retval = -1;
+ goto end;
+ }
+
+ if (!*(args[1])) {
+ memprintf(err, "'%s' expects <name> as argument", args[0]);
+ retval = -1;
+ goto end;
+ }
+
+ /* check if an fcgi filter was already registered with this name;
+ * if that's the case, we must use it. */
+ list_for_each_entry(fconf, &curpx->filter_configs, list) {
+ if (fconf->id == fcgi_flt_id) {
+ fcgi_conf = fconf->conf;
+ if (fcgi_conf && strcmp((char *)fcgi_conf->name, args[1]) == 0)
+ goto end;
+ memprintf(err, "'%s' : only one fcgi-app supported per backend", args[0]);
+ retval = -1;
+ goto end;
+ }
+ }
+
+ /* Create the FCGI filter config */
+ fcgi_conf = calloc(1, sizeof(*fcgi_conf));
+ if (!fcgi_conf)
+ goto err;
+ fcgi_conf->name = strdup(args[1]);
+ LIST_INIT(&fcgi_conf->param_rules);
+ LIST_INIT(&fcgi_conf->hdr_rules);
+
+ /* Register the filter */
+ fconf = calloc(1, sizeof(*fconf));
+ if (!fconf)
+ goto err;
+ fconf->id = fcgi_flt_id;
+ fconf->conf = fcgi_conf;
+ fconf->ops = &fcgi_flt_ops;
+ LIST_APPEND(&curpx->filter_configs, &fconf->list);
+
+ end:
+ return retval;
+ err:
+ if (fcgi_conf) {
+ free(fcgi_conf->name);
+ free(fcgi_conf);
+ }
+ memprintf(err, "out of memory");
+ retval = -1;
+ goto end;
+}
+
+/* Finishes the parsing of FCGI application of proxies and servers */
+static int cfg_fcgi_apps_postparser()
+{
+ struct fcgi_app *curapp;
+ struct proxy *px;
+ struct server *srv;
+ int err_code = 0;
+
+ for (px = proxies_list; px; px = px->next) {
+ struct fcgi_flt_conf *fcgi_conf = find_px_fcgi_conf(px);
+ int nb_fcgi_srv = 0;
+
+ if (px->mode == PR_MODE_TCP && fcgi_conf) {
+ ha_alert("proxy '%s': FCGI application cannot be used in non-HTTP mode.\n",
+ px->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* By default, for FCGI-ready backends, HTTP request header names
+ * are restricted and the "delete" policy is set.
+ */
+ if (fcgi_conf && !(px->options2 & PR_O2_RSTRICT_REQ_HDR_NAMES_MASK))
+ px->options2 |= PR_O2_RSTRICT_REQ_HDR_NAMES_DEL;
+
+ for (srv = px->srv; srv; srv = srv->next) {
+ if (srv->mux_proto && isteq(srv->mux_proto->token, ist("fcgi"))) {
+ nb_fcgi_srv++;
+ if (fcgi_conf)
+ continue;
+ ha_alert("proxy '%s': FCGI server '%s' has no FCGI app configured.\n",
+ px->id, srv->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+ if (fcgi_conf && !nb_fcgi_srv) {
+ ha_alert("proxy '%s': FCGI app configured but no FCGI server found.\n",
+ px->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+
+ for (curapp = fcgi_apps; curapp != NULL; curapp = curapp->next) {
+ if (!istlen(curapp->docroot)) {
+ ha_alert("fcgi-app '%s': no docroot configured.\n",
+ curapp->name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ if (!(curapp->flags & (FCGI_APP_FL_MPXS_CONNS|FCGI_APP_FL_GET_VALUES))) {
+ if (curapp->maxreqs > 1) {
+ ha_warning("fcgi-app '%s': multiplexing not supported, "
+ "ignore the option 'max-reqs'.\n",
+ curapp->name);
+ err_code |= ERR_WARN;
+ }
+ curapp->maxreqs = 1;
+ }
+
+ err_code |= postresolve_logger_list(&curapp->loggers, "fcgi-app", curapp->name);
+ }
+
+ end:
+ return err_code;
+}
+
+static int fcgi_app_add_rule(struct fcgi_app *curapp, enum fcgi_rule_type type, char *name, char *value,
+ struct acl_cond *cond, char **err)
+{
+ struct fcgi_rule_conf *rule;
+
+ /* Add a new rule */
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ goto err;
+ LIST_INIT(&rule->list);
+ rule->type = type;
+ if (type == FCGI_RULE_SET_PARAM || type == FCGI_RULE_UNSET_PARAM) {
+ struct ist fname = fcgi_param_name(trash.area, ist(name));
+ rule->name = my_strndup(fname.ptr, fname.len);
+ }
+ else { /* FCGI_RULE_PASS_HDR/FCGI_RULE_HIDE_HDR */
+ struct ist fname = ist2bin_lc(trash.area, ist(name));
+ rule->name = my_strndup(fname.ptr, fname.len);
+ }
+ if (!rule->name)
+ goto err;
+
+ if (value) {
+ rule->value = strdup(value);
+ if (!rule->value)
+ goto err;
+ }
+ rule->cond = cond;
+ LIST_APPEND(&curapp->conf.rules, &rule->list);
+ return 1;
+
+ err:
+ if (rule) {
+ free(rule->name);
+ free(rule->value);
+ free(rule);
+ }
+ free_acl_cond(cond);
+ memprintf(err, "out of memory");
+ return 0;
+}
+
+/* Parses "fcgi-app" section */
+static int cfg_parse_fcgi_app(const char *file, int linenum, char **args, int kwm)
+{
+ static struct fcgi_app *curapp = NULL;
+ struct acl_cond *cond = NULL;
+ char *name, *value = NULL;
+ enum fcgi_rule_type type;
+ int err_code = 0;
+ const char *err;
+ char *errmsg = NULL;
+
+ if (strcmp(args[0], "fcgi-app") == 0) { /* new fcgi-app */
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d]: '%s' expects <name> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ for (curapp = fcgi_apps; curapp != NULL; curapp = curapp->next) {
+ if (strcmp(curapp->name, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: fcgi-app section '%s' has the same name as another one declared at %s:%d.\n",
+ file, linenum, args[1], curapp->conf.file, curapp->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ curapp = calloc(1, sizeof(*curapp));
+ if (!curapp) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curapp->next = fcgi_apps;
+ fcgi_apps = curapp;
+ curapp->flags = FCGI_APP_FL_KEEP_CONN;
+ curapp->docroot = ist(NULL);
+ curapp->index = ist(NULL);
+ curapp->pathinfo_re = NULL;
+ curapp->name = strdup(args[1]);
+ curapp->maxreqs = 1;
+ curapp->conf.file = strdup(file);
+ curapp->conf.line = linenum;
+ LIST_INIT(&curapp->acls);
+ LIST_INIT(&curapp->loggers);
+ LIST_INIT(&curapp->conf.args.list);
+ LIST_INIT(&curapp->conf.rules);
+
+ /* Set info about authentication */
+ if (!fcgi_app_add_rule(curapp, FCGI_RULE_SET_PARAM, "REMOTE_USER", "%[http_auth_user]", NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_SET_PARAM, "AUTH_TYPE", "%[http_auth_type]", NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ /* Hide hop-by-hop headers by default */
+ if (!fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "connection", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "keep-alive", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "authorization", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "proxy", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "proxy-authorization", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "proxy-authenticate", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "te", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "trailers", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "transfer-encoding", NULL, NULL, &errmsg) ||
+ !fcgi_app_add_rule(curapp, FCGI_RULE_HIDE_HDR, "upgrade", NULL, NULL, &errmsg)) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "docroot") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <path> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ istfree(&curapp->docroot);
+ curapp->docroot = ist(strdup(args[1]));
+ if (!isttest(curapp->docroot)) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ }
+ else if (strcmp(args[0], "path-info") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <regex> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ regex_free(curapp->pathinfo_re);
+ curapp->pathinfo_re = regex_comp(args[1], 1, 1, &errmsg);
+ if (!curapp->pathinfo_re) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "index") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <filename> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ istfree(&curapp->index);
+ curapp->index = ist(strdup(args[1]));
+ if (!isttest(curapp->index)) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ }
+ else if (strcmp(args[0], "acl") == 0) {
+ const char *err;
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in acl name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strcasecmp(args[1], "or") == 0) {
+ ha_alert("parsing [%s:%d] : acl name '%s' will never match. 'or' is used to express a "
+ "logical disjunction within a condition.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (parse_acl((const char **)args+1, &curapp->acls, &errmsg, &curapp->conf.args, file, linenum) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing ACL '%s' : %s.\n",
+ file, linenum, args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "set-param") == 0) {
+ if (!*(args[1]) || !*(args[2])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> and <value> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ type = FCGI_RULE_SET_PARAM;
+ name = args[1];
+ value = args[2];
+ cond = NULL;
+ args += 3;
+
+ parse_cond_rule:
+ if (!*(args[0])) /* No condition */
+ goto add_rule;
+
+ if (strcmp(args[0], "if") == 0)
+ cond = parse_acl_cond((const char **)args+1, &curapp->acls, ACL_COND_IF, &errmsg, &curapp->conf.args,
+ file, linenum);
+ else if (strcmp(args[0], "unless") == 0)
+ cond = parse_acl_cond((const char **)args+1, &curapp->acls, ACL_COND_UNLESS, &errmsg, &curapp->conf.args,
+ file, linenum);
+ if (!cond) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ name, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ add_rule:
+ if (!fcgi_app_add_rule(curapp, type, name, value, cond, &errmsg)) {
+ ha_alert("parsing [%s:%d] : '%s' : %s.\n", file, linenum,
+ name, errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+#if 0 /* Disabled for now */
+ else if (!strcmp(args[0], "unset-param")) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ type = FCGI_RULE_UNSET_PARAM;
+ name = args[1];
+ value = NULL;
+ cond = NULL;
+ args += 2;
+ goto parse_cond_rule;
+ }
+#endif
+ else if (strcmp(args[0], "pass-header") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ type = FCGI_RULE_PASS_HDR;
+ name = args[1];
+ value = NULL;
+ cond = NULL;
+ args += 2;
+ goto parse_cond_rule;
+ }
+#if 0 /* Disabled for now */
+ else if (!strcmp(args[0], "hide-header")) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ type = FCGI_RULE_HIDE_HDR;
+ name = args[1];
+ value = NULL;
+ cond = NULL;
+ args += 2;
+ goto parse_cond_rule;
+ }
+#endif
+ else if (strcmp(args[0], "option") == 0) {
+ if (!*(args[1])) {
+ ha_alert("parsing [%s:%d]: '%s' expects an option name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ else if (strcmp(args[1], "keep-conn") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD)
+ curapp->flags |= FCGI_APP_FL_KEEP_CONN;
+ else if (kwm == KWM_NO)
+ curapp->flags &= ~FCGI_APP_FL_KEEP_CONN;
+ }
+ else if (strcmp(args[1], "get-values") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD)
+ curapp->flags |= FCGI_APP_FL_GET_VALUES;
+ else if (kwm == KWM_NO)
+ curapp->flags &= ~FCGI_APP_FL_GET_VALUES;
+ }
+ else if (strcmp(args[1], "mpxs-conns") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == KWM_STD)
+ curapp->flags |= FCGI_APP_FL_MPXS_CONNS;
+ else if (kwm == KWM_NO)
+ curapp->flags &= ~FCGI_APP_FL_MPXS_CONNS;
+ }
+ else if (strcmp(args[1], "max-reqs") == 0) {
+ if (kwm != KWM_STD) {
+ ha_alert("parsing [%s:%d]: negation/default is not supported for option '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (!*(args[2])) {
+ ha_alert("parsing [%s:%d]: option '%s' expects an integer argument.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args_idx(1, 1, file, linenum, args, &err_code))
+ goto out;
+
+ curapp->maxreqs = atol(args[2]);
+ if (!curapp->maxreqs) {
+ ha_alert("parsing [%s:%d]: option '%s' expects a strictly positive integer argument.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown option '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else if (strcmp(args[0], "log-stderr") == 0) {
+ if (!parse_logger(args, &curapp->loggers, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else {
+ ha_alert("parsing [%s:%d]: unknown keyword '%s' in '%s' section\n", file, linenum, args[0], "fcgi-app");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+out:
+ free(errmsg);
+ return err_code;
+}
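+
+/* Illustrative section (an assumption for documentation, not from this file)
+ * accepted by the parser above, for a PHP-FPM style application:
+ *
+ * fcgi-app php-fpm
+ * docroot /var/www/html
+ * index index.php
+ * path-info ^(/.+\.php)(/.*)?$
+ * option keep-conn
+ *
+ * It is then referenced from a backend with "use-fcgi-app php-fpm" on servers
+ * speaking the FCGI mux protocol ("proto fcgi").
+ */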
+
+
+/**************************************************************************/
+/*********************** FCGI Deinit functions ****************************/
+/**************************************************************************/
+void fcgi_apps_deinit()
+{
+ struct fcgi_app *curapp, *nextapp;
+ struct logger *log, *logb;
+
+ for (curapp = fcgi_apps; curapp != NULL; curapp = nextapp) {
+ struct fcgi_rule_conf *rule, *back;
+
+ free(curapp->name);
+ istfree(&curapp->docroot);
+ istfree(&curapp->index);
+ regex_free(curapp->pathinfo_re);
+ free(curapp->conf.file);
+
+ list_for_each_entry_safe(log, logb, &curapp->loggers, list) {
+ LIST_DELETE(&log->list);
+ free(log);
+ }
+
+ list_for_each_entry_safe(rule, back, &curapp->conf.rules, list) {
+ LIST_DELETE(&rule->list);
+ fcgi_release_rule_conf(rule);
+ }
+
+ nextapp = curapp->next;
+ free(curapp);
+ }
+}
+
+
+/**************************************************************************/
+/*************** Keywords definition and registration *********************/
+/**************************************************************************/
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "use-fcgi-app", proxy_parse_use_fcgi_app },
+ { 0, NULL, NULL },
+}};
+
+// FIXME: Add rep.fcgi smp_fetch
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "fcgi.docroot", smp_fetch_fcgi_docroot, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "fcgi.index", smp_fetch_fcgi_index, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { /* END */ }
+}};
+
+/* Declare the filter parser for "fcgi-app" keyword */
+static struct flt_kw_list filter_kws = { "FCGI", { }, {
+ { "fcgi-app", parse_fcgi_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);
+
+INITCALL1(STG_REGISTER, hap_register_post_deinit, fcgi_apps_deinit);
+
+REGISTER_CONFIG_SECTION("fcgi-app", cfg_parse_fcgi_app, NULL);
+REGISTER_CONFIG_POSTPARSER("fcgi-apps", cfg_fcgi_apps_postparser);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/fcgi.c b/src/fcgi.c
new file mode 100644
index 0000000..1d1a82b
--- /dev/null
+++ b/src/fcgi.c
@@ -0,0 +1,294 @@
+/*
+ * FastCGI protocol processing
+ *
+ * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <haproxy/buf.h>
+#include <haproxy/fcgi.h>
+#include <haproxy/istbuf.h>
+
+/* Encodes the header of an FCGI record into the chunk <out>. It returns
+ * non-zero on success and 0 on failure (buffer full). <out> is a chunk, so the
+ * wrapping is not handled by this function. It is the caller's responsibility
+ * to ensure enough contiguous space is available.
+ */
+int fcgi_encode_record_hdr(struct buffer *out, const struct fcgi_header *h)
+{
+ size_t len = out->data;
+
+ if (len + 8 >= b_size(out))
+ return 0;
+
+ out->area[len++] = h->vsn;
+ out->area[len++] = h->type;
+ out->area[len++] = ((h->id >> 8) & 0xff);
+ out->area[len++] = (h->id & 0xff);
+ out->area[len++] = ((h->len >> 8) & 0xff);
+ out->area[len++] = (h->len & 0xff);
+ out->area[len++] = h->padding;
+ out->area[len++] = 0; /* rsv */
+
+ out->data = len;
+ return 1;
+}
+
+/* Decodes a FCGI record header from offset <o> of buffer <in> into descriptor
+ * <h>. The buffer may wrap so each byte read must be checked. The header is
+ * formed like this :
+ *
+ * b0 b1 b2 b3 b4 b5 b6 b7
+ * +-----+------+-----+-----+------+------+--------+-----+
+ * | vsn | type | id1 | id0 | len1 | len0 | padlen | rsv |
+ * +-----+------+-----+-----+------+------+--------+-----+
+ *
+ * Returns zero if some bytes are missing, otherwise the number of read bytes.
+ */
+size_t fcgi_decode_record_hdr(const struct buffer *in, size_t o, struct fcgi_header *h)
+{
+ if (b_data(in) < o + 8)
+ return 0;
+
+ h->vsn = (uint8_t)(*b_peek(in, o));
+ h->type = (uint8_t)(*b_peek(in, o+1));
+ h->id = ((uint8_t)(*b_peek(in, o+2)) << 8) + (uint8_t)(*b_peek(in, o+3));
+ h->len = ((uint8_t)(*b_peek(in, o+4)) << 8) + (uint8_t)(*b_peek(in, o+5));
+ h->padding = (uint8_t)(*b_peek(in, o+6));
+ /* ignore rsv */
+
+ return 8;
+}
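+
+/* For example, the 8 bytes 01 06 00 01 00 2a 02 00 decode to vsn=1, type=6
+ * (STDOUT in the FastCGI spec), id=1, len=42, padding=2.
+ */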
+
+/* Encodes the payload part of a BEGIN_REQUEST record into the chunk <out>. It
+ * returns non-zero on success and 0 on failure (buffer full). <out> is a chunk,
+ * so the wrapping is not handled by this function. It is the caller's
+ * responsibility to ensure enough contiguous space is available.
+ */
+int fcgi_encode_begin_request(struct buffer *out, const struct fcgi_begin_request *r)
+{
+ size_t len = out->data;
+
+ if (len + 8 >= b_size(out))
+ return 0;
+
+ out->area[len++] = ((r->role >> 8) & 0xff);
+ out->area[len++] = (r->role & 0xff);
+ out->area[len++] = r->flags;
+ out->area[len++] = 0; /* rsv */
+ out->area[len++] = 0;
+ out->area[len++] = 0;
+ out->area[len++] = 0;
+ out->area[len++] = 0;
+
+ out->data = len;
+ return 1;
+}
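+
+/* For example, a BEGIN_REQUEST payload for the RESPONDER role (1) with the
+ * KEEP_CONN flag (0x01) is encoded as 00 01 01 followed by five reserved zero
+ * bytes.
+ */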
+
+/* Encodes a parameter, part of the payload of a PARAM record, into the chunk
+ * <out>. It returns non-zero on success and 0 on failure (buffer full). <out>
+ * is a chunk, so the wrapping is not handled by this function. It is the
+ * caller's responsibility to ensure enough contiguous space is available. The
+ * parameter's name is converted to upper case and non-alphanumeric characters
+ * are replaced by an underscore.
+ */
+int fcgi_encode_param(struct buffer *out, const struct fcgi_param *p)
+{
+ size_t off, len = out->data;
+ int nbytes, vbytes;
+
+ nbytes = (!(p->n.len >> 7) ? 1 : 4);
+ vbytes = (!(p->v.len >> 7) ? 1 : 4);
+ if ((len + nbytes + p->n.len + vbytes + p->v.len) >= b_size(out))
+ return 0;
+
+ if (nbytes == 1)
+ out->area[len++] = (p->n.len & 0xff);
+ else {
+ out->area[len++] = (((p->n.len >> 24) & 0xff) | 0x80);
+ out->area[len++] = ((p->n.len >> 16) & 0xff);
+ out->area[len++] = ((p->n.len >> 8) & 0xff);
+ out->area[len++] = (p->n.len & 0xff);
+ }
+
+ if (vbytes == 1)
+ out->area[len++] = (p->v.len & 0xff);
+ else {
+ out->area[len++] = (((p->v.len >> 24) & 0xff) | 0x80);
+ out->area[len++] = ((p->v.len >> 16) & 0xff);
+ out->area[len++] = ((p->v.len >> 8) & 0xff);
+ out->area[len++] = (p->v.len & 0xff);
+ }
+
+ for (off = 0; off < p->n.len; off++) {
+ if (isalnum((unsigned char)p->n.ptr[off]))
+ out->area[len++] = ist_uc[(unsigned char)p->n.ptr[off]];
+ else
+ out->area[len++] = '_';
+ }
+ if (p->v.len) {
+ ist2bin(out->area + len, p->v);
+ len += p->v.len;
+ }
+
+ out->data = len;
+ return 1;
+}
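+
+/* For example, a name of length 11 is encoded as the single byte 0x0b, while a
+ * value of length 300 does not fit on 7 bits and is encoded on 4 bytes as
+ * 80 00 01 2c (high bit set, then the length in big-endian order).
+ */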
+
+/* Decodes a parameter of a PARAM record from offset <o> of buffer <in> into the
+ * FCGI param <p>. The buffer may wrap so each byte read must be checked.
+ * Returns zero if some bytes are missing, otherwise the number of read bytes.
+ */
+size_t fcgi_decode_param(const struct buffer *in, size_t o, struct fcgi_param *p)
+{
+ size_t data = b_data(in);
+ size_t nlen, vlen, len = 0;
+ uint8_t b0, b1, b2, b3;
+
+ if (data < o + 1)
+ return 0;
+ b0 = *b_peek(in, o++);
+ if (!(b0 >> 7)) {
+ nlen = b0;
+ len++;
+ }
+ else {
+ if (data < o + 3)
+ return 0;
+ b1 = *b_peek(in, o++);
+ b2 = *b_peek(in, o++);
+ b3 = *b_peek(in, o++);
+ nlen = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ len += 4;
+ }
+
+ if (data < o + 1)
+ return 0;
+ b0 = *b_peek(in, o++);
+ if (!(b0 >> 7)) {
+ vlen = b0;
+ len++;
+ }
+ else {
+ if (data < o + 3)
+ return 0;
+ b1 = *b_peek(in, o++);
+ b2 = *b_peek(in, o++);
+ b3 = *b_peek(in, o++);
+ vlen = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ len += 4;
+ }
+
+ if (data < nlen + vlen)
+ return 0;
+
+ p->n = ist2(b_peek(in, o), nlen);
+ p->v = ist2(b_peek(in, o + nlen), vlen);
+ len += nlen + vlen;
+
+ return len;
+}
+
+
+/* Decodes a parameter of a PARAM record from offset <o> of buffer <in> into the
+ * FCGI param <p>. To call this function, the buffer must not wrap. Returns zero
+ * if some bytes are missing, otherwise the number of read bytes.
+ */
+size_t fcgi_aligned_decode_param(const struct buffer *in, size_t o, struct fcgi_param *p)
+{
+ size_t data = b_data(in);
+ size_t nlen, vlen, len = 0;
+ uint8_t b0, b1, b2, b3;
+
+ if (data < o + 1)
+ return 0;
+ b0 = in->area[o++];
+ if (!(b0 >> 7)) {
+ nlen = b0;
+ len++;
+ }
+ else {
+ if (data < o + 3)
+ return 0;
+ b1 = in->area[o++];
+ b2 = in->area[o++];
+ b3 = in->area[o++];
+ nlen = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ len += 4;
+ }
+
+ if (data < o + 1)
+ return 0;
+ b0 = in->area[o++];
+ if (!(b0 >> 7)) {
+ vlen = b0;
+ len++;
+ }
+ else {
+ if (data < o + 3)
+ return 0;
+ b1 = in->area[o++];
+ b2 = in->area[o++];
+ b3 = in->area[o++];
+ vlen = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ len += 4;
+ }
+
+ if (data < nlen + vlen)
+ return 0;
+
+ p->n = ist2(in->area + o, nlen);
+ p->v = ist2(in->area + o + nlen, vlen);
+ len += nlen + vlen;
+
+ return len;
+}
+
+/* Decodes the payload of an END_REQUEST record from offset <o> of buffer <in>
+ * into the END_REQUEST record <rec>. The buffer may wrap so each byte read
+ * must be
+ * checked. Returns zero if some bytes are missing, otherwise the number of read
+ * bytes.
+ */
+size_t fcgi_decode_end_request(const struct buffer *in, size_t o, struct fcgi_end_request *rec)
+{
+ uint8_t b0, b1, b2, b3;
+
+ if (b_data(in) < o + 8)
+ return 0;
+
+ b0 = *b_peek(in, o++);
+ b1 = *b_peek(in, o++);
+ b2 = *b_peek(in, o++);
+ b3 = *b_peek(in, o++);
+ rec->status = ((b0 & 0x7f) << 24) + (b1 << 16) + (b2 << 8) + b3;
+ rec->errcode = *b_peek(in, o++);
+ o += 3; /* ignore rsv */
+
+ return 8;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/fd.c b/src/fd.c
new file mode 100644
index 0000000..9d34315
--- /dev/null
+++ b/src/fd.c
@@ -0,0 +1,1348 @@
+/*
+ * File descriptors management functions.
+ *
+ * Copyright 2000-2014 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * There is no direct link between the FD and the updates list. There is only a
+ * bit in the fdtab[] to indicate that a file descriptor is already present in
+ * the updates list. Once an fd is present in the updates list, it will have to
+ * be considered even if its changes are reverted in the middle or if the fd is
+ * replaced.
+ *
+ * The event state for an FD, as found in fdtab[].state, is maintained for each
+ * direction. The state field is built this way, with R bits in the low nibble
+ * and W bits in the high nibble for ease of access and debugging :
+ *
+ * 7 6 5 4 3 2 1 0
+ * [ 0 | 0 | RW | AW | 0 | 0 | RR | AR ]
+ *
+ * A* = active *R = read
+ * R* = ready *W = write
+ *
+ * An FD is marked "active" when there is a desire to use it.
+ * An FD is marked "ready" when it has not faced a new EAGAIN since last wake-up
+ * (it is a cache of the last EAGAIN regardless of polling changes). Each poller
+ * has its own "polled" state for the same fd, as stored in the polled_mask.
+ *
+ * We have 4 possible states for each direction based on these 2 flags :
+ *
+ * +---+---+----------+---------------------------------------------+
+ * | R | A | State | Description |
+ * +---+---+----------+---------------------------------------------+
+ * | 0 | 0 | DISABLED | No activity desired, not ready. |
+ * | 0 | 1 | ACTIVE | Activity desired. |
+ * | 1 | 0 | STOPPED | End of activity. |
+ * | 1 | 1 | READY | Activity desired and reported. |
+ * +---+---+----------+---------------------------------------------+
+ *
+ * The transitions are pretty simple :
+ * - fd_want_*() : set flag A
+ * - fd_stop_*() : clear flag A
+ * - fd_cant_*() : clear flag R (when facing EAGAIN)
+ * - fd_may_*() : set flag R (upon return from poll())
+ *
+ * Each poller then computes its own polled state :
+ * if (A) { if (!R) P := 1 } else { P := 0 }
+ *
+ * The state transitions look like the diagram below.
+ *
+ * may +----------+
+ * ,----| DISABLED | (READY=0, ACTIVE=0)
+ * | +----------+
+ * | want | ^
+ * | | |
+ * | v | stop
+ * | +----------+
+ * | | ACTIVE | (READY=0, ACTIVE=1)
+ * | +----------+
+ * | | ^
+ * | may | |
+ * | v | EAGAIN (can't)
+ * | +--------+
+ * | | READY | (READY=1, ACTIVE=1)
+ * | +--------+
+ * | stop | ^
+ * | | |
+ * | v | want
+ * | +---------+
+ * `--->| STOPPED | (READY=1, ACTIVE=0)
+ * +---------+
+ */
+
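+/* As a sketch, a typical receive path walks these transitions (function names
+ * from fd.h, shown for illustration only):
+ *
+ *    fd_want_recv(fd);   // ACTIVE: the poller will enable polling (A=1, R=0)
+ *    ...poll() reports input...
+ *    fd_may_recv(fd);    // READY: polling may be disabled (A=1, R=1)
+ *    ...read() until EAGAIN...
+ *    fd_cant_recv(fd);   // back to ACTIVE, polling re-enabled
+ *    fd_stop_recv(fd);   // no more interest: DISABLED or STOPPED
+ */
+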
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/uio.h>
+
+#if defined(USE_POLL)
+#include <poll.h>
+#include <errno.h>
+#endif
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/port_range.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+
+
+struct fdtab *fdtab __read_mostly = NULL; /* array of all the file descriptors */
+struct polled_mask *polled_mask __read_mostly = NULL; /* Array for the polled_mask of each fd */
+struct fdinfo *fdinfo __read_mostly = NULL; /* less-often used infos for file descriptors */
+int totalconn; /* total # of terminated sessions */
+int actconn; /* # of active sessions */
+
+struct poller pollers[MAX_POLLERS] __read_mostly;
+struct poller cur_poller __read_mostly;
+int nbpollers = 0;
+
+volatile struct fdlist update_list[MAX_TGROUPS]; // Global update list
+
+THREAD_LOCAL int *fd_updt = NULL; // FD updates list
+THREAD_LOCAL int fd_nbupdt = 0; // number of updates in the list
+THREAD_LOCAL int poller_rd_pipe = -1; // Pipe to wake the thread
+int poller_wr_pipe[MAX_THREADS] __read_mostly; // Pipe to wake the threads
+
+volatile int ha_used_fds = 0; // Number of FD we're currently using
+static struct fdtab *fdtab_addr; /* address of the allocated area containing fdtab */
+
+/* adds fd <fd> to fd list <list> if it was not yet in it */
+void fd_add_to_fd_list(volatile struct fdlist *list, int fd)
+{
+ int next;
+ int new;
+ int old;
+ int last;
+
+redo_next:
+ next = HA_ATOMIC_LOAD(&fdtab[fd].update.next);
+ /* Check that we're not already in the cache, and if not, lock us. */
+ if (next > -2)
+ goto done;
+ if (next == -2)
+ goto redo_next;
+ if (!_HA_ATOMIC_CAS(&fdtab[fd].update.next, &next, -2))
+ goto redo_next;
+ __ha_barrier_atomic_store();
+
+ new = fd;
+redo_last:
+ /* First, insert in the linked list */
+ last = list->last;
+ old = -1;
+
+ fdtab[fd].update.prev = -2;
+ /* Make sure the "prev" store is visible before we update the last entry */
+ __ha_barrier_store();
+
+ if (unlikely(last == -1)) {
+ /* list is empty, try to add ourselves alone so that list->last=fd */
+ if (unlikely(!_HA_ATOMIC_CAS(&list->last, &old, new)))
+ goto redo_last;
+
+ /* list->first was necessarily -1, we're guaranteed to be alone here */
+ list->first = fd;
+ } else {
+ /* adding ourselves past the last element
+ * The CAS will only succeed if its next is -1,
+ * which means it's in the cache, and the last element.
+ */
+ if (unlikely(!_HA_ATOMIC_CAS(&fdtab[last].update.next, &old, new)))
+ goto redo_last;
+
+ /* Then, update the last entry */
+ list->last = fd;
+ }
+ __ha_barrier_store();
+ /* since we're alone at the end of the list and still locked(-2),
+ * we know no one tried to add past us. Mark the end of list.
+ */
+ fdtab[fd].update.prev = last;
+ fdtab[fd].update.next = -1;
+ __ha_barrier_store();
+done:
+ return;
+}
+
+/* removes fd <fd> from fd list <list> */
+void fd_rm_from_fd_list(volatile struct fdlist *list, int fd)
+{
+#if defined(HA_HAVE_CAS_DW) || defined(HA_CAS_IS_8B)
+ volatile union {
+ struct fdlist_entry ent;
+ uint64_t u64;
+ uint32_t u32[2];
+ } cur_list, next_list;
+#endif
+ int old;
+ int new = -2;
+ int prev;
+ int next;
+ int last;
+lock_self:
+#if (defined(HA_CAS_IS_8B) || defined(HA_HAVE_CAS_DW))
+ next_list.ent.next = next_list.ent.prev = -2;
+ cur_list.ent = *(volatile typeof(fdtab->update)*)&fdtab[fd].update;
+ /* First, attempt to lock our own entries */
+ do {
+ /* The FD is not in the FD cache, give up */
+ if (unlikely(cur_list.ent.next <= -3))
+ return;
+ if (unlikely(cur_list.ent.prev == -2 || cur_list.ent.next == -2))
+ goto lock_self;
+ } while (
+#ifdef HA_CAS_IS_8B
+ unlikely(!_HA_ATOMIC_CAS(((uint64_t *)&fdtab[fd].update), (uint64_t *)&cur_list.u64, next_list.u64))
+#else
+ unlikely(!_HA_ATOMIC_DWCAS(((long *)&fdtab[fd].update), (uint32_t *)&cur_list.u32, (const uint32_t *)&next_list.u32))
+#endif
+ );
+ next = cur_list.ent.next;
+ prev = cur_list.ent.prev;
+
+#else
+lock_self_next:
+ next = HA_ATOMIC_LOAD(&fdtab[fd].update.next);
+ if (next == -2)
+ goto lock_self_next;
+ if (next <= -3)
+ goto done;
+ if (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].update.next, &next, -2)))
+ goto lock_self_next;
+lock_self_prev:
+ prev = HA_ATOMIC_LOAD(&fdtab[fd].update.prev);
+ if (prev == -2)
+ goto lock_self_prev;
+ if (unlikely(!_HA_ATOMIC_CAS(&fdtab[fd].update.prev, &prev, -2)))
+ goto lock_self_prev;
+#endif
+ __ha_barrier_atomic_store();
+
+ /* Now, lock the entries of our neighbours */
+ if (likely(prev != -1)) {
+redo_prev:
+ old = fd;
+
+ if (unlikely(!_HA_ATOMIC_CAS(&fdtab[prev].update.next, &old, new))) {
+ if (unlikely(old == -2)) {
+ /* Neighbour already locked, give up and
+ * retry once it's done
+ */
+ fdtab[fd].update.prev = prev;
+ __ha_barrier_store();
+ fdtab[fd].update.next = next;
+ __ha_barrier_store();
+ goto lock_self;
+ }
+ goto redo_prev;
+ }
+ }
+ if (likely(next != -1)) {
+redo_next:
+ old = fd;
+ if (unlikely(!_HA_ATOMIC_CAS(&fdtab[next].update.prev, &old, new))) {
+ if (unlikely(old == -2)) {
+ /* Neighbour already locked, give up and
+ * retry once it's done
+ */
+ if (prev != -1) {
+ fdtab[prev].update.next = fd;
+ __ha_barrier_store();
+ }
+ fdtab[fd].update.prev = prev;
+ __ha_barrier_store();
+ fdtab[fd].update.next = next;
+ __ha_barrier_store();
+ goto lock_self;
+ }
+ goto redo_next;
+ }
+ }
+ if (list->first == fd)
+ list->first = next;
+ __ha_barrier_store();
+ last = list->last;
+ while (unlikely(last == fd && (!_HA_ATOMIC_CAS(&list->last, &last, prev))))
+ __ha_compiler_barrier();
+ /* Make sure we let other threads know we're no longer in cache,
+ * before releasing our neighbours.
+ */
+ __ha_barrier_store();
+ if (likely(prev != -1))
+ fdtab[prev].update.next = next;
+ __ha_barrier_store();
+ if (likely(next != -1))
+ fdtab[next].update.prev = prev;
+ __ha_barrier_store();
+ /* Ok, now we're out of the fd cache */
+ fdtab[fd].update.next = -(next + 4);
+ __ha_barrier_store();
+done:
+ return;
+}
+
+/* deletes the FD once nobody uses it anymore, as detected by the caller by its
+ * thread_mask being zero and its running mask turning to zero. There is no
+ * protection against concurrent accesses, it's up to the caller to make sure
+ * only the last thread will call it. If called under isolation, it is safe to
+ * call this from another group than the FD's. This is only for internal use,
+ * please use fd_delete() instead.
+ */
+void _fd_delete_orphan(int fd)
+{
+ int tgrp = fd_tgid(fd);
+ uint fd_disown;
+
+ fd_disown = fdtab[fd].state & FD_DISOWN;
+ if (fdtab[fd].state & FD_LINGER_RISK) {
+ /* this is generally set when connecting to servers */
+ DISGUISE(setsockopt(fd, SOL_SOCKET, SO_LINGER,
+ (struct linger *) &nolinger, sizeof(struct linger)));
+ }
+
+ /* It's expected that a close() will result in the FD disappearing from
+ * pollers, but some pollers may have some internal bookkeeping to be
+ * done prior to the call (e.g. remove references from internal tables).
+ */
+ if (cur_poller.clo)
+ cur_poller.clo(fd);
+
+ /* now we're about to reset some of this FD's fields. We don't want
+ * anyone to grab it anymore and we need to make sure those which could
+ * possibly have stumbled upon it right now are leaving before we
+ * proceed. This is done in two steps. First we reset the tgid so that
+ * fd_take_tgid() and fd_grab_tgid() fail, then we wait for existing
+ * ref counts to drop. Past this point we're alone dealing with the
+ * FD's thread/running/update/polled masks.
+ */
+ fd_reset_tgid(fd);
+
+ while (_HA_ATOMIC_LOAD(&fdtab[fd].refc_tgid) != 0) // refc==0 ?
+ __ha_cpu_relax();
+
+ /* we don't want this FD anymore in the global list */
+ fd_rm_from_fd_list(&update_list[tgrp - 1], fd);
+
+ /* no more updates on this FD are relevant anymore */
+ HA_ATOMIC_STORE(&fdtab[fd].update_mask, 0);
+ if (fd_nbupdt > 0 && fd_updt[fd_nbupdt - 1] == fd)
+ fd_nbupdt--;
+
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ polled_mask[fd].poll_recv = polled_mask[fd].poll_send = 0;
+
+ fdtab[fd].state = 0;
+
+#ifdef DEBUG_FD
+ fdtab[fd].event_count = 0;
+#endif
+ fdinfo[fd].port_range = NULL;
+ fdtab[fd].owner = NULL;
+
+ /* perform the close() call last as it's what unlocks the instant reuse
+ * of this FD by any other thread.
+ */
+ if (!fd_disown)
+ close(fd);
+ _HA_ATOMIC_DEC(&ha_used_fds);
+}
+
+/* Deletes an FD from the fdsets. The file descriptor is also closed, possibly
+ * asynchronously. It is safe to call it from another thread from the same
+ * group as the FD's or from a thread from a different group. However if called
+ * from a thread from another group, there is an extra cost involved because
+ * the operation is performed under thread isolation, so doing so must be
+ * reserved for ultra-rare cases (e.g. stopping a listener).
+ */
+void fd_delete(int fd)
+{
+ /* This must never happen and would definitely indicate a bug, in
+ * addition to overwriting some unexpected memory areas.
+ */
+ BUG_ON(fd < 0 || fd >= global.maxsock);
+
+ /* NOTE: The master when going into reexec mode re-closes all FDs after
+ * they were already dispatched. But we know we didn't start the polling
+ * threads so we can still close them. The masks will probably not match
+ * however so we force the value and erase the refcount if any.
+ */
+ if (unlikely(global.mode & MODE_STARTING))
+ fdtab[fd].refc_tgid = ti->tgid;
+
+ /* the tgid cannot change before a complete close so we should never
+ * face the situation where we try to close an fd that was reassigned.
+ * However there is one corner case where this happens, it's when an
+ * attempt to pause a listener fails (e.g. abns), leaving the listener
+ * in fault state and it is forcefully stopped. This needs to be done
+ * under isolation, and it's quite rare (i.e. once per such FD per
+ * process). Since we'll be isolated we can clear the thread mask and
+ * close the FD ourselves.
+ */
+ if (unlikely(fd_tgid(fd) != ti->tgid)) {
+ int must_isolate = !thread_isolated() && !(global.mode & MODE_STOPPING);
+
+ if (must_isolate)
+ thread_isolate();
+
+ HA_ATOMIC_STORE(&fdtab[fd].thread_mask, 0);
+ HA_ATOMIC_STORE(&fdtab[fd].running_mask, 0);
+ _fd_delete_orphan(fd);
+
+ if (must_isolate)
+ thread_release();
+ return;
+ }
+
+ /* we must postpone removal of an FD that may currently be in use
+ * by another thread. This can happen in the following two situations:
+ * - after a takeover, the owning thread closes the connection but
+ * the previous one just woke up from the poller and entered
+ * the FD handler iocb. That thread holds an entry in running_mask
+ * and requires removal protection.
+ * - multiple threads are accepting connections on a listener, and
+ * one of them (or even a separate one) decides to unbind the
+ * listener under the listener's lock while other ones still hold
+ * the running bit.
+ * In both situations the FD is marked as unused (thread_mask = 0) and
+ * will not take new bits in its running_mask so we have the guarantee
+ * that the last thread eliminating running_mask is the one allowed to
+ * safely delete the FD. Most of the time it will be the current thread.
+ * We still need to set and check the one-shot flag FD_MUST_CLOSE
+ * to take care of the rare cases where a thread wakes up on late I/O
+ * before the thread_mask is zero, and sets its bit in the running_mask
+ * just after the current thread finishes clearing its own bit, hence
+ * the two threads see themselves as last ones (which they really are).
+ */
+
+ HA_ATOMIC_OR(&fdtab[fd].running_mask, ti->ltid_bit);
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_MUST_CLOSE);
+ HA_ATOMIC_STORE(&fdtab[fd].thread_mask, 0);
+ if (fd_clr_running(fd) == ti->ltid_bit) {
+ if (HA_ATOMIC_BTR(&fdtab[fd].state, FD_MUST_CLOSE_BIT)) {
+ _fd_delete_orphan(fd);
+ }
+ }
+}
+
+/* makes the new fd non-blocking and clears all other O_* flags; this is meant
+ * to be used on new FDs. Returns -1 on failure. The result is disguised at the
+ * end because some callers need to be able to ignore it regardless of the libc
+ * attributes.
+ */
+int fd_set_nonblock(int fd)
+{
+ int ret = fcntl(fd, F_SETFL, O_NONBLOCK);
+
+ return DISGUISE(ret);
+}
+
+/* sets the close-on-exec flag on fd; returns -1 on failure. The result is
+ * disguised at the end because some callers need to be able to ignore it
+ * regardless of the libc attributes.
+ */
+int fd_set_cloexec(int fd)
+{
+ int flags, ret;
+
+ flags = fcntl(fd, F_GETFD);
+ flags |= FD_CLOEXEC;
+ ret = fcntl(fd, F_SETFD, flags);
+ return DISGUISE(ret);
+}
+
+/* Migrate a FD to a new thread <new_tid>. It is explicitly permitted to
+ * migrate to another thread group, the function takes the necessary locking
+ * for this. It is even permitted to migrate from a foreign group to another,
+ * but the calling thread must be certain that the FD is not about to close
+ * when doing so, reason why it is highly recommended that only one of the
+ * FD's owners performs this operation. The polling is completely disabled.
+ * The operation never fails.
+ */
+void fd_migrate_on(int fd, uint new_tid)
+{
+ struct thread_info *new_ti = &ha_thread_info[new_tid];
+
+ /* we must be alone to work on this idle FD. If not, it means that its
+ * poller is currently waking up and is about to use it, likely to
+ * close it on shut/error, but maybe also to process any unexpectedly
+ * pending data. It's also possible that the FD was closed and
+ * reassigned to another thread group, so let's be careful.
+ */
+ fd_lock_tgid(fd, new_ti->tgid);
+
+ /* now we have exclusive access to it. From now on the FD belongs to
+ * <new_tid>'s ltid_bit for this tgid.
+ */
+ HA_ATOMIC_STORE(&fdtab[fd].thread_mask, new_ti->ltid_bit);
+
+ /* Make sure the FD doesn't have the active bit. It is possible that
+ * the fd is polled by the thread that used to own it, the new thread
+ * is supposed to call subscribe() later, to activate polling.
+ */
+ fd_stop_both(fd);
+
+ /* we're done with it. As soon as we unlock it, other threads from the
+ * target group can manipulate it. However it may only disappear once
+ * we drop the reference.
+ */
+ fd_unlock_tgid(fd);
+ fd_drop_tgid(fd);
+}
+
+/*
+ * Take over a FD belonging to another thread.
+ * <expected_owner> is the expected owner of the fd.
+ * Returns 0 on success, and -1 on failure.
+ */
+int fd_takeover(int fd, void *expected_owner)
+{
+ unsigned long old;
+
+ /* protect ourselves against a delete then an insert for the same fd,
+ * if it happens, then the owner will no longer be the expected
+ * connection.
+ */
+ if (fdtab[fd].owner != expected_owner)
+ return -1;
+
+ /* we must be alone to work on this idle FD. If not, it means that its
+ * poller is currently waking up and is about to use it, likely to
+ * close it on shut/error, but maybe also to process any unexpectedly
+ * pending data. It's also possible that the FD was closed and
+ * reassigned to another thread group, so let's be careful.
+ */
+ if (unlikely(!fd_grab_tgid(fd, ti->tgid)))
+ return -1;
+
+ old = 0;
+ if (!HA_ATOMIC_CAS(&fdtab[fd].running_mask, &old, ti->ltid_bit)) {
+ fd_drop_tgid(fd);
+ return -1;
+ }
+
+ /* success, from now on it's ours */
+ HA_ATOMIC_STORE(&fdtab[fd].thread_mask, ti->ltid_bit);
+
+ /* Make sure the FD doesn't have the active bit. It is possible that
+ * the fd is polled by the thread that used to own it, the new thread
+ * is supposed to call subscribe() later, to activate polling.
+ */
+ fd_stop_recv(fd);
+
+ /* we're done with it */
+ HA_ATOMIC_AND(&fdtab[fd].running_mask, ~ti->ltid_bit);
+
+ /* no more changes planned */
+ fd_drop_tgid(fd);
+ return 0;
+}
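+
+/* A usage sketch (hypothetical caller): stealing an idle connection from
+ * another thread could look like this:
+ *
+ *    if (fd_takeover(conn->handle.fd, conn) != 0)
+ *        return -1;  // FD busy, closed or reassigned: leave it alone
+ *    ...from here the FD belongs to the calling thread...
+ */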
+
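+/* Tags FD <fd> as requiring a polling update and queues it, either in the
+ * calling thread's local update list when the FD belongs to this thread
+ * alone, or in the owning group's shared update list otherwise, possibly
+ * waking up one of the owning threads. Does nothing if the FD was already
+ * closed or is already queued for an update.
+ */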
+void updt_fd_polling(const int fd)
+{
+ uint tgrp = fd_take_tgid(fd);
+
+ /* closed ? may happen */
+ if (!tgrp)
+ return;
+
+ if (unlikely(tgrp != tgid && tgrp <= MAX_TGROUPS)) {
+ /* Hmmm, delivered an update for another group... That may
+ * happen on suspend/resume of a listener for example when
+ * the FD was not even marked for running. Let's broadcast
+ * the update.
+ */
+ unsigned long update_mask = fdtab[fd].update_mask;
+ int thr;
+
+ while (!_HA_ATOMIC_CAS(&fdtab[fd].update_mask, &update_mask,
+ _HA_ATOMIC_LOAD(&ha_tgroup_info[tgrp - 1].threads_enabled)))
+ __ha_cpu_relax();
+
+ fd_add_to_fd_list(&update_list[tgrp - 1], fd);
+
+ thr = one_among_mask(fdtab[fd].thread_mask & ha_tgroup_info[tgrp - 1].threads_enabled,
+ statistical_prng_range(ha_tgroup_info[tgrp - 1].count));
+ thr += ha_tgroup_info[tgrp - 1].base;
+ wake_thread(thr);
+
+ fd_drop_tgid(fd);
+ return;
+ }
+
+ fd_drop_tgid(fd);
+
+ if (tg->threads_enabled == 1UL || (fdtab[fd].thread_mask & tg->threads_enabled) == ti->ltid_bit) {
+ if (HA_ATOMIC_BTS(&fdtab[fd].update_mask, ti->ltid))
+ return;
+
+ fd_updt[fd_nbupdt++] = fd;
+ } else {
+ unsigned long update_mask = fdtab[fd].update_mask;
+ do {
+ if (update_mask == fdtab[fd].thread_mask) // FIXME: this works only on thread-groups 1
+ return;
+ } while (!_HA_ATOMIC_CAS(&fdtab[fd].update_mask, &update_mask, fdtab[fd].thread_mask));
+
+ fd_add_to_fd_list(&update_list[tgid - 1], fd);
+
+ if (fd_active(fd) && !(fdtab[fd].thread_mask & ti->ltid_bit)) {
+ /* we need to wake up another thread to handle it immediately, any will fit,
+ * so let's pick a random one so that it doesn't always end up on the same.
+ */
+ int thr = one_among_mask(fdtab[fd].thread_mask & tg->threads_enabled,
+ statistical_prng_range(tg->count));
+ thr += tg->base;
+ wake_thread(thr);
+ }
+ }
+}
+
+/* Update events seen for FD <fd> and its state if needed. This should be
+ * called by the poller, passing FD_EV_*_{R,W,RW} in <evts>. FD_EV_ERR_*
+ * doesn't need to also pass FD_EV_SHUT_*, it's implied. ERR and SHUT are
+ * allowed to be reported regardless of R/W readiness. Returns one of
+ * FD_UPDT_*.
+ */
+int fd_update_events(int fd, uint evts)
+{
+ unsigned long locked;
+ uint old, new;
+ uint new_flags, must_stop;
+ ulong rmask, tmask;
+
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_STUCK); // this thread is still running
+
+ if (unlikely(!fd_grab_tgid(fd, ti->tgid))) {
+ /* the FD changed to another tgid, we can't safely
+ * check it anymore. The bits in the masks are not
+ * ours anymore and we're not allowed to touch them.
+ * Ours have already been cleared and the FD was
+ * closed in between so we can safely leave now.
+ */
+ activity[tid].poll_drop_fd++;
+ return FD_UPDT_CLOSED;
+ }
+
+ /* Do not take running_mask if not strictly needed (will trigger a
+ * cosmetic BUG_ON() in fd_insert() anyway if done).
+ */
+ tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
+ if (!(tmask & ti->ltid_bit))
+ goto do_update;
+
+ HA_ATOMIC_OR(&fdtab[fd].running_mask, ti->ltid_bit);
+
+ /* From this point, our bit may possibly be in thread_mask, but it may
+ * still vanish, either because a takeover completed just before taking
+ * the bit above with the new owner deleting the FD, or because a
+ * takeover started just before taking the bit. In order to make sure a
+ * started takeover is complete, we need to verify that all bits of
+ * running_mask are present in thread_mask, since takeover first takes
+ * running then atomically replaces thread_mask. Once it's stable, if
+ * our bit remains there, no further takeover may happen because we
+ * hold running, but if our bit is not there it means we've lost the
+ * takeover race and have to decline touching the FD. Regarding the
+ * risk of deletion, our bit in running_mask prevents fd_delete() from
+ * finalizing the close, and the caller will leave the FD with a zero
+ * thread_mask and the FD_MUST_CLOSE flag set. It will then be our
+ * responsibility to close it.
+ */
+ do {
+ rmask = _HA_ATOMIC_LOAD(&fdtab[fd].running_mask);
+ tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
+ rmask &= ~ti->ltid_bit;
+ } while ((rmask & ~tmask) && (tmask & ti->ltid_bit));
+
+ /* Now tmask is stable. Do nothing if the FD was taken over under us */
+
+ if (!(tmask & ti->ltid_bit)) {
+ /* a takeover has started */
+ activity[tid].poll_skip_fd++;
+
+ if (fd_clr_running(fd) == ti->ltid_bit)
+ goto closed_or_migrated;
+
+ goto do_update;
+ }
+
+ /* with running we're safe now, we can drop the reference */
+ fd_drop_tgid(fd);
+
+ locked = (tmask != ti->ltid_bit);
+
+ /* OK now we are guaranteed that our thread_mask was present and
+ * that we're allowed to update the FD.
+ */
+
+ new_flags =
+ ((evts & FD_EV_READY_R) ? FD_POLL_IN : 0) |
+ ((evts & FD_EV_READY_W) ? FD_POLL_OUT : 0) |
+ ((evts & FD_EV_SHUT_R) ? FD_POLL_HUP : 0) |
+ ((evts & FD_EV_ERR_RW) ? FD_POLL_ERR : 0);
+
+ /* SHUTW reported while FD was active for writes is an error */
+ if ((fdtab[fd].state & FD_EV_ACTIVE_W) && (evts & FD_EV_SHUT_W))
+ new_flags |= FD_POLL_ERR;
+
+ /* compute the inactive events reported late that must be stopped */
+ must_stop = 0;
+ if (unlikely(!fd_active(fd))) {
+ /* both sides stopped */
+ must_stop = FD_POLL_IN | FD_POLL_OUT;
+ }
+ else if (unlikely(!fd_recv_active(fd) && (evts & (FD_EV_READY_R | FD_EV_SHUT_R | FD_EV_ERR_RW)))) {
+ /* only send remains */
+ must_stop = FD_POLL_IN;
+ }
+ else if (unlikely(!fd_send_active(fd) && (evts & (FD_EV_READY_W | FD_EV_SHUT_W | FD_EV_ERR_RW)))) {
+ /* only recv remains */
+ must_stop = FD_POLL_OUT;
+ }
+
+ if (new_flags & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR))
+ new_flags |= FD_EV_READY_R;
+
+ if (new_flags & (FD_POLL_OUT | FD_POLL_ERR))
+ new_flags |= FD_EV_READY_W;
+
+ old = fdtab[fd].state;
+ new = (old & ~FD_POLL_UPDT_MASK) | new_flags;
+
+ if (unlikely(locked)) {
+ /* Locked FDs (those with more than 2 threads) are atomically updated */
+ while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new)))
+ new = (old & ~FD_POLL_UPDT_MASK) | new_flags;
+ } else {
+ if (new != old)
+ fdtab[fd].state = new;
+ }
+
+ if (fdtab[fd].iocb && fd_active(fd)) {
+ fdtab[fd].iocb(fd);
+ }
+
+ /*
+ * We entered iocb with running set and with the valid tgid.
+ * Since then, this is what could have happened:
+ * - another thread tried to close the FD (e.g. timeout task from
+ * another one that owns it). We still have running set, but not
+ * tmask. We must call fd_clr_running() then _fd_delete_orphan()
+ * if we were the last one.
+ *
+ * - the iocb tried to close the FD => bit no more present in running,
+ * nothing to do. If it managed to close it, the poller's ->clo()
+ * has already been called.
+ *
+ * - after we closed, the FD was reassigned to another thread in
+ * another group => running not present, tgid differs, nothing to
+ * do because if it got reassigned it indicates it was already
+ * closed.
+ *
+ * There's no risk of takeover of the valid FD here during this period.
+ * Also if we still have running, immediately after we release it, the
+ * events above might instantly happen due to another thread taking
+ * over.
+ *
+ * As such, the only cases where the FD is still relevant are:
+ * - tgid still set and running still set (most common)
+ * - tgid still valid but running cleared due to fd_delete(): we may
+ * still need to stop polling otherwise we may keep it enabled
+ * while waiting for other threads to close it.
+ * And given that we may need to program a tentative update in case we
+ * don't immediately close, it's easier to grab the tgid during the
+ * whole check.
+ */
+
+ if (!fd_grab_tgid(fd, tgid))
+ return FD_UPDT_CLOSED;
+
+ tmask = _HA_ATOMIC_LOAD(&fdtab[fd].thread_mask);
+
+ /* another thread might have attempted to close this FD in the mean
+ * time (e.g. timeout task) striking on a previous thread and closing.
+ * This is detected by us being the last owners of a running_mask bit,
+ * and the thread_mask being zero. At the moment we release the running
+ * bit, a takeover may also happen, so in practice we check for our loss
+ * of the thread_mask bit, i.e. both thread_mask and running_mask being 0
+ * after we remove ourselves last. There is no risk the FD gets reassigned
+ * to a different group since it's not released until the real close()
+ * in _fd_delete_orphan().
+ */
+ if (fd_clr_running(fd) == ti->ltid_bit && !(tmask & ti->ltid_bit))
+ goto closed_or_migrated;
+
+ /* we had to stop this FD and it still must be stopped after the I/O
+ * cb's changes, so let's program an update for this.
+ */
+ if (must_stop && !(fdtab[fd].update_mask & ti->ltid_bit)) {
+ if (((must_stop & FD_POLL_IN) && !fd_recv_active(fd)) ||
+ ((must_stop & FD_POLL_OUT) && !fd_send_active(fd)))
+ if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, ti->ltid))
+ fd_updt[fd_nbupdt++] = fd;
+ }
+
+ fd_drop_tgid(fd);
+ return FD_UPDT_DONE;
+
+ closed_or_migrated:
+ /* We only come here once we've last dropped running and the FD is
+ * not for us as per !(tmask & tid_bit). It may imply we're
+ * responsible for closing it. Otherwise it's just a migration.
+ */
+ if (HA_ATOMIC_BTR(&fdtab[fd].state, FD_MUST_CLOSE_BIT)) {
+ fd_drop_tgid(fd);
+ _fd_delete_orphan(fd);
+ return FD_UPDT_CLOSED;
+ }
+
+ /* So we were alone, no close bit, at best the FD was migrated, at
+ * worst it's in the process of being closed by another thread. We must
+ * be ultra-careful as it can be re-inserted by yet another thread as
+ * the result of socket() or accept(). Let's just tell the poller the
+ * FD was lost. If it was closed it was already removed and this will
+ * only cost an update for nothing.
+ */
+
+ do_update:
+ /* The FD is not closed but we don't want the poller to wake up for
+ * it anymore.
+ */
+ if (!HA_ATOMIC_BTS(&fdtab[fd].update_mask, ti->ltid))
+ fd_updt[fd_nbupdt++] = fd;
+
+ fd_drop_tgid(fd);
+ return FD_UPDT_MIGRATED;
+}
+
+/* This is used by pollers at boot time to re-register desired events for
+ * all FDs after new pollers have been created. It doesn't do much, it checks
+ * that their thread group matches the one in argument, and that the thread
+ * mask matches at least one of the bits in the mask, and if so, marks the FD
+ * as updated.
+ */
+void fd_reregister_all(int tgrp, ulong mask)
+{
+ int fd;
+
+ for (fd = 0; fd < global.maxsock; fd++) {
+ if (!fdtab[fd].owner)
+ continue;
+
+ /* make sure we don't register other tgroups' FDs. We just
+ * avoid needlessly taking the lock if not needed.
+ */
+ if (!(_HA_ATOMIC_LOAD(&fdtab[fd].thread_mask) & mask) ||
+ !fd_grab_tgid(fd, tgrp))
+ continue; // was not for us anyway
+
+ if (_HA_ATOMIC_LOAD(&fdtab[fd].thread_mask) & mask)
+ updt_fd_polling(fd);
+ fd_drop_tgid(fd);
+ }
+}
+
+/* Tries to send <npfx> parts from <prefix> followed by <nmsg> parts from <msg>
+ * optionally followed by a newline if <nl> is non-null, to file descriptor
+ * <fd>. The message is sent atomically using writev(). It may be truncated to
+ * <maxlen> bytes if <maxlen> is non-null. There is no distinction between the
+ * two lists, it's just a convenience to help the caller prepend some prefixes
+ * when necessary. It takes the fd's lock to make sure no other thread will
+ * write to the same fd in parallel. Returns the number of bytes sent, or <=0
+ * on failure. A limit of 31 total non-empty segments is enforced. The caller
+ * is responsible for taking care of making the fd non-blocking.
+ */
+ssize_t fd_write_frag_line(int fd, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg, int nl)
+{
+ struct iovec iovec[32];
+ size_t sent = 0;
+ int vec = 0;
+ int attempts = 0;
+
+ if (!maxlen)
+ maxlen = ~0;
+
+ /* keep one char for a possible trailing '\n' in any case */
+ maxlen--;
+
+ /* make an iovec from the concatenation of all parts of the original
+ * message. Skip empty fields and truncate the whole message to maxlen,
+ * leaving one spare iovec for the '\n'.
+ */
+ while (vec < (sizeof(iovec) / sizeof(iovec[0]) - 1)) {
+ if (!npfx) {
+ pfx = msg;
+ npfx = nmsg;
+ nmsg = 0;
+ if (!npfx)
+ break;
+ }
+
+ iovec[vec].iov_base = pfx->ptr;
+ iovec[vec].iov_len = MIN(maxlen, pfx->len);
+ maxlen -= iovec[vec].iov_len;
+ if (iovec[vec].iov_len)
+ vec++;
+ pfx++; npfx--;
+ };
+
+ if (nl) {
+ iovec[vec].iov_base = "\n";
+ iovec[vec].iov_len = 1;
+ vec++;
+ }
+
+ /* make sure we never interleave writes and we never block. This means
+ * we prefer to fail on collision than to block. But we don't want to
+ * lose too many logs so we just perform a few lock attempts then give
+ * up.
+ */
+
+ while (HA_ATOMIC_BTS(&fdtab[fd].state, FD_EXCL_SYSCALL_BIT)) {
+ if (++attempts >= 200) {
+ /* so that the caller knows the message couldn't be delivered */
+ sent = -1;
+ errno = EAGAIN;
+ goto leave;
+ }
+ ha_thread_relax();
+ }
+
+ if (unlikely(!(fdtab[fd].state & FD_INITIALIZED))) {
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_INITIALIZED);
+ if (!isatty(fd))
+ fd_set_nonblock(fd);
+ }
+ sent = writev(fd, iovec, vec);
+ HA_ATOMIC_BTR(&fdtab[fd].state, FD_EXCL_SYSCALL_BIT);
+
+ leave:
+ /* sent > 0 if the message was delivered */
+ return sent;
+}
+
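+/* A minimal usage sketch (hypothetical caller), e.g. to emit "prefix: message"
+ * followed by a newline on an already non-blocking fd:
+ *
+ *    struct ist pfx[1] = { ist("prefix: ") };
+ *    struct ist msg[1] = { ist("message") };
+ *
+ *    if (fd_write_frag_line(fd, 1024, pfx, 1, msg, 1, 1) <= 0)
+ *        ...message was dropped on contention or error (errno == EAGAIN)...
+ */
+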
+#if defined(USE_CLOSEFROM)
+void my_closefrom(int start)
+{
+ closefrom(start);
+}
+
+#elif defined(USE_POLL)
+/* This is a portable implementation of closefrom(). It closes all open file
+ * descriptors starting at <start> and above. It relies on the fact that poll()
+ * will return POLLNVAL for each invalid (hence closed) file descriptor passed
+ * in argument in order to skip them. It acts with batches of FDs and will
+ * typically perform one poll() call per 1024 FDs so the overhead is low in
+ * case all FDs have to be closed.
+ */
+void my_closefrom(int start)
+{
+ struct pollfd poll_events[1024];
+ struct rlimit limit;
+ int nbfds, fd, ret, idx;
+ int step, next;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) == 0)
+ step = nbfds = limit.rlim_cur;
+ else
+ step = nbfds = 0;
+
+ if (nbfds <= 0) {
+ /* set safe limit */
+ nbfds = 1024;
+ step = 256;
+ }
+
+ if (step > sizeof(poll_events) / sizeof(poll_events[0]))
+ step = sizeof(poll_events) / sizeof(poll_events[0]);
+
+ while (start < nbfds) {
+ next = (start / step + 1) * step;
+
+ for (fd = start; fd < next && fd < nbfds; fd++) {
+ poll_events[fd - start].fd = fd;
+ poll_events[fd - start].events = 0;
+ }
+
+ do {
+ ret = poll(poll_events, fd - start, 0);
+ if (ret >= 0)
+ break;
+ } while (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ENOMEM);
+
+ if (ret)
+ ret = fd - start;
+
+ for (idx = 0; idx < ret; idx++) {
+ if (poll_events[idx].revents & POLLNVAL)
+ continue; /* already closed */
+
+ fd = poll_events[idx].fd;
+ close(fd);
+ }
+ start = next;
+ }
+}
+
+#else // defined(USE_POLL)
+
+/* This is a portable implementation of closefrom(). It closes all open file
+ * descriptors starting at <start> and above. This is a naive version for use
+ * when the operating system provides no alternative.
+ */
+void my_closefrom(int start)
+{
+ struct rlimit limit;
+ int nbfds;
+
+ if (getrlimit(RLIMIT_NOFILE, &limit) == 0)
+ nbfds = limit.rlim_cur;
+ else
+ nbfds = 0;
+
+ if (nbfds <= 0)
+ nbfds = 1024; /* safe limit */
+
+ while (start < nbfds)
+ close(start++);
+}
+#endif // defined(USE_POLL)
+
+/* Sets the RLIMIT_NOFILE setting to <new_limit> and returns the previous one
+ * in <old_limit> if the pointer is not NULL, even if setrlimit() fails. The
+ * two pointers may point to the same variable as the copy happens after
+ * setting the new value. The value is only changed if at least one of the new
+ * limits is strictly higher than the current one, otherwise returns 0 without
+ * changing anything. The getrlimit() or setrlimit() syscall return value is
+ * returned and errno is preserved.
+ */
+int raise_rlim_nofile(struct rlimit *old_limit, struct rlimit *new_limit)
+{
+ struct rlimit limit = { };
+ int ret = 0;
+
+ ret = getrlimit(RLIMIT_NOFILE, &limit);
+
+ if (ret == 0 &&
+ (limit.rlim_max < new_limit->rlim_max ||
+ limit.rlim_cur < new_limit->rlim_cur)) {
+ ret = setrlimit(RLIMIT_NOFILE, new_limit);
+ }
+
+ if (old_limit)
+ *old_limit = limit;
+
+ return ret;
+}
+
+/* Computes the bounded poll() timeout based on the next expiration timer <next>
+ * by bounding it to MAX_DELAY_MS. <next> may equal TICK_ETERNITY. The pollers
+ * just need to call this function right before polling to get their timeout
+ * value. Timeouts that are already expired (possibly due to a pending event)
+ * are accounted for in activity.poll_exp.
+ */
+int compute_poll_timeout(int next)
+{
+ int wait_time;
+
+ if (!tick_isset(next))
+ wait_time = MAX_DELAY_MS;
+ else if (tick_is_expired(next, now_ms)) {
+ activity[tid].poll_exp++;
+ wait_time = 0;
+ }
+ else {
+ wait_time = TICKS_TO_MS(tick_remain(now_ms, next)) + 1;
+ if (wait_time > MAX_DELAY_MS)
+ wait_time = MAX_DELAY_MS;
+ }
+ return wait_time;
+}
+
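+/* For example, with a timer expiring in 250ms, compute_poll_timeout() returns
+ * 251 (one extra millisecond to round up); with <next> == TICK_ETERNITY it
+ * returns MAX_DELAY_MS, and with an already expired timer it returns 0 so
+ * that poll() only performs a quick check.
+ */
+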
+/* Handle the return of the poller, which consists in calculating the idle
+ * time, saving a few clocks, marking the thread harmful again etc. All that
+ * is some boring stuff that all pollers have to do anyway.
+ */
+void fd_leaving_poll(int wait_time, int status)
+{
+ clock_leaving_poll(wait_time, status);
+
+ thread_harmless_end();
+ thread_idle_end();
+
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_SLEEPING);
+}
+
+/* disable the specified poller */
+void disable_poller(const char *poller_name)
+{
+ int p;
+
+ for (p = 0; p < nbpollers; p++)
+ if (strcmp(pollers[p].name, poller_name) == 0)
+ pollers[p].pref = 0;
+}
+
+void poller_pipe_io_handler(int fd)
+{
+ char buf[1024];
+ /* Flush the pipe */
+ while (read(fd, buf, sizeof(buf)) > 0);
+ fd_cant_recv(fd);
+}
+
+/* Allocates the per-thread fd_updt list, thus needs to be called early after
+ * thread creation.
+ */
+static int alloc_pollers_per_thread()
+{
+ fd_updt = calloc(global.maxsock, sizeof(*fd_updt));
+ return fd_updt != NULL;
+}
+
+/* Initialize the pollers per thread. */
+static int init_pollers_per_thread()
+{
+ int mypipe[2];
+
+ if (pipe(mypipe) < 0)
+ return 0;
+
+ poller_rd_pipe = mypipe[0];
+ poller_wr_pipe[tid] = mypipe[1];
+ fd_set_nonblock(poller_rd_pipe);
+ fd_insert(poller_rd_pipe, poller_pipe_io_handler, poller_pipe_io_handler, tgid, ti->ltid_bit);
+ fd_insert(poller_wr_pipe[tid], poller_pipe_io_handler, poller_pipe_io_handler, tgid, ti->ltid_bit);
+ fd_want_recv(poller_rd_pipe);
+ fd_stop_both(poller_wr_pipe[tid]);
+ return 1;
+}
+
+/* Deinitialize the pollers per thread */
+static void deinit_pollers_per_thread()
+{
+ /* rd and wr are initialized at the same place, but only rd is
+ * initialized to -1, so we rely on rd to decide whether to close. */
+ if (poller_rd_pipe > -1) {
+ fd_delete(poller_rd_pipe);
+ poller_rd_pipe = -1;
+ fd_delete(poller_wr_pipe[tid]);
+ poller_wr_pipe[tid] = -1;
+ }
+}
+
+/* Release the pollers per thread, to be called late */
+static void free_pollers_per_thread()
+{
+ fd_nbupdt = 0;
+ ha_free(&fd_updt);
+}
+
+/*
+ * Initialize the pollers till the best one is found.
+ * If none works, returns 0, otherwise 1.
+ */
+int init_pollers()
+{
+ int p;
+ struct poller *bp;
+
+ if ((fdtab_addr = calloc(global.maxsock, sizeof(*fdtab) + 64)) == NULL) {
+ ha_alert("Not enough memory to allocate %d entries for fdtab!\n", global.maxsock);
+ goto fail_tab;
+ }
+
+ /* always provide an aligned fdtab */
+ fdtab = (struct fdtab*)((((size_t)fdtab_addr) + 63) & -(size_t)64);
+
+ if ((polled_mask = calloc(global.maxsock, sizeof(*polled_mask))) == NULL) {
+ ha_alert("Not enough memory to allocate %d entries for polled_mask!\n", global.maxsock);
+ goto fail_polledmask;
+ }
+
+ if ((fdinfo = calloc(global.maxsock, sizeof(*fdinfo))) == NULL) {
+ ha_alert("Not enough memory to allocate %d entries for fdinfo!\n", global.maxsock);
+ goto fail_info;
+ }
+
+ for (p = 0; p < MAX_TGROUPS; p++)
+ update_list[p].first = update_list[p].last = -1;
+
+ for (p = 0; p < global.maxsock; p++) {
+ /* Mark the fd as out of the fd cache */
+ fdtab[p].update.next = -3;
+ }
+
+ do {
+ bp = NULL;
+ for (p = 0; p < nbpollers; p++)
+ if (!bp || (pollers[p].pref > bp->pref))
+ bp = &pollers[p];
+
+ if (!bp || bp->pref == 0)
+ break;
+
+ if (bp->init(bp)) {
+ memcpy(&cur_poller, bp, sizeof(*bp));
+ return 1;
+ }
+ } while (!bp || bp->pref == 0);
+
+ free(fdinfo);
+ fail_info:
+ free(polled_mask);
+ fail_polledmask:
+ free(fdtab_addr);
+ fail_tab:
+ return 0;
+}
+
+/*
+ * Deinitialize the pollers.
+ */
+void deinit_pollers() {
+
+ struct poller *bp;
+ int p;
+
+ for (p = 0; p < nbpollers; p++) {
+ bp = &pollers[p];
+
+ if (bp && bp->pref)
+ bp->term(bp);
+ }
+
+ ha_free(&fdinfo);
+ ha_free(&fdtab_addr);
+ ha_free(&polled_mask);
+}
+
+/*
+ * Lists the known pollers on <out>.
+ * Should be performed only before initialization.
+ */
+int list_pollers(FILE *out)
+{
+ int p;
+ int last, next;
+ int usable;
+ struct poller *bp;
+
+ fprintf(out, "Available polling systems :\n");
+
+ usable = 0;
+ bp = NULL;
+ last = next = -1;
+ while (1) {
+ for (p = 0; p < nbpollers; p++) {
+ if ((next < 0 || pollers[p].pref > next)
+ && (last < 0 || pollers[p].pref < last)) {
+ next = pollers[p].pref;
+ if (!bp || (pollers[p].pref > bp->pref))
+ bp = &pollers[p];
+ }
+ }
+
+ if (next == -1)
+ break;
+
+ for (p = 0; p < nbpollers; p++) {
+ if (pollers[p].pref == next) {
+ fprintf(out, " %10s : ", pollers[p].name);
+ if (pollers[p].pref == 0)
+ fprintf(out, "disabled, ");
+ else
+ fprintf(out, "pref=%3d, ", pollers[p].pref);
+ if (pollers[p].test(&pollers[p])) {
+ fprintf(out, " test result OK");
+ if (next > 0)
+ usable++;
+ } else {
+ fprintf(out, " test result FAILED");
+ if (bp == &pollers[p])
+ bp = NULL;
+ }
+ fprintf(out, "\n");
+ }
+ }
+ last = next;
+ next = -1;
+ };
+ fprintf(out, "Total: %d (%d usable), will use %s.\n", nbpollers, usable, bp ? bp->name : "none");
+ return 0;
+}
+
+/*
+ * Some pollers may lose their connection after a fork(). It may be necessary
+ * to re-initialize some of them again. Returns 0 in case of failure,
+ * otherwise 1. The fork() function may be NULL if unused. In case of error,
+ * the current poller is destroyed and the caller is responsible for trying
+ * another one by calling init_pollers() again.
+ */
+int fork_poller()
+{
+ int fd;
+ for (fd = 0; fd < global.maxsock; fd++) {
+ if (fdtab[fd].owner) {
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_CLONED);
+ }
+ }
+
+ if (cur_poller.fork) {
+ if (cur_poller.fork(&cur_poller))
+ return 1;
+ cur_poller.term(&cur_poller);
+ return 0;
+ }
+ return 1;
+}
+
+/* config parser for global "tune.fd.edge-triggered", accepts "on" or "off" */
+static int cfg_parse_tune_fd_edge_triggered(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options |= GTUNE_FD_ET;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_FD_ET;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
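+/* Example configuration (a sketch): the keyword is registered with
+ * KWF_EXPERIMENTAL below, so it must be exposed before use:
+ *
+ *    global
+ *        expose-experimental-directives
+ *        tune.fd.edge-triggered on
+ */
+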
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.fd.edge-triggered", cfg_parse_tune_fd_edge_triggered, KWF_EXPERIMENTAL },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+REGISTER_PER_THREAD_ALLOC(alloc_pollers_per_thread);
+REGISTER_PER_THREAD_INIT(init_pollers_per_thread);
+REGISTER_PER_THREAD_DEINIT(deinit_pollers_per_thread);
+REGISTER_PER_THREAD_FREE(free_pollers_per_thread);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/filters.c b/src/filters.c
new file mode 100644
index 0000000..e55adee
--- /dev/null
+++ b/src/filters.c
@@ -0,0 +1,1125 @@
+/*
+ * Stream filters related variables and functions.
+ *
+ * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/buf-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/compression.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/flt_http_comp.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proxy.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+
+
+#define TRACE_SOURCE &trace_strm
+
+/* Pool used to allocate filters */
+DECLARE_STATIC_POOL(pool_head_filter, "filter", sizeof(struct filter));
+
+static int handle_analyzer_result(struct stream *s, struct channel *chn, unsigned int an_bit, int ret);
+
+/* - RESUME_FILTER_LOOP and RESUME_FILTER_END must always be used together.
+ * The first one begins a loop and the second one ends it.
+ *
+ * - BREAK_EXECUTION must be used to break the loop and set the filter from
+ * which to resume the next time.
+ *
+ * Here is an example:
+ *
+ * RESUME_FILTER_LOOP(stream, channel) {
+ * ...
+ * if (cond)
+ * BREAK_EXECUTION(stream, channel, label);
+ * ...
+ * } RESUME_FILTER_END;
+ * ...
+ * label:
+ * ...
+ *
+ */
+#define RESUME_FILTER_LOOP(strm, chn) \
+ do { \
+ struct filter *filter; \
+ \
+ if (strm_flt(strm)->current[CHN_IDX(chn)]) { \
+ filter = strm_flt(strm)->current[CHN_IDX(chn)]; \
+ strm_flt(strm)->current[CHN_IDX(chn)] = NULL; \
+ goto resume_execution; \
+ } \
+ \
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) { \
+ resume_execution:
+
+#define RESUME_FILTER_END \
+ } \
+ } while(0)
+
+#define BREAK_EXECUTION(strm, chn, label) \
+ do { \
+ strm_flt(strm)->current[CHN_IDX(chn)] = filter; \
+ goto label; \
+ } while (0)
+
+
+/* List head of all known filter keywords */
+static struct flt_kw_list flt_keywords = {
+ .list = LIST_HEAD_INIT(flt_keywords.list)
+};
+
+/*
+ * Registers the filter keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void
+flt_register_keywords(struct flt_kw_list *kwl)
+{
+ LIST_APPEND(&flt_keywords.list, &kwl->list);
+}
+
+/*
+ * Returns a pointer to the filter keyword <kw>, or NULL if not found. If the
+ * keyword is found with a NULL ->parse() function, then an attempt is made to
+ * find one with a valid ->parse() function. This way it is possible to declare
+ * platform-dependent, known keywords as NULL, then only declare them as valid
+ * if some options are met. Note that if the requested keyword contains an
+ * opening parenthesis, everything from this point is ignored.
+ */
+struct flt_kw *
+flt_find_kw(const char *kw)
+{
+ int index;
+ const char *kwend;
+ struct flt_kw_list *kwl;
+ struct flt_kw *ret = NULL;
+
+ kwend = strchr(kw, '(');
+ if (!kwend)
+ kwend = kw + strlen(kw);
+
+ list_for_each_entry(kwl, &flt_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
+ kwl->kw[index].kw[kwend-kw] == 0) {
+ if (kwl->kw[index].parse)
+ return &kwl->kw[index]; /* found it !*/
+ else
+ ret = &kwl->kw[index]; /* may be OK */
+ }
+ }
+ }
+ return ret;
+}
+
+/*
+ * Dumps all registered "filter" keywords to the <out> string pointer. The
+ * unsupported keywords are only dumped if their supported form was not found.
+ * If <out> is NULL, the output is emitted using a more compact format on stdout.
+ */
+void
+flt_dump_kws(char **out)
+{
+ struct flt_kw_list *kwl;
+ const struct flt_kw *kwp, *kw;
+ const char *scope = NULL;
+ int index;
+
+ if (out)
+ *out = NULL;
+
+ for (kw = kwp = NULL;; kwp = kw) {
+ list_for_each_entry(kwl, &flt_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((kwl->kw[index].parse ||
+ flt_find_kw(kwl->kw[index].kw) == &kwl->kw[index])
+ && strordered(kwp ? kwp->kw : NULL,
+ kwl->kw[index].kw,
+ kw != kwp ? kw->kw : NULL)) {
+ kw = &kwl->kw[index];
+ scope = kwl->scope;
+ }
+ }
+ }
+
+ if (kw == kwp)
+ break;
+
+ if (out)
+ memprintf(out, "%s[%4s] %s%s\n", *out ? *out : "",
+ scope,
+ kw->kw,
+ kw->parse ? "" : " (not supported)");
+ else
+ printf("%s [%s]\n",
+ kw->kw, scope);
+ }
+}
+
+/*
+ * Lists the known filters on <out>
+ */
+void
+list_filters(FILE *out)
+{
+ char *filters, *p, *f;
+
+ fprintf(out, "Available filters :\n");
+ flt_dump_kws(&filters);
+ for (p = filters; (f = strtok_r(p,"\n",&p));)
+ fprintf(out, "\t%s\n", f);
+ free(filters);
+}
+
+/*
+ * Parses the "filter" keyword. All keywords must be handled by filters
+ * themselves
+ */
+static int
+parse_filter(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line, char **err)
+{
+ struct flt_conf *fconf = NULL;
+
+ /* Filter cannot be defined on a default proxy */
+ if (curpx == defpx) {
+ memprintf(err, "parsing [%s:%d] : %s is not allowed in a 'default' section.",
+ file, line, args[0]);
+ return -1;
+ }
+ if (strcmp(args[0], "filter") == 0) {
+ struct flt_kw *kw;
+ int cur_arg;
+
+ if (!*args[1]) {
+ memprintf(err,
+ "parsing [%s:%d] : missing argument for '%s' in %s '%s'.",
+ file, line, args[0], proxy_type_str(curpx), curpx->id);
+ goto error;
+ }
+ fconf = calloc(1, sizeof(*fconf));
+ if (!fconf) {
+ memprintf(err, "'%s' : out of memory", args[0]);
+ goto error;
+ }
+
+ cur_arg = 1;
+ kw = flt_find_kw(args[cur_arg]);
+ if (kw) {
+ if (!kw->parse) {
+ memprintf(err, "parsing [%s:%d] : '%s' : "
+ "'%s' option is not implemented in this version (check build options).",
+ file, line, args[0], args[cur_arg]);
+ goto error;
+ }
+ if (kw->parse(args, &cur_arg, curpx, fconf, err, kw->private) != 0) {
+ if (err && *err)
+ memprintf(err, "'%s' : '%s'",
+ args[0], *err);
+ else
+ memprintf(err, "'%s' : error encountered while processing '%s'",
+ args[0], args[cur_arg]);
+ goto error;
+ }
+ }
+ else {
+ flt_dump_kws(err);
+ indent_msg(err, 4);
+ memprintf(err, "'%s' : unknown keyword '%s'.%s%s",
+ args[0], args[cur_arg],
+ err && *err ? " Registered keywords :" : "", err && *err ? *err : "");
+ goto error;
+ }
+ if (*args[cur_arg]) {
+ memprintf(err, "'%s %s' : unknown keyword '%s'.",
+ args[0], args[1], args[cur_arg]);
+ goto error;
+ }
+ if (fconf->ops == NULL) {
+ memprintf(err, "'%s %s' : no callbacks defined.",
+ args[0], args[1]);
+ goto error;
+ }
+
+ LIST_APPEND(&curpx->filter_configs, &fconf->list);
+ }
+ return 0;
+
+ error:
+ free(fconf);
+ return -1;
+}
+
+/*
+ * Calls 'init' callback for all filters attached to a proxy. This happens after
+ * the configuration parsing. Filters can finish to fill their config. Returns
+ * (ERR_ALERT|ERR_FATAL) if an error occurs, 0 otherwise.
+ */
+static int
+flt_init(struct proxy *proxy)
+{
+ struct flt_conf *fconf;
+
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->ops->init && fconf->ops->init(proxy, fconf) < 0)
+ return ERR_ALERT|ERR_FATAL;
+ }
+ return 0;
+}
+
+/*
+ * Calls 'init_per_thread' callback for all filters attached to a proxy, for
+ * each thread. This happens after the thread creation. Filters can finish
+ * filling their config. Returns (ERR_ALERT|ERR_FATAL) if an error occurs, 0
+ * otherwise.
+ */
+static int
+flt_init_per_thread(struct proxy *proxy)
+{
+ struct flt_conf *fconf;
+
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->ops->init_per_thread && fconf->ops->init_per_thread(proxy, fconf) < 0)
+ return ERR_ALERT|ERR_FATAL;
+ }
+ return 0;
+}
+
+/* Calls flt_init() for all proxies, see above */
+static int
+flt_init_all()
+{
+ struct proxy *px;
+ int err_code = ERR_NONE;
+
+ for (px = proxies_list; px; px = px->next) {
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ err_code |= flt_init(px);
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to initialize filters for proxy '%s'.\n",
+ px->id);
+ return err_code;
+ }
+ }
+ return 0;
+}
+
+/* Calls flt_init_per_thread() for all proxies, see above. Be careful here, it
+ * returns 0 if an error occurred. This is the opposite of flt_init_all. */
+static int
+flt_init_all_per_thread()
+{
+ struct proxy *px;
+ int err_code = 0;
+
+ for (px = proxies_list; px; px = px->next) {
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ err_code = flt_init_per_thread(px);
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to initialize filters for proxy '%s' for thread %u.\n",
+ px->id, tid);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Calls 'check' callback for all filters attached to a proxy. This happens
+ * after the configuration parsing but before filters initialization. Returns
+ * the number of encountered errors.
+ */
+int
+flt_check(struct proxy *proxy)
+{
+ struct flt_conf *fconf;
+ int err = 0;
+
+ err += check_implicit_http_comp_flt(proxy);
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->ops->check)
+ err += fconf->ops->check(proxy, fconf);
+ }
+ return err;
+}
+
+/*
+ * Calls 'deinit' callback for all filters attached to a proxy. This happens when
+ * HAProxy is stopped.
+ */
+void
+flt_deinit(struct proxy *proxy)
+{
+ struct flt_conf *fconf, *back;
+
+ list_for_each_entry_safe(fconf, back, &proxy->filter_configs, list) {
+ if (fconf->ops->deinit)
+ fconf->ops->deinit(proxy, fconf);
+ LIST_DELETE(&fconf->list);
+ free(fconf);
+ }
+}
+
+/*
+ * Calls 'deinit_per_thread' callback for all filters attached to a proxy, for
+ * each thread. This happens before exiting a thread.
+ */
+void
+flt_deinit_per_thread(struct proxy *proxy)
+{
+ struct flt_conf *fconf, *back;
+
+ list_for_each_entry_safe(fconf, back, &proxy->filter_configs, list) {
+ if (fconf->ops->deinit_per_thread)
+ fconf->ops->deinit_per_thread(proxy, fconf);
+ }
+}
+
+
+/* Calls flt_deinit_per_thread() for all proxies, see above */
+static void
+flt_deinit_all_per_thread()
+{
+ struct proxy *px;
+
+ for (px = proxies_list; px; px = px->next)
+ flt_deinit_per_thread(px);
+}
+
+/* Attaches a filter to a stream. Returns -1 if an error occurs, 0 otherwise. */
+static int
+flt_stream_add_filter(struct stream *s, struct flt_conf *fconf, unsigned int flags)
+{
+ struct filter *f;
+
+ if (IS_HTX_STRM(s) && !(fconf->flags & FLT_CFG_FL_HTX))
+ return 0;
+
+ f = pool_zalloc(pool_head_filter);
+ if (!f) /* not enough memory */
+ return -1;
+ f->config = fconf;
+ f->flags |= flags;
+
+ if (FLT_OPS(f)->attach) {
+ int ret = FLT_OPS(f)->attach(s, f);
+ if (ret <= 0) {
+ pool_free(pool_head_filter, f);
+ return ret;
+ }
+ }
+
+ LIST_APPEND(&strm_flt(s)->filters, &f->list);
+ strm_flt(s)->flags |= STRM_FLT_FL_HAS_FILTERS;
+ return 0;
+}
+
+/*
+ * Called when a stream is created. It attaches all frontend filters to the
+ * stream. Returns -1 if an error occurs, 0 otherwise.
+ */
+int
+flt_stream_init(struct stream *s)
+{
+ struct flt_conf *fconf;
+
+ memset(strm_flt(s), 0, sizeof(*strm_flt(s)));
+ LIST_INIT(&strm_flt(s)->filters);
+ list_for_each_entry(fconf, &strm_fe(s)->filter_configs, list) {
+ if (flt_stream_add_filter(s, fconf, 0) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Called when a stream is closed or when the analysis ends (for an HTTP
+ * stream, this happens after each request/response exchange). When the
+ * analysis ends, backend
+ * filters are removed. When the stream is closed, all filters attached to the
+ * stream are removed.
+ */
+void
+flt_stream_release(struct stream *s, int only_backend)
+{
+ struct filter *filter, *back;
+
+ list_for_each_entry_safe(filter, back, &strm_flt(s)->filters, list) {
+ if (!only_backend || (filter->flags & FLT_FL_IS_BACKEND_FILTER)) {
+ if (FLT_OPS(filter)->detach)
+ FLT_OPS(filter)->detach(s, filter);
+ LIST_DELETE(&filter->list);
+ pool_free(pool_head_filter, filter);
+ }
+ }
+ if (LIST_ISEMPTY(&strm_flt(s)->filters))
+ strm_flt(s)->flags &= ~STRM_FLT_FL_HAS_FILTERS;
+}
+
+/*
+ * Calls 'stream_start' for all filters attached to a stream. This happens when
+ * the stream is created, just after calling the flt_stream_init()
+ * function. Returns -1 if an error occurs, 0 otherwise.
+ */
+int
+flt_stream_start(struct stream *s)
+{
+ struct filter *filter;
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->stream_start && FLT_OPS(filter)->stream_start(s, filter) < 0)
+ return -1;
+ }
+ if (strm_li(s) && (strm_li(s)->bind_conf->analysers & AN_REQ_FLT_START_FE)) {
+ s->req.flags |= CF_FLT_ANALYZE;
+ s->req.analysers |= AN_REQ_FLT_END;
+ }
+ return 0;
+}
+
+/*
+ * Calls 'stream_stop' for all filters attached to a stream. This happens when
+ * the stream is stopped, just before calling the flt_stream_release() function.
+ */
+void
+flt_stream_stop(struct stream *s)
+{
+ struct filter *filter;
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->stream_stop)
+ FLT_OPS(filter)->stream_stop(s, filter);
+ }
+}
+
+/*
+ * Calls 'check_timeouts' for all filters attached to a stream. This happens
+ * when the stream is woken up because of an expired timer.
+ */
+void
+flt_stream_check_timeouts(struct stream *s)
+{
+ struct filter *filter;
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->check_timeouts)
+ FLT_OPS(filter)->check_timeouts(s, filter);
+ }
+}
+
+/*
+ * Called when a backend is set for a stream. If the frontend and the backend
+ * are not the same, this function attaches all backend filters to the
+ * stream. Returns -1 if an error occurs, 0 otherwise.
+ */
+int
+flt_set_stream_backend(struct stream *s, struct proxy *be)
+{
+ struct flt_conf *fconf;
+ struct filter *filter;
+
+ if (strm_fe(s) == be)
+ goto end;
+
+ list_for_each_entry(fconf, &be->filter_configs, list) {
+ if (flt_stream_add_filter(s, fconf, FLT_FL_IS_BACKEND_FILTER) < 0)
+ return -1;
+ }
+
+ end:
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->stream_set_backend &&
+ FLT_OPS(filter)->stream_set_backend(s, filter, be) < 0)
+ return -1;
+ }
+ if (be->be_req_ana & AN_REQ_FLT_START_BE) {
+ s->req.flags |= CF_FLT_ANALYZE;
+ s->req.analysers |= AN_REQ_FLT_END;
+ }
+ if ((strm_fe(s)->fe_rsp_ana | be->be_rsp_ana) & (AN_RES_FLT_START_FE|AN_RES_FLT_START_BE)) {
+ s->res.flags |= CF_FLT_ANALYZE;
+ s->res.analysers |= AN_RES_FLT_END;
+ }
+
+ return 0;
+}
+
+
+/*
+ * Calls 'http_end' callback for all filters attached to a stream. All filters
+ * are called here, but only if there is at least one "data" filter. This
+ * function is called when all data have been parsed and forwarded. 'http_end'
+ * callback is resumable, so this function returns a negative value if an error
+ * occurs, 0 if it needs to wait for some reason, any other value otherwise.
+ */
+int
+flt_http_end(struct stream *s, struct http_msg *msg)
+{
+ unsigned long long *strm_off = &FLT_STRM_OFF(s, msg->chn);
+ unsigned int offset = 0;
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+ RESUME_FILTER_LOOP(s, msg->chn) {
+ unsigned long long flt_off = FLT_OFF(filter, msg->chn);
+ offset = flt_off - *strm_off;
+
+ /* Call http_end for data filters only. But the filter offset is
+ * still valid for all filters.
+ */
+ if (!IS_DATA_FILTER(filter, msg->chn))
+ continue;
+
+ if (FLT_OPS(filter)->http_end) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->http_end(s, filter, msg);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, msg->chn, end);
+ }
+ } RESUME_FILTER_END;
+
+ c_adv(msg->chn, offset);
+ *strm_off += offset;
+
+end:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Calls 'http_reset' callback for all filters attached to a stream. This
+ * happens when a 100-continue response is received.
+ */
+void
+flt_http_reset(struct stream *s, struct http_msg *msg)
+{
+ struct filter *filter;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->http_reset) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ FLT_OPS(filter)->http_reset(s, filter, msg);
+ }
+ }
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+}
+
+/*
+ * Calls 'http_reply' callback for all filters attached to a stream when
+ * HAProxy decides to stop the HTTP message processing.
+ */
+void
+flt_http_reply(struct stream *s, short status, const struct buffer *msg)
+{
+ struct filter *filter;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->http_reply) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ FLT_OPS(filter)->http_reply(s, filter, status, msg);
+ }
+ }
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+}
+
+/*
+ * Calls 'http_payload' callback for all "data" filters attached to a
+ * stream. This function is called when some data can be forwarded in the
+ * AN_REQ_HTTP_XFER_BODY and AN_RES_HTTP_XFER_BODY analyzers. It takes care to
+ * update the filters and the stream offset to be sure that a filter cannot
+ * forward more data than its predecessors. A filter can choose to not forward
+ * all data. Returns a negative value if an error occurs, else the number of
+ * forwarded bytes.
+ */
+int
+flt_http_payload(struct stream *s, struct http_msg *msg, unsigned int len)
+{
+ struct filter *filter;
+ struct htx *htx;
+ unsigned long long *strm_off = &FLT_STRM_OFF(s, msg->chn);
+ unsigned int out = co_data(msg->chn);
+ int ret, data;
+
+ strm_flt(s)->flags &= ~STRM_FLT_FL_HOLD_HTTP_HDRS;
+
+ ret = data = len - out;
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ unsigned long long *flt_off = &FLT_OFF(filter, msg->chn);
+ unsigned int offset = *flt_off - *strm_off;
+
+ /* Call http_payload for data filters only. Forward all data for
+ * the others and update their filter offset.
+ */
+ if (!IS_DATA_FILTER(filter, msg->chn)) {
+ *flt_off += data - offset;
+ continue;
+ }
+
+ if (FLT_OPS(filter)->http_payload) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->http_payload(s, filter, msg, out + offset, data - offset);
+ if (ret < 0)
+ goto end;
+ data = ret + *flt_off - *strm_off;
+ *flt_off += ret;
+ }
+ }
+
+ /* If nothing was forwarded yet, we take care to hold the headers if
+ * the following conditions are met:
+ *
+ * - *strm_off == 0 (nothing forwarded yet)
+ * - ret == 0 (no data forwarded at all on this turn)
+ * - STRM_FLT_FL_HOLD_HTTP_HDRS flag set (at least one filter wants to hold the headers)
+ *
+ * Be careful, STRM_FLT_FL_HOLD_HTTP_HDRS is removed before each http_payload loop.
+ * Thus, it must explicitly be set when necessary. We must do that to hold the headers
+ * when there is no payload.
+ */
+ if (!ret && !*strm_off && (strm_flt(s)->flags & STRM_FLT_FL_HOLD_HTTP_HDRS))
+ goto end;
+
+ ret = data;
+ *strm_off += ret;
+ end:
+ htx = htxbuf(&msg->chn->buf);
+ if (msg->flags & HTTP_MSGF_XFER_LEN)
+ htx->extra = 0;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
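+
+/* Illustrative walk-through of the offset arithmetic above (numbers are
+ * arbitrary): assume *strm_off = 100, two data filters F1 and F2, len = 150
+ * and out = 0, so data = 150. F1 already forwarded up to byte 120 (offset
+ * 20), so its http_payload callback is called with (20, 130); if it returns
+ * 25, data becomes 25 + 120 - 100 = 45 and F1's offset moves to 145. F2 sits
+ * at byte 100 (offset 0) and is called with (0, 45): it cannot forward beyond
+ * its predecessor. If it returns 45, the stream forwards 45 bytes and
+ * *strm_off becomes 145.
+ */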
+
+/*
+ * Calls 'channel_start_analyze' callback for all filters attached to a
+ * stream. This function is called when we start analyzing a request or a
+ * response. For frontend filters, it is called before all other analyzers. For
+ * backend ones, it is called before all backend
+ * analyzers. 'channel_start_analyze' callback is resumable, so this function
+ * returns 0 if an error occurs or if it needs to wait, any other value
+ * otherwise.
+ */
+int
+flt_start_analyze(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+
+ /* If this function is called, this means there is at least one filter,
+ * so we do not need to check the filter list's emptiness. */
+
+ /* Set flag on channel to tell that the channel is filtered */
+ chn->flags |= CF_FLT_ANALYZE;
+ chn->analysers |= ((chn->flags & CF_ISRESP) ? AN_RES_FLT_END : AN_REQ_FLT_END);
+
+ RESUME_FILTER_LOOP(s, chn) {
+ if (!(chn->flags & CF_ISRESP)) {
+ if (an_bit == AN_REQ_FLT_START_BE &&
+ !(filter->flags & FLT_FL_IS_BACKEND_FILTER))
+ continue;
+ }
+ else {
+ if (an_bit == AN_RES_FLT_START_BE &&
+ !(filter->flags & FLT_FL_IS_BACKEND_FILTER))
+ continue;
+ }
+
+ FLT_OFF(filter, chn) = 0;
+ if (FLT_OPS(filter)->channel_start_analyze) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->channel_start_analyze(s, filter, chn);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, chn, end);
+ }
+ } RESUME_FILTER_END;
+
+ end:
+ ret = handle_analyzer_result(s, chn, an_bit, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Calls 'channel_pre_analyze' callback for all filters attached to a
+ * stream. This function is called BEFORE each analyzer attached to a channel,
+ * except the analyzers responsible for sending data. 'channel_pre_analyze'
+ * callback is resumable, so this function returns 0 if an error occurs or if it
+ * needs to wait, any other value otherwise.
+ *
+ * Note this function can be called many times for the same analyzer. In fact,
+ * it is called until the analyzer finishes its processing.
+ */
+int
+flt_pre_analyze(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+
+ RESUME_FILTER_LOOP(s, chn) {
+ if (FLT_OPS(filter)->channel_pre_analyze && (filter->pre_analyzers & an_bit)) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->channel_pre_analyze(s, filter, chn, an_bit);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, chn, check_result);
+ filter->pre_analyzers &= ~an_bit;
+ }
+ } RESUME_FILTER_END;
+
+ check_result:
+ ret = handle_analyzer_result(s, chn, 0, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Calls 'channel_post_analyze' callback for all filters attached to a
+ * stream. This function is called AFTER each analyzer attached to a channel,
+ * except the analyzers responsible for sending data. 'channel_post_analyze'
+ * callback is NOT resumable, so this function returns 0 if an error occurs,
+ * any other value otherwise.
+ *
+ * Here, AFTER means when the analyzer finishes its processing.
+ */
+int
+flt_post_analyze(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ struct filter *filter;
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ if (FLT_OPS(filter)->channel_post_analyze && (filter->post_analyzers & an_bit)) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->channel_post_analyze(s, filter, chn, an_bit);
+ if (ret < 0)
+ break;
+ filter->post_analyzers &= ~an_bit;
+ }
+ }
+ ret = handle_analyzer_result(s, chn, 0, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * This function is the AN_REQ/RES_FLT_HTTP_HDRS analyzer, used to filter the
+ * HTTP headers of a request or a response. Returns 0 if an error occurs or if it
+ * needs to wait, any other value otherwise.
+ */
+int
+flt_analyze_http_headers(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ struct http_msg *msg;
+ int ret = 1;
+
+ msg = ((chn->flags & CF_ISRESP) ? &s->txn->rsp : &s->txn->req);
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s, s->txn, msg);
+
+ RESUME_FILTER_LOOP(s, chn) {
+ if (FLT_OPS(filter)->http_headers) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->http_headers(s, filter, msg);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, chn, check_result);
+ }
+ } RESUME_FILTER_END;
+
+ if (HAS_DATA_FILTERS(s, chn)) {
+ size_t data = http_get_hdrs_size(htxbuf(&chn->buf));
+ struct filter *f;
+
+ list_for_each_entry(f, &strm_flt(s)->filters, list)
+ FLT_OFF(f, chn) = data;
+ }
+
+ check_result:
+ ret = handle_analyzer_result(s, chn, an_bit, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Calls 'channel_end_analyze' callback for all filters attached to a
+ * stream. This function is called when we stop analyzing a request or a
+ * response. It is called after all other analyzers. 'channel_end_analyze'
+ * callback is resumable, so this function returns 0 if an error occurs or if it
+ * needs to wait, any other value otherwise.
+ */
+int
+flt_end_analyze(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+
+ /* Check if all filters attached to the stream have finished their
+ * processing on this channel. */
+ if (!(chn->flags & CF_FLT_ANALYZE))
+ goto sync;
+
+ RESUME_FILTER_LOOP(s, chn) {
+ FLT_OFF(filter, chn) = 0;
+ unregister_data_filter(s, chn, filter);
+
+ if (FLT_OPS(filter)->channel_end_analyze) {
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->channel_end_analyze(s, filter, chn);
+ if (ret <= 0)
+ BREAK_EXECUTION(s, chn, end);
+ }
+ } RESUME_FILTER_END;
+
+ end:
+ /* We don't remove this analyzer yet because we need to synchronize
+ * both channels. So here, we just remove the CF_FLT_ANALYZE flag. */
+ ret = handle_analyzer_result(s, chn, 0, ret);
+ if (ret) {
+ chn->flags &= ~CF_FLT_ANALYZE;
+
+ /* Pretend there is an activity on both channels. Flag on the
+ * current one will be automatically removed, so only the other
+ * one will remain. This is a way to be sure that
+ * 'channel_end_analyze' callback will have a chance to be
+ * called at least once for the other side to finish the current
+ * processing. Of course, it is the filter's responsibility to
+ * wake the stream up if it chooses to loop on this callback. */
+ s->req.flags |= CF_WAKE_ONCE;
+ s->res.flags |= CF_WAKE_ONCE;
+ }
+
+
+ sync:
+ /* Now we can check if filters have finished their work on both
+ * channels */
+ if (!(s->req.flags & CF_FLT_ANALYZE) && !(s->res.flags & CF_FLT_ANALYZE)) {
+ /* Sync channels by removing this analyzer from both channels */
+ s->req.analysers &= ~AN_REQ_FLT_END;
+ s->res.analysers &= ~AN_RES_FLT_END;
+
+ /* Remove backend filters from the list */
+ flt_stream_release(s, 1);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ }
+ else {
+ DBG_TRACE_DEVEL("waiting for sync", STRM_EV_STRM_ANA|STRM_EV_FLT_ANA, s);
+ }
+ return ret;
+}
+
+
+/*
+ * Calls 'tcp_payload' callback for all "data" filters attached to a
+ * stream. This function is called when some data can be forwarded in the
+ * AN_REQ_FLT_XFER_BODY and AN_RES_FLT_XFER_BODY analyzers. It takes care to
+ * update the filters and the stream offset to be sure that a filter cannot
+ * forward more data than its predecessors. A filter can choose to not forward
+ * all data. Returns a negative value if an error occurs, else the number of
+ * forwarded bytes.
+ */
+int
+flt_tcp_payload(struct stream *s, struct channel *chn, unsigned int len)
+{
+ struct filter *filter;
+ unsigned long long *strm_off = &FLT_STRM_OFF(s, chn);
+ unsigned int out = co_data(chn);
+ int ret, data;
+
+ ret = data = len - out;
+ DBG_TRACE_ENTER(STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ list_for_each_entry(filter, &strm_flt(s)->filters, list) {
+ unsigned long long *flt_off = &FLT_OFF(filter, chn);
+ unsigned int offset = *flt_off - *strm_off;
+
+ /* Call tcp_payload for data filters only. Forward all data for
+ * the others and update their filter offset.
+ */
+ if (!IS_DATA_FILTER(filter, chn)) {
+ *flt_off += data - offset;
+ continue;
+ }
+
+ if (FLT_OPS(filter)->tcp_payload) {
+
+ DBG_TRACE_DEVEL(FLT_ID(filter), STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ ret = FLT_OPS(filter)->tcp_payload(s, filter, chn, out + offset, data - offset);
+ if (ret < 0)
+ goto end;
+ data = ret + *flt_off - *strm_off;
+ *flt_off += ret;
+ }
+ }
+
+ /* Only forward data if the last filter decides to forward something */
+ if (ret > 0) {
+ ret = data;
+ *strm_off += ret;
+ }
+ end:
+ DBG_TRACE_LEAVE(STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
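+
+/* The offset arithmetic above is the same as in flt_http_payload(); see the
+ * worked example following that function. The notable difference is that data
+ * are only forwarded here when the last filter returned a strictly positive
+ * value.
+ */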
+
+/*
+ * Called when TCP data must be filtered on a channel. This function is the
+ * AN_REQ/RES_FLT_XFER_DATA analyzer. When called, it is responsible for
+ * forwarding data when the proxy is not in http mode. Behind the scenes, it
+ * calls the 'tcp_payload' callback for all "data"
+ * filters attached to a stream. Returns 0 if an error occurs or if it needs to
+ * wait, any other value otherwise.
+ */
+int
+flt_xfer_data(struct stream *s, struct channel *chn, unsigned int an_bit)
+{
+ unsigned int len;
+ int ret = 1;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+
+ /* If there are no "data" filters, we do nothing */
+ if (!HAS_DATA_FILTERS(s, chn))
+ goto end;
+
+ if (s->flags & SF_HTX) {
+ struct htx *htx = htxbuf(&chn->buf);
+ len = htx->data;
+ }
+ else
+ len = c_data(chn);
+
+ ret = flt_tcp_payload(s, chn, len);
+ if (ret < 0)
+ goto end;
+ c_adv(chn, ret);
+
+ /* Stop waiting for data if:
+ * - the output is closed
+ * - the input is closed and no data is pending
+ * - there is a READ/WRITE timeout
+ */
+ if (chn_cons(chn)->flags & SC_FL_SHUT_DONE) {
+ ret = 1;
+ goto end;
+ }
+ if (chn_prod(chn)->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) {
+ if (((s->flags & SF_HTX) && htx_is_empty(htxbuf(&chn->buf))) || c_empty(chn)) {
+ ret = 1;
+ goto end;
+ }
+ }
+ if (chn->flags & (CF_READ_TIMEOUT|CF_WRITE_TIMEOUT)) {
+ ret = 1;
+ goto end;
+ }
+
+ /* Wait for data */
+ DBG_TRACE_DEVEL("waiting for more data", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ return 0;
+ end:
+ /* Terminate the data filtering. If <ret> is negative, an error was
+ * encountered during the filtering. */
+ ret = handle_analyzer_result(s, chn, an_bit, ret);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_FLT_ANA, s);
+ return ret;
+}
+
+/*
+ * Handles the result of the filters' analyzers. It returns 0 if an error occurs or if
+ * it needs to wait, any other value otherwise.
+ */
+static int
+handle_analyzer_result(struct stream *s, struct channel *chn,
+ unsigned int an_bit, int ret)
+{
+ if (ret < 0)
+ goto return_bad_req;
+ else if (!ret)
+ goto wait;
+
+ /* End of job, return OK */
+ if (an_bit) {
+ chn->analysers &= ~an_bit;
+ chn->analyse_exp = TICK_ETERNITY;
+ }
+ return 1;
+
+ return_bad_req:
+ /* An error occurred */
+ if (IS_HTX_STRM(s)) {
+ http_set_term_flags(s);
+
+ if (s->txn->status > 0)
+ http_reply_and_close(s, s->txn->status, NULL);
+ else {
+ s->txn->status = (!(chn->flags & CF_ISRESP)) ? 400 : 502;
+ http_reply_and_close(s, s->txn->status, http_error_message(s));
+ }
+ }
+ else {
+ sess_set_term_flags(s);
+ stream_retnclose(s, NULL);
+ }
+
+ if (!(chn->flags & CF_ISRESP))
+ s->req.analysers &= AN_REQ_FLT_END;
+ else
+ s->res.analysers &= AN_RES_FLT_END;
+
+
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_FLT_ANA|STRM_EV_FLT_ERR, s);
+ return 0;
+
+ wait:
+ if (!(chn->flags & CF_ISRESP))
+ channel_dont_connect(chn);
+ DBG_TRACE_DEVEL("wairing for more data", STRM_EV_FLT_ANA, s);
+ return 0;
+}
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled. */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "filter", parse_filter },
+ { 0, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+REGISTER_POST_CHECK(flt_init_all);
+REGISTER_PER_THREAD_INIT(flt_init_all_per_thread);
+REGISTER_PER_THREAD_DEINIT(flt_deinit_all_per_thread);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/fix.c b/src/fix.c
new file mode 100644
index 0000000..abf3119
--- /dev/null
+++ b/src/fix.c
@@ -0,0 +1,264 @@
+/*
+ * Financial Information eXchange Protocol
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/intops.h>
+#include <haproxy/fix.h>
+/*
+ * Returns the corresponding numerical tag id if <str> looks like a valid FIX
+ * protocol tag ID. Otherwise, 0 is returned (0 is an invalid id).
+ *
+ * If <version> is given, it must be one of the defined FIX version strings (see
+ * FIX_X_Y macros). In this case, the function will also check tag ID ranges. If
+ * no <version> is provided, any strictly positive integer is valid.
+ *
+ * The tag ID range depends on the FIX protocol version:
+ * - FIX.4.0: 1-140
+ * - FIX.4.1: 1-211
+ * - FIX.4.2: 1-446
+ * - FIX.4.3: 1-659
+ * - FIX.4.4: 1-956
+ * - FIX.5.0: 1-1139
+ * - FIX.5.0SP1: 1-1426
+ * - FIX.5.0SP2: 1-1621
+ * - the range 10000 to 19999 is reserved for "user defined tags"
+ */
+unsigned int fix_check_id(const struct ist str, const struct ist version)
+{
+ const char *s, *end;
+ unsigned int ret;
+
+ s = istptr(str);
+ end = istend(str);
+ ret = read_uint(&s, end);
+
+ /* we did not consume all characters from <str>, this is an error */
+ if (s != end)
+ return 0;
+
+ /* field ID can't be 0 */
+ if (ret == 0)
+ return 0;
+
+ /* we can leave now if version was not provided */
+ if (!isttest(version))
+ return ret;
+
+ /* we can leave now if this is a "user defined tag id" */
+ if (ret >= 10000 && ret <= 19999)
+ return ret;
+
+ /* now perform checking per FIX version */
+ if (istissame(FIX_4_0, version) && (ret <= 140))
+ return ret;
+ else if (istissame(FIX_4_1, version) && (ret <= 211))
+ return ret;
+ else if (istissame(FIX_4_2, version) && (ret <= 446))
+ return ret;
+ else if (istissame(FIX_4_3, version) && (ret <= 659))
+ return ret;
+ else if (istissame(FIX_4_4, version) && (ret <= 956))
+ return ret;
+ /* version string is the same for all 5.0 versions, so we can only take
+ * into consideration the biggest range
+ */
+ else if (istissame(FIX_5_0, version) && (ret <= 1621))
+ return ret;
+
+ return 0;
+}
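+
+/* Usage sketch for the function above (illustrative values):
+ *
+ *   fix_check_id(ist("35"), FIX_4_2)     returns 35 (within the 1-446 range)
+ *   fix_check_id(ist("9999"), FIX_4_0)   returns 0 (out of the 1-140 range)
+ *   fix_check_id(ist("10042"), IST_NULL) returns 10042 (no version given, any
+ *                                        strictly positive integer is valid)
+ */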
+
+/*
+ * Parses a FIX message <msg> and performs the following sanity checks:
+ *
+ * - checks tag ids and values are not empty
+ * - checks tag ids are numerical values
+ * - checks the first tag is BeginString with a valid version
+ * - checks the second tag is BodyLength with the right body length
+ * - checks the third tag is MsgType
+ * - checks the last tag is CheckSum with a valid checksum
+ *
+ * Returns:
+ * FIX_INVALID_MESSAGE if the message is invalid
+ * FIX_NEED_MORE_DATA if we need more data to fully validate the message
+ * FIX_VALID_MESSAGE if the message looks valid
+ */
+int fix_validate_message(const struct ist msg)
+{
+ struct ist parser, version;
+ unsigned int tagnum, bodylen;
+ unsigned char checksum;
+ char *body;
+ int ret = FIX_INVALID_MESSAGE;
+
+ if (istlen(msg) < FIX_MSG_MINSIZE) {
+ ret = FIX_NEED_MORE_DATA;
+ goto end;
+ }
+
+ /* Parse the whole message to compute the checksum and check that all
+ * tag ids are properly set. Here we are sure to have the first 2 tags. Thus
+ * the version and the body length can be checked.
+ */
+ parser = msg;
+ version = IST_NULL;
+ checksum = tagnum = bodylen = 0;
+ body = NULL;
+ while (istlen(parser) > 0) {
+ struct ist tag, value;
+ unsigned int tagid;
+ const char *p, *end;
+
+ /* parse the tag ID and its value and perform first sanity checks */
+ value = iststop(istfind(parser, '='), FIX_DELIMITER);
+
+ /* end of value not found */
+ if (istend(value) == istend(parser)) {
+ ret = FIX_NEED_MORE_DATA;
+ goto end;
+ }
+ /* empty tag or empty value are forbidden */
+ if (istptr(parser) == istptr(value) || !istlen(value))
+ goto end;
+
+ /* value points to '='. Get the tag and skip '=' */
+ tag = ist2(istptr(parser), istptr(value) - istptr(parser));
+ value = istnext(value);
+
+ /* Check the tag id */
+ tagid = fix_check_id(tag, version);
+ if (!tagid)
+ goto end;
+ tagnum++;
+
+ if (tagnum == 1) {
+ /* the first tag must be BeginString */
+ if (tagid != FIX_TAG_BeginString)
+ goto end;
+
+ version = fix_version(value);
+ if (!isttest(version))
+ goto end;
+ }
+ else if (tagnum == 2) {
+ /* the second tag must be BodyLength */
+ if (tagid != FIX_TAG_BodyLength)
+ goto end;
+
+ p = istptr(value);
+ end = istend(value);
+ bodylen = read_uint(&p, end);
+
+ /* we did not consume all characters from <value> or there is no
+ * body: this is an error, since the body contains at least the
+ * message type.
+ */
+ if (p != end || !bodylen)
+ goto end;
+
+ body = istend(value) + 1;
+ }
+ else if (tagnum == 3) {
+ /* the third tag must be MsgType */
+ if (tagid != FIX_TAG_MsgType)
+ goto end;
+ }
+ else if (tagnum > 3 && tagid == FIX_TAG_CheckSum) {
+ /* CheckSum tag should be the last one and is not taken into account
+ * to compute the checksum itself and the body length. The value is
+ * a three-octet representation of the checksum decimal value.
+ */
+ if (bodylen != istptr(parser) - body)
+ goto end;
+
+ if (istlen(value) != 3)
+ goto end;
+ if (checksum != strl2ui(istptr(value), istlen(value)))
+ goto end;
+
+ /* End of the message, exit from the loop */
+ ret = FIX_VALID_MESSAGE;
+ goto end;
+ }
+
+ /* compute checksum of tag=value<delim> */
+ for (p = istptr(tag) ; p < istend(tag) ; ++p)
+ checksum += *p;
+ checksum += '=';
+ for (p = istptr(value) ; p < istend(value) ; ++p)
+ checksum += *p;
+ checksum += FIX_DELIMITER;
+
+ /* move the parser after the value and its delimiter */
+ parser = istadv(parser, istlen(tag) + istlen(value) + 2);
+ }
+
+ if (body) {
+ /* We started to read the body but did not reach the CheckSum tag */
+ ret = FIX_NEED_MORE_DATA;
+ }
+
+ end:
+ return ret;
+}
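+
+/* For reference, a well-formed message accepted by the function above looks
+ * like this (FIX_DELIMITER, the SOH byte, shown as '|'):
+ *
+ *   8=FIX.4.2|9=<bodylen>|35=A|...|10=<checksum>|
+ *
+ * <bodylen> counts the bytes from the start of the MsgType tag ("35=") up to,
+ * but excluding, the CheckSum tag ("10="), and <checksum> is the byte sum of
+ * everything before the CheckSum tag, modulo 256, encoded on exactly 3
+ * characters.
+ */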
+
+
+/*
+ * Iterates over a FIX message <msg> and returns the value of <tagid>.
+ *
+ * Returns the corresponding value if <tagid> is found. If <tagid> is not found
+ * because more data are required, the message with a length set to 0 is
+ * returned. If <tagid> is not found in the message or if the message is
+ * invalid, IST_NULL is returned.
+ *
+ * Note: Only simple sanity checks are performed on tags and values (not empty).
+ *
+ * A tag looks like:
+ * <tagid>=<value>FIX_DELIMITER, with <tagid> and <value> not empty
+ */
+struct ist fix_tag_value(const struct ist msg, unsigned int tagid)
+{
+ struct ist parser, t, v;
+ unsigned int id;
+
+ parser = msg;
+ while (istlen(parser) > 0) {
+ v = iststop(istfind(parser, '='), FIX_DELIMITER);
+
+ /* delimiter not found, need more data */
+ if (istend(v) == istend(parser))
+ break;
+
+ /* empty tag or empty value, invalid */
+ if (istptr(parser) == istptr(v) || !istlen(v))
+ goto not_found_or_invalid;
+
+ t = ist2(istptr(parser), istptr(v) - istptr(parser));
+ v = istnext(v);
+
+ id = fix_check_id(t, IST_NULL);
+ if (!id)
+ goto not_found_or_invalid;
+ if (id == tagid) {
+ /* <tagId> found, return the corresponding value */
+ return v;
+ }
+
+ /* CheckSum tag is the last one, no <tagid> found */
+ if (id == FIX_TAG_CheckSum)
+ goto not_found_or_invalid;
+
+ parser = istadv(parser, istlen(t) + istlen(v) + 2);
+ }
+ /* not enough data to find <tagid> */
+ return ist2(istptr(msg), 0);
+
+ not_found_or_invalid:
+ return IST_NULL;
+}
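+
+/* Usage sketch for the function above: to extract the message type, one may
+ * call fix_tag_value(msg, FIX_TAG_MsgType) and check the result with
+ * isttest() (IST_NULL means not found or invalid message) and istlen()
+ * (a zero length means more data are needed).
+ */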
diff --git a/src/flt_bwlim.c b/src/flt_bwlim.c
new file mode 100644
index 0000000..66c2883
--- /dev/null
+++ b/src/flt_bwlim.c
@@ -0,0 +1,976 @@
+/*
+ * Bandwidth limitation filter.
+ *
+ * Copyright 2022 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+
+#include <haproxy/api.h>
+#include <haproxy/channel-t.h>
+#include <haproxy/filters.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/stream.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+const char *bwlim_flt_id = "bandwidth limitation filter";
+
+struct flt_ops bwlim_ops;
+
+#define BWLIM_FL_NONE 0x00000000 /* For init purposes */
+#define BWLIM_FL_IN 0x00000001 /* Limit clients uploads */
+#define BWLIM_FL_OUT 0x00000002 /* Limit clients downloads */
+#define BWLIM_FL_SHARED 0x00000004 /* Limit shared between clients (using stick-tables) */
+
+#define BWLIM_ACT_LIMIT_EXPR 0x00000001
+#define BWLIM_ACT_LIMIT_CONST 0x00000002
+#define BWLIM_ACT_PERIOD_EXPR 0x00000004
+#define BWLIM_ACT_PERIOD_CONST 0x00000008
+
+struct bwlim_config {
+ struct proxy *proxy;
+ char *name;
+ unsigned int flags;
+ struct sample_expr *expr;
+ union {
+ char *n;
+ struct stktable *t;
+ } table;
+ unsigned int period;
+ unsigned int limit;
+ unsigned int min_size;
+};
+
+struct bwlim_state {
+ struct freq_ctr bytes_rate;
+ struct stksess *ts;
+ struct act_rule *rule;
+ unsigned int limit;
+ unsigned int period;
+ unsigned int exp;
+};
+
+
+/* Pool used to allocate bwlim_state structs */
+DECLARE_STATIC_POOL(pool_head_bwlim_state, "bwlim_state", sizeof(struct bwlim_state));
+
+
+/* Apply the bandwidth limitation of the filter <filter>. <len> is the maximum
+ * amount of data that the filter can forward. This function applies the
+ * limitation and returns what the stream is authorized to forward. Several
+ * limitation can be stacked.
+ */
+static int bwlim_apply_limit(struct filter *filter, struct channel *chn, unsigned int len)
+{
+ struct bwlim_config *conf = FLT_CONF(filter);
+ struct bwlim_state *st = filter->ctx;
+ struct freq_ctr *bytes_rate;
+ unsigned int period, limit, remain, tokens, users;
+ unsigned int wait = 0;
+ int overshoot, ret = 0;
+
+ /* Don't forward anything if there is nothing to forward or the waiting
+ * time is not expired
+ */
+ if (!len || (tick_isset(st->exp) && !tick_is_expired(st->exp, now_ms)))
+ goto end;
+
+ st->exp = TICK_ETERNITY;
+ ret = len;
+ if (conf->flags & BWLIM_FL_SHARED) {
+ void *ptr;
+ unsigned int type = ((conf->flags & BWLIM_FL_IN) ? STKTABLE_DT_BYTES_IN_RATE : STKTABLE_DT_BYTES_OUT_RATE);
+
+ /* In shared mode, get a pointer to the stick-table entry. It
+ * will be used to get the freq counter. It is also used to get
+ * the number of users.
+ */
+ ptr = stktable_data_ptr(conf->table.t, st->ts, type);
+ if (!ptr)
+ goto end;
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &st->ts->lock);
+ bytes_rate = &stktable_data_cast(ptr, std_t_frqp);
+ period = conf->table.t->data_arg[type].u;
+ limit = conf->limit;
+ users = st->ts->ref_cnt;
+ }
+ else {
+ /* In per-stream mode, the freq-counter is private to the
+ * stream. Get it from the filter state. Rely on the custom
+ * limit/period if defined or use the default ones. In this mode,
+ * there is only one user.
+ */
+ bytes_rate = &st->bytes_rate;
+ period = (st->period ? st->period : conf->period);
+ limit = (st->limit ? st->limit : conf->limit);
+ users = 1;
+ }
+
+ /* Be sure the current rate does not exceed the limit over the current
+ * period. If it does, nothing is forwarded and the waiting time is
+ * computed to be sure to not retry too early.
+ *
+ * The test is used to avoid the initial burst. Otherwise, streams will
+ * consume the limit as fast as possible and will then be paused for
+ * a long time.
+ */
+ overshoot = freq_ctr_overshoot_period(bytes_rate, period, limit);
+ if (overshoot > 0) {
+ if (conf->flags & BWLIM_FL_SHARED)
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &st->ts->lock);
+ wait = div64_32((uint64_t)(conf->min_size + overshoot) * period * users,
+ limit);
+ st->exp = tick_add(now_ms, (wait ? wait : 1));
+ ret = 0;
+ goto end;
+ }
+
+ /* Get the allowed quota per user. */
+ remain = freq_ctr_remain_period(bytes_rate, period, limit, 0);
+ tokens = div64_32((uint64_t)(remain + users - 1), users);
+
+ if (tokens < len) {
+ /* The stream cannot forward all its data. But we will check if
+ * it can perform a small burst if the global quota is large
+ * enough. But, in this case, its waiting time will be
+ * increased accordingly.
+ */
+ ret = tokens;
+ if (tokens < conf->min_size) {
+ ret = (chn_prod(chn)->flags & (SC_FL_EOI|SC_FL_EOS|SC_FL_ABRT_DONE))
+ ? MIN(len, conf->min_size)
+ : conf->min_size;
+
+ if (ret <= remain)
+ wait = div64_32((uint64_t)(ret - tokens) * period * users + limit - 1, limit);
+ else
+ ret = (limit < ret) ? remain : 0;
+ }
+ }
+
+ /* At the end, update the freq-counter and compute the waiting time if
+ * the stream is limited
+ */
+ update_freq_ctr_period(bytes_rate, period, ret);
+ if (ret < len) {
+ wait += next_event_delay_period(bytes_rate, period, limit, MIN(len - ret, conf->min_size * users));
+ st->exp = tick_add(now_ms, (wait ? wait : 1));
+ }
+
+ if (conf->flags & BWLIM_FL_SHARED)
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &st->ts->lock);
+
+ end:
+ chn->analyse_exp = tick_first((tick_is_expired(chn->analyse_exp, now_ms) ? TICK_ETERNITY : chn->analyse_exp),
+ st->exp);
+ return ret;
+}
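+
+/* Numeric example of the computations above (illustrative values): with a
+ * shared limit of 1,000,000 bytes over a 1,000 ms period and 2 users, an
+ * overshoot of 50,000 bytes (min-size 0) makes the stream wait
+ * (0 + 50000) * 1000 * 2 / 1000000 = 100 ms before retrying. Without
+ * overshoot, a remaining quota of 90,000 bytes shared between 3 users grants
+ * each stream ceil(90000 / 3) = 30,000 tokens for this turn.
+ */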
+
+/***************************************************************************
+ * Hooks that manage the filter lifecycle (init/check/deinit)
+ **************************************************************************/
+/* Initialize the filter. Returns -1 on error, else 0. */
+static int bwlim_init(struct proxy *px, struct flt_conf *fconf)
+{
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+/* Free resources allocated by the bwlim filter. */
+static void bwlim_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct bwlim_config *conf = fconf->conf;
+
+ if (conf) {
+ ha_free(&conf->name);
+ release_sample_expr(conf->expr);
+ conf->expr = NULL;
+ ha_free(&fconf->conf);
+ }
+}
+
+/* Check the configuration of a bwlim filter for a specified proxy.
+ * Returns 1 on error, else 0. */
+static int bwlim_check(struct proxy *px, struct flt_conf *fconf)
+{
+ struct bwlim_config *conf = fconf->conf;
+ struct stktable *target;
+
+ if (!(conf->flags & BWLIM_FL_SHARED))
+ return 0;
+
+ if (conf->table.n)
+ target = stktable_find_by_name(conf->table.n);
+ else
+ target = px->table;
+
+ if (!target) {
+ ha_alert("Proxy %s : unable to find table '%s' referenced by bwlim filter '%s'",
+ px->id, conf->table.n ? conf->table.n : px->id, conf->name);
+ return 1;
+ }
+
+ if ((conf->flags & BWLIM_FL_IN) && !target->data_ofs[STKTABLE_DT_BYTES_IN_RATE]) {
+ ha_alert("Proxy %s : stick-table '%s' uses a data type incompatible with bwlim filter '%s'."
+ " It must be 'bytes_in_rate'",
+ px->id, conf->table.n ? conf->table.n : px->id, conf->name);
+ return 1;
+ }
+ else if ((conf->flags & BWLIM_FL_OUT) && !target->data_ofs[STKTABLE_DT_BYTES_OUT_RATE]) {
+ ha_alert("Proxy %s : stick-table '%s' uses a data type incompatible with bwlim filter '%s'."
+ " It must be 'bytes_out_rate'",
+ px->id, conf->table.n ? conf->table.n : px->id, conf->name);
+ return 1;
+ }
+
+ if (!stktable_compatible_sample(conf->expr, target->type)) {
+ ha_alert("Proxy %s : stick-table '%s' uses a key type incompatible with bwlim filter '%s'",
+ px->id, conf->table.n ? conf->table.n : px->id, conf->name);
+ return 1;
+ }
+ else {
+ if (!in_proxies_list(target->proxies_list, px)) {
+ px->next_stkt_ref = target->proxies_list;
+ target->proxies_list = px;
+ }
+ ha_free(&conf->table.n);
+ conf->table.t = target;
+ }
+
+ return 0;
+}
+
+/**************************************************************************
+ * Hooks to handle start/stop of streams
+ *************************************************************************/
+/* Called when a filter instance is created and attached to a stream */
+static int bwlim_attach(struct stream *s, struct filter *filter)
+{
+ struct bwlim_state *st;
+
+ st = pool_zalloc(pool_head_bwlim_state);
+ if (!st)
+ return -1;
+ filter->ctx = st;
+ return 1;
+}
+
+/* Called when a filter instance is detached from a stream, just before its
+ * destruction */
+static void bwlim_detach(struct stream *s, struct filter *filter)
+{
+ struct bwlim_config *conf = FLT_CONF(filter);
+ struct bwlim_state *st = filter->ctx;
+ struct stktable *t = conf->table.t;
+
+ if (!st)
+ return;
+
+ if (st->ts)
+ stktable_touch_local(t, st->ts, 1);
+
+ /* release the bandwidth limitation state */
+ pool_free(pool_head_bwlim_state, st);
+ filter->ctx = NULL;
+}
+
+/**************************************************************************
+ * Hooks to handle channels activity
+ *************************************************************************/
+
+/* Called when analysis ends for a given channel */
+static int bwlim_chn_end_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ chn->analyse_exp = TICK_ETERNITY;
+ return 1;
+}
+
+
+/**************************************************************************
+ * Hooks to filter HTTP messages
+ *************************************************************************/
+static int bwlim_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ msg->chn->analyse_exp = TICK_ETERNITY;
+ return 1;
+}
+
+static int bwlim_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ return bwlim_apply_limit(filter, msg->chn, len);
+}
+
+/**************************************************************************
+ * Hooks to filter TCP data
+ *************************************************************************/
+static int bwlim_tcp_payload(struct stream *s, struct filter *filter, struct channel *chn,
+ unsigned int offset, unsigned int len)
+{
+ return bwlim_apply_limit(filter, chn, len);
+}
+
+/********************************************************************
+ * Functions that manage the filter initialization
+ ********************************************************************/
+struct flt_ops bwlim_ops = {
+ /* Manage bwlim filter, called for each filter declaration */
+ .init = bwlim_init,
+ .deinit = bwlim_deinit,
+ .check = bwlim_check,
+
+ /* Handle start/stop of streams */
+ .attach = bwlim_attach,
+ .detach = bwlim_detach,
+
+ /* Handle channels activity */
+ .channel_end_analyze = bwlim_chn_end_analyze,
+
+ /* Filter HTTP requests and responses */
+ .http_headers = bwlim_http_headers,
+ .http_payload = bwlim_http_payload,
+
+ /* Filter TCP data */
+ .tcp_payload = bwlim_tcp_payload,
+};
+
+/* Set a bandwidth limitation. It always returns ACT_RET_CONT. On error, the rule
+ * is ignored. First of all, it looks for the corresponding filter. Then, for a
+ * shared limitation, the stick-table entry is retrieved. For a per-stream
+ * limitation, the custom limit and period are computed, if necessary. At the
+ * end, the filter is registered on the data filtering for the right channel
+ * (bwlim-in = request, bwlim-out = response).
+ */
+static enum act_return bwlim_set_limit(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct bwlim_config *conf = rule->arg.act.p[3];
+ struct filter *filter;
+ struct bwlim_state *st = NULL;
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ int opt;
+
+ list_for_each_entry(filter, &s->strm_flt.filters, list) {
+ if (FLT_ID(filter) == bwlim_flt_id && FLT_CONF(filter) == conf) {
+ st = filter->ctx;
+ break;
+ }
+ }
+
+ if (!st)
+ goto end;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CNT: opt = SMP_OPT_DIR_REQ | SMP_OPT_FINAL; break;
+ case ACT_F_TCP_RES_CNT: opt = SMP_OPT_DIR_RES | SMP_OPT_FINAL; break;
+ case ACT_F_HTTP_REQ: opt = SMP_OPT_DIR_REQ | SMP_OPT_FINAL; break;
+ case ACT_F_HTTP_RES: opt = SMP_OPT_DIR_RES | SMP_OPT_FINAL; break;
+ default:
+ goto end;
+ }
+
+ if (conf->flags & BWLIM_FL_SHARED) {
+ t = conf->table.t;
+ key = stktable_fetch_key(t, px, sess, s, opt, conf->expr, NULL);
+ if (!key)
+ goto end;
+
+ ts = stktable_get_entry(t, key);
+ if (!ts)
+ goto end;
+
+ st->ts = ts;
+ st->rule = rule;
+ }
+ else {
+ struct sample *smp;
+
+ st->limit = 0;
+ st->period = 0;
+ if (rule->action & BWLIM_ACT_LIMIT_EXPR) {
+ smp = sample_fetch_as_type(px, sess, s, opt, rule->arg.act.p[1], SMP_T_SINT);
+ if (smp && smp->data.u.sint > 0)
+ st->limit = smp->data.u.sint;
+ }
+ else if (rule->action & BWLIM_ACT_LIMIT_CONST)
+ st->limit = (uintptr_t)rule->arg.act.p[1];
+
+ if (rule->action & BWLIM_ACT_PERIOD_EXPR) {
+ smp = sample_fetch_as_type(px, sess, s, opt, rule->arg.act.p[2], SMP_T_SINT);
+ if (smp && smp->data.u.sint > 0)
+ st->period = smp->data.u.sint;
+ }
+ else if (rule->action & BWLIM_ACT_PERIOD_CONST)
+ st->period = (uintptr_t)rule->arg.act.p[2];
+ }
+
+ st->exp = TICK_ETERNITY;
+ if (conf->flags & BWLIM_FL_IN)
+ register_data_filter(s, &s->req, filter);
+ else
+ register_data_filter(s, &s->res, filter);
+
+ end:
+ return ACT_RET_CONT;
+}
+
+/* Check function for "set-bandwidth-limit" action. It returns 1 on
+ * success. Otherwise, it returns 0 and <err> is filled.
+ */
+int check_bwlim_action(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct flt_conf *fconf;
+ struct bwlim_config *conf = NULL;
+ unsigned int where;
+
+ list_for_each_entry(fconf, &px->filter_configs, list) {
+ conf = NULL;
+ if (fconf->id == bwlim_flt_id) {
+ conf = fconf->conf;
+ if (strcmp(rule->arg.act.p[0], conf->name) == 0)
+ break;
+ }
+ }
+ if (!conf) {
+ memprintf(err, "unable to find bwlim filter '%s' referenced by set-bandwidth-limit rule",
+ (char *)rule->arg.act.p[0]);
+ return 0;
+ }
+
+ if ((conf->flags & BWLIM_FL_SHARED) && rule->arg.act.p[1]) {
+ memprintf(err, "set-bandwidth-limit rule cannot define a limit for a shared bwlim filter");
+ return 0;
+ }
+
+ if ((conf->flags & BWLIM_FL_SHARED) && rule->arg.act.p[2]) {
+ memprintf(err, "set-bandwidth-limit rule cannot define a period for a shared bwlim filter");
+ return 0;
+ }
+
+ where = 0;
+ if (px->cap & PR_CAP_FE) {
+ if (rule->from == ACT_F_TCP_REQ_CNT)
+ where |= SMP_VAL_FE_REQ_CNT;
+ else if (rule->from == ACT_F_HTTP_REQ)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ else if (rule->from == ACT_F_TCP_RES_CNT)
+ where |= SMP_VAL_FE_RES_CNT;
+ else if (rule->from == ACT_F_HTTP_RES)
+ where |= SMP_VAL_FE_HRS_HDR;
+ }
+ if (px->cap & PR_CAP_BE) {
+ if (rule->from == ACT_F_TCP_REQ_CNT)
+ where |= SMP_VAL_BE_REQ_CNT;
+ else if (rule->from == ACT_F_HTTP_REQ)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ else if (rule->from == ACT_F_TCP_RES_CNT)
+ where |= SMP_VAL_BE_RES_CNT;
+ else if (rule->from == ACT_F_HTTP_RES)
+ where |= SMP_VAL_BE_HRS_HDR;
+ }
+
+ if ((rule->action & BWLIM_ACT_LIMIT_EXPR) && rule->arg.act.p[1]) {
+ struct sample_expr *expr = rule->arg.act.p[1];
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err, "set-bandwidth-limit rule uses a limit extracting information from '%s', none of which is available here",
+ sample_src_names(expr->fetch->use));
+ return 0;
+ }
+
+ if (rule->from == ACT_F_TCP_REQ_CNT && (px->cap & PR_CAP_FE)) {
+ if (!px->tcp_req.inspect_delay && !(expr->fetch->val & SMP_VAL_FE_SES_ACC)) {
+ ha_warning("%s '%s' : a 'tcp-request content set-bandwidth-limit*' rule explicitly depending on request"
+ " contents without any 'tcp-request inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-request inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+ }
+ if (rule->from == ACT_F_TCP_RES_CNT && (px->cap & PR_CAP_BE)) {
+ if (!px->tcp_rep.inspect_delay && !(expr->fetch->val & SMP_VAL_BE_SRV_CON)) {
+ ha_warning("%s '%s' : a 'tcp-response content set-bandwidth-limit*' rule explicitly depending on response"
+ " contents without any 'tcp-response inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-response inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+ }
+ }
+
+ if ((rule->action & BWLIM_ACT_PERIOD_EXPR) && rule->arg.act.p[2]) {
+ struct sample_expr *expr = rule->arg.act.p[2];
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err, "set-bandwidth-limit rule uses a period extracting information from '%s', none of which is available here",
+ sample_src_names(expr->fetch->use));
+ return 0;
+ }
+
+ if (rule->from == ACT_F_TCP_REQ_CNT && (px->cap & PR_CAP_FE)) {
+ if (!px->tcp_req.inspect_delay && !(expr->fetch->val & SMP_VAL_FE_SES_ACC)) {
+ ha_warning("%s '%s' : a 'tcp-request content set-bandwidth-limit*' rule explicitly depending on request"
+ " contents without any 'tcp-request inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-request inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+ }
+ if (rule->from == ACT_F_TCP_RES_CNT && (px->cap & PR_CAP_BE)) {
+ if (!px->tcp_rep.inspect_delay && !(expr->fetch->val & SMP_VAL_BE_SRV_CON)) {
+ ha_warning("%s '%s' : a 'tcp-response content set-bandwidth-limit*' rule explicitly depending on response"
+ " contents without any 'tcp-response inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-response inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+ }
+ }
+
+ if (conf->expr) {
+ if (!(conf->expr->fetch->val & where)) {
+ memprintf(err, "bwlim filter '%s uses a key extracting information from '%s', none of which is available here",
+ conf->name, sample_src_names(conf->expr->fetch->use));
+ return 0;
+ }
+
+ if (rule->from == ACT_F_TCP_REQ_CNT && (px->cap & PR_CAP_FE)) {
+ if (!px->tcp_req.inspect_delay && !(conf->expr->fetch->val & SMP_VAL_FE_SES_ACC)) {
+ ha_warning("%s '%s' : a 'tcp-request content set-bandwidth-limit*' rule explicitly depending on request"
+ " contents without any 'tcp-request inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-request inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+ }
+ if (rule->from == ACT_F_TCP_RES_CNT && (px->cap & PR_CAP_BE)) {
+ if (!px->tcp_rep.inspect_delay && !(conf->expr->fetch->val & SMP_VAL_BE_SRV_CON)) {
+ ha_warning("%s '%s' : a 'tcp-response content set-bandwidth-limit*' rule explicitly depending on response"
+ " contents without any 'tcp-response inspect-delay' setting."
+ " This means that this rule will randomly find its contents. This can be fixed by"
+ " setting the tcp-response inspect-delay.\n",
+ proxy_type_str(px), px->id);
+ }
+ }
+ }
+
+ end:
+ rule->arg.act.p[3] = conf;
+ return 1;
+}
+
+/* Release memory allocated by "set-bandwidth-limit" action. */
+static void release_bwlim_action(struct act_rule *rule)
+{
+ ha_free(&rule->arg.act.p[0]);
+ if ((rule->action & BWLIM_ACT_LIMIT_EXPR) && rule->arg.act.p[1]) {
+ release_sample_expr(rule->arg.act.p[1]);
+ rule->arg.act.p[1] = NULL;
+ }
+ if ((rule->action & BWLIM_ACT_PERIOD_EXPR) && rule->arg.act.p[2]) {
+ release_sample_expr(rule->arg.act.p[2]);
+ rule->arg.act.p[2] = NULL;
+ }
+ rule->arg.act.p[3] = NULL; /* points on the filter's config */
+}
+
+/* Parse "set-bandwidth-limit" action. The filter name must be specified. For
+ * shared limitations, there is no other supported parameter. For per-stream
+ * limitations, a custom limit and period may be specified, each as a constant
+ * or an expression. On success:
+ *
+ * arg.act.p[0] will be the filter name (mandatory)
+ * arg.act.p[1] will be the custom limit, a constant or an expression (optional, may be NULL)
+ * arg.act.p[2] will be the custom period, a constant or an expression (optional, may be NULL)
+ *
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_bandwidth_limit(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct sample_expr *expr;
+ int cur_arg;
+
+ cur_arg = *orig_arg;
+
+ if (!*args[cur_arg]) {
+ memprintf(err, "missing bwlim filter name");
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->arg.act.p[0] = strdup(args[cur_arg]);
+ if (!rule->arg.act.p[0]) {
+ memprintf(err, "out of memory");
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+
+ while (1) {
+ if (strcmp(args[cur_arg], "limit") == 0) {
+ const char *res;
+ unsigned int limit;
+
+ cur_arg++;
+ if (!args[cur_arg]) {
+ memprintf(err, "missing limit value or expression");
+ goto error;
+ }
+
+ res = parse_size_err(args[cur_arg], &limit);
+ if (!res) {
+ rule->action |= BWLIM_ACT_LIMIT_CONST;
+ rule->arg.act.p[1] = (void *)(uintptr_t)limit;
+ cur_arg++;
+ continue;
+ }
+
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, NULL, &px->conf.args, NULL);
+ if (!expr) {
+ memprintf(err, "'%s': invalid size value or unknown fetch method '%s'", args[cur_arg-1], args[cur_arg]);
+ goto error;
+ }
+ rule->action |= BWLIM_ACT_LIMIT_EXPR;
+ rule->arg.act.p[1] = expr;
+ }
+ else if (strcmp(args[cur_arg], "period") == 0) {
+ const char *res;
+ unsigned int period;
+
+ cur_arg++;
+ if (!args[cur_arg]) {
+ memprintf(err, "missing period value or expression");
+ goto error;
+ }
+
+ res = parse_time_err(args[cur_arg], &period, TIME_UNIT_MS);
+ if (!res) {
+ rule->action |= BWLIM_ACT_PERIOD_CONST;
+ rule->arg.act.p[2] = (void *)(uintptr_t)period;
+ cur_arg++;
+ continue;
+ }
+
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, NULL, &px->conf.args, NULL);
+ if (!expr) {
+ memprintf(err, "'%s': invalid time value or unknown fetch method '%s'", args[cur_arg-1], args[cur_arg]);
+ goto error;
+ }
+ rule->action |= BWLIM_ACT_PERIOD_EXPR;
+ rule->arg.act.p[2] = expr;
+ }
+ else
+ break;
+ }
+
+ rule->action_ptr = bwlim_set_limit;
+ rule->check_ptr = check_bwlim_action;
+ rule->release_ptr = release_bwlim_action;
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+
+error:
+ release_bwlim_action(rule);
+ return ACT_RET_PRS_ERR;
+}
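+
+/* Illustrative usage of the action parsed above (names and values are
+ * placeholders):
+ *
+ *   http-response set-bandwidth-limit per-strm-dl limit 2m period 10s
+ *   tcp-response content set-bandwidth-limit shared-dl
+ *
+ * For a shared bwlim filter, neither "limit" nor "period" may be set, as
+ * enforced by check_bwlim_action() above.
+ */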
+
+
+static struct action_kw_list tcp_req_cont_actions = {
+ .kw = {
+ { "set-bandwidth-limit", parse_bandwidth_limit, 0 },
+ { NULL, NULL }
+ }
+};
+
+static struct action_kw_list tcp_res_cont_actions = {
+ .kw = {
+ { "set-bandwidth-limit", parse_bandwidth_limit, 0 },
+ { NULL, NULL }
+ }
+};
+
+static struct action_kw_list http_req_actions = {
+ .kw = {
+ { "set-bandwidth-limit", parse_bandwidth_limit, 0 },
+ { NULL, NULL }
+ }
+};
+
+static struct action_kw_list http_res_actions = {
+ .kw = {
+ { "set-bandwidth-limit", parse_bandwidth_limit, 0 },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_actions);
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_cont_actions);
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
+
+
+/* Generic function to parse the bandwidth limitation filter configuration. It
+ * returns -1 on error and 0 on success. It handles configuration for per-stream
+ * and shared limitations.
+ */
+static int parse_bwlim_flt(char **args, int *cur_arg, struct proxy *px, struct flt_conf *fconf,
+ char **err, void *private)
+{
+ struct flt_conf *fc;
+ struct bwlim_config *conf;
+ int shared, per_stream;
+ int pos = *cur_arg + 1;
+
+ conf = calloc(1, sizeof(*conf));
+ if (!conf) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ return -1;
+ }
+ conf->proxy = px;
+
+ if (!*args[pos]) {
+ memprintf(err, "'%s' : a name is expected as first argument ", args[*cur_arg]);
+ goto error;
+ }
+ conf->flags = BWLIM_FL_NONE;
+ conf->name = strdup(args[pos]);
+ if (!conf->name) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto error;
+ }
+
+ list_for_each_entry(fc, &px->filter_configs, list) {
+ if (fc->id == bwlim_flt_id) {
+ struct bwlim_config *c = fc->conf;
+
+ if (strcmp(conf->name, c->name) == 0) {
+ memprintf(err, "bwlim filter '%s' already declared for proxy '%s'\n",
+ conf->name, px->id);
+ goto error;
+ }
+ }
+ }
+ shared = per_stream = 0;
+ pos++;
+ while (*args[pos]) {
+ if (strcmp(args[pos], "key") == 0) {
+ if (per_stream) {
+ memprintf(err, "'%s' : cannot mix per-stream and shared parameter",
+ args[*cur_arg]);
+ goto error;
+ }
+ if (!*args[pos + 1]) {
+ memprintf(err, "'%s' : the sample expression is missing for '%s' option",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ shared = 1;
+ pos++;
+ conf->expr = sample_parse_expr((char **)args, &pos, px->conf.args.file, px->conf.args.line,
+ err, &px->conf.args, NULL);
+ if (!conf->expr)
+ goto error;
+ }
+ else if (strcmp(args[pos], "table") == 0) {
+ if (per_stream) {
+ memprintf(err, "'%s' : cannot mix per-stream and shared parameter",
+ args[*cur_arg]);
+ goto error;
+ }
+ if (!*args[pos + 1]) {
+ memprintf(err, "'%s' : the table name is missing for '%s' option",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ shared = 1;
+ conf->table.n = strdup(args[pos + 1]);
+ if (!conf->table.n) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto error;
+ }
+ pos += 2;
+ }
+ else if (strcmp(args[pos], "default-period") == 0) {
+ const char *res;
+
+ if (shared) {
+ memprintf(err, "'%s' : cannot mix per-stream and shared parameter",
+ args[*cur_arg]);
+ goto error;
+ }
+ if (!*args[pos + 1]) {
+ memprintf(err, "'%s' : the value is missing for '%s' option",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ per_stream = 1;
+ res = parse_time_err(args[pos + 1], &conf->period, TIME_UNIT_MS);
+ if (res) {
+ memprintf(err, "'%s' : invalid value for option '%s' (unexpected character '%c')",
+ args[*cur_arg], args[pos], *res);
+ goto error;
+ }
+ pos += 2;
+ }
+ else if (strcmp(args[pos], "limit") == 0) {
+ const char *res;
+
+ if (per_stream) {
+ memprintf(err, "'%s' : cannot mix per-stream and shared parameter",
+ args[*cur_arg]);
+ goto error;
+ }
+ if (!*args[pos + 1]) {
+ memprintf(err, "'%s' : the value is missing for '%s' option",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ shared = 1;
+ res = parse_size_err(args[pos + 1], &conf->limit);
+ if (res) {
+ memprintf(err, "'%s' : invalid value for option '%s' (unexpected character '%c')",
+ args[*cur_arg], args[pos], *res);
+ goto error;
+ }
+ pos += 2;
+ }
+ else if (strcmp(args[pos], "default-limit") == 0) {
+ const char *res;
+
+ if (shared) {
+ memprintf(err, "'%s' : cannot mix per-stream and shared parameter",
+ args[*cur_arg]);
+ goto error;
+ }
+ if (!*args[pos + 1]) {
+ memprintf(err, "'%s' : the value is missing for '%s' option",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ per_stream = 1;
+ res = parse_size_err(args[pos + 1], &conf->limit);
+ if (res) {
+ memprintf(err, "'%s' : invalid value for option '%s' (unexpected character '%c')",
+ args[*cur_arg], args[pos], *res);
+ goto error;
+ }
+ pos += 2;
+ }
+ else if (strcmp(args[pos], "min-size") == 0) {
+ const char *res;
+
+ if (!*args[pos + 1]) {
+ memprintf(err, "'%s' : the value is missing for '%s' option",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ res = parse_size_err(args[pos + 1], &conf->min_size);
+ if (res) {
+ memprintf(err, "'%s' : invalid value for option '%s' (unexpected character '%c')",
+ args[*cur_arg], args[pos], *res);
+ goto error;
+ }
+ pos += 2;
+ }
+ else
+ break;
+ }
+
+ if (shared) {
+ conf->flags |= BWLIM_FL_SHARED;
+ if (!conf->expr) {
+ memprintf(err, "'%s' : <key> option is missing", args[*cur_arg]);
+ goto error;
+ }
+ if (!conf->limit) {
+ memprintf(err, "'%s' : <limit> option is missing", args[*cur_arg]);
+ goto error;
+ }
+ }
+ else {
+ /* Per-stream: limit downloads only for now */
+ conf->flags |= BWLIM_FL_OUT;
+ if (!conf->period) {
+ memprintf(err, "'%s' : <default-period> option is missing", args[*cur_arg]);
+ goto error;
+ }
+ if (!conf->limit) {
+ memprintf(err, "'%s' : <default-limit> option is missing", args[*cur_arg]);
+ goto error;
+ }
+ }
+
+ *cur_arg = pos;
+ fconf->id = bwlim_flt_id;
+ fconf->ops = &bwlim_ops;
+ fconf->conf = conf;
+ return 0;
+
+ error:
+ if (conf->name)
+ ha_free(&conf->name);
+ if (conf->expr) {
+ release_sample_expr(conf->expr);
+ conf->expr = NULL;
+ }
+ if (conf->table.n)
+ ha_free(&conf->table.n);
+ free(conf);
+ return -1;
+}
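+
+/* Illustrative configuration for the parser above (names and values are
+ * placeholders):
+ *
+ *   # shared limitation: one bucket per client IP, stored in a stick-table
+ *   backend be_shared
+ *       stick-table type ip size 100k expire 1h store bytes_out_rate(1s)
+ *       filter bwlim-out shared-dl key src limit 1m
+ *
+ *   # per-stream limitation: each stream gets its own counter
+ *   backend be_per_strm
+ *       filter bwlim-out per-strm-dl default-limit 500k default-period 1s
+ */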
+
+
+static int parse_bwlim_in_flt(char **args, int *cur_arg, struct proxy *px, struct flt_conf *fconf,
+ char **err, void *private)
+{
+ int ret;
+
+ ret = parse_bwlim_flt(args, cur_arg, px, fconf, err, private);
+ if (!ret) {
+ struct bwlim_config *conf = fconf->conf;
+
+ conf->flags |= BWLIM_FL_IN;
+ }
+
+ return ret;
+}
+
+static int parse_bwlim_out_flt(char **args, int *cur_arg, struct proxy *px, struct flt_conf *fconf,
+ char **err, void *private)
+{
+ int ret;
+
+ ret = parse_bwlim_flt(args, cur_arg, px, fconf, err, private);
+ if (!ret) {
+ struct bwlim_config *conf = fconf->conf;
+
+ conf->flags |= BWLIM_FL_OUT;
+ }
+ return ret;
+}
+
+/* Declare the filter parser for "trace" keyword */
+static struct flt_kw_list flt_kws = { "BWLIM", { }, {
+ { "bwlim-in", parse_bwlim_in_flt, NULL },
+ { "bwlim-out", parse_bwlim_out_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &flt_kws);
diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c
new file mode 100644
index 0000000..30f9d2a
--- /dev/null
+++ b/src/flt_http_comp.c
@@ -0,0 +1,1076 @@
+/*
+ * Stream filters related variables and functions.
+ *
+ * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/compression.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/filters.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/list.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+
+#define COMP_STATE_PROCESSING 0x01
+
+const char *http_comp_flt_id = "compression filter";
+
+struct flt_ops comp_ops;
+
+struct comp_state {
+ /*
+ * For both comp_ctx and comp_algo, COMP_DIR_REQ is the index
+ * for requests, and COMP_DIR_RES for responses
+ */
+ struct comp_ctx *comp_ctx[2]; /* compression context */
+ struct comp_algo *comp_algo[2]; /* compression algorithm if not NULL */
+ unsigned int flags; /* COMP_STATE_* */
+};
+
+/* Pools used to allocate comp_state structs */
+DECLARE_STATIC_POOL(pool_head_comp_state, "comp_state", sizeof(struct comp_state));
+
+static THREAD_LOCAL struct buffer tmpbuf;
+static THREAD_LOCAL struct buffer zbuf;
+
+static int select_compression_request_header(struct comp_state *st,
+ struct stream *s,
+ struct http_msg *msg);
+static int select_compression_response_header(struct comp_state *st,
+ struct stream *s,
+ struct http_msg *msg);
+static int set_compression_header(struct comp_state *st,
+ struct stream *s,
+ struct http_msg *msg);
+
+static int htx_compression_buffer_init(struct htx *htx, struct buffer *out);
+static int htx_compression_buffer_add_data(struct comp_state *st, const char *data, size_t len,
+ struct buffer *out, int dir);
+static int htx_compression_buffer_end(struct comp_state *st, struct buffer *out, int end, int dir);
+
+/***********************************************************************/
+static int
+comp_flt_init(struct proxy *px, struct flt_conf *fconf)
+{
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+static int
+comp_flt_init_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ if (b_alloc(&tmpbuf) == NULL)
+ return -1;
+ if (b_alloc(&zbuf) == NULL)
+ return -1;
+ return 0;
+}
+
+static void
+comp_flt_deinit_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ if (tmpbuf.size)
+ b_free(&tmpbuf);
+ if (zbuf.size)
+ b_free(&zbuf);
+}
+
+static int
+comp_strm_init(struct stream *s, struct filter *filter)
+{
+ struct comp_state *st;
+
+ st = pool_alloc(pool_head_comp_state);
+ if (st == NULL)
+ return -1;
+
+ st->comp_algo[COMP_DIR_REQ] = NULL;
+ st->comp_algo[COMP_DIR_RES] = NULL;
+ st->comp_ctx[COMP_DIR_REQ] = NULL;
+ st->comp_ctx[COMP_DIR_RES] = NULL;
+ st->flags = 0;
+ filter->ctx = st;
+
+ /* Register post-analyzer on AN_RES_WAIT_HTTP because we need to
+ * analyze response headers before http-response rules execution
+ * to be sure we can use res.comp and res.comp_algo sample
+ * fetches */
+ filter->post_analyzers |= AN_RES_WAIT_HTTP;
+ return 1;
+}
+
+static void
+comp_strm_deinit(struct stream *s, struct filter *filter)
+{
+ struct comp_state *st = filter->ctx;
+
+ if (!st)
+ return;
+
+ /* release any possible compression context */
+ if (st->comp_algo[COMP_DIR_REQ])
+ st->comp_algo[COMP_DIR_REQ]->end(&st->comp_ctx[COMP_DIR_REQ]);
+ if (st->comp_algo[COMP_DIR_RES])
+ st->comp_algo[COMP_DIR_RES]->end(&st->comp_ctx[COMP_DIR_RES]);
+ pool_free(pool_head_comp_state, st);
+ filter->ctx = NULL;
+}
+
+static void
+comp_prepare_compress_request(struct comp_state *st, struct stream *s, struct http_msg *msg)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct http_txn *txn = s->txn;
+ struct http_hdr_ctx ctx;
+ struct comp_type *comp_type;
+
+ ctx.blk = NULL;
+ /* Already compressed, don't bother */
+ if (http_find_header(htx, ist("Content-Encoding"), &ctx, 1))
+ return;
+ /* HTTP < 1.1 should not be compressed */
+ if (!(msg->flags & HTTP_MSGF_VER_11) || !(txn->req.flags & HTTP_MSGF_VER_11))
+ return;
+ comp_type = NULL;
+
+ /* We don't want to compress content-types not listed in the
+ * "compression type" directive, if any. If no content-type was found
+ * but the configuration requires one, we don't compress either. The
+ * backend has priority over the frontend.
+ */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Content-Type"), &ctx, 1)) {
+ if ((s->be->comp && (comp_type = s->be->comp->types_req)) ||
+ (strm_fe(s)->comp && (comp_type = strm_fe(s)->comp->types_req))) {
+ for (; comp_type; comp_type = comp_type->next) {
+ if (ctx.value.len >= comp_type->name_len &&
+ strncasecmp(ctx.value.ptr, comp_type->name, comp_type->name_len) == 0)
+ /* this Content-Type should be compressed */
+ break;
+ }
+ /* this Content-Type should not be compressed */
+ if (comp_type == NULL)
+ goto fail;
+ }
+ }
+ else { /* no content-type header */
+ if ((s->be->comp && s->be->comp->types_req) ||
+ (strm_fe(s)->comp && strm_fe(s)->comp->types_req))
+ goto fail; /* a content-type was required */
+ }
+
+ /* limit compression rate */
+ if (global.comp_rate_lim > 0)
+ if (read_freq_ctr(&global.comp_bps_in) > global.comp_rate_lim)
+ goto fail;
+
+ /* limit cpu usage */
+ if (th_ctx->idle_pct < compress_min_idle)
+ goto fail;
+
+ if (txn->meth == HTTP_METH_HEAD)
+ return;
+ if (s->be->comp && s->be->comp->algo_req != NULL)
+ st->comp_algo[COMP_DIR_REQ] = s->be->comp->algo_req;
+ else if (strm_fe(s)->comp && strm_fe(s)->comp->algo_req != NULL)
+ st->comp_algo[COMP_DIR_REQ] = strm_fe(s)->comp->algo_req;
+ else
+ goto fail; /* no algo selected: nothing to do */
+
+
+ /* initialize compression */
+ if (st->comp_algo[COMP_DIR_REQ]->init(&st->comp_ctx[COMP_DIR_REQ], global.tune.comp_maxlevel) < 0)
+ goto fail;
+
+ return;
+fail:
+ st->comp_algo[COMP_DIR_REQ] = NULL;
+}
+
+static int
+comp_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct comp_state *st = filter->ctx;
+ int comp_flags = 0;
+
+ if (!strm_fe(s)->comp && !s->be->comp)
+ goto end;
+ if (strm_fe(s)->comp)
+ comp_flags |= strm_fe(s)->comp->flags;
+ if (s->be->comp)
+ comp_flags |= s->be->comp->flags;
+
+ if (!(msg->chn->flags & CF_ISRESP)) {
+ if (comp_flags & COMP_FL_DIR_REQ) {
+ comp_prepare_compress_request(st, s, msg);
+ if (st->comp_algo[COMP_DIR_REQ]) {
+ if (!set_compression_header(st, s, msg))
+ goto end;
+ register_data_filter(s, msg->chn, filter);
+ st->flags |= COMP_STATE_PROCESSING;
+ }
+ }
+ if (comp_flags & COMP_FL_DIR_RES)
+ select_compression_request_header(st, s, msg);
+ } else if (comp_flags & COMP_FL_DIR_RES) {
+ /* Response headers have already been checked in
+ * comp_http_post_analyze callback. */
+ if (st->comp_algo[COMP_DIR_RES]) {
+ if (!set_compression_header(st, s, msg))
+ goto end;
+ register_data_filter(s, msg->chn, filter);
+ st->flags |= COMP_STATE_PROCESSING;
+ }
+ }
+
+ end:
+ return 1;
+}
+
+static int
+comp_http_post_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn, unsigned an_bit)
+{
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct comp_state *st = filter->ctx;
+
+ if (an_bit != AN_RES_WAIT_HTTP)
+ goto end;
+
+ if (!strm_fe(s)->comp && !s->be->comp)
+ goto end;
+
+ select_compression_response_header(st, s, msg);
+
+ end:
+ return 1;
+}
+
+static int
+comp_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ struct comp_state *st = filter->ctx;
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_ret htxret = htx_find_offset(htx, offset);
+ struct htx_blk *blk, *next;
+ int ret, consumed = 0, to_forward = 0, last = 0;
+ int dir;
+
+ if (msg->chn->flags & CF_ISRESP)
+ dir = COMP_DIR_RES;
+ else
+ dir = COMP_DIR_REQ;
+
+ blk = htxret.blk;
+ offset = htxret.ret;
+ for (next = NULL; blk && len; blk = next) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ struct ist v;
+
+ next = htx_get_next_blk(htx, blk);
+ while (next && htx_get_blk_type(next) == HTX_BLK_UNUSED)
+ next = htx_get_next_blk(htx, next);
+
+ if (!(st->flags & COMP_STATE_PROCESSING))
+ goto consume;
+
+ if (htx_compression_buffer_init(htx, &trash) < 0) {
+ msg->chn->flags |= CF_WAKE_WRITE;
+ goto end;
+ }
+
+ switch (type) {
+ case HTX_BLK_DATA:
+ /* it is the last data block */
+ last = ((!next && (htx->flags & HTX_FL_EOM)) || (next && htx_get_blk_type(next) != HTX_BLK_DATA));
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, offset);
+ if (v.len > len) {
+ last = 0;
+ v.len = len;
+ }
+
+ ret = htx_compression_buffer_add_data(st, v.ptr, v.len, &trash, dir);
+ if (ret < 0 || htx_compression_buffer_end(st, &trash, last, dir) < 0)
+ goto error;
+ BUG_ON(v.len != ret);
+
+ if (ret == sz && !b_data(&trash))
+ next = htx_remove_blk(htx, blk);
+ else {
+ blk = htx_replace_blk_value(htx, blk, v, ist2(b_head(&trash), b_data(&trash)));
+ next = htx_get_next_blk(htx, blk);
+ }
+
+ len -= ret;
+ consumed += ret;
+ to_forward += b_data(&trash);
+ if (last)
+ st->flags &= ~COMP_STATE_PROCESSING;
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ if (htx_compression_buffer_end(st, &trash, 1, dir) < 0)
+ goto error;
+ if (b_data(&trash)) {
+ struct htx_blk *last = htx_add_last_data(htx, ist2(b_head(&trash), b_data(&trash)));
+ if (!last)
+ goto error;
+ blk = htx_get_next_blk(htx, last);
+ if (!blk)
+ goto error;
+ next = htx_get_next_blk(htx, blk);
+ to_forward += b_data(&trash);
+ }
+ st->flags &= ~COMP_STATE_PROCESSING;
+ __fallthrough;
+
+ default:
+ consume:
+ sz -= offset;
+ if (sz > len)
+ sz = len;
+ consumed += sz;
+ to_forward += sz;
+ len -= sz;
+ break;
+ }
+
+ offset = 0;
+ }
+
+ end:
+ if (to_forward != consumed)
+ flt_update_offsets(filter, msg->chn, to_forward - consumed);
+
+ if (st->comp_ctx[dir] && st->comp_ctx[dir]->cur_lvl > 0) {
+ update_freq_ctr(&global.comp_bps_in, consumed);
+ _HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_in[dir], consumed);
+ _HA_ATOMIC_ADD(&s->be->be_counters.comp_in[dir], consumed);
+ update_freq_ctr(&global.comp_bps_out, to_forward);
+ _HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_out[dir], to_forward);
+ _HA_ATOMIC_ADD(&s->be->be_counters.comp_out[dir], to_forward);
+ } else {
+ _HA_ATOMIC_ADD(&strm_fe(s)->fe_counters.comp_byp[dir], consumed);
+ _HA_ATOMIC_ADD(&s->be->be_counters.comp_byp[dir], consumed);
+ }
+ return to_forward;
+
+ error:
+ return -1;
+}
+
+
+static int
+comp_http_end(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct comp_state *st = filter->ctx;
+
+ if (!(msg->chn->flags & CF_ISRESP) || !st || !st->comp_algo[COMP_DIR_RES])
+ goto end;
+
+ if (strm_fe(s)->mode == PR_MODE_HTTP)
+ _HA_ATOMIC_INC(&strm_fe(s)->fe_counters.p.http.comp_rsp);
+ if ((s->flags & SF_BE_ASSIGNED) && (s->be->mode == PR_MODE_HTTP))
+ _HA_ATOMIC_INC(&s->be->be_counters.p.http.comp_rsp);
+ end:
+ return 1;
+}
+
+/***********************************************************************/
+static int
+set_compression_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_sl *sl;
+ struct http_hdr_ctx ctx, last_vary;
+ struct comp_algo *comp_algo;
+ int comp_index;
+
+ if (msg->chn->flags & CF_ISRESP)
+ comp_index = COMP_DIR_RES;
+ else
+ comp_index = COMP_DIR_REQ;
+
+ sl = http_get_stline(htx);
+ if (!sl)
+ goto error;
+
+ comp_algo = st->comp_algo[comp_index];
+
+ /* add "Transfer-Encoding: chunked" header */
+ if (!(msg->flags & HTTP_MSGF_TE_CHNK)) {
+ if (!http_add_header(htx, ist("Transfer-Encoding"), ist("chunked")))
+ goto error;
+ msg->flags |= HTTP_MSGF_TE_CHNK;
+ sl->flags |= (HTX_SL_F_XFER_ENC|HTX_SL_F_CHNK);
+ }
+
+ /* remove Content-Length header */
+ if (msg->flags & HTTP_MSGF_CNT_LEN) {
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Content-Length"), &ctx, 1))
+ http_remove_header(htx, &ctx);
+ msg->flags &= ~HTTP_MSGF_CNT_LEN;
+ sl->flags &= ~HTX_SL_F_CLEN;
+ }
+
+ /* convert "ETag" header to a weak ETag */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("ETag"), &ctx, 1)) {
+ if (ctx.value.ptr[0] == '"') {
+ /* This a strong ETag. Convert it to a weak one. */
+ struct ist v = ist2(trash.area, 0);
+ if (istcat(&v, ist("W/"), trash.size) == -1 || istcat(&v, ctx.value, trash.size) == -1)
+ goto error;
+
+ if (!http_replace_header_value(htx, &ctx, v))
+ goto error;
+ }
+ }
+
+ /* Add "Vary: Accept-Encoding" header but only if it is not found. */
+ ctx.blk = NULL;
+ last_vary.blk = NULL;
+ while (http_find_header(htx, ist("Vary"), &ctx, 0)) {
+ if (isteqi(ctx.value, ist("Accept-Encoding")))
+ break;
+ last_vary = ctx;
+ }
+ /* No "Accept-Encoding" value found. */
+ if (ctx.blk == NULL) {
+ if (last_vary.blk == NULL) {
+ /* No Vary header found at all. Add our header */
+ if (!http_add_header(htx, ist("Vary"), ist("Accept-Encoding")))
+ goto error;
+ }
+ else {
+ /* At least one Vary header found. Append the value to
+ * the last one.
+ */
+ if (!http_append_header_value(htx, &last_vary, ist("Accept-Encoding")))
+ goto error;
+ }
+ }
+
+ /*
+ * Add Content-Encoding header when it's not identity encoding.
+ * RFC 2616 : Identity encoding: This content-coding is used only in the
+ * Accept-Encoding header, and SHOULD NOT be used in the Content-Encoding
+ * header.
+ */
+ if (comp_algo->cfg_name_len != 8 || memcmp(comp_algo->cfg_name, "identity", 8) != 0) {
+ struct ist v = ist2(comp_algo->ua_name, comp_algo->ua_name_len);
+
+ if (!http_add_header(htx, ist("Content-Encoding"), v))
+ goto error;
+ }
+
+ return 1;
+
+ error:
+ st->comp_algo[comp_index]->end(&st->comp_ctx[comp_index]);
+ st->comp_algo[comp_index] = NULL;
+ return 0;
+}
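+
+/* Illustrative effect of set_compression_header() on a typical response,
+ * assuming gzip was selected (header values are examples):
+ *
+ *   before:                        after:
+ *     Content-Length: 1024          Transfer-Encoding: chunked
+ *     ETag: "abc"                   ETag: W/"abc"
+ *     Vary: Origin                  Vary: Origin, Accept-Encoding
+ *                                   Content-Encoding: gzip
+ */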
+
+/*
+ * Selects a compression algorithm depending on the client request.
+ */
+static int
+select_compression_request_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct http_hdr_ctx ctx;
+ struct comp_algo *comp_algo = NULL;
+ struct comp_algo *comp_algo_back = NULL;
+
+ /* Disable compression for older user agents announcing themselves as "Mozilla/4"
+ * unless they are known good (MSIE 6 with XP SP2, or MSIE 7 and later).
+ * See http://zoompf.com/2012/02/lose-the-wait-http-compression for more details.
+ */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("User-Agent"), &ctx, 1) &&
+ ctx.value.len >= 9 &&
+ memcmp(ctx.value.ptr, "Mozilla/4", 9) == 0 &&
+ (ctx.value.len < 31 ||
+ memcmp(ctx.value.ptr + 25, "MSIE ", 5) != 0 ||
+ *(ctx.value.ptr + 30) < '6' ||
+ (*(ctx.value.ptr + 30) == '6' &&
+ (ctx.value.len < 54 || memcmp(ctx.value.ptr + 51, "SV1", 3) != 0)))) {
+ st->comp_algo[COMP_DIR_RES] = NULL;
+ return 0;
+ }
+
+ /* search for the algo in the backend in priority or the frontend */
+ if ((s->be->comp && (comp_algo_back = s->be->comp->algos_res)) ||
+ (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos_res))) {
+ int best_q = 0;
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Accept-Encoding"), &ctx, 0)) {
+ const char *qval;
+ int q;
+ int toklen;
+
+ /* try to isolate the token from the optional q-value */
+ toklen = 0;
+ while (toklen < ctx.value.len && HTTP_IS_TOKEN(*(ctx.value.ptr + toklen)))
+ toklen++;
+
+ qval = ctx.value.ptr + toklen;
+ while (1) {
+ while (qval < istend(ctx.value) && HTTP_IS_LWS(*qval))
+ qval++;
+
+ if (qval >= istend(ctx.value) || *qval != ';') {
+ qval = NULL;
+ break;
+ }
+ qval++;
+
+ while (qval < istend(ctx.value) && HTTP_IS_LWS(*qval))
+ qval++;
+
+ if (qval >= istend(ctx.value)) {
+ qval = NULL;
+ break;
+ }
+ if (strncmp(qval, "q=", MIN(istend(ctx.value) - qval, 2)) == 0)
+ break;
+
+ while (qval < istend(ctx.value) && *qval != ';')
+ qval++;
+ }
+
+ /* here we have qval pointing to the first "q=" attribute or NULL if not found */
+ q = qval ? http_parse_qvalue(qval + 2, NULL) : 1000;
+
+ if (q <= best_q)
+ continue;
+
+ for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) {
+ if (*(ctx.value.ptr) == '*' ||
+ word_match(ctx.value.ptr, toklen, comp_algo->ua_name, comp_algo->ua_name_len)) {
+ st->comp_algo[COMP_DIR_RES] = comp_algo;
+ best_q = q;
+ break;
+ }
+ }
+ }
+ }
+
+ /* remove all occurrences of the header when "compression offload" is set */
+ if (st->comp_algo[COMP_DIR_RES]) {
+ if ((s->be->comp && (s->be->comp->flags & COMP_FL_OFFLOAD)) ||
+ (strm_fe(s)->comp && (strm_fe(s)->comp->flags & COMP_FL_OFFLOAD))) {
+ http_remove_header(htx, &ctx);
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Accept-Encoding"), &ctx, 1))
+ http_remove_header(htx, &ctx);
+ }
+ return 1;
+ }
+
+ /* identity is implicit and does not require any header */
+ if ((s->be->comp && (comp_algo_back = s->be->comp->algos_res)) ||
+ (strm_fe(s)->comp && (comp_algo_back = strm_fe(s)->comp->algos_res))) {
+ for (comp_algo = comp_algo_back; comp_algo; comp_algo = comp_algo->next) {
+ if (comp_algo->cfg_name_len == 8 && memcmp(comp_algo->cfg_name, "identity", 8) == 0) {
+ st->comp_algo[COMP_DIR_RES] = comp_algo;
+ return 1;
+ }
+ }
+ }
+
+ st->comp_algo[COMP_DIR_RES] = NULL;
+ return 0;
+}
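+
+/* Worked example of the q-value selection above (illustrative request): with
+ * "compression algo gzip" configured and a client sending
+ * "Accept-Encoding: br;q=1.0, gzip;q=0.8, identity;q=0.5", "br" is skipped
+ * because it matches no configured algorithm, gzip wins with q=800, and
+ * identity (q=500) cannot beat it. A token without a q attribute defaults
+ * to q=1000.
+ */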
+
+/*
+ * Selects a compression algorithm depending on the server response.
+ */
+static int
+select_compression_response_header(struct comp_state *st, struct stream *s, struct http_msg *msg)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct http_txn *txn = s->txn;
+ struct http_hdr_ctx ctx;
+ struct comp_type *comp_type;
+
+ /* no common compression algorithm was found in request header */
+ if (st->comp_algo[COMP_DIR_RES] == NULL)
+ goto fail;
+
+ /* compression already in progress */
+ if (msg->flags & HTTP_MSGF_COMPRESSING)
+ goto fail;
+
+ /* HTTP < 1.1 should not be compressed */
+ if (!(msg->flags & HTTP_MSGF_VER_11) || !(txn->req.flags & HTTP_MSGF_VER_11))
+ goto fail;
+
+ if (txn->meth == HTTP_METH_HEAD)
+ goto fail;
+
+ /* compress 200,201,202,203 responses only */
+ if ((txn->status != 200) &&
+ (txn->status != 201) &&
+ (txn->status != 202) &&
+ (txn->status != 203))
+ goto fail;
+
+ if (!(msg->flags & HTTP_MSGF_XFER_LEN) || msg->flags & HTTP_MSGF_BODYLESS)
+ goto fail;
+
+ /* content is already compressed */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Content-Encoding"), &ctx, 1))
+ goto fail;
+
+ /* no compression when Cache-Control: no-transform is present in the message */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Cache-Control"), &ctx, 0)) {
+ if (word_match(ctx.value.ptr, ctx.value.len, "no-transform", 12))
+ goto fail;
+ }
+
+ /* no compression when ETag is malformed */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("ETag"), &ctx, 1)) {
+ if (http_get_etag_type(ctx.value) == ETAG_INVALID)
+ goto fail;
+ }
+ /* no compression when multiple ETags are present
+ * Note: Do not reset ctx.blk!
+ */
+ if (http_find_header(htx, ist("ETag"), &ctx, 1))
+ goto fail;
+
+ comp_type = NULL;
+
+ /* we don't want to compress multipart content-types, nor content-types that are
+ * not listed in the "compression type" directive if any. If no content-type was
+ * found but configuration requires one, we don't compress either. Backend has
+ * the priority.
+ */
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Content-Type"), &ctx, 1)) {
+ if (ctx.value.len >= 9 && strncasecmp("multipart", ctx.value.ptr, 9) == 0)
+ goto fail;
+
+ if ((s->be->comp && (comp_type = s->be->comp->types_res)) ||
+ (strm_fe(s)->comp && (comp_type = strm_fe(s)->comp->types_res))) {
+ for (; comp_type; comp_type = comp_type->next) {
+ if (ctx.value.len >= comp_type->name_len &&
+ strncasecmp(ctx.value.ptr, comp_type->name, comp_type->name_len) == 0)
+ /* this Content-Type should be compressed */
+ break;
+ }
+ /* this Content-Type should not be compressed */
+ if (comp_type == NULL)
+ goto fail;
+ }
+ }
+ else { /* no content-type header */
+ if ((s->be->comp && s->be->comp->types_res) ||
+ (strm_fe(s)->comp && strm_fe(s)->comp->types_res))
+ goto fail; /* a content-type was required */
+ }
+
+ /* limit compression rate */
+ if (global.comp_rate_lim > 0)
+ if (read_freq_ctr(&global.comp_bps_in) > global.comp_rate_lim)
+ goto fail;
+
+ /* limit cpu usage */
+ if (th_ctx->idle_pct < compress_min_idle)
+ goto fail;
+
+ /* initialize compression */
+ if (st->comp_algo[COMP_DIR_RES]->init(&st->comp_ctx[COMP_DIR_RES], global.tune.comp_maxlevel) < 0)
+ goto fail;
+ msg->flags |= HTTP_MSGF_COMPRESSING;
+ return 1;
+
+ fail:
+ st->comp_algo[COMP_DIR_RES] = NULL;
+ return 0;
+}
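+
+/* To illustrate the gating above: an HTTP/1.1 "200 OK" response with
+ * "Content-Type: text/html", no Content-Encoding, no
+ * "Cache-Control: no-transform" and a single valid ETag (or none) passes
+ * every check; HTTP/1.0 exchanges, HEAD requests, multipart bodies and
+ * statuses other than 200/201/202/203 are left uncompressed.
+ */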
+
+/***********************************************************************/
+static int
+htx_compression_buffer_init(struct htx *htx, struct buffer *out)
+{
+ /* The output stream requires at least 10 bytes for the gzip header,
+ * plus at least 8 bytes for the gzip trailer (crc+len), plus at most
+ * 5 bytes per 32kB block and 2 bytes to close the stream.
+ */
+ if (htx_free_space(htx) < 20 + 5 * ((htx->data + 32767) >> 15))
+ return -1;
+ b_reset(out);
+ return 0;
+}
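+
+/* Worked example of the space check above: for htx->data = 100000 bytes,
+ * (100000 + 32767) >> 15 = 4 blocks of 32kB, so at least 20 + 5*4 = 40 free
+ * bytes must remain in the HTX buffer before compression may proceed.
+ */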
+
+static int
+htx_compression_buffer_add_data(struct comp_state *st, const char *data, size_t len,
+ struct buffer *out, int dir)
+{
+
+ return st->comp_algo[dir]->add_data(st->comp_ctx[dir], data, len, out);
+}
+
+static int
+htx_compression_buffer_end(struct comp_state *st, struct buffer *out, int end, int dir)
+{
+
+ if (end)
+ return st->comp_algo[dir]->finish(st->comp_ctx[dir], out);
+ else
+ return st->comp_algo[dir]->flush(st->comp_ctx[dir], out);
+}
+
+
+/***********************************************************************/
+struct flt_ops comp_ops = {
+ .init = comp_flt_init,
+ .init_per_thread = comp_flt_init_per_thread,
+ .deinit_per_thread = comp_flt_deinit_per_thread,
+
+ .attach = comp_strm_init,
+ .detach = comp_strm_deinit,
+
+ .channel_post_analyze = comp_http_post_analyze,
+
+ .http_headers = comp_http_headers,
+ .http_payload = comp_http_payload,
+ .http_end = comp_http_end,
+};
+
+static int
+parse_compression_options(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct comp *comp;
+ int ret = 0;
+
+ if (proxy->comp == NULL) {
+ comp = calloc(1, sizeof(*comp));
+ if (comp == NULL) {
+ memprintf(err, "'%s' : out of memory.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ /* Always default to compress responses */
+ comp->flags = COMP_FL_DIR_RES;
+ proxy->comp = comp;
+ }
+ else
+ comp = proxy->comp;
+
+ if (strcmp(args[1], "algo") == 0 || strcmp(args[1], "algo-res") == 0) {
+ struct comp_ctx *ctx;
+ int cur_arg = 2;
+
+ if (!*args[cur_arg]) {
+ memprintf(err, "parsing [%s:%d] : '%s' expects <algorithm>.",
+ file, line, args[0]);
+ ret = -1;
+ goto end;
+ }
+ while (*(args[cur_arg])) {
+ int retval = comp_append_algo(&comp->algos_res, args[cur_arg]);
+ if (retval) {
+ if (retval < 0)
+ memprintf(err, "'%s' : '%s' is not a supported algorithm.",
+ args[0], args[cur_arg]);
+ else
+ memprintf(err, "'%s' : out of memory while parsing algo '%s'.",
+ args[0], args[cur_arg]);
+ ret = -1;
+ goto end;
+ }
+
+ if (proxy->comp->algos_res->init(&ctx, 9) == 0)
+ proxy->comp->algos_res->end(&ctx);
+ else {
+ memprintf(err, "'%s' : Can't init '%s' algorithm.",
+ args[0], args[cur_arg]);
+ ret = -1;
+ goto end;
+ }
+ cur_arg++;
+ continue;
+ }
+ }
+ else if (strcmp(args[1], "algo-req") == 0) {
+ struct comp_ctx *ctx;
+ int retval = comp_append_algo(&comp->algo_req, args[2]);
+
+ if (retval) {
+ if (retval < 0)
+ memprintf(err, "'%s' : '%s' is not a supported algorithm.",
+ args[0], args[2]);
+ else
+ memprintf(err, "'%s' : out of memory while parsing algo '%s'.",
+ args[0], args[2]);
+ ret = -1;
+ goto end;
+ }
+
+ if (proxy->comp->algo_req->init(&ctx, 9) == 0)
+ proxy->comp->algo_req->end(&ctx);
+ else {
+ memprintf(err, "'%s' : Can't init '%s' algorithm.",
+ args[0], args[2]);
+ ret = -1;
+ goto end;
+ }
+ }
+ else if (strcmp(args[1], "offload") == 0) {
+ if (proxy->cap & PR_CAP_DEF) {
+ memprintf(err, "'%s' : '%s' ignored in 'defaults' section.",
+ args[0], args[1]);
+ ret = 1;
+ }
+ comp->flags |= COMP_FL_OFFLOAD;
+ }
+ else if (strcmp(args[1], "type") == 0 || strcmp(args[1], "type-res") == 0) {
+ int cur_arg = 2;
+
+ if (!*args[cur_arg]) {
+ memprintf(err, "'%s' expects <type>.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ while (*(args[cur_arg])) {
+ if (comp_append_type(&comp->types_res, args[cur_arg])) {
+ memprintf(err, "'%s': out of memory.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ cur_arg++;
+ continue;
+ }
+ }
+ else if (strcmp(args[1], "type-req") == 0) {
+ int cur_arg = 2;
+
+ if (!*args[cur_arg]) {
+ memprintf(err, "'%s' expects <type>.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ while (*(args[cur_arg])) {
+ if (comp_append_type(&comp->types_req, args[cur_arg])) {
+ memprintf(err, "'%s': out of memory.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ cur_arg++;
+ continue;
+ }
+ }
+ else if (strcmp(args[1], "direction") == 0) {
+ if (!*args[2]) { /* args entries are never NULL here, test for emptiness */
+ memprintf(err, "'%s' expects 'request', 'response', or 'both'.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ if (strcmp(args[2], "request") == 0) {
+ comp->flags &= ~COMP_FL_DIR_RES;
+ comp->flags |= COMP_FL_DIR_REQ;
+ } else if (strcmp(args[2], "response") == 0) {
+ comp->flags &= ~COMP_FL_DIR_REQ;
+ comp->flags |= COMP_FL_DIR_RES;
+ } else if (strcmp(args[2], "both") == 0)
+ comp->flags |= COMP_FL_DIR_REQ | COMP_FL_DIR_RES;
+ else {
+ memprintf(err, "'%s' expects 'request', 'response', or 'both'.", args[0]);
+ ret = -1;
+ goto end;
+ }
+ }
+ else {
+ memprintf(err, "'%s' expects 'algo', 'type' 'direction' or 'offload'",
+ args[0]);
+ ret = -1;
+ goto end;
+ }
+
+ end:
+ return ret;
+}
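+
+/* A configuration sketch exercising the keywords parsed above (the listed
+ * algorithm and content types are illustrative):
+ *
+ *   backend app
+ *     compression algo gzip
+ *     compression type text/html text/plain application/json
+ *     compression direction both
+ *     compression offload
+ */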
+
+static int
+parse_http_comp_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct flt_conf *fc, *back;
+
+ list_for_each_entry_safe(fc, back, &px->filter_configs, list) {
+ if (fc->id == http_comp_flt_id) {
+ memprintf(err, "%s: Proxy supports only one compression filter\n", px->id);
+ return -1;
+ }
+ }
+
+ fconf->id = http_comp_flt_id;
+ fconf->conf = NULL;
+ fconf->ops = &comp_ops;
+ (*cur_arg)++;
+
+ return 0;
+}
+
+
+int
+check_implicit_http_comp_flt(struct proxy *proxy)
+{
+ struct flt_conf *fconf;
+ int explicit = 0;
+ int comp = 0;
+ int err = 0;
+
+ if (proxy->comp == NULL)
+ goto end;
+ if (!LIST_ISEMPTY(&proxy->filter_configs)) {
+ list_for_each_entry(fconf, &proxy->filter_configs, list) {
+ if (fconf->id == http_comp_flt_id)
+ comp = 1;
+ else if (fconf->id == cache_store_flt_id) {
+ if (comp) {
+ ha_alert("config: %s '%s': unable to enable the compression filter "
+ "before any cache filter.\n",
+ proxy_type_str(proxy), proxy->id);
+ err++;
+ goto end;
+ }
+ }
+ else if (fconf->id == fcgi_flt_id)
+ continue;
+ else
+ explicit = 1;
+ }
+ }
+ if (comp)
+ goto end;
+ else if (explicit) {
+ ha_alert("config: %s '%s': require an explicit filter declaration to use "
+ "HTTP compression\n", proxy_type_str(proxy), proxy->id);
+ err++;
+ goto end;
+ }
+
+ /* Implicit declaration of the compression filter is always the last
+ * one */
+ fconf = calloc(1, sizeof(*fconf));
+ if (!fconf) {
+ ha_alert("config: %s '%s': out of memory\n",
+ proxy_type_str(proxy), proxy->id);
+ err++;
+ goto end;
+ }
+ fconf->id = http_comp_flt_id;
+ fconf->conf = NULL;
+ fconf->ops = &comp_ops;
+ LIST_APPEND(&proxy->filter_configs, &fconf->list);
+ end:
+ return err;
+}
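+
+/* Ordering note following from the check above: when a cache filter is used,
+ * an explicit compression filter must be declared after it, e.g. (cache name
+ * is illustrative):
+ *
+ *   frontend fe
+ *     filter cache my-cache
+ *     filter compression
+ */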
+
+/*
+ * boolean, returns true if compression is used (either gzip or deflate) in the
+ * response.
+ */
+static int
+smp_fetch_res_comp(const struct arg *args, struct sample *smp, const char *kw,
+ void *private)
+{
+ struct http_txn *txn = smp->strm ? smp->strm->txn : NULL;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = (txn && (txn->rsp.flags & HTTP_MSGF_COMPRESSING));
+ return 1;
+}
+
+/*
+ * string, returns algo
+ */
+static int
+smp_fetch_res_comp_algo(const struct arg *args, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct http_txn *txn = smp->strm ? smp->strm->txn : NULL;
+ struct filter *filter;
+ struct comp_state *st;
+
+ if (!txn || !(txn->rsp.flags & HTTP_MSGF_COMPRESSING))
+ return 0;
+
+ list_for_each_entry(filter, &strm_flt(smp->strm)->filters, list) {
+ if (FLT_ID(filter) != http_comp_flt_id)
+ continue;
+
+ if (!(st = filter->ctx))
+ break;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = st->comp_algo[COMP_DIR_RES]->cfg_name;
+ smp->data.u.str.data = st->comp_algo[COMP_DIR_RES]->cfg_name_len;
+ return 1;
+ }
+ return 0;
+}
+
+/* Declare the config parser for "compression" keyword */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "compression", parse_compression_options },
+ { 0, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* Declare the filter parser for "compression" keyword */
+static struct flt_kw_list filter_kws = { "COMP", { }, {
+ { "compression", parse_http_comp_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &filter_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "res.comp", smp_fetch_res_comp, 0, NULL, SMP_T_BOOL, SMP_USE_HRSHP },
+ { "res.comp_algo", smp_fetch_res_comp_algo, 0, NULL, SMP_T_STR, SMP_USE_HRSHP },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
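+
+/* Usage sketch for the sample fetches registered above (the header name is
+ * an arbitrary example):
+ *
+ *   http-response set-header X-Comp-Algo %[res.comp_algo] if { res.comp }
+ */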
diff --git a/src/flt_spoe.c b/src/flt_spoe.c
new file mode 100644
index 0000000..70ea2ba
--- /dev/null
+++ b/src/flt_spoe.c
@@ -0,0 +1,4739 @@
+/*
+ * Stream processing offload engine management.
+ *
+ * Copyright 2016 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <ctype.h>
+#include <errno.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/applet.h>
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/filters.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session.h>
+#include <haproxy/signal.h>
+#include <haproxy/sink.h>
+#include <haproxy/spoe.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+
+
+#if defined(DEBUG_SPOE) || defined(DEBUG_FULL)
+#define SPOE_PRINTF(x...) fprintf(x)
+#define SPOE_DEBUG_STMT(statement) statement
+#else
+#define SPOE_PRINTF(x...)
+#define SPOE_DEBUG_STMT(statement)
+#endif
+
+/* Reserve 4 bytes for the frame size, so that a frame and its size can be
+ * written together in a buffer */
+#define MAX_FRAME_SIZE global.tune.bufsize - 4
+
+/* The minimum size for a frame */
+#define MIN_FRAME_SIZE 256
+
+/* Reserved for the metadata and the frame type.
+ * So <MAX_FRAME_SIZE> - <FRAME_HDR_SIZE> is the maximum payload size */
+#define FRAME_HDR_SIZE 32
+
+/* Helper to get SPOE ctx inside an appctx */
+#define SPOE_APPCTX(appctx) ((struct spoe_appctx *)((appctx)->svcctx))
+
+/* SPOE filter id. Used to identify SPOE filters */
+const char *spoe_filter_id = "SPOE filter";
+
+/* Set if the handler on SIGUSR1 is registered */
+static int sighandler_registered = 0;
+
+/* The name of the SPOE engine, used during the parsing */
+char *curengine = NULL;
+
+/* SPOE agent/group/message used during the parsing */
+struct spoe_agent *curagent = NULL;
+struct spoe_group *curgrp = NULL;
+struct spoe_message *curmsg = NULL;
+
+/* list of SPOE messages and placeholders used during the parsing */
+struct list curmsgs;
+struct list curgrps;
+struct list curmphs;
+struct list curgphs;
+struct list curvars;
+
+/* list of log servers used during the parsing */
+struct list curloggers;
+
+/* agent's proxy flags (PR_O_* and PR_O2_*) used during parsing */
+int curpxopts;
+int curpxopts2;
+
+/* Pools used to allocate SPOE structs */
+DECLARE_STATIC_POOL(pool_head_spoe_ctx, "spoe_ctx", sizeof(struct spoe_context));
+DECLARE_STATIC_POOL(pool_head_spoe_appctx, "spoe_appctx", sizeof(struct spoe_appctx));
+
+struct flt_ops spoe_ops;
+
+static int spoe_queue_context(struct spoe_context *ctx);
+static int spoe_acquire_buffer(struct buffer *buf, struct buffer_wait *buffer_wait);
+static void spoe_release_buffer(struct buffer *buf, struct buffer_wait *buffer_wait);
+static struct appctx *spoe_create_appctx(struct spoe_config *conf);
+
+/********************************************************************
+ * helper functions/globals
+ ********************************************************************/
+static void
+spoe_release_placeholder(struct spoe_placeholder *ph)
+{
+ if (!ph)
+ return;
+ free(ph->id);
+ free(ph);
+}
+
+static void
+spoe_release_message(struct spoe_message *msg)
+{
+ struct spoe_arg *arg, *argback;
+ struct acl *acl, *aclback;
+
+ if (!msg)
+ return;
+ free(msg->id);
+ free(msg->conf.file);
+ list_for_each_entry_safe(arg, argback, &msg->args, list) {
+ release_sample_expr(arg->expr);
+ free(arg->name);
+ LIST_DELETE(&arg->list);
+ free(arg);
+ }
+ list_for_each_entry_safe(acl, aclback, &msg->acls, list) {
+ LIST_DELETE(&acl->list);
+ prune_acl(acl);
+ free(acl);
+ }
+ free_acl_cond(msg->cond);
+ free(msg);
+}
+
+static void
+spoe_release_group(struct spoe_group *grp)
+{
+ if (!grp)
+ return;
+ free(grp->id);
+ free(grp->conf.file);
+ free(grp);
+}
+
+static void
+spoe_release_agent(struct spoe_agent *agent)
+{
+ struct spoe_message *msg, *msgback;
+ struct spoe_group *grp, *grpback;
+ int i;
+
+ if (!agent)
+ return;
+ free(agent->id);
+ free(agent->conf.file);
+ free(agent->var_pfx);
+ free(agent->var_on_error);
+ free(agent->var_t_process);
+ free(agent->var_t_total);
+ list_for_each_entry_safe(msg, msgback, &agent->messages, list) {
+ LIST_DELETE(&msg->list);
+ spoe_release_message(msg);
+ }
+ list_for_each_entry_safe(grp, grpback, &agent->groups, list) {
+ LIST_DELETE(&grp->list);
+ spoe_release_group(grp);
+ }
+ if (agent->rt) {
+ for (i = 0; i < global.nbthread; ++i) {
+ free(agent->rt[i].engine_id);
+ HA_SPIN_DESTROY(&agent->rt[i].lock);
+ }
+ }
+ free(agent->rt);
+ free(agent);
+}
+
+static const char *spoe_frm_err_reasons[SPOE_FRM_ERRS] = {
+ [SPOE_FRM_ERR_NONE] = "normal",
+ [SPOE_FRM_ERR_IO] = "I/O error",
+ [SPOE_FRM_ERR_TOUT] = "a timeout occurred",
+ [SPOE_FRM_ERR_TOO_BIG] = "frame is too big",
+ [SPOE_FRM_ERR_INVALID] = "invalid frame received",
+ [SPOE_FRM_ERR_NO_VSN] = "version value not found",
+ [SPOE_FRM_ERR_NO_FRAME_SIZE] = "max-frame-size value not found",
+ [SPOE_FRM_ERR_NO_CAP] = "capabilities value not found",
+ [SPOE_FRM_ERR_BAD_VSN] = "unsupported version",
+ [SPOE_FRM_ERR_BAD_FRAME_SIZE] = "max-frame-size too big or too small",
+ [SPOE_FRM_ERR_FRAG_NOT_SUPPORTED] = "fragmentation not supported",
+ [SPOE_FRM_ERR_INTERLACED_FRAMES] = "invalid interlaced frames",
+ [SPOE_FRM_ERR_FRAMEID_NOTFOUND] = "frame-id not found",
+ [SPOE_FRM_ERR_RES] = "resource allocation error",
+ [SPOE_FRM_ERR_UNKNOWN] = "an unknown error occurred",
+};
+
+static const char *spoe_event_str[SPOE_EV_EVENTS] = {
+ [SPOE_EV_ON_CLIENT_SESS] = "on-client-session",
+ [SPOE_EV_ON_TCP_REQ_FE] = "on-frontend-tcp-request",
+ [SPOE_EV_ON_TCP_REQ_BE] = "on-backend-tcp-request",
+ [SPOE_EV_ON_HTTP_REQ_FE] = "on-frontend-http-request",
+ [SPOE_EV_ON_HTTP_REQ_BE] = "on-backend-http-request",
+
+ [SPOE_EV_ON_SERVER_SESS] = "on-server-session",
+ [SPOE_EV_ON_TCP_RSP] = "on-tcp-response",
+ [SPOE_EV_ON_HTTP_RSP] = "on-http-response",
+};
+
+
+#if defined(DEBUG_SPOE) || defined(DEBUG_FULL)
+
+static const char *spoe_ctx_state_str[SPOE_CTX_ST_ERROR+1] = {
+ [SPOE_CTX_ST_NONE] = "NONE",
+ [SPOE_CTX_ST_READY] = "READY",
+ [SPOE_CTX_ST_ENCODING_MSGS] = "ENCODING_MSGS",
+ [SPOE_CTX_ST_SENDING_MSGS] = "SENDING_MSGS",
+ [SPOE_CTX_ST_WAITING_ACK] = "WAITING_ACK",
+ [SPOE_CTX_ST_DONE] = "DONE",
+ [SPOE_CTX_ST_ERROR] = "ERROR",
+};
+
+static const char *spoe_appctx_state_str[SPOE_APPCTX_ST_END+1] = {
+ [SPOE_APPCTX_ST_CONNECT] = "CONNECT",
+ [SPOE_APPCTX_ST_CONNECTING] = "CONNECTING",
+ [SPOE_APPCTX_ST_IDLE] = "IDLE",
+ [SPOE_APPCTX_ST_PROCESSING] = "PROCESSING",
+ [SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY] = "SENDING_FRAG_NOTIFY",
+ [SPOE_APPCTX_ST_WAITING_SYNC_ACK] = "WAITING_SYNC_ACK",
+ [SPOE_APPCTX_ST_DISCONNECT] = "DISCONNECT",
+ [SPOE_APPCTX_ST_DISCONNECTING] = "DISCONNECTING",
+ [SPOE_APPCTX_ST_EXIT] = "EXIT",
+ [SPOE_APPCTX_ST_END] = "END",
+};
+
+#endif
+
+/* Used to generate a unique id for an engine. On success, it returns an
+ * allocated string, so it is the caller's responsibility to release it. If
+ * the allocation fails, it returns NULL. */
+static char *
+generate_pseudo_uuid(void)
+{
+ ha_generate_uuid(&trash);
+ return my_strndup(trash.area, trash.data);
+}
+
+/* set/add to <t> the elapsed time since <since> and now */
+static inline void
+spoe_update_stat_time(ullong *since, long *t)
+{
+ if (*t == -1)
+ *t = ns_to_ms(now_ns - *since);
+ else
+ *t += ns_to_ms(now_ns - *since);
+ *since = 0;
+}
+
+/********************************************************************
+ * Functions that encode/decode SPOE frames
+ ********************************************************************/
+/* Helper to get static string length, excluding the terminating null byte */
+#define SLEN(str) (sizeof(str)-1)
+
+/* Predefined key used in HELLO/DISCONNECT frames */
+#define SUPPORTED_VERSIONS_KEY "supported-versions"
+#define VERSION_KEY "version"
+#define MAX_FRAME_SIZE_KEY "max-frame-size"
+#define CAPABILITIES_KEY "capabilities"
+#define ENGINE_ID_KEY "engine-id"
+#define HEALTHCHECK_KEY "healthcheck"
+#define STATUS_CODE_KEY "status-code"
+#define MSG_KEY "message"
+
+struct spoe_version {
+ char *str;
+ int min;
+ int max;
+};
+
+/* All supported versions */
+static struct spoe_version supported_versions[] = {
+ /* 1.0 is now unsupported because of a bug in the handling of frame flags */
+ {"2.0", 2000, 2000},
+ {NULL, 0, 0}
+};
+
+/* Comma-separated list of supported versions */
+#define SUPPORTED_VERSIONS_VAL "2.0"
+
+/* Convert a string to a SPOE version value. The string must follow the format
+ * "MAJOR.MINOR". It will be concerted into the integer (1000 * MAJOR + MINOR).
+ * If an error occurred, -1 is returned. */
+static int
+spoe_str_to_vsn(const char *str, size_t len)
+{
+ const char *p, *end;
+ int maj, min, vsn;
+
+ p = str;
+ end = str+len;
+ maj = min = 0;
+ vsn = -1;
+
+ /* skip leading spaces */
+ while (p < end && isspace((unsigned char)*p))
+ p++;
+
+ /* parse Major number, until the '.' */
+ while (*p != '.') {
+ if (p >= end || *p < '0' || *p > '9')
+ goto out;
+ maj *= 10;
+ maj += (*p - '0');
+ p++;
+ }
+
+ /* check Major version */
+ if (!maj)
+ goto out;
+
+ p++; /* skip the '.' */
+ if (p >= end || *p < '0' || *p > '9') /* Minor number is missing */
+ goto out;
+
+ /* Parse Minor number */
+ while (p < end) {
+ if (*p < '0' || *p > '9')
+ break;
+ min *= 10;
+ min += (*p - '0');
+ p++;
+ }
+
+ /* check Minor number */
+ if (min > 999)
+ goto out;
+
+ /* skip trailing spaces */
+ while (p < end && isspace((unsigned char)*p))
+ p++;
+ if (p != end)
+ goto out;
+
+ vsn = maj * 1000 + min;
+ out:
+ return vsn;
+}
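+
+/* Worked example: spoe_str_to_vsn("2.0", 3) parses maj=2 and min=0 and
+ * returns 2 * 1000 + 0 = 2000, matching the {2000, 2000} range declared for
+ * "2.0" in supported_versions above; " 1.15 " would yield 1015.
+ */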
+
+/* Encode the HELLO frame sent by HAProxy to an agent. It returns the number of
+ * encoded bytes in the frame on success, 0 if an encoding error occurred and -1
+ * if a fatal error occurred. */
+static int
+spoe_prepare_hahello_frame(struct appctx *appctx, char *frame, size_t size)
+{
+ struct buffer *chk;
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *p, *end;
+ unsigned int flags = SPOE_FRM_FL_FIN;
+ size_t sz;
+
+ p = frame;
+ end = frame+size;
+
+ /* Set Frame type */
+ *p++ = SPOE_FRM_T_HAPROXY_HELLO;
+
+ /* Set flags */
+ flags = htonl(flags);
+ memcpy(p, (char *)&flags, 4);
+ p += 4;
+
+ /* No stream-id and frame-id for HELLO frames */
+ *p++ = 0; *p++ = 0;
+
+ /* There are 3 mandatory items: "supported-versions", "max-frame-size"
+ * and "capabilities" */
+
+ /* "supported-versions" K/V item */
+ sz = SLEN(SUPPORTED_VERSIONS_KEY);
+ if (spoe_encode_buffer(SUPPORTED_VERSIONS_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_STR;
+ sz = SLEN(SUPPORTED_VERSIONS_VAL);
+ if (spoe_encode_buffer(SUPPORTED_VERSIONS_VAL, sz, &p, end) == -1)
+ goto too_big;
+
+ /* "max-fram-size" K/V item */
+ sz = SLEN(MAX_FRAME_SIZE_KEY);
+ if (spoe_encode_buffer(MAX_FRAME_SIZE_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_UINT32;
+ if (encode_varint(SPOE_APPCTX(appctx)->max_frame_size, &p, end) == -1)
+ goto too_big;
+
+ /* "capabilities" K/V item */
+ sz = SLEN(CAPABILITIES_KEY);
+ if (spoe_encode_buffer(CAPABILITIES_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_STR;
+ chk = get_trash_chunk();
+ if (agent != NULL && (agent->flags & SPOE_FL_PIPELINING)) {
+ memcpy(chk->area, "pipelining", 10);
+ chk->data += 10;
+ }
+ if (agent != NULL && (agent->flags & SPOE_FL_ASYNC)) {
+ if (chk->data) chk->area[chk->data++] = ',';
+ memcpy(chk->area+chk->data, "async", 5);
+ chk->data += 5;
+ }
+ if (agent != NULL && (agent->flags & SPOE_FL_RCV_FRAGMENTATION)) {
+ if (chk->data) chk->area[chk->data++] = ',';
+ memcpy(chk->area+chk->data, "fragmentation", 13);
+ chk->data += 13;
+ }
+ if (spoe_encode_buffer(chk->area, chk->data, &p, end) == -1)
+ goto too_big;
+
+ /* (optional) "engine-id" K/V item, if present */
+ if (agent != NULL && agent->rt[tid].engine_id != NULL) {
+ sz = SLEN(ENGINE_ID_KEY);
+ if (spoe_encode_buffer(ENGINE_ID_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_STR;
+ sz = strlen(agent->rt[tid].engine_id);
+ if (spoe_encode_buffer(agent->rt[tid].engine_id, sz, &p, end) == -1)
+ goto too_big;
+ }
+
+ return (p - frame);
+
+ too_big:
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return 0;
+}
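+
+/* Resulting wire layout of the HELLO frame built above (illustrative; K/V
+ * payload sizes vary, and stream-id/frame-id are single zero bytes here):
+ *
+ *   TYPE(1)  FLAGS(4)  STREAM-ID(1)  FRAME-ID(1)
+ *   "supported-versions"  STR     "2.0"
+ *   "max-frame-size"      UINT32  <varint>
+ *   "capabilities"        STR     "pipelining,async,fragmentation"
+ *   [ "engine-id"         STR     <uuid> ]   (only if one was generated)
+ */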
+
+/* Encode DISCONNECT frame sent by HAProxy to an agent. It returns the number of
+ * encoded bytes in the frame on success, 0 if an encoding error occurred and -1
+ * if a fatal error occurred. */
+static int
+spoe_prepare_hadiscon_frame(struct appctx *appctx, char *frame, size_t size)
+{
+ const char *reason;
+ char *p, *end;
+ unsigned int flags = SPOE_FRM_FL_FIN;
+ size_t sz;
+
+ p = frame;
+ end = frame+size;
+
+ /* Set Frame type */
+ *p++ = SPOE_FRM_T_HAPROXY_DISCON;
+
+ /* Set flags */
+ flags = htonl(flags);
+ memcpy(p, (char *)&flags, 4);
+ p += 4;
+
+ /* No stream-id and frame-id for DISCONNECT frames */
+ *p++ = 0; *p++ = 0;
+
+ if (SPOE_APPCTX(appctx)->status_code >= SPOE_FRM_ERRS)
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_UNKNOWN;
+
+ /* There are 2 mandatory items: "status-code" and "message" */
+
+ /* "status-code" K/V item */
+ sz = SLEN(STATUS_CODE_KEY);
+ if (spoe_encode_buffer(STATUS_CODE_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ *p++ = SPOE_DATA_T_UINT32;
+ if (encode_varint(SPOE_APPCTX(appctx)->status_code, &p, end) == -1)
+ goto too_big;
+
+ /* "message" K/V item */
+ sz = SLEN(MSG_KEY);
+ if (spoe_encode_buffer(MSG_KEY, sz, &p, end) == -1)
+ goto too_big;
+
+ /* Get the message corresponding to the status code */
+ reason = spoe_frm_err_reasons[SPOE_APPCTX(appctx)->status_code];
+
+ *p++ = SPOE_DATA_T_STR;
+ sz = strlen(reason);
+ if (spoe_encode_buffer(reason, sz, &p, end) == -1)
+ goto too_big;
+
+ return (p - frame);
+
+ too_big:
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return 0;
+}
+
+/* Encode the NOTIFY frame sent by HAProxy to an agent. It returns the number of
+ * encoded bytes in the frame on success, 0 if an encoding error occurred and -1
+ * if a fatal error occurred. */
+static int
+spoe_prepare_hanotify_frame(struct appctx *appctx, struct spoe_context *ctx,
+ char *frame, size_t size)
+{
+ char *p, *end;
+ unsigned int stream_id, frame_id;
+ unsigned int flags = SPOE_FRM_FL_FIN;
+ size_t sz;
+
+ p = frame;
+ end = frame+size;
+
+ stream_id = ctx->stream_id;
+ frame_id = ctx->frame_id;
+
+ if (ctx->flags & SPOE_CTX_FL_FRAGMENTED) {
+ /* The fragmentation is not supported by the applet */
+ if (!(SPOE_APPCTX(appctx)->flags & SPOE_APPCTX_FL_FRAGMENTATION)) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAG_NOT_SUPPORTED;
+ return -1;
+ }
+ flags = ctx->frag_ctx.flags;
+ }
+
+ /* Set Frame type */
+ *p++ = SPOE_FRM_T_HAPROXY_NOTIFY;
+
+ /* Set flags */
+ flags = htonl(flags);
+ memcpy(p, (char *)&flags, 4);
+ p += 4;
+
+ /* Set stream-id and frame-id */
+ if (encode_varint(stream_id, &p, end) == -1)
+ goto too_big;
+ if (encode_varint(frame_id, &p, end) == -1)
+ goto too_big;
+
+ /* Copy encoded messages, if possible */
+ sz = b_data(&ctx->buffer);
+ if (p + sz >= end)
+ goto too_big;
+ memcpy(p, b_head(&ctx->buffer), sz);
+ p += sz;
+
+ return (p - frame);
+
+ too_big:
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return 0;
+}
+
+/* Encode next part of a fragmented frame sent by HAProxy to an agent. It
+ * returns the number of encoded bytes in the frame on success, 0 if an encoding
+ * error occurred and -1 if a fatal error occurred. */
+static int
+spoe_prepare_hafrag_frame(struct appctx *appctx, struct spoe_context *ctx,
+ char *frame, size_t size)
+{
+ char *p, *end;
+ unsigned int stream_id, frame_id;
+ unsigned int flags;
+ size_t sz;
+
+ p = frame;
+ end = frame+size;
+
+ /* <ctx> is null when the stream has aborted the processing of a
+ * fragmented frame. In this case, we must notify the corresponding
+ * agent using ids stored in <frag_ctx>. */
+ if (ctx == NULL) {
+ flags = (SPOE_FRM_FL_FIN|SPOE_FRM_FL_ABRT);
+ stream_id = SPOE_APPCTX(appctx)->frag_ctx.cursid;
+ frame_id = SPOE_APPCTX(appctx)->frag_ctx.curfid;
+ }
+ else {
+ flags = ctx->frag_ctx.flags;
+ stream_id = ctx->stream_id;
+ frame_id = ctx->frame_id;
+ }
+
+ /* Set Frame type */
+ *p++ = SPOE_FRM_T_UNSET;
+
+ /* Set flags */
+ flags = htonl(flags);
+ memcpy(p, (char *)&flags, 4);
+ p += 4;
+
+ /* Set stream-id and frame-id */
+ if (encode_varint(stream_id, &p, end) == -1)
+ goto too_big;
+ if (encode_varint(frame_id, &p, end) == -1)
+ goto too_big;
+
+ if (ctx == NULL)
+ goto end;
+
+ /* Copy encoded messages, if possible */
+ sz = b_data(&ctx->buffer);
+ if (p + sz >= end)
+ goto too_big;
+ memcpy(p, b_head(&ctx->buffer), sz);
+ p += sz;
+
+ end:
+ return (p - frame);
+
+ too_big:
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return 0;
+}
+
+/* Decode and process the HELLO frame sent by an agent. It returns the number of
+ * read bytes on success, 0 if a decoding error occurred, and -1 if a fatal
+ * error occurred. */
+static int
+spoe_handle_agenthello_frame(struct appctx *appctx, char *frame, size_t size)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *p, *end;
+ int vsn, max_frame_size;
+ unsigned int flags;
+
+ p = frame;
+ end = frame + size;
+
+ /* Check frame type */
+ if (*p++ != SPOE_FRM_T_AGENT_HELLO) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ if (size < 7 /* TYPE + METADATA */) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Retrieve flags */
+ memcpy((char *)&flags, p, 4);
+ flags = ntohl(flags);
+ p += 4;
+
+ /* Fragmentation is not supported for HELLO frame */
+ if (!(flags & SPOE_FRM_FL_FIN)) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAG_NOT_SUPPORTED;
+ return -1;
+ }
+
+ /* stream-id and frame-id must be cleared */
+ if (*p != 0 || *(p+1) != 0) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ p += 2;
+
+ /* There are 3 mandatory items: "version", "max-frame-size" and
+ * "capabilities" */
+
+ /* Loop on K/V items */
+ vsn = max_frame_size = flags = 0;
+ while (p < end) {
+ char *str;
+ uint64_t sz;
+ int ret;
+
+ /* Decode the item key */
+ ret = spoe_decode_buffer(&p, end, &str, &sz);
+ if (ret == -1 || !sz) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Check "version" K/V item */
+ if (sz >= strlen(VERSION_KEY) && !memcmp(str, VERSION_KEY, strlen(VERSION_KEY))) {
+ int i, type = *p++;
+
+ /* The value must be a string */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_STR) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (spoe_decode_buffer(&p, end, &str, &sz) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ vsn = spoe_str_to_vsn(str, sz);
+ if (vsn == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_BAD_VSN;
+ return -1;
+ }
+ for (i = 0; supported_versions[i].str != NULL; ++i) {
+ if (vsn >= supported_versions[i].min &&
+ vsn <= supported_versions[i].max)
+ break;
+ }
+ if (supported_versions[i].str == NULL) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_BAD_VSN;
+ return -1;
+ }
+ }
+ /* Check "max-frame-size" K/V item */
+ else if (sz >= strlen(MAX_FRAME_SIZE_KEY) && !memcmp(str, MAX_FRAME_SIZE_KEY, strlen(MAX_FRAME_SIZE_KEY))) {
+ int type = *p++;
+
+ /* The value must be an integer */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_INT32 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_INT64 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_UINT32 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_UINT64) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (decode_varint(&p, end, &sz) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (sz < MIN_FRAME_SIZE ||
+ sz > SPOE_APPCTX(appctx)->max_frame_size) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_BAD_FRAME_SIZE;
+ return -1;
+ }
+ max_frame_size = sz;
+ }
+ /* Check "capabilities" K/V item */
+ else if (sz >= strlen(CAPABILITIES_KEY) && !memcmp(str, CAPABILITIES_KEY, strlen(CAPABILITIES_KEY))) {
+ int type = *p++;
+
+ /* The value must be a string */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_STR) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (spoe_decode_buffer(&p, end, &str, &sz) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ while (sz) {
+ char *delim;
+
+ /* Skip leading spaces */
+ for (; isspace((unsigned char)*str) && sz; str++, sz--);
+
+ if (sz >= 10 && !strncmp(str, "pipelining", 10)) {
+ str += 10; sz -= 10;
+ if (!sz || isspace((unsigned char)*str) || *str == ',')
+ flags |= SPOE_APPCTX_FL_PIPELINING;
+ }
+ else if (sz >= 5 && !strncmp(str, "async", 5)) {
+ str += 5; sz -= 5;
+ if (!sz || isspace((unsigned char)*str) || *str == ',')
+ flags |= SPOE_APPCTX_FL_ASYNC;
+ }
+ else if (sz >= 13 && !strncmp(str, "fragmentation", 13)) {
+ str += 13; sz -= 13;
+ if (!sz || isspace((unsigned char)*str) || *str == ',')
+ flags |= SPOE_APPCTX_FL_FRAGMENTATION;
+ }
+
+ /* Get the next comma or break */
+ if (!sz || (delim = memchr(str, ',', sz)) == NULL)
+ break;
+ delim++;
+ sz -= (delim - str);
+ str = delim;
+ }
+ }
+ else {
+ /* Silently ignore unknown item */
+ if (spoe_skip_data(&p, end) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ }
+ }
+
+ /* Final checks */
+ if (!vsn) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_NO_VSN;
+ return -1;
+ }
+ if (!max_frame_size) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_NO_FRAME_SIZE;
+ return -1;
+ }
+ if (!agent)
+ flags &= ~(SPOE_APPCTX_FL_PIPELINING|SPOE_APPCTX_FL_ASYNC);
+ else {
+ if ((flags & SPOE_APPCTX_FL_PIPELINING) && !(agent->flags & SPOE_FL_PIPELINING))
+ flags &= ~SPOE_APPCTX_FL_PIPELINING;
+ if ((flags & SPOE_APPCTX_FL_ASYNC) && !(agent->flags & SPOE_FL_ASYNC))
+ flags &= ~SPOE_APPCTX_FL_ASYNC;
+ }
+
+ SPOE_APPCTX(appctx)->version = (unsigned int)vsn;
+ SPOE_APPCTX(appctx)->max_frame_size = (unsigned int)max_frame_size;
+ SPOE_APPCTX(appctx)->flags |= flags;
+
+ return (p - frame);
+}
+
+/* Decode the DISCONNECT frame sent by an agent. It returns the number of read
+ * bytes on success, 0 if the frame can be ignored and -1 if an error
+ * occurred. */
+static int
+spoe_handle_agentdiscon_frame(struct appctx *appctx, char *frame, size_t size)
+{
+ char *p, *end;
+ unsigned int flags;
+
+ p = frame;
+ end = frame + size;
+
+ /* Check frame type */
+ if (*p++ != SPOE_FRM_T_AGENT_DISCON) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ if (size < 7 /* TYPE + METADATA */) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Retrieve flags */
+ memcpy((char *)&flags, p, 4);
+ flags = ntohl(flags);
+ p += 4;
+
+ /* Fragmentation is not supported for DISCONNECT frame */
+ if (!(flags & SPOE_FRM_FL_FIN)) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAG_NOT_SUPPORTED;
+ return -1;
+ }
+
+ /* stream-id and frame-id must be cleared */
+ if (*p != 0 || *(p+1) != 0) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ p += 2;
+
+ /* There are 2 mandatory items: "status-code" and "message" */
+
+ /* Loop on K/V items */
+ while (p < end) {
+ char *str;
+ uint64_t sz;
+ int ret;
+
+ /* Decode the item key */
+ ret = spoe_decode_buffer(&p, end, &str, &sz);
+ if (ret == -1 || !sz) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Check "status-code" K/V item */
+ if (sz >= strlen(STATUS_CODE_KEY) && !memcmp(str, STATUS_CODE_KEY, strlen(STATUS_CODE_KEY))) {
+ int type = *p++;
+
+ /* The value must be an integer */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_INT32 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_INT64 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_UINT32 &&
+ (type & SPOE_DATA_T_MASK) != SPOE_DATA_T_UINT64) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (decode_varint(&p, end, &sz) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ SPOE_APPCTX(appctx)->status_code = sz;
+ }
+
+ /* Check "message" K/V item */
+ else if (sz >= strlen(MSG_KEY) && !memcmp(str, MSG_KEY, strlen(MSG_KEY))) {
+ int type = *p++;
+
+ /* The value must be a string */
+ if ((type & SPOE_DATA_T_MASK) != SPOE_DATA_T_STR) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ ret = spoe_decode_buffer(&p, end, &str, &sz);
+ if (ret == -1 || sz > 255) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+#if defined(DEBUG_SPOE) || defined(DEBUG_FULL)
+ SPOE_APPCTX(appctx)->reason = str;
+ SPOE_APPCTX(appctx)->rlen = sz;
+#endif
+ }
+ else {
+ /* Silently ignore unknown item */
+ if (spoe_skip_data(&p, end) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ }
+ }
+
+ return (p - frame);
+}
+
+
+/* Decode ACK frame sent by an agent. It returns the number of read bytes on
+ * success, 0 if the frame can be ignored and -1 if an error occurred. */
+static int
+spoe_handle_agentack_frame(struct appctx *appctx, struct spoe_context **ctx,
+ char *frame, size_t size)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *p, *end;
+ uint64_t stream_id, frame_id;
+ int len;
+ unsigned int flags;
+
+ p = frame;
+ end = frame + size;
+ *ctx = NULL;
+
+ /* Check frame type */
+ if (*p++ != SPOE_FRM_T_AGENT_ACK) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ if (size < 7 /* TYPE + METADATA */) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Retrieve flags */
+ memcpy((char *)&flags, p, 4);
+ flags = ntohl(flags);
+ p += 4;
+
+ /* Fragmentation is not supported for now */
+ if (!(flags & SPOE_FRM_FL_FIN)) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAG_NOT_SUPPORTED;
+ return -1;
+ }
+
+ /* Get the stream-id and the frame-id */
+ if (decode_varint(&p, end, &stream_id) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+ if (decode_varint(&p, end, &frame_id) == -1) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* Try to find the corresponding SPOE context */
+ if (SPOE_APPCTX(appctx)->flags & SPOE_APPCTX_FL_ASYNC) {
+ list_for_each_entry((*ctx), &agent->rt[tid].waiting_queue, list) {
+ if ((*ctx)->stream_id == (unsigned int)stream_id &&
+ (*ctx)->frame_id == (unsigned int)frame_id)
+ goto found;
+ }
+ }
+ else {
+ list_for_each_entry((*ctx), &SPOE_APPCTX(appctx)->waiting_queue, list) {
+ if ((*ctx)->stream_id == (unsigned int)stream_id &&
+ (*ctx)->frame_id == (unsigned int)frame_id)
+ goto found;
+ }
+ }
+
+ if (SPOE_APPCTX(appctx)->frag_ctx.ctx &&
+ SPOE_APPCTX(appctx)->frag_ctx.cursid == (unsigned int)stream_id &&
+ SPOE_APPCTX(appctx)->frag_ctx.curfid == (unsigned int)frame_id) {
+
+ /* ABRT bit is set for an unfinished fragmented frame */
+ if (flags & SPOE_FRM_FL_ABRT) {
+ *ctx = SPOE_APPCTX(appctx)->frag_ctx.ctx;
+ (*ctx)->state = SPOE_CTX_ST_ERROR;
+ (*ctx)->status_code = SPOE_CTX_ERR_FRAG_FRAME_ABRT;
+ /* Ignore the payload */
+ goto end;
+ }
+ /* TODO: Handle more flags for fragmented frames: RESUME, FINISH... */
+ /* For now, we ignore the ack */
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_INVALID;
+ return 0;
+ }
+
+ /* No Stream found, ignore the frame */
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - Ignore ACK frame"
+ " - stream-id=%u - frame-id=%u\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, appctx,
+ (unsigned int)stream_id, (unsigned int)frame_id);
+
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_FRAMEID_NOTFOUND;
+ if (appctx->st0 == SPOE_APPCTX_ST_WAITING_SYNC_ACK) {
+		/* Report an error if we are waiting for the ack of another
+		 * frame, but not if there is no frame waiting for an ack
+		 * anymore (timeout)
+		 */
+ if (!LIST_ISEMPTY(&SPOE_APPCTX(appctx)->waiting_queue) ||
+ SPOE_APPCTX(appctx)->frag_ctx.ctx)
+ return -1;
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ SPOE_APPCTX(appctx)->cur_fpa = 0;
+ }
+ return 0;
+
+ found:
+ if (!spoe_acquire_buffer(&SPOE_APPCTX(appctx)->buffer,
+ &SPOE_APPCTX(appctx)->buffer_wait)) {
+ *ctx = NULL;
+ return 1; /* Retry later */
+ }
+
+ /* Copy encoded actions */
+ len = (end - p);
+ memcpy(b_head(&SPOE_APPCTX(appctx)->buffer), p, len);
+ b_set_data(&SPOE_APPCTX(appctx)->buffer, len);
+ p += len;
+
+ /* Transfer the buffer ownership to the SPOE context */
+ (*ctx)->buffer = SPOE_APPCTX(appctx)->buffer;
+ SPOE_APPCTX(appctx)->buffer = BUF_NULL;
+
+ (*ctx)->state = SPOE_CTX_ST_DONE;
+
+ end:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - ACK frame received"
+ " - ctx=%p - stream-id=%u - frame-id=%u - flags=0x%08x\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, appctx, *ctx, (*ctx)->stream_id,
+ (*ctx)->frame_id, flags);
+ return (p - frame);
+}
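+
+/* Note on the varints above: <stream_id> and <frame_id> use HAProxy's
+ * variable-length integer encoding, in which values below 240 fit in a single
+ * byte; otherwise the first byte carries 4 low bits and each continuation
+ * byte 7 more, so values below 2288 fit on 2 bytes. A simplified sketch of
+ * the matching encoder, with <p> the output position and <val> the value to
+ * encode (bounds checks elided):
+ *
+ *    if (val < 240)
+ *        *p++ = val;
+ *    else {
+ *        *p++ = (unsigned char)val | 240;
+ *        val = (val - 240) >> 4;
+ *        while (val >= 128) {
+ *            *p++ = (unsigned char)val | 128;
+ *            val = (val - 128) >> 7;
+ *        }
+ *        *p++ = val;
+ *    }
+ */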
+
+/* This function is used in cfgparse.c and declared in proto/checks.h. It
+ * prepares the request to send to agents during a healthcheck. It returns 0
+ * on success and -1 if an error occurred. */
+int
+spoe_prepare_healthcheck_request(char **req, int *len)
+{
+ struct appctx appctx;
+ struct spoe_appctx spoe_appctx;
+ char *frame, *end, buf[MAX_FRAME_SIZE+4];
+ size_t sz;
+ int ret;
+
+ memset(&appctx, 0, sizeof(appctx));
+ memset(&spoe_appctx, 0, sizeof(spoe_appctx));
+ memset(buf, 0, sizeof(buf));
+
+ appctx.svcctx = &spoe_appctx;
+ SPOE_APPCTX(&appctx)->max_frame_size = MAX_FRAME_SIZE;
+
+	frame = buf+4; /* Reserve the first 4 bytes for the frame size */
+ end = frame + MAX_FRAME_SIZE;
+
+ ret = spoe_prepare_hahello_frame(&appctx, frame, MAX_FRAME_SIZE);
+ if (ret <= 0)
+ return -1;
+ frame += ret;
+
+ /* Add "healthcheck" K/V item */
+ sz = SLEN(HEALTHCHECK_KEY);
+ if (spoe_encode_buffer(HEALTHCHECK_KEY, sz, &frame, end) == -1)
+ return -1;
+ *frame++ = (SPOE_DATA_T_BOOL | SPOE_DATA_FL_TRUE);
+
+ *len = frame - buf;
+ sz = htonl(*len - 4);
+ memcpy(buf, (char *)&sz, 4);
+
+ if ((*req = malloc(*len)) == NULL)
+ return -1;
+ memcpy(*req, buf, *len);
+ return 0;
+}
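+
+/* A minimal usage sketch for the helper above, assuming a connected socket
+ * <fd> (hypothetical, error handling elided). The returned buffer already
+ * contains the 4-byte network-order length prefix followed by the HELLO
+ * frame carrying the "healthcheck" boolean item:
+ *
+ *    char *req;
+ *    int len;
+ *
+ *    if (spoe_prepare_healthcheck_request(&req, &len) == 0) {
+ *        send(fd, req, len, 0);
+ *        free(req);
+ *    }
+ */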
+
+/* This function is used in checks.c and declared in proto/checks.h. It
+ * decodes the response received from an agent during a healthcheck. It
+ * returns 0 on success and -1 if an error occurred. */
+int
+spoe_handle_healthcheck_response(char *frame, size_t size, char *err, int errlen)
+{
+ struct appctx appctx;
+ struct spoe_appctx spoe_appctx;
+
+ memset(&appctx, 0, sizeof(appctx));
+ memset(&spoe_appctx, 0, sizeof(spoe_appctx));
+
+ appctx.svcctx = &spoe_appctx;
+ SPOE_APPCTX(&appctx)->max_frame_size = MAX_FRAME_SIZE;
+
+ if (*frame == SPOE_FRM_T_AGENT_DISCON) {
+ spoe_handle_agentdiscon_frame(&appctx, frame, size);
+ goto error;
+ }
+ if (spoe_handle_agenthello_frame(&appctx, frame, size) <= 0)
+ goto error;
+
+ return 0;
+
+ error:
+ if (SPOE_APPCTX(&appctx)->status_code >= SPOE_FRM_ERRS)
+ SPOE_APPCTX(&appctx)->status_code = SPOE_FRM_ERR_UNKNOWN;
+ strncpy(err, spoe_frm_err_reasons[SPOE_APPCTX(&appctx)->status_code], errlen);
+ return -1;
+}
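+
+/* The matching read side, as a sketch: assuming <frame>/<size> hold one
+ * complete frame with its 4-byte length prefix already stripped, a checker
+ * could validate the agent reply this way (report_failure() is a
+ * hypothetical helper; note that <err> is filled with strncpy() and may not
+ * be NUL-terminated when the reason is errlen bytes or longer):
+ *
+ *    char err[256];
+ *
+ *    if (spoe_handle_healthcheck_response(frame, size, err, sizeof(err)) < 0)
+ *        report_failure(err);
+ */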
+
+/* Send a SPOE frame to an agent. It returns -1 when an error occurred, 0 when
+ * the frame can be ignored, 1 to retry later, and the frame length on
+ * success. */
+static int
+spoe_send_frame(struct appctx *appctx, char *buf, size_t framesz)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int ret;
+ uint32_t netint;
+
+ /* 4 bytes are reserved at the beginning of <buf> to store the frame
+ * length. */
+ netint = htonl(framesz);
+ memcpy(buf, (char *)&netint, 4);
+ ret = applet_putblk(appctx, buf, framesz+4);
+ if (ret <= 0) {
+ if (ret == -3 && b_is_null(&sc_ic(sc)->buf)) {
+ /* WT: is this still needed for the case ret==-3 ? */
+ sc_need_room(sc, 0);
+ return 1; /* retry */
+ }
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO;
+ return -1; /* error */
+ }
+ return framesz;
+}
+
+/* Receive a SPOE frame from an agent. It returns -1 when an error occurred,
+ * 0 when the frame can be ignored, 1 to retry later and the frame length on
+ * success. */
+static int
+spoe_recv_frame(struct appctx *appctx, char *buf, size_t framesz)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int ret;
+ uint32_t netint;
+
+ ret = co_getblk(sc_oc(sc), (char *)&netint, 4, 0);
+ if (ret > 0) {
+ framesz = ntohl(netint);
+ if (framesz > SPOE_APPCTX(appctx)->max_frame_size) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOO_BIG;
+ return -1;
+ }
+ ret = co_getblk(sc_oc(sc), buf, framesz, 4);
+ }
+ if (ret <= 0) {
+ if (ret == 0) {
+ return 1; /* retry */
+ }
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO;
+ return -1; /* error */
+ }
+ return framesz;
+}
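+
+/* Both helpers above rely on the same framing convention: each SPOE frame is
+ * preceded by its length encoded on 4 bytes in network order. Outside of the
+ * applet (which reads from its channel), an equivalent standalone reader over
+ * a blocking socket <fd> and a sufficiently large <buf> could look like this
+ * sketch (error and short-read handling elided):
+ *
+ *    uint32_t netint;
+ *    size_t framesz;
+ *
+ *    read(fd, &netint, 4);        // frame length, network order
+ *    framesz = ntohl(netint);     // must not exceed the negotiated max-frame-size
+ *    read(fd, buf, framesz);      // the frame itself
+ */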
+
+/********************************************************************
+ * Functions that manage the SPOE applet
+ ********************************************************************/
+static int
+spoe_wakeup_appctx(struct appctx *appctx)
+{
+ applet_will_consume(appctx);
+ applet_have_more_data(appctx);
+ appctx_wakeup(appctx);
+ return 1;
+}
+
+/* Callback function that catches applet timeouts. If a timeout occurred, we
+ * set the <appctx->st1> flag and the SPOE applet is woken up. */
+static struct task *
+spoe_process_appctx(struct task * task, void *context, unsigned int state)
+{
+ struct appctx *appctx = context;
+
+ appctx->st1 = SPOE_APPCTX_ERR_NONE;
+ if (tick_is_expired(task->expire, now_ms)) {
+ task->expire = TICK_ETERNITY;
+ appctx->st1 = SPOE_APPCTX_ERR_TOUT;
+ }
+ spoe_wakeup_appctx(appctx);
+ return task;
+}
+
+static int
+spoe_init_appctx(struct appctx *appctx)
+{
+ struct spoe_appctx *spoe_appctx = SPOE_APPCTX(appctx);
+ struct spoe_agent *agent = spoe_appctx->agent;
+ struct task *task;
+ struct stream *s;
+
+ if ((task = task_new_here()) == NULL)
+ goto out_error;
+ task->process = spoe_process_appctx;
+ task->context = appctx;
+
+ if (appctx_finalize_startup(appctx, &agent->spoe_conf->agent_fe, &BUF_NULL) == -1)
+ goto out_free_task;
+
+ spoe_appctx->owner = appctx;
+ spoe_appctx->task = task;
+
+ LIST_INIT(&spoe_appctx->buffer_wait.list);
+ spoe_appctx->buffer_wait.target = appctx;
+ spoe_appctx->buffer_wait.wakeup_cb = (int (*)(void *))spoe_wakeup_appctx;
+
+ s = appctx_strm(appctx);
+ stream_set_backend(s, agent->b.be);
+
+ /* applet is waiting for data */
+ applet_need_more_data(appctx);
+
+ s->do_log = NULL;
+ s->scb->flags |= SC_FL_RCV_ONCE;
+
+ HA_SPIN_LOCK(SPOE_APPLET_LOCK, &agent->rt[tid].lock);
+ LIST_APPEND(&agent->rt[tid].applets, &spoe_appctx->list);
+ HA_SPIN_UNLOCK(SPOE_APPLET_LOCK, &agent->rt[tid].lock);
+ _HA_ATOMIC_INC(&agent->counters.applets);
+
+ appctx->st0 = SPOE_APPCTX_ST_CONNECT;
+ task_wakeup(spoe_appctx->task, TASK_WOKEN_INIT);
+ return 0;
+ out_free_task:
+ task_destroy(task);
+ out_error:
+ return -1;
+}
+
+/* Callback function that releases a SPOE applet. This happens when the
+ * connection with the agent is closed. */
+static void
+spoe_release_appctx(struct appctx *appctx)
+{
+ struct spoe_appctx *spoe_appctx = SPOE_APPCTX(appctx);
+ struct spoe_agent *agent;
+ struct spoe_context *ctx, *back;
+
+ if (spoe_appctx == NULL)
+ return;
+
+ appctx->svcctx = NULL;
+ agent = spoe_appctx->agent;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, appctx);
+
+ /* Remove applet from the list of running applets */
+ _HA_ATOMIC_DEC(&agent->counters.applets);
+ HA_SPIN_LOCK(SPOE_APPLET_LOCK, &agent->rt[tid].lock);
+ if (!LIST_ISEMPTY(&spoe_appctx->list)) {
+ LIST_DELETE(&spoe_appctx->list);
+ LIST_INIT(&spoe_appctx->list);
+ }
+ HA_SPIN_UNLOCK(SPOE_APPLET_LOCK, &agent->rt[tid].lock);
+
+ /* Shutdown the server connection, if needed */
+ if (appctx->st0 != SPOE_APPCTX_ST_END) {
+ if (appctx->st0 == SPOE_APPCTX_ST_IDLE) {
+ eb32_delete(&spoe_appctx->node);
+ _HA_ATOMIC_DEC(&agent->counters.idles);
+ agent->rt[tid].idles--;
+ }
+
+ appctx->st0 = SPOE_APPCTX_ST_END;
+ if (spoe_appctx->status_code == SPOE_FRM_ERR_NONE)
+ spoe_appctx->status_code = SPOE_FRM_ERR_IO;
+ }
+
+ /* Destroy the task attached to this applet */
+ task_destroy(spoe_appctx->task);
+
+ /* Report an error to all streams in the appctx waiting queue */
+ list_for_each_entry_safe(ctx, back, &spoe_appctx->waiting_queue, list) {
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_waiting);
+ spoe_update_stat_time(&ctx->stats.wait_ts, &ctx->stats.t_waiting);
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (spoe_appctx->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ }
+
+ /* If the applet was processing a fragmented frame, report an error to
+ * the corresponding stream. */
+ if (spoe_appctx->frag_ctx.ctx) {
+ ctx = spoe_appctx->frag_ctx.ctx;
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (spoe_appctx->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ }
+
+ if (!LIST_ISEMPTY(&agent->rt[tid].applets)) {
+ /* If there are still some running applets, remove reference on
+ * the current one from streams in the async waiting queue. In
+ * async mode, the ACK may be received from another appctx.
+ */
+ list_for_each_entry_safe(ctx, back, &agent->rt[tid].waiting_queue, list) {
+ if (ctx->spoe_appctx == spoe_appctx)
+ ctx->spoe_appctx = NULL;
+ }
+ goto end;
+ }
+ else {
+		/* This is the last running applet and the sending or async
+		 * waiting queue is not empty. So try to start a new applet if
+		 * HAProxy is not stopping. On success, we remove the reference
+		 * on the current appctx from streams in the async waiting
+		 * queue. In async mode, the ACK may be received from another
+		 * appctx.
+		 */
+ if (!stopping &&
+ (!LIST_ISEMPTY(&agent->rt[tid].sending_queue) || !LIST_ISEMPTY(&agent->rt[tid].waiting_queue)) &&
+ spoe_create_appctx(agent->spoe_conf)) {
+ list_for_each_entry_safe(ctx, back, &agent->rt[tid].waiting_queue, list) {
+ if (ctx->spoe_appctx == spoe_appctx)
+ ctx->spoe_appctx = NULL;
+ }
+ goto end;
+ }
+
+ /* Otherwise, report an error to all streams in the sending and
+ * async waiting queues.
+ */
+ list_for_each_entry_safe(ctx, back, &agent->rt[tid].sending_queue, list) {
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_sending);
+ spoe_update_stat_time(&ctx->stats.queue_ts, &ctx->stats.t_queue);
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (spoe_appctx->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ }
+ list_for_each_entry_safe(ctx, back, &agent->rt[tid].waiting_queue, list) {
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_waiting);
+ spoe_update_stat_time(&ctx->stats.wait_ts, &ctx->stats.t_waiting);
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (spoe_appctx->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ }
+ }
+
+ end:
+ /* Release allocated memory */
+ spoe_release_buffer(&spoe_appctx->buffer,
+ &spoe_appctx->buffer_wait);
+ pool_free(pool_head_spoe_appctx, spoe_appctx);
+
+	/* Update runtime agent info */
+ agent->rt[tid].frame_size = agent->max_frame_size;
+ list_for_each_entry(spoe_appctx, &agent->rt[tid].applets, list)
+ HA_ATOMIC_UPDATE_MIN(&agent->rt[tid].frame_size, spoe_appctx->max_frame_size);
+}
+
+static int
+spoe_handle_connect_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *frame, *buf;
+ int ret;
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ goto stop;
+ }
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - Connection timed out\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, appctx);
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOUT;
+ goto exit;
+ }
+
+ if (SPOE_APPCTX(appctx)->task->expire == TICK_ETERNITY)
+ SPOE_APPCTX(appctx)->task->expire =
+ tick_add_ifset(now_ms, agent->timeout.hello);
+
+ /* 4 bytes are reserved at the beginning of <buf> to store the frame
+ * length. */
+ buf = trash.area; frame = buf+4;
+ ret = spoe_prepare_hahello_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1)
+ ret = spoe_send_frame(appctx, buf, ret);
+
+ switch (ret) {
+ case -1: /* error */
+ case 0: /* ignore => an error, cannot be ignored */
+ goto exit;
+
+ case 1: /* retry later */
+ goto stop;
+
+ default:
+ /* HELLO frame successfully sent, now wait for the
+ * reply. */
+ appctx->st0 = SPOE_APPCTX_ST_CONNECTING;
+ goto next;
+ }
+
+ next:
+ return 0;
+ stop:
+ return 1;
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
+
+static int
+spoe_handle_connecting_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *frame;
+ int ret;
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - Connection timed out\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, appctx);
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOUT;
+ goto exit;
+ }
+
+ frame = trash.area; trash.data = 0;
+ ret = spoe_recv_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1) {
+ if (*frame == SPOE_FRM_T_AGENT_DISCON) {
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECTING;
+ goto next;
+ }
+ trash.data = ret + 4;
+ ret = spoe_handle_agenthello_frame(appctx, frame, ret);
+ }
+
+ switch (ret) {
+ case -1: /* error */
+ case 0: /* ignore => an error, cannot be ignored */
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ goto next;
+
+ case 1: /* retry later */
+ goto stop;
+
+ default:
+ _HA_ATOMIC_INC(&agent->counters.idles);
+ agent->rt[tid].idles++;
+ appctx->st0 = SPOE_APPCTX_ST_IDLE;
+ SPOE_APPCTX(appctx)->node.key = 0;
+ eb32_insert(&agent->rt[tid].idle_applets, &SPOE_APPCTX(appctx)->node);
+
+		/* Update runtime agent info */
+ HA_ATOMIC_UPDATE_MIN(&agent->rt[tid].frame_size, SPOE_APPCTX(appctx)->max_frame_size);
+ goto next;
+ }
+
+ next:
+ /* Do not forget to remove processed frame from the output buffer */
+ if (trash.data)
+ co_skip(sc_oc(sc), trash.data);
+
+ SPOE_APPCTX(appctx)->task->expire =
+ tick_add_ifset(now_ms, agent->timeout.idle);
+ return 0;
+ stop:
+ return 1;
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
+
+
+static int
+spoe_handle_sending_frame_appctx(struct appctx *appctx, int *skip)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ struct spoe_context *ctx = NULL;
+ char *frame, *buf;
+ int ret;
+
+ /* 4 bytes are reserved at the beginning of <buf> to store the frame
+ * length. */
+ buf = trash.area; frame = buf+4;
+
+ if (appctx->st0 == SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY) {
+ ctx = SPOE_APPCTX(appctx)->frag_ctx.ctx;
+ ret = spoe_prepare_hafrag_frame(appctx, ctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ }
+ else if (LIST_ISEMPTY(&agent->rt[tid].sending_queue)) {
+ *skip = 1;
+ ret = 1;
+ goto end;
+ }
+ else {
+ ctx = LIST_NEXT(&agent->rt[tid].sending_queue, typeof(ctx), list);
+ ret = spoe_prepare_hanotify_frame(appctx, ctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+
+ }
+
+ if (ret > 1)
+ ret = spoe_send_frame(appctx, buf, ret);
+
+ switch (ret) {
+ case -1: /* error */
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ goto end;
+
+ case 0: /* ignore */
+ if (ctx == NULL)
+ goto abort_frag_frame;
+
+ spoe_release_buffer(&ctx->buffer, &ctx->buffer_wait);
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_sending);
+ spoe_update_stat_time(&ctx->stats.queue_ts, &ctx->stats.t_queue);
+ ctx->spoe_appctx = NULL;
+ ctx->state = SPOE_CTX_ST_ERROR;
+ ctx->status_code = (SPOE_APPCTX(appctx)->status_code + 0x100);
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ *skip = 1;
+ break;
+
+ case 1: /* retry */
+ *skip = 1;
+ break;
+
+ default:
+ if (ctx == NULL)
+ goto abort_frag_frame;
+
+ spoe_release_buffer(&ctx->buffer, &ctx->buffer_wait);
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_sending);
+ spoe_update_stat_time(&ctx->stats.queue_ts, &ctx->stats.t_queue);
+ ctx->spoe_appctx = SPOE_APPCTX(appctx);
+ if (!(ctx->flags & SPOE_CTX_FL_FRAGMENTED) ||
+ (ctx->frag_ctx.flags & SPOE_FRM_FL_FIN))
+ goto no_frag_frame_sent;
+ else
+ goto frag_frame_sent;
+ }
+ goto end;
+
+ frag_frame_sent:
+ appctx->st0 = SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY;
+ *skip = 1;
+ SPOE_APPCTX(appctx)->frag_ctx.ctx = ctx;
+ SPOE_APPCTX(appctx)->frag_ctx.cursid = ctx->stream_id;
+ SPOE_APPCTX(appctx)->frag_ctx.curfid = ctx->frame_id;
+ ctx->state = SPOE_CTX_ST_ENCODING_MSGS;
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ goto end;
+
+ no_frag_frame_sent:
+ if (SPOE_APPCTX(appctx)->flags & SPOE_APPCTX_FL_ASYNC) {
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ LIST_APPEND(&agent->rt[tid].waiting_queue, &ctx->list);
+ }
+ else if (SPOE_APPCTX(appctx)->flags & SPOE_APPCTX_FL_PIPELINING) {
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ LIST_APPEND(&SPOE_APPCTX(appctx)->waiting_queue, &ctx->list);
+ }
+ else {
+ appctx->st0 = SPOE_APPCTX_ST_WAITING_SYNC_ACK;
+ *skip = 1;
+ LIST_APPEND(&SPOE_APPCTX(appctx)->waiting_queue, &ctx->list);
+ }
+ _HA_ATOMIC_INC(&agent->counters.nb_waiting);
+ ctx->stats.wait_ts = now_ns;
+ SPOE_APPCTX(appctx)->frag_ctx.ctx = NULL;
+ SPOE_APPCTX(appctx)->frag_ctx.cursid = 0;
+ SPOE_APPCTX(appctx)->frag_ctx.curfid = 0;
+ SPOE_APPCTX(appctx)->cur_fpa++;
+
+ ctx->state = SPOE_CTX_ST_WAITING_ACK;
+ goto end;
+
+ abort_frag_frame:
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ SPOE_APPCTX(appctx)->frag_ctx.ctx = NULL;
+ SPOE_APPCTX(appctx)->frag_ctx.cursid = 0;
+ SPOE_APPCTX(appctx)->frag_ctx.curfid = 0;
+ goto end;
+
+ end:
+ return ret;
+}
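+
+/* After a complete NOTIFY frame is sent, the context is queued according to
+ * the negotiated mode (see the no_frag_frame_sent label above):
+ *
+ *    async      : agent->rt[tid].waiting_queue, the ACK may be handled by
+ *                 any applet of the agent;
+ *    pipelining : the applet's own waiting_queue, several frames may be in
+ *                 flight on this connection;
+ *    sync       : the applet's own waiting_queue plus the WAITING_SYNC_ACK
+ *                 state, one frame at a time.
+ */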
+
+static int
+spoe_handle_receiving_frame_appctx(struct appctx *appctx, int *skip)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ struct spoe_context *ctx = NULL;
+ char *frame;
+ int ret;
+
+ frame = trash.area; trash.data = 0;
+ ret = spoe_recv_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1) {
+ if (*frame == SPOE_FRM_T_AGENT_DISCON) {
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECTING;
+ ret = -1;
+ goto end;
+ }
+ trash.data = ret + 4;
+ ret = spoe_handle_agentack_frame(appctx, &ctx, frame, ret);
+ }
+ switch (ret) {
+ case -1: /* error */
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ break;
+
+ case 0: /* ignore */
+ break;
+
+ case 1: /* retry */
+ *skip = 1;
+ break;
+
+ default:
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ _HA_ATOMIC_DEC(&agent->counters.nb_waiting);
+ spoe_update_stat_time(&ctx->stats.wait_ts, &ctx->stats.t_waiting);
+ ctx->stats.response_ts = now_ns;
+ if (ctx->spoe_appctx) {
+ ctx->spoe_appctx->cur_fpa--;
+ ctx->spoe_appctx = NULL;
+ }
+ if (appctx->st0 == SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY &&
+ ctx == SPOE_APPCTX(appctx)->frag_ctx.ctx) {
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ SPOE_APPCTX(appctx)->frag_ctx.ctx = NULL;
+ SPOE_APPCTX(appctx)->frag_ctx.cursid = 0;
+ SPOE_APPCTX(appctx)->frag_ctx.curfid = 0;
+ }
+ else if (appctx->st0 == SPOE_APPCTX_ST_WAITING_SYNC_ACK)
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ break;
+ }
+
+ /* Do not forget to remove processed frame from the output buffer */
+ if (trash.data)
+ co_skip(sc_oc(appctx_sc(appctx)), trash.data);
+ end:
+ return ret;
+}
+
+static int
+spoe_handle_processing_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct server *srv = objt_server(__sc_strm(sc)->target);
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ int ret, skip_sending = 0, skip_receiving = 0, active_s = 0, active_r = 0, close_asap = 0;
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOUT;
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ appctx->st1 = SPOE_APPCTX_ERR_NONE;
+ goto next;
+ }
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - process: fpa=%u/%u - appctx-state=%s - weight=%u - flags=0x%08x\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, appctx, SPOE_APPCTX(appctx)->cur_fpa,
+ agent->max_fpa, spoe_appctx_state_str[appctx->st0],
+ SPOE_APPCTX(appctx)->node.key, SPOE_APPCTX(appctx)->flags);
+
+
+ /* Close the applet ASAP because some sessions are waiting for a free
+ * connection slot. It is only an issue in multithreaded mode.
+ */
+ close_asap = (global.nbthread > 1 &&
+ (agent->b.be->queue.length ||
+ (srv && (srv->queue.length || (srv->maxconn && srv->served >= srv_dynamic_maxconn(srv))))));
+
+ /* receiving_frame loop */
+ while (!skip_receiving) {
+ ret = spoe_handle_receiving_frame_appctx(appctx, &skip_receiving);
+ switch (ret) {
+ case -1: /* error */
+ goto next;
+
+ case 0: /* ignore */
+ active_r = 1;
+ break;
+
+ case 1: /* retry */
+ break;
+
+ default:
+ active_r = 1;
+ break;
+ }
+ }
+
+	/* Don't try to send a new frame if we are waiting for at least one
+	 * ack in sync mode, or if the applet must be closed ASAP
+	 */
+ if (appctx->st0 == SPOE_APPCTX_ST_WAITING_SYNC_ACK || (close_asap && SPOE_APPCTX(appctx)->cur_fpa))
+ skip_sending = 1;
+
+ /* send_frame loop */
+ while (!skip_sending && SPOE_APPCTX(appctx)->cur_fpa < agent->max_fpa) {
+ ret = spoe_handle_sending_frame_appctx(appctx, &skip_sending);
+ switch (ret) {
+ case -1: /* error */
+ goto next;
+
+ case 0: /* ignore */
+ if (SPOE_APPCTX(appctx)->node.key)
+ SPOE_APPCTX(appctx)->node.key--;
+ active_s++;
+ break;
+
+ case 1: /* retry */
+ break;
+
+ default:
+ if (SPOE_APPCTX(appctx)->node.key)
+ SPOE_APPCTX(appctx)->node.key--;
+ active_s++;
+ break;
+ }
+
+		/* if the applet must be closed ASAP, don't send more than one frame */
+ if (close_asap)
+ break;
+ }
+
+ if (active_s || active_r) {
+ update_freq_ctr(&agent->rt[tid].processing_per_sec, active_s);
+ SPOE_APPCTX(appctx)->task->expire = tick_add_ifset(now_ms, agent->timeout.idle);
+ }
+
+ if (appctx->st0 == SPOE_APPCTX_ST_PROCESSING && SPOE_APPCTX(appctx)->cur_fpa < agent->max_fpa) {
+		/* If the applet must be closed, don't switch it to the IDLE
+		 * state; close it once the last waiting frame is acknowledged.
+		 */
+ if (close_asap) {
+ if (SPOE_APPCTX(appctx)->cur_fpa)
+ goto out;
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_NONE;
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ appctx->st1 = SPOE_APPCTX_ERR_NONE;
+ goto next;
+ }
+ _HA_ATOMIC_INC(&agent->counters.idles);
+ agent->rt[tid].idles++;
+ appctx->st0 = SPOE_APPCTX_ST_IDLE;
+ eb32_insert(&agent->rt[tid].idle_applets, &SPOE_APPCTX(appctx)->node);
+ }
+
+ out:
+ return 1;
+
+ next:
+ SPOE_APPCTX(appctx)->task->expire = tick_add_ifset(now_ms, agent->timeout.idle);
+ return 0;
+}
+
+static int
+spoe_handle_disconnect_appctx(struct appctx *appctx)
+{
+ struct spoe_agent *agent = SPOE_APPCTX(appctx)->agent;
+ char *frame, *buf;
+ int ret;
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT)
+ goto exit;
+
+ /* 4 bytes are reserved at the beginning of <buf> to store the frame
+ * length. */
+ buf = trash.area; frame = buf+4;
+ ret = spoe_prepare_hadiscon_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1)
+ ret = spoe_send_frame(appctx, buf, ret);
+
+ switch (ret) {
+ case -1: /* error */
+ case 0: /* ignore => an error, cannot be ignored */
+ goto exit;
+
+ case 1: /* retry */
+ goto stop;
+
+ default:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - disconnected by HAProxy (%d): %s\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, appctx,
+ SPOE_APPCTX(appctx)->status_code,
+ spoe_frm_err_reasons[SPOE_APPCTX(appctx)->status_code]);
+
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECTING;
+ goto next;
+ }
+
+ next:
+ SPOE_APPCTX(appctx)->task->expire =
+ tick_add_ifset(now_ms, agent->timeout.idle);
+ return 0;
+ stop:
+ return 1;
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
+
+static int
+spoe_handle_disconnecting_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ char *frame;
+ int ret;
+
+ if (appctx->st1 == SPOE_APPCTX_ERR_TOUT) {
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_TOUT;
+ goto exit;
+ }
+
+ frame = trash.area; trash.data = 0;
+ ret = spoe_recv_frame(appctx, frame,
+ SPOE_APPCTX(appctx)->max_frame_size);
+ if (ret > 1) {
+ trash.data = ret + 4;
+ ret = spoe_handle_agentdiscon_frame(appctx, frame, ret);
+ }
+
+ switch (ret) {
+ case -1: /* error */
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - error on frame (%s)\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ ((struct spoe_agent *)SPOE_APPCTX(appctx)->agent)->id,
+ __FUNCTION__, appctx,
+ spoe_frm_err_reasons[SPOE_APPCTX(appctx)->status_code]);
+ goto exit;
+
+ case 0: /* ignore */
+ goto next;
+
+ case 1: /* retry */
+ goto stop;
+
+ default:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - disconnected by peer (%d): %.*s\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ ((struct spoe_agent *)SPOE_APPCTX(appctx)->agent)->id,
+ __FUNCTION__, appctx, SPOE_APPCTX(appctx)->status_code,
+ SPOE_APPCTX(appctx)->rlen, SPOE_APPCTX(appctx)->reason);
+ goto exit;
+ }
+
+ next:
+ /* Do not forget to remove processed frame from the output buffer */
+ if (trash.data)
+ co_skip(sc_oc(sc), trash.data);
+
+ return 0;
+ stop:
+ return 1;
+ exit:
+ appctx->st0 = SPOE_APPCTX_ST_EXIT;
+ return 0;
+}
+
+/* I/O Handler processing messages exchanged with the agent */
+static void
+spoe_handle_appctx(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct spoe_agent *agent;
+
+ if (SPOE_APPCTX(appctx) == NULL)
+ return;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ goto out;
+ }
+
+ SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_NONE;
+ agent = SPOE_APPCTX(appctx)->agent;
+
+ switchstate:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: appctx=%p"
+ " - appctx-state=%s\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, appctx, spoe_appctx_state_str[appctx->st0]);
+
+ switch (appctx->st0) {
+ case SPOE_APPCTX_ST_CONNECT:
+ if (spoe_handle_connect_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_CONNECTING:
+ if (spoe_handle_connecting_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_IDLE:
+ _HA_ATOMIC_DEC(&agent->counters.idles);
+ agent->rt[tid].idles--;
+ eb32_delete(&SPOE_APPCTX(appctx)->node);
+ if (stopping &&
+ LIST_ISEMPTY(&agent->rt[tid].sending_queue) &&
+ LIST_ISEMPTY(&SPOE_APPCTX(appctx)->waiting_queue)) {
+ SPOE_APPCTX(appctx)->task->expire =
+ tick_add_ifset(now_ms, agent->timeout.idle);
+ appctx->st0 = SPOE_APPCTX_ST_DISCONNECT;
+ goto switchstate;
+ }
+ appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
+ __fallthrough;
+
+ case SPOE_APPCTX_ST_PROCESSING:
+ case SPOE_APPCTX_ST_SENDING_FRAG_NOTIFY:
+ case SPOE_APPCTX_ST_WAITING_SYNC_ACK:
+ if (spoe_handle_processing_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_DISCONNECT:
+ if (spoe_handle_disconnect_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_DISCONNECTING:
+ if (spoe_handle_disconnecting_appctx(appctx))
+ goto out;
+ goto switchstate;
+
+ case SPOE_APPCTX_ST_EXIT:
+ appctx->st0 = SPOE_APPCTX_ST_END;
+ SPOE_APPCTX(appctx)->task->expire = TICK_ETERNITY;
+ se_fl_set(appctx->sedesc, SE_FL_EOS);
+ if (SPOE_APPCTX(appctx)->status_code != SPOE_FRM_ERR_NONE)
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ else
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+ __fallthrough;
+
+ case SPOE_APPCTX_ST_END:
+ return;
+ }
+ out:
+ if (SPOE_APPCTX(appctx)->task->expire != TICK_ETERNITY)
+ task_queue(SPOE_APPCTX(appctx)->task);
+}
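+
+/* For reference, the applet states driven by the I/O handler above, sketched
+ * from the switch (error or stopping conditions may jump to DISCONNECT or
+ * EXIT from any state):
+ *
+ *    CONNECT -> CONNECTING -> IDLE <-> { PROCESSING,
+ *                                        SENDING_FRAG_NOTIFY,
+ *                                        WAITING_SYNC_ACK }
+ *                              |
+ *                              v
+ *                         DISCONNECT -> DISCONNECTING -> EXIT -> END
+ */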
+
+struct applet spoe_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<SPOE>", /* used for logging */
+ .fct = spoe_handle_appctx,
+ .init = spoe_init_appctx,
+ .release = spoe_release_appctx,
+};
+
+/* Create a SPOE applet. On success, the created applet is returned, else
+ * NULL. */
+static struct appctx *
+spoe_create_appctx(struct spoe_config *conf)
+{
+ struct spoe_agent *agent = conf->agent;
+ struct spoe_appctx *spoe_appctx;
+ struct appctx *appctx;
+
+ /* Do not try to create a new applet if there is no server up for the
+ * agent's backend. */
+ if (!agent->b.be->srv_act && !agent->b.be->srv_bck) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: don't create SPOE appctx: no server up\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id, __FUNCTION__);
+ goto out;
+ }
+
+	/* Do not try to create a new applet if we have reached the maximum
+	 * number of connections per second */
+ if (agent->cps_max > 0) {
+ if (!freq_ctr_remain(&agent->rt[tid].conn_per_sec, agent->cps_max, 0)) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: don't create SPOE appctx: max CPS reached\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id, __FUNCTION__);
+ goto out;
+ }
+ }
+
+ spoe_appctx = pool_zalloc(pool_head_spoe_appctx);
+ if (spoe_appctx == NULL)
+ goto out_error;
+
+ spoe_appctx->agent = agent;
+ spoe_appctx->version = 0;
+ spoe_appctx->max_frame_size = agent->max_frame_size;
+ spoe_appctx->flags = 0;
+ spoe_appctx->status_code = SPOE_FRM_ERR_NONE;
+ spoe_appctx->buffer = BUF_NULL;
+ spoe_appctx->cur_fpa = 0;
+ LIST_INIT(&spoe_appctx->list);
+ LIST_INIT(&spoe_appctx->waiting_queue);
+
+
+ if ((appctx = appctx_new_here(&spoe_applet, NULL)) == NULL)
+ goto out_free_spoe_appctx;
+
+ appctx->svcctx = spoe_appctx;
+ if (appctx_init(appctx) == -1)
+ goto out_free_appctx;
+
+ /* Increase the per-process number of cumulated connections */
+ if (agent->cps_max > 0)
+ update_freq_ctr(&agent->rt[tid].conn_per_sec, 1);
+
+ appctx_wakeup(appctx);
+ return appctx;
+
+ /* Error unrolling */
+ out_free_appctx:
+ appctx_free_on_early_error(appctx);
+ out_free_spoe_appctx:
+ pool_free(pool_head_spoe_appctx, spoe_appctx);
+ out_error:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: failed to create SPOE appctx\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id, __FUNCTION__);
+ send_log(&conf->agent_fe, LOG_EMERG, "SPOE: [%s] failed to create SPOE applet\n", agent->id);
+ out:
+
+ return NULL;
+}
+
+static int
+spoe_queue_context(struct spoe_context *ctx)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ struct spoe_appctx *spoe_appctx;
+
+ /* Check if we need to create a new SPOE applet or not. */
+ if (agent->rt[tid].processing < agent->rt[tid].idles ||
+ agent->rt[tid].processing < read_freq_ctr(&agent->rt[tid].processing_per_sec))
+ goto end;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - try to create new SPOE appctx\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id, __FUNCTION__,
+ ctx->strm);
+
+ spoe_create_appctx(conf);
+
+ end:
+ /* The only reason to return an error is when there is no applet */
+ if (LIST_ISEMPTY(&agent->rt[tid].applets)) {
+ ctx->status_code = SPOE_CTX_ERR_RES;
+ return -1;
+ }
+
+	/* Add the SPOE context to the sending queue if the stream has no
+	 * applet already assigned. Otherwise, don't queue it. */
+ _HA_ATOMIC_INC(&agent->counters.nb_sending);
+ spoe_update_stat_time(&ctx->stats.request_ts, &ctx->stats.t_request);
+ ctx->stats.queue_ts = now_ns;
+ if (ctx->spoe_appctx)
+ return 1;
+ LIST_APPEND(&agent->rt[tid].sending_queue, &ctx->list);
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - Add stream in sending queue"
+ " - applets=%u - idles=%u - processing=%u\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id, __FUNCTION__,
+ ctx->strm, agent->counters.applets, agent->counters.idles,
+ agent->rt[tid].processing);
+
+	/* Finally, try to wake up an idle applet. */
+ if (!eb_is_empty(&agent->rt[tid].idle_applets)) {
+ struct eb32_node *node;
+
+ node = eb32_first(&agent->rt[tid].idle_applets);
+ spoe_appctx = eb32_entry(node, struct spoe_appctx, node);
+ if (node && spoe_appctx) {
+ eb32_delete(&spoe_appctx->node);
+ spoe_appctx->node.key++;
+ eb32_insert(&agent->rt[tid].idle_applets, &spoe_appctx->node);
+ spoe_wakeup_appctx(spoe_appctx->owner);
+ }
+ }
+ return 1;
+}
+
+/***************************************************************************
+ * Functions that encode SPOE messages
+ **************************************************************************/
+/* Encode a SPOE message. Info in <ctx->frag_ctx>, if any, is used to handle
+ * fragmented content. If the next message can be processed, it returns 0. If
+ * the message is too big, it returns -1. */
+static int
+spoe_encode_message(struct stream *s, struct spoe_context *ctx,
+ struct spoe_message *msg, int dir,
+ char **buf, char *end)
+{
+ struct sample *smp;
+ struct spoe_arg *arg;
+ int ret;
+
+ if (msg->cond) {
+ ret = acl_exec_cond(msg->cond, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (msg->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ /* the rule does not match */
+ if (!ret)
+ goto next;
+ }
+
+ /* Resume encoding of a SPOE argument */
+ if (ctx->frag_ctx.curarg != NULL) {
+ arg = ctx->frag_ctx.curarg;
+ goto encode_argument;
+ }
+
+ if (ctx->frag_ctx.curoff != UINT_MAX)
+ goto encode_msg_payload;
+
+	/* Check if there is enough space for the message name and the
+	 * number of arguments. This implies <msg->id_len> is encoded on at
+	 * most 2 bytes (< 2288). */
+ if (*buf + 2 + msg->id_len + 1 > end)
+ goto too_big;
+
+ /* Encode the message name */
+ if (spoe_encode_buffer(msg->id, msg->id_len, buf, end) == -1)
+ goto too_big;
+
+ /* Set the number of arguments for this message */
+ **buf = msg->nargs;
+ (*buf)++;
+
+ ctx->frag_ctx.curoff = 0;
+ encode_msg_payload:
+
+ /* Loop on arguments */
+ list_for_each_entry(arg, &msg->args, list) {
+ ctx->frag_ctx.curarg = arg;
+ ctx->frag_ctx.curoff = UINT_MAX;
+ ctx->frag_ctx.curlen = 0;
+
+ encode_argument:
+ if (ctx->frag_ctx.curoff != UINT_MAX)
+ goto encode_arg_value;
+
+		/* Encode the argument name as a string. It can be NULL */
+ if (spoe_encode_buffer(arg->name, arg->name_len, buf, end) == -1)
+ goto too_big;
+
+ ctx->frag_ctx.curoff = 0;
+ encode_arg_value:
+
+ /* Fetch the argument value */
+ smp = sample_process(s->be, s->sess, s, dir|SMP_OPT_FINAL, arg->expr, NULL);
+ if (smp) {
+ smp->ctx.a[0] = &ctx->frag_ctx.curlen;
+ smp->ctx.a[1] = &ctx->frag_ctx.curoff;
+ }
+ ret = spoe_encode_data(smp, buf, end);
+ if (ret == -1 || ctx->frag_ctx.curoff)
+ goto too_big;
+ }
+
+ next:
+ return 0;
+
+ too_big:
+ return -1;
+}
+
+/* Encode a list of SPOE messages. Info in <ctx->frag_ctx>, if any, is used
+ * to handle fragmented content. On success it returns 1. If an error
+ * occurred, -1 is returned. If nothing has been encoded, it returns 0 (this
+ * is only possible for an unfragmented payload). */
+static int
+spoe_encode_messages(struct stream *s, struct spoe_context *ctx,
+ struct list *messages, int dir, int type)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ struct spoe_message *msg;
+ char *p, *end;
+
+ p = b_head(&ctx->buffer);
+ end = p + agent->rt[tid].frame_size - FRAME_HDR_SIZE;
+
+ if (type == SPOE_MSGS_BY_EVENT) { /* Loop on messages by event */
+ /* Resume encoding of a SPOE message */
+ if (ctx->frag_ctx.curmsg != NULL) {
+ msg = ctx->frag_ctx.curmsg;
+ goto encode_evt_message;
+ }
+
+ list_for_each_entry(msg, messages, by_evt) {
+ ctx->frag_ctx.curmsg = msg;
+ ctx->frag_ctx.curarg = NULL;
+ ctx->frag_ctx.curoff = UINT_MAX;
+
+ encode_evt_message:
+ if (spoe_encode_message(s, ctx, msg, dir, &p, end) == -1)
+ goto too_big;
+ }
+ }
+ else if (type == SPOE_MSGS_BY_GROUP) { /* Loop on messages by group */
+ /* Resume encoding of a SPOE message */
+ if (ctx->frag_ctx.curmsg != NULL) {
+ msg = ctx->frag_ctx.curmsg;
+ goto encode_grp_message;
+ }
+
+ list_for_each_entry(msg, messages, by_grp) {
+ ctx->frag_ctx.curmsg = msg;
+ ctx->frag_ctx.curarg = NULL;
+ ctx->frag_ctx.curoff = UINT_MAX;
+
+ encode_grp_message:
+ if (spoe_encode_message(s, ctx, msg, dir, &p, end) == -1)
+ goto too_big;
+ }
+ }
+ else
+ goto skip;
+
+
+ /* nothing has been encoded for an unfragmented payload */
+ if (!(ctx->flags & SPOE_CTX_FL_FRAGMENTED) && p == b_head(&ctx->buffer))
+ goto skip;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - encode %s messages - spoe_appctx=%p"
+ "- max_size=%u - encoded=%ld\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ agent->id, __FUNCTION__, s,
+ ((ctx->flags & SPOE_CTX_FL_FRAGMENTED) ? "last fragment of" : "unfragmented"),
+ ctx->spoe_appctx, (agent->rt[tid].frame_size - FRAME_HDR_SIZE),
+ p - b_head(&ctx->buffer));
+
+ b_set_data(&ctx->buffer, p - b_head(&ctx->buffer));
+ ctx->frag_ctx.curmsg = NULL;
+ ctx->frag_ctx.curarg = NULL;
+ ctx->frag_ctx.curoff = 0;
+ ctx->frag_ctx.flags = SPOE_FRM_FL_FIN;
+
+ return 1;
+
+ too_big:
+	/* Return an error if fragmentation is unsupported or if nothing has
+	 * been encoded because it's too big and not splittable. */
+ if (!(agent->flags & SPOE_FL_SND_FRAGMENTATION) || p == b_head(&ctx->buffer)) {
+ ctx->status_code = SPOE_CTX_ERR_TOO_BIG;
+ return -1;
+ }
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - encode fragmented messages - spoe_appctx=%p"
+ " - curmsg=%p - curarg=%p - curoff=%u"
+ " - max_size=%u - encoded=%ld\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ agent->id, __FUNCTION__, s, ctx->spoe_appctx,
+ ctx->frag_ctx.curmsg, ctx->frag_ctx.curarg, ctx->frag_ctx.curoff,
+ (agent->rt[tid].frame_size - FRAME_HDR_SIZE), p - b_head(&ctx->buffer));
+
+ b_set_data(&ctx->buffer, p - b_head(&ctx->buffer));
+ ctx->flags |= SPOE_CTX_FL_FRAGMENTED;
+ ctx->frag_ctx.flags &= ~SPOE_FRM_FL_FIN;
+ return 1;
+
+ skip:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - skip the frame because nothing has been encoded\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ agent->id, __FUNCTION__, s);
+ return 0;
+}
+
+
+/***************************************************************************
+ * Functions that handle SPOE actions
+ **************************************************************************/
+/* Helper function to set a variable */
+static void
+spoe_set_var(struct spoe_context *ctx, char *scope, char *name, int len,
+ struct sample *smp)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ char varname[64];
+
+ memset(varname, 0, sizeof(varname));
+ len = snprintf(varname, sizeof(varname), "%s.%s.%.*s",
+ scope, agent->var_pfx, len, name);
+ if (agent->flags & SPOE_FL_FORCE_SET_VAR)
+ vars_set_by_name(varname, len, smp);
+ else
+ vars_set_by_name_ifexist(varname, len, smp);
+}
+
+/* Helper function to unset a variable */
+static void
+spoe_unset_var(struct spoe_context *ctx, char *scope, char *name, int len,
+ struct sample *smp)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ char varname[64];
+
+ memset(varname, 0, sizeof(varname));
+ len = snprintf(varname, sizeof(varname), "%s.%s.%.*s",
+ scope, agent->var_pfx, len, name);
+ vars_unset_by_name_ifexist(varname, len, smp);
+}
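+
+/* Both helpers build the full variable name as "<scope>.<var_pfx>.<name>".
+ * For instance, assuming a hypothetical agent configured with
+ * "option var-prefix my_spoe", an agent setting the variable "score" in the
+ * "txn" scope leads to:
+ *
+ *    snprintf(varname, sizeof(varname), "%s.%s.%.*s",
+ *             "txn", "my_spoe", 5, "score");    // -> "txn.my_spoe.score"
+ *
+ * which can then be read from the configuration with var(txn.my_spoe.score).
+ */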
+
+
+static inline int
+spoe_decode_action_set_var(struct stream *s, struct spoe_context *ctx,
+ char **buf, char *end, int dir)
+{
+ char *str, *scope, *p = *buf;
+ struct sample smp;
+ uint64_t sz;
+ int ret;
+
+ if (p + 2 >= end)
+ goto skip;
+
+ /* SET-VAR requires 3 arguments */
+ if (*p++ != 3)
+ goto skip;
+
+ switch (*p++) {
+ case SPOE_SCOPE_PROC: scope = "proc"; break;
+ case SPOE_SCOPE_SESS: scope = "sess"; break;
+ case SPOE_SCOPE_TXN : scope = "txn"; break;
+ case SPOE_SCOPE_REQ : scope = "req"; break;
+ case SPOE_SCOPE_RES : scope = "res"; break;
+ default: goto skip;
+ }
+
+ if (spoe_decode_buffer(&p, end, &str, &sz) == -1)
+ goto skip;
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+
+ if (spoe_decode_data(&p, end, &smp) == -1)
+ goto skip;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - set-var '%s.%s.%.*s'\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->id,
+ __FUNCTION__, s, scope,
+ ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->var_pfx,
+ (int)sz, str);
+
+ if (smp.data.type == SMP_T_ANY)
+ spoe_unset_var(ctx, scope, str, sz, &smp);
+ else
+ spoe_set_var(ctx, scope, str, sz, &smp);
+
+ ret = (p - *buf);
+ *buf = p;
+ return ret;
+ skip:
+ return 0;
+}
+
+static inline int
+spoe_decode_action_unset_var(struct stream *s, struct spoe_context *ctx,
+ char **buf, char *end, int dir)
+{
+ char *str, *scope, *p = *buf;
+ struct sample smp;
+ uint64_t sz;
+ int ret;
+
+ if (p + 2 >= end)
+ goto skip;
+
+ /* UNSET-VAR requires 2 arguments */
+ if (*p++ != 2)
+ goto skip;
+
+ switch (*p++) {
+ case SPOE_SCOPE_PROC: scope = "proc"; break;
+ case SPOE_SCOPE_SESS: scope = "sess"; break;
+ case SPOE_SCOPE_TXN : scope = "txn"; break;
+ case SPOE_SCOPE_REQ : scope = "req"; break;
+ case SPOE_SCOPE_RES : scope = "res"; break;
+ default: goto skip;
+ }
+
+ if (spoe_decode_buffer(&p, end, &str, &sz) == -1)
+ goto skip;
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - unset-var '%s.%s.%.*s'\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->id,
+ __FUNCTION__, s, scope,
+ ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->var_pfx,
+ (int)sz, str);
+
+ spoe_unset_var(ctx, scope, str, sz, &smp);
+
+ ret = (p - *buf);
+ *buf = p;
+ return ret;
+ skip:
+ return 0;
+}
+
+/* Process SPOE actions for a specific event. It returns 1 on success. If an
+ * error occurred, 0 is returned. */
+static int
+spoe_process_actions(struct stream *s, struct spoe_context *ctx, int dir)
+{
+ char *p, *end;
+ int ret;
+
+ p = b_head(&ctx->buffer);
+ end = p + b_data(&ctx->buffer);
+
+ while (p < end) {
+ enum spoe_action_type type;
+
+ type = *p++;
+ switch (type) {
+ case SPOE_ACT_T_SET_VAR:
+ ret = spoe_decode_action_set_var(s, ctx, &p, end, dir);
+ if (!ret)
+ goto skip;
+ break;
+
+ case SPOE_ACT_T_UNSET_VAR:
+ ret = spoe_decode_action_unset_var(s, ctx, &p, end, dir);
+ if (!ret)
+ goto skip;
+ break;
+
+ default:
+ goto skip;
+ }
+ }
+
+ return 1;
+ skip:
+ return 0;
+}
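+
+/* For reference, the ACK payload decoded above is a sequence of actions,
+ * each starting with its type and its number of arguments (a sketch based on
+ * the decoders above; SCOPE is one byte, NAME a length-prefixed string and
+ * VALUE a typed data blob):
+ *
+ *    SET-VAR   : [SPOE_ACT_T_SET_VAR  ] [3] [SCOPE] [NAME] [VALUE]
+ *    UNSET-VAR : [SPOE_ACT_T_UNSET_VAR] [2] [SCOPE] [NAME]
+ *
+ * Any other action type stops the processing.
+ */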
+
+/***************************************************************************
+ * Functions that process SPOE events
+ **************************************************************************/
+static void
+spoe_update_stats(struct stream *s, struct spoe_agent *agent,
+ struct spoe_context *ctx, int dir)
+{
+ if (ctx->stats.start_ts != 0) {
+ spoe_update_stat_time(&ctx->stats.start_ts, &ctx->stats.t_process);
+ ctx->stats.t_total += ctx->stats.t_process;
+ ctx->stats.request_ts = 0;
+ ctx->stats.queue_ts = 0;
+ ctx->stats.wait_ts = 0;
+ ctx->stats.response_ts = 0;
+ }
+
+ if (agent->var_t_process) {
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+ smp.data.u.sint = ctx->stats.t_process;
+ smp.data.type = SMP_T_SINT;
+
+ spoe_set_var(ctx, "txn", agent->var_t_process,
+ strlen(agent->var_t_process), &smp);
+ }
+
+ if (agent->var_t_total) {
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+ smp.data.u.sint = ctx->stats.t_total;
+ smp.data.type = SMP_T_SINT;
+
+ spoe_set_var(ctx, "txn", agent->var_t_total,
+ strlen(agent->var_t_total), &smp);
+ }
+}
+
+static void
+spoe_handle_processing_error(struct stream *s, struct spoe_agent *agent,
+ struct spoe_context *ctx, int dir)
+{
+ if (agent->eps_max > 0)
+ update_freq_ctr(&agent->rt[tid].err_per_sec, 1);
+
+ if (agent->var_on_error) {
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, s->be, s->sess, s, dir|SMP_OPT_FINAL);
+ smp.data.u.sint = ctx->status_code;
+ smp.data.type = SMP_T_BOOL;
+
+ spoe_set_var(ctx, "txn", agent->var_on_error,
+ strlen(agent->var_on_error), &smp);
+ }
+
+ ctx->state = ((agent->flags & SPOE_FL_CONT_ON_ERR)
+ ? SPOE_CTX_ST_READY
+ : SPOE_CTX_ST_NONE);
+}
+
+static inline int
+spoe_start_processing(struct spoe_agent *agent, struct spoe_context *ctx, int dir)
+{
+ /* If a process is already started for this SPOE context, retry
+ * later. */
+ if (ctx->flags & SPOE_CTX_FL_PROCESS)
+ return 0;
+
+ agent->rt[tid].processing++;
+ ctx->stats.start_ts = now_ns;
+ ctx->stats.request_ts = now_ns;
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+
+ ctx->status_code = 0;
+
+ /* Set the right flag to prevent request and response processing
+ * in same time. */
+ ctx->flags |= ((dir == SMP_OPT_DIR_REQ)
+ ? SPOE_CTX_FL_REQ_PROCESS
+ : SPOE_CTX_FL_RSP_PROCESS);
+ return 1;
+}
+
+static inline void
+spoe_stop_processing(struct spoe_agent *agent, struct spoe_context *ctx)
+{
+ struct spoe_appctx *sa = ctx->spoe_appctx;
+
+ if (!(ctx->flags & SPOE_CTX_FL_PROCESS))
+ return;
+ _HA_ATOMIC_INC(&agent->counters.nb_processed);
+ if (sa) {
+ if (sa->frag_ctx.ctx == ctx) {
+ sa->frag_ctx.ctx = NULL;
+ spoe_wakeup_appctx(sa->owner);
+ }
+ else
+ sa->cur_fpa--;
+ }
+
+ /* Reset the flag to allow next processing */
+ agent->rt[tid].processing--;
+ ctx->flags &= ~(SPOE_CTX_FL_PROCESS|SPOE_CTX_FL_FRAGMENTED);
+
+ /* Reset processing timer */
+ ctx->process_exp = TICK_ETERNITY;
+
+ spoe_release_buffer(&ctx->buffer, &ctx->buffer_wait);
+
+ ctx->spoe_appctx = NULL;
+ ctx->frag_ctx.curmsg = NULL;
+ ctx->frag_ctx.curarg = NULL;
+ ctx->frag_ctx.curoff = 0;
+ ctx->frag_ctx.flags = 0;
+
+ if (!LIST_ISEMPTY(&ctx->list)) {
+ if (ctx->state == SPOE_CTX_ST_SENDING_MSGS)
+ _HA_ATOMIC_DEC(&agent->counters.nb_sending);
+ else
+ _HA_ATOMIC_DEC(&agent->counters.nb_waiting);
+
+ LIST_DELETE(&ctx->list);
+ LIST_INIT(&ctx->list);
+ }
+}
+
+/* Process a list of SPOE messages. First, this function encodes the messages
+ * and sends them to an agent in a NOTIFY frame. Then, it waits for an ACK
+ * frame to process the corresponding actions. It returns 0 while the
+ * processing is ongoing and 1 once it is finished. If an error occurred, -1
+ * is returned. */
+static int
+spoe_process_messages(struct stream *s, struct spoe_context *ctx,
+ struct list *messages, int dir, int type)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ int ret = 1;
+
+ if (ctx->state == SPOE_CTX_ST_ERROR)
+ goto end;
+
+ if (tick_is_expired(ctx->process_exp, now_ms) && ctx->state != SPOE_CTX_ST_DONE) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - failed to process messages: timeout\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ agent->id, __FUNCTION__, s);
+ ctx->status_code = SPOE_CTX_ERR_TOUT;
+ goto end;
+ }
+
+ if (ctx->state == SPOE_CTX_ST_READY) {
+ if (agent->eps_max > 0) {
+ if (!freq_ctr_remain(&agent->rt[tid].err_per_sec, agent->eps_max, 0)) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - skip processing of messages: max EPS reached\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ agent->id, __FUNCTION__, s);
+ goto skip;
+ }
+ }
+
+ if (!tick_isset(ctx->process_exp)) {
+ ctx->process_exp = tick_add_ifset(now_ms, agent->timeout.processing);
+ s->task->expire = tick_first((tick_is_expired(s->task->expire, now_ms) ? 0 : s->task->expire),
+ ctx->process_exp);
+ }
+ ret = spoe_start_processing(agent, ctx, dir);
+ if (!ret)
+ goto out;
+
+ ctx->state = SPOE_CTX_ST_ENCODING_MSGS;
+ /* fall through */
+ }
+
+ if (ctx->state == SPOE_CTX_ST_ENCODING_MSGS) {
+ if (ctx->stats.request_ts == 0)
+ ctx->stats.request_ts = now_ns;
+ if (!spoe_acquire_buffer(&ctx->buffer, &ctx->buffer_wait))
+ goto out;
+ ret = spoe_encode_messages(s, ctx, messages, dir, type);
+ if (ret < 0)
+ goto end;
+ if (!ret)
+ goto skip;
+ if (spoe_queue_context(ctx) < 0)
+ goto end;
+ ctx->state = SPOE_CTX_ST_SENDING_MSGS;
+ }
+
+ if (ctx->state == SPOE_CTX_ST_SENDING_MSGS) {
+ if (ctx->spoe_appctx)
+ spoe_wakeup_appctx(ctx->spoe_appctx->owner);
+ ret = 0;
+ goto out;
+ }
+
+ if (ctx->state == SPOE_CTX_ST_WAITING_ACK) {
+ ret = 0;
+ goto out;
+ }
+
+ if (ctx->state == SPOE_CTX_ST_DONE) {
+ spoe_process_actions(s, ctx, dir);
+ ret = 1;
+ ctx->frame_id++;
+ ctx->state = SPOE_CTX_ST_READY;
+ spoe_update_stat_time(&ctx->stats.response_ts, &ctx->stats.t_response);
+ goto end;
+ }
+
+ out:
+ return ret;
+
+ skip:
+ ctx->stats.start_ts = 0;
+ ctx->state = SPOE_CTX_ST_READY;
+ spoe_stop_processing(agent, ctx);
+ return 1;
+
+ end:
+ spoe_update_stats(s, agent, ctx, dir);
+ spoe_stop_processing(agent, ctx);
+ if (ctx->status_code) {
+ _HA_ATOMIC_INC(&agent->counters.nb_errors);
+ spoe_handle_processing_error(s, agent, ctx, dir);
+ ret = 1;
+ }
+ return ret;
+}
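+
+/* For reference, the stream-side context states driven above, sketched from
+ * the code (on error, spoe_handle_processing_error() brings the context back
+ * to READY when SPOE_FL_CONT_ON_ERR is set on the agent, to NONE otherwise):
+ *
+ *    READY -> ENCODING_MSGS -> SENDING_MSGS -> WAITING_ACK -> DONE -> READY
+ */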
+
+/* Process a SPOE group, i.e. the list of messages attached to the group
+ * <group>. See spoe_process_messages() for details. */
+static int
+spoe_process_group(struct stream *s, struct spoe_context *ctx,
+ struct spoe_group *group, int dir)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ int ret;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - ctx-state=%s - Process messages for group=%s\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state],
+ group->id);
+
+ if (LIST_ISEMPTY(&group->messages))
+ return 1;
+
+ ret = spoe_process_messages(s, ctx, &group->messages, dir, SPOE_MSGS_BY_GROUP);
+ if (ret && ctx->stats.t_process != -1) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - <GROUP:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld %u/%u %u/%u %llu/%llu %u/%u\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, s, group->id, s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process,
+ agent->counters.idles, agent->counters.applets,
+ agent->counters.nb_sending, agent->counters.nb_waiting,
+ agent->counters.nb_errors, agent->counters.nb_processed,
+ agent->rt[tid].processing, read_freq_ctr(&agent->rt[tid].processing_per_sec));
+ if (ctx->status_code || !(conf->agent_fe.options2 & PR_O2_NOLOGNORM))
+ send_log(&conf->agent_fe, (!ctx->status_code ? LOG_NOTICE : LOG_WARNING),
+ "SPOE: [%s] <GROUP:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld %u/%u %u/%u %llu/%llu\n",
+ agent->id, group->id, s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process,
+ agent->counters.idles, agent->counters.applets,
+ agent->counters.nb_sending, agent->counters.nb_waiting,
+ agent->counters.nb_errors, agent->counters.nb_processed);
+ }
+ return ret;
+}
+
+/* Process a SPOE event, i.e. the list of messages attached to the event
+ * <ev>. See spoe_process_messages() for details. */
+static int
+spoe_process_event(struct stream *s, struct spoe_context *ctx,
+ enum spoe_event ev)
+{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
+ int dir, ret;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - ctx-state=%s - Process messages for event=%s\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state],
+ spoe_event_str[ev]);
+
+ dir = ((ev < SPOE_EV_ON_SERVER_SESS) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES);
+
+ if (LIST_ISEMPTY(&(ctx->events[ev])))
+ return 1;
+
+ ret = spoe_process_messages(s, ctx, &(ctx->events[ev]), dir, SPOE_MSGS_BY_EVENT);
+ if (ret && ctx->stats.t_process != -1) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - <EVENT:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld %u/%u %u/%u %llu/%llu %u/%u\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, s, spoe_event_str[ev], s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process,
+ agent->counters.idles, agent->counters.applets,
+ agent->counters.nb_sending, agent->counters.nb_waiting,
+ agent->counters.nb_errors, agent->counters.nb_processed,
+ agent->rt[tid].processing, read_freq_ctr(&agent->rt[tid].processing_per_sec));
+ if (ctx->status_code || !(conf->agent_fe.options2 & PR_O2_NOLOGNORM))
+ send_log(&conf->agent_fe, (!ctx->status_code ? LOG_NOTICE : LOG_WARNING),
+ "SPOE: [%s] <EVENT:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld %u/%u %u/%u %llu/%llu\n",
+ agent->id, spoe_event_str[ev], s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process,
+ agent->counters.idles, agent->counters.applets,
+ agent->counters.nb_sending, agent->counters.nb_waiting,
+ agent->counters.nb_errors, agent->counters.nb_processed);
+ }
+ return ret;
+}
+
+/***************************************************************************
+ * Functions that create/destroy SPOE contexts
+ **************************************************************************/
+static int
+spoe_acquire_buffer(struct buffer *buf, struct buffer_wait *buffer_wait)
+{
+ if (buf->size)
+ return 1;
+
+ if (LIST_INLIST(&buffer_wait->list))
+ LIST_DEL_INIT(&buffer_wait->list);
+
+ if (b_alloc(buf))
+ return 1;
+
+ LIST_APPEND(&th_ctx->buffer_wq, &buffer_wait->list);
+ return 0;
+}
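+
+/* A typical call pattern for the allocator above, as a sketch: callers treat
+ * 0 as "retry later" and rely on <buffer_wait->wakeup_cb> being called once
+ * a buffer becomes available again:
+ *
+ *    if (!spoe_acquire_buffer(&ctx->buffer, &ctx->buffer_wait))
+ *        return 0;  // queued on the buffer wait list, wakeup_cb will retry
+ *    // ... use ctx->buffer ...
+ */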
+
+static void
+spoe_release_buffer(struct buffer *buf, struct buffer_wait *buffer_wait)
+{
+ if (LIST_INLIST(&buffer_wait->list))
+ LIST_DEL_INIT(&buffer_wait->list);
+
+ /* Release the buffer if needed */
+ if (buf->size) {
+ b_free(buf);
+ offer_buffers(buffer_wait->target, 1);
+ }
+}
+
+static int
+spoe_wakeup_context(struct spoe_context *ctx)
+{
+ task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
+ return 1;
+}
+
+static struct spoe_context *
+spoe_create_context(struct stream *s, struct filter *filter)
+{
+ struct spoe_config *conf = FLT_CONF(filter);
+ struct spoe_context *ctx;
+
+ ctx = pool_zalloc(pool_head_spoe_ctx);
+ if (ctx == NULL) {
+ return NULL;
+ }
+ ctx->filter = filter;
+ ctx->state = SPOE_CTX_ST_NONE;
+ ctx->status_code = SPOE_CTX_ERR_NONE;
+ ctx->flags = 0;
+ ctx->events = conf->agent->events;
+ ctx->groups = &conf->agent->groups;
+ ctx->buffer = BUF_NULL;
+ LIST_INIT(&ctx->buffer_wait.list);
+ ctx->buffer_wait.target = ctx;
+ ctx->buffer_wait.wakeup_cb = (int (*)(void *))spoe_wakeup_context;
+ LIST_INIT(&ctx->list);
+
+ ctx->stream_id = 0;
+ ctx->frame_id = 1;
+ ctx->process_exp = TICK_ETERNITY;
+
+ ctx->stats.start_ts = 0;
+ ctx->stats.request_ts = 0;
+ ctx->stats.queue_ts = 0;
+ ctx->stats.wait_ts = 0;
+ ctx->stats.response_ts= 0;
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+ ctx->stats.t_total = 0;
+
+ ctx->strm = s;
+ ctx->state = SPOE_CTX_ST_READY;
+ filter->ctx = ctx;
+
+ return ctx;
+}
+
+static void
+spoe_destroy_context(struct filter *filter)
+{
+ struct spoe_config *conf = FLT_CONF(filter);
+ struct spoe_context *ctx = filter->ctx;
+
+ if (!ctx)
+ return;
+
+ spoe_stop_processing(conf->agent, ctx);
+ pool_free(pool_head_spoe_ctx, ctx);
+ filter->ctx = NULL;
+}
+
+static void
+spoe_reset_context(struct spoe_context *ctx)
+{
+ ctx->state = SPOE_CTX_ST_READY;
+ ctx->flags &= ~(SPOE_CTX_FL_PROCESS|SPOE_CTX_FL_FRAGMENTED);
+
+ ctx->stats.start_ts = 0;
+ ctx->stats.request_ts = 0;
+ ctx->stats.queue_ts = 0;
+ ctx->stats.wait_ts = 0;
+ ctx->stats.response_ts= 0;
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+ ctx->stats.t_total = 0;
+}
+
+
+/***************************************************************************
+ * Hooks that manage the filter lifecycle (init/check/deinit)
+ **************************************************************************/
+/* Signal handler: do a soft stop and wake up all SPOE applets */
+static void
+spoe_sig_stop(struct sig_handler *sh)
+{
+ struct proxy *p;
+
+ p = proxies_list;
+ while (p) {
+ struct flt_conf *fconf;
+
+		/* SPOE filters are not initialized for disabled proxies. Move
+		 * to the next one.
+		 */
+ if (p->flags & PR_FL_DISABLED) {
+ p = p->next;
+ continue;
+ }
+
+ list_for_each_entry(fconf, &p->filter_configs, list) {
+ struct spoe_config *conf;
+ struct spoe_agent *agent;
+ struct spoe_appctx *spoe_appctx;
+ int i;
+
+ if (fconf->id != spoe_filter_id)
+ continue;
+
+ conf = fconf->conf;
+ agent = conf->agent;
+
+ for (i = 0; i < global.nbthread; ++i) {
+ HA_SPIN_LOCK(SPOE_APPLET_LOCK, &agent->rt[i].lock);
+ list_for_each_entry(spoe_appctx, &agent->rt[i].applets, list)
+ spoe_wakeup_appctx(spoe_appctx->owner);
+ HA_SPIN_UNLOCK(SPOE_APPLET_LOCK, &agent->rt[i].lock);
+ }
+ }
+ p = p->next;
+ }
+}
+
+
+/* Initialize the SPOE filter. Returns -1 on error, else 0. */
+static int
+spoe_init(struct proxy *px, struct flt_conf *fconf)
+{
+ struct spoe_config *conf = fconf->conf;
+
+ /* conf->agent_fe was already initialized during the config
+ * parsing. Finish initialization. */
+ conf->agent_fe.last_change = ns_to_sec(now_ns);
+ conf->agent_fe.cap = PR_CAP_FE;
+ conf->agent_fe.mode = PR_MODE_TCP;
+ conf->agent_fe.maxconn = 0;
+ conf->agent_fe.options2 |= PR_O2_INDEPSTR;
+ conf->agent_fe.conn_retries = CONN_RETRIES;
+ conf->agent_fe.accept = frontend_accept;
+ conf->agent_fe.srv = NULL;
+ conf->agent_fe.timeout.client = TICK_ETERNITY;
+ conf->agent_fe.fe_req_ana = AN_REQ_SWITCHING_RULES;
+
+ if (!sighandler_registered) {
+ signal_register_fct(0, spoe_sig_stop, 0);
+ sighandler_registered = 1;
+ }
+
+ fconf->flags |= FLT_CFG_FL_HTX;
+ return 0;
+}
+
+/* Free resources allocated by the SPOE filter. */
+static void
+spoe_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct spoe_config *conf = fconf->conf;
+
+ if (conf) {
+ struct spoe_agent *agent = conf->agent;
+
+ spoe_release_agent(agent);
+ free(conf->id);
+ free(conf);
+ }
+ fconf->conf = NULL;
+}
+
+/* Check configuration of a SPOE filter for a specified proxy.
+ * Return 1 on error, else 0. */
+static int
+spoe_check(struct proxy *px, struct flt_conf *fconf)
+{
+ struct flt_conf *f;
+ struct spoe_config *conf = fconf->conf;
+ struct proxy *target;
+ int i;
+
+ /* Check all SPOE filters for proxy <px> to be sure all SPOE agent names
+	 * are unique */
+ list_for_each_entry(f, &px->filter_configs, list) {
+ struct spoe_config *c = f->conf;
+
+ /* This is not an SPOE filter */
+ if (f->id != spoe_filter_id)
+ continue;
+ /* This is the current SPOE filter */
+ if (f == fconf)
+ continue;
+
+		/* Check the engine id. It must be unique */
+ if (strcmp(conf->id, c->id) == 0) {
+ ha_alert("Proxy %s : duplicated name for SPOE engine '%s'.\n",
+ px->id, conf->id);
+ return 1;
+ }
+ }
+
+ target = proxy_be_by_name(conf->agent->b.name);
+ if (target == NULL) {
+ ha_alert("Proxy %s : unknown backend '%s' used by SPOE agent '%s'"
+ " declared at %s:%d.\n",
+ px->id, conf->agent->b.name, conf->agent->id,
+ conf->agent->conf.file, conf->agent->conf.line);
+ return 1;
+ }
+ if (target->mode != PR_MODE_TCP) {
+		ha_alert("Proxy %s : backend '%s' used by SPOE agent '%s' declared"
+			 " at %s:%d must be in TCP mode.\n",
+ px->id, target->id, conf->agent->id,
+ conf->agent->conf.file, conf->agent->conf.line);
+ return 1;
+ }
+
+ if ((conf->agent->rt = calloc(global.nbthread, sizeof(*conf->agent->rt))) == NULL) {
+ ha_alert("Proxy %s : out of memory initializing SPOE agent '%s' declared at %s:%d.\n",
+ px->id, conf->agent->id, conf->agent->conf.file, conf->agent->conf.line);
+ return 1;
+ }
+ for (i = 0; i < global.nbthread; ++i) {
+ conf->agent->rt[i].engine_id = NULL;
+ conf->agent->rt[i].frame_size = conf->agent->max_frame_size;
+ conf->agent->rt[i].processing = 0;
+ conf->agent->rt[i].idles = 0;
+ LIST_INIT(&conf->agent->rt[i].applets);
+ LIST_INIT(&conf->agent->rt[i].sending_queue);
+ LIST_INIT(&conf->agent->rt[i].waiting_queue);
+ HA_SPIN_INIT(&conf->agent->rt[i].lock);
+ }
+
+ if (postresolve_logger_list(&conf->agent_fe.loggers, "SPOE agent", conf->agent->id) & ERR_CODE)
+ return 1;
+
+ ha_free(&conf->agent->b.name);
+ conf->agent->b.be = target;
+ return 0;
+}
+
+/* Initializes the SPOE filter of a proxy for the current thread.
+ * Returns a negative value if an error occurs, else 0. */
+static int
+spoe_init_per_thread(struct proxy *p, struct flt_conf *fconf)
+{
+ struct spoe_config *conf = fconf->conf;
+ struct spoe_agent *agent = conf->agent;
+
+ agent->rt[tid].engine_id = generate_pseudo_uuid();
+ if (agent->rt[tid].engine_id == NULL)
+ return -1;
+ return 0;
+}
+
+/**************************************************************************
+ * Hooks attached to a stream
+ *************************************************************************/
+/* Called when a filter instance is created and attached to a stream. It creates
+ * the context that will be used to process this stream. */
+static int
+spoe_start(struct stream *s, struct filter *filter)
+{
+ struct spoe_config *conf = FLT_CONF(filter);
+ struct spoe_agent *agent = conf->agent;
+ struct spoe_context *ctx;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, s);
+
+ if ((ctx = spoe_create_context(s, filter)) == NULL) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - failed to create SPOE context\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, s);
+ send_log(&conf->agent_fe, LOG_EMERG,
+ "SPOE: [%s] failed to create SPOE context\n",
+ agent->id);
+ return 0;
+ }
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_TCP_REQ_FE]))
+ filter->pre_analyzers |= AN_REQ_INSPECT_FE;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_TCP_REQ_BE]))
+ filter->pre_analyzers |= AN_REQ_INSPECT_BE;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_TCP_RSP]))
+ filter->pre_analyzers |= AN_RES_INSPECT;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_HTTP_REQ_FE]))
+ filter->pre_analyzers |= AN_REQ_HTTP_PROCESS_FE;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_HTTP_REQ_BE]))
+ filter->pre_analyzers |= AN_REQ_HTTP_PROCESS_BE;
+
+ if (!LIST_ISEMPTY(&ctx->events[SPOE_EV_ON_HTTP_RSP]))
+ filter->pre_analyzers |= AN_RES_HTTP_PROCESS_FE;
+
+ return 1;
+}
+
+/* Called when a filter instance is detached from a stream. It releases the
+ * attached SPOE context. */
+static void
+spoe_stop(struct stream *s, struct filter *filter)
+{
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ ((struct spoe_config *)FLT_CONF(filter))->agent->id,
+ __FUNCTION__, s);
+ spoe_destroy_context(filter);
+}
+
+
+/*
+ * Called when the stream is woken up because of an expired timer.
+ */
+static void
+spoe_check_timeouts(struct stream *s, struct filter *filter)
+{
+ struct spoe_context *ctx = filter->ctx;
+
+ if (tick_is_expired(ctx->process_exp, now_ms))
+ s->pending_events |= TASK_WOKEN_MSG;
+}
+
+/* Called when we are ready to filter data on a channel */
+static int
+spoe_start_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ struct spoe_context *ctx = filter->ctx;
+ int ret = 1;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p - ctx-state=%s"
+ " - ctx-flags=0x%08x\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ ((struct spoe_config *)FLT_CONF(filter))->agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state], ctx->flags);
+
+ if (ctx->state == SPOE_CTX_ST_NONE)
+ goto out;
+
+ if (!(chn->flags & CF_ISRESP)) {
+ if (filter->pre_analyzers & AN_REQ_INSPECT_FE)
+ chn->analysers |= AN_REQ_INSPECT_FE;
+ if (filter->pre_analyzers & AN_REQ_INSPECT_BE)
+ chn->analysers |= AN_REQ_INSPECT_BE;
+
+ if (ctx->flags & SPOE_CTX_FL_CLI_CONNECTED)
+ goto out;
+
+ ctx->stream_id = s->uniq_id;
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_CLIENT_SESS);
+ if (!ret)
+ goto out;
+ ctx->flags |= SPOE_CTX_FL_CLI_CONNECTED;
+ }
+ else {
+ if (filter->pre_analyzers & AN_RES_INSPECT)
+ chn->analysers |= AN_RES_INSPECT;
+
+ if (ctx->flags & SPOE_CTX_FL_SRV_CONNECTED)
+ goto out;
+
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_SERVER_SESS);
+ if (!ret) {
+ channel_dont_read(chn);
+ channel_dont_close(chn);
+ goto out;
+ }
+ ctx->flags |= SPOE_CTX_FL_SRV_CONNECTED;
+ }
+
+ out:
+ return ret;
+}
+
+/* Called before processing happens on a given channel */
+static int
+spoe_chn_pre_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn, unsigned an_bit)
+{
+ struct spoe_context *ctx = filter->ctx;
+ int ret = 1;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p - ctx-state=%s"
+ " - ctx-flags=0x%08x - ana=0x%08x\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ ((struct spoe_config *)FLT_CONF(filter))->agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state],
+ ctx->flags, an_bit);
+
+ if (ctx->state == SPOE_CTX_ST_NONE)
+ goto out;
+
+ switch (an_bit) {
+ case AN_REQ_INSPECT_FE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_TCP_REQ_FE);
+ break;
+ case AN_REQ_INSPECT_BE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_TCP_REQ_BE);
+ break;
+ case AN_RES_INSPECT:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_TCP_RSP);
+ break;
+ case AN_REQ_HTTP_PROCESS_FE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_HTTP_REQ_FE);
+ break;
+ case AN_REQ_HTTP_PROCESS_BE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_HTTP_REQ_BE);
+ break;
+ case AN_RES_HTTP_PROCESS_FE:
+ ret = spoe_process_event(s, ctx, SPOE_EV_ON_HTTP_RSP);
+ break;
+ }
+
+ out:
+ if (!ret && (chn->flags & CF_ISRESP)) {
+ channel_dont_read(chn);
+ channel_dont_close(chn);
+ }
+ return ret;
+}
+
+/* Called when the filtering on the channel ends. */
+static int
+spoe_end_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ struct spoe_context *ctx = filter->ctx;
+
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p - ctx-state=%s"
+ " - ctx-flags=0x%08x\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ ((struct spoe_config *)FLT_CONF(filter))->agent->id,
+ __FUNCTION__, s, spoe_ctx_state_str[ctx->state], ctx->flags);
+
+ if (!(ctx->flags & SPOE_CTX_FL_PROCESS)) {
+ spoe_reset_context(ctx);
+ }
+
+ return 1;
+}
+
+/********************************************************************
+ * Functions that manage the filter initialization
+ ********************************************************************/
+struct flt_ops spoe_ops = {
+ /* Manage SPOE filter, called for each filter declaration */
+ .init = spoe_init,
+ .deinit = spoe_deinit,
+ .check = spoe_check,
+ .init_per_thread = spoe_init_per_thread,
+
+ /* Handle start/stop of SPOE */
+ .attach = spoe_start,
+ .detach = spoe_stop,
+ .check_timeouts = spoe_check_timeouts,
+
+ /* Handle channels activity */
+ .channel_start_analyze = spoe_start_analyze,
+ .channel_pre_analyze = spoe_chn_pre_analyze,
+ .channel_end_analyze = spoe_end_analyze,
+};
+
+
+static int
+cfg_parse_spoe_agent(const char *file, int linenum, char **args, int kwm)
+{
+ const char *err;
+ int i, err_code = 0;
+
+ if ((cfg_scope == NULL && curengine != NULL) ||
+ (cfg_scope != NULL && curengine == NULL) ||
+ (curengine != NULL && cfg_scope != NULL && strcmp(curengine, cfg_scope) != 0))
+ goto out;
+
+ if (strcmp(args[0], "spoe-agent") == 0) { /* new spoe-agent section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for spoe-agent section.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (curagent != NULL) {
+ ha_alert("parsing [%s:%d] : another spoe-agent section previously defined.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if ((curagent = calloc(1, sizeof(*curagent))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curagent->id = strdup(args[1]);
+
+ curagent->conf.file = strdup(file);
+ curagent->conf.line = linenum;
+
+ curagent->timeout.hello = TICK_ETERNITY;
+ curagent->timeout.idle = TICK_ETERNITY;
+ curagent->timeout.processing = TICK_ETERNITY;
+
+ curagent->var_pfx = NULL;
+ curagent->var_on_error = NULL;
+ curagent->var_t_process = NULL;
+ curagent->var_t_total = NULL;
+ curagent->flags = (SPOE_FL_ASYNC | SPOE_FL_PIPELINING | SPOE_FL_SND_FRAGMENTATION);
+ curagent->cps_max = 0;
+ curagent->eps_max = 0;
+ curagent->max_frame_size = MAX_FRAME_SIZE;
+ curagent->max_fpa = 20;
+
+ for (i = 0; i < SPOE_EV_EVENTS; ++i)
+ LIST_INIT(&curagent->events[i]);
+ LIST_INIT(&curagent->groups);
+ LIST_INIT(&curagent->messages);
+ }
+ else if (strcmp(args[0], "use-backend") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects a backend name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ free(curagent->b.name);
+ curagent->b.name = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "messages") == 0) {
+ int cur_arg = 1;
+ while (*args[cur_arg]) {
+ struct spoe_placeholder *ph = NULL;
+
+ list_for_each_entry(ph, &curmphs, list) {
+ if (strcmp(ph->id, args[cur_arg]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-message '%s' already used.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((ph = calloc(1, sizeof(*ph))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ ph->id = strdup(args[cur_arg]);
+ LIST_APPEND(&curmphs, &ph->list);
+ cur_arg++;
+ }
+ }
+ else if (strcmp(args[0], "groups") == 0) {
+ int cur_arg = 1;
+ while (*args[cur_arg]) {
+ struct spoe_placeholder *ph = NULL;
+
+ list_for_each_entry(ph, &curgphs, list) {
+ if (strcmp(ph->id, args[cur_arg]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-group '%s' already used.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((ph = calloc(1, sizeof(*ph))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ ph->id = strdup(args[cur_arg]);
+ LIST_APPEND(&curgphs, &ph->list);
+ cur_arg++;
+ }
+ }
+ else if (strcmp(args[0], "timeout") == 0) {
+ unsigned int *tv = NULL;
+ const char *res;
+ unsigned timeout;
+
+ if (!*args[1]) {
+		ha_alert("parsing [%s:%d] : 'timeout' expects 'hello', 'idle' or 'processing'.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ if (strcmp(args[1], "hello") == 0)
+ tv = &curagent->timeout.hello;
+ else if (strcmp(args[1], "idle") == 0)
+ tv = &curagent->timeout.idle;
+ else if (strcmp(args[1], "processing") == 0)
+ tv = &curagent->timeout.processing;
+ else {
+ ha_alert("parsing [%s:%d] : 'timeout' supports 'hello', 'idle' or 'processing' (got %s).\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : 'timeout %s' expects an integer value (in milliseconds).\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &timeout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s %s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s %s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d] : unexpected character '%c' in 'timeout %s'.\n",
+ file, linenum, *res, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ *tv = MS_TO_TICKS(timeout);
+ }
+ else if (strcmp(args[0], "option") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects an option name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[1], "pipelining") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == 1)
+ curagent->flags &= ~SPOE_FL_PIPELINING;
+ else
+ curagent->flags |= SPOE_FL_PIPELINING;
+ goto out;
+ }
+ else if (strcmp(args[1], "async") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == 1)
+ curagent->flags &= ~SPOE_FL_ASYNC;
+ else
+ curagent->flags |= SPOE_FL_ASYNC;
+ goto out;
+ }
+ else if (strcmp(args[1], "send-frag-payload") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == 1)
+ curagent->flags &= ~SPOE_FL_SND_FRAGMENTATION;
+ else
+ curagent->flags |= SPOE_FL_SND_FRAGMENTATION;
+ goto out;
+ }
+ else if (strcmp(args[1], "dontlog-normal") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ if (kwm == 1)
+ curpxopts2 &= ~PR_O2_NOLOGNORM;
+ else
+ curpxopts2 |= PR_O2_NOLOGNORM;
+ goto out;
+ }
+
+		/* The following options do not support negation */
+ if (kwm == 1) {
+ ha_alert("parsing [%s:%d]: negation is not supported for option '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[1], "var-prefix") == 0) {
+ char *tmp;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d]: '%s %s' expects a value.\n",
+ file, linenum, args[0],
+ args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ tmp = args[2];
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ ha_alert("parsing [%s:%d]: '%s %s' only supports [a-zA-Z0-9_.] chars.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ tmp++;
+ }
+ curagent->var_pfx = strdup(args[2]);
+ }
+ else if (strcmp(args[1], "force-set-var") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->flags |= SPOE_FL_FORCE_SET_VAR;
+ }
+ else if (strcmp(args[1], "continue-on-error") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->flags |= SPOE_FL_CONT_ON_ERR;
+ }
+ else if (strcmp(args[1], "set-on-error") == 0) {
+ char *tmp;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d]: '%s %s' expects a value.\n",
+ file, linenum, args[0],
+ args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ tmp = args[2];
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ ha_alert("parsing [%s:%d]: '%s %s' only supports [a-zA-Z0-9_.] chars.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ tmp++;
+ }
+ curagent->var_on_error = strdup(args[2]);
+ }
+ else if (strcmp(args[1], "set-process-time") == 0) {
+ char *tmp;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d]: '%s %s' expects a value.\n",
+ file, linenum, args[0],
+ args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ tmp = args[2];
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ ha_alert("parsing [%s:%d]: '%s %s' only supports [a-zA-Z0-9_.] chars.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ tmp++;
+ }
+ curagent->var_t_process = strdup(args[2]);
+ }
+ else if (strcmp(args[1], "set-total-time") == 0) {
+ char *tmp;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d]: '%s %s' expects a value.\n",
+ file, linenum, args[0],
+ args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(2, file, linenum, args, &err_code))
+ goto out;
+ tmp = args[2];
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ ha_alert("parsing [%s:%d]: '%s %s' only supports [a-zA-Z0-9_.] chars.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ tmp++;
+ }
+ curagent->var_t_total = strdup(args[2]);
+ }
+ else {
+ ha_alert("parsing [%s:%d]: option '%s' is not supported.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "maxconnrate") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->cps_max = atol(args[1]);
+ }
+ else if (strcmp(args[0], "maxerrrate") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->eps_max = atol(args[1]);
+ }
+ else if (strcmp(args[0], "max-frame-size") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->max_frame_size = atol(args[1]);
+ if (curagent->max_frame_size < MIN_FRAME_SIZE ||
+ curagent->max_frame_size > MAX_FRAME_SIZE) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive integer argument in the range [%d, %d].\n",
+ file, linenum, args[0], MIN_FRAME_SIZE, MAX_FRAME_SIZE);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "max-waiting-frames") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ curagent->max_fpa = atol(args[1]);
+ if (curagent->max_fpa < 1) {
+ ha_alert("parsing [%s:%d] : '%s' expects a positive integer argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "register-var-names") == 0) {
+ int cur_arg;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects one or more variable names.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cur_arg = 1;
+ while (*args[cur_arg]) {
+ struct spoe_var_placeholder *vph;
+
+ if ((vph = calloc(1, sizeof(*vph))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if ((vph->name = strdup(args[cur_arg])) == NULL) {
+ free(vph);
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ LIST_APPEND(&curvars, &vph->list);
+ cur_arg++;
+ }
+ }
+ else if (strcmp(args[0], "log") == 0) {
+ char *errmsg = NULL;
+
+ if (!parse_logger(args, &curloggers, (kwm == 1), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (*args[0]) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in spoe-agent section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ out:
+ return err_code;
+}
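+
+/* Illustrative spoe-agent section accepted by the parser above. All names
+ * and values are examples only:
+ *
+ *   spoe-agent my-agent
+ *       messages    check-request
+ *       option      var-prefix my
+ *       timeout     hello      2s
+ *       timeout     idle       2m
+ *       timeout     processing 500ms
+ *       use-backend spoe-backend
+ */
+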
+static int
+cfg_parse_spoe_group(const char *file, int linenum, char **args, int kwm)
+{
+ struct spoe_group *grp;
+ const char *err;
+ int err_code = 0;
+
+ if ((cfg_scope == NULL && curengine != NULL) ||
+ (cfg_scope != NULL && curengine == NULL) ||
+ (curengine != NULL && cfg_scope != NULL && strcmp(curengine, cfg_scope) != 0))
+ goto out;
+
+ if (strcmp(args[0], "spoe-group") == 0) { /* new spoe-group section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for spoe-group section.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ list_for_each_entry(grp, &curgrps, list) {
+ if (strcmp(grp->id, args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-group section '%s' has the same"
+ " name as another one declared at %s:%d.\n",
+ file, linenum, args[1], grp->conf.file, grp->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((curgrp = calloc(1, sizeof(*curgrp))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curgrp->id = strdup(args[1]);
+ curgrp->conf.file = strdup(file);
+ curgrp->conf.line = linenum;
+ LIST_INIT(&curgrp->phs);
+ LIST_INIT(&curgrp->messages);
+ LIST_APPEND(&curgrps, &curgrp->list);
+ }
+ else if (strcmp(args[0], "messages") == 0) {
+ int cur_arg = 1;
+ while (*args[cur_arg]) {
+ struct spoe_placeholder *ph = NULL;
+
+ list_for_each_entry(ph, &curgrp->phs, list) {
+ if (strcmp(ph->id, args[cur_arg]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-message '%s' already used.\n",
+ file, linenum, args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((ph = calloc(1, sizeof(*ph))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ ph->id = strdup(args[cur_arg]);
+ LIST_APPEND(&curgrp->phs, &ph->list);
+ cur_arg++;
+ }
+ }
+ else if (*args[0]) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in spoe-group section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ out:
+ return err_code;
+}
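+
+/* Illustrative spoe-group section accepted by the parser above (names are
+ * examples only):
+ *
+ *   spoe-group my-group
+ *       messages check-request check-response
+ */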
+
+static int
+cfg_parse_spoe_message(const char *file, int linenum, char **args, int kwm)
+{
+ struct spoe_message *msg;
+ struct spoe_arg *arg;
+ const char *err;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if ((cfg_scope == NULL && curengine != NULL) ||
+ (cfg_scope != NULL && curengine == NULL) ||
+ (curengine != NULL && cfg_scope != NULL && strcmp(curengine, cfg_scope) != 0))
+ goto out;
+
+ if (strcmp(args[0], "spoe-message") == 0) { /* new spoe-message section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for spoe-message section.\n",
+ file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ list_for_each_entry(msg, &curmsgs, list) {
+ if (strcmp(msg->id, args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: spoe-message section '%s' has the same"
+ " name as another one declared at %s:%d.\n",
+ file, linenum, args[1], msg->conf.file, msg->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if ((curmsg = calloc(1, sizeof(*curmsg))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ curmsg->id = strdup(args[1]);
+ curmsg->id_len = strlen(curmsg->id);
+ curmsg->event = SPOE_EV_NONE;
+ curmsg->conf.file = strdup(file);
+ curmsg->conf.line = linenum;
+ curmsg->nargs = 0;
+ LIST_INIT(&curmsg->args);
+ LIST_INIT(&curmsg->acls);
+ LIST_INIT(&curmsg->by_evt);
+ LIST_INIT(&curmsg->by_grp);
+ LIST_APPEND(&curmsgs, &curmsg->list);
+ }
+ else if (strcmp(args[0], "args") == 0) {
+ int cur_arg = 1;
+
+ curproxy->conf.args.ctx = ARGC_SPOE;
+ curproxy->conf.args.file = file;
+ curproxy->conf.args.line = linenum;
+ while (*args[cur_arg]) {
+ char *delim = strchr(args[cur_arg], '=');
+ int idx = 0;
+
+ if ((arg = calloc(1, sizeof(*arg))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (!delim) {
+ arg->name = NULL;
+ arg->name_len = 0;
+ delim = args[cur_arg];
+ }
+ else {
+ arg->name = my_strndup(args[cur_arg], delim - args[cur_arg]);
+ arg->name_len = delim - args[cur_arg];
+ delim++;
+ }
+ arg->expr = sample_parse_expr((char*[]){delim, NULL},
+ &idx, file, linenum, &errmsg,
+ &curproxy->conf.args, NULL);
+ if (arg->expr == NULL) {
+ ha_alert("parsing [%s:%d] : '%s': %s.\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(arg->name);
+ free(arg);
+ goto out;
+ }
+ curmsg->nargs++;
+ LIST_APPEND(&curmsg->args, &arg->list);
+ cur_arg++;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+ else if (strcmp(args[0], "acl") == 0) {
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in acl name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strcasecmp(args[1], "or") == 0) {
+ ha_alert("parsing [%s:%d] : acl name '%s' will never match. 'or' is used to express a "
+ "logical disjunction within a condition.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (parse_acl((const char **)args + 1, &curmsg->acls, &errmsg, &curproxy->conf.args, file, linenum) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing ACL '%s' : %s.\n",
+ file, linenum, args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "event") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing event name.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ /* if (alertif_too_many_args(1, file, linenum, args, &err_code)) */
+ /* goto out; */
+
+ if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_CLIENT_SESS]) == 0)
+ curmsg->event = SPOE_EV_ON_CLIENT_SESS;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_SERVER_SESS]) == 0)
+ curmsg->event = SPOE_EV_ON_SERVER_SESS;
+
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_TCP_REQ_FE]) == 0)
+ curmsg->event = SPOE_EV_ON_TCP_REQ_FE;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_TCP_REQ_BE]) == 0)
+ curmsg->event = SPOE_EV_ON_TCP_REQ_BE;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_TCP_RSP]) == 0)
+ curmsg->event = SPOE_EV_ON_TCP_RSP;
+
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_HTTP_REQ_FE]) == 0)
+ curmsg->event = SPOE_EV_ON_HTTP_REQ_FE;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_HTTP_REQ_BE]) == 0)
+ curmsg->event = SPOE_EV_ON_HTTP_REQ_BE;
+ else if (strcmp(args[1], spoe_event_str[SPOE_EV_ON_HTTP_RSP]) == 0)
+ curmsg->event = SPOE_EV_ON_HTTP_RSP;
+ else {
+ ha_alert("parsing [%s:%d] : unknown event '%s'.\n",
+ file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (strcmp(args[2], "if") == 0 || strcmp(args[2], "unless") == 0) {
+ struct acl_cond *cond;
+
+ cond = build_acl_cond(file, linenum, &curmsg->acls,
+ curproxy, (const char **)args+2,
+ &errmsg);
+ if (cond == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while "
+ "parsing an 'event %s' condition : %s.\n",
+ file, linenum, args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curmsg->cond = cond;
+ }
+ else if (*args[2]) {
+ ha_alert("parsing [%s:%d]: 'event %s' expects either 'if' "
+ "or 'unless' followed by a condition but found '%s'.\n",
+ file, linenum, args[1], args[2]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+	else if (*args[0]) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in spoe-message section.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ out:
+ free(errmsg);
+ return err_code;
+}
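+
+/* Illustrative spoe-message section accepted by the parser above. Names are
+ * examples; the event name is assumed to match an entry of spoe_event_str[]:
+ *
+ *   spoe-message check-request
+ *       args   ip=src port=src_port
+ *       event  on-frontend-http-request
+ */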
+
+/* Return -1 on error, else 0 */
+static int
+parse_spoe_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct list backup_sections;
+ struct spoe_config *conf;
+ struct spoe_message *msg, *msgback;
+ struct spoe_group *grp, *grpback;
+ struct spoe_placeholder *ph, *phback;
+ struct spoe_var_placeholder *vph, *vphback;
+ struct logger *logger, *loggerback;
+ char *file = NULL, *engine = NULL;
+ int ret, pos = *cur_arg + 1;
+
+ LIST_INIT(&curmsgs);
+ LIST_INIT(&curgrps);
+ LIST_INIT(&curmphs);
+ LIST_INIT(&curgphs);
+ LIST_INIT(&curvars);
+ LIST_INIT(&curloggers);
+ curpxopts = 0;
+ curpxopts2 = 0;
+
+ conf = calloc(1, sizeof(*conf));
+ if (conf == NULL) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto error;
+ }
+ conf->proxy = px;
+
+ while (*args[pos]) {
+ if (strcmp(args[pos], "config") == 0) {
+ if (!*args[pos+1]) {
+ memprintf(err, "'%s' : '%s' option without value",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ file = args[pos+1];
+ pos += 2;
+ }
+ else if (strcmp(args[pos], "engine") == 0) {
+ if (!*args[pos+1]) {
+ memprintf(err, "'%s' : '%s' option without value",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ engine = args[pos+1];
+ pos += 2;
+ }
+ else {
+ memprintf(err, "unknown keyword '%s'", args[pos]);
+ goto error;
+ }
+ }
+ if (file == NULL) {
+ memprintf(err, "'%s' : missing config file", args[*cur_arg]);
+ goto error;
+ }
+
+ /* backup sections and register SPOE sections */
+ LIST_INIT(&backup_sections);
+ cfg_backup_sections(&backup_sections);
+ cfg_register_section("spoe-agent", cfg_parse_spoe_agent, NULL);
+ cfg_register_section("spoe-group", cfg_parse_spoe_group, NULL);
+ cfg_register_section("spoe-message", cfg_parse_spoe_message, NULL);
+
+ /* Parse SPOE filter configuration file */
+ BUG_ON(px != curproxy);
+ curengine = engine;
+ curagent = NULL;
+ curmsg = NULL;
+ ret = readcfgfile(file);
+
+ /* unregister SPOE sections and restore previous sections */
+ cfg_unregister_sections();
+ cfg_restore_sections(&backup_sections);
+
+ if (ret == -1) {
+ memprintf(err, "Could not open configuration file %s : %s",
+ file, strerror(errno));
+ goto error;
+ }
+ if (ret & (ERR_ABORT|ERR_FATAL)) {
+ memprintf(err, "Error(s) found in configuration file %s", file);
+ goto error;
+ }
+
+ /* Check SPOE agent */
+ if (curagent == NULL) {
+ memprintf(err, "No SPOE agent found in file %s", file);
+ goto error;
+ }
+ if (curagent->b.name == NULL) {
+ memprintf(err, "No backend declared for SPOE agent '%s' declared at %s:%d",
+ curagent->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ }
+ if (curagent->timeout.hello == TICK_ETERNITY ||
+ curagent->timeout.idle == TICK_ETERNITY ||
+ curagent->timeout.processing == TICK_ETERNITY) {
+		ha_warning("Proxy '%s': missing timeouts for SPOE agent '%s' declared at %s:%d.\n"
+			   " | While not strictly invalid, such a configuration will certainly cause\n"
+			   " | various problems. To fix this, please ensure that all the following\n"
+ " | timeouts are set to a non-zero value: 'hello', 'idle', 'processing'.\n",
+ px->id, curagent->id, curagent->conf.file, curagent->conf.line);
+ }
+ if (curagent->var_pfx == NULL) {
+ char *tmp = curagent->id;
+
+ while (*tmp) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ memprintf(err, "Invalid variable prefix '%s' for SPOE agent '%s' declared at %s:%d. "
+ "Use 'option var-prefix' to set it. Only [a-zA-Z0-9_.] chars are supported.\n",
+ curagent->id, curagent->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ }
+ tmp++;
+ }
+ curagent->var_pfx = strdup(curagent->id);
+ }
+
+ if (curagent->var_on_error) {
+ struct arg arg;
+
+ trash.data = snprintf(trash.area, trash.size, "txn.%s.%s",
+ curagent->var_pfx, curagent->var_on_error);
+
+ arg.type = ARGT_STR;
+ arg.data.str.area = trash.area;
+ arg.data.str.data = trash.data;
+ arg.data.str.size = 0; /* Set it to 0 to not release it in vars_check_arg() */
+ if (!vars_check_arg(&arg, err)) {
+ memprintf(err, "SPOE agent '%s': failed to register variable %s.%s (%s)",
+ curagent->id, curagent->var_pfx, curagent->var_on_error, *err);
+ goto error;
+ }
+ }
+
+ if (curagent->var_t_process) {
+ struct arg arg;
+
+ trash.data = snprintf(trash.area, trash.size, "txn.%s.%s",
+ curagent->var_pfx, curagent->var_t_process);
+
+ arg.type = ARGT_STR;
+ arg.data.str.area = trash.area;
+ arg.data.str.data = trash.data;
+ arg.data.str.size = 0; /* Set it to 0 to not release it in vars_check_arg() */
+ if (!vars_check_arg(&arg, err)) {
+ memprintf(err, "SPOE agent '%s': failed to register variable %s.%s (%s)",
+ curagent->id, curagent->var_pfx, curagent->var_t_process, *err);
+ goto error;
+ }
+ }
+
+ if (curagent->var_t_total) {
+ struct arg arg;
+
+ trash.data = snprintf(trash.area, trash.size, "txn.%s.%s",
+ curagent->var_pfx, curagent->var_t_total);
+
+ arg.type = ARGT_STR;
+ arg.data.str.area = trash.area;
+ arg.data.str.data = trash.data;
+ arg.data.str.size = 0; /* Set it to 0 to not release it in vars_check_arg() */
+ if (!vars_check_arg(&arg, err)) {
+ memprintf(err, "SPOE agent '%s': failed to register variable %s.%s (%s)",
+ curagent->id, curagent->var_pfx, curagent->var_t_process, *err);
+ goto error;
+ }
+ }
+
+ if (LIST_ISEMPTY(&curmphs) && LIST_ISEMPTY(&curgphs)) {
+ ha_warning("Proxy '%s': No message/group used by SPOE agent '%s' declared at %s:%d.\n",
+ px->id, curagent->id, curagent->conf.file, curagent->conf.line);
+ goto finish;
+ }
+
+ /* Replace placeholders by the corresponding messages for the SPOE
+ * agent */
+ list_for_each_entry(ph, &curmphs, list) {
+ list_for_each_entry(msg, &curmsgs, list) {
+ struct spoe_arg *arg;
+ unsigned int where;
+
+ if (strcmp(msg->id, ph->id) == 0) {
+ if ((px->cap & (PR_CAP_FE|PR_CAP_BE)) == (PR_CAP_FE|PR_CAP_BE)) {
+ if (msg->event == SPOE_EV_ON_TCP_REQ_BE)
+ msg->event = SPOE_EV_ON_TCP_REQ_FE;
+ if (msg->event == SPOE_EV_ON_HTTP_REQ_BE)
+ msg->event = SPOE_EV_ON_HTTP_REQ_FE;
+ }
+ if (!(px->cap & PR_CAP_FE) && (msg->event == SPOE_EV_ON_CLIENT_SESS ||
+ msg->event == SPOE_EV_ON_TCP_REQ_FE ||
+ msg->event == SPOE_EV_ON_HTTP_REQ_FE)) {
+ ha_warning("Proxy '%s': frontend event used on a backend proxy at %s:%d.\n",
+ px->id, msg->conf.file, msg->conf.line);
+ goto next_mph;
+ }
+ if (msg->event == SPOE_EV_NONE) {
+ ha_warning("Proxy '%s': Ignore SPOE message '%s' without event at %s:%d.\n",
+ px->id, msg->id, msg->conf.file, msg->conf.line);
+ goto next_mph;
+ }
+
+ where = 0;
+ switch (msg->event) {
+ case SPOE_EV_ON_CLIENT_SESS:
+ where |= SMP_VAL_FE_CON_ACC;
+ break;
+
+ case SPOE_EV_ON_TCP_REQ_FE:
+ where |= SMP_VAL_FE_REQ_CNT;
+ break;
+
+ case SPOE_EV_ON_HTTP_REQ_FE:
+ where |= SMP_VAL_FE_HRQ_HDR;
+ break;
+
+ case SPOE_EV_ON_TCP_REQ_BE:
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_REQ_CNT;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_REQ_CNT;
+ break;
+
+ case SPOE_EV_ON_HTTP_REQ_BE:
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+ break;
+
+ case SPOE_EV_ON_SERVER_SESS:
+ where |= SMP_VAL_BE_SRV_CON;
+ break;
+
+ case SPOE_EV_ON_TCP_RSP:
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_RES_CNT;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_RES_CNT;
+ break;
+
+ case SPOE_EV_ON_HTTP_RSP:
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRS_HDR;
+ break;
+
+ default:
+ break;
+ }
+
+ list_for_each_entry(arg, &msg->args, list) {
+ if (!(arg->expr->fetch->val & where)) {
+ memprintf(err, "Ignore SPOE message '%s' at %s:%d: "
+ "some args extract information from '%s', "
+ "none of which is available here ('%s')",
+ msg->id, msg->conf.file, msg->conf.line,
+ sample_ckp_names(arg->expr->fetch->use),
+ sample_ckp_names(where));
+ goto error;
+ }
+ }
+
+ msg->agent = curagent;
+ LIST_APPEND(&curagent->events[msg->event], &msg->by_evt);
+ goto next_mph;
+ }
+ }
+	memprintf(err, "SPOE agent '%s' tries to use the undefined SPOE message '%s' at %s:%d",
+ curagent->id, ph->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ next_mph:
+ continue;
+ }
+
+ /* Replace placeholders by the corresponding groups for the SPOE
+ * agent */
+ list_for_each_entry(ph, &curgphs, list) {
+ list_for_each_entry_safe(grp, grpback, &curgrps, list) {
+ if (strcmp(grp->id, ph->id) == 0) {
+ grp->agent = curagent;
+ LIST_DELETE(&grp->list);
+ LIST_APPEND(&curagent->groups, &grp->list);
+ goto next_aph;
+ }
+ }
+	memprintf(err, "SPOE agent '%s' tries to use the undefined SPOE group '%s' at %s:%d",
+ curagent->id, ph->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ next_aph:
+ continue;
+ }
+
+ /* Replace placeholders by the corresponding message for each SPOE
+ * group of the SPOE agent */
+ list_for_each_entry(grp, &curagent->groups, list) {
+ list_for_each_entry_safe(ph, phback, &grp->phs, list) {
+ list_for_each_entry(msg, &curmsgs, list) {
+ if (strcmp(msg->id, ph->id) == 0) {
+ if (msg->group != NULL) {
+ memprintf(err, "SPOE message '%s' already belongs to "
+						  "the SPOE group '%s' declared at %s:%d",
+ msg->id, msg->group->id,
+ msg->group->conf.file,
+ msg->group->conf.line);
+ goto error;
+ }
+
+				/* Scopes for arguments are not checked for now. They will
+				 * be checked only if a rule uses the corresponding SPOE group. */
+ msg->agent = curagent;
+ msg->group = grp;
+ LIST_DELETE(&ph->list);
+ LIST_APPEND(&grp->messages, &msg->by_grp);
+ goto next_mph_grp;
+ }
+ }
+		memprintf(err, "SPOE group '%s' tries to use the undefined SPOE message '%s' at %s:%d",
+ grp->id, ph->id, curagent->conf.file, curagent->conf.line);
+ goto error;
+ next_mph_grp:
+ continue;
+ }
+ }
+
+ finish:
+ /* move curmsgs to the agent message list */
+ curmsgs.n->p = &curagent->messages;
+ curmsgs.p->n = &curagent->messages;
+ curagent->messages = curmsgs;
+ LIST_INIT(&curmsgs);
+
+ conf->id = strdup(engine ? engine : curagent->id);
+ conf->agent = curagent;
+ curagent->spoe_conf = conf;
+
+ /* Start agent's proxy initialization here. It will be finished during
+ * the filter init. */
+ memset(&conf->agent_fe, 0, sizeof(conf->agent_fe));
+ init_new_proxy(&conf->agent_fe);
+ conf->agent_fe.id = conf->agent->id;
+ conf->agent_fe.parent = conf->agent;
+ conf->agent_fe.options |= curpxopts;
+ conf->agent_fe.options2 |= curpxopts2;
+
+ list_for_each_entry_safe(logger, loggerback, &curloggers, list) {
+ LIST_DELETE(&logger->list);
+ LIST_APPEND(&conf->agent_fe.loggers, &logger->list);
+ }
+
+ list_for_each_entry_safe(ph, phback, &curmphs, list) {
+ LIST_DELETE(&ph->list);
+ spoe_release_placeholder(ph);
+ }
+ list_for_each_entry_safe(ph, phback, &curgphs, list) {
+ LIST_DELETE(&ph->list);
+ spoe_release_placeholder(ph);
+ }
+ list_for_each_entry_safe(vph, vphback, &curvars, list) {
+ struct arg arg;
+
+ trash.data = snprintf(trash.area, trash.size, "proc.%s.%s",
+ curagent->var_pfx, vph->name);
+
+ arg.type = ARGT_STR;
+ arg.data.str.area = trash.area;
+ arg.data.str.data = trash.data;
+ arg.data.str.size = 0; /* Set it to 0 to not release it in vars_check_arg() */
+ if (!vars_check_arg(&arg, err)) {
+ memprintf(err, "SPOE agent '%s': failed to register variable %s.%s (%s)",
+ curagent->id, curagent->var_pfx, vph->name, *err);
+ goto error;
+ }
+
+ LIST_DELETE(&vph->list);
+ free(vph->name);
+ free(vph);
+ }
+ list_for_each_entry_safe(grp, grpback, &curgrps, list) {
+ LIST_DELETE(&grp->list);
+ spoe_release_group(grp);
+ }
+ *cur_arg = pos;
+ fconf->id = spoe_filter_id;
+ fconf->ops = &spoe_ops;
+ fconf->conf = conf;
+ return 0;
+
+ error:
+ spoe_release_agent(curagent);
+ list_for_each_entry_safe(ph, phback, &curmphs, list) {
+ LIST_DELETE(&ph->list);
+ spoe_release_placeholder(ph);
+ }
+ list_for_each_entry_safe(ph, phback, &curgphs, list) {
+ LIST_DELETE(&ph->list);
+ spoe_release_placeholder(ph);
+ }
+ list_for_each_entry_safe(vph, vphback, &curvars, list) {
+ LIST_DELETE(&vph->list);
+ free(vph->name);
+ free(vph);
+ }
+ list_for_each_entry_safe(grp, grpback, &curgrps, list) {
+ LIST_DELETE(&grp->list);
+ spoe_release_group(grp);
+ }
+ list_for_each_entry_safe(msg, msgback, &curmsgs, list) {
+ LIST_DELETE(&msg->list);
+ spoe_release_message(msg);
+ }
+ list_for_each_entry_safe(logger, loggerback, &curloggers, list) {
+ LIST_DELETE(&logger->list);
+ free(logger);
+ }
+ free(conf);
+ return -1;
+}
+
+/* Send the messages of a SPOE group. This is the action_ptr callback of a
+ * rule associated with a "send-spoe-group" action.
+ *
+ * It returns ACT_RET_CONT if processing is finished (with or without error),
+ * or ACT_RET_YIELD if the action is still in progress. */
+static enum act_return
+spoe_send_group(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct filter *filter;
+ struct spoe_agent *agent = NULL;
+ struct spoe_group *group = NULL;
+ struct spoe_context *ctx = NULL;
+ int ret, dir;
+
+ list_for_each_entry(filter, &s->strm_flt.filters, list) {
+ if (filter->config == rule->arg.act.p[0]) {
+ agent = rule->arg.act.p[2];
+ group = rule->arg.act.p[3];
+ ctx = filter->ctx;
+ break;
+ }
+ }
+ if (agent == NULL || group == NULL || ctx == NULL)
+ return ACT_RET_CONT;
+ if (ctx->state == SPOE_CTX_ST_NONE)
+ return ACT_RET_CONT;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_SES: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: dir = SMP_OPT_DIR_RES; break;
+ default:
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+			    " - internal error while executing send-spoe-group\n",
+ (int)date.tv_sec, (int)date.tv_usec, agent->id,
+ __FUNCTION__, s);
+		send_log(px, LOG_ERR, "SPOE: [%s] internal error while executing send-spoe-group\n",
+ agent->id);
+ return ACT_RET_CONT;
+ }
+
+ ret = spoe_process_group(s, ctx, group, dir);
+ if (ret == 1)
+ return ACT_RET_CONT;
+ else if (ret == 0) {
+ if (flags & ACT_OPT_FINAL) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - failed to process group '%s': interrupted by caller\n",
+ (int)date.tv_sec, (int)date.tv_usec,
+ agent->id, __FUNCTION__, s, group->id);
+ ctx->status_code = SPOE_CTX_ERR_INTERRUPT;
+ spoe_stop_processing(agent, ctx);
+ spoe_handle_processing_error(s, agent, ctx, dir);
+ return ACT_RET_CONT;
+ }
+ return ACT_RET_YIELD;
+ }
+ else
+ return ACT_RET_CONT;
+}
+
+/* Check a "send-spoe-group" action. Here, we try to find the real SPOE
+ * group associated with <rule>. The format of a rule using the
+ * 'send-spoe-group' action is:
+ *
+ * (http|tcp)-(request|response) send-spoe-group <engine-id> <group-id>
+ *
+ * So, we loop on each SPOE filter configured for the proxy <px> to find the
+ * SPOE engine matching <engine-id>. Then, we try to find the group matching
+ * <group-id>. Finally, we check all messages referenced by the SPOE group.
+ *
+ * The function returns 1 on success; otherwise it returns 0 and <err> is
+ * filled.
+ */
+static int
+check_send_spoe_group(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct flt_conf *fconf;
+ struct spoe_config *conf;
+ struct spoe_agent *agent = NULL;
+ struct spoe_group *group;
+ struct spoe_message *msg;
+ char *engine_id = rule->arg.act.p[0];
+ char *group_id = rule->arg.act.p[1];
+ unsigned int where = 0;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_SES: where = SMP_VAL_FE_SES_ACC; break;
+ case ACT_F_TCP_REQ_CNT: where = SMP_VAL_FE_REQ_CNT; break;
+ case ACT_F_TCP_RES_CNT: where = SMP_VAL_BE_RES_CNT; break;
+ case ACT_F_HTTP_REQ: where = SMP_VAL_FE_HRQ_HDR; break;
+ case ACT_F_HTTP_RES: where = SMP_VAL_BE_HRS_HDR; break;
+ default:
+ memprintf(err,
+ "internal error, unexpected rule->from=%d, please report this bug!",
+ rule->from);
+ goto error;
+ }
+
+ /* Try to find the SPOE engine by checking all SPOE filters for proxy
+ * <px> */
+ list_for_each_entry(fconf, &px->filter_configs, list) {
+ conf = fconf->conf;
+
+ /* This is not an SPOE filter */
+ if (fconf->id != spoe_filter_id)
+ continue;
+
+		/* This is the right engine */
+ if (strcmp(conf->id, engine_id) == 0) {
+ agent = conf->agent;
+ break;
+ }
+ }
+ if (agent == NULL) {
+ memprintf(err, "unable to find SPOE engine '%s' used by the send-spoe-group '%s'",
+ engine_id, group_id);
+ goto error;
+ }
+
+ /* Try to find the right group */
+ list_for_each_entry(group, &agent->groups, list) {
+		/* This is the right group */
+ if (strcmp(group->id, group_id) == 0)
+ break;
+ }
+ if (&group->list == &agent->groups) {
+ memprintf(err, "unable to find SPOE group '%s' into SPOE engine '%s' configuration",
+ group_id, engine_id);
+ goto error;
+ }
+
+	/* The group was found. Now check its messages and their
+	 * arguments */
+ list_for_each_entry(msg, &group->messages, by_grp) {
+ struct spoe_arg *arg;
+
+ list_for_each_entry(arg, &msg->args, list) {
+ if (!(arg->expr->fetch->val & where)) {
+ memprintf(err, "Invalid SPOE message '%s' used by SPOE group '%s' at %s:%d: "
+					  "some args extract information from '%s', "
+ "none of which is available here ('%s')",
+ msg->id, group->id, msg->conf.file, msg->conf.line,
+ sample_ckp_names(arg->expr->fetch->use),
+ sample_ckp_names(where));
+ goto error;
+ }
+ }
+ }
+
+ free(engine_id);
+ free(group_id);
+ rule->arg.act.p[0] = fconf; /* Associate filter config with the rule */
+ rule->arg.act.p[1] = conf; /* Associate SPOE config with the rule */
+ rule->arg.act.p[2] = agent; /* Associate SPOE agent with the rule */
+ rule->arg.act.p[3] = group; /* Associate SPOE group with the rule */
+ return 1;
+
+ error:
+ free(engine_id);
+ free(group_id);
+ return 0;
+}
+
+/* Parse 'send-spoe-group' action following the format:
+ *
+ * ... send-spoe-group <engine-id> <group-id>
+ *
+ * It returns ACT_RET_PRS_ERR on failure and <err> is filled with an error
+ * message. Otherwise, it returns ACT_RET_PRS_OK and the engine and group
+ * ids are saved to be used later, when the rule is checked.
+ */
+static enum act_parse_ret
+parse_send_spoe_group(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ if (!*args[*orig_arg] || !*args[*orig_arg+1] ||
+ (*args[*orig_arg+2] && strcmp(args[*orig_arg+2], "if") != 0 && strcmp(args[*orig_arg+2], "unless") != 0)) {
+ memprintf(err, "expects 2 arguments: <engine-id> <group-id>");
+ return ACT_RET_PRS_ERR;
+ }
+ rule->arg.act.p[0] = strdup(args[*orig_arg]); /* Copy the SPOE engine id */
+	rule->arg.act.p[1] = strdup(args[*orig_arg+1]); /* Copy the SPOE group id */
+
+ (*orig_arg) += 2;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = spoe_send_group;
+ rule->check_ptr = check_send_spoe_group;
+ return ACT_RET_PRS_OK;
+}
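+
+/* Usage sketch for the action parsed above (engine and group names are
+ * illustrative):
+ *
+ *   http-request send-spoe-group my-engine my-group
+ */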
+
+
+/* Declare the filter parser for the "spoe" keyword */
+static struct flt_kw_list flt_kws = { "SPOE", { }, {
+ { "spoe", parse_spoe_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &flt_kws);
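+
+/* Usage sketch for the "spoe" filter keyword registered above (engine name
+ * and config file path are illustrative):
+ *
+ *   frontend my-frontend
+ *       filter spoe engine my-engine config /etc/haproxy/spoe.cfg
+ */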
+
+/* Declare the action parsers for the "send-spoe-group" keyword */
+static struct action_kw_list tcp_req_action_kws = { { }, {
+ { "send-spoe-group", parse_send_spoe_group },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_action_kws);
+
+static struct action_kw_list tcp_res_action_kws = { { }, {
+ { "send-spoe-group", parse_send_spoe_group },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_action_kws);
+
+static struct action_kw_list http_req_action_kws = { { }, {
+ { "send-spoe-group", parse_send_spoe_group },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_action_kws);
+
+static struct action_kw_list http_res_action_kws = { { }, {
+ { "send-spoe-group", parse_send_spoe_group },
+ { /* END */ },
+ }
+};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_action_kws);
diff --git a/src/flt_trace.c b/src/flt_trace.c
new file mode 100644
index 0000000..bbadfe2
--- /dev/null
+++ b/src/flt_trace.c
@@ -0,0 +1,675 @@
+/*
+ * Stream filters related variables and functions.
+ *
+ * Copyright (C) 2015 Qualys Inc., Christopher Faulet <cfaulet@qualys.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+
+#include <haproxy/api.h>
+#include <haproxy/channel-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/stream.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+const char *trace_flt_id = "trace filter";
+
+struct flt_ops trace_ops;
+
+#define TRACE_F_QUIET 0x00000001
+#define TRACE_F_RAND_FWD 0x00000002
+#define TRACE_F_HEXDUMP 0x00000004
+
+struct trace_config {
+ struct proxy *proxy;
+ char *name;
+ unsigned int flags;
+};
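+
+/* The flags above are expected to be set from the "trace" filter
+ * declaration; a hypothetical example (the keyword parser appears later in
+ * this file):
+ *
+ *   filter trace name TRC hexdump
+ */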
+
+#define FLT_TRACE(conf, fmt, ...) \
+ do { \
+ if (!(conf->flags & TRACE_F_QUIET)) \
+ fprintf(stderr, "%d.%06d [%-20s] " fmt "\n", \
+ (int)date.tv_sec, (int)date.tv_usec, (conf)->name,\
+ ##__VA_ARGS__); \
+ } while (0)
+
+#define FLT_STRM_TRACE(conf, strm, fmt, ...) \
+ do { \
+ if (!(conf->flags & TRACE_F_QUIET)) \
+ fprintf(stderr, "%d.%06d [%-20s] [strm %p(%x) 0x%08x 0x%08x] " fmt "\n", \
+ (int)date.tv_sec, (int)date.tv_usec, (conf)->name, \
+ strm, (strm ? ((struct stream *)strm)->uniq_id : ~0U), \
+ (strm ? strm->req.analysers : 0), (strm ? strm->res.analysers : 0), \
+ ##__VA_ARGS__); \
+ } while (0)
+
+
+static const char *
+channel_label(const struct channel *chn)
+{
+ return (chn->flags & CF_ISRESP) ? "RESPONSE" : "REQUEST";
+}
+
+static const char *
+proxy_mode(const struct stream *s)
+{
+ struct proxy *px = (s->flags & SF_BE_ASSIGNED ? s->be : strm_fe(s));
+
+ return ((px->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
+}
+
+static const char *
+stream_pos(const struct stream *s)
+{
+ return (s->flags & SF_BE_ASSIGNED) ? "backend" : "frontend";
+}
+
+static const char *
+filter_type(const struct filter *f)
+{
+ return (f->flags & FLT_FL_IS_BACKEND_FILTER) ? "backend" : "frontend";
+}
+
+static void
+trace_hexdump(struct ist ist)
+{
+ int i, j, padding;
+
+ padding = ((ist.len % 16) ? (16 - ist.len % 16) : 0);
+ for (i = 0; i < ist.len + padding; i++) {
+ if (!(i % 16))
+ fprintf(stderr, "\t0x%06x: ", i);
+ else if (!(i % 8))
+ fprintf(stderr, " ");
+
+ if (i < ist.len)
+ fprintf(stderr, "%02x ", (unsigned char)*(ist.ptr+i));
+ else
+ fprintf(stderr, " ");
+
+ /* print ASCII dump */
+ if (i % 16 == 15) {
+ fprintf(stderr, " |");
+ for(j = i - 15; j <= i && j < ist.len; j++)
+ fprintf(stderr, "%c", (isprint((unsigned char)*(ist.ptr+j)) ? *(ist.ptr+j) : '.'));
+ fprintf(stderr, "|\n");
+ }
+ }
+}
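+
+/* Sample output row of the dump above (16 bytes per row, an extra space
+ * after the 8th byte, ASCII gutter on the right):
+ *
+ *   0x000000: 48 54 54 50 2f 31 2e 31  20 32 30 30 20 4f 4b 0d  |HTTP/1.1 200 OK.|
+ */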
+
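+/* Hex-dumps <len> bytes of <buf> starting at <offset>, first copying the
+ * two possibly wrapping contiguous blocks into a flat local array. */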
+static void
+trace_raw_hexdump(struct buffer *buf, unsigned int offset, unsigned int len)
+{
+ unsigned char p[len];
+ int block1, block2;
+
+ block1 = len;
+ if (block1 > b_contig_data(buf, offset))
+ block1 = b_contig_data(buf, offset);
+ block2 = len - block1;
+
+ memcpy(p, b_peek(buf, offset), block1);
+ memcpy(p+block1, b_orig(buf), block2);
+ trace_hexdump(ist2(p, len));
+}
+
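+/* Hex-dumps the DATA blocks of <htx> over a <len>-byte window starting at
+ * <offset>. */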
+static void
+trace_htx_hexdump(struct htx *htx, unsigned int offset, unsigned int len)
+{
+ struct htx_blk *blk;
+
+ for (blk = htx_get_first_blk(htx); blk && len; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ struct ist v;
+
+ if (offset >= sz) {
+ offset -= sz;
+ continue;
+ }
+
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, offset);
+ offset = 0;
+
+ v = isttrim(v, len);
+ len -= v.len;
+ if (type == HTX_BLK_DATA)
+ trace_hexdump(v);
+ }
+}
+
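+/* Returns the amount of contiguous DATA found in <htx> starting at
+ * <offset>, also skipping UNUSED blocks. The <len> argument is currently
+ * unused. */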
+static unsigned int
+trace_get_htx_datalen(struct htx *htx, unsigned int offset, unsigned int len)
+{
+ struct htx_blk *blk;
+ struct htx_ret htxret = htx_find_offset(htx, offset);
+ uint32_t data = 0;
+
+ blk = htxret.blk;
+ if (blk && htxret.ret && htx_get_blk_type(blk) == HTX_BLK_DATA) {
+ data += htxret.ret;
+ blk = htx_get_next_blk(htx, blk);
+ }
+ while (blk) {
+ if (htx_get_blk_type(blk) == HTX_BLK_UNUSED)
+ goto next;
+ else if (htx_get_blk_type(blk) != HTX_BLK_DATA)
+ break;
+ data += htx_get_blksz(blk);
+ next:
+ blk = htx_get_next_blk(htx, blk);
+ }
+ return data;
+}
+
+/***************************************************************************
+ * Hooks that manage the filter lifecycle (init/check/deinit)
+ **************************************************************************/
+/* Initialize the filter. Returns -1 on error, else 0. */
+static int
+trace_init(struct proxy *px, struct flt_conf *fconf)
+{
+ struct trace_config *conf = fconf->conf;
+
+ if (conf->name)
+ memprintf(&conf->name, "%s/%s", conf->name, px->id);
+ else
+ memprintf(&conf->name, "TRACE/%s", px->id);
+
+ fconf->flags |= FLT_CFG_FL_HTX;
+ fconf->conf = conf;
+
+ FLT_TRACE(conf, "filter initialized [quiet=%s - fwd random=%s - hexdump=%s]",
+ ((conf->flags & TRACE_F_QUIET) ? "true" : "false"),
+ ((conf->flags & TRACE_F_RAND_FWD) ? "true" : "false"),
+ ((conf->flags & TRACE_F_HEXDUMP) ? "true" : "false"));
+ return 0;
+}
+
+/* Free resources allocated by the trace filter. */
+static void
+trace_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+ struct trace_config *conf = fconf->conf;
+
+ if (conf) {
+ FLT_TRACE(conf, "filter deinitialized");
+ free(conf->name);
+ free(conf);
+ }
+ fconf->conf = NULL;
+}
+
+/* Check configuration of a trace filter for a specified proxy.
+ * Return 1 on error, else 0. */
+static int
+trace_check(struct proxy *px, struct flt_conf *fconf)
+{
+ return 0;
+}
+
+/* Initialize the filter for each thread. Return -1 on error, else 0. */
+static int
+trace_init_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ struct trace_config *conf = fconf->conf;
+
+ FLT_TRACE(conf, "filter initialized for thread tid %u", tid);
+ return 0;
+}
+
+/* Free resources allocated by the trace filter for each thread. */
+static void
+trace_deinit_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ struct trace_config *conf = fconf->conf;
+
+ if (conf)
+ FLT_TRACE(conf, "filter deinitialized for thread tid %u", tid);
+}
+
+/**************************************************************************
+ * Hooks to handle start/stop of streams
+ *************************************************************************/
+/* Called when a filter instance is created and attached to a stream */
+static int
+trace_attach(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: filter-type=%s",
+ __FUNCTION__, filter_type(filter));
+
+ return 1;
+}
+
+/* Called when a filter instance is detached from a stream, just before its
+ * destruction */
+static void
+trace_detach(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: filter-type=%s",
+ __FUNCTION__, filter_type(filter));
+}
+
+/* Called when a stream is created */
+static int
+trace_stream_start(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s",
+ __FUNCTION__);
+ return 0;
+}
+
+
+/* Called when a backend is set for a stream */
+static int
+trace_stream_set_backend(struct stream *s, struct filter *filter,
+ struct proxy *be)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: backend=%s",
+ __FUNCTION__, be->id);
+ return 0;
+}
+
+/* Called when a stream is destroyed */
+static void
+trace_stream_stop(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s",
+ __FUNCTION__);
+}
+
+/* Called when the stream is woken up because of an expired timer */
+static void
+trace_check_timeouts(struct stream *s, struct filter *filter)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s",
+ __FUNCTION__);
+}
+
+/**************************************************************************
+ * Hooks to handle channels activity
+ *************************************************************************/
+/* Called when analyze starts for a given channel */
+static int
+trace_chn_start_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s));
+ filter->pre_analyzers |= (AN_REQ_ALL | AN_RES_ALL);
+ filter->post_analyzers |= (AN_REQ_ALL | AN_RES_ALL);
+ register_data_filter(s, chn, filter);
+ return 1;
+}
+
+/* Called before a processing happens on a given channel */
+static int
+trace_chn_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn, unsigned an_bit)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+ char *ana;
+
+ switch (an_bit) {
+ case AN_REQ_INSPECT_FE:
+ ana = "AN_REQ_INSPECT_FE";
+ break;
+ case AN_REQ_WAIT_HTTP:
+ ana = "AN_REQ_WAIT_HTTP";
+ break;
+ case AN_REQ_HTTP_BODY:
+ ana = "AN_REQ_HTTP_BODY";
+ break;
+ case AN_REQ_HTTP_PROCESS_FE:
+ ana = "AN_REQ_HTTP_PROCESS_FE";
+ break;
+ case AN_REQ_SWITCHING_RULES:
+ ana = "AN_REQ_SWITCHING_RULES";
+ break;
+ case AN_REQ_INSPECT_BE:
+ ana = "AN_REQ_INSPECT_BE";
+ break;
+ case AN_REQ_HTTP_PROCESS_BE:
+ ana = "AN_REQ_HTTP_PROCESS_BE";
+ break;
+ case AN_REQ_SRV_RULES:
+ ana = "AN_REQ_SRV_RULES";
+ break;
+ case AN_REQ_HTTP_INNER:
+ ana = "AN_REQ_HTTP_INNER";
+ break;
+ case AN_REQ_HTTP_TARPIT:
+ ana = "AN_REQ_HTTP_TARPIT";
+ break;
+ case AN_REQ_STICKING_RULES:
+ ana = "AN_REQ_STICKING_RULES";
+ break;
+ case AN_REQ_PRST_RDP_COOKIE:
+ ana = "AN_REQ_PRST_RDP_COOKIE";
+ break;
+ case AN_REQ_HTTP_XFER_BODY:
+ ana = "AN_REQ_HTTP_XFER_BODY";
+ break;
+ case AN_RES_INSPECT:
+ ana = "AN_RES_INSPECT";
+ break;
+ case AN_RES_WAIT_HTTP:
+ ana = "AN_RES_WAIT_HTTP";
+ break;
+ case AN_RES_HTTP_PROCESS_FE: // AN_RES_HTTP_PROCESS_BE
+ ana = "AN_RES_HTTP_PROCESS_FE/BE";
+ break;
+ case AN_RES_STORE_RULES:
+ ana = "AN_RES_STORE_RULES";
+ break;
+ case AN_RES_HTTP_XFER_BODY:
+ ana = "AN_RES_HTTP_XFER_BODY";
+ break;
+ default:
+ ana = "unknown";
+ }
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s) - "
+ "analyzer=%s - step=%s",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s),
+ ana, ((chn->analysers & an_bit) ? "PRE" : "POST"));
+ return 1;
+}
+
+/* Called when analyze ends for a given channel */
+static int
+trace_chn_end_analyze(struct stream *s, struct filter *filter,
+ struct channel *chn)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s));
+ return 1;
+}
+
+/**************************************************************************
+ * Hooks to filter HTTP messages
+ *************************************************************************/
+static int
+trace_http_headers(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_sl *sl = http_get_stline(htx);
+ int32_t pos;
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)\t%.*s %.*s %.*s",
+ __FUNCTION__,
+ channel_label(msg->chn), proxy_mode(s), stream_pos(s),
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n, v;
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ n = htx_get_blk_name(htx, blk);
+ v = htx_get_blk_value(htx, blk);
+ FLT_STRM_TRACE(conf, s, "\t%.*s: %.*s",
+ (int)n.len, n.ptr, (int)v.len, v.ptr);
+ }
+ return 1;
+}
+
+static int
+trace_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+ int ret = len;
+
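+	/* with random forwarding enabled, forward a random amount of the
+	 * available bytes rather than everything, to exercise the callers'
+	 * handling of partial forwards; 0 or an overshoot of the available
+	 * payload falls back to forwarding <len>.
+	 */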
+ if (ret && (conf->flags & TRACE_F_RAND_FWD)) {
+ unsigned int data = trace_get_htx_datalen(htxbuf(&msg->chn->buf), offset, len);
+
+ if (data) {
+ ret = ha_random() % (ret+1);
+ if (!ret || ret >= data)
+ ret = len;
+ }
+ }
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s) - "
+ "offset=%u - len=%u - forward=%d",
+ __FUNCTION__,
+ channel_label(msg->chn), proxy_mode(s), stream_pos(s),
+ offset, len, ret);
+
+ if (conf->flags & TRACE_F_HEXDUMP)
+ trace_htx_hexdump(htxbuf(&msg->chn->buf), offset, ret);
+
+ if (ret != len)
+ task_wakeup(s->task, TASK_WOKEN_MSG);
+ return ret;
+}
+
+static int
+trace_http_end(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__,
+ channel_label(msg->chn), proxy_mode(s), stream_pos(s));
+ return 1;
+}
+
+static void
+trace_http_reset(struct stream *s, struct filter *filter,
+ struct http_msg *msg)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__,
+ channel_label(msg->chn), proxy_mode(s), stream_pos(s));
+}
+
+static void
+trace_http_reply(struct stream *s, struct filter *filter, short status,
+ const struct buffer *msg)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s)",
+ __FUNCTION__, "-", proxy_mode(s), stream_pos(s));
+}
+
+/**************************************************************************
+ * Hooks to filter TCP data
+ *************************************************************************/
+static int
+trace_tcp_payload(struct stream *s, struct filter *filter, struct channel *chn,
+ unsigned int offset, unsigned int len)
+{
+ struct trace_config *conf = FLT_CONF(filter);
+ int ret = len;
+
+ if (s->flags & SF_HTX) {
+ if (ret && (conf->flags & TRACE_F_RAND_FWD)) {
+ unsigned int data = trace_get_htx_datalen(htxbuf(&chn->buf), offset, len);
+
+ if (data) {
+ ret = ha_random() % (ret+1);
+ if (!ret || ret >= data)
+ ret = len;
+ }
+ }
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s) - "
+ "offset=%u - len=%u - forward=%d",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s),
+ offset, len, ret);
+
+ if (conf->flags & TRACE_F_HEXDUMP)
+ trace_htx_hexdump(htxbuf(&chn->buf), offset, ret);
+ }
+ else {
+
+ if (ret && (conf->flags & TRACE_F_RAND_FWD))
+ ret = ha_random() % (ret+1);
+
+ FLT_STRM_TRACE(conf, s, "%-25s: channel=%-10s - mode=%-5s (%s) - "
+ "offset=%u - len=%u - forward=%d",
+ __FUNCTION__,
+ channel_label(chn), proxy_mode(s), stream_pos(s),
+ offset, len, ret);
+
+ if (conf->flags & TRACE_F_HEXDUMP)
+ trace_raw_hexdump(&chn->buf, offset, ret);
+ }
+
+ if (ret != len)
+ task_wakeup(s->task, TASK_WOKEN_MSG);
+ return ret;
+}
+/********************************************************************
+ * Functions that manage the filter initialization
+ ********************************************************************/
+struct flt_ops trace_ops = {
+ /* Manage trace filter, called for each filter declaration */
+ .init = trace_init,
+ .deinit = trace_deinit,
+ .check = trace_check,
+ .init_per_thread = trace_init_per_thread,
+ .deinit_per_thread = trace_deinit_per_thread,
+
+ /* Handle start/stop of streams */
+ .attach = trace_attach,
+ .detach = trace_detach,
+ .stream_start = trace_stream_start,
+ .stream_set_backend = trace_stream_set_backend,
+ .stream_stop = trace_stream_stop,
+ .check_timeouts = trace_check_timeouts,
+
+ /* Handle channels activity */
+ .channel_start_analyze = trace_chn_start_analyze,
+ .channel_pre_analyze = trace_chn_analyze,
+ .channel_post_analyze = trace_chn_analyze,
+ .channel_end_analyze = trace_chn_end_analyze,
+
+ /* Filter HTTP requests and responses */
+ .http_headers = trace_http_headers,
+ .http_payload = trace_http_payload,
+ .http_end = trace_http_end,
+ .http_reset = trace_http_reset,
+ .http_reply = trace_http_reply,
+
+ /* Filter TCP data */
+ .tcp_payload = trace_tcp_payload,
+};
+
+/* Return -1 on error, else 0 */
+static int
+parse_trace_flt(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct trace_config *conf;
+ int pos = *cur_arg;
+
+ conf = calloc(1, sizeof(*conf));
+ if (!conf) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ return -1;
+ }
+ conf->proxy = px;
+ conf->flags = 0;
+ if (strcmp(args[pos], "trace") == 0) {
+ pos++;
+
+ while (*args[pos]) {
+ if (strcmp(args[pos], "name") == 0) {
+ if (!*args[pos + 1]) {
+ memprintf(err, "'%s' : '%s' option without value",
+ args[*cur_arg], args[pos]);
+ goto error;
+ }
+ conf->name = strdup(args[pos + 1]);
+ if (!conf->name) {
+ memprintf(err, "%s: out of memory", args[*cur_arg]);
+ goto error;
+ }
+ pos++;
+ }
+ else if (strcmp(args[pos], "quiet") == 0)
+ conf->flags |= TRACE_F_QUIET;
+ else if (strcmp(args[pos], "random-parsing") == 0)
+ ; // ignore
+ else if (strcmp(args[pos], "random-forwarding") == 0)
+ conf->flags |= TRACE_F_RAND_FWD;
+ else if (strcmp(args[pos], "hexdump") == 0)
+ conf->flags |= TRACE_F_HEXDUMP;
+ else
+ break;
+ pos++;
+ }
+ *cur_arg = pos;
+ fconf->id = trace_flt_id;
+ fconf->ops = &trace_ops;
+ }
+
+ fconf->conf = conf;
+ return 0;
+
+ error:
+ if (conf->name)
+ free(conf->name);
+ free(conf);
+ return -1;
+}
+
+/* Declare the filter parser for "trace" keyword */
+static struct flt_kw_list flt_kws = { "TRACE", { }, {
+ { "trace", parse_trace_flt, NULL },
+ { NULL, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, flt_register_keywords, &flt_kws);
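+
+/* Example configuration (illustrative): the parser above accepts the
+ * following keywords after "filter trace" in a proxy section, e.g.:
+ *
+ *	frontend www
+ *		bind :8000
+ *		filter trace name MY-TRACE hexdump
+ *		default_backend servers
+ *
+ * "quiet" disables the output, "random-forwarding" forwards random amounts
+ * of data to exercise streaming code paths, "hexdump" dumps the forwarded
+ * payload, and "random-parsing" is accepted but ignored.
+ */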
diff --git a/src/freq_ctr.c b/src/freq_ctr.c
new file mode 100644
index 0000000..1361333
--- /dev/null
+++ b/src/freq_ctr.c
@@ -0,0 +1,218 @@
+/*
+ * Event rate calculation functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/tools.h>
+
+/* Update a frequency counter by <inc> incremental units. It is automatically
+ * rotated if the period is over. It is important that it correctly initializes
+ * a null area. This one works on frequency counters which have a period
+ * different from one second. It relies on the process-wide clock that is
+ * guaranteed to be monotonic. It's important to avoid forced rotates between
+ * threads. A faster wrapper (update_freq_ctr_period) should be used instead,
+ * which uses the thread's local time whenever possible and falls back to this
+ * one when needed (less than 0.003% of the time).
+ */
+uint update_freq_ctr_period_slow(struct freq_ctr *ctr, uint period, uint inc)
+{
+ uint curr_tick;
+ uint32_t now_ms_tmp;
+
+ /* atomically update the counter if still within the period, even if
+ * a rotation is in progress (no big deal).
+ */
+ for (;; __ha_cpu_relax()) {
+ curr_tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ now_ms_tmp = HA_ATOMIC_LOAD(&global_now_ms);
+
+ if (now_ms_tmp - curr_tick < period)
+ return HA_ATOMIC_ADD_FETCH(&ctr->curr_ctr, inc);
+
+ /* a rotation is needed. While extremely rare, contention may
+ * happen because it will be triggered on time, and all threads
+ * see the time change simultaneously.
+ */
+ if (!(curr_tick & 1) &&
+ HA_ATOMIC_CAS(&ctr->curr_tick, &curr_tick, curr_tick | 0x1))
+ break;
+ }
+
+ /* atomically switch the new period into the old one without losing any
+ * potential concurrent update. We're the only one performing the rotate
+ * (locked above), others are only adding positive values to curr_ctr.
+ */
+ HA_ATOMIC_STORE(&ctr->prev_ctr, HA_ATOMIC_XCHG(&ctr->curr_ctr, inc));
+ curr_tick += period;
+ if (likely(now_ms_tmp - curr_tick >= period)) {
+ /* we missed at least two periods */
+ HA_ATOMIC_STORE(&ctr->prev_ctr, 0);
+ curr_tick = now_ms_tmp;
+ }
+
+ /* release the lock and update the time in case of rotate. */
+ HA_ATOMIC_STORE(&ctr->curr_tick, curr_tick & ~1);
+ return inc;
+}
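+
+/* Usage sketch (illustrative only): accounting events on a counter with a
+ * 10s period. Periods are expressed in milliseconds, consistently with
+ * global_now_ms, and <my_ctr> stands for a hypothetical zero-initialized
+ * counter; real callers normally go through the faster wrapper mentioned
+ * above:
+ *
+ *	struct freq_ctr my_ctr = { };
+ *
+ *	update_freq_ctr_period_slow(&my_ctr, 10000, 1); // account one event
+ */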
+
+/* Returns the total number of events over the current + last period, including
+ * a number of already pending events <pend>. The average frequency will be
+ * obtained by dividing the output by <period>. This is essentially made to
+ * ease implementation of higher-level read functions.
+ *
+ * As a special case, if pend < 0, it's assumed there are no pending
+ * events and a flapping correction must be applied at the end. This is used by
+ * read_freq_ctr_period() to avoid reporting ups and downs on low-frequency
+ * events when the past value is <= 1.
+ */
+ullong freq_ctr_total(const struct freq_ctr *ctr, uint period, int pend)
+{
+ ullong curr, past, old_curr, old_past;
+ uint tick, old_tick;
+ int remain;
+
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+ past = HA_ATOMIC_LOAD(&ctr->prev_ctr);
+
+ while (1) {
+ if (tick & 0x1) // change in progress
+ goto redo0;
+
+ old_tick = tick;
+ old_curr = curr;
+ old_past = past;
+
+ /* now let's load the values a second time and make sure they
+ * did not change, which will indicate it was a stable reading.
+ */
+
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ if (tick & 0x1) // change in progress
+ goto redo0;
+
+ if (tick != old_tick)
+ goto redo1;
+
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+ if (curr != old_curr)
+ goto redo2;
+
+ past = HA_ATOMIC_LOAD(&ctr->prev_ctr);
+ if (past != old_past)
+ goto redo3;
+
+ /* all values match between two loads, they're stable, let's
+ * quit now.
+ */
+ break;
+ redo0:
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ redo1:
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+ redo2:
+ past = HA_ATOMIC_LOAD(&ctr->prev_ctr);
+ redo3:
+ __ha_cpu_relax();
+ };
+
+ remain = tick + period - HA_ATOMIC_LOAD(&global_now_ms);
+ if (unlikely(remain < 0)) {
+ /* We're past the first period, check if we can still report a
+ * part of last period or if we're too far away.
+ */
+ remain += period;
+ past = (remain >= 0) ? curr : 0;
+ curr = 0;
+ }
+
+ if (pend < 0) {
+ /* enable flapping correction at very low rates */
+ pend = 0;
+ if (!curr && past <= 1)
+ return past * period;
+ }
+
+ /* compute the total number of confirmed events over the period */
+ return past * remain + (curr + pend) * period;
+}
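+
+/* Illustrative sketch: a period-based read function can be built on top of
+ * freq_ctr_total() by dividing its scaled output by the period; passing
+ * pend=-1 enables the flapping correction described above (this is what
+ * read_freq_ctr_period(), mentioned above, does):
+ *
+ *	static inline uint my_read_freq_ctr_period(const struct freq_ctr *ctr,
+ *	                                           uint period)
+ *	{
+ *		return freq_ctr_total(ctr, period, -1) / period;
+ *	}
+ */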
+
+/* Returns the excess of events (may be negative) over the current period for
+ * target frequency <freq>. It returns 0 if the counter is in the future or if
+ * the counter is empty. The result considers the position of the current time
+ * within the current period.
+ *
+ * The caller may safely add new events if result is negative or null.
+ */
+int freq_ctr_overshoot_period(const struct freq_ctr *ctr, uint period, uint freq)
+{
+ ullong curr, old_curr;
+ uint tick, old_tick;
+ int elapsed;
+
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+
+ while (1) {
+ if (tick & 0x1) // change in progress
+ goto redo0;
+
+ old_tick = tick;
+ old_curr = curr;
+
+ /* now let's load the values a second time and make sure they
+ * did not change, which will indicate it was a stable reading.
+ */
+
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ if (tick & 0x1) // change in progress
+ goto redo0;
+
+ if (tick != old_tick)
+ goto redo1;
+
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+ if (curr != old_curr)
+ goto redo2;
+
+ /* all values match between two loads, they're stable, let's
+ * quit now.
+ */
+ break;
+ redo0:
+ tick = HA_ATOMIC_LOAD(&ctr->curr_tick);
+ redo1:
+ curr = HA_ATOMIC_LOAD(&ctr->curr_ctr);
+ redo2:
+ __ha_cpu_relax();
+ };
+
+ if (!curr && !tick) {
+ /* The counter is empty, there is no overshoot */
+ return 0;
+ }
+
+ elapsed = HA_ATOMIC_LOAD(&global_now_ms) - tick;
+	if (unlikely(elapsed < 0 || elapsed > period)) {
+		/* The counter is in the future, or the elapsed time is
+		 * higher than the period: there is no overshoot.
+		 */
+		return 0;
+	}
+
+ return curr - div64_32((uint64_t)elapsed * freq, period);
+}
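+
+/* Usage sketch (illustrative): a caller enforcing at most <freq> events per
+ * <period> milliseconds may only account a new event when no overshoot is
+ * reported:
+ *
+ *	if (freq_ctr_overshoot_period(&ctr, period, freq) <= 0)
+ *		update_freq_ctr_period_slow(&ctr, period, 1); // accept event
+ */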
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/frontend.c b/src/frontend.c
new file mode 100644
index 0000000..ad2e39e
--- /dev/null
+++ b/src/frontend.c
@@ -0,0 +1,339 @@
+/*
+ * Frontend variables and functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/chunk.h>
+#include <haproxy/connection.h>
+#include <haproxy/fd.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/log.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+
+
+/* Finish a stream accept() for a proxy (TCP or HTTP). It returns a negative
+ * value in case of a critical failure which must cause the listener to be
+ * disabled, a positive or null value in case of success.
+ */
+int frontend_accept(struct stream *s)
+{
+ const struct sockaddr_storage *src, *dst;
+ struct session *sess = s->sess;
+ struct connection *conn = objt_conn(sess->origin);
+ struct listener *l = sess->listener;
+ struct proxy *fe = sess->fe;
+
+ if ((fe->mode == PR_MODE_TCP || fe->mode == PR_MODE_HTTP)
+ && (!LIST_ISEMPTY(&fe->loggers))) {
+ if (likely(!LIST_ISEMPTY(&fe->logformat))) {
+ /* we have the client ip */
+ if (s->logs.logwait & LW_CLIP)
+ if (!(s->logs.logwait &= ~(LW_CLIP|LW_INIT)))
+ s->do_log(s);
+ }
+ else if (conn) {
+ src = sc_src(s->scf);
+ if (!src)
+ send_log(fe, LOG_INFO, "Connect from unknown source to listener %d (%s/%s)\n",
+ l->luid, fe->id, (fe->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
+ else {
+ char pn[INET6_ADDRSTRLEN], sn[INET6_ADDRSTRLEN];
+ int port;
+
+ switch (addr_to_str(src, pn, sizeof(pn))) {
+ case AF_INET:
+ case AF_INET6:
+ dst = sc_dst(s->scf);
+ if (dst) {
+ addr_to_str(dst, sn, sizeof(sn));
+ port = get_host_port(dst);
+ } else {
+ strlcpy2(sn, "undetermined address", sizeof(sn));
+ port = 0;
+ }
+ send_log(fe, LOG_INFO, "Connect from %s:%d to %s:%d (%s/%s)\n",
+ pn, get_host_port(src),
+ sn, port,
+ fe->id, (fe->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
+ break;
+ case AF_UNIX:
+ /* UNIX socket, only the destination is known */
+ send_log(fe, LOG_INFO, "Connect to unix:%d (%s/%s)\n",
+ l->luid,
+ fe->id, (fe->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
+ break;
+ }
+ }
+ }
+ }
+
+ if (unlikely((global.mode & MODE_DEBUG) && conn &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
+ char pn[INET6_ADDRSTRLEN];
+ char alpn[16] = "<none>";
+ const char *alpn_str = NULL;
+ int alpn_len;
+
+ /* try to report the ALPN value when available (also works for NPN) */
+ if (conn == sc_conn(s->scf)) {
+ if (conn_get_alpn(conn, &alpn_str, &alpn_len) && alpn_str) {
+ int len = MIN(alpn_len, sizeof(alpn) - 1);
+ memcpy(alpn, alpn_str, len);
+ alpn[len] = 0;
+ }
+ }
+
+ src = sc_src(s->scf);
+ if (!src) {
+ chunk_printf(&trash, "%08x:%s.accept(%04x)=%04x from [listener:%d] ALPN=%s\n",
+ s->uniq_id, fe->id, (unsigned short)l->rx.fd, (unsigned short)conn->handle.fd,
+ l->luid, alpn);
+ }
+ else switch (addr_to_str(src, pn, sizeof(pn))) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_printf(&trash, "%08x:%s.accept(%04x)=%04x from [%s:%d] ALPN=%s\n",
+ s->uniq_id, fe->id, (unsigned short)l->rx.fd, (unsigned short)conn->handle.fd,
+ pn, get_host_port(src), alpn);
+ break;
+ case AF_UNIX:
+ /* UNIX socket, only the destination is known */
+ chunk_printf(&trash, "%08x:%s.accept(%04x)=%04x from [unix:%d] ALPN=%s\n",
+ s->uniq_id, fe->id, (unsigned short)l->rx.fd, (unsigned short)conn->handle.fd,
+ l->luid, alpn);
+ break;
+ }
+
+ DISGUISE(write(1, trash.area, trash.data));
+ }
+
+ if (fe->mode == PR_MODE_HTTP)
+ s->scf->flags |= SC_FL_RCV_ONCE; /* one read is usually enough */
+
+ if (unlikely(fe->nb_req_cap > 0)) {
+ if ((s->req_cap = pool_zalloc(fe->req_cap_pool)) == NULL)
+ goto out_return; /* no memory */
+ }
+
+ if (unlikely(fe->nb_rsp_cap > 0)) {
+ if ((s->res_cap = pool_zalloc(fe->rsp_cap_pool)) == NULL)
+ goto out_free_reqcap; /* no memory */
+ }
+
+ if ((fe->http_needed || IS_HTX_STRM(s)) && !http_create_txn(s))
+ goto out_free_rspcap;
+
+ /* everything's OK, let's go on */
+ return 1;
+
+ /* Error unrolling */
+ out_free_rspcap:
+ pool_free(fe->rsp_cap_pool, s->res_cap);
+ out_free_reqcap:
+ pool_free(fe->req_cap_pool, s->req_cap);
+ out_return:
+ return -1;
+}
+
+/* Increment the current active connection counter. This ensures that the
+ * global maxconn is never exceeded. This must be done for every new frontend
+ * connection allocation.
+ *
+ * Returns the new actconn global value. If maxconn reached or exceeded, 0 is
+ * returned : the connection allocation should be cancelled.
+ */
+int increment_actconn()
+{
+ unsigned int count, next_actconn;
+
+ do {
+ count = actconn;
+ if (unlikely(count >= global.maxconn)) {
+ /* maxconn reached */
+ next_actconn = 0;
+ goto end;
+ }
+
+ /* try to increment actconn */
+ next_actconn = count + 1;
+ } while (!_HA_ATOMIC_CAS(&actconn, (int *)(&count), next_actconn) && __ha_cpu_relax());
+
+ end:
+ return next_actconn;
+}
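+
+/* Usage sketch (illustrative): a hypothetical caller reserves a connection
+ * slot before allocating, and releases it again on an allocation failure:
+ *
+ *	if (!increment_actconn())
+ *		return;                   // global maxconn reached, refuse
+ *	if (!alloc_my_connection())       // hypothetical allocator
+ *		_HA_ATOMIC_DEC(&actconn); // roll the counter back
+ */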
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* set temp integer to the id of the frontend */
+static int
+smp_fetch_fe_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->sess->fe->uuid;
+ return 1;
+}
+
+/* set string to the name of the frontend */
+static int
+smp_fetch_fe_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.str.area = (char *)smp->sess->fe->id;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ return 1;
+}
+
+/* set string to the name of the default backend */
+static int
+smp_fetch_fe_defbe(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->sess->fe->defbe.be)
+ return 0;
+ smp->data.u.str.area = (char *)smp->sess->fe->defbe.be->id;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ return 1;
+}
+
+/* set temp integer to the number of HTTP requests per second reaching the frontend.
+ * Accepts exactly 1 argument. The argument must be a frontend; other types will
+ * cause undefined behaviour.
+ */
+static int
+smp_fetch_fe_req_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = read_freq_ctr(&px->fe_req_per_sec);
+ return 1;
+}
+
+/* set temp integer to the number of connections per second reaching the frontend.
+ * Accepts exactly 1 argument. The argument must be a frontend; other types will
+ * cause undefined behaviour.
+ */
+static int
+smp_fetch_fe_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = read_freq_ctr(&px->fe_sess_per_sec);
+ return 1;
+}
+
+/* set temp integer to the number of concurrent connections on the frontend.
+ * Accepts exactly 1 argument. The argument must be a frontend; other types will
+ * cause undefined behaviour.
+ */
+static int
+smp_fetch_fe_conn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *px = args->data.prx;
+
+ if (px == NULL)
+ return 0;
+ if (px->cap & PR_CAP_DEF)
+ px = smp->px;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = px->feconn;
+ return 1;
+}
+
+static int
+smp_fetch_fe_client_timeout(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = TICKS_TO_MS(smp->sess->fe->timeout.client);
+ return 1;
+}
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "fe_client_timeout", smp_fetch_fe_client_timeout, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "fe_conn", smp_fetch_fe_conn, ARG1(1,FE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "fe_defbe", smp_fetch_fe_defbe, 0, NULL, SMP_T_STR, SMP_USE_FTEND, },
+ { "fe_id", smp_fetch_fe_id, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "fe_name", smp_fetch_fe_name, 0, NULL, SMP_T_STR, SMP_USE_FTEND, },
+ { "fe_req_rate", smp_fetch_fe_req_rate, ARG1(1,FE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "fe_sess_rate", smp_fetch_fe_sess_rate, ARG1(1,FE), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/h1.c b/src/h1.c
new file mode 100644
index 0000000..e251e74
--- /dev/null
+++ b/src/h1.c
@@ -0,0 +1,1319 @@
+/*
+ * HTTP/1 protocol analyzer
+ *
+ * Copyright 2000-2017 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+
+#include <import/sha1.h>
+
+#include <haproxy/api.h>
+#include <haproxy/base64.h>
+#include <haproxy/h1.h>
+#include <haproxy/http-hdr.h>
+#include <haproxy/tools.h>
+
+/* Parse the Content-Length header field of an HTTP/1 request. The function
+ * checks all possible occurrences of a comma-delimited value, and verifies
+ * if any of them doesn't match a previous value. It returns <0 if a value
+ * differs, 0 if the whole header can be dropped (i.e. already known), or >0
+ * if the value can be indexed (first one). In the last case, the value might
+ * be adjusted and the caller must only add the updated value.
+ */
+int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value)
+{
+ char *e, *n;
+ long long cl;
+ int not_first = !!(h1m->flags & H1_MF_CLEN);
+ struct ist word;
+
+ word.ptr = value->ptr;
+ e = value->ptr + value->len;
+
+ while (1) {
+ if (word.ptr >= e) {
+ /* empty header or empty value */
+ goto fail;
+ }
+
+ /* skip leading delimiter and blanks */
+ if (unlikely(HTTP_IS_LWS(*word.ptr))) {
+ word.ptr++;
+ continue;
+ }
+
+ /* digits only now */
+ for (cl = 0, n = word.ptr; n < e; n++) {
+ unsigned int c = *n - '0';
+ if (unlikely(c > 9)) {
+ /* non-digit */
+ if (unlikely(n == word.ptr)) // spaces only
+ goto fail;
+ break;
+ }
+
+ if (unlikely(!cl && n > word.ptr)) {
+ /* There was a leading zero before this digit,
+ * let's trim it.
+ */
+ word.ptr = n;
+ }
+
+ if (unlikely(cl > ULLONG_MAX / 10ULL))
+ goto fail; /* multiply overflow */
+ cl = cl * 10ULL;
+ if (unlikely(cl + c < cl))
+ goto fail; /* addition overflow */
+ cl = cl + c;
+ }
+
+ /* keep a copy of the exact cleaned value */
+ word.len = n - word.ptr;
+
+ /* skip trailing LWS till next comma or EOL */
+ for (; n < e; n++) {
+ if (!HTTP_IS_LWS(*n)) {
+ if (unlikely(*n != ','))
+ goto fail;
+ break;
+ }
+ }
+
+ /* if duplicate, must be equal */
+ if (h1m->flags & H1_MF_CLEN && cl != h1m->body_len)
+ goto fail;
+
+ /* OK, store this result as the one to be indexed */
+ h1m->flags |= H1_MF_CLEN;
+ h1m->curr_len = h1m->body_len = cl;
+ *value = word;
+
+ /* Now either n==e and we're done, or n points to the comma,
+ * and we skip it and continue.
+ */
+ if (n++ == e)
+ break;
+
+ word.ptr = n;
+ }
+ /* here we've reached the end with a single value or a series of
+ * identical values, all matching previous series if any. The last
+ * parsed value was sent back into <value>. We just have to decide
+ * if this occurrence has to be indexed (it's the first one) or
+ * silently skipped (it's not the first one)
+ */
+ return !not_first;
+ fail:
+ return -1;
+}
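+
+/* Worked example (illustrative): for the header value "0042, 42, 42", the
+ * loop above trims the leading zeroes of the first element, verifies that
+ * the two other occurrences carry the same number, and leaves <value>
+ * pointing to the cleaned token "42", returning 1 (index it) if this is the
+ * first Content-Length header. A later "Content-Length: 43" on the same
+ * message would return -1.
+ */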
+
+/* Parse the Transfer-Encoding: header field of an HTTP/1 request, looking for
+ * "chunked" encoding to perform some checks (it must be the last encoding for
+ * the request and must not be performed twice for any message). The
+ * H1_MF_TE_CHUNKED is set if a valid "chunked" encoding is found. The
+ * H1_MF_TE_OTHER flag is set if any other encoding is found. The H1_MF_XFER_ENC
+ * flag is always set. The H1_MF_CHNK is set when "chunked" encoding is the last
+ * one. Note that transfer codings are case-insensitive (cf RFC7230#4). This
+ * function returns <0 if an error is found, 0 if the whole header can be dropped
+ * (not used yet), or >0 if the value can be indexed.
+ */
+int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value)
+{
+ char *e, *n;
+ struct ist word;
+
+ h1m->flags |= H1_MF_XFER_ENC;
+
+ word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
+ e = istend(value);
+
+ while (++word.ptr < e) {
+ /* skip leading delimiter and blanks */
+ if (HTTP_IS_LWS(*word.ptr))
+ continue;
+
+ n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
+ word.len = n - word.ptr;
+
+ /* trim trailing blanks */
+ while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
+ word.len--;
+
+ h1m->flags &= ~H1_MF_CHNK;
+ if (isteqi(word, ist("chunked"))) {
+ if (h1m->flags & H1_MF_TE_CHUNKED) {
+ /* cf RFC7230#3.3.1 : A sender MUST NOT apply
+ * chunked more than once to a message body
+ * (i.e., chunking an already chunked message is
+ * not allowed)
+ */
+ goto fail;
+ }
+ h1m->flags |= (H1_MF_TE_CHUNKED|H1_MF_CHNK);
+ }
+ else {
+ if ((h1m->flags & (H1_MF_RESP|H1_MF_TE_CHUNKED)) == H1_MF_TE_CHUNKED) {
+ /* cf RFC7230#3.3.1 : If any transfer coding
+ * other than chunked is applied to a request
+ * payload body, the sender MUST apply chunked
+ * as the final transfer coding to ensure that
+ * the message is properly framed.
+ */
+ goto fail;
+ }
+ h1m->flags |= H1_MF_TE_OTHER;
+ }
+
+ word.ptr = n;
+ }
+
+ return 1;
+ fail:
+ return -1;
+}
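+
+/* Worked examples (illustrative): on a request, "Transfer-Encoding: gzip,
+ * chunked" is accepted and leaves H1_MF_CHNK set since chunked comes last,
+ * while "Transfer-Encoding: chunked, gzip" fails with -1 because chunked
+ * must be the final coding; "chunked, chunked" fails as well since chunking
+ * may not be applied twice.
+ */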
+
+/* Validate the authority and the host header value for the CONNECT method.
+ * If there is a host header, its value is normalized. 0 is returned on
+ * success, -1 if the authority is invalid and -2 if the host is invalid.
+ */
+static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr)
+{
+ struct ist uri_host, uri_port, host, host_port;
+
+ if (!isttest(authority))
+ goto invalid_authority;
+ uri_host = authority;
+ uri_port = http_get_host_port(authority);
+ if (!istlen(uri_port))
+ goto invalid_authority;
+ uri_host.len -= (istlen(uri_port) + 1);
+
+ if (!host_hdr || !isttest(*host_hdr))
+ goto end;
+
+ /* Get the port of the host header value, if any */
+ host = *host_hdr;
+ host_port = http_get_host_port(*host_hdr);
+ if (isttest(host_port))
+ host.len -= (istlen(host_port) + 1);
+
+ if (istlen(host_port)) {
+ if (!isteqi(host, uri_host) || !isteq(host_port, uri_port))
+ goto invalid_host;
+ if (http_is_default_port(IST_NULL, uri_port))
+ *host_hdr = host; /* normalize */
+ }
+ else {
+ if (!http_is_default_port(IST_NULL, uri_port) || !isteqi(host, uri_host))
+ goto invalid_host;
+ }
+
+ end:
+ return 0;
+
+ invalid_authority:
+ return -1;
+
+ invalid_host:
+ return -2;
+}
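+
+/* Worked example (illustrative): "CONNECT www.example.com:443" with a
+ * "Host: www.example.com:443" header matches on both host and port and
+ * returns 0 (the host value is then normalized to "www.example.com" since
+ * 443 is a default port). An authority without a port returns -1, and a
+ * host header naming another host or port returns -2.
+ */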
+
+
+/* Validate the authority and the host header value for non-CONNECT methods,
+ * when an absolute-URI is detected but does not exactly match the host
+ * value. The idea is to detect default ports (http or https). Both authority
+ * and host are defined here. 0 is returned on success, -1 if the host does
+ * not match the authority.
+ */
+static int h1_validate_mismatch_authority(struct ist scheme, struct ist authority, struct ist host_hdr)
+{
+ struct ist uri_host, uri_port, host, host_port;
+
+ if (!isttest(scheme))
+ goto mismatch;
+
+ uri_host = authority;
+ uri_port = http_get_host_port(authority);
+ if (isttest(uri_port))
+ uri_host.len -= (istlen(uri_port) + 1);
+
+ host = host_hdr;
+ host_port = http_get_host_port(host_hdr);
+ if (isttest(host_port))
+ host.len -= (istlen(host_port) + 1);
+
+ if (!isttest(uri_port) && !isttest(host_port)) {
+ /* No port on both: we already know the authority does not match
+ * the host value
+ */
+ goto mismatch;
+ }
+ else if (isttest(uri_port) && !http_is_default_port(scheme, uri_port)) {
+ /* here there is no port for the host value and the port for the
+ * authority is not the default one
+ */
+ goto mismatch;
+ }
+ else if (isttest(host_port) && !http_is_default_port(scheme, host_port)) {
+ /* here there is no port for the authority and the port for the
+ * host value is not the default one
+ */
+ goto mismatch;
+ }
+ else {
+ /* the authority or the host value contain a default port and
+ * there is no port on the other value
+ */
+ if (!isteqi(uri_host, host))
+ goto mismatch;
+ }
+
+ return 0;
+
+ mismatch:
+ return -1;
+}
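+
+/* Worked example (illustrative): an absolute URI "http://example.com:80/"
+ * with a "Host: example.com" header passes (80 is the default port for the
+ * "http" scheme and the host parts match), while "http://example.com:8080/"
+ * with the same host header returns -1.
+ */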
+
+
+/* Parse the Connection: header of an HTTP/1 request, looking for "close",
+ * "keep-alive", and "upgrade" values, and updating h1m->flags according to
+ * what was found there. Note that flags are only added, not removed, so the
+ * function is safe for being called multiple times if multiple occurrences
+ * are found. If the flag H1_MF_CLEAN_CONN_HDR, the header value is cleaned
+ * up from "keep-alive" and "close" values. To do so, the header value is
+ * rewritten in place and its length is updated.
+ */
+void h1_parse_connection_header(struct h1m *h1m, struct ist *value)
+{
+ char *e, *n, *p;
+ struct ist word;
+
+ word.ptr = value->ptr - 1; // -1 for next loop's pre-increment
+ p = value->ptr;
+ e = value->ptr + value->len;
+ if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
+ value->len = 0;
+
+ while (++word.ptr < e) {
+ /* skip leading delimiter and blanks */
+ if (HTTP_IS_LWS(*word.ptr))
+ continue;
+
+ n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
+ word.len = n - word.ptr;
+
+ /* trim trailing blanks */
+ while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
+ word.len--;
+
+ if (isteqi(word, ist("keep-alive"))) {
+ h1m->flags |= H1_MF_CONN_KAL;
+ if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
+ goto skip_val;
+ }
+ else if (isteqi(word, ist("close"))) {
+ h1m->flags |= H1_MF_CONN_CLO;
+ if (h1m->flags & H1_MF_CLEAN_CONN_HDR)
+ goto skip_val;
+ }
+ else if (isteqi(word, ist("upgrade")))
+ h1m->flags |= H1_MF_CONN_UPG;
+
+ if (h1m->flags & H1_MF_CLEAN_CONN_HDR) {
+ if (value->ptr + value->len == p) {
+ /* no rewrite done till now */
+ value->len = n - value->ptr;
+ }
+ else {
+ if (value->len)
+ value->ptr[value->len++] = ',';
+ istcat(value, word, e - value->ptr);
+ }
+ }
+
+ skip_val:
+ word.ptr = p = n;
+ }
+}
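+
+/* Worked example (illustrative): with H1_MF_CLEAN_CONN_HDR set, the value
+ * "keep-alive, upgrade, close" is rewritten in place as "upgrade", while
+ * H1_MF_CONN_KAL, H1_MF_CONN_UPG and H1_MF_CONN_CLO are all set; without
+ * the flag, only the flags are updated and the value is left untouched.
+ */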
+
+/* Parse the Upgrade: header of an HTTP/1 request.
+ * If "websocket" is found, set H1_MF_UPG_WEBSOCKET flag
+ */
+void h1_parse_upgrade_header(struct h1m *h1m, struct ist value)
+{
+ char *e, *n;
+ struct ist word;
+
+ h1m->flags &= ~H1_MF_UPG_WEBSOCKET;
+
+ word.ptr = value.ptr - 1; // -1 for next loop's pre-increment
+ e = istend(value);
+
+ while (++word.ptr < e) {
+ /* skip leading delimiter and blanks */
+ if (HTTP_IS_LWS(*word.ptr))
+ continue;
+
+ n = http_find_hdr_value_end(word.ptr, e); // next comma or end of line
+ word.len = n - word.ptr;
+
+ /* trim trailing blanks */
+ while (word.len && HTTP_IS_LWS(word.ptr[word.len-1]))
+ word.len--;
+
+ if (isteqi(word, ist("websocket")))
+ h1m->flags |= H1_MF_UPG_WEBSOCKET;
+
+ word.ptr = n;
+ }
+}
+
+/* Macros used in the HTTP/1 parser, to check for the expected presence of
+ * certain bytes (e.g. LF) or to skip to the next byte and yield in case of
+ * failure.
+ */
+
+/* Expects to find an LF at <ptr>. If not, set <state> to <where> and jump to
+ * <bad>.
+ */
+#define EXPECT_LF_HERE(ptr, bad, state, where) \
+ do { \
+ if (unlikely(*(ptr) != '\n')) { \
+ state = (where); \
+ goto bad; \
+ } \
+ } while (0)
+
+/* Increments pointer <ptr>, continues to label <more> if it's still below
+ * pointer <end>, or goes to <stop> and sets <state> to <where> if the end
+ * of buffer was reached.
+ */
+#define EAT_AND_JUMP_OR_RETURN(ptr, end, more, stop, state, where) \
+ do { \
+ if (likely(++(ptr) < (end))) \
+ goto more; \
+ else { \
+ state = (where); \
+ goto stop; \
+ } \
+ } while (0)
+
+/* This function parses a contiguous HTTP/1 headers block starting at <start>
+ * and ending before <stop>, at once, and converts it to a list of (name,value)
+ * pairs representing header fields into the array <hdr> of size <hdr_num>,
+ * whose last entry will have an empty name and an empty value. If <hdr_num> is
+ * too small to represent the whole message, an error is returned. Some
+ * protocol elements such as content-length and transfer-encoding will be
+ * parsed and stored into h1m as well. <hdr> may be null, in which case only
+ * the parsing state will be updated. This may be used to restart the parsing
+ * where it stopped for example.
+ *
+ * For now it's limited to the response. If the header block is incomplete,
+ * 0 is returned, waiting to be called again with more data to try it again.
+ * The caller is responsible for initializing h1m->state to H1_MSG_RPBEFORE,
+ * and h1m->next to zero on the first call, the parser will do the rest. If
+ * an incomplete message is seen, the caller only needs to present h1m->state
+ * and h1m->next again, with an empty header list so that the parser can start
+ * again. In this case, it will detect that it interrupted a previous session
+ * and will first look for the end of the message before reparsing it again and
+ * indexing it at the same time. This ensures that incomplete messages fed 1
+ * character at a time are never processed entirely more than exactly twice,
+ * and that there is no need to store all the internal state and pre-parsed
+ * headers or start line between calls.
+ *
+ * A pointer to a start line descriptor may be passed in <slp>, in which case
+ * the parser will fill it with whatever it found.
+ *
+ * The code was derived from the main HTTP/1 parser above but simplified and
+ * optimized to process responses produced or forwarded by haproxy. The caller
+ * is responsible for ensuring that the message doesn't wrap, and should ensure
+ * it is complete to avoid having to retry the operation after a failed
+ * attempt. The message is not supposed to be invalid, which is why a few
+ * properties such as the character set used in the header field names are not
+ * checked. In case of an unparsable response message, a negative value will be
+ * returned with h1m->err_pos and h1m->err_state matching the location and
+ * state where the error was met. Leading blank lines are tolerated but not
+ * recommended. If flag H1_MF_HDRS_ONLY is set in h1m->flags, only headers are
+ * parsed and the start line is skipped. It is not required to set h1m->state
+ * nor h1m->next in this case.
+ *
+ * This function returns :
+ * -1 in case of error. In this case, h1m->err_state is filled (if h1m is
+ * set) with the state the error occurred in and h1m->err_pos with the
+ *       position relative to <start>
+ * -2 if the output is full (hdr_num reached). err_state and err_pos also
+ * indicate where it failed.
+ * 0 in case of missing data.
+ * > 0 on success, it then corresponds to the number of bytes read since
+ * <start> so that the caller can go on with the payload.
+ */
+int h1_headers_to_hdr_list(char *start, const char *stop,
+ struct http_hdr *hdr, unsigned int hdr_num,
+ struct h1m *h1m, union h1_sl *slp)
+{
+ enum h1m_state state;
+ register char *ptr;
+ register const char *end;
+ unsigned int hdr_count;
+ unsigned int skip; /* number of bytes skipped at the beginning */
+ unsigned int sol; /* start of line */
+ unsigned int col; /* position of the colon */
+ unsigned int eol; /* end of line */
+ unsigned int sov; /* start of value */
+ union h1_sl sl;
+ int skip_update;
+ int restarting;
+ int host_idx;
+ struct ist n, v; /* header name and value during parsing */
+
+ skip = 0; // do it only once to keep track of the leading CRLF.
+
+ try_again:
+ hdr_count = sol = col = eol = sov = 0;
+ sl.st.status = 0;
+ skip_update = restarting = 0;
+ host_idx = -1;
+
+ if (h1m->flags & H1_MF_HDRS_ONLY) {
+ state = H1_MSG_HDR_FIRST;
+ h1m->next = 0;
+ }
+ else {
+ state = h1m->state;
+ if (h1m->state != H1_MSG_RQBEFORE && h1m->state != H1_MSG_RPBEFORE)
+ restarting = 1;
+ }
+
+ ptr = start + h1m->next;
+ end = stop;
+
+ if (unlikely(ptr >= end))
+ goto http_msg_ood;
+
+ /* don't update output if hdr is NULL or if we're restarting */
+ if (!hdr || restarting)
+ skip_update = 1;
+
+ switch (state) {
+ case H1_MSG_RQBEFORE:
+ http_msg_rqbefore:
+ if (likely(HTTP_IS_TOKEN(*ptr))) {
+ /* we have a start of message, we may have skipped some
+ * heading CRLF. Skip them now.
+ */
+ skip += ptr - start;
+ start = ptr;
+
+ sol = 0;
+ sl.rq.m.ptr = ptr;
+ hdr_count = 0;
+ state = H1_MSG_RQMETH;
+ goto http_msg_rqmeth;
+ }
+
+ if (unlikely(!HTTP_IS_CRLF(*ptr))) {
+ state = H1_MSG_RQBEFORE;
+ goto http_msg_invalid;
+ }
+
+ if (unlikely(*ptr == '\n'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore_cr, http_msg_ood, state, H1_MSG_RQBEFORE_CR);
+ /* stop here */
+
+ case H1_MSG_RQBEFORE_CR:
+ http_msg_rqbefore_cr:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQBEFORE_CR);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqbefore, http_msg_ood, state, H1_MSG_RQBEFORE);
+ /* stop here */
+
+ case H1_MSG_RQMETH:
+ http_msg_rqmeth:
+ if (likely(HTTP_IS_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth, http_msg_ood, state, H1_MSG_RQMETH);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ sl.rq.m.len = ptr - sl.rq.m.ptr;
+ sl.rq.meth = find_http_meth(start, sl.rq.m.len);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
+ }
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ /* HTTP 0.9 request */
+ sl.rq.m.len = ptr - sl.rq.m.ptr;
+ sl.rq.meth = find_http_meth(sl.rq.m.ptr, sl.rq.m.len);
+ http_msg_req09_uri:
+ sl.rq.u.ptr = ptr;
+ http_msg_req09_uri_e:
+ sl.rq.u.len = ptr - sl.rq.u.ptr;
+ http_msg_req09_ver:
+ sl.rq.v = ist2(ptr, 0);
+ goto http_msg_rqline_eol;
+ }
+ state = H1_MSG_RQMETH;
+ goto http_msg_invalid;
+
+ case H1_MSG_RQMETH_SP:
+ http_msg_rqmeth_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ sl.rq.u.ptr = ptr;
+ goto http_msg_rquri;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqmeth_sp, http_msg_ood, state, H1_MSG_RQMETH_SP);
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_uri;
+
+ case H1_MSG_RQURI:
+ http_msg_rquri:
+#ifdef HA_UNALIGNED_LE
+ /* speedup: skip bytes not between 0x24 and 0x7e inclusive */
+ while (ptr <= end - sizeof(int)) {
+ int x = *(int *)ptr - 0x24242424;
+ if (x & 0x80808080)
+ break;
+
+ x -= 0x5b5b5b5b;
+ if (!(x & 0x80808080))
+ break;
+
+ ptr += sizeof(int);
+ }
+#endif
+ if (ptr >= end) {
+ state = H1_MSG_RQURI;
+ goto http_msg_ood;
+ }
+ http_msg_rquri2:
+ if (likely((unsigned char)(*ptr - 33) <= 93)) { /* 33 to 126 included */
+ if (*ptr == '#') {
+ if (h1m->err_pos < -1) /* PR_O2_REQBUG_OK not set */
+ goto invalid_char;
+ if (h1m->err_pos == -1) /* PR_O2_REQBUG_OK set: just log */
+ h1m->err_pos = ptr - start + skip;
+ }
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri2, http_msg_ood, state, H1_MSG_RQURI);
+ }
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ sl.rq.u.len = ptr - sl.rq.u.ptr;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
+ }
+ if (likely((unsigned char)*ptr >= 128)) {
+ /* non-ASCII chars are forbidden unless option
+ * accept-invalid-http-request is enabled in the frontend.
+ * In any case, we capture the faulty char.
+ */
+ if (h1m->err_pos < -1)
+ goto invalid_char;
+ if (h1m->err_pos == -1)
+ h1m->err_pos = ptr - start + skip;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri, http_msg_ood, state, H1_MSG_RQURI);
+ }
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_uri_e;
+ }
+
+ /* OK forbidden chars, 0..31 or 127 */
+ invalid_char:
+ state = H1_MSG_RQURI;
+ goto http_msg_invalid;
+
+ case H1_MSG_RQURI_SP:
+ http_msg_rquri_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ sl.rq.v.ptr = ptr;
+ goto http_msg_rqver;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rquri_sp, http_msg_ood, state, H1_MSG_RQURI_SP);
+ /* so it's a CR/LF, meaning an HTTP 0.9 request */
+ goto http_msg_req09_ver;
+
+
+ case H1_MSG_RQVER:
+ http_msg_rqver:
+ if (likely(HTTP_IS_VER_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqver, http_msg_ood, state, H1_MSG_RQVER);
+
+ if (likely(HTTP_IS_CRLF(*ptr))) {
+ sl.rq.v.len = ptr - sl.rq.v.ptr;
+ http_msg_rqline_eol:
+ /* We have seen the end of line. Note that we do not
+ * necessarily have the \n yet, but at least we know that we
+ * have EITHER \r OR \n, otherwise the request would not be
+ * complete. We can then record the request length and return
+ * to the caller which will be able to register it.
+ */
+
+ if (likely(!skip_update)) {
+ if ((sl.rq.v.len == 8) &&
+ (*(sl.rq.v.ptr + 5) > '1' ||
+ (*(sl.rq.v.ptr + 5) == '1' && *(sl.rq.v.ptr + 7) >= '1')))
+ h1m->flags |= H1_MF_VER_11;
+
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_RQVER;
+ goto http_output_full;
+ }
+ if (!(h1m->flags & H1_MF_NO_PHDR))
+ http_set_hdr(&hdr[hdr_count++], ist(":method"), sl.rq.m);
+
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_RQVER;
+ goto http_output_full;
+ }
+ if (!(h1m->flags & H1_MF_NO_PHDR))
+ http_set_hdr(&hdr[hdr_count++], ist(":path"), sl.rq.u);
+ }
+
+ sol = ptr - start;
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rqline_end, http_msg_ood, state, H1_MSG_RQLINE_END);
+ goto http_msg_rqline_end;
+ }
+
+ /* neither an HTTP_VER token nor a CRLF */
+ state = H1_MSG_RQVER;
+ goto http_msg_invalid;
+
+ case H1_MSG_RQLINE_END:
+ http_msg_rqline_end:
+ /* check for HTTP/0.9 request : no version information
+ * available. sol must point to the first of CR or LF. However
+ * since we don't save these elements between calls, if we come
+ * here from a restart, we don't necessarily know. Thus in this
+ * case we simply start over.
+ */
+ if (restarting)
+ goto restart;
+
+ if (unlikely(sl.rq.v.len == 0))
+ goto http_msg_last_lf;
+
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RQLINE_END);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
+ /* stop here */
+
+ /*
+ * Common states below
+ */
+ case H1_MSG_RPBEFORE:
+ http_msg_rpbefore:
+ if (likely(HTTP_IS_TOKEN(*ptr))) {
+ /* we have a start of message, we may have skipped some
+ * heading CRLF. Skip them now.
+ */
+ skip += ptr - start;
+ start = ptr;
+
+ sol = 0;
+ sl.st.v.ptr = ptr;
+ hdr_count = 0;
+ state = H1_MSG_RPVER;
+ goto http_msg_rpver;
+ }
+
+ if (unlikely(!HTTP_IS_CRLF(*ptr))) {
+ state = H1_MSG_RPBEFORE;
+ goto http_msg_invalid;
+ }
+
+ if (unlikely(*ptr == '\n'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore_cr, http_msg_ood, state, H1_MSG_RPBEFORE_CR);
+ /* stop here */
+
+ case H1_MSG_RPBEFORE_CR:
+ http_msg_rpbefore_cr:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPBEFORE_CR);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpbefore, http_msg_ood, state, H1_MSG_RPBEFORE);
+ /* stop here */
+
+ case H1_MSG_RPVER:
+ http_msg_rpver:
+ if (likely(HTTP_IS_VER_TOKEN(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver, http_msg_ood, state, H1_MSG_RPVER);
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ sl.st.v.len = ptr - sl.st.v.ptr;
+
+ if ((sl.st.v.len == 8) &&
+ (*(sl.st.v.ptr + 5) > '1' ||
+ (*(sl.st.v.ptr + 5) == '1' && *(sl.st.v.ptr + 7) >= '1')))
+ h1m->flags |= H1_MF_VER_11;
+
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
+ }
+ state = H1_MSG_RPVER;
+ goto http_msg_invalid;
+
+ case H1_MSG_RPVER_SP:
+ http_msg_rpver_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ sl.st.status = 0;
+ sl.st.c.ptr = ptr;
+ goto http_msg_rpcode;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpver_sp, http_msg_ood, state, H1_MSG_RPVER_SP);
+ /* so it's a CR/LF, this is invalid */
+ state = H1_MSG_RPVER_SP;
+ goto http_msg_invalid;
+
+ case H1_MSG_RPCODE:
+ http_msg_rpcode:
+ if (likely(HTTP_IS_DIGIT(*ptr))) {
+ sl.st.status = sl.st.status * 10 + *ptr - '0';
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode, http_msg_ood, state, H1_MSG_RPCODE);
+ }
+
+ if (unlikely(!HTTP_IS_LWS(*ptr))) {
+ state = H1_MSG_RPCODE;
+ goto http_msg_invalid;
+ }
+
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ sl.st.c.len = ptr - sl.st.c.ptr;
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
+ }
+
+ /* so it's a CR/LF, so there is no reason phrase */
+ sl.st.c.len = ptr - sl.st.c.ptr;
+
+ http_msg_rsp_reason:
+ sl.st.r = ist2(ptr, 0);
+ goto http_msg_rpline_eol;
+
+ case H1_MSG_RPCODE_SP:
+ http_msg_rpcode_sp:
+ if (likely(!HTTP_IS_LWS(*ptr))) {
+ sl.st.r.ptr = ptr;
+ goto http_msg_rpreason;
+ }
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpcode_sp, http_msg_ood, state, H1_MSG_RPCODE_SP);
+ /* so it's a CR/LF, so there is no reason phrase */
+ goto http_msg_rsp_reason;
+
+ case H1_MSG_RPREASON:
+ http_msg_rpreason:
+ if (likely(!HTTP_IS_CRLF(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpreason, http_msg_ood, state, H1_MSG_RPREASON);
+ sl.st.r.len = ptr - sl.st.r.ptr;
+ http_msg_rpline_eol:
+ /* We have seen the end of line. Note that we do not
+ * necessarily have the \n yet, but at least we know that we
+ * have EITHER \r OR \n, otherwise the response would not be
+ * complete. We can then record the response length and return
+ * to the caller which will be able to register it.
+ */
+
+ if (likely(!skip_update)) {
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_RPREASON;
+ goto http_output_full;
+ }
+ if (!(h1m->flags & H1_MF_NO_PHDR))
+ http_set_hdr(&hdr[hdr_count++], ist(":status"), sl.st.c);
+ }
+
+ sol = ptr - start;
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_rpline_end, http_msg_ood, state, H1_MSG_RPLINE_END);
+ goto http_msg_rpline_end;
+
+ case H1_MSG_RPLINE_END:
+ http_msg_rpline_end:
+ /* sol must point to the first of CR or LF. */
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_RPLINE_END);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_first, http_msg_ood, state, H1_MSG_HDR_FIRST);
+ /* stop here */
+
+ case H1_MSG_HDR_FIRST:
+ http_msg_hdr_first:
+ sol = ptr - start;
+ if (likely(!HTTP_IS_CRLF(*ptr))) {
+ goto http_msg_hdr_name;
+ }
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
+ goto http_msg_last_lf;
+
+ case H1_MSG_HDR_NAME:
+ http_msg_hdr_name:
+ /* assumes sol points to the first char */
+ if (likely(HTTP_IS_TOKEN(*ptr))) {
+ if (!skip_update) {
+ /* turn it to lower case if needed */
+ if (isupper((unsigned char)*ptr) && h1m->flags & H1_MF_TOLOWER)
+ *ptr = tolower((unsigned char)*ptr);
+ }
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
+ }
+
+ if (likely(*ptr == ':')) {
+ col = ptr - start;
+ if (col <= sol) {
+ state = H1_MSG_HDR_NAME;
+ goto http_msg_invalid;
+ }
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
+ }
+
+ if (likely(h1m->err_pos < -1) || *ptr == '\n') {
+ state = H1_MSG_HDR_NAME;
+ goto http_msg_invalid;
+ }
+
+ if (h1m->err_pos == -1) /* capture the error pointer */
+ h1m->err_pos = ptr - start + skip; /* >= 0 now */
+
+ /* and we still accept this non-token character */
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_name, http_msg_ood, state, H1_MSG_HDR_NAME);
+
+ case H1_MSG_HDR_L1_SP:
+ http_msg_hdr_l1_sp:
+ /* assumes sol points to the first char */
+ if (likely(HTTP_IS_SPHT(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_sp, http_msg_ood, state, H1_MSG_HDR_L1_SP);
+
+ /* header value can be basically anything except CR/LF */
+ sov = ptr - start;
+
+ if (likely(!HTTP_IS_CRLF(*ptr))) {
+ goto http_msg_hdr_val;
+ }
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lf, http_msg_ood, state, H1_MSG_HDR_L1_LF);
+ goto http_msg_hdr_l1_lf;
+
+ case H1_MSG_HDR_L1_LF:
+ http_msg_hdr_l1_lf:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L1_LF);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l1_lws, http_msg_ood, state, H1_MSG_HDR_L1_LWS);
+
+ case H1_MSG_HDR_L1_LWS:
+ http_msg_hdr_l1_lws:
+ if (likely(HTTP_IS_SPHT(*ptr))) {
+ if (!skip_update) {
+ /* replace HT,CR,LF with spaces */
+ for (; start + sov < ptr; sov++)
+ start[sov] = ' ';
+ }
+ goto http_msg_hdr_l1_sp;
+ }
+		/* we had a header consisting only of spaces! */
+ eol = sov;
+ goto http_msg_complete_header;
+
+ case H1_MSG_HDR_VAL:
+ http_msg_hdr_val:
+ /* assumes sol points to the first char, and sov
+ * points to the first character of the value.
+ */
+
+ /* speedup: we'll skip packs of 4 or 8 bytes not containing bytes 0x0D
+ * and lower. In fact since most of the time is spent in the loop, we
+	 * also remove the sign bit test so that bytes 0x8e..0xff and 0x00..0x0d
+	 * break the loop, but we don't care since they're very rare in header
+	 * values.
+ */
+#ifdef HA_UNALIGNED_LE64
+ while (ptr <= end - sizeof(long)) {
+ if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL)
+ goto http_msg_hdr_val2;
+ ptr += sizeof(long);
+ }
+#endif
+#ifdef HA_UNALIGNED_LE
+ while (ptr <= end - sizeof(int)) {
+ if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080)
+ goto http_msg_hdr_val2;
+ ptr += sizeof(int);
+ }
+#endif
+ if (ptr >= end) {
+ state = H1_MSG_HDR_VAL;
+ goto http_msg_ood;
+ }
+ http_msg_hdr_val2:
+ if (likely(!*ptr)) {
+ /* RFC9110 clarified that NUL is explicitly forbidden in header values
+ * (like CR and LF).
+ */
+ if (h1m->err_pos < -1) { /* PR_O2_REQBUG_OK not set */
+ state = H1_MSG_HDR_VAL;
+ goto http_msg_invalid;
+ }
+ if (h1m->err_pos == -1) /* PR_O2_REQBUG_OK set: just log */
+ h1m->err_pos = ptr - start + skip;
+ }
+ if (likely(!HTTP_IS_CRLF(*ptr)))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_val2, http_msg_ood, state, H1_MSG_HDR_VAL);
+
+ eol = ptr - start;
+ /* Note: we could also copy eol into ->eoh so that we have the
+ * real header end in case it ends with lots of LWS, but is this
+ * really needed ?
+ */
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lf, http_msg_ood, state, H1_MSG_HDR_L2_LF);
+ goto http_msg_hdr_l2_lf;
+
+ case H1_MSG_HDR_L2_LF:
+ http_msg_hdr_l2_lf:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_HDR_L2_LF);
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_hdr_l2_lws, http_msg_ood, state, H1_MSG_HDR_L2_LWS);
+
+ case H1_MSG_HDR_L2_LWS:
+ http_msg_hdr_l2_lws:
+ if (unlikely(HTTP_IS_SPHT(*ptr))) {
+ if (!skip_update) {
+ /* LWS: replace HT,CR,LF with spaces */
+ for (; start + eol < ptr; eol++)
+ start[eol] = ' ';
+ }
+ goto http_msg_hdr_val;
+ }
+ http_msg_complete_header:
+ /*
+ * It was a new header, so the last one is finished. Assumes
+ * <sol> points to the first char of the name, <col> to the
+ * colon, <sov> points to the first character of the value and
+ * <eol> to the first CR or LF so we know how the line ends. We
+ * will trim spaces around the value. It's possible to do it by
+ * adjusting <eol> and <sov> which are no more used after this.
+ * We can add the header field to the list.
+ */
+ if (likely(!skip_update)) {
+ while (sov < eol && HTTP_IS_LWS(start[sov]))
+ sov++;
+
+ while (eol - 1 > sov && HTTP_IS_LWS(start[eol - 1]))
+ eol--;
+
+
+ n = ist2(start + sol, col - sol);
+ v = ist2(start + sov, eol - sov);
+
+ do {
+ int ret;
+
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_HDR_L2_LWS;
+ goto http_output_full;
+ }
+
+ if (isteqi(n, ist("transfer-encoding"))) {
+ ret = h1_parse_xfer_enc_header(h1m, v);
+ if (ret < 0) {
+ state = H1_MSG_HDR_L2_LWS;
+ ptr = v.ptr; /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ else if (ret == 0) {
+ /* skip it */
+ break;
+ }
+ }
+ else if (isteqi(n, ist("content-length"))) {
+ ret = h1_parse_cont_len_header(h1m, &v);
+
+ if (ret < 0) {
+ state = H1_MSG_HDR_L2_LWS;
+ ptr = v.ptr; /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ else if (ret == 0) {
+ /* skip it */
+ break;
+ }
+ }
+ else if (isteqi(n, ist("connection"))) {
+ h1_parse_connection_header(h1m, &v);
+ if (!v.len) {
+ /* skip it */
+ break;
+ }
+ }
+ else if (isteqi(n, ist("upgrade"))) {
+ h1_parse_upgrade_header(h1m, v);
+ }
+ else if (!(h1m->flags & H1_MF_RESP) && isteqi(n, ist("host"))) {
+ if (host_idx == -1)
+ host_idx = hdr_count;
+ else {
+ if (!isteqi(v, hdr[host_idx].v)) {
+ state = H1_MSG_HDR_L2_LWS;
+ ptr = v.ptr; /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ /* if the same host, skip it */
+ break;
+ }
+ }
+
+ http_set_hdr(&hdr[hdr_count++], n, v);
+ } while (0);
+ }
+
+ sol = ptr - start;
+
+ if (likely(!HTTP_IS_CRLF(*ptr)))
+ goto http_msg_hdr_name;
+
+ if (likely(*ptr == '\r'))
+ EAT_AND_JUMP_OR_RETURN(ptr, end, http_msg_last_lf, http_msg_ood, state, H1_MSG_LAST_LF);
+ goto http_msg_last_lf;
+
+ case H1_MSG_LAST_LF:
+ http_msg_last_lf:
+ EXPECT_LF_HERE(ptr, http_msg_invalid, state, H1_MSG_LAST_LF);
+ ptr++;
+ /* <ptr> now points to the first byte of payload. If needed sol
+ * still points to the first of either CR or LF of the empty
+ * line ending the headers block.
+ */
+ if (likely(!skip_update)) {
+ if (unlikely(hdr_count >= hdr_num)) {
+ state = H1_MSG_LAST_LF;
+ goto http_output_full;
+ }
+ http_set_hdr(&hdr[hdr_count++], ist2(start+sol, 0), ist(""));
+ }
+
+ /* reaching here we've parsed the whole message. We may detect
+ * that we were already continuing an interrupted parsing pass
+		 * so we were silently looking for the end of the message without
+		 * updating anything, before deciding to parse it fully at once.
+		 * It's guaranteed that we won't match this test twice in a row
+		 * since <restarting> turns to zero once we restart.
+ */
+ if (restarting)
+ goto restart;
+
+
+ if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) {
+ struct http_uri_parser parser = http_uri_parser_init(sl.rq.u);
+ struct ist scheme, authority;
+ int ret;
+
+ scheme = http_parse_scheme(&parser);
+ authority = http_parse_authority(&parser, 1);
+ if (sl.rq.meth == HTTP_METH_CONNECT) {
+ struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
+
+ ret = h1_validate_connect_authority(authority, host);
+ if (ret < 0) {
+ if (h1m->err_pos < -1) {
+ state = H1_MSG_LAST_LF;
+ /* WT: gcc seems to see a path where sl.rq.u.ptr was used
+ * uninitialized, but it doesn't know that the function is
+ * called with initial states making this impossible.
+ */
+ ALREADY_CHECKED(sl.rq.u.ptr);
+ ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ if (h1m->err_pos == -1) /* capture the error pointer */
+ h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
+ }
+ }
+ else if (host_idx != -1 && istlen(authority)) {
+ struct ist host = hdr[host_idx].v;
+
+ /* For non-CONNECT method, the authority must match the host header value */
+ if (!isteqi(authority, host)) {
+ ret = h1_validate_mismatch_authority(scheme, authority, host);
+ if (ret < 0) {
+ if (h1m->err_pos < -1) {
+ state = H1_MSG_LAST_LF;
+ ptr = host.ptr; /* Set ptr on the error */
+ goto http_msg_invalid;
+ }
+ if (h1m->err_pos == -1) /* capture the error pointer */
+ h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
+ }
+ }
+ }
+ }
+
+ state = H1_MSG_DATA;
+ if (h1m->flags & H1_MF_XFER_ENC) {
+ if (h1m->flags & H1_MF_CLEN) {
+ /* T-E + C-L: force close and remove C-L */
+ h1m->flags |= H1_MF_CONN_CLO;
+ h1m->flags &= ~H1_MF_CLEN;
+ h1m->curr_len = h1m->body_len = 0;
+ hdr_count = http_del_hdr(hdr, ist("content-length"));
+ }
+ else if (!(h1m->flags & H1_MF_VER_11)) {
+ /* T-E + HTTP/1.0: force close */
+ h1m->flags |= H1_MF_CONN_CLO;
+ }
+
+ if (h1m->flags & H1_MF_CHNK)
+ state = H1_MSG_CHUNK_SIZE;
+ else if (!(h1m->flags & H1_MF_RESP)) {
+ /* cf RFC7230#3.3.3 : transfer-encoding in
+ * request without chunked encoding is invalid.
+ */
+ goto http_msg_invalid;
+ }
+ }
+
+ break;
+
+ default:
+ /* impossible states */
+ goto http_msg_invalid;
+ }
+
+ /* Now we've left the headers state and are either in H1_MSG_DATA or
+ * H1_MSG_CHUNK_SIZE.
+ */
+
+ if (slp && !skip_update)
+ *slp = sl;
+
+ h1m->state = state;
+ h1m->next = ptr - start + skip;
+ return h1m->next;
+
+ http_msg_ood:
+ /* out of data at <ptr> during state <state> */
+ if (slp && !skip_update)
+ *slp = sl;
+
+ h1m->state = state;
+ h1m->next = ptr - start + skip;
+ return 0;
+
+ http_msg_invalid:
+ /* invalid message, error at <ptr> */
+ if (slp && !skip_update)
+ *slp = sl;
+
+ h1m->err_state = h1m->state = state;
+ h1m->err_pos = h1m->next = ptr - start + skip;
+ return -1;
+
+ http_output_full:
+ /* no more room to store the current header, error at <ptr> */
+ if (slp && !skip_update)
+ *slp = sl;
+
+ h1m->err_state = h1m->state = state;
+ h1m->err_pos = h1m->next = ptr - start + skip;
+ return -2;
+
+ restart:
+ h1m->flags &= H1_MF_RESTART_MASK;
+ h1m->curr_len = h1m->body_len = h1m->next = 0;
+ if (h1m->flags & H1_MF_RESP)
+ h1m->state = H1_MSG_RPBEFORE;
+ else
+ h1m->state = H1_MSG_RQBEFORE;
+ goto try_again;
+}
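+
+/* A minimal usage sketch for the state-machine parser above (editorial
+ * illustration, not part of the upstream file; <buf>, <h1m> and MAX_HDRS are
+ * assumed to be set up by the caller). It shows the return conventions seen
+ * at the exit labels: > 0 is the number of bytes consumed, 0 means more
+ * input is needed, -1 an invalid message and -2 a full output array:
+ *
+ *	struct http_hdr hdrs[MAX_HDRS];
+ *	union h1_sl sl;
+ *	int ret;
+ *
+ *	ret = h1_headers_to_hdr_list(b_head(&buf), b_tail(&buf),
+ *	                             hdrs, MAX_HDRS, &h1m, &sl);
+ *	if (ret > 0)
+ *		process_headers(hdrs, &sl);	// hypothetical consumer
+ *	else if (ret == 0)
+ *		wait_for_more_data();		// hypothetical: incomplete input
+ *	else if (ret == -1)
+ *		reject_message();		// hypothetical: parse error
+ *	else
+ *		enlarge_hdr_array();		// hypothetical: -2, output full
+ */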
+
+/* This function performs a very minimal parsing of the trailers block present
+ * at offset <ofs> in <buf> for up to <max> bytes, and returns the number of
+ * bytes to delete to skip the trailers. It may return 0 if it's missing some
+ * input data, or < 0 in case of parse error (in which case the caller may have
+ * to decide how to proceed, possibly eating everything).
+ */
+int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max)
+{
+ const char *stop = b_peek(buf, ofs + max);
+ int count = ofs;
+
+ while (1) {
+ const char *p1 = NULL, *p2 = NULL;
+ const char *start = b_peek(buf, count);
+ const char *ptr = start;
+
+ /* scan current line and stop at LF or CRLF */
+ while (1) {
+ if (ptr == stop)
+ return 0;
+
+ if (*ptr == '\n') {
+ if (!p1)
+ p1 = ptr;
+ p2 = ptr;
+ break;
+ }
+
+ if (*ptr == '\r') {
+ if (p1)
+ return -1;
+ p1 = ptr;
+ }
+
+ ptr = b_next(buf, ptr);
+ }
+
+ /* after LF; point to beginning of next line */
+ p2 = b_next(buf, p2);
+ count += b_dist(buf, start, p2);
+
+ /* LF/CRLF at beginning of line => end of trailers at p2.
+ * Everything was scheduled for forwarding, there's nothing left
+ * from this message. */
+ if (p1 == start)
+ break;
+ /* OK, next line then */
+ }
+ return count - ofs;
+}
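+
+/* Illustrative sketch (editorial, not part of the upstream file): a caller
+ * forwarding a chunked message could use the function above as follows,
+ * assuming the trailers start at offset <ofs> in <buf>:
+ *
+ *	int ret = h1_measure_trailers(&buf, ofs, b_data(&buf) - ofs);
+ *	if (ret > 0)
+ *		forward_bytes(ret);	// hypothetical: skip/forward trailers
+ *	else if (ret == 0)
+ *		return;			// incomplete, wait for more data
+ *	else
+ *		abort_message();	// hypothetical: parse error
+ */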
+
+/* Generate a random key for a WebSocket handshake, as required by RFC 6455.
+ * The key is 128 bits long, encoded as a base64 string into the <key_out>
+ * parameter (25 bytes: 24 base64 chars plus the trailing NUL).
+ */
+void h1_generate_random_ws_input_key(char key_out[25])
+{
+ /* generate a random websocket key */
+ const uint64_t rand1 = ha_random64(), rand2 = ha_random64();
+ char key[16];
+
+ memcpy(key, &rand1, 8);
+ memcpy(&key[8], &rand2, 8);
+ a2base64(key, 16, key_out, 25);
+}
+
+#define H1_WS_KEY_SUFFIX_GUID "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"
+
+/*
+ * Calculate the WebSocket handshake response key from <key>. Following
+ * RFC 6455, <key> must be 24 bytes long. The result is stored in <result>
+ * as a 29-byte string (28 base64 chars plus the trailing NUL).
+ */
+void h1_calculate_ws_output_key(const char *key, char *result)
+{
+ blk_SHA_CTX sha1_ctx;
+ char hash_in[60], hash_out[20];
+
+ /* concatenate the key with a fixed suffix */
+ memcpy(hash_in, key, 24);
+ memcpy(&hash_in[24], H1_WS_KEY_SUFFIX_GUID, 36);
+
+ /* sha1 the result */
+ blk_SHA1_Init(&sha1_ctx);
+ blk_SHA1_Update(&sha1_ctx, hash_in, 60);
+ blk_SHA1_Final((unsigned char *)hash_out, &sha1_ctx);
+
+ /* encode in base64 the hash */
+ a2base64(hash_out, 20, result, 29);
+}
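+
+/* Illustrative pairing of the two helpers above (editorial note, not part of
+ * the upstream file): one endpoint generates the random Sec-WebSocket-Key,
+ * and the expected Sec-WebSocket-Accept value is derived from it:
+ *
+ *	char key[25], accept[29];
+ *
+ *	h1_generate_random_ws_input_key(key);	  // 24 base64 chars + NUL
+ *	// ... send "Sec-WebSocket-Key: <key>" in the upgrade request ...
+ *	h1_calculate_ws_output_key(key, accept);  // 28 base64 chars + NUL
+ *	// the handshake is valid if the server answered with
+ *	// "Sec-WebSocket-Accept: <accept>"
+ */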
diff --git a/src/h1_htx.c b/src/h1_htx.c
new file mode 100644
index 0000000..f4f13fc
--- /dev/null
+++ b/src/h1_htx.c
@@ -0,0 +1,1074 @@
+/*
+ * Functions to manipulate H1 messages using the internal representation.
+ *
+ * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/http.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/tools.h>
+
+/* Estimate the size of the HTX headers after the parsing, including the EOH. */
+static size_t h1_eval_htx_hdrs_size(const struct http_hdr *hdrs)
+{
+ size_t sz = 0;
+ int i;
+
+ for (i = 0; hdrs[i].n.len; i++)
+ sz += sizeof(struct htx_blk) + hdrs[i].n.len + hdrs[i].v.len;
+ sz += sizeof(struct htx_blk) + 1;
+ return sz;
+}
+
+/* Estimate the size of the HTX message (request or response) after the parsing. */
+static size_t h1_eval_htx_size(const struct ist p1, const struct ist p2, const struct ist p3,
+ const struct http_hdr *hdrs)
+{
+ size_t sz;
+
+ /* size of the HTX start-line */
+ sz = sizeof(struct htx_blk) + sizeof(struct htx_sl) + p1.len + p2.len + p3.len;
+ sz += h1_eval_htx_hdrs_size(hdrs);
+ return sz;
+}
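+
+/* As a worked example (editorial note, not part of the upstream file), a
+ * request line "GET / HTTP/1.1" with the single header "host: example.com"
+ * is estimated by h1_eval_htx_size() as:
+ *
+ *	start-line: sizeof(struct htx_blk) + sizeof(struct htx_sl) + 3 + 1 + 8
+ *	header    : sizeof(struct htx_blk) + 4 + 11
+ *	EOH       : sizeof(struct htx_blk) + 1
+ *
+ * i.e. one block header per HTX block plus the raw bytes each block carries.
+ */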
+
+/* Check the validity of the request version. If the version is valid, it
+ * returns 1. Otherwise, it returns 0.
+ */
+static int h1_process_req_vsn(struct h1m *h1m, union h1_sl *sl)
+{
+ /* RFC7230#2.6 has enforced the format of the HTTP version string to be
+ * exactly one digit "." one digit. This check may be disabled using
+ * option accept-invalid-http-request.
+ */
+ if (h1m->err_pos == -2) { /* PR_O2_REQBUG_OK not set */
+ if (sl->rq.v.len != 8)
+ return 0;
+
+ if (!istnmatch(sl->rq.v, ist("HTTP/"), 5) ||
+ !isdigit((unsigned char)*(sl->rq.v.ptr + 5)) ||
+ *(sl->rq.v.ptr + 6) != '.' ||
+ !isdigit((unsigned char)*(sl->rq.v.ptr + 7)))
+ return 0;
+ }
+ else if (!sl->rq.v.len) {
+ /* try to convert HTTP/0.9 requests to HTTP/1.0 */
+
+ /* RFC 1945 allows only GET for HTTP/0.9 requests */
+ if (sl->rq.meth != HTTP_METH_GET)
+ return 0;
+
+ /* HTTP/0.9 requests *must* have a request URI, per RFC 1945 */
+ if (!sl->rq.u.len)
+ return 0;
+
+ /* Add HTTP version */
+ sl->rq.v = ist("HTTP/1.0");
+ return 1;
+ }
+
+ if ((sl->rq.v.len == 8) &&
+ ((*(sl->rq.v.ptr + 5) > '1') ||
+ ((*(sl->rq.v.ptr + 5) == '1') && (*(sl->rq.v.ptr + 7) >= '1'))))
+ h1m->flags |= H1_MF_VER_11;
+ return 1;
+}
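+
+/* For illustration (editorial note, not part of the upstream file): an
+ * HTTP/0.9 request such as "GET /index.html" carries no version token, so
+ * sl->rq.v is empty and the function above rewrites it as if
+ * "GET /index.html HTTP/1.0" had been received. A version-less request using
+ * any other method (e.g. "POST /x") is rejected, since RFC 1945 only allows
+ * GET for HTTP/0.9.
+ */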
+
+/* Check the validity of the response version. If the version is valid, it
+ * returns 1. Otherwise, it returns 0.
+ */
+static int h1_process_res_vsn(struct h1m *h1m, union h1_sl *sl)
+{
+ /* RFC7230#2.6 has enforced the format of the HTTP version string to be
+ * exactly one digit "." one digit. This check may be disabled using
+	 * option accept-invalid-http-response.
+	 */
+	if (h1m->err_pos == -2) { /* PR_O2_RSPBUG_OK not set */
+ if (sl->st.v.len != 8)
+ return 0;
+
+ if (*(sl->st.v.ptr + 4) != '/' ||
+ !isdigit((unsigned char)*(sl->st.v.ptr + 5)) ||
+ *(sl->st.v.ptr + 6) != '.' ||
+ !isdigit((unsigned char)*(sl->st.v.ptr + 7)))
+ return 0;
+ }
+
+ if ((sl->st.v.len == 8) &&
+ ((*(sl->st.v.ptr + 5) > '1') ||
+ ((*(sl->st.v.ptr + 5) == '1') && (*(sl->st.v.ptr + 7) >= '1'))))
+ h1m->flags |= H1_MF_VER_11;
+
+ return 1;
+}
+
+/* Convert H1M flags to HTX start-line flags. */
+static unsigned int h1m_htx_sl_flags(struct h1m *h1m)
+{
+ unsigned int flags = HTX_SL_F_NONE;
+
+ if (h1m->flags & H1_MF_RESP)
+ flags |= HTX_SL_F_IS_RESP;
+ if (h1m->flags & H1_MF_VER_11)
+ flags |= HTX_SL_F_VER_11;
+ if (h1m->flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ if (h1m->flags & H1_MF_XFER_LEN) {
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m->flags & H1_MF_CHNK)
+ flags |= HTX_SL_F_CHNK;
+ else if (h1m->flags & H1_MF_CLEN) {
+ flags |= HTX_SL_F_CLEN;
+ if (h1m->body_len == 0)
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ else
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ if (h1m->flags & H1_MF_CONN_UPG)
+ flags |= HTX_SL_F_CONN_UPG;
+ return flags;
+}
+
+/* Postprocess the parsed headers for a request and convert them into an htx
+ * message. It returns 1 on success, -1 on parsing error, or -2 if it lacks
+ * room to proceed. Parsing errors are reported by setting the htx flag
+ * HTX_FL_PARSING_ERROR and filling h1m->err_pos and h1m->err_state fields.
+ */
+static int h1_postparse_req_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *htx,
+ struct http_hdr *hdrs, size_t max)
+{
+ struct htx_sl *sl;
+ struct ist meth, uri, vsn;
+ unsigned int flags = 0;
+
+ /* <h1sl> is always defined for a request */
+ meth = h1sl->rq.m;
+ uri = h1sl->rq.u;
+ vsn = h1sl->rq.v;
+
+ /* Be sure the message, once converted into HTX, will not exceed the max
+ * size allowed.
+ */
+ if (h1_eval_htx_size(meth, uri, vsn, hdrs) > max) {
+ if (htx_is_empty(htx))
+ goto error;
+ goto output_full;
+ }
+
+	/* By default, requests always have a known length */
+ h1m->flags |= H1_MF_XFER_LEN;
+
+ if (h1sl->rq.meth == HTTP_METH_CONNECT) {
+ h1m->flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
+ h1m->curr_len = h1m->body_len = 0;
+ }
+ else if (h1sl->rq.meth == HTTP_METH_HEAD)
+ flags |= HTX_SL_F_BODYLESS_RESP;
+
+
+ flags |= h1m_htx_sl_flags(h1m);
+ if ((flags & (HTX_SL_F_CONN_UPG|HTX_SL_F_BODYLESS)) == HTX_SL_F_CONN_UPG) {
+ int i;
+
+ for (i = 0; hdrs[i].n.len; i++) {
+ if (isteqi(hdrs[i].n, ist("upgrade")))
+ hdrs[i].v = IST_NULL;
+ }
+ h1m->flags &=~ H1_MF_CONN_UPG;
+ flags &= ~HTX_SL_F_CONN_UPG;
+ }
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth, uri, vsn);
+ if (!sl || !htx_add_all_headers(htx, hdrs))
+ goto error;
+ sl->info.req.meth = h1sl->rq.meth;
+
+ /* Check if the uri contains an authority. Also check if it contains an
+ * explicit scheme and if it is "http" or "https". */
+ if (h1sl->rq.meth == HTTP_METH_CONNECT)
+ sl->flags |= HTX_SL_F_HAS_AUTHORITY;
+ else if (uri.len && uri.ptr[0] != '/' && uri.ptr[0] != '*') {
+ sl->flags |= (HTX_SL_F_HAS_AUTHORITY|HTX_SL_F_HAS_SCHM);
+ if (uri.len > 4 && (uri.ptr[0] | 0x20) == 'h')
+ sl->flags |= ((uri.ptr[4] == ':') ? HTX_SL_F_SCHM_HTTP : HTX_SL_F_SCHM_HTTPS);
+
+ /* absolute-form target URI present, proceed to scheme-based
+ * normalization */
+ http_scheme_based_normalize(htx);
+ }
+
+ /* If body length cannot be determined, set htx->extra to
+ * HTX_UNKOWN_PAYLOAD_LENGTH. This value is impossible in other cases.
+ */
+ htx->extra = ((h1m->flags & H1_MF_XFER_LEN) ? h1m->curr_len : HTX_UNKOWN_PAYLOAD_LENGTH);
+
+ end:
+ return 1;
+ output_full:
+ h1m_init_req(h1m);
+ h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+ return -2;
+ error:
+ h1m->err_pos = h1m->next;
+ h1m->err_state = h1m->state;
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ return -1;
+}
+
+/* Postprocess the parsed headers for a response and convert them into an htx
+ * message. It returns 1 on success, -1 on parsing error, or -2 if it lacks
+ * room to proceed. Parsing errors are reported by setting the htx flag
+ * HTX_FL_PARSING_ERROR and filling h1m->err_pos and h1m->err_state fields.
+ */
+static int h1_postparse_res_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *htx,
+ struct http_hdr *hdrs, size_t max)
+{
+ struct htx_sl *sl;
+ struct ist vsn, status, reason;
+ unsigned int flags = 0;
+ uint16_t code = 0;
+
+ if (h1sl) {
+ /* For HTTP responses, the start-line was parsed */
+ code = h1sl->st.status;
+ vsn = h1sl->st.v;
+ status = h1sl->st.c;
+ reason = h1sl->st.r;
+ }
+ else {
+		/* For FCGI responses, there is no start-line, but the "Status"
+		 * header must be parsed if found.
+ */
+ int hdr;
+
+ vsn = ((h1m->flags & H1_MF_VER_11) ? ist("HTTP/1.1") : ist("HTTP/1.0"));
+ for (hdr = 0; hdrs[hdr].n.len; hdr++) {
+ if (isteqi(hdrs[hdr].n, ist("status"))) {
+ code = http_parse_status_val(hdrs[hdr].v, &status, &reason);
+ }
+ else if (isteqi(hdrs[hdr].n, ist("location"))) {
+ code = 302;
+ status = ist("302");
+ reason = ist("Found");
+ }
+ }
+ if (!code) {
+ code = 200;
+ status = ist("200");
+ reason = ist("OK");
+ }
+ /* FIXME: Check the codes 1xx ? */
+ }
+
+ /* Be sure the message, once converted into HTX, will not exceed the max
+ * size allowed.
+ */
+ if (h1_eval_htx_size(vsn, status, reason, hdrs) > max) {
+ if (htx_is_empty(htx))
+ goto error;
+ goto output_full;
+ }
+
+ if ((h1m->flags & (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) && code != 101)
+ h1m->flags &= ~(H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET);
+
+ if (((h1m->flags & H1_MF_METH_CONNECT) && code >= 200 && code < 300) || code == 101) {
+ h1m->flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
+ h1m->flags |= H1_MF_XFER_LEN;
+ h1m->curr_len = h1m->body_len = 0;
+ flags |= HTX_SL_F_BODYLESS_RESP;
+ }
+ else if ((h1m->flags & H1_MF_METH_HEAD) || (code >= 100 && code < 200) ||
+ (code == 204) || (code == 304)) {
+ /* Responses known to have no body. */
+ h1m->flags |= H1_MF_XFER_LEN;
+ h1m->curr_len = h1m->body_len = 0;
+ flags |= HTX_SL_F_BODYLESS_RESP;
+ }
+ else if (h1m->flags & (H1_MF_CLEN|H1_MF_CHNK)) {
+ /* Responses with a known body length. */
+ h1m->flags |= H1_MF_XFER_LEN;
+ }
+
+ flags |= h1m_htx_sl_flags(h1m);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, vsn, status, reason);
+ if (!sl || !htx_add_all_headers(htx, hdrs))
+ goto error;
+ sl->info.res.status = code;
+
+ /* If body length cannot be determined, set htx->extra to
+ * HTX_UNKOWN_PAYLOAD_LENGTH. This value is impossible in other cases.
+ */
+ htx->extra = ((h1m->flags & H1_MF_XFER_LEN) ? h1m->curr_len : HTX_UNKOWN_PAYLOAD_LENGTH);
+
+ end:
+ return 1;
+ output_full:
+ h1m_init_res(h1m);
+ h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+ return -2;
+ error:
+ h1m->err_pos = h1m->next;
+ h1m->err_state = h1m->state;
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ return -1;
+}
+
+/* Parse HTTP/1 headers. It returns the number of bytes parsed on success, 0 if
+ * headers are incomplete, -1 if an error occurred or -2 if it needs more space
+ * to proceed while the output buffer is not empty. Parsing errors are reported
+ * by setting the htx flag HTX_FL_PARSING_ERROR and filling h1m->err_pos and
+ * h1m->err_state fields. This function is responsible for updating the parser
+ * state <h1m> and the start-line <h1sl> if not NULL. For requests, <h1sl>
+ * must always be provided. For responses, <h1sl> may be NULL and the <h1m>
+ * flags H1_MF_METH_CONNECT or H1_MF_METH_HEAD may be set.
+ */
+int h1_parse_msg_hdrs(struct h1m *h1m, union h1_sl *h1sl, struct htx *dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t max)
+{
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ int total = 0, ret = 0;
+
+ if (!max || !b_data(srcbuf))
+ goto end;
+
+	/* Realign the input buffer if necessary */
+ if (b_head(srcbuf) + b_data(srcbuf) > b_wrap(srcbuf))
+ b_slow_realign_ofs(srcbuf, trash.area, 0);
+
+ if (!h1sl) {
+		/* If there is no start-line, be sure to only parse the headers */
+ h1m->flags |= H1_MF_HDRS_ONLY;
+ }
+ ret = h1_headers_to_hdr_list(b_peek(srcbuf, ofs), b_tail(srcbuf),
+ hdrs, sizeof(hdrs)/sizeof(hdrs[0]), h1m, h1sl);
+ if (ret <= 0) {
+ /* Incomplete or invalid message. If the input buffer only
+		 * contains headers and is full, which is detected by the buffer
+		 * being full with a zero offset, it's an error because
+ * headers are too large to be handled by the parser. */
+ if (ret < 0 || (!ret && !ofs && !buf_room_for_htx_data(srcbuf)))
+ goto error;
+ goto end;
+ }
+ total = ret;
+
+	/* message headers fully parsed, do some checks to prepare the body
+ * parsing.
+ */
+
+ if (!(h1m->flags & H1_MF_RESP)) {
+ if (!h1_process_req_vsn(h1m, h1sl)) {
+ h1m->err_pos = h1sl->rq.v.ptr - b_head(srcbuf);
+ h1m->err_state = h1m->state;
+ goto vsn_error;
+ }
+ ret = h1_postparse_req_hdrs(h1m, h1sl, dsthtx, hdrs, max);
+ if (ret < 0)
+ return ret;
+ }
+ else {
+ if (h1sl && !h1_process_res_vsn(h1m, h1sl)) {
+ h1m->err_pos = h1sl->st.v.ptr - b_head(srcbuf);
+ h1m->err_state = h1m->state;
+ goto vsn_error;
+ }
+ ret = h1_postparse_res_hdrs(h1m, h1sl, dsthtx, hdrs, max);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* Switch messages without any payload to DONE state */
+ if (((h1m->flags & H1_MF_CLEN) && h1m->body_len == 0) ||
+ ((h1m->flags & (H1_MF_XFER_LEN|H1_MF_CLEN|H1_MF_CHNK)) == H1_MF_XFER_LEN)) {
+ h1m->state = H1_MSG_DONE;
+ dsthtx->flags |= HTX_FL_EOM;
+ }
+
+ end:
+ return total;
+ error:
+ h1m->err_pos = h1m->next;
+ h1m->err_state = h1m->state;
+ vsn_error:
+ dsthtx->flags |= HTX_FL_PARSING_ERROR;
+ return -1;
+
+}
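+
+/* Minimal usage sketch for h1_parse_msg_hdrs() (editorial illustration, not
+ * part of the upstream file; <srcbuf>, <htx> and <max> are assumed to be set
+ * up by the caller, <max> being the room allowed in the HTX message):
+ *
+ *	struct h1m h1m;
+ *	union h1_sl h1sl;
+ *	int ret;
+ *
+ *	h1m_init_req(&h1m);
+ *	ret = h1_parse_msg_hdrs(&h1m, &h1sl, htx, &srcbuf, 0, max);
+ *	if (ret > 0)
+ *		;  // <ret> bytes consumed, h1m.state tells what comes next
+ *	else if (ret == 0)
+ *		;  // headers incomplete, wait for more input
+ *	else
+ *		;  // -1: parsing error, -2: not enough room in <htx>
+ */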
+
+/* Copy data from <srcbuf> into a DATA block in <dsthtx>. If possible, a
+ * zero-copy is performed. It returns the number of bytes copied.
+ */
+static size_t h1_copy_msg_data(struct htx **dsthtx, struct buffer *srcbuf, size_t ofs,
+ size_t count, size_t max, struct buffer *htxbuf)
+{
+ struct htx *tmp_htx = *dsthtx;
+ size_t block1, block2, ret = 0;
+
+ /* Be prepared to create at least one HTX block by reserving its size
+ * and adjust <count> accordingly.
+ */
+ if (max <= sizeof(struct htx_blk))
+ goto end;
+ max -= sizeof(struct htx_blk);
+ if (count > max)
+ count = max;
+
+ /* very often with large files we'll face the following
+ * situation :
+ * - htx is empty and points to <htxbuf>
+ * - count == srcbuf->data
+ * - srcbuf->head == sizeof(struct htx)
+ * => we can swap the buffers and place an htx header into
+ * the target buffer instead
+ */
+ if (unlikely(htx_is_empty(tmp_htx) && count == b_data(srcbuf) &&
+ !ofs && b_head_ofs(srcbuf) == sizeof(struct htx))) {
+ void *raw_area = srcbuf->area;
+ void *htx_area = htxbuf->area;
+ struct htx_blk *blk;
+
+ srcbuf->area = htx_area;
+ htxbuf->area = raw_area;
+ tmp_htx = (struct htx *)htxbuf->area;
+ tmp_htx->size = htxbuf->size - sizeof(*tmp_htx);
+ htx_reset(tmp_htx);
+ b_set_data(htxbuf, b_size(htxbuf));
+
+ blk = htx_add_blk(tmp_htx, HTX_BLK_DATA, count);
+ blk->info += count;
+
+ *dsthtx = tmp_htx;
+ /* nothing else to do, the old buffer now contains an
+ * empty pre-initialized HTX header
+ */
+ return count;
+ }
+
+ /* * First block is the copy of contiguous data starting at offset <ofs>
+ * with <count> as max. <max> is updated accordingly
+ *
+ * * Second block is the remaining (count - block1) if <max> is large
+ * enough. Another HTX block is reserved.
+ */
+ block1 = b_contig_data(srcbuf, ofs);
+ block2 = 0;
+ if (block1 > count)
+ block1 = count;
+ max -= block1;
+
+ if (max > sizeof(struct htx_blk)) {
+ block2 = count - block1;
+ max -= sizeof(struct htx_blk);
+ if (block2 > max)
+ block2 = max;
+ }
+
+ ret = htx_add_data(tmp_htx, ist2(b_peek(srcbuf, ofs), block1));
+ if (ret == block1 && block2)
+ ret += htx_add_data(tmp_htx, ist2(b_orig(srcbuf), block2));
+ end:
+ return ret;
+}
+
+static const char hextable[] = {
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
+ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+};
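+
+/* Editorial note: the table above maps the ASCII code of each hex digit to
+ * its value (0..15) and everything else to -1. This allows the chunk-size
+ * parser below to validate and convert a character with a single lookup:
+ * valid digits have a zero high nibble, so the "c & 0xF0" test used below
+ * is non-zero exactly for non-hex characters.
+ */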
+
+/* Generic function to parse the current HTTP chunk. It may be used to parse
+ * any kind of chunk, including incomplete or split chunks occurring when the
+ * buffer wraps. This version tries to perform zero-copy on large
+ * chunks if possible.
+ */
+static size_t h1_parse_chunk(struct h1m *h1m, struct htx **dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t *max,
+ struct buffer *htxbuf)
+{
+ uint64_t chksz;
+ size_t sz, used, lmax, total = 0;
+ int ret = 0;
+
+ lmax = *max;
+ switch (h1m->state) {
+ case H1_MSG_DATA:
+ new_chunk:
+ used = htx_used_space(*dsthtx);
+ if (b_data(srcbuf) == ofs || lmax <= sizeof(struct htx_blk))
+ break;
+
+ sz = b_data(srcbuf) - ofs;
+ if (unlikely(sz > h1m->curr_len))
+ sz = h1m->curr_len;
+ sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, lmax, htxbuf);
+ lmax -= htx_used_space(*dsthtx) - used;
+ ofs += sz;
+ total += sz;
+ h1m->curr_len -= sz;
+ if (h1m->curr_len)
+ break;
+
+ h1m->state = H1_MSG_CHUNK_CRLF;
+ __fallthrough;
+
+ case H1_MSG_CHUNK_CRLF:
+ ret = h1_skip_chunk_crlf(srcbuf, ofs, b_data(srcbuf));
+ if (ret <= 0)
+ break;
+ ofs += ret;
+ total += ret;
+
+		/* Don't parse the next chunk here: returning in H1_MSG_CHUNK_SIZE
+		 * state lets the optimized contiguous-chunk parser handle it if
+		 * possible.
+		 */
+ h1m->state = H1_MSG_CHUNK_SIZE;
+ break;
+
+ case H1_MSG_CHUNK_SIZE:
+ ret = h1_parse_chunk_size(srcbuf, ofs, b_data(srcbuf), &chksz);
+ if (ret <= 0)
+ break;
+ h1m->state = ((!chksz) ? H1_MSG_TRAILERS : H1_MSG_DATA);
+ h1m->curr_len = chksz;
+ h1m->body_len += chksz;
+ ofs += ret;
+ total += ret;
+
+ if (h1m->curr_len) {
+ h1m->state = H1_MSG_DATA;
+ goto new_chunk;
+ }
+ h1m->state = H1_MSG_TRAILERS;
+ break;
+
+ default:
+ /* unexpected */
+ ret = -1;
+ break;
+ }
+
+ if (ret < 0) {
+ (*dsthtx)->flags |= HTX_FL_PARSING_ERROR;
+ h1m->err_state = h1m->state;
+ h1m->err_pos = ofs;
+ total = 0;
+ }
+
+ /* Don't forget to update htx->extra */
+ (*dsthtx)->extra = h1m->curr_len;
+ *max = lmax;
+ return total;
+}
+
+/* Parses full contiguous HTTP chunks. This version is optimized for small
+ * chunks and does not perform zero-copy. It must be called in
+ * H1_MSG_CHUNK_SIZE state. Be careful if you change something in this
+ * function. It is really sensitive, any change may have an impact on
+ * performance.
+ */
+static size_t h1_parse_full_contig_chunks(struct h1m *h1m, struct htx **dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t *max,
+ struct buffer *htxbuf)
+{
+ char *start, *end, *dptr;
+ ssize_t dpos, ridx, save;
+ size_t lmax, total = 0;
+ uint64_t chksz;
+ struct htx_ret htxret;
+
+ lmax = *max;
+ if (lmax <= sizeof(struct htx_blk))
+ goto out;
+
+ /* source info :
+ * start : pointer at <ofs> position
+ * end : pointer marking the end of data to parse
+ * ridx : the reverse index (negative) marking the parser position (end[ridx])
+ */
+ ridx = -b_contig_data(srcbuf, ofs);
+ if (!ridx)
+ goto out;
+ start = b_peek(srcbuf, ofs);
+ end = start - ridx;
+
+ /* Reserve the maximum possible size for the data */
+ htxret = htx_reserve_max_data(*dsthtx);
+ if (!htxret.blk)
+ goto out;
+
+ /* destination info :
+ * dptr : pointer on the beginning of the data
+ * dpos : current position where to copy data
+ */
+ dptr = htx_get_blk_ptr(*dsthtx, htxret.blk);
+ dpos = htxret.ret;
+
+ /* Empty DATA block is not possible, thus if <dpos> is the beginning of
+ * the block, it means it is a new block. We can remove the block size
+ * from <max>. Then we must adjust it if it exceeds the free size in the
+ * block.
+ */
+ if (!dpos)
+ lmax -= sizeof(struct htx_blk);
+ if (lmax > htx_get_blksz(htxret.blk) - dpos)
+ lmax = htx_get_blksz(htxret.blk) - dpos;
+
+ while (1) {
+ /* The chunk size is in the following form, though we are only
+ * interested in the size and CRLF :
+ * 1*HEXDIGIT *WSP *[ ';' extensions ] CRLF
+ */
+ chksz = 0;
+ save = ridx; /* Save the parser position to rewind if necessary */
+ while (1) {
+ int c;
+
+ if (!ridx)
+ goto end_parsing;
+
+ /* Convert current character */
+ c = hextable[(unsigned char)end[ridx]];
+
+ /* not a hex digit anymore */
+ if (c & 0xF0)
+ break;
+
+ /* Update current chunk size */
+ chksz = (chksz << 4) + c;
+
+ if (unlikely(chksz & 0xF0000000000000ULL)) {
+				/* Don't accept more than 13 hex digits (2^52 - 1)
+				 * so as to never feed possibly bogus values to
+				 * languages that use floats for their integers
+ */
+ goto parsing_error;
+ }
+ ++ridx;
+ }
+
+ if (unlikely(chksz > lmax))
+ goto end_parsing;
+
+ if (unlikely(ridx == save)) {
+ /* empty size not allowed */
+ goto parsing_error;
+ }
+
+ /* Skip spaces */
+ while (HTTP_IS_SPHT(end[ridx])) {
+ if (!++ridx)
+ goto end_parsing;
+ }
+
+ /* Up to there, we know that at least one byte is present. Check
+ * for the end of chunk size.
+ */
+ while (1) {
+ if (likely(end[ridx] == '\r')) {
+ /* Parse CRLF */
+ if (!++ridx)
+ goto end_parsing;
+ if (unlikely(end[ridx] != '\n')) {
+ /* CR must be followed by LF */
+ goto parsing_error;
+ }
+
+ /* done */
+ ++ridx;
+ break;
+ }
+ else if (likely(end[ridx] == ';')) {
+ /* chunk extension, ends at next CRLF */
+ if (!++ridx)
+ goto end_parsing;
+ while (!HTTP_IS_CRLF(end[ridx])) {
+ if (!++ridx)
+ goto end_parsing;
+ }
+ /* we have a CRLF now, loop above */
+ continue;
+ }
+ else {
+ /* all other characters are unexpected, especially LF alone */
+ goto parsing_error;
+ }
+ }
+
+ /* Exit if it is the last chunk */
+ if (unlikely(!chksz)) {
+ h1m->state = H1_MSG_TRAILERS;
+ save = ridx;
+ goto end_parsing;
+ }
+
+ /* Now check if the whole chunk is here (including the CRLF at
+ * the end), otherwise we switch in H1_MSG_DATA state.
+ */
+ if (chksz + 2 > -ridx) {
+ h1m->curr_len = chksz;
+ h1m->body_len += chksz;
+ h1m->state = H1_MSG_DATA;
+ (*dsthtx)->extra = h1m->curr_len;
+ save = ridx;
+ goto end_parsing;
+ }
+
+ memcpy(dptr + dpos, end + ridx, chksz);
+ h1m->body_len += chksz;
+ lmax -= chksz;
+ dpos += chksz;
+ ridx += chksz;
+
+ /* Parse CRLF */
+ if (unlikely(end[ridx] != '\r')) {
+ h1m->state = H1_MSG_CHUNK_CRLF;
+ goto parsing_error;
+ }
+ ++ridx;
+ if (end[ridx] != '\n') {
+ h1m->state = H1_MSG_CHUNK_CRLF;
+ goto parsing_error;
+ }
+ ++ridx;
+ }
+
+ end_parsing:
+ ridx = save;
+
+ /* Adjust the HTX block size or remove the block if nothing was copied
+	 * (empty HTX data blocks are not supported).
+ */
+ if (!dpos)
+ htx_remove_blk(*dsthtx, htxret.blk);
+ else
+ htx_change_blk_value_len(*dsthtx, htxret.blk, dpos);
+ total = end + ridx - start;
+ *max = lmax;
+
+ out:
+ return total;
+
+ parsing_error:
+ (*dsthtx)->flags |= HTX_FL_PARSING_ERROR;
+ h1m->err_state = h1m->state;
+ h1m->err_pos = ofs + end + ridx - start;
+ return 0;
+}
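+
+/* For reference (editorial note, not part of the upstream file), the chunked
+ * coding handled by the two functions above looks like this on the wire:
+ *
+ *	4\r\n		<- chunk size in hex, optionally followed by
+ *			   spaces and ";extension" up to the CRLF
+ *	Wiki\r\n	<- chunk payload followed by CRLF
+ *	0\r\n		<- last chunk always has size 0
+ *	name: value\r\n	<- optional trailers (H1_MSG_TRAILERS)
+ *	\r\n		<- empty line ending the message
+ *
+ * The fast path above only copies chunks that are complete and contiguous in
+ * the source buffer; anything else falls back to the generic h1_parse_chunk().
+ */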
+
+/* Parse HTTP chunks. This function relies on an optimized function to parse
+ * contiguous chunks if possible. Otherwise, when a chunk is incomplete or when
+ * the underlying buffer is wrapping, a generic function is used.
+ */
+static size_t h1_parse_msg_chunks(struct h1m *h1m, struct htx **dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t max,
+ struct buffer *htxbuf)
+{
+ size_t ret, total = 0;
+
+ while (ofs < b_data(srcbuf)) {
+ ret = 0;
+
+ /* First parse full contiguous chunks. It is only possible if we
+ * are waiting for the next chunk size.
+ */
+ if (h1m->state == H1_MSG_CHUNK_SIZE) {
+ ret = h1_parse_full_contig_chunks(h1m, dsthtx, srcbuf, ofs, &max, htxbuf);
+ /* exit on error */
+ if (!ret && (*dsthtx)->flags & HTX_FL_PARSING_ERROR) {
+ total = 0;
+ break;
+ }
+ /* or let a chance to parse remaining data */
+ total += ret;
+ ofs += ret;
+ ret = 0;
+ }
+
+ /* If some data remains, try to parse it using the generic
+ * function handling incomplete chunks and split chunks
+ * because of a wrapping buffer.
+ */
+ if (h1m->state < H1_MSG_TRAILERS && ofs < b_data(srcbuf)) {
+ ret = h1_parse_chunk(h1m, dsthtx, srcbuf, ofs, &max, htxbuf);
+ total += ret;
+ ofs += ret;
+ }
+
+ /* nothing more was parsed or parsing was stopped on incomplete
+ * chunk, we can exit, handling parsing error if necessary.
+ */
+ if (!ret || h1m->state != H1_MSG_CHUNK_SIZE) {
+ if ((*dsthtx)->flags & HTX_FL_PARSING_ERROR)
+ total = 0;
+ break;
+ }
+ }
+
+ return total;
+}
+
+/* Parse HTTP/1 body. It returns the number of bytes parsed if > 0, or 0 if it
+ * couldn't proceed. Parsing errors are reported by setting the htx flag
+ * HTX_FL_PARSING_ERROR and filling h1m->err_pos and h1m->err_state fields. This
+ * function is responsible for updating the parser state <h1m>.
+ */
+size_t h1_parse_msg_data(struct h1m *h1m, struct htx **dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t max,
+ struct buffer *htxbuf)
+{
+ size_t sz, total = 0;
+
+ if (b_data(srcbuf) == ofs)
+ return 0;
+
+ if (h1m->flags & H1_MF_CLEN) {
+ /* content-length: read only h2m->body_len */
+ sz = b_data(srcbuf) - ofs;
+ if (unlikely(sz > h1m->curr_len))
+ sz = h1m->curr_len;
+ sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, max, htxbuf);
+ h1m->curr_len -= sz;
+ (*dsthtx)->extra = h1m->curr_len;
+ total += sz;
+ if (!h1m->curr_len) {
+ h1m->state = H1_MSG_DONE;
+ (*dsthtx)->flags |= HTX_FL_EOM;
+ }
+ }
+ else if (h1m->flags & H1_MF_CHNK) {
+ /* te:chunked : parse chunks */
+ total += h1_parse_msg_chunks(h1m, dsthtx, srcbuf, ofs, max, htxbuf);
+ }
+ else if (h1m->flags & H1_MF_XFER_LEN) {
+ /* XFER_LEN is set but not CLEN nor CHNK, it means there is no
+ * body. Switch the message in DONE state
+ */
+ h1m->state = H1_MSG_DONE;
+ (*dsthtx)->flags |= HTX_FL_EOM;
+ }
+ else {
+ /* no content length, read till SHUTW */
+ sz = b_data(srcbuf) - ofs;
+ sz = h1_copy_msg_data(dsthtx, srcbuf, ofs, sz, max, htxbuf);
+ total += sz;
+ }
+
+ return total;
+}
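+
+/* Editorial note: the dispatch above follows the message-length rules of
+ * RFC 7230#3.3.3: Content-Length bounds the body (H1_MF_CLEN),
+ * "Transfer-Encoding: chunked" delimits it (H1_MF_CHNK), a known zero-length
+ * body ends the message immediately (H1_MF_XFER_LEN alone), and otherwise
+ * the body simply runs until the connection closes.
+ */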
+
+/* Parse HTTP/1 trailers. It returns the number of bytes parsed on success, 0 if
+ * trailers are incomplete, -1 if an error occurred or -2 if it needs more space
+ * to proceed while the output buffer is not empty. Parsing errors are reported
+ * by setting the htx flag HTX_FL_PARSING_ERROR and filling h1m->err_pos and
+ * h1m->err_state fields. This function is responsible for updating the parser
+ * state <h1m>.
+ */
+int h1_parse_msg_tlrs(struct h1m *h1m, struct htx *dsthtx,
+ struct buffer *srcbuf, size_t ofs, size_t max)
+{
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ struct h1m tlr_h1m;
+ int ret = 0;
+
+ if (b_data(srcbuf) == ofs) {
+ /* Nothing to parse */
+ goto end;
+ }
+ if (!max) {
+ /* No more room */
+ goto output_full;
+ }
+
+	/* Realign the input buffer if necessary */
+ if (b_peek(srcbuf, ofs) > b_tail(srcbuf))
+ b_slow_realign_ofs(srcbuf, trash.area, 0);
+
+ tlr_h1m.flags = (H1_MF_NO_PHDR|H1_MF_HDRS_ONLY);
+ tlr_h1m.err_pos = h1m->err_pos;
+ ret = h1_headers_to_hdr_list(b_peek(srcbuf, ofs), b_tail(srcbuf),
+ hdrs, sizeof(hdrs)/sizeof(hdrs[0]), &tlr_h1m, NULL);
+ if (ret <= 0) {
+ /* Incomplete or invalid trailers. If the input buffer only
+		 * contains trailers and is full, which is detected by the buffer
+		 * being full with a zero offset, it's an error because
+ * trailers are too large to be handled by the parser. */
+ if (ret < 0 || (!ret && !ofs && !buf_room_for_htx_data(srcbuf)))
+ goto error;
+ goto end;
+ }
+
+	/* message trailers fully parsed. */
+ if (h1_eval_htx_hdrs_size(hdrs) > max) {
+ if (htx_is_empty(dsthtx))
+ goto error;
+ goto output_full;
+ }
+
+ if (!htx_add_all_trailers(dsthtx, hdrs))
+ goto error;
+
+ h1m->state = H1_MSG_DONE;
+ dsthtx->flags |= HTX_FL_EOM;
+
+ end:
+ return ret;
+ output_full:
+ return -2;
+ error:
+ h1m->err_state = h1m->state;
+ h1m->err_pos = h1m->next;
+ dsthtx->flags |= HTX_FL_PARSING_ERROR;
+ return -1;
+}
+
+/* Appends the H1 representation of the request line <sl> to the chunk <chk>. It
+ * returns 1 if data are successfully appended, otherwise it returns 0.
+ */
+int h1_format_htx_reqline(const struct htx_sl *sl, struct buffer *chk)
+{
+ struct ist uri;
+ size_t sz = chk->data;
+
+ uri = h1_get_uri(sl);
+ if (!chunk_memcat(chk, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)) ||
+ !chunk_memcat(chk, " ", 1) ||
+ !chunk_memcat(chk, uri.ptr, uri.len) ||
+ !chunk_memcat(chk, " ", 1))
+ goto full;
+
+ if (sl->flags & HTX_SL_F_VER_11) {
+ if (!chunk_memcat(chk, "HTTP/1.1", 8))
+ goto full;
+ }
+ else {
+ if (!chunk_memcat(chk, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)))
+ goto full;
+ }
+
+ if (!chunk_memcat(chk, "\r\n", 2))
+ goto full;
+
+ return 1;
+
+ full:
+ chk->data = sz;
+ return 0;
+}
+
+/* Appends the H1 representation of the status line <sl> to the chunk <chk>. It
+ * returns 1 if data are successfully appended, otherwise it returns 0.
+ */
+int h1_format_htx_stline(const struct htx_sl *sl, struct buffer *chk)
+{
+ size_t sz = chk->data;
+
+ if (HTX_SL_LEN(sl) + 4 > b_room(chk))
+ return 0;
+
+ if (sl->flags & HTX_SL_F_VER_11) {
+ if (!chunk_memcat(chk, "HTTP/1.1", 8))
+ goto full;
+ }
+ else {
+ if (!chunk_memcat(chk, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl)))
+ goto full;
+ }
+ if (!chunk_memcat(chk, " ", 1) ||
+ !chunk_memcat(chk, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl)) ||
+ !chunk_memcat(chk, " ", 1) ||
+ !chunk_memcat(chk, HTX_SL_RES_RPTR(sl), HTX_SL_RES_RLEN(sl)) ||
+ !chunk_memcat(chk, "\r\n", 2))
+ goto full;
+
+ return 1;
+
+ full:
+ chk->data = sz;
+ return 0;
+}
+
+/* Appends the H1 representation of the header <n> with the value <v> to the
+ * chunk <chk>. It returns 1 if data are successfully appended, otherwise it
+ * returns 0.
+ */
+int h1_format_htx_hdr(const struct ist n, const struct ist v, struct buffer *chk)
+{
+ size_t sz = chk->data;
+
+ if (n.len + v.len + 4 > b_room(chk))
+ return 0;
+
+ if (!chunk_memcat(chk, n.ptr, n.len) ||
+ !chunk_memcat(chk, ": ", 2) ||
+ !chunk_memcat(chk, v.ptr, v.len) ||
+ !chunk_memcat(chk, "\r\n", 2))
+ goto full;
+
+ return 1;
+
+ full:
+ chk->data = sz;
+ return 0;
+}
+
+/* Appends the H1 representation of the data <data> to the chunk <chk>. If
+ * <chunked> is non-zero, it emits HTTP/1 chunk-encoded data. It returns 1 if
+ * data are successfully appended, otherwise it returns 0.
+ */
+int h1_format_htx_data(const struct ist data, struct buffer *chk, int chunked)
+{
+ size_t sz = chk->data;
+
+ if (chunked) {
+ uint32_t chksz;
+ char tmp[10];
+ char *beg, *end;
+
+ chksz = data.len;
+
+ beg = end = tmp+10;
+ *--beg = '\n';
+ *--beg = '\r';
+ do {
+ *--beg = hextab[chksz & 0xF];
+ } while (chksz >>= 4);
+
+ if (!chunk_memcat(chk, beg, end - beg) ||
+ !chunk_memcat(chk, data.ptr, data.len) ||
+ !chunk_memcat(chk, "\r\n", 2))
+ goto full;
+ }
+ else {
+ if (!chunk_memcat(chk, data.ptr, data.len))
+ return 0;
+ }
+
+ return 1;
+
+ full:
+ chk->data = sz;
+ return 0;
+}
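+
+/* As an illustration (editorial note, not part of the upstream file), with
+ * <chunked> set the function above turns the 5-byte data "Hello" into:
+ *
+ *	5\r\nHello\r\n
+ *
+ * the hex size being built backwards in <tmp> and emitted first, then the
+ * payload, then the trailing CRLF. The final 0-sized chunk is not emitted
+ * here; that is the caller's job when the message ends.
+ */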
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/h2.c b/src/h2.c
new file mode 100644
index 0000000..9c60cc6
--- /dev/null
+++ b/src/h2.c
@@ -0,0 +1,814 @@
+/*
+ * HTTP/2 protocol processing
+ *
+ * Copyright 2017 Willy Tarreau <w@1wt.eu>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <haproxy/api.h>
+#include <haproxy/global.h>
+#include <haproxy/h2.h>
+#include <haproxy/http-hdr-t.h>
+#include <haproxy/http.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <import/ist.h>
+
+
+struct h2_frame_definition h2_frame_definition[H2_FT_ENTRIES] = {
+ [H2_FT_DATA ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 0, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_HEADERS ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 1, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_PRIORITY ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 5, .max_len = 5, },
+ [H2_FT_RST_STREAM ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 4, .max_len = 4, },
+ [H2_FT_SETTINGS ] = { .dir = 3, .min_id = 0, .max_id = 0, .min_len = 0, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_PUSH_PROMISE ] = { .dir = 0, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 4, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_PING ] = { .dir = 3, .min_id = 0, .max_id = 0, .min_len = 8, .max_len = 8, },
+ [H2_FT_GOAWAY ] = { .dir = 3, .min_id = 0, .max_id = 0, .min_len = 8, .max_len = H2_MAX_FRAME_LEN, },
+ [H2_FT_WINDOW_UPDATE] = { .dir = 3, .min_id = 0, .max_id = H2_MAX_STREAM_ID, .min_len = 4, .max_len = 4, },
+ [H2_FT_CONTINUATION ] = { .dir = 3, .min_id = 1, .max_id = H2_MAX_STREAM_ID, .min_len = 0, .max_len = H2_MAX_FRAME_LEN, },
+};
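+
+/* Editorial sketch (not part of the upstream file): a receiver can validate
+ * any incoming frame header against the table above before processing it,
+ * assuming <type> was already checked against H2_FT_ENTRIES:
+ *
+ *	const struct h2_frame_definition *fd = &h2_frame_definition[type];
+ *
+ *	if (len < fd->min_len || len > fd->max_len)
+ *		return FRAME_SIZE_ERROR;	// hypothetical error code
+ *	if (sid < fd->min_id || sid > fd->max_id)
+ *		return PROTOCOL_ERROR;		// hypothetical error code
+ */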
+
+/* Prepare the request line into <htx> from pseudo headers stored in <phdr[]>.
+ * <fields> indicates what was found so far. This should be called once at the
+ * detection of the first general header field or at the end of the request if
+ * no general header field was found yet. Returns the created start line on
+ * success, or NULL on failure. Upon success, <msgf> is updated with a few
+ * H2_MSGF_* flags indicating what was found while parsing.
+ *
+ * The rules below deserve a bit of explanation. There tends to be some
+ * confusion regarding H2's authority vs the Host header. They are different
+ * though may sometimes be exchanged. In H2, the request line is broken into :
+ * - :method
+ * - :scheme
+ * - :authority
+ * - :path
+ *
+ * An equivalent HTTP/1.x absolute-form request would then look like :
+ * <:method> <:scheme>://<:authority><:path> HTTP/x.y
+ *
+ * Except for CONNECT which doesn't have scheme nor path and looks like :
+ * <:method> <:authority> HTTP/x.y
+ *
+ * It's worth noting that H2 still supports an encoding to map H1 origin-form
+ * and asterisk-form requests. These ones do not specify the authority. However
+ * in H2 they must still specify the scheme, which is not present in H1. Also,
+ * when encoding an absolute-form H1 request without a path, the path
+ * automatically becomes "/" except for the OPTIONS method where it
+ * becomes "*".
+ *
+ * As such it is explicitly permitted for an H2 client to send a request
+ * featuring a Host header and no :authority, though it's not the recommended
+ * way to use H2 for a client. It is however the only permitted way to encode
+ * an origin-form H1 request over H2. Thus we need to respect such differences
+ * as much as possible when re-encoding the H2 request into HTX.
+ */
+static struct htx_sl *h2_prepare_htx_reqline(uint32_t fields, struct ist *phdr, struct htx *htx, unsigned int *msgf)
+{
+ struct ist uri, meth_sl;
+ unsigned int flags = HTX_SL_F_NONE;
+ struct htx_sl *sl;
+ enum http_meth_t meth;
+ size_t i;
+
+ if ((fields & H2_PHDR_FND_METH) && isteq(phdr[H2_PHDR_IDX_METH], ist("CONNECT"))) {
+ if (fields & H2_PHDR_FND_PROT) {
+ /* rfc 8441 Extended Connect Protocol
+ * #4 :scheme and :path must be present, as well as
+ * :authority like all h2 requests
+ */
+ if (!(fields & H2_PHDR_FND_SCHM)) {
+ /* missing scheme */
+ goto fail;
+ }
+ else if (!(fields & H2_PHDR_FND_PATH)) {
+ /* missing path */
+ goto fail;
+ }
+ else if (!(fields & H2_PHDR_FND_AUTH)) {
+ /* missing authority */
+ goto fail;
+ }
+
+ flags |= HTX_SL_F_HAS_SCHM;
+ if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("http")))
+ flags |= HTX_SL_F_SCHM_HTTP;
+ else if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("https")))
+ flags |= HTX_SL_F_SCHM_HTTPS;
+ else if (!http_validate_scheme(phdr[H2_PHDR_IDX_SCHM]))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+
+ meth_sl = ist("GET");
+
+ *msgf |= H2_MSGF_EXT_CONNECT;
+ /* no ES on the HEADERS frame but no body either for
+ * Extended CONNECT */
+ *msgf &= ~H2_MSGF_BODY;
+ }
+ else {
+ /* RFC 7540 #8.2.6 regarding CONNECT: ":scheme" and ":path"
+ * MUST be omitted ; ":authority" contains the host and port
+ * to connect to.
+ */
+ if (fields & H2_PHDR_FND_SCHM) {
+ /* scheme not allowed */
+ goto fail;
+ }
+ else if (fields & H2_PHDR_FND_PATH) {
+ /* path not allowed */
+ goto fail;
+ }
+ else if (!(fields & H2_PHDR_FND_AUTH)) {
+ /* missing authority */
+ goto fail;
+ }
+
+ meth_sl = phdr[H2_PHDR_IDX_METH];
+ }
+
+ *msgf |= H2_MSGF_BODY_TUNNEL;
+ }
+ else if ((fields & (H2_PHDR_FND_METH|H2_PHDR_FND_SCHM|H2_PHDR_FND_PATH)) !=
+ (H2_PHDR_FND_METH|H2_PHDR_FND_SCHM|H2_PHDR_FND_PATH)) {
+ /* RFC 7540 #8.1.2.3 : all requests MUST include exactly one
+ * valid value for the ":method", ":scheme" and ":path" phdr
+ * unless it is a CONNECT request.
+ */
+ if (!(fields & H2_PHDR_FND_METH)) {
+ /* missing method */
+ goto fail;
+ }
+ else if (!(fields & H2_PHDR_FND_SCHM)) {
+ /* missing scheme */
+ goto fail;
+ }
+ else {
+ /* missing path */
+ goto fail;
+ }
+ }
+ else { /* regular methods */
+ /* RFC3986#6.2.2.1: scheme is case-insensitive. We need to
+ * classify the scheme as "present/http", "present/https",
+ * "present/other", "absent" so as to decide whether or not
+ * we're facing a normalized URI that will have to be encoded
+ * in origin or absolute form. Indeed, 7540#8.1.2.3 says that
+ * clients should use the absolute form, thus we cannot infer
+ * whether or not the client wanted to use a proxy here.
+ */
+ flags |= HTX_SL_F_HAS_SCHM;
+ if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("http")))
+ flags |= HTX_SL_F_SCHM_HTTP;
+ else if (isteqi(phdr[H2_PHDR_IDX_SCHM], ist("https")))
+ flags |= HTX_SL_F_SCHM_HTTPS;
+ else if (!http_validate_scheme(phdr[H2_PHDR_IDX_SCHM]))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+
+ meth_sl = phdr[H2_PHDR_IDX_METH];
+ }
+
+ if (fields & H2_PHDR_FND_PATH) {
+ /* 7540#8.1.2.3: :path must not be empty, and must be either
+ * '*' or an RFC3986 "path-absolute" starting with a "/" but
+ * not with "//".
+ * However, this "path-absolute" was a mistake which was
+ * later fixed in http2bis as "absolute-path" to match
+ * HTTP/1, thus also allowing "//".
+ */
+ if (unlikely(!phdr[H2_PHDR_IDX_PATH].len))
+ goto fail;
+ else if (unlikely(phdr[H2_PHDR_IDX_PATH].ptr[0] != '/')) {
+ if (!isteq(phdr[H2_PHDR_IDX_PATH], ist("*")))
+ goto fail;
+ }
+ }
+
+ if (!(flags & HTX_SL_F_HAS_SCHM)) {
+ /* no scheme, use authority only (CONNECT) */
+ uri = phdr[H2_PHDR_IDX_AUTH];
+ flags |= HTX_SL_F_HAS_AUTHORITY;
+ }
+ else if (fields & H2_PHDR_FND_AUTH) {
+ /* authority is present, let's use the absolute form. We simply
+		 * use the trash to concatenate them, since all of them MUST fit
+		 * within a buffer size anyway as that's where they come from.
+ */
+ uri = ist2bin(trash.area, phdr[H2_PHDR_IDX_SCHM]);
+ istcat(&uri, ist("://"), trash.size);
+ istcat(&uri, phdr[H2_PHDR_IDX_AUTH], trash.size);
+ if (!isteq(phdr[H2_PHDR_IDX_PATH], ist("*")))
+ istcat(&uri, phdr[H2_PHDR_IDX_PATH], trash.size);
+ flags |= HTX_SL_F_HAS_AUTHORITY;
+
+ if (flags & (HTX_SL_F_SCHM_HTTP|HTX_SL_F_SCHM_HTTPS)) {
+ /* we don't know if it was originally an absolute or a
+ * relative request because newer versions of HTTP use
+ * the absolute URI format by default, which we call
+ * the normalized URI format internally. This is the
+ * strongly recommended way of sending a request for
+ * a regular client, so we cannot distinguish this
+ * from a request intended for a proxy. For other
+ * schemes however there is no doubt.
+ */
+ flags |= HTX_SL_F_NORMALIZED_URI;
+ }
+ }
+ else {
+ /* usual schemes with or without authority, use origin form */
+ uri = phdr[H2_PHDR_IDX_PATH];
+ if (fields & H2_PHDR_FND_AUTH)
+ flags |= HTX_SL_F_HAS_AUTHORITY;
+ }
+
+ /* The method is a non-empty token (RFC7231#4.1) */
+ if (!meth_sl.len)
+ goto fail;
+ for (i = 0; i < meth_sl.len; i++) {
+ if (!HTTP_IS_TOKEN(meth_sl.ptr[i]))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ }
+
+ /* make sure the final URI isn't empty. Note that 7540#8.1.2.3 states
+ * that :path must not be empty.
+ */
+ if (!uri.len)
+ goto fail;
+
+ /* The final URI must not contain LWS nor CTL characters */
+ for (i = 0; i < uri.len; i++) {
+ unsigned char c = uri.ptr[i];
+ if (HTTP_IS_LWS(c) || HTTP_IS_CTL(c))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ }
+
+ /* Set HTX start-line flags */
+ flags |= HTX_SL_F_VER_11; // V2 in fact
+ flags |= HTX_SL_F_XFER_LEN; // xfer len always known with H2
+
+
+ meth = find_http_meth(meth_sl.ptr, meth_sl.len);
+ if (meth == HTTP_METH_HEAD) {
+ *msgf |= H2_MSGF_BODYLESS_RSP;
+ flags |= HTX_SL_F_BODYLESS_RESP;
+ }
+
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth_sl, uri, ist("HTTP/2.0"));
+ if (!sl)
+ goto fail;
+ sl->info.req.meth = meth;
+ return sl;
+ fail:
+ return NULL;
+}
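+
+/* For illustration (editorial note, not part of the upstream file): a
+ * regular request carrying
+ *
+ *	:method = GET, :scheme = https, :authority = example.com, :path = /
+ *
+ * is rebuilt by the function above as the absolute-form start line
+ * "GET https://example.com/ HTTP/2.0", with HTX_SL_F_HAS_AUTHORITY,
+ * HTX_SL_F_HAS_SCHM, HTX_SL_F_SCHM_HTTPS and HTX_SL_F_NORMALIZED_URI set.
+ * A CONNECT request instead carries only ":authority", which becomes the
+ * URI as-is.
+ */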
+
+/* Takes an H2 request present in the headers list <list> terminated by a name
+ * being <NULL,0> and emits the equivalent HTX request according to the rules
+ * documented in RFC7540 #8.1.2. The output contents are emitted in <htx>, and
+ * non-zero is returned if some bytes were emitted. In case of error, a
+ * negative error code is returned.
+ *
+ * Upon success, <msgf> is filled with a few H2_MSGF_* flags indicating what
+ * was found while parsing. The caller must initialize it to zero, or to
+ * H2_MSGF_BODY if a body is detected (!ES).
+ *
+ * The headers list <list> must be composed of :
+ * - n.name != NULL, n.len > 0 : literal header name
+ * - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
+ * among H2_PHDR_IDX_*
+ * - n.name ignored, n.len == 0 : end of list
+ * - in all cases except the end of list, v.name and v.len must designate a
+ * valid value.
+ *
+ * The Cookie header will be reassembled at the end, and for this, the <list>
+ * will be used to create a linked list, so its contents may be destroyed.
+ *
+ * When <relaxed> is non-null, some non-dangerous checks will be ignored. This
+ * is in order to satisfy "option accept-invalid-http-request" for
+ * interoperability purposes.
+ */
+int h2_make_htx_request(struct http_hdr *list, struct htx *htx, unsigned int *msgf, unsigned long long *body_len, int relaxed)
+{
+ struct ist phdr_val[H2_PHDR_NUM_ENTRIES];
+ uint32_t fields; /* bit mask of H2_PHDR_FND_* */
+ uint32_t idx;
+ int ck, lck; /* cookie index and last cookie index */
+ int phdr;
+ int ret;
+ int i;
+ struct htx_sl *sl = NULL;
+ unsigned int sl_flags = 0;
+ const char *ctl;
+
+ lck = ck = -1; // no cookie for now
+ fields = 0;
+ for (idx = 0; list[idx].n.len != 0; idx++) {
+ if (!isttest(list[idx].n)) {
+ /* this is an indexed pseudo-header */
+ phdr = list[idx].n.len;
+ }
+ else {
+ /* this can be any type of header */
+ /* RFC7540#8.1.2: upper case not allowed in header field names.
+ * #10.3: header names must be valid (i.e. match a token).
+ * For pseudo-headers we check from 2nd char and for other ones
+ * from the first char, because HTTP_IS_TOKEN() also excludes
+ * the colon.
+ */
+ phdr = h2_str_to_phdr(list[idx].n);
+
+ for (i = !!phdr; i < list[idx].n.len; i++)
+ if ((uint8_t)(list[idx].n.ptr[i] - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
+ goto fail;
+ }
+
+ /* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
+ * rejecting NUL, CR and LF characters. For :path we reject all CTL
+ * chars, spaces, and '#'.
+ */
+ if (phdr == H2_PHDR_IDX_PATH && !relaxed) {
+ ctl = ist_find_range(list[idx].v, 0, '#');
+ if (unlikely(ctl) && http_path_has_forbidden_char(list[idx].v, ctl))
+ goto fail;
+ } else {
+ ctl = ist_find_ctl(list[idx].v);
+ if (unlikely(ctl) && http_header_has_forbidden_char(list[idx].v, ctl))
+ goto fail;
+ }
+
+ if (phdr > 0 && phdr < H2_PHDR_NUM_ENTRIES) {
+ /* insert a pseudo header by its index (in phdr) and value (in value) */
+ if (fields & ((1 << phdr) | H2_PHDR_FND_NONE)) {
+ if (fields & H2_PHDR_FND_NONE) {
+ /* pseudo header field after regular headers */
+ goto fail;
+ }
+ else {
+ /* repeated pseudo header field */
+ goto fail;
+ }
+ }
+ fields |= 1 << phdr;
+ phdr_val[phdr] = list[idx].v;
+ continue;
+ }
+ else if (phdr != 0) {
+ /* invalid pseudo header -- should never happen here */
+ goto fail;
+ }
+
+ /* regular header field in (name,value) */
+ if (unlikely(!(fields & H2_PHDR_FND_NONE))) {
+ /* no more pseudo-headers, time to build the request line */
+ sl = h2_prepare_htx_reqline(fields, phdr_val, htx, msgf);
+ if (!sl)
+ goto fail;
+ fields |= H2_PHDR_FND_NONE;
+
+ /* http2bis draft recommends to drop Host in favor of :authority when
+ * the latter is present. This is required to make sure there is no
+ * discrepancy between the authority and the host header, especially
+ * since routing rules usually involve Host. Here we already know if
+ * :authority was found so we can emit it right now and mark the host
+ * as filled so that it's skipped later.
+ */
+ if (fields & H2_PHDR_FND_AUTH) {
+ if (!htx_add_header(htx, ist("host"), phdr_val[H2_PHDR_IDX_AUTH]))
+ goto fail;
+ fields |= H2_PHDR_FND_HOST;
+ }
+ }
+
+ if (isteq(list[idx].n, ist("host"))) {
+ if (fields & H2_PHDR_FND_HOST)
+ continue;
+
+ fields |= H2_PHDR_FND_HOST;
+ }
+
+ if (isteq(list[idx].n, ist("content-length"))) {
+ ret = http_parse_cont_len_header(&list[idx].v, body_len,
+ *msgf & H2_MSGF_BODY_CL);
+ if (ret < 0)
+ goto fail;
+
+ *msgf |= H2_MSGF_BODY_CL;
+ sl_flags |= HTX_SL_F_CLEN;
+ if (ret == 0)
+ continue; // skip this duplicate
+ }
+
+ /* these ones are forbidden in requests (RFC7540#8.1.2.2) */
+ if (isteq(list[idx].n, ist("connection")) ||
+ isteq(list[idx].n, ist("proxy-connection")) ||
+ isteq(list[idx].n, ist("keep-alive")) ||
+ isteq(list[idx].n, ist("upgrade")) ||
+ isteq(list[idx].n, ist("transfer-encoding")))
+ goto fail;
+
+ if (isteq(list[idx].n, ist("te")) && !isteq(list[idx].v, ist("trailers")))
+ goto fail;
+
+ /* cookie requires special processing at the end */
+ if (isteq(list[idx].n, ist("cookie"))) {
+ http_cookie_register(list, idx, &ck, &lck);
+ continue;
+ }
+
+ if (!htx_add_header(htx, list[idx].n, list[idx].v))
+ goto fail;
+ }
+
+ /* RFC7540#8.1.2.1 mandates to reject response pseudo-headers (:status) */
+ if (fields & H2_PHDR_FND_STAT)
+ goto fail;
+
+ /* Let's dump the request now if not yet emitted. */
+ if (!(fields & H2_PHDR_FND_NONE)) {
+ sl = h2_prepare_htx_reqline(fields, phdr_val, htx, msgf);
+ if (!sl)
+ goto fail;
+ }
+
+ if (*msgf & H2_MSGF_BODY_TUNNEL)
+ *msgf &= ~(H2_MSGF_BODY|H2_MSGF_BODY_CL);
+
+ if (!(*msgf & H2_MSGF_BODY) || ((*msgf & H2_MSGF_BODY_CL) && *body_len == 0) ||
+ (*msgf & H2_MSGF_BODY_TUNNEL)) {
+ /* Request without body or tunnel requested */
+ sl_flags |= HTX_SL_F_BODYLESS;
+ htx->flags |= HTX_FL_EOM;
+ }
+
+ if (*msgf & H2_MSGF_EXT_CONNECT) {
+ if (!htx_add_header(htx, ist("upgrade"), phdr_val[H2_PHDR_IDX_PROT]))
+ goto fail;
+ if (!htx_add_header(htx, ist("connection"), ist("upgrade")))
+ goto fail;
+ sl_flags |= HTX_SL_F_CONN_UPG;
+ }
+
+ /* update the start line with last detected header info */
+ sl->flags |= sl_flags;
+
+	/* complete with the missing Host if needed (this test can only match
+	 * if no regular header was found, since the Host header was otherwise
+	 * already emitted or marked as found above).
+	 */
+ if ((fields & (H2_PHDR_FND_HOST|H2_PHDR_FND_AUTH)) == H2_PHDR_FND_AUTH) {
+ /* missing Host field, use :authority instead */
+ if (!htx_add_header(htx, ist("host"), phdr_val[H2_PHDR_IDX_AUTH]))
+ goto fail;
+ }
+
+ /* now we may have to build a cookie list. We'll dump the values of all
+ * visited headers.
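+	 * Per RFC7540#8.1.2.5, e.g. "cookie: a=1" and "cookie: b=2" received
+	 * as separate headers are merged into a single "cookie: a=1; b=2".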
+ */
+ if (ck >= 0) {
+ if (http_cookie_merge(htx, list, ck))
+ goto fail;
+ }
+
+ /* now send the end of headers marker */
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ /* proceed to scheme-based normalization on target-URI */
+ if (fields & H2_PHDR_FND_SCHM)
+ http_scheme_based_normalize(htx);
+
+ ret = 1;
+ return ret;
+
+ fail:
+ return -1;
+}
+
+/* Prepare the status line into <htx> from pseudo headers stored in <phdr[]>.
+ * <fields> indicates what was found so far. This should be called once at the
+ * detection of the first general header field or at the end of the message if
+ * no general header field was found yet. Returns the created start line on
+ * success, or NULL on failure. Upon success, <msgf> is updated with a few
+ * H2_MSGF_* flags indicating what was found while parsing.
+ */
+static struct htx_sl *h2_prepare_htx_stsline(uint32_t fields, struct ist *phdr, struct htx *htx, unsigned int *msgf)
+{
+ unsigned int status, flags = HTX_SL_F_IS_RESP;
+ struct htx_sl *sl;
+ struct ist stat;
+
+ /* only :status is allowed as a pseudo header */
+ if (!(fields & H2_PHDR_FND_STAT))
+ goto fail;
+
+ if (phdr[H2_PHDR_IDX_STAT].len != 3)
+ goto fail;
+
+ /* if Extended CONNECT is used, convert status code from 200 to htx 101
+ * following rfc 8441 */
+ if (unlikely(*msgf & H2_MSGF_EXT_CONNECT) &&
+ isteq(phdr[H2_PHDR_IDX_STAT], ist("200"))) {
+ stat = ist("101");
+ status = 101;
+ }
+ else {
+ unsigned char h, t, u;
+
+ stat = phdr[H2_PHDR_IDX_STAT];
+
+ h = stat.ptr[0] - '0';
+ t = stat.ptr[1] - '0';
+ u = stat.ptr[2] - '0';
+ if (h > 9 || t > 9 || u > 9)
+ goto fail;
+ status = h * 100 + t * 10 + u;
+ }
+
+	/* 101 responses are not supported in H2, so return an error.
+	 * On 1xx responses there is no ES on the HEADERS frame but there is no
+	 * body. So remove the flag H2_MSGF_BODY and add H2_MSGF_RSP_1XX to
+	 * notify the decoder that another HEADERS frame is expected.
+	 * 204/304 responses have no body by definition. So remove the flag
+	 * H2_MSGF_BODY and set H2_MSGF_BODYLESS_RSP.
+	 *
+	 * Note however that there is a special condition for Extended CONNECT.
+	 * In this case, we explicitly convert it to HTX 101 to mimic the
+	 * GET+Upgrade HTTP/1.1 mechanism.
+	 */
+ if (status == 101) {
+ if (!(*msgf & H2_MSGF_EXT_CONNECT))
+ goto fail;
+ }
+ else if (status < 200) {
+ *msgf |= H2_MSGF_RSP_1XX;
+ *msgf &= ~H2_MSGF_BODY;
+ }
+ else if (status == 204 || status == 304) {
+ *msgf &= ~H2_MSGF_BODY;
+ *msgf |= H2_MSGF_BODYLESS_RSP;
+ flags |= HTX_SL_F_BODYLESS_RESP;
+ }
+
+ /* Set HTX start-line flags */
+ flags |= HTX_SL_F_VER_11; // V2 in fact
+ flags |= HTX_SL_F_XFER_LEN; // xfer len always known with H2
+
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/2.0"), stat, ist(""));
+ if (!sl)
+ goto fail;
+ sl->info.res.status = status;
+ return sl;
+ fail:
+ return NULL;
+}
+
+/* Takes an H2 response present in the headers list <list> terminated by a name
+ * being <NULL,0> and emits the equivalent HTX response according to the rules
+ * documented in RFC7540 #8.1.2. The output contents are emitted in <htx>, and
+ * a positive value is returned if some bytes were emitted. In case of error, a
+ * negative error code is returned.
+ *
+ * Upon success, <msgf> is filled with a few H2_MSGF_* flags indicating what
+ * was found while parsing. The caller must initialize it to zero, or to
+ * H2_MSGF_BODY if a body is detected (!ES).
+ *
+ * The headers list <list> must be composed of :
+ * - n.name != NULL, n.len > 0 : literal header name
+ * - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
+ * among H2_PHDR_IDX_*
+ * - n.name ignored, n.len == 0 : end of list
+ * - in all cases except the end of list, v.name and v.len must designate a
+ * valid value.
+ *
+ * <upgrade_protocol> is only used if the htx status code is 101 indicating a
+ * response to an upgrade or h2-equivalent request.
+ */
+int h2_make_htx_response(struct http_hdr *list, struct htx *htx, unsigned int *msgf, unsigned long long *body_len, char *upgrade_protocol)
+{
+ struct ist phdr_val[H2_PHDR_NUM_ENTRIES];
+ uint32_t fields; /* bit mask of H2_PHDR_FND_* */
+ uint32_t idx;
+ int phdr;
+ int ret;
+ int i;
+ struct htx_sl *sl = NULL;
+ unsigned int sl_flags = 0;
+ const char *ctl;
+
+ fields = 0;
+ for (idx = 0; list[idx].n.len != 0; idx++) {
+ if (!isttest(list[idx].n)) {
+ /* this is an indexed pseudo-header */
+ phdr = list[idx].n.len;
+ }
+ else {
+ /* this can be any type of header */
+ /* RFC7540#8.1.2: upper case not allowed in header field names.
+ * #10.3: header names must be valid (i.e. match a token).
+ * For pseudo-headers we check from 2nd char and for other ones
+ * from the first char, because HTTP_IS_TOKEN() also excludes
+ * the colon.
+ */
+ phdr = h2_str_to_phdr(list[idx].n);
+
+ for (i = !!phdr; i < list[idx].n.len; i++)
+				if ((uint8_t)(list[idx].n.ptr[i] - 'A') <= 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
+ goto fail;
+ }
+
+ /* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
+ * rejecting NUL, CR and LF characters.
+ */
+ ctl = ist_find_ctl(list[idx].v);
+ if (unlikely(ctl) && http_header_has_forbidden_char(list[idx].v, ctl))
+ goto fail;
+
+ if (phdr > 0 && phdr < H2_PHDR_NUM_ENTRIES) {
+ /* insert a pseudo header by its index (in phdr) and value (in value) */
+ if (fields & ((1 << phdr) | H2_PHDR_FND_NONE)) {
+ if (fields & H2_PHDR_FND_NONE) {
+ /* pseudo header field after regular headers */
+ goto fail;
+ }
+ else {
+ /* repeated pseudo header field */
+ goto fail;
+ }
+ }
+ fields |= 1 << phdr;
+ phdr_val[phdr] = list[idx].v;
+ continue;
+ }
+ else if (phdr != 0) {
+ /* invalid pseudo header -- should never happen here */
+ goto fail;
+ }
+
+ /* regular header field in (name,value) */
+ if (!(fields & H2_PHDR_FND_NONE)) {
+ /* no more pseudo-headers, time to build the status line */
+ sl = h2_prepare_htx_stsline(fields, phdr_val, htx, msgf);
+ if (!sl)
+ goto fail;
+ fields |= H2_PHDR_FND_NONE;
+ }
+
+ if (isteq(list[idx].n, ist("content-length"))) {
+ ret = http_parse_cont_len_header(&list[idx].v, body_len,
+ *msgf & H2_MSGF_BODY_CL);
+ if (ret < 0)
+ goto fail;
+
+ *msgf |= H2_MSGF_BODY_CL;
+ sl_flags |= HTX_SL_F_CLEN;
+ if (ret == 0)
+ continue; // skip this duplicate
+ }
+
+ /* these ones are forbidden in responses (RFC7540#8.1.2.2) */
+ if (isteq(list[idx].n, ist("connection")) ||
+ isteq(list[idx].n, ist("proxy-connection")) ||
+ isteq(list[idx].n, ist("keep-alive")) ||
+ isteq(list[idx].n, ist("upgrade")) ||
+ isteq(list[idx].n, ist("transfer-encoding")))
+ goto fail;
+
+ if (!htx_add_header(htx, list[idx].n, list[idx].v))
+ goto fail;
+ }
+
+ /* RFC7540#8.1.2.1 mandates to reject request pseudo-headers */
+ if (fields & (H2_PHDR_FND_AUTH|H2_PHDR_FND_METH|H2_PHDR_FND_PATH|H2_PHDR_FND_SCHM))
+ goto fail;
+
+	/* Let's dump the status line now if not yet emitted. */
+ if (!(fields & H2_PHDR_FND_NONE)) {
+ sl = h2_prepare_htx_stsline(fields, phdr_val, htx, msgf);
+ if (!sl)
+ goto fail;
+ }
+
+ if (sl->info.res.status == 101 && upgrade_protocol) {
+ if (!htx_add_header(htx, ist("connection"), ist("upgrade")))
+ goto fail;
+ if (!htx_add_header(htx, ist("upgrade"), ist(upgrade_protocol)))
+ goto fail;
+ sl_flags |= HTX_SL_F_CONN_UPG;
+ }
+
+ if ((*msgf & H2_MSGF_BODY_TUNNEL) &&
+ ((sl->info.res.status >= 200 && sl->info.res.status < 300) || sl->info.res.status == 101))
+ *msgf &= ~(H2_MSGF_BODY|H2_MSGF_BODY_CL);
+ else
+ *msgf &= ~H2_MSGF_BODY_TUNNEL;
+
+ if (!(*msgf & H2_MSGF_BODY) || ((*msgf & H2_MSGF_BODY_CL) && *body_len == 0) ||
+ (*msgf & H2_MSGF_BODY_TUNNEL)) {
+ /* Response without body or tunnel successfully established */
+ sl_flags |= HTX_SL_F_BODYLESS;
+ htx->flags |= HTX_FL_EOM;
+ }
+
+ /* update the start line with last detected header info */
+ sl->flags |= sl_flags;
+
+ if ((*msgf & (H2_MSGF_BODY|H2_MSGF_BODY_TUNNEL|H2_MSGF_BODY_CL)) == H2_MSGF_BODY) {
+ /* FIXME: Do we need to signal anything when we have a body and
+ * no content-length, to have the equivalent of H1's chunked
+ * encoding?
+ */
+ }
+
+ /* now send the end of headers marker */
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ ret = 1;
+ return ret;
+
+ fail:
+ return -1;
+}
+
+/* Takes an H2 headers list <list> terminated by a name being <NULL,0> and emits
+ * the equivalent HTX trailers blocks. The output contents are emitted in <htx>,
+ * and a positive value is returned if some bytes were emitted. In case of
+ * error, a negative error code is returned. The caller must have verified that
+ * the message in the buffer is compatible with receipt of trailers.
+ *
+ * The headers list <list> must be composed of :
+ * - n.name != NULL, n.len > 0 : literal header name
+ * - n.name == NULL, n.len > 0 : indexed pseudo header name number <n.len>
+ * among H2_PHDR_IDX_* (illegal here)
+ * - n.name ignored, n.len == 0 : end of list
+ * - in all cases except the end of list, v.name and v.len must designate a
+ * valid value.
+ */
+int h2_make_htx_trailers(struct http_hdr *list, struct htx *htx)
+{
+ const char *ctl;
+ uint32_t idx;
+ int i;
+
+ for (idx = 0; list[idx].n.len != 0; idx++) {
+ if (!isttest(list[idx].n)) {
+ /* This is an indexed pseudo-header (RFC7540#8.1.2.1) */
+ goto fail;
+ }
+
+ /* RFC7540#8.1.2: upper case not allowed in header field names.
+ * #10.3: header names must be valid (i.e. match a token). This
+ * also catches pseudo-headers which are forbidden in trailers.
+ */
+ for (i = 0; i < list[idx].n.len; i++)
+			if ((uint8_t)(list[idx].n.ptr[i] - 'A') <= 'Z' - 'A' || !HTTP_IS_TOKEN(list[idx].n.ptr[i]))
+ goto fail;
+
+ /* these ones are forbidden in trailers (RFC7540#8.1.2.2) */
+ if (isteq(list[idx].n, ist("host")) ||
+ isteq(list[idx].n, ist("content-length")) ||
+ isteq(list[idx].n, ist("connection")) ||
+ isteq(list[idx].n, ist("proxy-connection")) ||
+ isteq(list[idx].n, ist("keep-alive")) ||
+ isteq(list[idx].n, ist("upgrade")) ||
+ isteq(list[idx].n, ist("te")) ||
+ isteq(list[idx].n, ist("transfer-encoding")))
+ goto fail;
+
+ /* RFC7540#10.3: intermediaries forwarding to HTTP/1 must take care of
+ * rejecting NUL, CR and LF characters.
+ */
+ ctl = ist_find_ctl(list[idx].v);
+ if (unlikely(ctl) && http_header_has_forbidden_char(list[idx].v, ctl))
+ goto fail;
+
+ if (!htx_add_trailer(htx, list[idx].n, list[idx].v))
+ goto fail;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOT))
+ goto fail;
+
+ return 1;
+
+ fail:
+ return -1;
+}
diff --git a/src/h3.c b/src/h3.c
new file mode 100644
index 0000000..4aa1a52
--- /dev/null
+++ b/src/h3.c
@@ -0,0 +1,2403 @@
+/*
+ * HTTP/3 protocol processing
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <import/ist.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/h3.h>
+#include <haproxy/h3_stats.h>
+#include <haproxy/http.h>
+#include <haproxy/http-hdr-t.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/intops.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/pool.h>
+#include <haproxy/qmux_http.h>
+#include <haproxy/qpack-dec.h>
+#include <haproxy/qpack-enc.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/quic_frame.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+
+/* trace source and events */
+static void h3_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+static const struct trace_event h3_trace_events[] = {
+#define H3_EV_RX_FRAME (1ULL << 0)
+ { .mask = H3_EV_RX_FRAME, .name = "rx_frame", .desc = "receipt of any H3 frame" },
+#define H3_EV_RX_DATA (1ULL << 1)
+ { .mask = H3_EV_RX_DATA, .name = "rx_data", .desc = "receipt of H3 DATA frame" },
+#define H3_EV_RX_HDR (1ULL << 2)
+ { .mask = H3_EV_RX_HDR, .name = "rx_hdr", .desc = "receipt of H3 HEADERS frame" },
+#define H3_EV_RX_SETTINGS (1ULL << 3)
+ { .mask = H3_EV_RX_SETTINGS, .name = "rx_settings", .desc = "receipt of H3 SETTINGS frame" },
+#define H3_EV_TX_DATA (1ULL << 4)
+ { .mask = H3_EV_TX_DATA, .name = "tx_data", .desc = "transmission of H3 DATA frame" },
+#define H3_EV_TX_HDR (1ULL << 5)
+ { .mask = H3_EV_TX_HDR, .name = "tx_hdr", .desc = "transmission of H3 HEADERS frame" },
+#define H3_EV_TX_SETTINGS (1ULL << 6)
+ { .mask = H3_EV_TX_SETTINGS, .name = "tx_settings", .desc = "transmission of H3 SETTINGS frame" },
+#define H3_EV_H3S_NEW (1ULL << 7)
+ { .mask = H3_EV_H3S_NEW, .name = "h3s_new", .desc = "new H3 stream" },
+#define H3_EV_H3S_END (1ULL << 8)
+ { .mask = H3_EV_H3S_END, .name = "h3s_end", .desc = "H3 stream terminated" },
+#define H3_EV_H3C_NEW (1ULL << 9)
+ { .mask = H3_EV_H3C_NEW, .name = "h3c_new", .desc = "new H3 connection" },
+#define H3_EV_H3C_END (1ULL << 10)
+ { .mask = H3_EV_H3C_END, .name = "h3c_end", .desc = "H3 connection terminated" },
+#define H3_EV_STRM_SEND (1ULL << 12)
+ { .mask = H3_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" },
+ { }
+};
+
+static const struct name_desc h3_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="qcs", .desc="QUIC stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc h3_trace_decoding[] = {
+#define H3_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define H3_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only qcc/qcs state and flags, no real decoding" },
+ { /* end */ }
+};
+
+struct trace_source trace_h3 = {
+ .name = IST("h3"),
+ .desc = "HTTP/3 transcoder",
+ .arg_def = TRC_ARG1_CONN, /* TRACE()'s first argument is always a connection */
+ .default_cb = h3_trace,
+ .known_events = h3_trace_events,
+ .lockon_args = h3_trace_lockon_args,
+ .decoding = h3_trace_decoding,
+ .report_events = ~0, /* report everything by default */
+};
+
+#define TRACE_SOURCE &trace_h3
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+#if defined(DEBUG_H3)
+#define h3_debug_printf fprintf
+#define h3_debug_hexdump debug_hexdump
+#else
+#define h3_debug_printf(...) do { } while (0)
+#define h3_debug_hexdump(...) do { } while (0)
+#endif
+
+#define H3_CF_SETTINGS_SENT 0x00000001 /* SETTINGS frame already sent on local control stream */
+#define H3_CF_SETTINGS_RECV 0x00000002 /* SETTINGS frame already received on remote control stream */
+#define H3_CF_UNI_CTRL_SET 0x00000004 /* Remote H3 Control stream opened */
+#define H3_CF_UNI_QPACK_DEC_SET 0x00000008 /* Remote QPACK decoder stream opened */
+#define H3_CF_UNI_QPACK_ENC_SET 0x00000010 /* Remote QPACK encoder stream opened */
+#define H3_CF_GOAWAY_SENT 0x00000020 /* GOAWAY sent on local control stream */
+
+/* Default settings */
+static uint64_t h3_settings_qpack_max_table_capacity = 0;
+static uint64_t h3_settings_qpack_blocked_streams = 4096;
+static uint64_t h3_settings_max_field_section_size = QUIC_VARINT_8_BYTE_MAX; /* Unlimited */
+
+struct h3c {
+ struct qcc *qcc;
+ struct qcs *ctrl_strm; /* Control stream */
+ enum h3_err err;
+ uint32_t flags;
+
+ /* Settings */
+ uint64_t qpack_max_table_capacity;
+ uint64_t qpack_blocked_streams;
+ uint64_t max_field_section_size;
+
+ uint64_t id_goaway; /* stream ID used for a GOAWAY frame */
+
+ struct buffer_wait buf_wait; /* wait list for buffer allocations */
+ /* Stats counters */
+ struct h3_counters *prx_counters;
+};
+
+DECLARE_STATIC_POOL(pool_head_h3c, "h3c", sizeof(struct h3c));
+
+#define H3_SF_UNI_INIT 0x00000001 /* stream type not parsed for unidirectional stream */
+#define H3_SF_UNI_NO_H3 0x00000002 /* unidirectional stream does not carry H3 frames */
+#define H3_SF_HAVE_CLEN 0x00000004 /* content-length header is present */
+
+struct h3s {
+ struct h3c *h3c;
+
+ enum h3s_t type;
+ enum h3s_st_req st_req; /* only used for request streams */
+ uint64_t demux_frame_len;
+ uint64_t demux_frame_type;
+
+ unsigned long long body_len; /* known request body length from content-length header if present */
+ unsigned long long data_len; /* total length of all parsed DATA */
+
+ int flags;
+ int err; /* used for stream reset */
+};
+
+DECLARE_STATIC_POOL(pool_head_h3s, "h3s", sizeof(struct h3s));
+
+/* Initialize a uni-stream <qcs> by reading its type from <b>.
+ *
+ * Returns the count of consumed bytes or a negative error code.
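+ *
+ * The type is a QUIC variable-length integer; per RFC 9114/9204, 0x00
+ * designates the control stream, 0x01 a push stream, 0x02 the QPACK encoder
+ * stream and 0x03 the QPACK decoder stream.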
+ */
+static ssize_t h3_init_uni_stream(struct h3c *h3c, struct qcs *qcs,
+ struct buffer *b)
+{
+ /* decode unidirectional stream type */
+ struct h3s *h3s = qcs->ctx;
+ uint64_t type;
+ size_t len = 0, ret;
+
+ TRACE_ENTER(H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+
+ /* Function reserved to uni streams. Must be called only once per stream instance. */
+ BUG_ON(!quic_stream_is_uni(qcs->id) || h3s->flags & H3_SF_UNI_INIT);
+
+ ret = b_quic_dec_int(&type, b, &len);
+ if (!ret) {
+ /* not enough data to decode uni stream type, retry later */
+ TRACE_DATA("cannot decode uni stream type due to incomplete data", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ goto out;
+ }
+
+ switch (type) {
+ case H3_UNI_S_T_CTRL:
+ if (h3c->flags & H3_CF_UNI_CTRL_SET) {
+ TRACE_ERROR("duplicated control stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1);
+ goto err;
+ }
+ h3c->flags |= H3_CF_UNI_CTRL_SET;
+ h3s->type = H3S_T_CTRL;
+ break;
+
+ case H3_UNI_S_T_PUSH:
+ /* TODO not supported for the moment */
+ h3s->type = H3S_T_PUSH;
+ break;
+
+ case H3_UNI_S_T_QPACK_DEC:
+ if (h3c->flags & H3_CF_UNI_QPACK_DEC_SET) {
+ TRACE_ERROR("duplicated qpack decoder stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1);
+ goto err;
+ }
+ h3c->flags |= H3_CF_UNI_QPACK_DEC_SET;
+ h3s->type = H3S_T_QPACK_DEC;
+ h3s->flags |= H3_SF_UNI_NO_H3;
+ break;
+
+ case H3_UNI_S_T_QPACK_ENC:
+ if (h3c->flags & H3_CF_UNI_QPACK_ENC_SET) {
+ TRACE_ERROR("duplicated qpack encoder stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1);
+ goto err;
+ }
+ h3c->flags |= H3_CF_UNI_QPACK_ENC_SET;
+ h3s->type = H3S_T_QPACK_ENC;
+ h3s->flags |= H3_SF_UNI_NO_H3;
+ break;
+
+ default:
+ /* draft-ietf-quic-http34 9. Extensions to HTTP/3
+ *
+ * Implementations MUST [...] abort reading on unidirectional
+ * streams that have unknown or unsupported types.
+ */
+ TRACE_STATE("abort reading on unknown uni stream type", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ qcc_abort_stream_read(qcs);
+ goto err;
+ }
+
+ h3s->flags |= H3_SF_UNI_INIT;
+
+ out:
+ TRACE_LEAVE(H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ return len;
+
+ err:
+ TRACE_DEVEL("leaving on error", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ return -1;
+}
+
+/* Parse a buffer <b> for a <qcs> uni-stream which does not contain H3 frames.
+ * This may be used for QPACK encoder/decoder streams for example. <fin> is set
+ * if this is the last frame of the stream.
+ *
+ * Returns the number of consumed bytes or a negative error code.
+ */
+static ssize_t h3_parse_uni_stream_no_h3(struct qcs *qcs, struct buffer *b, int fin)
+{
+ struct h3s *h3s = qcs->ctx;
+
+ /* Function reserved to non-HTTP/3 unidirectional streams. */
+ BUG_ON(!quic_stream_is_uni(qcs->id) || !(h3s->flags & H3_SF_UNI_NO_H3));
+
+ switch (h3s->type) {
+ case H3S_T_QPACK_DEC:
+ if (qpack_decode_dec(b, fin, qcs))
+ return -1;
+ break;
+ case H3S_T_QPACK_ENC:
+ if (qpack_decode_enc(b, fin, qcs))
+ return -1;
+ break;
+ case H3S_T_UNKNOWN:
+ default:
+ /* Unknown stream should be flagged with QC_SF_READ_ABORTED. */
+ ABORT_NOW();
+ }
+
+ /* TODO adjust return code */
+ return 0;
+}
+
+/* Decode a H3 frame header from the <b> buffer. The frame type is stored in
+ * <ftype> and length in <flen>.
+ *
+ * Returns the size of the H3 frame header. Note that the input buffer is not
+ * consumed.
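+ *
+ * Both fields are QUIC variable-length integers. For example, the two bytes
+ * 0x01 0x1f decode to ftype=0x01 (HEADERS) and flen=31, and 2 is returned.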
+ */
+static inline size_t h3_decode_frm_header(uint64_t *ftype, uint64_t *flen,
+ struct buffer *b)
+{
+ size_t hlen;
+
+ hlen = 0;
+ if (!b_quic_dec_int(ftype, b, &hlen) ||
+ !b_quic_dec_int(flen, b, &hlen)) {
+ return 0;
+ }
+
+ return hlen;
+}
+
+/* Check if H3 frame of type <ftype> is valid when received on stream <qcs>.
+ *
+ * Returns 0 if the frame is valid, otherwise an HTTP/3 error code.
+ */
+static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype)
+{
+ struct h3s *h3s = qcs->ctx;
+ int ret = 0;
+
+ /* Stream type must be known to ensure frame is valid for this stream. */
+ BUG_ON(h3s->type == H3S_T_UNKNOWN);
+
+ switch (ftype) {
+ case H3_FT_DATA:
+ /* cf H3_FT_HEADERS case. */
+ if (h3s->type == H3S_T_CTRL ||
+ (h3s->st_req != H3S_ST_REQ_HEADERS && h3s->st_req != H3S_ST_REQ_DATA)) {
+ ret = H3_FRAME_UNEXPECTED;
+ }
+
+ break;
+
+ case H3_FT_HEADERS:
+ /* RFC 9114 4.1. HTTP Message Framing
+		 *
+ * An HTTP message (request or response) consists of:
+ * 1. the header section, including message control data, sent as a
+ * single HEADERS frame,
+ * 2. optionally, the content, if present, sent as a series of DATA
+ * frames, and
+ * 3. optionally, the trailer section, if present, sent as a single
+ * HEADERS frame.
+ *
+ * [...]
+ *
+ * Receipt of an invalid sequence of frames MUST be treated as a
+ * connection error of type H3_FRAME_UNEXPECTED. In particular, a DATA
+ * frame before any HEADERS frame, or a HEADERS or DATA frame after the
+ * trailing HEADERS frame, is considered invalid. Other frame types,
+ * especially unknown frame types, might be permitted subject to their
+ * own rules; see Section 9.
+ */
+ if (h3s->type == H3S_T_CTRL || h3s->st_req == H3S_ST_REQ_TRAILERS)
+ ret = H3_FRAME_UNEXPECTED;
+ break;
+
+ case H3_FT_CANCEL_PUSH:
+ case H3_FT_GOAWAY:
+ case H3_FT_MAX_PUSH_ID:
+ /* RFC 9114 7.2.3. CANCEL_PUSH
+ *
+ * A CANCEL_PUSH frame is sent on the control stream. Receiving a
+ * CANCEL_PUSH frame on a stream other than the control stream MUST be
+ * treated as a connection error of type H3_FRAME_UNEXPECTED.
+ */
+
+ /* RFC 9114 7.2.6. GOAWAY
+ *
+ * A client MUST treat a GOAWAY frame on a stream other than the
+ * control stream as a connection error of type H3_FRAME_UNEXPECTED.
+ */
+
+ /* RFC 9114 7.2.7. MAX_PUSH_ID
+ *
+ * The MAX_PUSH_ID frame is always sent on the control stream. Receipt
+ * of a MAX_PUSH_ID frame on any other stream MUST be treated as a
+ * connection error of type H3_FRAME_UNEXPECTED.
+ */
+
+ if (h3s->type != H3S_T_CTRL)
+ ret = H3_FRAME_UNEXPECTED;
+ else if (!(h3c->flags & H3_CF_SETTINGS_RECV))
+ ret = H3_MISSING_SETTINGS;
+ break;
+
+ case H3_FT_SETTINGS:
+ /* RFC 9114 7.2.4. SETTINGS
+ *
+ * A SETTINGS frame MUST be sent as the first frame of
+ * each control stream (see Section 6.2.1) by each peer, and it MUST NOT
+ * be sent subsequently. If an endpoint receives a second SETTINGS frame
+ * on the control stream, the endpoint MUST respond with a connection
+ * error of type H3_FRAME_UNEXPECTED.
+ *
+ * SETTINGS frames MUST NOT be sent on any stream other than the control
+ * stream. If an endpoint receives a SETTINGS frame on a different
+ * stream, the endpoint MUST respond with a connection error of type
+ * H3_FRAME_UNEXPECTED.
+ */
+ if (h3s->type != H3S_T_CTRL || h3c->flags & H3_CF_SETTINGS_RECV)
+ ret = H3_FRAME_UNEXPECTED;
+ break;
+
+ case H3_FT_PUSH_PROMISE:
+ /* RFC 9114 7.2.5. PUSH_PROMISE
+ *
+ * A client MUST NOT send a PUSH_PROMISE frame. A server MUST treat the
+ * receipt of a PUSH_PROMISE frame as a connection error of type
+ * H3_FRAME_UNEXPECTED.
+ */
+
+ /* TODO server-side only. */
+ ret = H3_FRAME_UNEXPECTED;
+ break;
+
+ default:
+ /* RFC 9114 9. Extensions to HTTP/3
+ *
+ * Implementations MUST ignore unknown or unsupported values in all
+ * extensible protocol elements. [...]
+ * However, where a known frame type is required to be in a
+ * specific location, such as the SETTINGS frame as the first frame of
+ * the control stream (see Section 6.2.1), an unknown frame type does
+ * not satisfy that requirement and SHOULD be treated as an error.
+ */
+ if (h3s->type == H3S_T_CTRL && !(h3c->flags & H3_CF_SETTINGS_RECV))
+ ret = H3_MISSING_SETTINGS;
+ break;
+ }
+
+ return ret;
+}
+
+/* Check for stream <qcs> that the total length of all DATA frames does not
+ * exceed a previously parsed content-length header. <fin> must be set for the
+ * last data of the stream, in which case the total length of the DATA frames
+ * must be equal to the content-length.
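+ *
+ * For example, with "content-length: 10", receiving 12 bytes of DATA is an
+ * immediate error, while receiving only 6 bytes only becomes an error once
+ * <fin> is seen.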
+ *
+ * This must only be called for a stream with H3_SF_HAVE_CLEN flag.
+ *
+ * Returns 0 if valid, otherwise non-zero.
+ */
+static int h3_check_body_size(struct qcs *qcs, int fin)
+{
+ struct h3s *h3s = qcs->ctx;
+ int ret = 0;
+ TRACE_ENTER(H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+
+ /* Reserved for streams with a previously parsed content-length header. */
+ BUG_ON(!(h3s->flags & H3_SF_HAVE_CLEN));
+
+ /* RFC 9114 4.1.2. Malformed Requests and Responses
+ *
+ * A request or response that is defined as having content when it
+ * contains a Content-Length header field (Section 8.6 of [HTTP]) is
+ * malformed if the value of the Content-Length header field does not
+ * equal the sum of the DATA frame lengths received.
+ *
+ * TODO for backend support
+ * A response that is
+ * defined as never having content, even when a Content-Length is
+ * present, can have a non-zero Content-Length header field even though
+ * no content is included in DATA frames.
+ */
+ if (h3s->data_len > h3s->body_len ||
+ (fin && h3s->data_len < h3s->body_len)) {
+ TRACE_ERROR("Content-length does not match DATA frame size", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ ret = -1;
+ }
+
+ TRACE_LEAVE(H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ return ret;
+}
+
+/* Set the <auth> authority header to the new value <value> for stream <qcs>.
+ * This ensures that the value conforms to the specification. If <auth> is
+ * already set, it also ensures that <value> is identical to it.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int h3_set_authority(struct qcs *qcs, struct ist *auth, const struct ist value)
+{
+ /* RFC 9114 4.3.1. Request Pseudo-Header Fields
+ *
+ * If the :scheme pseudo-header field identifies a scheme that has a
+ * mandatory authority component (including "http" and "https"), the
+ * request MUST contain either an :authority pseudo-header field or a
+ * Host header field. If these fields are present, they MUST NOT be
+ * empty. If both fields are present, they MUST contain the same value.
+ */
+
+ /* Check that if a previous value is set the new value is identical. */
+ if (isttest(*auth) && !isteq(*auth, value)) {
+ TRACE_ERROR("difference between :authority and host headers", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ return 1;
+ }
+
+ /* Check that value is not empty. */
+ if (!istlen(value)) {
+ TRACE_ERROR("empty :authority/host header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ return 1;
+ }
+
+ *auth = value;
+ return 0;
+}
+
+/* Parse from buffer <buf> a H3 HEADERS frame of length <len>. Data is copied
+ * into a local HTX buffer and transferred to the stream connector layer. <fin>
+ * must be set if this is the last data to transfer from this stream.
+ *
+ * Returns the number of consumed bytes or a negative error code. On error
+ * either the connection should be closed or the stream reset using codes
+ * provided in h3c.err / h3s.err.
+ */
+static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf,
+ uint64_t len, char fin)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ struct buffer htx_buf = BUF_NULL;
+ struct buffer *tmp = get_trash_chunk();
+ struct htx *htx = NULL;
+ struct htx_sl *sl;
+ struct http_hdr list[global.tune.max_http_hdr];
+ unsigned int flags = HTX_SL_F_NONE;
+ struct ist meth = IST_NULL, path = IST_NULL;
+ struct ist scheme = IST_NULL, authority = IST_NULL;
+ int hdr_idx, ret;
+ int cookie = -1, last_cookie = -1, i;
+ const char *ctl;
+ int relaxed = !!(h3c->qcc->proxy->options2 & PR_O2_REQBUG_OK);
+
+ /* RFC 9114 4.1.2. Malformed Requests and Responses
+ *
+ * A malformed request or response is one that is an otherwise valid
+ * sequence of frames but is invalid due to:
+ * - the presence of prohibited fields or pseudo-header fields,
+ * - the absence of mandatory pseudo-header fields,
+ * - invalid values for pseudo-header fields,
+ * - pseudo-header fields after fields,
+ * - an invalid sequence of HTTP messages,
+ * - the inclusion of uppercase field names, or
+ * - the inclusion of invalid characters in field names or values.
+ *
+ * [...]
+ *
+ * Intermediaries that process HTTP requests or responses (i.e., any
+ * intermediary not acting as a tunnel) MUST NOT forward a malformed
+ * request or response. Malformed requests or responses that are
+ * detected MUST be treated as a stream error of type H3_MESSAGE_ERROR.
+ */
+
+ TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+
+ /* TODO support trailer parsing in this function */
+
+ /* TODO support buffer wrapping */
+ BUG_ON(b_head(buf) + len >= b_wrap(buf));
+ ret = qpack_decode_fs((const unsigned char *)b_head(buf), len, tmp,
+ list, sizeof(list) / sizeof(list[0]));
+ if (ret < 0) {
+ TRACE_ERROR("QPACK decoding error", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = -ret;
+ len = -1;
+ goto out;
+ }
+
+ if (!qcs_get_buf(qcs, &htx_buf)) {
+ TRACE_ERROR("HTX buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+ BUG_ON(!b_size(&htx_buf)); /* TODO */
+ htx = htx_from_buf(&htx_buf);
+
+ /* first treat pseudo-header to build the start line */
+ hdr_idx = 0;
+ while (1) {
+ /* RFC 9114 4.3. HTTP Control Data
+ *
+ * Endpoints MUST treat a request or response that contains
+ * undefined or invalid pseudo-header fields as malformed.
+ *
+ * All pseudo-header fields MUST appear in the header section before
+ * regular header fields. Any request or response that contains a
+ * pseudo-header field that appears in a header section after a regular
+ * header field MUST be treated as malformed.
+ */
+
+ /* Stop at first non pseudo-header. */
+ if (!istmatch(list[hdr_idx].n, ist(":")))
+ break;
+
+ /* RFC 9114 10.3 Intermediary-Encapsulation Attacks
+ *
+ * While most values that can be encoded will not alter field
+ * parsing, carriage return (ASCII 0x0d), line feed (ASCII 0x0a),
+ * and the null character (ASCII 0x00) might be exploited by an
+ * attacker if they are translated verbatim. Any request or
+ * response that contains a character not permitted in a field
+ * value MUST be treated as malformed
+ */
+
+ /* look for forbidden control characters in the pseudo-header value */
+ ctl = ist_find_ctl(list[hdr_idx].v);
+ if (unlikely(ctl) && http_header_has_forbidden_char(list[hdr_idx].v, ctl)) {
+ TRACE_ERROR("control character present in pseudo-header value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ /* pseudo-header. Malformed name with uppercase character or
+ * invalid token will be rejected in the else clause.
+ */
+ if (isteq(list[hdr_idx].n, ist(":method"))) {
+ if (isttest(meth)) {
+ TRACE_ERROR("duplicated method pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ meth = list[hdr_idx].v;
+ }
+ else if (isteq(list[hdr_idx].n, ist(":path"))) {
+ if (isttest(path)) {
+ TRACE_ERROR("duplicated path pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (!relaxed) {
+ /* we need to reject any control chars or '#' from the path,
+ * unless option accept-invalid-http-request is set.
+ */
+ ctl = ist_find_range(list[hdr_idx].v, 0, '#');
+ if (unlikely(ctl) && http_path_has_forbidden_char(list[hdr_idx].v, ctl)) {
+ TRACE_ERROR("forbidden character in ':path' pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+
+ path = list[hdr_idx].v;
+ }
+ else if (isteq(list[hdr_idx].n, ist(":scheme"))) {
+ if (isttest(scheme)) {
+ /* duplicated pseudo-header */
+ TRACE_ERROR("duplicated scheme pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ scheme = list[hdr_idx].v;
+ }
+ else if (isteq(list[hdr_idx].n, ist(":authority"))) {
+ if (isttest(authority)) {
+ TRACE_ERROR("duplicated authority pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (h3_set_authority(qcs, &authority, list[hdr_idx].v)) {
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+ else {
+ TRACE_ERROR("unknown pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ ++hdr_idx;
+ }
+
+ if (!istmatch(meth, ist("CONNECT"))) {
+ /* RFC 9114 4.3.1. Request Pseudo-Header Fields
+ *
+ * All HTTP/3 requests MUST include exactly one value for the :method,
+ * :scheme, and :path pseudo-header fields, unless the request is a
+ * CONNECT request; see Section 4.4.
+ */
+ if (!isttest(meth) || !isttest(scheme) || !isttest(path)) {
+ TRACE_ERROR("missing mandatory pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+
+ flags |= HTX_SL_F_VER_11;
+ flags |= HTX_SL_F_XFER_LEN;
+
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth, path, ist("HTTP/3.0"));
+ if (!sl) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (fin)
+ sl->flags |= HTX_SL_F_BODYLESS;
+
+ sl->info.req.meth = find_http_meth(meth.ptr, meth.len);
+
+ if (isttest(authority)) {
+ if (!htx_add_header(htx, ist("host"), authority)) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+
+ /* now treat standard headers */
+ while (1) {
+ if (isteq(list[hdr_idx].n, ist("")))
+ break;
+
+ if (istmatch(list[hdr_idx].n, ist(":"))) {
+ TRACE_ERROR("pseudo-header field after fields", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ for (i = 0; i < list[hdr_idx].n.len; ++i) {
+ const char c = list[hdr_idx].n.ptr[i];
+			if ((uint8_t)(c - 'A') <= 'Z' - 'A' || !HTTP_IS_TOKEN(c)) {
+ TRACE_ERROR("invalid characters in field name", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+
+ /* RFC 9114 10.3 Intermediary-Encapsulation Attacks
+ *
+ * While most values that can be encoded will not alter field
+ * parsing, carriage return (ASCII 0x0d), line feed (ASCII 0x0a),
+ * and the null character (ASCII 0x00) might be exploited by an
+ * attacker if they are translated verbatim. Any request or
+ * response that contains a character not permitted in a field
+ * value MUST be treated as malformed
+ */
+
+ /* look for forbidden control characters in the header value */
+ ctl = ist_find_ctl(list[hdr_idx].v);
+ if (unlikely(ctl) && http_header_has_forbidden_char(list[hdr_idx].v, ctl)) {
+ TRACE_ERROR("control character present in header value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (isteq(list[hdr_idx].n, ist("host"))) {
+ if (h3_set_authority(qcs, &authority, list[hdr_idx].v)) {
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+ else if (isteq(list[hdr_idx].n, ist("cookie"))) {
+ http_cookie_register(list, hdr_idx, &cookie, &last_cookie);
+ ++hdr_idx;
+ continue;
+ }
+ else if (isteq(list[hdr_idx].n, ist("content-length"))) {
+ ret = http_parse_cont_len_header(&list[hdr_idx].v,
+ &h3s->body_len,
+ h3s->flags & H3_SF_HAVE_CLEN);
+ if (ret < 0) {
+ TRACE_ERROR("invalid content-length", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ else if (!ret) {
+ /* Skip duplicated value. */
+ ++hdr_idx;
+ continue;
+ }
+
+ h3s->flags |= H3_SF_HAVE_CLEN;
+ sl->flags |= HTX_SL_F_CLEN;
+ /* This will fail if current frame is the last one and
+ * content-length is not null.
+ */
+ if (h3_check_body_size(qcs, fin)) {
+ len = -1;
+ goto out;
+ }
+ }
+ else if (isteq(list[hdr_idx].n, ist("connection")) ||
+ isteq(list[hdr_idx].n, ist("proxy-connection")) ||
+ isteq(list[hdr_idx].n, ist("keep-alive")) ||
+ isteq(list[hdr_idx].n, ist("transfer-encoding"))) {
+ /* RFC 9114 4.2. HTTP Fields
+ *
+ * HTTP/3 does not use the Connection header field to indicate
+ * connection-specific fields; in this protocol, connection-
+ * specific metadata is conveyed by other means. An endpoint
+ * MUST NOT generate an HTTP/3 field section containing
+ * connection-specific fields; any message containing
+ * connection-specific fields MUST be treated as malformed.
+ */
+ TRACE_ERROR("invalid connection header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ else if (isteq(list[hdr_idx].n, ist("te")) &&
+ !isteq(list[hdr_idx].v, ist("trailers"))) {
+ /* RFC 9114 4.2. HTTP Fields
+ *
+ * The only exception to this is the TE header field, which MAY
+ * be present in an HTTP/3 request header; when it is, it MUST
+ * NOT contain any value other than "trailers".
+ */
+ TRACE_ERROR("invalid te header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (!htx_add_header(htx, list[hdr_idx].n, list[hdr_idx].v)) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+ ++hdr_idx;
+ }
+
+ /* RFC 9114 4.3.1. Request Pseudo-Header Fields
+ *
+ * If the :scheme pseudo-header field identifies a scheme that has a
+ * mandatory authority component (including "http" and "https"), the
+ * request MUST contain either an :authority pseudo-header field or a
+ * Host header field.
+ */
+ if (!isttest(authority)) {
+ TRACE_ERROR("missing mandatory pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (cookie >= 0) {
+ if (http_cookie_merge(htx, list, cookie)) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH)) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (fin)
+ htx->flags |= HTX_FL_EOM;
+
+ htx_to_buf(htx, &htx_buf);
+ htx = NULL;
+
+ if (!qcs_attach_sc(qcs, &htx_buf, fin)) {
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ /* RFC 9114 5.2. Connection Shutdown
+ *
+ * The GOAWAY frame contains an identifier that
+ * indicates to the receiver the range of requests or pushes that were
+ * or might be processed in this connection. The server sends a client-
+ * initiated bidirectional stream ID; the client sends a push ID.
+ * Requests or pushes with the indicated identifier or greater are
+ * rejected (Section 4.1.1) by the sender of the GOAWAY. This
+ * identifier MAY be zero if no requests or pushes were processed.
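+	 *
+	 * E.g. after accepting the client bidi stream with ID 8, <id_goaway>
+	 * becomes 12, the ID of the next client-initiated bidi stream.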
+ */
+ if (qcs->id >= h3c->id_goaway)
+ h3c->id_goaway = qcs->id + 4;
+
+ out:
+	/* HTX may be non-NULL if an error occurred before the previous htx_to_buf(). */
+ if (htx)
+ htx_to_buf(htx, &htx_buf);
+
+ /* buffer is transferred to the stream connector and set to NULL
+ * except on stream creation error.
+ */
+ if (b_size(&htx_buf)) {
+ b_free(&htx_buf);
+ offer_buffers(NULL, 1);
+ }
+
+ TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ return len;
+}
+
+/* Parse from buffer <buf> a H3 HEADERS frame of length <len> used as trailers.
+ * Data is copied into a local HTX buffer and transferred to the stream
+ * connector layer. <fin> must be set if this is the last data to transfer
+ * from this stream.
+ *
+ * Returns the number of consumed bytes or a negative error code. On error
+ * either the connection should be closed or the stream reset using codes
+ * provided in h3c.err / h3s.err.
+ */
+static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf,
+ uint64_t len, char fin)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ struct buffer *tmp = get_trash_chunk();
+ struct buffer *appbuf = NULL;
+ struct htx *htx = NULL;
+ struct htx_sl *sl;
+ struct http_hdr list[global.tune.max_http_hdr];
+ int hdr_idx, ret;
+ const char *ctl;
+ int i;
+
+ TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+
+ /* TODO support buffer wrapping */
+ BUG_ON(b_head(buf) + len >= b_wrap(buf));
+ ret = qpack_decode_fs((const unsigned char *)b_head(buf), len, tmp,
+ list, sizeof(list) / sizeof(list[0]));
+ if (ret < 0) {
+ TRACE_ERROR("QPACK decoding error", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = -ret;
+ len = -1;
+ goto out;
+ }
+
+ if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) {
+ TRACE_ERROR("HTX buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+ BUG_ON(!b_size(appbuf)); /* TODO */
+ htx = htx_from_buf(appbuf);
+
+ if (!h3s->data_len) {
+		/* Notify that no body is present. This can only happen if
+		 * H3 HEADERS are sent as trailers without any (or with an
+		 * empty) H3 DATA frame, so this is probably not realistic.
+		 *
+		 * TODO if sl is NULL because already consumed there is no way
+		 * to notify about the missing body.
+		 */
+ sl = http_get_stline(htx);
+ if (sl)
+ sl->flags |= HTX_SL_F_BODYLESS;
+ else
+ TRACE_ERROR("cannot notify missing body after trailers", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ }
+
+ hdr_idx = 0;
+ while (1) {
+ if (isteq(list[hdr_idx].n, ist("")))
+ break;
+
+ /* RFC 9114 4.3. HTTP Control Data
+ *
+ * Pseudo-header
+ * fields MUST NOT appear in trailer sections.
+ */
+ if (istmatch(list[hdr_idx].n, ist(":"))) {
+ TRACE_ERROR("pseudo-header field in trailers", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ for (i = 0; i < list[hdr_idx].n.len; ++i) {
+ const char c = list[hdr_idx].n.ptr[i];
+			if ((uint8_t)(c - 'A') <= 'Z' - 'A' || !HTTP_IS_TOKEN(c)) {
+ TRACE_ERROR("invalid characters in field name", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+ }
+
+ /* forbidden HTTP/3 headers, cf h3_headers_to_htx() */
+ if (isteq(list[hdr_idx].n, ist("host")) ||
+ isteq(list[hdr_idx].n, ist("content-length")) ||
+ isteq(list[hdr_idx].n, ist("connection")) ||
+ isteq(list[hdr_idx].n, ist("proxy-connection")) ||
+ isteq(list[hdr_idx].n, ist("keep-alive")) ||
+ isteq(list[hdr_idx].n, ist("te")) ||
+ isteq(list[hdr_idx].n, ist("transfer-encoding"))) {
+ TRACE_ERROR("forbidden HTTP/3 headers", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ /* RFC 9114 10.3 Intermediary-Encapsulation Attacks
+ *
+ * While most values that can be encoded will not alter field
+ * parsing, carriage return (ASCII 0x0d), line feed (ASCII 0x0a),
+ * and the null character (ASCII 0x00) might be exploited by an
+ * attacker if they are translated verbatim. Any request or
+ * response that contains a character not permitted in a field
+ * value MUST be treated as malformed
+ */
+
+ /* look for forbidden control characters in the trailer value */
+ ctl = ist_find_ctl(list[hdr_idx].v);
+ if (unlikely(ctl) && http_header_has_forbidden_char(list[hdr_idx].v, ctl)) {
+ TRACE_ERROR("control character present in trailer value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3s->err = H3_MESSAGE_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (!htx_add_trailer(htx, list[hdr_idx].n, list[hdr_idx].v)) {
+ TRACE_ERROR("cannot add trailer", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ ++hdr_idx;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOT)) {
+ TRACE_ERROR("cannot add trailer", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ if (fin)
+ htx->flags |= HTX_FL_EOM;
+
+ out:
+	/* HTX may be non-NULL if an error occurred before the previous htx_to_buf(). */
+ if (appbuf)
+ htx_to_buf(htx, appbuf);
+
+ TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs);
+ return len;
+}
+
+/* Copy from buffer <buf> a H3 DATA frame of length <len> in QUIC stream <qcs>
+ * HTX buffer. <fin> must be set if this is the last data to transfer from this
+ * stream.
+ *
+ * Returns the number of consumed bytes or a negative error code.
+ */
+static ssize_t h3_data_to_htx(struct qcs *qcs, const struct buffer *buf,
+ uint64_t len, char fin)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ struct buffer *appbuf;
+ struct htx *htx = NULL;
+ size_t htx_sent = 0;
+ int htx_space;
+ char *head;
+
+ TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs);
+
+ if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) {
+ TRACE_ERROR("data buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ len = -1;
+ goto out;
+ }
+
+ htx = htx_from_buf(appbuf);
+
+ if (len > b_data(buf)) {
+ len = b_data(buf);
+ fin = 0;
+ }
+
+ head = b_head(buf);
+ retry:
+ htx_space = htx_free_data_space(htx);
+ if (!htx_space) {
+ qcs->flags |= QC_SF_DEM_FULL;
+ goto out;
+ }
+
+ if (len > htx_space) {
+ len = htx_space;
+ fin = 0;
+ }
+
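+	/* The input may wrap at the end of the ring buffer: in that case, copy
+	 * the contiguous tail first, then restart from the buffer origin.
+	 */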
+ if (head + len > b_wrap(buf)) {
+ size_t contig = b_wrap(buf) - head;
+ htx_sent = htx_add_data(htx, ist2(b_head(buf), contig));
+ if (htx_sent < contig) {
+ qcs->flags |= QC_SF_DEM_FULL;
+ goto out;
+ }
+
+ len -= contig;
+ head = b_orig(buf);
+ goto retry;
+ }
+
+ htx_sent += htx_add_data(htx, ist2(head, len));
+ if (htx_sent < len) {
+ qcs->flags |= QC_SF_DEM_FULL;
+ goto out;
+ }
+
+ if (fin && len == htx_sent)
+ htx->flags |= HTX_FL_EOM;
+
+ out:
+ if (appbuf)
+ htx_to_buf(htx, appbuf);
+
+ TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs);
+ return htx_sent;
+}
+
+/* Parse a SETTINGS frame of length <len> from payload <buf>.
+ *
+ * Returns the number of consumed bytes or a negative error code.
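+ *
+ * The payload is a sequence of identifier/value pairs, each encoded as a
+ * QUIC variable-length integer. E.g. the bytes 0x01 0x00 0x07 0x10 decode
+ * to QPACK_MAX_TABLE_CAPACITY (0x01) = 0 and QPACK_BLOCKED_STREAMS (0x07) = 16.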
+ */
+static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf,
+ size_t len)
+{
+ struct buffer b;
+ uint64_t id, value;
+ size_t ret = 0;
+ long mask = 0; /* used to detect duplicated settings identifier */
+
+ TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_SETTINGS, h3c->qcc->conn);
+
+ /* Work on a copy of <buf>. */
+ b = b_make(b_orig(buf), b_size(buf), b_head_ofs(buf), len);
+
+ while (b_data(&b)) {
+ if (!b_quic_dec_int(&id, &b, &ret) || !b_quic_dec_int(&value, &b, &ret)) {
+ h3c->err = H3_FRAME_ERROR;
+ return -1;
+ }
+
+ h3_debug_printf(stderr, "%s id: %llu value: %llu\n",
+ __func__, (unsigned long long)id, (unsigned long long)value);
+
+ /* draft-ietf-quic-http34 7.2.4. SETTINGS
+ *
+ * The same setting identifier MUST NOT occur more than once in the
+ * SETTINGS frame. A receiver MAY treat the presence of duplicate
+ * setting identifiers as a connection error of type H3_SETTINGS_ERROR.
+ */
+
+ /* Ignore duplicate check for ID too big used for GREASE. */
+ if (id < sizeof(mask)) {
+ if (ha_bit_test(id, &mask)) {
+ h3c->err = H3_SETTINGS_ERROR;
+ return -1;
+ }
+ ha_bit_set(id, &mask);
+ }
+
+ switch (id) {
+ case H3_SETTINGS_QPACK_MAX_TABLE_CAPACITY:
+ h3c->qpack_max_table_capacity = value;
+ break;
+ case H3_SETTINGS_MAX_FIELD_SECTION_SIZE:
+ h3c->max_field_section_size = value;
+ break;
+ case H3_SETTINGS_QPACK_BLOCKED_STREAMS:
+ h3c->qpack_blocked_streams = value;
+ break;
+
+ case H3_SETTINGS_RESERVED_0:
+ case H3_SETTINGS_RESERVED_2:
+ case H3_SETTINGS_RESERVED_3:
+ case H3_SETTINGS_RESERVED_4:
+ case H3_SETTINGS_RESERVED_5:
+ /* draft-ietf-quic-http34 7.2.4.1. Defined SETTINGS Parameters
+ *
+ * Setting identifiers which were defined in [HTTP2] where there is no
+ * corresponding HTTP/3 setting have also been reserved
+ * (Section 11.2.2). These reserved settings MUST NOT be sent, and
+ * their receipt MUST be treated as a connection error of type
+ * H3_SETTINGS_ERROR.
+ */
+ h3c->err = H3_SETTINGS_ERROR;
+ return -1;
+ default:
+ /* MUST be ignored */
+ break;
+ }
+ }
+
+ TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_SETTINGS, h3c->qcc->conn);
+ return ret;
+}
+
+/* Decode the contents of the remotely initiated stream <qcs>. <fin> must be
+ * set to indicate that we received the last data of the stream.
+ *
+ * Returns the number of consumed bytes or a negative error code.
+ */
+static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ ssize_t total = 0, ret;
+
+ TRACE_ENTER(H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+
+ if (quic_stream_is_uni(qcs->id) && !(h3s->flags & H3_SF_UNI_INIT)) {
+ ret = h3_init_uni_stream(h3c, qcs, b);
+ if (ret < 0) {
+ TRACE_ERROR("cannot initialize uni stream", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ goto err;
+ }
+ else if (!ret) {
+ /* not enough data to initialize uni stream, retry later */
+ goto done;
+ }
+
+ total += ret;
+ }
+
+ if (quic_stream_is_uni(qcs->id) && (h3s->flags & H3_SF_UNI_NO_H3)) {
+ /* For non-h3 STREAM, parse it and return immediately. */
+ if ((ret = h3_parse_uni_stream_no_h3(qcs, b, fin)) < 0) {
+ TRACE_ERROR("error when parsing non-HTTP3 uni stream", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ goto err;
+ }
+
+ total += ret;
+ goto done;
+ }
+
+ /* RFC 9114 6.2.1. Control Streams
+ *
+ * The sender MUST NOT close the control stream, and the receiver MUST NOT
+ * request that the sender close the control stream. If either control
+ * stream is closed at any point, this MUST be treated as a connection
+ * error of type H3_CLOSED_CRITICAL_STREAM.
+ */
+ if (h3s->type == H3S_T_CTRL && fin) {
+ TRACE_ERROR("control stream closed by remote peer", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1);
+ goto err;
+ }
+
+ if (!b_data(b) && fin && quic_stream_is_bidi(qcs->id)) {
+ struct buffer *appbuf;
+ struct htx *htx;
+
+ TRACE_PROTO("received FIN without data", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) {
+ TRACE_ERROR("data buffer alloc failure", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ goto err;
+ }
+
+ htx = htx_from_buf(appbuf);
+ if (!htx_set_eom(htx)) {
+ TRACE_ERROR("cannot set EOM", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ }
+ htx_to_buf(htx, appbuf);
+ goto done;
+ }
+
+ while (b_data(b) && !(qcs->flags & QC_SF_DEM_FULL) && !h3c->err && !h3s->err) {
+ uint64_t ftype, flen;
+ char last_stream_frame = 0;
+
+ if (!h3s->demux_frame_len) {
+ /* Switch to a new frame. */
+ size_t hlen = h3_decode_frm_header(&ftype, &flen, b);
+ if (!hlen) {
+ TRACE_PROTO("pause parsing on incomplete frame header", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ break;
+ }
+
+ h3s->demux_frame_type = ftype;
+ h3s->demux_frame_len = flen;
+ total += hlen;
+ TRACE_PROTO("parsing a new frame", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+
+ /* Check that content-length is not exceeded on a new DATA frame. */
+ if (ftype == H3_FT_DATA) {
+ h3s->data_len += flen;
+ if (h3s->flags & H3_SF_HAVE_CLEN && h3_check_body_size(qcs, (fin && flen == b_data(b))))
+ break;
+ }
+
+ if ((ret = h3_check_frame_valid(h3c, qcs, ftype))) {
+ TRACE_ERROR("received an invalid frame", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ qcc_set_error(qcs->qcc, ret, 1);
+ goto err;
+ }
+
+ if (!b_data(b))
+ break;
+ }
+
+ flen = h3s->demux_frame_len;
+ ftype = h3s->demux_frame_type;
+
+ /* Do not demux incomplete frames except H3 DATA which can be
+ * fragmented in multiple HTX blocks.
+ */
+ if (flen > b_data(b) && ftype != H3_FT_DATA) {
+ /* Reject frames bigger than bufsize.
+ *
+ * TODO HEADERS should in complement be limited with H3
+ * SETTINGS_MAX_FIELD_SECTION_SIZE parameter to prevent
+ * excessive decompressed size.
+ */
+ if (flen > QC_S_RX_BUF_SZ) {
+ TRACE_ERROR("received a too big frame", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ qcc_set_error(qcs->qcc, H3_EXCESSIVE_LOAD, 1);
+ goto err;
+ }
+ break;
+ }
+
+ last_stream_frame = (fin && flen == b_data(b));
+
+ /* Check content-length equality with DATA frames length on the last frame. */
+ if (last_stream_frame && h3s->flags & H3_SF_HAVE_CLEN && h3_check_body_size(qcs, last_stream_frame))
+ break;
+
+ h3_inc_frame_type_cnt(h3c->prx_counters, ftype);
+ switch (ftype) {
+ case H3_FT_DATA:
+ ret = h3_data_to_htx(qcs, b, flen, last_stream_frame);
+ h3s->st_req = H3S_ST_REQ_DATA;
+ break;
+ case H3_FT_HEADERS:
+ if (h3s->st_req == H3S_ST_REQ_BEFORE) {
+ ret = h3_headers_to_htx(qcs, b, flen, last_stream_frame);
+ h3s->st_req = H3S_ST_REQ_HEADERS;
+ }
+ else {
+ ret = h3_trailers_to_htx(qcs, b, flen, last_stream_frame);
+ h3s->st_req = H3S_ST_REQ_TRAILERS;
+ }
+ break;
+ case H3_FT_CANCEL_PUSH:
+ case H3_FT_PUSH_PROMISE:
+ case H3_FT_MAX_PUSH_ID:
+ case H3_FT_GOAWAY:
+ /* Not supported */
+ ret = flen;
+ break;
+ case H3_FT_SETTINGS:
+ ret = h3_parse_settings_frm(qcs->qcc->ctx, b, flen);
+ if (ret < 0) {
+ TRACE_ERROR("error on SETTINGS parsing", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ qcc_set_error(qcs->qcc, h3c->err, 1);
+ goto err;
+ }
+ h3c->flags |= H3_CF_SETTINGS_RECV;
+ break;
+ default:
+ /* draft-ietf-quic-http34 9. Extensions to HTTP/3
+ *
+ * Implementations MUST discard frames [...] that have unknown
+ * or unsupported types.
+ */
+ ret = flen;
+ break;
+ }
+
+ if (ret > 0) {
+ BUG_ON(h3s->demux_frame_len < ret);
+ h3s->demux_frame_len -= ret;
+ b_del(b, ret);
+ total += ret;
+ }
+ }
+
+ /* Reset demux frame type for traces. */
+ if (!h3s->demux_frame_len)
+ h3s->demux_frame_type = H3_FT_UNINIT;
+
+ /* Interrupt decoding on stream/connection error detected. */
+ if (h3s->err) {
+ qcc_abort_stream_read(qcs);
+ qcc_reset_stream(qcs, h3s->err);
+ return b_data(b);
+ }
+ else if (h3c->err) {
+ qcc_set_error(qcs->qcc, h3c->err, 1);
+ return b_data(b);
+ }
+
+ /* TODO may be useful to wakeup the MUX if blocked due to full buffer.
+ * However, currently, io-cb of MUX does not handle Rx.
+ */
+
+ done:
+ TRACE_LEAVE(H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ return total;
+
+ err:
+ TRACE_DEVEL("leaving on error", H3_EV_RX_FRAME, qcs->qcc->conn, qcs);
+ return -1;
+}
+
+/* Returns the buffer to use for data sending.
+ * The returned buffer may be null (unallocated) if the allocation failed.
+ */
+static struct buffer *mux_get_buf(struct qcs *qcs)
+{
+ if (!b_size(&qcs->tx.buf))
+ b_alloc(&qcs->tx.buf);
+
+ return &qcs->tx.buf;
+}
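+
+/* For reference (RFC 9000, section 16): the QUIC variable-length integers
+ * handled by quic_int_getsize()/b_quic_enc_int() occupy 1, 2, 4 or 8 bytes
+ * for values below 2^6, 2^14, 2^30 and 2^62 respectively, the two high-order
+ * bits of the first byte encoding the length.
+ */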
+
+/* Emit stream data on the <qcs> control uni-stream.
+ *
+ * On success, returns the number of bytes sent. A negative code is returned
+ * on error.
+ */
+static int h3_control_send(struct qcs *qcs, void *ctx)
+{
+ int ret;
+ struct h3c *h3c = ctx;
+ unsigned char data[(2 + 3) * 2 * QUIC_VARINT_MAX_SIZE]; /* enough for 3 settings */
+ struct buffer pos, *res;
+ size_t frm_len;
+
+ TRACE_ENTER(H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs);
+
+ BUG_ON_HOT(h3c->flags & H3_CF_SETTINGS_SENT);
+
+ ret = 0;
+ pos = b_make((char *)data, sizeof(data), 0, 0);
+
+ frm_len = quic_int_getsize(H3_SETTINGS_QPACK_MAX_TABLE_CAPACITY) +
+ quic_int_getsize(h3_settings_qpack_max_table_capacity) +
+ quic_int_getsize(H3_SETTINGS_QPACK_BLOCKED_STREAMS) +
+ quic_int_getsize(h3_settings_qpack_blocked_streams);
+ if (h3_settings_max_field_section_size) {
+ frm_len += quic_int_getsize(H3_SETTINGS_MAX_FIELD_SECTION_SIZE) +
+ quic_int_getsize(h3_settings_max_field_section_size);
+ }
+
+ b_quic_enc_int(&pos, H3_UNI_S_T_CTRL, 0);
+ /* Build a SETTINGS frame */
+ b_quic_enc_int(&pos, H3_FT_SETTINGS, 0);
+ b_quic_enc_int(&pos, frm_len, 0);
+ b_quic_enc_int(&pos, H3_SETTINGS_QPACK_MAX_TABLE_CAPACITY, 0);
+ b_quic_enc_int(&pos, h3_settings_qpack_max_table_capacity, 0);
+ b_quic_enc_int(&pos, H3_SETTINGS_QPACK_BLOCKED_STREAMS, 0);
+ b_quic_enc_int(&pos, h3_settings_qpack_blocked_streams, 0);
+ if (h3_settings_max_field_section_size) {
+ b_quic_enc_int(&pos, H3_SETTINGS_MAX_FIELD_SECTION_SIZE, 0);
+ b_quic_enc_int(&pos, h3_settings_max_field_section_size, 0);
+ }
+
+ res = mux_get_buf(qcs);
+ if (b_is_null(res)) {
+ TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs);
+ goto err;
+ }
+
+ if (b_room(res) < b_data(&pos)) {
+ // TODO the mux should be put in blocked state, with
+ // the stream in state waiting for settings to be sent
+ ABORT_NOW();
+ }
+
+ ret = b_force_xfer(res, &pos, b_data(&pos));
+ if (ret > 0) {
+ /* Register qcs for sending before other streams. */
+ qcc_send_stream(qcs, 1);
+ h3c->flags |= H3_CF_SETTINGS_SENT;
+ }
+
+ TRACE_LEAVE(H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs);
+ return ret;
+
+ err:
+ TRACE_DEVEL("leaving on error", H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs);
+ return -1;
+}
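+
+/* Illustration (not part of the build): assuming both QPACK settings above
+ * are configured to 0 and h3_settings_max_field_section_size is unset, the
+ * control stream prologue built by h3_control_send() is 7 bytes on the wire:
+ *
+ *   0x00         stream type H3_UNI_S_T_CTRL
+ *   0x04         frame type H3_FT_SETTINGS
+ *   0x04         frame length (4)
+ *   0x01 0x00    SETTINGS_QPACK_MAX_TABLE_CAPACITY = 0
+ *   0x07 0x00    SETTINGS_QPACK_BLOCKED_STREAMS = 0
+ */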
+
+static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ struct buffer outbuf;
+ struct buffer headers_buf = BUF_NULL;
+ struct buffer *res;
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct htx_sl *sl;
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ int frame_length_size; /* size in bytes of frame length varint field */
+ int ret = 0;
+ int hdr;
+ int status = 0;
+
+ TRACE_ENTER(H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+
+ sl = NULL;
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOH)
+ break;
+
+ if (type == HTX_BLK_RES_SL) {
+ /* start-line -> HEADERS h3 frame */
+ BUG_ON(sl);
+ sl = htx_get_blk_ptr(htx, blk);
+ /* TODO should be on h3 layer */
+ status = sl->info.res.status;
+ }
+ else if (type == HTX_BLK_HDR) {
+ if (unlikely(hdr >= sizeof(list) / sizeof(list[0]) - 1)) {
+ TRACE_ERROR("too many headers", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ goto err;
+ }
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+ hdr++;
+ }
+ else {
+ ABORT_NOW();
+ goto err;
+ }
+ }
+
+ BUG_ON(!sl);
+
+ list[hdr].n = ist("");
+
+ res = mux_get_buf(qcs);
+ if (b_is_null(res)) {
+ TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ goto err;
+ }
+
+	/* Reserve at least 5 bytes: 1 for the frame type plus up to 4 for the length varint */
+ if (b_room(res) < 5)
+ ABORT_NOW();
+
+ b_reset(&outbuf);
+ outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0);
+ /* Start the headers after frame type + length */
+ headers_buf = b_make(b_head(res) + 5, b_size(res) - 5, 0, 0);
+
+ if (qpack_encode_field_section_line(&headers_buf))
+ ABORT_NOW();
+ if (qpack_encode_int_status(&headers_buf, status)) {
+ TRACE_ERROR("invalid status code", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ goto err;
+ }
+
+ for (hdr = 0; hdr < sizeof(list) / sizeof(list[0]); ++hdr) {
+ if (isteq(list[hdr].n, ist("")))
+ break;
+
+ /* RFC 9114 4.2. HTTP Fields
+ *
+ * An intermediary transforming an HTTP/1.x message to HTTP/3
+ * MUST remove connection-specific header fields as discussed in
+ * Section 7.6.1 of [HTTP], or their messages will be treated by
+ * other HTTP/3 endpoints as malformed.
+ */
+ if (isteq(list[hdr].n, ist("connection")) ||
+ isteq(list[hdr].n, ist("proxy-connection")) ||
+ isteq(list[hdr].n, ist("keep-alive")) ||
+ isteq(list[hdr].n, ist("transfer-encoding"))) {
+ continue;
+ }
+ else if (isteq(list[hdr].n, ist("te"))) {
+			/* "te" may only be sent with the "trailers" value; if
+			 * that value is present, keep it, otherwise the header
+			 * must be dropped.
+			 */
+ const struct ist v = istist(list[hdr].v, ist("trailers"));
+ if (!isttest(v) || (v.len > 8 && v.ptr[8] != ','))
+ continue;
+ list[hdr].v = ist("trailers");
+ }
+
+ if (qpack_encode_header(&headers_buf, list[hdr].n, list[hdr].v))
+ ABORT_NOW();
+ }
+
+ /* Now that all headers are encoded, we are certain that res buffer is
+ * big enough
+ */
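+	/* The frame header room was reserved as 1 type byte plus a worst-case
+	 * 4-byte length varint. The actual varint may be shorter, so <head> is
+	 * moved forward by the unused bytes so that the type byte and the
+	 * length immediately precede the encoded field section.
+	 */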
+ frame_length_size = quic_int_getsize(b_data(&headers_buf));
+ res->head += 4 - frame_length_size;
+ b_putchr(res, 0x01); /* h3 HEADERS frame type */
+ if (!b_quic_enc_int(res, b_data(&headers_buf), 0))
+ ABORT_NOW();
+ b_add(res, b_data(&headers_buf));
+
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ if (type == HTX_BLK_EOH)
+ break;
+ }
+
+ TRACE_LEAVE(H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ return ret;
+
+ err:
+ TRACE_DEVEL("leaving on error", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ return -1;
+}
+
+/* Convert a series of HTX trailer blocks from the <htx> buffer into the <qcs>
+ * buffer as an H3 HEADERS frame. H3 forbidden trailers are skipped. HTX
+ * trailer blocks are removed from <htx> until EOT is found, which is removed
+ * as well.
+ *
+ * If only an EOT HTX block is present without any trailer, no H3 frame is
+ * produced. The caller is then responsible for emitting an empty QUIC STREAM
+ * frame to signal the end of the stream.
+ *
+ * Returns the size of the HTX blocks removed.
+ */
+static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ struct buffer headers_buf = BUF_NULL;
+ struct buffer *res;
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ char *tail;
+ int ret = 0;
+ int hdr;
+
+ TRACE_ENTER(H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOT)
+ break;
+
+ if (type == HTX_BLK_TLR) {
+ if (unlikely(hdr >= sizeof(list) / sizeof(list[0]) - 1)) {
+ TRACE_ERROR("too many headers", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ goto err;
+ }
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+ hdr++;
+ }
+ else {
+ TRACE_ERROR("unexpected HTX block", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ goto err;
+ }
+ }
+
+ if (!hdr) {
+ /* No headers encoded here so no need to generate a H3 HEADERS
+ * frame. Mux will send an empty QUIC STREAM frame with FIN.
+ */
+ TRACE_DATA("skipping trailer", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ goto end;
+ }
+ list[hdr].n = ist("");
+
+ res = mux_get_buf(qcs);
+ if (b_is_null(res)) {
+ TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ goto err;
+ }
+
+	/* Reserve at least 9 bytes: 1 for the frame type plus an 8-byte maximum-size length varint */
+ if (b_room(res) < 9) {
+ qcs->flags |= QC_SF_BLK_MROOM;
+ goto err;
+ }
+
+ /* Force buffer realignment as size required to encode headers is unknown. */
+ if (b_space_wraps(res))
+ b_slow_realign(res, trash.area, b_data(res));
+ /* Start the headers after frame type + length */
+ headers_buf = b_make(b_peek(res, b_data(res) + 9), b_contig_space(res) - 9, 0, 0);
+
+ if (qpack_encode_field_section_line(&headers_buf)) {
+ qcs->flags |= QC_SF_BLK_MROOM;
+ goto err;
+ }
+
+ tail = b_tail(&headers_buf);
+ for (hdr = 0; hdr < sizeof(list) / sizeof(list[0]); ++hdr) {
+ if (isteq(list[hdr].n, ist("")))
+ break;
+
+ /* forbidden HTTP/3 headers, cf h3_resp_headers_send() */
+ if (isteq(list[hdr].n, ist("host")) ||
+ isteq(list[hdr].n, ist("content-length")) ||
+ isteq(list[hdr].n, ist("connection")) ||
+ isteq(list[hdr].n, ist("proxy-connection")) ||
+ isteq(list[hdr].n, ist("keep-alive")) ||
+ isteq(list[hdr].n, ist("te")) ||
+ isteq(list[hdr].n, ist("transfer-encoding"))) {
+ continue;
+ }
+
+ if (qpack_encode_header(&headers_buf, list[hdr].n, list[hdr].v)) {
+ qcs->flags |= QC_SF_BLK_MROOM;
+ goto err;
+ }
+ }
+
+ /* Check that at least one header was encoded in buffer. */
+ if (b_tail(&headers_buf) == tail) {
+ /* No headers encoded here so no need to generate a H3 HEADERS
+ * frame. Mux will send an empty QUIC STREAM frame with FIN.
+ */
+ TRACE_DATA("skipping trailer", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ goto end;
+ }
+
+ /* Now that all headers are encoded, we are certain that res buffer is
+ * big enough.
+ */
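+	/* Here the length varint is deliberately emitted in its maximum 8-byte
+	 * form so that type + length exactly fill the 9 bytes reserved before
+	 * the field section, without any head adjustment.
+	 */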
+ b_putchr(res, 0x01); /* h3 HEADERS frame type */
+ if (!b_quic_enc_int(res, b_data(&headers_buf), 8))
+ ABORT_NOW();
+ b_add(res, b_data(&headers_buf));
+
+ end:
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ if (type == HTX_BLK_EOT)
+ break;
+ }
+
+ TRACE_LEAVE(H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ return ret;
+
+ err:
+ TRACE_DEVEL("leaving on error", H3_EV_TX_HDR, qcs->qcc->conn, qcs);
+ return -1;
+}
+
+/* Returns the total number of bytes sent, which corresponds to the total
+ * size of the HTX blocks removed. A negative error code is returned in case
+ * of a fatal error which should cause a connection closure.
+ */
+static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count)
+{
+ struct htx *htx;
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ struct buffer outbuf;
+ struct buffer *res;
+ size_t total = 0;
+ int bsize, fsize, hsize;
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+
+ TRACE_ENTER(H3_EV_TX_DATA, qcs->qcc->conn, qcs);
+
+ htx = htx_from_buf(buf);
+
+ new_frame:
+ if (!count || htx_is_empty(htx))
+ goto end;
+
+ blk = htx_get_head_blk(htx);
+ type = htx_get_blk_type(blk);
+ fsize = bsize = htx_get_blksz(blk);
+
+ /* h3 DATA headers : 1-byte frame type + varint frame length */
+ hsize = 1 + QUIC_VARINT_MAX_SIZE;
+
+ if (type != HTX_BLK_DATA)
+ goto end;
+
+ res = mux_get_buf(qcs);
+ if (b_is_null(res)) {
+ TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_DATA, qcs->qcc->conn, qcs);
+ h3c->err = H3_INTERNAL_ERROR;
+ goto err;
+ }
+
+ if (unlikely(fsize == count &&
+ !b_data(res) &&
+ htx_nbblks(htx) == 1 && type == HTX_BLK_DATA)) {
+ void *old_area = res->area;
+
+		/* map an H3 frame to the HTX block so that we can put the
+		 * frame header there.
+		 */
+ *res = b_make(buf->area, buf->size, sizeof(struct htx) + blk->addr - hsize, fsize + hsize);
+ outbuf = b_make(b_head(res), hsize, 0, 0);
+ b_putchr(&outbuf, 0x00); /* h3 frame type = DATA */
+ b_quic_enc_int(&outbuf, fsize, QUIC_VARINT_MAX_SIZE); /* h3 frame length */
+
+ /* and exchange with our old area */
+ buf->area = old_area;
+ buf->data = buf->head = 0;
+ total += fsize;
+ fsize = 0;
+ goto end;
+ }
+
+ if (fsize > count)
+ fsize = count;
+
+ while (1) {
+ b_reset(&outbuf);
+ outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0);
+ if (b_size(&outbuf) > hsize || !b_space_wraps(res))
+ break;
+ b_slow_realign(res, trash.area, b_data(res));
+ }
+
+ /* Not enough room for headers and at least one data byte, block the
+ * stream. It is expected that the stream connector layer will subscribe
+ * on SEND.
+ */
+ if (b_size(&outbuf) <= hsize) {
+ TRACE_STATE("not enough room for data frame", H3_EV_TX_DATA, qcs->qcc->conn, qcs);
+ qcs->flags |= QC_SF_BLK_MROOM;
+ goto end;
+ }
+
+ if (b_size(&outbuf) < hsize + fsize)
+ fsize = b_size(&outbuf) - hsize;
+ BUG_ON(fsize <= 0);
+
+ b_putchr(&outbuf, 0x00); /* h3 frame type = DATA */
+ b_quic_enc_int(&outbuf, fsize, 0); /* h3 frame length */
+
+ b_putblk(&outbuf, htx_get_blk_ptr(htx, blk), fsize);
+ total += fsize;
+ count -= fsize;
+
+ if (fsize == bsize)
+ htx_remove_blk(htx, blk);
+ else
+ htx_cut_data_blk(htx, blk, fsize);
+
+ /* commit the buffer */
+ b_add(res, b_data(&outbuf));
+ goto new_frame;
+
+ end:
+ TRACE_LEAVE(H3_EV_TX_DATA, qcs->qcc->conn, qcs);
+ return total;
+
+ err:
+	BUG_ON(total); /* Must return the HTX removed size if at least one frame was encoded. */
+ TRACE_DEVEL("leaving on error", H3_EV_TX_DATA, qcs->qcc->conn, qcs);
+ return -1;
+}
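+
+/* Illustration (not part of the build): a DATA frame as emitted above is
+ * simply 0x00 (H3_FT_DATA) followed by the payload length as a QUIC varint,
+ * then the payload itself; e.g. a 10-byte chunk yields 0x00 0x0a then the 10
+ * bytes. Note that the zero-copy path forces the 8-byte varint form in order
+ * to fill the header room reserved in front of the HTX data block.
+ */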
+
+static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count)
+{
+ struct h3s *h3s = qcs->ctx;
+ struct h3c *h3c = h3s->h3c;
+ size_t total = 0;
+ enum htx_blk_type btype;
+ struct htx *htx;
+ struct htx_blk *blk;
+ uint32_t bsize;
+ int32_t idx;
+ int ret = 0;
+
+ TRACE_ENTER(H3_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ htx = htx_from_buf(buf);
+
+ if (htx->extra && htx->extra == HTX_UNKOWN_PAYLOAD_LENGTH)
+ qcs->flags |= QC_SF_UNKNOWN_PL_LENGTH;
+
+ while (count && !htx_is_empty(htx) &&
+ !(qcs->flags & QC_SF_BLK_MROOM) && !h3c->err) {
+
+ idx = htx_get_head(htx);
+ blk = htx_get_blk(htx, idx);
+ btype = htx_get_blk_type(blk);
+ bsize = htx_get_blksz(blk);
+
+		/* Not implemented: QUIC on the backend side */
+ BUG_ON(btype == HTX_BLK_REQ_SL);
+
+ switch (btype) {
+ case HTX_BLK_RES_SL:
+ /* start-line -> HEADERS h3 frame */
+ ret = h3_resp_headers_send(qcs, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto out;
+ }
+ break;
+
+ case HTX_BLK_DATA:
+ ret = h3_resp_data_send(qcs, buf, count);
+ if (ret > 0) {
+ htx = htx_from_buf(buf);
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto out;
+ }
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ ret = h3_resp_trailers_send(qcs, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto out;
+ }
+ break;
+
+ default:
+ htx_remove_blk(htx, blk);
+ total += bsize;
+ count -= bsize;
+ break;
+ }
+
+		/* If an error occurred, either the buffer-space flag or the
+		 * connection error must be set to break the current loop.
+		 */
+ BUG_ON(ret < 0 && !(qcs->flags & QC_SF_BLK_MROOM) && !h3c->err);
+ }
+
+ /* Interrupt sending on connection error. */
+ if (unlikely(h3c->err)) {
+ qcc_set_error(qcs->qcc, h3c->err, 1);
+ goto out;
+ }
+
+ /* RFC 9114 4.1. HTTP Message Framing
+ *
+ * A server can send a complete response prior to the client sending an
+ * entire request if the response does not depend on any portion of the
+ * request that has not been sent and received. When the server does not
+ * need to receive the remainder of the request, it MAY abort reading
+ * the request stream, send a complete response, and cleanly close the
+ * sending part of the stream. The error code H3_NO_ERROR SHOULD be used
+ * when requesting that the client stop sending on the request stream.
+ * Clients MUST NOT discard complete responses as a result of having
+ * their request terminated abruptly, though clients can always discard
+ * responses at their discretion for other reasons. If the server sends
+ * a partial or complete response but does not abort reading the
+ * request, clients SHOULD continue sending the content of the request
+ * and close the stream normally.
+ */
+ if (unlikely((htx->flags & HTX_FL_EOM) && htx_is_empty(htx)) &&
+ !qcs_is_close_remote(qcs)) {
+ /* Generate a STOP_SENDING if full response transferred before
+ * receiving the full request.
+ */
+ qcs->err = H3_NO_ERROR;
+ qcc_abort_stream_read(qcs);
+ }
+
+ out:
+ htx_to_buf(htx, buf);
+
+ TRACE_LEAVE(H3_EV_STRM_SEND, qcs->qcc->conn, qcs);
+ return total;
+}
+
+static size_t h3_nego_ff(struct qcs *qcs, size_t count)
+{
+ struct buffer *res;
+ int hsize;
+ size_t sz, ret = 0;
+
+ TRACE_ENTER(H3_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ res = mux_get_buf(qcs);
+ if (b_is_null(res)) {
+ qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF;
+ goto end;
+ }
+
+ /* h3 DATA headers : 1-byte frame type + varint frame length */
+ hsize = 1 + QUIC_VARINT_MAX_SIZE;
+ while (1) {
+ if (b_contig_space(res) >= hsize || !b_space_wraps(res))
+ break;
+ b_slow_realign(res, trash.area, b_data(res));
+ }
+
+ /* Not enough room for headers and at least one data byte, block the
+ * stream. It is expected that the stream connector layer will subscribe
+ * on SEND.
+ */
+ if (b_contig_space(res) <= hsize) {
+ qcs->flags |= QC_SF_BLK_MROOM;
+ qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ goto end;
+ }
+
+ /* Cannot forward more than available room in output buffer */
+ sz = b_contig_space(res) - hsize;
+ if (count > sz)
+ count = sz;
+
+ qcs->sd->iobuf.buf = res;
+ qcs->sd->iobuf.offset = hsize;
+ qcs->sd->iobuf.data = 0;
+
+ ret = count;
+ end:
+ TRACE_LEAVE(H3_EV_STRM_SEND, qcs->qcc->conn, qcs);
+ return ret;
+}
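+
+/* Note: h3_nego_ff() only reserves <hsize> bytes of header room through
+ * iobuf.offset; the DATA frame header itself is written into that gap by
+ * h3_done_ff() below once the exact forwarded length is known, using the
+ * 8-byte varint form to fill the reservation exactly.
+ */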
+
+static size_t h3_done_ff(struct qcs *qcs)
+{
+ size_t total = qcs->sd->iobuf.data;
+ TRACE_ENTER(H3_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ h3_debug_printf(stderr, "%s\n", __func__);
+
+ if (qcs->sd->iobuf.data) {
+ b_sub(qcs->sd->iobuf.buf, qcs->sd->iobuf.data);
+ b_putchr(qcs->sd->iobuf.buf, 0x00); /* h3 frame type = DATA */
+ b_quic_enc_int(qcs->sd->iobuf.buf, qcs->sd->iobuf.data, QUIC_VARINT_MAX_SIZE); /* h3 frame length */
+ b_add(qcs->sd->iobuf.buf, qcs->sd->iobuf.data);
+ }
+
+ qcs->sd->iobuf.buf = NULL;
+ qcs->sd->iobuf.offset = 0;
+ qcs->sd->iobuf.data = 0;
+
+ TRACE_LEAVE(H3_EV_STRM_SEND, qcs->qcc->conn, qcs);
+ return total;
+}
+
+/* Notify about a closure on the <qcs> stream requested by the remote peer.
+ *
+ * The stream channel <side> is expressed relative to our endpoint: WR for
+ * STOP_SENDING or RD for RESET_STREAM reception. The decode_qcs() callback is
+ * used instead for a closure performed via a STREAM frame with the FIN bit.
+ *
+ * The main objective of this function is to check whether the closure is
+ * valid according to the HTTP/3 specification.
+ *
+ * Returns 0 on success else non-zero. A CONNECTION_CLOSE is generated on
+ * error.
+ */
+static int h3_close(struct qcs *qcs, enum qcc_app_ops_close_side side)
+{
+ struct h3s *h3s = qcs->ctx;
+	struct h3c *h3c = h3s->h3c;
+
+ /* RFC 9114 6.2.1. Control Streams
+ *
+ * The sender
+ * MUST NOT close the control stream, and the receiver MUST NOT
+ * request that the sender close the control stream. If either
+ * control stream is closed at any point, this MUST be treated
+ * as a connection error of type H3_CLOSED_CRITICAL_STREAM.
+ */
+ if (qcs == h3c->ctrl_strm || h3s->type == H3S_T_CTRL) {
+ TRACE_ERROR("closure detected on control stream", H3_EV_H3S_END, qcs->qcc->conn, qcs);
+ qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int h3_attach(struct qcs *qcs, void *conn_ctx)
+{
+ struct h3c *h3c = conn_ctx;
+ struct h3s *h3s = NULL;
+
+ TRACE_ENTER(H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+
+ /* RFC 9114 5.2. Connection Shutdown
+ *
+ * Upon sending
+ * a GOAWAY frame, the endpoint SHOULD explicitly cancel (see
+ * Sections 4.1.1 and 7.2.3) any requests or pushes that have
+ * identifiers greater than or equal to the one indicated, in
+ * order to clean up transport state for the affected streams.
+ * The endpoint SHOULD continue to do so as more requests or
+ * pushes arrive.
+ */
+ if (h3c->flags & H3_CF_GOAWAY_SENT && qcs->id >= h3c->id_goaway &&
+ quic_stream_is_bidi(qcs->id)) {
+ /* Reject request and do not allocate a h3s context.
+ * TODO support push uni-stream rejection.
+ */
+ TRACE_STATE("reject stream higher than goaway", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ qcc_abort_stream_read(qcs);
+ qcc_reset_stream(qcs, H3_REQUEST_REJECTED);
+ goto done;
+ }
+
+ h3s = pool_alloc(pool_head_h3s);
+ if (!h3s) {
+ TRACE_ERROR("h3s allocation failure", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ goto err;
+ }
+
+ qcs->ctx = h3s;
+ h3s->h3c = conn_ctx;
+
+ h3s->demux_frame_len = 0;
+ h3s->demux_frame_type = H3_FT_UNINIT;
+ h3s->body_len = 0;
+ h3s->data_len = 0;
+ h3s->flags = 0;
+ h3s->err = 0;
+
+ if (quic_stream_is_bidi(qcs->id)) {
+ h3s->type = H3S_T_REQ;
+ h3s->st_req = H3S_ST_REQ_BEFORE;
+ qcs_wait_http_req(qcs);
+ }
+ else {
+ /* stream type must be decoded for unidirectional streams */
+ h3s->type = H3S_T_UNKNOWN;
+ }
+
+ done:
+ TRACE_LEAVE(H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ return 0;
+
+ err:
+ TRACE_DEVEL("leaving in error", H3_EV_H3S_NEW, qcs->qcc->conn, qcs);
+ return 1;
+}
+
+static void h3_detach(struct qcs *qcs)
+{
+ struct h3s *h3s = qcs->ctx;
+
+ TRACE_ENTER(H3_EV_H3S_END, qcs->qcc->conn, qcs);
+
+ pool_free(pool_head_h3s, h3s);
+ qcs->ctx = NULL;
+
+ TRACE_LEAVE(H3_EV_H3S_END, qcs->qcc->conn, qcs);
+}
+
+/* Initialize H3 control stream and prepare SETTINGS emission.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int h3_finalize(void *ctx)
+{
+ struct h3c *h3c = ctx;
+ struct qcc *qcc = h3c->qcc;
+ struct qcs *qcs;
+
+ TRACE_ENTER(H3_EV_H3C_NEW, qcc->conn);
+
+ qcs = qcc_init_stream_local(h3c->qcc, 0);
+ if (!qcs) {
+ TRACE_ERROR("cannot init control stream", H3_EV_H3C_NEW, qcc->conn);
+ goto err;
+ }
+
+ h3c->ctrl_strm = qcs;
+
+ if (h3_control_send(qcs, h3c) < 0)
+ goto err;
+
+ TRACE_LEAVE(H3_EV_H3C_NEW, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_DEVEL("leaving on error", H3_EV_H3C_NEW, qcc->conn);
+ return 1;
+}
+
+/* Generate a GOAWAY frame for <h3c> connection on the control stream.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int h3_send_goaway(struct h3c *h3c)
+{
+ struct qcs *qcs = h3c->ctrl_strm;
+ struct buffer pos, *res;
+ unsigned char data[3 * QUIC_VARINT_MAX_SIZE];
+ size_t frm_len = quic_int_getsize(h3c->id_goaway);
+
+ TRACE_ENTER(H3_EV_H3C_END, h3c->qcc->conn);
+
+ if (!qcs) {
+ TRACE_ERROR("control stream not initialized", H3_EV_H3C_END, h3c->qcc->conn);
+ goto err;
+ }
+
+ pos = b_make((char *)data, sizeof(data), 0, 0);
+
+ b_quic_enc_int(&pos, H3_FT_GOAWAY, 0);
+ b_quic_enc_int(&pos, frm_len, 0);
+ b_quic_enc_int(&pos, h3c->id_goaway, 0);
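+	/* Illustration: with <id_goaway> = 0 this encodes as 3 bytes on the
+	 * wire: 0x07 (H3_FT_GOAWAY), 0x01 (frame length), 0x00 (stream ID).
+	 */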
+
+ res = mux_get_buf(qcs);
+ if (b_is_null(res) || b_room(res) < b_data(&pos)) {
+ /* Do not try forcefully to emit GOAWAY if no space left. */
+ TRACE_ERROR("cannot send GOAWAY", H3_EV_H3C_END, h3c->qcc->conn, qcs);
+ goto err;
+ }
+
+ b_force_xfer(res, &pos, b_data(&pos));
+ qcc_send_stream(qcs, 1);
+
+ h3c->flags |= H3_CF_GOAWAY_SENT;
+ TRACE_LEAVE(H3_EV_H3C_END, h3c->qcc->conn);
+ return 0;
+
+ err:
+ /* Consider GOAWAY as sent even if not really the case. This will
+ * block future stream opening using H3_REQUEST_REJECTED reset.
+ */
+ h3c->flags |= H3_CF_GOAWAY_SENT;
+ TRACE_DEVEL("leaving in error", H3_EV_H3C_END, h3c->qcc->conn);
+ return 1;
+}
+
+/* Initialize the HTTP/3 context for <qcc> mux.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int h3_init(struct qcc *qcc)
+{
+ struct h3c *h3c;
+ struct quic_conn *qc = qcc->conn->handle.qc;
+
+ TRACE_ENTER(H3_EV_H3C_NEW, qcc->conn);
+
+ h3c = pool_alloc(pool_head_h3c);
+ if (!h3c) {
+ TRACE_ERROR("cannot allocate h3c", H3_EV_H3C_NEW, qcc->conn);
+ goto fail_no_h3;
+ }
+
+ h3c->qcc = qcc;
+ h3c->ctrl_strm = NULL;
+ h3c->err = 0;
+ h3c->flags = 0;
+ h3c->id_goaway = 0;
+
+ qcc->ctx = h3c;
+ /* TODO cleanup only ref to quic_conn */
+ h3c->prx_counters =
+ EXTRA_COUNTERS_GET(qc->li->bind_conf->frontend->extra_counters_fe,
+ &h3_stats_module);
+ LIST_INIT(&h3c->buf_wait.list);
+
+ TRACE_LEAVE(H3_EV_H3C_NEW, qcc->conn);
+ return 1;
+
+ fail_no_h3:
+ TRACE_DEVEL("leaving on error", H3_EV_H3C_NEW, qcc->conn);
+ return 0;
+}
+
+/* Send a HTTP/3 GOAWAY followed by a CONNECTION_CLOSE_APP. */
+static void h3_shutdown(void *ctx)
+{
+ struct h3c *h3c = ctx;
+
+ TRACE_ENTER(H3_EV_H3C_END, h3c->qcc->conn);
+
+ /* RFC 9114 5.2. Connection Shutdown
+ *
+ * Even when a connection is not idle, either endpoint can decide to
+ * stop using the connection and initiate a graceful connection close.
+ * Endpoints initiate the graceful shutdown of an HTTP/3 connection by
+ * sending a GOAWAY frame.
+ */
+ h3_send_goaway(h3c);
+
+ /* RFC 9114 5.2. Connection Shutdown
+ *
+ * An endpoint that completes a
+ * graceful shutdown SHOULD use the H3_NO_ERROR error code when closing
+ * the connection.
+ */
+ h3c->qcc->err = quic_err_app(H3_NO_ERROR);
+
+ TRACE_LEAVE(H3_EV_H3C_END, h3c->qcc->conn);
+}
+
+static void h3_release(void *ctx)
+{
+ struct h3c *h3c = ctx;
+ pool_free(pool_head_h3c, h3c);
+}
+
+/* Increment the h3 error code counter matching the <err_code> value */
+static void h3_stats_inc_err_cnt(void *ctx, int err_code)
+{
+ struct h3c *h3c = ctx;
+
+ h3_inc_err_cnt(h3c->prx_counters, err_code);
+}
+
+static inline const char *h3_ft_str(uint64_t type)
+{
+ switch (type) {
+ case H3_FT_DATA: return "DATA";
+ case H3_FT_HEADERS: return "HEADERS";
+ case H3_FT_SETTINGS: return "SETTINGS";
+ case H3_FT_PUSH_PROMISE: return "PUSH_PROMISE";
+ case H3_FT_MAX_PUSH_ID: return "MAX_PUSH_ID";
+ case H3_FT_CANCEL_PUSH: return "CANCEL_PUSH";
+ case H3_FT_GOAWAY: return "GOAWAY";
+ default: return "_UNKNOWN_";
+ }
+}
+
+/* h3 trace handler */
+static void h3_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct qcc *qcc = conn ? conn->ctx : NULL;
+ const struct qcs *qcs = a2;
+ const struct h3s *h3s = qcs ? qcs->ctx : NULL;
+
+ if (!qcc)
+ return;
+
+ if (src->verbosity > H3_VERB_CLEAN) {
+ chunk_appendf(&trace_buf, " : qcc=%p(F)", qcc);
+ if (qcc->conn->handle.qc)
+ chunk_appendf(&trace_buf, " qc=%p", qcc->conn->handle.qc);
+
+ if (qcs)
+ chunk_appendf(&trace_buf, " qcs=%p(%llu)", qcs, (ull)qcs->id);
+
+ if (h3s && h3s->demux_frame_type != H3_FT_UNINIT) {
+ chunk_appendf(&trace_buf, " h3s.dem=%s/%llu",
+ h3_ft_str(h3s->demux_frame_type), (ull)h3s->demux_frame_len);
+ }
+ }
+}
+
+/* HTTP/3 application layer operations */
+const struct qcc_app_ops h3_ops = {
+ .init = h3_init,
+ .attach = h3_attach,
+ .decode_qcs = h3_decode_qcs,
+ .snd_buf = h3_snd_buf,
+ .nego_ff = h3_nego_ff,
+ .done_ff = h3_done_ff,
+ .close = h3_close,
+ .detach = h3_detach,
+ .finalize = h3_finalize,
+ .shutdown = h3_shutdown,
+ .inc_err_cnt = h3_stats_inc_err_cnt,
+ .release = h3_release,
+};
diff --git a/src/h3_stats.c b/src/h3_stats.c
new file mode 100644
index 0000000..c96093f
--- /dev/null
+++ b/src/h3_stats.c
@@ -0,0 +1,276 @@
+#include <haproxy/h3.h>
+#include <haproxy/stats.h>
+
+enum {
+ /* h3 frame type counters */
+ H3_ST_DATA,
+ H3_ST_HEADERS,
+ H3_ST_CANCEL_PUSH,
+ H3_ST_PUSH_PROMISE,
+ H3_ST_MAX_PUSH_ID,
+ H3_ST_GOAWAY,
+ H3_ST_SETTINGS,
+ /* h3 error counters */
+ H3_ST_H3_NO_ERROR,
+ H3_ST_H3_GENERAL_PROTOCOL_ERROR,
+ H3_ST_H3_INTERNAL_ERROR,
+ H3_ST_H3_STREAM_CREATION_ERROR,
+ H3_ST_H3_CLOSED_CRITICAL_STREAM,
+ H3_ST_H3_FRAME_UNEXPECTED,
+ H3_ST_H3_FRAME_ERROR,
+ H3_ST_H3_EXCESSIVE_LOAD,
+ H3_ST_H3_ID_ERROR,
+ H3_ST_H3_SETTINGS_ERROR,
+ H3_ST_H3_MISSING_SETTINGS,
+ H3_ST_H3_REQUEST_REJECTED,
+ H3_ST_H3_REQUEST_CANCELLED,
+ H3_ST_H3_REQUEST_INCOMPLETE,
+ H3_ST_H3_MESSAGE_ERROR,
+ H3_ST_H3_CONNECT_ERROR,
+ H3_ST_H3_VERSION_FALLBACK,
+ /* QPACK error counters */
+ H3_ST_QPACK_DECOMPRESSION_FAILED,
+ H3_ST_QPACK_ENCODER_STREAM_ERROR,
+ H3_ST_QPACK_DECODER_STREAM_ERROR,
+ H3_STATS_COUNT /* must be the last */
+};
+
+static struct name_desc h3_stats[] = {
+ /* h3 frame type counters */
+ [H3_ST_DATA] = { .name = "h3_data",
+ .desc = "Total number of DATA frames received" },
+ [H3_ST_HEADERS] = { .name = "h3_headers",
+ .desc = "Total number of HEADERS frames received" },
+ [H3_ST_CANCEL_PUSH] = { .name = "h3_cancel_push",
+ .desc = "Total number of CANCEL_PUSH frames received" },
+ [H3_ST_PUSH_PROMISE] = { .name = "h3_push_promise",
+ .desc = "Total number of PUSH_PROMISE frames received" },
+ [H3_ST_MAX_PUSH_ID] = { .name = "h3_max_push_id",
+ .desc = "Total number of MAX_PUSH_ID frames received" },
+ [H3_ST_GOAWAY] = { .name = "h3_goaway",
+ .desc = "Total number of GOAWAY frames received" },
+ [H3_ST_SETTINGS] = { .name = "h3_settings",
+ .desc = "Total number of SETTINGS frames received" },
+ /* h3 error counters */
+ [H3_ST_H3_NO_ERROR] = { .name = "h3_no_error",
+ .desc = "Total number of H3_NO_ERROR errors received" },
+ [H3_ST_H3_GENERAL_PROTOCOL_ERROR] = { .name = "h3_general_protocol_error",
+ .desc = "Total number of H3_GENERAL_PROTOCOL_ERROR errors received" },
+ [H3_ST_H3_INTERNAL_ERROR] = { .name = "h3_internal_error",
+ .desc = "Total number of H3_INTERNAL_ERROR errors received" },
+ [H3_ST_H3_STREAM_CREATION_ERROR] = { .name = "h3_stream_creation_error",
+ .desc = "Total number of H3_STREAM_CREATION_ERROR errors received" },
+ [H3_ST_H3_CLOSED_CRITICAL_STREAM] = { .name = "h3_closed_critical_stream",
+ .desc = "Total number of H3_CLOSED_CRITICAL_STREAM errors received" },
+ [H3_ST_H3_FRAME_UNEXPECTED] = { .name = "h3_frame_unexpected",
+ .desc = "Total number of H3_FRAME_UNEXPECTED errors received" },
+ [H3_ST_H3_FRAME_ERROR] = { .name = "h3_frame_error",
+ .desc = "Total number of H3_FRAME_ERROR errors received" },
+ [H3_ST_H3_EXCESSIVE_LOAD] = { .name = "h3_excessive_load",
+ .desc = "Total number of H3_EXCESSIVE_LOAD errors received" },
+ [H3_ST_H3_ID_ERROR] = { .name = "h3_id_error",
+ .desc = "Total number of H3_ID_ERROR errors received" },
+ [H3_ST_H3_SETTINGS_ERROR] = { .name = "h3_settings_error",
+ .desc = "Total number of H3_SETTINGS_ERROR errors received" },
+ [H3_ST_H3_MISSING_SETTINGS] = { .name = "h3_missing_settings",
+ .desc = "Total number of H3_MISSING_SETTINGS errors received" },
+ [H3_ST_H3_REQUEST_REJECTED] = { .name = "h3_request_rejected",
+ .desc = "Total number of H3_REQUEST_REJECTED errors received" },
+ [H3_ST_H3_REQUEST_CANCELLED] = { .name = "h3_request_cancelled",
+ .desc = "Total number of H3_REQUEST_CANCELLED errors received" },
+ [H3_ST_H3_REQUEST_INCOMPLETE] = { .name = "h3_request_incomplete",
+ .desc = "Total number of H3_REQUEST_INCOMPLETE errors received" },
+ [H3_ST_H3_MESSAGE_ERROR] = { .name = "h3_message_error",
+ .desc = "Total number of H3_MESSAGE_ERROR errors received" },
+ [H3_ST_H3_CONNECT_ERROR] = { .name = "h3_connect_error",
+ .desc = "Total number of H3_CONNECT_ERROR errors received" },
+ [H3_ST_H3_VERSION_FALLBACK] = { .name = "h3_version_fallback",
+ .desc = "Total number of H3_VERSION_FALLBACK errors received" },
+ /* QPACK error counters */
+	[H3_ST_QPACK_DECOMPRESSION_FAILED] = { .name = "qpack_decompression_failed",
+ .desc = "Total number of QPACK_DECOMPRESSION_FAILED errors received" },
+ [H3_ST_QPACK_ENCODER_STREAM_ERROR] = { .name = "qpack_encoder_stream_error",
+ .desc = "Total number of QPACK_ENCODER_STREAM_ERROR errors received" },
+ [H3_ST_QPACK_DECODER_STREAM_ERROR] = { .name = "qpack_decoder_stream_error",
+ .desc = "Total number of QPACK_DECODER_STREAM_ERROR errors received" },
+};
+
+static struct h3_counters {
+ /* h3 frame type counters */
+ long long h3_data; /* total number of DATA frames received */
+ long long h3_headers; /* total number of HEADERS frames received */
+ long long h3_cancel_push; /* total number of CANCEL_PUSH frames received */
+ long long h3_push_promise; /* total number of PUSH_PROMISE frames received */
+ long long h3_max_push_id; /* total number of MAX_PUSH_ID frames received */
+ long long h3_goaway; /* total number of GOAWAY frames received */
+ long long h3_settings; /* total number of SETTINGS frames received */
+ /* h3 error counters */
+ long long h3_no_error; /* total number of H3_NO_ERROR errors received */
+ long long h3_general_protocol_error; /* total number of H3_GENERAL_PROTOCOL_ERROR errors received */
+ long long h3_internal_error; /* total number of H3_INTERNAL_ERROR errors received */
+ long long h3_stream_creation_error; /* total number of H3_STREAM_CREATION_ERROR errors received */
+ long long h3_closed_critical_stream; /* total number of H3_CLOSED_CRITICAL_STREAM errors received */
+ long long h3_frame_unexpected; /* total number of H3_FRAME_UNEXPECTED errors received */
+ long long h3_frame_error; /* total number of H3_FRAME_ERROR errors received */
+ long long h3_excessive_load; /* total number of H3_EXCESSIVE_LOAD errors received */
+ long long h3_id_error; /* total number of H3_ID_ERROR errors received */
+ long long h3_settings_error; /* total number of H3_SETTINGS_ERROR errors received */
+ long long h3_missing_settings; /* total number of H3_MISSING_SETTINGS errors received */
+ long long h3_request_rejected; /* total number of H3_REQUEST_REJECTED errors received */
+ long long h3_request_cancelled; /* total number of H3_REQUEST_CANCELLED errors received */
+ long long h3_request_incomplete; /* total number of H3_REQUEST_INCOMPLETE errors received */
+ long long h3_message_error; /* total number of H3_MESSAGE_ERROR errors received */
+ long long h3_connect_error; /* total number of H3_CONNECT_ERROR errors received */
+ long long h3_version_fallback; /* total number of H3_VERSION_FALLBACK errors received */
+ /* QPACK error counters */
+ long long qpack_decompression_failed; /* total number of QPACK_DECOMPRESSION_FAILED errors received */
+ long long qpack_encoder_stream_error; /* total number of QPACK_ENCODER_STREAM_ERROR errors received */
+ long long qpack_decoder_stream_error; /* total number of QPACK_DECODER_STREAM_ERROR errors received */
+} h3_counters;
+
+static void h3_fill_stats(void *data, struct field *stats)
+{
+ struct h3_counters *counters = data;
+
+ /* h3 frame type counters */
+ stats[H3_ST_DATA] = mkf_u64(FN_COUNTER, counters->h3_data);
+ stats[H3_ST_HEADERS] = mkf_u64(FN_COUNTER, counters->h3_headers);
+ stats[H3_ST_CANCEL_PUSH] = mkf_u64(FN_COUNTER, counters->h3_cancel_push);
+ stats[H3_ST_PUSH_PROMISE] = mkf_u64(FN_COUNTER, counters->h3_push_promise);
+ stats[H3_ST_MAX_PUSH_ID] = mkf_u64(FN_COUNTER, counters->h3_max_push_id);
+ stats[H3_ST_GOAWAY] = mkf_u64(FN_COUNTER, counters->h3_goaway);
+ stats[H3_ST_SETTINGS] = mkf_u64(FN_COUNTER, counters->h3_settings);
+ /* h3 error counters */
+ stats[H3_ST_H3_NO_ERROR] = mkf_u64(FN_COUNTER, counters->h3_no_error);
+ stats[H3_ST_H3_GENERAL_PROTOCOL_ERROR] = mkf_u64(FN_COUNTER, counters->h3_general_protocol_error);
+ stats[H3_ST_H3_INTERNAL_ERROR] = mkf_u64(FN_COUNTER, counters->h3_internal_error);
+ stats[H3_ST_H3_STREAM_CREATION_ERROR] = mkf_u64(FN_COUNTER, counters->h3_stream_creation_error);
+ stats[H3_ST_H3_CLOSED_CRITICAL_STREAM] = mkf_u64(FN_COUNTER, counters->h3_closed_critical_stream);
+ stats[H3_ST_H3_FRAME_UNEXPECTED] = mkf_u64(FN_COUNTER, counters->h3_frame_unexpected);
+ stats[H3_ST_H3_FRAME_ERROR] = mkf_u64(FN_COUNTER, counters->h3_frame_error);
+ stats[H3_ST_H3_EXCESSIVE_LOAD] = mkf_u64(FN_COUNTER, counters->h3_excessive_load);
+ stats[H3_ST_H3_ID_ERROR] = mkf_u64(FN_COUNTER, counters->h3_id_error);
+ stats[H3_ST_H3_SETTINGS_ERROR] = mkf_u64(FN_COUNTER, counters->h3_settings_error);
+ stats[H3_ST_H3_MISSING_SETTINGS] = mkf_u64(FN_COUNTER, counters->h3_missing_settings);
+ stats[H3_ST_H3_REQUEST_REJECTED] = mkf_u64(FN_COUNTER, counters->h3_request_rejected);
+ stats[H3_ST_H3_REQUEST_CANCELLED] = mkf_u64(FN_COUNTER, counters->h3_request_cancelled);
+ stats[H3_ST_H3_REQUEST_INCOMPLETE] = mkf_u64(FN_COUNTER, counters->h3_request_incomplete);
+ stats[H3_ST_H3_MESSAGE_ERROR] = mkf_u64(FN_COUNTER, counters->h3_message_error);
+ stats[H3_ST_H3_CONNECT_ERROR] = mkf_u64(FN_COUNTER, counters->h3_connect_error);
+ stats[H3_ST_H3_VERSION_FALLBACK] = mkf_u64(FN_COUNTER, counters->h3_version_fallback);
+ /* QPACK error counters */
+ stats[H3_ST_QPACK_DECOMPRESSION_FAILED] = mkf_u64(FN_COUNTER, counters->qpack_decompression_failed);
+ stats[H3_ST_QPACK_ENCODER_STREAM_ERROR] = mkf_u64(FN_COUNTER, counters->qpack_encoder_stream_error);
+ stats[H3_ST_QPACK_DECODER_STREAM_ERROR] = mkf_u64(FN_COUNTER, counters->qpack_decoder_stream_error);
+}
+
+struct stats_module h3_stats_module = {
+ .name = "h3",
+ .fill_stats = h3_fill_stats,
+ .stats = h3_stats,
+ .stats_count = H3_STATS_COUNT,
+ .counters = &h3_counters,
+ .counters_size = sizeof(h3_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &h3_stats_module);
+
+void h3_inc_err_cnt(struct h3_counters *ctrs, int error_code)
+{
+ switch (error_code) {
+ case H3_NO_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_no_error);
+ break;
+ case H3_GENERAL_PROTOCOL_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_general_protocol_error);
+ break;
+ case H3_INTERNAL_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_internal_error);
+ break;
+ case H3_STREAM_CREATION_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_stream_creation_error);
+ break;
+ case H3_CLOSED_CRITICAL_STREAM:
+ HA_ATOMIC_INC(&ctrs->h3_closed_critical_stream);
+ break;
+ case H3_FRAME_UNEXPECTED:
+ HA_ATOMIC_INC(&ctrs->h3_frame_unexpected);
+ break;
+ case H3_FRAME_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_frame_error);
+ break;
+ case H3_EXCESSIVE_LOAD:
+ HA_ATOMIC_INC(&ctrs->h3_excessive_load);
+ break;
+ case H3_ID_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_id_error);
+ break;
+ case H3_SETTINGS_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_settings_error);
+ break;
+ case H3_MISSING_SETTINGS:
+ HA_ATOMIC_INC(&ctrs->h3_missing_settings);
+ break;
+ case H3_REQUEST_REJECTED:
+ HA_ATOMIC_INC(&ctrs->h3_request_rejected);
+ break;
+ case H3_REQUEST_CANCELLED:
+ HA_ATOMIC_INC(&ctrs->h3_request_cancelled);
+ break;
+ case H3_REQUEST_INCOMPLETE:
+ HA_ATOMIC_INC(&ctrs->h3_request_incomplete);
+ break;
+ case H3_MESSAGE_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_message_error);
+ break;
+ case H3_CONNECT_ERROR:
+ HA_ATOMIC_INC(&ctrs->h3_connect_error);
+ break;
+ case H3_VERSION_FALLBACK:
+ HA_ATOMIC_INC(&ctrs->h3_version_fallback);
+ break;
+ case QPACK_DECOMPRESSION_FAILED:
+ HA_ATOMIC_INC(&ctrs->qpack_decompression_failed);
+ break;
+ case QPACK_ENCODER_STREAM_ERROR:
+ HA_ATOMIC_INC(&ctrs->qpack_encoder_stream_error);
+ break;
+ case QPACK_DECODER_STREAM_ERROR:
+ HA_ATOMIC_INC(&ctrs->qpack_decoder_stream_error);
+ break;
+	default:
+		break;
+	}
+}
+
+void h3_inc_frame_type_cnt(struct h3_counters *ctrs, int frm_type)
+{
+ switch (frm_type) {
+ case H3_FT_DATA:
+ HA_ATOMIC_INC(&ctrs->h3_data);
+ break;
+ case H3_FT_HEADERS:
+ HA_ATOMIC_INC(&ctrs->h3_headers);
+ break;
+ case H3_FT_CANCEL_PUSH:
+ HA_ATOMIC_INC(&ctrs->h3_cancel_push);
+ break;
+ case H3_FT_PUSH_PROMISE:
+ HA_ATOMIC_INC(&ctrs->h3_push_promise);
+ break;
+ case H3_FT_MAX_PUSH_ID:
+ HA_ATOMIC_INC(&ctrs->h3_max_push_id);
+ break;
+ case H3_FT_GOAWAY:
+ HA_ATOMIC_INC(&ctrs->h3_goaway);
+ break;
+ case H3_FT_SETTINGS:
+ HA_ATOMIC_INC(&ctrs->h3_settings);
+ break;
+ default:
+ break;
+ }
+}
diff --git a/src/haproxy.c b/src/haproxy.c
new file mode 100644
index 0000000..4c739f4
--- /dev/null
+++ b/src/haproxy.c
@@ -0,0 +1,3962 @@
+/*
+ * HAProxy : High Availability-enabled HTTP/TCP proxy
+ * Copyright 2000-2024 Willy Tarreau <willy@haproxy.org>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <sys/resource.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <syslog.h>
+#include <grp.h>
+
+#ifdef USE_THREAD
+#include <pthread.h>
+#endif
+
+#ifdef USE_CPU_AFFINITY
+#include <sched.h>
+#if defined(__FreeBSD__) || defined(__DragonFly__)
+#include <sys/param.h>
+#ifdef __FreeBSD__
+#include <sys/cpuset.h>
+#endif
+#endif
+#endif
+
+#if defined(USE_PRCTL)
+#include <sys/prctl.h>
+#endif
+
+#if defined(USE_PROCCTL)
+#include <sys/procctl.h>
+#endif
+
+#ifdef DEBUG_FULL
+#include <assert.h>
+#endif
+#if defined(USE_SYSTEMD)
+#include <systemd/sd-daemon.h>
+#endif
+
+#include <import/sha1.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/base64.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgcond.h>
+#include <haproxy/cfgdiag.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/chunk.h>
+#include <haproxy/cli.h>
+#include <haproxy/clock.h>
+#include <haproxy/connection.h>
+#ifdef USE_CPU_AFFINITY
+#include <haproxy/cpuset.h>
+#endif
+#include <haproxy/debug.h>
+#include <haproxy/dns.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/filters.h>
+#include <haproxy/global.h>
+#include <haproxy/hlua.h>
+#include <haproxy/http_rules.h>
+#if defined(USE_LINUX_CAP)
+#include <haproxy/linuxcap.h>
+#endif
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/mworker.h>
+#include <haproxy/namespace.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_tp-t.h>
+#include <haproxy/pattern.h>
+#include <haproxy/peers.h>
+#include <haproxy/pool.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/signal.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/vars.h>
+#include <haproxy/version.h>
+
+
+/* array of init calls for older platforms */
+DECLARE_INIT_STAGES;
+
+/* create a read_mostly section to hold variables which are accessed a lot
+ * but which almost never change. The purpose is to isolate them in their
+ * own cache lines where they do not risk being perturbed by write accesses
+ * to neighbor variables. We need to create an empty aligned variable for
+ * this. The fact that the variable is of size zero means that it will be
+ * eliminated at link time if no other variable uses it, but alignment will
+ * be respected.
+ */
+empty_t __read_mostly_align HA_SECTION("read_mostly") ALIGNED(64);
+
+#ifdef BUILD_FEATURES
+char *build_features = BUILD_FEATURES;
+#else
+char *build_features = "";
+#endif
+
+/* list of config files */
+static struct list cfg_cfgfiles = LIST_HEAD_INIT(cfg_cfgfiles);
+int pid; /* current process id */
+
+static unsigned long stopping_tgroup_mask; /* Thread groups acknowledging stopping */
+
+/* global options */
+struct global global = {
+ .hard_stop_after = TICK_ETERNITY,
+ .close_spread_time = TICK_ETERNITY,
+ .close_spread_end = TICK_ETERNITY,
+ .numa_cpu_mapping = 1,
+ .nbthread = 0,
+ .req_count = 0,
+ .loggers = LIST_HEAD_INIT(global.loggers),
+ .maxzlibmem = DEFAULT_MAXZLIBMEM * 1024U * 1024U,
+ .comp_rate_lim = 0,
+ .ssl_server_verify = SSL_SERVER_VERIFY_REQUIRED,
+ .unix_bind = {
+ .ux = {
+ .uid = -1,
+ .gid = -1,
+ .mode = 0,
+ }
+ },
+ .tune = {
+ .options = GTUNE_LISTENER_MQ_OPT,
+ .bufsize = (BUFSIZE + 2*sizeof(void *) - 1) & -(2*sizeof(void *)),
+ .maxrewrite = MAXREWRITE,
+ .reserved_bufs = RESERVED_BUFS,
+ .pattern_cache = DEFAULT_PAT_LRU_SIZE,
+ .pool_low_ratio = 20,
+ .pool_high_ratio = 25,
+ .max_http_hdr = MAX_HTTP_HDR,
+#ifdef USE_OPENSSL
+ .sslcachesize = SSLCACHESIZE,
+#endif
+ .comp_maxlevel = 1,
+#ifdef DEFAULT_IDLE_TIMER
+ .idle_timer = DEFAULT_IDLE_TIMER,
+#else
+ .idle_timer = 1000, /* 1 second */
+#endif
+ .nb_stk_ctr = MAX_SESS_STKCTR,
+ .default_shards = -2, /* by-group */
+#ifdef USE_QUIC
+ .quic_backend_max_idle_timeout = QUIC_TP_DFLT_BACK_MAX_IDLE_TIMEOUT,
+ .quic_frontend_max_idle_timeout = QUIC_TP_DFLT_FRONT_MAX_IDLE_TIMEOUT,
+ .quic_frontend_max_streams_bidi = QUIC_TP_DFLT_FRONT_MAX_STREAMS_BIDI,
+ .quic_reorder_ratio = QUIC_DFLT_REORDER_RATIO,
+ .quic_retry_threshold = QUIC_DFLT_RETRY_THRESHOLD,
+ .quic_max_frame_loss = QUIC_DFLT_MAX_FRAME_LOSS,
+ .quic_streams_buf = 30,
+#endif /* USE_QUIC */
+ },
+#ifdef USE_OPENSSL
+#ifdef DEFAULT_MAXSSLCONN
+ .maxsslconn = DEFAULT_MAXSSLCONN,
+#endif
+#endif
+ /* others NULL OK */
+};
+
+/*********************************************************************/
+
+int stopping; /* non zero means stopping in progress */
+int killed; /* non zero means a hard-stop is triggered */
+int jobs = 0; /* number of active jobs (conns, listeners, active tasks, ...) */
+int unstoppable_jobs = 0; /* number of active jobs that can't be stopped during a soft stop */
+int active_peers = 0; /* number of active peers (connection attempts and connected) */
+int connected_peers = 0; /* number of connected peers (verified ones) */
+int arg_mode = 0; /* MODE_DEBUG etc as passed on command line ... */
+char *change_dir = NULL; /* set when -C is passed */
+char *check_condition = NULL; /* check condition passed to -cc */
+
+/* Here we store information about the pids of the processes we may pause
+ * or kill. We will send them a signal every 10 ms until we can bind to all
+ * our ports. With 200 retries, that's about 2 seconds.
+ */
+#define MAX_START_RETRIES 200
+static int *oldpids = NULL;
+static int oldpids_sig; /* use USR1 or TERM */
+
+/* Path to the unix socket we use to retrieve listener sockets from the old process */
+static const char *old_unixsocket;
+
+int atexit_flag = 0;
+
+int nb_oldpids = 0;
+const int zero = 0;
+const int one = 1;
+const struct linger nolinger = { .l_onoff = 1, .l_linger = 0 };
+
+char hostname[MAX_HOSTNAME_LEN];
+char *localpeer = NULL;
+static char *kwd_dump = NULL; // list of keyword dumps to produce
+
+static char **old_argv = NULL; /* previous argv but cleaned up */
+
+struct list proc_list = LIST_HEAD_INIT(proc_list);
+
+int master = 0; /* 1 if in master, 0 if in child */
+unsigned int rlim_fd_cur_at_boot = 0;
+unsigned int rlim_fd_max_at_boot = 0;
+
+/* per-boot randomness */
+unsigned char boot_seed[20]; /* per-boot random seed (160 bits initially) */
+
+/* takes the thread config in argument or NULL for any thread */
+static void *run_thread_poll_loop(void *data);
+
+/* bitfield of a few warnings to emit just once (WARN_*) */
+unsigned int warned = 0;
+
+/* set if experimental features have been used for the current process */
+unsigned int tainted = 0;
+
+unsigned int experimental_directives_allowed = 0;
+
+int check_kw_experimental(struct cfg_keyword *kw, const char *file, int linenum,
+ char **errmsg)
+{
+ if (kw->flags & KWF_EXPERIMENTAL) {
+ if (!experimental_directives_allowed) {
+ memprintf(errmsg, "parsing [%s:%d] : '%s' directive is experimental, must be allowed via a global 'expose-experimental-directives'",
+ file, linenum, kw->kw);
+ return 1;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+ }
+
+ return 0;
+}
+
+/* master CLI configuration (-S flag) */
+struct list mworker_cli_conf = LIST_HEAD_INIT(mworker_cli_conf);
+
+/* These are strings to be reported in the output of "haproxy -vv". They may
+ * either be constants (in which case must_free must be zero) or dynamically
+ * allocated strings to be passed to free() on exit, in which case must_free
+ * must be non-zero.
+ */
+struct list build_opts_list = LIST_HEAD_INIT(build_opts_list);
+struct build_opts_str {
+ struct list list;
+ const char *str;
+ int must_free;
+};
+
+/*********************************************************************/
+/* general purpose functions ***************************************/
+/*********************************************************************/
+
+/* used to register some build option strings at boot. Set must_free to
+ * non-zero if the string must be freed upon exit.
+ */
+void hap_register_build_opts(const char *str, int must_free)
+{
+ struct build_opts_str *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->str = str;
+ b->must_free = must_free;
+ LIST_APPEND(&build_opts_list, &b->list);
+}
+
+/* Returns the first build option when <curr> is NULL, or the next one when
+ * <curr> is the last returned value. Returns NULL when there are no more
+ * entries in the list. Otherwise the returned pointer is &opt->str, so the
+ * caller can dereference it to access the string.
+ */
+const char **hap_get_next_build_opt(const char **curr)
+{
+ struct build_opts_str *head, *start;
+
+ head = container_of(&build_opts_list, struct build_opts_str, list);
+
+ if (curr)
+ start = container_of(curr, struct build_opts_str, str);
+ else
+ start = head;
+
+ start = container_of(start->list.n, struct build_opts_str, list);
+
+ if (start == head)
+ return NULL;
+
+ return &start->str;
+}
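+
+/* Typical iteration (as done in display_build_opts() below):
+ *
+ *   const char **opt;
+ *
+ *   for (opt = NULL; (opt = hap_get_next_build_opt(opt)); )
+ *           puts(*opt);
+ */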
+
+/* used to make a new feature appear in the build_features list at boot time.
+ * The feature must be in the format "XXX" without the leading "+" which will
+ * be automatically appended.
+ */
+void hap_register_feature(const char *name)
+{
+ static int must_free = 0;
+ int new_len = strlen(build_features) + 2 + strlen(name);
+ char *new_features;
+
+ new_features = malloc(new_len + 1);
+ if (!new_features)
+ return;
+
+ strlcpy2(new_features, build_features, new_len);
+ snprintf(new_features, new_len + 1, "%s +%s", build_features, name);
+
+ if (must_free)
+ ha_free(&build_features);
+
+ build_features = new_features;
+ must_free = 1;
+}
+
+#define VERSION_MAX_ELTS 7
+
+/* This function splits a haproxy version string into an array of integers.
+ * The syntax of the supported version string is the following:
+ *
+ * <a>[.<b>[.<c>[.<d>]]][-{dev,pre,rc}<f>][-*][-<g>]
+ *
+ * This validates for example:
+ * 1.2.1-pre2, 1.2.1, 1.2.10.1, 1.3.16-rc1, 1.4-dev3, 1.5-dev18, 1.5-dev18-43
+ * 2.4-dev18-f6818d-20
+ *
+ * The result is set in an array of <VERSION_MAX_ELTS> elements. Each letter
+ * has one fixed place in the array. The tags take a numeric value called <e>
+ * which defaults to 3. "dev" is 1, "rc" and "pre" are 2. Numbers not
+ * encountered are considered as zero (hence 1.5 and 1.5.0 are the same).
+ *
+ * The resulting values are:
+ * 1.2.1-pre2 1, 2, 1, 0, 2, 2, 0
+ * 1.2.1 1, 2, 1, 0, 3, 0, 0
+ * 1.2.10.1 1, 2, 10, 1, 3, 0, 0
+ * 1.3.16-rc1 1, 3, 16, 0, 2, 1, 0
+ * 1.4-dev3 1, 4, 0, 0, 1, 3, 0
+ * 1.5-dev18 1, 5, 0, 0, 1, 18, 0
+ * 1.5-dev18-43 1, 5, 0, 0, 1, 18, 43
+ * 2.4-dev18-f6818d-20 2, 4, 0, 0, 1, 18, 20
+ *
+ * The function returns non-zero if the conversion succeeded, or zero if it
+ * failed.
+ */
+int split_version(const char *version, unsigned int *value)
+{
+ const char *p, *s;
+ char *error;
+ int nelts;
+
+ /* Initialize array with zeroes */
+ for (nelts = 0; nelts < VERSION_MAX_ELTS; nelts++)
+ value[nelts] = 0;
+ value[4] = 3;
+
+ p = version;
+
+ /* If the version number is empty, return false */
+ if (*p == '\0')
+ return 0;
+
+ /* Convert first number <a> */
+ value[0] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error == '-')
+ goto split_version_tag;
+ if (*error != '.')
+ return 0;
+
+	/* Convert second number <b> */
+ value[1] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error == '-')
+ goto split_version_tag;
+ if (*error != '.')
+ return 0;
+
+	/* Convert third number <c> */
+ value[2] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error == '-')
+ goto split_version_tag;
+ if (*error != '.')
+ return 0;
+
+	/* Convert fourth number <d> */
+ value[3] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error != '-')
+ return 0;
+
+ split_version_tag:
+ /* Check for commit number */
+ if (*p >= '0' && *p <= '9')
+ goto split_version_commit;
+
+ /* Read tag */
+ if (strncmp(p, "dev", 3) == 0) { value[4] = 1; p += 3; }
+ else if (strncmp(p, "rc", 2) == 0) { value[4] = 2; p += 2; }
+ else if (strncmp(p, "pre", 3) == 0) { value[4] = 2; p += 3; }
+ else
+ goto split_version_commit;
+
+ /* Convert tag number */
+ value[5] = strtol(p, &error, 10);
+ p = error + 1;
+ if (*error == '\0')
+ return 1;
+ if (*error != '-')
+ return 0;
+
+ split_version_commit:
+ /* Search the last "-" */
+ s = strrchr(p, '-');
+ if (s) {
+ s++;
+ if (*s == '\0')
+ return 0;
+ value[6] = strtol(s, &error, 10);
+ if (*error != '\0')
+ value[6] = 0;
+ return 1;
+ }
+
+	/* convert the trailing commit number */
+ value[6] = strtol(p, &error, 10);
+ if (*error != '\0')
+ value[6] = 0;
+
+ return 1;
+}
+
+/* This function compares the current haproxy version with an arbitrary version
+ * string. It returns:
+ * -1 : the version in argument is older than the current haproxy version
+ * 0 : the version in argument is the same as the current haproxy version
+ * 1 : the version in argument is newer than the current haproxy version
+ *
+ * Or some errors:
+ * -2 : the current haproxy version is not parsable
+ * -3 : the version in argument is not parsable
+ */
+int compare_current_version(const char *version)
+{
+ unsigned int loc[VERSION_MAX_ELTS];
+ unsigned int mod[VERSION_MAX_ELTS];
+ int i;
+
+ /* split versions */
+ if (!split_version(haproxy_version, loc))
+ return -2;
+ if (!split_version(version, mod))
+ return -3;
+
+ /* compare versions */
+ for (i = 0; i < VERSION_MAX_ELTS; i++) {
+ if (mod[i] < loc[i])
+ return -1;
+ else if (mod[i] > loc[i])
+ return 1;
+ }
+ return 0;
+}
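+
+/* Example (illustrative): on a 2.9.x binary, compare_current_version("2.4")
+ * returns -1 since that version is older, and compare_current_version("3.0")
+ * returns 1 since it is newer.
+ */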
+
+void display_version()
+{
+ struct utsname utsname;
+
+ printf("HAProxy version %s %s - https://haproxy.org/\n"
+ PRODUCT_STATUS "\n", haproxy_version, haproxy_date);
+
+ if (strlen(PRODUCT_URL_BUGS) > 0) {
+ char base_version[20];
+ int dots = 0;
+ char *del;
+
+ /* only retrieve the base version without distro-specific extensions */
+ for (del = haproxy_version; *del; del++) {
+ if (*del == '.')
+ dots++;
+ else if (*del < '0' || *del > '9')
+ break;
+ }
+
+ strlcpy2(base_version, haproxy_version, del - haproxy_version + 1);
+ if (dots < 2)
+ printf("Known bugs: https://github.com/haproxy/haproxy/issues?q=is:issue+is:open\n");
+ else
+ printf("Known bugs: " PRODUCT_URL_BUGS "\n", base_version);
+ }
+
+ if (uname(&utsname) == 0) {
+ printf("Running on: %s %s %s %s\n", utsname.sysname, utsname.release, utsname.version, utsname.machine);
+ }
+}
+
+static void display_build_opts()
+{
+ const char **opt;
+
+ printf("Build options :"
+#ifdef BUILD_TARGET
+ "\n TARGET = " BUILD_TARGET
+#endif
+#ifdef BUILD_CPU
+ "\n CPU = " BUILD_CPU
+#endif
+#ifdef BUILD_CC
+ "\n CC = " BUILD_CC
+#endif
+#ifdef BUILD_CFLAGS
+ "\n CFLAGS = " BUILD_CFLAGS
+#endif
+#ifdef BUILD_OPTIONS
+ "\n OPTIONS = " BUILD_OPTIONS
+#endif
+#ifdef BUILD_DEBUG
+ "\n DEBUG = " BUILD_DEBUG
+#endif
+ "\n\nFeature list : %s"
+ "\n\nDefault settings :"
+ "\n bufsize = %d, maxrewrite = %d, maxpollevents = %d"
+ "\n\n",
+ build_features, BUFSIZE, MAXREWRITE, MAX_POLL_EVENTS);
+
+ for (opt = NULL; (opt = hap_get_next_build_opt(opt)); puts(*opt))
+ ;
+
+ putchar('\n');
+
+ list_pollers(stdout);
+ putchar('\n');
+ list_mux_proto(stdout);
+ putchar('\n');
+ list_services(stdout);
+ putchar('\n');
+ list_filters(stdout);
+ putchar('\n');
+}
+
+/*
+ * This function prints the command line usage and exits
+ */
+static void usage(char *name)
+{
+ display_version();
+ fprintf(stderr,
+ "Usage : %s [-f <cfgfile|cfgdir>]* [ -vdV"
+ "D ] [ -n <maxconn> ] [ -N <maxpconn> ]\n"
+ " [ -p <pidfile> ] [ -m <max megs> ] [ -C <dir> ] [-- <cfgfile>*]\n"
+ " -v displays version ; -vv shows known build options.\n"
+ " -d enters debug mode ; -db only disables background mode.\n"
+ " -dM[<byte>,help,...] debug memory (default: poison with <byte>/0x50)\n"
+ " -dt activate traces on stderr\n"
+ " -V enters verbose mode (disables quiet mode)\n"
+ " -D goes daemon ; -C changes to <dir> before loading files.\n"
+ " -W master-worker mode.\n"
+#if defined(USE_SYSTEMD)
+ " -Ws master-worker mode with systemd notify support.\n"
+#endif
+ " -q quiet mode : don't display messages\n"
+ " -c check mode : only check config files and exit\n"
+ " -cc check condition : evaluate a condition and exit\n"
+ " -n sets the maximum total # of connections (uses ulimit -n)\n"
+ " -m limits the usable amount of memory (in MB)\n"
+ " -N sets the default, per-proxy maximum # of connections (%d)\n"
+ " -L set local peer name (default to hostname)\n"
+ " -p writes pids of all children to this file\n"
+ " -dC[[key],line] display the configuration file, if there is a key, the file will be anonymised\n"
+#if defined(USE_EPOLL)
+ " -de disables epoll() usage even when available\n"
+#endif
+#if defined(USE_KQUEUE)
+ " -dk disables kqueue() usage even when available\n"
+#endif
+#if defined(USE_EVPORTS)
+ " -dv disables event ports usage even when available\n"
+#endif
+#if defined(USE_POLL)
+ " -dp disables poll() usage even when available\n"
+#endif
+#if defined(USE_LINUX_SPLICE)
+ " -dS disables splice usage (broken on old kernels)\n"
+#endif
+#if defined(USE_GETADDRINFO)
+ " -dG disables getaddrinfo() usage\n"
+#endif
+#if defined(SO_REUSEPORT)
+ " -dR disables SO_REUSEPORT usage\n"
+#endif
+#if defined(HA_HAVE_DUMP_LIBS)
+ " -dL dumps loaded object files after config checks\n"
+#endif
+ " -dK{class[,...]} dump registered keywords (use 'help' for list)\n"
+ " -dr ignores server address resolution failures\n"
+ " -dV disables SSL verify on servers side\n"
+ " -dW fails if any warning is emitted\n"
+ " -dD diagnostic mode : warn about suspicious configuration statements\n"
+ " -dF disable fast-forward\n"
+ " -dZ disable zero-copy forwarding\n"
+ " -sf/-st [pid ]* finishes/terminates old pids.\n"
+ " -x <unix_socket> get listening sockets from a unix socket\n"
+ " -S <bind>[,<bind options>...] new master CLI\n"
+ "\n",
+ name, cfg_maxpconn);
+ exit(1);
+}
+
+
+
+/*********************************************************************/
+/* more specific functions ***************************************/
+/*********************************************************************/
+
+/* sends the signal <sig> to all pids found in <oldpids>. Returns the number of
+ * pids the signal was correctly delivered to.
+ */
+int tell_old_pids(int sig)
+{
+ int p;
+ int ret = 0;
+ for (p = 0; p < nb_oldpids; p++)
+ if (kill(oldpids[p], sig) == 0)
+ ret++;
+ return ret;
+}
+
+/*
+ * remove a pid from the oldpids array and decrease nb_oldpids.
+ * Returns 1 if the pid was found, otherwise returns 0.
+ */
+
+int delete_oldpid(int pid)
+{
+ int i;
+
+ for (i = 0; i < nb_oldpids; i++) {
+ if (oldpids[i] == pid) {
+ oldpids[i] = oldpids[nb_oldpids - 1];
+ oldpids[nb_oldpids - 1] = 0;
+ nb_oldpids--;
+ return 1;
+ }
+ }
+ return 0;
+}
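+
+/* For example, delete_oldpid(20) on oldpids {10, 20, 30} (nb_oldpids=3)
+ * moves the last entry into the freed slot, yielding {10, 30, 0} with
+ * nb_oldpids=2. Ordering is not preserved, which is fine since the array
+ * is unordered.
+ */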
+
+
+/*
+ * When called, this function re-executes haproxy with -sf followed by the
+ * current children's PIDs, and possibly the old children's PIDs if they
+ * haven't left yet.
+ */
+static void mworker_reexec(int hardreload)
+{
+ char **next_argv = NULL;
+ int old_argc = 0; /* previous number of arguments */
+ int next_argc = 0;
+ int i = 0;
+ char *msg = NULL;
+ struct rlimit limit;
+ struct mworker_proc *current_child = NULL;
+
+ mworker_block_signals();
+ setenv("HAPROXY_MWORKER_REEXEC", "1", 1);
+
+ mworker_cleanup_proc();
+ mworker_proc_list_to_env(); /* put the children description in the env */
+
+ /* ensure that we correctly close every listener before re-executing */
+ mworker_cleanlisteners();
+
+ /* during the reload we must ensure that every FD that can't be
+ * reused (i.e. those that are not referenced in the proc_list)
+ * is closed, otherwise it will leak. */
+
+ /* close the listeners FD */
+ mworker_cli_proxy_stop();
+
+ if (fdtab)
+ deinit_pollers();
+
+#ifdef HAVE_SSL_RAND_KEEP_RANDOM_DEVICES_OPEN
+ /* close random device FDs */
+ RAND_keep_random_devices_open(0);
+#endif
+
+ /* restore the initial FD limits */
+ limit.rlim_cur = rlim_fd_cur_at_boot;
+ limit.rlim_max = rlim_fd_max_at_boot;
+ if (raise_rlim_nofile(&limit, &limit) != 0) {
+ ha_warning("Failed to restore initial FD limits (cur=%u max=%u), using cur=%u max=%u\n",
+ rlim_fd_cur_at_boot, rlim_fd_max_at_boot,
+ (unsigned int)limit.rlim_cur, (unsigned int)limit.rlim_max);
+ }
+
+ /* compute length */
+ while (old_argv[old_argc])
+ old_argc++;
+
+ /* 1 slot for -sf/-st, 2 for "-x <socket>", plus 1 for the trailing NULL */
+ next_argv = calloc(old_argc + 1 + 2 + mworker_child_nb() + 1,
+ sizeof(*next_argv));
+ if (next_argv == NULL)
+ goto alloc_error;
+
+ /* copy the program name */
+ next_argv[next_argc++] = old_argv[0];
+
+ /* insert the new options just after argv[0] in case we have a -- */
+
+ if (getenv("HAPROXY_MWORKER_WAIT_ONLY") == NULL) {
+ /* add -sf <PID>* to argv */
+ if (mworker_child_nb() > 0) {
+ struct mworker_proc *child;
+
+ if (hardreload)
+ next_argv[next_argc++] = "-st";
+ else
+ next_argv[next_argc++] = "-sf";
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (!(child->options & PROC_O_LEAVING) && (child->options & PROC_O_TYPE_WORKER))
+ current_child = child;
+
+ if (!(child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG)) || child->pid <= -1)
+ continue;
+ if ((next_argv[next_argc++] = memprintf(&msg, "%d", child->pid)) == NULL)
+ goto alloc_error;
+ msg = NULL;
+ }
+ }
+
+ if (current_child) {
+ /* add the -x option with the socketpair of the current worker */
+ next_argv[next_argc++] = "-x";
+ if ((next_argv[next_argc++] = memprintf(&msg, "sockpair@%d", current_child->ipc_fd[0])) == NULL)
+ goto alloc_error;
+ msg = NULL;
+ }
+ }
+
+ /* copy the previous options */
+ for (i = 1; i < old_argc; i++)
+ next_argv[next_argc++] = old_argv[i];
+
+ signal(SIGPROF, SIG_IGN);
+ execvp(next_argv[0], next_argv);
+ ha_warning("Failed to reexecute the master process [%d]: %s\n", pid, strerror(errno));
+ ha_free(&next_argv);
+ return;
+
+alloc_error:
+ ha_free(&next_argv);
+ ha_warning("Failed to reexecute the master process [%d]: Cannot allocate memory\n", pid);
+ return;
+}
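+
+/* For example (hypothetical values), a master started as
+ * "haproxy -W -f /etc/haproxy/haproxy.cfg", with one live worker of PID 1234
+ * whose socketpair is on FD 5, would re-execute itself as:
+ * "haproxy -sf 1234 -x sockpair@5 -W -f /etc/haproxy/haproxy.cfg"
+ * (with -st instead of -sf on a hard reload).
+ */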
+
+/* re-execute haproxy in wait mode */
+static void mworker_reexec_waitmode()
+{
+ setenv("HAPROXY_MWORKER_WAIT_ONLY", "1", 1);
+ mworker_reexec(0);
+}
+
+/* reload haproxy and emit a notice */
+void mworker_reload(int hardreload)
+{
+ struct mworker_proc *child;
+ struct per_thread_deinit_fct *ptdf;
+
+ ha_notice("Reloading HAProxy%s\n", hardreload?" (hard-reload)":"");
+
+ /* close the poller FD and the thread waker pipe FD */
+ list_for_each_entry(ptdf, &per_thread_deinit_list, list)
+ ptdf->fct();
+
+ /* increment the number of reloads */
+ list_for_each_entry(child, &proc_list, list) {
+ child->reloads++;
+ }
+
+#if defined(USE_SYSTEMD)
+ if (global.tune.options & GTUNE_USE_SYSTEMD)
+ sd_notify(0, "RELOADING=1\nSTATUS=Reloading Configuration.\n");
+#endif
+ mworker_reexec(hardreload);
+}
+
+static void mworker_loop()
+{
+
+ /* Busy polling makes no sense in the master :-) */
+ global.tune.options &= ~GTUNE_BUSY_POLLING;
+
+
+ signal_unregister(SIGTTIN);
+ signal_unregister(SIGTTOU);
+ signal_unregister(SIGUSR1);
+ signal_unregister(SIGHUP);
+ signal_unregister(SIGQUIT);
+
+ signal_register_fct(SIGTERM, mworker_catch_sigterm, SIGTERM);
+ signal_register_fct(SIGUSR1, mworker_catch_sigterm, SIGUSR1);
+ signal_register_fct(SIGTTIN, mworker_broadcast_signal, SIGTTIN);
+ signal_register_fct(SIGTTOU, mworker_broadcast_signal, SIGTTOU);
+ signal_register_fct(SIGINT, mworker_catch_sigterm, SIGINT);
+ signal_register_fct(SIGHUP, mworker_catch_sighup, SIGHUP);
+ signal_register_fct(SIGUSR2, mworker_catch_sighup, SIGUSR2);
+ signal_register_fct(SIGCHLD, mworker_catch_sigchld, SIGCHLD);
+
+ mworker_unblock_signals();
+ mworker_cleantasks();
+
+ mworker_catch_sigchld(NULL); /* ensure we clean the children in case
+ some SIGCHLD were lost */
+
+ jobs++; /* this is the "master" job: we want to take care of the
+ signals even if there is no listener, so that the poll loop
+ doesn't exit */
+
+ fork_poller();
+ run_thread_poll_loop(NULL);
+}
+
+/*
+ * Reexec the process in failure mode, instead of exiting
+ */
+void reexec_on_failure()
+{
+ struct mworker_proc *child;
+
+ if (!atexit_flag)
+ return;
+
+ /* get the info of the children in the env */
+ if (mworker_env_to_proc_list() < 0) {
+ exit(EXIT_FAILURE);
+ }
+
+ /* increment the number of failed reloads */
+ list_for_each_entry(child, &proc_list, list) {
+ child->failedreloads++;
+ }
+
+ /* do not keep unused FDs retrieved from the previous process */
+ sock_drop_unused_old_sockets();
+
+ usermsgs_clr(NULL);
+ setenv("HAPROXY_LOAD_SUCCESS", "0", 1);
+ ha_warning("Loading failure!\n");
+#if defined(USE_SYSTEMD)
+ /* the sd_notify API is not able to send a reload failure signal, so
+ * the READY=1 signal still needs to be sent */
+ if (global.tune.options & GTUNE_USE_SYSTEMD)
+ sd_notify(0, "READY=1\nSTATUS=Reload failed!\n");
+#endif
+
+ mworker_reexec_waitmode();
+}
+
+/*
+ * Exit with an error message upon a wait-mode failure.
+ */
+void exit_on_waitmode_failure()
+{
+ if (!atexit_flag)
+ return;
+
+ ha_alert("Non-recoverable mworker wait-mode error, exiting.\n");
+}
+
+
+/*
+ * upon SIGUSR1, let's have a soft stop. Note that soft_stop() broadcasts
+ * a signal zero to all subscribers. This means that it's as easy as
+ * subscribing to signal 0 to get informed about an imminent shutdown.
+ */
+static void sig_soft_stop(struct sig_handler *sh)
+{
+ soft_stop();
+ signal_unregister_handler(sh);
+ pool_gc(NULL);
+}
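+
+/* Minimal sketch of the mechanism described above (illustrative only,
+ * assuming the signal framework accepts signal 0 as stated): a component
+ * wishing to be told about an imminent shutdown could do this.
+ */
+#if 0
+static void my_shutdown_notice(struct sig_handler *sh)
+{
+ /* invoked when soft_stop() broadcasts signal zero */
+ ha_notice("shutdown is imminent\n");
+}
+
+/* somewhere during initialization: */
+/* signal_register_fct(0, my_shutdown_notice, 0); */
+#endif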
+
+/*
+ * upon SIGTTOU, we pause everything
+ */
+static void sig_pause(struct sig_handler *sh)
+{
+ if (protocol_pause_all() & ERR_FATAL) {
+ const char *msg = "Some proxies refused to pause, performing soft stop now.\n";
+ ha_warning("%s", msg);
+ send_log(NULL, LOG_WARNING, "%s", msg);
+ soft_stop();
+ }
+ pool_gc(NULL);
+}
+
+/*
+ * upon SIGTTIN, we resume listening (this undoes a previous SIGTTOU pause).
+ */
+static void sig_listen(struct sig_handler *sh)
+{
+ if (protocol_resume_all() & ERR_FATAL) {
+ const char *msg = "Some proxies refused to resume, probably due to a conflict on a listening port. You may want to try again after the conflicting application is stopped, otherwise a restart might be needed to resume safe operations.\n";
+ ha_warning("%s", msg);
+ send_log(NULL, LOG_WARNING, "%s", msg);
+ }
+}
+
+/*
+ * this function dumps every server's state when the process receives SIGHUP.
+ */
+static void sig_dump_state(struct sig_handler *sh)
+{
+ struct proxy *p = proxies_list;
+
+ ha_warning("SIGHUP received, dumping servers states.\n");
+ while (p) {
+ struct server *s = p->srv;
+
+ send_log(p, LOG_NOTICE, "SIGHUP received, dumping servers states for proxy %s.\n", p->id);
+ while (s) {
+ chunk_printf(&trash,
+ "SIGHUP: Server %s/%s is %s. Conn: %d act, %d pend, %lld tot.",
+ p->id, s->id,
+ (s->cur_state != SRV_ST_STOPPED) ? "UP" : "DOWN",
+ s->cur_sess, s->queue.length, s->counters.cum_sess);
+ ha_warning("%s\n", trash.area);
+ send_log(p, LOG_NOTICE, "%s\n", trash.area);
+ s = s->next;
+ }
+
+ /* FIXME: this info is a bit outdated. We should be able to distinguish between FE and BE. */
+ if (!p->srv) {
+ chunk_printf(&trash,
+ "SIGHUP: Proxy %s has no servers. Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
+ p->id,
+ p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+ } else if (p->srv_act == 0) {
+ chunk_printf(&trash,
+ "SIGHUP: Proxy %s %s ! Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
+ p->id,
+ (p->srv_bck) ? "is running on backup servers" : "has no server available",
+ p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+ } else {
+ chunk_printf(&trash,
+ "SIGHUP: Proxy %s has %d active servers and %d backup servers available."
+ " Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.",
+ p->id, p->srv_act, p->srv_bck,
+ p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+ }
+ ha_warning("%s\n", trash.area);
+ send_log(p, LOG_NOTICE, "%s\n", trash.area);
+
+ p = p->next;
+ }
+}
+
+static void dump(struct sig_handler *sh)
+{
+ /* dump memory usage then free everything possible */
+ dump_pools();
+ pool_gc(NULL);
+}
+
+/*
+ * This function dup2()s <fd> onto the stdio FDs (0,1,2), then closes <fd>
+ * if it is above 2. If <fd> < 0, it opens /dev/null and uses that instead.
+ *
+ * When chrooting, you have to open /dev/null before the chroot and pass the
+ * resulting <fd> to this function.
+ */
+static void stdio_quiet(int fd)
+{
+ if (fd < 0)
+ fd = open("/dev/null", O_RDWR, 0);
+
+ if (fd > -1) {
+ fclose(stdin);
+ fclose(stdout);
+ fclose(stderr);
+
+ dup2(fd, 0);
+ dup2(fd, 1);
+ dup2(fd, 2);
+ if (fd > 2)
+ close(fd);
+ return;
+ }
+
+ ha_alert("Cannot open /dev/null\n");
+ exit(EXIT_FAILURE);
+}
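+
+/* Illustrative sketch (not upstream code; global.chroot is assumed to hold
+ * the chroot path): when chrooting, /dev/null must be opened while it is
+ * still reachable, then the FD is handed to stdio_quiet().
+ */
+#if 0
+ int devnullfd = open("/dev/null", O_RDWR);
+
+ if (devnullfd < 0 || chroot(global.chroot) == -1)
+ exit(EXIT_FAILURE);
+ stdio_quiet(devnullfd); /* dup2()s the pre-chroot FD over stdio */
+#endif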
+
+
+/* This function checks if cfg_cfgfiles contains directories.
+ * If it finds one, it adds all the files (and only the files) it contains
+ * to cfg_cfgfiles in place of the directory (and removes the directory).
+ * Files are added in lexical order, only files with a .cfg extension are
+ * added, and files whose names start with '.' are skipped.
+ */
+static void cfgfiles_expand_directories(void)
+{
+ struct wordlist *wl, *wlb;
+ char *err = NULL;
+
+ list_for_each_entry_safe(wl, wlb, &cfg_cfgfiles, list) {
+ struct stat file_stat;
+ struct dirent **dir_entries = NULL;
+ int dir_entries_nb;
+ int dir_entries_it;
+
+ if (stat(wl->s, &file_stat)) {
+ ha_alert("Cannot open configuration file/directory %s : %s\n",
+ wl->s,
+ strerror(errno));
+ exit(1);
+ }
+
+ if (!S_ISDIR(file_stat.st_mode))
+ continue;
+
+ /* from this point wl->s is a directory */
+
+ dir_entries_nb = scandir(wl->s, &dir_entries, NULL, alphasort);
+ if (dir_entries_nb < 0) {
+ ha_alert("Cannot open configuration directory %s : %s\n",
+ wl->s,
+ strerror(errno));
+ exit(1);
+ }
+
+ /* for each element in the directory wl->s */
+ for (dir_entries_it = 0; dir_entries_it < dir_entries_nb; dir_entries_it++) {
+ struct dirent *dir_entry = dir_entries[dir_entries_it];
+ char *filename = NULL;
+ char *d_name_cfgext = strstr(dir_entry->d_name, ".cfg");
+
+ /* skip filenames that begin with '.' and
+ * only add filenames with a .cfg extension
+ */
+ if (dir_entry->d_name[0] == '.' ||
+ !(d_name_cfgext && d_name_cfgext[4] == '\0'))
+ goto next_dir_entry;
+
+ if (!memprintf(&filename, "%s/%s", wl->s, dir_entry->d_name)) {
+ ha_alert("Cannot load configuration files %s : out of memory.\n",
+ filename);
+ exit(1);
+ }
+
+ if (stat(filename, &file_stat)) {
+ ha_alert("Cannot open configuration file %s : %s\n",
+ wl->s,
+ strerror(errno));
+ exit(1);
+ }
+
+ /* don't add anything other than regular files to cfg_cfgfiles;
+ * this way we avoid loops
+ */
+ if (!S_ISREG(file_stat.st_mode))
+ goto next_dir_entry;
+
+ if (!list_append_word(&wl->list, filename, &err)) {
+ ha_alert("Cannot load configuration files %s : %s\n",
+ filename,
+ err);
+ exit(1);
+ }
+
+next_dir_entry:
+ free(filename);
+ free(dir_entry);
+ }
+
+ free(dir_entries);
+
+ /* remove the current directory (wl) from cfg_cfgfiles */
+ free(wl->s);
+ LIST_DELETE(&wl->list);
+ free(wl);
+ }
+
+ free(err);
+}
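+
+/* For example (hypothetical directory), "-f /etc/haproxy/conf.d" containing
+ * "10-front.cfg", "20-back.cfg", ".hidden.cfg" and "README" expands in place
+ * to "10-front.cfg" then "20-back.cfg": lexical order, .cfg files only, and
+ * no dot-files.
+ */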
+
+/*
+ * Copy and clean up the current argv: the -sf / -st / -x options and their
+ * parameters are removed. Returns a newly allocated copy of argv.
+ */
+
+static char **copy_argv(int argc, char **argv)
+{
+ char **newargv, **retargv;
+
+ newargv = calloc(argc + 2, sizeof(*newargv));
+ if (newargv == NULL) {
+ ha_warning("Cannot allocate memory\n");
+ return NULL;
+ }
+ retargv = newargv;
+
+ /* first copy argv[0] */
+ *newargv++ = *argv++;
+ argc--;
+
+ while (argc > 0) {
+ if (**argv != '-') {
+ /* non-options are copied but will fail in the argument parser */
+ *newargv++ = *argv++;
+ argc--;
+
+ } else {
+ char *flag;
+
+ flag = *argv + 1;
+
+ if (flag[0] == '-' && flag[1] == 0) {
+ /* "--\0" copy every arguments till the end of argv */
+ *newargv++ = *argv++;
+ argc--;
+
+ while (argc > 0) {
+ *newargv++ = *argv++;
+ argc--;
+ }
+ } else {
+ switch (*flag) {
+ case 's':
+ /* -sf / -st and their parameters are ignored */
+ if (flag[1] == 'f' || flag[1] == 't') {
+ argc--;
+ argv++;
+ /* The list can't contain a negative value since the only
+ way to know the end of this list is by looking for the
+ next option or the end of the options */
+ while (argc > 0 && argv[0][0] != '-') {
+ argc--;
+ argv++;
+ }
+ } else {
+ argc--;
+ argv++;
+
+ }
+ break;
+
+ case 'x':
+ /* this option and its parameter are ignored */
+ argc--;
+ argv++;
+ if (argc > 0) {
+ argc--;
+ argv++;
+ }
+ break;
+
+ case 'C':
+ case 'n':
+ case 'm':
+ case 'N':
+ case 'L':
+ case 'f':
+ case 'p':
+ case 'S':
+ /* these options have only 1 parameter which must be copied and can start with a '-' */
+ *newargv++ = *argv++;
+ argc--;
+ if (argc == 0)
+ goto error;
+ *newargv++ = *argv++;
+ argc--;
+ break;
+ default:
+ /* for other options just copy them without parameters, this is also done
+ * for options like "--foo", but this will fail in the argument parser.
+ * */
+ *newargv++ = *argv++;
+ argc--;
+ break;
+ }
+ }
+ }
+ }
+
+ return retargv;
+
+error:
+ free(retargv);
+ return NULL;
+}
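+
+/* For example (hypothetical command line),
+ * "haproxy -W -f /etc/haproxy/haproxy.cfg -sf 1234 5678 -x sockpair@5"
+ * is copied as "haproxy -W -f /etc/haproxy/haproxy.cfg": the -sf pid list
+ * and the -x option are dropped, everything else is kept in order.
+ */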
+
+
+/* Performs basic random seed initialization. The main issue with this is that
+ * srandom_r() only takes 32 bits and purposely provides a reproducible sequence,
+ * which means that there will only be 4 billion possible random sequences once
+ * srandom() is called, regardless of the internal state. Not calling it is
+ * even worse as we'll always produce the same random sequences. What we do
+ * here is to create an initial sequence from various entropy sources, hash it
+ * using SHA1 and keep the resulting 160 bits available globally.
+ *
+ * We initialize the current process with the first 32 bits before starting the
+ * polling loop, where all this will be changed to have process specific and
+ * thread specific sequences.
+ *
+ * Before starting threads, it's still possible to call random() as srandom()
+ * is initialized from this, but after threads and/or processes are started,
+ * only ha_random() is expected to be used to guarantee distinct sequences.
+ */
+static void ha_random_boot(char *const *argv)
+{
+ unsigned char message[256];
+ unsigned char *m = message;
+ struct timeval tv;
+ blk_SHA_CTX ctx;
+ unsigned long l;
+ int fd;
+ int i;
+
+ /* start with current time as pseudo-random seed */
+ gettimeofday(&tv, NULL);
+ write_u32(m, tv.tv_sec); m += 4;
+ write_u32(m, tv.tv_usec); m += 4;
+
+ /* PID and PPID add some OS-based randomness */
+ write_u16(m, getpid()); m += 2;
+ write_u16(m, getppid()); m += 2;
+
+ /* take up to 160 bits (20 bytes) from /dev/urandom if available (non-blocking) */
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd >= 0) {
+ i = read(fd, m, 20);
+ if (i > 0)
+ m += i;
+ close(fd);
+ }
+
+ /* take up to 160 bits (20 bytes) from openssl (non-blocking) */
+#ifdef USE_OPENSSL
+ if (RAND_bytes(m, 20) == 1)
+ m += 20;
+#endif
+
+ /* take 160 bits from existing random in case it was already initialized */
+ for (i = 0; i < 5; i++) {
+ write_u32(m, random());
+ m += 4;
+ }
+
+ /* stack address (benefits from the operating system's ASLR) */
+ l = (unsigned long)&m;
+ memcpy(m, &l, sizeof(l)); m += sizeof(l);
+
+ /* argv address (benefits from the operating system's ASLR) */
+ l = (unsigned long)&argv;
+ memcpy(m, &l, sizeof(l)); m += sizeof(l);
+
+ /* use tv_usec again after all the operations above */
+ gettimeofday(&tv, NULL);
+ write_u32(m, tv.tv_usec); m += 4;
+
+ /*
+ * At this point, ~84-92 bytes have been used
+ */
+
+ /* finish with the hostname */
+ strncpy((char *)m, hostname, message + sizeof(message) - m);
+ m += strlen(hostname);
+
+ /* total message length */
+ l = m - message;
+
+ memset(&ctx, 0, sizeof(ctx));
+ blk_SHA1_Init(&ctx);
+ blk_SHA1_Update(&ctx, message, l);
+ blk_SHA1_Final(boot_seed, &ctx);
+
+ srandom(read_u32(boot_seed));
+ ha_random_seed(boot_seed, sizeof(boot_seed));
+}
+
+/* Considers the maxconn of proxies with splicing enabled, computes the ideal
+ * global.maxpipes setting, and returns it. It may return -1 meaning "unlimited" if some
+ * unlimited proxies have been found and the global.maxconn value is not yet
+ * set. It may also return a value greater than maxconn if it's not yet set.
+ * Note that a value of zero means there is no need for pipes. -1 is never
+ * returned if global.maxconn is valid.
+ */
+static int compute_ideal_maxpipes()
+{
+ struct proxy *cur;
+ int nbfe = 0, nbbe = 0;
+ int unlimited = 0;
+ int pipes;
+ int max;
+
+ for (cur = proxies_list; cur; cur = cur->next) {
+ if (cur->options2 & (PR_O2_SPLIC_ANY)) {
+ if (cur->cap & PR_CAP_FE) {
+ max = cur->maxconn;
+ nbfe += max;
+ if (!max) {
+ unlimited = 1;
+ break;
+ }
+ }
+ if (cur->cap & PR_CAP_BE) {
+ max = cur->fullconn ? cur->fullconn : global.maxconn;
+ nbbe += max;
+ if (!max) {
+ unlimited = 1;
+ break;
+ }
+ }
+ }
+ }
+
+ pipes = MAX(nbfe, nbbe);
+ if (global.maxconn) {
+ if (pipes > global.maxconn || unlimited)
+ pipes = global.maxconn;
+ } else if (unlimited) {
+ pipes = -1;
+ }
+
+ return pipes >= 4 ? pipes / 4 : pipes;
+}
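+
+/* Worked example (hypothetical numbers): with two splicing frontends of
+ * maxconn 600 and 400 (nbfe=1000), one splicing backend with fullconn 800
+ * (nbbe=800) and global.maxconn=2000, pipes = MAX(1000, 800) = 1000 and the
+ * function returns 1000/4 = 250 pipes.
+ */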
+
+/* considers global.maxsocks, global.maxpipes, async engines, SSL frontends and
+ * rlimits and computes an ideal maxconn. It's meant to be called only when
+ * maxsock contains the sum of listening FDs, before it is updated based on
+ * maxconn and pipes. If there are not enough FDs left, DEFAULT_MAXCONN (by
+ * default 100) is returned as it is expected that it will even run on tight
+ * environments, and will maintain compatibility with previous packages that
+ * used to rely on this value as the default one. The system will emit a
+ * warning indicating how many FDs are missing anyway if needed.
+ */
+static int compute_ideal_maxconn()
+{
+ int ssl_sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+ int engine_fds = global.ssl_used_async_engines * ssl_sides;
+ int pipes = compute_ideal_maxpipes();
+ int remain = MAX(rlim_fd_cur_at_boot, rlim_fd_max_at_boot);
+ int maxconn;
+
+ /* we have to take into account these elements :
+ * - number of engine_fds, which inflates the number of FD needed per
+ * connection by this number.
+ * - number of pipes per connection on average : for the unlimited
+ * case, this is 0.5 pipe FDs per connection, otherwise it's a
+ * fixed value of 2*pipes.
+ * - two FDs per connection
+ */
+
+ if (global.fd_hard_limit && remain > global.fd_hard_limit)
+ remain = global.fd_hard_limit;
+
+ /* subtract listeners and checks */
+ remain -= global.maxsock;
+
+ /* one epoll_fd/kqueue_fd per thread */
+ remain -= global.nbthread;
+
+ /* one wake-up pipe (2 fd) per thread */
+ remain -= 2 * global.nbthread;
+
+ /* Fixed pipes values : we only subtract them if they're not larger
+ * than the remaining FDs because pipes are optional.
+ */
+ if (pipes >= 0 && pipes * 2 < remain)
+ remain -= pipes * 2;
+
+ if (pipes < 0) {
+ /* maxsock = maxconn * 2 + maxconn/4 * 2 + maxconn * engine_fds.
+ * = maxconn * (2 + 0.5 + engine_fds)
+ * = maxconn * (4 + 1 + 2*engine_fds) / 2
+ */
+ maxconn = 2 * remain / (5 + 2 * engine_fds);
+ } else {
+ /* maxsock = maxconn * 2 + maxconn * engine_fds.
+ * = maxconn * (2 + engine_fds)
+ */
+ maxconn = remain / (2 + engine_fds);
+ }
+
+ return MAX(maxconn, DEFAULT_MAXCONN);
+}
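+
+/* Worked example (hypothetical numbers): with 65536 FDs at boot, no SSL
+ * async engines (engine_fds=0), 4 threads, maxsock=100 for listeners and
+ * checks, and unlimited pipes (pipes=-1): remain = 65536 - 100 - 4 - 8 =
+ * 65424, so maxconn = 2 * 65424 / 5 = 26169.
+ */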
+
+/* computes the estimated maxsock value for the given maxconn based on the
+ * possibly set global.maxpipes and existing partial global.maxsock. It may
+ * temporarily change global.maxconn for the time needed to propagate the
+ * computations, and will reset it.
+ */
+static int compute_ideal_maxsock(int maxconn)
+{
+ int maxpipes = global.maxpipes;
+ int maxsock = global.maxsock;
+
+
+ if (!maxpipes) {
+ int old_maxconn = global.maxconn;
+
+ global.maxconn = maxconn;
+ maxpipes = compute_ideal_maxpipes();
+ global.maxconn = old_maxconn;
+ }
+
+ maxsock += maxconn * 2; /* each connection needs two sockets */
+ maxsock += maxpipes * 2; /* each pipe needs two FDs */
+ maxsock += global.nbthread; /* one epoll_fd/kqueue_fd per thread */
+ maxsock += 2 * global.nbthread; /* one wake-up pipe (2 fd) per thread */
+
+ /* compute fd used by async engines */
+ if (global.ssl_used_async_engines) {
+ int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+
+ maxsock += maxconn * sides * global.ssl_used_async_engines;
+ }
+ return maxsock;
+}
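+
+/* Worked example (hypothetical numbers): with maxconn=26169, maxpipes=250,
+ * 4 threads, no async engines and an initial maxsock of 100, this yields
+ * 100 + 26169*2 + 250*2 + 4 + 8 = 52950.
+ */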
+
+/* Tests if it is possible to set the current process's RLIMIT_NOFILE to
+ * <maxsock>, then sets it back to the previous value. Returns non-zero if the
+ * value is accepted, non-zero otherwise. This is used to determine if an
+ * automatic limit may be applied or not. When it is not, the caller knows that
+ * the highest we can do is the rlim_max at boot. In case of error, we return
+ * that the setting is possible, so that we defer the error processing to the
+ * final stage in charge of enforcing this.
+ */
+static int check_if_maxsock_permitted(int maxsock)
+{
+ struct rlimit orig_limit, test_limit;
+ int ret;
+
+ if (global.fd_hard_limit && maxsock > global.fd_hard_limit)
+ return 0;
+
+ if (getrlimit(RLIMIT_NOFILE, &orig_limit) != 0)
+ return 1;
+
+ /* don't go further if we can't even set to what we have */
+ if (raise_rlim_nofile(NULL, &orig_limit) != 0)
+ return 1;
+
+ test_limit.rlim_max = MAX(maxsock, orig_limit.rlim_max);
+ test_limit.rlim_cur = test_limit.rlim_max;
+ ret = raise_rlim_nofile(NULL, &test_limit);
+
+ if (raise_rlim_nofile(NULL, &orig_limit) != 0)
+ return 1;
+
+ return ret == 0;
+}
+
+/* This performs the very basic early initialization at the end of the PREPARE
+ * init stage. It may only assume that list heads are initialized, but not that
+ * anything else is correct. It will initialize a number of variables that
+ * depend on command line and will pre-parse the command line. If it fails, it
+ * directly exits.
+ */
+static void init_early(int argc, char **argv)
+{
+ char *progname;
+ char *tmp;
+ int len;
+
+ setenv("HAPROXY_STARTUP_VERSION", HAPROXY_VERSION, 0);
+
+ /* First, let's initialize most global variables */
+ totalconn = actconn = listeners = stopping = 0;
+ killed = pid = 0;
+
+ global.maxsock = 10; /* reserve 10 fds ; will be incremented by socket eaters */
+ global.rlimit_memmax_all = HAPROXY_MEMMAX;
+ global.mode = MODE_STARTING;
+
+ /* if we were in mworker mode, we should restart in mworker mode */
+ if (getenv("HAPROXY_MWORKER_REEXEC") != NULL)
+ global.mode |= MODE_MWORKER;
+
+ /* initialize date, time, and pid */
+ tzset();
+ clock_init_process_date();
+ start_date = date;
+ start_time_ns = now_ns;
+ pid = getpid();
+
+ /* Set local host name and adjust some environment variables.
+ * NB: POSIX does not make it mandatory for gethostname() to
+ * NULL-terminate the string in case of truncation, and at least
+ * FreeBSD appears not to do it.
+ */
+ memset(hostname, 0, sizeof(hostname));
+ gethostname(hostname, sizeof(hostname) - 1);
+
+ /* preset some environment variables */
+ localpeer = strdup(hostname);
+ if (!localpeer || setenv("HAPROXY_LOCALPEER", localpeer, 1) < 0) {
+ ha_alert("Cannot allocate memory for local peer.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* extract the program name from argv[0], it will be used for the logs
+ * and error messages.
+ */
+ progname = *argv;
+ while ((tmp = strchr(progname, '/')) != NULL)
+ progname = tmp + 1;
+
+ len = strlen(progname);
+ progname = strdup(progname);
+ if (!progname) {
+ ha_alert("Cannot allocate memory for log_tag.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ chunk_initlen(&global.log_tag, progname, len, len);
+}
+
+/* handles program arguments. Very minimal parsing is performed, variables are
+ * fed with some values, and lists are completed with other ones. In case of
+ * error, it will exit.
+ */
+static void init_args(int argc, char **argv)
+{
+ char *progname = global.log_tag.area;
+ char *err_msg = NULL;
+
+ /* pre-fill in the global tuning options before we let the cmdline
+ * change them.
+ */
+ global.tune.options |= GTUNE_USE_SELECT; /* select() is always available */
+#if defined(USE_POLL)
+ global.tune.options |= GTUNE_USE_POLL;
+#endif
+#if defined(USE_EPOLL)
+ global.tune.options |= GTUNE_USE_EPOLL;
+#endif
+#if defined(USE_KQUEUE)
+ global.tune.options |= GTUNE_USE_KQUEUE;
+#endif
+#if defined(USE_EVPORTS)
+ global.tune.options |= GTUNE_USE_EVPORTS;
+#endif
+#if defined(USE_LINUX_SPLICE)
+ global.tune.options |= GTUNE_USE_SPLICE;
+#endif
+#if defined(USE_GETADDRINFO)
+ global.tune.options |= GTUNE_USE_GAI;
+#endif
+#ifdef USE_THREAD
+ global.tune.options |= GTUNE_IDLE_POOL_SHARED;
+#endif
+#ifdef USE_QUIC
+ global.tune.options |= GTUNE_QUIC_SOCK_PER_CONN;
+#endif
+ global.tune.options |= GTUNE_STRICT_LIMITS;
+
+ global.tune.options |= GTUNE_USE_FAST_FWD; /* Use fast-forward by default */
+
+ /* Use zero-copy forwarding by default */
+ global.tune.no_zero_copy_fwd = NO_ZERO_COPY_FWD_QUIC_SND;
+
+ /* keep a copy of original arguments for the master process */
+ old_argv = copy_argv(argc, argv);
+ if (!old_argv) {
+ ha_alert("failed to copy argv.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* skip program name and start */
+ argc--; argv++;
+ while (argc > 0) {
+ char *flag;
+
+ if (**argv == '-') {
+ flag = *argv+1;
+
+ /* 1 arg */
+ if (*flag == 'v') {
+ display_version();
+ if (flag[1] == 'v') /* -vv */
+ display_build_opts();
+ deinit_and_exit(0);
+ }
+#if defined(USE_EPOLL)
+ else if (*flag == 'd' && flag[1] == 'e')
+ global.tune.options &= ~GTUNE_USE_EPOLL;
+#endif
+#if defined(USE_POLL)
+ else if (*flag == 'd' && flag[1] == 'p')
+ global.tune.options &= ~GTUNE_USE_POLL;
+#endif
+#if defined(USE_KQUEUE)
+ else if (*flag == 'd' && flag[1] == 'k')
+ global.tune.options &= ~GTUNE_USE_KQUEUE;
+#endif
+#if defined(USE_EVPORTS)
+ else if (*flag == 'd' && flag[1] == 'v')
+ global.tune.options &= ~GTUNE_USE_EVPORTS;
+#endif
+#if defined(USE_LINUX_SPLICE)
+ else if (*flag == 'd' && flag[1] == 'S')
+ global.tune.options &= ~GTUNE_USE_SPLICE;
+#endif
+#if defined(USE_GETADDRINFO)
+ else if (*flag == 'd' && flag[1] == 'G')
+ global.tune.options &= ~GTUNE_USE_GAI;
+#endif
+#if defined(SO_REUSEPORT)
+ else if (*flag == 'd' && flag[1] == 'R')
+ protocol_clrf_all(PROTO_F_REUSEPORT_SUPPORTED);
+#endif
+ else if (*flag == 'd' && flag[1] == 'F')
+ global.tune.options &= ~GTUNE_USE_FAST_FWD;
+ else if (*flag == 'd' && flag[1] == 'V')
+ global.ssl_server_verify = SSL_SERVER_VERIFY_NONE;
+ else if (*flag == 'd' && flag[1] == 'Z')
+ global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD;
+ else if (*flag == 'V')
+ arg_mode |= MODE_VERBOSE;
+ else if (*flag == 'd' && flag[1] == 'C') {
+ char *end;
+ char *key;
+
+ key = flag + 2;
+ for (;key && *key; key = end) {
+ end = strchr(key, ',');
+ if (end)
+ *(end++) = 0;
+
+ if (strcmp(key, "line") == 0)
+ arg_mode |= MODE_DUMP_NB_L;
+
+ }
+ arg_mode |= MODE_DUMP_CFG;
+ HA_ATOMIC_STORE(&global.anon_key, atoll(flag + 2));
+ }
+ else if (*flag == 'd' && flag[1] == 'b')
+ arg_mode |= MODE_FOREGROUND;
+ else if (*flag == 'd' && flag[1] == 'D')
+ arg_mode |= MODE_DIAG;
+ else if (*flag == 'd' && flag[1] == 'W')
+ arg_mode |= MODE_ZERO_WARNING;
+ else if (*flag == 'd' && flag[1] == 'M') {
+ int ret = pool_parse_debugging(flag + 2, &err_msg);
+
+ if (ret <= -1) {
+ if (ret < -1)
+ ha_alert("-dM: %s\n", err_msg);
+ else
+ printf("%s\n", err_msg);
+ ha_free(&err_msg);
+ exit(ret < -1 ? EXIT_FAILURE : 0);
+ } else if (ret == 0) {
+ ha_warning("-dM: %s\n", err_msg);
+ ha_free(&err_msg);
+ }
+ }
+ else if (*flag == 'd' && flag[1] == 'r')
+ global.tune.options |= GTUNE_RESOLVE_DONTFAIL;
+#if defined(HA_HAVE_DUMP_LIBS)
+ else if (*flag == 'd' && flag[1] == 'L')
+ arg_mode |= MODE_DUMP_LIBS;
+#endif
+ else if (*flag == 'd' && flag[1] == 'K') {
+ arg_mode |= MODE_DUMP_KWD;
+ kwd_dump = flag + 2;
+ }
+ else if (*flag == 'd' && flag[1] == 't') {
+ if (argc > 1 && argv[1][0] != '-') {
+ if (trace_parse_cmd(argv[1], &err_msg)) {
+ ha_alert("-dt: %s.\n", err_msg);
+ ha_free(&err_msg);
+ exit(EXIT_FAILURE);
+ }
+ argc--; argv++;
+ }
+ else {
+ trace_parse_cmd(NULL, NULL);
+ }
+ }
+ else if (*flag == 'd')
+ arg_mode |= MODE_DEBUG;
+ else if (*flag == 'c' && flag[1] == 'c') {
+ arg_mode |= MODE_CHECK_CONDITION;
+ argv++;
+ argc--;
+ check_condition = *argv;
+ }
+ else if (*flag == 'c')
+ arg_mode |= MODE_CHECK;
+ else if (*flag == 'D')
+ arg_mode |= MODE_DAEMON;
+ else if (*flag == 'W' && flag[1] == 's') {
+ arg_mode |= MODE_MWORKER | MODE_FOREGROUND;
+#if defined(USE_SYSTEMD)
+ global.tune.options |= GTUNE_USE_SYSTEMD;
+#else
+ ha_alert("master-worker mode with systemd support (-Ws) requested, but not compiled. Use master-worker mode (-W) if you are not using Type=notify in your unit file or recompile with USE_SYSTEMD=1.\n\n");
+ usage(progname);
+#endif
+ }
+ else if (*flag == 'W')
+ arg_mode |= MODE_MWORKER;
+ else if (*flag == 'q')
+ arg_mode |= MODE_QUIET;
+ else if (*flag == 'x') {
+ if (argc <= 1) {
+ ha_alert("Unix socket path expected with the -x flag\n\n");
+ usage(progname);
+ }
+ if (old_unixsocket)
+ ha_warning("-x option already set, overwriting the value\n");
+ old_unixsocket = argv[1];
+
+ argv++;
+ argc--;
+ }
+ else if (*flag == 'S') {
+ struct wordlist *c;
+
+ if (argc <= 1) {
+ ha_alert("Socket and optional bind parameters expected with the -S flag\n");
+ usage(progname);
+ }
+ if ((c = malloc(sizeof(*c))) == NULL || (c->s = strdup(argv[1])) == NULL) {
+ ha_alert("Cannot allocate memory\n");
+ exit(EXIT_FAILURE);
+ }
+ LIST_INSERT(&mworker_cli_conf, &c->list);
+
+ argv++;
+ argc--;
+ }
+ else if (*flag == 's' && (flag[1] == 'f' || flag[1] == 't')) {
+ /* list of pids to finish ('f') or terminate ('t') */
+
+ if (flag[1] == 'f')
+ oldpids_sig = SIGUSR1; /* finish then exit */
+ else
+ oldpids_sig = SIGTERM; /* terminate immediately */
+ while (argc > 1 && argv[1][0] != '-') {
+ char * endptr = NULL;
+ oldpids = realloc(oldpids, (nb_oldpids + 1) * sizeof(int));
+ if (!oldpids) {
+ ha_alert("Cannot allocate old pid : out of memory.\n");
+ exit(1);
+ }
+ argc--; argv++;
+ errno = 0;
+ oldpids[nb_oldpids] = strtol(*argv, &endptr, 10);
+ if (errno) {
+ ha_alert("-%2s option: failed to parse {%s}: %s\n",
+ flag,
+ *argv, strerror(errno));
+ exit(1);
+ } else if (endptr && strlen(endptr)) {
+ while (isspace((unsigned char)*endptr)) endptr++;
+ if (*endptr != 0) {
+ ha_alert("-%2s option: some bytes unconsumed in PID list {%s}\n",
+ flag, endptr);
+ exit(1);
+ }
+ }
+ if (oldpids[nb_oldpids] <= 0)
+ usage(progname);
+ nb_oldpids++;
+ }
+ }
+ else if (flag[0] == '-' && flag[1] == 0) { /* "--" */
+ /* now that's a cfgfile list */
+ argv++; argc--;
+ while (argc > 0) {
+ if (!list_append_word(&cfg_cfgfiles, *argv, &err_msg)) {
+ ha_alert("Cannot load configuration file/directory %s : %s\n",
+ *argv,
+ err_msg);
+ exit(1);
+ }
+ argv++; argc--;
+ }
+ break;
+ }
+ else { /* >=2 args */
+ argv++; argc--;
+ if (argc == 0)
+ usage(progname);
+
+ switch (*flag) {
+ case 'C' : change_dir = *argv; break;
+ case 'n' : cfg_maxconn = atol(*argv); break;
+ case 'm' : global.rlimit_memmax_all = atol(*argv); break;
+ case 'N' : cfg_maxpconn = atol(*argv); break;
+ case 'L' :
+ free(localpeer);
+ if ((localpeer = strdup(*argv)) == NULL) {
+ ha_alert("Cannot allocate memory for local peer.\n");
+ exit(EXIT_FAILURE);
+ }
+ setenv("HAPROXY_LOCALPEER", localpeer, 1);
+ global.localpeer_cmdline = 1;
+ break;
+ case 'f' :
+ if (!list_append_word(&cfg_cfgfiles, *argv, &err_msg)) {
+ ha_alert("Cannot load configuration file/directory %s : %s\n",
+ *argv,
+ err_msg);
+ exit(1);
+ }
+ break;
+ case 'p' :
+ free(global.pidfile);
+ if ((global.pidfile = strdup(*argv)) == NULL) {
+ ha_alert("Cannot allocate memory for pidfile.\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
+ default: usage(progname);
+ }
+ }
+ }
+ else
+ usage(progname);
+ argv++; argc--;
+ }
+ free(err_msg);
+}
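+
+/* For example (hypothetical invocation), "haproxy -W -q -f a.cfg -f b.cfg
+ * -sf 100 200" sets MODE_MWORKER and MODE_QUIET in arg_mode, appends a.cfg
+ * and b.cfg to cfg_cfgfiles, sets oldpids_sig to SIGUSR1 and fills
+ * oldpids={100, 200} with nb_oldpids=2.
+ */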
+
+/* call the various keyword dump functions based on the comma-delimited list of
+ * classes in kwd_dump.
+ */
+static void dump_registered_keywords(void)
+{
+ char *end;
+ int all __maybe_unused = 0;
+
+ for (; kwd_dump && *kwd_dump; kwd_dump = end) {
+ end = strchr(kwd_dump, ',');
+ if (end)
+ *(end++) = 0;
+
+ if (strcmp(kwd_dump, "help") == 0) {
+ printf("# List of supported keyword classes:\n");
+ printf("all: list all keywords\n");
+ printf("acl: ACL keywords\n");
+ printf("cfg: configuration keywords\n");
+ printf("cli: CLI keywords\n");
+ printf("cnv: sample converter keywords\n");
+ printf("flt: filter names\n");
+ printf("smp: sample fetch functions\n");
+ printf("svc: service names\n");
+ continue;
+ }
+ else if (strcmp(kwd_dump, "all") == 0) {
+ all = 1;
+ }
+
+ if (all || strcmp(kwd_dump, "acl") == 0) {
+ printf("# List of registered ACL keywords:\n");
+ acl_dump_kwd();
+ }
+
+ if (all || strcmp(kwd_dump, "cfg") == 0) {
+ printf("# List of registered configuration keywords:\n");
+ cfg_dump_registered_keywords();
+ }
+
+ if (all || strcmp(kwd_dump, "cli") == 0) {
+ printf("# List of registered CLI keywords:\n");
+ cli_list_keywords();
+ }
+
+ if (all || strcmp(kwd_dump, "cnv") == 0) {
+ printf("# List of registered sample converter functions:\n");
+ smp_dump_conv_kw();
+ }
+
+ if (all || strcmp(kwd_dump, "flt") == 0) {
+ printf("# List of registered filter names:\n");
+ flt_dump_kws(NULL);
+ }
+
+ if (all || strcmp(kwd_dump, "smp") == 0) {
+ printf("# List of registered sample fetch functions:\n");
+ smp_dump_fetch_kw();
+ }
+
+ if (all || strcmp(kwd_dump, "svc") == 0) {
+ printf("# List of registered service names:\n");
+ list_services(NULL);
+ }
+ }
+}
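+
+/* For example, "haproxy -dKhelp" prints the class list above,
+ * "haproxy -dKcfg,acl" dumps only the configuration and ACL keywords, and
+ * "haproxy -dKall" dumps every class.
+ */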
+
+/* Generate a random cluster-secret in case the setting is not provided in the
+ * configuration. This allows features which rely on it to be used, albeit
+ * with some limitations.
+ */
+static void generate_random_cluster_secret()
+{
+ /* used as a default random cluster-secret if none defined. */
+ uint64_t rand;
+
+ /* The caller must not overwrite an already defined secret. */
+ BUG_ON(cluster_secret_isset);
+
+ rand = ha_random64();
+ memcpy(global.cluster_secret, &rand, sizeof(rand));
+ rand = ha_random64();
+ memcpy(global.cluster_secret + sizeof(rand), &rand, sizeof(rand));
+ cluster_secret_isset = 1;
+}
+
+/*
+ * This function initializes all the necessary variables. It only returns
+ * if everything is OK. If something fails, it exits.
+ */
+static void init(int argc, char **argv)
+{
+ char *progname = global.log_tag.area;
+ int err_code = 0;
+ struct wordlist *wl;
+ struct proxy *px;
+ struct post_check_fct *pcf;
+ struct pre_check_fct *prcf;
+ int ideal_maxconn;
+ const char *cc, *cflags, *opts;
+
+#ifdef USE_OPENSSL
+#ifdef USE_OPENSSL_WOLFSSL
+ wolfSSL_Init();
+ wolfSSL_Debugging_ON();
+#endif
+
+#ifdef USE_OPENSSL_AWSLC
+ const char *version_str = OpenSSL_version(OPENSSL_VERSION);
+ if (strncmp(version_str, "AWS-LC", 6) != 0) {
+ ha_alert("HAPRoxy built with AWS-LC but running with %s.\n", version_str);
+ exit(1);
+ }
+#endif
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL)
+ /* Initialize the error strings of OpenSSL.
+ * This only needs to be done explicitly with older versions of the SSL
+ * library. On newer versions, error strings are loaded during start
+ * up. */
+ SSL_load_error_strings();
+#endif
+#endif
+
+ startup_logs_init();
+
+ if (init_acl() != 0)
+ exit(1);
+
+ /* Initialise lua. */
+ hlua_init();
+
+ global.mode |= (arg_mode & (MODE_DAEMON | MODE_MWORKER | MODE_FOREGROUND | MODE_VERBOSE
+ | MODE_QUIET | MODE_CHECK | MODE_DEBUG | MODE_ZERO_WARNING
+ | MODE_DIAG | MODE_CHECK_CONDITION | MODE_DUMP_LIBS | MODE_DUMP_KWD
+ | MODE_DUMP_CFG | MODE_DUMP_NB_L));
+
+ if (getenv("HAPROXY_MWORKER_WAIT_ONLY")) {
+ unsetenv("HAPROXY_MWORKER_WAIT_ONLY");
+ global.mode |= MODE_MWORKER_WAIT;
+ global.mode &= ~MODE_MWORKER;
+ }
+
+ /* set the atexit functions when not doing configuration check */
+ if (!(global.mode & (MODE_CHECK | MODE_CHECK_CONDITION))
+ && (getenv("HAPROXY_MWORKER_REEXEC") != NULL)) {
+
+ if (global.mode & MODE_MWORKER) {
+ atexit_flag = 1;
+ atexit(reexec_on_failure);
+ } else if (global.mode & MODE_MWORKER_WAIT) {
+ atexit_flag = 1;
+ atexit(exit_on_waitmode_failure);
+ }
+ }
+
+ if (change_dir && chdir(change_dir) < 0) {
+ ha_alert("Could not change to directory %s : %s\n", change_dir, strerror(errno));
+ exit(1);
+ }
+
+ usermsgs_clr("config");
+
+ if (global.mode & MODE_CHECK_CONDITION) {
+ int result;
+
+ uint32_t err;
+ const char *errptr;
+ char *errmsg = NULL;
+
+ char *args[MAX_LINE_ARGS+1];
+ int arg = sizeof(args) / sizeof(*args);
+ size_t outlen;
+ char *w;
+
+ if (!check_condition)
+ usage(progname);
+
+ outlen = strlen(check_condition) + 1;
+ err = parse_line(check_condition, check_condition, &outlen, args, &arg,
+ PARSE_OPT_ENV | PARSE_OPT_WORD_EXPAND | PARSE_OPT_DQUOTE | PARSE_OPT_SQUOTE | PARSE_OPT_BKSLASH,
+ &errptr);
+
+ if (err & PARSE_ERR_QUOTE) {
+ ha_alert("Syntax Error in condition: Unmatched quote.\n");
+ exit(2);
+ }
+
+ if (err & PARSE_ERR_HEX) {
+ ha_alert("Syntax Error in condition: Truncated or invalid hexadecimal sequence.\n");
+ exit(2);
+ }
+
+ if (err & (PARSE_ERR_TOOLARGE|PARSE_ERR_OVERLAP)) {
+ ha_alert("Error in condition: Line too long.\n");
+ exit(2);
+ }
+
+ if (err & PARSE_ERR_TOOMANY) {
+ ha_alert("Error in condition: Too many words.\n");
+ exit(2);
+ }
+
+ if (err) {
+ ha_alert("Unhandled error in condition, please report this to the developers.\n");
+ exit(2);
+ }
+
+ /* remerge all words into a single expression */
+ for (w = *args; (w += strlen(w)) < check_condition + outlen - 1; *w = ' ')
+ ;
+
+ result = cfg_eval_condition(args, &errmsg, &errptr);
+
+ if (result < 0) {
+ if (errmsg)
+ ha_alert("Failed to evaluate condition: %s\n", errmsg);
+
+ exit(2);
+ }
+
+ exit(result ? 0 : 1);
+ }
+
+ /* in wait mode, we don't try to read the configuration files */
+ if (!(global.mode & MODE_MWORKER_WAIT)) {
+ char *env_cfgfiles = NULL;
+ int env_err = 0;
+
+ /* handle cfgfiles that are actually directories */
+ cfgfiles_expand_directories();
+
+ if (LIST_ISEMPTY(&cfg_cfgfiles))
+ usage(progname);
+
+ /* temporarily create environment variables with default
+ * values to ease user configuration. Do not forget to
+ * unset them after the list_for_each_entry loop.
+ */
+ setenv("HAPROXY_HTTP_LOG_FMT", default_http_log_format, 1);
+ setenv("HAPROXY_HTTPS_LOG_FMT", default_https_log_format, 1);
+ setenv("HAPROXY_TCP_LOG_FMT", default_tcp_log_format, 1);
+ setenv("HAPROXY_BRANCH", PRODUCT_BRANCH, 1);
+ list_for_each_entry(wl, &cfg_cfgfiles, list) {
+ int ret;
+
+ if (env_err == 0) {
+ if (!memprintf(&env_cfgfiles, "%s%s%s",
+ (env_cfgfiles ? env_cfgfiles : ""),
+ (env_cfgfiles ? ";" : ""), wl->s))
+ env_err = 1;
+ }
+
+ ret = readcfgfile(wl->s);
+ if (ret == -1) {
+ ha_alert("Could not open configuration file %s : %s\n",
+ wl->s, strerror(errno));
+ free(env_cfgfiles);
+ exit(1);
+ }
+ if (ret & (ERR_ABORT|ERR_FATAL))
+ ha_alert("Error(s) found in configuration file : %s\n", wl->s);
+ err_code |= ret;
+ if (err_code & ERR_ABORT) {
+ free(env_cfgfiles);
+ exit(1);
+ }
+ }
+ /* remove temporary environment variables. */
+ unsetenv("HAPROXY_BRANCH");
+ unsetenv("HAPROXY_HTTP_LOG_FMT");
+ unsetenv("HAPROXY_HTTPS_LOG_FMT");
+ unsetenv("HAPROXY_TCP_LOG_FMT");
+
+ /* do not try to resolve arguments nor to spot inconsistencies when
+ * the configuration contains fatal errors caused by files not found
+ * or failed memory allocations.
+ */
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Fatal errors found in configuration.\n");
+ free(env_cfgfiles);
+ exit(1);
+ }
+ if (env_err) {
+ ha_alert("Could not allocate memory for HAPROXY_CFGFILES env variable\n");
+ exit(1);
+ }
+ setenv("HAPROXY_CFGFILES", env_cfgfiles, 1);
+ free(env_cfgfiles);
+
+ }
+ if (global.mode & MODE_MWORKER) {
+ struct mworker_proc *tmproc;
+
+ setenv("HAPROXY_MWORKER", "1", 1);
+
+ if (getenv("HAPROXY_MWORKER_REEXEC") == NULL) {
+
+ tmproc = mworker_proc_new();
+ if (!tmproc) {
+ ha_alert("Cannot allocate process structures.\n");
+ exit(EXIT_FAILURE);
+ }
+ tmproc->options |= PROC_O_TYPE_MASTER; /* master */
+ tmproc->pid = pid;
+ tmproc->timestamp = start_date.tv_sec;
+ proc_self = tmproc;
+
+ LIST_APPEND(&proc_list, &tmproc->list);
+ }
+
+ tmproc = mworker_proc_new();
+ if (!tmproc) {
+ ha_alert("Cannot allocate process structures.\n");
+ exit(EXIT_FAILURE);
+ }
+ tmproc->options |= PROC_O_TYPE_WORKER; /* worker */
+
+ if (mworker_cli_sockpair_new(tmproc, 0) < 0) {
+ exit(EXIT_FAILURE);
+ }
+
+ LIST_APPEND(&proc_list, &tmproc->list);
+ }
+
+ if (global.mode & MODE_MWORKER_WAIT) {
+ /* in exec mode, there's always exactly one thread. Failing to
+ * set these now would result in nbthread being detected
+ * automatically.
+ */
+ global.nbtgroups = 1;
+ global.nbthread = 1;
+ }
+
+ if (global.mode & (MODE_MWORKER|MODE_MWORKER_WAIT)) {
+ struct wordlist *it, *c;
+
+ master = 1;
+ /* get the info of the children in the env */
+ if (mworker_env_to_proc_list() < 0) {
+ exit(EXIT_FAILURE);
+ }
+
+ if (!LIST_ISEMPTY(&mworker_cli_conf)) {
+ char *path = NULL;
+
+ if (mworker_cli_proxy_create() < 0) {
+ ha_alert("Can't create the master's CLI.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ list_for_each_entry_safe(c, it, &mworker_cli_conf, list) {
+
+ if (mworker_cli_proxy_new_listener(c->s) == NULL) {
+ ha_alert("Can't create the master's CLI.\n");
+ exit(EXIT_FAILURE);
+ }
+ LIST_DELETE(&c->list);
+ free(c->s);
+ free(c);
+ }
+ /* Creates the mcli_reload listener, which is the listener used
+ * to retrieve the master CLI session which asked for the reload.
+ *
+ * ipc_fd[1] will be used as a listener, and ipc_fd[0]
+ * will be used to send the FD of the session.
+ *
+ * Both FDs will be kept in the master. The sockets are
+ * created only if they weren't inherited.
+ */
+ if ((proc_self->ipc_fd[1] == -1) &&
+ socketpair(AF_UNIX, SOCK_STREAM, 0, proc_self->ipc_fd) < 0) {
+ ha_alert("cannot create the mcli_reload socketpair.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Create the mcli_reload listener from the proc_self struct */
+ memprintf(&path, "sockpair@%d", proc_self->ipc_fd[1]);
+ mcli_reload_bind_conf = mworker_cli_proxy_new_listener(path);
+ if (mcli_reload_bind_conf == NULL) {
+ ha_alert("Cannot create the mcli_reload listener.\n");
+ exit(EXIT_FAILURE);
+ }
+ ha_free(&path);
+ }
+ }
+
+ if (!LIST_ISEMPTY(&mworker_cli_conf) && !(arg_mode & MODE_MWORKER)) {
+ ha_alert("a master CLI socket was defined, but master-worker mode (-W) is not enabled.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* destroy unreferenced defaults proxies */
+ proxy_destroy_all_unref_defaults();
+
+ list_for_each_entry(prcf, &pre_check_list, list)
+ err_code |= prcf->fct();
+
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Fatal errors found in configuration.\n");
+ exit(1);
+ }
+
+ /* update the ready date that will be used to count the startup time
+ * during config checks (e.g. to schedule certain tasks if needed)
+ */
+ clock_update_date(0, 1);
+ clock_adjust_now_offset();
+ ready_date = date;
+
+
+ /* Note: global.nbthread will be initialized as part of this call */
+ err_code |= check_config_validity();
+
+ /* update the ready date to also account for the check time */
+ clock_update_date(0, 1);
+ clock_adjust_now_offset();
+ ready_date = date;
+
+ for (px = proxies_list; px; px = px->next) {
+ struct server *srv;
+ struct post_proxy_check_fct *ppcf;
+ struct post_server_check_fct *pscf;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ list_for_each_entry(pscf, &post_server_check_list, list) {
+ for (srv = px->srv; srv; srv = srv->next)
+ err_code |= pscf->fct(srv);
+ }
+ list_for_each_entry(ppcf, &post_proxy_check_list, list)
+ err_code |= ppcf->fct(px);
+ }
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Fatal errors found in configuration.\n");
+ exit(1);
+ }
+
+ err_code |= pattern_finalize_config();
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to finalize pattern config.\n");
+ exit(1);
+ }
+
+ if (global.rlimit_memmax_all)
+ global.rlimit_memmax = global.rlimit_memmax_all;
+
+#ifdef USE_NS
+ err_code |= netns_init();
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to initialize namespace support.\n");
+ exit(1);
+ }
+#endif
+
+ thread_detect_binding_discrepancies();
+ thread_detect_more_than_cpus();
+
+ /* Apply server states */
+ apply_server_state();
+
+ for (px = proxies_list; px; px = px->next)
+ srv_compute_all_admin_states(px);
+
+ /* Apply servers' configured address */
+ err_code |= srv_init_addr();
+ if (err_code & (ERR_ABORT|ERR_FATAL)) {
+ ha_alert("Failed to initialize server(s) addr.\n");
+ exit(1);
+ }
+
+ if (warned & WARN_ANY && global.mode & MODE_ZERO_WARNING) {
+ ha_alert("Some warnings were found and 'zero-warning' is set. Aborting.\n");
+ exit(1);
+ }
+
+#if defined(HA_HAVE_DUMP_LIBS)
+ if (global.mode & MODE_DUMP_LIBS) {
+ qfprintf(stdout, "List of loaded object files:\n");
+ chunk_reset(&trash);
+ if (dump_libs(&trash, ((arg_mode & (MODE_QUIET|MODE_VERBOSE)) == MODE_VERBOSE)))
+ printf("%s", trash.area);
+ }
+#endif
+
+ if (global.mode & MODE_DUMP_KWD)
+ dump_registered_keywords();
+
+ if (global.mode & MODE_DIAG) {
+ cfg_run_diagnostics();
+ }
+
+ if (global.mode & MODE_CHECK) {
+ struct peers *pr;
+ struct proxy *px;
+
+ if (warned & WARN_ANY)
+ qfprintf(stdout, "Warnings were found.\n");
+
+ for (pr = cfg_peers; pr; pr = pr->next)
+ if (pr->peers_fe)
+ break;
+
+ for (px = proxies_list; px; px = px->next)
+ if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && px->li_all)
+ break;
+
+ if (!px) {
+ /* We may only have log-forward section */
+ for (px = cfg_log_forward; px; px = px->next)
+ if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && px->li_all)
+ break;
+ }
+
+ if (pr || px) {
+ /* At least one peer or one listener has been found */
+ if (global.mode & MODE_VERBOSE)
+ qfprintf(stdout, "Configuration file is valid\n");
+ deinit_and_exit(0);
+ }
+ qfprintf(stdout, "Configuration file has no error but will not start (no listener) => exit(2).\n");
+ exit(2);
+ }
+
+ if (global.mode & MODE_DUMP_CFG)
+ deinit_and_exit(0);
+
+#ifdef USE_OPENSSL
+
+ /* Initialize SSL random generator. Must be called before chroot for
+ * access to /dev/urandom, and before ha_random_boot() which may use
+ * RAND_bytes().
+ */
+ if (!ssl_initialize_random()) {
+ ha_alert("OpenSSL random data generator initialization failed.\n");
+ exit(EXIT_FAILURE);
+ }
+#endif
+ ha_random_boot(argv); // the argv pointer brings some kernel-fed entropy
+
+ /* now we know the buffer size, we can initialize the channels and buffers */
+ init_buffer();
+
+ list_for_each_entry(pcf, &post_check_list, list) {
+ err_code |= pcf->fct();
+ if (err_code & (ERR_ABORT|ERR_FATAL))
+ exit(1);
+ }
+
+ /* set the default maxconn in the master, but let it be rewritable with -n */
+ if (global.mode & MODE_MWORKER_WAIT)
+ global.maxconn = MASTER_MAXCONN;
+
+ if (cfg_maxconn > 0)
+ global.maxconn = cfg_maxconn;
+
+ if (global.cli_fe)
+ global.maxsock += global.cli_fe->maxconn;
+
+ if (cfg_peers) {
+ /* peers also need to bypass global maxconn */
+ struct peers *p = cfg_peers;
+
+ for (p = cfg_peers; p; p = p->next)
+ if (p->peers_fe)
+ global.maxsock += p->peers_fe->maxconn;
+ }
+
+ /* Now we want to compute the maxconn and possibly maxsslconn values.
+ * It's a bit tricky. Maxconn defaults to the pre-computed value based
+ * on rlim_fd_cur and the number of FDs in use due to the configuration,
+ * and maxsslconn defaults to DEFAULT_MAXSSLCONN. On top of that we can
+ * enforce a lower limit based on memmax.
+ *
+ * If memmax is set, then it depends on which values are set. If
+ * maxsslconn is set, we use memmax to determine how many cleartext
+ * connections may be added, and set maxconn to the sum of the two.
+ * If maxconn is set and not maxsslconn, maxsslconn is computed from
+ * the remaining amount of memory between memmax and the cleartext
+ * connections. If neither are set, then it is considered that all
+ * connections are SSL-capable, and maxconn is computed based on this,
+ * then maxsslconn accordingly. We need to know if SSL is used on the
+ * frontends, backends, or both, because when it's used on both sides,
+ * we need twice the value for maxsslconn, but we only count the
+ * handshake once since it is not performed on the two sides at the
+ * same time (frontend-side is terminated before backend-side begins).
+ * The SSL stack is supposed to have filled ssl_session_cost and
+ * ssl_handshake_cost during its initialization. In any case, if
+ * SYSTEM_MAXCONN is set, we still enforce it as an upper limit for
+ * maxconn in order to protect the system.
+ */
+ ideal_maxconn = compute_ideal_maxconn();
+
+ if (!global.rlimit_memmax) {
+ if (global.maxconn == 0) {
+ global.maxconn = ideal_maxconn;
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
+ fprintf(stderr, "Note: setting global.maxconn to %d.\n", global.maxconn);
+ }
+ }
+#ifdef USE_OPENSSL
+ else if (!global.maxconn && !global.maxsslconn &&
+ (global.ssl_used_frontend || global.ssl_used_backend)) {
+ /* memmax is set, compute everything automatically. Here we want
+ * to ensure that all SSL connections will be served. We take
+ * care of the number of sides where SSL is used, and consider
+ * the worst case : SSL used on both sides and doing a handshake
+ * simultaneously. Note that we can't have more than maxconn
+ * handshakes at a time by definition, so for the worst case of
+ * two SSL conns per connection, we count a single handshake.
+ */
+ int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+ int64_t mem = global.rlimit_memmax * 1048576ULL;
+ int retried = 0;
+
+ mem -= global.tune.sslcachesize * 200ULL; // about 200 bytes per SSL cache entry
+ mem -= global.maxzlibmem;
+ mem = mem * MEM_USABLE_RATIO;
+
+ /* Principle: we test once to set maxconn according to the free
+ * memory. If it results in values the system rejects, we try a
+ * second time by respecting rlim_fd_max. If it fails again, we
+ * go back to the initial value and will let the final code
+ * dealing with rlimit report the error. That's up to 3 attempts.
+ */
+ do {
+ global.maxconn = mem /
+ ((STREAM_MAX_COST + 2 * global.tune.bufsize) + // stream + 2 buffers per stream
+ sides * global.ssl_session_max_cost + // SSL buffers, one per side
+ global.ssl_handshake_max_cost); // 1 handshake per connection max
+
+ if (retried == 1)
+ global.maxconn = MIN(global.maxconn, ideal_maxconn);
+ global.maxconn = round_2dig(global.maxconn);
+#ifdef SYSTEM_MAXCONN
+ if (global.maxconn > SYSTEM_MAXCONN)
+ global.maxconn = SYSTEM_MAXCONN;
+#endif /* SYSTEM_MAXCONN */
+ global.maxsslconn = sides * global.maxconn;
+
+ if (check_if_maxsock_permitted(compute_ideal_maxsock(global.maxconn)))
+ break;
+ } while (retried++ < 2);
+
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
+ fprintf(stderr, "Note: setting global.maxconn to %d and global.maxsslconn to %d.\n",
+ global.maxconn, global.maxsslconn);
+ }
+ else if (!global.maxsslconn &&
+ (global.ssl_used_frontend || global.ssl_used_backend)) {
+ /* memmax and maxconn are known, compute maxsslconn automatically.
+ * maxsslconn being forced, we don't know how many of it will be
+ * on each side if both sides are being used. The worst case is
+ * when all connections use only one SSL instance because
+ * handshakes may be on two sides at the same time.
+ */
+ int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+ int64_t mem = global.rlimit_memmax * 1048576ULL;
+ int64_t sslmem;
+
+ mem -= global.tune.sslcachesize * 200ULL; // about 200 bytes per SSL cache entry
+ mem -= global.maxzlibmem;
+ mem = mem * MEM_USABLE_RATIO;
+
+ sslmem = mem - global.maxconn * (int64_t)(STREAM_MAX_COST + 2 * global.tune.bufsize);
+ global.maxsslconn = sslmem / (global.ssl_session_max_cost + global.ssl_handshake_max_cost);
+ global.maxsslconn = round_2dig(global.maxsslconn);
+
+ if (sslmem <= 0 || global.maxsslconn < sides) {
+ ha_alert("Cannot compute the automatic maxsslconn because global.maxconn is already too "
+ "high for the global.memmax value (%d MB). The absolute maximum possible value "
+ "without SSL is %d, but %d was found and SSL is in use.\n",
+ global.rlimit_memmax,
+ (int)(mem / (STREAM_MAX_COST + 2 * global.tune.bufsize)),
+ global.maxconn);
+ exit(1);
+ }
+
+ if (global.maxsslconn > sides * global.maxconn)
+ global.maxsslconn = sides * global.maxconn;
+
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG))
+ fprintf(stderr, "Note: setting global.maxsslconn to %d\n", global.maxsslconn);
+ }
+#endif
+ else if (!global.maxconn) {
+ /* memmax and maxsslconn are known/unused, compute maxconn automatically */
+ int sides = !!global.ssl_used_frontend + !!global.ssl_used_backend;
+ int64_t mem = global.rlimit_memmax * 1048576ULL;
+ int64_t clearmem;
+ int retried = 0;
+
+ if (global.ssl_used_frontend || global.ssl_used_backend)
+ mem -= global.tune.sslcachesize * 200ULL; // about 200 bytes per SSL cache entry
+
+ mem -= global.maxzlibmem;
+ mem = mem * MEM_USABLE_RATIO;
+
+ clearmem = mem;
+ if (sides)
+ clearmem -= (global.ssl_session_max_cost + global.ssl_handshake_max_cost) * (int64_t)global.maxsslconn;
+
+ /* Principle: we test once to set maxconn according to the free
+ * memory. If it results in values the system rejects, we try a
+ * second time by respecting rlim_fd_max. If it fails again, we
+ * go back to the initial value and will let the final code
+ * dealing with rlimit report the error. That's up to 3 attempts.
+ */
+ do {
+ global.maxconn = clearmem / (STREAM_MAX_COST + 2 * global.tune.bufsize);
+ if (retried == 1)
+ global.maxconn = MIN(global.maxconn, ideal_maxconn);
+ global.maxconn = round_2dig(global.maxconn);
+#ifdef SYSTEM_MAXCONN
+ if (global.maxconn > SYSTEM_MAXCONN)
+ global.maxconn = SYSTEM_MAXCONN;
+#endif /* SYSTEM_MAXCONN */
+
+ if (clearmem <= 0 || !global.maxconn) {
+ ha_alert("Cannot compute the automatic maxconn because global.maxsslconn is already too "
+ "high for the global.memmax value (%d MB). The absolute maximum possible value "
+ "is %d, but %d was found.\n",
+ global.rlimit_memmax,
+ (int)(mem / (global.ssl_session_max_cost + global.ssl_handshake_max_cost)),
+ global.maxsslconn);
+ exit(1);
+ }
+
+ if (check_if_maxsock_permitted(compute_ideal_maxsock(global.maxconn)))
+ break;
+ } while (retried++ < 2);
+
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
+ if (sides && global.maxsslconn > sides * global.maxconn) {
+ fprintf(stderr, "Note: global.maxsslconn is forced to %d which causes global.maxconn "
+ "to be limited to %d. Better reduce global.maxsslconn to get more "
+ "room for extra connections.\n", global.maxsslconn, global.maxconn);
+ }
+ fprintf(stderr, "Note: setting global.maxconn to %d\n", global.maxconn);
+ }
+ }
+
+ global.maxsock = compute_ideal_maxsock(global.maxconn);
+ global.hardmaxconn = global.maxconn;
+ if (!global.maxpipes)
+ global.maxpipes = compute_ideal_maxpipes();
+
+ /* update connection pool thresholds */
+ global.tune.pool_low_count = ((long long)global.maxsock * global.tune.pool_low_ratio + 99) / 100;
+ global.tune.pool_high_count = ((long long)global.maxsock * global.tune.pool_high_ratio + 99) / 100;
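+ /* the "+ 99" makes these ceiling divisions; e.g. with
+ * maxsock = 150000 and pool_low_ratio = 20, pool_low_count
+ * becomes (150000 * 20 + 99) / 100 = 30000.
+ */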
+
+ proxy_adjust_all_maxconn();
+
+ if (global.tune.maxpollevents <= 0)
+ global.tune.maxpollevents = MAX_POLL_EVENTS;
+
+ if (global.tune.runqueue_depth <= 0) {
+ /* tests on various thread counts from 1 to 64 have shown an
+ * optimal queue depth following roughly 1/sqrt(threads).
+ */
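+ /* e.g. with 16 threads: s = my_flsl(16) = 5, then s += 16/5 = 3,
+ * giving s = 8, i.e. about twice sqrt(16), so the depth becomes
+ * RUNQUEUE_DEPTH * 2 / 8.
+ */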
+ int s = my_flsl(global.nbthread);
+ s += (global.nbthread / s); // roughly twice the sqrt.
+ global.tune.runqueue_depth = RUNQUEUE_DEPTH * 2 / s;
+ }
+
+ if (global.tune.recv_enough == 0)
+ global.tune.recv_enough = MIN_RECV_AT_ONCE_ENOUGH;
+
+ if (global.tune.maxrewrite >= global.tune.bufsize / 2)
+ global.tune.maxrewrite = global.tune.bufsize / 2;
+
+ usermsgs_clr(NULL);
+
+ if (arg_mode & (MODE_DEBUG | MODE_FOREGROUND)) {
+ /* command line debug mode inhibits configuration mode */
+ global.mode &= ~(MODE_DAEMON | MODE_QUIET);
+ global.mode |= (arg_mode & (MODE_DEBUG | MODE_FOREGROUND));
+ }
+
+ if (arg_mode & MODE_DAEMON) {
+ /* command line daemon mode inhibits foreground and debug modes */
+ global.mode &= ~(MODE_DEBUG | MODE_FOREGROUND);
+ global.mode |= arg_mode & MODE_DAEMON;
+ }
+
+ global.mode |= (arg_mode & (MODE_QUIET | MODE_VERBOSE));
+
+ if ((global.mode & MODE_DEBUG) && (global.mode & (MODE_DAEMON | MODE_QUIET))) {
+ ha_warning("<debug> mode incompatible with <quiet> and <daemon>. Keeping <debug> only.\n");
+ global.mode &= ~(MODE_DAEMON | MODE_QUIET);
+ }
+
+ /* Realloc trash buffers because global.tune.bufsize may have changed */
+ if (!init_trash_buffers(0)) {
+ ha_alert("failed to initialize trash buffers.\n");
+ exit(1);
+ }
+
+ if (!init_log_buffers()) {
+ ha_alert("failed to initialize log buffers.\n");
+ exit(1);
+ }
+
+ if (!cluster_secret_isset)
+ generate_random_cluster_secret();
+
+ /*
+ * Note: we could register external pollers here.
+ * Built-in pollers have been registered before main().
+ */
+
+ if (!(global.tune.options & GTUNE_USE_KQUEUE))
+ disable_poller("kqueue");
+
+ if (!(global.tune.options & GTUNE_USE_EVPORTS))
+ disable_poller("evports");
+
+ if (!(global.tune.options & GTUNE_USE_EPOLL))
+ disable_poller("epoll");
+
+ if (!(global.tune.options & GTUNE_USE_POLL))
+ disable_poller("poll");
+
+ if (!(global.tune.options & GTUNE_USE_SELECT))
+ disable_poller("select");
+
+ /* Note: we could disable any poller by name here */
+
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
+ list_pollers(stderr);
+ fprintf(stderr, "\n");
+ list_filters(stderr);
+ }
+
+ if (!init_pollers()) {
+ ha_alert("No polling mechanism available.\n"
+ " This may happen when using thread-groups with old pollers (poll/select), or\n"
+ " it is possible that haproxy was built with TARGET=generic and that FD_SETSIZE\n"
+ " is too low on this platform to support maxconn and the number of listeners\n"
+ " and servers. You should rebuild haproxy specifying your system using TARGET=\n"
+ " in order to support other polling systems (poll, epoll, kqueue) or reduce the\n"
+ " global maxconn setting to accommodate the system's limitation. For reference,\n"
+ " FD_SETSIZE=%d on this system, global.maxconn=%d resulting in a maximum of\n"
+ " %d file descriptors. You should thus reduce global.maxconn by %d. Also,\n"
+ " check build settings using 'haproxy -vv'.\n\n",
+ FD_SETSIZE, global.maxconn, global.maxsock, (global.maxsock + 1 - FD_SETSIZE) / 2);
+ exit(1);
+ }
+ if (global.mode & (MODE_VERBOSE|MODE_DEBUG)) {
+ printf("Using %s() as the polling mechanism.\n", cur_poller.name);
+ }
+
+ if (!global.node)
+ global.node = strdup(hostname);
+
+ /* stop disabled proxies */
+ for (px = proxies_list; px; px = px->next) {
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ stop_proxy(px);
+ }
+
+ if (!hlua_post_init())
+ exit(1);
+
+ /* Set the per-thread pool cache size to the default value if not set.
+ * This is the right place to decide to automatically adjust it (e.g.
+ * check L2 cache size, thread counts or take into account certain
+ * expensive pools).
+ */
+ if (!global.tune.pool_cache_size)
+ global.tune.pool_cache_size = CONFIG_HAP_POOL_CACHE_SIZE;
+
+ /* fill in some info about our version and build options */
+ chunk_reset(&trash);
+
+ /* toolchain */
+ cc = chunk_newstr(&trash);
+#if defined(__clang_version__)
+ chunk_appendf(&trash, "clang-" __clang_version__);
+#elif defined(__VERSION__)
+ chunk_appendf(&trash, "gcc-" __VERSION__);
+#endif
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+ chunk_appendf(&trash, "+asan");
+#endif
+ /* toolchain opts */
+ cflags = chunk_newstr(&trash);
+#ifdef BUILD_CC
+ chunk_appendf(&trash, "%s", BUILD_CC);
+#endif
+#ifdef BUILD_CFLAGS
+ chunk_appendf(&trash, " %s", BUILD_CFLAGS);
+#endif
+#ifdef BUILD_DEBUG
+ chunk_appendf(&trash, " %s", BUILD_DEBUG);
+#endif
+ /* settings */
+ opts = chunk_newstr(&trash);
+#ifdef BUILD_TARGET
+ chunk_appendf(&trash, "TARGET='%s'", BUILD_TARGET);
+#endif
+#ifdef BUILD_CPU
+ chunk_appendf(&trash, " CPU='%s'", BUILD_CPU);
+#endif
+#ifdef BUILD_OPTIONS
+ chunk_appendf(&trash, " %s", BUILD_OPTIONS);
+#endif
+
+ post_mortem_add_component("haproxy", haproxy_version, cc, cflags, opts, argv[0]);
+}
+
+void deinit(void)
+{
+ struct proxy *p = proxies_list, *p0;
+ struct wordlist *wl, *wlb;
+ struct uri_auth *uap, *ua = NULL;
+ struct logger *log, *logb;
+ struct build_opts_str *bol, *bolb;
+ struct post_deinit_fct *pdf, *pdfb;
+ struct proxy_deinit_fct *pxdf, *pxdfb;
+ struct server_deinit_fct *srvdf, *srvdfb;
+ struct per_thread_init_fct *tif, *tifb;
+ struct per_thread_deinit_fct *tdf, *tdfb;
+ struct per_thread_alloc_fct *taf, *tafb;
+ struct per_thread_free_fct *tff, *tffb;
+ struct post_server_check_fct *pscf, *pscfb;
+ struct post_check_fct *pcf, *pcfb;
+ struct post_proxy_check_fct *ppcf, *ppcfb;
+ struct pre_check_fct *prcf, *prcfb;
+ struct cfg_postparser *pprs, *pprsb;
+ int cur_fd;
+
+ /* the user may want to skip this phase */
+ if (global.tune.options & GTUNE_QUICK_EXIT)
+ return;
+
+ /* At this point the listeners state is weird:
+ * - most listeners are still bound and referenced in their protocol
+ * - some might be zombies that are not in their proto anymore, but
+ * still appear in their proxy's listeners with a valid FD.
+ * - some might be stopped and still appear in their proxy as FD #-1
+ * - among all of them, some might be inherited hence shared and we're
+ * not allowed to pause them or whatever, we must just close them.
+ * - finally some are not listeners (pipes, logs, stdout, etc) and
+ * must be left intact.
+ *
+ * The safe way to proceed is to unbind (and close) whatever is not yet
+ * unbound so that no more receiver/listener remains alive. Then close
+ * remaining listener FDs, which correspond to zombie listeners (those
+ * belonging to disabled proxies that were in another process).
+ * objt_listener() would be cleaner here but not converted yet.
+ */
+ protocol_unbind_all();
+
+ for (cur_fd = 0; cur_fd < global.maxsock; cur_fd++) {
+ if (!fdtab || !fdtab[cur_fd].owner)
+ continue;
+
+ if (fdtab[cur_fd].iocb == &sock_accept_iocb) {
+ struct listener *l = fdtab[cur_fd].owner;
+
+ BUG_ON(l->state != LI_INIT);
+ unbind_listener(l);
+ }
+ }
+
+ deinit_signals();
+ while (p) {
+ /* build a list of unique uri_auths */
+ if (!ua)
+ ua = p->uri_auth;
+ else {
+ /* check if p->uri_auth is unique */
+ for (uap = ua; uap; uap=uap->next)
+ if (uap == p->uri_auth)
+ break;
+
+ if (!uap && p->uri_auth) {
+ /* add it since it's unique */
+ p->uri_auth->next = ua;
+ ua = p->uri_auth;
+ }
+ }
+
+ p0 = p;
+ p = p->next;
+ free_proxy(p0);
+ }/* end while(p) */
+
+ /* we don't need to free sink_proxies_list nor cfg_log_forward proxies since
+ * they are respectively cleaned up in sink_deinit() and deinit_log_forward()
+ */
+
+ /* destroy all referenced defaults proxies */
+ proxy_destroy_all_unref_defaults();
+
+ while (ua) {
+ struct stat_scope *scope, *scopep;
+
+ uap = ua;
+ ua = ua->next;
+
+ free(uap->uri_prefix);
+ free(uap->auth_realm);
+ free(uap->node);
+ free(uap->desc);
+
+ userlist_free(uap->userlist);
+ free_act_rules(&uap->http_req_rules);
+
+ scope = uap->scope;
+ while (scope) {
+ scopep = scope;
+ scope = scope->next;
+
+ free(scopep->px_id);
+ free(scopep);
+ }
+
+ free(uap);
+ }
+
+ userlist_free(userlist);
+
+ cfg_unregister_sections();
+
+ deinit_log_buffers();
+
+ list_for_each_entry(pdf, &post_deinit_list, list)
+ pdf->fct();
+
+ ha_free(&global.log_send_hostname);
+ chunk_destroy(&global.log_tag);
+ ha_free(&global.chroot);
+ ha_free(&global.pidfile);
+ ha_free(&global.node);
+ ha_free(&global.desc);
+ ha_free(&oldpids);
+ ha_free(&old_argv);
+ ha_free(&localpeer);
+ ha_free(&global.server_state_base);
+ ha_free(&global.server_state_file);
+ task_destroy(idle_conn_task);
+ idle_conn_task = NULL;
+
+ list_for_each_entry_safe(log, logb, &global.loggers, list) {
+ LIST_DEL_INIT(&log->list);
+ free_logger(log);
+ }
+
+ list_for_each_entry_safe(wl, wlb, &cfg_cfgfiles, list) {
+ free(wl->s);
+ LIST_DELETE(&wl->list);
+ free(wl);
+ }
+
+ list_for_each_entry_safe(bol, bolb, &build_opts_list, list) {
+ if (bol->must_free)
+ free((void *)bol->str);
+ LIST_DELETE(&bol->list);
+ free(bol);
+ }
+
+ list_for_each_entry_safe(pxdf, pxdfb, &proxy_deinit_list, list) {
+ LIST_DELETE(&pxdf->list);
+ free(pxdf);
+ }
+
+ list_for_each_entry_safe(pdf, pdfb, &post_deinit_list, list) {
+ LIST_DELETE(&pdf->list);
+ free(pdf);
+ }
+
+ list_for_each_entry_safe(srvdf, srvdfb, &server_deinit_list, list) {
+ LIST_DELETE(&srvdf->list);
+ free(srvdf);
+ }
+
+ list_for_each_entry_safe(pcf, pcfb, &post_check_list, list) {
+ LIST_DELETE(&pcf->list);
+ free(pcf);
+ }
+
+ list_for_each_entry_safe(pscf, pscfb, &post_server_check_list, list) {
+ LIST_DELETE(&pscf->list);
+ free(pscf);
+ }
+
+ list_for_each_entry_safe(ppcf, ppcfb, &post_proxy_check_list, list) {
+ LIST_DELETE(&ppcf->list);
+ free(ppcf);
+ }
+
+ list_for_each_entry_safe(prcf, prcfb, &pre_check_list, list) {
+ LIST_DELETE(&prcf->list);
+ free(prcf);
+ }
+
+ list_for_each_entry_safe(tif, tifb, &per_thread_init_list, list) {
+ LIST_DELETE(&tif->list);
+ free(tif);
+ }
+
+ list_for_each_entry_safe(tdf, tdfb, &per_thread_deinit_list, list) {
+ LIST_DELETE(&tdf->list);
+ free(tdf);
+ }
+
+ list_for_each_entry_safe(taf, tafb, &per_thread_alloc_list, list) {
+ LIST_DELETE(&taf->list);
+ free(taf);
+ }
+
+ list_for_each_entry_safe(tff, tffb, &per_thread_free_list, list) {
+ LIST_DELETE(&tff->list);
+ free(tff);
+ }
+
+ list_for_each_entry_safe(pprs, pprsb, &postparsers, list) {
+ LIST_DELETE(&pprs->list);
+ free(pprs);
+ }
+
+ vars_prune(&proc_vars, NULL, NULL);
+ pool_destroy_all();
+ deinit_pollers();
+} /* end deinit() */
+
+__attribute__((noreturn)) void deinit_and_exit(int status)
+{
+ global.mode |= MODE_STOPPING;
+ deinit();
+ exit(status);
+}
+
+/* Runs the polling loop */
+void run_poll_loop()
+{
+ int next, wake;
+
+ _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_IN_LOOP);
+
+ clock_update_date(0,1);
+ while (1) {
+ wake_expired_tasks();
+
+ /* check if we caught some signals and process them in the
+ first thread */
+ if (signal_queue_len && tid == 0) {
+ activity[tid].wake_signal++;
+ signal_process_queue();
+ }
+
+ /* Process a few tasks */
+ process_runnable_tasks();
+
+ /* also stop if we failed to cleanly stop all tasks */
+ if (killed > 1)
+ break;
+
+ /* expire immediately if events or signals are pending */
+ wake = 1;
+ if (thread_has_tasks())
+ activity[tid].wake_tasks++;
+ else {
+ _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_SLEEPING);
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_NOTIFIED);
+ __ha_barrier_atomic_store();
+ if (thread_has_tasks()) {
+ activity[tid].wake_tasks++;
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_SLEEPING);
+ } else if (signal_queue_len) {
+ /* this check is required after setting TH_FL_SLEEPING to avoid
+ * a race with wakeup on signals using wake_threads() */
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_SLEEPING);
+ } else
+ wake = 0;
+ }
+
+ if (!wake) {
+ int i;
+
+ if (stopping) {
+ /* stop muxes/quic-conns before acknowledging stopping */
+ if (!(tg_ctx->stopping_threads & ti->ltid_bit)) {
+ task_wakeup(mux_stopping_data[tid].task, TASK_WOKEN_OTHER);
+ wake = 1;
+ }
+
+ if (_HA_ATOMIC_OR_FETCH(&tg_ctx->stopping_threads, ti->ltid_bit) == ti->ltid_bit &&
+ _HA_ATOMIC_OR_FETCH(&stopping_tgroup_mask, tg->tgid_bit) == tg->tgid_bit) {
+ /* first one to detect it, notify all threads that stopping was just set */
+ for (i = 0; i < global.nbthread; i++) {
+ if (_HA_ATOMIC_LOAD(&ha_thread_info[i].tg->threads_enabled) &
+ ha_thread_info[i].ltid_bit &
+ ~_HA_ATOMIC_LOAD(&ha_thread_info[i].tg_ctx->stopping_threads))
+ wake_thread(i);
+ }
+ }
+ }
+
+ /* stop when there's nothing left to do */
+ if ((jobs - unstoppable_jobs) == 0 &&
+ (_HA_ATOMIC_LOAD(&stopping_tgroup_mask) & all_tgroups_mask) == all_tgroups_mask) {
+ /* check that all threads are aware of the stopping status */
+ for (i = 0; i < global.nbtgroups; i++)
+ if ((_HA_ATOMIC_LOAD(&ha_tgroup_ctx[i].stopping_threads) &
+ _HA_ATOMIC_LOAD(&ha_tgroup_info[i].threads_enabled)) !=
+ _HA_ATOMIC_LOAD(&ha_tgroup_info[i].threads_enabled))
+ break;
+#ifdef USE_THREAD
+ if (i == global.nbtgroups) {
+ /* all are OK, let's wake them all and stop */
+ for (i = 0; i < global.nbthread; i++)
+ if (i != tid && _HA_ATOMIC_LOAD(&ha_thread_info[i].tg->threads_enabled) & ha_thread_info[i].ltid_bit)
+ wake_thread(i);
+ break;
+ }
+#endif
+ }
+ }
+
+ /* If we have to sleep, measure how long */
+ next = wake ? TICK_ETERNITY : next_timer_expiry();
+
+ /* The poller will ensure it returns around <next> */
+ cur_poller.poll(&cur_poller, next, wake);
+
+ activity[tid].loops++;
+ }
+
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_IN_LOOP);
+}
+
+static void *run_thread_poll_loop(void *data)
+{
+ struct per_thread_alloc_fct *ptaf;
+ struct per_thread_init_fct *ptif;
+ struct per_thread_deinit_fct *ptdf;
+ struct per_thread_free_fct *ptff;
+ static int init_left = 0;
+ __decl_thread(static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER);
+ __decl_thread(static pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER);
+
+ ha_set_thread(data);
+ set_thread_cpu_affinity();
+ clock_set_local_source();
+
+#ifdef USE_THREAD
+ ha_thread_info[tid].pth_id = ha_get_pthread_id(tid);
+#endif
+ ha_thread_info[tid].stack_top = __builtin_frame_address(0);
+
+ /* thread is started, from now on it is neither idle nor harmless */
+ thread_harmless_end();
+ thread_idle_end();
+ _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_STARTED);
+
+ /* Now, run the per-thread inits one thread at a time. This is better since
+ * some init code is a bit tricky and may release global resources
+ * after reallocating them locally. This will also ensure there is
+ * no race on file descriptors allocation.
+ */
+#ifdef USE_THREAD
+ pthread_mutex_lock(&init_mutex);
+#endif
+ /* The first thread must set the number of threads left */
+ if (!init_left)
+ init_left = global.nbthread;
+ init_left--;
+
+ clock_init_thread_date();
+
+ /* per-thread alloc calls performed here are not allowed to snoop on
+ * other threads, so they are free to initialize at their own rhythm
+ * as long as they act as if they were alone. None of them may rely
+ * on resources initialized by the other ones.
+ */
+ list_for_each_entry(ptaf, &per_thread_alloc_list, list) {
+ if (!ptaf->fct()) {
+ ha_alert("failed to allocate resources for thread %u.\n", tid);
+#ifdef USE_THREAD
+ pthread_mutex_unlock(&init_mutex);
+#endif
+ exit(1);
+ }
+ }
+
+ /* per-thread init calls performed here are not allowed to snoop on
+ * other threads, so they are free to initialize at their own rhythm
+ * as long as they act as if they were alone.
+ */
+ list_for_each_entry(ptif, &per_thread_init_list, list) {
+ if (!ptif->fct()) {
+ ha_alert("failed to initialize thread %u.\n", tid);
+#ifdef USE_THREAD
+ pthread_mutex_unlock(&init_mutex);
+#endif
+ exit(1);
+ }
+ }
+
+ /* enabling protocols will result in fd_insert() calls being
+ * performed; we want all threads to have already allocated their
+ * local fd tables before doing so, thus only the last thread does it.
+ */
+ if (init_left == 0)
+ protocol_enable_all();
+
+#ifdef USE_THREAD
+ pthread_cond_broadcast(&init_cond);
+ pthread_mutex_unlock(&init_mutex);
+
+ /* now wait for other threads to finish starting */
+ pthread_mutex_lock(&init_mutex);
+ while (init_left)
+ pthread_cond_wait(&init_cond, &init_mutex);
+ pthread_mutex_unlock(&init_mutex);
+#endif
+
+#if defined(PR_SET_NO_NEW_PRIVS) && defined(USE_PRCTL)
+ /* Let's refrain from using setuid executables. This way the impact of
+ * a potential vulnerability in a library remains limited. It may
+ * impact external checks but who cares about them anyway? In the
+ * worst case it's possible to disable the option. Obviously we do this
+ * in workers only. We can't hard-fail on this one as it really is
+ * implementation dependent though we're interested in feedback, hence
+ * the warning.
+ */
+ if (!(global.tune.options & GTUNE_INSECURE_SETUID) && !master) {
+ static int warn_fail;
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1 && !_HA_ATOMIC_FETCH_ADD(&warn_fail, 1)) {
+ ha_warning("Failed to disable setuid, please report to developers with detailed "
+ "information about your operating system. You can silence this warning "
+ "by adding 'insecure-setuid-wanted' in the 'global' section.\n");
+ }
+ }
+#endif
+
+#if defined(RLIMIT_NPROC)
+ /* all threads have started, it's now time to prevent any new thread
+ * or process from starting. Obviously we do this in workers only. We
+ * can't hard-fail on this one as it really is implementation dependent
+ * though we're interested in feedback, hence the warning.
+ */
+ if (!(global.tune.options & GTUNE_INSECURE_FORK) && !master) {
+ struct rlimit limit = { .rlim_cur = 0, .rlim_max = 0 };
+ static int warn_fail;
+
+ if (setrlimit(RLIMIT_NPROC, &limit) == -1 && !_HA_ATOMIC_FETCH_ADD(&warn_fail, 1)) {
+ ha_warning("Failed to disable forks, please report to developers with detailed "
+ "information about your operating system. You can silence this warning "
+ "by adding 'insecure-fork-wanted' in the 'global' section.\n");
+ }
+ }
+#endif
+ run_poll_loop();
+
+ list_for_each_entry(ptdf, &per_thread_deinit_list, list)
+ ptdf->fct();
+
+ list_for_each_entry(ptff, &per_thread_free_list, list)
+ ptff->fct();
+
+#ifdef USE_THREAD
+ if (!_HA_ATOMIC_AND_FETCH(&ha_tgroup_info[ti->tgid-1].threads_enabled, ~ti->ltid_bit))
+ _HA_ATOMIC_AND(&all_tgroups_mask, ~tg->tgid_bit);
+ if (!_HA_ATOMIC_AND_FETCH(&tg_ctx->stopping_threads, ~ti->ltid_bit))
+ _HA_ATOMIC_AND(&stopping_tgroup_mask, ~tg->tgid_bit);
+ if (tid > 0)
+ pthread_exit(NULL);
+#endif
+ return NULL;
+}
+
+/* set uid/gid depending on global settings */
+static void set_identity(const char *program_name)
+{
+ int from_uid __maybe_unused = geteuid();
+
+ if (global.gid) {
+ if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
+ ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
+ " without 'uid'/'user' is generally useless.\n", program_name);
+
+ if (setgid(global.gid) == -1) {
+ ha_alert("[%s.main()] Cannot set gid %d.\n", program_name, global.gid);
+ protocol_unbind_all();
+ exit(1);
+ }
+ }
+
+#if defined(USE_LINUX_CAP)
+ if (prepare_caps_for_setuid(from_uid, global.uid) < 0) {
+ ha_alert("[%s.main()] Cannot switch uid to %d.\n", program_name, global.uid);
+ protocol_unbind_all();
+ exit(1);
+ }
+#endif
+
+ if (global.uid && setuid(global.uid) == -1) {
+ ha_alert("[%s.main()] Cannot set uid %d.\n", program_name, global.uid);
+ protocol_unbind_all();
+ exit(1);
+ }
+
+#if defined(USE_LINUX_CAP)
+ if (finalize_caps_after_setuid(from_uid, global.uid) < 0) {
+ ha_alert("[%s.main()] Cannot switch uid to %d.\n", program_name, global.uid);
+ protocol_unbind_all();
+ exit(1);
+ }
+#endif
+}
+
+int main(int argc, char **argv)
+{
+ int err, retry;
+ struct rlimit limit;
+ int pidfd = -1;
+ int intovf = (unsigned char)argc + 1; /* let the compiler know it's strictly positive */
+
+ /* Catch broken toolchains */
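+ /* Note: haproxy is expected to be built with wrapping signed
+ * arithmetic (e.g. -fwrapv), so adding 0x7FFFFFFF to a strictly
+ * positive int must wrap to a negative value, making the overflow
+ * test below evaluate to false; a compiler that assumes overflow
+ * never happens would fold it to true instead.
+ */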
+ if (sizeof(long) != sizeof(void *) || (intovf + 0x7FFFFFFF >= intovf)) {
+ const char *msg;
+
+ if (sizeof(long) != sizeof(void *))
+ /* Apparently MingW64 was not made for us and can also break openssl */
+ msg = "The compiler this program was built with uses unsupported integral type sizes.\n"
+ "Most likely it follows the unsupported LLP64 model. Never try to link HAProxy\n"
+ "against libraries built with that compiler either! Please only use a compiler\n"
+ "producing ILP32 or LP64 programs for both programs and libraries.\n";
+ else if (intovf + 0x7FFFFFFF >= intovf)
+ /* Catch forced CFLAGS that miss 2's complement integer overflow */
+ msg = "The source code was miscompiled by the compiler, which usually indicates that\n"
+ "some of the CFLAGS needed to work around overzealous compiler optimizations\n"
+ "were overwritten at build time. Please do not force CFLAGS, and read Makefile\n"
+ "and INSTALL files to decide on the best way to pass your local build options.\n";
+ else
+ msg = "Bug in the compiler bug detection code, please report it to developers!\n";
+
+ fprintf(stderr,
+ "FATAL ERROR: invalid code detected -- cannot go further, please recompile!\n"
+ "%s"
+ "\nBuild options :"
+#ifdef BUILD_TARGET
+ "\n TARGET = " BUILD_TARGET
+#endif
+#ifdef BUILD_CPU
+ "\n CPU = " BUILD_CPU
+#endif
+#ifdef BUILD_CC
+ "\n CC = " BUILD_CC
+#endif
+#ifdef BUILD_CFLAGS
+ "\n CFLAGS = " BUILD_CFLAGS
+#endif
+#ifdef BUILD_OPTIONS
+ "\n OPTIONS = " BUILD_OPTIONS
+#endif
+#ifdef BUILD_DEBUG
+ "\n DEBUG = " BUILD_DEBUG
+#endif
+ "\n\n", msg);
+
+ return 1;
+ }
+
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /* take a copy of initial limits before we possibly change them */
+ getrlimit(RLIMIT_NOFILE, &limit);
+
+ if (limit.rlim_max == RLIM_INFINITY)
+ limit.rlim_max = limit.rlim_cur;
+ rlim_fd_cur_at_boot = limit.rlim_cur;
+ rlim_fd_max_at_boot = limit.rlim_max;
+
+ /* process all initcalls in order of potential dependency */
+ RUN_INITCALLS(STG_PREPARE);
+ RUN_INITCALLS(STG_LOCK);
+ RUN_INITCALLS(STG_REGISTER);
+
+ /* now's time to initialize early boot variables */
+ init_early(argc, argv);
+
+ /* handles argument parsing */
+ init_args(argc, argv);
+
+ RUN_INITCALLS(STG_ALLOC);
+ RUN_INITCALLS(STG_POOL);
+
+ /* some code really needs to have the trash properly allocated */
+ if (!trash.area) {
+ ha_alert("failed to initialize trash buffers.\n");
+ exit(1);
+ }
+
+ RUN_INITCALLS(STG_INIT);
+
+ /* this is the late init where the config is parsed */
+ init(argc, argv);
+
+ signal_register_fct(SIGQUIT, dump, SIGQUIT);
+ signal_register_fct(SIGUSR1, sig_soft_stop, SIGUSR1);
+ signal_register_fct(SIGHUP, sig_dump_state, SIGHUP);
+ signal_register_fct(SIGUSR2, NULL, 0);
+
+ /* Always catch SIGPIPE even on platforms which define MSG_NOSIGNAL.
+ * Some recent FreeBSD setups report broken pipes, and MSG_NOSIGNAL
+ * was defined there, so let's stay on the safe side.
+ */
+ signal_register_fct(SIGPIPE, NULL, 0);
+
+ /* ulimits */
+ if (!global.rlimit_nofile)
+ global.rlimit_nofile = global.maxsock;
+
+ if (global.rlimit_nofile) {
+ limit.rlim_cur = global.rlimit_nofile;
+ limit.rlim_max = MAX(rlim_fd_max_at_boot, limit.rlim_cur);
+
+ if ((global.fd_hard_limit && limit.rlim_cur > global.fd_hard_limit) ||
+ raise_rlim_nofile(NULL, &limit) != 0) {
+ getrlimit(RLIMIT_NOFILE, &limit);
+ if (global.fd_hard_limit && limit.rlim_cur > global.fd_hard_limit)
+ limit.rlim_cur = global.fd_hard_limit;
+
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Cannot raise FD limit to %d, limit is %d.\n",
+ argv[0], global.rlimit_nofile, (int)limit.rlim_cur);
+ exit(1);
+ }
+ else {
+ /* try to set it to the max possible at least */
+ limit.rlim_cur = limit.rlim_max;
+ if (global.fd_hard_limit && limit.rlim_cur > global.fd_hard_limit)
+ limit.rlim_cur = global.fd_hard_limit;
+
+ if (raise_rlim_nofile(&limit, &limit) == 0)
+ getrlimit(RLIMIT_NOFILE, &limit);
+
+ ha_warning("[%s.main()] Cannot raise FD limit to %d, limit is %d.\n",
+ argv[0], global.rlimit_nofile, (int)limit.rlim_cur);
+ global.rlimit_nofile = limit.rlim_cur;
+ }
+ }
+ }
+
+ if (global.rlimit_memmax) {
+ limit.rlim_cur = limit.rlim_max =
+ global.rlimit_memmax * 1048576ULL;
+#ifdef RLIMIT_AS
+ if (setrlimit(RLIMIT_AS, &limit) == -1) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n",
+ argv[0], global.rlimit_memmax);
+ exit(1);
+ }
+ else
+ ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n",
+ argv[0], global.rlimit_memmax);
+ }
+#else
+ if (setrlimit(RLIMIT_DATA, &limit) == -1) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n",
+ argv[0], global.rlimit_memmax);
+ exit(1);
+ }
+ else
+ ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n",
+ argv[0], global.rlimit_memmax);
+ }
+#endif
+ }
+
+ /* Try to get the listeners' FDs from the previous process using
+ * _getsocks on the stat socket. This must never be done in wait
+ * mode nor in check mode.
+ */
+ if (old_unixsocket &&
+ !(global.mode & (MODE_MWORKER_WAIT|MODE_CHECK|MODE_CHECK_CONDITION))) {
+ if (strcmp("/dev/null", old_unixsocket) != 0) {
+ if (sock_get_old_sockets(old_unixsocket) != 0) {
+ ha_alert("Failed to get the sockets from the old process!\n");
+ if (!(global.mode & MODE_MWORKER))
+ exit(1);
+ }
+ }
+ }
+
+ /* We will loop at most 100 times with 10 ms delay each time.
+ * That's at most 1 second. We only send a signal to old pids
+ * if we cannot grab at least one port.
+ */
+ retry = MAX_START_RETRIES;
+ err = ERR_NONE;
+ while (retry >= 0) {
+ struct timeval w;
+ err = protocol_bind_all(retry == 0 || nb_oldpids == 0);
+ /* exit the loop on no error or fatal error */
+ if ((err & (ERR_RETRYABLE|ERR_FATAL)) != ERR_RETRYABLE)
+ break;
+ if (nb_oldpids == 0 || retry == 0)
+ break;
+
+ /* FIXME-20060514: Solaris and OpenBSD do not support shutdown() on
+ * listening sockets. So on those platforms, it would be wiser to
+ * simply send SIGUSR1, which will not be undoable.
+ */
+ if (tell_old_pids(SIGTTOU) == 0) {
+ /* no need to wait if we can't contact old pids */
+ retry = 0;
+ continue;
+ }
+ /* give some time to old processes to stop listening */
+ w.tv_sec = 0;
+ w.tv_usec = 10*1000;
+ select(0, NULL, NULL, NULL, &w);
+ retry--;
+ }
+
+ /* Note: protocol_bind_all() sends an alert when it fails. */
+ if ((err & ~ERR_WARN) != ERR_NONE) {
+ ha_alert("[%s.main()] Some protocols failed to start their listeners! Exiting.\n", argv[0]);
+ if (retry != MAX_START_RETRIES && nb_oldpids)
+ tell_old_pids(SIGTTIN);
+ protocol_unbind_all(); /* cleanup everything we can */
+ exit(1);
+ }
+
+ if (!(global.mode & MODE_MWORKER_WAIT) && listeners == 0) {
+ ha_alert("[%s.main()] No enabled listener found (check for 'bind' directives) ! Exiting.\n", argv[0]);
+ /* Note: we don't have to send anything to the old pids because we
+ * never stopped them. */
+ exit(1);
+ }
+
+ /* OK, all listeners should now be bound; close any leftover sockets
+ * the previous process gave us, as we don't need them anymore.
+ */
+ sock_drop_unused_old_sockets();
+
+ /* prepare pause/play signals */
+ signal_register_fct(SIGTTOU, sig_pause, SIGTTOU);
+ signal_register_fct(SIGTTIN, sig_listen, SIGTTIN);
+
+ /* MODE_QUIET can inhibit alerts and warnings below this line */
+
+ if (getenv("HAPROXY_MWORKER_REEXEC") != NULL) {
+ /* either stdin/out/err are already closed or should stay as they are. */
+ if ((global.mode & MODE_DAEMON)) {
+ /* daemon mode re-executing, stdin/stdout/stderr are already closed so keep quiet */
+ global.mode &= ~MODE_VERBOSE;
+ global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
+ }
+ } else {
+ if ((global.mode & MODE_QUIET) && !(global.mode & MODE_VERBOSE)) {
+ /* detach from the tty */
+ stdio_quiet(-1);
+ }
+ }
+
+ /* open log & pid files before the chroot */
+ if ((global.mode & MODE_DAEMON || global.mode & MODE_MWORKER) &&
+ !(global.mode & MODE_MWORKER_WAIT) && global.pidfile != NULL) {
+ unlink(global.pidfile);
+ pidfd = open(global.pidfile, O_CREAT | O_WRONLY | O_TRUNC, 0644);
+ if (pidfd < 0) {
+ ha_alert("[%s.main()] Cannot create pidfile %s\n", argv[0], global.pidfile);
+ if (nb_oldpids)
+ tell_old_pids(SIGTTIN);
+ protocol_unbind_all();
+ exit(1);
+ }
+ }
+
+ if ((global.last_checks & LSTCHK_NETADM) && global.uid) {
+ ha_alert("[%s.main()] Some configuration options require full privileges, so global.uid cannot be changed.\n"
+ "", argv[0]);
+ protocol_unbind_all();
+ exit(1);
+ }
+
+ /* If the user is not root, we'll still let them try the configuration
+ * but we inform them that unexpected behaviour may occur.
+ */
+ if ((global.last_checks & LSTCHK_NETADM) && getuid())
+ ha_warning("[%s.main()] Some options which require full privileges"
+ " might not work well.\n"
+ "", argv[0]);
+
+ if ((global.mode & (MODE_MWORKER|MODE_DAEMON)) == 0) {
+
+ /* chroot if needed */
+ if (global.chroot != NULL) {
+ if (chroot(global.chroot) == -1 || chdir("/") == -1) {
+ ha_alert("[%s.main()] Cannot chroot(%s).\n", argv[0], global.chroot);
+ if (nb_oldpids)
+ tell_old_pids(SIGTTIN);
+ protocol_unbind_all();
+ exit(1);
+ }
+ }
+ }
+
+ if (nb_oldpids && !(global.mode & MODE_MWORKER_WAIT))
+ nb_oldpids = tell_old_pids(oldpids_sig);
+
+ /* send a SIGTERM to workers who have a too high reloads number */
+ if ((global.mode & MODE_MWORKER) && !(global.mode & MODE_MWORKER_WAIT))
+ mworker_kill_max_reloads(SIGTERM);
+
+ /* Note that any error at this stage will be fatal because we will not
+ * be able to restart the old pids.
+ */
+
+ if ((global.mode & (MODE_MWORKER | MODE_DAEMON)) == 0)
+ set_identity(argv[0]);
+
+ /* check ulimits */
+ limit.rlim_cur = limit.rlim_max = 0;
+ getrlimit(RLIMIT_NOFILE, &limit);
+ if (limit.rlim_cur < global.maxsock) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] FD limit (%d) too low for maxconn=%d/maxsock=%d. "
+ "Please raise 'ulimit-n' to %d or more to avoid any trouble.\n",
+ argv[0], (int)limit.rlim_cur, global.maxconn, global.maxsock,
+ global.maxsock);
+ exit(1);
+ }
+ else
+ ha_alert("[%s.main()] FD limit (%d) too low for maxconn=%d/maxsock=%d. "
+ "Please raise 'ulimit-n' to %d or more to avoid any trouble.\n",
+ argv[0], (int)limit.rlim_cur, global.maxconn, global.maxsock,
+ global.maxsock);
+ }
+
+ if (global.prealloc_fd && fcntl((int)limit.rlim_cur - 1, F_GETFD) == -1) {
+ if (dup2(0, (int)limit.rlim_cur - 1) == -1)
+ ha_warning("[%s.main()] Unable to preallocate file descriptor %d : %s",
+ argv[0], (int)limit.rlim_cur - 1, strerror(errno));
+ else
+ close((int)limit.rlim_cur - 1);
+ }
+
+ /* update the ready date one last time to also account for final setup time */
+ clock_update_date(0, 1);
+ clock_adjust_now_offset();
+ ready_date = date;
+
+ if (global.mode & (MODE_DAEMON | MODE_MWORKER | MODE_MWORKER_WAIT)) {
+ int ret = 0;
+ int in_parent = 0;
+ int devnullfd = -1;
+
+ /*
+ * if daemon + mworker: must fork here to let a master
+ * process live in background before forking children
+ */
+
+ if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL)
+ && (global.mode & MODE_MWORKER)
+ && (global.mode & MODE_DAEMON)) {
+ ret = fork();
+ if (ret < 0) {
+ ha_alert("[%s.main()] Cannot fork.\n", argv[0]);
+ protocol_unbind_all();
+ exit(1); /* there has been an error */
+ } else if (ret > 0) { /* parent leave to daemonize */
+ exit(0);
+ } else /* change the process group ID in the child (master process) */
+ setsid();
+ }
+
+
+ /* if in master-worker mode, write the PID of the father */
+ if (global.mode & MODE_MWORKER) {
+ char pidstr[100];
+ snprintf(pidstr, sizeof(pidstr), "%d\n", (int)getpid());
+ if (pidfd >= 0)
+ DISGUISE(write(pidfd, pidstr, strlen(pidstr)));
+ }
+
+ /* the father launches the required number of processes */
+ if (!(global.mode & MODE_MWORKER_WAIT)) {
+ struct ring *tmp_startup_logs = NULL;
+
+ if (global.mode & MODE_MWORKER)
+ mworker_ext_launch_all();
+
+ /* at this point the worker must have its own startup_logs buffer */
+ tmp_startup_logs = startup_logs_dup(startup_logs);
+ ret = fork();
+ if (ret < 0) {
+ ha_alert("[%s.main()] Cannot fork.\n", argv[0]);
+ protocol_unbind_all();
+ exit(1); /* there has been an error */
+ }
+ else if (ret == 0) { /* child breaks here */
+ startup_logs_free(startup_logs);
+ startup_logs = tmp_startup_logs;
+ /* This one must not be exported, it's internal! */
+ unsetenv("HAPROXY_MWORKER_REEXEC");
+ ha_random_jump96(1);
+ }
+ else { /* parent here */
+ in_parent = 1;
+
+ if (pidfd >= 0 && !(global.mode & MODE_MWORKER)) {
+ char pidstr[100];
+ snprintf(pidstr, sizeof(pidstr), "%d\n", ret);
+ DISGUISE(write(pidfd, pidstr, strlen(pidstr)));
+ }
+ if (global.mode & MODE_MWORKER) {
+ struct mworker_proc *child;
+
+ ha_notice("New worker (%d) forked\n", ret);
+ /* find the right mworker_proc */
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads == 0 &&
+ child->options & PROC_O_TYPE_WORKER &&
+ child->pid == -1) {
+ child->timestamp = date.tv_sec;
+ child->pid = ret;
+ child->version = strdup(haproxy_version);
+ /* at this step the fd is bound for the worker; set it to -1 so
+ * it can be closed in case of errors in mworker_cleanup_proc() */
+ child->ipc_fd[1] = -1;
+ break;
+ }
+ }
+ }
+ }
+
+ } else {
+ /* wait mode */
+ in_parent = 1;
+ }
+
+ /* close the pidfile both in children and father */
+ if (pidfd >= 0) {
+ //lseek(pidfd, 0, SEEK_SET); /* debug: emulate eglibc bug */
+ close(pidfd);
+ }
+
+ /* We won't ever use this anymore */
+ ha_free(&global.pidfile);
+
+ if (in_parent) {
+ if (global.mode & (MODE_MWORKER|MODE_MWORKER_WAIT)) {
+ master = 1;
+
+ if ((!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) &&
+ (global.mode & MODE_DAEMON)) {
+ /* detach from the tty, this is required to properly daemonize. */
+ if ((getenv("HAPROXY_MWORKER_REEXEC") == NULL))
+ stdio_quiet(-1);
+
+ global.mode &= ~MODE_VERBOSE;
+ global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
+ }
+
+ if (global.mode & MODE_MWORKER_WAIT) {
+ /* only the wait mode handles the master CLI */
+ mworker_loop();
+ } else {
+
+#if defined(USE_SYSTEMD)
+ if (global.tune.options & GTUNE_USE_SYSTEMD)
+ sd_notifyf(0, "READY=1\nMAINPID=%lu\nSTATUS=Ready.\n", (unsigned long)getpid());
+#endif
+ /* if not in wait mode, reload in wait mode to free the memory */
+ setenv("HAPROXY_LOAD_SUCCESS", "1", 1);
+ ha_notice("Loading success.\n");
+ proc_self->failedreloads = 0; /* reset the number of failures */
+ mworker_reexec_waitmode();
+ }
+ /* should never get there */
+ exit(EXIT_FAILURE);
+ }
+#if defined(USE_OPENSSL) && !defined(OPENSSL_NO_DH)
+ ssl_free_dh();
+#endif
+ exit(0); /* parent must leave */
+ }
+
+ /* child must never use the atexit function */
+ atexit_flag = 0;
+
+ /* close useless master sockets */
+ if (global.mode & MODE_MWORKER) {
+ struct mworker_proc *child, *it;
+ master = 0;
+
+ mworker_cli_proxy_stop();
+
+ /* free proc struct of other processes */
+ list_for_each_entry_safe(child, it, &proc_list, list) {
+ /* close the FD of the master side for all
+ * workers; we don't need to close the worker
+ * side of other workers since it's done with
+ * the bind_proc */
+ if (child->ipc_fd[0] >= 0) {
+ close(child->ipc_fd[0]);
+ child->ipc_fd[0] = -1;
+ }
+ if (child->options & PROC_O_TYPE_WORKER &&
+ child->reloads == 0 &&
+ child->pid == -1) {
+ /* keep this struct if this is our pid */
+ proc_self = child;
+ continue;
+ }
+ LIST_DELETE(&child->list);
+ mworker_free_child(child);
+ child = NULL;
+ }
+ }
+
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) {
+ devnullfd = open("/dev/null", O_RDWR, 0);
+ if (devnullfd < 0) {
+ ha_alert("Cannot open /dev/null\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ /* Must chroot and setgid/setuid in the children */
+ /* chroot if needed */
+ if (global.chroot != NULL) {
+ if (chroot(global.chroot) == -1 || chdir("/") == -1) {
+ ha_alert("[%s.main()] Cannot chroot(%s).\n", argv[0], global.chroot);
+ if (nb_oldpids)
+ tell_old_pids(SIGTTIN);
+ protocol_unbind_all();
+ exit(1);
+ }
+ }
+
+ ha_free(&global.chroot);
+ set_identity(argv[0]);
+
+ /*
+ * This is only done in daemon mode because we might want the
+ * logs on stdout in mworker mode. If we're NOT in QUIET mode,
+ * we should now close the first 3 FDs to ensure that we can
+ * detach from the TTY. We MUST NOT do it in other cases since
+ * it would already have been done, and FDs 0-2 would have been
+ * assigned to listening sockets.
+ */
+ if ((global.mode & MODE_DAEMON) &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))) {
+ /* detach from the tty */
+ stdio_quiet(devnullfd);
+ global.mode &= ~MODE_VERBOSE;
+ global.mode |= MODE_QUIET; /* ensure that we won't say anything from now */
+ }
+ pid = getpid(); /* update child's pid */
+ if (!(global.mode & MODE_MWORKER)) /* in mworker mode we don't want a new pgid for the children */
+ setsid();
+ fork_poller();
+ }
+
+ /* pass through every cli socket, and check if it's bound to
+ * the current process and if it exposes listener sockets.
+ * Caution: the GTUNE_SOCKET_TRANSFER is now set after the fork.
+ */
+
+ if (global.cli_fe) {
+ struct bind_conf *bind_conf;
+
+ list_for_each_entry(bind_conf, &global.cli_fe->conf.bind, by_fe) {
+ if (bind_conf->level & ACCESS_FD_LISTENERS) {
+ global.tune.options |= GTUNE_SOCKET_TRANSFER;
+ break;
+ }
+ }
+ }
+
+ /* Note that here we can't be in the parent/master anymore */
+#if !defined(USE_THREAD) && defined(USE_CPU_AFFINITY)
+ if (ha_cpuset_count(&cpu_map[0].thread[0])) { /* only do this if the process has a CPU map */
+
+#if defined(CPUSET_USE_CPUSET) || defined(__DragonFly__)
+ struct hap_cpuset *set = &cpu_map[0].thread[0];
+ sched_setaffinity(0, sizeof(set->cpuset), &set->cpuset);
+#elif defined(__FreeBSD__)
+ struct hap_cpuset *set = &cpu_map[0].thread[0];
+ ret = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(set->cpuset), &set->cpuset);
+#endif
+ }
+#endif
+ /* try our best to re-enable core dumps depending on system capabilities.
+ * What is addressed here:
+ * - remove file size limits
+ * - remove core size limits
+ * - mark the process dumpable again if it lost it due to user/group
+ */
+ if (global.tune.options & GTUNE_SET_DUMPABLE) {
+ limit.rlim_cur = limit.rlim_max = RLIM_INFINITY;
+
+#if defined(RLIMIT_FSIZE)
+ if (setrlimit(RLIMIT_FSIZE, &limit) == -1) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Failed to set the raise the maximum "
+ "file size.\n", argv[0]);
+ exit(1);
+ }
+ else
+ ha_warning("[%s.main()] Failed to set the raise the maximum "
+ "file size.\n", argv[0]);
+ }
+#endif
+
+#if defined(RLIMIT_CORE)
+ if (setrlimit(RLIMIT_CORE, &limit) == -1) {
+ if (global.tune.options & GTUNE_STRICT_LIMITS) {
+ ha_alert("[%s.main()] Failed to set the raise the core "
+ "dump size.\n", argv[0]);
+ exit(1);
+ }
+ else
+ ha_warning("[%s.main()] Failed to set the raise the core "
+ "dump size.\n", argv[0]);
+ }
+#endif
+
+#if defined(USE_PRCTL)
+ if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) == -1)
+ ha_warning("[%s.main()] Failed to set the dumpable flag, "
+ "no core will be dumped.\n", argv[0]);
+#elif defined(USE_PROCCTL)
+ {
+ int traceable = PROC_TRACE_CTL_ENABLE;
+ if (procctl(P_PID, getpid(), PROC_TRACE_CTL, &traceable) == -1)
+ ha_warning("[%s.main()] Failed to set the traceable flag, "
+ "no core will be dumped.\n", argv[0]);
+ }
+#endif
+ }
+
+ global.mode &= ~MODE_STARTING;
+ reset_usermsgs_ctx();
+
+ /* start threads 2 and above */
+ setup_extra_threads(&run_thread_poll_loop);
+
+ /* when multithreading we need to let only the thread 0 handle the signals */
+ haproxy_unblock_signals();
+
+ /* Finally, start the poll loop for the first thread */
+ run_thread_poll_loop(&ha_thread_info[0]);
+
+ /* wait for all threads to terminate */
+ wait_for_threads_completion();
+
+ deinit_and_exit(0);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/hash.c b/src/hash.c
new file mode 100644
index 0000000..5c92e94
--- /dev/null
+++ b/src/hash.c
@@ -0,0 +1,190 @@
+/*
+ * Hash function implementation
+ *
+ * See mailing list thread on "Consistent hashing alternative to sdbm"
+ * http://marc.info/?l=haproxy&m=138213693909219
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+
+#include <haproxy/compiler.h>
+#include <haproxy/hash.h>
+
+
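+/* wt6: a simple hash mixing each input byte into two 32-bit
+ * accumulators, with rotation amounts derived from the data itself.
+ * Returns a 32-bit hash of the first <len> bytes of <input>.
+ */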
+unsigned int hash_wt6(const void *input, int len)
+{
+ const unsigned char *key = input;
+ unsigned h0 = 0xa53c965aUL;
+ unsigned h1 = 0x5ca6953aUL;
+ unsigned step0 = 6;
+ unsigned step1 = 18;
+
+ for (; len > 0; len--) {
+ unsigned int t;
+
+ t = *key;
+ key++;
+
+ h0 = ~(h0 ^ t);
+ h1 = ~(h1 + t);
+
+ t = (h1 << step0) | (h1 >> (32-step0));
+ h1 = (h0 << step1) | (h0 >> (32-step1));
+ h0 = t;
+
+ t = ((h0 >> 16) ^ h1) & 0xffff;
+ step0 = t & 0x1F;
+ step1 = t >> 11;
+ }
+ return h0 ^ h1;
+}
+
+unsigned int hash_djb2(const void *input, int len)
+{
+ const unsigned char *key = input;
+ unsigned int hash = 5381;
+
+ /* the hash unrolled eight times */
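+ /* each step computes hash = hash * 33 + *key, the classic djb2
+ * recurrence: (hash << 5) + hash == hash * 33 */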
+ for (; len >= 8; len -= 8) {
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ hash = ((hash << 5) + hash) + *key++;
+ }
+ switch (len) {
+ case 7: hash = ((hash << 5) + hash) + *key++; __fallthrough;
+ case 6: hash = ((hash << 5) + hash) + *key++; __fallthrough;
+ case 5: hash = ((hash << 5) + hash) + *key++; __fallthrough;
+ case 4: hash = ((hash << 5) + hash) + *key++; __fallthrough;
+ case 3: hash = ((hash << 5) + hash) + *key++; __fallthrough;
+ case 2: hash = ((hash << 5) + hash) + *key++; __fallthrough;
+ case 1: hash = ((hash << 5) + hash) + *key++; break;
+ default: /* case 0: */ break;
+ }
+ return hash;
+}
+
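+/* sdbm: each step computes hash = hash * 65599 + c, expressed below
+ * with shifts since (h << 6) + (h << 16) - h == h * 65599.
+ */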
+unsigned int hash_sdbm(const void *input, int len)
+{
+ const unsigned char *key = input;
+ unsigned int hash = 0;
+ int c;
+
+ while (len--) {
+ c = *key++;
+ hash = c + (hash << 6) + (hash << 16) - hash;
+ }
+
+ return hash;
+}
+
+/* Small yet efficient CRC32 calculation loosely inspired from crc32b found
+ * here: http://www.hackersdelight.org/hdcodetxt/crc.c.txt
+ * The magic value represents the polynomial with one bit per exponent. Much
+ * faster table-based versions exist but are pointless for our usage here:
+ * this hash already sustains gigabit speed, which is far faster than what
+ * we'd ever need. Better preserve the CPU's cache instead.
+ */
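+/* Note: 0xedb88320 below is the bit-reversed form of the standard
+ * CRC-32 polynomial 0x04c11db7, matching the LSB-first loop.
+ */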
+unsigned int hash_crc32(const void *input, int len)
+{
+ const unsigned char *key = input;
+ unsigned int hash;
+ int bit;
+
+ hash = ~0;
+ while (len--) {
+ hash ^= *key++;
+ for (bit = 0; bit < 8; bit++)
+ hash = (hash >> 1) ^ ((hash & 1) ? 0xedb88320 : 0);
+ }
+ return ~hash;
+}
+
+/* CRC32c poly 0x11EDC6F41 (RFC4960, Appendix B [8].) */
+static const uint32_t crctable[256] = {
+ 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+ 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+ 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+ 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
+ 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+ 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
+ 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
+ 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
+ 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+ 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+ 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
+ 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
+ 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
+ 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
+ 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+ 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
+ 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
+ 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+ 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
+ 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+ 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
+ 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
+ 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
+ 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
+ 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+ 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
+ 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+ 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
+ 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
+ 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+ 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
+ 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
+ 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
+ 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
+ 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+ 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+ 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
+ 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
+ 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
+ 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+ 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
+ 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
+ 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
+ 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
+ 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+ 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
+ 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
+ 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
+ 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
+ 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+ 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
+ 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
+ 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
+ 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+ 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+ 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
+ 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
+ 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
+ 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
+ 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+ 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
+ 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
+ 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+ 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
+};
+
+uint32_t hash_crc32c(const void *input, int len)
+{
+ const unsigned char *buf = input;
+ uint32_t crc = 0xffffffff;
+ while (len-- > 0) {
+ crc = (crc >> 8) ^ crctable[(crc ^ (*buf++)) & 0xff];
+ }
+ return (crc ^ 0xffffffff);
+}
diff --git a/src/hlua.c b/src/hlua.c
new file mode 100644
index 0000000..d1f5323
--- /dev/null
+++ b/src/hlua.c
@@ -0,0 +1,13961 @@
+/*
+ * Lua unsafe core engine
+ *
+ * Copyright 2015-2016 Thierry Fournier <tfournier@arpalert.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <ctype.h>
+#include <setjmp.h>
+
+#include <lauxlib.h>
+#include <lua.h>
+#include <lualib.h>
+
+#if !defined(LUA_VERSION_NUM) || LUA_VERSION_NUM < 503
+#error "Requires Lua 5.3 or later."
+#endif
+
+#include <import/ebpttree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/clock.h>
+#include <haproxy/connection.h>
+#include <haproxy/filters.h>
+#include <haproxy/h1.h>
+#include <haproxy/hlua.h>
+#include <haproxy/hlua_fcn.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_client.h>
+#include <haproxy/http_fetch.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/map.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/pattern.h>
+#include <haproxy/payload.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xref.h>
+#include <haproxy/event_hdl.h>
+#include <haproxy/check.h>
+#include <haproxy/mailers.h>
+
+/* Global LUA flags */
+
+enum hlua_log_opt {
+ /* tune.lua.log.loggers */
+ HLUA_LOG_LOGGERS_ON = 0x00000001, /* forward logs to current loggers */
+
+ /* tune.lua.log.stderr */
+ HLUA_LOG_STDERR_ON = 0x00000010, /* forward logs to stderr */
+ HLUA_LOG_STDERR_AUTO = 0x00000020, /* forward logs to stderr if no loggers */
+ HLUA_LOG_STDERR_MASK = 0x00000030,
+};
+/* default log options, made of flags in hlua_log_opt */
+static uint hlua_log_opts = HLUA_LOG_LOGGERS_ON | HLUA_LOG_STDERR_AUTO;
+
+/* Lua uses longjmp to perform yields and to throw errors. These
+ * macros are used only to identify the functions that cannot
+ * return because a longjmp is executed.
+ * __LJMP marks a prototype in an hlua file that can use longjmp.
+ * WILL_LJMP() marks a Lua function that will use longjmp.
+ * MAY_LJMP() marks a Lua function that may use longjmp.
+ */
+#define __LJMP
+#define WILL_LJMP(func) do { func; my_unreachable(); } while(0)
+#define MAY_LJMP(func) func
+
+/* This pair of functions securely executes some Lua calls outside of
+ * the Lua runtime environment. Each Lua call can trigger a longjmp
+ * if it encounters a memory error.
+ *
+ * Lua documentation extract:
+ *
+ * If an error happens outside any protected environment, Lua calls
+ * a panic function (see lua_atpanic) and then calls abort, thus
+ * exiting the host application. Your panic function can avoid this
+ * exit by never returning (e.g., doing a long jump to your own
+ * recovery point outside Lua).
+ *
+ * The panic function runs as if it were a message handler (see
+ * #2.3); in particular, the error message is at the top of the
+ * stack. However, there is no guarantee about stack space. To push
+ * anything on the stack, the panic function must first check the
+ * available space (see #4.2).
+ *
+ * We must check all the Lua entry points. These include:
+ * - The include/proto/hlua.h exported functions
+ * - the task wrapper function
+ * - The action wrapper function
+ * - The converters wrapper function
+ * - The sample-fetch wrapper functions
+ *
+ * It is tolerated that the initialisation function aborts.
+ * Before each Lua abort, an error message is written to stderr.
+ *
+ * The macro SET_SAFE_LJMP initialises the longjmp. The macro
+ * RESET_SAFE_LJMP resets the longjmp. These functions must be macros
+ * because they must exist in the program stack when the longjmp
+ * is called.
+ *
+ * Note that Lua processing is not really thread safe. It provides a
+ * heavy mechanism which consists in adding our own lock function to
+ * the Lua code and recompiling the library. This system would
+ * probably not be accepted by the maintainers of various distros.
+ *
+ * Our main Lua execution point is the function lua_resume(). A quick
+ * look at the Lua sources shows a lua_lock() at the start of the
+ * function and a lua_unlock() at its end. So we can conclude that
+ * Lua's thread safe mode just wraps all execution in a mutex. We
+ * prefer to do this in the HAProxy code instead, as it will be
+ * easier for distro maintainers.
+ *
+ * Note that the HAProxy Lua functions surrounded by the macros
+ * SET_SAFE_LJMP and RESET_SAFE_LJMP manipulate the Lua stack, so
+ * care must be taken to hold the mutex around these functions.
+ */
+__decl_spinlock(hlua_global_lock);
+THREAD_LOCAL jmp_buf safe_ljmp_env;
+static int hlua_panic_safe(lua_State *L) { return 0; }
+static int hlua_panic_ljmp(lua_State *L) { WILL_LJMP(longjmp(safe_ljmp_env, 1)); return 0; }
+
+/* This is the chained list of struct hlua_function referenced
+ * for haproxy actions, sample-fetches, converters, cli and
+ * applet bindings. It is used for post-initialisation checks.
+ */
+static struct list referenced_functions = LIST_HEAD_INIT(referenced_functions);
+
+/* This variable is used only during initialization to identify the Lua state
+ * currently being initialized. 0 is the common lua state, 1 to n are the Lua
+ * states dedicated to each thread (in this case hlua_state_id==tid+1).
+ */
+static int hlua_state_id;
+
+/* This is a NULL-terminated list of Lua files which are referenced for per-thread loading */
+static char ***per_thread_load = NULL;
+
+lua_State *hlua_init_state(int thread_id);
+
+/* This function takes the Lua global lock. Keep this function's visibility
+ * global so that it can appear in stack dumps and performance profiles!
+ */
+static inline void lua_take_global_lock()
+{
+ HA_SPIN_LOCK(LUA_LOCK, &hlua_global_lock);
+}
+
+static inline void lua_drop_global_lock()
+{
+ HA_SPIN_UNLOCK(LUA_LOCK, &hlua_global_lock);
+}
+
+/* Lua lock helpers: only lock when required.
+ *
+ * state_id == 0: we're operating on the main Lua stack (shared between
+ * OS threads), so we need to acquire the main lock.
+ *
+ * If the thread already owns the lock (_hlua_locked != 0), skip the lock
+ * attempt. This can happen when running under a protected Lua environment.
+ * Not doing so could result in deadlocks due to nested locking attempts
+ * from the same thread.
+ */
+static THREAD_LOCAL int _hlua_locked = 0;
+static inline void hlua_lock(struct hlua *hlua)
+{
+ if (hlua->state_id != 0)
+ return;
+ if (!_hlua_locked)
+ lua_take_global_lock();
+ _hlua_locked += 1;
+}
+static inline void hlua_unlock(struct hlua *hlua)
+{
+ if (hlua->state_id != 0)
+ return;
+ BUG_ON(_hlua_locked <= 0);
+ _hlua_locked--;
+ /* drop the lock once the lock count reaches 0 */
+ if (!_hlua_locked)
+ lua_drop_global_lock();
+}
+
+#define SET_SAFE_LJMP_L(__L, __HLUA) \
+ ({ \
+ int ret; \
+ hlua_lock(__HLUA); \
+ if (setjmp(safe_ljmp_env) != 0) { \
+ lua_atpanic(__L, hlua_panic_safe); \
+ ret = 0; \
+ hlua_unlock(__HLUA); \
+ } else { \
+ lua_atpanic(__L, hlua_panic_ljmp); \
+ ret = 1; \
+ } \
+ ret; \
+ })
+
+/* If we are the last function catching Lua errors, we
+ * must reset the panic function.
+ */
+#define RESET_SAFE_LJMP_L(__L, __HLUA) \
+ do { \
+ lua_atpanic(__L, hlua_panic_safe); \
+ hlua_unlock(__HLUA); \
+ } while(0)
+
+#define SET_SAFE_LJMP(__HLUA) \
+ SET_SAFE_LJMP_L((__HLUA)->T, __HLUA)
+
+#define RESET_SAFE_LJMP(__HLUA) \
+ RESET_SAFE_LJMP_L((__HLUA)->T, __HLUA)
+
+#define SET_SAFE_LJMP_PARENT(__HLUA) \
+ SET_SAFE_LJMP_L(hlua_states[(__HLUA)->state_id], __HLUA)
+
+#define RESET_SAFE_LJMP_PARENT(__HLUA) \
+ RESET_SAFE_LJMP_L(hlua_states[(__HLUA)->state_id], __HLUA)
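+
+/* Typical (sketched) call pattern for the macros above: enter the
+ * protected environment, manipulate the Lua stack, then leave it:
+ *
+ *   if (!SET_SAFE_LJMP(hlua)) {
+ *       // a Lua panic (e.g. OOM) longjmp'd back here: handle the error
+ *       return 0;
+ *   }
+ *   lua_getglobal(hlua->T, "some_function"); // stack ops may longjmp;
+ *                                            // "some_function" is a made-up name
+ *   RESET_SAFE_LJMP(hlua);
+ */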
+
+/* Applet status flags */
+#define APPLET_DONE 0x01 /* applet processing is done. */
+/* unused: 0x02 */
+#define APPLET_HDR_SENT 0x04 /* Response header sent. */
+/* unused: 0x08, 0x10 */
+#define APPLET_HTTP11 0x20 /* Last chunk sent. */
+#define APPLET_RSP_SENT 0x40 /* The response was fully sent */
+
+/* The main Lua execution context. The 0 index is the
+ * common state shared by all threads.
+ */
+static lua_State *hlua_states[MAX_THREADS + 1];
+
+#define HLUA_FLT_CB_FINAL 0x00000001
+#define HLUA_FLT_CB_RETVAL 0x00000002
+#define HLUA_FLT_CB_ARG_CHN 0x00000004
+#define HLUA_FLT_CB_ARG_HTTP_MSG 0x00000008
+
+#define HLUA_FLT_CTX_FL_PAYLOAD 0x00000001
+
+struct hlua_reg_filter {
+ char *name;
+ int flt_ref[MAX_THREADS + 1];
+ int fun_ref[MAX_THREADS + 1];
+ struct list l;
+};
+
+struct hlua_flt_config {
+ struct hlua_reg_filter *reg;
+ int ref[MAX_THREADS + 1];
+ char **args;
+};
+
+struct hlua_flt_ctx {
+ int ref; /* ref to the filter lua object */
+ struct hlua *hlua[2]; /* lua runtime context (0: request, 1: response) */
+ unsigned int cur_off[2]; /* current offset (0: request, 1: response) */
+ unsigned int cur_len[2]; /* current forwardable length (0: request, 1: response) */
+ unsigned int flags; /* HLUA_FLT_CTX_FL_* */
+};
+
+/* appctx context used by the cosockets */
+struct hlua_csk_ctx {
+ int connected;
+ struct xref xref; /* cross reference with the Lua object owner. */
+ struct list wake_on_read;
+ struct list wake_on_write;
+ struct appctx *appctx;
+ struct server *srv;
+ int timeout;
+ int die;
+};
+
+/* appctx context used by TCP services */
+struct hlua_tcp_ctx {
+ struct hlua *hlua;
+ int flags;
+ struct task *task;
+};
+
+/* appctx context used by HTTP services */
+struct hlua_http_ctx {
+ struct hlua *hlua;
+ int left_bytes; /* The max amount of bytes that we can read. */
+ int flags;
+ int status;
+ const char *reason;
+ struct task *task;
+};
+
+/* used by registered CLI keywords */
+struct hlua_cli_ctx {
+ struct hlua *hlua;
+ struct task *task;
+ struct hlua_function *fcn;
+};
+
+DECLARE_STATIC_POOL(pool_head_hlua_flt_ctx, "hlua_flt_ctx", sizeof(struct hlua_flt_ctx));
+
+static int hlua_filter_from_payload(struct filter *filter);
+
+/* This is the chained list of struct hlua_flt referenced
+ * for haproxy filters. It is used for post-initialisation control.
+ */
+static struct list referenced_filters = LIST_HEAD_INIT(referenced_filters);
+
+
+/* This is the memory pool containing struct hlua for applets
+ * (including cli).
+ */
+DECLARE_STATIC_POOL(pool_head_hlua, "hlua", sizeof(struct hlua));
+
+/* Used for Socket connection. */
+static struct proxy *socket_proxy;
+static struct server *socket_tcp;
+#ifdef USE_OPENSSL
+static struct server *socket_ssl;
+#endif
+
+/* List head of the function called at the initialisation time. */
+struct list hlua_init_functions[MAX_THREADS + 1];
+
+/* The following variables contain the references of the different
+ * Lua classes. These references are useful to identify the metadata
+ * associated with an object.
+ */
+static int class_txn_ref;
+static int class_socket_ref;
+static int class_channel_ref;
+static int class_fetches_ref;
+static int class_converters_ref;
+static int class_http_ref;
+static int class_http_msg_ref;
+static int class_httpclient_ref;
+static int class_map_ref;
+static int class_applet_tcp_ref;
+static int class_applet_http_ref;
+static int class_txn_reply_ref;
+
+/* Lua max execution timeouts. By default, stream-related
+ * lua coroutines (e.g.: actions) have a short timeout.
+ * On the other hand, task coroutines don't have a timeout because
+ * a task may remain alive for the whole haproxy lifetime.
+ *
+ * Timeouts are expressed in milliseconds; they are meant to be used
+ * with the hlua timer API exclusively.
+ * 0 means no timeout.
+ */
+static uint32_t hlua_timeout_burst = 1000; /* burst timeout. */
+static uint32_t hlua_timeout_session = 4000; /* session timeout. */
+static uint32_t hlua_timeout_task = 0; /* task timeout. */
+static uint32_t hlua_timeout_applet = 4000; /* applet timeout. */
+
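+/* These defaults can be tuned from the configuration with the matching
+ * global directives, for instance (illustrative values):
+ *
+ *   global
+ *       tune.lua.burst-timeout   1000
+ *       tune.lua.session-timeout 4s
+ *       tune.lua.task-timeout    5s
+ *       tune.lua.service-timeout 4s
+ */
+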
+/* hlua multipurpose timer:
+ * used to compute burst lua time (within a single hlua_ctx_resume())
+ * and cumulative lua time for a given coroutine, and to check
+ * the lua coroutine against the configured timeouts
+ */
+
+/* fetch per-thread cpu_time with ms precision (may wrap) */
+static inline uint32_t _hlua_time_ms()
+{
+ /* We're interested in the current cpu time in ms, which will be returned
+ * as a uint32_t to save some space.
+ * We must take the following into account:
+ *
+ * - now_cpu_time_fast() which returns the time in nanoseconds as a uint64_t
+ * will wrap every 585 years.
+ * - uint32_t may only contain 4294967295ms (~=49.7 days), so _hlua_time_ms()
+ * itself will also wrap every 49.7 days.
+ *
+ * While we can safely ignore the now_cpu_time_fast() wrap, we must
+ * take care of the uint32_t wrap by making sure to exclusively
+ * manipulate the time using uint32_t everywhere _hlua_time_ms()
+ * is involved.
+ */
+ return (uint32_t)(now_cpu_time_fast() / 1000000ULL);
+}
+
+/* computes time spent in a single lua execution (in ms) */
+static inline uint32_t _hlua_time_burst(const struct hlua_timer *timer)
+{
+ uint32_t burst_ms;
+
+ /* wrapping is expected and properly
+ * handled thanks to _hlua_time_ms() and burst_ms
+ * being of the same type
+ */
+ burst_ms = _hlua_time_ms() - timer->start;
+ return burst_ms;
+}
+
+static inline void hlua_timer_init(struct hlua_timer *timer, unsigned int max)
+{
+ timer->cumulative = 0;
+ timer->burst = 0;
+ timer->max = max;
+}
+
+/* reset the timer ctx between 2 yields */
+static inline void hlua_timer_reset(struct hlua_timer *timer)
+{
+ timer->cumulative += timer->burst;
+ timer->burst = 0;
+}
+
+/* start the timer right before a new execution */
+static inline void hlua_timer_start(struct hlua_timer *timer)
+{
+ timer->start = _hlua_time_ms();
+}
+
+/* update hlua timer when finishing an execution */
+static inline void hlua_timer_stop(struct hlua_timer *timer)
+{
+ timer->burst += _hlua_time_burst(timer);
+}
+
+/* check the timers for the current hlua context:
+ * - first check for the burst timeout (max execution time for the current
+ *   hlua resume, ie: time between effective yields)
+ * - then check for the cumulative yield timeout
+ *
+ * Returns 1 if the check succeeded and 0 if it failed
+ * (ie: timeout exceeded)
+ */
+static inline int hlua_timer_check(const struct hlua_timer *timer)
+{
+ uint32_t pburst = _hlua_time_burst(timer); /* pending burst time in ms */
+
+ if (hlua_timeout_burst && (timer->burst + pburst) > hlua_timeout_burst)
+ return 0; /* burst timeout exceeded */
+ if (timer->max && (timer->cumulative + timer->burst + pburst) > timer->max)
+ return 0; /* cumulative timeout exceeded */
+ return 1; /* ok */
+}
+
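+/* Illustrative sketch (not part of the original sources) of how the timer
+ * helpers above are meant to be chained around Lua executions:
+ */
+#if 0
+ struct hlua_timer t;
+
+ hlua_timer_init(&t, hlua_timeout_task); /* once, when the context is created */
+ hlua_timer_reset(&t); /* when (re)entering the resume function */
+ hlua_timer_start(&t); /* right before lua starts running */
+ /* ... lua_resume() ... */
+ hlua_timer_stop(&t); /* right after lua stops running */
+ if (!hlua_timer_check(&t)) {
+ /* burst or cumulative time budget exceeded */
+ }
+#endif
+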
+/* Interrupts the Lua processing each "hlua_nb_instruction" instructions.
+ * It is used to prevent infinite loops.
+ *
+ * The value was determined experimentally: an infinite loop containing
+ * one increment and one test was run for 10 seconds. The loop count
+ * reaches a ceiling of 710M iterations with one interrupt every 9000
+ * instructions, so the value was set to one interrupt every 10 000
+ * instructions.
+ *
+ * configured | Number of
+ * instructions | loops executed
+ * between two | in millions
+ * forced yields |
+ * ---------------+---------------
+ * 10 | 160
+ * 500 | 670
+ * 1000 | 680
+ * 5000 | 700
+ * 7000 | 700
+ * 8000 | 700
+ * 9000 | 710 <- ceiling
+ * 10000 | 710
+ * 100000 | 710
+ * 1000000 | 710
+ *
+ */
+static unsigned int hlua_nb_instruction = 10000;
+
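+/* This default can be changed with the "tune.lua.forced-yield" global
+ * directive, e.g. (illustrative value):
+ *
+ *   global
+ *       tune.lua.forced-yield 10000
+ */
+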
+/* Descriptor for the memory allocation state. The limit is pre-initialised to
+ * 0 until it is replaced by "tune.lua.maxmem" during the config parsing, or it
+ * is replaced with ~0 during post_init after everything was loaded. This way
+ * it is guaranteed that if limit is ~0 the boot is complete and that if it's
+ * zero it's not yet limited and proper accounting is required.
+ */
+struct hlua_mem_allocator {
+ size_t allocated;
+ size_t limit;
+};
+
+static struct hlua_mem_allocator hlua_global_allocator THREAD_ALIGNED(64);
+
+/* hlua event subscription */
+struct hlua_event_sub {
+ int fcn_ref;
+ int state_id;
+ struct hlua *hlua;
+ struct task *task;
+ event_hdl_async_equeue equeue;
+ struct event_hdl_sub *sub;
+ uint8_t paused;
+};
+
+/* This is the memory pool containing struct hlua_event_sub
+ * for event subscriptions from lua
+ */
+DECLARE_STATIC_POOL(pool_head_hlua_event_sub, "hlua_esub", sizeof(struct hlua_event_sub));
+
+/* The following functions convert types between HAProxy internal args or
+ * samples and LUA types. Another function checks whether the LUA
+ * stack contains arguments matching a required ARG_T format.
+ */
+__LJMP static int hlua_arg2lua(lua_State *L, const struct arg *arg);
+static int hlua_lua2arg(lua_State *L, int ud, struct arg *arg);
+__LJMP static int hlua_lua2arg_check(lua_State *L, int first, struct arg *argp,
+ uint64_t mask, struct proxy *p);
+__LJMP static int hlua_smp2lua(lua_State *L, struct sample *smp);
+__LJMP static int hlua_smp2lua_str(lua_State *L, struct sample *smp);
+static int hlua_lua2smp(lua_State *L, int ud, struct sample *smp);
+
+__LJMP static int hlua_http_get_headers(lua_State *L, struct http_msg *msg);
+
+struct prepend_path {
+ struct list l;
+ char *type;
+ char *path;
+};
+
+static struct list prepend_path_list = LIST_HEAD_INIT(prepend_path_list);
+
+#define SEND_ERR(__be, __fmt, __args...) \
+ do { \
+ send_log(__be, LOG_ERR, __fmt, ## __args); \
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)) \
+ ha_alert(__fmt, ## __args); \
+ } while (0)
+
+static inline struct hlua_function *new_hlua_function()
+{
+ struct hlua_function *fcn;
+ int i;
+
+ fcn = calloc(1, sizeof(*fcn));
+ if (!fcn)
+ return NULL;
+ LIST_APPEND(&referenced_functions, &fcn->l);
+ for (i = 0; i < MAX_THREADS + 1; i++)
+ fcn->function_ref[i] = -1;
+ return fcn;
+}
+
+static inline void release_hlua_function(struct hlua_function *fcn)
+{
+ if (!fcn)
+ return;
+ if (fcn->name)
+ ha_free(&fcn->name);
+ LIST_DELETE(&fcn->l);
+ ha_free(&fcn);
+}
+
+/* If the common state is set, the stack id is 0, otherwise it is the tid + 1 */
+static inline int fcn_ref_to_stack_id(struct hlua_function *fcn)
+{
+ if (fcn->function_ref[0] == -1)
+ return tid + 1;
+ return 0;
+}
+
+/* Create a new registered filter. Only its name is filled */
+static inline struct hlua_reg_filter *new_hlua_reg_filter(const char *name)
+{
+ struct hlua_reg_filter *reg_flt;
+ int i;
+
+ reg_flt = calloc(1, sizeof(*reg_flt));
+ if (!reg_flt)
+ return NULL;
+ reg_flt->name = strdup(name);
+ if (!reg_flt->name) {
+ free(reg_flt);
+ return NULL;
+ }
+ LIST_APPEND(&referenced_filters, &reg_flt->l);
+ for (i = 0; i < MAX_THREADS + 1; i++) {
+ reg_flt->flt_ref[i] = -1;
+ reg_flt->fun_ref[i] = -1;
+ }
+ return reg_flt;
+}
+
+/* Release a registered filter */
+static inline void release_hlua_reg_filter(struct hlua_reg_filter *reg_flt)
+{
+ if (!reg_flt)
+ return;
+ if (reg_flt->name)
+ ha_free(&reg_flt->name);
+ LIST_DELETE(&reg_flt->l);
+ ha_free(&reg_flt);
+}
+
+/* If the common state is set, the stack id is 0, otherwise it is the tid + 1 */
+static inline int reg_flt_to_stack_id(struct hlua_reg_filter *reg_flt)
+{
+ if (reg_flt->fun_ref[0] == -1)
+ return tid + 1;
+ return 0;
+}
+
+/* Used to check a Lua function type in the stack. It creates and
+ * returns a reference to the function. This function throws an
+ * error if the argument is not a "function".
+ * When no longer used, the ref must be released with hlua_unref().
+ */
+__LJMP int hlua_checkfunction(lua_State *L, int argno)
+{
+ if (!lua_isfunction(L, argno)) {
+ const char *msg = lua_pushfstring(L, "function expected, got %s", luaL_typename(L, argno));
+ WILL_LJMP(luaL_argerror(L, argno, msg));
+ }
+ lua_pushvalue(L, argno);
+ return luaL_ref(L, LUA_REGISTRYINDEX);
+}
+
+/* Used to check a Lua table type in the stack. It creates and
+ * returns a reference to the table. This function throws an
+ * error if the argument is not a "table".
+ * When no longer used, the ref must be released with hlua_unref().
+ */
+__LJMP int hlua_checktable(lua_State *L, int argno)
+{
+ if (!lua_istable(L, argno)) {
+ const char *msg = lua_pushfstring(L, "table expected, got %s", luaL_typename(L, argno));
+ WILL_LJMP(luaL_argerror(L, argno, msg));
+ }
+ lua_pushvalue(L, argno);
+ return luaL_ref(L, LUA_REGISTRYINDEX);
+}
+
+/* Get a reference to the object at the top of the stack.
+ * The referenced object will be popped from the stack.
+ *
+ * The function returns the reference to the object, which must
+ * be cleared using hlua_unref() when no longer used.
+ */
+__LJMP int hlua_ref(lua_State *L)
+{
+ return MAY_LJMP(luaL_ref(L, LUA_REGISTRYINDEX));
+}
+
+/* Pushes a reference previously created using luaL_ref(L, LUA_REGISTRYINDEX)
+ * on the <L> stack
+ * (ie: hlua_checkfunction(), hlua_checktable() or hlua_ref())
+ *
+ * When the reference is no longer used, it should be released by calling
+ * hlua_unref().
+ *
+ * <L> can be from any co-routine as long as it belongs to the same lua
+ * parent state as the one used to get the reference.
+ */
+void hlua_pushref(lua_State *L, int ref)
+{
+ lua_rawgeti(L, LUA_REGISTRYINDEX, ref);
+}
+
+/* Releases a reference previously created using luaL_ref(L, LUA_REGISTRYINDEX)
+ * (ie: hlua_checkfunction(), hlua_checktable() or hlua_ref())
+ *
+ * This will allow the reference to be reused and the referred object
+ * to be garbage collected.
+ *
+ * <L> can be from any co-routine as long as it belongs to the same lua
+ * parent state as the one used to get the reference.
+ */
+void hlua_unref(lua_State *L, int ref)
+{
+ luaL_unref(L, LUA_REGISTRYINDEX, ref);
+}
+
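+/* Illustrative sketch (not part of the original sources) of the reference
+ * lifecycle implemented by the helpers above:
+ */
+#if 0
+ int ref;
+
+ lua_pushstring(L, "some value"); /* push any object */
+ ref = hlua_ref(L); /* pops it and returns a handle */
+ /* ... later, possibly from another coroutine of the same state ... */
+ hlua_pushref(L, ref); /* pushes the object back on the stack */
+ lua_pop(L, 1);
+ hlua_unref(L, ref); /* allows it to be garbage collected */
+#endif
+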
+__LJMP const char *hlua_traceback(lua_State *L, const char* sep)
+{
+ lua_Debug ar;
+ int level = 0;
+ struct buffer *msg = get_trash_chunk();
+
+ while (lua_getstack(L, level++, &ar)) {
+ /* Fill fields:
+ * 'S': fills in the fields source, short_src, linedefined, lastlinedefined, and what;
+ * 'l': fills in the field currentline;
+ * 'n': fills in the field name and namewhat;
+ * 't': fills in the field istailcall;
+ */
+ lua_getinfo(L, "Slnt", &ar);
+
+ /* skip these empty entries, usually they come from deep C functions */
+ if (ar.currentline < 0 && *ar.what == 'C' && !*ar.namewhat && !ar.name)
+ continue;
+
+ /* Add separator */
+ if (b_data(msg))
+ chunk_appendf(msg, "%s", sep);
+
+ /* Append code localisation */
+ if (ar.currentline > 0)
+ chunk_appendf(msg, "%s:%d: ", ar.short_src, ar.currentline);
+ else
+ chunk_appendf(msg, "%s: ", ar.short_src);
+
+ /*
+ * Get function name
+ *
+ * If namewhat is not empty, name is defined.
+ * what contains "Lua" for a Lua function, "C" for a C function,
+ * or "main" for the main code.
+ */
+ if (*ar.namewhat != '\0' && ar.name != NULL) /* is there a name from code? */
+ chunk_appendf(msg, "in %s '%s'", ar.namewhat, ar.name); /* use it */
+
+ else if (*ar.what == 'm') /* "main", the code is not executed in a function */
+ chunk_appendf(msg, "in main chunk");
+
+ else if (*ar.what != 'C') /* for Lua functions, use <file:line> */
+ chunk_appendf(msg, "in function line %d", ar.linedefined);
+
+ else /* nothing left... */
+ chunk_appendf(msg, "?");
+
+
+ /* Display tailed call */
+ if (ar.istailcall)
+ chunk_appendf(msg, " ...");
+ }
+
+ return msg->area;
+}
+
+
+/* This function checks the number of arguments available on the
+ * stack. If the number of available arguments is not equal to
+ * <nb>, an error is thrown.
+ */
+__LJMP static inline void check_args(lua_State *L, int nb, char *fcn)
+{
+ if (lua_gettop(L) == nb)
+ return;
+ WILL_LJMP(luaL_error(L, "'%s' needs %d arguments", fcn, nb));
+}
+
+/* This function pushes an error string prefixed by the file name
+ * and the line number where the error is encountered.
+ */
+static int hlua_pusherror(lua_State *L, const char *fmt, ...)
+{
+ va_list argp;
+ va_start(argp, fmt);
+ luaL_where(L, 1);
+ lua_pushvfstring(L, fmt, argp);
+ va_end(argp);
+ lua_concat(L, 2);
+ return 1;
+}
+
+/* This function is used with sample fetches and converters. It
+ * converts the HAProxy configuration arguments into lua stack
+ * values.
+ *
+ * It takes an array of "arg"; each entry of the array is
+ * converted and pushed onto the LUA stack.
+ */
+__LJMP static int hlua_arg2lua(lua_State *L, const struct arg *arg)
+{
+ switch (arg->type) {
+ case ARGT_SINT:
+ case ARGT_TIME:
+ case ARGT_SIZE:
+ lua_pushinteger(L, arg->data.sint);
+ break;
+
+ case ARGT_STR:
+ lua_pushlstring(L, arg->data.str.area, arg->data.str.data);
+ break;
+
+ case ARGT_IPV4:
+ case ARGT_IPV6:
+ case ARGT_MSK4:
+ case ARGT_MSK6:
+ case ARGT_FE:
+ case ARGT_BE:
+ case ARGT_TAB:
+ case ARGT_SRV:
+ case ARGT_USR:
+ case ARGT_MAP:
+ default:
+ lua_pushnil(L);
+ break;
+ }
+ return 1;
+}
+
+/* This function takes one entry from the LUA stack at the index "ud",
+ * and tries to convert it into an HAProxy argument entry. This is useful
+ * with sample fetch wrappers. The input arguments are given to the
+ * lua wrapper and converted into an arg list by the function.
+ *
+ * Note: although lua_tolstring() may raise a memory error according to
+ * the lua documentation, in practice this could only happen when
+ * lua_tolstring() is used on a number (lua will try to push the number as a
+ * string on the stack, and this may result in a memory failure), so here we
+ * assume that hlua_lua2arg() will never raise an exception since it is
+ * exclusively used with lua string inputs.
+ *
+ * Note2: You should be extra careful when using <arg> argument, since
+ * string arguments rely on lua_tolstring() which returns a pointer to lua
+ * object that may be garbage collected at any time when removed from lua
+ * stack, thus you should make sure that <arg> is only used from a local
+ * scope within lua context (and not exported or stored in a lua-independent
+ * ctx) and that related lua object still exists when accessing arg data.
+ * See: https://www.lua.org/manual/5.4/manual.html#4.1.3
+ */
+static int hlua_lua2arg(lua_State *L, int ud, struct arg *arg)
+{
+ switch (lua_type(L, ud)) {
+
+ case LUA_TNUMBER:
+ case LUA_TBOOLEAN:
+ arg->type = ARGT_SINT;
+ arg->data.sint = lua_tointeger(L, ud);
+ break;
+
+ case LUA_TSTRING:
+ arg->type = ARGT_STR;
+ arg->data.str.area = (char *)lua_tolstring(L, ud, &arg->data.str.data);
+ /* We don't know the actual size of the underlying allocation, so be conservative. */
+ arg->data.str.size = arg->data.str.data+1; /* count the terminating null byte */
+ arg->data.str.head = 0;
+ break;
+
+ case LUA_TUSERDATA:
+ case LUA_TNIL:
+ case LUA_TTABLE:
+ case LUA_TFUNCTION:
+ case LUA_TTHREAD:
+ case LUA_TLIGHTUSERDATA:
+ arg->type = ARGT_SINT;
+ arg->data.sint = 0;
+ break;
+ }
+ return 1;
+}
+
+/* The following functions are used to convert a struct sample
+ * into a Lua type. This is useful to convert the return value of
+ * fetches or converters.
+ */
+__LJMP static int hlua_smp2lua(lua_State *L, struct sample *smp)
+{
+ switch (smp->data.type) {
+ case SMP_T_SINT:
+ case SMP_T_BOOL:
+ lua_pushinteger(L, smp->data.u.sint);
+ break;
+
+ case SMP_T_BIN:
+ case SMP_T_STR:
+ lua_pushlstring(L, smp->data.u.str.area, smp->data.u.str.data);
+ break;
+
+ case SMP_T_METH:
+ switch (smp->data.u.meth.meth) {
+ case HTTP_METH_OPTIONS: lua_pushstring(L, "OPTIONS"); break;
+ case HTTP_METH_GET: lua_pushstring(L, "GET"); break;
+ case HTTP_METH_HEAD: lua_pushstring(L, "HEAD"); break;
+ case HTTP_METH_POST: lua_pushstring(L, "POST"); break;
+ case HTTP_METH_PUT: lua_pushstring(L, "PUT"); break;
+ case HTTP_METH_DELETE: lua_pushstring(L, "DELETE"); break;
+ case HTTP_METH_TRACE: lua_pushstring(L, "TRACE"); break;
+ case HTTP_METH_CONNECT: lua_pushstring(L, "CONNECT"); break;
+ case HTTP_METH_OTHER:
+ lua_pushlstring(L, smp->data.u.meth.str.area, smp->data.u.meth.str.data);
+ break;
+ default:
+ lua_pushnil(L);
+ break;
+ }
+ break;
+
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ case SMP_T_ADDR: /* This type is never used to qualify a sample. */
+ if (sample_casts[smp->data.type][SMP_T_STR] &&
+ sample_casts[smp->data.type][SMP_T_STR](smp))
+ lua_pushlstring(L, smp->data.u.str.area, smp->data.u.str.data);
+ else
+ lua_pushnil(L);
+ break;
+ default:
+ lua_pushnil(L);
+ break;
+ }
+ return 1;
+}
+
+/* The following functions are used to convert a struct sample
+ * into Lua strings. This is useful to convert the return value of
+ * fetches or converters.
+ */
+__LJMP static int hlua_smp2lua_str(lua_State *L, struct sample *smp)
+{
+ switch (smp->data.type) {
+
+ case SMP_T_BIN:
+ case SMP_T_STR:
+ lua_pushlstring(L, smp->data.u.str.area, smp->data.u.str.data);
+ break;
+
+ case SMP_T_METH:
+ switch (smp->data.u.meth.meth) {
+ case HTTP_METH_OPTIONS: lua_pushstring(L, "OPTIONS"); break;
+ case HTTP_METH_GET: lua_pushstring(L, "GET"); break;
+ case HTTP_METH_HEAD: lua_pushstring(L, "HEAD"); break;
+ case HTTP_METH_POST: lua_pushstring(L, "POST"); break;
+ case HTTP_METH_PUT: lua_pushstring(L, "PUT"); break;
+ case HTTP_METH_DELETE: lua_pushstring(L, "DELETE"); break;
+ case HTTP_METH_TRACE: lua_pushstring(L, "TRACE"); break;
+ case HTTP_METH_CONNECT: lua_pushstring(L, "CONNECT"); break;
+ case HTTP_METH_OTHER:
+ lua_pushlstring(L, smp->data.u.meth.str.area, smp->data.u.meth.str.data);
+ break;
+ default:
+ lua_pushstring(L, "");
+ break;
+ }
+ break;
+
+ case SMP_T_SINT:
+ case SMP_T_BOOL:
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ case SMP_T_ADDR: /* This type is never used to qualify a sample. */
+ if (sample_casts[smp->data.type][SMP_T_STR] &&
+ sample_casts[smp->data.type][SMP_T_STR](smp))
+ lua_pushlstring(L, smp->data.u.str.area, smp->data.u.str.data);
+ else
+ lua_pushstring(L, "");
+ break;
+ default:
+ lua_pushstring(L, "");
+ break;
+ }
+ return 1;
+}
+
+/* The following function is used to convert a Lua type to a
+ * struct sample. This is useful to provide data from LUA code to
+ * a converter.
+ *
+ * Note: although lua_tolstring() may raise a memory error according to
+ * the lua documentation, in practice this could only happen when
+ * lua_tolstring() is used on a number (lua will try to push the number as a
+ * string on the stack, and this may result in a memory failure), so here we
+ * assume that hlua_lua2smp() will never raise an exception since it is
+ * exclusively used with lua string inputs.
+ *
+ * Note2: You should be extra careful when using <smp> argument, since
+ * string arguments rely on lua_tolstring() which returns a pointer to lua
+ * object that may be garbage collected at any time when removed from lua
+ * stack, thus you should make sure that <smp> is only used from a local
+ * scope within lua context (not exported or stored in a lua-independent
+ * ctx) and that related lua object still exists when accessing arg data.
+ * See: https://www.lua.org/manual/5.4/manual.html#4.1.3
+ *
+ * If you don't comply with this usage restriction, then you should consider
+ * duplicating the smp using smp_dup() to make it portable (little overhead),
+ * as this will ensure that the smp always points to valid memory block.
+ */
+static int hlua_lua2smp(lua_State *L, int ud, struct sample *smp)
+{
+ switch (lua_type(L, ud)) {
+
+ case LUA_TNUMBER:
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = lua_tointeger(L, ud);
+ break;
+
+
+ case LUA_TBOOLEAN:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = lua_toboolean(L, ud);
+ break;
+
+ case LUA_TSTRING:
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.area = (char *)lua_tolstring(L, ud, &smp->data.u.str.data);
+ /* We don't know the actual size of the underlying allocation, so be conservative. */
+ smp->data.u.str.size = smp->data.u.str.data+1; /* count the terminating null byte */
+ smp->data.u.str.head = 0;
+ break;
+
+ case LUA_TUSERDATA:
+ case LUA_TNIL:
+ case LUA_TTABLE:
+ case LUA_TFUNCTION:
+ case LUA_TTHREAD:
+ case LUA_TLIGHTUSERDATA:
+ case LUA_TNONE:
+ default:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+ break;
+ }
+ return 1;
+}
+
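+/* Illustrative sketch (not part of the original sources): making the
+ * resulting sample usable outside of the current lua scope, per the
+ * usage restriction described above:
+ */
+#if 0
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ hlua_lua2smp(L, -1, &smp);
+ smp_dup(&smp); /* copies the string out of lua-managed memory */
+#endif
+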
+/* This function checks that the "argp" built by another conversion function
+ * is consistent with the expected argp defined by the "mask". The function
+ * returns true or false. It may adjust the types if they are compatible.
+ *
+ * This function assumes that the argp argument contains ARGM_NBARGS + 1
+ * entries and that there is at least one stop at the last position.
+ */
+__LJMP int hlua_lua2arg_check(lua_State *L, int first, struct arg *argp,
+ uint64_t mask, struct proxy *p)
+{
+ int min_arg;
+ int idx;
+ struct proxy *px;
+ struct userlist *ul;
+ struct my_regex *reg;
+ const char *msg = NULL;
+ char *sname, *pname, *err = NULL;
+
+ idx = 0;
+ min_arg = ARGM(mask);
+ mask >>= ARGM_BITS;
+
+ while (1) {
+ struct buffer tmp = BUF_NULL;
+
+ /* Check for mandatory arguments. */
+ if (argp[idx].type == ARGT_STOP) {
+ if (idx < min_arg) {
+
+ /* If an argument other than the first one is missing, we return an error. */
+ if (idx > 0) {
+ msg = "Mandatory argument expected";
+ goto error;
+ }
+
+ /* If the first argument has a certain type, some default values
+ * may be used. See the function smp_resolve_args().
+ */
+ switch (mask & ARGT_MASK) {
+
+ case ARGT_FE:
+ if (!(p->cap & PR_CAP_FE)) {
+ msg = "Mandatory argument expected";
+ goto error;
+ }
+ argp[idx].data.prx = p;
+ argp[idx].type = ARGT_FE;
+ argp[idx+1].type = ARGT_STOP;
+ break;
+
+ case ARGT_BE:
+ if (!(p->cap & PR_CAP_BE)) {
+ msg = "Mandatory argument expected";
+ goto error;
+ }
+ argp[idx].data.prx = p;
+ argp[idx].type = ARGT_BE;
+ argp[idx+1].type = ARGT_STOP;
+ break;
+
+ case ARGT_TAB:
+ if (!p->table) {
+ msg = "Mandatory argument expected";
+ goto error;
+ }
+ argp[idx].data.t = p->table;
+ argp[idx].type = ARGT_TAB;
+ argp[idx+1].type = ARGT_STOP;
+ break;
+
+ default:
+ msg = "Mandatory argument expected";
+ goto error;
+ break;
+ }
+ }
+ break;
+ }
+
+ /* Check whether the number of provided arguments exceeds the expected count. */
+ if ((mask & ARGT_MASK) == ARGT_STOP &&
+ argp[idx].type != ARGT_STOP) {
+ msg = "Last argument expected";
+ goto error;
+ }
+
+ if ((mask & ARGT_MASK) == ARGT_STOP &&
+ argp[idx].type == ARGT_STOP) {
+ break;
+ }
+
+ /* Convert some argument types. The strings in argp[] are not
+ * duplicated yet.
+ */
+ switch (mask & ARGT_MASK) {
+ case ARGT_SINT:
+ if (argp[idx].type != ARGT_SINT) {
+ msg = "integer expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_SINT;
+ break;
+
+ case ARGT_TIME:
+ if (argp[idx].type != ARGT_SINT) {
+ msg = "integer expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_TIME;
+ break;
+
+ case ARGT_SIZE:
+ if (argp[idx].type != ARGT_SINT) {
+ msg = "integer expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_SIZE;
+ break;
+
+ case ARGT_FE:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ argp[idx].data.prx = proxy_fe_by_name(argp[idx].data.str.area);
+ if (!argp[idx].data.prx) {
+ msg = "frontend doesn't exist";
+ goto error;
+ }
+ argp[idx].type = ARGT_FE;
+ break;
+
+ case ARGT_BE:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ argp[idx].data.prx = proxy_be_by_name(argp[idx].data.str.area);
+ if (!argp[idx].data.prx) {
+ msg = "backend doesn't exist";
+ goto error;
+ }
+ argp[idx].type = ARGT_BE;
+ break;
+
+ case ARGT_TAB:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ argp[idx].data.t = stktable_find_by_name(argp[idx].data.str.area);
+ if (!argp[idx].data.t) {
+ msg = "table doesn't exist";
+ goto error;
+ }
+ argp[idx].type = ARGT_TAB;
+ break;
+
+ case ARGT_SRV:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ sname = strrchr(argp[idx].data.str.area, '/');
+ if (sname) {
+ *sname++ = '\0';
+ pname = argp[idx].data.str.area;
+ px = proxy_be_by_name(pname);
+ if (!px) {
+ msg = "backend doesn't exist";
+ goto error;
+ }
+ }
+ else {
+ sname = argp[idx].data.str.area;
+ px = p;
+ }
+ argp[idx].data.srv = findserver(px, sname);
+ if (!argp[idx].data.srv) {
+ msg = "server doesn't exist";
+ goto error;
+ }
+ argp[idx].type = ARGT_SRV;
+ break;
+
+ case ARGT_IPV4:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ if (!inet_pton(AF_INET, argp[idx].data.str.area, &argp[idx].data.ipv4)) {
+ msg = "invalid IPv4 address";
+ goto error;
+ }
+ argp[idx].type = ARGT_IPV4;
+ break;
+
+ case ARGT_MSK4:
+ if (argp[idx].type == ARGT_SINT)
+ len2mask4(argp[idx].data.sint, &argp[idx].data.ipv4);
+ else if (argp[idx].type == ARGT_STR) {
+ if (!str2mask(argp[idx].data.str.area, &argp[idx].data.ipv4)) {
+ msg = "invalid IPv4 mask";
+ goto error;
+ }
+ }
+ else {
+ msg = "integer or string expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_MSK4;
+ break;
+
+ case ARGT_IPV6:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ if (!inet_pton(AF_INET6, argp[idx].data.str.area, &argp[idx].data.ipv6)) {
+ msg = "invalid IPv6 address";
+ goto error;
+ }
+ argp[idx].type = ARGT_IPV6;
+ break;
+
+ case ARGT_MSK6:
+ if (argp[idx].type == ARGT_SINT)
+ len2mask6(argp[idx].data.sint, &argp[idx].data.ipv6);
+ else if (argp[idx].type == ARGT_STR) {
+ if (!str2mask6(argp[idx].data.str.area, &argp[idx].data.ipv6)) {
+ msg = "invalid IPv6 mask";
+ goto error;
+ }
+ }
+ else {
+ msg = "integer or string expected";
+ goto error;
+ }
+ argp[idx].type = ARGT_MSK6;
+ break;
+
+ case ARGT_REG:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ reg = regex_comp(argp[idx].data.str.area, !(argp[idx].type_flags & ARGF_REG_ICASE), 1, &err);
+ if (!reg) {
+ msg = lua_pushfstring(L, "error compiling regex '%s' : '%s'",
+ argp[idx].data.str.area, err);
+ free(err);
+ goto error;
+ }
+ argp[idx].type = ARGT_REG;
+ argp[idx].data.reg = reg;
+ break;
+
+ case ARGT_USR:
+ if (argp[idx].type != ARGT_STR) {
+ msg = "string expected";
+ goto error;
+ }
+ if (p->uri_auth && p->uri_auth->userlist &&
+ strcmp(p->uri_auth->userlist->name, argp[idx].data.str.area) == 0)
+ ul = p->uri_auth->userlist;
+ else
+ ul = auth_find_userlist(argp[idx].data.str.area);
+
+ if (!ul) {
+ msg = lua_pushfstring(L, "unable to find userlist '%s'", argp[idx].data.str.area);
+ goto error;
+ }
+ argp[idx].type = ARGT_USR;
+ argp[idx].data.usr = ul;
+ break;
+
+ case ARGT_STR:
+ if (!chunk_dup(&tmp, &argp[idx].data.str)) {
+ msg = "unable to duplicate string arg";
+ goto error;
+ }
+ argp[idx].data.str = tmp;
+ break;
+
+ case ARGT_MAP:
+ msg = "type not yet supported";
+ goto error;
+ break;
+
+ }
+
+ /* Check for type of argument. */
+ if ((mask & ARGT_MASK) != argp[idx].type) {
+ msg = lua_pushfstring(L, "'%s' expected, got '%s'",
+ arg_type_names[(mask & ARGT_MASK)],
+ arg_type_names[argp[idx].type & ARGT_MASK]);
+ goto error;
+ }
+
+ /* Next argument. */
+ mask >>= ARGT_BITS;
+ idx++;
+ }
+ return 0;
+
+ error:
+ argp[idx].type = ARGT_STOP;
+ free_args(argp);
+ WILL_LJMP(luaL_argerror(L, first + idx, msg));
+ return 0; /* Never reached */
+}
+
+/*
+ * The following functions are used to make the correspondence between the
+ * executing lua pointer and the "struct hlua *" that contains the context.
+ *
+ * - hlua_gethlua : return the hlua context associated with a lua_State.
+ * - hlua_sethlua : create the association between the hlua context and the lua_State.
+ */
+inline struct hlua *hlua_gethlua(lua_State *L)
+{
+ struct hlua **hlua = lua_getextraspace(L);
+ return *hlua;
+}
+static inline void hlua_sethlua(struct hlua *hlua)
+{
+ struct hlua **hlua_store = lua_getextraspace(hlua->T);
+ *hlua_store = hlua;
+}
+
+/* Will return a non-NULL string indicating the Lua call trace if the caller
+ * currently is executing from within a Lua function. One line per entry will
+ * be emitted, and each extra line will be prefixed with <pfx>. If a current
+ * Lua function is not detected, NULL is returned.
+ */
+const char *hlua_show_current_location(const char *pfx)
+{
+ lua_State *L;
+ lua_Debug ar;
+
+ /* global or per-thread stack initializing ? */
+ if (hlua_state_id != -1 && (L = hlua_states[hlua_state_id]) && lua_getstack(L, 0, &ar))
+ return hlua_traceback(L, pfx);
+
+ /* per-thread stack running ? */
+ if (hlua_states[tid + 1] && (L = hlua_states[tid + 1]) && lua_getstack(L, 0, &ar))
+ return hlua_traceback(L, pfx);
+
+ /* global stack running ? */
+ if (hlua_states[0] && (L = hlua_states[0]) && lua_getstack(L, 0, &ar))
+ return hlua_traceback(L, pfx);
+
+ return NULL;
+}
+
+/* This function is used to send logs. It tries to send them to:
+ * - the log target applicable in the current context, OR
+ * - stderr when no logger is in use for the current context
+ */
+static inline void hlua_sendlog(struct proxy *px, int level, const char *msg)
+{
+ struct tm tm;
+ char *p;
+
+ /* Cleanup the log message. */
+ p = trash.area;
+ for (; *msg != '\0'; msg++, p++) {
+ if (p >= trash.area + trash.size - 1) {
+ /* Truncate the message if it exceeds the buffer size. */
+ *(p-4) = ' ';
+ *(p-3) = '.';
+ *(p-2) = '.';
+ *(p-1) = '.';
+ break;
+ }
+ if (isprint((unsigned char)*msg))
+ *p = *msg;
+ else
+ *p = '.';
+ }
+ *p = '\0';
+
+ if (hlua_log_opts & HLUA_LOG_LOGGERS_ON)
+ send_log(px, level, "%s\n", trash.area);
+
+ if (!(global.mode & MODE_QUIET) || (global.mode & (MODE_VERBOSE | MODE_STARTING))) {
+ if (!(hlua_log_opts & HLUA_LOG_STDERR_MASK))
+ return;
+
+ /* when logging via stderr is set to 'auto', it behaves like 'off' unless one of:
+ * - logging via loggers is disabled
+ * - this is a non-proxy context and there is no global logger configured
+ * - this is a proxy context and the proxy has no logger configured
+ */
+ if ((hlua_log_opts & (HLUA_LOG_STDERR_MASK | HLUA_LOG_LOGGERS_ON)) == (HLUA_LOG_STDERR_AUTO | HLUA_LOG_LOGGERS_ON)) {
+ /* AUTO=OFF in non-proxy context only if at least one global logger is defined */
+ if ((px == NULL) && (!LIST_ISEMPTY(&global.loggers)))
+ return;
+
+ /* AUTO=OFF in proxy context only if at least one logger is configured for the proxy */
+ if ((px != NULL) && (!LIST_ISEMPTY(&px->loggers)))
+ return;
+ }
+
+ if (level == LOG_DEBUG && !(global.mode & MODE_DEBUG))
+ return;
+
+ get_localtime(date.tv_sec, &tm);
+ fprintf(stderr, "[%s] %03d/%02d%02d%02d (%d) : %s\n",
+ log_levels[level], tm.tm_yday, tm.tm_hour, tm.tm_min, tm.tm_sec,
+ (int)getpid(), trash.area);
+ fflush(stderr);
+ }
+}
+
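+/* For reference, this helper backs the Lua logging API, e.g. (illustrative):
+ *
+ *   core.log(core.info, "message from lua")
+ */
+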
+/* This function just ensures that the yield will always be
+ * returned with a timeout and permits setting some flags.
+ * <timeout> is a tick value.
+ */
+__LJMP void hlua_yieldk(lua_State *L, int nresults, lua_KContext ctx,
+ lua_KFunction k, int timeout, unsigned int flags)
+{
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ return;
+ }
+
+ /* Set the wake timeout. If timeout is required, we set
+ * the expiration time.
+ */
+ hlua->wake_time = timeout;
+
+ hlua->flags |= flags;
+
+ /* Process the yield. */
+ MAY_LJMP(lua_yieldk(L, nresults, ctx, k));
+}
+
+/* This function initialises the Lua environment stored in the stream.
+ * It must be called at the start of the stream. This function creates
+ * a LUA coroutine. It cannot be used to create the main LUA context.
+ *
+ * This function is special: it initialises a new Lua thread. If the
+ * initialisation fails (example: out of memory error), the lua function
+ * throws an error (longjmp).
+ *
+ * This function manipulates two Lua stacks: the main one and the thread's.
+ * Only the main stack can fail. The thread is not manipulated. This function
+ * MUST NOT manipulate the created thread stack state, because it is not
+ * protected against errors thrown by the thread stack.
+ */
+int hlua_ctx_init(struct hlua *lua, int state_id, struct task *task)
+{
+ lua->Mref = LUA_REFNIL;
+ lua->flags = 0;
+ lua->gc_count = 0;
+ lua->wake_time = TICK_ETERNITY;
+ lua->state_id = state_id;
+ hlua_timer_init(&lua->timer, 0); /* default value, no timeout */
+ LIST_INIT(&lua->com);
+ MT_LIST_INIT(&lua->hc_list);
+ if (!SET_SAFE_LJMP_PARENT(lua)) {
+ lua->Tref = LUA_REFNIL;
+ return 0;
+ }
+ lua->T = lua_newthread(hlua_states[state_id]);
+ if (!lua->T) {
+ lua->Tref = LUA_REFNIL;
+ RESET_SAFE_LJMP_PARENT(lua);
+ return 0;
+ }
+ hlua_sethlua(lua);
+ lua->Tref = luaL_ref(hlua_states[state_id], LUA_REGISTRYINDEX);
+ lua->task = task;
+ RESET_SAFE_LJMP_PARENT(lua);
+ return 1;
+}
+
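+/* Illustrative sketch (not part of the original sources) of the typical
+ * allocation/initialisation pattern used by callers, assuming <task> and
+ * <state_id> are already known:
+ */
+#if 0
+ struct hlua *hlua;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua)
+ return 0;
+ if (!hlua_ctx_init(hlua, state_id, task)) {
+ pool_free(pool_head_hlua, hlua);
+ return 0;
+ }
+ /* ... push the function ref and its arguments, then resume ... */
+ hlua_ctx_destroy(hlua);
+#endif
+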
+/* kill all httpclients associated with this hlua task.
+ * We must take extra precautions as we're manipulating lua-exposed
+ * objects without the main lua lock.
+ */
+static void hlua_httpclient_destroy_all(struct hlua *hlua)
+{
+ struct hlua_httpclient *hlua_hc;
+
+ /* use thread-safe accessors for hc_list since GC cycle initiated by
+ * another thread sharing the same main lua stack (lua coroutine)
+ * could execute hlua_httpclient_gc() on the hlua->hc_list items
+ * in parallel: Lua GC applies on the main stack, it is not limited to
+ * a single coroutine stack, see Github issue #2037 for reference.
+ * Remember, coroutines created using lua_newthread() are not meant to
+ * be thread safe in Lua. (From lua co-author:
+ * http://lua-users.org/lists/lua-l/2011-07/msg00072.html)
+ *
+ * This security measure is superfluous when 'lua-load-per-thread' is used
+ * since in this case coroutines exclusively run on the same thread
+ * (main stack is not shared between OS threads).
+ */
+ while ((hlua_hc = MT_LIST_POP(&hlua->hc_list, typeof(hlua_hc), by_hlua))) {
+ httpclient_stop_and_destroy(hlua_hc->hc);
+ hlua_hc->hc = NULL;
+ }
+}
+
+
+/* Used to destroy the Lua coroutine when the attached stream or task
+ * is destroyed. It also destroys the memory context. The struct "lua"
+ * will be freed.
+ */
+void hlua_ctx_destroy(struct hlua *lua)
+{
+ if (!lua)
+ return;
+
+ if (!lua->T)
+ goto end;
+
+ /* clean all running httpclient */
+ hlua_httpclient_destroy_all(lua);
+
+ /* Purge all the pending signals. */
+ notification_purge(&lua->com);
+
+ if (!SET_SAFE_LJMP(lua))
+ return;
+ luaL_unref(lua->T, LUA_REGISTRYINDEX, lua->Mref);
+ RESET_SAFE_LJMP(lua);
+
+ if (!SET_SAFE_LJMP_PARENT(lua))
+ return;
+ luaL_unref(hlua_states[lua->state_id], LUA_REGISTRYINDEX, lua->Tref);
+ RESET_SAFE_LJMP_PARENT(lua);
+ /* Force a garbage collection pass. If the Lua program finished
+ * without error, we run the GC on the thread pointer, which frees
+ * all the unused memory.
+ * If the thread finished with an error or is currently yielded,
+ * it seems that the GC applied to the thread doesn't clean anything,
+ * so we run the GC on the main thread.
+ * NOTE: this action may lock all the Lua threads until the end of
+ * the garbage collection.
+ */
+ if (lua->gc_count) {
+ if (!SET_SAFE_LJMP_PARENT(lua))
+ return;
+ lua_gc(hlua_states[lua->state_id], LUA_GCCOLLECT, 0);
+ RESET_SAFE_LJMP_PARENT(lua);
+ }
+
+ lua->T = NULL;
+
+end:
+ pool_free(pool_head_hlua, lua);
+}
+
+/* This function is used to restore the Lua context when a coroutine
+ * fails. It copies the common memory between the old coroutine
+ * and the new one. The old coroutine is destroyed and replaced by
+ * the new one.
+ * If the flag "keep_msg" is set, the last entry of the old stack is
+ * assumed to be a string error message and is copied to the new stack.
+ */
+static int hlua_ctx_renew(struct hlua *lua, int keep_msg)
+{
+ lua_State *T;
+ int new_ref;
+
+ /* New Lua coroutine. */
+ T = lua_newthread(hlua_states[lua->state_id]);
+ if (!T)
+ return 0;
+
+ /* Copy last error message. */
+ if (keep_msg)
+ lua_xmove(lua->T, T, 1);
+
+ /* Copy data between the coroutines. */
+ lua_rawgeti(lua->T, LUA_REGISTRYINDEX, lua->Mref);
+ lua_xmove(lua->T, T, 1);
+ new_ref = luaL_ref(T, LUA_REGISTRYINDEX); /* Value popped. */
+
+ /* Destroy old data. */
+ luaL_unref(lua->T, LUA_REGISTRYINDEX, lua->Mref);
+
+ /* The thread is garbage collected by Lua. */
+ luaL_unref(hlua_states[lua->state_id], LUA_REGISTRYINDEX, lua->Tref);
+
+ /* Fill the struct with the new coroutine values. */
+ lua->Mref = new_ref;
+ lua->T = T;
+ lua->Tref = luaL_ref(hlua_states[lua->state_id], LUA_REGISTRYINDEX);
+
+ /* Set context. */
+ hlua_sethlua(lua);
+
+ return 1;
+}
+
+void hlua_hook(lua_State *L, lua_Debug *ar)
+{
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return;
+
+ if (hlua->T != L) {
+ /* We don't want to enforce a yield on a sub coroutine, since
+ * we have no guarantees that the yield will be handled properly.
+ * Indeed, only the hlua->T coroutine is being handled through
+ * hlua_ctx_resume() function.
+ *
+ * Instead, we simply check for timeouts and wait for the sub
+ * coroutine to finish..
+ */
+ goto check_timeout;
+ }
+
+ /* Lua cannot yield when it is returning from a function,
+ * so we can set the interrupt hook to 1 instruction,
+ * expecting that the function is finished.
+ */
+ if (lua_gethookmask(L) & LUA_MASKRET) {
+ lua_sethook(hlua->T, hlua_hook, LUA_MASKCOUNT, 1);
+ return;
+ }
+
+ /* If we interrupt the Lua processing in yieldable state, we yield.
+ * If the state is not yieldable, trying yield causes an error.
+ */
+ if (lua_isyieldable(L)) {
+ /* note: for converters/fetches.. where yielding is not allowed
+ * hlua_ctx_resume() will simply perform a goto resume_execution
+ * instead of rescheduling hlua->task.
+ * also: hlua_ctx_resume() will take care of checking execution
+ * timeout and re-applying the hook as needed.
+ */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, NULL, TICK_ETERNITY, HLUA_CTRLYIELD));
+ /* The lua docs say that the hook should return immediately after lua_yieldk.
+ *
+ * From: https://www.lua.org/manual/5.3/manual.html#lua_yieldk
+ *
+ * Moreover, it seems that we don't want to continue after the yield,
+ * because the end of the function is about handling unyieldable functions,
+ * which is not the case here.
+ *
+ * -> if we don't return, lua_sethook gets incorrectly set with MASKRET later
+ * in the function.
+ */
+ return;
+ }
+
+ check_timeout:
+ /* If we cannot yield, check the timeout. */
+ if (!hlua_timer_check(&hlua->timer)) {
+ lua_pushfstring(L, "execution timeout");
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Try to interrupt the process at the end of the current
+ * unyieldable function.
+ */
+ lua_sethook(hlua->T, hlua_hook, LUA_MASKRET|LUA_MASKCOUNT, hlua_nb_instruction);
+}
+
+/* This function starts or resumes the Lua stack execution. If the flag
+ * "yield_allowed" is not set and the LUA stack execution returns a yield,
+ * the function returns an error.
+ *
+ * The function can return the following values:
+ * - HLUA_E_OK : The execution terminated without any error.
+ * - HLUA_E_AGAIN : The execution must continue at the next associated
+ * task wakeup.
+ * - HLUA_E_ERRMSG : An error has occurred, an error message is set at
+ * the top of the stack.
+ * - HLUA_E_ERR : An error has occurred without an error message.
+ * - HLUA_E_ETMOUT : The configured execution timeout was exceeded.
+ * - HLUA_E_NOMEM : Lua ran out of memory.
+ * - HLUA_E_YIELD : The execution yielded while yields are not allowed.
+ *
+ * If an error occurred, the stack is renewed and it is ready to run new
+ * LUA code.
+ */
+static enum hlua_exec hlua_ctx_resume(struct hlua *lua, int yield_allowed)
+{
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
+ int nres;
+#endif
+ int ret;
+ const char *msg;
+ const char *trace;
+
+ /* Lock the whole Lua execution. This lock must be taken before the
+ * label "resume_execution".
+ */
+ hlua_lock(lua);
+
+ /* reset the timer as we might be re-entering the function to
+ * resume the coroutine after a successful yield
+ * (cumulative time will be updated)
+ */
+ hlua_timer_reset(&lua->timer);
+
+resume_execution:
+
+ /* This hook interrupts the Lua processing each 'hlua_nb_instruction'
+ * instructions. It is used to prevent infinite loops.
+ */
+ lua_sethook(lua->T, hlua_hook, LUA_MASKCOUNT, hlua_nb_instruction);
+
+ /* Remove all flags except the running flags. */
+ HLUA_SET_RUN(lua);
+ HLUA_CLR_CTRLYIELD(lua);
+ HLUA_CLR_WAKERESWR(lua);
+ HLUA_CLR_WAKEREQWR(lua);
+ HLUA_CLR_NOYIELD(lua);
+ if (!yield_allowed)
+ HLUA_SET_NOYIELD(lua);
+
+ /* reset wake_time. */
+ lua->wake_time = TICK_ETERNITY;
+
+ /* start the timer as we're about to start lua processing */
+ hlua_timer_start(&lua->timer);
+
+ /* Call the function. */
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
+ ret = lua_resume(lua->T, hlua_states[lua->state_id], lua->nargs, &nres);
+#else
+ ret = lua_resume(lua->T, hlua_states[lua->state_id], lua->nargs);
+#endif
+
+ /* out of lua processing, stop the timer */
+ hlua_timer_stop(&lua->timer);
+
+ /* reset nargs because those possibly passed to the lua_resume() call
+ * were already consumed, and since we may call lua_resume() again
+ * after a successful yield, we don't want to pass stale nargs hint
+ * to the Lua API. As such, nargs should be set explicitly before each
+ * lua_resume() (or hlua_ctx_resume()) invocation if needed.
+ */
+ lua->nargs = 0;
+
+ switch (ret) {
+
+ case LUA_OK:
+ ret = HLUA_E_OK;
+ break;
+
+ case LUA_YIELD:
+ /* Check if the execution timeout is expired. If it is the case, we
+ * break the Lua execution.
+ */
+ if (!hlua_timer_check(&lua->timer)) {
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ ret = HLUA_E_ETMOUT;
+ break;
+ }
+ /* Process the forced yield. If the general yield is not allowed or
+ * if no task is associated with the current Lua execution
+ * coroutine, we resume the execution. Otherwise we want to return to
+ * the scheduler and to be woken up again to continue the
+ * current Lua execution, so we schedule our own task.
+ */
+ if (HLUA_IS_CTRLYIELDING(lua)) {
+ if (!yield_allowed || !lua->task)
+ goto resume_execution;
+ task_wakeup(lua->task, TASK_WOKEN_MSG);
+ }
+ if (!yield_allowed) {
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ ret = HLUA_E_YIELD;
+ break;
+ }
+ ret = HLUA_E_AGAIN;
+ break;
+
+ case LUA_ERRRUN:
+
+ /* Special exit case. The traditional exit is returned as an error
+ * because errors are the only mean to return immediately from
+ * a lua execution.
+ */
+ if (lua->flags & HLUA_EXIT) {
+ ret = HLUA_E_OK;
+ hlua_ctx_renew(lua, 1);
+ break;
+ }
+
+ lua->wake_time = TICK_ETERNITY;
+ if (!lua_checkstack(lua->T, 1)) {
+ ret = HLUA_E_ERR;
+ break;
+ }
+ msg = lua_tostring(lua->T, -1);
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ trace = hlua_traceback(lua->T, ", ");
+ if (msg)
+ lua_pushfstring(lua->T, "[state-id %d] runtime error: %s from %s", lua->state_id, msg, trace);
+ else
+ lua_pushfstring(lua->T, "[state-id %d] unknown runtime error from %s", lua->state_id, trace);
+ ret = HLUA_E_ERRMSG;
+ break;
+
+ case LUA_ERRMEM:
+ lua->wake_time = TICK_ETERNITY;
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ ret = HLUA_E_NOMEM;
+ break;
+
+ case LUA_ERRERR:
+ lua->wake_time = TICK_ETERNITY;
+ if (!lua_checkstack(lua->T, 1)) {
+ ret = HLUA_E_ERR;
+ break;
+ }
+ msg = lua_tostring(lua->T, -1);
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ if (msg)
+ lua_pushfstring(lua->T, "[state-id %d] message handler error: %s", lua->state_id, msg);
+ else
+ lua_pushfstring(lua->T, "[state-id %d] message handler error", lua->state_id);
+ ret = HLUA_E_ERRMSG;
+ break;
+
+ default:
+ lua->wake_time = TICK_ETERNITY;
+ lua_settop(lua->T, 0); /* Empty the stack. */
+ ret = HLUA_E_ERR;
+ break;
+ }
+
+ switch (ret) {
+ case HLUA_E_AGAIN:
+ break;
+
+ case HLUA_E_ERRMSG:
+ notification_purge(&lua->com);
+ hlua_ctx_renew(lua, 1);
+ HLUA_CLR_RUN(lua);
+ break;
+
+ case HLUA_E_ETMOUT:
+ case HLUA_E_NOMEM:
+ case HLUA_E_YIELD:
+ case HLUA_E_ERR:
+ HLUA_CLR_RUN(lua);
+ notification_purge(&lua->com);
+ hlua_ctx_renew(lua, 0);
+ break;
+
+ case HLUA_E_OK:
+ HLUA_CLR_RUN(lua);
+ notification_purge(&lua->com);
+ break;
+ }
+
+ /* This is the main exit point, remove the Lua lock. */
+ hlua_unlock(lua);
+
+ return ret;
+}
+
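+/* Illustrative sketch (not part of the original sources) of how a caller
+ * typically consumes hlua_ctx_resume() return codes from a task handler:
+ */
+#if 0
+ switch (hlua_ctx_resume(hlua, 1)) {
+ case HLUA_E_AGAIN:
+ /* yielded: wake_time was set, the task will run again later */
+ break;
+ case HLUA_E_ERRMSG:
+ /* the renewed stack holds the error message on top */
+ hlua_lock(hlua);
+ SEND_ERR(NULL, "Lua task: %s.\n", lua_tostring(hlua->T, -1));
+ hlua_unlock(hlua);
+ break;
+ case HLUA_E_OK:
+ default:
+ break;
+ }
+#endif
+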
+/* This function exits the current Lua code. */
+__LJMP static int hlua_done(lua_State *L)
+{
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ hlua->flags |= HLUA_EXIT;
+ WILL_LJMP(lua_error(L));
+
+ return 0;
+}
+
+/* This function is a LUA binding. It provides a function
+ * for deleting an ACL entry from a referenced ACL file.
+ */
+__LJMP static int hlua_del_acl(lua_State *L)
+{
+ const char *name;
+ const char *key;
+ struct pat_ref *ref;
+
+ MAY_LJMP(check_args(L, 2, "del_acl"));
+
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+ key = MAY_LJMP(luaL_checkstring(L, 2));
+
+ ref = pat_ref_lookup(name);
+ if (!ref)
+ WILL_LJMP(luaL_error(L, "'del_acl': unknown acl file '%s'", name));
+
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ref->lock);
+ pat_ref_delete(ref, key);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ref->lock);
+ return 0;
+}
+
+/* This function is a LUA binding. It provides a function
+ * for deleting a map entry from a referenced map file.
+ */
+static int hlua_del_map(lua_State *L)
+{
+ const char *name;
+ const char *key;
+ struct pat_ref *ref;
+
+ MAY_LJMP(check_args(L, 2, "del_map"));
+
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+ key = MAY_LJMP(luaL_checkstring(L, 2));
+
+ ref = pat_ref_lookup(name);
+ if (!ref)
+ WILL_LJMP(luaL_error(L, "'del_map': unknown acl file '%s'", name));
+
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ref->lock);
+ pat_ref_delete(ref, key);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ref->lock);
+ return 0;
+}
+
+/* This function is a LUA binding. It provides a function
+ * for adding an ACL pattern to a referenced ACL file.
+ */
+static int hlua_add_acl(lua_State *L)
+{
+ const char *name;
+ const char *key;
+ struct pat_ref *ref;
+
+ MAY_LJMP(check_args(L, 2, "add_acl"));
+
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+ key = MAY_LJMP(luaL_checkstring(L, 2));
+
+ ref = pat_ref_lookup(name);
+ if (!ref)
+ WILL_LJMP(luaL_error(L, "'add_acl': unknown acl file '%s'", name));
+
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ref->lock);
+ if (pat_ref_find_elt(ref, key) == NULL)
+ pat_ref_add(ref, key, NULL, NULL);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ref->lock);
+ return 0;
+}
+
+/* This function is a LUA binding. It provides a function
+ * for setting a map pattern and its sample in a referenced
+ * map file.
+ */
+static int hlua_set_map(lua_State *L)
+{
+ const char *name;
+ const char *key;
+ const char *value;
+ struct pat_ref *ref;
+
+ MAY_LJMP(check_args(L, 3, "set_map"));
+
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+ key = MAY_LJMP(luaL_checkstring(L, 2));
+ value = MAY_LJMP(luaL_checkstring(L, 3));
+
+ ref = pat_ref_lookup(name);
+ if (!ref)
+ WILL_LJMP(luaL_error(L, "'set_map': unknown map file '%s'", name));
+
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ref->lock);
+ if (pat_ref_find_elt(ref, key) != NULL)
+ pat_ref_set(ref, key, value, NULL, NULL);
+ else
+ pat_ref_add(ref, key, value, NULL);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ref->lock);
+ return 0;
+}
+
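+/* Illustrative Lua-side usage of the ACL/map runtime bindings above
+ * (the file paths are hypothetical and assumed to be loaded by the
+ * configuration):
+ *
+ *   core.add_acl("/etc/haproxy/blocklist.acl", "10.0.0.1")
+ *   core.del_acl("/etc/haproxy/blocklist.acl", "10.0.0.1")
+ *   core.set_map("/etc/haproxy/redirects.map", "/old", "/new")
+ *   core.del_map("/etc/haproxy/redirects.map", "/old")
+ */
+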
+/* This function is a LUA binding. It provides a function
+ * for retrieving a variable from the proc scope in core.
+ */
+static int hlua_core_get_var(lua_State *L)
+{
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 1, "get_var"));
+
+ name = MAY_LJMP(luaL_checklstring(L, 1, &len));
+
+ /* We can only retrieve information from the proc. scope */
+ /* FIXME: I didn't want to expose vars_hash_name from vars.c */
+ if (len < 5 || strncmp(name, "proc.", 5) != 0)
+ WILL_LJMP(luaL_error(L, "'get_var': Only 'proc.' scope allowed to be retrieved in 'core.get_var()'."));
+
+ memset(&smp, 0, sizeof(smp));
+ if (!vars_get_by_name(name, len, &smp, NULL)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ return MAY_LJMP(hlua_smp2lua(L, &smp));
+}
+
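+/* Illustrative Lua-side usage (the variable name is hypothetical):
+ *
+ *   local v = core.get_var("proc.my_var")
+ */
+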
+/* This function disables the sending of email through the
+ * legacy email sending function which is implemented using
+ * checks.
+ *
+ * It may not be used during runtime.
+ */
+__LJMP static int hlua_disable_legacy_mailers(lua_State *L)
+{
+ if (hlua_gethlua(L))
+ WILL_LJMP(luaL_error(L, "disable_legacy_mailers: "
+ "not available outside of init or body context"));
+ send_email_disabled = 1;
+ return 0;
+}
+
+/* A class is a chunk of memory that contains data. This data can be a table,
+ * an integer or user data. This data is associated with a metatable. This
+ * metatable has an original version registered in the global context with
+ * the name of the object (_G[<name>] = <metatable>).
+ *
+ * A metatable is a table that modifies the standard behavior of a standard
+ * access to the associated data. The entries of this new metatable are
+ * defined as follows:
+ *
+ * http://lua-users.org/wiki/MetatableEvents
+ *
+ * __index
+ *
+ * When we access an absent field in a table, the result is nil. This is
+ * true, but it is not the whole truth. Actually, such an access triggers
+ * the interpreter to look for an __index metamethod: if there is no
+ * such method, as usually happens, then the access results in nil;
+ * otherwise, the metamethod will provide the result.
+ *
+ * Control 'prototype' inheritance. When accessing "myTable[key]" and
+ * the key does not appear in the table, but the metatable has an __index
+ * property:
+ *
+ * - if the value is a function, the function is called, passing in the
+ * table and the key; the return value of that function is returned as
+ * the result.
+ *
+ * - if the value is another table, the value of the key in that table is
+ * asked for and returned (and if it doesn't exist in that table, but that
+ * table's metatable has an __index property, then it continues on up)
+ *
+ * - Use "rawget(myTable,key)" to skip this metamethod.
+ *
+ * http://www.lua.org/pil/13.4.1.html
+ *
+ * __newindex
+ *
+ * Like __index, but control property assignment.
+ *
+ * __mode - Control weak references. A string value with one or both
+ * of the characters 'k' and 'v' which specifies that the
+ * keys and/or values in the table are weak references.
+ *
+ * __call - Treat a table like a function. When a table is followed by
+ * parenthesis such as "myTable( 'foo' )" and the metatable has
+ * a __call key pointing to a function, that function is invoked
+ * (passing any specified arguments) and the return value is
+ * returned.
+ *
+ * __metatable - Hide the metatable. When "getmetatable( myTable )" is
+ * called, if the metatable for myTable has a __metatable
+ * key, the value of that key is returned instead of the
+ * actual metatable.
+ *
+ * __tostring - Control string representation. When the builtin
+ * "tostring( myTable )" function is called, if the metatable
+ * for myTable has a __tostring property set to a function,
+ * that function is invoked (passing myTable to it) and the
+ * return value is used as the string representation.
+ *
+ * __len - Control table length. When the table length is requested using
+ * the length operator ( '#' ), if the metatable for myTable has
+ * a __len key pointing to a function, that function is invoked
+ * (passing myTable to it) and the return value used as the value
+ * of "#myTable".
+ *
+ * __gc - Userdata finalizer code. When userdata is set to be garbage
+ * collected, if the metatable has a __gc field pointing to a
+ * function, that function is first invoked, passing the userdata
+ * to it. The __gc metamethod is not called for tables.
+ * (See http://lua-users.org/lists/lua-l/2006-11/msg00508.html)
+ *
+ * Special metamethods for redefining standard operators:
+ * http://www.lua.org/pil/13.1.html
+ *
+ * __add "+"
+ * __sub "-"
+ * __mul "*"
+ * __div "/"
+ * __unm "-" (unary minus)
+ * __pow "^"
+ * __concat ".."
+ *
+ * Special methods for redefining standard relations
+ * http://www.lua.org/pil/13.2.html
+ *
+ * __eq "=="
+ * __lt "<"
+ * __le "<="
+ */
+
+/*
+ *
+ *
+ * Class Map
+ *
+ *
+ */
+
+/* Returns a struct map_descriptor if the stack entry "ud" is
+ * a class Map, otherwise it throws an error.
+ */
+__LJMP static struct map_descriptor *hlua_checkmap(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_map_ref));
+}
+
+/* This function is the Map constructor. It doesn't need
+ * the class Map object. It creates and returns a new Map
+ * object. It must be called only during the "body" or "init"
+ * context because it performs some filesystem accesses.
+ */
+__LJMP static int hlua_map_new(struct lua_State *L)
+{
+ const char *fn;
+ int match = PAT_MATCH_STR;
+ struct sample_conv conv;
+ const char *file = "";
+ int line = 0;
+ lua_Debug ar;
+ char *err = NULL;
+ struct arg args[2];
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 2)
+ WILL_LJMP(luaL_error(L, "'new' needs at least 1 argument."));
+
+ fn = MAY_LJMP(luaL_checkstring(L, 1));
+
+ if (lua_gettop(L) >= 2) {
+ match = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (match < 0 || match >= PAT_MATCH_NUM)
+ WILL_LJMP(luaL_error(L, "'new' needs a valid match method."));
+ }
+
+ /* Get Lua filename and line number. */
+ if (lua_getstack(L, 1, &ar)) { /* check function at level */
+ lua_getinfo(L, "Sl", &ar); /* get info about it */
+ if (ar.currentline > 0) { /* is there info? */
+ file = ar.short_src;
+ line = ar.currentline;
+ }
+ }
+
+ /* fill fake sample_conv struct. */
+ conv.kw = ""; /* unused. */
+ conv.process = NULL; /* unused. */
+ conv.arg_mask = 0; /* unused. */
+ conv.val_args = NULL; /* unused. */
+ conv.out_type = SMP_T_STR;
+ conv.private = (void *)(long)match;
+ switch (match) {
+ case PAT_MATCH_STR: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_BEG: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_SUB: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_DIR: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_DOM: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_END: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_REG: conv.in_type = SMP_T_STR; break;
+ case PAT_MATCH_INT: conv.in_type = SMP_T_SINT; break;
+ case PAT_MATCH_IP: conv.in_type = SMP_T_ADDR; break;
+ default:
+ WILL_LJMP(luaL_error(L, "'new' doesn't support this match mode."));
+ }
+
+ /* fill fake args. */
+ args[0].type = ARGT_STR;
+ args[0].data.str.area = strdup(fn);
+ args[0].data.str.data = strlen(fn);
+ args[0].data.str.size = args[0].data.str.data+1;
+ args[1].type = ARGT_STOP;
+
+ /* load the map. */
+ if (!sample_load_map(args, &conv, file, line, &err)) {
+ /* error case: we can't use luaL_error because we must
+ * free the err variable.
+ */
+ luaL_where(L, 1);
+ lua_pushfstring(L, "'new': %s.", err);
+ lua_concat(L, 2);
+ free(err);
+ chunk_destroy(&args[0].data.str);
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* create the lua object. */
+ lua_newtable(L);
+ lua_pushlightuserdata(L, args[0].data.map);
+ lua_rawseti(L, -2, 0);
+
+ /* Pop a class Map metatable and assign it to the new object. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_map_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+__LJMP static inline int _hlua_map_lookup(struct lua_State *L, int str)
+{
+ struct map_descriptor *desc;
+ struct pattern *pat;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "lookup"));
+ desc = MAY_LJMP(hlua_checkmap(L, 1));
+ if (desc->pat.expect_type == SMP_T_SINT) {
+ smp.data.type = SMP_T_SINT;
+ smp.data.u.sint = MAY_LJMP(luaL_checkinteger(L, 2));
+ }
+ else {
+ smp.data.type = SMP_T_STR;
+ smp.flags = SMP_F_CONST;
+ smp.data.u.str.area = (char *)MAY_LJMP(luaL_checklstring(L, 2, (size_t *)&smp.data.u.str.data));
+ smp.data.u.str.size = smp.data.u.str.data + 1;
+ }
+
+ pat = pattern_exec_match(&desc->pat, &smp, 1);
+ if (!pat || !pat->data) {
+ if (str)
+ lua_pushstring(L, "");
+ else
+ lua_pushnil(L);
+ return 1;
+ }
+
+ /* The Lua pattern must return a string, so we can't check the returned type */
+ lua_pushlstring(L, pat->data->u.str.area, pat->data->u.str.data);
+ return 1;
+}
+
+__LJMP static int hlua_map_lookup(struct lua_State *L)
+{
+ return _hlua_map_lookup(L, 0);
+}
+
+__LJMP static int hlua_map_slookup(struct lua_State *L)
+{
+ return _hlua_map_lookup(L, 1);
+}
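+
+/* A hedged usage sketch from Lua, assuming the documented Map class
+ * bindings (constructor, match-method constants and the two lookup
+ * entry points above); the map file path and the "Map.ip" constant
+ * are placeholders here:
+ *
+ *   local m = Map.new("/path/to/file.map", Map.ip)
+ *   local v1 = m:lookup("10.0.0.1")   -- nil when no entry matches
+ *   local v2 = m:slookup("10.0.0.1")  -- "" (empty string) when no match
+ */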
+
+/*
+ *
+ *
+ * Class Socket
+ *
+ *
+ */
+
+__LJMP static struct hlua_socket *hlua_checksocket(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_socket_ref));
+}
+
+/* This function is the handler called for each I/O on the established
+ * connection. It is used to notify about space available to send or
+ * about data received.
+ */
+static void hlua_socket_handler(struct appctx *appctx)
+{
+ struct hlua_csk_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ notification_wake(&ctx->wake_on_read);
+ notification_wake(&ctx->wake_on_write);
+ return;
+ }
+
+ if (ctx->die) {
+ se_fl_set(appctx->sedesc, SE_FL_EOI|SE_FL_EOS);
+ notification_wake(&ctx->wake_on_read);
+ notification_wake(&ctx->wake_on_write);
+ return;
+ }
+
+ /* If we can't write, wakeup the pending write signals. */
+ if (channel_output_closed(sc_ic(sc)))
+ notification_wake(&ctx->wake_on_write);
+
+ /* If we can't read, wakeup the pending read signals. */
+ if (channel_input_closed(sc_oc(sc)))
+ notification_wake(&ctx->wake_on_read);
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ return;
+ }
+
+ /* This function is called after the connect. */
+ ctx->connected = 1;
+
+ /* Wake the tasks which want to write if the buffer has available space. */
+ if (channel_may_recv(sc_ic(sc)))
+ notification_wake(&ctx->wake_on_write);
+
+ /* Wake the tasks which want to read if the buffer contains data. */
+ if (co_data(sc_oc(sc)))
+ notification_wake(&ctx->wake_on_read);
+
+ /* If write notifications are registered, we consider we want
+ * to write, so we clear the blocking flag.
+ */
+ if (notification_registered(&ctx->wake_on_write))
+ applet_have_more_data(appctx);
+}
+
+static int hlua_socket_init(struct appctx *appctx)
+{
+ struct hlua_csk_ctx *csk_ctx = appctx->svcctx;
+ struct stream *s;
+
+ if (appctx_finalize_startup(appctx, socket_proxy, &BUF_NULL) == -1)
+ goto error;
+
+ s = appctx_strm(appctx);
+
+ /* Configure "right" stream connector. This stconn is used to connect
+ * and retrieve data from the server. The connection is initialized
+ * with the "struct server".
+ */
+ sc_set_state(s->scb, SC_ST_ASS);
+
+ /* Force destination server. */
+ s->flags |= SF_DIRECT | SF_ASSIGNED | SF_BE_ASSIGNED;
+ s->target = &csk_ctx->srv->obj_type;
+
+ if (csk_ctx->timeout) {
+ s->sess->fe->timeout.connect = csk_ctx->timeout;
+ s->scf->ioto = csk_ctx->timeout;
+ s->scb->ioto = csk_ctx->timeout;
+ }
+
+ return 0;
+
+ error:
+ return -1;
+}
+
+/* This function is called when the "struct stream" is destroyed.
+ * Remove the link from the object to this stream.
+ * Wake all the pending signals.
+ */
+static void hlua_socket_release(struct appctx *appctx)
+{
+ struct hlua_csk_ctx *ctx = appctx->svcctx;
+ struct xref *peer;
+
+ /* Remove my link in the original objects. */
+ peer = xref_get_peer_and_lock(&ctx->xref);
+ if (peer)
+ xref_disconnect(&ctx->xref, peer);
+
+ /* Wake all the tasks waiting for me. */
+ notification_wake(&ctx->wake_on_read);
+ notification_wake(&ctx->wake_on_write);
+}
+
+/* If the garbage collection of the object is launched, nobody
+ * uses this object anymore. If the stream does not exist, just
+ * quit. Send the shutdown signal to the stream. In some cases,
+ * pending signals can remain in the read and write lists;
+ * destroy them.
+ */
+__LJMP static int hlua_socket_gc(lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct hlua_csk_ctx *ctx;
+ struct xref *peer;
+
+ MAY_LJMP(check_args(L, 1, "__gc"));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer)
+ return 0;
+
+ ctx = container_of(peer, struct hlua_csk_ctx, xref);
+
+ /* Set the flag which destroys the session. */
+ ctx->die = 1;
+ appctx_wakeup(ctx->appctx);
+
+ /* Remove all references between the Lua stack and the coroutine stream. */
+ xref_disconnect(&socket->xref, peer);
+ return 0;
+}
+
+/* The close function sends a shutdown signal and breaks the
+ * links between the stream and the object.
+ */
+__LJMP static int hlua_socket_close_helper(lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct hlua_csk_ctx *ctx;
+ struct xref *peer;
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer)
+ return 0;
+
+ hlua->gc_count--;
+ ctx = container_of(peer, struct hlua_csk_ctx, xref);
+
+ /* Set the flag which destroys the session. */
+ ctx->die = 1;
+ appctx_wakeup(ctx->appctx);
+
+ /* Remove all references between the Lua stack and the coroutine stream. */
+ xref_disconnect(&socket->xref, peer);
+ return 0;
+}
+
+/* The close function calls close_helper.
+ */
+__LJMP static int hlua_socket_close(lua_State *L)
+{
+ MAY_LJMP(check_args(L, 1, "close"));
+ return hlua_socket_close_helper(L);
+}
+
+/* This Lua function assumes that the stack contains two parameters.
+ * 1 - USERDATA containing a struct socket
+ * 2 - INTEGER with values of the macros defined below
+ * If the integer is -1, we must read at most one line.
+ * If the integer is -2, we must read all the data until the
+ * end of the stream.
+ * If the integer is a positive value, we must read a number of
+ * bytes corresponding to this value.
+ */
+#define HLSR_READ_LINE (-1)
+#define HLSR_READ_ALL (-2)
+__LJMP static int hlua_socket_receive_yield(struct lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_socket *socket = MAY_LJMP(hlua_checksocket(L, 1));
+ int wanted = lua_tointeger(L, 2);
+ struct hlua *hlua;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ size_t len;
+ int nblk;
+ const char *blk1;
+ size_t len1;
+ const char *blk2;
+ size_t len2;
+ int skip_at_end = 0;
+ struct channel *oc;
+ struct stream *s;
+ struct xref *peer;
+ int missing_bytes;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+
+ /* Check if this lua stack is schedulable. */
+ if (!hlua || !hlua->task)
+ WILL_LJMP(luaL_error(L, "The 'receive' function is only allowed in "
+ "'frontend', 'backend' or 'task'"));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer)
+ goto no_peer;
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ if (!csk_ctx->connected)
+ goto connection_closed;
+
+ appctx = csk_ctx->appctx;
+ s = appctx_strm(appctx);
+
+ oc = &s->res;
+ if (wanted == HLSR_READ_LINE) {
+ /* Read line. */
+ nblk = co_getline_nc(oc, &blk1, &len1, &blk2, &len2);
+ if (nblk < 0) /* Connection close. */
+ goto connection_closed;
+ if (nblk == 0) /* No data available. */
+ goto connection_empty;
+
+ /* remove final \r\n. */
+ if (nblk == 1) {
+ if (blk1[len1-1] == '\n') {
+ len1--;
+ skip_at_end++;
+ if (blk1[len1-1] == '\r') {
+ len1--;
+ skip_at_end++;
+ }
+ }
+ }
+ else {
+ if (blk2[len2-1] == '\n') {
+ len2--;
+ skip_at_end++;
+ if (blk2[len2-1] == '\r') {
+ len2--;
+ skip_at_end++;
+ }
+ }
+ }
+ }
+
+ else if (wanted == HLSR_READ_ALL) {
+ /* Read all the available data. */
+ nblk = co_getblk_nc(oc, &blk1, &len1, &blk2, &len2);
+ if (nblk < 0) /* Connection close. */
+ goto connection_closed;
+ if (nblk == 0) /* No data available. */
+ goto connection_empty;
+ }
+
+ else {
+ /* Read a block of data. */
+ nblk = co_getblk_nc(oc, &blk1, &len1, &blk2, &len2);
+ if (nblk < 0) /* Connection close. */
+ goto connection_closed;
+ if (nblk == 0) /* No data available. */
+ goto connection_empty;
+
+ missing_bytes = wanted - socket->b.n;
+ if (len1 > missing_bytes) {
+ nblk = 1;
+ len1 = missing_bytes;
+ }
+ if (nblk == 2 && len1 + len2 > missing_bytes)
+ len2 = missing_bytes - len1;
+ }
+
+ len = len1;
+
+ luaL_addlstring(&socket->b, blk1, len1);
+ if (nblk == 2) {
+ len += len2;
+ luaL_addlstring(&socket->b, blk2, len2);
+ }
+
+ /* Consume data. */
+ co_skip(oc, len + skip_at_end);
+
+ /* Don't wait for anything. */
+ appctx_wakeup(appctx);
+
+ /* If the pattern requires reading all the data
+ * from the connection, keep waiting.
+ */
+ if (wanted == HLSR_READ_ALL)
+ goto connection_empty;
+ else if (wanted >= 0 && socket->b.n < wanted)
+ goto connection_empty;
+
+ /* Return result. */
+ luaL_pushresult(&socket->b);
+ xref_unlock(&socket->xref, peer);
+ return 1;
+
+connection_closed:
+
+ xref_unlock(&socket->xref, peer);
+
+no_peer:
+
+ /* If the buffer contains data, return it. */
+ if (socket->b.n > 0) {
+ luaL_pushresult(&socket->b);
+ return 1;
+ }
+ lua_pushnil(L);
+ lua_pushstring(L, "connection closed.");
+ return 2;
+
+connection_empty:
+
+ if (!notification_new(&hlua->com, &csk_ctx->wake_on_read, hlua->task)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "out of memory"));
+ }
+ xref_unlock(&socket->xref, peer);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_socket_receive_yield, TICK_ETERNITY, 0));
+ return 0;
+}
+
+/* This Lua function gets two parameters. The first one can be a string
+ * or a number. If the string is "*l", the user requires one line. If
+ * the string is "*a", the user requires all the contents of the stream.
+ * If the value is a number, the user requires a number of bytes equal
+ * to the value. The default value is "*l" (a line).
+ *
+ * This parameter with a variable type is converted to an integer. This
+ * integer takes these values:
+ *   -1 : read a line
+ *   -2 : read all the stream
+ *   >0 : amount of bytes.
+ *
+ * The second parameter is optional. It contains a string that is
+ * prepended to the read data.
+ */
+__LJMP static int hlua_socket_receive(struct lua_State *L)
+{
+ int wanted = HLSR_READ_LINE;
+ const char *pattern;
+ int lastarg, type;
+ char *error;
+ size_t len;
+ struct hlua_socket *socket;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "The 'receive' function requires between 1 and 3 arguments."));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* check for pattern. */
+ if (lua_gettop(L) >= 2) {
+ type = lua_type(L, 2);
+ if (type == LUA_TSTRING) {
+ pattern = lua_tostring(L, 2);
+ if (strcmp(pattern, "*a") == 0)
+ wanted = HLSR_READ_ALL;
+ else if (strcmp(pattern, "*l") == 0)
+ wanted = HLSR_READ_LINE;
+ else {
+ wanted = strtoll(pattern, &error, 10);
+ if (*error != '\0')
+ WILL_LJMP(luaL_error(L, "Unsupported pattern."));
+ }
+ }
+ else if (type == LUA_TNUMBER) {
+ wanted = lua_tointeger(L, 2);
+ if (wanted < 0)
+ WILL_LJMP(luaL_error(L, "Unsupported size."));
+ }
+ }
+
+ /* Set pattern. */
+ lua_pushinteger(L, wanted);
+
+ /* Check if we would replace the top by itself. */
+ if (lua_gettop(L) != 2)
+ lua_replace(L, 2);
+
+ /* Save the index of the top of the stack because, since buffers
+ * are used, it may change.
+ */
+ lastarg = lua_gettop(L);
+
+ /* init buffer, and fill it with prefix. */
+ luaL_buffinit(L, &socket->b);
+
+ /* Check prefix. */
+ if (lastarg >= 3) {
+ if (lua_type(L, 3) != LUA_TSTRING)
+ WILL_LJMP(luaL_error(L, "Expect a 'string' for the prefix"));
+ pattern = lua_tolstring(L, 3, &len);
+ luaL_addlstring(&socket->b, pattern, len);
+ }
+
+ return __LJMP(hlua_socket_receive_yield(L, 0, 0));
+}
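+
+/* A hedged Lua sketch of the patterns handled above (it assumes an
+ * already-connected socket object):
+ *
+ *   local line = sock:receive("*l")            -- one line, CR/LF stripped
+ *   local all  = sock:receive("*a")            -- everything until close
+ *   local blk  = sock:receive(128)             -- exactly 128 bytes
+ *   local msg  = sock:receive("*l", "prefix ") -- prefix prepended to result
+ */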
+
+/* Writes the Lua input string into the output buffer.
+ * This function yields if no space is available.
+ */
+static int hlua_socket_write_yield(struct lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_socket *socket;
+ struct hlua *hlua;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ size_t buf_len;
+ const char *buf;
+ int len;
+ int send_len;
+ int sent;
+ struct xref *peer;
+ struct stream *s;
+ struct stconn *sc;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+
+ /* Check if this lua stack is schedulable. */
+ if (!hlua || !hlua->task)
+ WILL_LJMP(luaL_error(L, "The 'write' function is only allowed in "
+ "'frontend', 'backend' or 'task'"));
+
+ /* Get object */
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+ buf = MAY_LJMP(luaL_checklstring(L, 2, &buf_len));
+ sent = MAY_LJMP(luaL_checkinteger(L, 3));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ if (!csk_ctx->connected) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ appctx = csk_ctx->appctx;
+ sc = appctx_sc(appctx);
+ s = __sc_strm(sc);
+
+ /* Check for connection close. */
+ if (channel_output_closed(&s->req)) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ /* Update the input buffer data. */
+ buf += sent;
+ send_len = buf_len - sent;
+
+ /* All the data are sent. */
+ if (sent >= buf_len) {
+ xref_unlock(&socket->xref, peer);
+ return 1; /* Implicitly return the length sent. */
+ }
+
+ /* Check if the buffer is available because HAProxy doesn't allocate
+ * the request buffer if it's not required.
+ */
+ if (s->req.buf.size == 0) {
+ if (!sc_alloc_ibuf(sc, &appctx->buffer_wait))
+ goto hlua_socket_write_yield_return;
+ }
+
+ /* Check for available space. */
+ len = b_room(&s->req.buf);
+ if (len <= 0) {
+ goto hlua_socket_write_yield_return;
+ }
+
+ /* send data */
+ if (len < send_len)
+ send_len = len;
+ len = ci_putblk(&s->req, buf, send_len);
+
+ /* "Not enough space" (-1), "Buffer too little to contain
+ * the data" (-2) are not expected because the available length
+ * is tested.
+ * Other unknown error are also not expected.
+ */
+ if (len <= 0) {
+ if (len == -1)
+ s->req.flags |= CF_WAKE_WRITE;
+
+ MAY_LJMP(hlua_socket_close_helper(L));
+ lua_pop(L, 1);
+ lua_pushinteger(L, -1);
+ xref_unlock(&socket->xref, peer);
+ return 1;
+ }
+
+ /* update buffers. */
+ appctx_wakeup(appctx);
+
+ /* Update length sent. */
+ lua_pop(L, 1);
+ lua_pushinteger(L, sent + len);
+
+ /* Has all the buffered data been sent? */
+ if (sent + len >= buf_len) {
+ xref_unlock(&socket->xref, peer);
+ return 1;
+ }
+
+hlua_socket_write_yield_return:
+ if (!notification_new(&hlua->com, &csk_ctx->wake_on_write, hlua->task)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "out of memory"));
+ }
+ xref_unlock(&socket->xref, peer);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_socket_write_yield, TICK_ETERNITY, 0));
+ return 0;
+}
+
+/* This function initiates the send of data. It just checks the input
+ * parameters and pushes an integer on the Lua stack that contains the
+ * amount of data written to the buffer. This is used by the function
+ * "hlua_socket_write_yield" that can yield.
+ *
+ * The Lua function gets between 2 and 4 parameters. The first one is
+ * the associated object. The second is a string buffer. The third is
+ * an optional integer that represents the position in the buffer of
+ * the first byte of the data to send. The first byte is the
+ * position "1". The default value is "1". The fourth argument is an
+ * optional integer that represents the position in the buffer of
+ * the last byte of the data to send. The default is the last byte.
+ */
+static int hlua_socket_send(struct lua_State *L)
+{
+ int i;
+ int j;
+ const char *buf;
+ size_t buf_len;
+
+ /* Check number of arguments. */
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'send' needs between 2 and 4 arguments"));
+
+ /* Get the string. */
+ buf = MAY_LJMP(luaL_checklstring(L, 2, &buf_len));
+
+ /* Get and check j. */
+ if (lua_gettop(L) == 4) {
+ j = MAY_LJMP(luaL_checkinteger(L, 4));
+ if (j < 0)
+ j = buf_len + j + 1;
+ if (j > buf_len)
+ j = buf_len + 1;
+ lua_pop(L, 1);
+ }
+ else
+ j = buf_len;
+
+ /* Get and check i. */
+ if (lua_gettop(L) == 3) {
+ i = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (i < 0)
+ i = buf_len + i + 1;
+ if (i > buf_len)
+ i = buf_len + 1;
+ lua_pop(L, 1);
+ } else
+ i = 1;
+
+ /* Check both i and j. */
+ if (i > j) {
+ lua_pushinteger(L, 0);
+ return 1;
+ }
+ if (i == 0 && j == 0) {
+ lua_pushinteger(L, 0);
+ return 1;
+ }
+ if (i == 0)
+ i = 1;
+ if (j == 0)
+ j = 1;
+
+ /* Pop the string. */
+ lua_pop(L, 1);
+
+ /* Update the buffer length. */
+ buf += i - 1;
+ buf_len = j - i + 1;
+ lua_pushlstring(L, buf, buf_len);
+
+ /* This integer is used to remember the amount of data sent. */
+ lua_pushinteger(L, 0);
+
+ return MAY_LJMP(hlua_socket_write_yield(L, 0, 0));
+}
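+
+/* A hedged Lua sketch of the i/j semantics above, which mirror
+ * string.sub(): positions are 1-based and may be negative to count
+ * from the end of the string:
+ *
+ *   sock:send("hello world")        -- sends the whole string
+ *   sock:send("hello world", 7)     -- sends "world"
+ *   sock:send("hello world", 1, 5)  -- sends "hello"
+ *   sock:send("hello world", -5)    -- sends "world"
+ */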
+
+#define SOCKET_INFO_MAX_LEN sizeof("[0000:0000:0000:0000:0000:0000:0000:0000]:12345")
+__LJMP static inline int hlua_socket_info(struct lua_State *L, const struct sockaddr_storage *addr)
+{
+ static char buffer[SOCKET_INFO_MAX_LEN];
+ int ret;
+ int len;
+ char *p;
+
+ ret = addr_to_str(addr, buffer+1, SOCKET_INFO_MAX_LEN-1);
+ if (ret <= 0) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ if (ret == AF_UNIX) {
+ lua_pushstring(L, buffer+1);
+ return 1;
+ }
+ else if (ret == AF_INET6) {
+ buffer[0] = '[';
+ len = strlen(buffer);
+ buffer[len] = ']';
+ len++;
+ buffer[len] = ':';
+ len++;
+ p = buffer;
+ }
+ else if (ret == AF_INET) {
+ p = buffer + 1;
+ len = strlen(p);
+ p[len] = ':';
+ len++;
+ }
+ else {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ if (port_to_str(addr, p + len, SOCKET_INFO_MAX_LEN-1 - len) <= 0) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushstring(L, p);
+ return 1;
+}
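+
+/* For reference, the strings built above look like the following
+ * (illustrative values only):
+ *
+ *   AF_INET  -> "127.0.0.1:80"
+ *   AF_INET6 -> "[::1]:80"
+ *   AF_UNIX  -> "/path/to/socket"
+ */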
+
+/* Returns information about the peer of the connection. */
+__LJMP static int hlua_socket_getpeername(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct xref *peer;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ struct stconn *sc;
+ const struct sockaddr_storage *dst;
+ int ret;
+
+ MAY_LJMP(check_args(L, 1, "getpeername"));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ if (!csk_ctx->connected) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushnil(L);
+ return 1;
+ }
+
+ appctx = csk_ctx->appctx;
+ sc = appctx_sc(appctx);
+ dst = sc_dst(sc_opposite(sc));
+ if (!dst) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushnil(L);
+ return 1;
+ }
+
+ ret = MAY_LJMP(hlua_socket_info(L, dst));
+ xref_unlock(&socket->xref, peer);
+ return ret;
+}
+
+/* Returns information about my connection side. */
+static int hlua_socket_getsockname(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct connection *conn;
+ struct appctx *appctx;
+ struct xref *peer;
+ struct hlua_csk_ctx *csk_ctx;
+ struct stream *s;
+ int ret;
+
+ MAY_LJMP(check_args(L, 1, "getsockname"));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ if (!csk_ctx->connected) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushnil(L);
+ return 1;
+ }
+
+ appctx = csk_ctx->appctx;
+ s = appctx_strm(appctx);
+
+ conn = sc_conn(s->scb);
+ if (!conn || !conn_get_src(conn)) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushnil(L);
+ return 1;
+ }
+
+ ret = hlua_socket_info(L, conn->src);
+ xref_unlock(&socket->xref, peer);
+ return ret;
+}
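+
+/* Both getters return a single "address:port" string built by
+ * hlua_socket_info() above, or nil on failure. A hedged Lua sketch:
+ *
+ *   local peer = sock:getpeername()  -- remote side of the connection
+ *   local me   = sock:getsockname() -- local side of the connection
+ */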
+
+/* This struct defines the applet. */
+static struct applet update_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<LUA_TCP>",
+ .fct = hlua_socket_handler,
+ .init = hlua_socket_init,
+ .release = hlua_socket_release,
+};
+
+__LJMP static int hlua_socket_connect_yield(struct lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_socket *socket = MAY_LJMP(hlua_checksocket(L, 1));
+ struct hlua *hlua;
+ struct xref *peer;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ struct stream *s;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ lua_pushstring(L, "Can't connect");
+ return 2;
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ appctx = csk_ctx->appctx;
+ s = appctx_strm(appctx);
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+ }
+
+ /* Check for connection close. */
+ if (!hlua || channel_output_closed(&s->req)) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushnil(L);
+ lua_pushstring(L, "Can't connect");
+ return 2;
+ }
+
+ appctx = __sc_appctx(s->scf);
+
+ /* Check for connection established. */
+ if (csk_ctx->connected) {
+ xref_unlock(&socket->xref, peer);
+ lua_pushinteger(L, 1);
+ return 1;
+ }
+
+ if (!notification_new(&hlua->com, &csk_ctx->wake_on_write, hlua->task)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "out of memory error"));
+ }
+ xref_unlock(&socket->xref, peer);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_socket_connect_yield, TICK_ETERNITY, 0));
+ return 0;
+}
+
+/* This function fails or initiates the connection. */
+__LJMP static int hlua_socket_connect(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ int port = -1;
+ const char *ip;
+ struct hlua *hlua;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ int low, high;
+ struct sockaddr_storage *addr;
+ struct xref *peer;
+ struct stconn *sc;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ if (lua_gettop(L) < 2)
+ WILL_LJMP(luaL_error(L, "connect: need at least 2 arguments"));
+
+ /* Get args. */
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ ip = MAY_LJMP(luaL_checkstring(L, 2));
+ if (lua_gettop(L) >= 3) {
+ luaL_Buffer b;
+ port = MAY_LJMP(luaL_checkinteger(L, 3));
+
+ /* Force the ip to end with a colon, to support IPv6 addresses
+ * that are not enclosed within square brackets.
+ */
+ if (port > 0) {
+ luaL_buffinit(L, &b);
+ luaL_addstring(&b, ip);
+ luaL_addchar(&b, ':');
+ luaL_pushresult(&b);
+ ip = lua_tolstring(L, lua_gettop(L), NULL);
+ }
+ }
+
+ /* check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ if (!csk_ctx->srv)
+ csk_ctx->srv = socket_tcp;
+
+ /* Parse ip address. */
+ addr = str2sa_range(ip, NULL, &low, &high, NULL, NULL, NULL, NULL, NULL, NULL, PA_O_PORT_OK | PA_O_STREAM);
+ if (!addr) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: cannot parse destination address '%s'", ip));
+ }
+
+ /* Set port. */
+ if (low == 0) {
+ if (addr->ss_family == AF_INET) {
+ if (port == -1) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: port missing"));
+ }
+ ((struct sockaddr_in *)addr)->sin_port = htons(port);
+ } else if (addr->ss_family == AF_INET6) {
+ if (port == -1) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: port missing"));
+ }
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+ }
+ }
+
+ appctx = csk_ctx->appctx;
+ if (appctx_sc(appctx)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: connect already performed\n"));
+ }
+
+ if (appctx_init(appctx) == -1) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: fail to init applet."));
+ }
+
+ sc = appctx_sc(appctx);
+
+ if (!sockaddr_alloc(&sc_opposite(sc)->dst, addr, sizeof(*addr))) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "connect: internal error"));
+ }
+
+ /* inform the stream that we want to be notified whenever the
+ * connection completes.
+ */
+ applet_need_more_data(appctx);
+ applet_have_more_data(appctx);
+ appctx_wakeup(appctx);
+
+ hlua->gc_count++;
+
+ if (!notification_new(&hlua->com, &csk_ctx->wake_on_write, hlua->task)) {
+ xref_unlock(&socket->xref, peer);
+ WILL_LJMP(luaL_error(L, "out of memory"));
+ }
+ xref_unlock(&socket->xref, peer);
+
+ /* Return yield waiting for connection. */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_socket_connect_yield, TICK_ETERNITY, 0));
+
+ return 0;
+}
+
+#ifdef USE_OPENSSL
+__LJMP static int hlua_socket_connect_ssl(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct xref *peer;
+
+ MAY_LJMP(check_args(L, 3, "connect_ssl"));
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ container_of(peer, struct hlua_csk_ctx, xref)->srv = socket_ssl;
+
+ xref_unlock(&socket->xref, peer);
+ return MAY_LJMP(hlua_socket_connect(L));
+}
+#endif
+
+__LJMP static int hlua_socket_setoption(struct lua_State *L)
+{
+ return 0;
+}
+
+__LJMP static int hlua_socket_settimeout(struct lua_State *L)
+{
+ struct hlua_socket *socket;
+ int tmout;
+ double dtmout;
+ struct xref *peer;
+ struct hlua_csk_ctx *csk_ctx;
+ struct appctx *appctx;
+ struct stream *s;
+
+ MAY_LJMP(check_args(L, 2, "settimeout"));
+
+ socket = MAY_LJMP(hlua_checksocket(L, 1));
+
+ /* convert the timeout to milliseconds */
+ dtmout = MAY_LJMP(luaL_checknumber(L, 2)) * 1000;
+
+ /* Check for negative values */
+ if (dtmout < 0)
+ WILL_LJMP(luaL_error(L, "settimeout: cannot set negatives values"));
+
+ if (dtmout > INT_MAX) /* overflow check */
+ WILL_LJMP(luaL_error(L, "settimeout: cannot set values larger than %d ms", INT_MAX));
+
+ tmout = MS_TO_TICKS((int)dtmout);
+ if (tmout == 0)
+ tmout++; /* very small timeouts are adjusted to a minimum of 1ms */
+
+ /* Check if we run on the same thread as the creator thread.
+ * We cannot access the socket if the thread is different.
+ */
+ if (socket->tid != tid)
+ WILL_LJMP(luaL_error(L, "connect: cannot use socket on other thread"));
+
+ /* check for connection break. If some data were read, return them. */
+ peer = xref_get_peer_and_lock(&socket->xref);
+ if (!peer) {
+ hlua_pusherror(L, "socket: not yet initialised, you can't set timeouts.");
+ WILL_LJMP(lua_error(L));
+ return 0;
+ }
+
+ csk_ctx = container_of(peer, struct hlua_csk_ctx, xref);
+ csk_ctx->timeout = tmout;
+
+ appctx = csk_ctx->appctx;
+ if (!appctx_sc(appctx))
+ goto end;
+
+ s = appctx_strm(csk_ctx->appctx);
+
+ s->sess->fe->timeout.connect = tmout;
+ s->scf->ioto = tmout;
+ s->scb->ioto = tmout;
+
+ s->task->expire = (tick_is_expired(s->task->expire, now_ms) ? 0 : s->task->expire);
+ s->task->expire = tick_first(s->task->expire, tick_add_ifset(now_ms, tmout));
+ task_queue(s->task);
+
+ end:
+ xref_unlock(&socket->xref, peer);
+ lua_pushinteger(L, 1);
+ return 1;
+}
+
+__LJMP static int hlua_socket_new(lua_State *L)
+{
+ struct hlua_socket *socket;
+ struct hlua_csk_ctx *ctx;
+ struct appctx *appctx;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3)) {
+ hlua_pusherror(L, "socket: full stack");
+ goto out_fail_conf;
+ }
+
+ /* Create the object: obj[0] = userdata. */
+ lua_newtable(L);
+ socket = MAY_LJMP(lua_newuserdata(L, sizeof(*socket)));
+ lua_rawseti(L, -2, 0);
+ memset(socket, 0, sizeof(*socket));
+ socket->tid = tid;
+
+ /* Check if the various memory pools are initialized. */
+ if (!pool_head_stream || !pool_head_buffer) {
+ hlua_pusherror(L, "socket: uninitialized pools.");
+ goto out_fail_conf;
+ }
+
+ /* Pop a class Socket metatable and assign it to the new object. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_socket_ref);
+ lua_setmetatable(L, -2);
+
+ /* Create the applet context */
+ appctx = appctx_new_here(&update_applet, NULL);
+ if (!appctx) {
+ hlua_pusherror(L, "socket: out of memory");
+ goto out_fail_conf;
+ }
+ ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ ctx->connected = 0;
+ ctx->die = 0;
+ ctx->srv = NULL;
+ ctx->timeout = 0;
+ ctx->appctx = appctx;
+ LIST_INIT(&ctx->wake_on_write);
+ LIST_INIT(&ctx->wake_on_read);
+
+ /* Initialise cross reference between stream and Lua socket object. */
+ xref_create(&socket->xref, &ctx->xref);
+ return 1;
+
+ out_fail_conf:
+ WILL_LJMP(lua_error(L));
+ return 0;
+}
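+
+/* Putting the socket class together, a hedged end-to-end sketch as it
+ * could run from a Lua task (the "core.tcp"/"core.register_task" names
+ * and the host/port are assumptions from the documented Lua API, not
+ * defined in this block):
+ *
+ *   core.register_task(function()
+ *       local sock = core.tcp()
+ *       sock:settimeout(5)            -- seconds, converted to ms above
+ *       if sock:connect("127.0.0.1", 8080) then
+ *           sock:send("ping\n")
+ *           local answer = sock:receive("*l")
+ *           sock:close()
+ *       end
+ *   end)
+ */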
+
+/*
+ *
+ *
+ * Class Channel
+ *
+ *
+ */
+
+/* Returns the struct channel attached to the class Channel in the
+ * stack entry "ud", or throws an argument error.
+ */
+__LJMP static struct channel *hlua_checkchannel(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_channel_ref));
+}
+
+/* Pushes the channel onto the top of the stack. If the stack does not
+ * have enough free slots, the function fails and returns 0.
+ */
+static int hlua_channel_new(lua_State *L, struct channel *channel)
+{
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ lua_newtable(L);
+ lua_pushlightuserdata(L, channel);
+ lua_rawseti(L, -2, 0);
+
+ /* Pop a class Channel metatable and assign it to the new object. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_channel_ref);
+ lua_setmetatable(L, -2);
+ return 1;
+}
+
+/* Helper function returning a filter attached to a channel at the position <ud>
+ * in the stack, filling the current offset and length of the filter. If no
+ * filter is attached, NULL is returned and <offset> and <len> are not
+ * initialized.
+ */
+static struct filter *hlua_channel_filter(lua_State *L, int ud, struct channel *chn, size_t *offset, size_t *len)
+{
+ struct filter *filter = NULL;
+
+ if (lua_getfield(L, ud, "__filter") == LUA_TLIGHTUSERDATA) {
+ struct hlua_flt_ctx *flt_ctx;
+
+ filter = lua_touserdata(L, -1);
+ flt_ctx = filter->ctx;
+ if (hlua_filter_from_payload(filter)) {
+ *offset = flt_ctx->cur_off[CHN_IDX(chn)];
+ *len = flt_ctx->cur_len[CHN_IDX(chn)];
+ }
+ }
+
+ lua_pop(L, 1);
+ return filter;
+}
+
+/* Copies <len> bytes of data present in the channel's buffer, starting at the
+* offset <offset>, and put it in a LUA string variable. It is the caller
+* responsibility to ensure <len> and <offset> are valid. It always return the
+* length of the built string. <len> may be 0, in this case, an empty string is
+* created and 0 is returned.
+*/
+static inline int _hlua_channel_dup(struct channel *chn, lua_State *L, size_t offset, size_t len)
+{
+ size_t block1, block2;
+ luaL_Buffer b;
+
+ block1 = len;
+ if (block1 > b_contig_data(&chn->buf, b_peek_ofs(&chn->buf, offset)))
+ block1 = b_contig_data(&chn->buf, b_peek_ofs(&chn->buf, offset));
+ block2 = len - block1;
+
+ luaL_buffinit(L, &b);
+ luaL_addlstring(&b, b_peek(&chn->buf, offset), block1);
+ if (block2)
+ luaL_addlstring(&b, b_orig(&chn->buf), block2);
+ luaL_pushresult(&b);
+ return len;
+}
+
+/* Inserts the string <str> to the channel's buffer at the offset <offset>. This
+ * function returns -1 if data cannot be copied. Otherwise, it returns the
+ * number of bytes copied.
+ */
+static int _hlua_channel_insert(struct channel *chn, lua_State *L, struct ist str, size_t offset)
+{
+ int ret = 0;
+
+ /* Nothing to do, just return */
+ if (unlikely(istlen(str) == 0))
+ goto end;
+
+ if (istlen(str) > c_room(chn)) {
+ ret = -1;
+ goto end;
+ }
+ ret = b_insert_blk(&chn->buf, offset, istptr(str), istlen(str));
+
+ end:
+ return ret;
+}
+
+/* Removes <len> bytes of data at the absolute position <offset>.
+ */
+static void _hlua_channel_delete(struct channel *chn, size_t offset, size_t len)
+{
+ size_t end = offset + len;
+
+ if (b_peek(&chn->buf, end) != b_tail(&chn->buf))
+ b_move(&chn->buf, b_peek_ofs(&chn->buf, end),
+ b_data(&chn->buf) - end, -len);
+ b_sub(&chn->buf, len);
+}
+
+/* Copies input data in the channel's buffer. It is possible to set a specific
+ * offset (0 by default) and a length (all remaining input data starting from the
+ * offset by default). If there is not enough input data and more data can be
+ * received, this function yields.
+ *
+ * From an action, all input data are considered. For a filter, the offset and
+ * the length of input data to consider are retrieved from the filter context.
+ */
+__LJMP static int hlua_channel_get_data_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t input, output;
+ int offset, len;
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto dup;
+ if (len == -1)
+ len = global.tune.bufsize;
+ if (len < 0) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ /* Wait for more data if possible, when no length was specified and
+ * there is no data or not enough data was received.
+ */
+ if (!len || offset + len > output + input) {
+ if (!HLUA_CANT_YIELD(hlua_gethlua(L)) && !channel_input_closed(chn) && channel_may_recv(chn)) {
+ /* Yield waiting for more data, as requested */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_get_data_yield, TICK_ETERNITY, 0));
+ }
+
+ /* Return 'nil' if there is no data and the channel can't receive more data */
+ if (!len) {
+ lua_pushnil(L);
+ return -1;
+ }
+
+ /* Otherwise, return all data */
+ len = output + input - offset;
+ }
+
+ dup:
+ _hlua_channel_dup(chn, L, offset, len);
+ return 1;
+}
+
+/* Copies the first line (including the trailing LF) of input data in the
+ * channel's buffer. It is possible to set a specific offset (0 by default) and
+ * a length (all remaining input data starting from the offset by default). If
+ * there is not enough input data and more data can be received, the function
+ * yields. If a length is explicitly specified, no more data are
+ * copied. Otherwise, if no LF is found and more data can be received, this
+ * function yields.
+ *
+ * From an action, all input data are considered. For a filter, the offset and
+ * the length of input data to consider are retrieved from the filter context.
+ */
+__LJMP static int hlua_channel_get_line_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t l, input, output;
+ int offset, len;
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto dup;
+ if (len == -1)
+ len = global.tune.bufsize;
+ if (len < 0) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ for (l = 0; l < len; l++) {
+ if (l + offset >= output + input)
+ break;
+ if (*(b_peek(&chn->buf, offset + l)) == '\n') {
+ len = l+1;
+ goto dup;
+ }
+ }
+
+ /* Wait for more data if possible, when no line is found and either no
+ * length was specified or not enough data was received.
+ */
+ if (lua_gettop(L) != 3 || offset + len > output + input) {
+ if (!HLUA_CANT_YIELD(hlua_gethlua(L)) && !channel_input_closed(chn) && channel_may_recv(chn)) {
+ /* Yield waiting for more data */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_get_line_yield, TICK_ETERNITY, 0));
+ }
+
+ /* Return 'nil' if there is no data and the channel can't receive more data */
+ if (!len) {
+ lua_pushnil(L);
+ return -1;
+ }
+
+ /* Otherwise, return all data */
+ len = output + input - offset;
+ }
+
+ dup:
+ _hlua_channel_dup(chn, L, offset, len);
+ return 1;
+}
+
+/* [ DEPRECATED ]
+ *
+ * Duplicate all input data found in the channel's buffer. The data are not
+ * removed from the buffer. This function relies on _hlua_channel_dup().
+ *
+ * From an action, all input data are considered. For a filter, the offset and
+ * the length of input data to consider are retrieved from the filter context.
+ */
+__LJMP static int hlua_channel_dup(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t offset, len;
+
+ MAY_LJMP(check_args(L, 1, "dup"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (!ci_data(chn) && channel_input_closed(chn)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ _hlua_channel_dup(chn, L, offset, len);
+ return 1;
+}
+
+/* [ DEPRECATED ]
+ *
+ * Get all input data found in the channel's buffer. The data are removed from
+ * the buffer after the copy. This function relies on _hlua_channel_dup() and
+ * _hlua_channel_delete().
+ *
+ * From an action, all input data are considered. For a filter, the offset and
+ * the length of input data to consider are retrieved from the filter context.
+ */
+__LJMP static int hlua_channel_get(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t offset, len;
+ int ret;
+
+ MAY_LJMP(check_args(L, 1, "get"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (!ci_data(chn) && channel_input_closed(chn)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ ret = _hlua_channel_dup(chn, L, offset, len);
+ _hlua_channel_delete(chn, offset, ret);
+ return 1;
+}
+
+/* This function consumes and returns one line. If the channel is closed
+ * and the last data does not contain a final '\n', the data are returned
+ * without the final '\n'. When no more data are available, it returns a
+ * nil value.
+ *
+ * From an action, all input data are considered. For a filter, the offset and
+ * the length of input data to consider are retrieved from the filter context.
+ */
+__LJMP static int hlua_channel_getline_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t l, offset, len;
+ int ret;
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (!ci_data(chn) && channel_input_closed(chn)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ for (l = 0; l < len; l++) {
+ if (*(b_peek(&chn->buf, offset+l)) == '\n') {
+ len = l+1;
+ goto dup;
+ }
+ }
+
+ if (!HLUA_CANT_YIELD(hlua_gethlua(L)) && !channel_input_closed(chn) && channel_may_recv(chn)) {
+ /* Yield waiting for more data */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_getline_yield, TICK_ETERNITY, 0));
+ }
+
+ dup:
+ ret = _hlua_channel_dup(chn, L, offset, len);
+ _hlua_channel_delete(chn, offset, ret);
+ return 1;
+}
+
+/* [ DEPRECATED ]
+ *
+ * Check arguments for the function "hlua_channel_getline_yield".
+ */
+__LJMP static int hlua_channel_getline(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 1, "getline"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ return MAY_LJMP(hlua_channel_getline_yield(L, 0, 0));
+}
+
+/* Retrieves a given amount of input data at the given offset. By default all
+ * available input data are returned. The offset may be negative to start from
+ * the end of input data. The length may be -1 to set it to the maximum buffer
+ * size.
+ */
+__LJMP static int hlua_channel_get_data(lua_State *L)
+{
+ struct channel *chn;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'data' expects at most 2 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ return MAY_LJMP(hlua_channel_get_data_yield(L, 0, 0));
+}
+
+/* Retrieves the first line of input data starting at the given offset. By
+ * default all available input data are considered. The offset may be negative
+ * to start from the end of input data. The length may be -1 to set it to the
+ * maximum buffer size.
+ */
+__LJMP static int hlua_channel_get_line(lua_State *L)
+{
+ struct channel *chn;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'line' expects at most 2 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ return MAY_LJMP(hlua_channel_get_line_yield(L, 0, 0));
+}
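+
+/* A hedged Lua sketch of the two getters above, as they could be used
+ * from an action on a TCP stream:
+ *
+ *   local all   = chn:data()     -- all input data, nothing consumed
+ *   local tail  = chn:data(-10)  -- the last 10 bytes of input data
+ *   local first = chn:data(0, 4) -- the first 4 bytes of input data
+ *   local line  = chn:line()     -- the first line, including its LF
+ */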
+
+/* Appends a string to the input side of the channel. It returns the length of
+ * the written string, or -1 if the channel is closed or if the buffer size is
+ * too small for the data. 0 may be returned if nothing is copied. This function
+ * does not yield.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_append(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t sz, offset, len;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "append"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ /* insert at the end of the pending input data */
+ ret = _hlua_channel_insert(chn, L, ist2(str, sz), offset + len);
+ if (ret > 0 && filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, ret);
+ flt_ctx->cur_len[CHN_IDX(chn)] += ret;
+ }
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Prepends a string to the input side of the channel. It returns the length of
+ * the written string, or -1 if the channel is closed or if the buffer size is
+ * too small for the data. 0 may be returned if nothing is copied. This function
+ * does not yield.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_prepend(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t sz, offset, len;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "prepend"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ ret = _hlua_channel_insert(chn, L, ist2(str, sz), offset);
+ if (ret > 0 && filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, ret);
+ flt_ctx->cur_len[CHN_IDX(chn)] += ret;
+ }
+
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Inserts a string at a given offset within the input data. By default the
+ * string is inserted in front of the input data. It returns the length of the
+ * written string, or -1 if the channel is closed or if the buffer size is too
+ * small for the data.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_insert_data(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t sz, input, output;
+ int ret, offset;
+
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'insert' expects at least 1 argument and at most 2 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 2) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset > output + input) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ ret = _hlua_channel_insert(chn, L, ist2(str, sz), offset);
+ if (ret > 0 && filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, ret);
+ flt_ctx->cur_len[CHN_IDX(chn)] += ret;
+ }
+
+ lua_pushinteger(L, ret);
+ return 1;
+}
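+
+/* A hedged Lua sketch of the three insertion functions above; offsets
+ * are relative to the input data, as described in the comments:
+ *
+ *   chn:append(" trailer")  -- written after the pending input data
+ *   chn:prepend("header ")  -- written in front of the input data
+ *   chn:insert("mid", 4)    -- written at offset 4 within the input data
+ */
+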
+/* Replaces a given amount of input data at the given offset with a string.
+ * By default all remaining input data are replaced (offset = 0 and len =
+ * -1). It returns the length of the written string, or -1 if the channel is
+ * closed or if the buffer size is too small for the data.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_set_data(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t sz, input, output;
+ int ret, offset, len;
+
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set' expects at least 1 argument and at most 3 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 2) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 4) {
+ len = MAY_LJMP(luaL_checkinteger(L, 4));
+ if (!len)
+ goto set;
+ if (len == -1)
+ len = output + input - offset;
+ if (len < 0 || offset + len > output + input) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ set:
+ /* Be sure we can copy the string once the input data have been removed. */
+ if (sz > c_room(chn) + len)
+ lua_pushinteger(L, -1);
+ else {
+ _hlua_channel_delete(chn, offset, len);
+ ret = _hlua_channel_insert(chn, L, ist2(str, sz), offset);
+ if (filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ len -= (ret > 0 ? ret : 0);
+ flt_update_offsets(filter, chn, -len);
+ flt_ctx->cur_len[CHN_IDX(chn)] -= len;
+ }
+
+ lua_pushinteger(L, ret);
+ }
+ return 1;
+}
+
+/* Removes a given amount of input data at the given offset. By default all
+ * input data are removed (offset = 0 and len = -1). It returns the amount
+ * of removed data.
+ *
+ * For a filter, the context is updated on success.
+ */
+__LJMP static int hlua_channel_del_data(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t input, output;
+ int offset, len;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'remove' expects at most 2 arguments"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ output = co_data(chn);
+ input = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto end;
+ if (len == -1)
+ len = output + input - offset;
+ if (len < 0 || offset + len > output + input) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ _hlua_channel_delete(chn, offset, len);
+ if (filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, -len);
+ flt_ctx->cur_len[CHN_IDX(chn)] -= len;
+ }
+
+ end:
+ lua_pushinteger(L, len);
+ return 1;
+}
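+
+/* A hedged Lua sketch of in-place edition with the two functions above:
+ *
+ *   chn:set("replacement")  -- replaces all input data
+ *   chn:set("GET ", 0, 4)   -- replaces the first 4 input bytes
+ *   chn:remove()            -- removes all input data
+ *   chn:remove(0, 2)        -- removes the first 2 input bytes
+ */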
+
+/* Appends data to the output side of the buffer. This data is immediately
+ * sent. The function returns the amount of data written. If the buffer
+ * cannot contain the data, the function yields. The function returns -1
+ * if the channel is closed.
+ */
+__LJMP static int hlua_channel_send_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ const char *str;
+ size_t offset, len, sz;
+ int l, ret;
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ l = MAY_LJMP(luaL_checkinteger(L, 3));
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (unlikely(channel_output_closed(chn))) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ len = c_room(chn);
+ if (len > sz - l) {
+ if (filter) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+ len = sz - l;
+ }
+
+ ret = _hlua_channel_insert(chn, L, ist2(str, len), offset);
+ if (ret == -1) {
+ lua_pop(L, 1);
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+ if (ret) {
+ if (filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_update_offsets(filter, chn, ret);
+ FLT_OFF(filter, chn) += ret;
+ flt_ctx->cur_off[CHN_IDX(chn)] += ret;
+ }
+ else
+ c_adv(chn, ret);
+
+ l += ret;
+ lua_pop(L, 1);
+ lua_pushinteger(L, l);
+ }
+
+ if (l < sz) {
+ /* Yield only if the channel's output is not empty.
+ * Otherwise it means we cannot add more data. */
+ if (co_data(chn) == 0 || HLUA_CANT_YIELD(hlua_gethlua(L)))
+ return 1;
+
+ /* If we are waiting for space in the response buffer, we
+ * must set the flag WAKERESWR. This flag requires the task to
+ * wake up if any activity is detected on the response buffer.
+ */
+ if (chn->flags & CF_ISRESP)
+ HLUA_SET_WAKERESWR(hlua);
+ else
+ HLUA_SET_WAKEREQWR(hlua);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_send_yield, TICK_ETERNITY, 0));
+ }
+
+ return 1;
+}
+
+/* Just a wrapper of "_hlua_channel_send". This wrapper permits
+ * yield the LUA process, and resume it without checking the
+ * input arguments.
+ *
+ * This function cannot be called from a filter.
+ */
+__LJMP static int hlua_channel_send(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 2, "send"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ lua_pushinteger(L, 0);
+ return MAY_LJMP(hlua_channel_send_yield(L, 0, 0));
+}
+
+/* This function forwards an amount of bytes. The data pass from
+ * the input side of the buffer to the output side, and can then
+ * be sent. This function never fails.
+ *
+ * The Lua function takes an amount of bytes to be forwarded as
+ * input. It returns the number of bytes forwarded.
+ */
+__LJMP static int hlua_channel_forward_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t offset, len, fwd;
+ int l, max;
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ fwd = MAY_LJMP(luaL_checkinteger(L, 2));
+ l = MAY_LJMP(luaL_checkinteger(L, -1));
+
+ offset = co_data(chn);
+ len = ci_data(chn);
+
+ filter = hlua_channel_filter(L, 1, chn, &offset, &len);
+ if (filter && !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ max = fwd - l;
+ if (max > len)
+ max = len;
+
+ if (filter) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ FLT_OFF(filter, chn) += max;
+ flt_ctx->cur_off[CHN_IDX(chn)] += max;
+ flt_ctx->cur_len[CHN_IDX(chn)] -= max;
+ }
+ else
+ channel_forward(chn, max);
+
+ l += max;
+ lua_pop(L, 1);
+ lua_pushinteger(L, l);
+
+ /* Check if some bytes remain to be forwarded. */
+ if (l < fwd) {
+ /* If the input channel or the output channel is closed, we
+ * must return the amount of data forwarded.
+ */
+ if (channel_input_closed(chn) || channel_output_closed(chn) || HLUA_CANT_YIELD(hlua_gethlua(L)))
+ return 1;
+
+ /* If we are waiting for space in the response buffer, we
+ * must set the flag WAKERESWR. This flag requires the task
+ * to be woken up if any activity is detected on the response buffer.
+ */
+ if (chn->flags & CF_ISRESP)
+ HLUA_SET_WAKERESWR(hlua);
+ else
+ HLUA_SET_WAKEREQWR(hlua);
+
+ /* Otherwise, we can yield waiting for new data in the input side. */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_channel_forward_yield, TICK_ETERNITY, 0));
+ }
+
+ return 1;
+}
+
+/* Just check the input and prepare the stack for the previous
+ * function "hlua_channel_forward_yield"
+ *
+ * This function cannot be called from a filter.
+ */
+__LJMP static int hlua_channel_forward(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 2, "forward"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ if (IS_HTX_STRM(chn_strm(chn))) {
+ lua_pushfstring(L, "Cannot manipulate HAProxy channels in HTTP mode.");
+ WILL_LJMP(lua_error(L));
+ }
+ lua_pushinteger(L, 0);
+ return MAY_LJMP(hlua_channel_forward_yield(L, 0, 0));
+}
+
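+/* A Lua-side sketch of the forward binding, assuming the documented
+ * Channel API; the action name is hypothetical:
+ *
+ *   core.register_action("fwd64", { "tcp-req" }, function(txn)
+ *       -- Pass up to 64 bytes from the input side to the output side;
+ *       -- the call yields until that many bytes went through.
+ *       local n = txn.req:forward(64)
+ *   end)
+ */
+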
+/* Just returns the number of bytes available in the input
+ * side of the buffer. This function never fails.
+ */
+__LJMP static int hlua_channel_get_in_len(lua_State *L)
+{
+ struct channel *chn;
+ struct filter *filter;
+ size_t output, input;
+
+ MAY_LJMP(check_args(L, 1, "input"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ output = co_data(chn);
+ input = ci_data(chn);
+ filter = hlua_channel_filter(L, 1, chn, &output, &input);
+ if (filter || !IS_HTX_STRM(chn_strm(chn)))
+ lua_pushinteger(L, input);
+ else {
+ struct htx *htx = htxbuf(&chn->buf);
+
+ lua_pushinteger(L, htx->data - co_data(chn));
+ }
+ return 1;
+}
+
+/* Returns true if the channel is full. */
+__LJMP static int hlua_channel_is_full(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 1, "is_full"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ /* Ignore the reserve, we are not on a producer side (i.e. in an
+ * applet).
+ */
+ lua_pushboolean(L, channel_full(chn, 0));
+ return 1;
+}
+
+/* Returns true if the channel may still receive data. */
+__LJMP static int hlua_channel_may_recv(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 1, "may_recv"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+ lua_pushboolean(L, (!channel_input_closed(chn) && channel_may_recv(chn)));
+ return 1;
+}
+
+/* Returns true if the channel is the response channel. */
+__LJMP static int hlua_channel_is_resp(lua_State *L)
+{
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 1, "is_resp"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ lua_pushboolean(L, !!(chn->flags & CF_ISRESP));
+ return 1;
+}
+
+/* Just returns the number of bytes available in the output
+ * side of the buffer. This function never fails.
+ */
+__LJMP static int hlua_channel_get_out_len(lua_State *L)
+{
+ struct channel *chn;
+ size_t output, input;
+
+ MAY_LJMP(check_args(L, 1, "output"));
+ chn = MAY_LJMP(hlua_checkchannel(L, 1));
+
+ output = co_data(chn);
+ input = ci_data(chn);
+ hlua_channel_filter(L, 1, chn, &output, &input);
+
+ lua_pushinteger(L, output);
+ return 1;
+}
+
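+/* A Lua-side sketch of the channel introspection getters above, assuming
+ * the documented Channel API; the action name is hypothetical:
+ *
+ *   core.register_action("probe", { "tcp-req" }, function(txn)
+ *       local chn = txn.req
+ *       local pending = chn:input()   -- bytes not yet forwarded
+ *       local queued  = chn:output()  -- bytes already forwarded
+ *       if chn:may_recv() and not chn:is_full() then
+ *           -- more data may still arrive on this channel
+ *       end
+ *   end)
+ */
+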
+/*
+ *
+ *
+ * Class Fetches
+ *
+ *
+ */
+
+/* Returns a struct hlua_session if the stack entry "ud" is
+ * a class stream, otherwise it throws an error.
+ */
+__LJMP static struct hlua_smp *hlua_checkfetches(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_fetches_ref));
+}
+
+/* This function creates and pushes on the stack a Fetches object according
+ * to the current TXN.
+ */
+static int hlua_fetches_new(lua_State *L, struct hlua_txn *txn, unsigned int flags)
+{
+ struct hlua_smp *hsmp;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the Fetches object is the
+ * transaction object.
+ */
+ lua_newtable(L);
+ hsmp = lua_newuserdata(L, sizeof(*hsmp));
+ lua_rawseti(L, -2, 0);
+
+ hsmp->s = txn->s;
+ hsmp->p = txn->p;
+ hsmp->dir = txn->dir;
+ hsmp->flags = flags;
+
+ /* Push the class Fetches metatable and assign it to the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_fetches_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+/* This function is a Lua binding. It is called for each sample-fetch.
+ * It uses a closure argument to store the associated sample-fetch. It
+ * returns only one argument or throws an error. An error is thrown
+ * only if an error is encountered during the argument parsing. If
+ * the "sample-fetch" function fails, nil is returned.
+ */
+__LJMP static int hlua_run_sample_fetch(lua_State *L)
+{
+ struct hlua_smp *hsmp;
+ struct sample_fetch *f;
+ struct arg args[ARGM_NBARGS + 1] = {{0}};
+ int i;
+ struct sample smp;
+
+ /* Get closure arguments. */
+ f = lua_touserdata(L, lua_upvalueindex(1));
+
+ /* Get traditional arguments. */
+ hsmp = MAY_LJMP(hlua_checkfetches(L, 1));
+
+ /* Check execution authorization. */
+ if (f->use & SMP_USE_HTTP_ANY &&
+ !(hsmp->flags & HLUA_F_MAY_USE_HTTP)) {
+ lua_pushfstring(L, "the sample-fetch '%s' needs an HTTP parser which "
+ "is not available in Lua services", f->kw);
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Get extra arguments. */
+ for (i = 0; i < lua_gettop(L) - 1; i++) {
+ if (i >= ARGM_NBARGS)
+ break;
+ hlua_lua2arg(L, i + 2, &args[i]);
+ }
+ args[i].type = ARGT_STOP;
+ args[i].data.str.area = NULL;
+
+ /* Check arguments. */
+ MAY_LJMP(hlua_lua2arg_check(L, 2, args, f->arg_mask, hsmp->p));
+
+ /* Run the special args checker. */
+ if (f->val_args && !f->val_args(args, NULL)) {
+ lua_pushfstring(L, "error in arguments");
+ goto error;
+ }
+
+ /* Initialise the sample. */
+ memset(&smp, 0, sizeof(smp));
+
+ /* Run the sample fetch process. */
+ smp_set_owner(&smp, hsmp->p, hsmp->s->sess, hsmp->s, hsmp->dir & SMP_OPT_DIR);
+ if (!f->process(args, &smp, f->kw, f->private)) {
+ if (hsmp->flags & HLUA_F_AS_STRING)
+ lua_pushstring(L, "");
+ else
+ lua_pushnil(L);
+ goto end;
+ }
+
+ /* Convert the returned sample in lua value. */
+ if (hsmp->flags & HLUA_F_AS_STRING)
+ MAY_LJMP(hlua_smp2lua_str(L, &smp));
+ else
+ MAY_LJMP(hlua_smp2lua(L, &smp));
+
+ end:
+ free_args(args);
+ return 1;
+
+ error:
+ free_args(args);
+ WILL_LJMP(lua_error(L));
+ return 0; /* Never reached */
+}
+
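+/* A Lua-side sketch of how these closures are reached, assuming the
+ * documented txn.f/txn.sf accessors (dots in sample-fetch names are
+ * mapped to underscores); the action name is hypothetical:
+ *
+ *   core.register_action("log-src", { "http-req" }, function(txn)
+ *       local addr = txn.f:src()              -- raw sample
+ *       local host = txn.sf:req_fhdr("host")  -- string-safe variant
+ *       core.Info("client " .. tostring(addr) .. " host " .. host)
+ *   end)
+ */
+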
+/*
+ *
+ *
+ * Class Converters
+ *
+ *
+ */
+
+/* Returns a struct hlua_session if the stack entry "ud" is
+ * a class stream, otherwise it throws an error.
+ */
+__LJMP static struct hlua_smp *hlua_checkconverters(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_converters_ref));
+}
+
+/* This function creates and pushes on the stack a Converters object
+ * according to the current TXN.
+ */
+static int hlua_converters_new(lua_State *L, struct hlua_txn *txn, unsigned int flags)
+{
+ struct hlua_smp *hsmp;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the Converters object is the
+ * same as the TXN object.
+ */
+ lua_newtable(L);
+ hsmp = lua_newuserdata(L, sizeof(*hsmp));
+ lua_rawseti(L, -2, 0);
+
+ hsmp->s = txn->s;
+ hsmp->p = txn->p;
+ hsmp->dir = txn->dir;
+ hsmp->flags = flags;
+
+ /* Push the class Converters metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_converters_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+/* This function is a Lua binding. It is called for each converter.
+ * It uses a closure argument to store the associated converter. It
+ * returns only one argument or throws an error. An error is thrown
+ * only if an error is encountered during the argument parsing. If
+ * the converter function fails, nil is returned.
+ */
+__LJMP static int hlua_run_sample_conv(lua_State *L)
+{
+ struct hlua_smp *hsmp;
+ struct sample_conv *conv;
+ struct arg args[ARGM_NBARGS + 1] = {{0}};
+ int i;
+ struct sample smp;
+
+ /* Get closure arguments. */
+ conv = lua_touserdata(L, lua_upvalueindex(1));
+
+ /* Get traditional arguments. */
+ hsmp = MAY_LJMP(hlua_checkconverters(L, 1));
+
+ /* Get extra arguments. */
+ for (i = 0; i < lua_gettop(L) - 2; i++) {
+ if (i >= ARGM_NBARGS)
+ break;
+ hlua_lua2arg(L, i + 3, &args[i]);
+ }
+ args[i].type = ARGT_STOP;
+ args[i].data.str.area = NULL;
+
+ /* Check arguments. */
+ MAY_LJMP(hlua_lua2arg_check(L, 3, args, conv->arg_mask, hsmp->p));
+
+ /* Run the special args checker. */
+ if (conv->val_args && !conv->val_args(args, conv, "", 0, NULL)) {
+ hlua_pusherror(L, "error in arguments");
+ goto error;
+ }
+
+ /* Initialise the sample. */
+ memset(&smp, 0, sizeof(smp));
+ if (!hlua_lua2smp(L, 2, &smp)) {
+ hlua_pusherror(L, "error in the input argument");
+ goto error;
+ }
+
+ smp_set_owner(&smp, hsmp->p, hsmp->s->sess, hsmp->s, hsmp->dir & SMP_OPT_DIR);
+
+ /* Apply expected cast. */
+ if (!sample_casts[smp.data.type][conv->in_type]) {
+ hlua_pusherror(L, "invalid input argument: cannot cast '%s' to '%s'",
+ smp_to_type[smp.data.type], smp_to_type[conv->in_type]);
+ goto error;
+ }
+ if (sample_casts[smp.data.type][conv->in_type] != c_none &&
+ !sample_casts[smp.data.type][conv->in_type](&smp)) {
+ hlua_pusherror(L, "error during the input argument casting");
+ goto error;
+ }
+
+ /* Run the sample conversion process. */
+ if (!conv->process(args, &smp, conv->private)) {
+ if (hsmp->flags & HLUA_F_AS_STRING)
+ lua_pushstring(L, "");
+ else
+ lua_pushnil(L);
+ goto end;
+ }
+
+ /* Convert the returned sample in lua value. */
+ if (hsmp->flags & HLUA_F_AS_STRING)
+ MAY_LJMP(hlua_smp2lua_str(L, &smp));
+ else
+ MAY_LJMP(hlua_smp2lua(L, &smp));
+ end:
+ free_args(args);
+ return 1;
+
+ error:
+ free_args(args);
+ WILL_LJMP(lua_error(L));
+ return 0; /* Never reached */
+}
+
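+/* A Lua-side sketch, assuming the documented txn.c/txn.sc accessors and
+ * the standard "lower" converter; the action name is hypothetical:
+ *
+ *   core.register_action("norm", { "http-req" }, function(txn)
+ *       -- The first argument is the input sample, any remaining
+ *       -- arguments are the converter's own arguments.
+ *       local h = txn.sc:lower(txn.sf:req_fhdr("host"))
+ *   end)
+ */
+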
+/*
+ *
+ *
+ * Class AppletTCP
+ *
+ *
+ */
+
+/* Returns a struct hlua_txn if the stack entry "ud" is
+ * a class stream, otherwise it throws an error.
+ */
+__LJMP static struct hlua_appctx *hlua_checkapplet_tcp(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_applet_tcp_ref));
+}
+
+/* This function creates and pushes on the stack an AppletTCP object
+ * according to the current TXN.
+ */
+static int hlua_applet_tcp_new(lua_State *L, struct appctx *ctx)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s = appctx_strm(ctx);
+ struct proxy *p;
+
+ ALREADY_CHECKED(s);
+ p = s->be;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the AppletTCP object is the
+ * same as the TXN object.
+ */
+ lua_newtable(L);
+ luactx = lua_newuserdata(L, sizeof(*luactx));
+ lua_rawseti(L, -2, 0);
+ luactx->appctx = ctx;
+ luactx->htxn.s = s;
+ luactx->htxn.p = p;
+
+ /* Create the "f" field that contains a list of fetches. */
+ lua_pushstring(L, "f");
+ if (!hlua_fetches_new(L, &luactx->htxn, 0))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "sf" field that contains a list of stringsafe fetches. */
+ lua_pushstring(L, "sf");
+ if (!hlua_fetches_new(L, &luactx->htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "c" field that contains a list of converters. */
+ lua_pushstring(L, "c");
+ if (!hlua_converters_new(L, &luactx->htxn, 0))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "sc" field that contains a list of stringsafe converters. */
+ lua_pushstring(L, "sc");
+ if (!hlua_converters_new(L, &luactx->htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Push the class AppletTCP metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_applet_tcp_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+__LJMP static int hlua_applet_tcp_set_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ if (lua_gettop(L) < 3 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set_var' needs between 3 and 4 arguments"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ /* Converts the third argument in a sample. */
+ memset(&smp, 0, sizeof(smp));
+ hlua_lua2smp(L, 3, &smp);
+
+ /* Store the sample in a variable. We don't need to dup the smp, vars API
+ * already takes care of duplicating dynamic var data.
+ */
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+
+ if (lua_gettop(L) == 4 && lua_toboolean(L, 4))
+ lua_pushboolean(L, vars_set_by_name_ifexist(name, len, &smp) != 0);
+ else
+ lua_pushboolean(L, vars_set_by_name(name, len, &smp) != 0);
+
+ return 1;
+}
+
+__LJMP static int hlua_applet_tcp_unset_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "unset_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ /* Unset the variable. */
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+ lua_pushboolean(L, vars_unset_by_name_ifexist(name, len, &smp) != 0);
+ return 1;
+}
+
+__LJMP static int hlua_applet_tcp_get_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "get_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+ if (!vars_get_by_name(name, len, &smp, NULL)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ return MAY_LJMP(hlua_smp2lua(L, &smp));
+}
+
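+/* A Lua-side sketch of the variable accessors above, assuming the
+ * documented AppletTCP API; the variable name is illustrative:
+ *
+ *   core.register_service("vars", "tcp", function(applet)
+ *       applet:set_var("txn.greeting", "hello")
+ *       local v = applet:get_var("txn.greeting")
+ *       applet:unset_var("txn.greeting")
+ *   end)
+ */
+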
+__LJMP static int hlua_applet_tcp_set_priv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ struct stream *s = luactx->htxn.s;
+ struct hlua *hlua;
+
+ /* Note that this hlua struct is from the session and not from the applet. */
+ if (!s->hlua)
+ return 0;
+ hlua = s->hlua;
+
+ MAY_LJMP(check_args(L, 2, "set_priv"));
+
+ /* Remove previous value. */
+ luaL_unref(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ /* Get and store new value. */
+ lua_pushvalue(L, 2); /* Copy the element 2 at the top of the stack. */
+ hlua->Mref = luaL_ref(L, LUA_REGISTRYINDEX); /* pop the previously pushed value. */
+
+ return 0;
+}
+
+__LJMP static int hlua_applet_tcp_get_priv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ struct stream *s = luactx->htxn.s;
+ struct hlua *hlua;
+
+ /* Note that this hlua struct is from the session and not from the applet. */
+ if (!s->hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+ hlua = s->hlua;
+
+ /* Push configuration index in the stack. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ return 1;
+}
+
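+/* A Lua-side sketch of the private-data accessors, assuming the
+ * documented API. The value lives in the session's hlua context, so it
+ * survives across calls within the same stream:
+ *
+ *   core.register_service("stateful", "tcp", function(applet)
+ *       local state = applet:get_priv() or { count = 0 }
+ *       state.count = state.count + 1
+ *       applet:set_priv(state)
+ *   end)
+ */
+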
+/* If the expected data is not yet available, the function yields. It
+ * consumes the data in the buffer and returns a string containing the
+ * data, which may be empty.
+ */
+__LJMP static int hlua_applet_tcp_getline_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ int ret;
+ const char *blk1;
+ size_t len1;
+ const char *blk2;
+ size_t len2;
+
+ /* Read the maximum amount of data available. */
+ ret = co_getline_nc(sc_oc(sc), &blk1, &len1, &blk2, &len2);
+
+ /* Data not yet available: yield. */
+ if (ret == 0) {
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_getline_yield, TICK_ETERNITY, 0));
+ }
+
+ /* End of data: commit the accumulated string and return it. */
+ if (ret < 0) {
+ luaL_pushresult(&luactx->b);
+ return 1;
+ }
+
+ /* Ensure that the block 2 length is usable. */
+ if (ret == 1)
+ len2 = 0;
+
+ /* Concatenate both blocks; no maximum length is checked here. */
+ luaL_addlstring(&luactx->b, blk1, len1);
+ luaL_addlstring(&luactx->b, blk2, len2);
+
+ /* Consume input channel output buffer data. */
+ co_skip(sc_oc(sc), len1 + len2);
+ luaL_pushresult(&luactx->b);
+ return 1;
+}
+
+/* Check arguments for the function "hlua_channel_get_yield". */
+__LJMP static int hlua_applet_tcp_getline(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+
+ /* Initialise the string catenation. */
+ luaL_buffinit(L, &luactx->b);
+
+ return MAY_LJMP(hlua_applet_tcp_getline_yield(L, 0, 0));
+}
+
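+/* A Lua-side sketch, assuming the documented AppletTCP:getline() method,
+ * which yields until a full line (or the end of the stream) is available:
+ *
+ *   core.register_service("line-echo", "tcp", function(applet)
+ *       local line = applet:getline()
+ *       applet:send(line)
+ *   end)
+ */
+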
+/* If the expected data is not yet available, the function yields. It
+ * consumes the data in the buffer and returns a string containing the
+ * data, which may be empty.
+ */
+__LJMP static int hlua_applet_tcp_recv_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ size_t len = MAY_LJMP(luaL_checkinteger(L, 2));
+ int ret;
+ const char *blk1;
+ size_t len1;
+ const char *blk2;
+ size_t len2;
+
+ /* Read the maximum amount of data available. */
+ ret = co_getblk_nc(sc_oc(sc), &blk1, &len1, &blk2, &len2);
+
+ /* Data not yet available: yield. */
+ if (ret == 0) {
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_recv_yield, TICK_ETERNITY, 0));
+ }
+
+ /* End of data: commit the accumulated string and return it. */
+ if (ret < 0) {
+ luaL_pushresult(&luactx->b);
+ return 1;
+ }
+
+ /* Ensure that the block 2 length is usable. */
+ if (ret == 1)
+ len2 = 0;
+
+ if (len == -1) {
+
+ /* If len == -1, catenate all the data available and
+ * yield because we want to get all the data until
+ * the end of the data stream.
+ */
+ luaL_addlstring(&luactx->b, blk1, len1);
+ luaL_addlstring(&luactx->b, blk2, len2);
+ co_skip(sc_oc(sc), len1 + len2);
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_recv_yield, TICK_ETERNITY, 0));
+
+ } else {
+
+ /* Copy the first block, capping it to the required length. */
+ if (len1 > len)
+ len1 = len;
+ luaL_addlstring(&luactx->b, blk1, len1);
+ len -= len1;
+
+ /* Copy the second block. */
+ if (len2 > len)
+ len2 = len;
+ luaL_addlstring(&luactx->b, blk2, len2);
+ len -= len2;
+
+ /* Consume input channel output buffer data. */
+ co_skip(sc_oc(sc), len1 + len2);
+
+ /* If some data is still missing, yield waiting for new data. */
+ if (len > 0) {
+ lua_pushinteger(L, len);
+ lua_replace(L, 2);
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_recv_yield, TICK_ETERNITY, 0));
+ }
+
+ /* return the result. */
+ luaL_pushresult(&luactx->b);
+ return 1;
+ }
+
+ /* we never execute this */
+ hlua_pusherror(L, "Lua: internal error");
+ WILL_LJMP(lua_error(L));
+ return 0;
+}
+
+/* Check arguments for the function "hlua_channel_get_yield". */
+__LJMP static int hlua_applet_tcp_recv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ int len = -1;
+
+ if (lua_gettop(L) > 2)
+ WILL_LJMP(luaL_error(L, "The 'recv' function requires between 1 and 2 arguments."));
+ if (lua_gettop(L) >= 2) {
+ len = MAY_LJMP(luaL_checkinteger(L, 2));
+ lua_pop(L, 1);
+ }
+
+ /* Confirm or set the required length */
+ lua_pushinteger(L, len);
+
+ /* Initialise the string catenation. */
+ luaL_buffinit(L, &luactx->b);
+
+ return MAY_LJMP(hlua_applet_tcp_recv_yield(L, 0, 0));
+}
+
+/* Appends data to the output side of the buffer. This data is immediately
+ * sent. The function returns the amount of data written. If the buffer
+ * cannot contain the data, the function yields. The function returns -1
+ * if the channel is closed.
+ */
+__LJMP static int hlua_applet_tcp_send_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ size_t len;
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_tcp(L, 1));
+ const char *str = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ int l = MAY_LJMP(luaL_checkinteger(L, 3));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *chn = sc_ic(sc);
+ int max;
+
+ /* Get the max amount of data which can be written as input in the channel. */
+ max = channel_recv_max(chn);
+ if (max > (len - l))
+ max = len - l;
+
+ /* Copy data. */
+ ci_putblk(chn, str + l, max);
+
+ /* update counters. */
+ l += max;
+ lua_pop(L, 1);
+ lua_pushinteger(L, l);
+
+ /* If some data was not sent, declare the situation to the
+ * applet and yield.
+ */
+ if (l < len) {
+ sc_need_room(sc, channel_recv_max(chn) + 1);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_tcp_send_yield, TICK_ETERNITY, 0));
+ }
+
+ return 1;
+}
+
+/* Just a wrapper of "hlua_applet_tcp_send_yield". This wrapper permits
+ * yield the LUA process, and resume it without checking the
+ * input arguments.
+ */
+__LJMP static int hlua_applet_tcp_send(lua_State *L)
+{
+ MAY_LJMP(check_args(L, 2, "send"));
+ lua_pushinteger(L, 0);
+
+ return MAY_LJMP(hlua_applet_tcp_send_yield(L, 0, 0));
+}
+
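+/* A complete, hypothetical TCP echo service tying the receive and send
+ * bindings together, assuming the documented AppletTCP API:
+ *
+ *   core.register_service("echo", "tcp", function(applet)
+ *       while true do
+ *           local data = applet:receive(1)  -- yields until 1 byte arrives
+ *           if data == "" then break end    -- end of stream
+ *           applet:send(data)               -- may yield on a full buffer
+ *       end
+ *   end)
+ */
+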
+/*
+ *
+ *
+ * Class AppletHTTP
+ *
+ *
+ */
+
+/* Returns a struct hlua_txn if the stack entry "ud" is
+ * a class stream, otherwise it throws an error.
+ */
+__LJMP static struct hlua_appctx *hlua_checkapplet_http(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_applet_http_ref));
+}
+
+/* This function creates and pushes on the stack an AppletHTTP object
+ * according to the current TXN.
+ * It relies on the caller to have already reserved the room in ctx->svcctx
+ * for the local storage of hlua_http_ctx.
+ */
+static int hlua_applet_http_new(lua_State *L, struct appctx *ctx)
+{
+ struct hlua_http_ctx *http_ctx = ctx->svcctx;
+ struct hlua_appctx *luactx;
+ struct hlua_txn htxn;
+ struct stream *s = appctx_strm(ctx);
+ struct proxy *px = s->be;
+ struct htx *htx;
+ struct htx_blk *blk;
+ struct htx_sl *sl;
+ struct ist path;
+ unsigned long long len = 0;
+ int32_t pos;
+ struct http_uri_parser parser;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the AppletHTTP object is the
+ * same as the TXN object.
+ */
+ lua_newtable(L);
+ luactx = lua_newuserdata(L, sizeof(*luactx));
+ lua_rawseti(L, -2, 0);
+ luactx->appctx = ctx;
+ http_ctx->status = 200; /* Default status code returned. */
+ http_ctx->reason = NULL; /* Use default reason based on status */
+ luactx->htxn.s = s;
+ luactx->htxn.p = px;
+
+ /* Create the "f" field that contains a list of fetches. */
+ lua_pushstring(L, "f");
+ if (!hlua_fetches_new(L, &luactx->htxn, 0))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "sf" field that contains a list of stringsafe fetches. */
+ lua_pushstring(L, "sf");
+ if (!hlua_fetches_new(L, &luactx->htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "c" field that contains a list of converters. */
+ lua_pushstring(L, "c");
+ if (!hlua_converters_new(L, &luactx->htxn, 0))
+ return 0;
+ lua_settable(L, -3);
+
+ /* Create the "sc" field that contains a list of stringsafe converters. */
+ lua_pushstring(L, "sc");
+ if (!hlua_converters_new(L, &luactx->htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_settable(L, -3);
+
+ htx = htxbuf(&s->req.buf);
+ blk = htx_get_first_blk(htx);
+ BUG_ON(!blk || htx_get_blk_type(blk) != HTX_BLK_REQ_SL);
+ sl = htx_get_blk_ptr(htx, blk);
+
+ /* Stores the request method. */
+ lua_pushstring(L, "method");
+ lua_pushlstring(L, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl));
+ lua_settable(L, -3);
+
+ /* Stores the http version. */
+ lua_pushstring(L, "version");
+ lua_pushlstring(L, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl));
+ lua_settable(L, -3);
+
+ /* Creates an array of headers. hlua_http_get_headers() creates and pushes
+ * the array on the top of the stack.
+ */
+ lua_pushstring(L, "headers");
+ htxn.s = s;
+ htxn.p = px;
+ htxn.dir = SMP_OPT_DIR_REQ;
+ if (!hlua_http_get_headers(L, &htxn.s->txn->req))
+ return 0;
+ lua_settable(L, -3);
+
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (isttest(path)) {
+ char *p, *q, *end;
+
+ p = path.ptr;
+ end = istend(path);
+ q = p;
+ while (q < end && *q != '?')
+ q++;
+
+ /* Stores the request path. */
+ lua_pushstring(L, "path");
+ lua_pushlstring(L, p, q - p);
+ lua_settable(L, -3);
+
+ /* Stores the query string. */
+ lua_pushstring(L, "qs");
+ if (*q == '?')
+ q++;
+ lua_pushlstring(L, q, end - q);
+ lua_settable(L, -3);
+ }
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+ if (htx->extra != HTX_UNKOWN_PAYLOAD_LENGTH)
+ len += htx->extra;
+
+ /* Stores the request body length. */
+ lua_pushstring(L, "length");
+ lua_pushinteger(L, len);
+ lua_settable(L, -3);
+
+ /* Create an empty array for the HTTP response headers. */
+ lua_pushstring(L, "response");
+ lua_newtable(L);
+ lua_settable(L, -3);
+
+ /* Push the class AppletHTTP metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_applet_http_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
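+/* The fields created above are visible from Lua. A sketch, assuming the
+ * documented AppletHTTP object; the service name is hypothetical:
+ *
+ *   core.register_service("inspect", "http", function(applet)
+ *       local m   = applet.method   -- e.g. "GET"
+ *       local p   = applet.path     -- path without the query-string
+ *       local q   = applet.qs       -- query-string without the '?'
+ *       local len = applet.length   -- announced body length
+ *       local ua  = applet.headers["user-agent"]  -- table of values
+ *   end)
+ */
+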
+__LJMP static int hlua_applet_http_set_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ if (lua_gettop(L) < 3 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set_var' needs between 3 and 4 arguments"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ /* Converts the third argument in a sample. */
+ memset(&smp, 0, sizeof(smp));
+ hlua_lua2smp(L, 3, &smp);
+
+ /* Store the sample in a variable. We don't need to dup the smp, vars API
+ * already takes care of duplicating dynamic var data.
+ */
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+
+ if (lua_gettop(L) == 4 && lua_toboolean(L, 4))
+ lua_pushboolean(L, vars_set_by_name_ifexist(name, len, &smp) != 0);
+ else
+ lua_pushboolean(L, vars_set_by_name(name, len, &smp) != 0);
+
+ return 1;
+}
+
+__LJMP static int hlua_applet_http_unset_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "unset_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ /* Unset the variable. */
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+ lua_pushboolean(L, vars_unset_by_name_ifexist(name, len, &smp) != 0);
+ return 1;
+}
+
+__LJMP static int hlua_applet_http_get_var(lua_State *L)
+{
+ struct hlua_appctx *luactx;
+ struct stream *s;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "get_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ s = luactx->htxn.s;
+
+ smp_set_owner(&smp, s->be, s->sess, s, 0);
+ if (!vars_get_by_name(name, len, &smp, NULL)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ return MAY_LJMP(hlua_smp2lua(L, &smp));
+}
+
+__LJMP static int hlua_applet_http_set_priv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stream *s = luactx->htxn.s;
+ struct hlua *hlua;
+
+ /* Note that this hlua struct is from the session and not from the applet. */
+ if (!s->hlua)
+ return 0;
+ hlua = s->hlua;
+
+ MAY_LJMP(check_args(L, 2, "set_priv"));
+
+ /* Remove previous value. */
+ luaL_unref(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ /* Get and store new value. */
+ lua_pushvalue(L, 2); /* Copy the element 2 at the top of the stack. */
+ hlua->Mref = luaL_ref(L, LUA_REGISTRYINDEX); /* pop the previously pushed value. */
+
+ return 0;
+}
+
+__LJMP static int hlua_applet_http_get_priv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stream *s = luactx->htxn.s;
+ struct hlua *hlua;
+
+ /* Note that this hlua struct is from the session and not from the applet. */
+ if (!s->hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+ hlua = s->hlua;
+
+ /* Push configuration index in the stack. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ return 1;
+}
+
+/* If the expected data is not yet available, the function yields. It
+ * consumes the data in the buffer and returns a string containing the
+ * data, which may be empty.
+ */
+__LJMP static int hlua_applet_http_getline_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *req = sc_oc(sc);
+ struct htx *htx;
+ struct htx_blk *blk;
+ size_t count;
+ int stop = 0;
+
+ htx = htx_from_buf(&req->buf);
+ count = co_data(req);
+ blk = htx_get_first_blk(htx);
+
+ while (count && !stop && blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ struct ist v;
+ uint32_t vlen;
+ char *nl;
+
+ vlen = sz;
+ if (vlen > count) {
+ if (type != HTX_BLK_DATA)
+ break;
+ vlen = count;
+ }
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ v = htx_get_blk_value(htx, blk);
+ v.len = vlen;
+ nl = istchr(v, '\n');
+ if (nl != NULL) {
+ stop = 1;
+ vlen = nl - v.ptr + 1;
+ }
+ luaL_addlstring(&luactx->b, v.ptr, vlen);
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ stop = 1;
+ break;
+
+ default:
+ break;
+ }
+
+ c_rew(req, vlen);
+ count -= vlen;
+ if (sz == vlen)
+ blk = htx_remove_blk(htx, blk);
+ else {
+ htx_cut_data_blk(htx, blk, vlen);
+ break;
+ }
+ }
+
+ /* The message was fully consumed and no more data is expected
+ * (EOM flag set).
+ */
+ if (htx_is_empty(htx) && (sc_opposite(sc)->flags & SC_FL_EOI))
+ stop = 1;
+
+ htx_to_buf(htx, &req->buf);
+ if (!stop) {
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_http_getline_yield, TICK_ETERNITY, 0));
+ }
+
+ /* return the result. */
+ luaL_pushresult(&luactx->b);
+ return 1;
+}
+
+
+/* Check arguments for the function "hlua_channel_get_yield". */
+__LJMP static int hlua_applet_http_getline(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+
+ /* Initialise the string catenation. */
+ luaL_buffinit(L, &luactx->b);
+
+ return MAY_LJMP(hlua_applet_http_getline_yield(L, 0, 0));
+}
+
+/* If the expected data is not yet available, the function yields. It
+ * consumes the data in the buffer and returns a string containing the
+ * data, which may be empty.
+ */
+__LJMP static int hlua_applet_http_recv_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *req = sc_oc(sc);
+ struct htx *htx;
+ struct htx_blk *blk;
+ size_t count;
+ int len;
+
+ htx = htx_from_buf(&req->buf);
+ len = MAY_LJMP(luaL_checkinteger(L, 2));
+ count = co_data(req);
+ blk = htx_get_head_blk(htx);
+ while (count && len && blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+ struct ist v;
+ uint32_t vlen;
+
+ vlen = sz;
+ if (len > 0 && vlen > len)
+ vlen = len;
+ if (vlen > count) {
+ if (type != HTX_BLK_DATA)
+ break;
+ vlen = count;
+ }
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ v = htx_get_blk_value(htx, blk);
+ luaL_addlstring(&luactx->b, v.ptr, vlen);
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ len = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ c_rew(req, vlen);
+ count -= vlen;
+ if (len > 0)
+ len -= vlen;
+ if (sz == vlen)
+ blk = htx_remove_blk(htx, blk);
+ else {
+ htx_cut_data_blk(htx, blk, vlen);
+ break;
+ }
+ }
+
+ /* The message was fully consumed and no more data is expected
+ * (EOM flag set).
+ */
+ if (htx_is_empty(htx) && (sc_opposite(sc)->flags & SC_FL_EOI))
+ len = 0;
+
+ htx_to_buf(htx, &req->buf);
+
+ /* If some data is still expected, yield waiting for new data. */
+ if (len) {
+ if (len > 0) {
+ lua_pushinteger(L, len);
+ lua_replace(L, 2);
+ }
+ applet_need_more_data(luactx->appctx);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_http_recv_yield, TICK_ETERNITY, 0));
+ }
+
+ /* return the result. */
+ luaL_pushresult(&luactx->b);
+ return 1;
+}
+
+/* Check arguments for the function "hlua_channel_get_yield". */
+__LJMP static int hlua_applet_http_recv(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ int len = -1;
+
+ /* Check arguments. */
+ if (lua_gettop(L) > 2)
+ WILL_LJMP(luaL_error(L, "The 'recv' function requires between 1 and 2 arguments."));
+ if (lua_gettop(L) >= 2) {
+ len = MAY_LJMP(luaL_checkinteger(L, 2));
+ lua_pop(L, 1);
+ }
+
+ lua_pushinteger(L, len);
+
+ /* Initialise the string catenation. */
+ luaL_buffinit(L, &luactx->b);
+
+ return MAY_LJMP(hlua_applet_http_recv_yield(L, 0, 0));
+}
+
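+/* A Lua-side sketch, assuming the documented AppletHTTP:receive()
+ * method; with no argument it yields until the whole body is read:
+ *
+ *   core.register_service("drain", "http", function(applet)
+ *       local body = applet:receive()  -- full body, possibly empty
+ *   end)
+ */
+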
+/* Appends data to the output side of the buffer. This data is immediately
+ * sent. The function returns the amount of data written. If the buffer
+ * cannot contain the data, the function yields. The function returns -1
+ * if the channel is closed.
+ */
+__LJMP static int hlua_applet_http_send_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *res = sc_ic(sc);
+ struct htx *htx = htx_from_buf(&res->buf);
+ const char *data;
+ size_t len;
+ int l = MAY_LJMP(luaL_checkinteger(L, 3));
+ int max;
+
+ max = htx_get_max_blksz(htx, channel_htx_recv_max(res, htx));
+ if (!max)
+ goto snd_yield;
+
+ data = MAY_LJMP(luaL_checklstring(L, 2, &len));
+
+ /* Get the max amount of data which can be written as input in the channel. */
+ if (max > (len - l))
+ max = len - l;
+
+ /* Copy data. */
+ max = htx_add_data(htx, ist2(data + l, max));
+ channel_add_input(res, max);
+
+ /* update counters. */
+ l += max;
+ lua_pop(L, 1);
+ lua_pushinteger(L, l);
+
+ /* If some data was not sent, declare the situation to the
+ * applet and yield.
+ */
+ if (l < len) {
+ snd_yield:
+ htx_to_buf(htx, &res->buf);
+ sc_need_room(sc, channel_recv_max(res) + 1);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_http_send_yield, TICK_ETERNITY, 0));
+ }
+
+ htx_to_buf(htx, &res->buf);
+ return 1;
+}
+
+/* Just a wrapper of "hlua_applet_send_yield". This wrapper permits
+ * yield the LUA process, and resume it without checking the
+ * input arguments.
+ */
+__LJMP static int hlua_applet_http_send(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx;
+
+ /* We want to send some data. The headers must have been sent first. */
+ if (!(http_ctx->flags & APPLET_HDR_SENT)) {
+ hlua_pusherror(L, "Lua: 'send' you must call start_response() before sending data.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* This integer is used for tracking the amount of data sent. */
+ lua_pushinteger(L, 0);
+
+ return MAY_LJMP(hlua_applet_http_send_yield(L, 0, 0));
+}
+
+__LJMP static int hlua_applet_http_addheader(lua_State *L)
+{
+ const char *name;
+ int ret;
+
+ MAY_LJMP(hlua_checkapplet_http(L, 1));
+ name = MAY_LJMP(luaL_checkstring(L, 2));
+ MAY_LJMP(luaL_checkstring(L, 3));
+
+ /* Push in the stack the "response" entry. */
+ ret = lua_getfield(L, 1, "response");
+ if (ret != LUA_TTABLE) {
+ hlua_pusherror(L, "Lua: 'add_header' internal error: AppletHTTP['response'] "
+ "is expected as an array. %s found", lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Check if the header is already registered. If it is not
+ * the case, register it.
+ */
+ ret = lua_getfield(L, -1, name);
+ if (ret == LUA_TNIL) {
+
+ /* Entry not found. */
+ lua_pop(L, 1); /* remove the nil. The "response" table is the top of the stack. */
+
+ /* Insert the new header name in the array at the top of the stack.
+ * This leaves the new array at the top of the stack.
+ */
+ lua_newtable(L);
+ lua_pushvalue(L, 2);
+ lua_pushvalue(L, -2);
+ lua_settable(L, -4);
+
+ } else if (ret != LUA_TTABLE) {
+
+ /* corruption error. */
+ hlua_pusherror(L, "Lua: 'add_header' internal error: AppletHTTP['response']['%s'] "
+ "is expected as an array. %s found", name, lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Now the top of the stack is an array of values. We push
+ * the header value as a new entry.
+ */
+ lua_pushvalue(L, 3);
+ ret = lua_rawlen(L, -2);
+ lua_rawseti(L, -2, ret + 1);
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+__LJMP static int hlua_applet_http_status(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ int status = MAY_LJMP(luaL_checkinteger(L, 2));
+ const char *reason = MAY_LJMP(luaL_optlstring(L, 3, NULL, NULL));
+ struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx;
+
+ if (status < 100 || status > 599) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ http_ctx->status = status;
+ http_ctx->reason = reason;
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+
+__LJMP static int hlua_applet_http_send_response(lua_State *L)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct hlua_http_ctx *http_ctx = luactx->appctx->svcctx;
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *res = sc_ic(sc);
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct h1m h1m;
+ const char *status, *reason;
+ const char *name, *value;
+ size_t nlen, vlen;
+ unsigned int flags;
+
+ /* Send the message at once. */
+ htx = htx_from_buf(&res->buf);
+ h1m_init_res(&h1m);
+
+ /* Use the same HTTP version as the request. */
+ status = ultoa_r(http_ctx->status, trash.area, trash.size);
+ reason = http_ctx->reason;
+ if (reason == NULL)
+ reason = http_get_reason(http_ctx->status);
+ if (http_ctx->flags & APPLET_HTTP11) {
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist(status), ist(reason));
+ }
+ else {
+ flags = HTX_SL_F_IS_RESP;
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.0"), ist(status), ist(reason));
+ }
+ if (!sl) {
+ hlua_pusherror(L, "Lua applet http '%s': Failed to create response.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name);
+ WILL_LJMP(lua_error(L));
+ }
+ sl->info.res.status = http_ctx->status;
+
+ /* Get the array associated to the field "response" in the object AppletHTTP. */
+ if (lua_getfield(L, 1, "response") != LUA_TTABLE) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response'] missing.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name);
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Browse the list of headers. */
+ lua_pushnil(L);
+ while(lua_next(L, -2) != 0) {
+ /* We expect a string as -2. */
+ if (lua_type(L, -2) != LUA_TSTRING) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response'][] element must be a string. got %s.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ lua_typename(L, lua_type(L, -2)));
+ WILL_LJMP(lua_error(L));
+ }
+ name = lua_tolstring(L, -2, &nlen);
+
+ /* We expect an array as -1. */
+ if (lua_type(L, -1) != LUA_TTABLE) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response']['%s'] element must be an table. got %s.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name,
+ lua_typename(L, lua_type(L, -1)));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Browse the table which is on the top of the stack. */
+ lua_pushnil(L);
+ while(lua_next(L, -2) != 0) {
+ int id;
+
+ /* We expect a number as -2. */
+ if (lua_type(L, -2) != LUA_TNUMBER) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response']['%s'][] element must be a number. got %s.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name,
+ lua_typename(L, lua_type(L, -2)));
+ WILL_LJMP(lua_error(L));
+ }
+ id = lua_tointeger(L, -2);
+
+ /* We expect a string as -1. */
+ if (lua_type(L, -1) != LUA_TSTRING) {
+ hlua_pusherror(L, "Lua applet http '%s': AppletHTTP['response']['%s'][%d] element must be a string. got %s.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name, id,
+ lua_typename(L, lua_type(L, -1)));
+ WILL_LJMP(lua_error(L));
+ }
+ value = lua_tolstring(L, -1, &vlen);
+
+ /* Simple Protocol checks. */
+ if (isteqi(ist2(name, nlen), ist("transfer-encoding"))) {
+ int ret;
+
+ ret = h1_parse_xfer_enc_header(&h1m, ist2(value, vlen));
+ if (ret < 0) {
+ hlua_pusherror(L, "Lua applet http '%s': Invalid '%s' header.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name);
+ WILL_LJMP(lua_error(L));
+ }
+ else if (ret == 0)
+ goto next; /* Skip it */
+ }
+ else if (isteqi(ist2(name, nlen), ist("content-length"))) {
+ struct ist v = ist2(value, vlen);
+ int ret;
+
+ ret = h1_parse_cont_len_header(&h1m, &v);
+ if (ret < 0) {
+ hlua_pusherror(L, "Lua applet http '%s': Invalid '%s' header.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name);
+ WILL_LJMP(lua_error(L));
+ }
+ else if (ret == 0)
+ goto next; /* Skip it */
+ }
+
+ /* Add a new header */
+ if (!htx_add_header(htx, ist2(name, nlen), ist2(value, vlen))) {
+ hlua_pusherror(L, "Lua applet http '%s': Failed to add header '%s' in the response.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name,
+ name);
+ WILL_LJMP(lua_error(L));
+ }
+ next:
+ /* Remove the array from the stack, and get next element with a remaining string. */
+ lua_pop(L, 1);
+ }
+
+ /* Remove the array from the stack, and get next element with a remaining string. */
+ lua_pop(L, 1);
+ }
+
+ if (h1m.flags & H1_MF_CHNK)
+ h1m.flags &= ~H1_MF_CLEN;
+ if (h1m.flags & (H1_MF_CLEN|H1_MF_CHNK))
+ h1m.flags |= H1_MF_XFER_LEN;
+
+ /* Set the HTX start-line flags. */
+ if (h1m.flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ if (h1m.flags & H1_MF_XFER_LEN) {
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m.flags & H1_MF_CHNK)
+ flags |= HTX_SL_F_CHNK;
+ else if (h1m.flags & H1_MF_CLEN)
+ flags |= HTX_SL_F_CLEN;
+ if (h1m.body_len == 0)
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ sl->flags |= flags;
+
+ /* If we don't have a content-length set, and the HTTP version is 1.1
+ * and the status code implies the presence of a message body, we must
+ * announce a chunked transfer encoding. This is required by haproxy
+ * for keep-alive compliance. If the applet announces a transfer-encoding
+ * chunked itself, don't do anything.
+ */
+ if ((flags & (HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN)) == HTX_SL_F_VER_11 &&
+ http_ctx->status >= 200 && http_ctx->status != 204 && http_ctx->status != 304) {
+ /* Add a new header */
+ sl->flags |= (HTX_SL_F_XFER_ENC|H1_MF_CHNK|H1_MF_XFER_LEN);
+ if (!htx_add_header(htx, ist("transfer-encoding"), ist("chunked"))) {
+ hlua_pusherror(L, "Lua applet http '%s': Failed to add header 'transfer-encoding' in the response.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name);
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ /* Finalize headers. */
+ if (!htx_add_endof(htx, HTX_BLK_EOH)) {
+ hlua_pusherror(L, "Lua applet http '%s': Failed create the response.\n",
+ luactx->appctx->rule->arg.hlua_rule->fcn->name);
+ WILL_LJMP(lua_error(L));
+ }
+
+ if (htx_used_space(htx) > b_size(&res->buf) - global.tune.maxrewrite) {
+ b_reset(&res->buf);
+ hlua_pusherror(L, "Lua: 'start_response': response header block too big");
+ WILL_LJMP(lua_error(L));
+ }
+
+ htx_to_buf(htx, &res->buf);
+ channel_add_input(res, htx->data);
+
+ /* Headers sent, set the flag. */
+ http_ctx->flags |= APPLET_HDR_SENT;
+ return 0;
+
+}
+/* We will build the status line and the headers of the HTTP response.
+ * We will try to send them at once; if it is not possible, we give
+ * back the hand, waiting for more room.
+ */
+__LJMP static int hlua_applet_http_start_response_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_appctx *luactx = MAY_LJMP(hlua_checkapplet_http(L, 1));
+ struct stconn *sc = appctx_sc(luactx->appctx);
+ struct channel *res = sc_ic(sc);
+
+ if (co_data(res)) {
+ sc_need_room(sc, -1);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_applet_http_start_response_yield, TICK_ETERNITY, 0));
+ }
+ return MAY_LJMP(hlua_applet_http_send_response(L));
+}
+
+
+__LJMP static int hlua_applet_http_start_response(lua_State *L)
+{
+ return MAY_LJMP(hlua_applet_http_start_response_yield(L, 0, 0));
+}
+
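+/* A complete, hypothetical HTTP service showing the intended call order
+ * of the bindings above, assuming the documented AppletHTTP API:
+ *
+ *   core.register_service("hello", "http", function(applet)
+ *       local body = "hello\n"
+ *       applet:set_status(200)
+ *       applet:add_header("content-type", "text/plain")
+ *       applet:add_header("content-length", tostring(#body))
+ *       applet:start_response()  -- headers are built and sent here
+ *       applet:send(body)        -- only valid after start_response()
+ *   end)
+ */
+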
+/*
+ *
+ *
+ * Class HTTP
+ *
+ *
+ */
+
+/* Returns a struct hlua_txn if the stack entry "ud" is
+ * a class HTTP, otherwise it throws an error.
+ */
+__LJMP static struct hlua_txn *hlua_checkhttp(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_http_ref));
+}
+
+/* This function creates and pushes on the stack an HTTP object
+ * according to the current TXN.
+ */
+static int hlua_http_new(lua_State *L, struct hlua_txn *txn)
+{
+ struct hlua_txn *htxn;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* Create the object: obj[0] = userdata.
+ * Note that the base of the HTTP object is the
+ * same as the TXN object.
+ */
+ lua_newtable(L);
+ htxn = lua_newuserdata(L, sizeof(*htxn));
+ lua_rawseti(L, -2, 0);
+
+ htxn->s = txn->s;
+ htxn->p = txn->p;
+ htxn->dir = txn->dir;
+ htxn->flags = txn->flags;
+
+ /* Push the class HTTP metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_http_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+/* This function creates and returns an array containing the status-line
+ * elements. This function never fails.
+ */
+__LJMP static int hlua_http_get_stline(lua_State *L, struct htx_sl *sl)
+{
+ /* Create the table. */
+ lua_newtable(L);
+
+ if (sl->flags & HTX_SL_F_IS_RESP) {
+ lua_pushstring(L, "version");
+ lua_pushlstring(L, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl));
+ lua_settable(L, -3);
+ lua_pushstring(L, "code");
+ lua_pushlstring(L, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl));
+ lua_settable(L, -3);
+ lua_pushstring(L, "reason");
+ lua_pushlstring(L, HTX_SL_RES_RPTR(sl), HTX_SL_RES_RLEN(sl));
+ lua_settable(L, -3);
+ }
+ else {
+ lua_pushstring(L, "method");
+ lua_pushlstring(L, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl));
+ lua_settable(L, -3);
+ lua_pushstring(L, "uri");
+ lua_pushlstring(L, HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl));
+ lua_settable(L, -3);
+ lua_pushstring(L, "version");
+ lua_pushlstring(L, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl));
+ lua_settable(L, -3);
+ }
+ return 1;
+}
+
+/* This function creates and returns an array of HTTP headers.
+ * This function never fails. It is used as a wrapper by the
+ * 2 following functions.
+ */
+__LJMP static int hlua_http_get_headers(lua_State *L, struct http_msg *msg)
+{
+ struct htx *htx;
+ int32_t pos;
+
+ /* Create the table. */
+ lua_newtable(L);
+
+
+ htx = htxbuf(&msg->chn->buf);
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n, v;
+ int len;
+
+ if (type == HTX_BLK_HDR) {
+ n = htx_get_blk_name(htx,blk);
+ v = htx_get_blk_value(htx, blk);
+ }
+ else if (type == HTX_BLK_EOH)
+ break;
+ else
+ continue;
+
+ /* Check for existing entry:
+ * assume that the table is on the top of the stack, and
+ * push the key on the stack; the function lua_gettable()
+ * performs the lookup.
+ */
+ lua_pushlstring(L, n.ptr, n.len);
+ lua_gettable(L, -2);
+
+ switch (lua_type(L, -1)) {
+ case LUA_TNIL:
+ /* Table not found, create it. */
+ lua_pop(L, 1); /* remove the nil value. */
+ lua_pushlstring(L, n.ptr, n.len); /* push the header name as key. */
+ lua_newtable(L); /* create and push empty table. */
+ lua_pushlstring(L, v.ptr, v.len); /* push header value. */
+ lua_rawseti(L, -2, 0); /* index header value (pop it). */
+ lua_rawset(L, -3); /* index new table with header name (pop the values). */
+ break;
+
+ case LUA_TTABLE:
+ /* Entry found: push the value in the table. */
+ len = lua_rawlen(L, -1);
+ lua_pushlstring(L, v.ptr, v.len); /* push header value. */
+ lua_rawseti(L, -2, len+1); /* index header value (pop it). */
+ lua_pop(L, 1); /* remove the table (it is stored in the main table). */
+ break;
+
+ default:
+ /* Other cases are errors. */
+ hlua_pusherror(L, "internal error during the parsing of headers.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ return 1;
+}
+
+__LJMP static int hlua_http_req_get_headers(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 1, "req_get_headers"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_get_headers(L, &htxn->s->txn->req);
+}
+
+__LJMP static int hlua_http_res_get_headers(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 1, "res_get_headers"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_get_headers(L, &htxn->s->txn->rsp);
+}
+
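+/* A Lua-side sketch of the header dump, assuming the documented HTTP
+ * class. Each entry is a table of values indexed from 0, so duplicated
+ * headers are preserved; the action name is hypothetical:
+ *
+ *   core.register_action("dump", { "http-req" }, function(txn)
+ *       local hdrs = txn.http:req_get_headers()
+ *       for name, values in pairs(hdrs) do
+ *           for i, v in pairs(values) do
+ *               core.Debug(name .. "[" .. i .. "] = " .. v)
+ *           end
+ *       end
+ *   end)
+ */
+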
+/* This function replaces a full header, or just a value, in
+ * the request or in the response. It is a wrapper for the
+ * 4 following functions.
+ */
+__LJMP static inline int hlua_http_rep_hdr(lua_State *L, struct http_msg *msg, int full)
+{
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+ const char *reg = MAY_LJMP(luaL_checkstring(L, 3));
+ const char *value = MAY_LJMP(luaL_checkstring(L, 4));
+ struct htx *htx;
+ struct my_regex *re;
+
+ if (!(re = regex_comp(reg, 1, 1, NULL)))
+ WILL_LJMP(luaL_argerror(L, 3, "invalid regex"));
+
+ htx = htxbuf(&msg->chn->buf);
+ http_replace_hdrs(chn_strm(msg->chn), htx, ist2(name, name_len), value, re, full);
+ regex_free(re);
+ return 0;
+}
+
+__LJMP static int hlua_http_req_rep_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 4, "req_rep_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return MAY_LJMP(hlua_http_rep_hdr(L, &htxn->s->txn->req, 1));
+}
+
+__LJMP static int hlua_http_res_rep_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 4, "res_rep_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return MAY_LJMP(hlua_http_rep_hdr(L, &htxn->s->txn->rsp, 1));
+}
+
+__LJMP static int hlua_http_req_rep_val(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 4, "req_rep_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return MAY_LJMP(hlua_http_rep_hdr(L, &htxn->s->txn->req, 0));
+}
+
+__LJMP static int hlua_http_res_rep_val(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 4, "res_rep_val"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return MAY_LJMP(hlua_http_rep_hdr(L, &htxn->s->txn->rsp, 0));
+}
+
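+/* A Lua-side sketch of the replacement bindings, assuming the documented
+ * HTTP class method names (the Lua-visible names differ slightly from
+ * the C function names); the action name is hypothetical:
+ *
+ *   core.register_action("redact", { "http-req" }, function(txn)
+ *       -- Replace the full value of every user-agent header.
+ *       txn.http:req_rep_header("user-agent", ".*", "redacted")
+ *   end)
+ */
+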
+/* This function deletes all the occurrences of a header.
+ * It is a wrapper for the 2 following functions.
+ */
+__LJMP static inline int hlua_http_del_hdr(lua_State *L, struct http_msg *msg)
+{
+ size_t len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct http_hdr_ctx ctx;
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist2(name, len), &ctx, 1))
+ http_remove_header(htx, &ctx);
+ return 0;
+}
+
+__LJMP static int hlua_http_req_del_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "req_del_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_del_hdr(L, &htxn->s->txn->req);
+}
+
+__LJMP static int hlua_http_res_del_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "res_del_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_del_hdr(L, &htxn->s->txn->rsp);
+}
+
+/* This function adds a header. It is a wrapper used by
+ * the 2 following functions.
+ */
+__LJMP static inline int hlua_http_add_hdr(lua_State *L, struct http_msg *msg)
+{
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+ size_t value_len;
+ const char *value = MAY_LJMP(luaL_checklstring(L, 3, &value_len));
+ struct htx *htx = htxbuf(&msg->chn->buf);
+
+ lua_pushboolean(L, http_add_header(htx, ist2(name, name_len),
+ ist2(value, value_len)));
+ return 0;
+}
+
+__LJMP static int hlua_http_req_add_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "req_add_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_add_hdr(L, &htxn->s->txn->req);
+}
+
+__LJMP static int hlua_http_res_add_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "res_add_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_add_hdr(L, &htxn->s->txn->rsp);
+}
+
+static int hlua_http_req_set_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "req_set_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ hlua_http_del_hdr(L, &htxn->s->txn->req);
+ return hlua_http_add_hdr(L, &htxn->s->txn->req);
+}
+
+static int hlua_http_res_set_hdr(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "res_set_hdr"));
+ htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ hlua_http_del_hdr(L, &htxn->s->txn->rsp);
+ return hlua_http_add_hdr(L, &htxn->s->txn->rsp);
+}
+
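+/* A Lua-side sketch of the header mutators, assuming the documented HTTP
+ * class method names. set_header is implemented above as a delete
+ * followed by an add, so duplicate occurrences collapse into one; the
+ * action name is hypothetical:
+ *
+ *   core.register_action("rewrite", { "http-res" }, function(txn)
+ *       txn.http:res_set_header("server", "front")
+ *       txn.http:res_del_header("x-debug")
+ *   end)
+ */
+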
+/* This function sets the method. */
+static int hlua_http_req_set_meth(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ lua_pushboolean(L, http_req_replace_stline(0, name, name_len, htxn->p, htxn->s) != -1);
+ return 1;
+}
+
+/* This function sets the path. */
+static int hlua_http_req_set_path(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ lua_pushboolean(L, http_req_replace_stline(1, name, name_len, htxn->p, htxn->s) != -1);
+ return 1;
+}
+
+/* This function sets the query-string. */
+static int hlua_http_req_set_query(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ /* Check length. */
+ if (name_len > trash.size - 1) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ /* Add the question mark as a prefix. */
+ chunk_reset(&trash);
+ trash.area[trash.data++] = '?';
+ memcpy(trash.area + trash.data, name, name_len);
+ trash.data += name_len;
+
+ lua_pushboolean(L,
+ http_req_replace_stline(2, trash.area, trash.data, htxn->p, htxn->s) != -1);
+ return 1;
+}
+
+/* This function sets the URI. */
+static int hlua_http_req_set_uri(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ size_t name_len;
+ const char *name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if (htxn->dir != SMP_OPT_DIR_REQ || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ lua_pushboolean(L, http_req_replace_stline(3, name, name_len, htxn->p, htxn->s) != -1);
+ return 1;
+}
+
+/* This function sets the response status code and, optionally, the reason. */
+static int hlua_http_res_set_status(lua_State *L)
+{
+ struct hlua_txn *htxn = MAY_LJMP(hlua_checkhttp(L, 1));
+ unsigned int code = MAY_LJMP(luaL_checkinteger(L, 2));
+ const char *str = MAY_LJMP(luaL_optlstring(L, 3, NULL, NULL));
+ const struct ist reason = ist2(str, (str ? strlen(str) : 0));
+
+ if (htxn->dir != SMP_OPT_DIR_RES || !IS_HTX_STRM(htxn->s))
+ WILL_LJMP(lua_error(L));
+
+ http_res_set_status(code, reason, htxn->s);
+ return 0;
+}
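+
+/* Illustrative Lua usage for the start-line helpers above (a hedged sketch;
+ * the action name is hypothetical):
+ *
+ *   core.register_action("rewrite", { "http-req" }, function(txn)
+ *       txn.http:req_set_path("/v2" .. txn.f:path())
+ *       txn.http:req_set_query("from=lua")   -- the "?" is added by the helper
+ *   end)
+ */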
+
+/*
+ *
+ *
+ * Class HTTPMessage
+ *
+ *
+ */
+
+/* Returns a struct http_msg if the stack entry "ud" is a class HTTPMessage,
+ * otherwise it throws an error.
+ */
+__LJMP static struct http_msg *hlua_checkhttpmsg(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_http_msg_ref));
+}
+
+/* Creates and pushes onto the stack an HTTPMessage object wrapping the given
+ * HTTP message.
+ */
+static int hlua_http_msg_new(lua_State *L, struct http_msg *msg)
+{
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ lua_newtable(L);
+ lua_pushlightuserdata(L, msg);
+ lua_rawseti(L, -2, 0);
+
+ /* Create the "channel" field that contains the request channel object. */
+ lua_pushstring(L, "channel");
+ if (!hlua_channel_new(L, msg->chn))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Pop a class HTTPMessage metatable and assign it to the table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_http_msg_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+/* Helper function returning a filter attached to the HTTP message at the
+ * position <ud> in the stack, filling the current offset and length of the
+ * filter. If no filter is attached, NULL is returned and <offset> and <len>
+ * are filled with the channel's output and input lengths respectively.
+ */
+static struct filter *hlua_http_msg_filter(lua_State *L, int ud, struct http_msg *msg, size_t *offset, size_t *len)
+{
+ struct channel *chn = msg->chn;
+ struct htx *htx = htxbuf(&chn->buf);
+ struct filter *filter = NULL;
+
+ *offset = co_data(msg->chn);
+ *len = htx->data - co_data(msg->chn);
+
+ if (lua_getfield(L, ud, "__filter") == LUA_TLIGHTUSERDATA) {
+ filter = lua_touserdata(L, -1);
+ if (msg->msg_state >= HTTP_MSG_DATA) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ *offset = flt_ctx->cur_off[CHN_IDX(chn)];
+ *len = flt_ctx->cur_len[CHN_IDX(chn)];
+ }
+ }
+
+ lua_pop(L, 1);
+ return filter;
+}
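+
+/* In other words, for the common case: without an attached filter, <offset>
+ * and <len> are the channel's output and input amounts; with a filter, once
+ * the message is in the DATA state, they are this filter's own view of the
+ * not-yet-forwarded data, as tracked in its hlua_flt_ctx.
+ */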
+
+/* Returns true if the channel attached to the HTTP message is the response
+ * channel.
+ */
+__LJMP static int hlua_http_msg_is_resp(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 1, "is_resp"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ lua_pushboolean(L, !!(msg->chn->flags & CF_ISRESP));
+ return 1;
+}
+
+/* Returns an array containing the elements of the status-line of the HTTP
+ * message. It relies on hlua_http_get_stline().
+ */
+__LJMP static int hlua_http_msg_get_stline(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+ struct htx_sl *sl;
+
+ MAY_LJMP(check_args(L, 1, "get_stline"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ htx = htxbuf(&msg->chn->buf);
+ sl = http_get_stline(htx);
+ if (!sl)
+ return 0;
+ return hlua_http_get_stline(L, sl);
+}
+
+/* Returns an array containing all headers of the HTTP message. It relies on
+ * hlua_http_get_headers().
+ */
+__LJMP static int hlua_http_msg_get_headers(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 1, "get_headers"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_get_headers(L, msg);
+}
+
+/* Deletes all occurrences of a header in the HTTP message matching on its
+ * name. It relies on hlua_http_del_hdr().
+ */
+__LJMP static int hlua_http_msg_del_hdr(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 2, "del_header"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_del_hdr(L, msg);
+}
+
+/* Matches the full value line of all occurrences of a header in the HTTP
+ * message, given its name, against a regex and replaces it if it matches. It
+ * relies on hlua_http_rep_hdr().
+ */
+__LJMP static int hlua_http_msg_rep_hdr(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 4, "rep_header"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_rep_hdr(L, msg, 1);
+}
+
+/* Matches all comma-separated values of all occurrences of a header in the
+ * HTTP message, given its name, against a regex and replaces them if they
+ * match. It relies on hlua_http_rep_hdr().
+ */
+__LJMP static int hlua_http_msg_rep_val(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 4, "rep_value"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_rep_hdr(L, msg, 0);
+}
+
+/* Adds a header to the HTTP message. It relies on hlua_http_add_hdr(). */
+__LJMP static int hlua_http_msg_add_hdr(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 3, "add_header"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ return hlua_http_add_hdr(L, msg);
+}
+
+/* Adds a header to the HTTP message, removing existing headers with the same
+ * name. It relies on hlua_http_del_hdr() and hlua_http_add_hdr().
+ */
+__LJMP static int hlua_http_msg_set_hdr(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 3, "set_header"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ hlua_http_del_hdr(L, msg);
+ return hlua_http_add_hdr(L, msg);
+}
+
+/* Rewrites the request method. It relies on http_req_replace_stline(). */
+__LJMP static int hlua_http_msg_set_meth(lua_State *L)
+{
+ struct stream *s;
+ struct http_msg *msg;
+ const char *name;
+ size_t name_len;
+
+ MAY_LJMP(check_args(L, 2, "set_method"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if ((msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ s = chn_strm(msg->chn);
+ lua_pushboolean(L, http_req_replace_stline(0, name, name_len, s->be, s) != -1);
+ return 1;
+}
+
+/* Rewrites the request path. It relies on http_req_replace_stline(). */
+__LJMP static int hlua_http_msg_set_path(lua_State *L)
+{
+ struct stream *s;
+ struct http_msg *msg;
+ const char *name;
+ size_t name_len;
+
+ MAY_LJMP(check_args(L, 2, "set_path"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if ((msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ s = chn_strm(msg->chn);
+ lua_pushboolean(L, http_req_replace_stline(1, name, name_len, s->be, s) != -1);
+ return 1;
+}
+
+/* Rewrites the request query-string. It relies on http_req_replace_stline(). */
+__LJMP static int hlua_http_msg_set_query(lua_State *L)
+{
+ struct stream *s;
+ struct http_msg *msg;
+ const char *name;
+ size_t name_len;
+
+ MAY_LJMP(check_args(L, 2, "set_query"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if ((msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ /* Check length. */
+ if (name_len > trash.size - 1) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ /* Add the question mark as prefix. */
+ chunk_reset(&trash);
+ trash.area[trash.data++] = '?';
+ memcpy(trash.area + trash.data, name, name_len);
+ trash.data += name_len;
+
+ s = chn_strm(msg->chn);
+ lua_pushboolean(L, http_req_replace_stline(2, trash.area, trash.data, s->be, s) != -1);
+ return 1;
+}
+
+/* Rewrites the request URI. It relies on http_req_replace_stline(). */
+__LJMP static int hlua_http_msg_set_uri(lua_State *L)
+{
+ struct stream *s;
+ struct http_msg *msg;
+ const char *name;
+ size_t name_len;
+
+ MAY_LJMP(check_args(L, 2, "set_uri"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &name_len));
+
+ if ((msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ s = chn_strm(msg->chn);
+ lua_pushboolean(L, http_req_replace_stline(3, name, name_len, s->be, s) != -1);
+ return 1;
+}
+
+/* Rewrites the response status code. It relies on http_res_set_status(). */
+__LJMP static int hlua_http_msg_set_status(lua_State *L)
+{
+ struct http_msg *msg;
+ unsigned int code;
+ const char *reason;
+ size_t reason_len;
+
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ code = MAY_LJMP(luaL_checkinteger(L, 2));
+ reason = MAY_LJMP(luaL_optlstring(L, 3, NULL, &reason_len));
+
+ if (!(msg->chn->flags & CF_ISRESP) || msg->msg_state > HTTP_MSG_BODY)
+ WILL_LJMP(lua_error(L));
+
+ lua_pushboolean(L, http_res_set_status(code, ist2(reason, reason_len), chn_strm(msg->chn)) != -1);
+ return 1;
+}
+
+/* Returns true if the HTTP message is full. */
+__LJMP static int hlua_http_msg_is_full(lua_State *L)
+{
+ struct http_msg *msg;
+
+ MAY_LJMP(check_args(L, 1, "is_full"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ lua_pushboolean(L, channel_full(msg->chn, 0));
+ return 1;
+}
+
+/* Returns true if the HTTP message may still receive data. */
+__LJMP static int hlua_http_msg_may_recv(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+
+ MAY_LJMP(check_args(L, 1, "may_recv"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ htx = htxbuf(&msg->chn->buf);
+ lua_pushboolean(L, (htx_expect_more(htx) && !channel_input_closed(msg->chn) && channel_may_recv(msg->chn)));
+ return 1;
+}
+
+/* Returns true if the HTTP message EOM was received */
+__LJMP static int hlua_http_msg_is_eom(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+
+ MAY_LJMP(check_args(L, 1, "may_recv"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ htx = htxbuf(&msg->chn->buf);
+ lua_pushboolean(L, !htx_expect_more(htx));
+ return 1;
+}
+
+/* Returns the number of bytes available in the input side of the HTTP
+ * message. This function never fails.
+ */
+__LJMP static int hlua_http_msg_get_in_len(lua_State *L)
+{
+ struct http_msg *msg;
+ size_t output, input;
+
+ MAY_LJMP(check_args(L, 1, "input"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ hlua_http_msg_filter(L, 1, msg, &output, &input);
+ lua_pushinteger(L, input);
+ return 1;
+}
+
+/* Returns the number of bytes available in the output side of the HTTP
+ * message. This function never fails.
+ */
+__LJMP static int hlua_http_msg_get_out_len(lua_State *L)
+{
+ struct http_msg *msg;
+ size_t output, input;
+
+ MAY_LJMP(check_args(L, 1, "output"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ hlua_http_msg_filter(L, 1, msg, &output, &input);
+ lua_pushinteger(L, output);
+ return 1;
+}
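+
+/* Illustrative Lua usage (a hedged sketch, inside a Lua filter's http_payload
+ * callback; "MyFilter" is a hypothetical filter class):
+ *
+ *   function MyFilter:http_payload(txn, msg)
+ *       local pending = msg:input()    -- bytes not yet analyzed by the filter
+ *       local done    = msg:output()   -- bytes already forwarded
+ *   end
+ */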
+
+/* Copies at most <len> bytes of DATA blocks from the HTTP message <msg>
+ * starting at the offset <offset> and puts them in a Lua string. It
+ * returns the built string length. It stops on the first non-DATA HTX
+ * block. This function is called during the payload filtering, so the headers
+ * are already scheduled for output (from the filter point of view).
+ */
+static int _hlua_http_msg_dup(struct http_msg *msg, lua_State *L, size_t offset, size_t len)
+{
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ struct htx_blk *blk;
+ struct htx_ret htxret;
+ luaL_Buffer b;
+ int ret = 0;
+
+ luaL_buffinit(L, &b);
+ htxret = htx_find_offset(htx, offset);
+ for (blk = htxret.blk, offset = htxret.ret; blk && len; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist v;
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, offset);
+ v = isttrim(v, len);
+
+ luaL_addlstring(&b, v.ptr, v.len);
+ ret += v.len;
+ break;
+
+ default:
+ if (!ret)
+ goto no_data;
+ goto end;
+ }
+ offset = 0;
+ }
+
+end:
+ if (!ret && (htx->flags & HTX_FL_EOM))
+ goto no_data;
+ luaL_pushresult(&b);
+ return ret;
+
+ no_data:
+ /* Remove the empty string and push nil on the stack */
+ lua_pop(L, 1);
+ lua_pushnil(L);
+ return 0;
+}
+
+/* Copies the string <str> to the HTTP message <msg> at the offset
+ * <offset>. This function returns -1 if data cannot be copied. Otherwise, it
+ * returns the amount of data written. This function is responsible for updating
+ * the filter context.
+ */
+static int _hlua_http_msg_insert(struct http_msg *msg, struct filter *filter, struct ist str, size_t offset)
+{
+ struct htx *htx = htx_from_buf(&msg->chn->buf);
+ struct htx_ret htxret;
+ int ret = 0;
+
+ /* Nothing to do, just return */
+ if (unlikely(istlen(str) == 0))
+ goto end;
+
+ if (istlen(str) > htx_free_data_space(htx)) {
+ ret = -1;
+ goto end;
+ }
+
+ htxret = htx_find_offset(htx, offset);
+ if (!htxret.blk || htx_get_blk_type(htxret.blk) != HTX_BLK_DATA) {
+ if (!htx_add_last_data(htx, str))
+ goto end;
+ }
+ else {
+ struct ist v = htx_get_blk_value(htx, htxret.blk);
+ v.ptr += htxret.ret;
+ v.len = 0;
+ if (!htx_replace_blk_value(htx, htxret.blk, v, str))
+ goto end;
+ }
+ ret = str.len;
+ if (ret) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ flt_update_offsets(filter, msg->chn, ret);
+ flt_ctx->cur_len[CHN_IDX(msg->chn)] += ret;
+ }
+
+ end:
+ htx_to_buf(htx, &msg->chn->buf);
+ return ret;
+}
+
+/* Helper function removing at most <len> bytes of DATA blocks at the absolute
+ * position <offset>. It stops on the first non-DATA HTX block. This function is
+ * called during the payload filtering, so the headers are already scheduled for
+ * output (from the filter point of view). This function is responsible for
+ * updating the filter context.
+ */
+static void _hlua_http_msg_delete(struct http_msg *msg, struct filter *filter, size_t offset, size_t len)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ struct htx *htx = htx_from_buf(&msg->chn->buf);
+ struct htx_blk *blk;
+ struct htx_ret htxret;
+ size_t ret = 0;
+
+ /* Be sure <len> is always the amount of DATA to remove */
+ if (htx->data == offset+len && htx_get_tail_type(htx) == HTX_BLK_DATA) {
+ /* When htx tail type == HTX_BLK_DATA, no need to take care
+ * of special blocks like HTX_BLK_EOT.
+ * We simply truncate after offset
+ * (truncate targeted blk and discard the following ones)
+ */
+ htx_truncate(htx, offset);
+ ret = len;
+ goto end;
+ }
+
+ htxret = htx_find_offset(htx, offset);
+ blk = htxret.blk;
+ if (htxret.ret) {
+ /* dealing with offset: we need to trim targeted blk */
+ struct ist v;
+
+ if (htx_get_blk_type(blk) != HTX_BLK_DATA)
+ goto end;
+
+ v = htx_get_blk_value(htx, blk);
+ v = istadv(v, htxret.ret);
+
+ v = isttrim(v, len);
+ /* trimming data in blk: discard everything after the offset
+ * (replace 'v' with 'IST_NULL')
+ */
+ blk = htx_replace_blk_value(htx, blk, v, IST_NULL);
+ if (blk && v.len < len) {
+ /* In this case, caller wants to keep removing data,
+ * but we need to spare current blk
+ * because it was already trimmed
+ */
+ blk = htx_get_next_blk(htx, blk);
+ }
+ len -= v.len;
+ ret += v.len;
+ }
+
+ while (blk && len) {
+ /* there is more data that needs to be discarded */
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+
+ switch (type) {
+ case HTX_BLK_UNUSED:
+ break;
+
+ case HTX_BLK_DATA:
+ if (len < sz) {
+ /* don't discard whole blk, only part of it
+ * (from the beginning)
+ */
+ htx_cut_data_blk(htx, blk, len);
+ ret += len;
+ goto end;
+ }
+ break;
+
+ default:
+ /* HTX_BLK_EOT blk won't be removed */
+ goto end;
+ }
+
+ /* Remove all the data block */
+ len -= sz;
+ ret += sz;
+ blk = htx_remove_blk(htx, blk);
+ }
+
+end:
+ flt_update_offsets(filter, msg->chn, -ret);
+ flt_ctx->cur_len[CHN_IDX(msg->chn)] -= ret;
+ /* WARNING: we don't call htx_to_buf() on purpose, because we don't want
+ * to lose the EOM flag if the message is empty.
+ */
+}
+
+/* Copies input data found in an HTTP message. Unlike the channel function used
+ * to duplicate raw data, this one can only be called inside a filter, from the
+ * http_payload callback. So it cannot yield. An exception is raised if it is
+ * called from another callback. If nothing was copied, a nil value is pushed on
+ * the stack.
+ */
+__LJMP static int hlua_http_msg_get_body(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ size_t output, input;
+ int offset, len;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'data' expects at most 2 arguments"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ filter = hlua_http_msg_filter(L, 1, msg, &output, &input);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ if (!ci_data(msg->chn) && channel_input_closed(msg->chn)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto dup;
+ if (len == -1)
+ len = global.tune.bufsize;
+ if (len < 0) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ dup:
+ _hlua_http_msg_dup(msg, L, offset, len);
+ return 1;
+}
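+
+/* Illustrative Lua usage (a hedged sketch): peek at the payload chunk
+ * currently offered to the filter, without modifying it:
+ *
+ *   function MyFilter:http_payload(txn, msg)
+ *       local data = msg:body()
+ *       if data then core.Debug("chunk of " .. #data .. " bytes") end
+ *   end
+ */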
+
+/* Appends a string to the HTTP message, after all existing DATA blocks but
+ * before the trailers, if any. It returns the amount of data written or -1 if
+ * nothing was copied. Unlike the channel function used to append data, this one
+ * can only be called inside a filter, from the http_payload callback. So it
+ * cannot yield. An exception is raised if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_append(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ const char *str;
+ size_t offset, len, sz;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "append"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &offset, &len);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset+len);
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Prepends a string to the HTTP message, before all existing DATA blocks. It
+ * returns the amount of data written or -1 if nothing was copied. Unlike the
+ * channel function used to prepend data, this one can only be called inside a
+ * filter, from the http_payload callback. So it cannot yield. An exception is
+ * raised if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_prepend(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ const char *str;
+ size_t offset, len, sz;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "prepend"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &offset, &len);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset);
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Inserts a string into the HTTP message at a given offset. By default the
+ * string is appended at the end of DATA blocks. It returns the amount of data
+ * written or -1 if nothing was copied. Unlike the channel function used to
+ * insert data, this one can only be called inside a filter, from the
+ * http_payload callback. So it cannot yield. An exception is raised if it is
+ * called from another callback.
+ */
+__LJMP static int hlua_http_msg_insert_data(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ const char *str;
+ size_t input, output, sz;
+ int offset;
+ int ret;
+
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'insert' expects at least 1 argument and at most 2 arguments"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &output, &input);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 2) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset > output + input) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset);
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Removes a given amount of data from the HTTP message at a given offset. By
+ * default all DATA blocks are removed. It returns the amount of data
+ * removed. Unlike the channel function used to remove data, this one can only
+ * be called inside a filter, from the http_payload callback. So it cannot
+ * yield. An exception is raised if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_del_data(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ size_t input, output;
+ int offset, len;
+
+ if (lua_gettop(L) < 1 || lua_gettop(L) > 3)
+ WILL_LJMP(luaL_error(L, "'remove' expects at most 2 arguments"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ filter = hlua_http_msg_filter(L, 1, msg, &output, &input);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 1) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 2));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset > output + input) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 3) {
+ len = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (!len)
+ goto end;
+ if (len == -1)
+ len = output + input - offset;
+ if (len < 0 || offset + len > output + input) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ _hlua_http_msg_delete(msg, filter, offset, len);
+
+ end:
+ lua_pushinteger(L, len);
+ return 1;
+}
+
+/* Replaces a given amount of data at the given offset by a string. By default,
+ * all remaining data are removed, according to the filter context. It returns
+ * the amount of data written or -1 if nothing was copied. Unlike the channel
+ * function used to replace data, this one can only be called inside a filter,
+ * from the http_payload callback. So it cannot yield. An exception is raised
+ * if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_set_data(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ struct htx *htx;
+ const char *str;
+ size_t input, output, sz;
+ int offset, len;
+ int ret;
+
+ if (lua_gettop(L) < 2 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set' expects at least 1 argument and at most 3 arguments"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &output, &input);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ offset = output;
+ if (lua_gettop(L) > 2) {
+ offset = MAY_LJMP(luaL_checkinteger(L, 3));
+ if (offset < 0)
+ offset = MAX(0, (int)input + offset);
+ offset += output;
+ if (offset < output || offset > input + output) {
+ lua_pushfstring(L, "offset out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ len = output + input - offset;
+ if (lua_gettop(L) == 4) {
+ len = MAY_LJMP(luaL_checkinteger(L, 4));
+ if (!len)
+ goto set;
+ if (len == -1)
+ len = output + input - offset;
+ if (len < 0 || offset + len > output + input) {
+ lua_pushfstring(L, "length out of range.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+
+ set:
+ /* Be sure we can copy the string once input data has been removed. */
+ htx = htx_from_buf(&msg->chn->buf);
+ if (sz > htx_free_data_space(htx) + len)
+ lua_pushinteger(L, -1);
+ else {
+ _hlua_http_msg_delete(msg, filter, offset, len);
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset);
+ lua_pushinteger(L, ret);
+ }
+ return 1;
+}
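+
+/* Illustrative Lua usage (a hedged sketch): rewrite the part of the payload
+ * seen during this call. Note that changing the body length is only safe on
+ * messages whose framing can absorb it (e.g. chunked):
+ *
+ *   function MyFilter:http_payload(txn, msg)
+ *       local body = msg:body()
+ *       if body then
+ *           msg:set((body:gsub("secret", "XXXXXX")))
+ *       end
+ *   end
+ */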
+
+/* Prepends data into an HTTP message and forwards it, from the filter point of
+ * view. It returns the amount of data written or -1 if nothing was sent. Unlike
+ * the channel function used to send data, this one can only be called inside a
+ * filter, from the http_payload callback. So it cannot yield. An exception is
+ * raised if it is called from another callback.
+ */
+__LJMP static int hlua_http_msg_send(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ struct htx *htx;
+ const char *str;
+ size_t offset, len, sz;
+ int ret;
+
+ MAY_LJMP(check_args(L, 2, "send"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ str = MAY_LJMP(luaL_checklstring(L, 2, &sz));
+ filter = hlua_http_msg_filter(L, 1, msg, &offset, &len);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ /* Return an error if the channel's output is closed */
+ if (unlikely(channel_output_closed(msg->chn))) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ htx = htx_from_buf(&msg->chn->buf);
+ if (sz > htx_free_data_space(htx)) {
+ lua_pushinteger(L, -1);
+ return 1;
+ }
+
+ ret = _hlua_http_msg_insert(msg, filter, ist2(str, sz), offset);
+ if (ret > 0) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ FLT_OFF(filter, msg->chn) += ret;
+ flt_ctx->cur_len[CHN_IDX(msg->chn)] -= ret;
+ flt_ctx->cur_off[CHN_IDX(msg->chn)] += ret;
+ }
+
+ lua_pushinteger(L, ret);
+ return 1;
+}
+
+/* Forwards a given amount of bytes. It returns -1 if the channel's output is
+ * closed. Otherwise, it returns the number of bytes forwarded. Unlike the
+ * channel function used to forward data, this one can only be called inside a
+ * filter, from the http_payload callback. So it cannot yield. An exception is
+ * raised if it is called from another callback. All other functions deal with
+ * DATA blocks; this one does not.
+ */
+__LJMP static int hlua_http_msg_forward(lua_State *L)
+{
+ struct http_msg *msg;
+ struct filter *filter;
+ size_t offset, len;
+ int fwd, ret = 0;
+
+ MAY_LJMP(check_args(L, 2, "forward"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+
+ if (msg->msg_state < HTTP_MSG_DATA)
+ WILL_LJMP(lua_error(L));
+
+ fwd = MAY_LJMP(luaL_checkinteger(L, 2));
+ filter = hlua_http_msg_filter(L, 1, msg, &offset, &len);
+ if (!filter || !hlua_filter_from_payload(filter))
+ WILL_LJMP(lua_error(L));
+
+ /* Nothing to do, just return */
+ if (!fwd)
+ goto end;
+
+ /* Return an error if the channel's output is closed */
+ if (unlikely(channel_output_closed(msg->chn))) {
+ ret = -1;
+ goto end;
+ }
+
+ ret = fwd;
+ if (ret > len)
+ ret = len;
+
+ if (ret) {
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ FLT_OFF(filter, msg->chn) += ret;
+ flt_ctx->cur_off[CHN_IDX(msg->chn)] += ret;
+ flt_ctx->cur_len[CHN_IDX(msg->chn)] -= ret;
+ }
+
+ end:
+ lua_pushinteger(L, ret);
+ return 1;
+}
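+
+/* Illustrative Lua usage (a hedged sketch): inject a marker and then forward
+ * the remaining input explicitly instead of letting it be forwarded by
+ * default:
+ *
+ *   function MyFilter:http_payload(txn, msg)
+ *       msg:send("-- inspected --\n")   -- inserted and forwarded at once
+ *       msg:forward(msg:input())        -- forward what the filter holds
+ *   end
+ */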
+
+/* Sets the EOM flag on the HTX message.
+ *
+ * NOTE: Not sure it is a good idea to manipulate this flag but for now I don't
+ * really know how to do without this feature.
+ */
+__LJMP static int hlua_http_msg_set_eom(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+
+ MAY_LJMP(check_args(L, 1, "set_eom"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ htx = htxbuf(&msg->chn->buf);
+ htx->flags |= HTX_FL_EOM;
+ return 0;
+}
+
+/* Unsets the EOM flag on the HTX message.
+ *
+ * NOTE: Not sure it is a good idea to manipulate this flag but for now I don't
+ * really know how to do without this feature.
+ */
+__LJMP static int hlua_http_msg_unset_eom(lua_State *L)
+{
+ struct http_msg *msg;
+ struct htx *htx;
+
+ MAY_LJMP(check_args(L, 1, "set_eom"));
+ msg = MAY_LJMP(hlua_checkhttpmsg(L, 1));
+ htx = htxbuf(&msg->chn->buf);
+ htx->flags &= ~HTX_FL_EOM;
+ return 0;
+}
+
+/*
+ *
+ *
+ * Class HTTPClient
+ *
+ *
+ */
+__LJMP static struct hlua_httpclient *hlua_checkhttpclient(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_httpclient_ref));
+}
+
+
+/* Stops the httpclient and asks it to kill itself. */
+__LJMP static int hlua_httpclient_gc(lua_State *L)
+{
+ struct hlua_httpclient *hlua_hc;
+
+ MAY_LJMP(check_args(L, 1, "__gc"));
+
+ hlua_hc = MAY_LJMP(hlua_checkhttpclient(L, 1));
+
+ if (MT_LIST_DELETE(&hlua_hc->by_hlua)) {
+ /* we won the race against hlua_httpclient_destroy_all() */
+ httpclient_stop_and_destroy(hlua_hc->hc);
+ hlua_hc->hc = NULL;
+ }
+
+ return 0;
+}
+
+
+__LJMP static int hlua_httpclient_new(lua_State *L)
+{
+ struct hlua_httpclient *hlua_hc;
+ struct hlua *hlua;
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3)) {
+ hlua_pusherror(L, "httpclient: full stack");
+ goto err;
+ }
+ /* Create the object: obj[0] = userdata. */
+ lua_newtable(L);
+ hlua_hc = MAY_LJMP(lua_newuserdata(L, sizeof(*hlua_hc)));
+ lua_rawseti(L, -2, 0);
+ memset(hlua_hc, 0, sizeof(*hlua_hc));
+
+ hlua_hc->hc = httpclient_new(hlua, 0, IST_NULL);
+ if (!hlua_hc->hc)
+ goto err;
+
+ MT_LIST_APPEND(&hlua->hc_list, &hlua_hc->by_hlua);
+
+ /* Pop a class stream metatable and affect it to the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_httpclient_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+
+ err:
+ WILL_LJMP(lua_error(L));
+ return 0;
+}
+
+
+/*
+ * Callback of the httpclient. It wakes the Lua task up once the
+ * httpclient receives some data.
+ *
+ */
+
+static void hlua_httpclient_cb(struct httpclient *hc)
+{
+ struct hlua *hlua = hc->caller;
+
+ if (!hlua || !hlua->task)
+ return;
+
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+}
+
+/*
+ * Fills the Lua stack with headers from the httpclient response.
+ * This works the same way as the hlua_http_get_headers() function.
+ */
+__LJMP static int hlua_httpclient_get_headers(lua_State *L, struct hlua_httpclient *hlua_hc)
+{
+ struct http_hdr *hdr;
+
+ lua_newtable(L);
+
+ for (hdr = hlua_hc->hc->res.hdrs; hdr && isttest(hdr->n); hdr++) {
+ struct ist n, v;
+ int len;
+
+ n = hdr->n;
+ v = hdr->v;
+
+ /* Check for existing entry:
+ * assume that the table is on the top of the stack, and
+ * push the key onto the stack; the function lua_gettable()
+ * performs the lookup.
+ */
+
+ lua_pushlstring(L, n.ptr, n.len);
+ lua_gettable(L, -2);
+
+ switch (lua_type(L, -1)) {
+ case LUA_TNIL:
+ /* Table not found, create it. */
+ lua_pop(L, 1); /* remove the nil value. */
+ lua_pushlstring(L, n.ptr, n.len); /* push the header name as key. */
+ lua_newtable(L); /* create and push empty table. */
+ lua_pushlstring(L, v.ptr, v.len); /* push header value. */
+ lua_rawseti(L, -2, 0); /* index header value (pop it). */
+ lua_rawset(L, -3); /* index new table with header name (pop the values). */
+ break;
+
+ case LUA_TTABLE:
+ /* Entry found: push the value in the table. */
+ len = lua_rawlen(L, -1);
+ lua_pushlstring(L, v.ptr, v.len); /* push header value. */
+ lua_rawseti(L, -2, len+1); /* index header value (pop it). */
+ lua_pop(L, 1); /* remove the table (it is stored in the main table). */
+ break;
+
+ default:
+ /* Other cases are errors. */
+ hlua_pusherror(L, "internal error during the parsing of headers.");
+ WILL_LJMP(lua_error(L));
+ }
+ }
+ return 1;
+}
+
+/*
+ * Allocates and returns an array of http_hdr ists extracted from the
+ * <headers> Lua table.
+ *
+ * The caller must free the result.
+ */
+struct http_hdr *hlua_httpclient_table_to_hdrs(lua_State *L)
+{
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ struct http_hdr *result = NULL;
+ uint32_t hdr_num = 0;
+
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ struct ist name, value;
+ const char *n, *v;
+ size_t nlen, vlen;
+
+ if (!lua_isstring(L, -2) || !lua_istable(L, -1)) {
+ /* Skip element if the key is not a string or if the value is not a table */
+ goto next_hdr;
+ }
+
+ n = lua_tolstring(L, -2, &nlen);
+ name = ist2(n, nlen);
+
+ /* Loop on header's values */
+ lua_pushnil(L);
+ while (lua_next(L, -2)) {
+ if (!lua_isstring(L, -1)) {
+ /* Skip the value if it is not a string */
+ goto next_value;
+ }
+
+ v = lua_tolstring(L, -1, &vlen);
+ value = ist2(v, vlen);
+ name = ist2(n, nlen);
+
+ hdrs[hdr_num].n = istdup(name);
+ hdrs[hdr_num].v = istdup(value);
+
+ hdr_num++;
+
+ next_value:
+ lua_pop(L, 1);
+ }
+
+ next_hdr:
+ lua_pop(L, 1);
+
+ }
+
+ if (hdr_num) {
+ /* alloc and copy the headers in the httpclient struct */
+ result = calloc((hdr_num + 1), sizeof(*result));
+ if (!result)
+ goto skip_headers;
+ memcpy(result, hdrs, sizeof(struct http_hdr) * hdr_num);
+
+ result[hdr_num].n = IST_NULL;
+ result[hdr_num].v = IST_NULL;
+ }
+
+skip_headers:
+
+ return result;
+}
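+
+/* The expected Lua shape mirrors what hlua_httpclient_get_headers() builds: a
+ * table mapping each header name to a table of values, e.g. (illustrative):
+ *
+ *   { ["host"] = { "example.com" }, ["x-tag"] = { "a", "b" } }
+ */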
+
+
+/*
+ * For each yield, checks if there is some data in the httpclient and pushes
+ * it into the Lua buffer. Once the httpclient has finished its job, pushes
+ * the result onto the stack.
+ */
+__LJMP static int hlua_httpclient_rcv_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct buffer *tr;
+ int res;
+ struct hlua *hlua = hlua_gethlua(L);
+ struct hlua_httpclient *hlua_hc = hlua_checkhttpclient(L, 1);
+
+ tr = get_trash_chunk();
+
+ res = httpclient_res_xfer(hlua_hc->hc, tr);
+ luaL_addlstring(&hlua_hc->b, b_orig(tr), res);
+
+ if (!httpclient_data(hlua_hc->hc) && httpclient_ended(hlua_hc->hc)) {
+
+ luaL_pushresult(&hlua_hc->b);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "status");
+ lua_pushinteger(L, hlua_hc->hc->res.status);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "reason");
+ lua_pushlstring(L, hlua_hc->hc->res.reason.ptr, hlua_hc->hc->res.reason.len);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "headers");
+ hlua_httpclient_get_headers(L, hlua_hc);
+ lua_settable(L, -3);
+
+ return 1;
+ }
+
+ if (httpclient_data(hlua_hc->hc))
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_httpclient_rcv_yield, TICK_ETERNITY, 0));
+
+ return 0;
+}
+
+/*
+ * Call this when trying to stream a body during a request
+ */
+__LJMP static int hlua_httpclient_snd_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua *hlua;
+ struct hlua_httpclient *hlua_hc = hlua_checkhttpclient(L, 1);
+ const char *body_str = NULL;
+ int ret;
+ int end = 0;
+ size_t buf_len;
+ size_t to_send = 0;
+
+ hlua = hlua_gethlua(L);
+
+ if (!hlua || !hlua->task)
+ WILL_LJMP(luaL_error(L, "The 'get' function is only allowed in "
+ "'frontend', 'backend' or 'task'"));
+
+ ret = lua_getfield(L, -1, "body");
+ if (ret != LUA_TSTRING)
+ goto rcv;
+
+ body_str = lua_tolstring(L, -1, &buf_len);
+ lua_pop(L, 1);
+
+ to_send = buf_len - hlua_hc->sent;
+
+ if ((hlua_hc->sent + to_send) >= buf_len)
+ end = 1;
+
+ /* the end flag is always set since we are using the whole remaining size */
+ hlua_hc->sent += httpclient_req_xfer(hlua_hc->hc, ist2(body_str + hlua_hc->sent, to_send), end);
+
+ if (buf_len > hlua_hc->sent) {
+ /* still need to process the buffer */
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_httpclient_snd_yield, TICK_ETERNITY, 0));
+ } else {
+ /* we sent the whole request buffer, we can recv */
+ goto rcv;
+ }
+ return 0;
+
+rcv:
+
+ /* we return a "res" object */
+ lua_newtable(L);
+
+ lua_pushstring(L, "body");
+ luaL_buffinit(L, &hlua_hc->b);
+
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_httpclient_rcv_yield, TICK_ETERNITY, 0));
+
+ return 1;
+}
+
+/*
+ * Send an HTTP request and wait for a response
+ */
+
+__LJMP static int hlua_httpclient_send(lua_State *L, enum http_meth_t meth)
+{
+ struct hlua_httpclient *hlua_hc;
+ struct http_hdr *hdrs = NULL;
+ struct http_hdr *hdrs_i = NULL;
+ struct hlua *hlua;
+ const char *url_str = NULL;
+ const char *body_str = NULL;
+ size_t buf_len = 0;
+ int ret;
+
+ hlua = hlua_gethlua(L);
+
+ if (!hlua || !hlua->task)
+ WILL_LJMP(luaL_error(L, "The 'get' function is only allowed in "
+ "'frontend', 'backend' or 'task'"));
+
+ if (lua_gettop(L) != 2 || lua_type(L, -1) != LUA_TTABLE)
+ WILL_LJMP(luaL_error(L, "'get' needs a table as argument"));
+
+ hlua_hc = hlua_checkhttpclient(L, 1);
+
+ lua_pushnil(L); /* first key */
+ while (lua_next(L, 2)) {
+ if (strcmp(lua_tostring(L, -2), "dst") == 0) {
+ if (httpclient_set_dst(hlua_hc->hc, lua_tostring(L, -1)) < 0)
+ WILL_LJMP(luaL_error(L, "Can't use the 'dst' argument"));
+
+ } else if (strcmp(lua_tostring(L, -2), "url") == 0) {
+ if (lua_type(L, -1) != LUA_TSTRING)
+ WILL_LJMP(luaL_error(L, "invalid parameter in 'url', must be a string"));
+ url_str = lua_tostring(L, -1);
+
+ } else if (strcmp(lua_tostring(L, -2), "timeout") == 0) {
+ if (lua_type(L, -1) != LUA_TNUMBER)
+ WILL_LJMP(luaL_error(L, "invalid parameter in 'timeout', must be a number"));
+ httpclient_set_timeout(hlua_hc->hc, lua_tointeger(L, -1));
+
+ } else if (strcmp(lua_tostring(L, -2), "headers") == 0) {
+ if (lua_type(L, -1) != LUA_TTABLE)
+ WILL_LJMP(luaL_error(L, "invalid parameter in 'headers', must be a table"));
+ hdrs = hlua_httpclient_table_to_hdrs(L);
+
+ } else if (strcmp(lua_tostring(L, -2), "body") == 0) {
+ if (lua_type(L, -1) != LUA_TSTRING)
+ WILL_LJMP(luaL_error(L, "invalid parameter in 'body', must be a string"));
+ body_str = lua_tolstring(L, -1, &buf_len);
+
+ } else {
+ WILL_LJMP(luaL_error(L, "'%s' invalid parameter name", lua_tostring(L, -2)));
+ }
+ /* removes 'value'; keeps 'key' for next iteration */
+ lua_pop(L, 1);
+ }
+
+ if (!url_str) {
+ WILL_LJMP(luaL_error(L, "'get' need a 'url' argument"));
+ return 0;
+ }
+
+ hlua_hc->sent = 0;
+
+ istfree(&hlua_hc->hc->req.url);
+ hlua_hc->hc->req.url = istdup(ist(url_str));
+ hlua_hc->hc->req.meth = meth;
+
+ /* update the httpclient callbacks */
+ hlua_hc->hc->ops.res_stline = hlua_httpclient_cb;
+ hlua_hc->hc->ops.res_headers = hlua_httpclient_cb;
+ hlua_hc->hc->ops.res_payload = hlua_httpclient_cb;
+ hlua_hc->hc->ops.res_end = hlua_httpclient_cb;
+
+ /* a body is available, it will use the request callback */
+ if (body_str && buf_len) {
+ hlua_hc->hc->ops.req_payload = hlua_httpclient_cb;
+ }
+
+ ret = httpclient_req_gen(hlua_hc->hc, hlua_hc->hc->req.url, meth, hdrs, IST_NULL);
+
+ /* free the temporary headers array */
+ hdrs_i = hdrs;
+ while (hdrs_i && isttest(hdrs_i->n)) {
+ istfree(&hdrs_i->n);
+ istfree(&hdrs_i->v);
+ hdrs_i++;
+ }
+ ha_free(&hdrs);
+
+ if (ret != ERR_NONE) {
+ WILL_LJMP(luaL_error(L, "Can't generate the HTTP request"));
+ return 0;
+ }
+
+ if (!httpclient_start(hlua_hc->hc))
+ WILL_LJMP(luaL_error(L, "couldn't start the httpclient"));
+
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_httpclient_snd_yield, TICK_ETERNITY, 0));
+
+ return 0;
+}
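+
+/* Illustrative Lua usage (a hedged sketch, from a task; the URL is
+ * hypothetical):
+ *
+ *   core.register_task(function()
+ *       local httpc = core.httpclient()
+ *       local res = httpc:post{url="http://127.0.0.1:8000/in",
+ *                              headers={["content-type"]={"text/plain"}},
+ *                              body="hello"}
+ *       core.Info("status: " .. res.status .. " body: " .. #(res.body or ""))
+ *   end)
+ */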
+
+/*
+ * Sends an HTTP HEAD request and waits for a response
+ *
+ * httpclient:head(url, headers, payload)
+ */
+__LJMP static int hlua_httpclient_head(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_HEAD);
+}
+
+/*
+ * Sends an HTTP GET request and waits for a response
+ *
+ * httpclient:get(url, headers, payload)
+ */
+__LJMP static int hlua_httpclient_get(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_GET);
+}
+
+/*
+ * Sends an HTTP PUT request and waits for a response
+ *
+ * httpclient:put(url, headers, payload)
+ */
+__LJMP static int hlua_httpclient_put(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_PUT);
+}
+
+/*
+ * Sends an HTTP POST request and waits for a response
+ *
+ * httpclient:post(url, headers, payload)
+ */
+__LJMP static int hlua_httpclient_post(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_POST);
+}
+
+
+/*
+ * Sends an HTTP DELETE request and waits for a response
+ *
+ * httpclient:delete(url, headers, payload)
+ */
+__LJMP static int hlua_httpclient_delete(lua_State *L)
+{
+ return hlua_httpclient_send(L, HTTP_METH_DELETE);
+}
+
+/*
+ *
+ *
+ * Class TXN
+ *
+ *
+ */
+
+/* Returns a struct hlua_session if the stack entry "ud" is
+ * a class stream, otherwise it throws an error.
+ */
+__LJMP static struct hlua_txn *hlua_checktxn(lua_State *L, int ud)
+{
+ return MAY_LJMP(hlua_checkudata(L, ud, class_txn_ref));
+}
+
+__LJMP static int hlua_set_var(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ if (lua_gettop(L) < 3 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'set_var' needs between 3 and 4 arguments"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+
+ /* Converts the third argument in a sample. */
+ memset(&smp, 0, sizeof(smp));
+ hlua_lua2smp(L, 3, &smp);
+
+ /* Store the sample in a variable. We don't need to dup the smp, vars API
+ * already takes care of duplicating dynamic var data.
+ */
+ smp_set_owner(&smp, htxn->p, htxn->s->sess, htxn->s, htxn->dir & SMP_OPT_DIR);
+
+ if (lua_gettop(L) == 4 && lua_toboolean(L, 4))
+ lua_pushboolean(L, vars_set_by_name_ifexist(name, len, &smp) != 0);
+ else
+ lua_pushboolean(L, vars_set_by_name(name, len, &smp) != 0);
+
+ return 1;
+}
+
+__LJMP static int hlua_unset_var(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "unset_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+
+ /* Unset the variable. */
+ smp_set_owner(&smp, htxn->p, htxn->s->sess, htxn->s, htxn->dir & SMP_OPT_DIR);
+ lua_pushboolean(L, vars_unset_by_name_ifexist(name, len, &smp) != 0);
+ return 1;
+}
+
+__LJMP static int hlua_get_var(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ const char *name;
+ size_t len;
+ struct sample smp;
+
+ MAY_LJMP(check_args(L, 2, "get_var"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ name = MAY_LJMP(luaL_checklstring(L, 2, &len));
+
+ smp_set_owner(&smp, htxn->p, htxn->s->sess, htxn->s, htxn->dir & SMP_OPT_DIR);
+ if (!vars_get_by_name(name, len, &smp, NULL)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ return MAY_LJMP(hlua_smp2lua(L, &smp));
+}
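+
+/* Illustrative Lua usage for the variable helpers above (a hedged sketch):
+ *
+ *   txn:set_var("txn.score", 42)
+ *   txn:set_var("txn.score", 43, true)   -- only if the variable already exists
+ *   local v = txn:get_var("txn.score")
+ *   txn:unset_var("txn.score")
+ */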
+
+__LJMP static int hlua_set_priv(lua_State *L)
+{
+ struct hlua *hlua;
+
+ MAY_LJMP(check_args(L, 2, "set_priv"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ MAY_LJMP(hlua_checktxn(L, 1));
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua)
+ return 0;
+
+ /* Remove previous value. */
+ luaL_unref(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ /* Get and store new value. */
+ lua_pushvalue(L, 2); /* Copy the element 2 at the top of the stack. */
+ hlua->Mref = luaL_ref(L, LUA_REGISTRYINDEX); /* pop the previously pushed value. */
+
+ return 0;
+}
+
+__LJMP static int hlua_get_priv(lua_State *L)
+{
+ struct hlua *hlua;
+
+ MAY_LJMP(check_args(L, 1, "get_priv"));
+
+ /* It is useless to retrieve the stream, but this function
+ * runs only in a stream context.
+ */
+ MAY_LJMP(hlua_checktxn(L, 1));
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ /* Push configuration index in the stack. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, hlua->Mref);
+
+ return 1;
+}
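+
+/* Illustrative Lua usage (a hedged sketch): stash an arbitrary Lua value and
+ * retrieve it later from the same Lua context:
+ *
+ *   txn:set_priv({ t_start = core.now() })
+ *   local priv = txn:get_priv()
+ */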
+
+/* Creates a stack entry containing a class TXN. This function
+ * returns 0 if the stack does not contain free slots,
+ * otherwise it returns 1.
+ */
+static int hlua_txn_new(lua_State *L, struct stream *s, struct proxy *p, int dir, int flags)
+{
+ struct hlua_txn *htxn;
+
+ /* Check stack size. */
+ if (!lua_checkstack(L, 3))
+ return 0;
+
+ /* NOTE: The allocation never fails. On failure, an error is
+ * thrown, and the function never returns.
+ * If the throw is not available, the process is aborted.
+ */
+ /* Create the object: obj[0] = userdata. */
+ lua_newtable(L);
+ htxn = lua_newuserdata(L, sizeof(*htxn));
+ lua_rawseti(L, -2, 0);
+
+ htxn->s = s;
+ htxn->p = p;
+ htxn->dir = dir;
+ htxn->flags = flags;
+
+ /* Create the "f" field that contains a list of fetches. */
+ lua_pushstring(L, "f");
+ if (!hlua_fetches_new(L, htxn, HLUA_F_MAY_USE_HTTP))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "sf" field that contains a list of stringsafe fetches. */
+ lua_pushstring(L, "sf");
+ if (!hlua_fetches_new(L, htxn, HLUA_F_MAY_USE_HTTP | HLUA_F_AS_STRING))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "c" field that contains a list of converters. */
+ lua_pushstring(L, "c");
+ if (!hlua_converters_new(L, htxn, 0))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "sc" field that contains a list of stringsafe converters. */
+ lua_pushstring(L, "sc");
+ if (!hlua_converters_new(L, htxn, HLUA_F_AS_STRING))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "req" field that contains the request channel object. */
+ lua_pushstring(L, "req");
+ if (!hlua_channel_new(L, &s->req))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Create the "res" field that contains the response channel object. */
+ lua_pushstring(L, "res");
+ if (!hlua_channel_new(L, &s->res))
+ return 0;
+ lua_rawset(L, -3);
+
+ /* Creates the HTTP object if the current proxy allows http. */
+ lua_pushstring(L, "http");
+ if (IS_HTX_STRM(s)) {
+ if (!hlua_http_new(L, htxn))
+ return 0;
+ }
+ else
+ lua_pushnil(L);
+ lua_rawset(L, -3);
+
+ if ((htxn->flags & HLUA_TXN_CTX_MASK) == HLUA_TXN_FLT_CTX) {
+ /* HTTPMessage objects are created only when a Lua TXN is
+ * created from a filter context
+ */
+
+ /* Creates the HTTP-Request object if the current proxy allows http. */
+ lua_pushstring(L, "http_req");
+ if (p->mode == PR_MODE_HTTP) {
+ if (!hlua_http_msg_new(L, &s->txn->req))
+ return 0;
+ }
+ else
+ lua_pushnil(L);
+ lua_rawset(L, -3);
+
+ /* Creates the HTTP-Response object if the current proxy allows http. */
+ lua_pushstring(L, "http_res");
+ if (p->mode == PR_MODE_HTTP) {
+ if (!hlua_http_msg_new(L, &s->txn->rsp))
+ return 0;
+ }
+ else
+ lua_pushnil(L);
+ lua_rawset(L, -3);
+ }
+
+ /* Pop a class TXN metatable and assign it to the userdata. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_txn_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+__LJMP static int hlua_txn_deflog(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "deflog"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+
+ hlua_sendlog(htxn->s->be, htxn->s->logs.level, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log(lua_State *L)
+{
+ int level;
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 3, "log"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ level = MAY_LJMP(luaL_checkinteger(L, 2));
+ msg = MAY_LJMP(luaL_checkstring(L, 3));
+
+ if (level < 0 || level >= NB_LOG_LEVELS)
+ WILL_LJMP(luaL_argerror(L, 1, "Invalid loglevel."));
+
+ hlua_sendlog(htxn->s->be, level, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log_debug(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "Debug"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+ hlua_sendlog(htxn->s->be, LOG_DEBUG, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log_info(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "Info"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+ hlua_sendlog(htxn->s->be, LOG_INFO, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log_warning(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "Warning"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+ hlua_sendlog(htxn->s->be, LOG_WARNING, msg);
+ return 0;
+}
+
+__LJMP static int hlua_txn_log_alert(lua_State *L)
+{
+ const char *msg;
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "Alert"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+ hlua_sendlog(htxn->s->be, LOG_ALERT, msg);
+ return 0;
+}
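+
+/* Illustrative Lua usage for the logging helpers above (a hedged sketch):
+ *
+ *   txn:log(core.info, "checked by lua")
+ *   txn:Info("checked by lua")     -- shorthand for the line above
+ *   txn:deflog("logged at the stream's configured level")
+ */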
+
+__LJMP static int hlua_txn_set_loglevel(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ int ll;
+
+ MAY_LJMP(check_args(L, 2, "set_loglevel"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ ll = MAY_LJMP(luaL_checkinteger(L, 2));
+
+ if (ll < 0 || ll > 7)
+ WILL_LJMP(luaL_argerror(L, 2, "Bad log level. It must be between 0 and 7"));
+
+ htxn->s->logs.level = ll;
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_tos(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ int tos;
+
+ MAY_LJMP(check_args(L, 2, "set_tos"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ tos = MAY_LJMP(luaL_checkinteger(L, 2));
+
+ conn_set_tos(objt_conn(htxn->s->sess->origin), tos);
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_mark(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ int mark;
+
+ MAY_LJMP(check_args(L, 2, "set_mark"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ mark = MAY_LJMP(luaL_checkinteger(L, 2));
+
+ conn_set_mark(objt_conn(htxn->s->sess->origin), mark);
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_priority_class(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "set_priority_class"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ htxn->s->priority_class = queue_limit_class(MAY_LJMP(luaL_checkinteger(L, 2)));
+ return 0;
+}
+
+__LJMP static int hlua_txn_set_priority_offset(lua_State *L)
+{
+ struct hlua_txn *htxn;
+
+ MAY_LJMP(check_args(L, 2, "set_priority_offset"));
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ htxn->s->priority_offset = queue_limit_offset(MAY_LJMP(luaL_checkinteger(L, 2)));
+ return 0;
+}
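+
+/* Illustrative Lua usage (a hedged sketch; set_tos and set_mark act on the
+ * client connection and may require elevated privileges):
+ *
+ *   txn:set_tos(0x10)              -- IP TOS / DSCP byte
+ *   txn:set_mark(0x2)              -- netfilter MARK
+ *   txn:set_priority_class(-10)    -- served before higher classes
+ *   txn:set_priority_offset(-100)  -- nudged earlier within its class
+ */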
+
+/* Forwards the Reply object to the client. This function converts the reply
+ * into HTX and pushes it into the response channel. It is responsible for
+ * forwarding the message and terminating the transaction. It returns 1 on
+ * success and 0 on error. The Reply must be on top of the stack.
+ */
+__LJMP static int hlua_txn_forward_reply(lua_State *L, struct stream *s)
+{
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct h1m h1m;
+ const char *status, *reason, *body;
+ size_t status_len, reason_len, body_len;
+ int ret, code, flags;
+
+ code = 200;
+ status = "200";
+ status_len = 3;
+ ret = lua_getfield(L, -1, "status");
+ if (ret == LUA_TNUMBER) {
+ code = lua_tointeger(L, -1);
+ status = lua_tolstring(L, -1, &status_len);
+ }
+ lua_pop(L, 1);
+
+ reason = http_get_reason(code);
+ reason_len = strlen(reason);
+ ret = lua_getfield(L, -1, "reason");
+ if (ret == LUA_TSTRING)
+ reason = lua_tolstring(L, -1, &reason_len);
+ lua_pop(L, 1);
+
+ body = NULL;
+ body_len = 0;
+ ret = lua_getfield(L, -1, "body");
+ if (ret == LUA_TSTRING)
+ body = lua_tolstring(L, -1, &body_len);
+ lua_pop(L, 1);
+
+ /* Prepare the response before inserting the headers */
+ h1m_init_res(&h1m);
+ htx = htx_from_buf(&s->res.buf);
+ channel_htx_truncate(&s->res, htx);
+ if (s->txn->req.flags & HTTP_MSGF_VER_11) {
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"),
+ ist2(status, status_len), ist2(reason, reason_len));
+ }
+ else {
+ flags = HTX_SL_F_IS_RESP;
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.0"),
+ ist2(status, status_len), ist2(reason, reason_len));
+ }
+ if (!sl)
+ goto fail;
+ sl->info.res.status = code;
+
+ /* Push in the stack the "headers" entry. */
+ ret = lua_getfield(L, -1, "headers");
+ if (ret != LUA_TTABLE)
+ goto skip_headers;
+
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ struct ist name, value;
+ const char *n, *v;
+ size_t nlen, vlen;
+
+ if (!lua_isstring(L, -2) || !lua_istable(L, -1)) {
+ /* Skip element if the key is not a string or if the value is not a table */
+ goto next_hdr;
+ }
+
+ n = lua_tolstring(L, -2, &nlen);
+ name = ist2(n, nlen);
+ if (isteqi(name, ist("content-length"))) {
+ /* Always skip content-length header. It will be added
+ * later with the correct len
+ */
+ goto next_hdr;
+ }
+
+ /* Loop on header's values */
+ lua_pushnil(L);
+ while (lua_next(L, -2)) {
+ if (!lua_isstring(L, -1)) {
+ /* Skip the value if it is not a string */
+ goto next_value;
+ }
+
+ v = lua_tolstring(L, -1, &vlen);
+ value = ist2(v, vlen);
+
+ if (isteqi(name, ist("transfer-encoding")))
+ h1_parse_xfer_enc_header(&h1m, value);
+ if (!htx_add_header(htx, ist2(n, nlen), ist2(v, vlen)))
+ goto fail;
+
+ next_value:
+ lua_pop(L, 1);
+ }
+
+ next_hdr:
+ lua_pop(L, 1);
+ }
+ skip_headers:
+ lua_pop(L, 1);
+
+ /* Update h1m flags: CLEN is set if CHNK is not present */
+ if (!(h1m.flags & H1_MF_CHNK)) {
+ const char *clen = ultoa(body_len);
+
+ h1m.flags |= H1_MF_CLEN;
+ if (!htx_add_header(htx, ist("content-length"), ist(clen)))
+ goto fail;
+ }
+ if (h1m.flags & (H1_MF_CLEN|H1_MF_CHNK))
+ h1m.flags |= H1_MF_XFER_LEN;
+
+ /* Update HTX start-line flags */
+ if (h1m.flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ if (h1m.flags & H1_MF_XFER_LEN) {
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m.flags & H1_MF_CHNK)
+ flags |= HTX_SL_F_CHNK;
+ else if (h1m.flags & H1_MF_CLEN)
+ flags |= HTX_SL_F_CLEN;
+ if (h1m.body_len == 0)
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ sl->flags |= flags;
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH) ||
+ (body_len && !htx_add_data_atonce(htx, ist2(body, body_len))))
+ goto fail;
+
+ htx->flags |= HTX_FL_EOM;
+
+ /* Now, forward the response and terminate the transaction */
+ s->txn->status = code;
+ htx_to_buf(htx, &s->res.buf);
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+
+ return 1;
+
+ fail:
+ channel_htx_truncate(&s->res, htx);
+ return 0;
+}
+
+/* Terminate a transaction if called from a lua action. For TCP streams,
+ * processing is just aborted. Nothing is returned to the client and all
+ * arguments are ignored. For HTTP streams, if a reply is passed as argument, it
+ * is forwarded to the client before terminating the transaction. On success,
+ * the function exits with ACT_RET_DONE code. If an error occurred, it exits
+ * with ACT_RET_ERR code. If this function is not called from a lua action, it
+ * just exits without any processing.
+ */
+__LJMP static int hlua_txn_done(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ struct stream *s;
+ int finst;
+
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+
+ /* If the NOTERM flag is set, we cannot terminate the session, so we
+ * just end the execution of the current lua code. */
+ if (htxn->flags & HLUA_TXN_NOTERM)
+ WILL_LJMP(hlua_done(L));
+
+ s = htxn->s;
+ if (!IS_HTX_STRM(htxn->s)) {
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+
+ channel_auto_read(req);
+ channel_abort(req);
+ channel_erase(req);
+
+ channel_auto_read(res);
+ channel_auto_close(res);
+ sc_schedule_abort(s->scb);
+
+ finst = ((htxn->dir == SMP_OPT_DIR_REQ) ? SF_FINST_R : SF_FINST_D);
+ goto done;
+ }
+
+ if (lua_gettop(L) == 1 || !lua_istable(L, 2)) {
+ /* No reply or invalid reply */
+ s->txn->status = 0;
+ http_reply_and_close(s, 0, NULL);
+ }
+ else {
+ /* Remove extra args to have the reply on top of the stack */
+ if (lua_gettop(L) > 2)
+ lua_pop(L, lua_gettop(L) - 2);
+
+ if (!hlua_txn_forward_reply(L, s)) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ lua_pushinteger(L, ACT_RET_ERR);
+ WILL_LJMP(hlua_done(L));
+ return 0; /* Never reached */
+ }
+ }
+
+ finst = ((htxn->dir == SMP_OPT_DIR_REQ) ? SF_FINST_R : SF_FINST_H);
+ if (htxn->dir == SMP_OPT_DIR_REQ) {
+ /* let's log the request time */
+ s->logs.request_ts = now_ns;
+ if (s->sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.intercepted_req);
+ }
+
+ done:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= finst;
+
+ if ((htxn->flags & HLUA_TXN_CTX_MASK) == HLUA_TXN_FLT_CTX)
+ lua_pushinteger(L, -1);
+ else
+ lua_pushinteger(L, ACT_RET_ABRT);
+ WILL_LJMP(hlua_done(L));
+ return 0;
+}
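+
+/* Illustrative Lua usage (a hedged sketch, from an action; the action name is
+ * hypothetical): build a Reply and terminate the transaction with it:
+ *
+ *   core.register_action("deny_lua", { "http-req" }, function(txn)
+ *       local reply = txn:reply{
+ *           status  = 403,
+ *           reason  = "Forbidden",
+ *           headers = { ["content-type"] = { "text/plain" } },
+ *           body    = "denied by lua\n"
+ *       }
+ *       txn:done(reply)
+ *   end)
+ */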
+
+/*
+ *
+ *
+ * Class REPLY
+ *
+ *
+ */
+
+/* Pushes the TXN reply onto the top of the stack. If the stack does not have
+ * free slots, the function fails and returns 0.
+ */
+static int hlua_txn_reply_new(lua_State *L)
+{
+ struct hlua_txn *htxn;
+ const char *reason, *body = NULL;
+ int ret, status;
+
+ htxn = MAY_LJMP(hlua_checktxn(L, 1));
+ if (!IS_HTX_STRM(htxn->s)) {
+ hlua_pusherror(L, "txn object is not an HTTP transaction.");
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Default value */
+ status = 200;
+ reason = http_get_reason(status);
+
+ if (lua_istable(L, 2)) {
+ /* load status and reason from the table argument at index 2 */
+ ret = lua_getfield(L, 2, "status");
+ if (ret == LUA_TNIL)
+ goto reason;
+ else if (ret != LUA_TNUMBER) {
+ /* invalid status: ignore the reason */
+ goto body;
+ }
+ status = lua_tointeger(L, -1);
+
+ reason:
+ lua_pop(L, 1); /* restore the stack: remove status */
+ ret = lua_getfield(L, 2, "reason");
+ if (ret == LUA_TSTRING)
+ reason = lua_tostring(L, -1);
+
+ body:
+ lua_pop(L, 1); /* restore the stack: remove invalid status or reason */
+ ret = lua_getfield(L, 2, "body");
+ if (ret == LUA_TSTRING)
+ body = lua_tostring(L, -1);
+ lua_pop(L, 1); /* restore the stack: remove body */
+ }
+
+ /* Create the Reply table */
+ lua_newtable(L);
+
+ /* Add status element */
+ lua_pushstring(L, "status");
+ lua_pushinteger(L, status);
+ lua_settable(L, -3);
+
+ /* Add reason element */
+ reason = http_get_reason(status);
+ lua_pushstring(L, "reason");
+ lua_pushstring(L, reason);
+ lua_settable(L, -3);
+
+ /* Add body element, nil if undefined */
+ lua_pushstring(L, "body");
+ if (body)
+ lua_pushstring(L, body);
+ else
+ lua_pushnil(L);
+ lua_settable(L, -3);
+
+ /* Add headers element */
+ lua_pushstring(L, "headers");
+ lua_newtable(L);
+
+ /* stack: [ txn, <Arg:table>, <Reply:table>, "headers", <headers:table> ] */
+ if (lua_istable(L, 2)) {
+ /* load headers from the table argument at index 2. If it is a table, copy it. */
+ ret = lua_getfield(L, 2, "headers");
+ if (ret == LUA_TTABLE) {
+ /* stack: [ ... <headers:table>, <table> ] */
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ /* stack: [ ... <headers:table>, <table>, k, v] */
+ if (!lua_isstring(L, -1) && !lua_istable(L, -1)) {
+ /* invalid value type, skip it */
+ lua_pop(L, 1);
+ continue;
+ }
+
+ /* Duplicate the key and swap it with the value. */
+ lua_pushvalue(L, -2);
+ lua_insert(L, -2);
+ /* stack: [ ... <headers:table>, <table>, k, k, v ] */
+
+ lua_newtable(L);
+ lua_insert(L, -2);
+ /* stack: [ ... <headers:table>, <table>, k, k, <inner:table>, v ] */
+
+ if (lua_isstring(L, -1)) {
+ /* push the value in the inner table */
+ lua_rawseti(L, -2, 1);
+ }
+ else { /* table */
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ /* stack: [ ... <headers:table>, <table>, k, k, <inner:table>, <v:table>, k2, v2 ] */
+ if (!lua_isstring(L, -1)) {
+ /* invalid value type, skip it */
+ lua_pop(L, 1);
+ continue;
+ }
+ /* push the value in the inner table */
+ lua_rawseti(L, -4, lua_rawlen(L, -4) + 1);
+ /* stack: [ ... <headers:table>, <table>, k, k, <inner:table>, <v:table>, k2 ] */
+ }
+ lua_pop(L, 1);
+ /* stack: [ ... <headers:table>, <table>, k, k, <inner:table> ] */
+ }
+
+ /* push (k,v) on the stack in the headers table:
+ * stack: [ ... <headers:table>, <table>, k, k, v ]
+ */
+ lua_settable(L, -5);
+ /* stack: [ ... <headers:table>, <table>, k ] */
+ }
+ }
+ lua_pop(L, 1);
+ }
+ /* stack: [ txn, <Arg:table>, <Reply:table>, "headers", <headers:table> ] */
+ lua_settable(L, -3);
+ /* stack: [ txn, <Arg:table>, <Reply:table> ] */
+
+ /* Fetch the class txn_reply metatable and assign it to the Reply table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_txn_reply_ref);
+ lua_setmetatable(L, -2);
+ return 1;
+}
+
+/* Set the reply status code, and optionally the reason. If no reason is
+ * provided, the default one corresponding to the status code is used.
+ */
+__LJMP static int hlua_txn_reply_set_status(lua_State *L)
+{
+ int status = MAY_LJMP(luaL_checkinteger(L, 2));
+ const char *reason = MAY_LJMP(luaL_optlstring(L, 3, NULL, NULL));
+
+ /* First argument (self) must be a table */
+ MAY_LJMP(luaL_checktype(L, 1, LUA_TTABLE));
+
+ if (status < 100 || status > 599) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+ if (!reason)
+ reason = http_get_reason(status);
+
+ lua_pushinteger(L, status);
+ lua_setfield(L, 1, "status");
+
+ lua_pushstring(L, reason);
+ lua_setfield(L, 1, "reason");
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+/* Add a header into the reply object. Each header name is associated to an
+ * array of values in the "headers" table. If the header name is not found, a
+ * new entry is created.
+ */
+__LJMP static int hlua_txn_reply_add_header(lua_State *L)
+{
+ const char *name = MAY_LJMP(luaL_checkstring(L, 2));
+ const char *value = MAY_LJMP(luaL_checkstring(L, 3));
+ int ret;
+
+ /* First argument (self) must be a table */
+ MAY_LJMP(luaL_checktype(L, 1, LUA_TTABLE));
+
+ /* Push the "headers" entry onto the stack. */
+ ret = lua_getfield(L, 1, "headers");
+ if (ret != LUA_TTABLE) {
+ hlua_pusherror(L, "Reply['headers'] is expected to a an array. %s found", lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* check if the header is already registered. If not, register it. */
+ ret = lua_getfield(L, -1, name);
+ if (ret == LUA_TNIL) {
+ /* Entry not found. */
+ lua_pop(L, 1); /* remove the nil. The "headers" table is the top of the stack. */
+
+ /* Insert the new header name into the "headers" table, with an
+ * empty array as value. The new array is left on top of the stack.
+ */
+ lua_newtable(L);
+ lua_pushstring(L, name);
+ lua_pushvalue(L, -2);
+ lua_settable(L, -4);
+ }
+ else if (ret != LUA_TTABLE) {
+ hlua_pusherror(L, "Reply['headers']['%s'] is expected to be an array. %s found", name, lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ /* Now the top of the stack is an array of values. We push
+ * the header value as a new entry.
+ */
+ lua_pushstring(L, value);
+ ret = lua_rawlen(L, -2);
+ lua_rawseti(L, -2, ret + 1);
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+/* Remove all occurrences of a given header name. */
+__LJMP static int hlua_txn_reply_del_header(lua_State *L)
+{
+ const char *name = MAY_LJMP(luaL_checkstring(L, 2));
+ int ret;
+
+ /* First argument (self) must be a table */
+ MAY_LJMP(luaL_checktype(L, 1, LUA_TTABLE));
+
+ /* Push the "headers" entry onto the stack. */
+ ret = lua_getfield(L, 1, "headers");
+ if (ret != LUA_TTABLE) {
+ hlua_pusherror(L, "Reply['headers'] is expected to be an array. %s found", lua_typename(L, ret));
+ WILL_LJMP(lua_error(L));
+ }
+
+ lua_pushstring(L, name);
+ lua_pushnil(L);
+ lua_settable(L, -3);
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+/* Set the reply's body. Overwrite any existing entry. */
+__LJMP static int hlua_txn_reply_set_body(lua_State *L)
+{
+ const char *payload = MAY_LJMP(luaL_checkstring(L, 2));
+
+ /* First argument (self) must be a table */
+ MAY_LJMP(luaL_checktype(L, 1, LUA_TTABLE));
+
+ lua_pushstring(L, payload);
+ lua_setfield(L, 1, "body");
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
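+/* Illustrative use of the Reply methods above from Lua (sketch):
+ *
+ *   local reply = txn:reply()
+ *   reply:set_status(503, "Service Unavailable")
+ *   reply:add_header("content-type", "text/plain")
+ *   reply:add_header("retry-after", "30")
+ *   reply:del_header("retry-after")
+ *   reply:set_body("sorry, try again later\n")
+ *   txn:done(reply)
+ */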
+
+__LJMP static int hlua_log(lua_State *L)
+{
+ int level;
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 2, "log"));
+ level = MAY_LJMP(luaL_checkinteger(L, 1));
+ msg = MAY_LJMP(luaL_checkstring(L, 2));
+
+ if (level < 0 || level >= NB_LOG_LEVELS)
+ WILL_LJMP(luaL_argerror(L, 1, "Invalid loglevel."));
+
+ hlua_sendlog(NULL, level, msg);
+ return 0;
+}
+
+__LJMP static int hlua_log_debug(lua_State *L)
+{
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 1, "debug"));
+ msg = MAY_LJMP(luaL_checkstring(L, 1));
+ hlua_sendlog(NULL, LOG_DEBUG, msg);
+ return 0;
+}
+
+__LJMP static int hlua_log_info(lua_State *L)
+{
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 1, "info"));
+ msg = MAY_LJMP(luaL_checkstring(L, 1));
+ hlua_sendlog(NULL, LOG_INFO, msg);
+ return 0;
+}
+
+__LJMP static int hlua_log_warning(lua_State *L)
+{
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 1, "warning"));
+ msg = MAY_LJMP(luaL_checkstring(L, 1));
+ hlua_sendlog(NULL, LOG_WARNING, msg);
+ return 0;
+}
+
+__LJMP static int hlua_log_alert(lua_State *L)
+{
+ const char *msg;
+
+ MAY_LJMP(check_args(L, 1, "alert"));
+ msg = MAY_LJMP(luaL_checkstring(L, 1));
+ hlua_sendlog(NULL, LOG_ALERT, msg);
+ return 0;
+}
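+/* Illustrative Lua usage of the logging bindings above (sketch):
+ *
+ *   core.log(core.info, "message sent with an explicit level")
+ *   core.Debug("debug message")
+ *   core.Info("info message")
+ *   core.Warning("warning message")
+ *   core.Alert("alert message")
+ */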
+
+__LJMP static int hlua_sleep_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ int wakeup_ms = lua_tointeger(L, -1);
+ if (!tick_is_expired(wakeup_ms, now_ms))
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_sleep_yield, wakeup_ms, 0));
+ return 0;
+}
+
+__LJMP static int hlua_sleep(lua_State *L)
+{
+ unsigned int delay;
+ int wakeup_ms; // tick value
+
+ MAY_LJMP(check_args(L, 1, "sleep"));
+
+ delay = MAY_LJMP(luaL_checkinteger(L, 1)) * 1000;
+ wakeup_ms = tick_add(now_ms, delay);
+ lua_pushinteger(L, wakeup_ms);
+
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_sleep_yield, wakeup_ms, 0));
+ return 0;
+}
+
+__LJMP static int hlua_msleep(lua_State *L)
+{
+ unsigned int delay;
+ int wakeup_ms; // tick value
+
+ MAY_LJMP(check_args(L, 1, "msleep"));
+
+ delay = MAY_LJMP(luaL_checkinteger(L, 1));
+ wakeup_ms = tick_add(now_ms, delay);
+ lua_pushinteger(L, wakeup_ms);
+
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_sleep_yield, wakeup_ms, 0));
+ return 0;
+}
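+/* Illustrative usage (sketch): both bindings yield until the computed wakeup
+ * date is reached, as implemented by hlua_sleep_yield() above.
+ *
+ *   core.sleep(1)     -- suspend the current Lua execution for 1 second
+ *   core.msleep(250)  -- suspend it for 250 milliseconds
+ */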
+
+/* This function is an LUA binding. It permits to give back
+ * the hand to the HAProxy scheduler. It is used when the
+ * LUA processing consumes a lot of time.
+ */
+__LJMP static int hlua_yield_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ return 0;
+}
+
+__LJMP static int hlua_yield(lua_State *L)
+{
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_yield_yield, TICK_ETERNITY, HLUA_CTRLYIELD));
+ return 0;
+}
+
+/* This function changes the nice value of the currently executed
+ * task. It is used to set a low or high priority for the current
+ * task.
+ */
+__LJMP static int hlua_set_nice(lua_State *L)
+{
+ struct hlua *hlua;
+ int nice;
+
+ MAY_LJMP(check_args(L, 1, "set_nice"));
+ nice = MAY_LJMP(luaL_checkinteger(L, 1));
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+
+ /* If the task is not set, I'm in a start mode. */
+ if (!hlua || !hlua->task)
+ return 0;
+
+ if (nice < -1024)
+ nice = -1024;
+ else if (nice > 1024)
+ nice = 1024;
+
+ hlua->task->nice = nice;
+ return 0;
+}
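+/* Illustrative usage (sketch); values are clamped to the [-1024, 1024] range
+ * as done above:
+ *
+ *   core.set_nice(-64) -- raise the priority of the current Lua task
+ */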
+
+/* safe lua coroutine.create() function:
+ *
+ * This is a simple wrapper for coroutine.create() that
+ * ensures the current hlua state ctx is available from
+ * the new coroutine state
+ */
+__LJMP static int hlua_coroutine_create(lua_State *L)
+{
+ lua_State *new; /* new coroutine state */
+ struct hlua **hlua_store;
+ struct hlua *hlua = hlua_gethlua(L);
+
+ new = lua_newthread(L);
+ if (!new)
+ return 0;
+
+ hlua_store = lua_getextraspace(new);
+ /* Expose current hlua ctx on new lua thread
+ * (hlua_gethlua() will properly return the last "known"
+ * hlua ctx instead of NULL when it is called from such coroutines)
+ */
+ *hlua_store = hlua;
+
+ /* new lua thread is on the top of the stack, we
+ * need to duplicate first stack argument (<f> from coroutine.create(<f>))
+ * on the top of the stack to be able to use xmove() to move it on the new
+ * stack
+ */
+ lua_pushvalue(L, 1);
+ /* move <f> function to the new stack */
+ lua_xmove(L, new, 1);
+ /* new lua thread is back at the top of the stack */
+ return 1;
+}
+
+/* This function is used as a callback of a task. It is called by the
+ * HAProxy task subsystem when the task is woken up. The LUA runtime can
+ * return an E_AGAIN signal; in that case, the emitter of this signal must
+ * set a signal to wake the task.
+ *
+ * The task wrapper is longjmp-safe because the only Lua code
+ * executed is the safe hlua_ctx_resume();
+ */
+struct task *hlua_process_task(struct task *task, void *context, unsigned int state)
+{
+ struct hlua *hlua = context;
+ enum hlua_exec status;
+
+ if (task->tid < 0)
+ task->tid = tid;
+
+ /* If it is the first call to the task, we must initialize the
+ * execution timeouts.
+ */
+ if (!HLUA_IS_RUNNING(hlua))
+ hlua_timer_init(&hlua->timer, hlua_timeout_task);
+
+ /* Execute the Lua code. */
+ status = hlua_ctx_resume(hlua, 1);
+
+ switch (status) {
+ /* finished or yield */
+ case HLUA_E_OK:
+ hlua_ctx_destroy(hlua);
+ task_destroy(task);
+ task = NULL;
+ break;
+
+ case HLUA_E_AGAIN: /* co process or timeout wake me later. */
+ notification_gc(&hlua->com);
+ task->expire = hlua->wake_time;
+ break;
+
+ /* finished with error. */
+ case HLUA_E_ETMOUT:
+ SEND_ERR(NULL, "Lua task: execution timeout.\n");
+ goto err_task_abort;
+ case HLUA_E_ERRMSG:
+ SEND_ERR(NULL, "Lua task: %s.\n", lua_tostring(hlua->T, -1));
+ goto err_task_abort;
+ case HLUA_E_ERR:
+ default:
+ SEND_ERR(NULL, "Lua task: unknown error.\n");
+ err_task_abort:
+ hlua_ctx_destroy(hlua);
+ task_destroy(task);
+ task = NULL;
+ break;
+ }
+ return task;
+}
+
+/* Helper function to prepare the lua ctx for a given stream
+ *
+ * ctx will be enforced in <state_id> parent stack on initial creation.
+ * If s->hlua->state_id differs from <state_id>, which may happen at
+ * runtime since existing stream hlua ctx will be reused for other
+ * "independent" (but stream-related) lua executions, hlua will be
+ * recreated with the expected state id.
+ *
+ * Returns 1 for success and 0 for failure
+ */
+static int hlua_stream_ctx_prepare(struct stream *s, int state_id)
+{
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. This lazy
+ * initialization saves performance, because a systematic
+ * Lua initialization causes a 5% performance loss.
+ */
+ ctx_renew:
+ if (!s->hlua) {
+ struct hlua *hlua;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua)
+ return 0;
+ HLUA_INIT(hlua);
+ if (!hlua_ctx_init(hlua, state_id, s->task)) {
+ pool_free(pool_head_hlua, hlua);
+ return 0;
+ }
+ s->hlua = hlua;
+ }
+ else if (s->hlua->state_id != state_id) {
+ /* ctx already created, but not in the proper state.
+ * This should only happen after the previous execution is
+ * finished; otherwise it's probably a bug, since we don't
+ * want to abort an unfinished job.
+ */
+ BUG_ON(HLUA_IS_RUNNING(s->hlua));
+ hlua_ctx_destroy(s->hlua);
+ s->hlua = NULL;
+ goto ctx_renew;
+ }
+ return 1;
+}
+
+/* This function is an LUA binding that registers an LUA function to be
+ * executed after the HAProxy configuration parsing and before the
+ * HAProxy scheduler starts. This function expects only one LUA
+ * argument that is a function. This function returns nothing, but
+ * throws if an error is encountered.
+ */
+__LJMP static int hlua_register_init(lua_State *L)
+{
+ struct hlua_init_function *init;
+ int ref;
+
+ MAY_LJMP(check_args(L, 1, "register_init"));
+
+ if (hlua_gethlua(L)) {
+ /* runtime processing */
+ WILL_LJMP(luaL_error(L, "register_init: not available outside of body context"));
+ }
+
+ ref = MAY_LJMP(hlua_checkfunction(L, 1));
+
+ init = calloc(1, sizeof(*init));
+ if (!init) {
+ hlua_unref(L, ref);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ }
+
+ init->function_ref = ref;
+ LIST_APPEND(&hlua_init_functions[hlua_state_id], &init->l);
+ return 0;
+}
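+/* Illustrative usage (sketch), only valid from the body (config-load) context:
+ *
+ *   core.register_init(function()
+ *       core.Info("configuration parsed, scheduler not started yet")
+ *   end)
+ */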
+
+/* This function is an LUA binding. It permits to register a task
+ * executed in parallel with the main HAProxy activity. The task is
+ * created and inserted into the HAProxy scheduler. It can be called
+ * from the "init" section, "post init" or during the runtime.
+ *
+ * Lua prototype:
+ *
+ * <none> core.register_task(<function>[, <arg1>[, <arg2>[, ...[, <arg4>]]]])
+ *
+ * <arg1..4> are optional arguments that will be provided to <function>
+ */
+__LJMP static int hlua_register_task(lua_State *L)
+{
+ struct hlua *hlua = NULL;
+ struct task *task = NULL;
+ int ref;
+ int nb_arg;
+ int it;
+ int arg_ref[4]; /* optional arguments */
+ int state_id;
+
+ nb_arg = lua_gettop(L);
+ if (nb_arg < 1)
+ WILL_LJMP(luaL_error(L, "register_task: <func> argument is required"));
+ else if (nb_arg > 5)
+ WILL_LJMP(luaL_error(L, "register_task: no more that 4 optional arguments may be provided"));
+
+ /* first arg: function ref */
+ ref = MAY_LJMP(hlua_checkfunction(L, 1));
+
+ /* extract optional args (if any) */
+ it = 0;
+ while (--nb_arg) {
+ lua_pushvalue(L, 2 + it);
+ arg_ref[it] = hlua_ref(L); /* get arg reference */
+ it += 1;
+ }
+ nb_arg = it;
+
+ /* Get the reference state. If the reference is NULL, L is the master
+ * state, otherwise hlua->T is.
+ */
+ hlua = hlua_gethlua(L);
+ if (hlua)
+ /* we are in runtime processing */
+ state_id = hlua->state_id;
+ else
+ /* we are in initialization mode */
+ state_id = hlua_state_id;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua)
+ goto alloc_error;
+ HLUA_INIT(hlua);
+
+ /* If we are in the common lua state, execute the task anywhere;
+ * otherwise, inherit the current thread identifier.
+ */
+ if (state_id == 0)
+ task = task_new_anywhere();
+ else
+ task = task_new_here();
+ if (!task)
+ goto alloc_error;
+
+ task->context = hlua;
+ task->process = hlua_process_task;
+
+ if (!hlua_ctx_init(hlua, state_id, task))
+ goto alloc_error;
+
+ /* Ensure there is enough space on the stack for the function
+ * plus optional arguments
+ */
+ if (!lua_checkstack(hlua->T, (1 + nb_arg)))
+ goto alloc_error;
+
+ /* Restore the function in the stack. */
+ hlua_pushref(hlua->T, ref);
+ /* function ref not needed anymore since it was pushed to the substack */
+ hlua_unref(L, ref);
+
+ hlua->nargs = nb_arg;
+
+ /* push optional arguments to the function */
+ for (it = 0; it < nb_arg; it++) {
+ /* push arg to the stack */
+ hlua_pushref(hlua->T, arg_ref[it]);
+ /* arg ref not needed anymore since it was pushed to the substack */
+ hlua_unref(L, arg_ref[it]);
+ }
+
+ /* Schedule task. */
+ task_wakeup(task, TASK_WOKEN_INIT);
+
+ return 0;
+
+ alloc_error:
+ task_destroy(task);
+ hlua_unref(L, ref);
+ for (it = 0; it < nb_arg; it++) {
+ hlua_unref(L, arg_ref[it]);
+ }
+ hlua_ctx_destroy(hlua);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
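+/* Illustrative usage (sketch) with optional arguments, as supported above:
+ *
+ *   core.register_task(function(name, count)
+ *       for i = 1, count do
+ *           core.Info("hello from task " .. name)
+ *           core.sleep(1)
+ *       end
+ *   end, "worker", 3)
+ */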
+
+/* called from unsafe location */
+static void hlua_event_subscription_destroy(struct hlua_event_sub *hlua_sub)
+{
+ /* hlua cleanup */
+
+ hlua_lock(hlua_sub->hlua);
+ /* registry is shared between coroutines */
+ hlua_unref(hlua_sub->hlua->T, hlua_sub->fcn_ref);
+ hlua_unlock(hlua_sub->hlua);
+
+ hlua_ctx_destroy(hlua_sub->hlua);
+
+ /* free */
+ pool_free(pool_head_hlua_event_sub, hlua_sub);
+}
+
+/* single event handler: hlua ctx is shared between multiple events handlers
+ * issued from the same subscription. Thus, it is not destroyed when the event
+ * is processed: it is destroyed when no more events are expected for the
+ * subscription (ie: when the subscription ends).
+ *
+ * Moreover, events are processed sequentially within the subscription:
+ * one event must be fully processed before another one may be processed.
+ * This ensures proper consistency for lua event handling from an ordering
+ * point of view. This is especially useful with server events for example
+ * where ADD/DEL/UP/DOWN events ordering really matters to trigger specific
+ * actions from lua (e.g.: sending emails or making API calls).
+ *
+ * Due to this design, each lua event handler is expected to process the event
+ * as fast as possible to prevent the event queue from growing.
+ * Strictly speaking, there is no runtime limit for the callback function
+ * (timeout set to default task timeout), but if the event queue goes past
+ * the limit of unconsumed events an error will be reported and the
+ * subscription will pause itself for as long as it takes for the handler to
+ * catch up (events will be lost as a result).
+ * If the event handler does not need the sequential ordering and wants to
+ * process multiple events at a time, it may spawn a new side-task using
+ * 'core.register_task' to delegate the event handling and make parallel event
+ * processing within the same subscription set.
+ */
+static void hlua_event_handler(struct hlua *hlua)
+{
+ enum hlua_exec status;
+
+ /* If it is the first call to the task, we must initialize the
+ * execution timeouts.
+ */
+ if (!HLUA_IS_RUNNING(hlua))
+ hlua_timer_init(&hlua->timer, hlua_timeout_task);
+
+ /* make sure to reset the task expiry before each hlua_ctx_resume(),
+ * since the task is re-used for multiple cb function calls.
+ * We cannot risk having t->expire pointing to a past date, because
+ * it was set during the last function invocation but was never reset
+ * since (ie: E_AGAIN)
+ */
+ hlua->task->expire = TICK_ETERNITY;
+
+ /* Execute the Lua code. */
+ status = hlua_ctx_resume(hlua, 1);
+
+ switch (status) {
+ /* finished or yield */
+ case HLUA_E_OK:
+ break;
+
+ case HLUA_E_AGAIN: /* co process or timeout wake me later. */
+ notification_gc(&hlua->com);
+ hlua->task->expire = hlua->wake_time;
+ break;
+
+ /* finished with error. */
+ case HLUA_E_ETMOUT:
+ SEND_ERR(NULL, "Lua event_hdl: execution timeout.\n");
+ break;
+
+ case HLUA_E_ERRMSG:
+ SEND_ERR(NULL, "Lua event_hdl: %s.\n", lua_tostring(hlua->T, -1));
+ break;
+
+ case HLUA_E_ERR:
+ default:
+ SEND_ERR(NULL, "Lua event_hdl: unknown error.\n");
+ break;
+ }
+}
+
+__LJMP static void hlua_event_hdl_cb_push_event_checkres(lua_State *L,
+ struct event_hdl_cb_data_server_checkres *check)
+{
+ lua_pushstring(L, "agent");
+ lua_pushboolean(L, check->agent);
+ lua_settable(L, -3);
+ lua_pushstring(L, "result");
+ switch (check->result) {
+ case CHK_RES_FAILED:
+ lua_pushstring(L, "FAILED");
+ break;
+ case CHK_RES_PASSED:
+ lua_pushstring(L, "PASSED");
+ break;
+ case CHK_RES_CONDPASS:
+ lua_pushstring(L, "CONDPASS");
+ break;
+ default:
+ lua_pushnil(L);
+ break;
+ }
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "duration");
+ lua_pushinteger(L, check->duration);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "reason");
+ lua_newtable(L);
+
+ lua_pushstring(L, "short");
+ lua_pushstring(L, get_check_status_info(check->reason.status));
+ lua_settable(L, -3);
+ lua_pushstring(L, "desc");
+ lua_pushstring(L, get_check_status_description(check->reason.status));
+ lua_settable(L, -3);
+ if (check->reason.status >= HCHK_STATUS_L57DATA) {
+ /* code only available when the check reached data analysis stage */
+ lua_pushstring(L, "code");
+ lua_pushinteger(L, check->reason.code);
+ lua_settable(L, -3);
+ }
+
+ lua_settable(L, -3); /* reason table */
+
+ lua_pushstring(L, "health");
+ lua_newtable(L);
+
+ lua_pushstring(L, "cur");
+ lua_pushinteger(L, check->health.cur);
+ lua_settable(L, -3);
+ lua_pushstring(L, "rise");
+ lua_pushinteger(L, check->health.rise);
+ lua_settable(L, -3);
+ lua_pushstring(L, "fall");
+ lua_pushinteger(L, check->health.fall);
+ lua_settable(L, -3);
+
+ lua_settable(L, -3); /* health table */
+}
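+/* Resulting Lua table layout built by the function above (sketch):
+ *
+ *   { agent    = <boolean>,
+ *     result   = "FAILED"|"PASSED"|"CONDPASS",
+ *     duration = <integer>,
+ *     reason   = { short = <string>, desc = <string>,
+ *                  code = <integer> (only past data analysis stage) },
+ *     health   = { cur = <integer>, rise = <integer>, fall = <integer> } }
+ */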
+
+/* This function pushes various arguments such as event type and event data to
+ * the lua function that will be called to consume the event.
+ */
+__LJMP static void hlua_event_hdl_cb_push_args(struct hlua_event_sub *hlua_sub,
+ struct event_hdl_async_event *e)
+{
+ struct hlua *hlua = hlua_sub->hlua;
+ struct event_hdl_sub_type event = e->type;
+ void *data = e->data;
+
+ /* push event type */
+ hlua->nargs = 1;
+ lua_pushstring(hlua->T, event_hdl_sub_type_to_string(event));
+
+ /* push event data (according to event type) */
+ if (event_hdl_sub_family_equal(EVENT_HDL_SUB_SERVER, event)) {
+ struct event_hdl_cb_data_server *e_server = data;
+ struct proxy *px;
+ struct server *server;
+
+ hlua->nargs += 1;
+ lua_newtable(hlua->T);
+ /* Add server name */
+ lua_pushstring(hlua->T, "name");
+ lua_pushstring(hlua->T, e_server->safe.name);
+ lua_settable(hlua->T, -3);
+ /* Add server puid */
+ lua_pushstring(hlua->T, "puid");
+ lua_pushinteger(hlua->T, e_server->safe.puid);
+ lua_settable(hlua->T, -3);
+ /* Add server rid */
+ lua_pushstring(hlua->T, "rid");
+ lua_pushinteger(hlua->T, e_server->safe.rid);
+ lua_settable(hlua->T, -3);
+ /* Add server proxy name */
+ lua_pushstring(hlua->T, "proxy_name");
+ lua_pushstring(hlua->T, e_server->safe.proxy_name);
+ lua_settable(hlua->T, -3);
+ /* Add server proxy uuid */
+ lua_pushstring(hlua->T, "proxy_uuid");
+ lua_pushinteger(hlua->T, e_server->safe.proxy_uuid);
+ lua_settable(hlua->T, -3);
+
+ /* special events, fetch additional info with explicit type casting */
+ if (event_hdl_sub_type_equal(EVENT_HDL_SUB_SERVER_STATE, event)) {
+ struct event_hdl_cb_data_server_state *state = data;
+ int it;
+
+ if (!lua_checkstack(hlua->T, 20))
+ WILL_LJMP(luaL_error(hlua->T, "Lua out of memory error."));
+
+ /* state subclass */
+ lua_pushstring(hlua->T, "state");
+ lua_newtable(hlua->T);
+
+ lua_pushstring(hlua->T, "admin");
+ lua_pushboolean(hlua->T, state->safe.type);
+ lua_settable(hlua->T, -3);
+
+ /* is it because of a check ? */
+ if (!state->safe.type &&
+ (state->safe.op_st_chg.cause == SRV_OP_STCHGC_HEALTH ||
+ state->safe.op_st_chg.cause == SRV_OP_STCHGC_AGENT)) {
+ /* yes, provide check result */
+ lua_pushstring(hlua->T, "check");
+ lua_newtable(hlua->T);
+ hlua_event_hdl_cb_push_event_checkres(hlua->T, &state->safe.op_st_chg.check);
+ lua_settable(hlua->T, -3); /* check table */
+ }
+
+ lua_pushstring(hlua->T, "cause");
+ if (state->safe.type)
+ lua_pushstring(hlua->T, srv_adm_st_chg_cause(state->safe.adm_st_chg.cause));
+ else
+ lua_pushstring(hlua->T, srv_op_st_chg_cause(state->safe.op_st_chg.cause));
+ lua_settable(hlua->T, -3);
+
+ /* old_state, new_state */
+ for (it = 0; it < 2; it++) {
+ enum srv_state srv_state = (!it) ? state->safe.old_state : state->safe.new_state;
+
+ lua_pushstring(hlua->T, (!it) ? "old_state" : "new_state");
+ switch (srv_state) {
+ case SRV_ST_STOPPED:
+ lua_pushstring(hlua->T, "STOPPED");
+ break;
+ case SRV_ST_STOPPING:
+ lua_pushstring(hlua->T, "STOPPING");
+ break;
+ case SRV_ST_STARTING:
+ lua_pushstring(hlua->T, "STARTING");
+ break;
+ case SRV_ST_RUNNING:
+ lua_pushstring(hlua->T, "RUNNING");
+ break;
+ default:
+ lua_pushnil(hlua->T);
+ break;
+ }
+ lua_settable(hlua->T, -3);
+ }
+
+ /* requeued */
+ lua_pushstring(hlua->T, "requeued");
+ lua_pushinteger(hlua->T, state->safe.requeued);
+ lua_settable(hlua->T, -3);
+
+ lua_settable(hlua->T, -3); /* state table */
+ }
+ else if (event_hdl_sub_type_equal(EVENT_HDL_SUB_SERVER_ADMIN, event)) {
+ struct event_hdl_cb_data_server_admin *admin = data;
+ int it;
+
+ if (!lua_checkstack(hlua->T, 20))
+ WILL_LJMP(luaL_error(hlua->T, "Lua out of memory error."));
+
+ /* admin subclass */
+ lua_pushstring(hlua->T, "admin");
+ lua_newtable(hlua->T);
+
+ lua_pushstring(hlua->T, "cause");
+ lua_pushstring(hlua->T, srv_adm_st_chg_cause(admin->safe.cause));
+ lua_settable(hlua->T, -3);
+
+ /* old_admin, new_admin */
+ for (it = 0; it < 2; it++) {
+ enum srv_admin srv_admin = (!it) ? admin->safe.old_admin : admin->safe.new_admin;
+
+ lua_pushstring(hlua->T, (!it) ? "old_admin" : "new_admin");
+
+ /* admin state matrix */
+ lua_newtable(hlua->T);
+
+ lua_pushstring(hlua->T, "MAINT");
+ lua_pushboolean(hlua->T, srv_admin & SRV_ADMF_MAINT);
+ lua_settable(hlua->T, -3);
+ lua_pushstring(hlua->T, "FMAINT");
+ lua_pushboolean(hlua->T, srv_admin & SRV_ADMF_FMAINT);
+ lua_settable(hlua->T, -3);
+ lua_pushstring(hlua->T, "IMAINT");
+ lua_pushboolean(hlua->T, srv_admin & SRV_ADMF_IMAINT);
+ lua_settable(hlua->T, -3);
+ lua_pushstring(hlua->T, "RMAINT");
+ lua_pushboolean(hlua->T, srv_admin & SRV_ADMF_RMAINT);
+ lua_settable(hlua->T, -3);
+ lua_pushstring(hlua->T, "CMAINT");
+ lua_pushboolean(hlua->T, srv_admin & SRV_ADMF_CMAINT);
+ lua_settable(hlua->T, -3);
+
+ lua_pushstring(hlua->T, "DRAIN");
+ lua_pushboolean(hlua->T, srv_admin & SRV_ADMF_DRAIN);
+ lua_settable(hlua->T, -3);
+ lua_pushstring(hlua->T, "FDRAIN");
+ lua_pushboolean(hlua->T, srv_admin & SRV_ADMF_FDRAIN);
+ lua_settable(hlua->T, -3);
+ lua_pushstring(hlua->T, "IDRAIN");
+ lua_pushboolean(hlua->T, srv_admin & SRV_ADMF_IDRAIN);
+ lua_settable(hlua->T, -3);
+
+ lua_settable(hlua->T, -3); /* matrix table */
+ }
+ /* requeued */
+ lua_pushstring(hlua->T, "requeued");
+ lua_pushinteger(hlua->T, admin->safe.requeued);
+ lua_settable(hlua->T, -3);
+
+ lua_settable(hlua->T, -3); /* admin table */
+ }
+ else if (event_hdl_sub_type_equal(EVENT_HDL_SUB_SERVER_CHECK, event)) {
+ struct event_hdl_cb_data_server_check *check = data;
+
+ if (!lua_checkstack(hlua->T, 20))
+ WILL_LJMP(luaL_error(hlua->T, "Lua out of memory error."));
+
+ /* check subclass */
+ lua_pushstring(hlua->T, "check");
+ lua_newtable(hlua->T);
+
+ /* check result snapshot */
+ hlua_event_hdl_cb_push_event_checkres(hlua->T, &check->safe.res);
+
+ lua_settable(hlua->T, -3); /* check table */
+ }
+
+ /* attempt to provide reference server object
+ * (if it wasn't removed yet, SERVER_DEL will never succeed here)
+ */
+ px = proxy_find_by_id(e_server->safe.proxy_uuid, PR_CAP_BE, 0);
+ BUG_ON(!px);
+ server = findserver_unique_id(px, e_server->safe.puid, e_server->safe.rid);
+ if (server) {
+ lua_pushstring(hlua->T, "reference");
+ hlua_fcn_new_server(hlua->T, server);
+ lua_settable(hlua->T, -3);
+ }
+ }
+ /* sub mgmt */
+ hlua->nargs += 1;
+ hlua_fcn_new_event_sub(hlua->T, hlua_sub->sub);
+
+ /* when? */
+ hlua->nargs += 1;
+ lua_pushinteger(hlua->T, e->when.tv_sec);
+}
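+/* On the Lua side, the resulting callback signature is (sketch):
+ *
+ *   function(event, data, sub, when)
+ *   -- event: event type string, e.g. "SERVER_UP"
+ *   -- data:  table with name/puid/rid/proxy_name/proxy_uuid, the optional
+ *   --        state/admin/check subclass tables built above, and a
+ *   --        "reference" server object when the server still exists
+ *   -- sub:   subscription management object
+ *   -- when:  event timestamp (seconds)
+ */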
+
+/* events runner: if there's an ongoing hlua event handling process, finish it
+ * then, check if there are new events waiting to be processed
+ * (events are processed sequentially)
+ *
+ * We have a safety measure to warn/guard if the event queue is growing
+ * too much due to many events being generated while the lua handler is unable
+ * to keep up the pace (e.g.: when the event queue grows past 100 unconsumed events).
+ * TODO: make it tunable
+ */
+static struct task *hlua_event_runner(struct task *task, void *context, unsigned int state)
+{
+ struct hlua_event_sub *hlua_sub = context;
+ struct event_hdl_async_event *event;
+ const char *error = NULL;
+
+ if (!hlua_sub->paused && event_hdl_async_equeue_size(&hlua_sub->equeue) > 100) {
+ const char *trace = NULL;
+
+ /* We reached the limit of pending events in the queue: we should
+ * warn the user and temporarily pause the subscription to give the
+ * handler a chance to catch up (this also prevents resource shortage,
+ * since the queue could grow indefinitely otherwise).
+ * TODO: find a way to inform the handler that it missed some events
+ * (example: stats within the subscription in event_hdl api exposed via lua api?)
+ *
+ * Nonetheless, reaching this limit means that the handler is not fast enough
+ * and/or that it subscribed to events that happen too frequently and did not
+ * expect it. This could come from an inadequate design in the user's script.
+ */
+ event_hdl_pause(hlua_sub->sub);
+ hlua_sub->paused = 1;
+
+ if (SET_SAFE_LJMP(hlua_sub->hlua)) {
+ /* The following Lua call may fail. */
+ trace = hlua_traceback(hlua_sub->hlua->T, ", ");
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(hlua_sub->hlua);
+ } else {
+ /* Lua error was raised while fetching lua trace from current ctx */
+ SEND_ERR(NULL, "Lua event_hdl: unexpected error (memory failure?).\n");
+ }
+ ha_warning("Lua event_hdl: pausing the subscription because the handler fails "
+ "to keep up the pace (%u unconsumed events) from %s.\n",
+ event_hdl_async_equeue_size(&hlua_sub->equeue),
+ (trace) ? trace : "[unknown]");
+ }
+
+ if (HLUA_IS_RUNNING(hlua_sub->hlua)) {
+ /* ongoing hlua event handler, resume it */
+ hlua_event_handler(hlua_sub->hlua);
+ } else if ((event = event_hdl_async_equeue_pop(&hlua_sub->equeue))) { /* check for new events */
+ if (event_hdl_sub_type_equal(event->type, EVENT_HDL_SUB_END)) {
+ /* ending event: no more events to come */
+ event_hdl_async_free_event(event);
+ task_destroy(task);
+ hlua_event_subscription_destroy(hlua_sub);
+ return NULL;
+ }
+ /* new event: start processing it */
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(hlua_sub->hlua)) {
+ if (lua_type(hlua_sub->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(hlua_sub->hlua->T, -1);
+ else
+ error = "critical error";
+ ha_alert("Lua event_hdl: %s.\n", error);
+ goto skip_event;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua_sub->hlua->T, 5)) {
+ ha_alert("Lua event_hdl: full stack.\n");
+ RESET_SAFE_LJMP(hlua_sub->hlua);
+ goto skip_event;
+ }
+
+ /* Restore the function in the stack. */
+ hlua_pushref(hlua_sub->hlua->T, hlua_sub->fcn_ref);
+
+ /* push args */
+ hlua_sub->hlua->nargs = 0;
+ MAY_LJMP(hlua_event_hdl_cb_push_args(hlua_sub, event));
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(hlua_sub->hlua);
+
+ /* At this point the event was successfully translated into hlua ctx,
+ * or hlua error occurred, so we can safely discard it
+ */
+ event_hdl_async_free_event(event);
+ event = NULL;
+
+ hlua_event_handler(hlua_sub->hlua);
+ skip_event:
+ if (event)
+ event_hdl_async_free_event(event);
+
+ }
+
+ if (!HLUA_IS_RUNNING(hlua_sub->hlua)) {
+ /* we just finished the processing of one event;
+ * check for new events before becoming idle
+ */
+ if (!event_hdl_async_equeue_isempty(&hlua_sub->equeue)) {
+ /* more events to process, make sure the task
+ * will be resumed ASAP to process pending events
+ */
+ task_wakeup(task, TASK_WOKEN_OTHER);
+ }
+ else if (hlua_sub->paused) {
+ /* empty queue, the handler caught up: resume the subscription */
+ event_hdl_resume(hlua_sub->sub);
+ hlua_sub->paused = 0;
+ }
+ }
+
+ return task;
+}
+
+/* Must be called directly under lua protected/safe environment
+ * (not from external callback)
+ * <fcn_ref> should NOT be dropped after the function successfully returns:
+ * it will be done automatically in hlua_event_subscription_destroy() when the
+ * subscription ends.
+ *
+ * Returns the new subscription on success and NULL on failure (memory error)
+ */
+static struct event_hdl_sub *hlua_event_subscribe(event_hdl_sub_list *list, struct event_hdl_sub_type e_type,
+ int state_id, int fcn_ref)
+{
+ struct hlua_event_sub *hlua_sub;
+ struct task *task = NULL;
+
+ hlua_sub = pool_alloc(pool_head_hlua_event_sub);
+ if (!hlua_sub)
+ goto mem_error;
+ hlua_sub->task = NULL;
+ hlua_sub->hlua = NULL;
+ hlua_sub->paused = 0;
+ if ((task = task_new_here()) == NULL) {
+ ha_alert("out of memory while allocating hlua event task");
+ goto mem_error;
+ }
+ task->process = hlua_event_runner;
+ task->context = hlua_sub;
+ event_hdl_async_equeue_init(&hlua_sub->equeue);
+ hlua_sub->task = task;
+ hlua_sub->fcn_ref = fcn_ref;
+ hlua_sub->state_id = state_id;
+ hlua_sub->hlua = pool_alloc(pool_head_hlua);
+ if (!hlua_sub->hlua)
+ goto mem_error;
+ HLUA_INIT(hlua_sub->hlua);
+ if (!hlua_ctx_init(hlua_sub->hlua, hlua_sub->state_id, task))
+ goto mem_error;
+
+ hlua_sub->sub = event_hdl_subscribe_ptr(list, e_type,
+ EVENT_HDL_ASYNC_TASK(&hlua_sub->equeue,
+ task,
+ hlua_sub,
+ NULL));
+ if (!hlua_sub->sub)
+ goto mem_error;
+
+ return hlua_sub->sub; /* returns pointer to event_hdl_sub struct */
+
+ mem_error:
+ if (hlua_sub) {
+ task_destroy(hlua_sub->task);
+ if (hlua_sub->hlua)
+ hlua_ctx_destroy(hlua_sub->hlua);
+ pool_free(pool_head_hlua_event_sub, hlua_sub);
+ }
+
+ return NULL;
+}
+
+/* looks for an array of strings referring to a composition of event_hdl subscription
+ * types at <index> in <L> stack
+ */
+__LJMP static struct event_hdl_sub_type hlua_check_event_sub_types(lua_State *L, int index)
+{
+ struct event_hdl_sub_type subscriptions;
+ const char *msg;
+
+ if (lua_type(L, index) != LUA_TTABLE) {
+ msg = lua_pushfstring(L, "table of strings expected, got %s", luaL_typename(L, index));
+ luaL_argerror(L, index, msg);
+ }
+
+ subscriptions = EVENT_HDL_SUB_NONE;
+
+ /* browse the argument as an array. */
+ lua_pushnil(L);
+ while (lua_next(L, index) != 0) {
+ if (lua_type(L, -1) != LUA_TSTRING) {
+ msg = lua_pushfstring(L, "table of strings expected, got %s", luaL_typename(L, index));
+ luaL_argerror(L, index, msg);
+ }
+
+ if (event_hdl_sub_type_equal(EVENT_HDL_SUB_NONE, event_hdl_string_to_sub_type(lua_tostring(L, -1)))) {
+ msg = lua_pushfstring(L, "'%s' event type is unknown", lua_tostring(L, -1));
+ luaL_argerror(L, index, msg);
+ }
+
+ /* perform subscriptions |= current sub */
+ subscriptions = event_hdl_sub_type_add(subscriptions, event_hdl_string_to_sub_type(lua_tostring(L, -1)));
+
+ /* pop the current value. */
+ lua_pop(L, 1);
+ }
+
+ return subscriptions;
+}
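+/* e.g. from Lua: passing { "SERVER_UP", "SERVER_DOWN" } yields the union of
+ * both subscription types.
+ */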
+
+/* Wrapper for hlua_fcn_new_event_sub(): catch errors raised by
+ * the function to prevent LJMP
+ *
+ * If no error occurred, the function returns 1, else it returns 0 and
+ * the error message is pushed at the top of the stack
+ */
+__LJMP static int _hlua_new_event_sub_safe(lua_State *L)
+{
+ struct event_hdl_sub *sub = lua_touserdata(L, 1);
+
+ /* this function may raise errors */
+ return MAY_LJMP(hlua_fcn_new_event_sub(L, sub));
+}
+static int hlua_new_event_sub_safe(lua_State *L, struct event_hdl_sub *sub)
+{
+ if (!lua_checkstack(L, 2))
+ return 0;
+ lua_pushcfunction(L, _hlua_new_event_sub_safe);
+ lua_pushlightuserdata(L, sub);
+ switch (lua_pcall(L, 1, 1, 0)) {
+ case LUA_OK:
+ return 1;
+ default:
+ /* error was caught */
+ return 0;
+ }
+}
+
+/* This function is a LUA helper used for registering lua event callbacks.
+ * It expects an event subscription array and the function to be executed
+ * when subscribed events occur (stack arguments).
+ * It can be called from the "init" section, "post init" or during the runtime.
+ *
+ * <sub_list> is the subscription list where the subscription will be attempted
+ *
+ * Pushes the newly allocated subscription on the stack on success
+ */
+__LJMP int hlua_event_sub(lua_State *L, event_hdl_sub_list *sub_list)
+{
+ struct hlua *hlua;
+ struct event_hdl_sub *sub;
+ struct event_hdl_sub_type subscriptions;
+ int fcn_ref;
+ int state_id;
+
+ MAY_LJMP(check_args(L, 2, "event_sub"));
+
+ /* Get the reference state */
+ hlua = hlua_gethlua(L);
+ if (hlua)
+ /* we are in runtime processing, any thread may subscribe to events:
+ * subscription events will be handled by the thread who performed
+ * the registration.
+ */
+ state_id = hlua->state_id;
+ else {
+ /* we are in initialization mode, only thread 0 (actual calling thread)
+ * may subscribe to events to prevent the same handler (from different lua
+ * stacks) from being registered multiple times
+ *
+ * hlua_state_id == 0: monostack (lua-load)
+ * hlua_state_id > 0: hlua_state_id=tid+1, multi-stack (lua-load-per-thread)
+ * (thus if hlua_state_id > 1, it means we are not in primary thread ctx)
+ */
+ if (hlua_state_id > 1)
+ return 0; /* skip registration */
+ state_id = hlua_state_id;
+ }
+
+ /* First argument : event subscriptions. */
+ subscriptions = MAY_LJMP(hlua_check_event_sub_types(L, 1));
+
+ if (event_hdl_sub_type_equal(subscriptions, EVENT_HDL_SUB_NONE)) {
+ WILL_LJMP(luaL_error(L, "event_sub: no valid event types were provided"));
+ return 0; /* Never reached */
+ }
+
+ /* Second argument : lua function. */
+ fcn_ref = MAY_LJMP(hlua_checkfunction(L, 2));
+
+ /* try to subscribe */
+ sub = hlua_event_subscribe(sub_list, subscriptions, state_id, fcn_ref);
+ if (!sub) {
+ hlua_unref(L, fcn_ref);
+ WILL_LJMP(luaL_error(L, "event_sub: lua out of memory error"));
+ return 0; /* Never reached */
+ }
+
+ /* push the subscription to the stack
+ *
+ * Here we use the safe function so that lua errors will be
+ * handled explicitly to prevent 'sub' from being lost
+ */
+ if (!hlua_new_event_sub_safe(L, sub)) {
+ /* Some events could already be pending in the handler's queue.
+ * However it is wiser to cancel the subscription since we are unable to
+ * provide a valid reference to it.
+ * Pending events will be delivered (unless lua keeps raising errors).
+ */
+ event_hdl_unsubscribe(sub); /* cancel the subscription */
+ WILL_LJMP(luaL_error(L, "event_sub: cannot push the subscription (%s)", lua_tostring(L, -1)));
+ return 0; /* Never reached */
+ }
+ event_hdl_drop(sub); /* sub has been duplicated, discard old ref */
+
+ return 1;
+}
+
+/* This function is a LUA wrapper used for registering global lua event callbacks
+ * The new subscription is pushed onto the stack on success
+ * Returns the number of arguments pushed to the stack (1 for success)
+ */
+__LJMP static int hlua_event_global_sub(lua_State *L)
+{
+ /* NULL <sub_list> = global subscription list */
+ return MAY_LJMP(hlua_event_sub(L, NULL));
+}
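+/* Illustrative usage (sketch) of the global subscription binding:
+ *
+ *   core.event_sub({ "SERVER_UP", "SERVER_DOWN" }, function(event, data, sub)
+ *       core.Info("server event: " .. event .. " on " ..
+ *                 data.proxy_name .. "/" .. data.name)
+ *   end)
+ */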
+
+/* Wrapper called by HAProxy to execute an LUA converter. This wrapper
+ * doesn't allow "yield" functions because the HAProxy engine cannot
+ * resume converters.
+ */
+static int hlua_sample_conv_wrapper(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct hlua_function *fcn = private;
+ struct stream *stream = smp->strm;
+ const char *error;
+
+ if (!stream)
+ return 0;
+
+ if (!hlua_stream_ctx_prepare(stream, fcn_ref_to_stack_id(fcn))) {
+ SEND_ERR(stream->be, "Lua converter '%s': can't initialize Lua context.\n", fcn->name);
+ return 0;
+ }
+
+ /* If it is the first run, initialize the data for the call. */
+ if (!HLUA_IS_RUNNING(stream->hlua)) {
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(stream->hlua)) {
+ if (lua_type(stream->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(stream->hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(stream->be, "Lua converter '%s': %s.\n", fcn->name, error);
+ return 0;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(stream->hlua->T, 1)) {
+ SEND_ERR(stream->be, "Lua converter '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+
+ /* Restore the function in the stack. */
+ hlua_pushref(stream->hlua->T, fcn->function_ref[stream->hlua->state_id]);
+
+ /* Convert the input sample and push it onto the stack. */
+ if (!lua_checkstack(stream->hlua->T, 1)) {
+ SEND_ERR(stream->be, "Lua converter '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+ MAY_LJMP(hlua_smp2lua(stream->hlua->T, smp));
+ stream->hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ if (arg_p) {
+ for (; arg_p->type != ARGT_STOP; arg_p++) {
+ if (!lua_checkstack(stream->hlua->T, 1)) {
+ SEND_ERR(stream->be, "Lua converter '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+ MAY_LJMP(hlua_arg2lua(stream->hlua->T, arg_p));
+ stream->hlua->nargs++;
+ }
+ }
+
+ /* We must initialize the execution timeouts. */
+ hlua_timer_init(&stream->hlua->timer, hlua_timeout_session);
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(stream->hlua);
+ }
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(stream->hlua, 0)) {
+ /* finished. */
+ case HLUA_E_OK:
+ /* If the stack is empty, the function fails. */
+ if (lua_gettop(stream->hlua->T) <= 0)
+ return 0;
+
+ /* Convert the returned value in sample. */
+ hlua_lua2smp(stream->hlua->T, -1, smp);
+ /* dup the smp before popping the related lua value and
+ * returning it to haproxy
+ */
+ smp_dup(smp);
+ lua_pop(stream->hlua->T, 1);
+ return 1;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ SEND_ERR(stream->be, "Lua converter '%s': cannot use yielded functions.\n", fcn->name);
+ return 0;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(stream->be, "Lua converter '%s': %s.\n",
+ fcn->name, lua_tostring(stream->hlua->T, -1));
+ lua_pop(stream->hlua->T, 1);
+ return 0;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(stream->be, "Lua converter '%s': execution timeout.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(stream->be, "Lua converter '%s': out of memory error.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_YIELD:
+ SEND_ERR(stream->be, "Lua converter '%s': yield functions like core.tcp() or core.sleep() are not allowed.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(stream->be, "Lua converter '%s' returns an unknown error.\n", fcn->name);
+ __fallthrough;
+
+ default:
+ return 0;
+ }
+}
+
+/* Wrapper called by HAProxy to execute a sample-fetch. This wrapper
+ * doesn't allow "yield" functions because the HAProxy engine cannot
+ * resume sample-fetches. This function will be called by the sample
+ * fetch engine to call lua-based fetch operations.
+ */
+static int hlua_sample_fetch_wrapper(const struct arg *arg_p, struct sample *smp,
+ const char *kw, void *private)
+{
+ struct hlua_function *fcn = private;
+ struct stream *stream = smp->strm;
+ const char *error;
+ unsigned int hflags = HLUA_TXN_NOTERM | HLUA_TXN_SMP_CTX;
+
+ if (!stream)
+ return 0;
+
+ if (!hlua_stream_ctx_prepare(stream, fcn_ref_to_stack_id(fcn))) {
+ SEND_ERR(stream->be, "Lua sample-fetch '%s': can't initialize Lua context.\n", fcn->name);
+ return 0;
+ }
+
+ /* If it is the first run, initialize the data for the call. */
+ if (!HLUA_IS_RUNNING(stream->hlua)) {
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(stream->hlua)) {
+ if (lua_type(stream->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(stream->hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': %s.\n", fcn->name, error);
+ return 0;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(stream->hlua->T, 2)) {
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+
+ /* Restore the function in the stack. */
+ hlua_pushref(stream->hlua->T, fcn->function_ref[stream->hlua->state_id]);
+
+ /* push arguments in the stack. */
+ if (!hlua_txn_new(stream->hlua->T, stream, smp->px, smp->opt & SMP_OPT_DIR, hflags)) {
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+ stream->hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ for (; arg_p && arg_p->type != ARGT_STOP; arg_p++) {
+ /* Check stack available size. */
+ if (!lua_checkstack(stream->hlua->T, 1)) {
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': full stack.\n", fcn->name);
+ RESET_SAFE_LJMP(stream->hlua);
+ return 0;
+ }
+ MAY_LJMP(hlua_arg2lua(stream->hlua->T, arg_p));
+ stream->hlua->nargs++;
+ }
+
+ /* We must initialize the execution timeouts. */
+ hlua_timer_init(&stream->hlua->timer, hlua_timeout_session);
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(stream->hlua);
+ }
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(stream->hlua, 0)) {
+ /* finished. */
+ case HLUA_E_OK:
+ /* If the stack is empty, the function fails. */
+ if (lua_gettop(stream->hlua->T) <= 0)
+ return 0;
+
+ /* Convert the returned value in sample. */
+ hlua_lua2smp(stream->hlua->T, -1, smp);
+ /* dup the smp before popping the related lua value and
+ * returning it to haproxy
+ */
+ smp_dup(smp);
+ lua_pop(stream->hlua->T, 1);
+
+ /* Set the end of execution flag. */
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ return 1;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': cannot use yielded functions.\n", fcn->name);
+ return 0;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': %s.\n",
+ fcn->name, lua_tostring(stream->hlua->T, -1));
+ lua_pop(stream->hlua->T, 1);
+ return 0;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': execution timeout.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': out of memory error.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_YIELD:
+ SEND_ERR(smp->px, "Lua sample-fetch '%s': yield not allowed.\n", fcn->name);
+ return 0;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(smp->px, "Lua sample-fetch '%s' returns an unknown error.\n", fcn->name);
+ __fallthrough;
+
+ default:
+ return 0;
+ }
+}
+
+/* This function is an LUA binding used for registering
+ * "sample-conv" functions. It expects a converter name used
+ * in the haproxy configuration file, and an LUA function.
+ */
+__LJMP static int hlua_register_converters(lua_State *L)
+{
+ struct sample_conv_kw_list *sck;
+ const char *name;
+ int ref;
+ int len;
+ struct hlua_function *fcn = NULL;
+ struct sample_conv *sc;
+ struct buffer *trash;
+
+ MAY_LJMP(check_args(L, 2, "register_converters"));
+
+ if (hlua_gethlua(L)) {
+ /* runtime processing */
+ WILL_LJMP(luaL_error(L, "register_converters: not available outside of body context"));
+ }
+
+ /* First argument : converter name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : lua function. */
+ ref = MAY_LJMP(hlua_checkfunction(L, 2));
+
+ /* Check if the converter is already registered */
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ sc = find_sample_conv(trash->area, trash->data);
+ if (sc != NULL) {
+ fcn = sc->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register converter 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ hlua_unref(L, fcn->function_ref[hlua_state_id]);
+ }
+ fcn->function_ref[hlua_state_id] = ref;
+ return 0;
+ }
+
+ /* Allocate and fill the sample fetch keyword struct. */
+ sck = calloc(1, sizeof(*sck) + sizeof(struct sample_conv) * 2);
+ if (!sck)
+ goto alloc_error;
+ fcn = new_hlua_function();
+ if (!fcn)
+ goto alloc_error;
+
+ /* Fill fcn. */
+ fcn->name = strdup(name);
+ if (!fcn->name)
+ goto alloc_error;
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* List head */
+ sck->list.n = sck->list.p = NULL;
+
+ /* converter keyword. */
+ len = strlen("lua.") + strlen(name) + 1;
+ sck->kw[0].kw = calloc(1, len);
+ if (!sck->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)sck->kw[0].kw, len, "lua.%s", name);
+ sck->kw[0].process = hlua_sample_conv_wrapper;
+ sck->kw[0].arg_mask = ARG12(0,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR);
+ sck->kw[0].val_args = NULL;
+ sck->kw[0].in_type = SMP_T_STR;
+ sck->kw[0].out_type = SMP_T_STR;
+ sck->kw[0].private = fcn;
+
+ /* Register this new converter */
+ sample_register_convs(sck);
+
+ return 0;
+
+ alloc_error:
+ release_hlua_function(fcn);
+ hlua_unref(L, ref);
+ ha_free(&sck);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
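+/* Illustrative usage (sketch): registers the "lua.shout" converter, usable
+ * wherever converters are allowed (the names below are examples only):
+ *
+ *   core.register_converters("shout", function(str)
+ *       return string.upper(str)
+ *   end)
+ *
+ *   haproxy.cfg: http-request set-header x-host %[req.hdr(host),lua.shout]
+ */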
+
+/* This function is an LUA binding used for registering
+ * "sample-fetch" functions. It expects a converter name used
+ * in the haproxy configuration file, and an LUA function.
+ */
+__LJMP static int hlua_register_fetches(lua_State *L)
+{
+ const char *name;
+ int ref;
+ int len;
+ struct sample_fetch_kw_list *sfk;
+ struct hlua_function *fcn = NULL;
+ struct sample_fetch *sf;
+ struct buffer *trash;
+
+ MAY_LJMP(check_args(L, 2, "register_fetches"));
+
+ if (hlua_gethlua(L)) {
+ /* runtime processing */
+ WILL_LJMP(luaL_error(L, "register_fetches: not available outside of body context"));
+ }
+
+ /* First argument : sample-fetch name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : lua function. */
+ ref = MAY_LJMP(hlua_checkfunction(L, 2));
+
+ /* Check if the sample-fetch is already registered */
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ sf = find_sample_fetch(trash->area, trash->data);
+ if (sf != NULL) {
+ fcn = sf->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register sample-fetch 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ hlua_unref(L, fcn->function_ref[hlua_state_id]);
+ }
+ fcn->function_ref[hlua_state_id] = ref;
+ return 0;
+ }
+
+ /* Allocate and fill the sample fetch keyword struct. */
+ sfk = calloc(1, sizeof(*sfk) + sizeof(struct sample_fetch) * 2);
+ if (!sfk)
+ goto alloc_error;
+ fcn = new_hlua_function();
+ if (!fcn)
+ goto alloc_error;
+
+ /* Fill fcn. */
+ fcn->name = strdup(name);
+ if (!fcn->name)
+ goto alloc_error;
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* List head */
+ sfk->list.n = sfk->list.p = NULL;
+
+ /* sample-fetch keyword. */
+ len = strlen("lua.") + strlen(name) + 1;
+ sfk->kw[0].kw = calloc(1, len);
+ if (!sfk->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)sfk->kw[0].kw, len, "lua.%s", name);
+ sfk->kw[0].process = hlua_sample_fetch_wrapper;
+ sfk->kw[0].arg_mask = ARG12(0,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR,STR);
+ sfk->kw[0].val_args = NULL;
+ sfk->kw[0].out_type = SMP_T_STR;
+ sfk->kw[0].use = SMP_USE_HTTP_ANY;
+ sfk->kw[0].val = 0;
+ sfk->kw[0].private = fcn;
+
+ /* Register this new fetch. */
+ sample_register_fetches(sfk);
+
+ return 0;
+
+ alloc_error:
+ release_hlua_function(fcn);
+ hlua_unref(L, ref);
+ ha_free(&sfk);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
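+/* Illustrative usage (sketch): registers the "lua.my_fetch" sample-fetch
+ * (the names below are examples only):
+ *
+ *   core.register_fetches("my_fetch", function(txn, arg)
+ *       return "value-" .. (arg or "default")
+ *   end)
+ *
+ *   haproxy.cfg: http-request set-header x-val %[lua.my_fetch(foo)]
+ */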
+
+/* This function is a lua binding to set the wake_time.
+ */
+__LJMP static int hlua_set_wake_time(lua_State *L)
+{
+ struct hlua *hlua;
+ unsigned int delay;
+ int wakeup_ms; // tick value
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+ if (!hlua) {
+ return 0;
+ }
+
+ MAY_LJMP(check_args(L, 1, "wake_time"));
+
+ delay = MAY_LJMP(luaL_checkinteger(L, 1));
+ wakeup_ms = tick_add(now_ms, delay);
+ hlua->wake_time = wakeup_ms;
+ return 0;
+}
+
+/* This function is a wrapper to execute each LUA function declared as an
+ * action during the initialisation period. This function may return any
+ * ACT_RET_* value. On error ACT_RET_CONT is returned and the action is
+ * ignored. If the lua action yields, ACT_RET_YIELD is returned. On success, the
+ * return value is the first element on the stack.
+ */
+static enum act_return hlua_action(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ char **arg;
+ unsigned int hflags = HLUA_TXN_ACT_CTX;
+ int dir, act_ret = ACT_RET_CONT;
+ const char *error;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CNT: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: dir = SMP_OPT_DIR_RES; break;
+ default:
+ SEND_ERR(px, "Lua: internal error while execute action.\n");
+ goto end;
+ }
+
+ if (!hlua_stream_ctx_prepare(s, fcn_ref_to_stack_id(rule->arg.hlua_rule->fcn))) {
+ SEND_ERR(px, "Lua action '%s': can't initialize Lua context.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto end;
+ }
+
+ /* If it is the first run, initialize the data for the call. */
+ if (!HLUA_IS_RUNNING(s->hlua)) {
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(s->hlua)) {
+ if (lua_type(s->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(s->hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(px, "Lua function '%s': %s.\n",
+ rule->arg.hlua_rule->fcn->name, error);
+ goto end;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(s->hlua->T, 1)) {
+ SEND_ERR(px, "Lua function '%s': full stack.\n",
+ rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(s->hlua);
+ goto end;
+ }
+
+ /* Restore the function in the stack. */
+ hlua_pushref(s->hlua->T, rule->arg.hlua_rule->fcn->function_ref[s->hlua->state_id]);
+
+ /* Create and push the TXN object on the stack. */
+ if (!hlua_txn_new(s->hlua->T, s, px, dir, hflags)) {
+ SEND_ERR(px, "Lua function '%s': full stack.\n",
+ rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(s->hlua);
+ goto end;
+ }
+ s->hlua->nargs = 1;
+
+ /* push keywords in the stack. */
+ for (arg = rule->arg.hlua_rule->args; arg && *arg; arg++) {
+ if (!lua_checkstack(s->hlua->T, 1)) {
+ SEND_ERR(px, "Lua function '%s': full stack.\n",
+ rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(s->hlua);
+ goto end;
+ }
+ lua_pushstring(s->hlua->T, *arg);
+ s->hlua->nargs++;
+ }
+
+ /* Now the execution is safe. */
+ RESET_SAFE_LJMP(s->hlua);
+
+ /* We must initialize the execution timeouts. */
+ hlua_timer_init(&s->hlua->timer, hlua_timeout_session);
+ }
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(s->hlua, !(flags & ACT_OPT_FINAL))) {
+ /* finished. */
+ case HLUA_E_OK:
+ /* Catch the return value */
+ if (lua_gettop(s->hlua->T) > 0)
+ act_ret = lua_tointeger(s->hlua->T, -1);
+
+ /* Set timeout in the required channel. */
+ if (act_ret == ACT_RET_YIELD) {
+ if (flags & ACT_OPT_FINAL)
+ goto err_yield;
+
+ if (dir == SMP_OPT_DIR_REQ)
+ s->req.analyse_exp = tick_first((tick_is_expired(s->req.analyse_exp, now_ms) ? 0 : s->req.analyse_exp),
+ s->hlua->wake_time);
+ else
+ s->res.analyse_exp = tick_first((tick_is_expired(s->res.analyse_exp, now_ms) ? 0 : s->res.analyse_exp),
+ s->hlua->wake_time);
+ }
+ goto end;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ /* Set timeout in the required channel. */
+ if (dir == SMP_OPT_DIR_REQ)
+ s->req.analyse_exp = tick_first((tick_is_expired(s->req.analyse_exp, now_ms) ? 0 : s->req.analyse_exp),
+ s->hlua->wake_time);
+ else
+ s->res.analyse_exp = tick_first((tick_is_expired(s->res.analyse_exp, now_ms) ? 0 : s->res.analyse_exp),
+ s->hlua->wake_time);
+
+ /* Some actions can be woken up when a "write" event
+ * is detected on a response channel. This is useful
+ * only for actions targeting the request side.
+ */
+ if (HLUA_IS_WAKERESWR(s->hlua))
+ s->res.flags |= CF_WAKE_WRITE;
+ if (HLUA_IS_WAKEREQWR(s->hlua))
+ s->req.flags |= CF_WAKE_WRITE;
+ act_ret = ACT_RET_YIELD;
+ goto end;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(px, "Lua function '%s': %s.\n",
+ rule->arg.hlua_rule->fcn->name, lua_tostring(s->hlua->T, -1));
+ lua_pop(s->hlua->T, 1);
+ goto end;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(px, "Lua function '%s': execution timeout.\n", rule->arg.hlua_rule->fcn->name);
+ goto end;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(px, "Lua function '%s': out of memory error.\n", rule->arg.hlua_rule->fcn->name);
+ goto end;
+
+ case HLUA_E_YIELD:
+ err_yield:
+ act_ret = ACT_RET_CONT;
+ SEND_ERR(px, "Lua function '%s': yield not allowed.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto end;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(px, "Lua function '%s' return an unknown error.\n",
+ rule->arg.hlua_rule->fcn->name);
+
+ default:
+ goto end;
+ }
+
+ end:
+ if (act_ret != ACT_RET_YIELD && s->hlua)
+ s->hlua->wake_time = TICK_ETERNITY;
+ return act_ret;
+}
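+
+/* Illustrative sketch (not part of the upstream sources): a Lua action
+ * registered as below exercises the yield path handled above. core.msleep()
+ * yields, so hlua_ctx_resume() returns HLUA_E_AGAIN on the first call and
+ * the analyser expiration date is armed from s->hlua->wake_time:
+ *
+ *   core.register_action("pause", { "http-req" }, function(txn)
+ *       core.msleep(10)  -- yields; the action is resumed about 10ms later
+ *   end)
+ */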
+
+struct task *hlua_applet_wakeup(struct task *t, void *context, unsigned int state)
+{
+ struct appctx *ctx = context;
+
+ appctx_wakeup(ctx);
+ t->expire = TICK_ETERNITY;
+ return t;
+}
+
+static int hlua_applet_tcp_init(struct appctx *ctx)
+{
+ struct hlua_tcp_ctx *tcp_ctx = applet_reserve_svcctx(ctx, sizeof(*tcp_ctx));
+ struct stconn *sc = appctx_sc(ctx);
+ struct stream *strm = __sc_strm(sc);
+ struct hlua *hlua;
+ struct task *task;
+ char **arg;
+ const char *error;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': out of memory.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+ HLUA_INIT(hlua);
+ tcp_ctx->hlua = hlua;
+ tcp_ctx->flags = 0;
+
+ /* Create the task used by signals to wake up the applet. */
+ task = task_new_here();
+ if (!task) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': out of memory.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+ task->nice = 0;
+ task->context = ctx;
+ task->process = hlua_applet_wakeup;
+ tcp_ctx->task = task;
+
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. This behavior
+ * saves performance, since a systematic Lua
+ * initialization causes about a 5% performance loss.
+ */
+ if (!hlua_ctx_init(hlua, fcn_ref_to_stack_id(ctx->rule->arg.hlua_rule->fcn), task)) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': can't initialize Lua context.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+
+ /* Set timeout according with the applet configuration. */
+ hlua_timer_init(&hlua->timer, ctx->applet->timeout);
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(hlua)) {
+ if (lua_type(hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(strm->be, "Lua applet tcp '%s': %s.\n",
+ ctx->rule->arg.hlua_rule->fcn->name, error);
+ return -1;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+
+ /* Restore the function in the stack. */
+ hlua_pushref(hlua->T, ctx->rule->arg.hlua_rule->fcn->function_ref[hlua->state_id]);
+
+ /* Create and push the AppletTCP object on the stack. */
+ if (!hlua_applet_tcp_new(hlua->T, ctx)) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+ hlua->nargs = 1;
+
+ /* Push the configuration arguments on the stack. */
+ for (arg = ctx->rule->arg.hlua_rule->args; arg && *arg; arg++) {
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(strm->be, "Lua applet tcp '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+ lua_pushstring(hlua->T, *arg);
+ hlua->nargs++;
+ }
+
+ RESET_SAFE_LJMP(hlua);
+
+ /* Wakeup the applet ASAP. */
+ applet_need_more_data(ctx);
+ applet_have_more_data(ctx);
+
+ return 0;
+}
+
+void hlua_applet_tcp_fct(struct appctx *ctx)
+{
+ struct hlua_tcp_ctx *tcp_ctx = ctx->svcctx;
+ struct stconn *sc = appctx_sc(ctx);
+ struct stream *strm = __sc_strm(sc);
+ struct act_rule *rule = ctx->rule;
+ struct proxy *px = strm->be;
+ struct hlua *hlua = tcp_ctx->hlua;
+
+ if (unlikely(se_fl_test(ctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW))))
+ goto out;
+
+ /* The applet execution is already done. */
+ if (tcp_ctx->flags & APPLET_DONE)
+ goto out;
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(hlua, 1)) {
+ /* finished. */
+ case HLUA_E_OK:
+ tcp_ctx->flags |= APPLET_DONE;
+ se_fl_set(ctx->sedesc, SE_FL_EOI|SE_FL_EOS);
+ break;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ if (hlua->wake_time != TICK_ETERNITY)
+ task_schedule(tcp_ctx->task, hlua->wake_time);
+ break;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(px, "Lua applet tcp '%s': %s.\n",
+ rule->arg.hlua_rule->fcn->name, lua_tostring(hlua->T, -1));
+ lua_pop(hlua->T, 1);
+ goto error;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(px, "Lua applet tcp '%s': execution timeout.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(px, "Lua applet tcp '%s': out of memory error.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_YIELD: /* unexpected */
+ SEND_ERR(px, "Lua applet tcp '%s': yield not allowed.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(px, "Lua applet tcp '%s' return an unknown error.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ default:
+ goto error;
+ }
+
+out:
+ /* eat the whole request */
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ return;
+
+error:
+ se_fl_set(ctx->sedesc, SE_FL_ERROR);
+ tcp_ctx->flags |= APPLET_DONE;
+ goto out;
+}
+
+static void hlua_applet_tcp_release(struct appctx *ctx)
+{
+ struct hlua_tcp_ctx *tcp_ctx = ctx->svcctx;
+
+ task_destroy(tcp_ctx->task);
+ tcp_ctx->task = NULL;
+ hlua_ctx_destroy(tcp_ctx->hlua);
+ tcp_ctx->hlua = NULL;
+}
+
+/* The function returns 0 if the initialisation is complete or -1 if
+ * an error occurs. It also reserves the appctx for an hlua_http_ctx.
+ */
+static int hlua_applet_http_init(struct appctx *ctx)
+{
+ struct hlua_http_ctx *http_ctx = applet_reserve_svcctx(ctx, sizeof(*http_ctx));
+ struct stconn *sc = appctx_sc(ctx);
+ struct stream *strm = __sc_strm(sc);
+ struct http_txn *txn;
+ struct hlua *hlua;
+ char **arg;
+ struct task *task;
+ const char *error;
+
+ txn = strm->txn;
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(strm->be, "Lua applet http '%s': out of memory.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+ HLUA_INIT(hlua);
+ http_ctx->hlua = hlua;
+ http_ctx->left_bytes = -1;
+ http_ctx->flags = 0;
+
+ if (txn->req.flags & HTTP_MSGF_VER_11)
+ http_ctx->flags |= APPLET_HTTP11;
+
+ /* Create the task used by signals to wake up the applet. */
+ task = task_new_here();
+ if (!task) {
+ SEND_ERR(strm->be, "Lua applet http '%s': out of memory.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+ task->nice = 0;
+ task->context = ctx;
+ task->process = hlua_applet_wakeup;
+ http_ctx->task = task;
+
+ /* In the execution wrappers linked with a stream, the
+ * Lua context may not be initialized yet. This behavior
+ * saves performance, since a systematic Lua
+ * initialization causes about a 5% performance loss.
+ */
+ if (!hlua_ctx_init(hlua, fcn_ref_to_stack_id(ctx->rule->arg.hlua_rule->fcn), task)) {
+ SEND_ERR(strm->be, "Lua applet http '%s': can't initialize Lua context.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ return -1;
+ }
+
+ /* Set timeout according with the applet configuration. */
+ hlua_timer_init(&hlua->timer, ctx->applet->timeout);
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(hlua)) {
+ if (lua_type(hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(strm->be, "Lua applet http '%s': %s.\n",
+ ctx->rule->arg.hlua_rule->fcn->name, error);
+ return -1;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(strm->be, "Lua applet http '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+
+ /* Restore the function in the stack. */
+ hlua_pushref(hlua->T, ctx->rule->arg.hlua_rule->fcn->function_ref[hlua->state_id]);
+
+ /* Create and push the AppletHTTP object on the stack. */
+ if (!hlua_applet_http_new(hlua->T, ctx)) {
+ SEND_ERR(strm->be, "Lua applet http '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+ hlua->nargs = 1;
+
+ /* Push the configuration arguments on the stack. */
+ for (arg = ctx->rule->arg.hlua_rule->args; arg && *arg; arg++) {
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(strm->be, "Lua applet http '%s': full stack.\n",
+ ctx->rule->arg.hlua_rule->fcn->name);
+ RESET_SAFE_LJMP(hlua);
+ return -1;
+ }
+ lua_pushstring(hlua->T, *arg);
+ hlua->nargs++;
+ }
+
+ RESET_SAFE_LJMP(hlua);
+
+ /* Wakeup the applet when data is ready for read. */
+ applet_need_more_data(ctx);
+
+ return 0;
+}
+
+void hlua_applet_http_fct(struct appctx *ctx)
+{
+ struct hlua_http_ctx *http_ctx = ctx->svcctx;
+ struct stconn *sc = appctx_sc(ctx);
+ struct stream *strm = __sc_strm(sc);
+ struct channel *req = sc_oc(sc);
+ struct channel *res = sc_ic(sc);
+ struct act_rule *rule = ctx->rule;
+ struct proxy *px = strm->be;
+ struct hlua *hlua = http_ctx->hlua;
+ struct htx *req_htx, *res_htx;
+
+ res_htx = htx_from_buf(&res->buf);
+
+ if (unlikely(se_fl_test(ctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW))))
+ goto out;
+
+ /* The applet execution is already done. */
+ if (http_ctx->flags & APPLET_DONE)
+ goto out;
+
+ /* Check if the response buffer is available. */
+ if (!b_size(&res->buf)) {
+ sc_need_room(sc, 0);
+ goto out;
+ }
+
+ /* Before the first run, wait for some request data. */
+ if (!HLUA_IS_RUNNING(hlua) &&
+ !(http_ctx->flags & APPLET_DONE)) {
+ if (!co_data(req)) {
+ applet_need_more_data(ctx);
+ goto out;
+ }
+ }
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(hlua, 1)) {
+ /* finished. */
+ case HLUA_E_OK:
+ http_ctx->flags |= APPLET_DONE;
+ break;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ if (hlua->wake_time != TICK_ETERNITY)
+ task_schedule(http_ctx->task, hlua->wake_time);
+ goto out;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(px, "Lua applet http '%s': %s.\n",
+ rule->arg.hlua_rule->fcn->name, lua_tostring(hlua->T, -1));
+ lua_pop(hlua->T, 1);
+ goto error;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(px, "Lua applet http '%s': execution timeout.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(px, "Lua applet http '%s': out of memory error.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_YIELD: /* unexpected */
+ SEND_ERR(px, "Lua applet http '%s': yield not allowed.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(px, "Lua applet http '%s' return an unknown error.\n",
+ rule->arg.hlua_rule->fcn->name);
+ goto error;
+
+ default:
+ goto error;
+ }
+
+ if (http_ctx->flags & APPLET_DONE) {
+ if (http_ctx->flags & APPLET_RSP_SENT)
+ goto out;
+
+ if (!(http_ctx->flags & APPLET_HDR_SENT))
+ goto error;
+
+ /* No more data is expected. If the response buffer is empty
+ * for a chunked message, be sure to add something (an EOT block
+ * in this case) so there is something to send. This is important
+ * to ensure the EOM flag will be handled by the endpoint.
+ */
+ if (htx_is_empty(res_htx) && (strm->txn->rsp.flags & (HTTP_MSGF_XFER_LEN|HTTP_MSGF_CNT_LEN)) == HTTP_MSGF_XFER_LEN) {
+ if (!htx_add_endof(res_htx, HTX_BLK_EOT)) {
+ sc_need_room(sc, sizeof(struct htx_blk)+1);
+ goto out;
+ }
+ channel_add_input(res, 1);
+ }
+
+ res_htx->flags |= HTX_FL_EOM;
+ se_fl_set(ctx->sedesc, SE_FL_EOI|SE_FL_EOS);
+ strm->txn->status = http_ctx->status;
+ http_ctx->flags |= APPLET_RSP_SENT;
+ }
+
+ out:
+ htx_to_buf(res_htx, &res->buf);
+ /* eat the whole request */
+ if (co_data(req)) {
+ req_htx = htx_from_buf(&req->buf);
+ co_htx_skip(req, req_htx, co_data(req));
+ htx_to_buf(req_htx, &req->buf);
+ }
+ return;
+
+ error:
+
+ /* If we are in HTTP mode and we have not sent any
+ * data yet, return a 500 server error as a best
+ * effort: if there is no room available in the
+ * buffer, just close the connection.
+ */
+ if (!(http_ctx->flags & APPLET_HDR_SENT)) {
+ struct buffer *err = &http_err_chunks[HTTP_ERR_500];
+
+ channel_erase(res);
+ res->buf.data = b_data(err);
+ memcpy(res->buf.area, b_head(err), b_data(err));
+ res_htx = htx_from_buf(&res->buf);
+ channel_add_input(res, res_htx->data);
+ se_fl_set(ctx->sedesc, SE_FL_EOI|SE_FL_EOS);
+ }
+ else
+ se_fl_set(ctx->sedesc, SE_FL_ERROR);
+
+ if (!(strm->flags & SF_ERR_MASK))
+ strm->flags |= SF_ERR_RESOURCE;
+ http_ctx->flags |= APPLET_DONE;
+ goto out;
+}
+
+static void hlua_applet_http_release(struct appctx *ctx)
+{
+ struct hlua_http_ctx *http_ctx = ctx->svcctx;
+
+ task_destroy(http_ctx->task);
+ http_ctx->task = NULL;
+ hlua_ctx_destroy(http_ctx->hlua);
+ http_ctx->hlua = NULL;
+}
+
+/* Global {tcp|http}-request parser. Returns ACT_RET_PRS_OK on success,
+ * otherwise ACT_RET_PRS_ERR.
+ *
+ * This function can fail with an abort() due to a Lua critical error.
+ * Since we are in HAProxy's configuration parsing process, this abort()
+ * is tolerated.
+ */
+static enum act_parse_ret action_register_lua(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct hlua_function *fcn = rule->kw->private;
+ int i;
+
+ /* Memory for the rule. */
+ rule->arg.hlua_rule = calloc(1, sizeof(*rule->arg.hlua_rule));
+ if (!rule->arg.hlua_rule) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+
+ /* Memory for arguments. */
+ rule->arg.hlua_rule->args = calloc(fcn->nargs + 1,
+ sizeof(*rule->arg.hlua_rule->args));
+ if (!rule->arg.hlua_rule->args) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+
+ /* Reference the Lua function and store the reference. */
+ rule->arg.hlua_rule->fcn = fcn;
+
+ /* Expect some arguments */
+ for (i = 0; i < fcn->nargs; i++) {
+ if (*args[*cur_arg] == '\0') {
+ memprintf(err, "expect %d arguments", fcn->nargs);
+ goto error;
+ }
+ rule->arg.hlua_rule->args[i] = strdup(args[*cur_arg]);
+ if (!rule->arg.hlua_rule->args[i]) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+ (*cur_arg)++;
+ }
+ rule->arg.hlua_rule->args[i] = NULL;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = hlua_action;
+ return ACT_RET_PRS_OK;
+
+ error:
+ if (rule->arg.hlua_rule) {
+ if (rule->arg.hlua_rule->args) {
+ for (i = 0; i < fcn->nargs; i++)
+ ha_free(&rule->arg.hlua_rule->args[i]);
+ ha_free(&rule->arg.hlua_rule->args);
+ }
+ ha_free(&rule->arg.hlua_rule);
+ }
+ return ACT_RET_PRS_ERR;
+}
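+
+/* Illustrative configuration sketch (hypothetical action name): an action
+ * registered from Lua with nargs=1 under the name "my-action" is exposed as
+ * the keyword "lua.my-action" and parsed by the function above:
+ *
+ *   http-request lua.my-action foo
+ *
+ * Here "foo" is duplicated into rule->arg.hlua_rule->args[0].
+ */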
+
+static enum act_parse_ret action_register_service_http(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct hlua_function *fcn = rule->kw->private;
+
+ /* HTTP applets are forbidden in tcp-request rules.
+ * An HTTP applet requires everything to be initialized by
+ * "http_process_request" (analyzer flag AN_REQ_HTTP_INNER),
+ * but the applet would be initialized before this analyzer
+ * is called.
+ */
+ if (rule->from != ACT_F_HTTP_REQ) {
+ memprintf(err, "HTTP applets are forbidden from 'tcp-request' rulesets");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Memory for the rule. */
+ rule->arg.hlua_rule = calloc(1, sizeof(*rule->arg.hlua_rule));
+ if (!rule->arg.hlua_rule) {
+ memprintf(err, "out of memory error");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Reference the Lua function and store the reference. */
+ rule->arg.hlua_rule->fcn = fcn;
+
+ /* TODO: later accept arguments. */
+ rule->arg.hlua_rule->args = NULL;
+
+ /* Add applet pointer in the rule. */
+ rule->applet.obj_type = OBJ_TYPE_APPLET;
+ rule->applet.name = fcn->name;
+ rule->applet.init = hlua_applet_http_init;
+ rule->applet.fct = hlua_applet_http_fct;
+ rule->applet.release = hlua_applet_http_release;
+ rule->applet.timeout = hlua_timeout_applet;
+
+ return ACT_RET_PRS_OK;
+}
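+
+/* Illustrative configuration sketch (hypothetical service name): an HTTP
+ * service registered from Lua as "fake-api" is referenced through the
+ * "use-service" action, which goes through the parser above:
+ *
+ *   http-request use-service lua.fake-api
+ */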
+
+/* This function is a Lua binding used for registering actions. It
+ * expects an action name used in the HAProxy configuration file, a
+ * table of rulesets where the action applies, a Lua function, and
+ * optionally the number of configuration arguments.
+ */
+__LJMP static int hlua_register_action(lua_State *L)
+{
+ struct action_kw_list *akl = NULL;
+ const char *name;
+ int ref;
+ int len;
+ struct hlua_function *fcn = NULL;
+ int nargs;
+ struct buffer *trash;
+ struct action_kw *akw;
+
+ /* Initialise the number of expected arguments to 0. */
+ nargs = 0;
+
+ if (lua_gettop(L) < 3 || lua_gettop(L) > 4)
+ WILL_LJMP(luaL_error(L, "'register_action' needs between 3 and 4 arguments"));
+
+ if (hlua_gethlua(L)) {
+ /* runtime processing */
+ WILL_LJMP(luaL_error(L, "register_action: not available outside of body context"));
+ }
+
+ /* First argument : action name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : environment. */
+ if (lua_type(L, 2) != LUA_TTABLE)
+ WILL_LJMP(luaL_error(L, "register_action: second argument must be a table of strings"));
+
+ /* Third argument : lua function. */
+ ref = MAY_LJMP(hlua_checkfunction(L, 3));
+
+ /* Fourth argument : number of mandatory arguments expected on the configuration line. */
+ if (lua_gettop(L) >= 4)
+ nargs = MAY_LJMP(luaL_checkinteger(L, 4));
+
+ /* browse the second argument as an array. */
+ lua_pushnil(L);
+ while (lua_next(L, 2) != 0) {
+ if (lua_type(L, -1) != LUA_TSTRING) {
+ hlua_unref(L, ref);
+ WILL_LJMP(luaL_error(L, "register_action: second argument must be a table of strings"));
+ }
+
+ /* Check if action exists */
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ if (strcmp(lua_tostring(L, -1), "tcp-req") == 0) {
+ akw = tcp_req_cont_action(trash->area);
+ } else if (strcmp(lua_tostring(L, -1), "tcp-res") == 0) {
+ akw = tcp_res_cont_action(trash->area);
+ } else if (strcmp(lua_tostring(L, -1), "http-req") == 0) {
+ akw = action_http_req_custom(trash->area);
+ } else if (strcmp(lua_tostring(L, -1), "http-res") == 0) {
+ akw = action_http_res_custom(trash->area);
+ } else if (strcmp(lua_tostring(L, -1), "http-after-res") == 0) {
+ akw = action_http_after_res_custom(trash->area);
+ } else {
+ akw = NULL;
+ }
+ if (akw != NULL) {
+ fcn = akw->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register action 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ hlua_unref(L, fcn->function_ref[hlua_state_id]);
+ }
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* pop the environment string. */
+ lua_pop(L, 1);
+ continue;
+ }
+
+ /* Allocate and fill the action keyword struct. */
+ akl = calloc(1, sizeof(*akl) + sizeof(struct action_kw) * 2);
+ if (!akl)
+ goto alloc_error;
+ fcn = new_hlua_function();
+ if (!fcn)
+ goto alloc_error;
+
+ /* Fill fcn. */
+ fcn->name = strdup(name);
+ if (!fcn->name)
+ goto alloc_error;
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* Set the expected number of arguments. */
+ fcn->nargs = nargs;
+
+ /* List head */
+ akl->list.n = akl->list.p = NULL;
+
+ /* action keyword. */
+ len = strlen("lua.") + strlen(name) + 1;
+ akl->kw[0].kw = calloc(1, len);
+ if (!akl->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)akl->kw[0].kw, len, "lua.%s", name);
+
+ akl->kw[0].flags = 0;
+ akl->kw[0].private = fcn;
+ akl->kw[0].parse = action_register_lua;
+
+ /* select the action registering point. */
+ if (strcmp(lua_tostring(L, -1), "tcp-req") == 0)
+ tcp_req_cont_keywords_register(akl);
+ else if (strcmp(lua_tostring(L, -1), "tcp-res") == 0)
+ tcp_res_cont_keywords_register(akl);
+ else if (strcmp(lua_tostring(L, -1), "http-req") == 0)
+ http_req_keywords_register(akl);
+ else if (strcmp(lua_tostring(L, -1), "http-res") == 0)
+ http_res_keywords_register(akl);
+ else if (strcmp(lua_tostring(L, -1), "http-after-res") == 0)
+ http_after_res_keywords_register(akl);
+ else {
+ release_hlua_function(fcn);
+ hlua_unref(L, ref);
+ if (akl)
+ ha_free((char **)&(akl->kw[0].kw));
+ ha_free(&akl);
+ WILL_LJMP(luaL_error(L, "Lua action environment '%s' is unknown. "
+ "'tcp-req', 'tcp-res', 'http-req', 'http-res' "
+ "or 'http-after-res' "
+ "are expected.", lua_tostring(L, -1)));
+ }
+
+ /* pop the environment string. */
+ lua_pop(L, 1);
+
+ /* reset for next loop */
+ akl = NULL;
+ fcn = NULL;
+ }
+ return 0; /* number of results returned to Lua */
+
+ alloc_error:
+ release_hlua_function(fcn);
+ hlua_unref(L, ref);
+ ha_free(&akl);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
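+
+/* Illustrative Lua-side sketch (hypothetical names): the binding above is
+ * exposed to scripts as core.register_action(). A script loaded with
+ * "lua-load" may register one function for several rulesets at once:
+ *
+ *   core.register_action("set-mark", { "tcp-req", "http-req" },
+ *                        function(txn, mark)
+ *                            txn:set_var("txn.mark", mark)
+ *                        end, 1)
+ */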
+
+static enum act_parse_ret action_register_service_tcp(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct hlua_function *fcn = rule->kw->private;
+
+ if (px->mode == PR_MODE_HTTP) {
+ memprintf(err, "Lua TCP services cannot be used on HTTP proxies");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Memory for the rule. */
+ rule->arg.hlua_rule = calloc(1, sizeof(*rule->arg.hlua_rule));
+ if (!rule->arg.hlua_rule) {
+ memprintf(err, "out of memory error");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Reference the Lua function and store the reference. */
+ rule->arg.hlua_rule->fcn = fcn;
+
+ /* TODO: later accept arguments. */
+ rule->arg.hlua_rule->args = NULL;
+
+ /* Add applet pointer in the rule. */
+ rule->applet.obj_type = OBJ_TYPE_APPLET;
+ rule->applet.name = fcn->name;
+ rule->applet.init = hlua_applet_tcp_init;
+ rule->applet.fct = hlua_applet_tcp_fct;
+ rule->applet.release = hlua_applet_tcp_release;
+ rule->applet.timeout = hlua_timeout_applet;
+
+ return ACT_RET_PRS_OK;
+}
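+
+/* Illustrative configuration sketch (hypothetical service name): a TCP
+ * service is only valid on a TCP proxy, as enforced above, and is
+ * referenced as follows:
+ *
+ *   listen echo
+ *       mode tcp
+ *       bind :9000
+ *       tcp-request content use-service lua.echo
+ */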
+
+/* This function is a Lua binding used for registering services. It
+ * expects a service name used in the HAProxy configuration file, an
+ * environment ("tcp" or "http"), and a Lua function.
+ */
+__LJMP static int hlua_register_service(lua_State *L)
+{
+ struct action_kw_list *akl;
+ const char *name;
+ const char *env;
+ int ref;
+ int len;
+ struct hlua_function *fcn = NULL;
+ struct buffer *trash;
+ struct action_kw *akw;
+
+ MAY_LJMP(check_args(L, 3, "register_service"));
+
+ if (hlua_gethlua(L)) {
+ /* runtime processing */
+ WILL_LJMP(luaL_error(L, "register_service: not available outside of body context"));
+ }
+
+ /* First argument : service name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : environment. */
+ env = MAY_LJMP(luaL_checkstring(L, 2));
+
+ /* Third argument : lua function. */
+ ref = MAY_LJMP(hlua_checkfunction(L, 3));
+
+ /* Check for service already registered */
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ akw = service_find(trash->area);
+ if (akw != NULL) {
+ fcn = akw->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register service 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ hlua_unref(L, fcn->function_ref[hlua_state_id]);
+ }
+ fcn->function_ref[hlua_state_id] = ref;
+ return 0;
+ }
+
+ /* Allocate and fill the action keyword struct. */
+ akl = calloc(1, sizeof(*akl) + sizeof(struct action_kw) * 2);
+ if (!akl)
+ goto alloc_error;
+ fcn = new_hlua_function();
+ if (!fcn)
+ goto alloc_error;
+
+ /* Fill fcn. */
+ len = strlen("<lua.>") + strlen(name) + 1;
+ fcn->name = calloc(1, len);
+ if (!fcn->name)
+ goto alloc_error;
+ snprintf((char *)fcn->name, len, "<lua.%s>", name);
+ fcn->function_ref[hlua_state_id] = ref;
+
+ /* List head */
+ akl->list.n = akl->list.p = NULL;
+
+ /* service keyword. */
+ len = strlen("lua.") + strlen(name) + 1;
+ akl->kw[0].kw = calloc(1, len);
+ if (!akl->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)akl->kw[0].kw, len, "lua.%s", name);
+
+ /* Check the required environment. Only "tcp" or "http" are accepted. */
+ if (strcmp(env, "tcp") == 0)
+ akl->kw[0].parse = action_register_service_tcp;
+ else if (strcmp(env, "http") == 0)
+ akl->kw[0].parse = action_register_service_http;
+ else {
+ release_hlua_function(fcn);
+ hlua_unref(L, ref);
+ if (akl)
+ ha_free((char **)&(akl->kw[0].kw));
+ ha_free(&akl);
+ WILL_LJMP(luaL_error(L, "Lua service environment '%s' is unknown. "
+ "'tcp' or 'http' are expected.", env));
+ }
+
+ akl->kw[0].flags = 0;
+ akl->kw[0].private = fcn;
+
+ /* End of array. */
+ memset(&akl->kw[1], 0, sizeof(*akl->kw));
+
+ /* Register this new service */
+ service_keywords_register(akl);
+
+ return 0;
+
+ alloc_error:
+ release_hlua_function(fcn);
+ hlua_unref(L, ref);
+ ha_free(&akl);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
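+
+/* Illustrative Lua-side sketch (hypothetical name): the binding above is
+ * exposed to scripts as core.register_service(). For the "http"
+ * environment the callback receives an AppletHTTP object:
+ *
+ *   core.register_service("hello", "http", function(applet)
+ *       local body = "Hello World!"
+ *       applet:set_status(200)
+ *       applet:add_header("content-length", string.format("%d", #body))
+ *       applet:add_header("content-type", "text/plain")
+ *       applet:start_response()
+ *       applet:send(body)
+ *   end)
+ */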
+
+/* This function initialises the Lua CLI handler. It copies the
+ * arguments onto the Lua stack and creates channel IO objects.
+ */
+static int hlua_cli_parse_fct(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct hlua_cli_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct hlua *hlua;
+ struct hlua_function *fcn;
+ int i;
+ const char *error;
+
+ fcn = private;
+ ctx->fcn = private;
+
+ hlua = pool_alloc(pool_head_hlua);
+ if (!hlua) {
+ SEND_ERR(NULL, "Lua cli '%s': out of memory.\n", fcn->name);
+ return 1;
+ }
+ HLUA_INIT(hlua);
+ ctx->hlua = hlua;
+
+ /* Create the task used by signals to wake up the applet.
+ * We use the same wakeup function as the Lua applet_tcp and
+ * applet_http applets; it is fully compatible.
+ */
+ ctx->task = task_new_here();
+ if (!ctx->task) {
+ SEND_ERR(NULL, "Lua cli '%s': out of memory.\n", fcn->name);
+ goto error;
+ }
+ ctx->task->nice = 0;
+ ctx->task->context = appctx;
+ ctx->task->process = hlua_applet_wakeup;
+
+ /* Initialises the Lua context */
+ if (!hlua_ctx_init(hlua, fcn_ref_to_stack_id(fcn), ctx->task)) {
+ SEND_ERR(NULL, "Lua cli '%s': can't initialize Lua context.\n", fcn->name);
+ goto error;
+ }
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(hlua)) {
+ if (lua_type(hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(NULL, "Lua cli '%s': %s.\n", fcn->name, error);
+ goto error;
+ }
+
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua->T, 2)) {
+ SEND_ERR(NULL, "Lua cli '%s': full stack.\n", fcn->name);
+ goto error;
+ }
+
+ /* Restore the function in the stack. */
+ hlua_pushref(hlua->T, fcn->function_ref[hlua->state_id]);
+
+ /* Once the arguments are parsed, the CLI behaves like an
+ * AppletTCP, so push an AppletTCP object on the stack.
+ */
+ if (!hlua_applet_tcp_new(hlua->T, appctx)) {
+ SEND_ERR(NULL, "Lua cli '%s': full stack.\n", fcn->name);
+ goto error;
+ }
+ hlua->nargs = 1;
+
+ /* Push the command arguments on the stack. */
+ for (i = 0; *args[i]; i++) {
+ /* Check stack available size. */
+ if (!lua_checkstack(hlua->T, 1)) {
+ SEND_ERR(NULL, "Lua cli '%s': full stack.\n", fcn->name);
+ goto error;
+ }
+ lua_pushstring(hlua->T, args[i]);
+ hlua->nargs++;
+ }
+
+ /* We must initialize the execution timeouts. */
+ hlua_timer_init(&hlua->timer, hlua_timeout_session);
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(hlua);
+
+ /* It's ok */
+ return 0;
+
+ /* It's not ok. */
+error:
+ RESET_SAFE_LJMP(hlua);
+ hlua_ctx_destroy(hlua);
+ ctx->hlua = NULL;
+ return 1;
+}
+
+static int hlua_cli_io_handler_fct(struct appctx *appctx)
+{
+ struct hlua_cli_ctx *ctx = appctx->svcctx;
+ struct hlua *hlua;
+ struct stconn *sc;
+ struct hlua_function *fcn;
+
+ hlua = ctx->hlua;
+ sc = appctx_sc(appctx);
+ fcn = ctx->fcn;
+
+ /* Execute the function. */
+ switch (hlua_ctx_resume(hlua, 1)) {
+
+ /* finished. */
+ case HLUA_E_OK:
+ return 1;
+
+ /* yield. */
+ case HLUA_E_AGAIN:
+ /* We want write. */
+ if (HLUA_IS_WAKERESWR(hlua))
+ sc_need_room(sc, -1);
+ /* Set the timeout. */
+ if (hlua->wake_time != TICK_ETERNITY)
+ task_schedule(hlua->task, hlua->wake_time);
+ return 0;
+
+ /* finished with error. */
+ case HLUA_E_ERRMSG:
+ /* Display log. */
+ SEND_ERR(NULL, "Lua cli '%s': %s.\n",
+ fcn->name, lua_tostring(hlua->T, -1));
+ lua_pop(hlua->T, 1);
+ return 1;
+
+ case HLUA_E_ETMOUT:
+ SEND_ERR(NULL, "Lua cli '%s': execution timeout.\n",
+ fcn->name);
+ return 1;
+
+ case HLUA_E_NOMEM:
+ SEND_ERR(NULL, "Lua cli '%s': out of memory error.\n",
+ fcn->name);
+ return 1;
+
+ case HLUA_E_YIELD: /* unexpected */
+ SEND_ERR(NULL, "Lua cli '%s': yield not allowed.\n",
+ fcn->name);
+ return 1;
+
+ case HLUA_E_ERR:
+ /* Display log. */
+ SEND_ERR(NULL, "Lua cli '%s' returned an unknown error.\n",
+ fcn->name);
+ return 1;
+
+ default:
+ return 1;
+ }
+
+ return 1;
+}
+
+static void hlua_cli_io_release_fct(struct appctx *appctx)
+{
+ struct hlua_cli_ctx *ctx = appctx->svcctx;
+
+ hlua_ctx_destroy(ctx->hlua);
+ ctx->hlua = NULL;
+}
+
+/* This function is a Lua binding used for registering new CLI
+ * keywords. It expects a list of keywords forming the command "path"
+ * (limited to 5 entries), a description of the command, and a
+ * function used as the IO handler.
+ */
+__LJMP static int hlua_register_cli(lua_State *L)
+{
+ struct cli_kw_list *cli_kws;
+ const char *message;
+ int ref_io;
+ int len;
+ struct hlua_function *fcn = NULL;
+ int index;
+ int i;
+ struct buffer *trash;
+ const char *kw[5];
+ struct cli_kw *cli_kw;
+ const char *errmsg;
+ char *end;
+
+ MAY_LJMP(check_args(L, 3, "register_cli"));
+
+ if (hlua_gethlua(L)) {
+ /* runtime processing */
+ WILL_LJMP(luaL_error(L, "register_cli: not available outside of body context"));
+ }
+
+ /* First argument : an array of maximum 5 keywords. */
+ if (!lua_istable(L, 1))
+ WILL_LJMP(luaL_argerror(L, 1, "1st argument must be a table"));
+
+ /* Second argument : string with contextual message. */
+ message = MAY_LJMP(luaL_checkstring(L, 2));
+
+ /* Third argument : lua function used as the IO handler. */
+ ref_io = MAY_LJMP(hlua_checkfunction(L, 3));
+
+ /* Check for CLI service already registered */
+ trash = get_trash_chunk();
+ index = 0;
+ lua_pushnil(L);
+ memset(kw, 0, sizeof(kw));
+ while (lua_next(L, 1) != 0) {
+ if (index >= CLI_PREFIX_KW_NB) {
+ hlua_unref(L, ref_io);
+ WILL_LJMP(luaL_argerror(L, 1, "1st argument must be a table with a maximum of 5 entries"));
+ }
+ if (lua_type(L, -1) != LUA_TSTRING) {
+ hlua_unref(L, ref_io);
+ WILL_LJMP(luaL_argerror(L, 1, "1st argument must be a table filled with strings"));
+ }
+ kw[index] = lua_tostring(L, -1);
+ if (index == 0)
+ chunk_printf(trash, "%s", kw[index]);
+ else
+ chunk_appendf(trash, " %s", kw[index]);
+ index++;
+ lua_pop(L, 1);
+ }
+ cli_kw = cli_find_kw_exact((char **)kw);
+ if (cli_kw != NULL) {
+ fcn = cli_kw->private;
+ if (fcn->function_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register CLI keyword 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", trash->area);
+ hlua_unref(L, fcn->function_ref[hlua_state_id]);
+ }
+ fcn->function_ref[hlua_state_id] = ref_io;
+ return 0;
+ }
+
+ /* Allocate and fill the CLI keyword struct. */
+ cli_kws = calloc(1, sizeof(*cli_kws) + sizeof(struct cli_kw) * 2);
+ if (!cli_kws) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+ fcn = new_hlua_function();
+ if (!fcn) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+
+ /* Fill path. */
+ index = 0;
+ lua_pushnil(L);
+ while(lua_next(L, 1) != 0) {
+ if (index >= 5) {
+ errmsg = "1st argument must be a table with a maximum of 5 entries";
+ goto error;
+ }
+ if (lua_type(L, -1) != LUA_TSTRING) {
+ errmsg = "1st argument must be a table filled with strings";
+ goto error;
+ }
+ cli_kws->kw[0].str_kw[index] = strdup(lua_tostring(L, -1));
+ if (!cli_kws->kw[0].str_kw[index]) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+ index++;
+ lua_pop(L, 1);
+ }
+
+ /* Copy help message. */
+ cli_kws->kw[0].usage = strdup(message);
+ if (!cli_kws->kw[0].usage) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+
+ /* Fill fcn io handler. */
+ len = strlen("<lua.cli>") + 1;
+ for (i = 0; i < index; i++)
+ len += strlen(cli_kws->kw[0].str_kw[i]) + 1;
+ fcn->name = calloc(1, len);
+ if (!fcn->name) {
+ errmsg = "Lua out of memory error.";
+ goto error;
+ }
+
+ end = fcn->name;
+ len = 8;
+ memcpy(end, "<lua.cli", len);
+ end += len;
+
+ for (i = 0; i < index; i++) {
+ *(end++) = '.';
+ len = strlen(cli_kws->kw[0].str_kw[i]);
+ memcpy(end, cli_kws->kw[0].str_kw[i], len);
+ end += len;
+ }
+ *(end++) = '>';
+ *(end++) = 0;
+
+ fcn->function_ref[hlua_state_id] = ref_io;
+
+ /* Fill last entries. */
+ cli_kws->kw[0].private = fcn;
+ cli_kws->kw[0].parse = hlua_cli_parse_fct;
+ cli_kws->kw[0].io_handler = hlua_cli_io_handler_fct;
+ cli_kws->kw[0].io_release = hlua_cli_io_release_fct;
+
+ /* Register these new CLI keywords */
+ cli_register_kw(cli_kws);
+
+ return 0;
+
+ error:
+ release_hlua_function(fcn);
+ hlua_unref(L, ref_io);
+ if (cli_kws) {
+ for (i = 0; i < index; i++)
+ ha_free((char **)&(cli_kws->kw[0].str_kw[i]));
+ ha_free((char **)&(cli_kws->kw[0].usage));
+ }
+ ha_free(&cli_kws);
+ WILL_LJMP(luaL_error(L, errmsg));
+ return 0; /* Never reached */
+}
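+
+/* Illustrative Lua-side sketch (hypothetical keywords): the binding above
+ * is exposed to scripts as core.register_cli(). The handler receives an
+ * AppletTCP object:
+ *
+ *   core.register_cli({ "show", "lua-hello" }, "Show a greeting",
+ *                     function(applet)
+ *                         applet:send("hello\n")
+ *                     end)
+ *
+ * The command then becomes available as "show lua-hello" on the CLI socket.
+ */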
+
+static int hlua_filter_init_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ struct hlua_flt_config *conf = fconf->conf;
+ lua_State *L;
+ int error, pos, state_id, flt_ref;
+
+ state_id = reg_flt_to_stack_id(conf->reg);
+ L = hlua_states[state_id];
+ pos = lua_gettop(L);
+
+ /* The filter parsing function */
+ hlua_pushref(L, conf->reg->fun_ref[state_id]);
+
+ /* Push the filter class on the stack and resolve all callbacks */
+ hlua_pushref(L, conf->reg->flt_ref[state_id]);
+
+ /* Duplicate the filter class so each filter will have its own copy */
+ lua_newtable(L);
+ lua_pushnil(L);
+
+ while (lua_next(L, pos+2)) {
+ lua_pushvalue(L, -2);
+ lua_insert(L, -2);
+ lua_settable(L, -4);
+ }
+ flt_ref = hlua_ref(L);
+
+ /* Remove the original lua filter class from the stack */
+ lua_pop(L, 1);
+
+ /* Push the copy on the stack */
+ hlua_pushref(L, flt_ref);
+
+ /* extra args are pushed in a table */
+ lua_newtable(L);
+ for (pos = 0; conf->args[pos]; pos++) {
+ /* Check stack available size. */
+ if (!lua_checkstack(L, 1)) {
+ ha_alert("Lua filter '%s' : Lua error : full stack.", conf->reg->name);
+ goto error;
+ }
+ lua_pushstring(L, conf->args[pos]);
+ lua_rawseti(L, -2, lua_rawlen(L, -2) + 1);
+ }
+
+ error = lua_pcall(L, 2, LUA_MULTRET, 0);
+ switch (error) {
+ case LUA_OK:
+ /* replace the filter ref */
+ conf->ref[state_id] = flt_ref;
+ break;
+ case LUA_ERRRUN:
+ ha_alert("Lua filter '%s' : runtime error : %s", conf->reg->name, lua_tostring(L, -1));
+ goto error;
+ case LUA_ERRMEM:
+ ha_alert("Lua filter '%s' : out of memory error", conf->reg->name);
+ goto error;
+ case LUA_ERRERR:
+ ha_alert("Lua filter '%s' : message handler error : %s", conf->reg->name, lua_tostring(L, -1));
+ goto error;
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM <= 503
+ case LUA_ERRGCMM:
+ ha_alert("Lua filter '%s' : garbage collector error : %s", conf->reg->name, lua_tostring(L, -1));
+ goto error;
+#endif
+ default:
+ ha_alert("Lua filter '%s' : unknown error : %s", conf->reg->name, lua_tostring(L, -1));
+ goto error;
+ }
+
+ lua_settop(L, 0);
+ return 0;
+
+ error:
+ lua_settop(L, 0);
+ return -1;
+}
+
+static void hlua_filter_deinit_per_thread(struct proxy *px, struct flt_conf *fconf)
+{
+ struct hlua_flt_config *conf = fconf->conf;
+ lua_State *L;
+ int state_id;
+
+ if (!conf)
+ return;
+
+ state_id = reg_flt_to_stack_id(conf->reg);
+ L = hlua_states[state_id];
+ hlua_unref(L, conf->ref[state_id]);
+}
+
+static int hlua_filter_init(struct proxy *px, struct flt_conf *fconf)
+{
+ struct hlua_flt_config *conf = fconf->conf;
+ int state_id = reg_flt_to_stack_id(conf->reg);
+
+ /* Rely on per-thread init for global scripts */
+ if (!state_id)
+ return hlua_filter_init_per_thread(px, fconf);
+ return 0;
+}
+
+static void hlua_filter_deinit(struct proxy *px, struct flt_conf *fconf)
+{
+
+ if (fconf->conf) {
+ struct hlua_flt_config *conf = fconf->conf;
+ int state_id = reg_flt_to_stack_id(conf->reg);
+ int pos;
+
+ /* Rely on per-thread deinit for global scripts */
+ if (!state_id)
+ hlua_filter_deinit_per_thread(px, fconf);
+
+ for (pos = 0; conf->args[pos]; pos++)
+ free(conf->args[pos]);
+ free(conf->args);
+ }
+ ha_free(&fconf->conf);
+ ha_free((char **)&fconf->id);
+ ha_free(&fconf->ops);
+}
+
+static int hlua_filter_new(struct stream *s, struct filter *filter)
+{
+ struct hlua_flt_config *conf = FLT_CONF(filter);
+ struct hlua_flt_ctx *flt_ctx = NULL;
+ int ret = 1;
+
+ if (!hlua_stream_ctx_prepare(s, reg_flt_to_stack_id(conf->reg))) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize filter Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+
+ flt_ctx = pool_zalloc(pool_head_hlua_flt_ctx);
+ if (!flt_ctx) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize filter Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+ flt_ctx->hlua[0] = pool_alloc(pool_head_hlua);
+ flt_ctx->hlua[1] = pool_alloc(pool_head_hlua);
+ if (!flt_ctx->hlua[0] || !flt_ctx->hlua[1]) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize filter Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+ HLUA_INIT(flt_ctx->hlua[0]);
+ HLUA_INIT(flt_ctx->hlua[1]);
+ if (!hlua_ctx_init(flt_ctx->hlua[0], reg_flt_to_stack_id(conf->reg), s->task) ||
+ !hlua_ctx_init(flt_ctx->hlua[1], reg_flt_to_stack_id(conf->reg), s->task)) {
+ SEND_ERR(s->be, "Lua filter '%s': can't initialize filter Lua context.\n",
+ conf->reg->name);
+ ret = 0;
+ goto end;
+ }
+
+ if (!HLUA_IS_RUNNING(s->hlua)) {
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(s->hlua)) {
+ const char *error;
+
+ if (lua_type(s->hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(s->hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(s->be, "Lua filter '%s': %s.\n", conf->reg->name, error);
+ ret = 0;
+ goto end;
+ }
+
+ /* Check stack size. */
+ if (!lua_checkstack(s->hlua->T, 1)) {
+ SEND_ERR(s->be, "Lua filter '%s': full stack.\n", conf->reg->name);
+ RESET_SAFE_LJMP(s->hlua);
+ ret = 0;
+ goto end;
+ }
+
+ hlua_pushref(s->hlua->T, conf->ref[s->hlua->state_id]);
+ if (lua_getfield(s->hlua->T, -1, "new") != LUA_TFUNCTION) {
+ SEND_ERR(s->be, "Lua filter '%s': 'new' field is not a function.\n",
+ conf->reg->name);
+ RESET_SAFE_LJMP(s->hlua);
+ ret = 0;
+ goto end;
+ }
+ lua_insert(s->hlua->T, -2);
+
+ /* Push the copy on the stack */
+ s->hlua->nargs = 1;
+
+ /* We must initialize the execution timeouts. */
+ hlua_timer_init(&s->hlua->timer, hlua_timeout_session);
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(s->hlua);
+ }
+
+ switch (hlua_ctx_resume(s->hlua, 0)) {
+ case HLUA_E_OK:
+ /* Nothing returned or not a table, ignore the filter for the current stream */
+ if (!lua_gettop(s->hlua->T) || !lua_istable(s->hlua->T, 1)) {
+ ret = 0;
+ goto end;
+ }
+
+ /* Attach the filter pointer to the ctx */
+ lua_pushstring(s->hlua->T, "__filter");
+ lua_pushlightuserdata(s->hlua->T, filter);
+ lua_settable(s->hlua->T, -3);
+
+ /* Save a ref on the filter ctx */
+ lua_pushvalue(s->hlua->T, 1);
+ flt_ctx->ref = hlua_ref(s->hlua->T);
+ filter->ctx = flt_ctx;
+ break;
+ case HLUA_E_ERRMSG:
+ SEND_ERR(s->be, "Lua filter '%s' : %s.\n", conf->reg->name, lua_tostring(s->hlua->T, -1));
+ ret = -1;
+ goto end;
+ case HLUA_E_ETMOUT:
+ SEND_ERR(s->be, "Lua filter '%s' : 'new' execution timeout.\n", conf->reg->name);
+ ret = 0;
+ goto end;
+ case HLUA_E_NOMEM:
+ SEND_ERR(s->be, "Lua filter '%s' : out of memory error.\n", conf->reg->name);
+ ret = 0;
+ goto end;
+ case HLUA_E_AGAIN:
+ case HLUA_E_YIELD:
+ SEND_ERR(s->be, "Lua filter '%s': yield functions like core.tcp() or core.sleep()"
+ " are not allowed from 'new' function.\n", conf->reg->name);
+ ret = 0;
+ goto end;
+ case HLUA_E_ERR:
+ SEND_ERR(s->be, "Lua filter '%s': 'new' returns an unknown error.\n", conf->reg->name);
+ ret = 0;
+ goto end;
+ default:
+ ret = 0;
+ goto end;
+ }
+
+ end:
+ if (s->hlua)
+ lua_settop(s->hlua->T, 0);
+ if (ret <= 0) {
+ if (flt_ctx) {
+ hlua_ctx_destroy(flt_ctx->hlua[0]);
+ hlua_ctx_destroy(flt_ctx->hlua[1]);
+ pool_free(pool_head_hlua_flt_ctx, flt_ctx);
+ }
+ }
+ return ret;
+}
+
+static void hlua_filter_delete(struct stream *s, struct filter *filter)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ hlua_unref(s->hlua->T, flt_ctx->ref);
+ hlua_ctx_destroy(flt_ctx->hlua[0]);
+ hlua_ctx_destroy(flt_ctx->hlua[1]);
+ pool_free(pool_head_hlua_flt_ctx, flt_ctx);
+ filter->ctx = NULL;
+}
+
+static int hlua_filter_from_payload(struct filter *filter)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ return (flt_ctx && !!(flt_ctx->flags & HLUA_FLT_CTX_FL_PAYLOAD));
+}
+
+static int hlua_filter_callback(struct stream *s, struct filter *filter, const char *fun,
+ int dir, unsigned int flags)
+{
+ struct hlua *flt_hlua;
+ struct hlua_flt_config *conf = FLT_CONF(filter);
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ unsigned int hflags = HLUA_TXN_FLT_CTX;
+ int ret = 1;
+
+ flt_hlua = flt_ctx->hlua[(dir == SMP_OPT_DIR_REQ ? 0 : 1)];
+ if (!flt_hlua)
+ goto end;
+
+ if (!HLUA_IS_RUNNING(flt_hlua)) {
+ int extra_idx = lua_gettop(flt_hlua->T);
+
+ /* The following Lua calls can fail. */
+ if (!SET_SAFE_LJMP(flt_hlua)) {
+ const char *error;
+
+ if (lua_type(flt_hlua->T, -1) == LUA_TSTRING)
+ error = lua_tostring(flt_hlua->T, -1);
+ else
+ error = "critical error";
+ SEND_ERR(s->be, "Lua filter '%s': %s.\n", conf->reg->name, error);
+ goto end;
+ }
+
+ /* Check stack size. */
+ if (!lua_checkstack(flt_hlua->T, 3)) {
+ SEND_ERR(s->be, "Lua filter '%s': full stack.\n", conf->reg->name);
+ RESET_SAFE_LJMP(flt_hlua);
+ goto end;
+ }
+
+ hlua_pushref(flt_hlua->T, flt_ctx->ref);
+ if (lua_getfield(flt_hlua->T, -1, fun) != LUA_TFUNCTION) {
+ RESET_SAFE_LJMP(flt_hlua);
+ goto end;
+ }
+ lua_insert(flt_hlua->T, -2);
+
+ if (!hlua_txn_new(flt_hlua->T, s, s->be, dir, hflags)) {
+ SEND_ERR(s->be, "Lua filter '%s': full stack.\n", conf->reg->name);
+ RESET_SAFE_LJMP(flt_hlua);
+ goto end;
+ }
+ flt_hlua->nargs = 2;
+
+ if (flags & HLUA_FLT_CB_ARG_CHN) {
+ if (dir == SMP_OPT_DIR_REQ)
+ lua_getfield(flt_hlua->T, -1, "req");
+ else
+ lua_getfield(flt_hlua->T, -1, "res");
+ if (lua_type(flt_hlua->T, -1) == LUA_TTABLE) {
+ lua_pushstring(flt_hlua->T, "__filter");
+ lua_pushlightuserdata(flt_hlua->T, filter);
+ lua_settable(flt_hlua->T, -3);
+ }
+ flt_hlua->nargs++;
+ }
+ else if (flags & HLUA_FLT_CB_ARG_HTTP_MSG) {
+ if (dir == SMP_OPT_DIR_REQ)
+ lua_getfield(flt_hlua->T, -1, "http_req");
+ else
+ lua_getfield(flt_hlua->T, -1, "http_res");
+ if (lua_type(flt_hlua->T, -1) == LUA_TTABLE) {
+ lua_pushstring(flt_hlua->T, "__filter");
+ lua_pushlightuserdata(flt_hlua->T, filter);
+ lua_settable(flt_hlua->T, -3);
+ }
+ flt_hlua->nargs++;
+ }
+
+ /* Check stack size. */
+ if (!lua_checkstack(flt_hlua->T, 1)) {
+ SEND_ERR(s->be, "Lua filter '%s': full stack.\n", conf->reg->name);
+ RESET_SAFE_LJMP(flt_hlua);
+ goto end;
+ }
+
+ while (extra_idx--) {
+ lua_pushvalue(flt_hlua->T, 1);
+ lua_remove(flt_hlua->T, 1);
+ flt_hlua->nargs++;
+ }
+
+ /* We must initialize the execution timeouts. */
+ hlua_timer_init(&flt_hlua->timer, hlua_timeout_session);
+
+ /* At this point the execution is safe. */
+ RESET_SAFE_LJMP(flt_hlua);
+ }
+
+ switch (hlua_ctx_resume(flt_hlua, !(flags & HLUA_FLT_CB_FINAL))) {
+ case HLUA_E_OK:
+ /* Catch the return value if required */
+ if ((flags & HLUA_FLT_CB_RETVAL) && lua_gettop(flt_hlua->T) > 0) {
+ ret = lua_tointeger(flt_hlua->T, -1);
+ lua_settop(flt_hlua->T, 0); /* Empty the stack. */
+ }
+
+ /* Set timeout in the required channel. */
+ if (flt_hlua->wake_time != TICK_ETERNITY) {
+ if (dir == SMP_OPT_DIR_REQ)
+ s->req.analyse_exp = flt_hlua->wake_time;
+ else
+ s->res.analyse_exp = flt_hlua->wake_time;
+ }
+ break;
+ case HLUA_E_AGAIN:
+ /* Set timeout in the required channel. */
+ if (flt_hlua->wake_time != TICK_ETERNITY) {
+ if (dir == SMP_OPT_DIR_REQ)
+ s->req.analyse_exp = flt_hlua->wake_time;
+ else
+ s->res.analyse_exp = flt_hlua->wake_time;
+ }
+ /* Some actions can be woken up when a "write" event
+ * is detected on a response channel. This is only
+ * useful for actions targeting the request channel.
+ */
+ if (HLUA_IS_WAKERESWR(flt_hlua))
+ s->res.flags |= CF_WAKE_WRITE;
+ if (HLUA_IS_WAKEREQWR(flt_hlua))
+ s->req.flags |= CF_WAKE_WRITE;
+ ret = 0;
+ goto end;
+ case HLUA_E_ERRMSG:
+ SEND_ERR(s->be, "Lua filter '%s' : %s.\n", conf->reg->name, lua_tostring(flt_hlua->T, -1));
+ ret = -1;
+ goto end;
+ case HLUA_E_ETMOUT:
+ SEND_ERR(s->be, "Lua filter '%s' : '%s' callback execution timeout.\n", conf->reg->name, fun);
+ goto end;
+ case HLUA_E_NOMEM:
+ SEND_ERR(s->be, "Lua filter '%s' : out of memory error.\n", conf->reg->name);
+ goto end;
+ case HLUA_E_YIELD:
+ SEND_ERR(s->be, "Lua filter '%s': yield functions like core.tcp() or core.sleep()"
+ " are not allowed from '%s' callback.\n", conf->reg->name, fun);
+ goto end;
+ case HLUA_E_ERR:
+ SEND_ERR(s->be, "Lua filter '%s': '%s' returns an unknown error.\n", conf->reg->name, fun);
+ goto end;
+ default:
+ goto end;
+ }
+
+
+ end:
+ return ret;
+}
+
+static int hlua_filter_start_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_ctx->flags = 0;
+ return hlua_filter_callback(s, filter, "start_analyze",
+ (!(chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES),
+ (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_RETVAL | HLUA_FLT_CB_ARG_CHN));
+}
+
+static int hlua_filter_end_analyze(struct stream *s, struct filter *filter, struct channel *chn)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_ctx->flags &= ~HLUA_FLT_CTX_FL_PAYLOAD;
+ return hlua_filter_callback(s, filter, "end_analyze",
+ (!(chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES),
+ (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_RETVAL | HLUA_FLT_CB_ARG_CHN));
+}
+
+static int hlua_filter_http_headers(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_ctx->flags &= ~HLUA_FLT_CTX_FL_PAYLOAD;
+ return hlua_filter_callback(s, filter, "http_headers",
+ (!(msg->chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES),
+ (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_RETVAL | HLUA_FLT_CB_ARG_HTTP_MSG));
+}
+
+static int hlua_filter_http_payload(struct stream *s, struct filter *filter, struct http_msg *msg,
+ unsigned int offset, unsigned int len)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ struct hlua *flt_hlua;
+ int dir = (!(msg->chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES);
+ int idx = (dir == SMP_OPT_DIR_REQ ? 0 : 1);
+ int ret;
+
+ flt_hlua = flt_ctx->hlua[idx];
+ flt_ctx->cur_off[idx] = offset;
+ flt_ctx->cur_len[idx] = len;
+ flt_ctx->flags |= HLUA_FLT_CTX_FL_PAYLOAD;
+ ret = hlua_filter_callback(s, filter, "http_payload", dir, (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_ARG_HTTP_MSG));
+ if (ret != -1) {
+ ret = flt_ctx->cur_len[idx];
+ if (lua_gettop(flt_hlua->T) > 0) {
+ ret = lua_tointeger(flt_hlua->T, -1);
+ if (ret > flt_ctx->cur_len[idx])
+ ret = flt_ctx->cur_len[idx];
+ lua_settop(flt_hlua->T, 0); /* Empty the stack. */
+ }
+ }
+ return ret;
+}
+
+static int hlua_filter_http_end(struct stream *s, struct filter *filter, struct http_msg *msg)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+
+ flt_ctx->flags &= ~HLUA_FLT_CTX_FL_PAYLOAD;
+ return hlua_filter_callback(s, filter, "http_end",
+ (!(msg->chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES),
+ (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_RETVAL | HLUA_FLT_CB_ARG_HTTP_MSG));
+}
+
+static int hlua_filter_tcp_payload(struct stream *s, struct filter *filter, struct channel *chn,
+ unsigned int offset, unsigned int len)
+{
+ struct hlua_flt_ctx *flt_ctx = filter->ctx;
+ struct hlua *flt_hlua;
+ int dir = (!(chn->flags & CF_ISRESP) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES);
+ int idx = (dir == SMP_OPT_DIR_REQ ? 0 : 1);
+ int ret;
+
+ flt_hlua = flt_ctx->hlua[idx];
+ flt_ctx->cur_off[idx] = offset;
+ flt_ctx->cur_len[idx] = len;
+ flt_ctx->flags |= HLUA_FLT_CTX_FL_PAYLOAD;
+ ret = hlua_filter_callback(s, filter, "tcp_payload", dir, (HLUA_FLT_CB_FINAL | HLUA_FLT_CB_ARG_CHN));
+ if (ret != -1) {
+ ret = flt_ctx->cur_len[idx];
+ if (lua_gettop(flt_hlua->T) > 0) {
+ ret = lua_tointeger(flt_hlua->T, -1);
+ if (ret > flt_ctx->cur_len[idx])
+ ret = flt_ctx->cur_len[idx];
+ lua_settop(flt_hlua->T, 0); /* Empty the stack. */
+ }
+ }
+ return ret;
+}
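+
+/* Illustrative Lua-side sketch (assumed filter class name "MyFilter"): for
+ * both payload callbacks above, an integer returned by the Lua callback is
+ * used as the number of bytes to forward, capped to the filtered length;
+ * when nothing is returned, the whole filtered length is forwarded:
+ *
+ *   function MyFilter:tcp_payload(txn, chn)
+ *       return 0  -- hold the payload back for now, forward nothing
+ *   end
+ */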
+
+static int hlua_filter_parse_fct(char **args, int *cur_arg, struct proxy *px,
+ struct flt_conf *fconf, char **err, void *private)
+{
+ struct hlua_reg_filter *reg_flt = private;
+ lua_State *L;
+ struct hlua_flt_config *conf = NULL;
+ const char *flt_id = NULL;
+ int state_id, pos, flt_flags = 0;
+ struct flt_ops *hlua_flt_ops = NULL;
+
+ state_id = reg_flt_to_stack_id(reg_flt);
+ L = hlua_states[state_id];
+
+ /* Initialize the filter ops with default callbacks */
+ hlua_flt_ops = calloc(1, sizeof(*hlua_flt_ops));
+ if (!hlua_flt_ops)
+ goto error;
+ hlua_flt_ops->init = hlua_filter_init;
+ hlua_flt_ops->deinit = hlua_filter_deinit;
+ if (state_id) {
+ /* Set per-thread callback if script is loaded per-thread */
+ hlua_flt_ops->init_per_thread = hlua_filter_init_per_thread;
+ hlua_flt_ops->deinit_per_thread = hlua_filter_deinit_per_thread;
+ }
+ hlua_flt_ops->attach = hlua_filter_new;
+ hlua_flt_ops->detach = hlua_filter_delete;
+
+ /* Push the filter class on the stack and resolve all callbacks */
+ hlua_pushref(L, reg_flt->flt_ref[state_id]);
+
+ if (lua_getfield(L, -1, "start_analyze") == LUA_TFUNCTION)
+ hlua_flt_ops->channel_start_analyze = hlua_filter_start_analyze;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "end_analyze") == LUA_TFUNCTION)
+ hlua_flt_ops->channel_end_analyze = hlua_filter_end_analyze;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "http_headers") == LUA_TFUNCTION)
+ hlua_flt_ops->http_headers = hlua_filter_http_headers;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "http_payload") == LUA_TFUNCTION)
+ hlua_flt_ops->http_payload = hlua_filter_http_payload;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "http_end") == LUA_TFUNCTION)
+ hlua_flt_ops->http_end = hlua_filter_http_end;
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "tcp_payload") == LUA_TFUNCTION)
+ hlua_flt_ops->tcp_payload = hlua_filter_tcp_payload;
+ lua_pop(L, 1);
+
+ /* Get id and flags of the filter class */
+ if (lua_getfield(L, -1, "id") == LUA_TSTRING)
+ flt_id = lua_tostring(L, -1);
+ lua_pop(L, 1);
+ if (lua_getfield(L, -1, "flags") == LUA_TNUMBER)
+ flt_flags = lua_tointeger(L, -1);
+ lua_pop(L, 1);
+
+ /* Create the filter config */
+ conf = calloc(1, sizeof(*conf));
+ if (!conf)
+ goto error;
+ conf->reg = reg_flt;
+
+ /* duplicate args */
+ for (pos = 0; *args[*cur_arg + 1 + pos]; pos++);
+ conf->args = calloc(pos + 1, sizeof(*conf->args));
+ if (!conf->args)
+ goto error;
+ for (pos = 0; *args[*cur_arg + 1 + pos]; pos++) {
+ conf->args[pos] = strdup(args[*cur_arg + 1 + pos]);
+ if (!conf->args[pos])
+ goto error;
+ }
+ conf->args[pos] = NULL;
+ *cur_arg += pos + 1;
+
+ if (flt_id) {
+ fconf->id = strdup(flt_id);
+ if (!fconf->id)
+ goto error;
+ }
+ fconf->flags = flt_flags;
+ fconf->conf = conf;
+ fconf->ops = hlua_flt_ops;
+
+ lua_settop(L, 0);
+ return 0;
+
+ error:
+ memprintf(err, "Lua filter '%s' : Lua out of memory error", reg_flt->name);
+ free(hlua_flt_ops);
+ if (conf && conf->args) {
+ for (pos = 0; conf->args[pos]; pos++)
+ free(conf->args[pos]);
+ free(conf->args);
+ }
+ free(conf);
+ free((char *)fconf->id);
+ lua_settop(L, 0);
+ return -1;
+}
+
+__LJMP static int hlua_register_data_filter(lua_State *L)
+{
+ struct filter *filter;
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 2, "register_data_filter"));
+ MAY_LJMP(luaL_checktype(L, 1, LUA_TTABLE));
+ chn = MAY_LJMP(hlua_checkchannel(L, 2));
+
+ lua_getfield(L, 1, "__filter");
+ MAY_LJMP(luaL_checktype(L, -1, LUA_TLIGHTUSERDATA));
+ filter = lua_touserdata (L, -1);
+ lua_pop(L, 1);
+
+ register_data_filter(chn_strm(chn), chn, filter);
+ return 1;
+}
+
+__LJMP static int hlua_unregister_data_filter(lua_State *L)
+{
+ struct filter *filter;
+ struct channel *chn;
+
+ MAY_LJMP(check_args(L, 2, "unregister_data_filter"));
+ MAY_LJMP(luaL_checktype(L, 1, LUA_TTABLE));
+ chn = MAY_LJMP(hlua_checkchannel(L, 2));
+
+ lua_getfield(L, 1, "__filter");
+ MAY_LJMP(luaL_checktype(L, -1, LUA_TLIGHTUSERDATA));
+ filter = lua_touserdata (L, -1);
+ lua_pop(L, 1);
+
+ unregister_data_filter(chn_strm(chn), chn, filter);
+ return 1;
+}
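+
+/* Illustrative Lua-side sketch (assumed filter class name "MyFilter"): data
+ * filtering is opt-in per channel and is typically enabled from the
+ * start_analyze callback using the binding above:
+ *
+ *   function MyFilter:start_analyze(txn, chn)
+ *       filter.register_data_filter(self, chn)
+ *   end
+ */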
+
+/* This function is a Lua binding used for registering a filter. It
+ * expects a filter name used in the HAProxy configuration file, the
+ * filter class (a table), and a Lua function to parse configuration
+ * arguments.
+ */
+__LJMP static int hlua_register_filter(lua_State *L)
+{
+ struct buffer *trash;
+ struct flt_kw_list *fkl;
+ struct flt_kw *fkw;
+ const char *name;
+ struct hlua_reg_filter *reg_flt= NULL;
+ int flt_ref, fun_ref;
+ int len;
+
+ MAY_LJMP(check_args(L, 3, "register_filter"));
+
+ if (hlua_gethlua(L)) {
+ /* runtime processing */
+ WILL_LJMP(luaL_error(L, "register_filter: not available outside of body context"));
+ }
+
+ /* First argument : filter name. */
+ name = MAY_LJMP(luaL_checkstring(L, 1));
+
+ /* Second argument : The filter class */
+ flt_ref = MAY_LJMP(hlua_checktable(L, 2));
+
+ /* Third argument : lua function. */
+ fun_ref = MAY_LJMP(hlua_checkfunction(L, 3));
+
+ trash = get_trash_chunk();
+ chunk_printf(trash, "lua.%s", name);
+ fkw = flt_find_kw(trash->area);
+ if (fkw != NULL) {
+ reg_flt = fkw->private;
+ if (reg_flt->flt_ref[hlua_state_id] != -1 || reg_flt->fun_ref[hlua_state_id] != -1) {
+ ha_warning("Trying to register filter 'lua.%s' more than once. "
+ "This will become a hard error in version 2.5.\n", name);
+ if (reg_flt->flt_ref[hlua_state_id] != -1)
+ hlua_unref(L, reg_flt->flt_ref[hlua_state_id]);
+ if (reg_flt->fun_ref[hlua_state_id] != -1)
+ hlua_unref(L, reg_flt->fun_ref[hlua_state_id]);
+ }
+ reg_flt->flt_ref[hlua_state_id] = flt_ref;
+ reg_flt->fun_ref[hlua_state_id] = fun_ref;
+ return 0;
+ }
+
+ fkl = calloc(1, sizeof(*fkl) + sizeof(struct flt_kw) * 2);
+ if (!fkl)
+ goto alloc_error;
+ fkl->scope = "HLUA";
+
+ reg_flt = new_hlua_reg_filter(name);
+ if (!reg_flt)
+ goto alloc_error;
+
+ reg_flt->flt_ref[hlua_state_id] = flt_ref;
+ reg_flt->fun_ref[hlua_state_id] = fun_ref;
+
+ /* The filter keyword */
+ len = strlen("lua.") + strlen(name) + 1;
+ fkl->kw[0].kw = calloc(1, len);
+ if (!fkl->kw[0].kw)
+ goto alloc_error;
+
+ snprintf((char *)fkl->kw[0].kw, len, "lua.%s", name);
+
+ fkl->kw[0].parse = hlua_filter_parse_fct;
+ fkl->kw[0].private = reg_flt;
+ memset(&fkl->kw[1], 0, sizeof(*fkl->kw));
+
+ /* Register this new filter */
+ flt_register_keywords(fkl);
+
+ return 0;
+
+ alloc_error:
+ release_hlua_reg_filter(reg_flt);
+ hlua_unref(L, flt_ref);
+ hlua_unref(L, fun_ref);
+ ha_free(&fkl);
+ WILL_LJMP(luaL_error(L, "Lua out of memory error."));
+ return 0; /* Never reached */
+}
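+
+/* Illustrative sketch (hypothetical filter name): the binding above is
+ * exposed to scripts as core.register_filter(). It takes the keyword name,
+ * the filter class and an argument-parsing function:
+ *
+ *   local MyFilter = {}
+ *   MyFilter.id = "My Lua filter"
+ *
+ *   core.register_filter("my-filter", MyFilter, function(flt, args)
+ *       return flt
+ *   end)
+ *
+ * The filter is then referenced from the configuration as:
+ *
+ *   filter lua.my-filter arg1 arg2
+ */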
+
+static int hlua_read_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err, unsigned int *timeout)
+{
+ const char *error;
+
+ error = parse_time_err(args[1], timeout, TIME_UNIT_MS);
+ if (error == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument <%s> to <%s> (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (error == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument <%s> to <%s> (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (error) {
+ memprintf(err, "%s: invalid timeout", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static int hlua_burst_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return hlua_read_timeout(args, section_type, curpx, defpx,
+ file, line, err, &hlua_timeout_burst);
+}
+
+static int hlua_session_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return hlua_read_timeout(args, section_type, curpx, defpx,
+ file, line, err, &hlua_timeout_session);
+}
+
+static int hlua_task_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return hlua_read_timeout(args, section_type, curpx, defpx,
+ file, line, err, &hlua_timeout_task);
+}
+
+static int hlua_applet_timeout(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return hlua_read_timeout(args, section_type, curpx, defpx,
+ file, line, err, &hlua_timeout_applet);
+}
+
+static int hlua_forced_yield(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *error;
+
+ hlua_nb_instruction = strtoll(args[1], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "%s: invalid number", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static int hlua_parse_maxmem(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *error;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects an integer argument (Lua memory size in MB).", args[0]);
+ return -1;
+ }
+ hlua_global_allocator.limit = strtoll(args[1], &error, 10) * 1024L * 1024L;
+ if (*error != '\0') {
+ memprintf(err, "%s: invalid number %s (error at '%c')", args[0], args[1], *error);
+ return -1;
+ }
+ return 0;
+}
+
+static int hlua_cfg_parse_log_loggers(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ hlua_log_opts |= HLUA_LOG_LOGGERS_ON;
+ else if (strcmp(args[1], "off") == 0)
+ hlua_log_opts &= ~HLUA_LOG_LOGGERS_ON;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+static int hlua_cfg_parse_log_stderr(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ hlua_log_opts = (hlua_log_opts & ~HLUA_LOG_STDERR_MASK) | HLUA_LOG_STDERR_ON;
+ else if (strcmp(args[1], "auto") == 0)
+ hlua_log_opts = (hlua_log_opts & ~HLUA_LOG_STDERR_MASK) | HLUA_LOG_STDERR_AUTO;
+ else if (strcmp(args[1], "off") == 0)
+ hlua_log_opts &= ~HLUA_LOG_STDERR_MASK;
+ else {
+ memprintf(err, "'%s' expects either 'on', 'auto', or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
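+
+/* A minimal sketch (illustrative values) of how the tunables parsed above
+ * may appear in the "global" section of a configuration:
+ *
+ *   global
+ *       tune.lua.forced-yield 10000
+ *       tune.lua.maxmem 200
+ *       tune.lua.log.loggers on
+ *       tune.lua.log.stderr auto
+ */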
+
+/* This function is called by the main configuration key "lua-load". It loads and
+ * executes a Lua file during the parsing of the HAProxy configuration file. It is
+ * the main Lua entry point.
+ *
+ * This function runs with the HAProxy keywords API. It returns -1 if an error
+ * occurs, otherwise it returns 0.
+ *
+ * In some error cases, Lua sets an error message on top of the stack. This function
+ * reports this error message in the HAProxy logs and pops it from the stack.
+ *
+ * This function can fail with an abort() due to a Lua critical error.
+ * We are in the configuration parsing process of HAProxy, so this abort() is
+ * tolerated.
+ */
+static int hlua_load_state(char **args, lua_State *L, char **err)
+{
+ int error;
+ int nargs;
+
+ /* Just load and compile the file. */
+ error = luaL_loadfile(L, args[0]);
+ if (error) {
+ memprintf(err, "error in Lua file '%s': %s", args[0], lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+ }
+
+ /* Push args in the Lua stack, except the first one which is the filename */
+ for (nargs = 1; *(args[nargs]) != 0; nargs++) {
+ /* Check stack size. */
+ if (!lua_checkstack(L, 1)) {
+ memprintf(err, "Lua runtime error while loading arguments: stack is full.");
+ return -1;
+ }
+ lua_pushstring(L, args[nargs]);
+ }
+ nargs--;
+
+	/* If no syntax error was detected, execute the code. */
+ error = lua_pcall(L, nargs, LUA_MULTRET, 0);
+ switch (error) {
+ case LUA_OK:
+ break;
+ case LUA_ERRRUN:
+ memprintf(err, "Lua runtime error: %s", lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+ case LUA_ERRMEM:
+ memprintf(err, "Lua out of memory error");
+ return -1;
+ case LUA_ERRERR:
+ memprintf(err, "Lua message handler error: %s", lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM <= 503
+ case LUA_ERRGCMM:
+ memprintf(err, "Lua garbage collector error: %s", lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+#endif
+ default:
+ memprintf(err, "Lua unknown error: %s", lua_tostring(L, -1));
+ lua_pop(L, 1);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int hlua_load(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a file name as parameter.", args[0]);
+ return -1;
+ }
+
+ /* loading for global state */
+ hlua_state_id = 0;
+ ha_set_thread(NULL);
+ return hlua_load_state(&args[1], hlua_states[0], err);
+}
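+
+/* Example (illustrative): "lua-load /etc/haproxy/init.lua foo bar" loads the
+ * file into the shared state and passes "foo" and "bar" to the chunk, which
+ * can read them from Lua with the vararg expression: local a, b = ...
+ */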
+
+static int hlua_load_per_thread(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int len;
+ int i;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "'%s' expects a file as parameter.", args[0]);
+ return -1;
+ }
+
+ if (per_thread_load == NULL) {
+ /* allocate the first entry large enough to store the final NULL */
+ per_thread_load = calloc(1, sizeof(*per_thread_load));
+ if (per_thread_load == NULL) {
+ memprintf(err, "out of memory error");
+ return -1;
+ }
+ }
+
+ /* count used entries */
+ for (len = 0; per_thread_load[len] != NULL; len++)
+ ;
+
+ per_thread_load = realloc(per_thread_load, (len + 2) * sizeof(*per_thread_load));
+ if (per_thread_load == NULL) {
+ memprintf(err, "out of memory error");
+ return -1;
+ }
+ per_thread_load[len + 1] = NULL;
+
+ /* count args excepting the first, allocate array and copy args */
+ for (i = 0; *(args[i + 1]) != 0; i++);
+ per_thread_load[len] = calloc(i + 1, sizeof(*per_thread_load[len]));
+ if (per_thread_load[len] == NULL) {
+ memprintf(err, "out of memory error");
+ return -1;
+ }
+ for (i = 1; *(args[i]) != 0; i++) {
+ per_thread_load[len][i - 1] = strdup(args[i]);
+ if (per_thread_load[len][i - 1] == NULL) {
+ memprintf(err, "out of memory error");
+ return -1;
+ }
+ }
+ per_thread_load[len][i - 1] = strdup("");
+ if (per_thread_load[len][i - 1] == NULL) {
+ memprintf(err, "out of memory error");
+ return -1;
+ }
+
+ /* loading for thread 1 only */
+ hlua_state_id = 1;
+ ha_set_thread(NULL);
+ return hlua_load_state(per_thread_load[len], hlua_states[1], err);
+}
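+
+/* Example (illustrative): "lua-load-per-thread /etc/haproxy/per.lua x"
+ * records {"/etc/haproxy/per.lua", "x", ""} in per_thread_load, loads the
+ * file immediately into state 1, and hlua_post_init() later replays it for
+ * states 2..nbthread.
+ */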
+
+/* Prepend the given <path> followed by a semicolon to the `package.<type>` variable
+ * in the given Lua state <L>.
+ */
+static int hlua_prepend_path(lua_State *L, char *type, char *path)
+{
+ lua_getglobal(L, "package"); /* push package variable */
+ lua_pushstring(L, path); /* push given path */
+ lua_pushstring(L, ";"); /* push semicolon */
+ lua_getfield(L, -3, type); /* push old path */
+ lua_concat(L, 3); /* concatenate to new path */
+ lua_setfield(L, -2, type); /* store new path */
+ lua_pop(L, 1); /* pop package variable */
+
+ return 0;
+}
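+
+/* For example (illustrative), hlua_prepend_path(L, "path", "/opt/lua/?.lua")
+ * turns a package.path of "./?.lua" into "/opt/lua/?.lua;./?.lua".
+ */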
+
+static int hlua_config_prepend_path(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *path;
+ char *type = "path";
+ struct prepend_path *p = NULL;
+ size_t i;
+
+ if (too_many_args(2, args, err, NULL)) {
+ goto err;
+ }
+
+ if (!(*args[1])) {
+ memprintf(err, "'%s' expects to receive a <path> as argument", args[0]);
+ goto err;
+ }
+ path = args[1];
+
+ if (*args[2]) {
+ if (strcmp(args[2], "path") != 0 && strcmp(args[2], "cpath") != 0) {
+ memprintf(err, "'%s' expects <type> to either be 'path' or 'cpath'", args[0]);
+ goto err;
+ }
+ type = args[2];
+ }
+
+ p = calloc(1, sizeof(*p));
+ if (p == NULL) {
+ memprintf(err, "memory allocation failed");
+ goto err;
+ }
+ p->path = strdup(path);
+ if (p->path == NULL) {
+ memprintf(err, "memory allocation failed");
+ goto err2;
+ }
+ p->type = strdup(type);
+ if (p->type == NULL) {
+ memprintf(err, "memory allocation failed");
+ goto err2;
+ }
+ LIST_APPEND(&prepend_path_list, &p->l);
+
+ /* Handle the global state and the per-thread state for the first
+ * thread. The remaining threads will be initialized based on
+ * prepend_path_list.
+ */
+ for (i = 0; i < 2; i++) {
+ lua_State *L = hlua_states[i];
+ const char *error;
+
+ if (setjmp(safe_ljmp_env) != 0) {
+ lua_atpanic(L, hlua_panic_safe);
+ if (lua_type(L, -1) == LUA_TSTRING)
+ error = lua_tostring(L, -1);
+ else
+ error = "critical error";
+ fprintf(stderr, "lua-prepend-path: %s.\n", error);
+ exit(1);
+ } else {
+ lua_atpanic(L, hlua_panic_ljmp);
+ }
+
+ hlua_prepend_path(L, type, path);
+
+ lua_atpanic(L, hlua_panic_safe);
+ }
+
+ return 0;
+
+err2:
+ free(p->type);
+ free(p->path);
+err:
+ free(p);
+ return -1;
+}
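+
+/* Example (illustrative): "lua-prepend-path /opt/haproxy/lua/?.lua" extends
+ * package.path, while "lua-prepend-path /opt/haproxy/lua/?.so cpath" extends
+ * package.cpath, in the global state, the first per-thread state, and (via
+ * prepend_path_list) every state created later.
+ */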
+
+/* configuration keywords declaration */
+static struct cfg_kw_list cfg_kws = {{ },{
+ { CFG_GLOBAL, "lua-prepend-path", hlua_config_prepend_path },
+ { CFG_GLOBAL, "lua-load", hlua_load },
+ { CFG_GLOBAL, "lua-load-per-thread", hlua_load_per_thread },
+ { CFG_GLOBAL, "tune.lua.session-timeout", hlua_session_timeout },
+ { CFG_GLOBAL, "tune.lua.task-timeout", hlua_task_timeout },
+ { CFG_GLOBAL, "tune.lua.service-timeout", hlua_applet_timeout },
+ { CFG_GLOBAL, "tune.lua.burst-timeout", hlua_burst_timeout },
+ { CFG_GLOBAL, "tune.lua.forced-yield", hlua_forced_yield },
+ { CFG_GLOBAL, "tune.lua.maxmem", hlua_parse_maxmem },
+ { CFG_GLOBAL, "tune.lua.log.loggers", hlua_cfg_parse_log_loggers },
+ { CFG_GLOBAL, "tune.lua.log.stderr", hlua_cfg_parse_log_stderr },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+#ifdef USE_OPENSSL
+
+/*
+ * This function replaces a ckch_store by another one, and rebuilds the ckch_inst
+ * and all its dependencies. It does the same as "cli_io_handler_commit_cert" but
+ * for Lua; the major difference is that the yield is not handled the same way in
+ * Lua and for the CLI.
+ */
+__LJMP static int hlua_ckch_commit_yield(lua_State *L, int status, lua_KContext ctx)
+{
+ struct ckch_inst **lua_ckchi = lua_touserdata(L, -1);
+ struct ckch_store **lua_ckchs = lua_touserdata(L, -2);
+ struct ckch_inst *ckchi = *lua_ckchi;
+ struct ckch_store *old_ckchs = lua_ckchs[0];
+ struct ckch_store *new_ckchs = lua_ckchs[1];
+ struct hlua *hlua;
+ char *err = NULL;
+ int y = 1;
+
+ hlua = hlua_gethlua(L);
+
+ /* get the first ckchi to copy */
+ if (ckchi == NULL)
+ ckchi = LIST_ELEM(old_ckchs->ckch_inst.n, typeof(ckchi), by_ckchs);
+
+	/* walk through the old ckch_inst list and create new ckch_inst entries using the updated ckchs */
+ list_for_each_entry_from(ckchi, &old_ckchs->ckch_inst, by_ckchs) {
+ struct ckch_inst *new_inst;
+
+		/* it takes a lot of CPU to create SSL_CTXs, so we yield every 10 CKCH instances */
+ if (y % 10 == 0) {
+
+ *lua_ckchi = ckchi;
+
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_ckch_commit_yield, TICK_ETERNITY, 0));
+ }
+
+ if (ckch_inst_rebuild(new_ckchs, ckchi, &new_inst, &err))
+ goto error;
+
+ /* link the new ckch_inst to the duplicate */
+ LIST_APPEND(&new_ckchs->ckch_inst, &new_inst->by_ckchs);
+ y++;
+ }
+
+ /* The generation is finished, we can insert everything */
+ ckch_store_replace(old_ckchs, new_ckchs);
+
+ lua_pop(L, 2); /* pop the lua_ckchs and ckchi */
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ return 0;
+
+error:
+ ckch_store_free(new_ckchs);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ WILL_LJMP(luaL_error(L, "%s", err));
+ free(err);
+
+ return 0;
+}
+
+/*
+ * Replace a ckch_store <filename> in the ckchs_tree with a ckch_store created
+ * from the table passed as parameter.
+ *
+ * This is equivalent to "set ssl cert" + "commit ssl cert" over the CLI, which
+ * means it does not need to have a transaction since everything is done in the
+ * same function.
+ *
+ * CertCache.set{filename="", crt="", key="", sctl="", ocsp="", issuer=""}
+ *
+ */
+__LJMP static int hlua_ckch_set(lua_State *L)
+{
+ struct hlua *hlua;
+ struct ckch_inst **lua_ckchi;
+ struct ckch_store **lua_ckchs;
+ struct ckch_store *old_ckchs = NULL;
+ struct ckch_store *new_ckchs = NULL;
+ int errcode = 0;
+ char *err = NULL;
+ struct cert_exts *cert_ext = NULL;
+ char *filename;
+ struct ckch_data *data;
+ int ret;
+
+ if (lua_type(L, -1) != LUA_TTABLE)
+ WILL_LJMP(luaL_error(L, "'CertCache.set' needs a table as argument"));
+
+ hlua = hlua_gethlua(L);
+
+ /* FIXME: this should not return an error but should come back later */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ WILL_LJMP(luaL_error(L, "CertCache already under lock"));
+
+ ret = lua_getfield(L, -1, "filename");
+ if (ret != LUA_TSTRING) {
+ memprintf(&err, "%sNo filename specified!", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ filename = (char *)lua_tostring(L, -1);
+
+
+ /* look for the filename in the tree */
+ old_ckchs = ckchs_lookup(filename);
+ if (!old_ckchs) {
+ memprintf(&err, "%sCan't replace a certificate which is not referenced by the configuration!", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ /* TODO: handle extra_files_noext */
+
+ new_ckchs = ckchs_dup(old_ckchs);
+ if (!new_ckchs) {
+ memprintf(&err, "%sCannot allocate memory!", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ data = new_ckchs->data;
+
+	/* loop on the fields in the table, which have the same names as the
+	 * possible file extensions */
+ lua_pushnil(L);
+ while (lua_next(L, 1)) {
+ int i;
+ const char *field = lua_tostring(L, -2);
+ char *payload = (char *)lua_tostring(L, -1);
+
+ if (!field || strcmp(field, "filename") == 0) {
+ lua_pop(L, 1);
+ continue;
+ }
+
+		/* reset the match for each field, otherwise a previous match
+		 * would wrongly be reused for an unsupported field
+		 */
+		cert_ext = NULL;
+		for (i = 0; field && cert_exts[i].ext != NULL; i++) {
+			if (strcmp(field, cert_exts[i].ext) == 0) {
+				cert_ext = &cert_exts[i];
+				break;
+			}
+		}
+
+		/* no extension matched: the field is not supported */
+ if (cert_ext == NULL) {
+ memprintf(&err, "%sUnsupported field '%s'", err ? err : "", field);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* Reset the OCSP CID */
+ if (cert_ext->type == CERT_TYPE_PEM || cert_ext->type == CERT_TYPE_KEY ||
+ cert_ext->type == CERT_TYPE_ISSUER) {
+ OCSP_CERTID_free(new_ckchs->data->ocsp_cid);
+ new_ckchs->data->ocsp_cid = NULL;
+ }
+
+ /* apply the change on the duplicate */
+ if (cert_ext->load(filename, payload, data, &err) != 0) {
+ memprintf(&err, "%sCan't load the payload for '%s'", err ? err : "", cert_ext->ext);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ lua_pop(L, 1);
+ }
+
+ /* store the pointers on the lua stack */
+ lua_ckchs = lua_newuserdata(L, sizeof(struct ckch_store *) * 2);
+ lua_ckchs[0] = old_ckchs;
+ lua_ckchs[1] = new_ckchs;
+ lua_ckchi = lua_newuserdata(L, sizeof(struct ckch_inst *));
+ *lua_ckchi = NULL;
+
+ task_wakeup(hlua->task, TASK_WOKEN_MSG);
+ MAY_LJMP(hlua_yieldk(L, 0, 0, hlua_ckch_commit_yield, TICK_ETERNITY, 0));
+
+end:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ if (errcode & ERR_CODE) {
+ ckch_store_free(new_ckchs);
+ WILL_LJMP(luaL_error(L, "%s", err));
+ }
+ free(err);
+
+ return 0;
+}
+
+#else
+
+__LJMP static int hlua_ckch_set(lua_State *L)
+{
+ WILL_LJMP(luaL_error(L, "'CertCache.set' needs an HAProxy built with OpenSSL"));
+
+ return 0;
+}
+#endif /* ! USE_OPENSSL */
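+
+/* A minimal Lua usage sketch (hypothetical file names and variables), to be
+ * run from a task since the commit step may yield:
+ *
+ *   core.register_task(function()
+ *       CertCache.set{filename="/etc/ssl/site.pem", crt=new_crt, key=new_key}
+ *   end)
+ */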
+
+
+
+/* This function can fail with an abort() due to a Lua critical error.
+ * We are in the initialisation process of HAProxy, so this abort() is
+ * tolerated.
+ */
+int hlua_post_init_state(lua_State *L)
+{
+ struct hlua_init_function *init;
+ const char *msg;
+ enum hlua_exec ret;
+ const char *error;
+ const char *kind;
+ const char *trace;
+ int return_status = 1;
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
+ int nres;
+#endif
+
+ /* disable memory limit checks if limit is not set */
+ if (!hlua_global_allocator.limit)
+ hlua_global_allocator.limit = ~hlua_global_allocator.limit;
+
+ /* Call post initialisation function in safe environment. */
+ if (setjmp(safe_ljmp_env) != 0) {
+ lua_atpanic(L, hlua_panic_safe);
+ if (lua_type(L, -1) == LUA_TSTRING)
+ error = lua_tostring(L, -1);
+ else
+ error = "critical error";
+ fprintf(stderr, "Lua post-init: %s.\n", error);
+ exit(1);
+ } else {
+ lua_atpanic(L, hlua_panic_ljmp);
+ }
+
+ list_for_each_entry(init, &hlua_init_functions[hlua_state_id], l) {
+ hlua_pushref(L, init->function_ref);
+ /* function ref should be released right away since it was pushed
+ * on the stack and will not be used anymore
+ */
+ hlua_unref(L, init->function_ref);
+
+#if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
+ ret = lua_resume(L, NULL, 0, &nres);
+#else
+ ret = lua_resume(L, NULL, 0);
+#endif
+ kind = NULL;
+ switch (ret) {
+
+ case LUA_OK:
+ lua_pop(L, -1);
+ break;
+
+ case LUA_ERRERR:
+ kind = "message handler error";
+ __fallthrough;
+ case LUA_ERRRUN:
+ if (!kind)
+ kind = "runtime error";
+ msg = lua_tostring(L, -1);
+ lua_settop(L, 0); /* Empty the stack. */
+ trace = hlua_traceback(L, ", ");
+ if (msg)
+ ha_alert("Lua init: %s: '%s' from %s\n", kind, msg, trace);
+ else
+ ha_alert("Lua init: unknown %s from %s\n", kind, trace);
+ return_status = 0;
+ break;
+
+ default:
+ /* Unknown error */
+ kind = "Unknown error";
+ __fallthrough;
+ case LUA_YIELD:
+ /* yield is not configured at this step, this state doesn't happen */
+ if (!kind)
+ kind = "yield not allowed";
+ __fallthrough;
+ case LUA_ERRMEM:
+ if (!kind)
+ kind = "out of memory error";
+ lua_settop(L, 0); /* Empty the stack. */
+ trace = hlua_traceback(L, ", ");
+ ha_alert("Lua init: %s: %s\n", kind, trace);
+ return_status = 0;
+ break;
+ }
+ if (!return_status)
+ break;
+ }
+
+ lua_atpanic(L, hlua_panic_safe);
+ return return_status;
+}
+
+int hlua_post_init()
+{
+ int ret;
+ int i;
+ int errors;
+ char *err = NULL;
+ struct hlua_function *fcn;
+ struct hlua_reg_filter *reg_flt;
+
+#if defined(USE_OPENSSL)
+ /* Initialize SSL server. */
+ if (socket_ssl->xprt->prepare_srv) {
+		int saved_used_backend = global.ssl_used_backend;
+		/* don't affect maxconn automatic computation */
+		socket_ssl->xprt->prepare_srv(socket_ssl);
+		global.ssl_used_backend = saved_used_backend;
+ }
+#endif
+
+ /* Perform post init of common thread */
+ hlua_state_id = 0;
+ ha_set_thread(&ha_thread_info[0]);
+ ret = hlua_post_init_state(hlua_states[hlua_state_id]);
+ if (ret == 0)
+ return 0;
+
+ /* init remaining lua states and load files */
+ for (hlua_state_id = 2; hlua_state_id < global.nbthread + 1; hlua_state_id++) {
+
+ /* set thread context */
+ ha_set_thread(&ha_thread_info[hlua_state_id - 1]);
+
+ /* Init lua state */
+ hlua_states[hlua_state_id] = hlua_init_state(hlua_state_id);
+
+ /* Load lua files */
+ for (i = 0; per_thread_load && per_thread_load[i]; i++) {
+ ret = hlua_load_state(per_thread_load[i], hlua_states[hlua_state_id], &err);
+ if (ret != 0) {
+ ha_alert("Lua init: %s\n", err);
+ return 0;
+ }
+ }
+ }
+
+ /* Reset thread context */
+ ha_set_thread(NULL);
+
+ /* Execute post init for all states */
+ for (hlua_state_id = 1; hlua_state_id < global.nbthread + 1; hlua_state_id++) {
+
+ /* set thread context */
+ ha_set_thread(&ha_thread_info[hlua_state_id - 1]);
+
+ /* run post init */
+ ret = hlua_post_init_state(hlua_states[hlua_state_id]);
+ if (ret == 0)
+ return 0;
+ }
+
+ /* Reset thread context */
+ ha_set_thread(NULL);
+
+	/* Control function registrations. Each function must have either:
+	 * - only function_ref[0] set (positive) and all others set to -1, or
+	 * - only function_ref[0] set to -1 and all others positive.
+	 * This ensures the same reference is not used both in the shared
+	 * Lua state and in a thread-dedicated Lua state. Note: if this case
+	 * is ever reached, the shared state takes priority, but the bug will
+	 * be complicated for the end user to track down.
+	 */
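+	/* For example (illustrative): with nbthread = 4, a function loaded
+	 * with lua-load-per-thread ends up with ret = +4 and
+	 * function_ref[0] == -1, while one loaded with lua-load ends up with
+	 * ret = -4 and function_ref[0] >= 0; any other combination is
+	 * rejected below.
+	 */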
+ errors = 0;
+ list_for_each_entry(fcn, &referenced_functions, l) {
+ ret = 0;
+ for (i = 1; i < global.nbthread + 1; i++) {
+ if (fcn->function_ref[i] == -1)
+ ret--;
+ else
+ ret++;
+ }
+ if (abs(ret) != global.nbthread) {
+			ha_alert("Lua function '%s' is not referenced in all threads. "
+			         "The function is expected to be referenced in all threads or in none.\n", fcn->name);
+ errors++;
+ continue;
+ }
+
+ if ((fcn->function_ref[0] == -1) == (ret < 0)) {
+			ha_alert("Lua function '%s' is referenced both in the shared Lua context (through lua-load) "
+			         "and in the per-thread Lua context (through lua-load-per-thread). These two contexts "
+			         "are exclusive.\n", fcn->name);
+ errors++;
+ }
+ }
+
+ /* Do the same with registered filters */
+ list_for_each_entry(reg_flt, &referenced_filters, l) {
+ ret = 0;
+ for (i = 1; i < global.nbthread + 1; i++) {
+ if (reg_flt->flt_ref[i] == -1)
+ ret--;
+ else
+ ret++;
+ }
+ if (abs(ret) != global.nbthread) {
+			ha_alert("Lua filter '%s' is not referenced in all threads. "
+			         "The filter is expected to be referenced in all threads or in none.\n", reg_flt->name);
+ errors++;
+ continue;
+ }
+
+ if ((reg_flt->flt_ref[0] == -1) == (ret < 0)) {
+			ha_alert("Lua filter '%s' is referenced both in the shared Lua context (through lua-load) "
+			         "and in the per-thread Lua context (through lua-load-per-thread). These two contexts "
+			         "are exclusive.\n", reg_flt->name);
+ errors++;
+ }
+ }
+
+
+ if (errors > 0)
+ return 0;
+
+	/* After this point, this global will no longer be used, so set it to
+	 * -1 in order to likely cause a segfault if someone uses it anyway.
+	 */
+ hlua_state_id = -1;
+
+ return 1;
+}
+
+/* The memory allocator used by the Lua stack. <ud> is a pointer to the
+ * allocator's context. <ptr> is the pointer to alloc/free/realloc. <osize>
+ * is the previously allocated size or the kind of object in case of a new
+ * allocation. <nsize> is the requested new size. A new allocation is
+ * indicated by <ptr> being NULL. A free is indicated by <nsize> being
+ * zero. This one verifies that the limits are respected but is optimized
+ * for the fast case where limits are not used, hence stats are not updated.
+ *
+ * Warning: while this API resembles glibc's realloc() a lot, glibc surpasses
+ * POSIX by making realloc(ptr,0) an effective free(), but others do not do
+ * that and will simply allocate zero as if it were the result of malloc(0),
+ * so mapping this onto realloc() will lead to memory leaks on non-glibc
+ * systems.
+ */
+static void *hlua_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
+{
+ struct hlua_mem_allocator *zone = ud;
+ size_t limit, old, new;
+
+ /* a limit of ~0 means unlimited and boot complete, so there's no need
+ * for accounting anymore.
+ */
+ if (likely(~zone->limit == 0)) {
+ if (!nsize)
+ ha_free(&ptr);
+ else
+ ptr = realloc(ptr, nsize);
+ return ptr;
+ }
+
+ if (!ptr)
+ osize = 0;
+
+ /* enforce strict limits across all threads */
+ limit = zone->limit;
+ old = _HA_ATOMIC_LOAD(&zone->allocated);
+ do {
+ new = old + nsize - osize;
+ if (unlikely(nsize && limit && new > limit))
+ return NULL;
+ } while (!_HA_ATOMIC_CAS(&zone->allocated, &old, new));
+
+ if (!nsize)
+ ha_free(&ptr);
+ else
+ ptr = realloc(ptr, nsize);
+
+ if (unlikely(!ptr && nsize)) // failed
+ _HA_ATOMIC_SUB(&zone->allocated, nsize - osize);
+
+ __ha_barrier_atomic_store();
+ return ptr;
+}
+
+/* This function can fail with an abort() due to a Lua critical error.
+ * We are in the initialisation process of HAProxy, so this abort() is
+ * tolerated.
+ */
+lua_State *hlua_init_state(int thread_num)
+{
+ int i;
+ int idx;
+ struct sample_fetch *sf;
+ struct sample_conv *sc;
+ char *p;
+ const char *error_msg;
+ void **context;
+ lua_State *L;
+ struct prepend_path *pp;
+
+ /* Init main lua stack. */
+ L = lua_newstate(hlua_alloc, &hlua_global_allocator);
+
+ if (!L) {
+ fprintf(stderr,
+ "Lua init: critical error: lua_newstate() returned NULL."
+ " This may possibly be caused by a memory allocation error.\n");
+ exit(1);
+ }
+
+ /* Initialise Lua context to NULL */
+ context = lua_getextraspace(L);
+ *context = NULL;
+
+	/* From this point, until the end of the initialisation function,
+	 * the Lua functions can fail with an abort. We are in the
+	 * initialisation process of HAProxy, so this abort() is tolerated.
+	 */
+
+ /* Call post initialisation function in safe environment. */
+ if (setjmp(safe_ljmp_env) != 0) {
+ lua_atpanic(L, hlua_panic_safe);
+ if (lua_type(L, -1) == LUA_TSTRING)
+ error_msg = lua_tostring(L, -1);
+ else
+ error_msg = "critical error";
+ fprintf(stderr, "Lua init: %s.\n", error_msg);
+ exit(1);
+ } else {
+ lua_atpanic(L, hlua_panic_ljmp);
+ }
+
+ /* Initialise lua. */
+ luaL_openlibs(L);
+#define HLUA_PREPEND_PATH_TOSTRING1(x) #x
+#define HLUA_PREPEND_PATH_TOSTRING(x) HLUA_PREPEND_PATH_TOSTRING1(x)
+#ifdef HLUA_PREPEND_PATH
+ hlua_prepend_path(L, "path", HLUA_PREPEND_PATH_TOSTRING(HLUA_PREPEND_PATH));
+#endif
+#ifdef HLUA_PREPEND_CPATH
+ hlua_prepend_path(L, "cpath", HLUA_PREPEND_PATH_TOSTRING(HLUA_PREPEND_CPATH));
+#endif
+#undef HLUA_PREPEND_PATH_TOSTRING
+#undef HLUA_PREPEND_PATH_TOSTRING1
+
+ /* Apply configured prepend path */
+ list_for_each_entry(pp, &prepend_path_list, l)
+ hlua_prepend_path(L, pp->type, pp->path);
+
+ /*
+ * Override some lua functions.
+ *
+ */
+
+ /* push our "safe" coroutine.create() function */
+ lua_getglobal(L, "coroutine");
+ lua_pushcclosure(L, hlua_coroutine_create, 0);
+ lua_setfield(L, -2, "create");
+
+ /*
+ *
+ * Create "core" object.
+ *
+ */
+
+ /* This table entry is the object "core" base. */
+ lua_newtable(L);
+
+ /* set the thread id */
+ hlua_class_const_int(L, "thread", thread_num);
+
+ /* Push the loglevel constants. */
+ for (i = 0; i < NB_LOG_LEVELS; i++)
+ hlua_class_const_int(L, log_levels[i], i);
+
+ /* Register special functions. */
+ hlua_class_function(L, "register_init", hlua_register_init);
+ hlua_class_function(L, "register_task", hlua_register_task);
+ hlua_class_function(L, "register_fetches", hlua_register_fetches);
+ hlua_class_function(L, "register_converters", hlua_register_converters);
+ hlua_class_function(L, "register_action", hlua_register_action);
+ hlua_class_function(L, "register_service", hlua_register_service);
+ hlua_class_function(L, "register_cli", hlua_register_cli);
+ hlua_class_function(L, "register_filter", hlua_register_filter);
+ hlua_class_function(L, "yield", hlua_yield);
+ hlua_class_function(L, "set_nice", hlua_set_nice);
+ hlua_class_function(L, "sleep", hlua_sleep);
+ hlua_class_function(L, "msleep", hlua_msleep);
+ hlua_class_function(L, "add_acl", hlua_add_acl);
+ hlua_class_function(L, "del_acl", hlua_del_acl);
+ hlua_class_function(L, "set_map", hlua_set_map);
+ hlua_class_function(L, "del_map", hlua_del_map);
+ hlua_class_function(L, "get_var", hlua_core_get_var);
+ hlua_class_function(L, "tcp", hlua_socket_new);
+ hlua_class_function(L, "httpclient", hlua_httpclient_new);
+ hlua_class_function(L, "event_sub", hlua_event_global_sub);
+ hlua_class_function(L, "log", hlua_log);
+ hlua_class_function(L, "Debug", hlua_log_debug);
+ hlua_class_function(L, "Info", hlua_log_info);
+ hlua_class_function(L, "Warning", hlua_log_warning);
+ hlua_class_function(L, "Alert", hlua_log_alert);
+ hlua_class_function(L, "done", hlua_done);
+ hlua_class_function(L, "disable_legacy_mailers", hlua_disable_legacy_mailers);
+ hlua_fcn_reg_core_fcn(L);
+
+ lua_setglobal(L, "core");
+
+ /*
+ *
+ * Create "act" object.
+ *
+ */
+
+ /* This table entry is the object "act" base. */
+ lua_newtable(L);
+
+ /* push action return constants */
+ hlua_class_const_int(L, "CONTINUE", ACT_RET_CONT);
+ hlua_class_const_int(L, "STOP", ACT_RET_STOP);
+ hlua_class_const_int(L, "YIELD", ACT_RET_YIELD);
+ hlua_class_const_int(L, "ERROR", ACT_RET_ERR);
+ hlua_class_const_int(L, "DONE", ACT_RET_DONE);
+ hlua_class_const_int(L, "DENY", ACT_RET_DENY);
+ hlua_class_const_int(L, "ABORT", ACT_RET_ABRT);
+ hlua_class_const_int(L, "INVALID", ACT_RET_INV);
+
+ hlua_class_function(L, "wake_time", hlua_set_wake_time);
+
+ lua_setglobal(L, "act");
+
+ /*
+ *
+ * Create "Filter" object.
+ *
+ */
+
+ /* This table entry is the object "filter" base. */
+ lua_newtable(L);
+
+ /* push flags and constants */
+ hlua_class_const_int(L, "CONTINUE", 1);
+ hlua_class_const_int(L, "WAIT", 0);
+ hlua_class_const_int(L, "ERROR", -1);
+
+ hlua_class_const_int(L, "FLT_CFG_FL_HTX", FLT_CFG_FL_HTX);
+
+ hlua_class_function(L, "wake_time", hlua_set_wake_time);
+ hlua_class_function(L, "register_data_filter", hlua_register_data_filter);
+ hlua_class_function(L, "unregister_data_filter", hlua_unregister_data_filter);
+
+ lua_setglobal(L, "filter");
+
+ /*
+ *
+ * Register class Map
+ *
+ */
+
+ /* This table entry is the object "Map" base. */
+ lua_newtable(L);
+
+ /* register pattern types. */
+ for (i=0; i<PAT_MATCH_NUM; i++)
+ hlua_class_const_int(L, pat_match_names[i], i);
+ for (i=0; i<PAT_MATCH_NUM; i++) {
+ snprintf(trash.area, trash.size, "_%s", pat_match_names[i]);
+ hlua_class_const_int(L, trash.area, i);
+ }
+
+ /* register constructor. */
+ hlua_class_function(L, "new", hlua_map_new);
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+	/* Register methods. */
+ hlua_class_function(L, "lookup", hlua_map_lookup);
+ hlua_class_function(L, "slookup", hlua_map_slookup);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry.
+ * The function hlua_register_metatable() pops the stack, so we
+ * previously create a copy of the table.
+ */
+ lua_pushvalue(L, -1); /* Copy the -1 entry and push it on the stack. */
+ class_map_ref = hlua_register_metatable(L, CLASS_MAP);
+
+	/* Assign the metatable to the main Map object. */
+ lua_setmetatable(L, -2);
+
+ /* Set a name to the table. */
+ lua_setglobal(L, "Map");
+
+ /*
+ *
+ * Register "CertCache" class
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+ /* Register */
+ hlua_class_function(L, "set", hlua_ckch_set);
+	lua_setglobal(L, CLASS_CERTCACHE); /* Create the global object called CertCache */
+
+ /*
+ *
+ * Register class Channel
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+	/* Register methods. */
+ hlua_class_function(L, "data", hlua_channel_get_data);
+ hlua_class_function(L, "line", hlua_channel_get_line);
+ hlua_class_function(L, "set", hlua_channel_set_data);
+ hlua_class_function(L, "remove", hlua_channel_del_data);
+ hlua_class_function(L, "append", hlua_channel_append);
+ hlua_class_function(L, "prepend", hlua_channel_prepend);
+ hlua_class_function(L, "insert", hlua_channel_insert_data);
+ hlua_class_function(L, "send", hlua_channel_send);
+ hlua_class_function(L, "forward", hlua_channel_forward);
+ hlua_class_function(L, "input", hlua_channel_get_in_len);
+ hlua_class_function(L, "output", hlua_channel_get_out_len);
+ hlua_class_function(L, "may_recv", hlua_channel_may_recv);
+ hlua_class_function(L, "is_full", hlua_channel_is_full);
+ hlua_class_function(L, "is_resp", hlua_channel_is_resp);
+
+ /* Deprecated API */
+ hlua_class_function(L, "get", hlua_channel_get);
+ hlua_class_function(L, "dup", hlua_channel_dup);
+ hlua_class_function(L, "getline", hlua_channel_getline);
+ hlua_class_function(L, "get_in_len", hlua_channel_get_in_len);
+ hlua_class_function(L, "get_out_len", hlua_channel_get_out_len);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_channel_ref = hlua_register_metatable(L, CLASS_CHANNEL);
+
+ /*
+ *
+ * Register class Fetches
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Browse existing fetches and create the associated
+ * object method.
+ */
+ sf = NULL;
+ while ((sf = sample_fetch_getnext(sf, &idx)) != NULL) {
+		/* Lua doesn't support '.', '-' or '+' in function names, so
+		 * replace them with an underscore.
+		 */
+ strlcpy2(trash.area, sf->kw, trash.size);
+ for (p = trash.area; *p; p++)
+ if (*p == '.' || *p == '-' || *p == '+')
+ *p = '_';
+
+ /* Register the function. */
+ lua_pushstring(L, trash.area);
+ lua_pushlightuserdata(L, sf);
+ lua_pushcclosure(L, hlua_run_sample_fetch, 1);
+ lua_rawset(L, -3);
+ }
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_fetches_ref = hlua_register_metatable(L, CLASS_FETCHES);
+
+ /*
+ *
+ * Register class Converters
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Browse existing converters and create the associated
+ * object method.
+ */
+ sc = NULL;
+ while ((sc = sample_conv_getnext(sc, &idx)) != NULL) {
+		/* Lua doesn't support '.', '-' or '+' in function names, so
+		 * replace them with an underscore.
+		 */
+ strlcpy2(trash.area, sc->kw, trash.size);
+ for (p = trash.area; *p; p++)
+ if (*p == '.' || *p == '-' || *p == '+')
+ *p = '_';
+
+ /* Register the function. */
+ lua_pushstring(L, trash.area);
+ lua_pushlightuserdata(L, sc);
+ lua_pushcclosure(L, hlua_run_sample_conv, 1);
+ lua_rawset(L, -3);
+ }
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_converters_ref = hlua_register_metatable(L, CLASS_CONVERTERS);
+
+ /*
+ *
+ * Register class HTTP
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "req_get_headers",hlua_http_req_get_headers);
+ hlua_class_function(L, "req_del_header", hlua_http_req_del_hdr);
+ hlua_class_function(L, "req_rep_header", hlua_http_req_rep_hdr);
+ hlua_class_function(L, "req_rep_value", hlua_http_req_rep_val);
+ hlua_class_function(L, "req_add_header", hlua_http_req_add_hdr);
+ hlua_class_function(L, "req_set_header", hlua_http_req_set_hdr);
+ hlua_class_function(L, "req_set_method", hlua_http_req_set_meth);
+ hlua_class_function(L, "req_set_path", hlua_http_req_set_path);
+ hlua_class_function(L, "req_set_query", hlua_http_req_set_query);
+ hlua_class_function(L, "req_set_uri", hlua_http_req_set_uri);
+
+ hlua_class_function(L, "res_get_headers",hlua_http_res_get_headers);
+ hlua_class_function(L, "res_del_header", hlua_http_res_del_hdr);
+ hlua_class_function(L, "res_rep_header", hlua_http_res_rep_hdr);
+ hlua_class_function(L, "res_rep_value", hlua_http_res_rep_val);
+ hlua_class_function(L, "res_add_header", hlua_http_res_add_hdr);
+ hlua_class_function(L, "res_set_header", hlua_http_res_set_hdr);
+ hlua_class_function(L, "res_set_status", hlua_http_res_set_status);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_http_ref = hlua_register_metatable(L, CLASS_HTTP);
+
+ /*
+ *
+ * Register class HTTPMessage
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "is_resp", hlua_http_msg_is_resp);
+ hlua_class_function(L, "get_stline", hlua_http_msg_get_stline);
+ hlua_class_function(L, "get_headers", hlua_http_msg_get_headers);
+ hlua_class_function(L, "del_header", hlua_http_msg_del_hdr);
+ hlua_class_function(L, "rep_header", hlua_http_msg_rep_hdr);
+ hlua_class_function(L, "rep_value", hlua_http_msg_rep_val);
+ hlua_class_function(L, "add_header", hlua_http_msg_add_hdr);
+ hlua_class_function(L, "set_header", hlua_http_msg_set_hdr);
+ hlua_class_function(L, "set_method", hlua_http_msg_set_meth);
+ hlua_class_function(L, "set_path", hlua_http_msg_set_path);
+ hlua_class_function(L, "set_query", hlua_http_msg_set_query);
+ hlua_class_function(L, "set_uri", hlua_http_msg_set_uri);
+ hlua_class_function(L, "set_status", hlua_http_msg_set_status);
+ hlua_class_function(L, "is_full", hlua_http_msg_is_full);
+ hlua_class_function(L, "may_recv", hlua_http_msg_may_recv);
+ hlua_class_function(L, "eom", hlua_http_msg_is_eom);
+ hlua_class_function(L, "input", hlua_http_msg_get_in_len);
+ hlua_class_function(L, "output", hlua_http_msg_get_out_len);
+
+ hlua_class_function(L, "body", hlua_http_msg_get_body);
+ hlua_class_function(L, "set", hlua_http_msg_set_data);
+ hlua_class_function(L, "remove", hlua_http_msg_del_data);
+ hlua_class_function(L, "append", hlua_http_msg_append);
+ hlua_class_function(L, "prepend", hlua_http_msg_prepend);
+ hlua_class_function(L, "insert", hlua_http_msg_insert_data);
+ hlua_class_function(L, "set_eom", hlua_http_msg_set_eom);
+ hlua_class_function(L, "unset_eom", hlua_http_msg_unset_eom);
+
+ hlua_class_function(L, "send", hlua_http_msg_send);
+ hlua_class_function(L, "forward", hlua_http_msg_forward);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_http_msg_ref = hlua_register_metatable(L, CLASS_HTTP_MSG);
+
+ /*
+ *
+ * Register class HTTPClient
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "get", hlua_httpclient_get);
+ hlua_class_function(L, "head", hlua_httpclient_head);
+ hlua_class_function(L, "put", hlua_httpclient_put);
+ hlua_class_function(L, "post", hlua_httpclient_post);
+ hlua_class_function(L, "delete", hlua_httpclient_delete);
+ lua_settable(L, -3); /* Sets the __index entry. */
+ /* Register the garbage collector entry. */
+ lua_pushstring(L, "__gc");
+ lua_pushcclosure(L, hlua_httpclient_gc, 0);
+ lua_settable(L, -3); /* Push the last 2 entries in the table at index -3 */
+
+
+
+ class_httpclient_ref = hlua_register_metatable(L, CLASS_HTTPCLIENT);
+ /*
+ *
+ * Register class AppletTCP
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "getline", hlua_applet_tcp_getline);
+ hlua_class_function(L, "receive", hlua_applet_tcp_recv);
+ hlua_class_function(L, "send", hlua_applet_tcp_send);
+ hlua_class_function(L, "set_priv", hlua_applet_tcp_set_priv);
+ hlua_class_function(L, "get_priv", hlua_applet_tcp_get_priv);
+ hlua_class_function(L, "set_var", hlua_applet_tcp_set_var);
+ hlua_class_function(L, "unset_var", hlua_applet_tcp_unset_var);
+ hlua_class_function(L, "get_var", hlua_applet_tcp_get_var);
+
+ lua_settable(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_applet_tcp_ref = hlua_register_metatable(L, CLASS_APPLET_TCP);
+
+ /*
+ *
+ * Register class AppletHTTP
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "set_priv", hlua_applet_http_set_priv);
+ hlua_class_function(L, "get_priv", hlua_applet_http_get_priv);
+ hlua_class_function(L, "set_var", hlua_applet_http_set_var);
+ hlua_class_function(L, "unset_var", hlua_applet_http_unset_var);
+ hlua_class_function(L, "get_var", hlua_applet_http_get_var);
+ hlua_class_function(L, "getline", hlua_applet_http_getline);
+ hlua_class_function(L, "receive", hlua_applet_http_recv);
+ hlua_class_function(L, "send", hlua_applet_http_send);
+ hlua_class_function(L, "add_header", hlua_applet_http_addheader);
+ hlua_class_function(L, "set_status", hlua_applet_http_status);
+ hlua_class_function(L, "start_response", hlua_applet_http_start_response);
+
+ lua_settable(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_applet_http_ref = hlua_register_metatable(L, CLASS_APPLET_HTTP);
+
+ /*
+ *
+ * Register class TXN
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+ /* Register Lua functions. */
+ hlua_class_function(L, "set_priv", hlua_set_priv);
+ hlua_class_function(L, "get_priv", hlua_get_priv);
+ hlua_class_function(L, "set_var", hlua_set_var);
+ hlua_class_function(L, "unset_var", hlua_unset_var);
+ hlua_class_function(L, "get_var", hlua_get_var);
+ hlua_class_function(L, "done", hlua_txn_done);
+ hlua_class_function(L, "reply", hlua_txn_reply_new);
+ hlua_class_function(L, "set_loglevel", hlua_txn_set_loglevel);
+ hlua_class_function(L, "set_tos", hlua_txn_set_tos);
+ hlua_class_function(L, "set_mark", hlua_txn_set_mark);
+ hlua_class_function(L, "set_priority_class", hlua_txn_set_priority_class);
+ hlua_class_function(L, "set_priority_offset", hlua_txn_set_priority_offset);
+ hlua_class_function(L, "deflog", hlua_txn_deflog);
+ hlua_class_function(L, "log", hlua_txn_log);
+ hlua_class_function(L, "Debug", hlua_txn_log_debug);
+ hlua_class_function(L, "Info", hlua_txn_log_info);
+ hlua_class_function(L, "Warning", hlua_txn_log_warning);
+ hlua_class_function(L, "Alert", hlua_txn_log_alert);
+
+ lua_rawset(L, -3);
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_txn_ref = hlua_register_metatable(L, CLASS_TXN);
+
+ /*
+ *
+ * Register class reply
+ *
+ */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "set_status", hlua_txn_reply_set_status);
+ hlua_class_function(L, "add_header", hlua_txn_reply_add_header);
+ hlua_class_function(L, "del_header", hlua_txn_reply_del_header);
+ hlua_class_function(L, "set_body", hlua_txn_reply_set_body);
+ lua_settable(L, -3); /* Sets the __index entry. */
+ class_txn_reply_ref = luaL_ref(L, LUA_REGISTRYINDEX);
+
+
+ /*
+ *
+ * Register class Socket
+ *
+ */
+
+ /* Create and fill the metatable. */
+ lua_newtable(L);
+
+ /* Create and fill the __index entry. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+
+#ifdef USE_OPENSSL
+ hlua_class_function(L, "connect_ssl", hlua_socket_connect_ssl);
+#endif
+ hlua_class_function(L, "connect", hlua_socket_connect);
+ hlua_class_function(L, "send", hlua_socket_send);
+ hlua_class_function(L, "receive", hlua_socket_receive);
+ hlua_class_function(L, "close", hlua_socket_close);
+ hlua_class_function(L, "getpeername", hlua_socket_getpeername);
+ hlua_class_function(L, "getsockname", hlua_socket_getsockname);
+ hlua_class_function(L, "setoption", hlua_socket_setoption);
+ hlua_class_function(L, "settimeout", hlua_socket_settimeout);
+
+ lua_rawset(L, -3); /* Push the last 2 entries in the table at index -3 */
+
+ /* Register the garbage collector entry. */
+ lua_pushstring(L, "__gc");
+ lua_pushcclosure(L, hlua_socket_gc, 0);
+ lua_rawset(L, -3); /* Push the last 2 entries in the table at index -3 */
+
+ /* Register previous table in the registry with reference and named entry. */
+ class_socket_ref = hlua_register_metatable(L, CLASS_SOCKET);
+
+ lua_atpanic(L, hlua_panic_safe);
+
+ return L;
+}
+
+void hlua_init(void)
+{
+ int i;
+ char *errmsg;
+#ifdef USE_OPENSSL
+ struct srv_kw *kw;
+ int tmp_error;
+ char *error;
+ char *args[] = { /* SSL client configuration. */
+ "ssl",
+ "verify",
+ "none",
+ NULL
+ };
+#endif
+
+ /* Init post init function list head */
+ for (i = 0; i < MAX_THREADS + 1; i++)
+ LIST_INIT(&hlua_init_functions[i]);
+
+ /* Init state for common/shared lua parts */
+ hlua_state_id = 0;
+ ha_set_thread(NULL);
+ hlua_states[0] = hlua_init_state(0);
+
+ /* Init state 1 for thread 0. We have at least one thread. */
+ hlua_state_id = 1;
+ ha_set_thread(NULL);
+ hlua_states[1] = hlua_init_state(1);
+
+ /* Proxy and server configuration initialisation. */
+ socket_proxy = alloc_new_proxy("LUA-SOCKET", PR_CAP_FE|PR_CAP_BE|PR_CAP_INT, &errmsg);
+ if (!socket_proxy) {
+ fprintf(stderr, "Lua init: %s\n", errmsg);
+ exit(1);
+ }
+
+ /* Init TCP server: unchanged parameters */
+ socket_tcp = new_server(socket_proxy);
+ if (!socket_tcp) {
+ fprintf(stderr, "Lua init: failed to allocate tcp server socket\n");
+ exit(1);
+ }
+
+#ifdef USE_OPENSSL
+ /* Init TCP server: unchanged parameters */
+ socket_ssl = new_server(socket_proxy);
+ if (!socket_ssl) {
+ fprintf(stderr, "Lua init: failed to allocate ssl server socket\n");
+ exit(1);
+ }
+
+ socket_ssl->use_ssl = 1;
+ socket_ssl->xprt = xprt_get(XPRT_SSL);
+
+ for (i = 0; args[i] != NULL; i++) {
+ if ((kw = srv_find_kw(args[i])) != NULL) { /* Maybe it's registered server keyword */
+			/*
+			 * The keyword was found among the registered server
+			 * keywords. This is useful to configure special SSL
+			 * features like client certificates and ssl_verify.
+			 */
+ tmp_error = kw->parse(args, &i, socket_proxy, socket_ssl, &error);
+ if (tmp_error != 0) {
+ fprintf(stderr, "INTERNAL ERROR: %s\n", error);
+				abort(); /* This must never happen because the command line
+				            is not editable by the user. */
+ }
+ i += kw->skip;
+ }
+ }
+#endif
+
+}
+
+static void hlua_deinit()
+{
+ int thr;
+ struct hlua_reg_filter *reg_flt, *reg_flt_bck;
+
+ list_for_each_entry_safe(reg_flt, reg_flt_bck, &referenced_filters, l)
+ release_hlua_reg_filter(reg_flt);
+
+ for (thr = 0; thr < MAX_THREADS+1; thr++) {
+ if (hlua_states[thr])
+ lua_close(hlua_states[thr]);
+ }
+
+ srv_drop(socket_tcp);
+
+#ifdef USE_OPENSSL
+ srv_drop(socket_ssl);
+#endif
+
+ free_proxy(socket_proxy);
+}
+
+REGISTER_POST_DEINIT(hlua_deinit);
+
+static void hlua_register_build_options(void)
+{
+ char *ptr = NULL;
+
+ memprintf(&ptr, "Built with Lua version : %s", LUA_RELEASE);
+ hap_register_build_opts(ptr, 1);
+}
+
+INITCALL0(STG_REGISTER, hlua_register_build_options);
diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c
new file mode 100644
index 0000000..d8dcdfd
--- /dev/null
+++ b/src/hlua_fcn.c
@@ -0,0 +1,2721 @@
+/*
+ * Lua safe functions
+ *
+ * Copyright 2015-2016 Thierry Fournier <tfournier@arpalert.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * All the functions in this file run with a Lua stack, and can
+ * return with a longjmp. All of these functions must be launched
+ * in an environment able to catch a longjmp, otherwise a
+ * critical error can be raised.
+ */
+
+#define _GNU_SOURCE
+
+#include <lauxlib.h>
+#include <lua.h>
+#include <lualib.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/cli-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/hlua.h>
+#include <haproxy/hlua_fcn.h>
+#include <haproxy/http.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/server.h>
+#include <haproxy/stats.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/event_hdl.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/mailers.h>
+
+/* Contains the class reference of the concat object. */
+static int class_concat_ref;
+static int class_queue_ref;
+static int class_proxy_ref;
+static int class_server_ref;
+static int class_listener_ref;
+static int class_event_sub_ref;
+static int class_regex_ref;
+static int class_stktable_ref;
+static int class_proxy_list_ref;
+static int class_server_list_ref;
+
+#define STATS_LEN (MAX((int)ST_F_TOTAL_FIELDS, (int)INF_TOTAL_FIELDS))
+
+static THREAD_LOCAL struct field stats[STATS_LEN];
+
+int hlua_checkboolean(lua_State *L, int index)
+{
+ if (!lua_isboolean(L, index))
+ luaL_argerror(L, index, "boolean expected");
+ return lua_toboolean(L, index);
+}
+
+/* Helper to push unsigned integers to Lua stack, respecting Lua limitations */
+static int hlua_fcn_pushunsigned(lua_State *L, unsigned int val)
+{
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ lua_pushinteger(L, val);
+#else
+ if (val > INT_MAX)
+ lua_pushnumber(L, (lua_Number)val);
+ else
+ lua_pushinteger(L, (int)val);
+#endif
+ return 1;
+}
+
+/* Helper to push unsigned long long to Lua stack, respecting Lua limitations */
+static int hlua_fcn_pushunsigned_ll(lua_State *L, unsigned long long val)
+{
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ /* 64 bits case, U64 is supported until LLONG_MAX */
+ if (val > LLONG_MAX)
+ lua_pushnumber(L, (lua_Number)val);
+ else
+ lua_pushinteger(L, val);
+#else
+ /* 32 bits case, U64 is supported until INT_MAX */
+ if (val > INT_MAX)
+ lua_pushnumber(L, (lua_Number)val);
+ else
+ lua_pushinteger(L, (int)val);
+#endif
+ return 1;
+}
+
+/* This function gets a struct field and converts it in Lua
+ * variable. The variable is pushed at the top of the stack.
+ */
+int hlua_fcn_pushfield(lua_State *L, struct field *field)
+{
+ /* The lua_Integer is always signed. Its length depends on
+ * compilation options, so the following code is conditioned
+	 * by some macros. Windows macros are not supported.
+	 * If the number cannot be represented as an integer, we try to
+	 * convert it to a float.
+ */
+ switch (field_format(field, 0)) {
+
+ case FF_EMPTY:
+ lua_pushnil(L);
+ return 1;
+
+ case FF_S32:
+ /* S32 is always supported. */
+ lua_pushinteger(L, field->u.s32);
+ return 1;
+
+ case FF_U32:
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ /* 64 bits case, U32 is always supported */
+ lua_pushinteger(L, field->u.u32);
+#else
+ /* 32 bits case, U32 is supported until INT_MAX. */
+ if (field->u.u32 > INT_MAX)
+ lua_pushnumber(L, (lua_Number)field->u.u32);
+ else
+ lua_pushinteger(L, field->u.u32);
+#endif
+ return 1;
+
+ case FF_S64:
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ /* 64 bits case, S64 is always supported */
+ lua_pushinteger(L, field->u.s64);
+#else
+ /* 64 bits case, S64 is supported between INT_MIN and INT_MAX */
+ if (field->u.s64 < INT_MIN || field->u.s64 > INT_MAX)
+ lua_pushnumber(L, (lua_Number)field->u.s64);
+ else
+ lua_pushinteger(L, (int)field->u.s64);
+#endif
+ return 1;
+
+ case FF_U64:
+#if (LUA_MAXINTEGER == LLONG_MAX || ((LUA_MAXINTEGER == LONG_MAX) && (__WORDSIZE == 64)))
+ /* 64 bits case, U64 is supported until LLONG_MAX */
+ if (field->u.u64 > LLONG_MAX)
+ lua_pushnumber(L, (lua_Number)field->u.u64);
+ else
+ lua_pushinteger(L, field->u.u64);
+#else
+ /* 64 bits case, U64 is supported until INT_MAX */
+ if (field->u.u64 > INT_MAX)
+ lua_pushnumber(L, (lua_Number)field->u.u64);
+ else
+ lua_pushinteger(L, (int)field->u.u64);
+#endif
+ return 1;
+
+ case FF_STR:
+ lua_pushstring(L, field->u.str);
+ return 1;
+
+ default:
+ break;
+ }
+
+ /* Default case, never reached. */
+ lua_pushnil(L);
+ return 1;
+}
+
+/* Some strings start or end with blank chars. This
+ * function removes the spaces, tabs, \r and
+ * \n at the beginning and at the end of the string <str>, and
+ * pushes the result onto the Lua stack.
+ * Returns a pointer to the Lua internal copy of the string.
+ */
+const char *hlua_pushstrippedstring(lua_State *L, const char *str)
+{
+ const char *p;
+ int l;
+
+ for (p = str; HTTP_IS_LWS(*p); p++);
+
+ for (l = strlen(p); l && HTTP_IS_LWS(p[l-1]); l--);
+
+ return lua_pushlstring(L, p, l);
+}
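+
+/* For example (illustrative), hlua_pushstrippedstring(L, " \tfoo\r\n")
+ * pushes "foo" and returns a pointer to that internal copy.
+ */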
+
+/* The three following functions are useful for adding entries
+ * in a table. These functions take a string and respectively an
+ * integer, a string or a function, and add it to the table at the
+ * top of the stack.
+ *
+ * These functions throw an error if no more stack space is
+ * available.
+ */
+void hlua_class_const_int(lua_State *L, const char *name, int value)
+{
+ lua_pushstring(L, name);
+ lua_pushinteger(L, value);
+ lua_rawset(L, -3);
+}
+void hlua_class_const_str(lua_State *L, const char *name, const char *value)
+{
+ lua_pushstring(L, name);
+ lua_pushstring(L, value);
+ lua_rawset(L, -3);
+}
+void hlua_class_function(lua_State *L, const char *name, int (*function)(lua_State *L))
+{
+ lua_pushstring(L, name);
+ lua_pushcclosure(L, function, 0);
+ lua_rawset(L, -3);
+}
+
+/* This function returns a string containing the HAProxy object name. */
+int hlua_dump_object(struct lua_State *L)
+{
+ const char *name = (const char *)lua_tostring(L, lua_upvalueindex(1));
+ lua_pushfstring(L, "HAProxy class %s", name);
+ return 1;
+}
+
+/* This function registers a table as a metatable. It names
+ * the metatable, and returns the associated reference.
+ * The original table is popped from the top of the stack.
+ * "name" is the referenced class name.
+ */
+int hlua_register_metatable(struct lua_State *L, char *name)
+{
+	/* Check the type of the top element. It must be
+ * a table.
+ */
+ if (lua_type(L, -1) != LUA_TTABLE)
+ luaL_error(L, "hlua_register_metatable() requires a type Table "
+ "in the top of the stack");
+
+	/* Add the __tostring function which identifies the
+ * created object.
+ */
+ lua_pushstring(L, "__tostring");
+ lua_pushstring(L, name);
+ lua_pushcclosure(L, hlua_dump_object, 1);
+ lua_rawset(L, -3);
+
+ /* Register a named entry for the table. The table
+ * reference is copied first because the function
+	 * lua_setfield() pops the entry.
+ */
+ lua_pushvalue(L, -1);
+ lua_setfield(L, LUA_REGISTRYINDEX, name);
+
+	/* Create the reference of the object. The
+	 * function luaL_ref pops the top of the stack.
+ */
+ return luaL_ref(L, LUA_REGISTRYINDEX);
+}
+
+/* Return an object of the expected type, or throws an error. */
+void *hlua_checkudata(lua_State *L, int ud, int class_ref)
+{
+ void *p;
+ int ret;
+
+ /* Check if the stack entry is an array. */
+ if (!lua_istable(L, ud))
+ luaL_argerror(L, ud, NULL);
+
+	/* push the metatable of the referenced object. */
+ if (!lua_getmetatable(L, ud))
+ luaL_argerror(L, ud, NULL);
+
+	/* push the expected metatable. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_ref);
+
+ /* Check if the metadata have the expected type. */
+ ret = lua_rawequal(L, -1, -2);
+ lua_pop(L, 2);
+ if (!ret)
+ luaL_argerror(L, ud, NULL);
+
+	/* Push the entry [0] of the table onto the stack. */
+ lua_rawgeti(L, ud, 0);
+
+ /* Check if this entry is userdata. */
+ p = lua_touserdata(L, -1);
+ if (!p)
+ luaL_argerror(L, ud, NULL);
+
+ /* Remove the entry returned by lua_rawgeti(). */
+ lua_pop(L, 1);
+
+ /* Return the associated struct. */
+ return p;
+}
+
+/* This function returns the current date in epoch format, as a table with <sec> and <usec> fields. */
+int hlua_now(lua_State *L)
+{
+ /* WT: the doc says "returns the current time" and later says that it's
+ * monotonic. So the best fit is to use start_date+(now-start_time).
+ */
+ struct timeval tv;
+
+ tv = NS_TO_TV(now_ns - start_time_ns);
+ tv_add(&tv, &tv, &start_date);
+
+ lua_newtable(L);
+ lua_pushstring(L, "sec");
+ lua_pushinteger(L, tv.tv_sec);
+ lua_rawset(L, -3);
+ lua_pushstring(L, "usec");
+ lua_pushinteger(L, tv.tv_usec);
+ lua_rawset(L, -3);
+ return 1;
+}
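+
+/* Illustrative Lua usage: "local t = core.now()" yields a table such as
+ * { sec = 1700000000, usec = 123456 }.
+ */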
+
+/* This function expects a Lua string containing an HTTP date, parses it and
+ * returns an integer containing the epoch format of the date, or
+ * nil if the parsing fails.
+ */
+static int hlua_parse_date(lua_State *L, int (*fcn)(const char *, int, struct tm*))
+{
+ const char *str;
+ size_t len;
+ struct tm tm;
+ time_t time;
+
+ str = luaL_checklstring(L, 1, &len);
+
+ if (!fcn(str, len, &tm)) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+	/* This function considers that the content of the broken-down time
+	 * is expressed in the UTC timezone. timegm doesn't care about
+	 * the GNU field tm_gmtoff. If gmtoff is set, or if you know
+ * the timezone from the broken-down time, it must be fixed
+ * after the conversion.
+ */
+ time = my_timegm(&tm);
+ if (time == -1) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushinteger(L, (int)time);
+ return 1;
+}
+static int hlua_http_date(lua_State *L)
+{
+ return hlua_parse_date(L, parse_http_date);
+}
+static int hlua_imf_date(lua_State *L)
+{
+ return hlua_parse_date(L, parse_imf_date);
+}
+static int hlua_rfc850_date(lua_State *L)
+{
+ return hlua_parse_date(L, parse_rfc850_date);
+}
+static int hlua_asctime_date(lua_State *L)
+{
+ return hlua_parse_date(L, parse_asctime_date);
+}
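+
+/* Illustrative Lua usage, assuming these parsers are exposed on the core
+ * table as documented:
+ *
+ *   core.http_date("Thu, 01 Jan 1970 00:00:00 GMT")   --> 0
+ *
+ * nil is returned when the date cannot be parsed.
+ */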
+
+static int hlua_get_info(lua_State *L)
+{
+ int i;
+
+ stats_fill_info(stats, STATS_LEN, 0);
+
+ lua_newtable(L);
+ for (i=0; i<INF_TOTAL_FIELDS; i++) {
+ lua_pushstring(L, info_fields[i].name);
+ hlua_fcn_pushfield(L, &stats[i]);
+ lua_settable(L, -3);
+ }
+ return 1;
+}
+
+static struct hlua_concat *hlua_check_concat(lua_State *L, int ud)
+{
+ return (hlua_checkudata(L, ud, class_concat_ref));
+}
+
+static int hlua_concat_add(lua_State *L)
+{
+ struct hlua_concat *b;
+ char *buffer;
+ char *new;
+ const char *str;
+ size_t l;
+
+ /* First arg must be a concat object. */
+ b = hlua_check_concat(L, 1);
+
+ /* Second arg must be a string. */
+ str = luaL_checklstring(L, 2, &l);
+
+ /* Get the buffer. */
+ lua_rawgeti(L, 1, 1);
+ buffer = lua_touserdata(L, -1);
+ lua_pop(L, 1);
+
+	/* Update the buffer size if required. The old buffer
+	 * is replaced by the new one in the object array, so it will
+ * be deleted by the GC.
+ * Note that in the first loop, the "new" variable is only
+ * used as a flag.
+ */
+ new = NULL;
+ while (b->size - b->len < l) {
+ b->size += HLUA_CONCAT_BLOCSZ;
+ new = buffer;
+ }
+ if (new) {
+ new = lua_newuserdata(L, b->size);
+ memcpy(new, buffer, b->len);
+ lua_rawseti(L, 1, 1);
+ buffer = new;
+ }
+
+ /* Copy string, and update metadata. */
+ memcpy(buffer + b->len, str, l);
+ b->len += l;
+ return 0;
+}
+
+static int hlua_concat_dump(lua_State *L)
+{
+ struct hlua_concat *b;
+ char *buffer;
+
+ /* First arg must be a concat object. */
+ b = hlua_check_concat(L, 1);
+
+ /* Get the buffer. */
+ lua_rawgeti(L, 1, 1);
+ buffer = lua_touserdata(L, -1);
+ lua_pop(L, 1);
+
+	/* Push the concatenated string onto the stack. */
+ lua_pushlstring(L, buffer, b->len);
+ return 1;
+}
+
+int hlua_concat_new(lua_State *L)
+{
+ struct hlua_concat *b;
+
+ lua_newtable(L);
+ b = lua_newuserdata(L, sizeof(*b));
+ b->size = HLUA_CONCAT_BLOCSZ;
+ b->len = 0;
+ lua_rawseti(L, -2, 0);
+ lua_newuserdata(L, HLUA_CONCAT_BLOCSZ);
+ lua_rawseti(L, -2, 1);
+
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_concat_ref);
+ lua_setmetatable(L, -2);
+
+ return 1;
+}
+
+static int concat_tostring(lua_State *L)
+{
+ const void *ptr = lua_topointer(L, 1);
+ lua_pushfstring(L, "Concat object: %p", ptr);
+ return 1;
+}
+
+static void hlua_concat_init(lua_State *L)
+{
+ /* Creates the buffered concat object. */
+ lua_newtable(L);
+
+ lua_pushstring(L, "__tostring");
+ lua_pushcclosure(L, concat_tostring, 0);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "__index"); /* Creates the index entry. */
+ lua_newtable(L); /* The "__index" content. */
+
+ lua_pushstring(L, "add");
+ lua_pushcclosure(L, hlua_concat_add, 0);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "dump");
+ lua_pushcclosure(L, hlua_concat_dump, 0);
+ lua_settable(L, -3);
+
+ lua_settable(L, -3); /* Sets the __index entry. */
+ class_concat_ref = luaL_ref(L, LUA_REGISTRYINDEX);
+}
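+
+/* Example (Lua side), a minimal sketch of the Concat class initialized
+ * above; string contents are illustrative only:
+ *
+ *   local c = core.concat()
+ *   c:add("Hello, ")
+ *   c:add("world")
+ *   core.Debug(c:dump())  -- "Hello, world"
+ */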
+
+/* C backing storage for lua Queue class */
+struct hlua_queue {
+ uint32_t size;
+ struct mt_list list;
+ struct mt_list wait_tasks;
+};
+
+/* used to store lua objects in queue->list */
+struct hlua_queue_item {
+ int ref; /* lua object reference id */
+ struct mt_list list;
+};
+
+/* used to store wait entries in queue->wait_tasks */
+struct hlua_queue_wait {
+ struct task *task;
+ struct mt_list entry;
+};
+
+/* This is the memory pool containing struct hlua_queue_item (queue items)
+ */
+DECLARE_STATIC_POOL(pool_head_hlua_queue, "hlua_queue", sizeof(struct hlua_queue_item));
+
+/* This is the memory pool containing struct hlua_queue_wait
+ * (queue waiting tasks)
+ */
+DECLARE_STATIC_POOL(pool_head_hlua_queuew, "hlua_queuew", sizeof(struct hlua_queue_wait));
+
+static struct hlua_queue *hlua_check_queue(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_queue_ref);
+}
+
+/* queue:size(): returns an integer containing the current number of queued
+ * items.
+ */
+static int hlua_queue_size(lua_State *L)
+{
+ struct hlua_queue *queue = hlua_check_queue(L, 1);
+
+ BUG_ON(!queue);
+ lua_pushinteger(L, HA_ATOMIC_LOAD(&queue->size));
+
+ return 1;
+}
+
+/* queue:push(): push an item (any type, except nil) at the end of the queue
+ *
+ * Returns boolean:true for success and boolean:false on error
+ */
+static int hlua_queue_push(lua_State *L)
+{
+ struct hlua_queue *queue = hlua_check_queue(L, 1);
+ struct hlua_queue_item *item;
+ struct mt_list *elt1, elt2;
+ struct hlua_queue_wait *waiter;
+
+ if (lua_gettop(L) != 2 || lua_isnoneornil(L, 2)) {
+ luaL_error(L, "unexpected argument");
+ /* not reached */
+ return 0;
+ }
+ BUG_ON(!queue);
+
+ item = pool_alloc(pool_head_hlua_queue);
+ if (!item) {
+ /* memory error */
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ /* get a reference from lua object at the top of the stack */
+ item->ref = hlua_ref(L);
+
+ /* push new entry to the queue */
+ MT_LIST_INIT(&item->list);
+ HA_ATOMIC_INC(&queue->size);
+ MT_LIST_APPEND(&queue->list, &item->list);
+
+ /* notify tasks waiting on queue:pop_wait() (if any) */
+ mt_list_for_each_entry_safe(waiter, &queue->wait_tasks, entry, elt1, elt2) {
+ task_wakeup(waiter->task, TASK_WOKEN_MSG);
+ }
+
+ lua_pushboolean(L, 1);
+ return 1;
+}
+
+/* internal queue pop helper, returns 1 if it successfully popped an item
+ * from the queue and pushed it on lua stack.
+ *
+ * Else it returns 0 (nothing is pushed on the stack)
+ */
+static int _hlua_queue_pop(lua_State *L, struct hlua_queue *queue)
+{
+ struct hlua_queue_item *item;
+
+ item = MT_LIST_POP(&queue->list, typeof(item), list);
+ if (!item)
+ return 0; /* nothing in queue */
+
+ HA_ATOMIC_DEC(&queue->size);
+ /* push lua obj on the stack */
+ hlua_pushref(L, item->ref);
+
+ /* obj ref should be released right away since it was pushed
+ * on the stack and will not be used anymore
+ */
+ hlua_unref(L, item->ref);
+
+ /* free the queue item */
+ pool_free(pool_head_hlua_queue, item);
+
+ return 1;
+}
+
+/* queue:pop(): returns the first item at the top of the queue, or nil if
+ * the queue is empty.
+ */
+static int hlua_queue_pop(lua_State *L)
+{
+ struct hlua_queue *queue = hlua_check_queue(L, 1);
+
+ BUG_ON(!queue);
+ if (!_hlua_queue_pop(L, queue)) {
+ /* nothing in queue, push nil */
+ lua_pushnil(L);
+ }
+ return 1; /* either item or nil is at the top of the stack */
+}
+
+/* queue:pop_wait(): same as queue:pop() but waits until an item is
+ * available instead of returning nil on an empty queue.
+ *
+ * Aborts if used incorrectly and returns nil in case of memory error.
+ */
+static int _hlua_queue_pop_wait(lua_State *L, int status, lua_KContext ctx)
+{
+ struct hlua_queue *queue = hlua_check_queue(L, 1);
+ struct hlua_queue_wait *wait = lua_touserdata(L, 2);
+
+ /* new pop attempt */
+ if (!_hlua_queue_pop(L, queue)) {
+ hlua_yieldk(L, 0, 0, _hlua_queue_pop_wait, TICK_ETERNITY, 0); // wait retry
+ return 0; // never reached, yieldk won't return
+ }
+
+ /* remove task from waiting list */
+ MT_LIST_DELETE(&wait->entry);
+ pool_free(pool_head_hlua_queuew, wait);
+
+ return 1; // success
+}
+static int hlua_queue_pop_wait(lua_State *L)
+{
+ struct hlua_queue *queue = hlua_check_queue(L, 1);
+ struct hlua_queue_wait *wait;
+ struct hlua *hlua;
+
+ BUG_ON(!queue);
+
+ /* Get hlua struct, or NULL if we execute from main lua state */
+ hlua = hlua_gethlua(L);
+
+ if (!hlua || HLUA_CANT_YIELD(hlua)) {
+ luaL_error(L, "pop_wait() may only be used within task context "
+ "(requires yielding)");
+ return 0; /* not reached */
+ }
+
+ /* try opportunistic pop (there could already be pending items) */
+ if (_hlua_queue_pop(L, queue))
+ return 1; // success
+
+ /* no pending items, waiting required */
+
+ wait = pool_alloc(pool_head_hlua_queuew);
+ if (!wait) {
+ lua_pushnil(L);
+ return 1; /* memory error, return nil */
+ }
+
+ wait->task = hlua->task;
+ MT_LIST_INIT(&wait->entry);
+
+ /* add task to queue's wait list */
+ MT_LIST_TRY_APPEND(&queue->wait_tasks, &wait->entry);
+
+ /* push wait entry at index 2 on the stack (queue is already there) */
+ lua_pushlightuserdata(L, wait);
+
+ /* Go to waiting loop which immediately performs a new attempt to make
+ * sure we didn't miss a push during the wait entry initialization.
+ *
+ * _hlua_queue_pop_wait() won't return to us if it has to yield, which
+ * is the most likely scenario. What happens in this case is that yieldk
+ * call never returns, and instead Lua will call the continuation
+ * function after a successful resume, so the calling function will
+ * no longer be us, but Lua instead. And when the continuation function
+ * eventually returns (because it successfully popped an item), Lua will
+ * directly give the hand back to the Lua function that called us.
+ *
+ * More info here: https://www.lua.org/manual/5.4/manual.html#4.7
+ */
+ return _hlua_queue_pop_wait(L, LUA_OK, 0);
+}
+
+static int hlua_queue_new(lua_State *L)
+{
+ struct hlua_queue *q;
+
+ lua_newtable(L);
+
+ /* set class metatable */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_queue_ref);
+ lua_setmetatable(L, -2);
+
+ /* index:0 is queue userdata (c data) */
+ q = lua_newuserdata(L, sizeof(*q));
+ MT_LIST_INIT(&q->list);
+ MT_LIST_INIT(&q->wait_tasks);
+ q->size = 0;
+ lua_rawseti(L, -2, 0);
+
+ /* class methods */
+ hlua_class_function(L, "size", hlua_queue_size);
+ hlua_class_function(L, "pop", hlua_queue_pop);
+ hlua_class_function(L, "pop_wait", hlua_queue_pop_wait);
+ hlua_class_function(L, "push", hlua_queue_push);
+
+ return 1;
+}
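+
+/* Example (Lua side), a minimal sketch of the Queue class: a consumer
+ * task blocks on pop_wait() while a producer pushes items. Task bodies
+ * and item contents are illustrative only:
+ *
+ *   local q = core.queue()
+ *
+ *   core.register_task(function()
+ *       while true do
+ *           local item = q:pop_wait()  -- yields until an item is available
+ *           core.Debug("got: " .. tostring(item))
+ *       end
+ *   end)
+ *
+ *   core.register_task(function()
+ *       q:push("job-1")                -- returns true on success
+ *   end)
+ */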
+
+static int hlua_queue_gc(struct lua_State *L)
+{
+ struct hlua_queue *queue = hlua_check_queue(L, 1);
+ struct hlua_queue_wait *wait;
+ struct hlua_queue_item *item;
+
+ /* Purge waiting tasks (if any)
+ *
+ * It is normally not expected to have waiting tasks, except if such
+ * task has been aborted while in the middle of a queue:pop_wait()
+ * function call.
+ */
+ while ((wait = MT_LIST_POP(&queue->wait_tasks, typeof(wait), entry))) {
+ /* free the wait entry */
+ pool_free(pool_head_hlua_queuew, wait);
+ }
+
+ /* purge remaining (unconsumed) items in the queue */
+ while ((item = MT_LIST_POP(&queue->list, typeof(item), list))) {
+ /* free the queue item */
+ pool_free(pool_head_hlua_queue, item);
+ }
+
+ /* queue (userdata) will automatically be freed by lua gc */
+
+ return 0;
+}
+
+static void hlua_queue_init(lua_State *L)
+{
+ /* Creates the queue object. */
+ lua_newtable(L);
+
+ hlua_class_function(L, "__gc", hlua_queue_gc);
+
+ class_queue_ref = luaL_ref(L, LUA_REGISTRYINDEX);
+}
+
+int hlua_fcn_new_stktable(lua_State *L, struct stktable *tbl)
+{
+ lua_newtable(L);
+
+	/* Push the class stktable metatable and assign it to the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_stktable_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, tbl);
+ lua_rawseti(L, -2, 0);
+ return 1;
+}
+
+static struct stktable *hlua_check_stktable(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_stktable_ref);
+}
+
+/* Extract stick table attributes into Lua table */
+int hlua_stktable_info(lua_State *L)
+{
+ struct stktable *tbl;
+ int dt;
+
+ tbl = hlua_check_stktable(L, 1);
+
+ if (!tbl->id) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_newtable(L);
+
+ lua_pushstring(L, "type");
+ lua_pushstring(L, stktable_types[tbl->type].kw);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "length");
+ lua_pushinteger(L, tbl->key_size);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "size");
+ hlua_fcn_pushunsigned(L, tbl->size);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "used");
+ hlua_fcn_pushunsigned(L, tbl->current);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "nopurge");
+ lua_pushboolean(L, tbl->nopurge > 0);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "expire");
+ lua_pushinteger(L, tbl->expire);
+ lua_settable(L, -3);
+
+	/* Save each data type's period (if applicable) in the 'data' table */
+ lua_pushstring(L, "data");
+ lua_newtable(L);
+
+ for (dt = 0; dt < STKTABLE_DATA_TYPES; dt++) {
+ if (tbl->data_ofs[dt] == 0)
+ continue;
+
+ lua_pushstring(L, stktable_data_types[dt].name);
+
+ if (stktable_data_types[dt].arg_type == ARG_T_DELAY)
+ lua_pushinteger(L, tbl->data_arg[dt].u);
+ else
+ lua_pushinteger(L, -1);
+
+ lua_settable(L, -3);
+ }
+
+ lua_settable(L, -3);
+
+ return 1;
+}
+
+/* Helper to extract a stick table entry into a Lua table */
+static void hlua_stktable_entry(lua_State *L, struct stktable *t, struct stksess *ts)
+{
+ int dt;
+ void *ptr;
+
+ for (dt = 0; dt < STKTABLE_DATA_TYPES; dt++) {
+
+ ptr = stktable_data_ptr(t, ts, dt);
+ if (!ptr)
+ continue;
+
+ lua_pushstring(L, stktable_data_types[dt].name);
+
+ switch (stktable_data_types[dt].std_type) {
+ case STD_T_SINT:
+ lua_pushinteger(L, stktable_data_cast(ptr, std_t_sint));
+ break;
+ case STD_T_UINT:
+ hlua_fcn_pushunsigned(L, stktable_data_cast(ptr, std_t_uint));
+ break;
+ case STD_T_ULL:
+ hlua_fcn_pushunsigned_ll(L, stktable_data_cast(ptr, std_t_ull));
+ break;
+ case STD_T_FRQP:
+ lua_pushinteger(L, read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[dt].u));
+ break;
+ case STD_T_DICT: {
+ struct dict_entry *de;
+ de = stktable_data_cast(ptr, std_t_dict);
+ lua_pushstring(L, de ? (char *)de->value.key : "-");
+ break;
+ }
+ }
+
+ lua_settable(L, -3);
+ }
+}
+
+/* Looks in table <t> for a sticky session matching key <key>.
+ * Returns a table with the session data, or nil if the key is not found.
+ *
+ * The returned table always contains 'use' and 'expire' (integer) fields.
+ * Frequency/rate counters are returned as the rate measured over the
+ * configured period (integer).
+ */
+int hlua_stktable_lookup(lua_State *L)
+{
+ struct stktable *t;
+ struct sample smp;
+ struct stktable_key *skey;
+ struct stksess *ts;
+
+ t = hlua_check_stktable(L, 1);
+ smp.data.type = SMP_T_STR;
+ smp.flags = SMP_F_CONST;
+ smp.data.u.str.area = (char *)lua_tolstring(L, 2, &smp.data.u.str.data);
+
+ skey = smp_to_stkey(&smp, t);
+ if (!skey) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ ts = stktable_lookup_key(t, skey);
+ if (!ts) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_newtable(L);
+ lua_pushstring(L, "use");
+ lua_pushinteger(L, HA_ATOMIC_LOAD(&ts->ref_cnt) - 1);
+ lua_settable(L, -3);
+
+ lua_pushstring(L, "expire");
+ lua_pushinteger(L, tick_remain(now_ms, ts->expire));
+ lua_settable(L, -3);
+
+ hlua_stktable_entry(L, t, ts);
+ HA_ATOMIC_DEC(&ts->ref_cnt);
+
+ return 1;
+}
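+
+/* Example (Lua side), a minimal sketch of lookup(); the backend name and
+ * key are illustrative and assume a string-typed stick table declared on
+ * the "be_app" backend:
+ *
+ *   local t = core.backends["be_app"].stktable
+ *   local entry = t:lookup("some-key")
+ *   if entry then
+ *       core.Debug("use=" .. entry["use"] .. " expire=" .. entry["expire"])
+ *   end
+ */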
+
+struct stk_filter {
+ long long val;
+ int type;
+ int op;
+};
+
+
+/* Helper for returning errors to callers using Lua convention (nil, err) */
+static int hlua_error(lua_State *L, const char *fmt, ...) {
+ char buf[256];
+ int len;
+ va_list args;
+ va_start(args, fmt);
+ len = vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ if (len < 0) {
+ ha_alert("hlua_error(): Could not write error message.\n");
+ lua_pushnil(L);
+ return 1;
+ } else if (len >= sizeof(buf))
+ ha_alert("hlua_error(): Error message was truncated.\n");
+
+ lua_pushnil(L);
+ lua_pushstring(L, buf);
+
+ return 2;
+}
+
+/* Dump the contents of stick table <t> */
+int hlua_stktable_dump(lua_State *L)
+{
+ struct stktable *t;
+ struct ebmb_node *eb;
+ struct ebmb_node *n;
+ struct stksess *ts;
+ int type;
+ int op;
+ int dt;
+ long long val;
+ struct stk_filter filter[STKTABLE_FILTER_LEN];
+ int filter_count = 0;
+ int i;
+ int skip_entry;
+ void *ptr;
+
+ t = hlua_check_stktable(L, 1);
+ type = lua_type(L, 2);
+
+ switch (type) {
+ case LUA_TNONE:
+ case LUA_TNIL:
+ break;
+ case LUA_TTABLE:
+ lua_pushnil(L);
+ while (lua_next(L, 2) != 0) {
+ int entry_idx = 0;
+
+ if (filter_count >= STKTABLE_FILTER_LEN)
+ return hlua_error(L, "Filter table too large (len > %d)", STKTABLE_FILTER_LEN);
+
+ if (lua_type(L, -1) != LUA_TTABLE || lua_rawlen(L, -1) != 3)
+ return hlua_error(L, "Filter table entry must be a triplet: {\"data_col\", \"op\", val} (entry #%d)", filter_count + 1);
+
+ lua_pushnil(L);
+ while (lua_next(L, -2) != 0) {
+ switch (entry_idx) {
+ case 0:
+ if (lua_type(L, -1) != LUA_TSTRING)
+ return hlua_error(L, "Filter table data column must be string (entry #%d)", filter_count + 1);
+
+ dt = stktable_get_data_type((char *)lua_tostring(L, -1));
+ if (dt < 0 || t->data_ofs[dt] == 0)
+ return hlua_error(L, "Filter table data column not present in stick table (entry #%d)", filter_count + 1);
+ filter[filter_count].type = dt;
+ break;
+ case 1:
+ if (lua_type(L, -1) != LUA_TSTRING)
+ return hlua_error(L, "Filter table operator must be string (entry #%d)", filter_count + 1);
+
+ op = get_std_op(lua_tostring(L, -1));
+ if (op < 0)
+ return hlua_error(L, "Unknown operator in filter table (entry #%d)", filter_count + 1);
+ filter[filter_count].op = op;
+ break;
+ case 2:
+ val = lua_tointeger(L, -1);
+ filter[filter_count].val = val;
+ filter_count++;
+ break;
+ default:
+ break;
+ }
+ entry_idx++;
+ lua_pop(L, 1);
+ }
+ lua_pop(L, 1);
+ }
+ break;
+ default:
+ return hlua_error(L, "filter table expected");
+ }
+
+ lua_newtable(L);
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+ eb = ebmb_first(&t->keys);
+ for (n = eb; n; n = ebmb_next(n)) {
+ ts = ebmb_entry(n, struct stksess, key);
+ if (!ts) {
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+ return 1;
+ }
+ HA_ATOMIC_INC(&ts->ref_cnt);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ /* multi condition/value filter */
+ skip_entry = 0;
+ for (i = 0; i < filter_count; i++) {
+ ptr = stktable_data_ptr(t, ts, filter[i].type);
+ if (!ptr)
+ continue;
+
+ switch (stktable_data_types[filter[i].type].std_type) {
+ case STD_T_SINT:
+ val = stktable_data_cast(ptr, std_t_sint);
+ break;
+ case STD_T_UINT:
+ val = stktable_data_cast(ptr, std_t_uint);
+ break;
+ case STD_T_ULL:
+ val = stktable_data_cast(ptr, std_t_ull);
+ break;
+ case STD_T_FRQP:
+ val = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[filter[i].type].u);
+ break;
+ default:
+ continue;
+ break;
+ }
+
+ op = filter[i].op;
+
+ if ((val < filter[i].val && (op == STD_OP_EQ || op == STD_OP_GT || op == STD_OP_GE)) ||
+ (val == filter[i].val && (op == STD_OP_NE || op == STD_OP_GT || op == STD_OP_LT)) ||
+ (val > filter[i].val && (op == STD_OP_EQ || op == STD_OP_LT || op == STD_OP_LE))) {
+ skip_entry = 1;
+ break;
+ }
+ }
+
+ if (skip_entry) {
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+ HA_ATOMIC_DEC(&ts->ref_cnt);
+ continue;
+ }
+
+ if (t->type == SMP_T_IPV4) {
+ char addr[INET_ADDRSTRLEN];
+ inet_ntop(AF_INET, (const void *)&ts->key.key, addr, sizeof(addr));
+ lua_pushstring(L, addr);
+ } else if (t->type == SMP_T_IPV6) {
+ char addr[INET6_ADDRSTRLEN];
+ inet_ntop(AF_INET6, (const void *)&ts->key.key, addr, sizeof(addr));
+ lua_pushstring(L, addr);
+ } else if (t->type == SMP_T_SINT) {
+ lua_pushinteger(L, *ts->key.key);
+ } else if (t->type == SMP_T_STR) {
+ lua_pushstring(L, (const char *)ts->key.key);
+ } else {
+ return hlua_error(L, "Unsupported stick table key type");
+ }
+
+ lua_newtable(L);
+ hlua_stktable_entry(L, t, ts);
+ lua_settable(L, -3);
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+ HA_ATOMIC_DEC(&ts->ref_cnt);
+ }
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return 1;
+}
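+
+/* Example (Lua side), a minimal sketch of dump() with filters; assumes a
+ * stick table storing gpc0 and conn_cnt data (all names illustrative):
+ *
+ *   local t = core.backends["be_app"].stktable
+ *   local entries = t:dump({{"gpc0", "gt", 30}, {"conn_cnt", "ge", 1}})
+ *   for key, data in pairs(entries) do
+ *       core.Debug(key .. ": gpc0=" .. data["gpc0"])
+ *   end
+ */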
+
+int hlua_fcn_new_listener(lua_State *L, struct listener *lst)
+{
+ lua_newtable(L);
+
+	/* Push the class listener metatable and assign it to the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_listener_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, lst);
+ lua_rawseti(L, -2, 0);
+ return 1;
+}
+
+static struct listener *hlua_check_listener(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_listener_ref);
+}
+
+int hlua_listener_get_stats(lua_State *L)
+{
+ struct listener *li;
+ int i;
+
+ li = hlua_check_listener(L, 1);
+
+ if (!li->bind_conf->frontend) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ stats_fill_li_stats(li->bind_conf->frontend, li, STAT_SHLGNDS, stats,
+ STATS_LEN, NULL);
+
+ lua_newtable(L);
+ for (i=0; i<ST_F_TOTAL_FIELDS; i++) {
+ lua_pushstring(L, stat_fields[i].name);
+ hlua_fcn_pushfield(L, &stats[i]);
+ lua_settable(L, -3);
+ }
+ return 1;
+
+}
+
+int hlua_server_gc(lua_State *L)
+{
+ struct server *srv = hlua_checkudata(L, 1, class_server_ref);
+
+ srv_drop(srv); /* srv_drop allows NULL srv */
+ return 0;
+}
+
+static struct server *hlua_check_server(lua_State *L, int ud)
+{
+ struct server *srv = hlua_checkudata(L, ud, class_server_ref);
+ if (srv->flags & SRV_F_DELETED) {
+ return NULL;
+ }
+ return srv;
+}
+
+int hlua_server_get_stats(lua_State *L)
+{
+ struct server *srv;
+ int i;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ if (!srv->proxy) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ stats_fill_sv_stats(srv->proxy, srv, STAT_SHLGNDS, stats,
+ STATS_LEN, NULL);
+
+ lua_newtable(L);
+ for (i=0; i<ST_F_TOTAL_FIELDS; i++) {
+ lua_pushstring(L, stat_fields[i].name);
+ hlua_fcn_pushfield(L, &stats[i]);
+ lua_settable(L, -3);
+ }
+ return 1;
+
+}
+
+int hlua_server_get_proxy(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ if (!srv->proxy) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ hlua_fcn_new_proxy(L, srv->proxy);
+ return 1;
+}
+
+int hlua_server_get_addr(lua_State *L)
+{
+ struct server *srv;
+ char addr[INET6_ADDRSTRLEN];
+ luaL_Buffer b;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ luaL_buffinit(L, &b);
+
+ switch (srv->addr.ss_family) {
+ case AF_INET:
+ inet_ntop(AF_INET, &((struct sockaddr_in *)&srv->addr)->sin_addr,
+ addr, INET_ADDRSTRLEN);
+ luaL_addstring(&b, addr);
+ luaL_addstring(&b, ":");
+ snprintf(addr, INET_ADDRSTRLEN, "%d", srv->svc_port);
+ luaL_addstring(&b, addr);
+ break;
+ case AF_INET6:
+ inet_ntop(AF_INET6, &((struct sockaddr_in6 *)&srv->addr)->sin6_addr,
+ addr, INET6_ADDRSTRLEN);
+ luaL_addstring(&b, addr);
+ luaL_addstring(&b, ":");
+ snprintf(addr, INET_ADDRSTRLEN, "%d", srv->svc_port);
+ luaL_addstring(&b, addr);
+ break;
+ case AF_UNIX:
+ luaL_addstring(&b, (char *)((struct sockaddr_un *)&srv->addr)->sun_path);
+ break;
+ default:
+ luaL_addstring(&b, "<unknown>");
+ break;
+ }
+
+ luaL_pushresult(&b);
+ return 1;
+}
+
+int hlua_server_get_puid(lua_State *L)
+{
+ struct server *srv;
+ char buffer[12];
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ snprintf(buffer, sizeof(buffer), "%d", srv->puid);
+ lua_pushstring(L, buffer);
+ return 1;
+}
+
+int hlua_server_get_rid(lua_State *L)
+{
+ struct server *srv;
+ char buffer[12];
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ snprintf(buffer, sizeof(buffer), "%d", srv->rid);
+ lua_pushstring(L, buffer);
+ return 1;
+}
+
+int hlua_server_get_name(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushstring(L, srv->id);
+ return 1;
+}
+
+/* __index metamethod for the server class:
+ * provides support for additional keys that are missing from the main table.
+ * stack:1 = table (server class), stack:2 = requested key
+ * Returns 1 if the key is supported,
+ * else returns 0 to make Lua return a NIL value to the caller.
+ */
+static int hlua_server_index(struct lua_State *L)
+{
+ const char *key = lua_tostring(L, 2);
+
+ if (!strcmp(key, "name")) {
+ if (ONLY_ONCE())
+ ha_warning("hlua: use of server 'name' attribute is deprecated and will eventually be removed, please use get_name() function instead: %s\n", hlua_traceback(L, ", "));
+ lua_pushvalue(L, 1);
+ hlua_server_get_name(L);
+ return 1;
+ }
+ if (!strcmp(key, "puid")) {
+ if (ONLY_ONCE())
+ ha_warning("hlua: use of server 'puid' attribute is deprecated and will eventually be removed, please use get_puid() function instead: %s\n", hlua_traceback(L, ", "));
+ lua_pushvalue(L, 1);
+ hlua_server_get_puid(L);
+ return 1;
+ }
+ /* unknown attribute */
+ return 0;
+}
+
+int hlua_server_is_draining(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushboolean(L, server_is_draining(srv));
+ return 1;
+}
+
+int hlua_server_is_backup(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushboolean(L, (srv->flags & SRV_F_BACKUP));
+ return 1;
+}
+
+int hlua_server_is_dynamic(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushboolean(L, (srv->flags & SRV_F_DYNAMIC));
+ return 1;
+}
+
+int hlua_server_get_cur_sess(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushinteger(L, srv->cur_sess);
+ return 1;
+}
+
+int hlua_server_get_pend_conn(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushinteger(L, srv->queue.length);
+ return 1;
+}
+
+int hlua_server_set_maxconn(lua_State *L)
+{
+ struct server *srv;
+ const char *maxconn;
+ const char *err;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ maxconn = luaL_checkstring(L, 2);
+
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ err = server_parse_maxconn_change_request(srv, maxconn);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ if (!err)
+ lua_pushnil(L);
+ else
+ hlua_pushstrippedstring(L, err);
+ return 1;
+}
+
+int hlua_server_get_maxconn(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushinteger(L, srv->maxconn);
+ return 1;
+}
+
+int hlua_server_set_weight(lua_State *L)
+{
+ struct server *srv;
+ const char *weight;
+ const char *err;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ weight = luaL_checkstring(L, 2);
+
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ err = server_parse_weight_change_request(srv, weight);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ if (!err)
+ lua_pushnil(L);
+ else
+ hlua_pushstrippedstring(L, err);
+ return 1;
+}
+
+int hlua_server_get_weight(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushinteger(L, srv->uweight);
+ return 1;
+}
+
+int hlua_server_set_addr(lua_State *L)
+{
+ struct server *srv;
+ const char *addr;
+ const char *port;
+ const char *err;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ addr = luaL_checkstring(L, 2);
+ if (lua_gettop(L) >= 3)
+ port = luaL_checkstring(L, 3);
+ else
+ port = NULL;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ err = srv_update_addr_port(srv, addr, port, "Lua script");
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ if (!err)
+ lua_pushnil(L);
+ else
+ hlua_pushstrippedstring(L, err);
+ return 1;
+}
+
+int hlua_server_shut_sess(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_shutdown_streams(srv, SF_ERR_KILLED);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ return 0;
+}
+
+int hlua_server_set_drain(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_adm_set_drain(srv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ return 0;
+}
+
+int hlua_server_set_maint(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_adm_set_maint(srv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ return 0;
+}
+
+int hlua_server_set_ready(lua_State *L)
+{
+ struct server *srv;
+
+ srv = hlua_check_server(L, 1);
+ if (srv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_adm_set_ready(srv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ return 0;
+}
+
+int hlua_server_check_enable(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->check.state & CHK_ST_CONFIGURED) {
+ sv->check.state |= CHK_ST_ENABLED;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_check_disable(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->check.state & CHK_ST_CONFIGURED) {
+ sv->check.state &= ~CHK_ST_ENABLED;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_check_force_up(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->track)) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_running(sv, SRV_OP_STCHGC_LUA);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_check_force_nolb(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->track)) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_stopping(sv, SRV_OP_STCHGC_LUA);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_check_force_down(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->track)) {
+ sv->check.health = 0;
+ srv_set_stopped(sv, SRV_OP_STCHGC_LUA);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_agent_enable(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->agent.state & CHK_ST_CONFIGURED) {
+ sv->agent.state |= CHK_ST_ENABLED;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_agent_disable(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->agent.state & CHK_ST_CONFIGURED) {
+ sv->agent.state &= ~CHK_ST_ENABLED;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_agent_force_up(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->agent.state & CHK_ST_ENABLED) {
+ sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
+ srv_set_running(sv, SRV_OP_STCHGC_LUA);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+int hlua_server_agent_force_down(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->agent.state & CHK_ST_ENABLED) {
+ sv->agent.health = 0;
+ srv_set_stopped(sv, SRV_OP_STCHGC_LUA);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 0;
+}
+
+/* returns the tracked server, if any */
+int hlua_server_tracking(lua_State *L)
+{
+ struct server *sv;
+ struct server *tracked;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+
+ tracked = sv->track;
+ if (tracked == NULL)
+ lua_pushnil(L);
+ else
+ hlua_fcn_new_server(L, tracked);
+
+ return 1;
+}
+
+/* returns an array of servers tracking the current server */
+int hlua_server_get_trackers(lua_State *L)
+{
+ struct server *sv;
+ struct server *cur_tracker;
+ int index;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+
+ lua_newtable(L);
+ cur_tracker = sv->trackers;
+ for (index = 1; cur_tracker; cur_tracker = cur_tracker->tracknext, index++) {
+ if (!lua_checkstack(L, 5))
+ luaL_error(L, "Lua out of memory error.");
+ hlua_fcn_new_server(L, cur_tracker);
+ /* array index starts at 1 in Lua */
+ lua_rawseti(L, -2, index);
+ }
+ return 1;
+}
+
+/* hlua_event_sub wrapper for per-server subscription:
+ *
+ * hlua_event_sub() is called with sv->e_subs subscription list and
+ * lua arguments are passed as-is (skipping the first argument which
+ * is the server ctx)
+ */
+int hlua_server_event_sub(lua_State *L)
+{
+ struct server *sv;
+
+ sv = hlua_check_server(L, 1);
+ if (sv == NULL) {
+ return 0;
+ }
+ /* remove first argument from the stack (server) */
+ lua_remove(L, 1);
+
+ /* try to subscribe within server's subscription list */
+ return hlua_event_sub(L, &sv->e_subs);
+}
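+
+/* Example (Lua side), a minimal sketch of per-server subscription; event
+ * names and callback arguments follow the core.event_sub() API, and the
+ * server lookup is illustrative:
+ *
+ *   local srv = core.backends["be_app"].servers["srv1"]
+ *   srv:event_sub({"SERVER_UP", "SERVER_DOWN"}, function(event, data)
+ *       core.Debug("server event: " .. event)
+ *   end)
+ */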
+
+int hlua_fcn_new_server(lua_State *L, struct server *srv)
+{
+ lua_newtable(L);
+
+	/* Push the class server metatable and assign it to the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_server_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, srv);
+ lua_rawseti(L, -2, 0);
+
+ /* userdata is affected: increment server refcount */
+ srv_take(srv);
+
+ /* set public methods */
+ hlua_class_function(L, "get_name", hlua_server_get_name);
+ hlua_class_function(L, "get_puid", hlua_server_get_puid);
+ hlua_class_function(L, "get_rid", hlua_server_get_rid);
+ hlua_class_function(L, "is_draining", hlua_server_is_draining);
+ hlua_class_function(L, "is_backup", hlua_server_is_backup);
+ hlua_class_function(L, "is_dynamic", hlua_server_is_dynamic);
+ hlua_class_function(L, "get_cur_sess", hlua_server_get_cur_sess);
+ hlua_class_function(L, "get_pend_conn", hlua_server_get_pend_conn);
+ hlua_class_function(L, "set_maxconn", hlua_server_set_maxconn);
+ hlua_class_function(L, "get_maxconn", hlua_server_get_maxconn);
+ hlua_class_function(L, "set_weight", hlua_server_set_weight);
+ hlua_class_function(L, "get_weight", hlua_server_get_weight);
+ hlua_class_function(L, "set_addr", hlua_server_set_addr);
+ hlua_class_function(L, "get_addr", hlua_server_get_addr);
+ hlua_class_function(L, "get_stats", hlua_server_get_stats);
+ hlua_class_function(L, "get_proxy", hlua_server_get_proxy);
+ hlua_class_function(L, "shut_sess", hlua_server_shut_sess);
+ hlua_class_function(L, "set_drain", hlua_server_set_drain);
+ hlua_class_function(L, "set_maint", hlua_server_set_maint);
+ hlua_class_function(L, "set_ready", hlua_server_set_ready);
+ hlua_class_function(L, "check_enable", hlua_server_check_enable);
+ hlua_class_function(L, "check_disable", hlua_server_check_disable);
+ hlua_class_function(L, "check_force_up", hlua_server_check_force_up);
+ hlua_class_function(L, "check_force_nolb", hlua_server_check_force_nolb);
+ hlua_class_function(L, "check_force_down", hlua_server_check_force_down);
+ hlua_class_function(L, "agent_enable", hlua_server_agent_enable);
+ hlua_class_function(L, "agent_disable", hlua_server_agent_disable);
+ hlua_class_function(L, "agent_force_up", hlua_server_agent_force_up);
+ hlua_class_function(L, "agent_force_down", hlua_server_agent_force_down);
+ hlua_class_function(L, "tracking", hlua_server_tracking);
+ hlua_class_function(L, "get_trackers", hlua_server_get_trackers);
+ hlua_class_function(L, "event_sub", hlua_server_event_sub);
+
+ return 1;
+}
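+
+/* Example (Lua side), a minimal sketch of the Server class built above;
+ * proxy and server names are illustrative only:
+ *
+ *   local srv = core.backends["be_app"].servers["srv1"]
+ *   if srv then
+ *       core.Debug(srv:get_name() .. " @ " .. srv:get_addr())
+ *       srv:set_weight("50%")  -- same syntax as the CLI "set weight"
+ *   end
+ */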
+
+static struct hlua_server_list *hlua_check_server_list(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_server_list_ref);
+}
+
+/* does nothing and returns 0, only prevents insertions in the
+ * table which represents the list of servers
+ */
+int hlua_listable_servers_newindex(lua_State *L) {
+ return 0;
+}
+
+/* first arg is the table (struct hlua_server_list * in metadata)
+ * second arg is the required index
+ */
+int hlua_listable_servers_index(lua_State *L)
+{
+ struct hlua_server_list *hlua_srv;
+ const char *name;
+ struct server *srv;
+
+ hlua_srv = hlua_check_server_list(L, 1);
+ name = luaL_checkstring(L, 2);
+
+ /* Perform a server lookup in px list */
+ srv = server_find_by_name(hlua_srv->px, name);
+ if (srv == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ hlua_fcn_new_server(L, srv);
+ return 1;
+}
+
+/* The iterator returns the key as a string and the value as a server
+ * object; when the end of the list is reached, it returns nil.
+ * The context stores the last returned server: if it contains
+ * cur == NULL, enumeration starts from the first server, then the
+ * 'next' pointer is used to walk through the list.
+ */
+int hlua_listable_servers_pairs_iterator(lua_State *L)
+{
+ int context_index;
+ struct hlua_server_list_iterator_context *ctx;
+
+ context_index = lua_upvalueindex(1);
+ ctx = lua_touserdata(L, context_index);
+
+ if (ctx->cur == NULL) {
+ /* First iteration, initialize list on the first server */
+ ctx->cur = ctx->px->srv;
+ } else {
+
+ /* Next server (next ptr is always valid, even if current
+ * server has the SRV_F_DELETED flag set)
+ */
+ ctx->cur = ctx->cur->next;
+ }
+
+ /* next server is null, end of iteration */
+ if (ctx->cur == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushstring(L, ctx->cur->id);
+ hlua_fcn_new_server(L, ctx->cur);
+ return 2;
+}
+
+/* init the iterator context, return iterator function
+ * with context as closure. The only argument is a
+ * server list object.
+ */
+int hlua_listable_servers_pairs(lua_State *L)
+{
+ struct hlua_server_list_iterator_context *ctx;
+ struct hlua_server_list *hlua_srv_list;
+
+ hlua_srv_list = hlua_check_server_list(L, 1);
+
+ ctx = lua_newuserdata(L, sizeof(*ctx));
+ ctx->px = hlua_srv_list->px;
+ ctx->cur = NULL;
+
+ lua_pushcclosure(L, hlua_listable_servers_pairs_iterator, 1);
+ return 1;
+}
+
+void hlua_listable_servers(lua_State *L, struct proxy *px)
+{
+ struct hlua_server_list *list;
+
+ lua_newtable(L);
+ list = lua_newuserdata(L, sizeof(*list));
+ list->px = px;
+ lua_rawseti(L, -2, 0);
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_server_list_ref);
+ lua_setmetatable(L, -2);
+}
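+
+/* Example (Lua side), a minimal sketch iterating over a servers list
+ * ("be_app" is illustrative):
+ *
+ *   for name, srv in pairs(core.backends["be_app"].servers) do
+ *       core.Debug(name .. (srv:is_backup() and " (backup)" or ""))
+ *   end
+ */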
+
+static struct proxy *hlua_check_proxy(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_proxy_ref);
+}
+
+int hlua_proxy_get_name(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ lua_pushstring(L, px->id);
+ return 1;
+}
+
+int hlua_proxy_get_uuid(lua_State *L)
+{
+ struct proxy *px;
+ char buffer[17];
+
+ px = hlua_check_proxy(L, 1);
+ snprintf(buffer, sizeof(buffer), "%d", px->uuid);
+ lua_pushstring(L, buffer);
+ return 1;
+}
+
+/* __index metamethod for the proxy class:
+ * provides support for additional keys that are missing from the main table.
+ * stack:1 = table (proxy class), stack:2 = requested key
+ * Returns 1 if the key is supported,
+ * else returns 0 to make Lua return a NIL value to the caller.
+ */
+static int hlua_proxy_index(struct lua_State *L)
+{
+ const char *key = lua_tostring(L, 2);
+
+ if (!strcmp(key, "name")) {
+ if (ONLY_ONCE())
+ ha_warning("hlua: use of proxy 'name' attribute is deprecated and will eventually be removed, please use get_name() function instead: %s\n", hlua_traceback(L, ", "));
+ lua_pushvalue(L, 1);
+ hlua_proxy_get_name(L);
+ return 1;
+ }
+ if (!strcmp(key, "uuid")) {
+ if (ONLY_ONCE())
+ ha_warning("hlua: use of proxy 'uuid' attribute is deprecated and will eventually be removed, please use get_uuid() function instead: %s\n", hlua_traceback(L, ", "));
+ lua_pushvalue(L, 1);
+ hlua_proxy_get_uuid(L);
+ return 1;
+ }
+ /* unknown attribute */
+ return 0;
+}
+
+int hlua_proxy_pause(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ /* safe to call without PROXY_LOCK - pause_proxy takes it */
+ pause_proxy(px);
+ return 0;
+}
+
+int hlua_proxy_resume(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ /* safe to call without PROXY_LOCK - resume_proxy takes it */
+ resume_proxy(px);
+ return 0;
+}
+
+int hlua_proxy_stop(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ /* safe to call without PROXY_LOCK - stop_proxy takes it */
+ stop_proxy(px);
+ return 0;
+}
+
+int hlua_proxy_get_cap(lua_State *L)
+{
+ struct proxy *px;
+ const char *str;
+
+ px = hlua_check_proxy(L, 1);
+ str = proxy_cap_str(px->cap);
+ lua_pushstring(L, str);
+ return 1;
+}
+
+int hlua_proxy_get_stats(lua_State *L)
+{
+ struct proxy *px;
+ int i;
+
+ px = hlua_check_proxy(L, 1);
+ if (px->cap & PR_CAP_BE)
+ stats_fill_be_stats(px, STAT_SHLGNDS, stats, STATS_LEN, NULL);
+ else
+ stats_fill_fe_stats(px, stats, STATS_LEN, NULL);
+ lua_newtable(L);
+ for (i=0; i<ST_F_TOTAL_FIELDS; i++) {
+ lua_pushstring(L, stat_fields[i].name);
+ hlua_fcn_pushfield(L, &stats[i]);
+ lua_settable(L, -3);
+ }
+ return 1;
+}
+
+int hlua_proxy_get_mode(lua_State *L)
+{
+ struct proxy *px;
+ const char *str;
+
+ px = hlua_check_proxy(L, 1);
+ str = proxy_mode_str(px->mode);
+ lua_pushstring(L, str);
+ return 1;
+}
+
+int hlua_proxy_shut_bcksess(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ srv_shutdown_backup_streams(px, SF_ERR_KILLED);
+ return 0;
+}
+
+int hlua_proxy_get_srv_act(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ lua_pushinteger(L, px->srv_act);
+ return 1;
+}
+
+int hlua_proxy_get_srv_bck(lua_State *L)
+{
+ struct proxy *px;
+
+ px = hlua_check_proxy(L, 1);
+ lua_pushinteger(L, px->srv_bck);
+ return 1;
+}
+
+/* Get the mailers config info, used to implement email alert sending
+ * from Lua according to the mailers configuration.
+ */
+int hlua_proxy_get_mailers(lua_State *L)
+{
+ struct proxy *px;
+ int it;
+ struct mailer *mailer;
+
+ px = hlua_check_proxy(L, 1);
+
+ if (!px->email_alert.mailers.m)
+ return 0; /* email-alert mailers not found on proxy */
+
+ lua_newtable(L);
+
+ /* option log-health-checks */
+ lua_pushstring(L, "track_server_health");
+ lua_pushboolean(L, (px->options2 & PR_O2_LOGHCHKS));
+ lua_settable(L, -3);
+
+ /* email-alert level */
+ lua_pushstring(L, "log_level");
+ lua_pushinteger(L, px->email_alert.level);
+ lua_settable(L, -3);
+
+ /* email-alert mailers */
+ lua_pushstring(L, "mailservers");
+ lua_newtable(L);
+ for (it = 0, mailer = px->email_alert.mailers.m->mailer_list;
+ it < px->email_alert.mailers.m->count; it++, mailer = mailer->next) {
+ char *srv_address;
+
+ lua_pushstring(L, mailer->id);
+
+ /* For now, we depend on mailer->addr to restore mailer's address which
+ * was converted using str2sa_range() on startup.
+ *
+ * FIXME?:
+ * It could be a good idea to pass the raw address (unparsed) to allow fqdn
+ * to be resolved at runtime, unless we consider this as a pure legacy mode
+ * and mailers config support is going to be removed in the future?
+ */
+ srv_address = sa2str(&mailer->addr, get_host_port(&mailer->addr), 0);
+ if (srv_address) {
+ lua_pushstring(L, srv_address);
+ ha_free(&srv_address);
+ lua_settable(L, -3);
+ }
+ }
+ lua_settable(L, -3);
+
+ /* mailers timeout (from mailers section) */
+ lua_pushstring(L, "mailservers_timeout");
+ lua_pushinteger(L, px->email_alert.mailers.m->timeout.mail);
+ lua_settable(L, -3);
+
+ /* email-alert myhostname */
+ lua_pushstring(L, "smtp_hostname");
+ lua_pushstring(L, px->email_alert.myhostname);
+ lua_settable(L, -3);
+
+ /* email-alert from */
+ lua_pushstring(L, "smtp_from");
+ lua_pushstring(L, px->email_alert.from);
+ lua_settable(L, -3);
+
+ /* email-alert to */
+ lua_pushstring(L, "smtp_to");
+ lua_pushstring(L, px->email_alert.to);
+ lua_settable(L, -3);
+
+ return 1;
+}
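+
+/* Example (Lua side), a minimal sketch of get_mailers(); assumes an
+ * "email-alert mailers" setting on the (illustrative) proxy:
+ *
+ *   local m = core.backends["be_app"]:get_mailers()
+ *   if m then
+ *       for id, addr in pairs(m.mailservers) do
+ *           core.Debug("mailer " .. id .. " -> " .. addr)
+ *       end
+ *   end
+ */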
+
+int hlua_fcn_new_proxy(lua_State *L, struct proxy *px)
+{
+ struct listener *lst;
+ int lid;
+ char buffer[17];
+
+ lua_newtable(L);
+
+	/* Push the class proxy metatable and assign it to the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_proxy_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, px);
+ lua_rawseti(L, -2, 0);
+
+ /* set public methods */
+ hlua_class_function(L, "get_name", hlua_proxy_get_name);
+ hlua_class_function(L, "get_uuid", hlua_proxy_get_uuid);
+ hlua_class_function(L, "pause", hlua_proxy_pause);
+ hlua_class_function(L, "resume", hlua_proxy_resume);
+ hlua_class_function(L, "stop", hlua_proxy_stop);
+ hlua_class_function(L, "shut_bcksess", hlua_proxy_shut_bcksess);
+ hlua_class_function(L, "get_cap", hlua_proxy_get_cap);
+ hlua_class_function(L, "get_mode", hlua_proxy_get_mode);
+ hlua_class_function(L, "get_srv_act", hlua_proxy_get_srv_act);
+ hlua_class_function(L, "get_srv_bck", hlua_proxy_get_srv_bck);
+ hlua_class_function(L, "get_stats", hlua_proxy_get_stats);
+ hlua_class_function(L, "get_mailers", hlua_proxy_get_mailers);
+
+ /* Browse and register servers. */
+ lua_pushstring(L, "servers");
+ hlua_listable_servers(L, px);
+ lua_settable(L, -3);
+
+ /* Browse and register listeners. */
+ lua_pushstring(L, "listeners");
+ lua_newtable(L);
+ lid = 1;
+ list_for_each_entry(lst, &px->conf.listeners, by_fe) {
+ if (lst->name)
+ lua_pushstring(L, lst->name);
+ else {
+ snprintf(buffer, sizeof(buffer), "sock-%d", lid);
+ lid++;
+ lua_pushstring(L, buffer);
+ }
+ hlua_fcn_new_listener(L, lst);
+ lua_settable(L, -3);
+ }
+ lua_settable(L, -3);
+
+ if (px->table && px->table->id) {
+ lua_pushstring(L, "stktable");
+ hlua_fcn_new_stktable(L, px->table);
+ lua_settable(L, -3);
+ }
+
+ return 1;
+}
+
+static struct hlua_proxy_list *hlua_check_proxy_list(lua_State *L, int ud)
+{
+ return hlua_checkudata(L, ud, class_proxy_list_ref);
+}
+
+/* does nothing and returns 0, only prevents insertions in the
+ * table which represents the list of proxies
+ */
+int hlua_listable_proxies_newindex(lua_State *L) {
+ return 0;
+}
+
+/* first arg is the table (struct hlua_proxy_list * in metadata)
+ * second arg is the required index
+ */
+int hlua_listable_proxies_index(lua_State *L)
+{
+ struct hlua_proxy_list *hlua_px;
+ const char *name;
+ struct proxy *px;
+
+ hlua_px = hlua_check_proxy_list(L, 1);
+ name = luaL_checkstring(L, 2);
+
+ px = NULL;
+ if (hlua_px->capabilities & PR_CAP_FE) {
+ px = proxy_find_by_name(name, PR_CAP_FE, 0);
+ }
+ if (!px && hlua_px->capabilities & PR_CAP_BE) {
+ px = proxy_find_by_name(name, PR_CAP_BE, 0);
+ }
+ if (px == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ hlua_fcn_new_proxy(L, px);
+ return 1;
+}
+
+static inline int hlua_listable_proxies_match(struct proxy *px, char cap) {
+ return ((px->cap & cap) && !(px->cap & (PR_CAP_DEF | PR_CAP_INT)));
+}
+
+/* iterator must return key as string and value as proxy
+ * object, if we reach end of list, it returns nil
+ */
+int hlua_listable_proxies_pairs_iterator(lua_State *L)
+{
+ int context_index;
+ struct hlua_proxy_list_iterator_context *ctx;
+
+ context_index = lua_upvalueindex(1);
+ ctx = lua_touserdata(L, context_index);
+
+ if (ctx->next == NULL) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ lua_pushstring(L, ctx->next->id);
+ hlua_fcn_new_proxy(L, ctx->next);
+
+ for (ctx->next = ctx->next->next;
+ ctx->next && !hlua_listable_proxies_match(ctx->next, ctx->capabilities);
+ ctx->next = ctx->next->next);
+
+ return 2;
+}
+
+/* init the iterator context, return iterator function
+ * with context as closure. The only argument is a
+ * proxy list object.
+ */
+int hlua_listable_proxies_pairs(lua_State *L)
+{
+ struct hlua_proxy_list_iterator_context *ctx;
+ struct hlua_proxy_list *hlua_px;
+
+ hlua_px = hlua_check_proxy_list(L, 1);
+
+ ctx = lua_newuserdata(L, sizeof(*ctx));
+
+ ctx->capabilities = hlua_px->capabilities;
+ for (ctx->next = proxies_list;
+ ctx->next && !hlua_listable_proxies_match(ctx->next, ctx->capabilities);
+ ctx->next = ctx->next->next);
+ lua_pushcclosure(L, hlua_listable_proxies_pairs_iterator, 1);
+ return 1;
+}
+
+void hlua_listable_proxies(lua_State *L, char capabilities)
+{
+ struct hlua_proxy_list *list;
+
+ lua_newtable(L);
+ list = lua_newuserdata(L, sizeof(*list));
+ list->capabilities = capabilities;
+ lua_rawseti(L, -2, 0);
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_proxy_list_ref);
+ lua_setmetatable(L, -2);
+}
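+
+/* Example (Lua side), a minimal sketch of the proxy lists registered in
+ * hlua_fcn_reg_core_fcn() below:
+ *
+ *   for name, px in pairs(core.proxies) do
+ *       core.Debug(name .. " mode=" .. px:get_mode() .. " cap=" .. px:get_cap())
+ *   end
+ */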
+
+int hlua_event_sub_unsub(lua_State *L)
+{
+ struct event_hdl_sub *sub = hlua_checkudata(L, 1, class_event_sub_ref);
+
+ BUG_ON(!sub);
+ event_hdl_take(sub); /* keep a reference on sub until the item is GCed */
+ event_hdl_unsubscribe(sub); /* will automatically call event_hdl_drop() */
+ return 0;
+}
+
+int hlua_event_sub_gc(lua_State *L)
+{
+ struct event_hdl_sub *sub = hlua_checkudata(L, 1, class_event_sub_ref);
+
+ BUG_ON(!sub);
+ event_hdl_drop(sub); /* final drop of the reference */
+ return 0;
+}
+
+int hlua_fcn_new_event_sub(lua_State *L, struct event_hdl_sub *sub)
+{
+ lua_newtable(L);
+
+	/* Push the class event_sub metatable and assign it to the new table. */
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_event_sub_ref);
+ lua_setmetatable(L, -2);
+
+ lua_pushlightuserdata(L, sub);
+ lua_rawseti(L, -2, 0);
+
+ /* userdata is affected: increment sub refcount */
+ event_hdl_take(sub);
+
+ /* set public methods */
+ hlua_class_function(L, "unsub", hlua_event_sub_unsub);
+
+ return 1;
+}
+
+/* This Lua function takes a string and a list of separator characters,
+ * and tokenizes the input string on any of these separators. An optional
+ * third boolean argument makes it skip empty tokens.
+ *
+ * The function returns a table filled with tokens.
+ */
+int hlua_tokenize(lua_State *L)
+{
+ const char *str;
+ const char *sep;
+ int index;
+ const char *token;
+ const char *p;
+ const char *c;
+ int ignore_empty;
+
+ ignore_empty = 0;
+
+ str = luaL_checkstring(L, 1);
+ sep = luaL_checkstring(L, 2);
+ if (lua_gettop(L) == 3)
+ ignore_empty = hlua_checkboolean(L, 3);
+
+ lua_newtable(L);
+ index = 1;
+ token = str;
+ p = str;
+ while(1) {
+ for (c = sep; *c != '\0'; c++)
+ if (*p == *c)
+ break;
+ if (*p == *c) {
+ if ((!ignore_empty) || (p - token > 0)) {
+ lua_pushlstring(L, token, p - token);
+ lua_rawseti(L, -2, index);
+ index++;
+ }
+ token = p + 1;
+ }
+ if (*p == '\0')
+ break;
+ p++;
+ }
+
+ return 1;
+}
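+
+/* Example (Lua side): with the third argument set, consecutive
+ * separators do not produce empty tokens:
+ *
+ *   core.tokenize("a,b;;c", ",;")        -- { "a", "b", "", "c" }
+ *   core.tokenize("a,b;;c", ",;", true)  -- { "a", "b", "c" }
+ */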
+
+int hlua_parse_addr(lua_State *L)
+{
+ struct net_addr *addr;
+ const char *str = luaL_checkstring(L, 1);
+ unsigned char mask;
+
+ addr = lua_newuserdata(L, sizeof(struct net_addr));
+ if (!addr) {
+ lua_pushnil(L);
+ return 1;
+ }
+
+ if (str2net(str, PAT_MF_NO_DNS, &addr->addr.v4.ip, &addr->addr.v4.mask)) {
+ addr->family = AF_INET;
+ return 1;
+ }
+
+ if (str62net(str, &addr->addr.v6.ip, &mask)) {
+ len2mask6(mask, &addr->addr.v6.mask);
+ addr->family = AF_INET6;
+ return 1;
+ }
+
+ lua_pop(L, 1);
+ lua_pushnil(L);
+ return 1;
+}
+
+int hlua_match_addr(lua_State *L)
+{
+ struct net_addr *addr1;
+ struct net_addr *addr2;
+
+ if (!lua_isuserdata(L, 1) ||
+ !lua_isuserdata(L, 2)) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ addr1 = lua_touserdata(L, 1);
+ addr2 = lua_touserdata(L, 2);
+
+ if (addr1->family != addr2->family) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+ if (addr1->family == AF_INET) {
+ if ((addr1->addr.v4.ip.s_addr & addr2->addr.v4.mask.s_addr) ==
+ (addr2->addr.v4.ip.s_addr & addr1->addr.v4.mask.s_addr)) {
+ lua_pushboolean(L, 1);
+ return 1;
+ }
+ } else {
+ int i;
+
+ for (i = 0; i < 16; i += 4) {
+ if ((read_u32(&addr1->addr.v6.ip.s6_addr[i]) &
+ read_u32(&addr2->addr.v6.mask.s6_addr[i])) !=
+ (read_u32(&addr2->addr.v6.ip.s6_addr[i]) &
+ read_u32(&addr1->addr.v6.mask.s6_addr[i])))
+ break;
+ }
+ if (i == 16) {
+ lua_pushboolean(L, 1);
+ return 1;
+ }
+ }
+
+ lua_pushboolean(L, 0);
+ return 1;
+}
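+
+/* Example (Lua side), a minimal sketch of the two address helpers above:
+ *
+ *   local net  = core.parse_addr("10.0.0.0/8")
+ *   local host = core.parse_addr("10.1.2.3")
+ *   core.match_addr(net, host)  -- true: the masked networks match
+ */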
+
+static struct my_regex **hlua_check_regex(lua_State *L, int ud)
+{
+ return (hlua_checkudata(L, ud, class_regex_ref));
+}
+
+static int hlua_regex_comp(struct lua_State *L)
+{
+ struct my_regex **regex;
+ const char *str;
+ int cs;
+ char *err;
+
+ str = luaL_checkstring(L, 1);
+ luaL_argcheck(L, lua_isboolean(L, 2), 2, NULL);
+ cs = lua_toboolean(L, 2);
+
+ regex = lua_newuserdata(L, sizeof(*regex));
+
+ err = NULL;
+ if (!(*regex = regex_comp(str, cs, 1, &err))) {
+ lua_pushboolean(L, 0); /* status error */
+ lua_pushstring(L, err); /* Reason */
+ free(err);
+ return 2;
+ }
+
+ lua_pushboolean(L, 1); /* Status ok */
+
+ /* Create object */
+ lua_newtable(L);
+ lua_pushvalue(L, -3); /* Get the userdata pointer. */
+ lua_rawseti(L, -2, 0);
+ lua_rawgeti(L, LUA_REGISTRYINDEX, class_regex_ref);
+ lua_setmetatable(L, -2);
+ return 2;
+}
+
+static int hlua_regex_exec(struct lua_State *L)
+{
+ struct my_regex **regex;
+ const char *str;
+ size_t len;
+ struct buffer *tmp;
+
+ regex = hlua_check_regex(L, 1);
+ str = luaL_checklstring(L, 2, &len);
+
+ if (!*regex) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+	/* Copy the string because regex_exec2 requires a 'char *'
+ * and not a 'const char *'.
+ */
+ tmp = get_trash_chunk();
+ if (len >= tmp->size) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+ memcpy(tmp->area, str, len);
+
+ lua_pushboolean(L, regex_exec2(*regex, tmp->area, len));
+
+ return 1;
+}
+
+static int hlua_regex_match(struct lua_State *L)
+{
+ struct my_regex **regex;
+ const char *str;
+ size_t len;
+ regmatch_t pmatch[20];
+ int ret;
+ int i;
+ struct buffer *tmp;
+
+ regex = hlua_check_regex(L, 1);
+ str = luaL_checklstring(L, 2, &len);
+
+ if (!*regex) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+
+	/* Copy the string because regex_exec2 requires a 'char *'
+ * and not a 'const char *'.
+ */
+ tmp = get_trash_chunk();
+ if (len >= tmp->size) {
+ lua_pushboolean(L, 0);
+ return 1;
+ }
+ memcpy(tmp->area, str, len);
+
+ ret = regex_exec_match2(*regex, tmp->area, len, 20, pmatch, 0);
+ lua_pushboolean(L, ret);
+ lua_newtable(L);
+ if (ret) {
+ for (i = 0; i < 20 && pmatch[i].rm_so != -1; i++) {
+ lua_pushlstring(L, str + pmatch[i].rm_so, pmatch[i].rm_eo - pmatch[i].rm_so);
+ lua_rawseti(L, -2, i + 1);
+ }
+ }
+ return 2;
+}
+
+static int hlua_regex_free(struct lua_State *L)
+{
+ struct my_regex **regex;
+
+ regex = hlua_check_regex(L, 1);
+ regex_free(*regex);
+ *regex = NULL;
+ return 0;
+}
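+
+/* Example (Lua side), a minimal sketch of the Regex class registered in
+ * hlua_fcn_reg_core_fcn() below (the second argument of new() enables
+ * case sensitivity):
+ *
+ *   local ok, re = Regex.new("^hap", true)
+ *   if ok then
+ *       re:exec("haproxy")                       -- true
+ *       local found, groups = re:match("haproxy")
+ *   end
+ */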
+
+void hlua_fcn_reg_core_fcn(lua_State *L)
+{
+ hlua_concat_init(L);
+ hlua_queue_init(L);
+
+ hlua_class_function(L, "now", hlua_now);
+ hlua_class_function(L, "http_date", hlua_http_date);
+ hlua_class_function(L, "imf_date", hlua_imf_date);
+ hlua_class_function(L, "rfc850_date", hlua_rfc850_date);
+ hlua_class_function(L, "asctime_date", hlua_asctime_date);
+ hlua_class_function(L, "concat", hlua_concat_new);
+ hlua_class_function(L, "queue", hlua_queue_new);
+ hlua_class_function(L, "get_info", hlua_get_info);
+ hlua_class_function(L, "parse_addr", hlua_parse_addr);
+ hlua_class_function(L, "match_addr", hlua_match_addr);
+ hlua_class_function(L, "tokenize", hlua_tokenize);
+
+ /* Create regex object. */
+ lua_newtable(L);
+ hlua_class_function(L, "new", hlua_regex_comp);
+
+ lua_newtable(L); /* The metatable. */
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "exec", hlua_regex_exec);
+ hlua_class_function(L, "match", hlua_regex_match);
+ lua_rawset(L, -3); /* -> META["__index"] = TABLE */
+ hlua_class_function(L, "__gc", hlua_regex_free);
+
+ lua_pushvalue(L, -1); /* Duplicate the metatable reference. */
+ class_regex_ref = hlua_register_metatable(L, CLASS_REGEX);
+
+ lua_setmetatable(L, -2);
+ lua_setglobal(L, CLASS_REGEX); /* Create global object called Regex */
+
+ /* Create stktable object. */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "info", hlua_stktable_info);
+ hlua_class_function(L, "lookup", hlua_stktable_lookup);
+ hlua_class_function(L, "dump", hlua_stktable_dump);
+ lua_settable(L, -3); /* -> META["__index"] = TABLE */
+ class_stktable_ref = hlua_register_metatable(L, CLASS_STKTABLE);
+
+ /* Create listener object. */
+ lua_newtable(L);
+ lua_pushstring(L, "__index");
+ lua_newtable(L);
+ hlua_class_function(L, "get_stats", hlua_listener_get_stats);
+ lua_settable(L, -3); /* -> META["__index"] = TABLE */
+ class_listener_ref = hlua_register_metatable(L, CLASS_LISTENER);
+
+ /* Create event_sub object. */
+ lua_newtable(L);
+ hlua_class_function(L, "__gc", hlua_event_sub_gc);
+ class_event_sub_ref = hlua_register_metatable(L, CLASS_EVENT_SUB);
+
+ /* Create server object. */
+ lua_newtable(L);
+ hlua_class_function(L, "__gc", hlua_server_gc);
+ hlua_class_function(L, "__index", hlua_server_index);
+ class_server_ref = hlua_register_metatable(L, CLASS_SERVER);
+
+ /* Create proxy object. */
+ lua_newtable(L);
+ hlua_class_function(L, "__index", hlua_proxy_index);
+ class_proxy_ref = hlua_register_metatable(L, CLASS_PROXY);
+
+	/* list of proxy objects. Instead of having a static array
+	 * of proxies, we use special metamethods that rely on the internal
+	 * proxies list so that the array is resolved at runtime.
+	 *
+	 * To emulate the same behavior as a Lua array, we implement some
+ * metatable functions:
+ * - __newindex : prevent the insertion of a new item in the array
+ * - __index : find a proxy in the list using "name" index
+ * - __pairs : iterate through available proxies in the list
+ */
+ lua_newtable(L);
+ hlua_class_function(L, "__index", hlua_listable_proxies_index);
+ hlua_class_function(L, "__newindex", hlua_listable_proxies_newindex);
+ hlua_class_function(L, "__pairs", hlua_listable_proxies_pairs);
+ class_proxy_list_ref = hlua_register_metatable(L, CLASS_PROXY_LIST);
+
+ /* Create proxies entry. */
+ lua_pushstring(L, "proxies");
+ hlua_listable_proxies(L, PR_CAP_LISTEN);
+ lua_settable(L, -3);
+
+ /* Create frontends entry. */
+ lua_pushstring(L, "frontends");
+ hlua_listable_proxies(L, PR_CAP_FE);
+ lua_settable(L, -3);
+
+ /* Create backends entry. */
+ lua_pushstring(L, "backends");
+ hlua_listable_proxies(L, PR_CAP_BE);
+ lua_settable(L, -3);
+
+	/* list of servers. This object is similar to
+	 * CLASS_PROXY_LIST.
+ */
+ lua_newtable(L);
+ hlua_class_function(L, "__index", hlua_listable_servers_index);
+ hlua_class_function(L, "__newindex", hlua_listable_servers_newindex);
+ hlua_class_function(L, "__pairs", hlua_listable_servers_pairs);
+ class_server_list_ref = hlua_register_metatable(L, CLASS_SERVER_LIST);
+}
diff --git a/src/hpack-dec.c b/src/hpack-dec.c
new file mode 100644
index 0000000..052a7c3
--- /dev/null
+++ b/src/hpack-dec.c
@@ -0,0 +1,475 @@
+/*
+ * HPACK decompressor (RFC7541)
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <import/ist.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/h2.h>
+#include <haproxy/hpack-dec.h>
+#include <haproxy/hpack-huff.h>
+#include <haproxy/hpack-tbl.h>
+#include <haproxy/tools.h>
+
+
+#if defined(DEBUG_HPACK)
+#define hpack_debug_printf printf
+#define hpack_debug_hexdump debug_hexdump
+#else
+#define hpack_debug_printf(...) do { } while (0)
+#define hpack_debug_hexdump(...) do { } while (0)
+#endif
+
+/* reads a varint from <raw>'s lowest <b> bits, using at most <len> bytes
+ * (<raw> included). Returns the 32-bit value on success after updating
+ * <raw_in> and <len_in>. Forces <len_in> to (uint32_t)-1 on truncated input.
+ */
+static uint32_t get_var_int(const uint8_t **raw_in, uint32_t *len_in, int b)
+{
+ uint32_t ret = 0;
+ int len = *len_in;
+ const uint8_t *raw = *raw_in;
+ uint8_t shift = 0;
+
+ len--;
+ ret = *(raw++) & ((1 << b) - 1);
+ if (ret != (uint32_t)((1 << b) - 1))
+ goto end;
+
+ while (len && (*raw & 128)) {
+ ret += ((uint32_t)(*raw++) & 127) << shift;
+ shift += 7;
+ len--;
+ }
+
+ /* last 7 bits */
+ if (!len)
+ goto too_short;
+ len--;
+ ret += ((uint32_t)(*raw++) & 127) << shift;
+
+ end:
+ *raw_in = raw;
+ *len_in = len;
+ return ret;
+
+ too_short:
+ *len_in = (uint32_t)-1;
+ return 0;
+}
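A standalone sketch of the same RFC 7541 §5.1 prefix-integer scheme, checked
against the spec's example where 1337 encoded with a 5-bit prefix yields the
bytes 0x1f 0x9a 0x0a. prefix_int_dec() is a hypothetical rewrite for
illustration; the truncation checks that get_var_int() above performs are
omitted for brevity:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t prefix_int_dec(const uint8_t *p, size_t n, int b)
{
	uint32_t v = p[0] & ((1u << b) - 1);
	size_t i = 1;
	int shift = 0;

	if (v < ((1u << b) - 1))
		return v;                 /* value fits in the prefix bits */

	while (i < n) {
		v += (uint32_t)(p[i] & 0x7f) << shift;
		shift += 7;
		if (!(p[i++] & 0x80))
			break;            /* high bit clear: last byte */
	}
	return v;
}

int main(void)
{
	const uint8_t enc[3] = { 0x1f, 0x9a, 0x0a };

	assert(prefix_int_dec(enc, sizeof(enc), 5) == 1337);
	return 0;
}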
+
+/* returns the pseudo-header code <idx> corresponds to, among the following values :
+ * - 0 = unknown, the header's string needs to be used instead
+ * - 1 = ":authority"
+ * - 2 = ":method"
+ * - 3 = ":path"
+ * - 4 = ":scheme"
+ * - 5 = ":status"
+ */
+static inline int hpack_idx_to_phdr(uint32_t idx)
+{
+ if (idx > 14)
+ return 0;
+
+ idx >>= 1;
+ idx <<= 2;
+ return (0x55554321U >> idx) & 0xF;
+}
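The 0x55554321 constant above packs one 4-bit pseudo-header code per pair of
static table indexes. A small self-check against a plain reference mapping
(phdr_ref() is a hypothetical helper written only for this sketch):

#include <assert.h>
#include <stdint.h>

static int phdr_ref(uint32_t idx)      /* plain reference mapping */
{
	if (idx == 1)              return 1; /* :authority */
	if (idx == 2 || idx == 3)  return 2; /* :method    */
	if (idx == 4 || idx == 5)  return 3; /* :path      */
	if (idx == 6 || idx == 7)  return 4; /* :scheme    */
	if (idx >= 8 && idx <= 14) return 5; /* :status    */
	return 0;
}

int main(void)
{
	uint32_t idx;

	for (idx = 1; idx <= 14; idx++)
		assert(((0x55554321U >> ((idx >> 1) << 2)) & 0xF) == phdr_ref(idx));
	return 0;
}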
+
+/* If <idx> designates a static header, returns <in>. Otherwise allocates some
+ * room from chunk <store> to duplicate <in> into it and returns the string
+ * allocated there. In case of allocation failure, returns a string whose
+ * pointer is NULL.
+ */
+static inline struct ist hpack_alloc_string(struct buffer *store, uint32_t idx,
+ struct ist in)
+{
+ struct ist out;
+
+ if (idx < HPACK_SHT_SIZE)
+ return in;
+
+ out.len = in.len;
+ out.ptr = chunk_newstr(store);
+ if (unlikely(!isttest(out)))
+ return out;
+
+ if (unlikely(store->data + out.len > store->size)) {
+ out.ptr = NULL;
+ return out;
+ }
+
+ store->data += out.len;
+ memcpy(out.ptr, in.ptr, out.len);
+ return out;
+}
+
+/* decodes an HPACK frame starting at <raw> for <len> bytes, using the dynamic
+ * headers table <dht>, produces the output into list <list> of <list_size>
+ * entries max, and uses pre-allocated buffer <tmp> for temporary storage (some
+ * list elements will point to it). Some <list> name entries may be made of a
+ * NULL pointer and a len, in which case they will designate a pseudo header
+ * index according to the values returned by hpack_idx_to_phdr() above. The
+ * number of <list> entries used is returned on success (this count includes
+ * the trailing zeroed marker), or a negative value on failure, the opposite
+ * of one of the HPACK_ERR_* codes. The last element is always zeroed so that
+ * the caller can use list[].n.len == 0 as an end-of-list marker.
+ */
+int hpack_decode_frame(struct hpack_dht *dht, const uint8_t *raw, uint32_t len,
+ struct http_hdr *list, int list_size,
+ struct buffer *tmp)
+{
+ uint32_t idx;
+ uint32_t nlen;
+ uint32_t vlen;
+ uint8_t huff;
+ struct ist name;
+ struct ist value;
+ int must_index;
+ int ret;
+
+ hpack_debug_hexdump(stderr, "[HPACK-DEC] ", (const char *)raw, 0, len);
+
+ chunk_reset(tmp);
+ ret = 0;
+ while (len) {
+ int __maybe_unused code = *raw; /* first byte, only for debugging */
+
+ must_index = 0;
+ if (*raw >= 0x80) {
+ /* indexed header field */
+ if (*raw == 0x80) {
+ hpack_debug_printf("unhandled code 0x%02x (raw=%p, len=%u)\n", *raw, raw, len);
+ ret = -HPACK_ERR_UNKNOWN_OPCODE;
+ goto leave;
+ }
+
+ hpack_debug_printf("%02x: p14: indexed header field : ", code);
+
+ idx = get_var_int(&raw, &len, 7);
+ if (len == (uint32_t)-1) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ hpack_debug_printf(" idx=%u ", idx);
+
+ if (!hpack_valid_idx(dht, idx)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ value = hpack_alloc_string(tmp, idx, hpack_idx_to_value(dht, idx));
+ if (!isttest(value)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ /* here we don't index so we can always keep the pseudo header number */
+ name = ist2(NULL, hpack_idx_to_phdr(idx));
+
+ if (!name.len) {
+ name = hpack_alloc_string(tmp, idx, hpack_idx_to_name(dht, idx));
+ if (!isttest(name)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+ }
+ /* <name> and <value> are now set and point to stable values */
+ }
+ else if (*raw >= 0x20 && *raw <= 0x3f) {
+ /* max dyn table size change */
+ hpack_debug_printf("%02x: p18: dynamic table size update : ", code);
+
+ if (ret) {
+ /* 7541#4.2.1 : DHT size update must only be at the beginning */
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ idx = get_var_int(&raw, &len, 5);
+ if (len == (uint32_t)-1) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+ hpack_debug_printf(" new len=%u\n", idx);
+
+ if (idx > dht->size) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_INVALID_ARGUMENT;
+ goto leave;
+ }
+ continue;
+ }
+ else if (!(*raw & (*raw - 0x10))) {
+ /* 0x00, 0x10, and 0x40 (0x20 and 0x80 were already handled above) */
+
+ /* literal header field without/never/with incremental indexing -- literal name */
+ if (*raw == 0x00)
+ hpack_debug_printf("%02x: p17: literal without indexing : ", code);
+ else if (*raw == 0x10)
+ hpack_debug_printf("%02x: p18: literal never indexed : ", code);
+ else if (*raw == 0x40)
+ hpack_debug_printf("%02x: p16: literal with indexing : ", code);
+
+ if (*raw == 0x40)
+ must_index = 1;
+
+ raw++; len--;
+
+ /* retrieve name */
+ if (!len) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ huff = *raw & 0x80;
+ nlen = get_var_int(&raw, &len, 7);
+ if (len == (uint32_t)-1 || len < nlen) { // truncated
+ hpack_debug_printf("##ERR@%d## (truncated): nlen=%d len=%d\n",
+ __LINE__, (int)nlen, (int)len);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ name = ist2(raw, nlen);
+
+ raw += nlen;
+ len -= nlen;
+
+ if (huff) {
+ char *ntrash = chunk_newstr(tmp);
+ if (!ntrash) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ nlen = huff_dec((const uint8_t *)name.ptr, name.len, ntrash,
+ tmp->size - tmp->data);
+ if (nlen == (uint32_t)-1) {
+ hpack_debug_printf("2: can't decode huffman.\n");
+ ret = -HPACK_ERR_HUFFMAN;
+ goto leave;
+ }
+ hpack_debug_printf(" [name huff %d->%d] ", (int)name.len, (int)nlen);
+
+ tmp->data += nlen; // make room for the value
+ name = ist2(ntrash, nlen);
+ }
+
+ /* retrieve value */
+ if (!len) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ huff = *raw & 0x80;
+ vlen = get_var_int(&raw, &len, 7);
+ if (len == (uint32_t)-1 || len < vlen) { // truncated
+ hpack_debug_printf("##ERR@%d## : vlen=%d len=%d\n",
+ __LINE__, (int)vlen, (int)len);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ value = ist2(raw, vlen);
+ raw += vlen;
+ len -= vlen;
+
+ if (huff) {
+ char *vtrash = chunk_newstr(tmp);
+ if (!vtrash) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ vlen = huff_dec((const uint8_t *)value.ptr, value.len, vtrash,
+ tmp->size - tmp->data);
+ if (vlen == (uint32_t)-1) {
+ hpack_debug_printf("3: can't decode huffman.\n");
+ ret = -HPACK_ERR_HUFFMAN;
+ goto leave;
+ }
+ hpack_debug_printf(" [value huff %d->%d] ", (int)value.len, (int)vlen);
+
+ tmp->data += vlen; // make room for the value
+ value = ist2(vtrash, vlen);
+ }
+
+ /* <name> and <value> are correctly filled here */
+ }
+ else {
+ /* 0x01..0x0f : literal header field without indexing -- indexed name */
+ /* 0x11..0x1f : literal header field never indexed -- indexed name */
+ /* 0x41..0x7f : literal header field with incremental indexing -- indexed name */
+
+ if (*raw <= 0x0f)
+ hpack_debug_printf("%02x: p16: literal without indexing -- indexed name : ", code);
+ else if (*raw >= 0x41)
+ hpack_debug_printf("%02x: p15: literal with indexing -- indexed name : ", code);
+ else
+ hpack_debug_printf("%02x: p16: literal never indexed -- indexed name : ", code);
+
+ /* retrieve name index */
+ if (*raw >= 0x41) {
+ must_index = 1;
+ idx = get_var_int(&raw, &len, 6);
+ }
+ else
+ idx = get_var_int(&raw, &len, 4);
+
+ hpack_debug_printf(" idx=%u ", idx);
+
+ if (len == (uint32_t)-1 || !len) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ if (!hpack_valid_idx(dht, idx)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ /* retrieve value */
+ huff = *raw & 0x80;
+ vlen = get_var_int(&raw, &len, 7);
+ if (len == (uint32_t)-1 || len < vlen) { // truncated
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TRUNCATED;
+ goto leave;
+ }
+
+ value = ist2(raw, vlen);
+ raw += vlen;
+ len -= vlen;
+
+ if (huff) {
+ char *vtrash = chunk_newstr(tmp);
+ if (!vtrash) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ vlen = huff_dec((const uint8_t *)value.ptr, value.len, vtrash,
+ tmp->size - tmp->data);
+ if (vlen == (uint32_t)-1) {
+ hpack_debug_printf("##ERR@%d## can't decode huffman : ilen=%d osize=%d\n",
+ __LINE__, (int)value.len,
+ (int)(tmp->size - tmp->data));
+ hpack_debug_hexdump(stderr, "[HUFFMAN] ", value.ptr, 0, value.len);
+ ret = -HPACK_ERR_HUFFMAN;
+ goto leave;
+ }
+ tmp->data += vlen; // make room for the value
+ value = ist2(vtrash, vlen);
+ }
+
+ name = IST_NULL;
+ if (!must_index)
+ name.len = hpack_idx_to_phdr(idx);
+
+ if (!name.len) {
+ name = hpack_alloc_string(tmp, idx, hpack_idx_to_name(dht, idx));
+ if (!isttest(name)) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+ }
+ /* <name> and <value> are correctly filled here */
+ }
+
+ /* We must not accept empty header names (forbidden by the spec and used
+ * as a list termination).
+ */
+ if (!name.len) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_INVALID_ARGUMENT;
+ goto leave;
+ }
+
+		/* here is what we have at this point :
+		 * - name.len > 0
+		 * - value is filled with either const data or data allocated from tmp
+		 * - name.ptr == NULL && !must_index : known pseudo-header #name.len
+		 * - name.ptr != NULL || must_index : general header, unknown pseudo-header or index needed
+		 */
+ if (ret >= list_size) {
+ hpack_debug_printf("##ERR@%d##\n", __LINE__);
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ list[ret].n = name;
+ list[ret].v = value;
+ ret++;
+
+ if (must_index && hpack_dht_insert(dht, name, value) < 0) {
+ hpack_debug_printf("failed to find some room in the dynamic table\n");
+ ret = -HPACK_ERR_DHT_INSERT_FAIL;
+ goto leave;
+ }
+
+ hpack_debug_printf("\e[1;34m%s\e[0m: ",
+ isttest(name) ? istpad(trash.area, name).ptr : h2_phdr_to_str(name.len));
+
+ hpack_debug_printf("\e[1;35m%s\e[0m [mustidx=%d, used=%d] [n=(%p,%d) v=(%p,%d)]\n",
+ istpad(trash.area, value).ptr, must_index,
+ dht->used,
+ name.ptr, (int)name.len, value.ptr, (int)value.len);
+ }
+
+ if (ret >= list_size) {
+ ret = -HPACK_ERR_TOO_LARGE;
+ goto leave;
+ }
+
+ /* put an end marker */
+ list[ret].n = list[ret].v = IST_NULL;
+ ret++;
+
+ leave:
+ hpack_debug_printf("-- done: ret=%d list_size=%d --\n", (int)ret, (int)list_size);
+ return ret;
+}
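A hedged usage sketch, not taken from the tree's test code: demo_decode() is
hypothetical and assumes a dynamic table obtained from hpack_dht_alloc() and a
valid chunk buffer. It feeds the first request of RFC 7541 C.3.1 through
hpack_decode_frame():

#include <stdio.h>
#include <stdint.h>

#include <haproxy/hpack-dec.h>

/* First request of RFC 7541 C.3.1 (no Huffman): 0x82 = :method GET,
 * 0x86 = :scheme http, 0x84 = :path /, then a literal with incremental
 * indexing whose name is static index 1 (:authority) and whose value is
 * the 15-byte string "www.example.com".
 */
static const uint8_t rfc7541_c31[20] = {
	0x82, 0x86, 0x84, 0x41, 0x0f, 0x77, 0x77, 0x77,
	0x2e, 0x65, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65,
	0x2e, 0x63, 0x6f, 0x6d,
};

static void demo_decode(struct hpack_dht *dht, struct buffer *tmp)
{
	struct http_hdr list[16];
	int i, n;

	n = hpack_decode_frame(dht, rfc7541_c31, sizeof(rfc7541_c31),
	                       list, sizeof(list) / sizeof(list[0]), tmp);

	for (i = 0; i < n && list[i].n.len; i++) {
		if (list[i].n.ptr)      /* regular header name */
			printf("%.*s: %.*s\n",
			       (int)list[i].n.len, list[i].n.ptr,
			       (int)list[i].v.len, list[i].v.ptr);
		else                    /* pseudo-header code kept in n.len */
			printf("<pseudo #%d>: %.*s\n", (int)list[i].n.len,
			       (int)list[i].v.len, list[i].v.ptr);
	}
}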
diff --git a/src/hpack-enc.c b/src/hpack-enc.c
new file mode 100644
index 0000000..3ab21bc
--- /dev/null
+++ b/src/hpack-enc.c
@@ -0,0 +1,210 @@
+/*
+ * HPACK encoder (RFC7541)
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <import/ist.h>
+#include <haproxy/hpack-enc.h>
+#include <haproxy/http-hdr-t.h>
+
+/*
+ * HPACK encoding: these tables were generated using gen-enc.c
+ */
+
+/* encoding stream of the known header names, sorted by name length. It is
+ * composed of a series of <len:8b> <index:8b> <name:<len>*8b> records.
+ */
+const char hpack_enc_stream[666] = {
+ /* 0: */ 0x03, 0x15, 0x61, 0x67, 0x65, 0x03, 0x3c, 0x76,
+ /* 8: */ 0x69, 0x61, 0x04, 0x21, 0x64, 0x61, 0x74, 0x65,
+ /* 16: */ 0x04, 0x26, 0x68, 0x6f, 0x73, 0x74, 0x04, 0x22,
+ /* 24: */ 0x65, 0x74, 0x61, 0x67, 0x04, 0x25, 0x66, 0x72,
+ /* 32: */ 0x6f, 0x6d, 0x04, 0x2d, 0x6c, 0x69, 0x6e, 0x6b,
+ /* 40: */ 0x04, 0x3b, 0x76, 0x61, 0x72, 0x79, 0x05, 0x04,
+ /* 48: */ 0x3a, 0x70, 0x61, 0x74, 0x68, 0x05, 0x16, 0x61,
+ /* 56: */ 0x6c, 0x6c, 0x6f, 0x77, 0x05, 0x32, 0x72, 0x61,
+ /* 64: */ 0x6e, 0x67, 0x65, 0x06, 0x13, 0x61, 0x63, 0x63,
+ /* 72: */ 0x65, 0x70, 0x74, 0x06, 0x36, 0x73, 0x65, 0x72,
+ /* 80: */ 0x76, 0x65, 0x72, 0x06, 0x20, 0x63, 0x6f, 0x6f,
+ /* 88: */ 0x6b, 0x69, 0x65, 0x06, 0x23, 0x65, 0x78, 0x70,
+ /* 96: */ 0x65, 0x63, 0x74, 0x07, 0x33, 0x72, 0x65, 0x66,
+ /* 104: */ 0x65, 0x72, 0x65, 0x72, 0x07, 0x24, 0x65, 0x78,
+ /* 112: */ 0x70, 0x69, 0x72, 0x65, 0x73, 0x07, 0x02, 0x3a,
+ /* 120: */ 0x6d, 0x65, 0x74, 0x68, 0x6f, 0x64, 0x07, 0x06,
+ /* 128: */ 0x3a, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x65, 0x07,
+ /* 136: */ 0x08, 0x3a, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73,
+ /* 144: */ 0x07, 0x34, 0x72, 0x65, 0x66, 0x72, 0x65, 0x73,
+ /* 152: */ 0x68, 0x08, 0x2e, 0x6c, 0x6f, 0x63, 0x61, 0x74,
+ /* 160: */ 0x69, 0x6f, 0x6e, 0x08, 0x27, 0x69, 0x66, 0x2d,
+ /* 168: */ 0x6d, 0x61, 0x74, 0x63, 0x68, 0x08, 0x2a, 0x69,
+ /* 176: */ 0x66, 0x2d, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x0a,
+ /* 184: */ 0x3a, 0x75, 0x73, 0x65, 0x72, 0x2d, 0x61, 0x67,
+ /* 192: */ 0x65, 0x6e, 0x74, 0x0a, 0x37, 0x73, 0x65, 0x74,
+ /* 200: */ 0x2d, 0x63, 0x6f, 0x6f, 0x6b, 0x69, 0x65, 0x0a,
+ /* 208: */ 0x01, 0x3a, 0x61, 0x75, 0x74, 0x68, 0x6f, 0x72,
+ /* 216: */ 0x69, 0x74, 0x79, 0x0b, 0x35, 0x72, 0x65, 0x74,
+ /* 224: */ 0x72, 0x79, 0x2d, 0x61, 0x66, 0x74, 0x65, 0x72,
+ /* 232: */ 0x0c, 0x1f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
+ /* 240: */ 0x74, 0x2d, 0x74, 0x79, 0x70, 0x65, 0x0c, 0x2f,
+ /* 248: */ 0x6d, 0x61, 0x78, 0x2d, 0x66, 0x6f, 0x72, 0x77,
+ /* 256: */ 0x61, 0x72, 0x64, 0x73, 0x0d, 0x18, 0x63, 0x61,
+ /* 264: */ 0x63, 0x68, 0x65, 0x2d, 0x63, 0x6f, 0x6e, 0x74,
+ /* 272: */ 0x72, 0x6f, 0x6c, 0x0d, 0x2c, 0x6c, 0x61, 0x73,
+ /* 280: */ 0x74, 0x2d, 0x6d, 0x6f, 0x64, 0x69, 0x66, 0x69,
+ /* 288: */ 0x65, 0x64, 0x0d, 0x12, 0x61, 0x63, 0x63, 0x65,
+ /* 296: */ 0x70, 0x74, 0x2d, 0x72, 0x61, 0x6e, 0x67, 0x65,
+ /* 304: */ 0x73, 0x0d, 0x29, 0x69, 0x66, 0x2d, 0x6e, 0x6f,
+ /* 312: */ 0x6e, 0x65, 0x2d, 0x6d, 0x61, 0x74, 0x63, 0x68,
+ /* 320: */ 0x0d, 0x17, 0x61, 0x75, 0x74, 0x68, 0x6f, 0x72,
+ /* 328: */ 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x0d,
+ /* 336: */ 0x1e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
+ /* 344: */ 0x2d, 0x72, 0x61, 0x6e, 0x67, 0x65, 0x0e, 0x1c,
+ /* 352: */ 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d,
+ /* 360: */ 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x0e, 0x0f,
+ /* 368: */ 0x61, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d, 0x63,
+ /* 376: */ 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x0f, 0x10,
+ /* 384: */ 0x61, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d, 0x65,
+ /* 392: */ 0x6e, 0x63, 0x6f, 0x64, 0x69, 0x6e, 0x67, 0x0f,
+ /* 400: */ 0x11, 0x61, 0x63, 0x63, 0x65, 0x70, 0x74, 0x2d,
+ /* 408: */ 0x6c, 0x61, 0x6e, 0x67, 0x75, 0x61, 0x67, 0x65,
+ /* 416: */ 0x10, 0x1a, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
+ /* 424: */ 0x74, 0x2d, 0x65, 0x6e, 0x63, 0x6f, 0x64, 0x69,
+ /* 432: */ 0x6e, 0x67, 0x10, 0x1b, 0x63, 0x6f, 0x6e, 0x74,
+ /* 440: */ 0x65, 0x6e, 0x74, 0x2d, 0x6c, 0x61, 0x6e, 0x67,
+ /* 448: */ 0x75, 0x61, 0x67, 0x65, 0x10, 0x1d, 0x63, 0x6f,
+ /* 456: */ 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x2d, 0x6c, 0x6f,
+ /* 464: */ 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x10, 0x3d,
+ /* 472: */ 0x77, 0x77, 0x77, 0x2d, 0x61, 0x75, 0x74, 0x68,
+ /* 480: */ 0x65, 0x6e, 0x74, 0x69, 0x63, 0x61, 0x74, 0x65,
+ /* 488: */ 0x11, 0x39, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x66,
+ /* 496: */ 0x65, 0x72, 0x2d, 0x65, 0x6e, 0x63, 0x6f, 0x64,
+ /* 504: */ 0x69, 0x6e, 0x67, 0x11, 0x28, 0x69, 0x66, 0x2d,
+ /* 512: */ 0x6d, 0x6f, 0x64, 0x69, 0x66, 0x69, 0x65, 0x64,
+ /* 520: */ 0x2d, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x12, 0x30,
+ /* 528: */ 0x70, 0x72, 0x6f, 0x78, 0x79, 0x2d, 0x61, 0x75,
+ /* 536: */ 0x74, 0x68, 0x65, 0x6e, 0x74, 0x69, 0x63, 0x61,
+ /* 544: */ 0x74, 0x65, 0x13, 0x19, 0x63, 0x6f, 0x6e, 0x74,
+ /* 552: */ 0x65, 0x6e, 0x74, 0x2d, 0x64, 0x69, 0x73, 0x70,
+ /* 560: */ 0x6f, 0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x13,
+ /* 568: */ 0x2b, 0x69, 0x66, 0x2d, 0x75, 0x6e, 0x6d, 0x6f,
+ /* 576: */ 0x64, 0x69, 0x66, 0x69, 0x65, 0x64, 0x2d, 0x73,
+ /* 584: */ 0x69, 0x6e, 0x63, 0x65, 0x13, 0x31, 0x70, 0x72,
+ /* 592: */ 0x6f, 0x78, 0x79, 0x2d, 0x61, 0x75, 0x74, 0x68,
+ /* 600: */ 0x6f, 0x72, 0x69, 0x7a, 0x61, 0x74, 0x69, 0x6f,
+ /* 608: */ 0x6e, 0x19, 0x38, 0x73, 0x74, 0x72, 0x69, 0x63,
+ /* 616: */ 0x74, 0x2d, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70,
+ /* 624: */ 0x6f, 0x72, 0x74, 0x2d, 0x73, 0x65, 0x63, 0x75,
+ /* 632: */ 0x72, 0x69, 0x74, 0x79, 0x1b, 0x14, 0x61, 0x63,
+ /* 640: */ 0x63, 0x65, 0x73, 0x73, 0x2d, 0x63, 0x6f, 0x6e,
+ /* 648: */ 0x74, 0x72, 0x6f, 0x6c, 0x2d, 0x61, 0x6c, 0x6c,
+ /* 656: */ 0x6f, 0x77, 0x2d, 0x6f, 0x72, 0x69, 0x67, 0x69,
+ /* 664: */ 0x6e, 0x00,
+};
+
+/* Each entry points to the first position in table hpack_enc_stream[] of a
+ * header name of that length, or -1 when no name of that length exists.
+ */
+const signed short hpack_pos_len[32] = {
+ /* 0: */ -1, -1, -1, 0, 10, 46, 67, 99,
+ /* 8: */ 153, -1, 183, 219, 232, 260, 350, 382,
+ /* 16: */ 416, 488, 526, 546, -1, -1, -1, -1,
+ /* 24: */ -1, 609, -1, 636, -1, -1, -1, -1,
+};
+
+/* Tries to encode header whose name is <n> and value <v> into the chunk <out>.
+ * Returns non-zero on success, 0 on failure (buffer full).
+ */
+int hpack_encode_header(struct buffer *out, const struct ist n,
+ const struct ist v)
+{
+ int len = out->data;
+ int size = out->size;
+ int pos;
+
+ if (len >= size)
+ return 0;
+
+ /* look for the header field <n> in the static table */
+ if (n.len >= sizeof(hpack_pos_len) / sizeof(hpack_pos_len[0]))
+ goto make_literal;
+
+ pos = hpack_pos_len[n.len];
+ if (pos >= 0) {
+		/* At least one header field of this length exists */
+ do {
+ char idx;
+
+ pos++;
+ idx = hpack_enc_stream[pos++];
+ pos += n.len;
+ if (isteq(ist2(&hpack_enc_stream[pos - n.len], n.len), n)) {
+ /* emit literal with indexing (7541#6.2.1) :
+ * [ 0 | 1 | Index (6+) ]
+ */
+ out->area[len++] = idx | 0x40;
+ goto emit_value;
+ }
+ } while ((unsigned char)hpack_enc_stream[pos] == n.len);
+ }
+
+ make_literal:
+ if (likely(n.len < 127 && len + 2 + n.len <= size)) {
+ out->area[len++] = 0x00; /* literal without indexing -- new name */
+ out->area[len++] = n.len; /* single-byte length encoding */
+ ist2bin(out->area + len, n);
+ len += n.len;
+ }
+ else if (hpack_len_to_bytes(n.len) &&
+ len + 1 + hpack_len_to_bytes(n.len) + n.len <= size) {
+ out->area[len++] = 0x00; /* literal without indexing -- new name */
+ len = hpack_encode_len(out->area, len, n.len);
+ ist2bin(out->area + len, n);
+ len += n.len;
+ }
+ else {
+ /* header field name too large for the buffer */
+ return 0;
+ }
+
+ emit_value:
+ /* copy literal header field value */
+ if (!hpack_len_to_bytes(v.len) ||
+ len + hpack_len_to_bytes(v.len) + v.len > size) {
+ /* header value too large for the buffer */
+ return 0;
+ }
+
+ len = hpack_encode_len(out->area, len, v.len);
+ memcpy(out->area + len, v.ptr, v.len);
+ len += v.len;
+
+ out->data = len;
+ return 1;
+}
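A hedged usage sketch (demo_encode() is hypothetical): "content-type" is found
in the length-indexed stream above with static index 31, so the first emitted
byte is 0x40 | 31 = 0x5f (literal with incremental indexing), followed by a
single length byte and the raw value:

#include <import/ist.h>
#include <haproxy/buf-t.h>
#include <haproxy/hpack-enc.h>

static void demo_encode(void)
{
	char storage[64];
	struct buffer out = { .area = storage, .size = sizeof(storage) };

	if (hpack_encode_header(&out, ist("content-type"), ist("text/html"))) {
		/* out.data == 1 + 1 + 9 here, with out.area[0] == 0x5f,
		 * out.area[1] == 0x09 and the literal value behind them.
		 */
	}
}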
diff --git a/src/hpack-huff.c b/src/hpack-huff.c
new file mode 100644
index 0000000..77743be
--- /dev/null
+++ b/src/hpack-huff.c
@@ -0,0 +1,861 @@
+/*
+ * Huffman decoding and encoding for HPACK (RFC7541)
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/hpack-huff.h>
+#include <haproxy/net_helper.h>
+
+struct huff {
+ uint32_t c; /* code point */
+ int b; /* bits */
+};
+
+/* huffman table as per RFC7541 appendix B */
+static const struct huff ht[257] = {
+ [ 0] = { .c = 0x00001ff8, .b = 13 },
+ [ 1] = { .c = 0x007fffd8, .b = 23 },
+ [ 2] = { .c = 0x0fffffe2, .b = 28 },
+ [ 3] = { .c = 0x0fffffe3, .b = 28 },
+ [ 4] = { .c = 0x0fffffe4, .b = 28 },
+ [ 5] = { .c = 0x0fffffe5, .b = 28 },
+ [ 6] = { .c = 0x0fffffe6, .b = 28 },
+ [ 7] = { .c = 0x0fffffe7, .b = 28 },
+ [ 8] = { .c = 0x0fffffe8, .b = 28 },
+ [ 9] = { .c = 0x00ffffea, .b = 24 },
+ [ 10] = { .c = 0x3ffffffc, .b = 30 },
+ [ 11] = { .c = 0x0fffffe9, .b = 28 },
+ [ 12] = { .c = 0x0fffffea, .b = 28 },
+ [ 13] = { .c = 0x3ffffffd, .b = 30 },
+ [ 14] = { .c = 0x0fffffeb, .b = 28 },
+ [ 15] = { .c = 0x0fffffec, .b = 28 },
+ [ 16] = { .c = 0x0fffffed, .b = 28 },
+ [ 17] = { .c = 0x0fffffee, .b = 28 },
+ [ 18] = { .c = 0x0fffffef, .b = 28 },
+ [ 19] = { .c = 0x0ffffff0, .b = 28 },
+ [ 20] = { .c = 0x0ffffff1, .b = 28 },
+ [ 21] = { .c = 0x0ffffff2, .b = 28 },
+ [ 22] = { .c = 0x3ffffffe, .b = 30 },
+ [ 23] = { .c = 0x0ffffff3, .b = 28 },
+ [ 24] = { .c = 0x0ffffff4, .b = 28 },
+ [ 25] = { .c = 0x0ffffff5, .b = 28 },
+ [ 26] = { .c = 0x0ffffff6, .b = 28 },
+ [ 27] = { .c = 0x0ffffff7, .b = 28 },
+ [ 28] = { .c = 0x0ffffff8, .b = 28 },
+ [ 29] = { .c = 0x0ffffff9, .b = 28 },
+ [ 30] = { .c = 0x0ffffffa, .b = 28 },
+ [ 31] = { .c = 0x0ffffffb, .b = 28 },
+ [ 32] = { .c = 0x00000014, .b = 6 },
+ [ 33] = { .c = 0x000003f8, .b = 10 },
+ [ 34] = { .c = 0x000003f9, .b = 10 },
+ [ 35] = { .c = 0x00000ffa, .b = 12 },
+ [ 36] = { .c = 0x00001ff9, .b = 13 },
+ [ 37] = { .c = 0x00000015, .b = 6 },
+ [ 38] = { .c = 0x000000f8, .b = 8 },
+ [ 39] = { .c = 0x000007fa, .b = 11 },
+ [ 40] = { .c = 0x000003fa, .b = 10 },
+ [ 41] = { .c = 0x000003fb, .b = 10 },
+ [ 42] = { .c = 0x000000f9, .b = 8 },
+ [ 43] = { .c = 0x000007fb, .b = 11 },
+ [ 44] = { .c = 0x000000fa, .b = 8 },
+ [ 45] = { .c = 0x00000016, .b = 6 },
+ [ 46] = { .c = 0x00000017, .b = 6 },
+ [ 47] = { .c = 0x00000018, .b = 6 },
+ [ 48] = { .c = 0x00000000, .b = 5 },
+ [ 49] = { .c = 0x00000001, .b = 5 },
+ [ 50] = { .c = 0x00000002, .b = 5 },
+ [ 51] = { .c = 0x00000019, .b = 6 },
+ [ 52] = { .c = 0x0000001a, .b = 6 },
+ [ 53] = { .c = 0x0000001b, .b = 6 },
+ [ 54] = { .c = 0x0000001c, .b = 6 },
+ [ 55] = { .c = 0x0000001d, .b = 6 },
+ [ 56] = { .c = 0x0000001e, .b = 6 },
+ [ 57] = { .c = 0x0000001f, .b = 6 },
+ [ 58] = { .c = 0x0000005c, .b = 7 },
+ [ 59] = { .c = 0x000000fb, .b = 8 },
+ [ 60] = { .c = 0x00007ffc, .b = 15 },
+ [ 61] = { .c = 0x00000020, .b = 6 },
+ [ 62] = { .c = 0x00000ffb, .b = 12 },
+ [ 63] = { .c = 0x000003fc, .b = 10 },
+ [ 64] = { .c = 0x00001ffa, .b = 13 },
+ [ 65] = { .c = 0x00000021, .b = 6 },
+ [ 66] = { .c = 0x0000005d, .b = 7 },
+ [ 67] = { .c = 0x0000005e, .b = 7 },
+ [ 68] = { .c = 0x0000005f, .b = 7 },
+ [ 69] = { .c = 0x00000060, .b = 7 },
+ [ 70] = { .c = 0x00000061, .b = 7 },
+ [ 71] = { .c = 0x00000062, .b = 7 },
+ [ 72] = { .c = 0x00000063, .b = 7 },
+ [ 73] = { .c = 0x00000064, .b = 7 },
+ [ 74] = { .c = 0x00000065, .b = 7 },
+ [ 75] = { .c = 0x00000066, .b = 7 },
+ [ 76] = { .c = 0x00000067, .b = 7 },
+ [ 77] = { .c = 0x00000068, .b = 7 },
+ [ 78] = { .c = 0x00000069, .b = 7 },
+ [ 79] = { .c = 0x0000006a, .b = 7 },
+ [ 80] = { .c = 0x0000006b, .b = 7 },
+ [ 81] = { .c = 0x0000006c, .b = 7 },
+ [ 82] = { .c = 0x0000006d, .b = 7 },
+ [ 83] = { .c = 0x0000006e, .b = 7 },
+ [ 84] = { .c = 0x0000006f, .b = 7 },
+ [ 85] = { .c = 0x00000070, .b = 7 },
+ [ 86] = { .c = 0x00000071, .b = 7 },
+ [ 87] = { .c = 0x00000072, .b = 7 },
+ [ 88] = { .c = 0x000000fc, .b = 8 },
+ [ 89] = { .c = 0x00000073, .b = 7 },
+ [ 90] = { .c = 0x000000fd, .b = 8 },
+ [ 91] = { .c = 0x00001ffb, .b = 13 },
+ [ 92] = { .c = 0x0007fff0, .b = 19 },
+ [ 93] = { .c = 0x00001ffc, .b = 13 },
+ [ 94] = { .c = 0x00003ffc, .b = 14 },
+ [ 95] = { .c = 0x00000022, .b = 6 },
+ [ 96] = { .c = 0x00007ffd, .b = 15 },
+ [ 97] = { .c = 0x00000003, .b = 5 },
+ [ 98] = { .c = 0x00000023, .b = 6 },
+ [ 99] = { .c = 0x00000004, .b = 5 },
+ [100] = { .c = 0x00000024, .b = 6 },
+ [101] = { .c = 0x00000005, .b = 5 },
+ [102] = { .c = 0x00000025, .b = 6 },
+ [103] = { .c = 0x00000026, .b = 6 },
+ [104] = { .c = 0x00000027, .b = 6 },
+ [105] = { .c = 0x00000006, .b = 5 },
+ [106] = { .c = 0x00000074, .b = 7 },
+ [107] = { .c = 0x00000075, .b = 7 },
+ [108] = { .c = 0x00000028, .b = 6 },
+ [109] = { .c = 0x00000029, .b = 6 },
+ [110] = { .c = 0x0000002a, .b = 6 },
+ [111] = { .c = 0x00000007, .b = 5 },
+ [112] = { .c = 0x0000002b, .b = 6 },
+ [113] = { .c = 0x00000076, .b = 7 },
+ [114] = { .c = 0x0000002c, .b = 6 },
+ [115] = { .c = 0x00000008, .b = 5 },
+ [116] = { .c = 0x00000009, .b = 5 },
+ [117] = { .c = 0x0000002d, .b = 6 },
+ [118] = { .c = 0x00000077, .b = 7 },
+ [119] = { .c = 0x00000078, .b = 7 },
+ [120] = { .c = 0x00000079, .b = 7 },
+ [121] = { .c = 0x0000007a, .b = 7 },
+ [122] = { .c = 0x0000007b, .b = 7 },
+ [123] = { .c = 0x00007ffe, .b = 15 },
+ [124] = { .c = 0x000007fc, .b = 11 },
+ [125] = { .c = 0x00003ffd, .b = 14 },
+ [126] = { .c = 0x00001ffd, .b = 13 },
+ [127] = { .c = 0x0ffffffc, .b = 28 },
+ [128] = { .c = 0x000fffe6, .b = 20 },
+ [129] = { .c = 0x003fffd2, .b = 22 },
+ [130] = { .c = 0x000fffe7, .b = 20 },
+ [131] = { .c = 0x000fffe8, .b = 20 },
+ [132] = { .c = 0x003fffd3, .b = 22 },
+ [133] = { .c = 0x003fffd4, .b = 22 },
+ [134] = { .c = 0x003fffd5, .b = 22 },
+ [135] = { .c = 0x007fffd9, .b = 23 },
+ [136] = { .c = 0x003fffd6, .b = 22 },
+ [137] = { .c = 0x007fffda, .b = 23 },
+ [138] = { .c = 0x007fffdb, .b = 23 },
+ [139] = { .c = 0x007fffdc, .b = 23 },
+ [140] = { .c = 0x007fffdd, .b = 23 },
+ [141] = { .c = 0x007fffde, .b = 23 },
+ [142] = { .c = 0x00ffffeb, .b = 24 },
+ [143] = { .c = 0x007fffdf, .b = 23 },
+ [144] = { .c = 0x00ffffec, .b = 24 },
+ [145] = { .c = 0x00ffffed, .b = 24 },
+ [146] = { .c = 0x003fffd7, .b = 22 },
+ [147] = { .c = 0x007fffe0, .b = 23 },
+ [148] = { .c = 0x00ffffee, .b = 24 },
+ [149] = { .c = 0x007fffe1, .b = 23 },
+ [150] = { .c = 0x007fffe2, .b = 23 },
+ [151] = { .c = 0x007fffe3, .b = 23 },
+ [152] = { .c = 0x007fffe4, .b = 23 },
+ [153] = { .c = 0x001fffdc, .b = 21 },
+ [154] = { .c = 0x003fffd8, .b = 22 },
+ [155] = { .c = 0x007fffe5, .b = 23 },
+ [156] = { .c = 0x003fffd9, .b = 22 },
+ [157] = { .c = 0x007fffe6, .b = 23 },
+ [158] = { .c = 0x007fffe7, .b = 23 },
+ [159] = { .c = 0x00ffffef, .b = 24 },
+ [160] = { .c = 0x003fffda, .b = 22 },
+ [161] = { .c = 0x001fffdd, .b = 21 },
+ [162] = { .c = 0x000fffe9, .b = 20 },
+ [163] = { .c = 0x003fffdb, .b = 22 },
+ [164] = { .c = 0x003fffdc, .b = 22 },
+ [165] = { .c = 0x007fffe8, .b = 23 },
+ [166] = { .c = 0x007fffe9, .b = 23 },
+ [167] = { .c = 0x001fffde, .b = 21 },
+ [168] = { .c = 0x007fffea, .b = 23 },
+ [169] = { .c = 0x003fffdd, .b = 22 },
+ [170] = { .c = 0x003fffde, .b = 22 },
+ [171] = { .c = 0x00fffff0, .b = 24 },
+ [172] = { .c = 0x001fffdf, .b = 21 },
+ [173] = { .c = 0x003fffdf, .b = 22 },
+ [174] = { .c = 0x007fffeb, .b = 23 },
+ [175] = { .c = 0x007fffec, .b = 23 },
+ [176] = { .c = 0x001fffe0, .b = 21 },
+ [177] = { .c = 0x001fffe1, .b = 21 },
+ [178] = { .c = 0x003fffe0, .b = 22 },
+ [179] = { .c = 0x001fffe2, .b = 21 },
+ [180] = { .c = 0x007fffed, .b = 23 },
+ [181] = { .c = 0x003fffe1, .b = 22 },
+ [182] = { .c = 0x007fffee, .b = 23 },
+ [183] = { .c = 0x007fffef, .b = 23 },
+ [184] = { .c = 0x000fffea, .b = 20 },
+ [185] = { .c = 0x003fffe2, .b = 22 },
+ [186] = { .c = 0x003fffe3, .b = 22 },
+ [187] = { .c = 0x003fffe4, .b = 22 },
+ [188] = { .c = 0x007ffff0, .b = 23 },
+ [189] = { .c = 0x003fffe5, .b = 22 },
+ [190] = { .c = 0x003fffe6, .b = 22 },
+ [191] = { .c = 0x007ffff1, .b = 23 },
+ [192] = { .c = 0x03ffffe0, .b = 26 },
+ [193] = { .c = 0x03ffffe1, .b = 26 },
+ [194] = { .c = 0x000fffeb, .b = 20 },
+ [195] = { .c = 0x0007fff1, .b = 19 },
+ [196] = { .c = 0x003fffe7, .b = 22 },
+ [197] = { .c = 0x007ffff2, .b = 23 },
+ [198] = { .c = 0x003fffe8, .b = 22 },
+ [199] = { .c = 0x01ffffec, .b = 25 },
+ [200] = { .c = 0x03ffffe2, .b = 26 },
+ [201] = { .c = 0x03ffffe3, .b = 26 },
+ [202] = { .c = 0x03ffffe4, .b = 26 },
+ [203] = { .c = 0x07ffffde, .b = 27 },
+ [204] = { .c = 0x07ffffdf, .b = 27 },
+ [205] = { .c = 0x03ffffe5, .b = 26 },
+ [206] = { .c = 0x00fffff1, .b = 24 },
+ [207] = { .c = 0x01ffffed, .b = 25 },
+ [208] = { .c = 0x0007fff2, .b = 19 },
+ [209] = { .c = 0x001fffe3, .b = 21 },
+ [210] = { .c = 0x03ffffe6, .b = 26 },
+ [211] = { .c = 0x07ffffe0, .b = 27 },
+ [212] = { .c = 0x07ffffe1, .b = 27 },
+ [213] = { .c = 0x03ffffe7, .b = 26 },
+ [214] = { .c = 0x07ffffe2, .b = 27 },
+ [215] = { .c = 0x00fffff2, .b = 24 },
+ [216] = { .c = 0x001fffe4, .b = 21 },
+ [217] = { .c = 0x001fffe5, .b = 21 },
+ [218] = { .c = 0x03ffffe8, .b = 26 },
+ [219] = { .c = 0x03ffffe9, .b = 26 },
+ [220] = { .c = 0x0ffffffd, .b = 28 },
+ [221] = { .c = 0x07ffffe3, .b = 27 },
+ [222] = { .c = 0x07ffffe4, .b = 27 },
+ [223] = { .c = 0x07ffffe5, .b = 27 },
+ [224] = { .c = 0x000fffec, .b = 20 },
+ [225] = { .c = 0x00fffff3, .b = 24 },
+ [226] = { .c = 0x000fffed, .b = 20 },
+ [227] = { .c = 0x001fffe6, .b = 21 },
+ [228] = { .c = 0x003fffe9, .b = 22 },
+ [229] = { .c = 0x001fffe7, .b = 21 },
+ [230] = { .c = 0x001fffe8, .b = 21 },
+ [231] = { .c = 0x007ffff3, .b = 23 },
+ [232] = { .c = 0x003fffea, .b = 22 },
+ [233] = { .c = 0x003fffeb, .b = 22 },
+ [234] = { .c = 0x01ffffee, .b = 25 },
+ [235] = { .c = 0x01ffffef, .b = 25 },
+ [236] = { .c = 0x00fffff4, .b = 24 },
+ [237] = { .c = 0x00fffff5, .b = 24 },
+ [238] = { .c = 0x03ffffea, .b = 26 },
+ [239] = { .c = 0x007ffff4, .b = 23 },
+ [240] = { .c = 0x03ffffeb, .b = 26 },
+ [241] = { .c = 0x07ffffe6, .b = 27 },
+ [242] = { .c = 0x03ffffec, .b = 26 },
+ [243] = { .c = 0x03ffffed, .b = 26 },
+ [244] = { .c = 0x07ffffe7, .b = 27 },
+ [245] = { .c = 0x07ffffe8, .b = 27 },
+ [246] = { .c = 0x07ffffe9, .b = 27 },
+ [247] = { .c = 0x07ffffea, .b = 27 },
+ [248] = { .c = 0x07ffffeb, .b = 27 },
+ [249] = { .c = 0x0ffffffe, .b = 28 },
+ [250] = { .c = 0x07ffffec, .b = 27 },
+ [251] = { .c = 0x07ffffed, .b = 27 },
+ [252] = { .c = 0x07ffffee, .b = 27 },
+ [253] = { .c = 0x07ffffef, .b = 27 },
+ [254] = { .c = 0x07fffff0, .b = 27 },
+ [255] = { .c = 0x03ffffee, .b = 26 },
+ [256] = { .c = 0x3fffffff, .b = 30 }, /* EOS */
+};
+
+
+/* Reversed huffman codes, generated by dev/hpack/gen-rht.c from the table
+ * above, then simplified by hand by extracting the few different length
+ * values and writing code to produce them instead.
+ *
+ * The codes are aligned on the MSB since that's how they appear in the stream.
+ *
+ * Quick summary below of the way the tables work. They're based on how the
+ * prefixes are organized, starting from the MSB.
+ *
+ * These codes fit in a single octet (5 to 8 bits) :
+ * 00/5 08/5 10/5 18/5 20/5 28/5 30/5 38/5
+ * 40/5 48/5
+ *
+ * 50/6 54/6 58/6 5c/6 60/6 64/6 68/6 6c/6
+ * 70/6 74/6 78/6 7c/6 80/6 84/6 88/6 8c/6
+ * 90/6 94/6 98/6 9c/6 a0/6 a4/6 a8/6 ac/6
+ * b0/6 b4/6
+ *
+ * b8/7 ba/7 bc/7 be/7 c0/7 c2/7 c4/7 c6/7
+ * c8/7 ca/7 cc/7 ce/7 d0/7 d2/7 d4/7 d6/7
+ * d8/7 da/7 dc/7 de/7 e0/7 e2/7 e4/7 e6/7
+ * e8/7 ea/7 ec/7 ee/7 f0/7 f2/7 f4/7 f6/7
+ *
+ * f8/8 f9/8 fa/8 fb/8 fc/8 fd/8
+ *
+ * ==> a single 256-symbol table based on the full byte provides a direct
+ * access and the bit count
+ *
+ * These codes fit in two octets (10 to 15 bits, neither 9 nor 16 bits code) :
+ *
+ * fe + 2 bits:
+ * 00/2 40/2 80/2 c0/2
+ *
+ * ff + 2..7 bits :
+ * 00/2
+ * 40/3 60/3 80/3
+ * a0/4 b0/4
+ * c0/5 c8/5 d0/5 d8/5 e0/5 e8/5
+ * f0/6 f4/6
+ * f8/7 fa/7 fc/7
+ *
+ * ==> a single 256-symbol table made of b0.0 and b1.7-1 provides a direct
+ * access and the bit count after a miss on the first one above.
+ *
+ * These ones fit in three octets :
+ * ff fe + 3..5 bits :
+ * 00/3 20/3 40/3 60/4 70/4 80/4 90/4 a0/4
+ * b0/4 c0/4 d0/4
+ * e0/5 e8/5 f0/5 f8/5
+ *
+ * ff ff + 5..8 bits :
+ * 00/5 08/5 10/5 18/5 20/5 28/5 30/5 38/5
+ * 40/5
+ * 48/6 4c/6 50/6 54/6 58/6 5c/6 60/6 64/6
+ * 68/6 6c/6 70/6 74/6 78/6 7c/6 80/6 84/6
+ * 88/6 8c/6 90/6 94/6 98/6 9c/6 a0/6 a4/6
+ * a8/6 ac/6
+ * b0/7 b2/7 b4/7 b6/7 b8/7 ba/7 bc/7 be/7
+ * c0/7 c2/7 c4/7 c6/7 c8/7 ca/7 cc/7 ce/7
+ * d0/7 d2/7 d4/7 d6/7 d8/7 da/7 dc/7 de/7
+ * e0/7 e2/7 e4/7 e6/7 e8/7
+ * ea/8 eb/8 ec/8 ed/8 ee/8 ef/8 f0/8 f1/8
+ * f2/8 f3/8 f4/8 f5/8
+ *
+ * ==> a 32-symbol table has to be applied to 0xfffe
+ * ==> a 256-symbol table has to be applied to 0xffff
+ *
+ * The other ones fit in four octets with 1 to 6 bits in the last one :
+ * ff ff f6 : 00/1 80/1
+ * ff ff f7 : 00/1 80/1
+ * ff ff f8 : 00/2 40/2 80/2 c0/2
+ * ff ff f9 : 00/2 40/2 80/2 c0/2
+ * ff ff fa : 00/2 40/2 80/2 c0/2
+ * ff ff fb : 00/2 40/2 80/2
+ * ff ff fb : c0/3 e0/3
+ * ff ff fc : 00/3 20/3 40/3 60/3 80/3 a0/3 c0/3 e0/3
+ * ff ff fd : 00/3 20/3 40/3 60/3 80/3 a0/3 c0/3 e0/3
+ * ff ff fe : 00/3
+ * ff ff fe : 20/4 30/4 40/4 50/4 60/4 70/4 80/4 90/4 a0/4 b0/4 c0/4 d0/4 e0/4 f0/4
+ * ff ff ff : 00/4 10/4 20/4 30/4 40/4 50/4 60/4 70/4 80/4 90/4 a0/4 b0/4 c0/4 d0/4 e0/4
+ * ff ff ff : f0/6 f4/6 f8/6 fc/6
+ *
+ * ==> a 256-symbol table with b2.0-3,b3.7-4 gives all of them except the
+ * distinction between ffffff{f0,f4,f8,fc} which is rare enough
+ * and can be done by hand when bit count == 30.
+ *
+ *
+ * Code lengths :
+ * 5..8 : 0x00..0xfe
+ * 10..15 : 0xfe
+ * 0xff 0x00..0xfe
+ * 19..20 : 0xff 0xfe 0x00..0xdf
+ * 21 : 0xff 0xfe 0xe0..0xff
+ * 21 : 0xff 0xff 0x00..0x40
+ * 22..24 : 0xff 0xff 0x00..0xf5
+ * 24..28 : 0xff 0xff 0xf5..0xff
+ * 30 : 0xff 0xff 0xff 0xf0..0xff
+ *
+ *
+ * if b0 < 0xfe ==> 5..8 bits (74 codes)
+ * if b0 == 0xfe or 0xff : 10..15
+ * => if b0 == 0xfe || b1 < 0xfe : lookup (b0:0|b1:7..1) (21 codes)
+ *
+ * -- b0 = 0xff --
+ * if b1 == 0xfe : 19..21 bits
+ * => lookup b2:7..3 (15 codes)
+ *
+ * -- b0 = 0xff, b1 = 0xff : 147 codes --
+ * if b2 < 0xf6 : 21..24 bits (76 codes)
+ * if b2 >= 0xf6 : 25..30 bits (71 codes)
+ *
+ * Algorithm:
+ * - if > 24 and < 32, read missing bits.
+ * - if less than 24 bits, read 1 byte. If past end, insert 0xff instead.
+ * - if b0 < 0xfe lookup b0 in table0[0..255]
+ * - else if b0 == 0xfe, manual lookup
+ * - else if b0 == 0xff, lookup b1 in table1[0..255]
+ * ...
+ */
+
+uint8_t rht_bit31_24[256] = {
+ /* 0x00 */ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+ /* 0x08 */ 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31, 0x31,
+ /* 0x10 */ 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32,
+ /* 0x18 */ 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61, 0x61,
+ /* 0x20 */ 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63, 0x63,
+ /* 0x28 */ 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65, 0x65,
+ /* 0x30 */ 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69, 0x69,
+ /* 0x38 */ 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f, 0x6f,
+ /* 0x40 */ 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73, 0x73,
+ /* 0x48 */ 0x74, 0x74, 0x74, 0x74, 0x74, 0x74, 0x74, 0x74,
+ /* 0x50 */ 0x20, 0x20, 0x20, 0x20,
+ /* 0x54 */ 0x25, 0x25, 0x25, 0x25,
+ /* 0x58 */ 0x2d, 0x2d, 0x2d, 0x2d,
+ /* 0x5c */ 0x2e, 0x2e, 0x2e, 0x2e,
+ /* 0x60 */ 0x2f, 0x2f, 0x2f, 0x2f,
+ /* 0x64 */ 0x33, 0x33, 0x33, 0x33,
+ /* 0x68 */ 0x34, 0x34, 0x34, 0x34,
+ /* 0x6c */ 0x35, 0x35, 0x35, 0x35,
+ /* 0x70 */ 0x36, 0x36, 0x36, 0x36,
+ /* 0x74 */ 0x37, 0x37, 0x37, 0x37,
+ /* 0x78 */ 0x38, 0x38, 0x38, 0x38,
+ /* 0x7c */ 0x39, 0x39, 0x39, 0x39,
+ /* 0x80 */ 0x3d, 0x3d, 0x3d, 0x3d,
+ /* 0x84 */ 0x41, 0x41, 0x41, 0x41,
+ /* 0x88 */ 0x5f, 0x5f, 0x5f, 0x5f,
+ /* 0x8c */ 0x62, 0x62, 0x62, 0x62,
+ /* 0x90 */ 0x64, 0x64, 0x64, 0x64,
+ /* 0x94 */ 0x66, 0x66, 0x66, 0x66,
+ /* 0x98 */ 0x67, 0x67, 0x67, 0x67,
+ /* 0x9c */ 0x68, 0x68, 0x68, 0x68,
+ /* 0xa0 */ 0x6c, 0x6c, 0x6c, 0x6c,
+ /* 0xa4 */ 0x6d, 0x6d, 0x6d, 0x6d,
+ /* 0xa8 */ 0x6e, 0x6e, 0x6e, 0x6e,
+ /* 0xac */ 0x70, 0x70, 0x70, 0x70,
+ /* 0xb0 */ 0x72, 0x72, 0x72, 0x72,
+ /* 0xb4 */ 0x75, 0x75, 0x75, 0x75,
+ /* 0xb8 */ 0x3a, 0x3a,
+ /* 0xba */ 0x42, 0x42,
+ /* 0xbc */ 0x43, 0x43,
+ /* 0xbe */ 0x44, 0x44,
+ /* 0xc0 */ 0x45, 0x45,
+ /* 0xc2 */ 0x46, 0x46,
+ /* 0xc4 */ 0x47, 0x47,
+ /* 0xc6 */ 0x48, 0x48,
+ /* 0xc8 */ 0x49, 0x49,
+ /* 0xca */ 0x4a, 0x4a,
+ /* 0xcc */ 0x4b, 0x4b,
+ /* 0xce */ 0x4c, 0x4c,
+ /* 0xd0 */ 0x4d, 0x4d,
+ /* 0xd2 */ 0x4e, 0x4e,
+ /* 0xd4 */ 0x4f, 0x4f,
+ /* 0xd6 */ 0x50, 0x50,
+ /* 0xd8 */ 0x51, 0x51,
+ /* 0xda */ 0x52, 0x52,
+ /* 0xdc */ 0x53, 0x53,
+ /* 0xde */ 0x54, 0x54,
+ /* 0xe0 */ 0x55, 0x55,
+ /* 0xe2 */ 0x56, 0x56,
+ /* 0xe4 */ 0x57, 0x57,
+ /* 0xe6 */ 0x59, 0x59,
+ /* 0xe8 */ 0x6a, 0x6a,
+ /* 0xea */ 0x6b, 0x6b,
+ /* 0xec */ 0x71, 0x71,
+ /* 0xee */ 0x76, 0x76,
+ /* 0xf0 */ 0x77, 0x77,
+ /* 0xf2 */ 0x78, 0x78,
+ /* 0xf4 */ 0x79, 0x79,
+ /* 0xf6 */ 0x7a, 0x7a,
+ /* 0xf8 */ 0x26,
+ /* 0xf9 */ 0x2a,
+ /* 0xfa */ 0x2c,
+ /* 0xfb */ 0x3b,
+ /* 0xfc */ 0x58,
+ /* 0xfd */ 0x5a,
+};
+
+uint8_t rht_bit24_17[256] = {
+ /* 0x00 */ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ /* 0x10 */ 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+ /* 0x20 */ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+ /* 0x30 */ 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+ /* 0x40 */ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ /* 0x50 */ 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28,
+ /* 0x60 */ 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29,
+ /* 0x70 */ 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29, 0x29,
+ /* 0x80 */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
+ /* 0x90 */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
+ /* 0xa0 */ 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27,
+ /* 0xb0 */ 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b, 0x2b,
+ /* 0xc0 */ 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c,
+ /* 0xd0 */ 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23, 0x23,
+ /* 0xd8 */ 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e,
+ /* 0xe0 */ 0x00, 0x00, 0x00, 0x00,
+ /* 0xe4 */ 0x24, 0x24, 0x24, 0x24,
+ /* 0xe8 */ 0x40, 0x40, 0x40, 0x40,
+ /* 0xec */ 0x5b, 0x5b, 0x5b, 0x5b,
+ /* 0xf0 */ 0x5d, 0x5d, 0x5d, 0x5d,
+ /* 0xf4 */ 0x7e, 0x7e, 0x7e, 0x7e,
+ /* 0xf8 */ 0x5e, 0x5e,
+ /* 0xfa */ 0x7d, 0x7d,
+ /* 0xfc */ 0x3c,
+ /* 0xfd */ 0x60,
+ /* 0xfe */ 0x7b,
+};
+
+uint8_t rht_bit15_8[256] = {
+ /* 0x00 */ 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
+ /* 0x08 */ 0xb1, 0xb1, 0xb1, 0xb1, 0xb1, 0xb1, 0xb1, 0xb1,
+ /* 0x10 */ 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3, 0xb3,
+ /* 0x18 */ 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1, 0xd1,
+ /* 0x20 */ 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8,
+ /* 0x28 */ 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9, 0xd9,
+ /* 0x30 */ 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3, 0xe3,
+ /* 0x38 */ 0xe5, 0xe5, 0xe5, 0xe5, 0xe5, 0xe5, 0xe5, 0xe5,
+ /* 0x40 */ 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6, 0xe6,
+ /* 0x48 */ 0x81, 0x81, 0x81, 0x81,
+ /* 0x4c */ 0x84, 0x84, 0x84, 0x84,
+ /* 0x50 */ 0x85, 0x85, 0x85, 0x85,
+ /* 0x54 */ 0x86, 0x86, 0x86, 0x86,
+ /* 0x58 */ 0x88, 0x88, 0x88, 0x88,
+ /* 0x5c */ 0x92, 0x92, 0x92, 0x92,
+ /* 0x60 */ 0x9a, 0x9a, 0x9a, 0x9a,
+ /* 0x64 */ 0x9c, 0x9c, 0x9c, 0x9c,
+ /* 0x68 */ 0xa0, 0xa0, 0xa0, 0xa0,
+ /* 0x6c */ 0xa3, 0xa3, 0xa3, 0xa3,
+ /* 0x70 */ 0xa4, 0xa4, 0xa4, 0xa4,
+ /* 0x74 */ 0xa9, 0xa9, 0xa9, 0xa9,
+ /* 0x78 */ 0xaa, 0xaa, 0xaa, 0xaa,
+ /* 0x7c */ 0xad, 0xad, 0xad, 0xad,
+ /* 0x80 */ 0xb2, 0xb2, 0xb2, 0xb2,
+ /* 0x84 */ 0xb5, 0xb5, 0xb5, 0xb5,
+ /* 0x88 */ 0xb9, 0xb9, 0xb9, 0xb9,
+ /* 0x8c */ 0xba, 0xba, 0xba, 0xba,
+ /* 0x90 */ 0xbb, 0xbb, 0xbb, 0xbb,
+ /* 0x94 */ 0xbd, 0xbd, 0xbd, 0xbd,
+ /* 0x98 */ 0xbe, 0xbe, 0xbe, 0xbe,
+ /* 0x9c */ 0xc4, 0xc4, 0xc4, 0xc4,
+ /* 0xa0 */ 0xc6, 0xc6, 0xc6, 0xc6,
+ /* 0xa4 */ 0xe4, 0xe4, 0xe4, 0xe4,
+ /* 0xa8 */ 0xe8, 0xe8, 0xe8, 0xe8,
+ /* 0xac */ 0xe9, 0xe9, 0xe9, 0xe9,
+ /* 0xb0 */ 0x01, 0x01,
+ /* 0xb2 */ 0x87, 0x87,
+ /* 0xb4 */ 0x89, 0x89,
+ /* 0xb6 */ 0x8a, 0x8a,
+ /* 0xb8 */ 0x8b, 0x8b,
+ /* 0xba */ 0x8c, 0x8c,
+ /* 0xbc */ 0x8d, 0x8d,
+ /* 0xbe */ 0x8f, 0x8f,
+ /* 0xc0 */ 0x93, 0x93,
+ /* 0xc2 */ 0x95, 0x95,
+ /* 0xc4 */ 0x96, 0x96,
+ /* 0xc6 */ 0x97, 0x97,
+ /* 0xc8 */ 0x98, 0x98,
+ /* 0xca */ 0x9b, 0x9b,
+ /* 0xcc */ 0x9d, 0x9d,
+ /* 0xce */ 0x9e, 0x9e,
+ /* 0xd0 */ 0xa5, 0xa5,
+ /* 0xd2 */ 0xa6, 0xa6,
+ /* 0xd4 */ 0xa8, 0xa8,
+ /* 0xd6 */ 0xae, 0xae,
+ /* 0xd8 */ 0xaf, 0xaf,
+ /* 0xda */ 0xb4, 0xb4,
+ /* 0xdc */ 0xb6, 0xb6,
+ /* 0xde */ 0xb7, 0xb7,
+ /* 0xe0 */ 0xbc, 0xbc,
+ /* 0xe2 */ 0xbf, 0xbf,
+ /* 0xe4 */ 0xc5, 0xc5,
+ /* 0xe6 */ 0xe7, 0xe7,
+ /* 0xe8 */ 0xef, 0xef,
+ /* 0xea */ 0x09,
+ /* 0xeb */ 0x8e,
+ /* 0xec */ 0x90,
+ /* 0xed */ 0x91,
+ /* 0xee */ 0x94,
+ /* 0xef */ 0x9f,
+ /* 0xf0 */ 0xab,
+ /* 0xf1 */ 0xce,
+ /* 0xf2 */ 0xd7,
+ /* 0xf3 */ 0xe1,
+ /* 0xf4 */ 0xec,
+ /* 0xf5 */ 0xed,
+};
+
+/* the two non-overlapping tables below are merged in order to save on L1D:
+ * - bits 15-11 for values 0x00-0x1f
+ * - bits 11-4 for values 0x60-0xff
+ * Note that there is no data between 0x20 and 0x5f, so the caller must
+ * adjust its offsets by subtracting 0x40 for values 0x60 and above.
+ */
+uint8_t rht_bit15_11_11_4[192] = {
+ /* part used for bits 15-11 (0x00-0x1f) */
+ /* 0x00 */ 0x5c, 0x5c, 0x5c, 0x5c,
+ /* 0x04 */ 0xc3, 0xc3, 0xc3, 0xc3,
+ /* 0x08 */ 0xd0, 0xd0, 0xd0, 0xd0,
+ /* 0x0c */ 0x80, 0x80,
+ /* 0x0e */ 0x82, 0x82,
+ /* 0x10 */ 0x83, 0x83,
+ /* 0x12 */ 0xa2, 0xa2,
+ /* 0x14 */ 0xb8, 0xb8,
+ /* 0x16 */ 0xc2, 0xc2,
+ /* 0x18 */ 0xe0, 0xe0,
+ /* 0x1a */ 0xe2, 0xe2,
+ /* 0x1c */ 0x99,
+ /* 0x1d */ 0xa1,
+ /* 0x1e */ 0xa7,
+ /* 0x1f */ 0xac,
+
+ /* part used for bits 11-4 for 0xf600 (0x60-0xff), starting @0x20 */
+ /* 0x60 */ 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7, 0xc7,
+ /* 0x68 */ 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf, 0xcf,
+ /* 0x70 */ 0xea, 0xea, 0xea, 0xea, 0xea, 0xea, 0xea, 0xea,
+ /* 0x78 */ 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb, 0xeb,
+ /* 0x80 */ 0xc0, 0xc0, 0xc0, 0xc0,
+ /* 0x84 */ 0xc1, 0xc1, 0xc1, 0xc1,
+ /* 0x88 */ 0xc8, 0xc8, 0xc8, 0xc8,
+ /* 0x8c */ 0xc9, 0xc9, 0xc9, 0xc9,
+ /* 0x90 */ 0xca, 0xca, 0xca, 0xca,
+ /* 0x94 */ 0xcd, 0xcd, 0xcd, 0xcd,
+ /* 0x98 */ 0xd2, 0xd2, 0xd2, 0xd2,
+ /* 0x9c */ 0xd5, 0xd5, 0xd5, 0xd5,
+ /* 0xa0 */ 0xda, 0xda, 0xda, 0xda,
+ /* 0xa4 */ 0xdb, 0xdb, 0xdb, 0xdb,
+ /* 0xa8 */ 0xee, 0xee, 0xee, 0xee,
+ /* 0xac */ 0xf0, 0xf0, 0xf0, 0xf0,
+ /* 0xb0 */ 0xf2, 0xf2, 0xf2, 0xf2,
+ /* 0xb4 */ 0xf3, 0xf3, 0xf3, 0xf3,
+ /* 0xb8 */ 0xff, 0xff, 0xff, 0xff,
+ /* 0xbc */ 0xcb, 0xcb,
+ /* 0xbe */ 0xcc, 0xcc,
+ /* 0xc0 */ 0xd3, 0xd3,
+ /* 0xc2 */ 0xd4, 0xd4,
+ /* 0xc4 */ 0xd6, 0xd6,
+ /* 0xc6 */ 0xdd, 0xdd,
+ /* 0xc8 */ 0xde, 0xde,
+ /* 0xca */ 0xdf, 0xdf,
+ /* 0xcc */ 0xf1, 0xf1,
+ /* 0xce */ 0xf4, 0xf4,
+ /* 0xd0 */ 0xf5, 0xf5,
+ /* 0xd2 */ 0xf6, 0xf6,
+ /* 0xd4 */ 0xf7, 0xf7,
+ /* 0xd6 */ 0xf8, 0xf8,
+ /* 0xd8 */ 0xfa, 0xfa,
+ /* 0xda */ 0xfb, 0xfb,
+ /* 0xdc */ 0xfc, 0xfc,
+ /* 0xde */ 0xfd, 0xfd,
+ /* 0xe0 */ 0xfe, 0xfe,
+ /* 0xe2 */ 0x02,
+ /* 0xe3 */ 0x03,
+ /* 0xe4 */ 0x04,
+ /* 0xe5 */ 0x05,
+ /* 0xe6 */ 0x06,
+ /* 0xe7 */ 0x07,
+ /* 0xe8 */ 0x08,
+ /* 0xe9 */ 0x0b,
+ /* 0xea */ 0x0c,
+ /* 0xeb */ 0x0e,
+ /* 0xec */ 0x0f,
+ /* 0xed */ 0x10,
+ /* 0xee */ 0x11,
+ /* 0xef */ 0x12,
+ /* 0xf0 */ 0x13,
+ /* 0xf1 */ 0x14,
+ /* 0xf2 */ 0x15,
+ /* 0xf3 */ 0x17,
+ /* 0xf4 */ 0x18,
+ /* 0xf5 */ 0x19,
+ /* 0xf6 */ 0x1a,
+ /* 0xf7 */ 0x1b,
+ /* 0xf8 */ 0x1c,
+ /* 0xf9 */ 0x1d,
+ /* 0xfa */ 0x1e,
+ /* 0xfb */ 0x1f,
+ /* 0xfc */ 0x7f,
+ /* 0xfd */ 0xdc,
+ /* 0xfe */ 0xf9,
+ /* 0xff */ 0x0a,
+ /* Note, for [0xff], l==30 and bits 2..3 give 00:0x0a, 01:0x0d, 10:0x16, 11:EOS */
+};
+
+/* huffman-encodes string <s> into buffer <out> and returns the number of
+ * output bytes. The caller must ensure the output is large enough (ie at
+ * least 4 times as long as <s>).
+ *
+ * FIXME: bits are only counted for now, no code is emitted!
+ */
+int huff_enc(const char *s, char *out)
+{
+ int bits = 0;
+
+ while (*s) {
+ bits += ht[(uint8_t)*s].b;
+ s++;
+ }
+ bits += 7;
+
+ /* FIXME: huffman code is not emitted yet. */
+ //memset(out, 'H', bits / 8);
+ return bits / 8;
+}
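Given the FIXME above, the only observable result for now is the size. A
hypothetical sanity check: the per-symbol code lengths of "www.example.com"
sum to 89 bits, so huff_enc() returns (89 + 7) / 8 = 12, matching the 12-octet
Huffman string of RFC 7541 C.4.1:

#include <assert.h>
#include <haproxy/hpack-huff.h>

int main(void)
{
	char scratch[64]; /* large enough per the contract above */

	/* 3*7 ('w') + 6 ('.') + 5+7+5+6+6+6+5 ("example") + 6 ('.')
	 * + 5+5+6 ("com") = 89 bits -> 12 output bytes
	 */
	assert(huff_enc("www.example.com", scratch) == 12);
	return 0;
}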
+
+/* decodes a huffman string and returns the new output size, or -1 in case
+ * of error.
+ *
+ * The principle of the decoder is to look up full bytes in reverse-huffman
+ * tables. Since we may need up to 30 bits and the word positions are not
+ * always multiples of 8, we build the code word by shifting the "current"
+ * 32-bit word and the "next" one by the appropriate number of bits. Once
+ * the shift goes beyond 32, words are swapped and the "next" one is refilled
+ * with new bytes. Shift operations are cheap when done a single time like this.
+ * On 64-bit platforms it is possible to further improve this by storing both
+ * of them in a single word.
+ */
+int huff_dec(const uint8_t *huff, int hlen, char *out, int olen)
+{
+ char *out_start = out;
+ char *out_end = out + olen;
+ const uint8_t *huff_end = huff + hlen;
+ uint32_t curr = 0;
+ uint32_t next = 0;
+ uint32_t shift;
+ uint32_t code; /* The 30-bit code being looked up, MSB-aligned */
+ uint8_t sym;
+ int bleft; /* bits left */
+ int l;
+
+ code = 0;
+ shift = 64; // start with an empty buffer
+ bleft = hlen << 3;
+ while (bleft > 0 && out != out_end) {
+ while (shift >= 32) {
+ curr = next;
+
+ /* read up to 4 bytes into next */
+ next = 0;
+
+ if (huff + 4 <= huff_end) {
+ next = read_n32(huff);
+ huff += 4;
+ }
+ else {
+ /* note: we append 0 and not 0xff so that we can
+ * distinguish shifted bits from a really inserted
+ * EOS.
+ */
+ next = (((huff + 0 < huff_end) ? (uint32_t)huff[0] : 0x00) << 24) +
+ (((huff + 1 < huff_end) ? (uint32_t)huff[1] : 0x00) << 16) +
+ (((huff + 2 < huff_end) ? (uint32_t)huff[2] : 0x00) << 8) +
+ ((huff + 3 < huff_end) ? (uint32_t)huff[3] : 0x00);
+ huff = huff_end;
+ }
+
+ shift -= 32;
+ }
+
+ /* curr:next contain 64 bit of huffman code */
+ code = curr;
+ if (shift)
+ code = (code << shift) + (next >> (32 - shift));
+
+ /* now we necessarily have 32 bits available */
+ if (code < 0xfe000000) {
+ /* single byte */
+ sym = code >> 24;
+ l = sym < 0xb8 ?
+ sym < 0x50 ? 5 : 6 :
+ sym < 0xf8 ? 7 : 8;
+ sym = rht_bit31_24[code >> 24];
+ }
+ else if (code < 0xfffe0000) {
+ /* two bytes, 0xfe + 2 bits or 0xff + 2..7 bits */
+ sym = code >> 17;
+ l = sym < 0xe0 ?
+ sym < 0xa0 ? 10 : sym < 0xd0 ? 11 : 12 :
+ sym < 0xf8 ? 13 : sym < 0xfc ? 14 : 15;
+
+ sym = rht_bit24_17[(code >> 17) & 0xff];
+ }
+ else if (code < 0xffff0000) { /* 3..5 bits */
+ /* 0xff + 0xfe + 3..5 bits or
+ * 0xff + 0xff + 5..8 bits for values till 0xf5
+ */
+ sym = (code >> 11) & 0x1f;
+ l = sym < 0x0c ? 19 : sym < 0x1c ? 20 : 21;
+ sym = rht_bit15_11_11_4[(code >> 11) & 0x1f];
+ }
+ else if (code < 0xfffff600) { /* 5..8 bits */
+ /* that's 0xff + 0xff */
+ sym = code >> 8;
+
+ l = sym < 0xb0 ?
+ sym < 0x48 ? 21 : 22 :
+ sym < 0xea ? 23 : 24;
+ sym = rht_bit15_8[(code >> 8) & 0xff];
+ }
+ else {
+ /* 0xff 0xff 0xf6..0xff */
+ sym = code >> 4; /* sym = 0x60..0xff */
+ l = sym < 0xbc ?
+ sym < 0x80 ? 25 : 26 :
+ sym < 0xe2 ? 27 : sym < 0xff ? 28 : 30;
+ if (sym < 0xff)
+ sym = rht_bit15_11_11_4[((code >> 4) & 0xff) - 0x40L];
+ else if ((code & 0xff) == 0xf0)
+ sym = 10;
+ else if ((code & 0xff) == 0xf4)
+ sym = 13;
+ else if ((code & 0xff) == 0xf8)
+ sym = 22;
+ else { // 0xfc : EOS
+ break;
+ }
+ }
+
+ if (!l || bleft - l < 0)
+ break;
+
+ bleft -= l;
+ shift += l;
+ *out++ = sym;
+ }
+
+ if (bleft > 0) {
+ /* some bits were not consumed after the last code, they must
+ * match EOS (ie: all ones) and there must be 7 bits or less.
+ * (7541#5.2).
+ */
+ if (bleft > 7)
+ return -1;
+
+ if ((code & -(1 << (32 - bleft))) != (uint32_t)-(1 << (32 - bleft)))
+ return -1;
+ }
+
+ if (out < out_end)
+ *out = 0; // end of string whenever possible
+ return out - out_start;
+}
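A hypothetical round-trip check for huff_dec(), using the Huffman string of
RFC 7541 C.4.1, which decodes back to "www.example.com":

#include <assert.h>
#include <stdint.h>
#include <string.h>

#include <haproxy/hpack-huff.h>

int main(void)
{
	/* Huffman-coded "www.example.com" from RFC 7541 C.4.1 */
	static const uint8_t h[12] = {
		0xf1, 0xe3, 0xc2, 0xe5, 0xf2, 0x3a, 0x6b, 0xa0,
		0xab, 0x90, 0xf4, 0xff,
	};
	char out[32];
	int n = huff_dec(h, sizeof(h), out, sizeof(out));

	assert(n == 15 && memcmp(out, "www.example.com", 15) == 0);
	return 0;
}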
diff --git a/src/hpack-tbl.c b/src/hpack-tbl.c
new file mode 100644
index 0000000..990d2f7
--- /dev/null
+++ b/src/hpack-tbl.c
@@ -0,0 +1,372 @@
+/*
+ * HPACK header table management (RFC7541)
+ *
+ * Copyright (C) 2014-2017 Willy Tarreau <willy@haproxy.org>
+ * Copyright (C) 2017 HAProxy Technologies
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <import/ist.h>
+#include <haproxy/hpack-huff.h>
+#include <haproxy/hpack-tbl.h>
+
+/* static header table as in RFC7541 Appendix A. [0] unused. */
+const struct http_hdr hpack_sht[HPACK_SHT_SIZE] = {
+ [ 1] = { .n = IST(":authority"), .v = IST("") },
+ [ 2] = { .n = IST(":method"), .v = IST("GET") },
+ [ 3] = { .n = IST(":method"), .v = IST("POST") },
+ [ 4] = { .n = IST(":path"), .v = IST("/") },
+ [ 5] = { .n = IST(":path"), .v = IST("/index.html") },
+ [ 6] = { .n = IST(":scheme"), .v = IST("http") },
+ [ 7] = { .n = IST(":scheme"), .v = IST("https") },
+ [ 8] = { .n = IST(":status"), .v = IST("200") },
+ [ 9] = { .n = IST(":status"), .v = IST("204") },
+ [10] = { .n = IST(":status"), .v = IST("206") },
+ [11] = { .n = IST(":status"), .v = IST("304") },
+ [12] = { .n = IST(":status"), .v = IST("400") },
+ [13] = { .n = IST(":status"), .v = IST("404") },
+ [14] = { .n = IST(":status"), .v = IST("500") },
+ [15] = { .n = IST("accept-charset"), .v = IST("") },
+ [16] = { .n = IST("accept-encoding"), .v = IST("gzip, deflate") },
+ [17] = { .n = IST("accept-language"), .v = IST("") },
+ [18] = { .n = IST("accept-ranges"), .v = IST("") },
+ [19] = { .n = IST("accept"), .v = IST("") },
+ [20] = { .n = IST("access-control-allow-origin"), .v = IST("") },
+ [21] = { .n = IST("age"), .v = IST("") },
+ [22] = { .n = IST("allow"), .v = IST("") },
+ [23] = { .n = IST("authorization"), .v = IST("") },
+ [24] = { .n = IST("cache-control"), .v = IST("") },
+ [25] = { .n = IST("content-disposition"), .v = IST("") },
+ [26] = { .n = IST("content-encoding"), .v = IST("") },
+ [27] = { .n = IST("content-language"), .v = IST("") },
+ [28] = { .n = IST("content-length"), .v = IST("") },
+ [29] = { .n = IST("content-location"), .v = IST("") },
+ [30] = { .n = IST("content-range"), .v = IST("") },
+ [31] = { .n = IST("content-type") , .v = IST("") },
+ [32] = { .n = IST("cookie"), .v = IST("") },
+ [33] = { .n = IST("date"), .v = IST("") },
+ [34] = { .n = IST("etag"), .v = IST("") },
+ [35] = { .n = IST("expect"), .v = IST("") },
+ [36] = { .n = IST("expires"), .v = IST("") },
+ [37] = { .n = IST("from"), .v = IST("") },
+ [38] = { .n = IST("host"), .v = IST("") },
+ [39] = { .n = IST("if-match"), .v = IST("") },
+ [40] = { .n = IST("if-modified-since"), .v = IST("") },
+ [41] = { .n = IST("if-none-match"), .v = IST("") },
+ [42] = { .n = IST("if-range"), .v = IST("") },
+ [43] = { .n = IST("if-unmodified-since"), .v = IST("") },
+ [44] = { .n = IST("last-modified"), .v = IST("") },
+ [45] = { .n = IST("link"), .v = IST("") },
+ [46] = { .n = IST("location"), .v = IST("") },
+ [47] = { .n = IST("max-forwards"), .v = IST("") },
+ [48] = { .n = IST("proxy-authenticate"), .v = IST("") },
+ [49] = { .n = IST("proxy-authorization"), .v = IST("") },
+ [50] = { .n = IST("range"), .v = IST("") },
+ [51] = { .n = IST("referer"), .v = IST("") },
+ [52] = { .n = IST("refresh"), .v = IST("") },
+ [53] = { .n = IST("retry-after"), .v = IST("") },
+ [54] = { .n = IST("server"), .v = IST("") },
+ [55] = { .n = IST("set-cookie"), .v = IST("") },
+ [56] = { .n = IST("strict-transport-security"), .v = IST("") },
+ [57] = { .n = IST("transfer-encoding"), .v = IST("") },
+ [58] = { .n = IST("user-agent"), .v = IST("") },
+ [59] = { .n = IST("vary"), .v = IST("") },
+ [60] = { .n = IST("via"), .v = IST("") },
+ [61] = { .n = IST("www-authenticate"), .v = IST("") },
+};
+
+struct pool_head *pool_head_hpack_tbl __read_mostly = NULL;
+
+#ifdef DEBUG_HPACK
+/* dump the whole dynamic header table */
+void hpack_dht_dump(FILE *out, const struct hpack_dht *dht)
+{
+ unsigned int i;
+ unsigned int slot;
+ char name[4096], value[4096];
+
+ for (i = HPACK_SHT_SIZE; i < HPACK_SHT_SIZE + dht->used; i++) {
+ slot = (hpack_get_dte(dht, i - HPACK_SHT_SIZE + 1) - dht->dte);
+ fprintf(out, "idx=%u slot=%u name=<%s> value=<%s> addr=%u-%u\n",
+ i, slot,
+ istpad(name, hpack_idx_to_name(dht, i)).ptr,
+ istpad(value, hpack_idx_to_value(dht, i)).ptr,
+ dht->dte[slot].addr, dht->dte[slot].addr+dht->dte[slot].nlen+dht->dte[slot].vlen-1);
+ }
+}
+
+/* check for the whole dynamic header table consistency, abort on failures */
+void hpack_dht_check_consistency(const struct hpack_dht *dht)
+{
+ unsigned slot = hpack_dht_get_tail(dht);
+ unsigned used2 = dht->used;
+ unsigned total = 0;
+
+ if (!dht->used)
+ return;
+
+ if (dht->front >= dht->wrap)
+ abort();
+
+ if (dht->used > dht->wrap)
+ abort();
+
+ if (dht->head >= dht->wrap)
+ abort();
+
+ while (used2--) {
+ total += dht->dte[slot].nlen + dht->dte[slot].vlen;
+ slot++;
+ if (slot >= dht->wrap)
+ slot = 0;
+ }
+
+ if (total != dht->total) {
+ fprintf(stderr, "%d: total=%u dht=%u\n", __LINE__, total, dht->total);
+ abort();
+ }
+}
+#endif // DEBUG_HPACK
+
+/* rebuilds a new dynamic header table from <dht>, with an unwrapped index and
+ * the contents packed at the end. The new table is returned and the caller
+ * must not use the previous one anymore. NULL may be returned if no table
+ * could be allocated.
+ */
+static struct hpack_dht *hpack_dht_defrag(struct hpack_dht *dht)
+{
+ struct hpack_dht *alt_dht;
+ uint16_t old, new;
+ uint32_t addr;
+
+ /* Note: for small tables we could use alloca() instead but
+ * portability especially for large tables can be problematic.
+ */
+ alt_dht = hpack_dht_alloc();
+ if (!alt_dht)
+ return NULL;
+
+ alt_dht->total = dht->total;
+ alt_dht->used = dht->used;
+ alt_dht->wrap = dht->used;
+
+ new = 0;
+ addr = alt_dht->size;
+
+ if (dht->used) {
+ /* start from the tail */
+ old = hpack_dht_get_tail(dht);
+ do {
+ alt_dht->dte[new].nlen = dht->dte[old].nlen;
+ alt_dht->dte[new].vlen = dht->dte[old].vlen;
+ addr -= dht->dte[old].nlen + dht->dte[old].vlen;
+ alt_dht->dte[new].addr = addr;
+
+ memcpy((void *)alt_dht + alt_dht->dte[new].addr,
+ (void *)dht + dht->dte[old].addr,
+ dht->dte[old].nlen + dht->dte[old].vlen);
+
+ old++;
+ if (old >= dht->wrap)
+ old = 0;
+ new++;
+ } while (new < dht->used);
+ }
+
+ alt_dht->front = alt_dht->head = new - 1;
+
+ memcpy(dht, alt_dht, dht->size);
+ hpack_dht_free(alt_dht);
+
+ return dht;
+}
+
+/* Purges table dht until a header field of <needed> bytes fits according to
+ * the protocol (adding 32 bytes overhead). Returns non-zero on success, zero
+ * on failure (ie: table empty but still not sufficient). It must only be
+ * called when the table is not large enough to suit the new entry and there
+ * are some entries left. In case of doubt, use hpack_dht_make_room() instead.
+ */
+int __hpack_dht_make_room(struct hpack_dht *dht, unsigned int needed)
+{
+ unsigned int used = dht->used;
+ unsigned int wrap = dht->wrap;
+ unsigned int tail;
+
+ do {
+ tail = ((dht->head + 1U < used) ? wrap : 0) + dht->head + 1U - used;
+ dht->total -= dht->dte[tail].nlen + dht->dte[tail].vlen;
+ if (tail == dht->front)
+ dht->front = dht->head;
+ used--;
+ } while (used && used * 32 + dht->total + needed + 32 > dht->size);
+
+ dht->used = used;
+
+ /* realign if empty */
+ if (!used)
+ dht->front = dht->head = 0;
+
+ /* pack the table if it doesn't wrap anymore */
+ if (dht->head + 1U >= used)
+ dht->wrap = dht->head + 1;
+
+ /* no need to check for 'used' here as if it doesn't fit, used==0 */
+ return needed + 32 <= dht->size;
+}
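+
+/* Usage sketch (illustrative only, not part of the build): per RFC 7541#4.1,
+ * each table entry accounts for nlen + vlen + 32 bytes, which is exactly the
+ * "used * 32 + dht->total + needed + 32" test performed above. A hypothetical
+ * caller could thus check whether an entry fits without evicting anything:
+ */
+#if 0 /* example only */
+static int example_entry_fits(const struct hpack_dht *dht, unsigned int needed)
+{
+	/* <needed> is the sum of the name and value lengths of the new entry */
+	return dht->used * 32 + dht->total + needed + 32 <= dht->size;
+}
+#endif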
+
+/* Tries to insert a new header <name>:<value> in front of the current head.
+ * Returns zero on success (including when the entry is too large to fit, in
+ * which case it is simply not indexed), or a negative value on error.
+ */
+int hpack_dht_insert(struct hpack_dht *dht, struct ist name, struct ist value)
+{
+ unsigned int used;
+ unsigned int head;
+ unsigned int prev;
+ unsigned int wrap;
+ unsigned int tail;
+ uint32_t headroom, tailroom;
+
+ if (!hpack_dht_make_room(dht, name.len + value.len))
+ return 0;
+
+ /* Now there is enough room in the table, that's guaranteed by the
+ * protocol, but not necessarily where we need it.
+ */
+
+ used = dht->used;
+ if (!used) {
+ /* easy, the table was empty */
+ dht->front = dht->head = 0;
+ dht->wrap = dht->used = 1;
+ dht->total = 0;
+ head = 0;
+ dht->dte[head].addr = dht->size - (name.len + value.len);
+ goto copy;
+ }
+
+ /* compute the new head, used and wrap position */
+ prev = head = dht->head;
+ wrap = dht->wrap;
+ tail = hpack_dht_get_tail(dht);
+
+ used++;
+ head++;
+
+ if (head >= wrap) {
+ /* head is leading the entries, we either need to push the
+ * table further or to loop back to released entries. We could
+ * force to loop back when at least half of the allocatable
+ * entries are free but in practice it never happens.
+ */
+ if ((sizeof(*dht) + (wrap + 1) * sizeof(dht->dte[0]) <= dht->dte[dht->front].addr))
+ wrap++;
+ else if (head >= used) /* there's a hole at the beginning */
+ head = 0;
+ else {
+ /* no more room, head hits tail and the index cannot be
+ * extended, we have to realign the whole table.
+ */
+ if (!hpack_dht_defrag(dht))
+ return -1;
+
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ prev = head - 1;
+ tail = 0;
+ }
+ }
+ else if (used >= wrap) {
+ /* we've hit the tail, we need to reorganize the index so that
+ * the head is at the end (but not necessarily move the data).
+ */
+ if (!hpack_dht_defrag(dht))
+ return -1;
+
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ prev = head - 1;
+ tail = 0;
+ }
+
+ /* Now we have updated head, used and wrap, we know that there is some
+ * available room at least from the protocol's perspective. This space
+ * is split in two areas :
+ *
+ * 1: if the previous head was the front cell, the space between the
+ * end of the index table and the front cell's address.
+ * 2: if the previous head was the front cell, the space between the
+ * end of the tail and the end of the table ; or if the previous
+ * head was not the front cell, the space between the end of the
+ * tail and the head's address.
+ */
+ if (prev == dht->front) {
+ /* the area was contiguous */
+ headroom = dht->dte[dht->front].addr - (sizeof(*dht) + wrap * sizeof(dht->dte[0]));
+ tailroom = dht->size - dht->dte[tail].addr - dht->dte[tail].nlen - dht->dte[tail].vlen;
+ }
+ else {
+ /* it's already wrapped so we can't store anything in the headroom */
+ headroom = 0;
+ tailroom = dht->dte[prev].addr - dht->dte[tail].addr - dht->dte[tail].nlen - dht->dte[tail].vlen;
+ }
+
+ /* We can decide to stop filling the headroom as soon as there's enough
+ * room left in the tail to suit the protocol, but tests show that in
+ * practice it almost never happens in other situations so the extra
+ * test is useless and we simply fill the headroom as long as it's
+ * available and we don't wrap.
+ */
+ if (prev == dht->front && headroom >= name.len + value.len) {
+ /* install upfront and update ->front */
+ dht->dte[head].addr = dht->dte[dht->front].addr - (name.len + value.len);
+ dht->front = head;
+ }
+ else if (tailroom >= name.len + value.len) {
+ dht->dte[head].addr = dht->dte[tail].addr + dht->dte[tail].nlen + dht->dte[tail].vlen + tailroom - (name.len + value.len);
+ }
+ else {
+ /* need to defragment the table before inserting upfront */
+ dht = hpack_dht_defrag(dht);
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ dht->dte[head].addr = dht->dte[dht->front].addr - (name.len + value.len);
+ dht->front = head;
+ }
+
+ dht->wrap = wrap;
+ dht->head = head;
+ dht->used = used;
+
+ copy:
+ dht->total += name.len + value.len;
+ dht->dte[head].nlen = name.len;
+ dht->dte[head].vlen = value.len;
+
+ memcpy((void *)dht + dht->dte[head].addr, name.ptr, name.len);
+ memcpy((void *)dht + dht->dte[head].addr + name.len, value.ptr, value.len);
+ return 0;
+}
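+
+/* Usage sketch (illustrative only, not part of the build; assumes
+ * hpack_dht_alloc() from hpack-tbl.h takes no argument and allocates from
+ * the hpack table pool declared above):
+ */
+#if 0 /* example only */
+static void example_index_header(void)
+{
+	struct hpack_dht *dht = hpack_dht_alloc();
+
+	if (dht) {
+		/* indexes "x-demo: 1" in front of the current head; a negative
+		 * return would denote an allocation error while defragmenting
+		 */
+		hpack_dht_insert(dht, ist("x-demo"), ist("1"));
+		hpack_dht_free(dht);
+	}
+}
+#endif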
diff --git a/src/hq_interop.c b/src/hq_interop.c
new file mode 100644
index 0000000..31c2101
--- /dev/null
+++ b/src/hq_interop.c
@@ -0,0 +1,174 @@
+#include <haproxy/hq_interop.h>
+
+#include <import/ist.h>
+#include <haproxy/buf.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/htx.h>
+#include <haproxy/http.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/qmux_http.h>
+
+static ssize_t hq_interop_decode_qcs(struct qcs *qcs, struct buffer *b, int fin)
+{
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct buffer htx_buf = BUF_NULL;
+ struct ist path;
+ char *ptr = b_head(b);
+ size_t data = b_data(b);
+
+ /* hq-interop parser does not support buffer wrapping. */
+ BUG_ON(b_data(b) != b_contig_data(b, 0));
+
+	/* hq-interop parsing is only performed once the full message is received. */
+ if (!fin)
+ return 0;
+
+ b_alloc(&htx_buf);
+ htx = htx_from_buf(&htx_buf);
+
+ /* skip method */
+ while (data && HTTP_IS_TOKEN(*ptr)) {
+ ptr++;
+ data--;
+ }
+
+ if (!data || !HTTP_IS_SPHT(*ptr)) {
+ fprintf(stderr, "truncated stream\n");
+ return -1;
+ }
+
+ ptr++;
+ if (!--data) {
+ fprintf(stderr, "truncated stream\n");
+ return -1;
+ }
+
+ if (HTTP_IS_LWS(*ptr)) {
+ fprintf(stderr, "malformed stream\n");
+ return -1;
+ }
+
+ /* extract path */
+ path.ptr = ptr;
+ while (data && !HTTP_IS_LWS(*ptr)) {
+ ptr++;
+ data--;
+ }
+
+ if (!data) {
+ fprintf(stderr, "truncated stream\n");
+ return -1;
+ }
+
+ path.len = ptr - path.ptr;
+
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, 0, ist("GET"), path, ist("HTTP/1.0"));
+ if (!sl)
+ return -1;
+
+ sl->flags |= HTX_SL_F_BODYLESS;
+ sl->info.req.meth = find_http_meth("GET", 3);
+
+ htx_add_endof(htx, HTX_BLK_EOH);
+ htx->flags |= HTX_FL_EOM;
+ htx_to_buf(htx, &htx_buf);
+
+ if (!qcs_attach_sc(qcs, &htx_buf, fin))
+ return -1;
+
+ b_free(&htx_buf);
+
+ return b_data(b);
+}
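+
+/* Note (illustrative): an hq-interop request as consumed above is simply
+ * "<METHOD> <path>" with neither version nor headers, e.g. "GET /index.html".
+ * Anything after the path is ignored and the request is rebuilt as a bodyless
+ * HTTP/1.0 GET in the HTX buffer.
+ */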
+
+static struct buffer *mux_get_buf(struct qcs *qcs)
+{
+ if (!b_size(&qcs->tx.buf))
+ b_alloc(&qcs->tx.buf);
+
+ return &qcs->tx.buf;
+}
+
+static size_t hq_interop_snd_buf(struct qcs *qcs, struct buffer *buf,
+ size_t count)
+{
+ enum htx_blk_type btype;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t idx;
+ uint32_t bsize, fsize;
+ struct buffer *res, outbuf;
+ size_t total = 0;
+
+ res = mux_get_buf(qcs);
+ outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0);
+
+ htx = htx_from_buf(buf);
+
+ if (htx->extra && htx->extra == HTX_UNKOWN_PAYLOAD_LENGTH)
+ qcs->flags |= QC_SF_UNKNOWN_PL_LENGTH;
+
+ while (count && !htx_is_empty(htx) && !(qcs->flags & QC_SF_BLK_MROOM)) {
+ /* Not implemented : QUIC on backend side */
+ idx = htx_get_head(htx);
+ blk = htx_get_blk(htx, idx);
+ btype = htx_get_blk_type(blk);
+ fsize = bsize = htx_get_blksz(blk);
+
+ BUG_ON(btype == HTX_BLK_REQ_SL);
+
+ switch (btype) {
+ case HTX_BLK_DATA:
+ if (fsize > count)
+ fsize = count;
+
+ if (b_room(&outbuf) < fsize)
+ fsize = b_room(&outbuf);
+
+ if (!fsize) {
+ qcs->flags |= QC_SF_BLK_MROOM;
+ goto end;
+ }
+
+ b_putblk(&outbuf, htx_get_blk_ptr(htx, blk), fsize);
+ total += fsize;
+ count -= fsize;
+
+ if (fsize == bsize)
+ htx_remove_blk(htx, blk);
+ else
+ htx_cut_data_blk(htx, blk, fsize);
+ break;
+
+ /* only body is transferred on HTTP/0.9 */
+ case HTX_BLK_RES_SL:
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ default:
+ htx_remove_blk(htx, blk);
+ total += bsize;
+ count -= bsize;
+ break;
+ }
+ }
+
+ end:
+ b_add(res, b_data(&outbuf));
+ htx_to_buf(htx, buf);
+
+ return total;
+}
+
+static int hq_interop_attach(struct qcs *qcs, void *conn_ctx)
+{
+ qcs_wait_http_req(qcs);
+ return 0;
+}
+
+const struct qcc_app_ops hq_interop_ops = {
+ .decode_qcs = hq_interop_decode_qcs,
+ .snd_buf = hq_interop_snd_buf,
+ .attach = hq_interop_attach,
+};
diff --git a/src/http.c b/src/http.c
new file mode 100644
index 0000000..9599e0e
--- /dev/null
+++ b/src/http.c
@@ -0,0 +1,1433 @@
+/*
+ * HTTP semantics
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <haproxy/api.h>
+#include <haproxy/http.h>
+#include <haproxy/tools.h>
+
+/* It is about twice as fast on recent architectures to lookup a byte in a
+ * table than to perform a boolean AND or OR between two tests. Refer to
+ * RFC2616/RFC5234/RFC7230 for those chars. A token is any ASCII char that is
+ * neither a separator nor a CTL char. An http ver_token is any ASCII which can
+ * be found in an HTTP version, which includes 'H', 'T', 'P', '/', '.' and any
+ * digit. Note: please do not overwrite values in assignment since gcc-2.95
+ * will not handle them correctly. It's worth noting that chars 128..255 are
+ * nothing, not even control chars.
+ */
+const unsigned char http_char_classes[256] = {
+ [ 0] = HTTP_FLG_CTL,
+ [ 1] = HTTP_FLG_CTL,
+ [ 2] = HTTP_FLG_CTL,
+ [ 3] = HTTP_FLG_CTL,
+ [ 4] = HTTP_FLG_CTL,
+ [ 5] = HTTP_FLG_CTL,
+ [ 6] = HTTP_FLG_CTL,
+ [ 7] = HTTP_FLG_CTL,
+ [ 8] = HTTP_FLG_CTL,
+ [ 9] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP | HTTP_FLG_CTL,
+ [ 10] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
+ [ 11] = HTTP_FLG_CTL,
+ [ 12] = HTTP_FLG_CTL,
+ [ 13] = HTTP_FLG_CRLF | HTTP_FLG_LWS | HTTP_FLG_CTL,
+ [ 14] = HTTP_FLG_CTL,
+ [ 15] = HTTP_FLG_CTL,
+ [ 16] = HTTP_FLG_CTL,
+ [ 17] = HTTP_FLG_CTL,
+ [ 18] = HTTP_FLG_CTL,
+ [ 19] = HTTP_FLG_CTL,
+ [ 20] = HTTP_FLG_CTL,
+ [ 21] = HTTP_FLG_CTL,
+ [ 22] = HTTP_FLG_CTL,
+ [ 23] = HTTP_FLG_CTL,
+ [ 24] = HTTP_FLG_CTL,
+ [ 25] = HTTP_FLG_CTL,
+ [ 26] = HTTP_FLG_CTL,
+ [ 27] = HTTP_FLG_CTL,
+ [ 28] = HTTP_FLG_CTL,
+ [ 29] = HTTP_FLG_CTL,
+ [ 30] = HTTP_FLG_CTL,
+ [ 31] = HTTP_FLG_CTL,
+ [' '] = HTTP_FLG_SPHT | HTTP_FLG_LWS | HTTP_FLG_SEP,
+ ['!'] = HTTP_FLG_TOK,
+ ['"'] = HTTP_FLG_SEP,
+ ['#'] = HTTP_FLG_TOK,
+ ['$'] = HTTP_FLG_TOK,
+ ['%'] = HTTP_FLG_TOK,
+ ['&'] = HTTP_FLG_TOK,
+ [ 39] = HTTP_FLG_TOK,
+ ['('] = HTTP_FLG_SEP,
+ [')'] = HTTP_FLG_SEP,
+ ['*'] = HTTP_FLG_TOK,
+ ['+'] = HTTP_FLG_TOK,
+ [','] = HTTP_FLG_SEP,
+ ['-'] = HTTP_FLG_TOK,
+ ['.'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['/'] = HTTP_FLG_SEP | HTTP_FLG_VER,
+ ['0'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['1'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['2'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['3'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['4'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['5'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['6'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['7'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['8'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ ['9'] = HTTP_FLG_TOK | HTTP_FLG_VER | HTTP_FLG_DIG,
+ [':'] = HTTP_FLG_SEP,
+ [';'] = HTTP_FLG_SEP,
+ ['<'] = HTTP_FLG_SEP,
+ ['='] = HTTP_FLG_SEP,
+ ['>'] = HTTP_FLG_SEP,
+ ['?'] = HTTP_FLG_SEP,
+ ['@'] = HTTP_FLG_SEP,
+ ['A'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['B'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['C'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['D'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['E'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['F'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['G'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['H'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['I'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['J'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['K'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['L'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['M'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['N'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['O'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['P'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['Q'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['R'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['S'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['T'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['U'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['V'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['W'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['X'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['Y'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['Z'] = HTTP_FLG_TOK | HTTP_FLG_VER,
+ ['['] = HTTP_FLG_SEP,
+ [ 92] = HTTP_FLG_SEP,
+ [']'] = HTTP_FLG_SEP,
+ ['^'] = HTTP_FLG_TOK,
+ ['_'] = HTTP_FLG_TOK,
+ ['`'] = HTTP_FLG_TOK,
+ ['a'] = HTTP_FLG_TOK,
+ ['b'] = HTTP_FLG_TOK,
+ ['c'] = HTTP_FLG_TOK,
+ ['d'] = HTTP_FLG_TOK,
+ ['e'] = HTTP_FLG_TOK,
+ ['f'] = HTTP_FLG_TOK,
+ ['g'] = HTTP_FLG_TOK,
+ ['h'] = HTTP_FLG_TOK,
+ ['i'] = HTTP_FLG_TOK,
+ ['j'] = HTTP_FLG_TOK,
+ ['k'] = HTTP_FLG_TOK,
+ ['l'] = HTTP_FLG_TOK,
+ ['m'] = HTTP_FLG_TOK,
+ ['n'] = HTTP_FLG_TOK,
+ ['o'] = HTTP_FLG_TOK,
+ ['p'] = HTTP_FLG_TOK,
+ ['q'] = HTTP_FLG_TOK,
+ ['r'] = HTTP_FLG_TOK,
+ ['s'] = HTTP_FLG_TOK,
+ ['t'] = HTTP_FLG_TOK,
+ ['u'] = HTTP_FLG_TOK,
+ ['v'] = HTTP_FLG_TOK,
+ ['w'] = HTTP_FLG_TOK,
+ ['x'] = HTTP_FLG_TOK,
+ ['y'] = HTTP_FLG_TOK,
+ ['z'] = HTTP_FLG_TOK,
+ ['{'] = HTTP_FLG_SEP,
+ ['|'] = HTTP_FLG_TOK,
+ ['}'] = HTTP_FLG_SEP,
+ ['~'] = HTTP_FLG_TOK,
+ [127] = HTTP_FLG_CTL,
+};
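+
+/* Usage sketch (illustrative only, not part of the build): this table is
+ * meant to be queried through the HTTP_IS_* macros from <haproxy/http.h>,
+ * one table lookup per character, e.g. to validate a header field name:
+ */
+#if 0 /* example only */
+static int example_is_token_string(const char *s, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		if (!HTTP_IS_TOKEN(s[i]))
+			return 0;
+	return len > 0;
+}
+#endif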
+
+const int http_err_codes[HTTP_ERR_SIZE] = {
+ [HTTP_ERR_200] = 200, /* used by "monitor-uri" */
+ [HTTP_ERR_400] = 400,
+ [HTTP_ERR_401] = 401,
+ [HTTP_ERR_403] = 403,
+ [HTTP_ERR_404] = 404,
+ [HTTP_ERR_405] = 405,
+ [HTTP_ERR_407] = 407,
+ [HTTP_ERR_408] = 408,
+ [HTTP_ERR_410] = 410,
+ [HTTP_ERR_413] = 413,
+ [HTTP_ERR_421] = 421,
+ [HTTP_ERR_422] = 422,
+ [HTTP_ERR_425] = 425,
+ [HTTP_ERR_429] = 429,
+ [HTTP_ERR_500] = 500,
+ [HTTP_ERR_501] = 501,
+ [HTTP_ERR_502] = 502,
+ [HTTP_ERR_503] = 503,
+ [HTTP_ERR_504] = 504,
+};
+
+const char *http_err_msgs[HTTP_ERR_SIZE] = {
+ [HTTP_ERR_200] =
+ "HTTP/1.1 200 OK\r\n"
+ "Content-length: 58\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>200 OK</h1>\nService ready.\n</body></html>\n",
+
+ [HTTP_ERR_400] =
+ "HTTP/1.1 400 Bad request\r\n"
+ "Content-length: 90\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Connection: close\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>400 Bad request</h1>\nYour browser sent an invalid request.\n</body></html>\n",
+
+ [HTTP_ERR_401] =
+ "HTTP/1.1 401 Unauthorized\r\n"
+ "Content-length: 112\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>401 Unauthorized</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
+
+ [HTTP_ERR_403] =
+ "HTTP/1.1 403 Forbidden\r\n"
+ "Content-length: 93\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>403 Forbidden</h1>\nRequest forbidden by administrative rules.\n</body></html>\n",
+
+ [HTTP_ERR_404] =
+ "HTTP/1.1 404 Not Found\r\n"
+ "Content-length: 83\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>404 Not Found</h1>\nThe resource could not be found.\n</body></html>\n",
+
+ [HTTP_ERR_405] =
+ "HTTP/1.1 405 Method Not Allowed\r\n"
+ "Content-length: 146\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>405 Method Not Allowed</h1>\nA request was made of a resource using a request method not supported by that resource\n</body></html>\n",
+
+	[HTTP_ERR_407] =
+	"HTTP/1.1 407 Proxy Authentication Required\r\n"
+	"Content-length: 129\r\n"
+	"Cache-Control: no-cache\r\n"
+	"Content-Type: text/html\r\n"
+	"\r\n"
+	"<html><body><h1>407 Proxy Authentication Required</h1>\nYou need a valid user and password to access this content.\n</body></html>\n",
+
+ [HTTP_ERR_408] =
+ "HTTP/1.1 408 Request Time-out\r\n"
+ "Content-length: 110\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Connection: close\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>408 Request Time-out</h1>\nYour browser didn't send a complete request in time.\n</body></html>\n",
+
+ [HTTP_ERR_410] =
+ "HTTP/1.1 410 Gone\r\n"
+ "Content-length: 114\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>410 Gone</h1>\nThe resource is no longer available and will not be available again.\n</body></html>\n",
+
+ [HTTP_ERR_413] =
+ "HTTP/1.1 413 Payload Too Large\r\n"
+ "Content-length: 106\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>413 Payload Too Large</h1>\nThe request entity exceeds the maximum allowed.\n</body></html>\n",
+
+ [HTTP_ERR_421] =
+ "HTTP/1.1 421 Misdirected Request\r\n"
+ "Content-length: 104\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>421 Misdirected Request</h1>\nRequest sent to a non-authoritative server.\n</body></html>\n",
+
+ [HTTP_ERR_422] =
+ "HTTP/1.1 422 Unprocessable Content\r\n"
+ "Content-length: 116\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>422 Unprocessable Content</h1>\nThe server cannot process the contained instructions.\n</body></html>\n",
+
+ [HTTP_ERR_425] =
+ "HTTP/1.1 425 Too Early\r\n"
+ "Content-length: 80\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>425 Too Early</h1>\nYour browser sent early data.\n</body></html>\n",
+
+ [HTTP_ERR_429] =
+ "HTTP/1.1 429 Too Many Requests\r\n"
+ "Content-length: 117\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>429 Too Many Requests</h1>\nYou have sent too many requests in a given amount of time.\n</body></html>\n",
+
+ [HTTP_ERR_500] =
+ "HTTP/1.1 500 Internal Server Error\r\n"
+ "Content-length: 97\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>500 Internal Server Error</h1>\nAn internal server error occurred.\n</body></html>\n",
+
+	[HTTP_ERR_501] =
+	"HTTP/1.1 501 Not Implemented\r\n"
+	"Content-length: 135\r\n"
+	"Cache-Control: no-cache\r\n"
+	"Content-Type: text/html\r\n"
+	"\r\n"
+	"<html><body><h1>501 Not Implemented</h1>\nThe server does not support the functionality required to fulfill the request.\n</body></html>\n",
+
+ [HTTP_ERR_502] =
+ "HTTP/1.1 502 Bad Gateway\r\n"
+ "Content-length: 107\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>502 Bad Gateway</h1>\nThe server returned an invalid or incomplete response.\n</body></html>\n",
+
+ [HTTP_ERR_503] =
+ "HTTP/1.1 503 Service Unavailable\r\n"
+ "Content-length: 107\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>503 Service Unavailable</h1>\nNo server is available to handle this request.\n</body></html>\n",
+
+ [HTTP_ERR_504] =
+ "HTTP/1.1 504 Gateway Time-out\r\n"
+ "Content-length: 92\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-Type: text/html\r\n"
+ "\r\n"
+ "<html><body><h1>504 Gateway Time-out</h1>\nThe server didn't respond in time.\n</body></html>\n",
+};
+
+const struct ist http_known_methods[HTTP_METH_OTHER] = {
+ [HTTP_METH_OPTIONS] = IST("OPTIONS"),
+ [HTTP_METH_GET] = IST("GET"),
+ [HTTP_METH_HEAD] = IST("HEAD"),
+ [HTTP_METH_POST] = IST("POST"),
+ [HTTP_METH_PUT] = IST("PUT"),
+ [HTTP_METH_DELETE] = IST("DELETE"),
+ [HTTP_METH_TRACE] = IST("TRACE"),
+ [HTTP_METH_CONNECT] = IST("CONNECT"),
+};
+
+/*
+ * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown
+ * ones.
+ */
+enum http_meth_t find_http_meth(const char *str, const int len)
+{
+ const struct ist m = ist2(str, len);
+
+ if (isteq(m, ist("GET"))) return HTTP_METH_GET;
+ else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD;
+ else if (isteq(m, ist("POST"))) return HTTP_METH_POST;
+ else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT;
+ else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT;
+ else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS;
+ else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE;
+ else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE;
+ else return HTTP_METH_OTHER;
+}
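+
+/* e.g. (illustrative): find_http_meth("GET", 3) returns HTTP_METH_GET, while
+ * an unknown verb such as find_http_meth("PURGE", 5) returns HTTP_METH_OTHER,
+ * in which case the caller must keep the string itself.
+ */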
+
+/* This function returns the HTTP_ERR_<num> (enum) value matching the given
+ * HTTP status code. The returned value matches the codes from http_err_codes;
+ * unknown status codes map to HTTP_ERR_500.
+ */
+int http_get_status_idx(unsigned int status)
+{
+ switch (status) {
+ case 200: return HTTP_ERR_200;
+ case 400: return HTTP_ERR_400;
+ case 401: return HTTP_ERR_401;
+ case 403: return HTTP_ERR_403;
+ case 404: return HTTP_ERR_404;
+ case 405: return HTTP_ERR_405;
+ case 407: return HTTP_ERR_407;
+ case 408: return HTTP_ERR_408;
+ case 410: return HTTP_ERR_410;
+ case 413: return HTTP_ERR_413;
+ case 421: return HTTP_ERR_421;
+ case 422: return HTTP_ERR_422;
+ case 425: return HTTP_ERR_425;
+ case 429: return HTTP_ERR_429;
+ case 500: return HTTP_ERR_500;
+ case 501: return HTTP_ERR_501;
+ case 502: return HTTP_ERR_502;
+ case 503: return HTTP_ERR_503;
+ case 504: return HTTP_ERR_504;
+ default: return HTTP_ERR_500;
+ }
+}
+
+/* This function returns the reason phrase associated with an HTTP status
+ * code. It never fails: a message is always returned.
+ */
+const char *http_get_reason(unsigned int status)
+{
+ switch (status) {
+ case 100: return "Continue";
+ case 101: return "Switching Protocols";
+ case 102: return "Processing";
+ case 200: return "OK";
+ case 201: return "Created";
+ case 202: return "Accepted";
+ case 203: return "Non-Authoritative Information";
+ case 204: return "No Content";
+ case 205: return "Reset Content";
+ case 206: return "Partial Content";
+ case 207: return "Multi-Status";
+ case 210: return "Content Different";
+ case 226: return "IM Used";
+ case 300: return "Multiple Choices";
+ case 301: return "Moved Permanently";
+ case 302: return "Found";
+ case 303: return "See Other";
+ case 304: return "Not Modified";
+ case 305: return "Use Proxy";
+ case 307: return "Temporary Redirect";
+ case 308: return "Permanent Redirect";
+ case 310: return "Too many Redirects";
+ case 400: return "Bad Request";
+ case 401: return "Unauthorized";
+ case 402: return "Payment Required";
+ case 403: return "Forbidden";
+ case 404: return "Not Found";
+ case 405: return "Method Not Allowed";
+ case 406: return "Not Acceptable";
+ case 407: return "Proxy Authentication Required";
+ case 408: return "Request Time-out";
+ case 409: return "Conflict";
+ case 410: return "Gone";
+ case 411: return "Length Required";
+ case 412: return "Precondition Failed";
+ case 413: return "Request Entity Too Large";
+ case 414: return "Request-URI Too Long";
+ case 415: return "Unsupported Media Type";
+ case 416: return "Requested range unsatisfiable";
+ case 417: return "Expectation failed";
+ case 418: return "I'm a teapot";
+ case 421: return "Misdirected Request";
+ case 422: return "Unprocessable Content";
+ case 423: return "Locked";
+ case 424: return "Method failure";
+ case 425: return "Too Early";
+ case 426: return "Upgrade Required";
+ case 428: return "Precondition Required";
+ case 429: return "Too Many Requests";
+ case 431: return "Request Header Fields Too Large";
+ case 449: return "Retry With";
+ case 450: return "Blocked by Windows Parental Controls";
+ case 451: return "Unavailable For Legal Reasons";
+ case 456: return "Unrecoverable Error";
+ case 499: return "client has closed connection";
+ case 500: return "Internal Server Error";
+ case 501: return "Not Implemented";
+ case 502: return "Bad Gateway or Proxy Error";
+ case 503: return "Service Unavailable";
+ case 504: return "Gateway Time-out";
+ case 505: return "HTTP Version not supported";
+ case 506: return "Variant also negotiate";
+ case 507: return "Insufficient storage";
+ case 508: return "Loop detected";
+ case 509: return "Bandwidth Limit Exceeded";
+ case 510: return "Not extended";
+ case 511: return "Network authentication required";
+ case 520: return "Web server is returning an unknown error";
+ default:
+ switch (status) {
+ case 100 ... 199: return "Informational";
+ case 200 ... 299: return "Success";
+ case 300 ... 399: return "Redirection";
+ case 400 ... 499: return "Client Error";
+ case 500 ... 599: return "Server Error";
+ default: return "Other";
+ }
+ }
+}
+
+/* Returns the ist string corresponding to the port part (without ':') in the
+ * host <host>, IST_NULL if no ':' is found, or an empty IST if there is no
+ * digit. In the last case, the result is the original ist trimmed to 0, so be
+ * sure to test the result length before doing any pointer arithmetic.
+ */
+struct ist http_get_host_port(const struct ist host)
+{
+ char *start, *end, *ptr;
+
+ start = istptr(host);
+ end = istend(host);
+ for (ptr = end; ptr > start && isdigit((unsigned char)*--ptr););
+
+ /* no port found */
+ if (likely(*ptr != ':'))
+ return IST_NULL;
+ if (ptr+1 == end)
+ return isttrim(host, 0);
+
+ return istnext(ist2(ptr, end - ptr));
+}
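+
+/* Examples (illustrative):
+ *   http_get_host_port(ist("example.org:8080")) -> "8080"
+ *   http_get_host_port(ist("example.org:"))     -> empty ist (len == 0)
+ *   http_get_host_port(ist("example.org"))      -> IST_NULL
+ */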
+
+
+/* Return non-zero if the port <port> is a default port. If the scheme <schm>
+ * is set, it is used to detect default ports (80 for "http://" and 443 for
+ * "https://"). Otherwise, both 80 and 443 are considered default ports.
+ */
+int http_is_default_port(const struct ist schm, const struct ist port)
+{
+ if (!istlen(port))
+ return 1;
+
+ if (!isttest(schm))
+ return (isteq(port, ist("443")) || isteq(port, ist("80")));
+ else
+ return (isteq(port, ist("443")) && isteqi(schm, ist("https://"))) ||
+ (isteq(port, ist("80")) && isteqi(schm, ist("http://")));
+}
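+
+/* e.g. (illustrative): http_is_default_port(ist("http://"), ist("80")) is
+ * non-zero, http_is_default_port(ist("http://"), ist("443")) is zero, and an
+ * empty port is always reported as default.
+ */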
+
+/* Returns non-zero if the scheme <schm> is syntactically correct according to
+ * RFC3986#3.1, otherwise zero. It expects only the scheme and nothing else
+ * (particularly not the following "://").
+ * Scheme = alpha *(alpha|digit|'+'|'-'|'.')
+ */
+int http_validate_scheme(const struct ist schm)
+{
+ size_t i;
+
+ for (i = 0; i < schm.len; i++) {
+ if (likely((schm.ptr[i] >= 'a' && schm.ptr[i] <= 'z') ||
+ (schm.ptr[i] >= 'A' && schm.ptr[i] <= 'Z')))
+ continue;
+ if (unlikely(!i)) // first char must be alpha
+ return 0;
+ if ((schm.ptr[i] >= '0' && schm.ptr[i] <= '9') ||
+ schm.ptr[i] == '+' || schm.ptr[i] == '-' || schm.ptr[i] == '.')
+ continue;
+ return 0;
+ }
+ return !!i;
+}
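+
+/* e.g. (illustrative): http_validate_scheme(ist("h2+demo")) is non-zero,
+ * while ist("2demo") fails because the first character must be alphabetic,
+ * and an empty scheme fails as well.
+ */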
+
+/* Parses the URI and looks for the scheme. If not found, an empty ist is
+ * returned. Otherwise, the ist pointing to the scheme is returned.
+ *
+ * <parser> must have been initialized via http_uri_parser_init. See the
+ * related http_uri_parser documentation for the specific API usage.
+ */
+struct ist http_parse_scheme(struct http_uri_parser *parser)
+{
+ const char *ptr, *start, *end;
+
+ if (parser->state >= URI_PARSER_STATE_SCHEME_DONE)
+ goto not_found;
+
+ if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY)
+ goto not_found;
+
+ ptr = start = istptr(parser->uri);
+ end = istend(parser->uri);
+
+ if (isalpha((unsigned char)*ptr)) {
+ /* this is a scheme as described by RFC3986, par. 3.1, or only
+ * an authority (in case of a CONNECT method).
+ */
+ ptr++;
+ /* retrieve the scheme up to the suffix '://'. If the suffix is
+ * not found, this means there is no scheme and it is an
+ * authority-only uri.
+ */
+ while (ptr < end &&
+ (isalnum((unsigned char)*ptr) || *ptr == '+' || *ptr == '-' || *ptr == '.'))
+ ptr++;
+ if (ptr == end || *ptr++ != ':')
+ goto not_found;
+ if (ptr == end || *ptr++ != '/')
+ goto not_found;
+ if (ptr == end || *ptr++ != '/')
+ goto not_found;
+ }
+ else {
+ goto not_found;
+ }
+
+ parser->uri = ist2(ptr, end - ptr);
+ parser->state = URI_PARSER_STATE_SCHEME_DONE;
+ return ist2(start, ptr - start);
+
+ not_found:
+ parser->state = URI_PARSER_STATE_SCHEME_DONE;
+ return IST_NULL;
+}
+
+/* Parses the URI and looks for the authority, between the scheme and the
+ * path. If <no_userinfo> is not zero, the part before the '@' (including it)
+ * is skipped. If not found, an empty ist is returned. Otherwise, the ist
+ * pointing to the authority is returned.
+ *
+ * <parser> must have been initialized via http_uri_parser_init. See the
+ * related http_uri_parser documentation for the specific API usage.
+ */
+struct ist http_parse_authority(struct http_uri_parser *parser, int no_userinfo)
+{
+ const char *ptr, *start, *end;
+
+ if (parser->state >= URI_PARSER_STATE_AUTHORITY_DONE)
+ goto not_found;
+
+ if (parser->format != URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY)
+ goto not_found;
+
+ if (parser->state < URI_PARSER_STATE_SCHEME_DONE)
+ http_parse_scheme(parser);
+
+ ptr = start = istptr(parser->uri);
+ end = istend(parser->uri);
+
+ while (ptr < end && *ptr != '/') {
+ if (*ptr++ == '@' && no_userinfo)
+ start = ptr;
+ }
+
+	/* OK, <ptr> now points to the '/' or the end */
+
+ authority:
+ parser->uri = ist2(ptr, end - ptr);
+ parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
+ return ist2(start, ptr - start);
+
+ not_found:
+ parser->state = URI_PARSER_STATE_AUTHORITY_DONE;
+ return IST_NULL;
+}
+
+/* Parses the URI from the given transaction (which is assumed to be in the
+ * request phase) and looks for the "/" beginning the PATH. If not found,
+ * ist2(0,0) is returned. Otherwise the pointer and length are returned.
+ *
+ * <parser> must have been initialized via http_uri_parser_init. See the
+ * related http_uri_parser documentation for the specific API usage.
+ */
+struct ist http_parse_path(struct http_uri_parser *parser)
+{
+ const char *ptr, *end;
+
+ if (parser->state >= URI_PARSER_STATE_PATH_DONE)
+ goto not_found;
+
+ if (parser->format == URI_PARSER_FORMAT_EMPTY ||
+ parser->format == URI_PARSER_FORMAT_ASTERISK) {
+ goto not_found;
+ }
+
+ ptr = istptr(parser->uri);
+ end = istend(parser->uri);
+
+ /* If the uri is in absolute-path format, first skip the scheme and
+ * authority parts. No scheme will be found if the uri is in authority
+ * format, which indicates that the path won't be present.
+ */
+ if (parser->format == URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY) {
+ if (parser->state < URI_PARSER_STATE_SCHEME_DONE) {
+ /* If no scheme found, uri is in authority format. No
+ * path is present.
+ */
+ if (!isttest(http_parse_scheme(parser)))
+ goto not_found;
+ }
+
+ if (parser->state < URI_PARSER_STATE_AUTHORITY_DONE)
+ http_parse_authority(parser, 1);
+
+ ptr = istptr(parser->uri);
+
+ if (ptr == end)
+ goto not_found;
+ }
+
+ parser->state = URI_PARSER_STATE_PATH_DONE;
+ return ist2(ptr, end - ptr);
+
+ not_found:
+ parser->state = URI_PARSER_STATE_PATH_DONE;
+ return IST_NULL;
+}
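+
+/* Usage sketch (illustrative only, not part of the build; assumes the
+ * http_uri_parser_init() helper from <haproxy/http.h>, which derives the
+ * parser format from the URI itself):
+ */
+#if 0 /* example only */
+static void example_split_uri(const struct ist uri)
+{
+	struct http_uri_parser parser = http_uri_parser_init(uri);
+	struct ist scheme, authority, path;
+
+	/* the stages must be consumed in this order; each one may return
+	 * IST_NULL when the corresponding part is absent
+	 */
+	scheme    = http_parse_scheme(&parser);
+	authority = http_parse_authority(&parser, 1); /* 1: skip userinfo */
+	path      = http_parse_path(&parser);
+	(void)scheme; (void)authority; (void)path;
+}
+#endif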
+
+/* Parses the <value> of a Content-Length header field of an HTTP request. The
+ * function checks all occurrences of a comma-delimited value and verifies
+ * that none of them differs from a previously seen value. <value> is
+ * sanitized on return to contain a single value if several identical values
+ * were found.
+ *
+ * <body_len> must be a valid pointer and is used to return the parsed length
+ * unless values differ. Also if <not_first> is true, <body_len> is assumed to
+ * point to previously parsed value and which must be equal to the new length.
+ * This is useful if an HTTP message contains several Content-Length headers.
+ *
+ * Returns <0 if a value differs, 0 if the whole header can be dropped (i.e.
+ * already known), or >0 if the value can be indexed (first one). In the last
+ * case, the value might be adjusted and the caller must only add the updated
+ * value.
+ */
+int http_parse_cont_len_header(struct ist *value, unsigned long long *body_len,
+ int not_first)
+{
+ char *e, *n;
+ unsigned long long cl;
+ struct ist word;
+ int check_prev = not_first;
+
+ word.ptr = value->ptr;
+ e = value->ptr + value->len;
+
+ while (1) {
+ if (word.ptr >= e) {
+ /* empty header or empty value */
+ goto fail;
+ }
+
+ /* skip leading delimiter and blanks */
+ if (unlikely(HTTP_IS_LWS(*word.ptr))) {
+ word.ptr++;
+ continue;
+ }
+
+ /* digits only now */
+ for (cl = 0, n = word.ptr; n < e; n++) {
+ unsigned int c = *n - '0';
+ if (unlikely(c > 9)) {
+ /* non-digit */
+ if (unlikely(n == word.ptr)) // spaces only
+ goto fail;
+ break;
+ }
+
+ if (unlikely(!cl && n > word.ptr)) {
+ /* There was a leading zero before this digit,
+ * let's trim it.
+ */
+ word.ptr = n;
+ }
+
+ if (unlikely(cl > ULLONG_MAX / 10ULL))
+ goto fail; /* multiply overflow */
+ cl = cl * 10ULL;
+ if (unlikely(cl + c < cl))
+ goto fail; /* addition overflow */
+ cl = cl + c;
+ }
+
+ /* keep a copy of the exact cleaned value */
+ word.len = n - word.ptr;
+
+ /* skip trailing LWS till next comma or EOL */
+ for (; n < e; n++) {
+ if (!HTTP_IS_LWS(*n)) {
+ if (unlikely(*n != ','))
+ goto fail;
+ break;
+ }
+ }
+
+ /* if duplicate, must be equal */
+ if (check_prev && cl != *body_len)
+ goto fail;
+
+ /* OK, store this result as the one to be indexed */
+ *body_len = cl;
+ *value = word;
+
+ /* Now either n==e and we're done, or n points to the comma,
+ * and we skip it and continue.
+ */
+ if (n++ == e)
+ break;
+
+ word.ptr = n;
+ check_prev = 1;
+ }
+
+ /* here we've reached the end with a single value or a series of
+ * identical values, all matching previous series if any. The last
+ * parsed value was sent back into <value>. We just have to decide
+ * if this occurrence has to be indexed (it's the first one) or
+ * silently skipped (it's not the first one)
+ */
+ return !not_first;
+ fail:
+ return -1;
+}
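+
+/* Examples (illustrative): for a value of "0042, 42" the function returns >0
+ * with <value> rewritten to "42" and <body_len> set to 42, while "42, 51"
+ * returns a negative value since the occurrences differ.
+ */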
+
+/*
+ * Checks if <hdr> is exactly <name> for <len> chars, and ends with a colon.
+ * If so, returns the position of the first non-space character relative to
+ * <hdr>, or <end>-<hdr> if not found before. If no value is found, it tries
+ * to return a pointer to the place after the first space. Returns 0 if the
+ * header name does not match. Checks are case-insensitive.
+ */
+int http_header_match2(const char *hdr, const char *end,
+ const char *name, int len)
+{
+ const char *val;
+
+ if (hdr + len >= end)
+ return 0;
+ if (hdr[len] != ':')
+ return 0;
+ if (strncasecmp(hdr, name, len) != 0)
+ return 0;
+ val = hdr + len + 1;
+ while (val < end && HTTP_IS_SPHT(*val))
+ val++;
+ if ((val >= end) && (len + 2 <= end - hdr))
+ return len + 2; /* we may replace starting from second space */
+ return val - hdr;
+}
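+
+/* e.g. (illustrative): with <hdr> pointing to "Host: example.org" and <len>
+ * set to 4 for name "host", the function returns 6, the offset of the first
+ * character of the value.
+ */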
+
+/* Find the end of the header value contained between <s> and <e>. See RFC7230,
+ * par 3.2 for more information. Note that it requires a valid header to return
+ * a valid result. This works for headers defined as comma-separated lists.
+ */
+char *http_find_hdr_value_end(char *s, const char *e)
+{
+ int quoted, qdpair;
+
+ quoted = qdpair = 0;
+
+#ifdef HA_UNALIGNED_LE
+ /* speedup: skip everything not a comma nor a double quote */
+ for (; s <= e - sizeof(int); s += sizeof(int)) {
+ unsigned int c = *(int *)s; // comma
+ unsigned int q = c; // quote
+
+ c ^= 0x2c2c2c2c; // contains one zero on a comma
+ q ^= 0x22222222; // contains one zero on a quote
+
+ c = (c - 0x01010101) & ~c; // contains 0x80 below a comma
+ q = (q - 0x01010101) & ~q; // contains 0x80 below a quote
+
+ if ((c | q) & 0x80808080)
+ break; // found a comma or a quote
+ }
+#endif
+ for (; s < e; s++) {
+ if (qdpair) qdpair = 0;
+ else if (quoted) {
+ if (*s == '\\') qdpair = 1;
+ else if (*s == '"') quoted = 0;
+ }
+ else if (*s == '"') quoted = 1;
+ else if (*s == ',') return s;
+ }
+ return s;
+}
+
+/* Find the end of a cookie value contained between <s> and <e>. It works the
+ * same way as with headers above except that the semi-colon also ends a token.
+ * See RFC2965 for more information. Note that it requires a valid header to
+ * return a valid result.
+ */
+char *http_find_cookie_value_end(char *s, const char *e)
+{
+ int quoted, qdpair;
+
+ quoted = qdpair = 0;
+ for (; s < e; s++) {
+ if (qdpair) qdpair = 0;
+ else if (quoted) {
+ if (*s == '\\') qdpair = 1;
+ else if (*s == '"') quoted = 0;
+ }
+ else if (*s == '"') quoted = 1;
+ else if (*s == ',' || *s == ';') return s;
+ }
+ return s;
+}
+
+/* Try to find the next occurrence of a cookie name in a cookie header value.
+ * To match on any cookie name, <cookie_name_l> must be set to 0.
+ * The lookup begins at <hdr>. The pointer and size of the next occurrence of
+ * the cookie value is returned into *value and *value_l, and the function
+ * returns a pointer to the next pointer to search from if the value was found.
+ * Otherwise if the cookie was not found, NULL is returned and neither value
+ * nor value_l are touched. The input <hdr> string should first point to the
+ * header's value, and the <hdr_end> pointer must point to the first character
+ * not part of the value. <list> must be non-zero if value may represent a list
+ * of values (cookie headers). This makes it faster to abort parsing when no
+ * list is expected.
+ */
+char *http_extract_cookie_value(char *hdr, const char *hdr_end,
+ char *cookie_name, size_t cookie_name_l,
+ int list, char **value, size_t *value_l)
+{
+ char *equal, *att_end, *att_beg, *val_beg, *val_end;
+ char *next;
+
+ /* we search at least a cookie name followed by an equal, and more
+ * generally something like this :
+ * Cookie: NAME1 = VALUE 1 ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
+ */
+ for (att_beg = hdr; att_beg + cookie_name_l + 1 < hdr_end; att_beg = next + 1) {
+ /* Iterate through all cookies on this line */
+
+ while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
+ att_beg++;
+
+ /* find att_end : this is the first character after the last non
+ * space before the equal. It may be equal to hdr_end.
+ */
+ equal = att_end = att_beg;
+
+ while (equal < hdr_end) {
+ if (*equal == '=' || *equal == ';' || (list && *equal == ','))
+ break;
+ if (HTTP_IS_SPHT(*equal++))
+ continue;
+ att_end = equal;
+ }
+
+ /* here, <equal> points to '=', a delimiter or the end. <att_end>
+ * is between <att_beg> and <equal>, both may be identical.
+ */
+
+ /* look for end of cookie if there is an equal sign */
+ if (equal < hdr_end && *equal == '=') {
+ /* look for the beginning of the value */
+ val_beg = equal + 1;
+ while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
+ val_beg++;
+
+ /* find the end of the value, respecting quotes */
+ next = http_find_cookie_value_end(val_beg, hdr_end);
+
+ /* make val_end point to the first white space or delimiter after the value */
+ val_end = next;
+ while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
+ val_end--;
+ } else {
+ val_beg = val_end = next = equal;
+ }
+
+ /* We have nothing to do with attributes beginning with '$'. However,
+ * they will automatically be removed if a header before them is removed,
+ * since they're supposed to be linked together.
+ */
+ if (*att_beg == '$')
+ continue;
+
+ /* Ignore cookies with no equal sign */
+ if (equal == next)
+ continue;
+
+ /* Now we have the cookie name between att_beg and att_end, and
+ * its value between val_beg and val_end.
+ */
+
+ if (cookie_name_l == 0 || (att_end - att_beg == cookie_name_l &&
+ memcmp(att_beg, cookie_name, cookie_name_l) == 0)) {
+ /* let's return this value and indicate where to go on from */
+ *value = val_beg;
+ *value_l = val_end - val_beg;
+ return next + 1;
+ }
+
+ /* Set-Cookie headers only have the name in the first attr=value part */
+ if (!list)
+ break;
+ }
+
+ return NULL;
+}
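+
+/* Usage sketch (illustrative only, not part of the build): iterating over
+ * all occurrences of a hypothetical "SESS" cookie in a Cookie header value
+ * delimited by <hdr> and <hdr_end>:
+ */
+#if 0 /* example only */
+static void example_iterate_cookie(char *hdr, const char *hdr_end)
+{
+	char *val, *next = hdr;
+	size_t val_l;
+
+	while ((next = http_extract_cookie_value(next, hdr_end, "SESS", 4,
+	                                         1, &val, &val_l))) {
+		/* <val>/<val_l> delimit one value, <next> resumes the scan */
+	}
+}
+#endif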
+
+/* Try to find the next cookie name in a cookie header given a pointer
+ * <hdr_beg> to the starting position, a pointer <hdr_end> to the ending
+ * position to search in the cookie and a boolean <is_req> of type int that
+ * indicates if the stream direction is for request or response.
+ * The lookup begins at <hdr_beg>, which is assumed to be in
+ * Cookie / Set-Cookie header, and the function returns a pointer to the next
+ * position to search from if a valid cookie k-v pair is found for Cookie
+ * request header (<is_req> is non-zero) and <hdr_end> for Set-Cookie response
+ * header (<is_req> is zero). When the next cookie name is found, <ptr> will
+ * be pointing to the start of the cookie name, and <len> will be the length
+ * of the cookie name.
+ * Otherwise if there is no valid cookie k-v pair, NULL is returned.
+ * The <hdr_end> pointer must point to the first character
+ * not part of the Cookie / Set-Cookie header.
+ */
+char *http_extract_next_cookie_name(char *hdr_beg, char *hdr_end, int is_req,
+ char **ptr, size_t *len)
+{
+ char *equal, *att_end, *att_beg, *val_beg;
+ char *next;
+
+ /* We search a valid cookie name between hdr_beg and hdr_end,
+ * followed by an equal. For example for the following cookie:
+ * Cookie: NAME1 = VALUE 1 ; NAME2 = VALUE2 ; NAME3 = VALUE3\r\n
+ * We want to find NAME1, NAME2, or NAME3 depending on where we start our search
+ * according to <hdr_beg>
+ */
+ for (att_beg = hdr_beg; att_beg + 1 < hdr_end; att_beg = next + 1) {
+ while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
+ att_beg++;
+
+ /* find <att_end> : this is the first character after the last non
+ * space before the equal. It may be equal to <hdr_end>.
+ */
+ equal = att_end = att_beg;
+
+ while (equal < hdr_end) {
+ if (*equal == '=' || *equal == ';')
+ break;
+ if (HTTP_IS_SPHT(*equal++))
+ continue;
+ att_end = equal;
+ }
+
+ /* Here, <equal> points to '=', a delimiter or the end. <att_end>
+ * is between <att_beg> and <equal>, both may be identical.
+ */
+
+ /* Look for end of cookie if there is an equal sign */
+ if (equal < hdr_end && *equal == '=') {
+ /* Look for the beginning of the value */
+ val_beg = equal + 1;
+ while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
+ val_beg++;
+
+ /* Find the end of the value, respecting quotes */
+ next = http_find_cookie_value_end(val_beg, hdr_end);
+ } else {
+ next = equal;
+ }
+
+ /* We have nothing to do with attributes beginning with '$'. However,
+ * they will automatically be removed if a header before them is removed,
+ * since they're supposed to be linked together.
+ */
+ if (*att_beg == '$')
+ continue;
+
+ /* Ignore cookies with no equal sign */
+ if (equal == next)
+ continue;
+
+ /* Now we have the cookie name between <att_beg> and <att_end>, and
+ * <next> points to the end of cookie value
+ */
+ *ptr = att_beg;
+ *len = att_end - att_beg;
+
+ /* Return next position for Cookie request header and <hdr_end> for
+ * Set-Cookie response header as each Set-Cookie header is assumed to
+ * contain only 1 cookie
+ */
+ if (is_req)
+ return next + 1;
+ return hdr_end;
+ }
+
+ return NULL;
+}
+
+/* Parses a qvalue and returns it multiplied by 1000, from 0 to 1000. If the
+ * value is larger than 1000, it is capped at 1000. The parser consumes up to
+ * 1 digit, one dot and 3 digits and stops on the first invalid character.
+ * Unparsable qvalues return 1000 as "q=1.000".
+ */
+int http_parse_qvalue(const char *qvalue, const char **end)
+{
+ int q = 1000;
+
+ if (!isdigit((unsigned char)*qvalue))
+ goto out;
+ q = (*qvalue++ - '0') * 1000;
+
+ if (*qvalue++ != '.')
+ goto out;
+
+ if (!isdigit((unsigned char)*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 100;
+
+ if (!isdigit((unsigned char)*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 10;
+
+ if (!isdigit((unsigned char)*qvalue))
+ goto out;
+ q += (*qvalue++ - '0') * 1;
+ out:
+ if (q > 1000)
+ q = 1000;
+ if (end)
+ *end = qvalue;
+ return q;
+}
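+
+/* e.g. (illustrative): http_parse_qvalue("0.8,", &end) returns 800 and
+ * leaves <end> on the ',' while any unparsable input such as "x" returns
+ * 1000, i.e. "q=1.000".
+ */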
+
+/*
+ * Given a url parameter, find the starting position of the first occurrence,
+ * or NULL if the parameter is not found.
+ *
+ * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
+ * the function will return query_string+8.
+ *
+ * Warning: this function returns a pointer that can point to the first chunk
+ * or the second chunk, so the caller must check which chunk the result points
+ * to before using it.
+ */
+const char *http_find_url_param_pos(const char **chunks,
+ const char* url_param_name, size_t url_param_name_l,
+ char delim, char insensitive)
+{
+ const char *pos, *last, *equal;
+ const char **bufs = chunks;
+ int l1, l2;
+
+
+ pos = bufs[0];
+ last = bufs[1];
+ while (pos < last) {
+ /* Check the equal. */
+ equal = pos + url_param_name_l;
+ if (fix_pointer_if_wrap(chunks, &equal)) {
+ if (equal >= chunks[3])
+ return NULL;
+ } else {
+ if (equal >= chunks[1])
+ return NULL;
+ }
+ if (*equal == '=') {
+ if (pos + url_param_name_l > last) {
+ /* process wrap case, we detect a wrap. In this case, the
+ * comparison is performed in two parts.
+ */
+
+ /* This is the end, we don't have any other chunk. */
+ if (bufs != chunks || !bufs[2])
+ return NULL;
+
+ /* Compute the length of each part of the comparison. */
+ l1 = last - pos;
+ l2 = url_param_name_l - l1;
+
+ /* The second buffer is too short to contain the compared string. */
+ if (bufs[2] + l2 > bufs[3])
+ return NULL;
+
+ if (insensitive) {
+ if (strncasecmp(pos, url_param_name, l1) == 0 &&
+ strncasecmp(bufs[2], url_param_name+l1, l2) == 0)
+ return pos;
+ }
+ else {
+ if (memcmp(pos, url_param_name, l1) == 0 &&
+ memcmp(bufs[2], url_param_name+l1, l2) == 0)
+ return pos;
+ }
+
+				/* Perform the wrap and skip past the string that failed the comparison. */
+ bufs += 2;
+ pos = bufs[0] + l2;
+ last = bufs[1];
+
+ } else {
+ /* process a simple comparison.*/
+ if (insensitive) {
+ if (strncasecmp(pos, url_param_name, url_param_name_l) == 0)
+ return pos;
+ } else {
+ if (memcmp(pos, url_param_name, url_param_name_l) == 0)
+ return pos;
+ }
+ pos += url_param_name_l + 1;
+ if (fix_pointer_if_wrap(chunks, &pos))
+ last = bufs[2];
+ }
+ }
+
+ while (1) {
+ /* Look for the next delimiter. */
+ while (pos < last && !http_is_param_delimiter(*pos, delim))
+ pos++;
+ if (pos < last)
+ break;
+ /* process buffer wrapping. */
+ if (bufs != chunks || !bufs[2])
+ return NULL;
+ bufs += 2;
+ pos = bufs[0];
+ last = bufs[1];
+ }
+ pos++;
+ }
+ return NULL;
+}
+
+/*
+ * Given a url parameter name and a query string, find the next value.
+ * An empty url_param_name matches the first available parameter.
+ * If the parameter is found, 1 is returned and *vstart / *vend are updated to
+ * respectively provide a pointer to the value and its end.
+ * Otherwise, 0 is returned and vstart/vend are not modified.
+ */
+int http_find_next_url_param(const char **chunks,
+ const char* url_param_name, size_t url_param_name_l,
+ const char **vstart, const char **vend, char delim, char insensitive)
+{
+ const char *arg_start, *qs_end;
+ const char *value_start, *value_end;
+
+ arg_start = chunks[0];
+ qs_end = chunks[1];
+ if (url_param_name_l) {
+ /* Looks for an argument name. */
+ arg_start = http_find_url_param_pos(chunks,
+ url_param_name, url_param_name_l,
+ delim, insensitive);
+ /* Check for wrapping. */
+ if (arg_start >= qs_end)
+ qs_end = chunks[3];
+ }
+ if (!arg_start)
+ return 0;
+
+ if (!url_param_name_l) {
+ while (1) {
+ /* looks for the first argument. */
+ value_start = memchr(arg_start, '=', qs_end - arg_start);
+ if (!value_start) {
+ /* Check for wrapping. */
+ if (arg_start >= chunks[0] &&
+ arg_start < chunks[1] &&
+ chunks[2]) {
+ arg_start = chunks[2];
+ qs_end = chunks[3];
+ continue;
+ }
+ return 0;
+ }
+ break;
+ }
+ value_start++;
+ }
+ else {
+ /* Jump the argument length. */
+ value_start = arg_start + url_param_name_l + 1;
+
+ /* Check for pointer wrapping. */
+ if (fix_pointer_if_wrap(chunks, &value_start)) {
+ /* Update the end pointer. */
+ qs_end = chunks[3];
+
+ /* Check for overflow. */
+ if (value_start >= qs_end)
+ return 0;
+ }
+ }
+
+ value_end = value_start;
+
+ while (1) {
+ while ((value_end < qs_end) && !http_is_param_delimiter(*value_end, delim))
+ value_end++;
+ if (value_end < qs_end)
+ break;
+ /* process buffer wrapping. */
+ if (value_end >= chunks[0] &&
+ value_end < chunks[1] &&
+ chunks[2]) {
+ value_end = chunks[2];
+ qs_end = chunks[3];
+ continue;
+ }
+ break;
+ }
+
+ *vstart = value_start;
+ *vend = value_end;
+ return 1;
+}
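+
+/* Note (illustrative): <chunks> describes up to two contiguous memory ranges,
+ * chunks[0]..chunks[1] and optionally chunks[2]..chunks[3] (a NULL chunks[2]
+ * means no second range), which lets the lookup transparently follow a query
+ * string that wraps in a circular buffer.
+ */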
+
+/* Parses a single header line (without the CRLF) and splits it into its name
+ * and its value. The parsing is pretty naive and just skips spaces.
+ */
+int http_parse_header(const struct ist hdr, struct ist *name, struct ist *value)
+{
+ char *p = hdr.ptr;
+ char *end = p + hdr.len;
+
+ name->len = value->len = 0;
+
+ /* Skip leading spaces */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the header name */
+ name->ptr = p;
+ for (; p < end && HTTP_IS_TOKEN(*p); p++);
+ name->len = p - name->ptr;
+
+ /* Skip the ':' and spaces before and after it */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+ if (p < end && *p == ':') p++;
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the header value */
+ value->ptr = p;
+ value->len = end - p;
+
+ return 1;
+}
+
+/* Parses a single start line (without the CRLF) and splits it into 3 parts. The
+ * parsing is pretty naive and just skips spaces.
+ */
+int http_parse_stline(const struct ist line, struct ist *p1, struct ist *p2, struct ist *p3)
+{
+ char *p = line.ptr;
+ char *end = p + line.len;
+
+ p1->len = p2->len = p3->len = 0;
+
+ /* Skip leading spaces */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the first part */
+ p1->ptr = p;
+ for (; p < end && HTTP_IS_TOKEN(*p); p++);
+ p1->len = p - p1->ptr;
+
+ /* Skip spaces between p1 and p2 */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the second part */
+ p2->ptr = p;
+ for (; p < end && !HTTP_IS_SPHT(*p); p++);
+ p2->len = p - p2->ptr;
+
+ /* Skip spaces between p2 and p3 */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* The remaining is the third value */
+ p3->ptr = p;
+ p3->len = end - p;
+
+ return 1;
+}
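+
+/* e.g. (illustrative): http_parse_stline(ist("GET /foo HTTP/1.1"), ...)
+ * yields p1 = "GET", p2 = "/foo" and p3 = "HTTP/1.1".
+ */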
+
+/* Parses the value of a Status header with the following format: "Status:
+ * Code[ Reason]". The parsing is pretty naive and just skips spaces. It
+ * returns the numeric value of the status code.
+ */
+int http_parse_status_val(const struct ist value, struct ist *status, struct ist *reason)
+{
+ char *p = value.ptr;
+ char *end = p + value.len;
+ uint16_t code;
+
+ status->len = reason->len = 0;
+
+ /* Skip leading spaces */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* Set the status part */
+ status->ptr = p;
+ for (; p < end && HTTP_IS_TOKEN(*p); p++);
+ status->len = p - status->ptr;
+
+ /* Skip spaces between status and reason */
+ for (; p < end && HTTP_IS_SPHT(*p); p++);
+
+ /* the remaining is the reason */
+ reason->ptr = p;
+ reason->len = end - p;
+
+ code = strl2ui(status->ptr, status->len);
+ return code;
+}
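+
+/* e.g. (illustrative): for a value of "404 Not Found", the function returns
+ * 404 with <status> = "404" and <reason> = "Not Found".
+ */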
+
+
+/* Returns non-zero if the two ETags are equivalent (see RFC 7232#2.3.2).
+ * If any of them is a weak ETag, we discard the weakness prefix and perform
+ * a strict string comparison. Returns 0 otherwise, including when either
+ * ETag is invalid.
+ */
+int http_compare_etags(struct ist etag1, struct ist etag2)
+{
+ enum http_etag_type etag_type1;
+ enum http_etag_type etag_type2;
+
+ etag_type1 = http_get_etag_type(etag1);
+ etag_type2 = http_get_etag_type(etag2);
+
+ if (etag_type1 == ETAG_INVALID || etag_type2 == ETAG_INVALID)
+ return 0;
+
+	/* Discard the 'W/' prefix if an ETag is a weak one. */
+ if (etag_type1 == ETAG_WEAK)
+ etag1 = istadv(etag1, 2);
+ if (etag_type2 == ETAG_WEAK)
+ etag2 = istadv(etag2, 2);
+
+ return isteq(etag1, etag2);
+}
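+
+/* e.g. (illustrative): W/"xyz" and "xyz" compare equal since the weakness
+ * prefix is discarded, while a malformed tag such as xyz (without quotes) is
+ * invalid and always yields 0.
+ */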
+
+
+/*
+ * Trim leading space or horizontal tab characters from <value> string.
+ * Returns the trimmed string.
+ */
+struct ist http_trim_leading_spht(struct ist value)
+{
+ struct ist ret = value;
+
+ while (ret.len && HTTP_IS_SPHT(ret.ptr[0])) {
+ ++ret.ptr;
+ --ret.len;
+ }
+
+ return ret;
+}
+
+/*
+ * Trim trailing space or horizontal tab characters from <value> string.
+ * Returns the trimmed string.
+ */
+struct ist http_trim_trailing_spht(struct ist value)
+{
+ struct ist ret = value;
+
+ while (ret.len && HTTP_IS_SPHT(ret.ptr[-1]))
+ --ret.len;
+
+ return ret;
+}
diff --git a/src/http_acl.c b/src/http_acl.c
new file mode 100644
index 0000000..bf29fc3
--- /dev/null
+++ b/src/http_acl.c
@@ -0,0 +1,185 @@
+/*
+ * HTTP ACLs declaration
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/chunk.h>
+#include <haproxy/http.h>
+#include <haproxy/pattern.h>
+#include <haproxy/pool.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+/* We use the pre-parsed method if it is known, and store its number as an
+ * integer. If it is unknown, we use the pointer and the length.
+ */
+static int pat_parse_meth(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ int len, meth;
+
+ len = strlen(text);
+ meth = find_http_meth(text, len);
+
+ pattern->val.i = meth;
+ if (meth == HTTP_METH_OTHER) {
+ pattern->ptr.str = (char *)text;
+ pattern->len = len;
+ }
+ else {
+ pattern->ptr.str = NULL;
+ pattern->len = 0;
+ }
+ return 1;
+}
+
+/* See above how the method is stored in the global pattern */
+static struct pattern *pat_match_meth(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ /* well-known method */
+ if (pattern->val.i != HTTP_METH_OTHER) {
+ if (smp->data.u.meth.meth == pattern->val.i)
+ return pattern;
+ else
+ continue;
+ }
+
+ /* Other method, we must compare the strings */
+ if (pattern->len != smp->data.u.meth.str.data)
+ continue;
+
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.meth.str.area, smp->data.u.meth.str.data) == 0) ||
+ (!icase && strncmp(pattern->ptr.str, smp->data.u.meth.str.area, smp->data.u.meth.str.data) == 0))
+ return pattern;
+ }
+ return NULL;
+}
+
+/************************************************************************/
+/* All supported ACL keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { "base", "base", PAT_MATCH_STR },
+ { "base_beg", "base", PAT_MATCH_BEG },
+ { "base_dir", "base", PAT_MATCH_DIR },
+ { "base_dom", "base", PAT_MATCH_DOM },
+ { "base_end", "base", PAT_MATCH_END },
+ { "base_len", "base", PAT_MATCH_LEN },
+ { "base_reg", "base", PAT_MATCH_REG },
+ { "base_sub", "base", PAT_MATCH_SUB },
+
+ { "cook", "req.cook", PAT_MATCH_STR },
+ { "cook_beg", "req.cook", PAT_MATCH_BEG },
+ { "cook_dir", "req.cook", PAT_MATCH_DIR },
+ { "cook_dom", "req.cook", PAT_MATCH_DOM },
+ { "cook_end", "req.cook", PAT_MATCH_END },
+ { "cook_len", "req.cook", PAT_MATCH_LEN },
+ { "cook_reg", "req.cook", PAT_MATCH_REG },
+ { "cook_sub", "req.cook", PAT_MATCH_SUB },
+
+ { "hdr", "req.hdr", PAT_MATCH_STR },
+ { "hdr_beg", "req.hdr", PAT_MATCH_BEG },
+ { "hdr_dir", "req.hdr", PAT_MATCH_DIR },
+ { "hdr_dom", "req.hdr", PAT_MATCH_DOM },
+ { "hdr_end", "req.hdr", PAT_MATCH_END },
+ { "hdr_len", "req.hdr", PAT_MATCH_LEN },
+ { "hdr_reg", "req.hdr", PAT_MATCH_REG },
+ { "hdr_sub", "req.hdr", PAT_MATCH_SUB },
+
+	/* These two declarations use strings with list storage (instead of
+	 * tree storage). The basic match is PAT_MATCH_STR, but the indexing
+	 * and delete functions are those of the list management. The parse
+	 * and match methods are tied to the corresponding fetch methods. This
+	 * is a very particular ACL declaration mode.
+	 */
+ { "http_auth_group", NULL, PAT_MATCH_STR, NULL, pat_idx_list_str, NULL, NULL, pat_match_auth },
+ { "method", NULL, PAT_MATCH_STR, pat_parse_meth, pat_idx_list_str, NULL, NULL, pat_match_meth },
+
+ { "path", "path", PAT_MATCH_STR },
+ { "path_beg", "path", PAT_MATCH_BEG },
+ { "path_dir", "path", PAT_MATCH_DIR },
+ { "path_dom", "path", PAT_MATCH_DOM },
+ { "path_end", "path", PAT_MATCH_END },
+ { "path_len", "path", PAT_MATCH_LEN },
+ { "path_reg", "path", PAT_MATCH_REG },
+ { "path_sub", "path", PAT_MATCH_SUB },
+
+ { "req_ver", "req.ver", PAT_MATCH_STR },
+ { "resp_ver", "res.ver", PAT_MATCH_STR },
+
+ { "scook", "res.cook", PAT_MATCH_STR },
+ { "scook_beg", "res.cook", PAT_MATCH_BEG },
+ { "scook_dir", "res.cook", PAT_MATCH_DIR },
+ { "scook_dom", "res.cook", PAT_MATCH_DOM },
+ { "scook_end", "res.cook", PAT_MATCH_END },
+ { "scook_len", "res.cook", PAT_MATCH_LEN },
+ { "scook_reg", "res.cook", PAT_MATCH_REG },
+ { "scook_sub", "res.cook", PAT_MATCH_SUB },
+
+ { "shdr", "res.hdr", PAT_MATCH_STR },
+ { "shdr_beg", "res.hdr", PAT_MATCH_BEG },
+ { "shdr_dir", "res.hdr", PAT_MATCH_DIR },
+ { "shdr_dom", "res.hdr", PAT_MATCH_DOM },
+ { "shdr_end", "res.hdr", PAT_MATCH_END },
+ { "shdr_len", "res.hdr", PAT_MATCH_LEN },
+ { "shdr_reg", "res.hdr", PAT_MATCH_REG },
+ { "shdr_sub", "res.hdr", PAT_MATCH_SUB },
+
+ { "url", "url", PAT_MATCH_STR },
+ { "url_beg", "url", PAT_MATCH_BEG },
+ { "url_dir", "url", PAT_MATCH_DIR },
+ { "url_dom", "url", PAT_MATCH_DOM },
+ { "url_end", "url", PAT_MATCH_END },
+ { "url_len", "url", PAT_MATCH_LEN },
+ { "url_reg", "url", PAT_MATCH_REG },
+ { "url_sub", "url", PAT_MATCH_SUB },
+
+ { "urlp", "urlp", PAT_MATCH_STR },
+ { "urlp_beg", "urlp", PAT_MATCH_BEG },
+ { "urlp_dir", "urlp", PAT_MATCH_DIR },
+ { "urlp_dom", "urlp", PAT_MATCH_DOM },
+ { "urlp_end", "urlp", PAT_MATCH_END },
+ { "urlp_len", "urlp", PAT_MATCH_LEN },
+ { "urlp_reg", "urlp", PAT_MATCH_REG },
+ { "urlp_sub", "urlp", PAT_MATCH_SUB },
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
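+
+/* Illustrative configuration usage of these derived keywords (a sketch;
+ * names are examples):
+ *   acl host_static hdr_beg(host) -i img. video. download.
+ *   acl is_old_api  path_beg /v1/
+ */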
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_act.c b/src/http_act.c
new file mode 100644
index 0000000..7d45780
--- /dev/null
+++ b/src/http_act.c
@@ -0,0 +1,2501 @@
+/*
+ * HTTP actions
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/pattern.h>
+#include <haproxy/pool.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/uri_normalizer.h>
+#include <haproxy/version.h>
+
+
+/* Release memory allocated by most HTTP actions. Concretely, it releases
+ * <arg.http>.
+ */
+static void release_http_action(struct act_rule *rule)
+{
+ struct logformat_node *lf, *lfb;
+
+ istfree(&rule->arg.http.str);
+ if (rule->arg.http.re)
+ regex_free(rule->arg.http.re);
+ list_for_each_entry_safe(lf, lfb, &rule->arg.http.fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+}
+
+/* Release memory allocated by HTTP actions relying on an http reply. Concretely,
+ * it releases <.arg.http_reply>.
+ */
+static void release_act_http_reply(struct act_rule *rule)
+{
+ release_http_reply(rule->arg.http_reply);
+ rule->arg.http_reply = NULL;
+}
+
+
+/* Check function for HTTP actions relying on an http reply. The function
+ * returns 1 on success; otherwise it returns 0 and <err> is filled.
+ */
+static int check_act_http_reply(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct http_reply *reply = rule->arg.http_reply;
+
+ if (!http_check_http_reply(reply, px, err)) {
+ release_act_http_reply(rule);
+ return 0;
+ }
+ return 1;
+}
+
+
+/* This function executes one of the set-{method,path,query,uri} actions. It
+ * builds a string in the trash from the specified format string. It finds
+ * the action to be performed in <.action>, previously filled by the function
+ * parse_set_req_line(). On success, it returns ACT_RET_CONT. If an error
+ * occurs while soft rewrites are enabled, the action is canceled, but the rule
+ * processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_set_req_line(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct buffer *replace;
+ enum act_return ret = ACT_RET_CONT;
+
+ replace = alloc_trash_chunk();
+ if (!replace)
+ goto fail_alloc;
+
+ /* If we have to create a query string, prepare a '?'. */
+ if (rule->action == 2) // set-query
+ replace->area[replace->data++] = '?';
+ replace->data += build_logline(s, replace->area + replace->data,
+ replace->size - replace->data,
+ &rule->arg.http.fmt);
+
+ if (http_req_replace_stline(rule->action, replace->area, replace->data, px, s) == -1)
+ goto fail_rewrite;
+
+ leave:
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* parse an http-request action among:
+ * set-method
+ * set-path
+ * set-pathq
+ * set-query
+ * set-uri
+ *
+ * All of them accept a single argument of type string representing a log-format.
+ * The resulting rule makes use of <http.fmt> to store the log-format list head,
+ * and <.action> to store the action type as an int (0=method, 1=path, 2=query,
+ * 3=uri, 4=pathq). It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_set_req_line(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg = *orig_arg;
+ int cap = 0;
+
+ switch (args[0][4]) {
+ case 'm' :
+ rule->action = 0; // set-method
+ break;
+ case 'p' :
+ if (args[0][8] == 'q')
+ rule->action = 4; // set-pathq
+ else
+ rule->action = 1; // set-path
+ break;
+ case 'q' :
+ rule->action = 2; // set-query
+ break;
+ case 'u' :
+ rule->action = 3; // set-uri
+ break;
+ default:
+ memprintf(err, "internal error: unhandled action '%s'", args[0]);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->action_ptr = http_action_set_req_line;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ if (!*args[cur_arg] ||
+ (*args[cur_arg + 1] && strcmp(args[cur_arg + 1], "if") != 0 && strcmp(args[cur_arg + 1], "unless") != 0)) {
+ memprintf(err, "expects exactly 1 argument <format>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.http.fmt, LOG_OPT_HTTP, cap, err)) {
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*orig_arg)++;
+ return ACT_RET_PRS_OK;
+}
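+
+/* Illustrative configuration usage of these actions (a sketch; values are
+ * examples):
+ *   http-request set-method POST
+ *   http-request set-path /%[hdr(host)]%[path]
+ *   http-request set-query lang=en
+ *   http-request set-uri https://%[hdr(host)]%[path]
+ */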
+
+/* This function executes the http-request normalize-uri action.
+ * `rule->action` is expected to be a value from `enum act_normalize_uri`.
+ *
+ * On success, it returns ACT_RET_CONT. If an error
+ * occurs while soft rewrites are enabled, the action is canceled, but the rule
+ * processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_normalize_uri(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ enum act_return ret = ACT_RET_CONT;
+ struct htx *htx = htxbuf(&s->req.buf);
+ const struct ist uri = htx_sl_req_uri(http_get_stline(htx));
+ struct buffer *replace = alloc_trash_chunk();
+ enum uri_normalizer_err err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
+
+ if (!replace)
+ goto fail_alloc;
+
+ switch ((enum act_normalize_uri) rule->action) {
+ case ACT_NORMALIZE_URI_PATH_MERGE_SLASHES: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_path_merge_slashes(iststop(path, '?'), &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 0))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_PATH_STRIP_DOT: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_path_dot(iststop(path, '?'), &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 0))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT:
+ case ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_path_dotdot(iststop(path, '?'), rule->action == ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 0))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newquery = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_query_sort(istfind(path, '?'), '&', &newquery);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_query(htx, newquery))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE:
+ case ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_percent_upper(path, rule->action == ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED:
+ case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_percent_decode_unreserved(path, rule->action == ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_FRAGMENT_STRIP: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_fragment_strip(path, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
+ case ACT_NORMALIZE_URI_FRAGMENT_ENCODE: {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ const struct ist path = http_parse_path(&parser);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_fragment_encode(path, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
+ }
+
+ switch (err) {
+ case URI_NORMALIZER_ERR_NONE:
+ break;
+ case URI_NORMALIZER_ERR_INTERNAL_ERROR:
+ ret = ACT_RET_ERR;
+ break;
+ case URI_NORMALIZER_ERR_INVALID_INPUT:
+ ret = ACT_RET_INV;
+ break;
+ case URI_NORMALIZER_ERR_ALLOC:
+ goto fail_alloc;
+ }
+
+ leave:
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_ADD(&sess->fe->fe_counters.failed_rewrites, 1);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_ADD(&s->be->be_counters.failed_rewrites, 1);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_ADD(&sess->listener->counters->failed_rewrites, 1);
+ if (objt_server(s->target))
+ _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.failed_rewrites, 1);
+
+ if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* Parses the http-request normalize-uri action. It expects a single <normalizer>
+ * argument, corresponding to a value in `enum act_normalize_uri`.
+ *
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_normalize_uri(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg = *orig_arg;
+
+ rule->action_ptr = http_action_normalize_uri;
+ rule->release_ptr = NULL;
+
+ if (!*args[cur_arg]) {
+ memprintf(err, "missing argument <normalizer>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcmp(args[cur_arg], "path-merge-slashes") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_PATH_MERGE_SLASHES;
+ }
+ else if (strcmp(args[cur_arg], "path-strip-dot") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_PATH_STRIP_DOT;
+ }
+ else if (strcmp(args[cur_arg], "path-strip-dotdot") == 0) {
+ cur_arg++;
+
+ if (strcmp(args[cur_arg], "full") == 0) {
+ cur_arg++;
+ rule->action = ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT_FULL;
+ }
+ else if (!*args[cur_arg]) {
+ rule->action = ACT_NORMALIZE_URI_PATH_STRIP_DOTDOT;
+ }
+ else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
+ memprintf(err, "unknown argument '%s' for 'path-strip-dotdot' normalizer", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ else if (strcmp(args[cur_arg], "query-sort-by-name") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME;
+ }
+ else if (strcmp(args[cur_arg], "percent-to-uppercase") == 0) {
+ cur_arg++;
+
+ if (strcmp(args[cur_arg], "strict") == 0) {
+ cur_arg++;
+ rule->action = ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT;
+ }
+ else if (!*args[cur_arg]) {
+ rule->action = ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE;
+ }
+ else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
+ memprintf(err, "unknown argument '%s' for 'percent-to-uppercase' normalizer", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ else if (strcmp(args[cur_arg], "percent-decode-unreserved") == 0) {
+ cur_arg++;
+
+ if (strcmp(args[cur_arg], "strict") == 0) {
+ cur_arg++;
+ rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT;
+ }
+ else if (!*args[cur_arg]) {
+ rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED;
+ }
+ else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
+ memprintf(err, "unknown argument '%s' for 'percent-decode-unreserved' normalizer", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ else if (strcmp(args[cur_arg], "fragment-strip") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_FRAGMENT_STRIP;
+ }
+ else if (strcmp(args[cur_arg], "fragment-encode") == 0) {
+ cur_arg++;
+
+ rule->action = ACT_NORMALIZE_URI_FRAGMENT_ENCODE;
+ }
+ else {
+ memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
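+
+/* Illustrative configuration usage of the normalizers (a sketch):
+ *   http-request normalize-uri path-merge-slashes
+ *   http-request normalize-uri path-strip-dotdot full
+ *   http-request normalize-uri percent-to-uppercase strict
+ */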
+
+/* This function executes a replace-uri action. It finds its arguments in
+ * <rule>.arg.http. It builds a string in the trash from the format string
+ * previously filled by function parse_replace_uri() and will execute the regex
+ * in <http.re> to replace the URI. It uses the format string present in
+ * <http.fmt>. The component to act on (path/uri) is taken from <.action> which
+ * contains 1 for the path, 3 for the URI or 4 for the path+query (values used
+ * by http_req_replace_stline()). On success, it returns ACT_RET_CONT. If an
+ * error occurs while soft rewrites are enabled, the action is canceled, but
+ * the rule processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_replace_uri(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ enum act_return ret = ACT_RET_CONT;
+ struct buffer *replace, *output;
+ struct ist uri;
+ int len;
+
+ replace = alloc_trash_chunk();
+ output = alloc_trash_chunk();
+ if (!replace || !output)
+ goto fail_alloc;
+ uri = htx_sl_req_uri(http_get_stline(htxbuf(&s->req.buf)));
+
+ if (rule->action == 1) { // replace-path
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = iststop(http_parse_path(&parser), '?');
+ }
+ else if (rule->action == 4) { // replace-pathq
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = http_parse_path(&parser);
+ }
+
+ if (!istlen(uri))
+ goto leave;
+
+ if (!regex_exec_match2(rule->arg.http.re, uri.ptr, uri.len, MAX_MATCH, pmatch, 0))
+ goto leave;
+
+ replace->data = build_logline(s, replace->area, replace->size, &rule->arg.http.fmt);
+
+ /* note: uri.ptr doesn't need to be zero-terminated because it will
+ * only be used to pick pmatch references.
+ */
+ len = exp_replace(output->area, output->size, uri.ptr, replace->area, pmatch);
+ if (len == -1)
+ goto fail_rewrite;
+
+ if (http_req_replace_stline(rule->action, output->area, len, px, s) == -1)
+ goto fail_rewrite;
+
+ leave:
+ free_trash_chunk(output);
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* parse a "replace-uri", "replace-path" or "replace-pathq"
+ * http-request action.
+ * This action takes 2 arguments (a regex and a replacement format string).
+ * The resulting rule makes use of <.action> to store the action (1, 3 or 4),
+ * <http.re> to store the compiled regex, and <http.fmt> to store the log-format
+ * list head. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_replace_uri(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg = *orig_arg;
+ int cap = 0;
+ char *error = NULL;
+
+ switch (args[0][8]) {
+ case 'p':
+ if (args[0][12] == 'q')
+ rule->action = 4; // replace-pathq, same as set-pathq
+ else
+ rule->action = 1; // replace-path, same as set-path
+ break;
+ case 'u':
+ rule->action = 3; // replace-uri, same as set-uri
+ break;
+ default:
+ memprintf(err, "internal error: unhandled action '%s'", args[0]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->action_ptr = http_action_replace_uri;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ if (!*args[cur_arg] || !*args[cur_arg+1] ||
+ (*args[cur_arg+2] && strcmp(args[cur_arg+2], "if") != 0 && strcmp(args[cur_arg+2], "unless") != 0)) {
+ memprintf(err, "expects exactly 2 arguments <match-regex> and <replace-format>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!(rule->arg.http.re = regex_comp(args[cur_arg], 1, 1, &error))) {
+ memprintf(err, "failed to parse the regex: %s", error);
+ free(error);
+ return ACT_RET_PRS_ERR;
+ }
+
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ if (!parse_logformat_string(args[cur_arg + 1], px, &rule->arg.http.fmt, LOG_OPT_HTTP, cap, err)) {
+ regex_free(rule->arg.http.re);
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*orig_arg) += 2;
+ return ACT_RET_PRS_OK;
+}
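+
+/* Illustrative configuration usage (a sketch): prepend "/foo" to the path,
+ * or force the https scheme in the URI:
+ *   http-request replace-path (.*) /foo\1
+ *   http-request replace-uri ^http://(.*) https://\1
+ */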
+
+/* This function is just a compliant action wrapper for "set-status". */
+static enum act_return action_http_set_status(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ if (http_res_set_status(rule->arg.http.i, rule->arg.http.str, s) == -1) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(s->txn->req.flags & HTTP_MSGF_SOFT_RW)) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ return ACT_RET_ERR;
+ }
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* parse set-status action:
+ * This action accepts a single argument of type int representing
+ * an http status code. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_set_status(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ char *error;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_http_set_status;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ /* Check if an argument is available */
+ if (!*args[*orig_arg]) {
+ memprintf(err, "expects 1 argument: <status>; or 3 arguments: <status> reason <fmt>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* convert status code as integer */
+ rule->arg.http.i = strtol(args[*orig_arg], &error, 10);
+ if (*error != '\0' || rule->arg.http.i < 100 || rule->arg.http.i > 999) {
+ memprintf(err, "expects an integer status code between 100 and 999");
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*orig_arg)++;
+
+ /* set custom reason string */
+ rule->arg.http.str = ist(NULL); // If null, we use the default reason for the status code.
+ if (*args[*orig_arg] && strcmp(args[*orig_arg], "reason") == 0 &&
+ (*args[*orig_arg + 1] && strcmp(args[*orig_arg + 1], "if") != 0 && strcmp(args[*orig_arg + 1], "unless") != 0)) {
+ (*orig_arg)++;
+ rule->arg.http.str = ist(strdup(args[*orig_arg]));
+ (*orig_arg)++;
+ }
+
+ return ACT_RET_PRS_OK;
+}
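+
+/* Illustrative configuration usage (a sketch):
+ *   http-response set-status 431
+ *   http-response set-status 503 reason "Slow Down"
+ */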
+
+/* This function executes the "reject" HTTP action. It clears the request and
+ * response buffers without sending any response. It can be useful as an HTTP
+ * alternative to the silent-drop action to defend against DoS attacks, and may
+ * also be used with HTTP/2 to close a connection instead of just a stream.
+ * The txn status is unchanged, indicating no response was sent. The termination
+ * flags will indicate "PR". It always returns ACT_RET_ABRT.
+ */
+static enum act_return http_action_reject(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ sc_must_kill_conn(s->scf);
+ stream_abort(s);
+ s->req.analysers &= AN_REQ_FLT_END;
+ s->res.analysers &= AN_RES_FLT_END;
+
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_req);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ return ACT_RET_ABRT;
+}
+
+/* parse the "reject" action:
+ * This action takes no argument and returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_action_reject(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_reject;
+ return ACT_RET_PRS_OK;
+}
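+
+/* Illustrative configuration usage (a sketch; the ACL file path is an
+ * example):
+ *   http-request reject if { src -f /etc/haproxy/blocklist.lst }
+ */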
+
+/* This function executes the "disable-l7-retry" HTTP action.
+ * It disables L7 retries (all retries except those for connection failures).
+ * This can be useful, for example, to avoid retrying POST requests.
+ * It just removes the L7 retry flag on the HTTP transaction, and always
+ * returns ACT_RET_CONT.
+ */
+static enum act_return http_req_disable_l7_retry(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ /* In theory, the TX_L7_RETRY flag isn't set at this point, but
+ * let's be future-proof and remove it anyway.
+ */
+ s->txn->flags &= ~TX_L7_RETRY;
+ s->txn->flags |= TX_D_L7_RETRY;
+ return ACT_RET_CONT;
+}
+
+/* parse the "disable-l7-retry" action:
+ * This action takes no argument and returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_req_disable_l7_retry(const char **args,
+ int *orig_args, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_req_disable_l7_retry;
+ return ACT_RET_PRS_OK;
+}
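+
+/* Illustrative configuration usage (a sketch), e.g. to avoid replaying
+ * non-idempotent requests:
+ *   http-request disable-l7-retry if METH_POST
+ */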
+
+/* This function executes the "capture" action. It executes a fetch expression,
+ * turns the result into a string and puts it in a capture slot. It always
+ * returns ACT_RET_CONT. If an error occurs the action is cancelled, but the rule
+ * processing continues.
+ */
+static enum act_return http_action_req_capture(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *key;
+ struct cap_hdr *h = rule->arg.cap.hdr;
+ char **cap = s->req_cap;
+ int len;
+
+ key = sample_fetch_as_type(s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.cap.expr, SMP_T_STR);
+ if (!key)
+ return ACT_RET_CONT;
+
+ if (cap[h->index] == NULL)
+ cap[h->index] = pool_alloc(h->pool);
+
+ if (cap[h->index] == NULL) /* no more capture memory */
+ return ACT_RET_CONT;
+
+ len = key->data.u.str.data;
+ if (len > h->len)
+ len = h->len;
+
+ memcpy(cap[h->index], key->data.u.str.area, len);
+ cap[h->index][len] = 0;
+ return ACT_RET_CONT;
+}
+
+/* This function executes the "capture" action and stores the result in a
+ * capture slot if it exists. It executes a fetch expression, turns the result
+ * into a string and puts it in a capture slot. It always returns ACT_RET_CONT.
+ * If an error occurs the action is cancelled, but the rule processing continues.
+ */
+static enum act_return http_action_req_capture_by_id(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *key;
+ struct cap_hdr *h;
+ char **cap = s->req_cap;
+ struct proxy *fe = strm_fe(s);
+ int len;
+ int i;
+
+ /* Look for the original configuration. */
+ for (h = fe->req_cap, i = fe->nb_req_cap - 1;
+ h != NULL && i != rule->arg.capid.idx ;
+ i--, h = h->next);
+ if (!h)
+ return ACT_RET_CONT;
+
+ key = sample_fetch_as_type(s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.capid.expr, SMP_T_STR);
+ if (!key)
+ return ACT_RET_CONT;
+
+ if (cap[h->index] == NULL)
+ cap[h->index] = pool_alloc(h->pool);
+
+ if (cap[h->index] == NULL) /* no more capture memory */
+ return ACT_RET_CONT;
+
+ len = key->data.u.str.data;
+ if (len > h->len)
+ len = h->len;
+
+ memcpy(cap[h->index], key->data.u.str.area, len);
+ cap[h->index][len] = 0;
+ return ACT_RET_CONT;
+}
+
+/* Check an "http-request capture" action.
+ *
+ * The function returns 1 on success; otherwise it returns 0 and <err> is
+ * filled.
+ */
+static int check_http_req_capture(struct act_rule *rule, struct proxy *px, char **err)
+{
+ if (rule->action_ptr != http_action_req_capture_by_id)
+ return 1;
+
+ /* capture slots can only be declared in frontends, so we can't check their
+ * existence in backends at the configuration parsing step.
+ */
+ if (px->cap & PR_CAP_FE && rule->arg.capid.idx >= px->nb_req_cap) {
+ memprintf(err, "unable to find capture id '%d' referenced by http-request capture rule",
+ rule->arg.capid.idx);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Release memory allocated by an http capture action */
+static void release_http_capture(struct act_rule *rule)
+{
+ if (rule->action_ptr == http_action_req_capture)
+ release_sample_expr(rule->arg.cap.expr);
+ else
+ release_sample_expr(rule->arg.capid.expr);
+}
+
+/* parse an "http-request capture" action. It takes a single argument which is
+ * a sample fetch expression. It stores the expression into arg->act.p[0] and
+ * the allocated hdr_cap struct or the preallocated "id" into arg->act.p[1].
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_req_capture(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct sample_expr *expr;
+ struct cap_hdr *hdr;
+ int cur_arg;
+ int len = 0;
+
+ for (cur_arg = *orig_arg; cur_arg < *orig_arg + 3 && *args[cur_arg]; cur_arg++)
+ if (strcmp(args[cur_arg], "if") == 0 ||
+ strcmp(args[cur_arg], "unless") == 0)
+ break;
+
+ if (cur_arg < *orig_arg + 3) {
+ memprintf(err, "expects <expression> [ 'len' <length> | 'id' <idx> ]");
+ return ACT_RET_PRS_ERR;
+ }
+
+ cur_arg = *orig_arg;
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ if (!(expr->fetch->val & SMP_VAL_FE_HRQ_HDR)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!args[cur_arg] || !*args[cur_arg]) {
+ memprintf(err, "expects 'len' or 'id'");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcmp(args[cur_arg], "len") == 0) {
+ cur_arg++;
+
+ if (!(px->cap & PR_CAP_FE)) {
+ memprintf(err, "proxy '%s' has no frontend capability", px->id);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ px->conf.args.ctx = ARGC_CAP;
+
+ if (!args[cur_arg]) {
+ memprintf(err, "missing length value");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ /* parse and validate the capture length */
+ len = atoi(args[cur_arg]);
+ if (len <= 0) {
+ memprintf(err, "length must be > 0");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(err, "out of memory");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ hdr->next = px->req_cap;
+ hdr->name = NULL; /* not a header capture */
+ hdr->namelen = 0;
+ hdr->len = len;
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+ hdr->index = px->nb_req_cap++;
+
+ px->req_cap = hdr;
+ px->to_log |= LW_REQHDR;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_req_capture;
+ rule->release_ptr = release_http_capture;
+ rule->arg.cap.expr = expr;
+ rule->arg.cap.hdr = hdr;
+ }
+
+ else if (strcmp(args[cur_arg], "id") == 0) {
+ int id;
+ char *error;
+
+ cur_arg++;
+
+ if (!args[cur_arg]) {
+ memprintf(err, "missing id value");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ id = strtol(args[cur_arg], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "cannot parse id '%s'", args[cur_arg]);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+
+ px->conf.args.ctx = ARGC_CAP;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_req_capture_by_id;
+ rule->check_ptr = check_http_req_capture;
+ rule->release_ptr = release_http_capture;
+ rule->arg.capid.expr = expr;
+ rule->arg.capid.idx = id;
+ }
+
+ else {
+ memprintf(err, "expects 'len' or 'id', found '%s'", args[cur_arg]);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
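+
+/* Illustrative configuration usage (a sketch; names are examples). The "len"
+ * form creates a new slot in a frontend, while the "id" form reuses a
+ * previously declared slot:
+ *   http-request capture req.hdr(user-agent) len 64
+ *   declare capture request len 32
+ *   http-request capture req.cook(JSESSIONID) id 0
+ */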
+
+/* This function executes the "capture" action and stores the result in a
+ * capture slot if it exists. It executes a fetch expression, turns the result
+ * into a string and puts it in a capture slot. It always returns ACT_RET_CONT.
+ * If an error occurs the action is cancelled, but the rule processing continues.
+ */
+static enum act_return http_action_res_capture_by_id(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *key;
+ struct cap_hdr *h;
+ char **cap = s->res_cap;
+ struct proxy *fe = strm_fe(s);
+ int len;
+ int i;
+
+ /* Look for the original configuration. */
+ for (h = fe->rsp_cap, i = fe->nb_rsp_cap - 1;
+ h != NULL && i != rule->arg.capid.idx ;
+ i--, h = h->next);
+ if (!h)
+ return ACT_RET_CONT;
+
+ key = sample_fetch_as_type(s->be, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, rule->arg.capid.expr, SMP_T_STR);
+ if (!key)
+ return ACT_RET_CONT;
+
+ if (cap[h->index] == NULL)
+ cap[h->index] = pool_alloc(h->pool);
+
+ if (cap[h->index] == NULL) /* no more capture memory */
+ return ACT_RET_CONT;
+
+ len = key->data.u.str.data;
+ if (len > h->len)
+ len = h->len;
+
+ memcpy(cap[h->index], key->data.u.str.area, len);
+ cap[h->index][len] = 0;
+ return ACT_RET_CONT;
+}
+
+/* Check an "http-response capture" action.
+ *
+ * The function returns 1 on success; otherwise it returns 0 and <err> is
+ * filled.
+ */
+static int check_http_res_capture(struct act_rule *rule, struct proxy *px, char **err)
+{
+ if (rule->action_ptr != http_action_res_capture_by_id)
+ return 1;
+
+ /* capture slots can only be declared in frontends, so we can't check their
+ * existence in backends at the configuration parsing step.
+ */
+ if (px->cap & PR_CAP_FE && rule->arg.capid.idx >= px->nb_rsp_cap) {
+ memprintf(err, "unable to find capture id '%d' referenced by http-response capture rule",
+ rule->arg.capid.idx);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* parse an "http-response capture" action. It takes a single argument which is
+ * a sample fetch expression. It stores the expression into arg->act.p[0] and
+ * the allocated hdr_cap struct or the preallocated id into arg->act.p[1].
+ * It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_res_capture(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct sample_expr *expr;
+ int cur_arg;
+ int id;
+ char *error;
+
+ for (cur_arg = *orig_arg; cur_arg < *orig_arg + 3 && *args[cur_arg]; cur_arg++)
+ if (strcmp(args[cur_arg], "if") == 0 ||
+ strcmp(args[cur_arg], "unless") == 0)
+ break;
+
+ if (cur_arg < *orig_arg + 3) {
+ memprintf(err, "expects <expression> id <idx>");
+ return ACT_RET_PRS_ERR;
+ }
+
+ cur_arg = *orig_arg;
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ if (!(expr->fetch->val & SMP_VAL_FE_HRS_HDR)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!args[cur_arg] || !*args[cur_arg]) {
+ memprintf(err, "expects 'id'");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcmp(args[cur_arg], "id") != 0) {
+ memprintf(err, "expects 'id', found '%s'", args[cur_arg]);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ cur_arg++;
+
+ if (!args[cur_arg]) {
+ memprintf(err, "missing id value");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ id = strtol(args[cur_arg], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "cannot parse id '%s'", args[cur_arg]);
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+
+ px->conf.args.ctx = ARGC_CAP;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_res_capture_by_id;
+ rule->check_ptr = check_http_res_capture;
+ rule->release_ptr = release_http_capture;
+ rule->arg.capid.expr = expr;
+ rule->arg.capid.idx = id;
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
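+
+/* Illustrative configuration usage (a sketch); the slot must have been
+ * declared first:
+ *   declare capture response len 32
+ *   http-response capture res.hdr(server) id 0
+ */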
+
+/* Parse an "allow" action for a request or a response rule. It takes no argument. It
+ * returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_allow(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ rule->action = ACT_ACTION_ALLOW;
+ rule->flags |= ACT_FLAG_FINAL;
+ return ACT_RET_PRS_OK;
+}
+
+/* Parse "deny" or "tarpit" actions for a request rule or "deny" action for a
+ * response rule. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on
+ * error. It relies on http_parse_http_reply() to set
+ * <.arg.http_reply>.
+ */
+static enum act_parse_ret parse_http_deny(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int default_status;
+ int cur_arg, arg = 0;
+
+ cur_arg = *orig_arg;
+ if (rule->from == ACT_F_HTTP_REQ) {
+ if (strcmp(args[cur_arg - 1], "tarpit") == 0) {
+ rule->action = ACT_HTTP_REQ_TARPIT;
+ default_status = 500;
+ }
+ else {
+ rule->action = ACT_ACTION_DENY;
+ default_status = 403;
+ }
+ }
+ else {
+ rule->action = ACT_ACTION_DENY;
+ default_status = 502;
+ }
+
+ /* If no args or only a deny_status is specified, fall back on the legacy
+ * mode and use default error files despite the fact that
+ * default-errorfiles is not used. Otherwise, parse an http reply.
+ */
+
+ /* Prepare parsing of log-format strings */
+ px->conf.args.ctx = ((rule->from == ACT_F_HTTP_REQ) ? ARGC_HRQ : ARGC_HRS);
+
+ if (!*(args[cur_arg]) || strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+ rule->arg.http_reply = http_parse_http_reply((const char *[]){"default-errorfiles", ""}, &arg, px, default_status, err);
+ goto end;
+ }
+
+ if (strcmp(args[cur_arg], "deny_status") == 0) {
+ if (!*(args[cur_arg+2]) || strcmp(args[cur_arg+2], "if") == 0 || strcmp(args[cur_arg+2], "unless") == 0) {
+ rule->arg.http_reply = http_parse_http_reply((const char *[]){"status", args[cur_arg+1], "default-errorfiles", ""},
+ &arg, px, default_status, err);
+ *orig_arg += 2;
+ goto end;
+ }
+ args[cur_arg] += 5; /* skip "deny_" for the parsing */
+ }
+
+ rule->arg.http_reply = http_parse_http_reply(args, orig_arg, px, default_status, err);
+
+ end:
+ if (!rule->arg.http_reply)
+ return ACT_RET_PRS_ERR;
+
+ rule->flags |= ACT_FLAG_FINAL;
+ rule->check_ptr = check_act_http_reply;
+ rule->release_ptr = release_act_http_reply;
+ return ACT_RET_PRS_OK;
+}
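+
+/* Illustrative configuration usage of both forms (a sketch; values are
+ * examples):
+ *   http-request deny deny_status 429
+ *   http-request deny status 400 content-type text/plain string "bad request"
+ */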
+
+
+/* This function executes an auth action. It builds a 401/407 HTX message using
+ * the corresponding proxy's error message. On success, it returns
+ * ACT_RET_ABRT. If an error occurs ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_auth(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct htx *htx = htx_from_buf(&res->buf);
+ struct http_reply *reply;
+ const char *auth_realm;
+ struct http_hdr_ctx ctx;
+ struct ist hdr;
+
+ /* Auth might be performed on regular http-req rules as well as on stats */
+ auth_realm = rule->arg.http.str.ptr;
+ if (!auth_realm) {
+ if (px->uri_auth && s->current_rule_list == &px->uri_auth->http_req_rules)
+ auth_realm = STATS_DEFAULT_REALM;
+ else
+ auth_realm = px->id;
+ }
+
+ if (!(s->txn->flags & TX_USE_PX_CONN)) {
+ s->txn->status = 401;
+ hdr = ist("WWW-Authenticate");
+ }
+ else {
+ s->txn->status = 407;
+ hdr = ist("Proxy-Authenticate");
+ }
+ reply = http_error_message(s);
+ channel_htx_truncate(res, htx);
+
+ if (chunk_printf(&trash, "Basic realm=\"%s\"", auth_realm) == -1)
+ goto fail;
+
+ /* Write the generic 40x message */
+ if (http_reply_to_htx(s, htx, reply) == -1)
+ goto fail;
+
+ /* Remove all existing occurrences of the XXX-Authenticate header */
+ ctx.blk = NULL;
+ while (http_find_header(htx, hdr, &ctx, 1))
+ http_remove_header(htx, &ctx);
+
+ /* Now add the right XXX-Authenticate header */
+ if (!http_add_header(htx, hdr, ist2(b_orig(&trash), b_data(&trash))))
+ goto fail;
+
+ /* Finally forward the reply */
+ htx_to_buf(htx, &res->buf);
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+
+ /* Note: Only eval on the request */
+ s->logs.request_ts = now_ns;
+ req->analysers &= AN_REQ_FLT_END;
+
+ if (s->sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.intercepted_req);
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ stream_inc_http_err_ctr(s);
+ return ACT_RET_ABRT;
+
+ fail:
+ /* If an error occurred, remove the incomplete HTTP response from the
+ * buffer */
+ channel_htx_truncate(res, htx);
+ return ACT_RET_ERR;
+}
+
+/* Parse an "auth" action. It may take 2 optional arguments to define a "realm"
+ * parameter. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_auth(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+
+ rule->action = ACT_CUSTOM;
+ rule->flags |= ACT_FLAG_FINAL;
+ rule->action_ptr = http_action_auth;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ cur_arg = *orig_arg;
+ if (strcmp(args[cur_arg], "realm") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(err, "missing realm value");
+ return ACT_RET_PRS_ERR;
+ }
+ rule->arg.http.str = ist(strdup(args[cur_arg]));
+ cur_arg++;
+ }
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
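+
+/* Illustrative configuration usage (a sketch; the userlist name
+ * "mycredentials" and the ACL name are examples):
+ *   acl auth_ok http_auth(mycredentials)
+ *   http-request auth realm "Protected" unless auth_ok
+ */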
+
+/* This function executes an early-hint action. It adds an HTTP 103 Early Hints
+ * response header with <.arg.http.str> name and with a value built
+ * according to <.arg.http.fmt> log line format. If it is the first early-hint
+ * rule of a series, the 103 response start-line is added first. At the end, if
+ * the next rule is not an early-hint rule or if it is the last rule, the EOH
+ * block is added to terminate the response. On success, it returns
+ * ACT_RET_CONT. If an error occurs while soft rewrites are enabled, the action
+ * is canceled, but the rule processing continues. Otherwise ACT_RET_ERR is
+ * returned.
+ */
+static enum act_return http_action_early_hint(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct act_rule *next_rule;
+ struct channel *res = &s->res;
+ struct htx *htx = htx_from_buf(&res->buf);
+ struct buffer *value = alloc_trash_chunk();
+ enum act_return ret = ACT_RET_CONT;
+
+ if (!(s->txn->req.flags & HTTP_MSGF_VER_11))
+ goto leave;
+
+ if (!value) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ goto error;
+ }
+
+ /* if there is no pending 103 response, start a new response. Otherwise,
+ * continue to add headers to a previously started response
+ */
+ if (s->txn->status != 103) {
+ struct htx_sl *sl;
+ unsigned int flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|
+ HTX_SL_F_XFER_LEN|HTX_SL_F_BODYLESS);
+
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags,
+ ist("HTTP/1.1"), ist("103"), ist("Early Hints"));
+ if (!sl)
+ goto error;
+ sl->info.res.status = 103;
+ s->txn->status = 103;
+ }
+
+ /* Add the header to the HTTP 103 Early Hints response */
+ value->data = build_logline(s, b_tail(value), b_room(value), &rule->arg.http.fmt);
+ if (!htx_add_header(htx, rule->arg.http.str, ist2(b_head(value), b_data(value))))
+ goto error;
+
+ /* if it is the last rule, or the next one is not an early-hint, or it
+ * is a conditional early-hint, terminate the current response.
+ */
+ next_rule = LIST_NEXT(&rule->list, typeof(rule), list);
+ if (&next_rule->list == s->current_rule_list || next_rule->action_ptr != http_action_early_hint || next_rule->cond) {
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto error;
+ if (!http_forward_proxy_resp(s, 0))
+ goto error;
+ s->txn->status = 0;
+ }
+
+ leave:
+ free_trash_chunk(value);
+ return ret;
+
+ error:
+ /* If an error occurred during an Early-hint rule, remove the incomplete
+ * HTTP 103 response from the buffer */
+ channel_htx_truncate(res, htx);
+ ret = ACT_RET_ERR;
+ s->txn->status = 0;
+ goto leave;
+}
+
+/* This function executes a set-header or add-header action. It builds a string
+ * in the trash from the specified format string. It finds the action to be
+ * performed in <.action>, previously filled by the function
+ * parse_http_set_header(). On success, it returns ACT_RET_CONT. If an error
+ * occurs while soft rewrites are enabled, the action is canceled, but the rule
+ * processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_set_header(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct http_msg *msg = ((rule->from == ACT_F_HTTP_REQ) ? &s->txn->req : &s->txn->rsp);
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ enum act_return ret = ACT_RET_CONT;
+ struct buffer *replace;
+ struct http_hdr_ctx ctx;
+ struct ist n, v;
+
+ replace = alloc_trash_chunk();
+ if (!replace)
+ goto fail_alloc;
+
+ replace->data = build_logline(s, replace->area, replace->size, &rule->arg.http.fmt);
+ n = rule->arg.http.str;
+ v = ist2(replace->area, replace->data);
+
+ if (rule->action == 0) { // set-header
+ /* remove all occurrences of the header */
+ ctx.blk = NULL;
+ while (http_find_header(htx, n, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ }
+
+ /* Now add header */
+ if (!http_add_header(htx, n, v))
+ goto fail_rewrite;
+
+ leave:
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(msg->flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* Parse "set-header", "add-header" or "early-hint" actions. It takes a
+ * header name and a log-format string as arguments. It returns ACT_RET_PRS_OK
+ * on success, ACT_RET_PRS_ERR on error.
+ *
+ * Note: same function is used for the request and the response. However
+ * "early-hint" rules are only supported for request rules.
+ */
+static enum act_parse_ret parse_http_set_header(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cap = 0, cur_arg;
+ const char *p;
+
+ if (args[*orig_arg-1][0] == 'e') {
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_early_hint;
+ }
+ else {
+ if (args[*orig_arg-1][0] == 's')
+ rule->action = 0; // set-header
+ else
+ rule->action = 1; // add-header
+ rule->action_ptr = http_action_set_header;
+ }
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg] || !*args[cur_arg+1]) {
+ memprintf(err, "expects exactly 2 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+
+ rule->arg.http.str = ist(strdup(args[cur_arg]));
+
+ if (rule->from == ACT_F_HTTP_REQ) {
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ }
+ else{
+ px->conf.args.ctx = ARGC_HRS;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRS_HDR;
+ }
+
+ cur_arg++;
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.http.fmt, LOG_OPT_HTTP, cap, err)) {
+ istfree(&rule->arg.http.str);
+ return ACT_RET_PRS_ERR;
+ }
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+
+ /* some characters are totally forbidden in header names and
+ * may happen by accident when writing configs, causing strange
+ * failures in the field. Better to catch these early; nobody will
+ * miss them. In particular, a colon at the end (or anywhere
+ * after the first char) or a space/cr anywhere due to misplaced
+ * quotes is hard to spot.
+ */
+ for (p = istptr(rule->arg.http.str); p < istend(rule->arg.http.str); p++) {
+ if (HTTP_IS_TOKEN(*p))
+ continue;
+ if (p == istptr(rule->arg.http.str) && *p == ':')
+ continue;
+ /* we only report this as-is but it will not cause an error */
+ memprintf(err, "header name '%s' contains forbidden character '%c'", istptr(rule->arg.http.str), *p);
+ break;
+ }
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
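+
+/* Illustrative configuration usage (a sketch; header names and values are
+ * examples):
+ *   http-request set-header X-Forwarded-Proto https if { ssl_fc }
+ *   http-response add-header X-Cache-Status MISS
+ *   http-request early-hint Link "</style.css>; rel=preload; as=style"
+ */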
+
+/* This function executes a replace-header or replace-value action. It
+ * builds a string in the trash from the specified format string. It finds
+ * the action to be performed in <.action>, previously filled by the function
+ * parse_http_replace_header(). On success, it returns ACT_RET_CONT. If an error
+ * occurs while soft rewrites are enabled, the action is canceled, but the rule
+ * processing continues. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_replace_header(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct http_msg *msg = ((rule->from == ACT_F_HTTP_REQ) ? &s->txn->req : &s->txn->rsp);
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ enum act_return ret = ACT_RET_CONT;
+ struct buffer *replace;
+ int r;
+
+ replace = alloc_trash_chunk();
+ if (!replace)
+ goto fail_alloc;
+
+ replace->data = build_logline(s, replace->area, replace->size, &rule->arg.http.fmt);
+
+ r = http_replace_hdrs(s, htx, rule->arg.http.str, replace->area, rule->arg.http.re, (rule->action == 0));
+ if (r == -1)
+ goto fail_rewrite;
+
+ leave:
+ free_trash_chunk(replace);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+
+ fail_rewrite:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+
+ if (!(msg->flags & HTTP_MSGF_SOFT_RW)) {
+ ret = ACT_RET_ERR;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ }
+ goto leave;
+}
+
+/* Parse "replace-header" or "replace-value" actions. It takes a header name,
+ * a regex and a replacement string as arguments. It returns ACT_RET_PRS_OK on
+ * success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_replace_header(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cap = 0, cur_arg;
+
+ if (args[*orig_arg-1][8] == 'h')
+ rule->action = 0; // replace-header
+ else
+ rule->action = 1; // replace-value
+ rule->action_ptr = http_action_replace_header;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg] || !*args[cur_arg+1] || !*args[cur_arg+2]) {
+ memprintf(err, "expects exactly 3 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->arg.http.str = ist(strdup(args[cur_arg]));
+
+ cur_arg++;
+ if (!(rule->arg.http.re = regex_comp(args[cur_arg], 1, 1, err))) {
+ istfree(&rule->arg.http.str);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->from == ACT_F_HTTP_REQ) {
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ }
+ else{
+ px->conf.args.ctx = ARGC_HRS;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRS_HDR;
+ }
+
+ cur_arg++;
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.http.fmt, LOG_OPT_HTTP, cap, err)) {
+ istfree(&rule->arg.http.str);
+ regex_free(rule->arg.http.re);
+ return ACT_RET_PRS_ERR;
+ }
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
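+
+/* Illustrative configuration usage (a sketch): rewrite one cookie value, or
+ * masquerade a private XFF prefix:
+ *   http-request replace-header Cookie foo=([^;]*);(.*) foo=\1;ip=%[src];\2
+ *   http-request replace-value X-Forwarded-For ^192\.168\.(.*)$ 172.16.\1
+ */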
+
+/* This function executes a del-header action with the selected matching mode
+ * for the header name. It finds the matching method to be performed in
+ * <.action>, previously filled by the function parse_http_del_header(). On
+ * success, it returns ACT_RET_CONT. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_del_header(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct http_hdr_ctx ctx;
+ struct http_msg *msg = ((rule->from == ACT_F_HTTP_REQ) ? &s->txn->req : &s->txn->rsp);
+ struct htx *htx = htxbuf(&msg->chn->buf);
+ enum act_return ret = ACT_RET_CONT;
+
+ /* remove all occurrences of the header */
+ ctx.blk = NULL;
+ switch (rule->action) {
+ case PAT_MATCH_STR:
+ while (http_find_header(htx, rule->arg.http.str, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ case PAT_MATCH_BEG:
+ while (http_find_pfx_header(htx, rule->arg.http.str, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ case PAT_MATCH_END:
+ while (http_find_sfx_header(htx, rule->arg.http.str, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ case PAT_MATCH_SUB:
+ while (http_find_sub_header(htx, rule->arg.http.str, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ case PAT_MATCH_REG:
+ while (http_match_header(htx, rule->arg.http.re, &ctx, 1))
+ http_remove_header(htx, &ctx);
+ break;
+ default:
+ return ACT_RET_ERR;
+ }
+ return ret;
+}
+
+/* Parse a "del-header" action. It takes a header name as a required argument,
+ * an optional flag (currently only -m) and an optional matching method to apply
+ * to the header name to be deleted. The default matching method is exact match
+ * (-m str). It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_del_header(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+ int pat_idx;
+
+ /* set exact matching (-m str) as default */
+ rule->action = PAT_MATCH_STR;
+ rule->action_ptr = http_action_del_header;
+ rule->release_ptr = release_http_action;
+ LIST_INIT(&rule->arg.http.fmt);
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg]) {
+ memprintf(err, "expects at least 1 argument");
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->arg.http.str = ist(strdup(args[cur_arg]));
+ px->conf.args.ctx = (rule->from == ACT_F_HTTP_REQ ? ARGC_HRQ : ARGC_HRS);
+
+ if (strcmp(args[cur_arg+1], "-m") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg+1]) {
+ memprintf(err, "-m flag expects exactly 1 argument");
+ return ACT_RET_PRS_ERR;
+ }
+
+ cur_arg++;
+ pat_idx = pat_find_match_name(args[cur_arg]);
+ switch (pat_idx) {
+ case PAT_MATCH_REG:
+ if (!(rule->arg.http.re = regex_comp(rule->arg.http.str.ptr, 1, 1, err)))
+ return ACT_RET_PRS_ERR;
+ __fallthrough;
+ case PAT_MATCH_STR:
+ case PAT_MATCH_BEG:
+ case PAT_MATCH_END:
+ case PAT_MATCH_SUB:
+ rule->action = pat_idx;
+ break;
+ default:
+ memprintf(err, "-m with unsupported matching method '%s'", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
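+
+/* Illustrative configuration usage (a sketch; header names are examples):
+ *   http-request del-header X-Debug
+ *   http-response del-header X-Private- -m beg
+ */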
+
+/* Release memory allocated by an http redirect action. */
+static void release_http_redir(struct act_rule *rule)
+{
+ struct redirect_rule *redir;
+
+ redir = rule->arg.redir;
+ if (!redir)
+ return;
+
+ LIST_DELETE(&redir->list);
+ http_free_redirect_rule(redir);
+}
+
+/* Parse a "redirect" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_redirect(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct redirect_rule *redir;
+ int dir, cur_arg;
+
+ rule->action = ACT_HTTP_REDIR;
+ rule->release_ptr = release_http_redir;
+
+ cur_arg = *orig_arg;
+
+ dir = (rule->from == ACT_F_HTTP_REQ ? 0 : 1);
+ if ((redir = http_parse_redirect_rule(px->conf.args.file, px->conf.args.line, px, &args[cur_arg], err, 1, dir)) == NULL)
+ return ACT_RET_PRS_ERR;
+
+ if (!(redir->flags & REDIRECT_FLAG_IGNORE_EMPTY))
+ rule->flags |= ACT_FLAG_FINAL;
+
+ rule->arg.redir = redir;
+ rule->cond = redir->cond;
+ redir->cond = NULL;
+
+ /* skip all arguments */
+ while (*args[cur_arg])
+ cur_arg++;
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
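+
+/* Usage sketch for "redirect", whose arguments are handed over to
+ * http_parse_redirect_rule() above:
+ *
+ *   http-request redirect scheme https code 301 if !{ ssl_fc }
+ *
+ * Note that unless "ignore-empty" is set on the rule, ACT_FLAG_FINAL makes
+ * the redirect terminate rule evaluation.
+ */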
+
+/* This function executes an add-acl, del-acl, set-map or del-map action. On
+ * success, it returns ACT_RET_CONT. Otherwise ACT_RET_ERR is returned.
+ */
+static enum act_return http_action_set_map(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct pat_ref *ref;
+ struct buffer *key = NULL, *value = NULL;
+ enum act_return ret = ACT_RET_CONT;
+
+ /* collect reference */
+ ref = pat_ref_lookup(rule->arg.map.ref);
+ if (!ref)
+ goto leave;
+
+ /* allocate key */
+ key = alloc_trash_chunk();
+ if (!key)
+ goto fail_alloc;
+
+ /* collect key */
+ key->data = build_logline(s, key->area, key->size, &rule->arg.map.key);
+ key->area[key->data] = '\0';
+
+ switch (rule->action) {
+ case 0: // add-acl
+ /* add entry only if it does not already exist */
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ref->lock);
+ if (pat_ref_find_elt(ref, key->area) == NULL)
+ pat_ref_add(ref, key->area, NULL, NULL);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ref->lock);
+ break;
+
+ case 1: // set-map
+ {
+ struct pat_ref_elt *elt;
+
+ /* allocate value */
+ value = alloc_trash_chunk();
+ if (!value)
+ goto fail_alloc;
+
+ /* collect value */
+ value->data = build_logline(s, value->area, value->size, &rule->arg.map.value);
+ value->area[value->data] = '\0';
+
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ref->lock);
+ elt = pat_ref_find_elt(ref, key->area);
+ if (elt) {
+ /* update entry if it exists */
+ pat_ref_set(ref, key->area, value->area, NULL, elt);
+ }
+ else {
+ /* insert a new entry */
+ pat_ref_add(ref, key->area, value->area, NULL);
+ }
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ref->lock);
+ break;
+ }
+
+ case 2: // del-acl
+ case 3: // del-map
+ /* returned code: 1=ok, 0=ko */
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ref->lock);
+ pat_ref_delete(ref, key->area);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ref->lock);
+ break;
+
+ default:
+ ret = ACT_RET_ERR;
+ }
+
+
+ leave:
+ free_trash_chunk(key);
+ free_trash_chunk(value);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ ret = ACT_RET_ERR;
+ goto leave;
+}
+
+/* Release memory allocated by an http map/acl action. */
+static void release_http_map(struct act_rule *rule)
+{
+ struct logformat_node *lf, *lfb;
+
+ free(rule->arg.map.ref);
+ list_for_each_entry_safe(lf, lfb, &rule->arg.map.key, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ if (rule->action == 1) {
+ list_for_each_entry_safe(lf, lfb, &rule->arg.map.value, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ }
+}
+
+/* Parse a "add-acl", "del-acl", "set-map" or "del-map" actions. It takes one or
+ * two log-format string as argument depending on the action. The action is
+ * stored in <.action> as an int (0=add-acl, 1=set-map, 2=del-acl,
+ * 3=del-map). It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_set_map(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cap = 0, cur_arg;
+
+ if (args[*orig_arg-1][0] == 'a') // add-acl
+ rule->action = 0;
+ else if (args[*orig_arg-1][0] == 's') // set-map
+ rule->action = 1;
+ else if (args[*orig_arg-1][4] == 'a') // del-acl
+ rule->action = 2;
+ else if (args[*orig_arg-1][4] == 'm') // del-map
+ rule->action = 3;
+ else {
+ memprintf(err, "internal error: unhandled action '%s'", args[0]);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->action_ptr = http_action_set_map;
+ rule->release_ptr = release_http_map;
+
+ cur_arg = *orig_arg;
+ if (rule->action == 1 && (!*args[cur_arg] || !*args[cur_arg+1])) {
+ /* 2 args for set-map */
+ memprintf(err, "expects exactly 2 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+ else if (!*args[cur_arg]) {
+ /* only one arg for other actions */
+ memprintf(err, "expects exactly 1 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+ /*
+ * '+ 8' for 'set-map(' (same for del-map)
+ * '- 9' for 'set-map(' + trailing ')' (same for del-map)
+ */
+ rule->arg.map.ref = my_strndup(args[cur_arg-1] + 8, strlen(args[cur_arg-1]) - 9);
+
+ if (rule->from == ACT_F_HTTP_REQ) {
+ px->conf.args.ctx = ARGC_HRQ;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRQ_HDR;
+ }
+ else {
+ px->conf.args.ctx = ARGC_HRS;
+ if (px->cap & PR_CAP_FE)
+ cap |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ cap |= SMP_VAL_BE_HRS_HDR;
+ }
+
+ /* key pattern */
+ LIST_INIT(&rule->arg.map.key);
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.map.key, LOG_OPT_HTTP, cap, err)) {
+ free(rule->arg.map.ref);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->action == 1) {
+ /* value pattern for set-map only */
+ cur_arg++;
+ LIST_INIT(&rule->arg.map.value);
+ if (!parse_logformat_string(args[cur_arg], px, &rule->arg.map.value, LOG_OPT_HTTP, cap, err)) {
+ free(rule->arg.map.ref);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
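+
+/* Usage sketch for the map/acl actions parsed above (file paths and sample
+ * expressions illustrative):
+ *
+ *   http-request add-acl(/etc/haproxy/blocklist.acl) %[src]
+ *   http-request set-map(/etc/haproxy/rates.map) %[src] %[req.hdr(X-Rate)]
+ *   http-request del-map(/etc/haproxy/rates.map) %[src]
+ *
+ * The key and, for set-map, the value are log-format strings evaluated at
+ * run time against the current stream.
+ */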
+
+/* This function executes a track-sc* action. It always returns
+ * ACT_RET_CONT.
+ */
+static enum act_return http_action_track_sc(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stktable *t;
+ struct stksess *ts;
+ struct stktable_key *key;
+ void *ptr1, *ptr2, *ptr3, *ptr4, *ptr5, *ptr6;
+ int opt;
+
+ ptr1 = ptr2 = ptr3 = ptr4 = ptr5 = ptr6 = NULL;
+ opt = ((rule->from == ACT_F_HTTP_REQ) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES) | SMP_OPT_FINAL;
+
+ t = rule->arg.trk_ctr.table.t;
+
+ if (stkctr_entry(&s->stkctr[rule->action]))
+ goto end;
+
+ key = stktable_fetch_key(t, s->be, sess, s, opt, rule->arg.trk_ctr.expr, NULL);
+
+ if (!key)
+ goto end;
+ ts = stktable_get_entry(t, key);
+ if (!ts)
+ goto end;
+
+ stream_track_stkctr(&s->stkctr[rule->action], t, ts);
+
+ /* let's count a new HTTP request as it's the first time we do it */
+ ptr1 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_REQ_CNT);
+ ptr2 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_REQ_RATE);
+
+ /* When the client triggers a 4xx from the server, it's most often due
+ * to a missing object or permission. These events should be tracked
+ * because if they happen often, it may indicate a brute force or a
+ * vulnerability scan. Normally this is done when receiving the response,
+ * but here we're tracking after the point where this ought to have been
+ * done, so we have to do it on purpose.
+ */
+ if (rule->from == ACT_F_HTTP_RES && (unsigned)(s->txn->status - 400) < 100) {
+ ptr3 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_CNT);
+ ptr4 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_RATE);
+ }
+
+ if (rule->from == ACT_F_HTTP_RES && (unsigned)(s->txn->status - 500) < 100 &&
+ s->txn->status != 501 && s->txn->status != 505) {
+ ptr5 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_CNT);
+ ptr6 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_RATE);
+ }
+
+ if (ptr1 || ptr2 || ptr3 || ptr4 || ptr5 || ptr6) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ stktable_data_cast(ptr1, std_t_uint)++;
+ if (ptr2)
+ update_freq_ctr_period(&stktable_data_cast(ptr2, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_REQ_RATE].u, 1);
+ if (ptr3)
+ stktable_data_cast(ptr3, std_t_uint)++;
+ if (ptr4)
+ update_freq_ctr_period(&stktable_data_cast(ptr4, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_ERR_RATE].u, 1);
+ if (ptr5)
+ stktable_data_cast(ptr5, std_t_uint)++;
+ if (ptr6)
+ update_freq_ctr_period(&stktable_data_cast(ptr6, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_FAIL_RATE].u, 1);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* If data was modified, we need to touch the entry to re-schedule a sync */
+ stktable_touch_local(t, ts, 0);
+ }
+
+ stkctr_set_flags(&s->stkctr[rule->action], STKCTR_TRACK_CONTENT);
+ if (sess->fe != s->be)
+ stkctr_set_flags(&s->stkctr[rule->action], STKCTR_TRACK_BACKEND);
+
+ end:
+ return ACT_RET_CONT;
+}
+
+static void release_http_track_sc(struct act_rule *rule)
+{
+ release_sample_expr(rule->arg.trk_ctr.expr);
+}
+
+/* Parse a "track-sc*" actions. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_track_sc(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ struct sample_expr *expr;
+ unsigned int where;
+ unsigned int tsc_num;
+ const char *tsc_num_str;
+ int cur_arg;
+
+ tsc_num_str = &args[*orig_arg-1][8];
+ if (cfg_parse_track_sc_num(&tsc_num, tsc_num_str, tsc_num_str + strlen(tsc_num_str), err) == -1)
+ return ACT_RET_PRS_ERR;
+
+ cur_arg = *orig_arg;
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line,
+ err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ where = 0;
+ if (px->cap & PR_CAP_FE)
+ where |= (rule->from == ACT_F_HTTP_REQ ? SMP_VAL_FE_HRQ_HDR : SMP_VAL_FE_HRS_HDR);
+ if (px->cap & PR_CAP_BE)
+ where |= (rule->from == ACT_F_HTTP_REQ ? SMP_VAL_BE_HRQ_HDR : SMP_VAL_BE_HRS_HDR);
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err, "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcmp(args[cur_arg], "table") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(err, "missing table name");
+ release_sample_expr(expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* we copy the table name for now, it will be resolved later */
+ rule->arg.trk_ctr.table.n = strdup(args[cur_arg]);
+ cur_arg++;
+ }
+
+ rule->action = tsc_num;
+ rule->arg.trk_ctr.expr = expr;
+ rule->action_ptr = http_action_track_sc;
+ rule->release_ptr = release_http_track_sc;
+ rule->check_ptr = check_trk_action;
+
+ *orig_arg = cur_arg;
+ return ACT_RET_PRS_OK;
+}
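+
+/* Usage sketch for "track-sc*", tracking the request rate per source
+ * address (table name and limits illustrative):
+ *
+ *   backend per_ip
+ *       stick-table type ip size 1m expire 10m store http_req_rate(10s)
+ *
+ *   frontend fe
+ *       http-request track-sc0 src table per_ip
+ *       http-request deny if { sc_http_req_rate(0) gt 100 }
+ */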
+
+static enum act_return action_timeout_set_stream_timeout(struct act_rule *rule,
+ struct proxy *px,
+ struct session *sess,
+ struct stream *s,
+ int flags)
+{
+ struct sample *key;
+
+ if (rule->arg.timeout.expr) {
+ key = sample_fetch_as_type(px, sess, s, SMP_OPT_FINAL, rule->arg.timeout.expr, SMP_T_SINT);
+ if (!key)
+ return ACT_RET_CONT;
+
+ stream_set_timeout(s, rule->arg.timeout.type, MS_TO_TICKS(key->data.u.sint));
+ }
+ else {
+ stream_set_timeout(s, rule->arg.timeout.type, MS_TO_TICKS(rule->arg.timeout.value));
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* Parse a "set-timeout" action. Returns ACT_RET_PRS_ERR if parsing error.
+ */
+static enum act_parse_ret parse_http_set_timeout(const char **args,
+ int *orig_arg,
+ struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_timeout_set_stream_timeout;
+ rule->release_ptr = release_timeout_action;
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg] || !*args[cur_arg + 1]) {
+ memprintf(err, "expects exactly 2 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (cfg_parse_rule_set_timeout(args, cur_arg, rule, px, err) == -1) {
+ return ACT_RET_PRS_ERR;
+ }
+
+ *orig_arg = cur_arg + 2;
+
+ return ACT_RET_PRS_OK;
+}
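+
+/* Usage sketch for "set-timeout": the second argument is either a constant
+ * delay or a sample expression returning milliseconds (header illustrative):
+ *
+ *   http-request set-timeout server 10s
+ *   http-request set-timeout tunnel req.hdr(X-Tunnel-Timeout)
+ */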
+
+/* This function executes a strict-mode action. It always returns
+ * ACT_RET_CONT.
+ */
+static enum act_return http_action_strict_mode(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct http_msg *msg = ((rule->from == ACT_F_HTTP_REQ) ? &s->txn->req : &s->txn->rsp);
+
+ if (rule->action == 0) // strict-mode on
+ msg->flags &= ~HTTP_MSGF_SOFT_RW;
+ else // strict-mode off
+ msg->flags |= HTTP_MSGF_SOFT_RW;
+ return ACT_RET_CONT;
+}
+
+/* Parse a "strict-mode" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_strict_mode(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg]) {
+ memprintf(err, "expects exactly 1 arguments");
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (strcasecmp(args[cur_arg], "on") == 0)
+ rule->action = 0; // strict-mode on
+ else if (strcasecmp(args[cur_arg], "off") == 0)
+ rule->action = 1; // strict-mode off
+ else {
+ memprintf(err, "Unexpected value '%s'. Only 'on' and 'off' are supported", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->action_ptr = http_action_strict_mode;
+
+ *orig_arg = cur_arg + 1;
+ return ACT_RET_PRS_OK;
+}
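+
+/* Usage sketch for "strict-mode": rewrites run in strict mode by default,
+ * so a failed rewrite aborts the processing. Turning it off makes header
+ * rewrite failures non-fatal:
+ *
+ *   http-request strict-mode off
+ */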
+
+/* This function executes a return action. It builds an HTX message from an
+ * errorfile, a raw file or a log-format string, depending on the <.action>
+ * value. On success, it returns ACT_RET_ABRT. If an error occurs, ACT_RET_ERR is
+ * returned.
+ */
+static enum act_return http_action_return(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct channel *req = &s->req;
+
+ s->txn->status = rule->arg.http_reply->status;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= ((rule->from == ACT_F_HTTP_REQ) ? SF_FINST_R : SF_FINST_H);
+
+ if (http_reply_message(s, rule->arg.http_reply) == -1)
+ return ACT_RET_ERR;
+
+ if (rule->from == ACT_F_HTTP_REQ) {
+ /* let's log the request time */
+ s->logs.request_ts = now_ns;
+ req->analysers &= AN_REQ_FLT_END;
+
+ if (s->sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.intercepted_req);
+ }
+
+ return ACT_RET_ABRT;
+}
+
+/* Parse a "return" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error. It relies on http_parse_http_reply() to set
+ * <.arg.http_reply>.
+ */
+static enum act_parse_ret parse_http_return(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ /* Prepare parsing of log-format strings */
+ px->conf.args.ctx = ((rule->from == ACT_F_HTTP_REQ) ? ARGC_HRQ : ARGC_HRS);
+ rule->arg.http_reply = http_parse_http_reply(args, orig_arg, px, 200, err);
+ if (!rule->arg.http_reply)
+ return ACT_RET_PRS_ERR;
+
+ rule->flags |= ACT_FLAG_FINAL;
+ rule->action = ACT_CUSTOM;
+ rule->check_ptr = check_act_http_reply;
+ rule->action_ptr = http_action_return;
+ rule->release_ptr = release_act_http_reply;
+ return ACT_RET_PRS_OK;
+}
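+
+/* Usage sketch for "return", replying directly from the proxy without
+ * contacting any server (reply definition parsed by http_parse_http_reply()):
+ *
+ *   http-request return status 200 content-type "text/plain" \
+ *       string "pong" if { path /ping }
+ */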
+
+
+
+/* This function executes a wait-for-body action. It waits for the message
+ * payload for a configured maximum time (.arg.p[0]) and possibly for only the
+ * first <.arg.p[1]> bytes (0 means no limit). It relies on the
+ * http_wait_for_msg_body() function. It returns ACT_RET_CONT when the
+ * conditions to stop waiting are met. Otherwise ACT_RET_YIELD is returned to
+ * wait for more data. ACT_RET_INV is returned if a parsing error is raised by
+ * the lower layers and ACT_RET_ERR if an internal error occurred. Finally
+ * ACT_RET_ABRT is returned when a timeout occurred.
+ */
+static enum act_return http_action_wait_for_body(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct channel *chn = ((rule->from == ACT_F_HTTP_REQ) ? &s->req : &s->res);
+ unsigned int time = (uintptr_t)rule->arg.act.p[0];
+ unsigned int bytes = (uintptr_t)rule->arg.act.p[1];
+
+ switch (http_wait_for_msg_body(s, chn, time, bytes)) {
+ case HTTP_RULE_RES_CONT:
+ return ACT_RET_CONT;
+ case HTTP_RULE_RES_YIELD:
+ return ACT_RET_YIELD;
+ case HTTP_RULE_RES_BADREQ:
+ return ACT_RET_INV;
+ case HTTP_RULE_RES_ERROR:
+ return ACT_RET_ERR;
+ case HTTP_RULE_RES_ABRT:
+ return ACT_RET_ABRT;
+ default:
+ return ACT_RET_ERR;
+ }
+}
+
+/* Parse a "wait-for-body" action. It returns ACT_RET_PRS_OK on success,
+ * ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret parse_http_wait_for_body(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+ unsigned int time, bytes;
+ const char *res;
+
+ cur_arg = *orig_arg;
+ if (!*args[cur_arg]) {
+ memprintf(err, "expects time <time> [ at-least <bytes> ]");
+ return ACT_RET_PRS_ERR;
+ }
+
+ time = UINT_MAX; /* To be sure it is set */
+ bytes = 0; /* Default value: wait for the whole body */
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "time") == 0) {
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "missing argument for '%s'", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ res = parse_time_err(args[cur_arg+1], &time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "time overflow (maximum value is 2147483647 ms or ~24.8 days)");
+ return ACT_RET_PRS_ERR;
+ }
+ if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "time underflow (minimum non-null value is 1 ms)");
+ return ACT_RET_PRS_ERR;
+ }
+ if (res) {
+ memprintf(err, "unexpected character '%c'", *res);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "at-least") == 0) {
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "missing argument for '%s'", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ res = parse_size_err(args[cur_arg+1], &bytes);
+ if (res) {
+ memprintf(err, "unexpected character '%c'", *res);
+ return ACT_RET_PRS_ERR;
+ }
+ cur_arg++;
+ }
+ else
+ break;
+ cur_arg++;
+ }
+
+ if (time == UINT_MAX) {
+ memprintf(err, "expects time <time> [ at-least <bytes> ]");
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->arg.act.p[0] = (void *)(uintptr_t)time;
+ rule->arg.act.p[1] = (void *)(uintptr_t)bytes;
+
+ *orig_arg = cur_arg;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = http_action_wait_for_body;
+ return ACT_RET_PRS_OK;
+}
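+
+/* Usage sketch for "wait-for-body", buffering the payload before rules that
+ * need to inspect it (values illustrative):
+ *
+ *   acl is_post method POST
+ *   http-request wait-for-body time 1s at-least 1k if is_post
+ */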
+
+/************************************************************************/
+/* All supported http-request action keywords must be declared here. */
+/************************************************************************/
+
+static struct action_kw_list http_req_actions = {
+ .kw = {
+ { "add-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "add-header", parse_http_set_header, 0 },
+ { "allow", parse_http_allow, 0 },
+ { "auth", parse_http_auth, 0 },
+ { "capture", parse_http_req_capture, 0 },
+ { "del-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "del-header", parse_http_del_header, 0 },
+ { "del-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "deny", parse_http_deny, 0 },
+ { "disable-l7-retry", parse_http_req_disable_l7_retry, 0 },
+ { "early-hint", parse_http_set_header, 0 },
+ { "normalize-uri", parse_http_normalize_uri, KWF_EXPERIMENTAL },
+ { "redirect", parse_http_redirect, 0 },
+ { "reject", parse_http_action_reject, 0 },
+ { "replace-header", parse_http_replace_header, 0 },
+ { "replace-path", parse_replace_uri, 0 },
+ { "replace-pathq", parse_replace_uri, 0 },
+ { "replace-uri", parse_replace_uri, 0 },
+ { "replace-value", parse_http_replace_header, 0 },
+ { "return", parse_http_return, 0 },
+ { "set-header", parse_http_set_header, 0 },
+ { "set-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "set-method", parse_set_req_line, 0 },
+ { "set-path", parse_set_req_line, 0 },
+ { "set-pathq", parse_set_req_line, 0 },
+ { "set-query", parse_set_req_line, 0 },
+ { "set-uri", parse_set_req_line, 0 },
+ { "strict-mode", parse_http_strict_mode, 0 },
+ { "tarpit", parse_http_deny, 0 },
+ { "track-sc", parse_http_track_sc, KWF_MATCH_PREFIX },
+ { "set-timeout", parse_http_set_timeout, 0 },
+ { "wait-for-body", parse_http_wait_for_body, 0 },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+
+static struct action_kw_list http_res_actions = {
+ .kw = {
+ { "add-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "add-header", parse_http_set_header, 0 },
+ { "allow", parse_http_allow, 0 },
+ { "capture", parse_http_res_capture, 0 },
+ { "del-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "del-header", parse_http_del_header, 0 },
+ { "del-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "deny", parse_http_deny, 0 },
+ { "redirect", parse_http_redirect, 0 },
+ { "replace-header", parse_http_replace_header, 0 },
+ { "replace-value", parse_http_replace_header, 0 },
+ { "return", parse_http_return, 0 },
+ { "set-header", parse_http_set_header, 0 },
+ { "set-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "set-status", parse_http_set_status, 0 },
+ { "strict-mode", parse_http_strict_mode, 0 },
+ { "track-sc", parse_http_track_sc, KWF_MATCH_PREFIX },
+ { "set-timeout", parse_http_set_timeout, 0 },
+ { "wait-for-body", parse_http_wait_for_body, 0 },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
+
+static struct action_kw_list http_after_res_actions = {
+ .kw = {
+ { "add-header", parse_http_set_header, 0 },
+ { "allow", parse_http_allow, 0 },
+ { "capture", parse_http_res_capture, 0 },
+ { "del-acl", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "del-header", parse_http_del_header, 0 },
+ { "del-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "replace-header", parse_http_replace_header, 0 },
+ { "replace-value", parse_http_replace_header, 0 },
+ { "set-header", parse_http_set_header, 0 },
+ { "set-map", parse_http_set_map, KWF_MATCH_PREFIX },
+ { "set-status", parse_http_set_status, 0 },
+ { "strict-mode", parse_http_strict_mode, 0 },
+ { NULL, NULL }
+ }
+};
+
+INITCALL1(STG_REGISTER, http_after_res_keywords_register, &http_after_res_actions);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_ana.c b/src/http_ana.c
new file mode 100644
index 0000000..178f874
--- /dev/null
+++ b/src/http_ana.c
@@ -0,0 +1,5153 @@
+/*
+ * HTTP protocol analyzer
+ *
+ * Copyright (C) 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/acl.h>
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/backend.h>
+#include <haproxy/base64.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/filters.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_ext.h>
+#include <haproxy/htx.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server-t.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/trace.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/vars.h>
+
+
+#define TRACE_SOURCE &trace_strm
+
+extern const char *stat_status_codes[];
+
+struct pool_head *pool_head_requri __read_mostly = NULL;
+struct pool_head *pool_head_capture __read_mostly = NULL;
+
+
+static void http_end_request(struct stream *s);
+static void http_end_response(struct stream *s);
+
+static void http_capture_headers(struct htx *htx, char **cap, struct cap_hdr *cap_hdr);
+static int http_del_hdr_value(char *start, char *end, char **from, char *next);
+static size_t http_fmt_req_line(const struct htx_sl *sl, char *str, size_t len);
+static void http_debug_stline(const char *dir, struct stream *s, const struct htx_sl *sl);
+static void http_debug_hdr(const char *dir, struct stream *s, const struct ist n, const struct ist v);
+
+static enum rule_result http_req_get_intercept_rule(struct proxy *px, struct list *def_rules, struct list *rules, struct stream *s);
+static enum rule_result http_res_get_intercept_rule(struct proxy *px, struct list *def_rules, struct list *rules, struct stream *s, uint8_t final);
+static enum rule_result http_req_restrict_header_names(struct stream *s, struct htx *htx, struct proxy *px);
+
+static void http_manage_client_side_cookies(struct stream *s, struct channel *req);
+static void http_manage_server_side_cookies(struct stream *s, struct channel *res);
+
+static int http_stats_check_uri(struct stream *s, struct http_txn *txn, struct proxy *px);
+static int http_handle_stats(struct stream *s, struct channel *req, struct proxy *px);
+
+static int http_handle_expect_hdr(struct stream *s, struct htx *htx, struct http_msg *msg);
+static int http_reply_100_continue(struct stream *s);
+
+/* This stream analyser waits for a complete HTTP request. It returns 1 if the
+ * processing can continue on next analysers, or zero if it either needs more
+ * data or wants to immediately abort the request (eg: timeout, error, ...). It
+ * is tied to AN_REQ_WAIT_HTTP and may remove itself from s->req.analysers
+ * when it has nothing left to do, and may remove any analyser when it wants to
+ * abort.
+ */
+int http_wait_for_request(struct stream *s, struct channel *req, int an_bit)
+{
+
+ /*
+ * We will analyze a complete HTTP request to check its syntax.
+ *
+ * Once the start line and all headers are received, we may perform a
+ * capture of the error (if any), and we will set a few fields. We also
+ * check for monitor-uri, logging and finally headers capture.
+ */
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->req;
+ struct htx *htx;
+ struct htx_sl *sl;
+ char http_ver;
+ int len;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ if (unlikely(!IS_HTX_STRM(s))) {
+ /* This is only possible when a TCP stream is upgraded to HTTP.
+ * There is a transition period during which there is no
+ * data. The stream is still in raw mode and the SF_IGNORE flag is
+ * still set. When this happens, the new mux is responsible for
+ * handling all errors. Thus we may leave immediately.
+ */
+ BUG_ON(!(s->flags & SF_IGNORE) || !c_empty(&s->req));
+
+ /* Don't connect for now */
+ channel_dont_connect(req);
+
+ /* An abort at this stage means we are performing a "destructive"
+ * HTTP upgrade (TCP>H2). In this case, we can leave.
+ */
+ if (s->scf->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) {
+ s->logs.logwait = 0;
+ s->logs.level = 0;
+ stream_abort(s);
+ req->analysers &= AN_REQ_FLT_END;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+ }
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 0;
+ }
+
+ htx = htxbuf(&req->buf);
+ sl = http_get_stline(htx);
+ len = HTX_SL_REQ_VLEN(sl);
+ if (len < 6) {
+ http_ver = 0;
+ }
+ else {
+ char *ptr;
+
+ ptr = HTX_SL_REQ_VPTR(sl);
+ http_ver = ptr[5] - '0';
+ }
+
+ /* Parsing errors are caught here */
+ if (htx->flags & (HTX_FL_PARSING_ERROR|HTX_FL_PROCESSING_ERROR)) {
+ stream_inc_http_req_ctr(s);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe, http_ver);
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ stream_inc_http_err_ctr(s);
+ goto return_bad_req;
+ }
+ else
+ goto return_int_err;
+ }
+
+ /* we're speaking HTTP here, so let's speak HTTP to the client */
+ s->srv_error = http_return_srv_error;
+
+ msg->msg_state = HTTP_MSG_BODY;
+ stream_inc_http_req_ctr(s);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe, http_ver); /* one more valid request for this FE */
+
+ /* kill the pending keep-alive timeout */
+ req->analyse_exp = TICK_ETERNITY;
+
+ BUG_ON(htx_get_first_type(htx) != HTX_BLK_REQ_SL);
+
+ /* 0: we might have to print this header in debug mode */
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
+ int32_t pos;
+
+ http_debug_stline("clireq", s, sl);
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ http_debug_hdr("clihdr", s,
+ htx_get_blk_name(htx, blk),
+ htx_get_blk_value(htx, blk));
+ }
+ }
+
+ /*
+ * 1: identify the method and the version. Also set HTTP flags
+ */
+ txn->meth = sl->info.req.meth;
+ if (sl->flags & HTX_SL_F_VER_11)
+ msg->flags |= HTTP_MSGF_VER_11;
+ msg->flags |= HTTP_MSGF_XFER_LEN;
+ if (sl->flags & HTX_SL_F_CLEN)
+ msg->flags |= HTTP_MSGF_CNT_LEN;
+ else if (sl->flags & HTX_SL_F_CHNK)
+ msg->flags |= HTTP_MSGF_TE_CHNK;
+ if (sl->flags & HTX_SL_F_BODYLESS)
+ msg->flags |= HTTP_MSGF_BODYLESS;
+ if (sl->flags & HTX_SL_F_CONN_UPG)
+ msg->flags |= HTTP_MSGF_CONN_UPG;
+
+ /* we can make use of server redirect on GET and HEAD */
+ if (txn->meth == HTTP_METH_GET || txn->meth == HTTP_METH_HEAD)
+ s->flags |= SF_REDIRECTABLE;
+ else if (txn->meth == HTTP_METH_OTHER && isteqi(htx_sl_req_meth(sl), ist("PRI"))) {
+ /* PRI is reserved for the HTTP/2 preface */
+ goto return_bad_req;
+ }
+
+ /*
+ * 2: check if the URI matches the monitor_uri. We have to do this for
+ * every request which gets in, because the monitor-uri is defined by
+ * the frontend. If the monitor-uri starts with a '/', the matching is
+ * done against the request's path. Otherwise, the request's uri is
+ * used. It is a workaround to let HTTP/2 health-checks work as
+ * expected.
+ */
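+ /* For instance, with "monitor-uri /haproxy_test" (illustrative), a
+ * plain "GET /haproxy_test HTTP/1.1" matches through the full-URI
+ * comparison, while an HTTP/2 request whose ":path" is "/haproxy_test"
+ * matches through the path comparison.
+ */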
+ if (unlikely(isttest(sess->fe->monitor_uri))) {
+ const struct ist monitor_uri = sess->fe->monitor_uri;
+ struct http_uri_parser parser = http_uri_parser_init(htx_sl_req_uri(sl));
+
+ if ((istptr(monitor_uri)[0] == '/' &&
+ isteq(http_parse_path(&parser), monitor_uri)) ||
+ isteq(htx_sl_req_uri(sl), monitor_uri)) {
+ /*
+ * We have found the monitor URI
+ */
+ struct acl_cond *cond;
+
+ s->flags |= SF_MONITOR;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.intercepted_req);
+
+ /* Check if we want to fail this monitor request or not */
+ list_for_each_entry(cond, &sess->fe->mon_fail_cond, list) {
+ int ret = acl_exec_cond(cond, sess->fe, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+
+ ret = acl_pass(ret);
+ if (cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ if (ret) {
+ /* we fail this request, let's return 503 service unavail */
+ txn->status = 503;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL; /* we don't want a real error here */
+ goto return_prx_cond;
+ }
+ }
+
+ /* nothing to fail, let's reply normally */
+ txn->status = 200;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL; /* we don't want a real error here */
+ goto return_prx_cond;
+ }
+ }
+
+ /*
+ * 3: Maybe we have to copy the original REQURI for the logs ?
+ * Note: we cannot log anymore if the request has been
+ * classified as invalid.
+ */
+ if (unlikely(s->logs.logwait & LW_REQ)) {
+ /* we have a complete HTTP request that we must log */
+ if ((txn->uri = pool_alloc(pool_head_requri)) != NULL) {
+ size_t len;
+
+ len = http_fmt_req_line(sl, txn->uri, global.tune.requri_len - 1);
+ txn->uri[len] = 0;
+
+ if (!(s->logs.logwait &= ~(LW_REQ|LW_INIT)))
+ s->do_log(s);
+ } else {
+ ha_alert("HTTP logging : out of memory.\n");
+ }
+ }
+
+ /* if the frontend has "option http-use-proxy-header", we'll check if
+ * we have what looks like a proxied connection instead of a connection,
+ * and in this case set the TX_USE_PX_CONN flag to use Proxy-connection.
+ * Note that this is *not* RFC-compliant, however browsers and proxies
+ * happen to do that despite being non-standard :-(
+ * We consider that a request not beginning with either '/' or '*' is
+ * a proxied connection, which covers both "scheme://location" and
+ * CONNECT ip:port.
+ */
+ if ((sess->fe->options2 & PR_O2_USE_PXHDR) &&
+ *HTX_SL_REQ_UPTR(sl) != '/' && *HTX_SL_REQ_UPTR(sl) != '*')
+ txn->flags |= TX_USE_PX_CONN;
+
+ /* 5: we may need to capture headers */
+ if (unlikely((s->logs.logwait & LW_REQHDR) && s->req_cap))
+ http_capture_headers(htx, s->req_cap, sess->fe->req_cap);
+
+ /* we may have to wait for the request's body */
+ if (s->be->options & PR_O_WREQ_BODY)
+ req->analysers |= AN_REQ_HTTP_BODY;
+
+ /*
+ * RFC7234#4:
+ * A cache MUST write through requests with methods
+ * that are unsafe (Section 4.2.1 of [RFC7231]) to
+ * the origin server; i.e., a cache is not allowed
+ * to generate a reply to such a request before
+ * having forwarded the request and having received
+ * a corresponding response.
+ *
+ * RFC7231#4.2.1:
+ * Of the request methods defined by this
+ * specification, the GET, HEAD, OPTIONS, and TRACE
+ * methods are defined to be safe.
+ */
+ if (likely(txn->meth == HTTP_METH_GET ||
+ txn->meth == HTTP_METH_HEAD ||
+ txn->meth == HTTP_METH_OPTIONS ||
+ txn->meth == HTTP_METH_TRACE))
+ txn->flags |= TX_CACHEABLE | TX_CACHE_COOK;
+
+ /* end of job, return OK */
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ return_int_err:
+ txn->status = 500;
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ goto return_prx_cond;
+
+ return_bad_req:
+ txn->status = 400;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ /* fall through */
+
+ return_prx_cond:
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
+
+
+/* This stream analyser runs all HTTP request processing which is common to
+ * frontends and backends, which means blocking ACLs, filters, connection-close,
+ * reqadd, stats and redirects. This is performed for the designated proxy.
+ * It returns 1 if the processing can continue on next analysers, or zero if it
+ * either needs more data or wants to immediately abort the request (eg: deny,
+ * error, ...).
+ */
+int http_process_req_common(struct stream *s, struct channel *req, int an_bit, struct proxy *px)
+{
+ struct list *def_rules, *rules;
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->req;
+ struct htx *htx;
+ struct redirect_rule *rule;
+ enum rule_result verdict;
+ struct connection *conn = objt_conn(sess->origin);
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&req->buf);
+
+ /* just in case we have some per-backend tracking. Only called on the first
+ * execution of the analyser. */
+ if (!s->current_rule && !s->current_rule_list)
+ stream_inc_be_http_req_ctr(s);
+
+ def_rules = ((px->defpx && (an_bit == AN_REQ_HTTP_PROCESS_FE || px != sess->fe)) ? &px->defpx->http_req_rules : NULL);
+ rules = &px->http_req_rules;
+
+ /* evaluate http-request rules */
+ if ((def_rules && !LIST_ISEMPTY(def_rules)) || !LIST_ISEMPTY(rules)) {
+ verdict = http_req_get_intercept_rule(px, def_rules, rules, s);
+
+ switch (verdict) {
+ case HTTP_RULE_RES_YIELD: /* some data miss, call the function later. */
+ goto return_prx_yield;
+
+ case HTTP_RULE_RES_CONT:
+ case HTTP_RULE_RES_STOP: /* nothing to do */
+ break;
+
+ case HTTP_RULE_RES_DENY: /* deny or tarpit */
+ if (txn->flags & TX_CLTARPIT)
+ goto tarpit;
+ goto deny;
+
+ case HTTP_RULE_RES_ABRT: /* abort request, response already sent. Eg: auth */
+ goto return_prx_cond;
+
+ case HTTP_RULE_RES_DONE: /* OK, but terminate request processing (eg: redirect) */
+ goto done;
+
+ case HTTP_RULE_RES_BADREQ: /* failed with a bad request */
+ goto return_bad_req;
+
+ case HTTP_RULE_RES_ERROR: /* failed with an internal error */
+ goto return_int_err;
+ }
+ }
+
+ if (px->options2 & (PR_O2_RSTRICT_REQ_HDR_NAMES_BLK|PR_O2_RSTRICT_REQ_HDR_NAMES_DEL)) {
+ verdict = http_req_restrict_header_names(s, htx, px);
+ if (verdict == HTTP_RULE_RES_DENY)
+ goto deny;
+ }
+
+ if (conn && (conn->flags & CO_FL_EARLY_DATA) &&
+ (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_SSL_WAIT_HS))) {
+ struct http_hdr_ctx ctx;
+
+ ctx.blk = NULL;
+ if (!http_find_header(htx, ist("Early-Data"), &ctx, 0)) {
+ if (unlikely(!http_add_header(htx, ist("Early-Data"), ist("1"))))
+ goto return_fail_rewrite;
+ }
+ }
+
+ /* OK at this stage, we know that the request was accepted according to
+ * the http-request rules, we can check for the stats. Note that the
+ * URI is detected *before* the req* rules in order not to be affected
+ * by a possible reqrep, while they are processed *after* so that a
+ * reqdeny can still block them. This clearly needs to change in 1.6!
+ */
+ if (!s->target && http_stats_check_uri(s, txn, px)) {
+ s->target = &http_stats_applet.obj_type;
+ if (unlikely(!sc_applet_create(s->scb, objt_applet(s->target)))) {
+ s->logs.request_ts = now_ns;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ goto return_int_err;
+ }
+
+ /* parse the whole stats request and extract the relevant information */
+ http_handle_stats(s, req, px);
+ verdict = http_req_get_intercept_rule(px, NULL, &px->uri_auth->http_req_rules, s);
+ /* not all actions implemented: deny, allow, auth */
+
+ if (verdict == HTTP_RULE_RES_DENY) /* stats http-request deny */
+ goto deny;
+
+ if (verdict == HTTP_RULE_RES_ABRT) /* stats auth / stats http-request auth */
+ goto return_prx_cond;
+
+ if (verdict == HTTP_RULE_RES_BADREQ) /* failed with a bad request */
+ goto return_bad_req;
+
+ if (verdict == HTTP_RULE_RES_ERROR) /* failed with an internal error */
+ goto return_int_err;
+ }
+
+ /* Proceed with the applets now. */
+ if (unlikely(objt_applet(s->target))) {
+ if (sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.intercepted_req);
+
+ if (http_handle_expect_hdr(s, htx, msg) == -1)
+ goto return_int_err;
+
+ if (!(s->flags & SF_ERR_MASK)) // this is not really an error but it is
+ s->flags |= SF_ERR_LOCAL; // to mark that it comes from the proxy
+ http_set_term_flags(s);
+
+ if (HAS_FILTERS(s))
+ req->analysers |= AN_REQ_FLT_HTTP_HDRS;
+
+ /* enable the minimally required analyzers to handle keep-alive and compression on the HTTP response */
+ req->analysers &= (AN_REQ_HTTP_BODY | AN_REQ_FLT_HTTP_HDRS | AN_REQ_FLT_END);
+ req->analysers &= ~AN_REQ_FLT_XFER_DATA;
+ req->analysers |= AN_REQ_HTTP_XFER_BODY;
+
+ s->scb->flags |= SC_FL_SND_ASAP;
+ s->flags |= SF_ASSIGNED;
+ goto done;
+ }
+
+ /* check whether we have some ACLs set to redirect this request */
+ list_for_each_entry(rule, &px->redirect_rules, list) {
+ if (rule->cond) {
+ int ret;
+
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ if (!ret)
+ continue;
+ }
+ if (!http_apply_redirect_rule(rule, s, txn))
+ goto return_int_err;
+ goto done;
+ }
+
+ /* POST requests may be accompanied with an "Expect: 100-Continue" header.
+ * If this happens, then the data will not come immediately, so we must
+ * send everything we have without waiting. Note that due to the small gain
+ * in waiting for the body of the request, it's easier to simply put the
+ * SC_FL_SND_ASAP flag on the back SC any time. It's a one-shot flag so it
+ * will remove itself once used.
+ */
+ s->scb->flags |= SC_FL_SND_ASAP;
+
+ done: /* done with this analyser, continue with next ones that the calling
+ * points will have set, if any.
+ */
+ req->analyse_exp = TICK_ETERNITY;
+ done_without_exp: /* done with this analyser, but don't reset the analyse_exp. */
+ req->analysers &= ~an_bit;
+ s->current_rule = s->current_rule_list = NULL;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ tarpit:
+ /* Allow cookie logging
+ */
+ if (s->be->cookie_name || sess->fe->capture_name)
+ http_manage_client_side_cookies(s, req);
+
+ /* When a connection is tarpitted, we use the tarpit timeout,
+ * which may be the same as the connect timeout if unspecified.
+ * If unset, then set it to zero because we really want it to
+ * eventually expire. We build the tarpit as an analyser.
+ */
+ channel_htx_erase(&s->req, htx);
+
+ /* wipe the request out so that we can drop the connection early
+ * if the client closes first.
+ */
+ channel_dont_connect(req);
+
+ req->analysers &= AN_REQ_FLT_END; /* remove switching rules etc... */
+ req->analysers |= AN_REQ_HTTP_TARPIT;
+ req->analyse_exp = tick_add_ifset(now_ms, s->be->timeout.tarpit);
+ if (!req->analyse_exp)
+ req->analyse_exp = tick_add(now_ms, 0);
+ stream_inc_http_err_ctr(s);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+ goto done_without_exp;
+
+ deny: /* this request was blocked (denied) */
+
+ /* Allow cookie logging
+ */
+ if (s->be->cookie_name || sess->fe->capture_name)
+ http_manage_client_side_cookies(s, req);
+
+ s->logs.request_ts = now_ns;
+ stream_inc_http_err_ctr(s);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+ goto return_prx_err;
+
+ return_fail_rewrite:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+ /* fall through */
+
+ return_int_err:
+ txn->status = 500;
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ goto return_prx_err;
+
+ return_bad_req:
+ txn->status = 400;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ /* fall through */
+
+ return_prx_err:
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ /* fall through */
+
+ return_prx_cond:
+ http_set_term_flags(s);
+
+ req->analysers &= AN_REQ_FLT_END;
+ req->analyse_exp = TICK_ETERNITY;
+ s->current_rule = s->current_rule_list = NULL;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+
+ return_prx_yield:
+ channel_dont_connect(req);
+ DBG_TRACE_DEVEL("waiting for more data",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+}
+
+/* This function performs all the processing enabled for the current request.
+ * It returns 1 if the processing can continue on next analysers, or zero if it
+ * needs more data, encounters an error, or wants to immediately abort the
+ * request. It relies on buffers flags, and updates s->req.analysers.
+ */
+int http_process_request(struct stream *s, struct channel *req, int an_bit)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct htx *htx;
+ struct connection *cli_conn = objt_conn(strm_sess(s)->origin);
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+
+ /*
+ * Right now, we know that we have processed the entire headers
+ * and that unwanted requests have been filtered out. We can do
+ * whatever we want with the remaining request. Also, now we
+ * may have separate values for ->fe, ->be.
+ */
+ htx = htxbuf(&req->buf);
+
+ /*
+ * 7: Now we can work with the cookies.
+ * Note that doing so might move headers in the request, but
+ * the fields will stay coherent and the URI will not move.
+ * This should only be performed in the backend.
+ */
+ if (s->be->cookie_name || sess->fe->capture_name)
+ http_manage_client_side_cookies(s, req);
+
+ /* 8: Generate unique ID if a "unique-id-format" is defined.
+ *
+ * A unique ID is generated even when it is not sent to ensure that the ID can make use of
+ * fetches only available in the HTTP request processing stage.
+ */
+ if (!LIST_ISEMPTY(&sess->fe->format_unique_id)) {
+ struct ist unique_id = stream_generate_unique_id(s, &sess->fe->format_unique_id);
+
+ if (!isttest(unique_id)) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ goto return_int_err;
+ }
+
+ /* send unique ID if a "unique-id-header" is defined */
+ if (isttest(sess->fe->header_unique_id) &&
+ unlikely(!http_add_header(htx, sess->fe->header_unique_id, unique_id)))
+ goto return_fail_rewrite;
+ }
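+
+ /* A frontend exercising the block above could use (format string is
+ * the one from the documentation, header name illustrative):
+ *
+ * unique-id-format %{+X}o\ %ci:%cp_%fi:%fp_%Ts_%rt:%pid
+ * unique-id-header X-Unique-ID
+ */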
+
+ /* handle http extensions (if configured) */
+ if (unlikely(!http_handle_7239_header(s, req)))
+ goto return_fail_rewrite;
+ if (unlikely(!http_handle_xff_header(s, req)))
+ goto return_fail_rewrite;
+ if (unlikely(!http_handle_xot_header(s, req)))
+ goto return_fail_rewrite;
+
+ /* Filter the request headers if there are filters attached to the
+ * stream.
+ */
+ if (HAS_FILTERS(s))
+ req->analysers |= AN_REQ_FLT_HTTP_HDRS;
+
+ /* If we have no server assigned yet and we're balancing on url_param
+ * with a POST request, we may be interested in checking the body for
+ * that parameter. This will be done in another analyser.
+ */
+ if (!(s->flags & (SF_ASSIGNED|SF_DIRECT)) &&
+ s->txn->meth == HTTP_METH_POST &&
+ (s->be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_PH) {
+ channel_dont_connect(req);
+ req->analysers |= AN_REQ_HTTP_BODY;
+ }
+
+ req->analysers &= ~AN_REQ_FLT_XFER_DATA;
+ req->analysers |= AN_REQ_HTTP_XFER_BODY;
+
+ /* We expect some data from the client. Unless we know for sure
+ * we already have a full request, we have to re-enable quick-ack
+ * in case we previously disabled it, otherwise we might cause
+ * the client to delay further data.
+ */
+ if ((sess->listener && (sess->listener->bind_conf->options & BC_O_NOQUICKACK)) && !(htx->flags & HTX_FL_EOM))
+ conn_set_quickack(cli_conn, 1);
+
+ /*************************************************************
+ * OK, that's finished for the headers. We have done what we *
+ * could. Let's switch to the DATA state. *
+ ************************************************************/
+ req->analyse_exp = TICK_ETERNITY;
+ req->analysers &= ~an_bit;
+
+ s->logs.request_ts = now_ns;
+ /* OK let's go on with the BODY now */
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ return_fail_rewrite:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+ /* fall through */
+
+ return_int_err:
+ txn->status = 500;
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
+
+/* This function is an analyser which processes the HTTP tarpit. It always
+ * returns zero, at the beginning because it prevents any other processing
+ * from occurring, and at the end because it terminates the request.
+ */
+int http_process_tarpit(struct stream *s, struct channel *req, int an_bit)
+{
+ struct http_txn *txn = s->txn;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, &txn->req);
+ /* This connection is being tarpitted. The CLIENT side has
+ * already set the connect expiration date to the right
+ * timeout. We just have to check that the client is still
+ * there and that the timeout has not expired.
+ */
+ channel_dont_connect(req);
+ if (!(s->scf->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) &&
+ !tick_is_expired(req->analyse_exp, now_ms)) {
+ /* Be sure to drain all data from the request channel */
+ channel_htx_erase(req, htxbuf(&req->buf));
+ DBG_TRACE_DEVEL("waiting for tarpit timeout expiry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+
+
+ /* We will set the queue timer to the time spent, just for
+ * logging purposes. We fake a 500 server error, so that the
+ * attacker will not suspect his connection has been tarpitted.
+ * It will not cause trouble to the logs because we can exclude
+ * the tarpitted connections by filtering on the 'PT' status flags.
+ */
+ s->logs.t_queue = ns_to_ms(now_ns - s->logs.accept_ts);
+
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, (!(s->scf->flags & SC_FL_ERROR) ? http_error_message(s) : NULL));
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+}
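+
+/* Usage sketch for a tarpit setup feeding this analyser (ACL file and
+ * timeout illustrative):
+ *
+ *   timeout tarpit 5s
+ *   http-request tarpit if { src -f /etc/haproxy/abusers.lst }
+ */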
+
+/* This function is an analyser which waits for the HTTP request body. It waits
+ * for either the buffer to be full, or the full advertised contents to have
+ * reached the buffer. It must only be called after the standard HTTP request
+ * processing has occurred, because it expects the request to be parsed and will
+ * look for the Expect header. It may send a 100-Continue interim response. It
+ * returns zero if it needs to read more data, or 1 once it has completed its
+ * analysis.
+ */
+int http_wait_for_request_body(struct stream *s, struct channel *req, int an_bit)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, &s->txn->req);
+
+
+ switch (http_wait_for_msg_body(s, req, s->be->timeout.httpreq, 0)) {
+ case HTTP_RULE_RES_CONT:
+ goto http_end;
+ case HTTP_RULE_RES_YIELD:
+ goto missing_data_or_waiting;
+ case HTTP_RULE_RES_BADREQ:
+ goto return_bad_req;
+ case HTTP_RULE_RES_ERROR:
+ goto return_int_err;
+ case HTTP_RULE_RES_ABRT:
+ goto return_prx_cond;
+ default:
+ goto return_int_err;
+ }
+
+ http_end:
+ /* The situation will not evolve, so let's give up on the analysis. */
+ s->logs.request_ts = now_ns; /* update the request timer to reflect full request */
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ missing_data_or_waiting:
+ channel_dont_connect(req);
+ DBG_TRACE_DEVEL("waiting for more data",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ return_int_err:
+ txn->status = 500;
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ goto return_prx_err;
+
+ return_bad_req: /* let's centralize all bad requests */
+ txn->status = 400;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ /* fall through */
+
+ return_prx_err:
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ /* fall through */
+
+ return_prx_cond:
+ http_set_term_flags(s);
+
+ req->analysers &= AN_REQ_FLT_END;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
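+
+/* This analyser is typically enabled by "option http-buffer-request" (the
+ * PR_O_WREQ_BODY option checked in http_wait_for_request()), and is bounded
+ * by "timeout http-request", e.g.:
+ *
+ *   option  http-buffer-request
+ *   timeout http-request 10s
+ */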
+
+/* This function is an analyser which forwards request body (including chunk
+ * sizes if any). It is called as soon as we must forward, even if we forward
+ * zero bytes. The only situation where it must not be called is when we're in
+ * tunnel mode and we want to forward till the close. It's used both to forward
+ * remaining data and to resync after end of body. It expects the msg_state to
+ * be between MSG_BODY and MSG_DONE (inclusive). It returns zero if it needs to
+ * read more data, or 1 once we can go on with next request or end the stream.
+ * When in MSG_DATA or MSG_TRAILERS, it will automatically forward chunk_len
+ * bytes of pending data + the headers if not already done.
+ */
+int http_request_forward_body(struct stream *s, struct channel *req, int an_bit)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->req;
+ struct htx *htx;
+ short status = 0;
+ int ret;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&req->buf);
+
+ if (htx->flags & HTX_FL_PARSING_ERROR)
+ goto return_bad_req;
+ if (htx->flags & HTX_FL_PROCESSING_ERROR)
+ goto return_int_err;
+
+ /* Note that we don't have to send 100-continue back because we don't
+ * need the data to complete our job, and it's up to the server to
+ * decide whether to return 100, 417 or anything else in return of
+ * an "Expect: 100-continue" header.
+ */
+ if (msg->msg_state == HTTP_MSG_BODY)
+ msg->msg_state = HTTP_MSG_DATA;
+
+ /* in most states, we should abort in case of early close */
+ channel_auto_close(req);
+
+ if (req->to_forward) {
+ if (req->to_forward == CHN_INFINITE_FORWARD) {
+ if (s->scf->flags & SC_FL_EOI)
+ msg->msg_state = HTTP_MSG_ENDING;
+ }
+ else {
+ /* We can't process the buffer's contents yet */
+ req->flags |= CF_WAKE_WRITE;
+ goto missing_data_or_waiting;
+ }
+ }
+
+ if (msg->msg_state >= HTTP_MSG_ENDING)
+ goto ending;
+
+ if (txn->meth == HTTP_METH_CONNECT) {
+ msg->msg_state = HTTP_MSG_ENDING;
+ goto ending;
+ }
+
+ /* Forward input data. We get it by removing all outgoing data not
+ * forwarded yet from HTX data size. If there are some data filters, we
+ * let them decide the amount of data to forward.
+ */
+ if (HAS_REQ_DATA_FILTERS(s)) {
+ ret = flt_http_payload(s, msg, htx->data);
+ if (ret < 0)
+ goto return_bad_req;
+ c_adv(req, ret);
+ }
+ else {
+ c_adv(req, htx->data - co_data(req));
+ if ((global.tune.options & GTUNE_USE_FAST_FWD) && (msg->flags & HTTP_MSGF_XFER_LEN))
+ channel_htx_forward_forever(req, htx);
+ }
+
+ if (htx->data != co_data(req))
+ goto missing_data_or_waiting;
+
+ /* Check if the end-of-message is reached and if so, switch the message
+ * in HTTP_MSG_ENDING state. Then if all data was marked to be
+ * forwarded, set the state to HTTP_MSG_DONE.
+ */
+ if (!(htx->flags & HTX_FL_EOM))
+ goto missing_data_or_waiting;
+
+ msg->msg_state = HTTP_MSG_ENDING;
+
+ ending:
+ s->scb->flags &= ~SC_FL_SND_EXP_MORE; /* no more data are expected to be sent */
+
+ /* other states, ENDING...TUNNEL */
+ if (msg->msg_state >= HTTP_MSG_DONE)
+ goto done;
+
+ if (HAS_REQ_DATA_FILTERS(s)) {
+ ret = flt_http_end(s, msg);
+ if (ret <= 0) {
+ if (!ret)
+ goto missing_data_or_waiting;
+ goto return_bad_req;
+ }
+ }
+
+ if (txn->meth == HTTP_METH_CONNECT)
+ msg->msg_state = HTTP_MSG_TUNNEL;
+ else {
+ msg->msg_state = HTTP_MSG_DONE;
+ req->to_forward = 0;
+ }
+
+ done:
+ /* we don't want to forward closes on DONE except in tunnel mode. */
+ if (!(txn->flags & TX_CON_WANT_TUN))
+ channel_dont_close(req);
+
+ if ((s->scb->flags & SC_FL_SHUT_DONE) && co_data(req)) {
+ /* request errors are most likely due to the server aborting the
+ * transfer. But handle server aborts only if there is no
+ * response. Otherwise, leave a chance to forward the response
+ * first.
+ */
+ if (htx_is_empty(htxbuf(&s->res.buf)))
+ goto return_srv_abort;
+ }
+
+ http_end_request(s);
+ if (!(req->analysers & an_bit)) {
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+ }
+
+ /* If "option abortonclose" is set on the backend, we want to monitor
+ * the client's connection and forward any shutdown notification to the
+ * server, which will decide whether to close or to go on processing the
+ * request. We only do that in tunnel mode, and not in other modes since
+ * it can be abused to exhaust source ports. */
+ if (s->be->options & PR_O_ABRT_CLOSE) {
+ channel_auto_read(req);
+ if ((s->scf->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) && !(txn->flags & TX_CON_WANT_TUN))
+ s->scb->flags |= SC_FL_NOLINGER;
+ channel_auto_close(req);
+ }
+ else if (s->txn->meth == HTTP_METH_POST) {
+		/* POST requests may require reading an extra CRLF sent by broken
+		 * browsers, which could cause an RST to be sent upon close
+		 * on some systems (eg: Linux). */
+ channel_auto_read(req);
+ }
+ DBG_TRACE_DEVEL("waiting for the end of the HTTP txn",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ missing_data_or_waiting:
+ /* stop waiting for data if the input is closed before the end */
+ if (msg->msg_state < HTTP_MSG_ENDING && (s->scf->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)))
+ goto return_cli_abort;
+
+ waiting:
+ /* waiting for the last bits to leave the buffer */
+ if (s->scb->flags & SC_FL_SHUT_DONE) {
+ /* Handle server aborts only if there is no response. Otherwise,
+		 * give a chance to forward the response first.
+ */
+ if (htx_is_empty(htxbuf(&s->res.buf)))
+ goto return_srv_abort;
+ }
+
+ /* When TE: chunked is used, we need to get there again to parse remaining
+ * chunks even if the client has closed, so we don't want to set CF_DONTCLOSE.
+ * And when content-length is used, we never want to let the possible
+ * shutdown be forwarded to the other side, as the state machine will
+ * take care of it once the client responds. It's also important to
+ * prevent TIME_WAITs from accumulating on the backend side, and for
+ * HTTP/2 where the last frame comes with a shutdown.
+ */
+ if (msg->flags & HTTP_MSGF_XFER_LEN)
+ channel_dont_close(req);
+
+	/* We know that more data are expected, but we couldn't send more than
+ * what we did. So we always set the SC_FL_SND_EXP_MORE flag so that the
+ * system knows it must not set a PUSH on this first part. Interactive
+ * modes are already handled by the stream sock layer. We must not do
+ * this in content-length mode because it could present the MSG_MORE
+ * flag with the last block of forwarded data, which would cause an
+ * additional delay to be observed by the receiver.
+ */
+ if (HAS_REQ_DATA_FILTERS(s))
+ s->scb->flags |= SC_FL_SND_EXP_MORE;
+
+ DBG_TRACE_DEVEL("waiting for more data to forward",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ return_cli_abort:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.cli_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= ((req->flags & CF_READ_TIMEOUT) ? SF_ERR_CLITO : SF_ERR_CLICL);
+ status = 400;
+ goto return_prx_cond;
+
+ return_srv_abort:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.srv_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= ((req->flags & CF_WRITE_TIMEOUT) ? SF_ERR_SRVTO : SF_ERR_SRVCL);
+ status = 502;
+ goto return_prx_cond;
+
+ return_int_err:
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ status = 500;
+ goto return_prx_cond;
+
+ return_bad_req:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ status = 400;
+ /* fall through */
+
+ return_prx_cond:
+ http_set_term_flags(s);
+ if (txn->status > 0) {
+ /* Note: we don't send any error if some data were already sent */
+ http_reply_and_close(s, txn->status, NULL);
+ } else {
+ txn->status = status;
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ }
+ DBG_TRACE_DEVEL("leaving on error ",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
+
+/* Reset the stream and the backend stream connector to a situation suitable
+ * for attempting a new connection. Returns 0 if we can attempt to retry, -1
+ * otherwise.
+ */
+static __inline int do_l7_retry(struct stream *s, struct stconn *sc)
+{
+ struct channel *req, *res;
+ int co_data;
+
+ if (s->conn_retries >= s->be->conn_retries)
+ return -1;
+ s->conn_retries++;
+ if (objt_server(s->target)) {
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ _HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
+ }
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.retries);
+ }
+ _HA_ATOMIC_INC(&s->be->be_counters.retries);
+
+ req = &s->req;
+ res = &s->res;
+
+ /* Remove any write error from the request, and read error from the response */
+ s->scf->flags &= ~(SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED);
+ req->flags &= ~CF_WRITE_TIMEOUT;
+ res->flags &= ~(CF_READ_TIMEOUT | CF_READ_EVENT);
+ res->analysers &= AN_RES_FLT_END;
+ s->conn_err_type = STRM_ET_NONE;
+ s->flags &= ~(SF_CONN_EXP | SF_ERR_MASK | SF_FINST_MASK);
+ s->conn_exp = TICK_ETERNITY;
+ stream_choose_redispatch(s);
+ res->to_forward = 0;
+ res->analyse_exp = TICK_ETERNITY;
+ res->total = 0;
+
+ s->scb->flags &= ~(SC_FL_ERROR|SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED);
+ if (sc_reset_endp(s->scb) < 0) {
+ s->flags |= SF_ERR_INTERNAL;
+ return -1;
+ }
+
+ b_free(&req->buf);
+ /* Swap the L7 buffer with the channel buffer */
+ /* We know we stored the co_data as b_data, so get it there */
+ co_data = b_data(&s->txn->l7_buffer);
+ b_set_data(&s->txn->l7_buffer, b_size(&s->txn->l7_buffer));
+ b_xfer(&req->buf, &s->txn->l7_buffer, b_data(&s->txn->l7_buffer));
+ co_set_data(req, co_data);
+
+ DBG_TRACE_DEVEL("perform a L7 retry", STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, s->txn);
+
+ b_reset(&res->buf);
+ co_set_data(res, 0);
+ return 0;
+}
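+
+/* Illustrative sketch (assumed configuration keywords, not part of the code
+ * path itself): the retry logic above is driven by the backend's "retries"
+ * and "retry-on" settings, e.g.:
+ *
+ *   backend app
+ *       retries  3
+ *       retry-on conn-failure empty-response response-timeout 0rtt-rejected
+ *
+ * Assuming the usual keyword mapping, "empty-response" corresponds to
+ * PR_RE_DISCONNECTED, "response-timeout" to PR_RE_TIMEOUT and "0rtt-rejected"
+ * to PR_RE_EARLY_ERROR, all tested in http_wait_for_response() below.
+ */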
+
+/* This stream analyser waits for a complete HTTP response. It returns 1 if the
+ * processing can continue on next analysers, or zero if it either needs more
+ * data or wants to immediately abort the response (eg: timeout, error, ...). It
+ * is tied to AN_RES_WAIT_HTTP and may remove itself from s->res.analysers
+ * when it has nothing left to do, and may remove any analyser when it wants to
+ * abort.
+ */
+int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit)
+{
+ /*
+	 * We will analyze a complete HTTP response to check its syntax.
+	 *
+	 * Once the start line and all headers are received, we may perform a
+	 * capture of the error (if any), and we will set a few fields. We also
+	 * handle logging and finally capture headers.
+ */
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct htx *htx;
+ struct connection *srv_conn;
+ struct htx_sl *sl;
+ int n;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&rep->buf);
+
+ /* Parsing errors are caught here */
+ if (htx->flags & HTX_FL_PARSING_ERROR)
+ goto return_bad_res;
+ if (htx->flags & HTX_FL_PROCESSING_ERROR)
+ goto return_int_err;
+
+ /*
+ * Now we quickly check if we have found a full valid response.
+ * If not so, we check the FD and buffer states before leaving.
+ * A full response is indicated by the fact that we have seen
+ * the double LF/CRLF, so the state is >= HTTP_MSG_BODY. Invalid
+ * responses are checked first.
+ *
+ * Depending on whether the client is still there or not, we
+ * may send an error response back or not. Note that normally
+ * we should only check for HTTP status there, and check I/O
+ * errors somewhere else.
+ */
+ next_one:
+ if (unlikely(htx_is_empty(htx) || htx->first == -1)) {
+ /* 1: have we encountered a read error ? */
+ if (s->scb->flags & SC_FL_ERROR) {
+ struct connection *conn = sc_conn(s->scb);
+
+ if ((txn->flags & TX_L7_RETRY) &&
+ (s->be->retry_type & PR_RE_DISCONNECTED) &&
+ (!conn || conn->err_code != CO_ER_SSL_EARLY_FAILED)) {
+ if (co_data(rep) || do_l7_retry(s, s->scb) == 0)
+ return 0;
+ }
+
+ /* Perform a L7 retry on empty response or because server refuses the early data. */
+ if ((txn->flags & TX_L7_RETRY) &&
+ (s->be->retry_type & PR_RE_EARLY_ERROR) &&
+ conn && conn->err_code == CO_ER_SSL_EARLY_FAILED &&
+ do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+
+ if (txn->flags & TX_NOT_FIRST)
+ goto abort_keep_alive;
+
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_READ_ERROR);
+ }
+
+ /* if the server refused the early data, just send a 425 */
+ if (conn && conn->err_code == CO_ER_SSL_EARLY_FAILED)
+ txn->status = 425;
+ else {
+ txn->status = 502;
+ stream_inc_http_fail_ctr(s);
+ }
+
+ s->scb->flags |= SC_FL_NOLINGER;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ http_set_term_flags(s);
+
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+		/* 2: read timeout: return a 504 to the client. */
+ else if (rep->flags & CF_READ_TIMEOUT) {
+ if ((txn->flags & TX_L7_RETRY) &&
+ (s->be->retry_type & PR_RE_TIMEOUT)) {
+ if (co_data(rep) || do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+ }
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_READ_TIMEOUT);
+ }
+
+ txn->status = 504;
+ stream_inc_http_fail_ctr(s);
+ s->scb->flags |= SC_FL_NOLINGER;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVTO;
+ http_set_term_flags(s);
+
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ /* 3: client abort with an abortonclose */
+ else if ((s->scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && (s->scb->flags & SC_FL_SHUT_DONE) &&
+ (s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.cli_aborts);
+
+ txn->status = 400;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLICL;
+ http_set_term_flags(s);
+
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ /* process_stream() will take care of the error */
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ /* 4: close from server, capture the response if the server has started to respond */
+ else if (s->scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) {
+ if ((txn->flags & TX_L7_RETRY) &&
+ (s->be->retry_type & PR_RE_DISCONNECTED)) {
+ if (co_data(rep) || do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+ }
+
+ if (txn->flags & TX_NOT_FIRST)
+ goto abort_keep_alive;
+
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_BROKEN_PIPE);
+ }
+
+ txn->status = 502;
+ stream_inc_http_fail_ctr(s);
+ s->scb->flags |= SC_FL_NOLINGER;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ http_set_term_flags(s);
+
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ /* 5: write error to client (we don't send any message then) */
+ else if (sc_ep_test(s->scf, SE_FL_ERR_PENDING)) {
+ if (txn->flags & TX_NOT_FIRST)
+ goto abort_keep_alive;
+
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ rep->analysers &= AN_RES_FLT_END;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLICL;
+ http_set_term_flags(s);
+
+ /* process_stream() will take care of the error */
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+ }
+
+ channel_dont_close(rep);
+ s->scb->flags |= SC_FL_RCV_ONCE; /* try to get back here ASAP */
+ DBG_TRACE_DEVEL("waiting for more data",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+
+	/* More interesting part now: we know that we have a complete
+ * response which at least looks like HTTP. We have an indicator
+ * of each header's length, so we can parse them quickly.
+ */
+ BUG_ON(htx_get_first_type(htx) != HTX_BLK_RES_SL);
+ sl = http_get_stline(htx);
+
+ /* Perform a L7 retry because of the status code */
+ if ((txn->flags & TX_L7_RETRY) &&
+ l7_status_match(s->be, sl->info.res.status) &&
+ do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry", STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+
+ /* Now, L7 buffer is useless, it can be released */
+ b_free(&txn->l7_buffer);
+
+ msg->msg_state = HTTP_MSG_BODY;
+
+ /* 0: we might have to print this header in debug mode */
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
+ int32_t pos;
+
+ http_debug_stline("srvrep", s, sl);
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ http_debug_hdr("srvhdr", s,
+ htx_get_blk_name(htx, blk),
+ htx_get_blk_value(htx, blk));
+ }
+ }
+
+ /* 1: get the status code and the version. Also set HTTP flags */
+ txn->server_status = txn->status = sl->info.res.status;
+ if (sl->flags & HTX_SL_F_VER_11)
+ msg->flags |= HTTP_MSGF_VER_11;
+ if (sl->flags & HTX_SL_F_XFER_LEN) {
+ msg->flags |= HTTP_MSGF_XFER_LEN;
+ if (sl->flags & HTX_SL_F_CLEN)
+ msg->flags |= HTTP_MSGF_CNT_LEN;
+ else if (sl->flags & HTX_SL_F_CHNK)
+ msg->flags |= HTTP_MSGF_TE_CHNK;
+ }
+ if (sl->flags & HTX_SL_F_BODYLESS)
+ msg->flags |= HTTP_MSGF_BODYLESS;
+ if (sl->flags & HTX_SL_F_CONN_UPG)
+ msg->flags |= HTTP_MSGF_CONN_UPG;
+
+ n = txn->status / 100;
+ if (n < 1 || n > 5)
+ n = 0;
+
+ /* when the client triggers a 4xx from the server, it's most often due
+ * to a missing object or permission. These events should be tracked
+ * because if they happen often, it may indicate a brute force or a
+ * vulnerability scan.
+ */
+ if (n == 4)
+ stream_inc_http_err_ctr(s);
+
+ if (n == 5 && txn->status != 501 && txn->status != 505)
+ stream_inc_http_fail_ctr(s);
+
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.p.http.rsp[n]);
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.p.http.cum_req);
+ }
+
+ /* Adjust server's health based on status code. Note: status codes 501
+ * and 505 are triggered on demand by client request, so we must not
+ * count them as server failures.
+ */
+ if (objt_server(s->target)) {
+ if (txn->status >= 100 && (txn->status < 500 || txn->status == 501 || txn->status == 505))
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_OK);
+ else
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_STS);
+ }
+
+ /*
+ * We may be facing a 100-continue response, or any other informational
+ * 1xx response which is non-final, in which case this is not the right
+ * response, and we're waiting for the next one. Let's allow this response
+ * to go to the client and wait for the next one. There's an exception for
+ * 101 which is used later in the code to switch protocols.
+ */
+ if (txn->status < 200 &&
+ (txn->status == 100 || txn->status >= 102)) {
+ FLT_STRM_CB(s, flt_http_reset(s, msg));
+ htx->first = channel_htx_fwd_headers(rep, htx);
+ msg->msg_state = HTTP_MSG_RPBEFORE;
+ msg->flags = 0;
+ txn->server_status = txn->status = 0;
+ s->logs.t_data = -1; /* was not a response yet */
+ s->scf->flags |= SC_FL_SND_ASAP; /* Send ASAP informational messages */
+ goto next_one;
+ }
+
+	/* A 101-switching-protocols response must contain a Connection header
+	 * with the "upgrade" option, and the request too. It means both sides
+	 * agreed to upgrade. It is not very strict because there is no test on
+	 * the Upgrade header content, but it is probably strong enough for now.
+	 */
+ if (txn->status == 101 &&
+ (!(txn->req.flags & HTTP_MSGF_CONN_UPG) || !(txn->rsp.flags & HTTP_MSGF_CONN_UPG)))
+ goto return_bad_res;
+
+ /*
+ * 2: check for cacheability.
+ */
+
+ switch (txn->status) {
+ case 200:
+ case 203:
+ case 204:
+ case 206:
+ case 300:
+ case 301:
+ case 404:
+ case 405:
+ case 410:
+ case 414:
+ case 501:
+ break;
+ default:
+ /* RFC7231#6.1:
+ * Responses with status codes that are defined as
+ * cacheable by default (e.g., 200, 203, 204, 206,
+ * 300, 301, 404, 405, 410, 414, and 501 in this
+ * specification) can be reused by a cache with
+ * heuristic expiration unless otherwise indicated
+ * by the method definition or explicit cache
+ * controls [RFC7234]; all other status codes are
+ * not cacheable by default.
+ */
+ txn->flags &= ~(TX_CACHEABLE | TX_CACHE_COOK);
+ break;
+ }
+
+ /*
+ * 3: we may need to capture headers
+ */
+ s->logs.logwait &= ~LW_RESP;
+ if (unlikely((s->logs.logwait & LW_RSPHDR) && s->res_cap))
+ http_capture_headers(htx, s->res_cap, sess->fe->rsp_cap);
+
+ /* Skip parsing if no content length is possible. */
+ if (unlikely((txn->meth == HTTP_METH_CONNECT && txn->status >= 200 && txn->status < 300) ||
+ txn->status == 101)) {
+ /* Either we've established an explicit tunnel, or we're
+ * switching the protocol. In both cases, we're very unlikely
+ * to understand the next protocols. We have to switch to tunnel
+ * mode, so that we transfer the request and responses then let
+ * this protocol pass unmodified. When we later implement specific
+ * parsers for such protocols, we'll want to check the Upgrade
+ * header which contains information about that protocol for
+ * responses with status 101 (eg: see RFC2817 about TLS).
+ */
+ txn->flags |= TX_CON_WANT_TUN;
+ }
+
+	/* check for NTLM authentication headers in 401 (WWW-Authenticate) and
+ * 407 (Proxy-Authenticate) responses and set the connection to private
+ */
+ srv_conn = sc_conn(s->scb);
+ if (srv_conn) {
+ struct ist hdr;
+ struct http_hdr_ctx ctx;
+
+ if (txn->status == 401)
+ hdr = ist("WWW-Authenticate");
+ else if (txn->status == 407)
+ hdr = ist("Proxy-Authenticate");
+ else
+ goto end;
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, hdr, &ctx, 0)) {
+ /* If www-authenticate contains "Negotiate", "Nego2", or "NTLM",
+ * possibly followed by blanks and a base64 string, the connection
+ * is private. Since it's a mess to deal with, we only check for
+ * values starting with "NTLM" or "Nego". Note that often multiple
+ * headers are sent by the server there.
+ */
+ if ((ctx.value.len >= 4 && strncasecmp(ctx.value.ptr, "Nego", 4) == 0) ||
+ (ctx.value.len >= 4 && strncasecmp(ctx.value.ptr, "NTLM", 4) == 0)) {
+ sess->flags |= SESS_FL_PREFER_LAST;
+ conn_set_owner(srv_conn, sess, NULL);
+ conn_set_private(srv_conn);
+				/* If it fails now, the same will be done in the mux->detach() callback */
+ session_add_conn(srv_conn->owner, srv_conn, srv_conn->target);
+ break;
+ }
+ }
+ }
+
+ end:
+ /* we want to have the response time before we start processing it */
+ s->logs.t_data = ns_to_ms(now_ns - s->logs.accept_ts);
+
+ /* end of job, return OK */
+ rep->analysers &= ~an_bit;
+ rep->analyse_exp = TICK_ETERNITY;
+ channel_auto_close(rep);
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+
+ return_int_err:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ txn->status = 500;
+ s->flags |= SF_ERR_INTERNAL;
+ goto return_prx_cond;
+
+ return_bad_res:
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_HDRRSP);
+ }
+ if ((s->be->retry_type & PR_RE_JUNK_REQUEST) &&
+ (txn->flags & TX_L7_RETRY) &&
+ do_l7_retry(s, s->scb) == 0) {
+ DBG_TRACE_DEVEL("leaving on L7 retry",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+ }
+ txn->status = 502;
+ stream_inc_http_fail_ctr(s);
+ /* fall through */
+
+ return_prx_cond:
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+
+ s->scb->flags |= SC_FL_NOLINGER;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+
+ abort_keep_alive:
+ /* A keep-alive request to the server failed on a network error.
+ * The client is required to retry. We need to close without returning
+ * any other information so that the client retries.
+ */
+ txn->status = 0;
+ s->logs.logwait = 0;
+ s->logs.level = 0;
+ s->scf->flags &= ~SC_FL_SND_EXP_MORE; /* speed up sending a previous response */
+ http_reply_and_close(s, txn->status, NULL);
+ DBG_TRACE_DEVEL("leaving by closing K/A connection",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+}
+
+/* This function performs all the processing enabled for the current response.
+ * It normally returns 1 unless it wants to break. It relies on buffer flags,
+ * and updates s->res.analysers. It might make sense to explode it into several
+ * other functions. It works like process_request (see indications above).
+ */
+int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, struct proxy *px)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->rsp;
+ struct htx *htx;
+ struct proxy *cur_proxy;
+ enum rule_result ret = HTTP_RULE_RES_CONT;
+
+ if (unlikely(msg->msg_state < HTTP_MSG_BODY)) /* we need more data */
+ return 0;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&rep->buf);
+
+ /* The stats applet needs to adjust the Connection header but we don't
+ * apply any filter there.
+ */
+ if (unlikely(objt_applet(s->target) == &http_stats_applet)) {
+ rep->analysers &= ~an_bit;
+ rep->analyse_exp = TICK_ETERNITY;
+ goto end;
+ }
+
+ /*
+ * We will have to evaluate the filters.
+ * As opposed to version 1.2, now they will be evaluated in the
+	 * filter order and not in the header order. This means that
+ * each filter has to be validated among all headers.
+ *
+ * Filters are tried with ->be first, then with ->fe if it is
+ * different from ->be.
+ *
+	 * Maybe we are in a resume condition. In this case I choose the
+ * "struct proxy" which contains the rule list matching the resume
+ * pointer. If none of these "struct proxy" match, I initialise
+ * the process with the first one.
+ *
+ * In fact, I check only correspondence between the current list
+ * pointer and the ->fe rule list. If it doesn't match, I initialize
+ * the loop with the ->be.
+ */
+ if (s->current_rule_list == &sess->fe->http_res_rules ||
+ (sess->fe->defpx && s->current_rule_list == &sess->fe->defpx->http_res_rules))
+ cur_proxy = sess->fe;
+ else
+ cur_proxy = s->be;
+
+ while (1) {
+ /* evaluate http-response rules */
+ if (ret == HTTP_RULE_RES_CONT || ret == HTTP_RULE_RES_STOP) {
+ struct list *def_rules, *rules;
+
+ def_rules = ((cur_proxy->defpx && (cur_proxy == s->be || cur_proxy->defpx != s->be->defpx)) ? &cur_proxy->defpx->http_res_rules : NULL);
+ rules = &cur_proxy->http_res_rules;
+
+ ret = http_res_get_intercept_rule(cur_proxy, def_rules, rules, s, 0);
+
+ switch (ret) {
+			case HTTP_RULE_RES_YIELD: /* some data are missing, call the function later. */
+ goto return_prx_yield;
+
+ case HTTP_RULE_RES_CONT:
+ case HTTP_RULE_RES_STOP: /* nothing to do */
+ break;
+
+ case HTTP_RULE_RES_DENY: /* deny or tarpit */
+ goto deny;
+
+ case HTTP_RULE_RES_ABRT: /* abort request, response already sent */
+ goto return_prx_cond;
+
+ case HTTP_RULE_RES_DONE: /* OK, but terminate request processing (eg: redirect) */
+ goto done;
+
+ case HTTP_RULE_RES_BADREQ: /* failed with a bad request */
+ goto return_bad_res;
+
+			case HTTP_RULE_RES_ERROR: /* failed with an internal error */
+ goto return_int_err;
+ }
+
+ }
+
+ /* check whether we're already working on the frontend */
+ if (cur_proxy == sess->fe)
+ break;
+ cur_proxy = sess->fe;
+ }
+
+ /* OK that's all we can do for 1xx responses */
+ if (unlikely(txn->status < 200 && txn->status != 101))
+ goto end;
+
+ /*
+ * Now check for a server cookie.
+ */
+ if (s->be->cookie_name || sess->fe->capture_name || (s->be->options & PR_O_CHK_CACHE))
+ http_manage_server_side_cookies(s, rep);
+
+ /*
+ * Check for cache-control or pragma headers if required.
+ */
+ if ((s->be->options & PR_O_CHK_CACHE) || (s->be->ck_opts & PR_CK_NOC))
+ http_check_response_for_cacheability(s, rep);
+
+ /*
+ * Add server cookie in the response if needed
+ */
+ if (objt_server(s->target) && (s->be->ck_opts & PR_CK_INS) &&
+ !((txn->flags & TX_SCK_FOUND) && (s->be->ck_opts & PR_CK_PSV)) &&
+ (!(s->flags & SF_DIRECT) ||
+ ((s->be->cookie_maxidle || txn->cookie_last_date) &&
+ (!txn->cookie_last_date || (txn->cookie_last_date - date.tv_sec) < 0)) ||
+ (s->be->cookie_maxlife && !txn->cookie_first_date) || // set the first_date
+ (!s->be->cookie_maxlife && txn->cookie_first_date)) && // remove the first_date
+ (!(s->be->ck_opts & PR_CK_POST) || (txn->meth == HTTP_METH_POST)) &&
+ !(s->flags & SF_IGNORE_PRST)) {
+ /* the server is known, it's not the one the client requested, or the
+ * cookie's last seen date needs to be refreshed. We have to
+ * insert a set-cookie here, except if we want to insert only on POST
+ * requests and this one isn't. Note that servers which don't have cookies
+ * (eg: some backup servers) will return a full cookie removal request.
+ */
+ if (!__objt_server(s->target)->cookie) {
+ chunk_printf(&trash,
+ "%s=; Expires=Thu, 01-Jan-1970 00:00:01 GMT; path=/",
+ s->be->cookie_name);
+ }
+ else {
+ chunk_printf(&trash, "%s=%s", s->be->cookie_name, __objt_server(s->target)->cookie);
+
+ if (s->be->cookie_maxidle || s->be->cookie_maxlife) {
+ /* emit last_date, which is mandatory */
+ trash.area[trash.data++] = COOKIE_DELIM_DATE;
+ s30tob64((date.tv_sec+3) >> 2,
+ trash.area + trash.data);
+ trash.data += 5;
+
+ if (s->be->cookie_maxlife) {
+ /* emit first_date, which is either the original one or
+ * the current date.
+ */
+ trash.area[trash.data++] = COOKIE_DELIM_DATE;
+ s30tob64(txn->cookie_first_date ?
+ txn->cookie_first_date >> 2 :
+ (date.tv_sec+3) >> 2,
+ trash.area + trash.data);
+ trash.data += 5;
+ }
+ }
+ chunk_appendf(&trash, "; path=/");
+ }
+
+ if (s->be->cookie_domain)
+ chunk_appendf(&trash, "; domain=%s", s->be->cookie_domain);
+
+ if (s->be->ck_opts & PR_CK_HTTPONLY)
+ chunk_appendf(&trash, "; HttpOnly");
+
+ if (s->be->ck_opts & PR_CK_SECURE)
+ chunk_appendf(&trash, "; Secure");
+
+ if (s->be->cookie_attrs)
+ chunk_appendf(&trash, "; %s", s->be->cookie_attrs);
+
+ if (unlikely(!http_add_header(htx, ist("Set-Cookie"), ist2(trash.area, trash.data))))
+ goto return_fail_rewrite;
+
+ txn->flags &= ~TX_SCK_MASK;
+ if (__objt_server(s->target)->cookie && (s->flags & SF_DIRECT))
+ /* the server did not change, only the date was updated */
+ txn->flags |= TX_SCK_UPDATED;
+ else
+ txn->flags |= TX_SCK_INSERTED;
+
+		/* Here, we will tell any cache on the client side that we don't
+		 * want it to cache this reply, because HTTP/1.0 caches also cache cookies!
+ * Some caches understand the correct form: 'no-cache="set-cookie"', but
+ * others don't (eg: apache <= 1.3.26). So we use 'private' instead.
+ */
+ if ((s->be->ck_opts & PR_CK_NOC) && (txn->flags & TX_CACHEABLE)) {
+
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+
+ if (unlikely(!http_add_header(htx, ist("Cache-control"), ist("private"))))
+ goto return_fail_rewrite;
+ }
+ }
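+
+	/* Illustrative sketch (assumed configuration syntax): the insertion
+	 * logic above is typically enabled with something like:
+	 *
+	 *   backend app
+	 *       cookie SRVID insert indirect nocache maxidle 30m maxlife 8h
+	 *       server s1 192.0.2.10:80 cookie s1
+	 *
+	 * where "maxidle"/"maxlife" enable the base64-encoded last_date and
+	 * first_date fields emitted with s30tob64() above, and "nocache" sets
+	 * PR_CK_NOC, which adds the "Cache-control: private" header.
+	 */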
+
+ /*
+ * Check if result will be cacheable with a cookie.
+ * We'll block the response if security checks have caught
+ * nasty things such as a cacheable cookie.
+ */
+ if (((txn->flags & (TX_CACHEABLE | TX_CACHE_COOK | TX_SCK_PRESENT)) ==
+ (TX_CACHEABLE | TX_CACHE_COOK | TX_SCK_PRESENT)) &&
+ (s->be->options & PR_O_CHK_CACHE)) {
+ /* we're in presence of a cacheable response containing
+ * a set-cookie header. We'll block it as requested by
+ * the 'checkcache' option, and send an alert.
+ */
+ ha_alert("Blocking cacheable cookie in response from instance %s, server %s.\n",
+ s->be->id, objt_server(s->target) ? __objt_server(s->target)->id : "<dispatch>");
+ send_log(s->be, LOG_ALERT,
+ "Blocking cacheable cookie in response from instance %s, server %s.\n",
+ s->be->id, objt_server(s->target) ? __objt_server(s->target)->id : "<dispatch>");
+ goto deny;
+ }
+
+ end:
+ /*
+	 * Evaluate after-response rules before forwarding the response. Rules
+	 * from the backend are evaluated first, then those from the frontend if
+	 * it differs.
+ */
+ if (!http_eval_after_res_rules(s))
+ goto return_int_err;
+
+ /* Filter the response headers if there are filters attached to the
+ * stream.
+ */
+ if (HAS_FILTERS(s))
+ rep->analysers |= AN_RES_FLT_HTTP_HDRS;
+
+ /* Always enter in the body analyzer */
+ rep->analysers &= ~AN_RES_FLT_XFER_DATA;
+ rep->analysers |= AN_RES_HTTP_XFER_BODY;
+
+ /* if the user wants to log as soon as possible, without counting
+ * bytes from the server, then this is the right moment. We have
+ * to temporarily assign bytes_out to log what we currently have.
+ */
+ if (!LIST_ISEMPTY(&sess->fe->logformat) && !(s->logs.logwait & LW_BYTES)) {
+ s->logs.t_close = s->logs.t_data; /* to get a valid end date */
+ s->logs.bytes_out = htx->data;
+ s->do_log(s);
+ s->logs.bytes_out = 0;
+ }
+
+ done:
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ rep->analysers &= ~an_bit;
+ rep->analyse_exp = TICK_ETERNITY;
+ s->current_rule = s->current_rule_list = NULL;
+ return 1;
+
+ deny:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_resp);
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_resp);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_resp);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.denied_resp);
+ goto return_prx_err;
+
+ return_fail_rewrite:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_rewrites);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_rewrites);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_rewrites);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_rewrites);
+ /* fall through */
+
+ return_int_err:
+ txn->status = 500;
+ s->flags |= SF_ERR_INTERNAL;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ goto return_prx_err;
+
+ return_bad_res:
+ txn->status = 502;
+ stream_inc_http_fail_ctr(s);
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_RSP);
+ }
+ /* fall through */
+
+ return_prx_err:
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ /* fall through */
+
+ return_prx_cond:
+ s->logs.t_data = -1; /* was not a valid response */
+ s->scb->flags |= SC_FL_NOLINGER;
+
+ http_set_term_flags(s);
+
+ rep->analysers &= AN_RES_FLT_END;
+ s->req.analysers &= AN_REQ_FLT_END;
+ rep->analyse_exp = TICK_ETERNITY;
+ s->current_rule = s->current_rule_list = NULL;
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+
+ return_prx_yield:
+ channel_dont_close(rep);
+ DBG_TRACE_DEVEL("waiting for more data",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+}
+
+/* This function is an analyser which forwards response body (including chunk
+ * sizes if any). It is called as soon as we must forward, even if we forward
+ * zero byte. The only situation where it must not be called is when we're in
+ * tunnel mode and we want to forward till the close. It's used both to forward
+ * remaining data and to resync after end of body. It expects the msg_state to
+ * be between MSG_BODY and MSG_DONE (inclusive). It returns zero if it needs to
+ * read more data, or 1 once we can go on with next request or end the stream.
+ *
+ * It is capable of compressing response data both in content-length mode and
+ * in chunked mode. The state machine follows different flows depending on
+ * whether content-length and chunked modes are used, since there are no
+ * trailers in content-length:
+ *
+ * chk-mode cl-mode
+ * ,----- BODY -----.
+ * / \
+ * V size > 0 V chk-mode
+ * .--> SIZE -------------> DATA -------------> CRLF
+ * | | size == 0 | last byte |
+ * | v final crlf v inspected |
+ * | TRAILERS -----------> DONE |
+ * | |
+ * `----------------------------------------------'
+ *
+ * Compression only happens in the DATA state, and must be flushed in final
+ * states (TRAILERS/DONE) or when leaving on missing data. Normal forwarding
+ * is performed at once on final states for all bytes parsed, or when leaving
+ * on missing data.
+ */
+int http_response_forward_body(struct stream *s, struct channel *res, int an_bit)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &s->txn->rsp;
+ struct htx *htx;
+ int ret;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn, msg);
+
+ htx = htxbuf(&res->buf);
+
+ if (htx->flags & HTX_FL_PARSING_ERROR)
+ goto return_bad_res;
+ if (htx->flags & HTX_FL_PROCESSING_ERROR)
+ goto return_int_err;
+
+ if (msg->msg_state == HTTP_MSG_BODY)
+ msg->msg_state = HTTP_MSG_DATA;
+
+ /* in most states, we should abort in case of early close */
+ channel_auto_close(res);
+
+ if (res->to_forward) {
+ if (res->to_forward == CHN_INFINITE_FORWARD) {
+ if (s->scb->flags & SC_FL_EOI)
+ msg->msg_state = HTTP_MSG_ENDING;
+ }
+ else {
+ /* We can't process the buffer's contents yet */
+ res->flags |= CF_WAKE_WRITE;
+ goto missing_data_or_waiting;
+ }
+ }
+
+ if (msg->msg_state >= HTTP_MSG_ENDING)
+ goto ending;
+
+ if ((txn->meth == HTTP_METH_CONNECT && txn->status >= 200 && txn->status < 300) || txn->status == 101 ||
+ (!(msg->flags & HTTP_MSGF_XFER_LEN) && !HAS_RSP_DATA_FILTERS(s))) {
+ msg->msg_state = HTTP_MSG_ENDING;
+ goto ending;
+ }
+
+ /* Forward input data. We get it by removing all outgoing data not
+ * forwarded yet from HTX data size. If there are some data filters, we
+ * let them decide the amount of data to forward.
+ */
+ if (HAS_RSP_DATA_FILTERS(s)) {
+ ret = flt_http_payload(s, msg, htx->data);
+ if (ret < 0)
+ goto return_bad_res;
+ c_adv(res, ret);
+ }
+ else {
+ c_adv(res, htx->data - co_data(res));
+ if ((global.tune.options & GTUNE_USE_FAST_FWD) && (msg->flags & HTTP_MSGF_XFER_LEN))
+ channel_htx_forward_forever(res, htx);
+ }
+
+ if (htx->data != co_data(res))
+ goto missing_data_or_waiting;
+
+ if (!(msg->flags & HTTP_MSGF_XFER_LEN) && (s->scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))) {
+ msg->msg_state = HTTP_MSG_ENDING;
+ goto ending;
+ }
+
+ /* Check if the end-of-message is reached and if so, switch the message
+ * in HTTP_MSG_ENDING state. Then if all data was marked to be
+ * forwarded, set the state to HTTP_MSG_DONE.
+ */
+ if (!(htx->flags & HTX_FL_EOM))
+ goto missing_data_or_waiting;
+
+ msg->msg_state = HTTP_MSG_ENDING;
+
+ ending:
+ s->scf->flags &= ~SC_FL_SND_EXP_MORE; /* no more data are expected to be sent */
+
+ /* other states, ENDING...TUNNEL */
+ if (msg->msg_state >= HTTP_MSG_DONE)
+ goto done;
+
+ if (HAS_RSP_DATA_FILTERS(s)) {
+ ret = flt_http_end(s, msg);
+ if (ret <= 0) {
+ if (!ret)
+ goto missing_data_or_waiting;
+ goto return_bad_res;
+ }
+ }
+
+ if (!(txn->flags & TX_CON_WANT_TUN) && !(msg->flags & HTTP_MSGF_XFER_LEN)) {
+ /* One-side tunnel */
+ msg->msg_state = HTTP_MSG_TUNNEL;
+ }
+ else {
+ msg->msg_state = HTTP_MSG_DONE;
+ res->to_forward = 0;
+ }
+
+ done:
+
+ channel_dont_close(res);
+
+ if ((s->scf->flags & SC_FL_SHUT_DONE) && co_data(res)) {
+ /* response errors are most likely due to the client aborting
+ * the transfer. */
+ goto return_cli_abort;
+ }
+
+ http_end_response(s);
+ if (!(res->analysers & an_bit)) {
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 1;
+ }
+ DBG_TRACE_DEVEL("waiting for the end of the HTTP txn",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ missing_data_or_waiting:
+ if (s->scf->flags & SC_FL_SHUT_DONE)
+ goto return_cli_abort;
+
+ /* stop waiting for data if the input is closed before the end. If the
+ * client side was already closed, it means that the client has aborted,
+ * so we don't want to count this as a server abort. Otherwise it's a
+ * server abort.
+ */
+ if (msg->msg_state < HTTP_MSG_ENDING && (s->scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))) {
+ if ((s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) &&
+ (s->scb->flags & SC_FL_SHUT_DONE))
+ goto return_cli_abort;
+ /* If we have some pending data, we continue the processing */
+ if (htx_is_empty(htx))
+ goto return_srv_abort;
+ }
+
+ /* When TE: chunked is used, we need to get there again to parse
+ * remaining chunks even if the server has closed, so we don't want to
+	 * set CF_DONTCLOSE. Similarly when there is a content-length or if
+	 * there are filters registered on the stream, we don't want to forward
+	 * a close.
+ */
+ if ((msg->flags & HTTP_MSGF_XFER_LEN) || HAS_RSP_DATA_FILTERS(s))
+ channel_dont_close(res);
+
+	/* We know that more data are expected, but we couldn't send more than
+ * what we did. So we always set the SC_FL_SND_EXP_MORE flag so that the
+ * system knows it must not set a PUSH on this first part. Interactive
+ * modes are already handled by the stream sock layer. We must not do
+ * this in content-length mode because it could present the MSG_MORE
+ * flag with the last block of forwarded data, which would cause an
+ * additional delay to be observed by the receiver.
+ */
+ if (HAS_RSP_DATA_FILTERS(s))
+ s->scf->flags |= SC_FL_SND_EXP_MORE;
+
+ /* the stream handler will take care of timeouts and errors */
+ DBG_TRACE_DEVEL("waiting for more data to forward",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA, s, txn);
+ return 0;
+
+ return_srv_abort:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.srv_aborts);
+ stream_inc_http_fail_ctr(s);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= ((res->flags & CF_READ_TIMEOUT) ? SF_ERR_SRVTO : SF_ERR_SRVCL);
+ goto return_error;
+
+ return_cli_abort:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.cli_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= ((res->flags & CF_WRITE_TIMEOUT) ? SF_ERR_CLITO : SF_ERR_CLICL);
+ goto return_error;
+
+ return_int_err:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ s->flags |= SF_ERR_INTERNAL;
+ goto return_error;
+
+ return_bad_res:
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target)) {
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+ health_adjust(__objt_server(s->target), HANA_STATUS_HTTP_RSP);
+ }
+ stream_inc_http_fail_ctr(s);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ /* fall through */
+
+ return_error:
+ /* don't send any error message as we're in the body */
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, NULL);
+ stream_inc_http_fail_ctr(s);
+ DBG_TRACE_DEVEL("leaving on error",
+ STRM_EV_STRM_ANA|STRM_EV_HTTP_ANA|STRM_EV_HTTP_ERR, s, txn);
+ return 0;
+}
+
+/* Perform an HTTP redirect based on the information in <rule>. The function
+ * returns zero in case of an irrecoverable error such as too large a request
+ * to build a valid response, 1 in case of successful redirect (hence the rule
+ * is final), or 2 if the rule has to be silently skipped.
+ */
+int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struct http_txn *txn)
+{
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct buffer *chunk;
+ struct ist status, reason, location;
+ unsigned int flags;
+	int ret = 1, close = 0; /* Try to keep the connection alive by default */
+
+ chunk = alloc_trash_chunk();
+ if (!chunk) {
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ goto fail;
+ }
+
+ /*
+ * Create the location
+ */
+ htx = htxbuf(&req->buf);
+ switch(rule->type) {
+ case REDIRECT_TYPE_SCHEME: {
+ struct http_hdr_ctx ctx;
+ struct ist path, host;
+ struct http_uri_parser parser;
+
+ host = ist("");
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Host"), &ctx, 0))
+ host = ctx.value;
+
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ /* build message using path */
+ if (isttest(path)) {
+ if (rule->flags & REDIRECT_FLAG_DROP_QS) {
+ int qs = 0;
+ while (qs < path.len) {
+ if (*(path.ptr + qs) == '?') {
+ path.len = qs;
+ break;
+ }
+ qs++;
+ }
+ }
+ }
+ else
+ path = ist("/");
+
+ if (rule->rdr_str) { /* this is an old "redirect" rule */
+ /* add scheme */
+ if (!chunk_memcat(chunk, rule->rdr_str, rule->rdr_len))
+ goto fail;
+ }
+ else {
+ /* add scheme with executing log format */
+ chunk->data += build_logline(s, chunk->area + chunk->data,
+ chunk->size - chunk->data,
+ &rule->rdr_fmt);
+ }
+ /* add "://" + host + path */
+ if (!chunk_memcat(chunk, "://", 3) ||
+ !chunk_memcat(chunk, host.ptr, host.len) ||
+ !chunk_memcat(chunk, path.ptr, path.len))
+ goto fail;
+
+ /* append a slash at the end of the location if needed and missing */
+ if (chunk->data && chunk->area[chunk->data - 1] != '/' &&
+ (rule->flags & REDIRECT_FLAG_APPEND_SLASH)) {
+ if (chunk->data + 1 >= chunk->size)
+ goto fail;
+ chunk->area[chunk->data++] = '/';
+ }
+ break;
+ }
+
+ case REDIRECT_TYPE_PREFIX: {
+ struct ist path;
+ struct http_uri_parser parser;
+
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ /* build message using path */
+ if (isttest(path)) {
+ if (rule->flags & REDIRECT_FLAG_DROP_QS) {
+ int qs = 0;
+ while (qs < path.len) {
+ if (*(path.ptr + qs) == '?') {
+ path.len = qs;
+ break;
+ }
+ qs++;
+ }
+ }
+ }
+ else
+ path = ist("/");
+
+ if (rule->rdr_str) { /* this is an old "redirect" rule */
+ /* add prefix. Note that if prefix == "/", we don't want to
+ * add anything, otherwise it makes it hard for the user to
+ * configure a self-redirection.
+ */
+ if (rule->rdr_len != 1 || *rule->rdr_str != '/') {
+ if (!chunk_memcat(chunk, rule->rdr_str, rule->rdr_len))
+ goto fail;
+ }
+ }
+ else {
+ /* add prefix with executing log format */
+ chunk->data += build_logline(s, chunk->area + chunk->data,
+ chunk->size - chunk->data,
+ &rule->rdr_fmt);
+ }
+
+ /* add path */
+ if (!chunk_memcat(chunk, path.ptr, path.len))
+ goto fail;
+
+ /* append a slash at the end of the location if needed and missing */
+ if (chunk->data && chunk->area[chunk->data - 1] != '/' &&
+ (rule->flags & REDIRECT_FLAG_APPEND_SLASH)) {
+ if (chunk->data + 1 >= chunk->size)
+ goto fail;
+ chunk->area[chunk->data++] = '/';
+ }
+ break;
+ }
+ case REDIRECT_TYPE_LOCATION:
+ default:
+ if (rule->rdr_str) { /* this is an old "redirect" rule */
+ /* add location */
+ if (!chunk_memcat(chunk, rule->rdr_str, rule->rdr_len))
+ goto fail;
+ }
+ else {
+ /* add location with executing log format */
+ int len = build_logline(s, chunk->area + chunk->data,
+ chunk->size - chunk->data,
+ &rule->rdr_fmt);
+ if (!len && rule->flags & REDIRECT_FLAG_IGNORE_EMPTY) {
+ ret = 2;
+ goto out;
+ }
+
+ chunk->data += len;
+ }
+ break;
+ }
+ location = ist2(chunk->area, chunk->data);
+
+ /*
+ * Create the 30x response
+ */
+ switch (rule->code) {
+ case 308:
+ status = ist("308");
+ reason = ist("Permanent Redirect");
+ break;
+ case 307:
+ status = ist("307");
+ reason = ist("Temporary Redirect");
+ break;
+ case 303:
+ status = ist("303");
+ reason = ist("See Other");
+ break;
+ case 301:
+ status = ist("301");
+ reason = ist("Moved Permanently");
+ break;
+ case 302:
+ default:
+ status = ist("302");
+ reason = ist("Found");
+ break;
+ }
+
+ if (!(txn->req.flags & HTTP_MSGF_BODYLESS) && txn->req.msg_state != HTTP_MSG_DONE)
+ close = 1;
+
+ htx = htx_from_buf(&res->buf);
+ /* Trim any possible response */
+ channel_htx_truncate(&s->res, htx);
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN|HTX_SL_F_BODYLESS);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), status, reason);
+ if (!sl)
+ goto fail;
+ sl->info.res.status = rule->code;
+ s->txn->status = rule->code;
+
+ if (close && !htx_add_header(htx, ist("Connection"), ist("close")))
+ goto fail;
+
+ if (!htx_add_header(htx, ist("Content-length"), ist("0")) ||
+ !htx_add_header(htx, ist("Location"), location))
+ goto fail;
+
+ if (rule->code == 302 || rule->code == 303 || rule->code == 307) {
+ if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")))
+ goto fail;
+ }
+
+ if (rule->cookie_len) {
+ if (!htx_add_header(htx, ist("Set-Cookie"), ist2(rule->cookie_str, rule->cookie_len)))
+ goto fail;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ htx->flags |= HTX_FL_EOM;
+ htx_to_buf(htx, &res->buf);
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ http_set_term_flags(s);
+
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+
+ if (rule->flags & REDIRECT_FLAG_FROM_REQ) {
+ /* let's log the request time */
+ s->logs.request_ts = now_ns;
+ req->analysers &= AN_REQ_FLT_END;
+
+ if (s->sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.intercepted_req);
+ }
+
+ out:
+ free_trash_chunk(chunk);
+ return ret;
+
+ fail:
+ /* If an error occurred, remove the incomplete HTTP response from the
+ * buffer */
+ channel_htx_truncate(res, htxbuf(&res->buf));
+ ret = 0;
+ goto out;
+}
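+
+/* Illustrative sketch (assumed configuration syntax) of rules handled by the
+ * function above:
+ *
+ *   http-request redirect scheme https code 301 unless { ssl_fc }
+ *   http-request redirect prefix /app code 302 drop-query
+ *   http-request redirect location %[hdr(x-target)] if { hdr(x-target) -m found }
+ *
+ * These would map to REDIRECT_TYPE_SCHEME, REDIRECT_TYPE_PREFIX and
+ * REDIRECT_TYPE_LOCATION respectively, with "drop-query" setting
+ * REDIRECT_FLAG_DROP_QS.
+ */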
+
+/* This function filters the request header names to only allow [0-9a-zA-Z-]
+ * characters. Depending on the proxy configuration, headers with a name not
+ * matching this charset are removed or the request is rejected with a
+ * 403-Forbidden response if such names are found. It returns HTTP_RULE_RES_CONT
+ * to continue the request processing or HTTP_RULE_RES_DENY if the request is
+ * rejected.
+ */
+static enum rule_result http_req_restrict_header_names(struct stream *s, struct htx *htx, struct proxy *px)
+{
+ struct htx_blk *blk;
+ enum rule_result rule_ret = HTTP_RULE_RES_CONT;
+
+ blk = htx_get_first_blk(htx);
+ while (blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_HDR) {
+ struct ist n = htx_get_blk_name(htx, blk);
+ int i, end = istlen(n);
+
+ for (i = 0; i < end; i++) {
+ if (!isalnum((unsigned char)n.ptr[i]) && n.ptr[i] != '-') {
+ break;
+ }
+ }
+
+ if (i < end) {
+ /* Disallowed character found - block the request or remove the header */
+ if (px->options2 & PR_O2_RSTRICT_REQ_HDR_NAMES_BLK)
+ goto block;
+ blk = htx_remove_blk(htx, blk);
+ continue;
+ }
+ }
+ if (type == HTX_BLK_EOH)
+ break;
+
+ blk = htx_get_next_blk(htx, blk);
+ }
+ out:
+ return rule_ret;
+ block:
+ /* Block the request returning a 403-Forbidden response */
+ s->txn->status = 403;
+ rule_ret = HTTP_RULE_RES_DENY;
+ goto out;
+}
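+
+/* Illustrative sketch (assumed configuration syntax) selecting the behaviour
+ * implemented above:
+ *
+ *   option restrict-req-hdr-names reject    # 403 (PR_O2_RSTRICT_REQ_HDR_NAMES_BLK)
+ *   option restrict-req-hdr-names delete    # silently remove offending headers
+ *   option restrict-req-hdr-names preserve  # leave header names untouched
+ */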
+
+/* Replace all headers matching the name <name>. The header value is replaced if
+ * it matches the regex <re>. <str> is used for the replacement. If <full> is
+ * set to 1, the full-line is matched and replaced. Otherwise, comma-separated
+ * values are evaluated one by one. It returns 0 on success and -1 on error.
+ */
+int http_replace_hdrs(struct stream* s, struct htx *htx, struct ist name,
+ const char *str, struct my_regex *re, int full)
+{
+ struct http_hdr_ctx ctx;
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, name, &ctx, full)) {
+ struct buffer *output = get_trash_chunk();
+
+ if (!regex_exec_match2(re, ctx.value.ptr, ctx.value.len, MAX_MATCH, pmatch, 0))
+ continue;
+
+ output->data = exp_replace(output->area, output->size, ctx.value.ptr, str, pmatch);
+ if (output->data == -1)
+ return -1;
+ if (!http_replace_header_value(htx, &ctx, ist2(output->area, output->data)))
+ return -1;
+ }
+ return 0;
+}
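+
+/* Illustrative sketch (assumed configuration syntax): the <full> argument
+ * distinguishes the two actions built on this helper, e.g.:
+ *
+ *   http-request replace-header Host ^(.*)$ \1:443                        # full=1, whole value
+ *   http-request replace-value X-Forwarded-For ^192\.168\.(.*)$ 172.16.\1 # full=0, per comma-item
+ */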
+
+/* This function executes one of the set-{method,path,query,uri} actions. It
+ * takes the string from the variable 'replace' with length 'len', then modifies
+ * the relevant part of the request line accordingly. Then it updates various
+ * pointers to the next elements which were moved, and the total buffer length.
+ * It finds the action to be performed in p[2], previously filled by function
+ * parse_set_req_line(). It returns 0 in case of success, -1 in case of internal
+ * error, though this can be revisited when this code is finally used.
+ *
+ * 'action' can be '0' to replace the method, '1' to replace the path, '2' to
+ * replace the query string, '3' to replace the uri or '4' to replace the
+ * path+query.
+ *
+ * In the query string case, the question mark '?' must be set at the start of
+ * the string by the caller, even if the replacement query string is empty.
+ */
+int http_req_replace_stline(int action, const char *replace, int len,
+ struct proxy *px, struct stream *s)
+{
+ struct htx *htx = htxbuf(&s->req.buf);
+
+ switch (action) {
+ case 0: // method
+ if (!http_replace_req_meth(htx, ist2(replace, len)))
+ return -1;
+ break;
+
+ case 1: // path
+ if (!http_replace_req_path(htx, ist2(replace, len), 0))
+ return -1;
+ break;
+
+ case 2: // query
+ if (!http_replace_req_query(htx, ist2(replace, len)))
+ return -1;
+ break;
+
+ case 3: // uri
+ if (!http_replace_req_uri(htx, ist2(replace, len)))
+ return -1;
+ break;
+
+ case 4: // path + query
+ if (!http_replace_req_path(htx, ist2(replace, len), 1))
+ return -1;
+ break;
+
+ default:
+ return -1;
+ }
+ return 0;
+}
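+
+/* Illustrative sketch (assumed action keywords) of the mapping above:
+ *
+ *   http-request set-method POST       -> action 0
+ *   http-request set-path   /index     -> action 1
+ *   http-request set-query  foo=bar    -> action 2 (the '?' is prepended by the caller)
+ *   http-request set-uri    /new?a=b   -> action 3
+ *   http-request set-pathq  /new?a=b   -> action 4
+ */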
+
+/* This function replaces the HTTP status code and the associated message. The
+ * variable <status> contains the new status code. It returns 0 in case of
+ * success, -1 in case of internal error.
+ */
+int http_res_set_status(unsigned int status, struct ist reason, struct stream *s)
+{
+ struct htx *htx = htxbuf(&s->res.buf);
+ char *res;
+
+ chunk_reset(&trash);
+ res = ultoa_o(status, trash.area, trash.size);
+ trash.data = res - trash.area;
+
+ /* Do we have a custom reason format string? */
+ if (!isttest(reason)) {
+ const char *str = http_get_reason(status);
+ reason = ist(str);
+ }
+
+ if (!http_replace_res_status(htx, ist2(trash.area, trash.data), reason))
+ return -1;
+ s->txn->status = status;
+ return 0;
+}
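+
+/* Illustrative sketch (assumed configuration syntax) of the action built on
+ * this helper:
+ *
+ *   http-response set-status 503 reason "Maintenance"
+ *   http-response set-status 429
+ *
+ * When no custom reason is given, http_get_reason() supplies the default one.
+ */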
+
+/* Executes the http-request rules <rules> for stream <s> and proxy <px>.
+ * Returns the verdict of the first rule that prevents further processing of
+ * the request (auth, deny, ...), and defaults to HTTP_RULE_RES_STOP if it
+ * executed all rules or stopped on an allow, or HTTP_RULE_RES_CONT if the
+ * last rule was reached. It may set the TX_CLTARPIT flag on txn->flags if it
+ * encounters a tarpit rule.
+ */
+static enum rule_result http_req_get_intercept_rule(struct proxy *px, struct list *def_rules,
+ struct list *rules, struct stream *s)
+{
+ struct session *sess = strm_sess(s);
+ struct http_txn *txn = s->txn;
+ struct act_rule *rule;
+ enum rule_result rule_ret = HTTP_RULE_RES_CONT;
+ int act_opts = 0;
+
+ /* If "the current_rule_list" match the executed rule list, we are in
+ * resume condition. If a resume is needed it is always in the action
+ * and never in the ACL or converters. In this case, we initialise the
+ * current rule, and go to the action execution point.
+ */
+ if (s->current_rule) {
+ rule = s->current_rule;
+ s->current_rule = NULL;
+ if (s->current_rule_list == rules || (def_rules && s->current_rule_list == def_rules))
+ goto resume_execution;
+ }
+ s->current_rule_list = ((!def_rules || s->current_rule_list == def_rules) ? rules : def_rules);
+
+ restart:
+ /* start the ruleset evaluation in strict mode */
+ txn->req.flags &= ~HTTP_MSGF_SOFT_RW;
+
+ list_for_each_entry(rule, s->current_rule_list, list) {
+ /* check optional condition */
+ if (rule->cond) {
+ int ret;
+
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ if (!ret) /* condition not matched */
+ continue;
+ }
+
+ act_opts |= ACT_OPT_FIRST;
+ resume_execution:
+ if (rule->kw->flags & KWF_EXPERIMENTAL)
+ mark_tainted(TAINTED_ACTION_EXP_EXECUTED);
+
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ if ((s->scf->flags & SC_FL_ERROR) ||
+ ((s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) &&
+ (px->options & PR_O_ABRT_CLOSE)))
+ act_opts |= ACT_OPT_FINAL;
+
+ switch (rule->action_ptr(rule, px, sess, s, act_opts)) {
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_STOP:
+ rule_ret = HTTP_RULE_RES_STOP;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_YIELD:
+ s->current_rule = rule;
+ rule_ret = HTTP_RULE_RES_YIELD;
+ goto end;
+ case ACT_RET_ERR:
+ rule_ret = HTTP_RULE_RES_ERROR;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_DONE:
+ rule_ret = HTTP_RULE_RES_DONE;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_DENY:
+ if (txn->status == -1)
+ txn->status = 403;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_ABRT:
+ rule_ret = HTTP_RULE_RES_ABRT;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_INV:
+ rule_ret = HTTP_RULE_RES_BADREQ;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ continue; /* eval the next rule */
+ }
+
+		/* If no action function is defined, check for known actions */
+ switch (rule->action) {
+ case ACT_ACTION_ALLOW:
+ rule_ret = HTTP_RULE_RES_STOP;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_ACTION_DENY:
+ txn->status = rule->arg.http_reply->status;
+ txn->http_reply = rule->arg.http_reply;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_HTTP_REQ_TARPIT:
+ txn->flags |= TX_CLTARPIT;
+ txn->status = rule->arg.http_reply->status;
+ txn->http_reply = rule->arg.http_reply;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_HTTP_REDIR: {
+ int ret = http_apply_redirect_rule(rule->arg.redir, s, txn);
+
+ if (ret == 2) // 2 == skip
+ break;
+
+ rule_ret = ret ? HTTP_RULE_RES_ABRT : HTTP_RULE_RES_ERROR;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+
+		/* other flags exist, but normally they are never matched. */
+ default:
+ break;
+ }
+ }
+
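+	/* the defaults section ruleset is done, now evaluate the ruleset of
+	 * the proxy itself from the start
+	 */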
+ if (def_rules && s->current_rule_list == def_rules) {
+ s->current_rule_list = rules;
+ goto restart;
+ }
+
+ end:
+ /* if the ruleset evaluation is finished reset the strict mode */
+ if (rule_ret != HTTP_RULE_RES_YIELD)
+ txn->req.flags &= ~HTTP_MSGF_SOFT_RW;
+
+ /* we reached the end of the rules, nothing to report */
+ return rule_ret;
+}
+
+/* Executes the http-response rules <rules> for stream <s> and proxy <px>. It
+ * returns one of 5 possible statuses: HTTP_RULE_RES_CONT, HTTP_RULE_RES_STOP,
+ * HTTP_RULE_RES_DONE, HTTP_RULE_RES_YIELD, or HTTP_RULE_RES_BADREQ. If *CONT
+ * is returned, the process can continue the evaluation of the next rule
+ * list. If *STOP or *DONE is returned, the process must stop the evaluation.
+ * If *BADREQ is returned, it means the operation could not be processed and
+ * a server error must be returned. If *YIELD is returned, the caller must
+ * call the function again with the same context.
+ */
+static enum rule_result http_res_get_intercept_rule(struct proxy *px, struct list *def_rules,
+ struct list *rules, struct stream *s, uint8_t final)
+{
+ struct session *sess = strm_sess(s);
+ struct http_txn *txn = s->txn;
+ struct act_rule *rule;
+ enum rule_result rule_ret = HTTP_RULE_RES_CONT;
+ int act_opts = 0;
+
+ if (final)
+ act_opts |= ACT_OPT_FINAL;
+ /* If "the current_rule_list" match the executed rule list, we are in
+ * resume condition. If a resume is needed it is always in the action
+ * and never in the ACL or converters. In this case, we initialise the
+ * current rule, and go to the action execution point.
+ */
+ if (s->current_rule) {
+ rule = s->current_rule;
+ s->current_rule = NULL;
+ if (s->current_rule_list == rules || (def_rules && s->current_rule_list == def_rules))
+ goto resume_execution;
+ }
+ s->current_rule_list = ((!def_rules || s->current_rule_list == def_rules) ? rules : def_rules);
+
+ restart:
+
+ /* start the ruleset evaluation in strict mode */
+ txn->rsp.flags &= ~HTTP_MSGF_SOFT_RW;
+
+ list_for_each_entry(rule, s->current_rule_list, list) {
+ /* check optional condition */
+ if (rule->cond) {
+ int ret;
+
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ if (!ret) /* condition not matched */
+ continue;
+ }
+
+ act_opts |= ACT_OPT_FIRST;
+resume_execution:
+ if (rule->kw->flags & KWF_EXPERIMENTAL)
+ mark_tainted(TAINTED_ACTION_EXP_EXECUTED);
+
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ if ((s->scf->flags & SC_FL_ERROR) ||
+ ((s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) &&
+ (px->options & PR_O_ABRT_CLOSE)))
+ act_opts |= ACT_OPT_FINAL;
+
+ switch (rule->action_ptr(rule, px, sess, s, act_opts)) {
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_STOP:
+ rule_ret = HTTP_RULE_RES_STOP;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_YIELD:
+ s->current_rule = rule;
+ rule_ret = HTTP_RULE_RES_YIELD;
+ goto end;
+ case ACT_RET_ERR:
+ rule_ret = HTTP_RULE_RES_ERROR;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_DONE:
+ rule_ret = HTTP_RULE_RES_DONE;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_DENY:
+ if (txn->status == -1)
+ txn->status = 502;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_ABRT:
+ rule_ret = HTTP_RULE_RES_ABRT;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_INV:
+ rule_ret = HTTP_RULE_RES_BADREQ;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ continue; /* eval the next rule */
+ }
+
+		/* If no action function is defined, check for known actions */
+ switch (rule->action) {
+ case ACT_ACTION_ALLOW:
+ rule_ret = HTTP_RULE_RES_STOP; /* "allow" rules are OK */
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_ACTION_DENY:
+ txn->status = rule->arg.http_reply->status;
+ txn->http_reply = rule->arg.http_reply;
+ rule_ret = HTTP_RULE_RES_DENY;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+
+ case ACT_HTTP_REDIR: {
+ int ret = http_apply_redirect_rule(rule->arg.redir, s, txn);
+
+ if (ret == 2) // 2 == skip
+ break;
+
+ rule_ret = ret ? HTTP_RULE_RES_ABRT : HTTP_RULE_RES_ERROR;
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+		/* other flags exist, but normally they are never matched. */
+ default:
+ break;
+ }
+ }
+
+ if (def_rules && s->current_rule_list == def_rules) {
+ s->current_rule_list = rules;
+ goto restart;
+ }
+
+ end:
+ /* if the ruleset evaluation is finished reset the strict mode */
+ if (rule_ret != HTTP_RULE_RES_YIELD)
+ txn->rsp.flags &= ~HTTP_MSGF_SOFT_RW;
+
+ /* we reached the end of the rules, nothing to report */
+ return rule_ret;
+}
+
+/* Executes backend and frontend http-after-response rules for the stream <s>,
+ * in that order. It returns 1 on success and 0 on error. It is the caller's
+ * responsibility to catch the error or to ignore it. If the caller catches
+ * it, this function may be called a second time to handle the internal error.
+ */
+int http_eval_after_res_rules(struct stream *s)
+{
+ struct list *def_rules, *rules;
+ struct session *sess = s->sess;
+ enum rule_result ret = HTTP_RULE_RES_CONT;
+
+ /* Eval after-response ruleset only if the reply is not const */
+ if (s->txn->flags & TX_CONST_REPLY)
+ goto end;
+
+ /* prune the request variables if not already done and swap to the response variables. */
+ if (s->vars_reqres.scope != SCOPE_RES) {
+ if (!LIST_ISEMPTY(&s->vars_reqres.head))
+ vars_prune(&s->vars_reqres, s->sess, s);
+ vars_init_head(&s->vars_reqres, SCOPE_RES);
+ }
+
+ def_rules = (s->be->defpx ? &s->be->defpx->http_after_res_rules : NULL);
+ rules = &s->be->http_after_res_rules;
+
+ ret = http_res_get_intercept_rule(s->be, def_rules, rules, s, 1);
+ if ((ret == HTTP_RULE_RES_CONT || ret == HTTP_RULE_RES_STOP) && sess->fe != s->be) {
+ def_rules = ((sess->fe->defpx && sess->fe->defpx != s->be->defpx) ? &sess->fe->defpx->http_after_res_rules : NULL);
+ rules = &sess->fe->http_after_res_rules;
+ ret = http_res_get_intercept_rule(sess->fe, def_rules, rules, s, 1);
+ }
+
+ end:
+ /* All other codes than CONTINUE, STOP or DONE are forbidden */
+ return (ret == HTTP_RULE_RES_CONT || ret == HTTP_RULE_RES_STOP || ret == HTTP_RULE_RES_DONE);
+}
+
+/*
+ * Manage client-side cookies. It can impact performance by about 2% so it is
+ * desirable to call it only when needed. This code is quite complex because
+ * of the multiple very crappy and ambiguous syntaxes we have to support. It
+ * is highly recommended not to touch this part without a good reason!
+ */
+static void http_manage_client_side_cookies(struct stream *s, struct channel *req)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct htx *htx;
+ struct http_hdr_ctx ctx;
+ char *hdr_beg, *hdr_end, *del_from;
+ char *prev, *att_beg, *att_end, *equal, *val_beg, *val_end, *next;
+ int preserve_hdr;
+
+ htx = htxbuf(&req->buf);
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Cookie"), &ctx, 1)) {
+ int is_first = 1;
+ del_from = NULL; /* nothing to be deleted */
+ preserve_hdr = 0; /* assume we may kill the whole header */
+
+ /* Now look for cookies. Conforming to RFC2109, we have to support
+ * attributes whose name begin with a '$', and associate them with
+ * the right cookie, if we want to delete this cookie.
+ * So there are 3 cases for each cookie read :
+ * 1) it's a special attribute, beginning with a '$' : ignore it.
+ * 2) it's a server id cookie that we *MAY* want to delete : save
+ * some pointers on it (last semi-colon, beginning of cookie...)
+ * 3) it's an application cookie : we *MAY* have to delete a previous
+ * "special" cookie.
+ * At the end of loop, if a "special" cookie remains, we may have to
+ * remove it. If no application cookie persists in the header, we
+ * *MUST* delete it.
+ *
+ * Note: RFC2965 is unclear about the processing of spaces around
+ * the equal sign in the ATTR=VALUE form. A careful inspection of
+ * the RFC explicitly allows spaces before it, and not within the
+ * tokens (attrs or values). An inspection of RFC2109 allows that
+ * too but section 10.1.3 lets one think that spaces may be allowed
+ * after the equal sign too, resulting in some (rare) buggy
+ * implementations trying to do that. So let's do what servers do.
+		 * The latest IETF draft forbids spaces all around. Also, earlier RFCs
+ * allowed quoted strings in values, with any possible character
+ * after a backslash, including control chars and delimiters, which
+ * causes parsing to become ambiguous. Browsers also allow spaces
+ * within values even without quotes.
+ *
+ * We have to keep multiple pointers in order to support cookie
+ * removal at the beginning, middle or end of header without
+ * corrupting the header. All of these headers are valid :
+ *
+ * hdr_beg hdr_end
+ * | |
+ * v |
+ * NAME1=VALUE1;NAME2=VALUE2;NAME3=VALUE3 |
+ * NAME1=VALUE1;NAME2_ONLY ;NAME3=VALUE3 v
+ * NAME1 = VALUE 1 ; NAME2 = VALUE2 ; NAME3 = VALUE3
+ * | | | | | | |
+ * | | | | | | |
+ * | | | | | | +--> next
+ * | | | | | +----> val_end
+ * | | | | +-----------> val_beg
+ * | | | +--------------> equal
+ * | | +----------------> att_end
+ * | +---------------------> att_beg
+ * +--------------------------> prev
+ *
+ */
+ hdr_beg = ctx.value.ptr;
+ hdr_end = hdr_beg + ctx.value.len;
+ for (prev = hdr_beg; prev < hdr_end; prev = next) {
+ /* Iterate through all cookies on this line */
+
+ /* find att_beg */
+ att_beg = prev;
+ if (!is_first)
+ att_beg++;
+ is_first = 0;
+
+ while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
+ att_beg++;
+
+			/* find att_end : this is the first character after the last non-space
+			 * character before the equal sign. It may be equal to hdr_end.
+ */
+ equal = att_end = att_beg;
+ while (equal < hdr_end) {
+ if (*equal == '=' || *equal == ',' || *equal == ';')
+ break;
+ if (HTTP_IS_SPHT(*equal++))
+ continue;
+ att_end = equal;
+ }
+
+ /* here, <equal> points to '=', a delimiter or the end. <att_end>
+ * is between <att_beg> and <equal>, both may be identical.
+ */
+ /* look for end of cookie if there is an equal sign */
+ if (equal < hdr_end && *equal == '=') {
+ /* look for the beginning of the value */
+ val_beg = equal + 1;
+ while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
+ val_beg++;
+
+ /* find the end of the value, respecting quotes */
+ next = http_find_cookie_value_end(val_beg, hdr_end);
+
+ /* make val_end point to the first white space or delimiter after the value */
+ val_end = next;
+ while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
+ val_end--;
+ }
+ else
+ val_beg = val_end = next = equal;
+
+ /* We have nothing to do with attributes beginning with
+ * '$'. However, they will automatically be removed if a
+ * header before them is removed, since they're supposed
+ * to be linked together.
+ */
+ if (*att_beg == '$')
+ continue;
+
+ /* Ignore cookies with no equal sign */
+ if (equal == next) {
+ /* This is not our cookie, so we must preserve it. But if we already
+ * scheduled another cookie for removal, we cannot remove the
+ * complete header, but we can remove the previous block itself.
+ */
+ preserve_hdr = 1;
+ if (del_from != NULL) {
+ int delta = http_del_hdr_value(hdr_beg, hdr_end, &del_from, prev);
+ val_end += delta;
+ next += delta;
+ hdr_end += delta;
+ prev = del_from;
+ del_from = NULL;
+ }
+ continue;
+ }
+
+ /* if there are spaces around the equal sign, we need to
+ * strip them otherwise we'll get trouble for cookie captures,
+ * or even for rewrites. Since this happens extremely rarely,
+ * it does not hurt performance.
+ */
+ if (unlikely(att_end != equal || val_beg > equal + 1)) {
+ int stripped_before = 0;
+ int stripped_after = 0;
+
+ if (att_end != equal) {
+ memmove(att_end, equal, hdr_end - equal);
+ stripped_before = (att_end - equal);
+ equal += stripped_before;
+ val_beg += stripped_before;
+ }
+
+ if (val_beg > equal + 1) {
+ memmove(equal + 1, val_beg, hdr_end + stripped_before - val_beg);
+ stripped_after = (equal + 1) - val_beg;
+ val_beg += stripped_after;
+ stripped_before += stripped_after;
+ }
+
+ val_end += stripped_before;
+ next += stripped_before;
+ hdr_end += stripped_before;
+ }
+ /* now everything is as on the diagram above */
+
+ /* First, let's see if we want to capture this cookie. We check
+ * that we don't already have a client side cookie, because we
+ * can only capture one. Also as an optimisation, we ignore
+ * cookies shorter than the declared name.
+ */
+ if (sess->fe->capture_name != NULL && txn->cli_cookie == NULL &&
+ (val_end - att_beg >= sess->fe->capture_namelen) &&
+ memcmp(att_beg, sess->fe->capture_name, sess->fe->capture_namelen) == 0) {
+ int log_len = val_end - att_beg;
+
+ if ((txn->cli_cookie = pool_alloc(pool_head_capture)) == NULL) {
+ ha_alert("HTTP logging : out of memory.\n");
+ } else {
+ if (log_len > sess->fe->capture_len)
+ log_len = sess->fe->capture_len;
+ memcpy(txn->cli_cookie, att_beg, log_len);
+ txn->cli_cookie[log_len] = 0;
+ }
+ }
+
+ /* Persistence cookies in passive, rewrite or insert mode have the
+ * following form :
+ *
+ * Cookie: NAME=SRV[|<lastseen>[|<firstseen>]]
+ *
+ * For cookies in prefix mode, the form is :
+ *
+ * Cookie: NAME=SRV~VALUE
+ */
+ if ((att_end - att_beg == s->be->cookie_len) && (s->be->cookie_name != NULL) &&
+ (memcmp(att_beg, s->be->cookie_name, att_end - att_beg) == 0)) {
+ struct server *srv = s->be->srv;
+ char *delim;
+
+ /* if we're in cookie prefix mode, we'll search the delimiter so that we
+ * have the server ID between val_beg and delim, and the original cookie between
+ * delim+1 and val_end. Otherwise, delim==val_end :
+ *
+ * hdr_beg
+ * |
+ * v
+ * NAME=SRV; # in all but prefix modes
+ * NAME=SRV~OPAQUE ; # in prefix mode
+ * || || | |+-> next
+ * || || | +--> val_end
+ * || || +---------> delim
+ * || |+------------> val_beg
+ * || +-------------> att_end = equal
+ * |+-----------------> att_beg
+ * +------------------> prev
+ *
+ */
+ if (s->be->ck_opts & PR_CK_PFX) {
+ for (delim = val_beg; delim < val_end; delim++)
+ if (*delim == COOKIE_DELIM)
+ break;
+ }
+ else {
+ char *vbar1;
+ delim = val_end;
+ /* Now check if the cookie contains a date field, which would
+ * appear after a vertical bar ('|') just after the server name
+ * and before the delimiter.
+ */
+ vbar1 = memchr(val_beg, COOKIE_DELIM_DATE, val_end - val_beg);
+ if (vbar1) {
+ /* OK, so left of the bar is the server's cookie and
+ * right is the last seen date. It is a base64 encoded
+ * 30-bit value representing the UNIX date since the
+ * epoch in 4-second quantities.
+ */
+ int val;
+ delim = vbar1++;
+ if (val_end - vbar1 >= 5) {
+ val = b64tos30(vbar1);
+ if (val > 0)
+ txn->cookie_last_date = val << 2;
+ }
+ /* look for a second vertical bar */
+ vbar1 = memchr(vbar1, COOKIE_DELIM_DATE, val_end - vbar1);
+ if (vbar1 && (val_end - vbar1 > 5)) {
+ val = b64tos30(vbar1 + 1);
+ if (val > 0)
+ txn->cookie_first_date = val << 2;
+ }
+ }
+ }
+
+ /* if the cookie has an expiration date and the proxy wants to check
+ * it, then we do that now. We first check if the cookie is too old,
+ * then only if it has expired. We detect strict overflow because the
+ * time resolution here is not great (4 seconds). Cookies with dates
+ * in the future are ignored if their offset is beyond one day. This
+ * allows an admin to fix timezone issues without expiring everyone
+ * and at the same time avoids keeping unwanted side effects for too
+ * long.
+ */
+ if (txn->cookie_first_date && s->be->cookie_maxlife &&
+ (((signed)(date.tv_sec - txn->cookie_first_date) > (signed)s->be->cookie_maxlife) ||
+ ((signed)(txn->cookie_first_date - date.tv_sec) > 86400))) {
+ txn->flags &= ~TX_CK_MASK;
+ txn->flags |= TX_CK_OLD;
+ delim = val_beg; // let's pretend we have not found the cookie
+ txn->cookie_first_date = 0;
+ txn->cookie_last_date = 0;
+ }
+ else if (txn->cookie_last_date && s->be->cookie_maxidle &&
+ (((signed)(date.tv_sec - txn->cookie_last_date) > (signed)s->be->cookie_maxidle) ||
+ ((signed)(txn->cookie_last_date - date.tv_sec) > 86400))) {
+ txn->flags &= ~TX_CK_MASK;
+ txn->flags |= TX_CK_EXPIRED;
+ delim = val_beg; // let's pretend we have not found the cookie
+ txn->cookie_first_date = 0;
+ txn->cookie_last_date = 0;
+ }
+
+ /* Here, we'll look for the first running server which supports the cookie.
+			 * This allows the same cookie to be shared between several servers, for
+			 * example to dedicate backup servers to specific servers only.
+			 * However, to prevent clients from sticking to a cookie-less backup server
+			 * when they have accidentally learned an empty cookie, we simply ignore
+ * empty cookies and mark them as invalid.
+ * The same behaviour is applied when persistence must be ignored.
+ */
+ if ((delim == val_beg) || (s->flags & (SF_IGNORE_PRST | SF_ASSIGNED)))
+ srv = NULL;
+
+ while (srv) {
+ if (srv->cookie && (srv->cklen == delim - val_beg) &&
+ !memcmp(val_beg, srv->cookie, delim - val_beg)) {
+ if ((srv->cur_state != SRV_ST_STOPPED) ||
+ (s->be->options & PR_O_PERSIST) ||
+ (s->flags & SF_FORCE_PRST)) {
+ /* we found the server and we can use it */
+ txn->flags &= ~TX_CK_MASK;
+ txn->flags |= (srv->cur_state != SRV_ST_STOPPED) ? TX_CK_VALID : TX_CK_DOWN;
+ s->flags |= SF_DIRECT | SF_ASSIGNED;
+ s->target = &srv->obj_type;
+ break;
+ } else {
+ /* we found a server, but it's down,
+ * mark it as such and go on in case
+ * another one is available.
+ */
+ txn->flags &= ~TX_CK_MASK;
+ txn->flags |= TX_CK_DOWN;
+ }
+ }
+ srv = srv->next;
+ }
+
+ if (!srv && !(txn->flags & (TX_CK_DOWN|TX_CK_EXPIRED|TX_CK_OLD))) {
+ /* no server matched this cookie or we deliberately skipped it */
+ txn->flags &= ~TX_CK_MASK;
+ if ((s->flags & (SF_IGNORE_PRST | SF_ASSIGNED)))
+ txn->flags |= TX_CK_UNUSED;
+ else
+ txn->flags |= TX_CK_INVALID;
+ }
+
+ /* depending on the cookie mode, we may have to either :
+ * - delete the complete cookie if we're in insert+indirect mode, so that
+ * the server never sees it ;
+ * - remove the server id from the cookie value, and tag the cookie as an
+ * application cookie so that it does not get accidentally removed later,
+ * if we're in cookie prefix mode
+ */
+ if ((s->be->ck_opts & PR_CK_PFX) && (delim != val_end)) {
+ int delta; /* negative */
+
+ memmove(val_beg, delim + 1, hdr_end - (delim + 1));
+ delta = val_beg - (delim + 1);
+ val_end += delta;
+ next += delta;
+ hdr_end += delta;
+ del_from = NULL;
+ preserve_hdr = 1; /* we want to keep this cookie */
+ }
+ else if (del_from == NULL &&
+ (s->be->ck_opts & (PR_CK_INS | PR_CK_IND)) == (PR_CK_INS | PR_CK_IND)) {
+ del_from = prev;
+ }
+ }
+ else {
+ /* This is not our cookie, so we must preserve it. But if we already
+ * scheduled another cookie for removal, we cannot remove the
+ * complete header, but we can remove the previous block itself.
+ */
+ preserve_hdr = 1;
+
+ if (del_from != NULL) {
+ int delta = http_del_hdr_value(hdr_beg, hdr_end, &del_from, prev);
+ if (att_beg >= del_from)
+ att_beg += delta;
+ if (att_end >= del_from)
+ att_end += delta;
+ val_beg += delta;
+ val_end += delta;
+ next += delta;
+ hdr_end += delta;
+ prev = del_from;
+ del_from = NULL;
+ }
+ }
+
+ } /* for each cookie */
+
+
+ /* There are no more cookies on this line.
+ * We may still have one (or several) marked for deletion at the
+ * end of the line. We must do this now in two ways :
+ * - if some cookies must be preserved, we only delete from the
+ * mark to the end of line ;
+ * - if nothing needs to be preserved, simply delete the whole header
+ */
+ if (del_from) {
+ hdr_end = (preserve_hdr ? del_from : hdr_beg);
+ }
+ if ((hdr_end - hdr_beg) != ctx.value.len) {
+ if (hdr_beg != hdr_end)
+ htx_change_blk_value_len(htx, ctx.blk, hdr_end - hdr_beg);
+ else
+ http_remove_header(htx, &ctx);
+ }
+ } /* for each "Cookie header */
+}
+
+/*
+ * Manage server-side cookies. It can impact performance by about 2% so it is
+ * desirable to call it only when needed. This function is also used when we
+ * just need to know if there is a cookie (eg: for check-cache).
+ */
+static void http_manage_server_side_cookies(struct stream *s, struct channel *res)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct htx *htx;
+ struct http_hdr_ctx ctx;
+ struct server *srv;
+ char *hdr_beg, *hdr_end;
+ char *prev, *att_beg, *att_end, *equal, *val_beg, *val_end, *next;
+
+ htx = htxbuf(&res->buf);
+
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("Set-Cookie"), &ctx, 1)) {
+ int is_first = 1;
+
+ /* OK, right now we know we have a Set-Cookie* at hdr_beg, and
+ * <prev> points to the colon.
+ */
+ txn->flags |= TX_SCK_PRESENT;
+
+ /* Maybe we only wanted to see if there was a Set-Cookie (eg:
+ * check-cache is enabled) and we are not interested in checking
+ * them. Warning, the cookie capture is declared in the frontend.
+ */
+ if (s->be->cookie_name == NULL && sess->fe->capture_name == NULL)
+ break;
+
+ /* OK so now we know we have to process this response cookie.
+ * The format of the Set-Cookie header is slightly different
+ * from the format of the Cookie header in that it does not
+ * support the comma as a cookie delimiter (thus the header
+ * cannot be folded) because the Expires attribute described in
+ * the original Netscape's spec may contain an unquoted date
+ * with a comma inside. We have to live with this because
+ * many browsers don't support Max-Age and some browsers don't
+ * support quoted strings. However the Set-Cookie2 header is
+ * clean but basically nobody supports it.
+ *
+ * We have to keep multiple pointers in order to support cookie
+ * removal at the beginning, middle or end of header without
+ * corrupting the header (in case of set-cookie2). A special
+ * pointer, <scav> points to the beginning of the set-cookie-av
+ * fields after the first semi-colon. The <next> pointer points
+ * either to the end of line (set-cookie) or next unquoted comma
+ * (set-cookie2). All of these headers are valid :
+ *
+ * hdr_beg hdr_end
+ * | |
+ * v |
+ * NAME1 = VALUE 1 ; Secure; Path="/" |
+ * NAME=VALUE; Secure; Expires=Thu, 01-Jan-1970 00:00:01 GMT v
+ * NAME = VALUE ; Secure; Expires=Thu, 01-Jan-1970 00:00:01 GMT
+ * NAME1 = VALUE 1 ; Max-Age=0, NAME2=VALUE2; Discard
+ * | | | | | | | |
+ * | | | | | | | +-> next
+ * | | | | | | +------------> scav
+ * | | | | | +--------------> val_end
+ * | | | | +--------------------> val_beg
+ * | | | +----------------------> equal
+ * | | +------------------------> att_end
+ * | +----------------------------> att_beg
+ * +------------------------------> prev
+ * -------------------------------> hdr_beg
+ */
+ hdr_beg = ctx.value.ptr;
+ hdr_end = hdr_beg + ctx.value.len;
+ for (prev = hdr_beg; prev < hdr_end; prev = next) {
+
+ /* Iterate through all cookies on this line */
+
+ /* find att_beg */
+ att_beg = prev;
+ if (!is_first)
+ att_beg++;
+ is_first = 0;
+
+ while (att_beg < hdr_end && HTTP_IS_SPHT(*att_beg))
+ att_beg++;
+
+			/* find att_end : this is the first character after the last non-space
+			 * character before the equal sign. It may be equal to hdr_end.
+ */
+ equal = att_end = att_beg;
+
+ while (equal < hdr_end) {
+ if (*equal == '=' || *equal == ';')
+ break;
+ if (HTTP_IS_SPHT(*equal++))
+ continue;
+ att_end = equal;
+ }
+
+ /* here, <equal> points to '=', a delimiter or the end. <att_end>
+ * is between <att_beg> and <equal>, both may be identical.
+ */
+
+ /* look for end of cookie if there is an equal sign */
+ if (equal < hdr_end && *equal == '=') {
+ /* look for the beginning of the value */
+ val_beg = equal + 1;
+ while (val_beg < hdr_end && HTTP_IS_SPHT(*val_beg))
+ val_beg++;
+
+ /* find the end of the value, respecting quotes */
+ next = http_find_cookie_value_end(val_beg, hdr_end);
+
+ /* make val_end point to the first white space or delimiter after the value */
+ val_end = next;
+ while (val_end > val_beg && HTTP_IS_SPHT(*(val_end - 1)))
+ val_end--;
+ }
+ else {
+ /* <equal> points to next comma, semi-colon or EOL */
+ val_beg = val_end = next = equal;
+ }
+
+ if (next < hdr_end) {
+ /* For Set-Cookie, since commas are permitted
+ * in values, skip to the end.
+ */
+ next = hdr_end;
+ }
+
+ /* Now everything is as on the diagram above */
+
+ /* Ignore cookies with no equal sign */
+ if (equal == val_end)
+ continue;
+
+ /* If there are spaces around the equal sign, we need to
+ * strip them otherwise we'll get trouble for cookie captures,
+ * or even for rewrites. Since this happens extremely rarely,
+ * it does not hurt performance.
+ */
+ if (unlikely(att_end != equal || val_beg > equal + 1)) {
+ int stripped_before = 0;
+ int stripped_after = 0;
+
+ if (att_end != equal) {
+ memmove(att_end, equal, hdr_end - equal);
+ stripped_before = (att_end - equal);
+ equal += stripped_before;
+ val_beg += stripped_before;
+ }
+
+ if (val_beg > equal + 1) {
+ memmove(equal + 1, val_beg, hdr_end + stripped_before - val_beg);
+ stripped_after = (equal + 1) - val_beg;
+ val_beg += stripped_after;
+ stripped_before += stripped_after;
+ }
+
+ val_end += stripped_before;
+ next += stripped_before;
+ hdr_end += stripped_before;
+
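+				/* the value was shortened in place, resize
+				 * the HTX block accordingly
+				 */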
+ htx_change_blk_value_len(htx, ctx.blk, hdr_end - hdr_beg);
+ ctx.value.len = hdr_end - hdr_beg;
+ }
+
+ /* First, let's see if we want to capture this cookie. We check
+ * that we don't already have a server side cookie, because we
+ * can only capture one. Also as an optimisation, we ignore
+ * cookies shorter than the declared name.
+ */
+ if (sess->fe->capture_name != NULL &&
+ txn->srv_cookie == NULL &&
+ (val_end - att_beg >= sess->fe->capture_namelen) &&
+ memcmp(att_beg, sess->fe->capture_name, sess->fe->capture_namelen) == 0) {
+ int log_len = val_end - att_beg;
+ if ((txn->srv_cookie = pool_alloc(pool_head_capture)) == NULL) {
+ ha_alert("HTTP logging : out of memory.\n");
+ }
+ else {
+ if (log_len > sess->fe->capture_len)
+ log_len = sess->fe->capture_len;
+ memcpy(txn->srv_cookie, att_beg, log_len);
+ txn->srv_cookie[log_len] = 0;
+ }
+ }
+
+ srv = objt_server(s->target);
+ /* now check if we need to process it for persistence */
+ if (!(s->flags & SF_IGNORE_PRST) &&
+ (att_end - att_beg == s->be->cookie_len) && (s->be->cookie_name != NULL) &&
+ (memcmp(att_beg, s->be->cookie_name, att_end - att_beg) == 0)) {
+ /* assume passive cookie by default */
+ txn->flags &= ~TX_SCK_MASK;
+ txn->flags |= TX_SCK_FOUND;
+
+ /* If the cookie is in insert mode on a known server, we'll delete
+ * this occurrence because we'll insert another one later.
+ * We'll delete it too if the "indirect" option is set and we're in
+ * a direct access.
+ */
+ if (s->be->ck_opts & PR_CK_PSV) {
+ /* The "preserve" flag was set, we don't want to touch the
+ * server's cookie.
+ */
+ }
+ else if ((srv && (s->be->ck_opts & PR_CK_INS)) ||
+ ((s->flags & SF_DIRECT) && (s->be->ck_opts & PR_CK_IND))) {
+ /* this cookie must be deleted */
+ if (prev == hdr_beg && next == hdr_end) {
+ /* whole header */
+ http_remove_header(htx, &ctx);
+ /* note: while both invalid now, <next> and <hdr_end>
+ * are still equal, so the for() will stop as expected.
+ */
+ } else {
+ /* just remove the value */
+ int delta = http_del_hdr_value(hdr_beg, hdr_end, &prev, next);
+ next = prev;
+ hdr_end += delta;
+ }
+ txn->flags &= ~TX_SCK_MASK;
+ txn->flags |= TX_SCK_DELETED;
+ /* and go on with next cookie */
+ }
+ else if (srv && srv->cookie && (s->be->ck_opts & PR_CK_RW)) {
+ /* replace bytes val_beg->val_end with the cookie name associated
+ * with this server since we know it.
+ */
+ int sliding, delta;
+
+ ctx.value = ist2(val_beg, val_end - val_beg);
+ ctx.lws_before = ctx.lws_after = 0;
+ http_replace_header_value(htx, &ctx, ist2(srv->cookie, srv->cklen));
+ delta = srv->cklen - (val_end - val_beg);
+ sliding = (ctx.value.ptr - val_beg);
+ hdr_beg += sliding;
+ val_beg += sliding;
+ next += sliding + delta;
+ hdr_end += sliding + delta;
+
+ txn->flags &= ~TX_SCK_MASK;
+ txn->flags |= TX_SCK_REPLACED;
+ }
+ else if (srv && srv->cookie && (s->be->ck_opts & PR_CK_PFX)) {
+ /* insert the cookie name associated with this server
+					 * before the existing cookie, and insert a delimiter between them.
+ */
+ int sliding, delta;
+ ctx.value = ist2(val_beg, 0);
+ ctx.lws_before = ctx.lws_after = 0;
+ http_replace_header_value(htx, &ctx, ist2(srv->cookie, srv->cklen + 1));
+ delta = srv->cklen + 1;
+ sliding = (ctx.value.ptr - val_beg);
+ hdr_beg += sliding;
+ val_beg += sliding;
+ next += sliding + delta;
+ hdr_end += sliding + delta;
+
+ val_beg[srv->cklen] = COOKIE_DELIM;
+ txn->flags &= ~TX_SCK_MASK;
+ txn->flags |= TX_SCK_REPLACED;
+ }
+ }
+ /* that's done for this cookie, check the next one on the same
+ * line when next != hdr_end (which should normally not happen
+ * with set-cookie2 support removed).
+ */
+ }
+ }
+}
+
+/*
+ * Parses the Cache-Control and Pragma request header fields to determine if
+ * the request may be served from the cache and/or if it is cacheable. Updates
+ * s->txn->flags.
+ */
+void http_check_request_for_cacheability(struct stream *s, struct channel *req)
+{
+ struct http_txn *txn = s->txn;
+ struct htx *htx;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ int pragma_found, cc_found;
+
+ if ((txn->flags & (TX_CACHEABLE|TX_CACHE_IGNORE)) == TX_CACHE_IGNORE)
+ return; /* nothing more to do here */
+
+ htx = htxbuf(&req->buf);
+ pragma_found = cc_found = 0;
+
+ /* Check "pragma" header for HTTP/1.0 compatibility. */
+ if (http_find_header(htx, ist("pragma"), &ctx, 1)) {
+ if (isteqi(ctx.value, ist("no-cache"))) {
+ pragma_found = 1;
+ }
+ }
+
+ ctx.blk = NULL;
+ /* Don't use the cache and don't try to store if we found the
+ * Authorization header */
+ if (http_find_header(htx, ist("authorization"), &ctx, 1)) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ txn->flags |= TX_CACHE_IGNORE;
+ }
+
+
+ /* Look for "cache-control" header and iterate over all the values
+ * until we find one that specifies that caching is possible or not. */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("cache-control"), &ctx, 0)) {
+ cc_found = 1;
+ /* We don't check the values after max-age, max-stale nor min-fresh,
+ * we simply don't use the cache when they're specified. */
+ if (istmatchi(ctx.value, ist("max-age")) ||
+ istmatchi(ctx.value, ist("no-cache")) ||
+ istmatchi(ctx.value, ist("max-stale")) ||
+ istmatchi(ctx.value, ist("min-fresh"))) {
+ txn->flags |= TX_CACHE_IGNORE;
+ continue;
+ }
+ if (istmatchi(ctx.value, ist("no-store"))) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ continue;
+ }
+ }
+
+ /* RFC7234#5.4:
+ * When the Cache-Control header field is also present and
+ * understood in a request, Pragma is ignored.
+ * When the Cache-Control header field is not present in a
+ * request, caches MUST consider the no-cache request
+ * pragma-directive as having the same effect as if
+ * "Cache-Control: no-cache" were present.
+ */
+ if (!cc_found && pragma_found)
+ txn->flags |= TX_CACHE_IGNORE;
+}
+
+/*
+ * Check if response is cacheable or not. Updates s->txn->flags.
+ */
+void http_check_response_for_cacheability(struct stream *s, struct channel *res)
+{
+ struct http_txn *txn = s->txn;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct htx *htx;
+ int has_freshness_info = 0;
+ int has_validator = 0;
+ int has_null_maxage = 0;
+
+ if (txn->status < 200) {
+ /* do not try to cache interim responses! */
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ return;
+ }
+
+ htx = htxbuf(&res->buf);
+ /* Check "pragma" header for HTTP/1.0 compatibility. */
+ if (http_find_header(htx, ist("pragma"), &ctx, 1)) {
+ if (isteqi(ctx.value, ist("no-cache"))) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ return;
+ }
+ }
+
+ /* Look for "cache-control" header and iterate over all the values
+ * until we find one that specifies that caching is possible or not. */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("cache-control"), &ctx, 0)) {
+ if (isteqi(ctx.value, ist("public"))) {
+ txn->flags |= TX_CACHEABLE | TX_CACHE_COOK;
+ continue;
+ }
+ /* This max-age might be overridden by a s-maxage directive, do
+ * not unset the TX_CACHEABLE yet. */
+ if (isteqi(ctx.value, ist("max-age=0"))) {
+ has_null_maxage = 1;
+ continue;
+ }
+
+ if (isteqi(ctx.value, ist("private")) ||
+ isteqi(ctx.value, ist("no-cache")) ||
+ isteqi(ctx.value, ist("no-store")) ||
+ isteqi(ctx.value, ist("s-maxage=0"))) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ continue;
+ }
+ /* We might have a no-cache="set-cookie" form. */
+ if (istmatchi(ctx.value, ist("no-cache=\"set-cookie"))) {
+ txn->flags &= ~TX_CACHE_COOK;
+ continue;
+ }
+
+ if (istmatchi(ctx.value, ist("s-maxage"))) {
+ has_freshness_info = 1;
+ has_null_maxage = 0; /* The null max-age is overridden, ignore it */
+ continue;
+ }
+ if (istmatchi(ctx.value, ist("max-age"))) {
+ has_freshness_info = 1;
+ continue;
+ }
+ }
+
+ /* We had a 'max-age=0' directive but no extra s-maxage, do not cache
+ * the response. */
+ if (has_null_maxage) {
+ txn->flags &= ~TX_CACHEABLE & ~TX_CACHE_COOK;
+ }
+
+ /* If no freshness information could be found in Cache-Control values,
+ * look for an Expires header. */
+ if (!has_freshness_info) {
+ ctx.blk = NULL;
+ has_freshness_info = http_find_header(htx, ist("expires"), &ctx, 0);
+ }
+
+ /* If no freshness information could be found in Cache-Control or Expires
+ * values, look for an explicit validator. */
+ if (!has_freshness_info) {
+ ctx.blk = NULL;
+ has_validator = 1;
+ if (!http_find_header(htx, ist("etag"), &ctx, 0)) {
+ ctx.blk = NULL;
+ if (!http_find_header(htx, ist("last-modified"), &ctx, 0))
+ has_validator = 0;
+ }
+ }
+
+ /* We won't store an entry that has neither a cache validator nor an
+ * explicit expiration time, as suggested in RFC 7234#3. */
+ if (!has_freshness_info && !has_validator)
+ txn->flags &= ~TX_CACHEABLE;
+}
+
+/*
+ * In a GET, HEAD or POST request, check if the requested URI matches the stats uri
+ * for the current proxy.
+ *
+ * It is assumed that the request is either a HEAD, GET, or POST and that the
+ * uri_auth field is valid.
+ *
+ * Returns 1 if stats should be provided, otherwise 0.
+ */
+static int http_stats_check_uri(struct stream *s, struct http_txn *txn, struct proxy *px)
+{
+ struct uri_auth *uri_auth = px->uri_auth;
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct ist uri;
+
+ if (!uri_auth)
+ return 0;
+
+ if (txn->meth != HTTP_METH_GET && txn->meth != HTTP_METH_HEAD && txn->meth != HTTP_METH_POST)
+ return 0;
+
+ htx = htxbuf(&s->req.buf);
+ sl = http_get_stline(htx);
+ uri = htx_sl_req_uri(sl);
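+	/* when the stats URI is a mere path, only compare against the path
+	 * part of the request URI so that absolute-form URIs also match
+	 */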
+ if (*uri_auth->uri_prefix == '/') {
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+ uri = http_parse_path(&parser);
+ }
+
+ /* check URI size */
+ if (uri_auth->uri_len > uri.len)
+ return 0;
+
+ if (memcmp(uri.ptr, uri_auth->uri_prefix, uri_auth->uri_len) != 0)
+ return 0;
+
+ return 1;
+}
+
+/* This function prepares an applet to handle the stats. It can deal with the
+ * "100-continue" expectation, check that admin rules are met for POST requests,
+ * and program a response message if something was unexpected. It cannot fail
+ * and always relies on the stats applet to complete the job. It does not touch
+ * analysers nor counters, which are left to the caller. It does not touch
+ * s->target which is supposed to already point to the stats applet. The caller
+ * is expected to have already assigned an appctx to the stream.
+ */
+static int http_handle_stats(struct stream *s, struct channel *req, struct proxy *px)
+{
+ struct stats_admin_rule *stats_admin_rule;
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = &txn->req;
+ struct uri_auth *uri_auth = px->uri_auth;
+ const char *h, *lookup, *end;
+ struct appctx *appctx = __sc_appctx(s->scb);
+ struct show_stat_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct htx *htx;
+ struct htx_sl *sl;
+
+ appctx->st1 = 0;
+ ctx->state = STAT_STATE_INIT;
+ ctx->st_code = STAT_STATUS_INIT;
+ ctx->http_px = px;
+ ctx->flags |= uri_auth->flags;
+ ctx->flags |= STAT_FMT_HTML; /* assume HTML mode by default */
+ if ((msg->flags & HTTP_MSGF_VER_11) && (txn->meth != HTTP_METH_HEAD))
+ ctx->flags |= STAT_CHUNKED;
+
+ htx = htxbuf(&req->buf);
+ sl = http_get_stline(htx);
+ lookup = HTX_SL_REQ_UPTR(sl) + uri_auth->uri_len;
+ end = HTX_SL_REQ_UPTR(sl) + HTX_SL_REQ_ULEN(sl);
+
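+	/* scan the remaining part of the URI for well-known options such as
+	 * ";up", ";csv" or ";st=..." which tune the stats applet's output
+	 */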
+ for (h = lookup; h <= end - 3; h++) {
+ if (memcmp(h, ";up", 3) == 0) {
+ ctx->flags |= STAT_HIDE_DOWN;
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 9; h++) {
+ if (memcmp(h, ";no-maint", 9) == 0) {
+ ctx->flags |= STAT_HIDE_MAINT;
+ break;
+ }
+ }
+
+ if (uri_auth->refresh) {
+ for (h = lookup; h <= end - 10; h++) {
+ if (memcmp(h, ";norefresh", 10) == 0) {
+ ctx->flags |= STAT_NO_REFRESH;
+ break;
+ }
+ }
+ }
+
+ for (h = lookup; h <= end - 4; h++) {
+ if (memcmp(h, ";csv", 4) == 0) {
+ ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM);
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 6; h++) {
+ if (memcmp(h, ";typed", 6) == 0) {
+ ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM);
+ ctx->flags |= STAT_FMT_TYPED;
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 5; h++) {
+ if (memcmp(h, ";json", 5) == 0) {
+ ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM);
+ ctx->flags |= STAT_FMT_JSON;
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 12; h++) {
+ if (memcmp(h, ";json-schema", 12) == 0) {
+ ctx->flags &= ~STAT_FMT_MASK;
+ ctx->flags |= STAT_JSON_SCHM;
+ break;
+ }
+ }
+
+ for (h = lookup; h <= end - 8; h++) {
+ if (memcmp(h, ";st=", 4) == 0) {
+ int i;
+ h += 4;
+ ctx->st_code = STAT_STATUS_UNKN;
+ for (i = STAT_STATUS_INIT + 1; i < STAT_STATUS_SIZE; i++) {
+ if (strncmp(stat_status_codes[i], h, 4) == 0) {
+ ctx->st_code = i;
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ ctx->scope_str = 0;
+ ctx->scope_len = 0;
+ for (h = lookup; h <= end - 8; h++) {
+ if (memcmp(h, STAT_SCOPE_INPUT_NAME "=", strlen(STAT_SCOPE_INPUT_NAME) + 1) == 0) {
+ int itx = 0;
+ const char *h2;
+ char scope_txt[STAT_SCOPE_TXT_MAXLEN + 1];
+ const char *err;
+
+ h += strlen(STAT_SCOPE_INPUT_NAME) + 1;
+ h2 = h;
+ ctx->scope_str = h2 - HTX_SL_REQ_UPTR(sl);
+ while (h < end) {
+ if (*h == ';' || *h == '&' || *h == ' ')
+ break;
+ itx++;
+ h++;
+ }
+
+ if (itx > STAT_SCOPE_TXT_MAXLEN)
+ itx = STAT_SCOPE_TXT_MAXLEN;
+ ctx->scope_len = itx;
+
+ /* scope_txt = search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ memcpy(scope_txt, h2, itx);
+ scope_txt[itx] = '\0';
+ err = invalid_char(scope_txt);
+ if (err) {
+ /* bad char in search text => clear scope */
+ ctx->scope_str = 0;
+ ctx->scope_len = 0;
+ }
+ break;
+ }
+ }
+
+ /* now check whether we have some admin rules for this request */
+ list_for_each_entry(stats_admin_rule, &uri_auth->admin_rules, list) {
+ int ret = 1;
+
+ if (stats_admin_rule->cond) {
+ ret = acl_exec_cond(stats_admin_rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (stats_admin_rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* no rule, or the rule matches */
+ ctx->flags |= STAT_ADMIN;
+ break;
+ }
+ }
+
+ if (txn->meth == HTTP_METH_GET || txn->meth == HTTP_METH_HEAD)
+ appctx->st0 = STAT_HTTP_HEAD;
+ else if (txn->meth == HTTP_METH_POST) {
+ if (ctx->flags & STAT_ADMIN) {
+ appctx->st0 = STAT_HTTP_POST;
+ if (msg->msg_state < HTTP_MSG_DATA)
+ req->analysers |= AN_REQ_HTTP_BODY;
+ }
+ else {
+ /* POST without admin level */
+ ctx->flags &= ~STAT_CHUNKED;
+ ctx->st_code = STAT_STATUS_DENY;
+ appctx->st0 = STAT_HTTP_LAST;
+ }
+ }
+ else {
+ /* Unsupported method */
+ ctx->flags &= ~STAT_CHUNKED;
+ ctx->st_code = STAT_STATUS_IVAL;
+ appctx->st0 = STAT_HTTP_LAST;
+ }
+
+ s->task->nice = -32; /* small boost for HTTP statistics */
+ return 1;
+}
+
+/* This function waits for the message payload at most <time> milliseconds (may
+ * be set to TICK_ETERNITY). It stops waiting once at least <bytes> bytes of
+ * the payload have been received (0 means no limit). It returns HTTP_RULE_*
+ * depending on the result:
+ *
+ * - HTTP_RULE_RES_CONT when conditions are met to stop waiting
+ * - HTTP_RULE_RES_YIELD to wait for more data
+ * - HTTP_RULE_RES_ABRT when a timeout occurred.
+ * - HTTP_RULE_RES_BADREQ if a parsing error is raised by lower level
+ * - HTTP_RULE_RES_ERROR if an internal error occurred
+ *
+ * If a timeout occurred, this function is responsible for emitting the right
+ * response to the client, depending on the channel (408 on the request side,
+ * 504 on the response side). All other errors must be handled by the caller.
+ */
+enum rule_result http_wait_for_msg_body(struct stream *s, struct channel *chn,
+ unsigned int time, unsigned int bytes)
+{
+ struct session *sess = s->sess;
+ struct http_txn *txn = s->txn;
+ struct http_msg *msg = ((chn->flags & CF_ISRESP) ? &txn->rsp : &txn->req);
+ struct htx *htx;
+ enum rule_result ret = HTTP_RULE_RES_CONT;
+
+ htx = htxbuf(&chn->buf);
+
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ ret = HTTP_RULE_RES_BADREQ;
+ goto end;
+ }
+ if (htx->flags & HTX_FL_PROCESSING_ERROR) {
+ ret = HTTP_RULE_RES_ERROR;
+ goto end;
+ }
+
+ /* Do nothing for bodyless and CONNECT requests */
+ if (txn->meth == HTTP_METH_CONNECT || (msg->flags & HTTP_MSGF_BODYLESS))
+ goto end;
+
+ if (!(chn->flags & CF_ISRESP)) {
+ if (http_handle_expect_hdr(s, htx, msg) == -1) {
+ ret = HTTP_RULE_RES_ERROR;
+ goto end;
+ }
+ }
+
+	/* Now we are waiting for the payload. We just need to know if all
+ * data have been received or if the buffer is full.
+ */
+ if ((htx->flags & HTX_FL_EOM) ||
+ htx_get_tail_type(htx) > HTX_BLK_DATA ||
+ channel_htx_full(chn, htx, global.tune.maxrewrite) ||
+ sc_waiting_room(chn_prod(chn)))
+ goto end;
+
+ if (bytes) {
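+		/* add up the payload of all DATA blocks and stop waiting as
+		 * soon as the requested amount was received
+		 */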
+ struct htx_blk *blk;
+ unsigned int len = 0;
+
+ for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ if (htx_get_blk_type(blk) != HTX_BLK_DATA)
+ continue;
+ len += htx_get_blksz(blk);
+ if (len >= bytes)
+ goto end;
+ }
+ }
+
+ if ((chn->flags & CF_READ_TIMEOUT) || tick_is_expired(chn->analyse_exp, now_ms)) {
+ if (!(chn->flags & CF_ISRESP))
+ goto abort_req;
+ goto abort_res;
+ }
+
+ /* we get here if we need to wait for more data */
+ if (!(chn_prod(chn)->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))) {
+ if (!tick_isset(chn->analyse_exp))
+ chn->analyse_exp = tick_add_ifset(now_ms, time);
+ ret = HTTP_RULE_RES_YIELD;
+ }
+
+ end:
+ return ret;
+
+ abort:
+ http_set_term_flags(s);
+ http_reply_and_close(s, txn->status, http_error_message(s));
+ ret = HTTP_RULE_RES_ABRT;
+ goto end;
+
+ abort_req:
+ txn->status = 408;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLITO;
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+ goto abort;
+
+ abort_res:
+ txn->status = 504;
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVTO;
+ stream_inc_http_fail_ctr(s);
+ goto abort;
+}
+
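+/* Builds and sends a "302 Found" response redirecting the client to the
+ * server's "redir" prefix followed by the original request path, then closes
+ * the stream. On error, the incomplete response is simply removed from the
+ * buffer.
+ */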
+void http_perform_server_redirect(struct stream *s, struct stconn *sc)
+{
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct server *srv;
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct ist path, location;
+ unsigned int flags;
+ struct http_uri_parser parser;
+
+ /*
+ * Create the location
+ */
+ chunk_reset(&trash);
+
+ /* 1: add the server's prefix */
+ /* special prefix "/" means don't change URL */
+ srv = __objt_server(s->target);
+ if (srv->rdr_len != 1 || *srv->rdr_pfx != '/') {
+ if (!chunk_memcat(&trash, srv->rdr_pfx, srv->rdr_len))
+ return;
+ }
+
+ /* 2: add the request Path */
+ htx = htxbuf(&req->buf);
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (!isttest(path))
+ return;
+
+ if (!chunk_memcat(&trash, path.ptr, path.len))
+ return;
+ location = ist2(trash.area, trash.data);
+
+ /*
+ * Create the 302 response
+ */
+ htx = htx_from_buf(&res->buf);
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN|HTX_SL_F_BODYLESS);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags,
+ ist("HTTP/1.1"), ist("302"), ist("Found"));
+ if (!sl)
+ goto fail;
+ sl->info.res.status = 302;
+ s->txn->status = 302;
+
+ if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) ||
+ !htx_add_header(htx, ist("Connection"), ist("close")) ||
+ !htx_add_header(htx, ist("Content-length"), ist("0")) ||
+ !htx_add_header(htx, ist("Location"), location))
+ goto fail;
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ htx->flags |= HTX_FL_EOM;
+ htx_to_buf(htx, &res->buf);
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_LOCAL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_C;
+
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+
+ /* return without error. */
+ sc_abort(sc);
+ sc_shutdown(sc);
+ s->conn_err_type = STRM_ET_NONE;
+ sc->state = SC_ST_CLO;
+
+
+ /* FIXME: we should increase a counter of redirects per server and per backend. */
+ srv_inc_sess_ctr(srv);
+ srv_set_sess_last(srv);
+ return;
+
+ fail:
+ /* If an error occurred, remove the incomplete HTTP response from the
+ * buffer */
+ channel_htx_truncate(res, htx);
+}
+
+/* This function terminates the request because it was completely analyzed or
+ * because an error was triggered during the body forwarding.
+ */
+static void http_end_request(struct stream *s)
+{
+ struct channel *chn = &s->req;
+ struct http_txn *txn = s->txn;
+
+ DBG_TRACE_ENTER(STRM_EV_HTTP_ANA, s, txn);
+
+ if (unlikely(txn->req.msg_state < HTTP_MSG_DONE)) {
+ DBG_TRACE_DEVEL("waiting end of the request", STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ if (txn->req.msg_state == HTTP_MSG_DONE) {
+ /* No need to read anymore, the request was completely parsed.
+ * We can shut the read side unless we want to abort_on_close,
+ * or we have a POST request. The issue with POST requests is
+ * that some browsers still send a CRLF after the request, and
+ * this CRLF must be read so that it does not remain in the kernel
+ * buffers, otherwise a close could cause an RST on some systems
+ * (eg: Linux).
+ */
+ if (!(s->be->options & PR_O_ABRT_CLOSE) && txn->meth != HTTP_METH_POST)
+ channel_dont_read(chn);
+
+ /* if the server closes the connection, we want to immediately react
+ * and close the socket to save packets and syscalls.
+ */
+ s->scb->flags |= SC_FL_NOHALF;
+
+ /* In any case we've finished parsing the request so we must
+ * disable Nagle when sending data because 1) we're not going
+ * to shut this side, and 2) the server is waiting for us to
+ * send pending data.
+ */
+ s->scb->flags |= SC_FL_SND_NEVERWAIT;
+
+ if (txn->rsp.msg_state < HTTP_MSG_BODY ||
+ (txn->rsp.msg_state < HTTP_MSG_DONE && s->scb->state != SC_ST_CLO)) {
+			/* The server has not finished responding and the
+ * backend SC is not closed, so we don't want to move in
+ * order not to upset it.
+ */
+ DBG_TRACE_DEVEL("waiting end of the response", STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ /* When we get here, it means that both the request and the
+ * response have finished receiving. Depending on the connection
+ * mode, we'll have to wait for the last bytes to leave in either
+ * direction, and sometimes for a close to be effective.
+ */
+ if (txn->flags & TX_CON_WANT_TUN) {
+ /* Tunnel mode will not have any analyser so it needs to
+ * poll for reads.
+ */
+ channel_auto_read(&s->req);
+ txn->req.msg_state = HTTP_MSG_TUNNEL;
+ if (txn->rsp.msg_state != HTTP_MSG_TUNNEL)
+ s->res.flags |= CF_WAKE_ONCE;
+ }
+ else {
+ /* we're not expecting any new data to come for this
+ * transaction, so we can close it.
+ *
+ * However, there is an exception if the response
+			 * length is undefined. In this case, we need to wait for
+			 * the close from the server. The response will be
+			 * switched to TUNNEL mode until the end.
+ */
+ if (!(txn->rsp.flags & HTTP_MSGF_XFER_LEN) &&
+ txn->rsp.msg_state != HTTP_MSG_CLOSED)
+ goto check_channel_flags;
+
+ if (!(s->scb->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))) {
+ sc_schedule_abort(s->scf);
+ sc_schedule_shutdown(s->scb);
+ }
+ }
+ goto check_channel_flags;
+ }
+
+ if (txn->req.msg_state == HTTP_MSG_CLOSING) {
+ http_msg_closing:
+ /* nothing else to forward, just waiting for the output buffer
+ * to be empty and for the shut_wanted to take effect.
+ */
+ if (!co_data(chn)) {
+ txn->req.msg_state = HTTP_MSG_CLOSED;
+ goto http_msg_closed;
+ }
+ DBG_TRACE_LEAVE(STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ if (txn->req.msg_state == HTTP_MSG_CLOSED) {
+ http_msg_closed:
+ /* if we don't know whether the server will close, we need to hard close */
+ if (txn->rsp.flags & HTTP_MSGF_XFER_LEN)
+ s->scb->flags |= SC_FL_NOLINGER; /* we want to close ASAP */
+ /* see above in MSG_DONE why we only do this in these states */
+ if (!(s->be->options & PR_O_ABRT_CLOSE))
+ channel_dont_read(chn);
+ goto end;
+ }
+
+ check_channel_flags:
+ /* Here, we are in HTTP_MSG_DONE or HTTP_MSG_TUNNEL */
+ if (s->scb->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) {
+ /* if we've just closed an output, let's switch */
+ txn->req.msg_state = HTTP_MSG_CLOSING;
+ goto http_msg_closing;
+ }
+
+ end:
+ chn->analysers &= AN_REQ_FLT_END;
+ if (txn->req.msg_state == HTTP_MSG_TUNNEL) {
+ s->scb->flags |= SC_FL_SND_NEVERWAIT;
+ if (HAS_REQ_DATA_FILTERS(s))
+ chn->analysers |= AN_REQ_FLT_XFER_DATA;
+ else
+ c_adv(chn, htxbuf(&chn->buf)->data - co_data(chn));
+ }
+ channel_auto_close(chn);
+ channel_auto_read(chn);
+ DBG_TRACE_LEAVE(STRM_EV_HTTP_ANA, s, txn);
+}
+
+
+/* This function terminates the response because it was completely analyzed or
+ * because an error was triggered during the body forwarding.
+ */
+static void http_end_response(struct stream *s)
+{
+ struct channel *chn = &s->res;
+ struct http_txn *txn = s->txn;
+
+ DBG_TRACE_ENTER(STRM_EV_HTTP_ANA, s, txn);
+
+ if (unlikely(txn->rsp.msg_state < HTTP_MSG_DONE)) {
+ DBG_TRACE_DEVEL("waiting end of the response", STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ if (txn->rsp.msg_state == HTTP_MSG_DONE) {
+ /* In theory, we don't need to read anymore, but we must
+ * still monitor the server connection for a possible close
+ * while the request is being uploaded, so we don't disable
+ * reading.
+ */
+ /* channel_dont_read(chn); */
+
+ if (txn->req.msg_state < HTTP_MSG_DONE && s->scf->state != SC_ST_CLO) {
+ /* The client seems to still be sending data, probably
+ * because we got an error response during an upload.
+ * We have the choice of either breaking the connection
+			 * or letting it pass through. Let's do the latter.
+ */
+ DBG_TRACE_DEVEL("waiting end of the request", STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ /* When we get here, it means that both the request and the
+ * response have finished receiving. Depending on the connection
+ * mode, we'll have to wait for the last bytes to leave in either
+ * direction, and sometimes for a close to be effective.
+ */
+ if (txn->flags & TX_CON_WANT_TUN) {
+ channel_auto_read(&s->res);
+ txn->rsp.msg_state = HTTP_MSG_TUNNEL;
+ if (txn->req.msg_state != HTTP_MSG_TUNNEL)
+ s->req.flags |= CF_WAKE_ONCE;
+ }
+ else {
+ /* we're not expecting any new data to come for this
+ * transaction, so we can close it.
+ */
+ if (!(s->scf->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))) {
+ sc_schedule_abort(s->scb);
+ sc_schedule_shutdown(s->scf);
+ }
+ }
+ goto check_channel_flags;
+ }
+
+ if (txn->rsp.msg_state == HTTP_MSG_CLOSING) {
+ http_msg_closing:
+ /* nothing else to forward, just waiting for the output buffer
+ * to be empty and for the shut_wanted to take effect.
+ */
+ if (!co_data(chn)) {
+ txn->rsp.msg_state = HTTP_MSG_CLOSED;
+ goto http_msg_closed;
+ }
+ DBG_TRACE_LEAVE(STRM_EV_HTTP_ANA, s, txn);
+ return;
+ }
+
+ if (txn->rsp.msg_state == HTTP_MSG_CLOSED) {
+ http_msg_closed:
+ /* drop any pending data */
+ channel_htx_truncate(&s->req, htxbuf(&s->req.buf));
+ channel_abort(&s->req);
+ goto end;
+ }
+
+ check_channel_flags:
+ /* Here, we are in HTTP_MSG_DONE or HTTP_MSG_TUNNEL */
+ if (s->scf->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) {
+ /* if we've just closed an output, let's switch */
+ txn->rsp.msg_state = HTTP_MSG_CLOSING;
+ goto http_msg_closing;
+ }
+
+ end:
+ chn->analysers &= AN_RES_FLT_END;
+ if (txn->rsp.msg_state == HTTP_MSG_TUNNEL) {
+ s->scf->flags |= SC_FL_SND_NEVERWAIT;
+ if (HAS_RSP_DATA_FILTERS(s))
+ chn->analysers |= AN_RES_FLT_XFER_DATA;
+ else
+ c_adv(chn, htxbuf(&chn->buf)->data - co_data(chn));
+ }
+ channel_auto_close(chn);
+ channel_auto_read(chn);
+ DBG_TRACE_LEAVE(STRM_EV_HTTP_ANA, s, txn);
+}
+
+/* Forward a response generated by HAProxy (error/redirect/return). This
+ * function forwards all pending incoming data. If <final> is set to 0, nothing
+ * more is performed. It is used for 1xx informational messages. Otherwise, the
+ * transaction is terminated and the request is emptied. On success 1 is
+ * returned. If an error occurred, 0 is returned. If it fails, this function
+ * only exits. It is the caller's responsibility to do the cleanup.
+ */
+int http_forward_proxy_resp(struct stream *s, int final)
+{
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct htx *htx = htxbuf(&res->buf);
+ size_t data;
+
+ if (final) {
+ htx->flags |= HTX_FL_PROXY_RESP;
+
+ if (!htx_is_empty(htx) && !http_eval_after_res_rules(s))
+ return 0;
+
+ if (s->txn->meth == HTTP_METH_HEAD)
+ htx_skip_msg_payload(htx);
+
+ channel_auto_read(req);
+ channel_abort(req);
+ channel_htx_erase(req, htxbuf(&req->buf));
+
+ channel_auto_read(res);
+ channel_auto_close(res);
+ sc_schedule_abort(s->scb);
+ s->scb->flags |= SC_FL_EOI; /* The response is terminated, add EOI */
+ htxbuf(&res->buf)->flags |= HTX_FL_EOM; /* no more data are expected */
+ }
+ else {
+ /* Send ASAP informational messages. Rely on SC_FL_EOI for final
+ * response.
+ */
+ s->scf->flags |= SC_FL_SND_ASAP;
+ }
+
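+	/* schedule all produced data for forwarding at once and account for it */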
+ data = htx->data - co_data(res);
+ c_adv(res, data);
+ htx->first = -1;
+ res->total += data;
+ return 1;
+}
+
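+/* Sets the stream's error type and final state flags when they are not
+ * already set, then replies to the client with <msg> and closes the stream.
+ */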
+void http_server_error(struct stream *s, struct stconn *sc, int err,
+ int finst, struct http_reply *msg)
+{
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= err;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= finst;
+
+ http_reply_and_close(s, s->txn->status, msg);
+}
+
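+/* Replies to the client with the reply <msg>, or simply truncates any pending
+ * response when <msg> is NULL. In all cases, HTTP analysis is finished: the
+ * request channel is aborted and only the response may still be delivered.
+ */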
+void http_reply_and_close(struct stream *s, short status, struct http_reply *msg)
+{
+ if (!msg) {
+ channel_htx_truncate(&s->res, htxbuf(&s->res.buf));
+ goto end;
+ }
+
+ if (http_reply_message(s, msg) == -1) {
+ /* On error, return a 500 error message, but don't rewrite it if
+ * it is already an internal error. If it was already a "const"
+ * 500 error, just fail.
+ */
+ if (s->txn->status == 500) {
+ if (s->txn->flags & TX_CONST_REPLY)
+ goto end;
+ s->txn->flags |= TX_CONST_REPLY;
+ }
+ s->txn->status = 500;
+ s->txn->http_reply = NULL;
+ return http_reply_and_close(s, s->txn->status, http_error_message(s));
+ }
+
+end:
+	/* At this stage, HTTP analysis is finished */
+ s->req.analysers &= AN_REQ_FLT_END;
+ s->req.analyse_exp = TICK_ETERNITY;
+
+ s->res.analysers &= AN_RES_FLT_END;
+ s->res.analyse_exp = TICK_ETERNITY;
+
+ channel_auto_read(&s->req);
+ channel_abort(&s->req);
+ channel_htx_erase(&s->req, htxbuf(&s->req.buf));
+ channel_auto_read(&s->res);
+ channel_auto_close(&s->res);
+ sc_schedule_abort(s->scb);
+}
+
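+/* Returns the http reply matching the current transaction status, looking up
+ * in order the transaction's own reply, the backend's, the frontend's, and
+ * finally the default error message.
+ */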
+struct http_reply *http_error_message(struct stream *s)
+{
+ const int msgnum = http_get_status_idx(s->txn->status);
+
+ if (s->txn->http_reply)
+ return s->txn->http_reply;
+ else if (s->be->replies[msgnum])
+ return s->be->replies[msgnum];
+ else if (strm_fe(s)->replies[msgnum])
+ return strm_fe(s)->replies[msgnum];
+ else
+ return &http_err_replies[msgnum];
+}
+
+/* Produces an HTX message from an http reply. Depending on the http reply type,
+ * an errorfile, a raw file or a log-format string is used. On success, it
+ * returns 0. If an error occurs, -1 is returned. If it fails, this function only
+ * exits. It is the caller's responsibility to do the cleanup.
+ */
+int http_reply_to_htx(struct stream *s, struct htx *htx, struct http_reply *reply)
+{
+ struct buffer *errmsg;
+ struct htx_sl *sl;
+ struct buffer *body = NULL;
+ const char *status, *reason, *clen, *ctype;
+ unsigned int slflags;
+ int ret = 0;
+
+ /*
+	 * - HTTP_REPLY_ERRFILES is unexpected here; it is handled as no
+	 *   payload if encountered.
+	 *
+	 * - HTTP_REPLY_INDIRECT: switch to another reply if one is defined, or
+	 *   handle it as no payload if NULL. The TXN status code is set with
+	 *   the status of the original reply.
+ */
+
+ if (reply->type == HTTP_REPLY_INDIRECT) {
+ if (reply->body.reply)
+ reply = reply->body.reply;
+ }
+ if (reply->type == HTTP_REPLY_ERRMSG && !reply->body.errmsg) {
+ /* get default error message */
+ if (reply == s->txn->http_reply)
+ s->txn->http_reply = NULL;
+ reply = http_error_message(s);
+ if (reply->type == HTTP_REPLY_INDIRECT) {
+ if (reply->body.reply)
+ reply = reply->body.reply;
+ }
+ }
+
+ if (reply->type == HTTP_REPLY_ERRMSG) {
+		/* implicit or explicit error message */
+ errmsg = reply->body.errmsg;
+ if (errmsg && !b_is_null(errmsg)) {
+ if (!htx_copy_msg(htx, errmsg))
+ goto fail;
+ }
+ }
+ else {
+ /* no payload, file or log-format string */
+ if (reply->type == HTTP_REPLY_RAW) {
+ /* file */
+ body = &reply->body.obj;
+ }
+ else if (reply->type == HTTP_REPLY_LOGFMT) {
+ /* log-format string */
+ body = alloc_trash_chunk();
+ if (!body)
+ goto fail_alloc;
+ body->data = build_logline(s, body->area, body->size, &reply->body.fmt);
+ }
+ /* else no payload */
+
+ status = ultoa(reply->status);
+ reason = http_get_reason(reply->status);
+ slflags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN);
+ if (!body || !b_data(body))
+ slflags |= HTX_SL_F_BODYLESS;
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, slflags, ist("HTTP/1.1"), ist(status), ist(reason));
+ if (!sl)
+ goto fail;
+ sl->info.res.status = reply->status;
+
+ clen = (body ? ultoa(b_data(body)) : "0");
+ ctype = reply->ctype;
+
+ if (!LIST_ISEMPTY(&reply->hdrs)) {
+ struct http_reply_hdr *hdr;
+ struct buffer *value = alloc_trash_chunk();
+
+ if (!value)
+ goto fail;
+
+ list_for_each_entry(hdr, &reply->hdrs, list) {
+ chunk_reset(value);
+ value->data = build_logline(s, value->area, value->size, &hdr->value);
+ if (b_data(value) && !htx_add_header(htx, hdr->name, ist2(b_head(value), b_data(value)))) {
+ free_trash_chunk(value);
+ goto fail;
+ }
+ chunk_reset(value);
+ }
+ free_trash_chunk(value);
+ }
+
+ if (!htx_add_header(htx, ist("content-length"), ist(clen)) ||
+ (body && b_data(body) && ctype && !htx_add_header(htx, ist("content-type"), ist(ctype))) ||
+ !htx_add_endof(htx, HTX_BLK_EOH) ||
+ (body && b_data(body) && !htx_add_data_atonce(htx, ist2(b_head(body), b_data(body)))))
+ goto fail;
+
+ htx->flags |= HTX_FL_EOM;
+ }
+
+ leave:
+ if (reply->type == HTTP_REPLY_LOGFMT)
+ free_trash_chunk(body);
+ return ret;
+
+ fail_alloc:
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ /* fall through */
+ fail:
+ ret = -1;
+ goto leave;
+}
+
+/* Send an http reply to the client. On success, it returns 0. If an error
+ * occurs, -1 is returned and the response channel is truncated, removing the
+ * faulty reply this way. This function may fail when the reply is formatted
+ * (http_reply_to_htx) or when the reply is forwarded
+ * (http_forward_proxy_resp). In the latter case, it is because an
+ * http-after-response rule failed.
+ */
+int http_reply_message(struct stream *s, struct http_reply *reply)
+{
+ struct channel *res = &s->res;
+ struct htx *htx = htx_from_buf(&res->buf);
+
+ if (s->txn->status == -1)
+ s->txn->status = reply->status;
+ channel_htx_truncate(res, htx);
+
+ if (http_reply_to_htx(s, htx, reply) == -1)
+ goto fail;
+
+ htx_to_buf(htx, &s->res.buf);
+ if (!http_forward_proxy_resp(s, 1))
+ goto fail;
+ return 0;
+
+ fail:
+ channel_htx_truncate(res, htx);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ return -1;
+}
+
+/* Return the error message corresponding to s->conn_err_type. It is assumed
+ * that the server side is closed. Note that err_type is actually a
+ * bitmask, where almost only aborts may be cumulated with other
+ * values. We consider that aborted operations are more important
+ * than timeouts or errors due to the fact that nobody else in the
+ * logs might explain incomplete retries. All others should avoid
+ * being cumulated. It should normally not be possible to have multiple
+ * aborts at once, but just in case, the first one in sequence is reported.
+ * Note that connection errors appearing on the second request of a keep-alive
+ * connection are not reported since this allows the client to retry.
+ */
+void http_return_srv_error(struct stream *s, struct stconn *sc)
+{
+ int err_type = s->conn_err_type;
+
+ /* set s->txn->status for http_error_message(s) */
+ if (err_type & STRM_ET_QUEUE_ABRT) {
+ s->txn->status = -1;
+ http_server_error(s, sc, SF_ERR_CLICL, SF_FINST_Q, NULL);
+ }
+ else if (err_type & STRM_ET_CONN_ABRT) {
+ s->txn->status = -1;
+ http_server_error(s, sc, SF_ERR_CLICL, SF_FINST_C, NULL);
+ }
+ else if (err_type & STRM_ET_QUEUE_TO) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_SRVTO, SF_FINST_Q,
+ http_error_message(s));
+ }
+ else if (err_type & STRM_ET_QUEUE_ERR) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_SRVCL, SF_FINST_Q,
+ http_error_message(s));
+ }
+ else if (err_type & STRM_ET_CONN_TO) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_SRVTO, SF_FINST_C,
+ (s->txn->flags & TX_NOT_FIRST) ? NULL :
+ http_error_message(s));
+ }
+ else if (err_type & STRM_ET_CONN_ERR) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_SRVCL, SF_FINST_C,
+ (s->flags & SF_SRV_REUSED) ? NULL :
+ http_error_message(s));
+ }
+ else if (err_type & STRM_ET_CONN_RES) {
+ s->txn->status = 503;
+ http_server_error(s, sc, SF_ERR_RESOURCE, SF_FINST_C,
+ (s->txn->flags & TX_NOT_FIRST) ? NULL :
+ http_error_message(s));
+ }
+ else { /* STRM_ET_CONN_OTHER and others */
+ s->txn->status = 500;
+ http_server_error(s, sc, SF_ERR_INTERNAL, SF_FINST_C,
+ http_error_message(s));
+ }
+}
+
+
+/* Handle Expect: 100-continue for HTTP/1.1 messages if necessary. It returns 0
+ * on success and -1 on error.
+ */
+static int http_handle_expect_hdr(struct stream *s, struct htx *htx, struct http_msg *msg)
+{
+ /* If we have HTTP/1.1 message with a body and Expect: 100-continue,
+ * then we must send an HTTP/1.1 100 Continue intermediate response.
+ */
+ if (!(msg->flags & HTTP_MSGF_EXPECT_CHECKED) &&
+ (msg->flags & HTTP_MSGF_VER_11) &&
+ (msg->flags & (HTTP_MSGF_CNT_LEN|HTTP_MSGF_TE_CHNK))) {
+ struct ist hdr = { .ptr = "Expect", .len = 6 };
+ struct http_hdr_ctx ctx;
+
+ ctx.blk = NULL;
+ /* Expect is allowed in 1.1, look for it */
+ if (http_find_header(htx, hdr, &ctx, 0) &&
+ unlikely(isteqi(ctx.value, ist2("100-continue", 12)))) {
+ if (http_reply_100_continue(s) == -1)
+ return -1;
+ http_remove_header(htx, &ctx);
+ }
+ }
+ msg->flags |= HTTP_MSGF_EXPECT_CHECKED;
+ return 0;
+}
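+
+/* For illustration, a hedged sketch of the exchange handled above (the URI
+ * and the length are made up):
+ *
+ *    > POST /upload HTTP/1.1
+ *    > Content-Length: 1024
+ *    > Expect: 100-continue
+ *
+ *    < HTTP/1.1 100 Continue
+ *
+ * The Expect header is then removed so it is not forwarded to the server,
+ * which would otherwise emit its own interim response.
+ */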
+
+/* Send a 100-Continue response to the client. It returns 0 on success and -1
+ * on error. The response channel is updated accordingly.
+ */
+static int http_reply_100_continue(struct stream *s)
+{
+ struct channel *res = &s->res;
+ struct htx *htx = htx_from_buf(&res->buf);
+ struct htx_sl *sl;
+ unsigned int flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|
+ HTX_SL_F_XFER_LEN|HTX_SL_F_BODYLESS);
+
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags,
+ ist("HTTP/1.1"), ist("100"), ist("Continue"));
+ if (!sl)
+ goto fail;
+ sl->info.res.status = 100;
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto fail;
+
+ if (!http_forward_proxy_resp(s, 0))
+ goto fail;
+ return 0;
+
+ fail:
+ /* If an error occurred, remove the incomplete HTTP response from the
+ * buffer */
+ channel_htx_truncate(res, htx);
+ return -1;
+}
+
+
+/*
+ * Capture headers from message <htx> according to header list <cap_hdr>, and
+ * fill the <cap> pointers appropriately.
+ */
+static void http_capture_headers(struct htx *htx, char **cap, struct cap_hdr *cap_hdr)
+{
+ struct cap_hdr *h;
+ int32_t pos;
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n, v;
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ n = htx_get_blk_name(htx, blk);
+
+ for (h = cap_hdr; h; h = h->next) {
+ if (h->namelen && (h->namelen == n.len) &&
+ (strncasecmp(n.ptr, h->name, h->namelen) == 0)) {
+ if (cap[h->index] == NULL)
+ cap[h->index] =
+ pool_alloc(h->pool);
+
+ if (cap[h->index] == NULL) {
+ ha_alert("HTTP capture : out of memory.\n");
+ break;
+ }
+
+ v = htx_get_blk_value(htx, blk);
+ v = isttrim(v, h->len);
+
+ memcpy(cap[h->index], v.ptr, v.len);
+ cap[h->index][v.len]=0;
+ }
+ }
+ }
+}
+
+/* Delete a value in a header between delimiters <from> and <next>. The header
+ * itself is delimited by <start> and <end> pointers. The number of characters
+ * displaced is returned, and the pointer to the first delimiter is updated if
+ * required. The function tries as much as possible to respect the following
+ * principles :
+ * - replace <from> delimiter by the <next> one unless <from> points to <start>,
+ * in which case <next> is simply removed
+ * - set exactly one space character after the new first delimiter, unless there
+ * are not enough characters in the block being moved to do so.
+ * - remove unneeded spaces before the previous delimiter and after the new
+ * one.
+ *
+ * It is the caller's responsibility to ensure that :
+ * - <from> points to a valid delimiter or <start> ;
+ * - <next> points to a valid delimiter or <end> ;
+ * - there are non-space chars before <from>.
+ */
+static int http_del_hdr_value(char *start, char *end, char **from, char *next)
+{
+ char *prev = *from;
+
+ if (prev == start) {
+ /* We're removing the first value. eat the semicolon, if <next>
+ * is lower than <end> */
+ if (next < end)
+ next++;
+
+ while (next < end && HTTP_IS_SPHT(*next))
+ next++;
+ }
+ else {
+ /* Remove useless spaces before the old delimiter. */
+ while (HTTP_IS_SPHT(*(prev-1)))
+ prev--;
+ *from = prev;
+
+ /* copy the delimiter and if possible a space if we're
+ * not at the end of the line.
+ */
+ if (next < end) {
+ *prev++ = *next++;
+ if (prev + 1 < next)
+ *prev++ = ' ';
+ while (next < end && HTTP_IS_SPHT(*next))
+ next++;
+ }
+ }
+ memmove(prev, next, end - next);
+ return (prev - next);
+}
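+
+/* A worked example with illustrative values: deleting "b=2" from the value
+ * "a=1; b=2; c=3", with <from> pointing to the ';' preceding " b=2" and
+ * <next> pointing to the ';' preceding " c=3", yields "a=1; c=3": the old
+ * delimiter is replaced by the next one, exactly one space is kept after it,
+ * and the now-useless spaces are removed.
+ */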
+
+
+/* Formats the start line of the request (without CRLF) and puts it in <str> and
+ * returns the written length. The line can be truncated if it exceeds <len>.
+ */
+static size_t http_fmt_req_line(const struct htx_sl *sl, char *str, size_t len)
+{
+ struct ist dst = ist2(str, 0);
+
+ if (istcat(&dst, htx_sl_req_meth(sl), len) == -1)
+ goto end;
+ if (dst.len + 1 > len)
+ goto end;
+ dst.ptr[dst.len++] = ' ';
+
+ if (istcat(&dst, htx_sl_req_uri(sl), len) == -1)
+ goto end;
+ if (dst.len + 1 > len)
+ goto end;
+ dst.ptr[dst.len++] = ' ';
+
+ istcat(&dst, htx_sl_req_vsn(sl), len);
+ end:
+ return dst.len;
+}
+
+/*
+ * Print a debug line with a start line.
+ */
+static void http_debug_stline(const char *dir, struct stream *s, const struct htx_sl *sl)
+{
+ struct session *sess = strm_sess(s);
+ int max;
+
+ chunk_printf(&trash, "%08x:%s.%s[%04x:%04x]: ", s->uniq_id, s->be->id,
+ dir,
+ objt_conn(sess->origin) ? (unsigned short)__objt_conn(sess->origin)->handle.fd : -1,
+ sc_conn(s->scb) ? (unsigned short)(__sc_conn(s->scb))->handle.fd : -1);
+
+ max = HTX_SL_P1_LEN(sl);
+ UBOUND(max, trash.size - trash.data - 3);
+ chunk_memcat(&trash, HTX_SL_P1_PTR(sl), max);
+ trash.area[trash.data++] = ' ';
+
+ max = HTX_SL_P2_LEN(sl);
+ UBOUND(max, trash.size - trash.data - 2);
+ chunk_memcat(&trash, HTX_SL_P2_PTR(sl), max);
+ trash.area[trash.data++] = ' ';
+
+ max = HTX_SL_P3_LEN(sl);
+ UBOUND(max, trash.size - trash.data - 1);
+ chunk_memcat(&trash, HTX_SL_P3_PTR(sl), max);
+ trash.area[trash.data++] = '\n';
+
+ DISGUISE(write(1, trash.area, trash.data));
+}
+
+/*
+ * Print a debug line with a header.
+ */
+static void http_debug_hdr(const char *dir, struct stream *s, const struct ist n, const struct ist v)
+{
+ struct session *sess = strm_sess(s);
+ int max;
+
+ chunk_printf(&trash, "%08x:%s.%s[%04x:%04x]: ", s->uniq_id, s->be->id,
+ dir,
+ objt_conn(sess->origin) ? (unsigned short)__objt_conn(sess->origin)->handle.fd : -1,
+ sc_conn(s->scb) ? (unsigned short)(__sc_conn(s->scb))->handle.fd : -1);
+
+ max = n.len;
+ UBOUND(max, trash.size - trash.data - 3);
+ chunk_memcat(&trash, n.ptr, max);
+ trash.area[trash.data++] = ':';
+ trash.area[trash.data++] = ' ';
+
+ max = v.len;
+ UBOUND(max, trash.size - trash.data - 1);
+ chunk_memcat(&trash, v.ptr, max);
+ trash.area[trash.data++] = '\n';
+
+ DISGUISE(write(1, trash.area, trash.data));
+}
+
+void http_txn_reset_req(struct http_txn *txn)
+{
+ txn->req.flags = 0;
+ txn->req.msg_state = HTTP_MSG_RQBEFORE; /* at the very beginning of the request */
+}
+
+void http_txn_reset_res(struct http_txn *txn)
+{
+ txn->rsp.flags = 0;
+ txn->rsp.msg_state = HTTP_MSG_RPBEFORE; /* at the very beginning of the response */
+}
+
+/*
+ * Create and initialize a new HTTP transaction for stream <s>. This should be
+ * used before processing any new request. It returns the transaction or NULL
+ * on error.
+ */
+struct http_txn *http_create_txn(struct stream *s)
+{
+ struct http_txn *txn;
+ struct stconn *sc = s->scf;
+
+ txn = pool_alloc(pool_head_http_txn);
+ if (!txn)
+ return NULL;
+ s->txn = txn;
+
+ txn->meth = HTTP_METH_OTHER;
+ txn->flags = ((sc && sc_ep_test(sc, SE_FL_NOT_FIRST)) ? TX_NOT_FIRST : 0);
+ txn->status = -1;
+ txn->server_status = -1;
+ txn->http_reply = NULL;
+ txn->l7_buffer = BUF_NULL;
+ write_u32(txn->cache_hash, 0);
+
+ txn->cookie_first_date = 0;
+ txn->cookie_last_date = 0;
+
+ txn->srv_cookie = NULL;
+ txn->cli_cookie = NULL;
+ txn->uri = NULL;
+
+ http_txn_reset_req(txn);
+ http_txn_reset_res(txn);
+
+ txn->req.chn = &s->req;
+ txn->rsp.chn = &s->res;
+
+ txn->auth.method = HTTP_AUTH_UNKNOWN;
+
+ /* here we don't want to re-initialize s->vars_txn and s->vars_reqres
+ * variable lists, because they were already initialized upon stream
+ * creation in stream_new(), and thus may already contain some variables
+ */
+
+ return txn;
+}
+
+/* to be used at the end of a transaction */
+void http_destroy_txn(struct stream *s)
+{
+ struct http_txn *txn = s->txn;
+
+ /* these ones will have been dynamically allocated */
+ pool_free(pool_head_requri, txn->uri);
+ pool_free(pool_head_capture, txn->cli_cookie);
+ pool_free(pool_head_capture, txn->srv_cookie);
+ pool_free(pool_head_uniqueid, s->unique_id.ptr);
+
+ s->unique_id = IST_NULL;
+ txn->uri = NULL;
+ txn->srv_cookie = NULL;
+ txn->cli_cookie = NULL;
+
+ if (!LIST_ISEMPTY(&s->vars_txn.head))
+ vars_prune(&s->vars_txn, s->sess, s);
+ if (!LIST_ISEMPTY(&s->vars_reqres.head))
+ vars_prune(&s->vars_reqres, s->sess, s);
+
+ b_free(&txn->l7_buffer);
+
+ pool_free(pool_head_http_txn, txn);
+ s->txn = NULL;
+}
+
+
+void http_set_term_flags(struct stream *s)
+{
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+
+ if (!(s->flags & SF_FINST_MASK)) {
+ if (s->scb->state == SC_ST_INI) {
+ /* Before any connection attempt on the server side, we
+			 * are still in the request analysis. Just take care to
+			 * detect tarpit errors.
+ */
+ if (s->req.analysers & AN_REQ_HTTP_TARPIT)
+ s->flags |= SF_FINST_T;
+ else
+ s->flags |= SF_FINST_R;
+ }
+ else if (s->scb->state == SC_ST_QUE)
+ s->flags |= SF_FINST_Q;
+ else if (sc_state_in(s->scb->state, SC_SB_REQ|SC_SB_TAR|SC_SB_ASS|SC_SB_CON|SC_SB_CER|SC_SB_RDY)) {
+ if (unlikely(objt_applet(s->target))) {
+ s->flags |= SF_FINST_R;
+ }
+ else
+ s->flags |= SF_FINST_C;
+ }
+ else {
+ if (s->txn->rsp.msg_state < HTTP_MSG_DATA) {
+ /* We are still processing the response headers */
+ s->flags |= SF_FINST_H;
+ }
+ // (res == (done|closing|closed)) & (res->flags & shutw)
+ else if (s->txn->rsp.msg_state >= HTTP_MSG_DONE && s->txn->rsp.msg_state < HTTP_MSG_TUNNEL &&
+ (s->flags & (SF_ERR_CLITO|SF_ERR_CLICL))) {
+ /* A client error was reported and we are
+ * transmitting the last block of data
+ */
+ s->flags |= SF_FINST_L;
+ }
+ else {
+ /* Otherwise we are in DATA phase on both sides */
+ s->flags |= SF_FINST_D;
+ }
+ }
+ }
+}
+
+
+DECLARE_POOL(pool_head_http_txn, "http_txn", sizeof(struct http_txn));
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_client.c b/src/http_client.c
new file mode 100644
index 0000000..d7e50c0
--- /dev/null
+++ b/src/http_client.c
@@ -0,0 +1,1598 @@
+/*
+ * HTTP Client
+ *
+ * Copyright (C) 2021 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file implements an HTTP Client API.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cli.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/global.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_client.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/htx.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+#include <string.h>
+
+static struct proxy *httpclient_proxy;
+
+#ifdef USE_OPENSSL
+/* if the httpclient is not configured, errors are ignored and features are limited */
+static int hard_error_ssl = 0;
+static int httpclient_ssl_verify = SSL_SOCK_VERIFY_REQUIRED;
+static char *httpclient_ssl_ca_file = NULL;
+#endif
+static struct applet httpclient_applet;
+
+/* if the httpclient is not configured, errors are ignored and features are limited */
+static int hard_error_resolvers = 0;
+static char *resolvers_id = NULL;
+static char *resolvers_prefer = NULL;
+static int resolvers_disabled = 0;
+
+static int httpclient_retries = CONN_RETRIES;
+static int httpclient_timeout_connect = MS_TO_TICKS(5000);
+
+/* --- This part of the file implements an HTTP client over the CLI ---
+ * The function names start with "hc_cli" for "httpclient cli".
+ */
+
+/* the CLI context for the httpclient command */
+struct hcli_svc_ctx {
+ struct httpclient *hc; /* the httpclient instance */
+	uint flags; /* flags from HC_F_RES_* */
+};
+
+/* These are the callbacks used by the HTTP client when it needs to notify
+ * about new data; they only set a flag in the IO handler via the svcctx.
+ */
+void hc_cli_res_stline_cb(struct httpclient *hc)
+{
+ struct appctx *appctx = hc->caller;
+ struct hcli_svc_ctx *ctx;
+
+ if (!appctx)
+ return;
+
+ ctx = appctx->svcctx;
+ ctx->flags |= HC_F_RES_STLINE;
+ appctx_wakeup(appctx);
+}
+
+void hc_cli_res_headers_cb(struct httpclient *hc)
+{
+ struct appctx *appctx = hc->caller;
+ struct hcli_svc_ctx *ctx;
+
+ if (!appctx)
+ return;
+
+ ctx = appctx->svcctx;
+ ctx->flags |= HC_F_RES_HDR;
+ appctx_wakeup(appctx);
+}
+
+void hc_cli_res_body_cb(struct httpclient *hc)
+{
+ struct appctx *appctx = hc->caller;
+ struct hcli_svc_ctx *ctx;
+
+ if (!appctx)
+ return;
+
+ ctx = appctx->svcctx;
+ ctx->flags |= HC_F_RES_BODY;
+ appctx_wakeup(appctx);
+}
+
+void hc_cli_res_end_cb(struct httpclient *hc)
+{
+ struct appctx *appctx = hc->caller;
+ struct hcli_svc_ctx *ctx;
+
+ if (!appctx)
+ return;
+
+ ctx = appctx->svcctx;
+ ctx->flags |= HC_F_RES_END;
+ appctx_wakeup(appctx);
+}
+
+/*
+ * Parse an httpclient keyword on the cli:
+ * httpclient <method> <URI>
+ */
+static int hc_cli_parse(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct hcli_svc_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct httpclient *hc;
+ char *err = NULL;
+ enum http_meth_t meth;
+ char *meth_str;
+ struct ist uri;
+ struct ist body = IST_NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[1] || !*args[2]) {
+ memprintf(&err, ": not enough parameters");
+ goto err;
+ }
+
+ meth_str = args[1];
+ uri = ist(args[2]);
+
+ if (payload)
+ body = ist(payload);
+
+ meth = find_http_meth(meth_str, strlen(meth_str));
+
+ hc = httpclient_new(appctx, meth, uri);
+ if (!hc) {
+ goto err;
+ }
+
+ /* update the httpclient callbacks */
+ hc->ops.res_stline = hc_cli_res_stline_cb;
+ hc->ops.res_headers = hc_cli_res_headers_cb;
+ hc->ops.res_payload = hc_cli_res_body_cb;
+ hc->ops.res_end = hc_cli_res_end_cb;
+
+ ctx->hc = hc; /* store the httpclient ptr in the applet */
+ ctx->flags = 0;
+
+ if (httpclient_req_gen(hc, hc->req.url, hc->req.meth, NULL, body) != ERR_NONE)
+ goto err;
+
+
+ if (!httpclient_start(hc))
+ goto err;
+
+ return 0;
+
+err:
+ memprintf(&err, "Can't start the HTTP client%s.\n", err ? err : "");
+ return cli_err(appctx, err);
+}
+
+/* This function dumps the content of the httpclient receive buffer
+ * on the CLI output.
+ *
+ * Returns 1 when the processing is finished, and 0 if it needs to be
+ * called again.
+ */
+static int hc_cli_io_handler(struct appctx *appctx)
+{
+ struct hcli_svc_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct httpclient *hc = ctx->hc;
+ struct http_hdr *hdrs, *hdr;
+
+ if (ctx->flags & HC_F_RES_STLINE) {
+ chunk_printf(&trash, "%.*s %d %.*s\n", (unsigned int)istlen(hc->res.vsn), istptr(hc->res.vsn),
+ hc->res.status, (unsigned int)istlen(hc->res.reason), istptr(hc->res.reason));
+ if (applet_putchk(appctx, &trash) == -1)
+ goto more;
+ ctx->flags &= ~HC_F_RES_STLINE;
+ }
+
+ if (ctx->flags & HC_F_RES_HDR) {
+ chunk_reset(&trash);
+ hdrs = hc->res.hdrs;
+ for (hdr = hdrs; isttest(hdr->v); hdr++) {
+ if (!h1_format_htx_hdr(hdr->n, hdr->v, &trash))
+ goto too_many_hdrs;
+ }
+ if (!chunk_memcat(&trash, "\r\n", 2))
+ goto too_many_hdrs;
+ if (applet_putchk(appctx, &trash) == -1)
+ goto more;
+ ctx->flags &= ~HC_F_RES_HDR;
+ }
+
+ if (ctx->flags & HC_F_RES_BODY) {
+ int ret;
+
+ ret = httpclient_res_xfer(hc, sc_ib(sc));
+ channel_add_input(sc_ic(sc), ret); /* forward what we put in the buffer channel */
+
+ /* remove the flag if the buffer was emptied */
+ if (httpclient_data(hc))
+ goto more;
+ ctx->flags &= ~HC_F_RES_BODY;
+ }
+
+ /* we must close only if F_END is the last flag */
+ if (ctx->flags == HC_F_RES_END) {
+ ctx->flags &= ~HC_F_RES_END;
+ goto end;
+ }
+
+more:
+ if (!ctx->flags)
+ applet_have_no_more_data(appctx);
+ return 0;
+end:
+ return 1;
+
+too_many_hdrs:
+ return cli_err(appctx, "Too many headers.\n");
+}
+
+static void hc_cli_release(struct appctx *appctx)
+{
+ struct hcli_svc_ctx *ctx = appctx->svcctx;
+ struct httpclient *hc = ctx->hc;
+
+ /* Everything possible was printed on the CLI, we can destroy the client */
+ httpclient_stop_and_destroy(hc);
+
+ return;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "httpclient", NULL }, "httpclient <method> <URI> : launch an HTTP request", hc_cli_parse, hc_cli_io_handler, hc_cli_release, NULL, ACCESS_EXPERT},
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
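+
+/* An illustrative session on the stats socket (the socket path is an
+ * assumption; the command is restricted to the expert level):
+ *
+ *    $ socat /var/run/haproxy.sock readline
+ *    > expert-mode on
+ *    > httpclient GET https://www.example.com/
+ *    HTTP/1.1 200 OK
+ *    ...
+ */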
+
+
+/* --- This part of the file implements the actual HTTP client API --- */
+
+/*
+ * Generate a simple request and fill the httpclient request buffer with it.
+ * The request contains a request line generated from the absolute <url> and
+ * <meth>, as well as the list of headers <hdrs>.
+ *
+ * If the buffer was filled correctly, the function returns 0; if not, it
+ * returns an error code, but there is no guarantee that the buffer wasn't
+ * modified.
+ */
+int httpclient_req_gen(struct httpclient *hc, const struct ist url, enum http_meth_t meth, const struct http_hdr *hdrs, const struct ist payload)
+{
+ struct htx_sl *sl;
+ struct htx *htx;
+ int err_code = 0;
+ struct ist meth_ist, vsn;
+ unsigned int flags = HTX_SL_F_VER_11 | HTX_SL_F_NORMALIZED_URI | HTX_SL_F_HAS_SCHM;
+ int i;
+ int foundhost = 0, foundaccept = 0, foundua = 0;
+
+ if (!b_alloc(&hc->req.buf))
+ goto error;
+
+ if (meth >= HTTP_METH_OTHER)
+ goto error;
+
+ meth_ist = http_known_methods[meth];
+
+ vsn = ist("HTTP/1.1");
+
+ htx = htx_from_buf(&hc->req.buf);
+ if (!htx)
+ goto error;
+
+ if (!hc->ops.req_payload && !isttest(payload))
+ flags |= HTX_SL_F_BODYLESS;
+
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, meth_ist, url, vsn);
+ if (!sl) {
+ goto error;
+ }
+ sl->info.req.meth = meth;
+
+ for (i = 0; hdrs && hdrs[i].n.len; i++) {
+ /* Don't check the value length because a header value may be empty */
+ if (isttest(hdrs[i].v) == 0)
+ continue;
+
+ if (isteqi(hdrs[i].n, ist("host")))
+ foundhost = 1;
+ else if (isteqi(hdrs[i].n, ist("accept")))
+ foundaccept = 1;
+ else if (isteqi(hdrs[i].n, ist("user-agent")))
+ foundua = 1;
+
+ if (!htx_add_header(htx, hdrs[i].n, hdrs[i].v))
+ goto error;
+ }
+
+ if (!foundhost) {
+ /* Add Host Header from URL */
+ if (!htx_add_header(htx, ist("Host"), ist("h")))
+ goto error;
+ if (!http_update_host(htx, sl, url))
+ goto error;
+ }
+
+ if (!foundaccept) {
+ if (!htx_add_header(htx, ist("Accept"), ist("*/*")))
+ goto error;
+ }
+
+ if (!foundua) {
+ if (!htx_add_header(htx, ist("User-Agent"), ist(HTTPCLIENT_USERAGENT)))
+ goto error;
+ }
+
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto error;
+
+ if (isttest(payload) && istlen(payload)) {
+		/* add the payload if it can fit in the buffer; no need to set
+		 * the Content-Length, the data will be sent chunked */
+ if (!htx_add_data_atonce(htx, payload))
+ goto error;
+ }
+
+	/* If a req_payload callback was set, do not set the end of message
+	 * here: it *MUST* be set by the callback */
+ if (!hc->ops.req_payload)
+ htx->flags |= HTX_FL_EOM;
+
+ htx_to_buf(htx, &hc->req.buf);
+
+ return 0;
+error:
+ err_code |= ERR_ALERT | ERR_ABORT;
+ return err_code;
+}
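+
+/* A minimal usage sketch of this API (illustrative: error handling is
+ * shortened and "my_res_end" is a hypothetical caller callback):
+ *
+ *	struct httpclient *hc;
+ *
+ *	hc = httpclient_new(NULL, HTTP_METH_GET, ist("https://www.example.com/"));
+ *	if (!hc)
+ *		return;
+ *	hc->ops.res_end = my_res_end;
+ *	if (httpclient_req_gen(hc, hc->req.url, hc->req.meth, NULL, IST_NULL) != ERR_NONE ||
+ *	    !httpclient_start(hc))
+ *		httpclient_stop_and_destroy(hc);
+ */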
+
+/*
+ * Transfer the response to the destination buffer and wake up the HTTP client
+ * applet so it can fill its buffer again.
+ *
+ * Returns the number of bytes transferred.
+ */
+int httpclient_res_xfer(struct httpclient *hc, struct buffer *dst)
+{
+ size_t room = b_room(dst);
+ int ret;
+
+ ret = b_force_xfer(dst, &hc->res.buf, MIN(room, b_data(&hc->res.buf)));
+ /* call the client once we consumed all data */
+ if (!b_data(&hc->res.buf)) {
+ b_free(&hc->res.buf);
+ if (hc->appctx)
+ appctx_wakeup(hc->appctx);
+ }
+ return ret;
+}
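+
+/* A hedged sketch of a res_payload callback built on top of this function
+ * ("my_res_payload" and "process" are hypothetical; the trash chunk is only
+ * one possible destination):
+ *
+ *	static void my_res_payload(struct httpclient *hc)
+ *	{
+ *		struct buffer *dst = get_trash_chunk();
+ *		int ret = httpclient_res_xfer(hc, dst);
+ *
+ *		process(b_head(dst), ret);
+ *	}
+ */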
+
+/*
+ * Transfer raw HTTP payload from src, and insert it into HTX format in the
+ * httpclient.
+ *
+ * Must be used to transfer the request body.
+ * Then wakes up the httpclient so it can transfer it.
+ *
+ * <end> tries to add the end-of-message flag if all the data was copied.
+ *
+ * Returns the number of bytes copied from <src>.
+ */
+int httpclient_req_xfer(struct httpclient *hc, struct ist src, int end)
+{
+ int ret = 0;
+ struct htx *htx;
+
+ if (!b_alloc(&hc->req.buf))
+ goto error;
+
+ htx = htx_from_buf(&hc->req.buf);
+ if (!htx)
+ goto error;
+
+ if (hc->appctx)
+ appctx_wakeup(hc->appctx);
+
+ ret += htx_add_data(htx, src);
+
+
+ /* if we copied all the data and the end flag is set */
+ if ((istlen(src) == ret) && end) {
+		/* no more data is expected. If the HTX buffer is empty, be
+		 * sure to add something (an EOT block in this case) to have
+		 * something to send. It is important to make sure the EOM flag
+		 * will be handled by the endpoint. Because the message is
+		 * empty, this should not fail. Otherwise it is an error.
+ */
+ if (htx_is_empty(htx)) {
+ if (!htx_add_endof(htx, HTX_BLK_EOT))
+ goto error;
+ }
+ htx->flags |= HTX_FL_EOM;
+ }
+ htx_to_buf(htx, &hc->req.buf);
+
+error:
+
+ return ret;
+}
+
+/* Set the 'timeout server' in ms for the next httpclient request */
+void httpclient_set_timeout(struct httpclient *hc, int timeout)
+{
+ hc->timeout_server = timeout;
+}
+
+/*
+ * Sets a destination for the httpclient from an HAProxy address format.
+ * This prevents the destination from being determined from the URL.
+ * Returns 0 in case of success or -1 otherwise.
+ */
+int httpclient_set_dst(struct httpclient *hc, const char *dst)
+{
+ struct sockaddr_storage *sk;
+ char *errmsg = NULL;
+
+ sockaddr_free(&hc->dst);
+ /* 'sk' is statically allocated (no need to be freed). */
+ sk = str2sa_range(dst, NULL, NULL, NULL, NULL, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_PORT_OK | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("httpclient: Failed to parse destination address in %s\n", errmsg);
+ free(errmsg);
+ return -1;
+ }
+
+ if (!sockaddr_alloc(&hc->dst, sk, sizeof(*sk))) {
+ ha_alert("httpclient: Failed to allocate sockaddr in %s:%d.\n", __FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ return 0;
+}
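+
+/* For instance (illustrative address):
+ *
+ *	httpclient_set_dst(hc, "127.0.0.1:8080");
+ *
+ * forces the connection to 127.0.0.1:8080 regardless of the URL's authority.
+ */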
+
+/*
+ * Split <url> into <scheme>, <host> and <port>.
+ */
+static int httpclient_spliturl(struct ist url, enum http_scheme *scheme,
+ struct ist *host, int *port)
+{
+ enum http_scheme scheme_tmp = SCH_HTTP;
+ int port_tmp = 0;
+ struct ist scheme_ist, authority_ist, host_ist, port_ist;
+ char *p, *end;
+ struct http_uri_parser parser;
+
+ parser = http_uri_parser_init(url);
+ scheme_ist = http_parse_scheme(&parser);
+ if (!isttest(scheme_ist)) {
+ return 0;
+ }
+
+ if (isteqi(scheme_ist, ist("http://"))){
+ scheme_tmp = SCH_HTTP;
+ port_tmp = 80;
+ } else if (isteqi(scheme_ist, ist("https://"))) {
+ scheme_tmp = SCH_HTTPS;
+ port_tmp = 443;
+ }
+
+ authority_ist = http_parse_authority(&parser, 1);
+ if (!isttest(authority_ist)) {
+ return 0;
+ }
+ p = end = istend(authority_ist);
+
+ /* look for a port at the end of the authority */
+ while (p > istptr(authority_ist) && isdigit((unsigned char)*--p))
+ ;
+
+ if (*p == ':') {
+ host_ist = ist2(istptr(authority_ist), p - istptr(authority_ist));
+ port_ist = istnext(ist2(p, end - p));
+ ist2str(trash.area, port_ist);
+ port_tmp = atoi(trash.area);
+ } else {
+ host_ist = authority_ist;
+ }
+
+ if (scheme)
+ *scheme = scheme_tmp;
+ if (host)
+ *host = host_ist;
+ if (port)
+ *port = port_tmp;
+
+ return 1;
+}
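+
+/* For example (illustrative URLs), "https://www.example.com:8443/path"
+ * yields scheme = SCH_HTTPS, host = "www.example.com" and port = 8443,
+ * while "http://www.example.com/" yields SCH_HTTP, "www.example.com" and
+ * the scheme's default port, 80.
+ */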
+
+/*
+ * Start the HTTP client
+ * Create the appctx, session and stream, and wake up the applet.
+ *
+ * Returns the <appctx> or NULL if it failed.
+ */
+struct appctx *httpclient_start(struct httpclient *hc)
+{
+ struct applet *applet = &httpclient_applet;
+ struct appctx *appctx;
+
+ /* if the client was started and not ended, an applet is already
+ * running, we shouldn't try anything */
+ if (httpclient_started(hc) && !httpclient_ended(hc))
+ return NULL;
+
+ /* The HTTP client will be created in the same thread as the caller,
+ * avoiding threading issues */
+ appctx = appctx_new_here(applet, NULL);
+ if (!appctx)
+ goto out;
+ appctx->svcctx = hc;
+ hc->flags = 0;
+
+ if (appctx_init(appctx) == -1) {
+ ha_alert("httpclient: Failed to initialize appctx %s:%d.\n", __FUNCTION__, __LINE__);
+ goto out_free_appctx;
+ }
+
+ return appctx;
+
+out_free_appctx:
+ appctx_free_on_early_error(appctx);
+out:
+
+ return NULL;
+}
+
+/*
+ * This function destroys the httpclient directly if it wasn't running.
+ * If it was running, it stops the client and asks it to autodestroy itself.
+ *
+ * Once this function is used, all pointers to the client must be removed.
+ *
+ */
+void httpclient_stop_and_destroy(struct httpclient *hc)
+{
+
+ /* The httpclient was already stopped or never started, we can safely destroy it */
+ if (hc->flags & HTTPCLIENT_FS_ENDED || !(hc->flags & HTTPCLIENT_FS_STARTED)) {
+ httpclient_destroy(hc);
+ } else {
+ /* if the client wasn't stopped, ask for a stop and destroy */
+ hc->flags |= (HTTPCLIENT_FA_AUTOKILL | HTTPCLIENT_FA_STOP);
+ /* the calling applet doesn't exist anymore */
+ hc->caller = NULL;
+ if (hc->appctx)
+ appctx_wakeup(hc->appctx);
+ }
+}
+
+/* Free the httpclient */
+void httpclient_destroy(struct httpclient *hc)
+{
+ struct http_hdr *hdrs;
+
+
+ if (!hc)
+ return;
+
+ /* we should never destroy a client which was started but not stopped */
+ BUG_ON(httpclient_started(hc) && !httpclient_ended(hc));
+
+ /* request */
+ istfree(&hc->req.url);
+ b_free(&hc->req.buf);
+ /* response */
+ istfree(&hc->res.vsn);
+ istfree(&hc->res.reason);
+ hdrs = hc->res.hdrs;
+ while (hdrs && isttest(hdrs->n)) {
+ istfree(&hdrs->n);
+ istfree(&hdrs->v);
+ hdrs++;
+ }
+ ha_free(&hc->res.hdrs);
+ b_free(&hc->res.buf);
+ sockaddr_free(&hc->dst);
+
+ free(hc);
+
+ return;
+}
+
+/* Allocate an httpclient and its buffers
+ * Use the default httpclient_proxy
+ *
+ * Return NULL on failure */
+struct httpclient *httpclient_new(void *caller, enum http_meth_t meth, struct ist url)
+{
+ struct httpclient *hc;
+
+ hc = calloc(1, sizeof(*hc));
+ if (!hc)
+ goto err;
+
+ hc->req.buf = BUF_NULL;
+ hc->res.buf = BUF_NULL;
+ hc->caller = caller;
+ hc->req.url = istdup(url);
+ hc->req.meth = meth;
+ httpclient_set_proxy(hc, httpclient_proxy);
+
+ return hc;
+
+err:
+ httpclient_destroy(hc);
+ return NULL;
+}
+
+/* Allocate an httpclient and its buffers,
+ * using the proxy <px>.
+ *
+ * Returns an httpclient or NULL.
+ */
+struct httpclient *httpclient_new_from_proxy(struct proxy *px, void *caller, enum http_meth_t meth, struct ist url)
+{
+ struct httpclient *hc;
+
+ hc = httpclient_new(caller, meth, url);
+ if (!hc)
+ return NULL;
+
+ httpclient_set_proxy(hc, px);
+
+ return hc;
+}
+
+/*
+ * Configure an httpclient with a specific proxy <px>
+ *
+ * The proxy <px> must contain 2 servers, one configured for clear connections, the other for SSL.
+ *
+ */
+int httpclient_set_proxy(struct httpclient *hc, struct proxy *px)
+{
+ struct server *srv;
+
+ hc->px = px;
+
+ for (srv = px->srv; srv != NULL; srv = srv->next) {
+ if (srv->xprt == xprt_get(XPRT_RAW)) {
+ hc->srv_raw = srv;
+#ifdef USE_OPENSSL
+ } else if (srv->xprt == xprt_get(XPRT_SSL)) {
+ hc->srv_ssl = srv;
+#endif
+ }
+ }
+
+ return 0;
+}
+
+void httpclient_applet_io_handler(struct appctx *appctx)
+{
+ struct httpclient *hc = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+ struct htx_blk *blk = NULL;
+ struct htx *htx;
+ struct htx_sl *sl = NULL;
+ uint32_t hdr_num;
+ uint32_t sz;
+ int ret;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
+ if (co_data(res)) {
+ htx = htx_from_buf(&res->buf);
+ co_htx_skip(res, htx, co_data(res));
+ htx_to_buf(htx, &res->buf);
+ }
+ goto out;
+ }
+ /* The IO handler could be called after the release, so we need to
+ * check if hc is still there to run the IO handler */
+ if (!hc)
+ goto out;
+
+ while (1) {
+
+ /* required to stop */
+ if (hc->flags & HTTPCLIENT_FA_STOP)
+ goto error;
+
+ switch(appctx->st0) {
+
+ case HTTPCLIENT_S_REQ:
+			 * it's the first call; we can freely copy the
+ * it's the first call, we can freely copy the
+ * request from the httpclient buffer */
+ ret = b_xfer(&req->buf, &hc->req.buf, b_data(&hc->req.buf));
+ if (!ret) {
+ sc_need_room(sc, 0);
+ goto out;
+ }
+
+ if (!b_data(&hc->req.buf))
+ b_free(&hc->req.buf);
+
+ htx = htx_from_buf(&req->buf);
+ if (!htx) {
+ sc_need_room(sc, 0);
+ goto out;
+ }
+
+ channel_add_input(req, htx->data);
+
+			if (htx->flags & HTX_FL_EOM) /* check if a body needs to be added */
+ appctx->st0 = HTTPCLIENT_S_RES_STLINE;
+ else
+ appctx->st0 = HTTPCLIENT_S_REQ_BODY;
+
+ goto out; /* we need to leave the IO handler once we wrote the request */
+ break;
+
+ case HTTPCLIENT_S_REQ_BODY:
+ /* call the payload callback */
+ {
+ if (hc->ops.req_payload) {
+ struct htx *hc_htx;
+
+ /* call the request callback */
+ hc->ops.req_payload(hc);
+
+ hc_htx = htxbuf(&hc->req.buf);
+ if (htx_is_empty(hc_htx))
+ goto out;
+
+ htx = htx_from_buf(&req->buf);
+ if (htx_is_empty(htx)) {
+ size_t data = hc_htx->data;
+
+ /* Here htx_to_buf() will set buffer data to 0 because
+ * the HTX is empty, and allow us to do an xfer.
+ */
+ htx_to_buf(hc_htx, &hc->req.buf);
+ htx_to_buf(htx, &req->buf);
+ b_xfer(&req->buf, &hc->req.buf, b_data(&hc->req.buf));
+ channel_add_input(req, data);
+ } else {
+ struct htx_ret ret;
+
+ ret = htx_xfer_blks(htx, hc_htx, htx_used_space(hc_htx), HTX_BLK_UNUSED);
+ channel_add_input(req, ret.ret);
+
+ /* we must copy the EOM if we empty the buffer */
+ if (htx_is_empty(hc_htx)) {
+ htx->flags |= (hc_htx->flags & HTX_FL_EOM);
+ }
+ htx_to_buf(htx, &req->buf);
+ htx_to_buf(hc_htx, &hc->req.buf);
+ }
+
+
+ if (!b_data(&hc->req.buf))
+ b_free(&hc->req.buf);
+ }
+
+ htx = htxbuf(&req->buf);
+
+ /* if the request contains the HTX_FL_EOM, we finished the request part. */
+ if (htx->flags & HTX_FL_EOM)
+ appctx->st0 = HTTPCLIENT_S_RES_STLINE;
+
+ goto process_data; /* we need to leave the IO handler once we wrote the request */
+ }
+ break;
+
+ case HTTPCLIENT_S_RES_STLINE:
+ /* Request is finished, report EOI */
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+
+			/* copy the start line in the hc structure, then remove the htx block */
+ if (!co_data(res))
+ goto out;
+ htx = htxbuf(&res->buf);
+ if (htx_is_empty(htx))
+ goto out;
+ blk = htx_get_head_blk(htx);
+ if (blk && (htx_get_blk_type(blk) == HTX_BLK_RES_SL))
+ sl = htx_get_blk_ptr(htx, blk);
+ if (!sl || (!(sl->flags & HTX_SL_F_IS_RESP)))
+ goto out;
+
+ /* copy the status line in the httpclient */
+ hc->res.status = sl->info.res.status;
+ hc->res.vsn = istdup(htx_sl_res_vsn(sl));
+ hc->res.reason = istdup(htx_sl_res_reason(sl));
+ sz = htx_get_blksz(blk);
+ c_rew(res, sz);
+ htx_remove_blk(htx, blk);
+ /* caller callback */
+ if (hc->ops.res_stline)
+ hc->ops.res_stline(hc);
+
+ htx_to_buf(htx, &res->buf);
+
+ /* if there is no HTX data anymore and the EOM flag is
+ * set, leave (no body) */
+ if (htx_is_empty(htx) && htx->flags & HTX_FL_EOM)
+ appctx->st0 = HTTPCLIENT_S_RES_END;
+ else
+ appctx->st0 = HTTPCLIENT_S_RES_HDR;
+
+ break;
+
+ case HTTPCLIENT_S_RES_HDR:
+			/* first copy the headers into a local hdrs
+			 * structure; once we know the total number of
+			 * headers, we allocate the right size and copy
+			 * them. The HTX blocks of the headers are
+			 * removed each time one is read */
+ {
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+
+ if (!co_data(res))
+ goto out;
+ htx = htxbuf(&res->buf);
+ if (htx_is_empty(htx))
+ goto out;
+
+ hdr_num = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t sz = htx_get_blksz(blk);
+
+ c_rew(res, sz);
+
+ if (type == HTX_BLK_HDR) {
+ hdrs[hdr_num].n = istdup(htx_get_blk_name(htx, blk));
+ hdrs[hdr_num].v = istdup(htx_get_blk_value(htx, blk));
+ hdr_num++;
+ }
+ else if (type == HTX_BLK_EOH) {
+ /* create a NULL end of array and leave the loop */
+ hdrs[hdr_num].n = IST_NULL;
+ hdrs[hdr_num].v = IST_NULL;
+ htx_remove_blk(htx, blk);
+ break;
+ }
+ blk = htx_remove_blk(htx, blk);
+ }
+ htx_to_buf(htx, &res->buf);
+
+ if (hdr_num) {
+ /* alloc and copy the headers in the httpclient struct */
+ hc->res.hdrs = calloc((hdr_num + 1), sizeof(*hc->res.hdrs));
+ if (!hc->res.hdrs)
+ goto error;
+ memcpy(hc->res.hdrs, hdrs, sizeof(struct http_hdr) * (hdr_num + 1));
+
+ /* caller callback */
+ if (hc->ops.res_headers)
+ hc->ops.res_headers(hc);
+ }
+
+ /* if there is no HTX data anymore and the EOM flag is
+ * set, leave (no body) */
+ if (htx_is_empty(htx) && htx->flags & HTX_FL_EOM) {
+ appctx->st0 = HTTPCLIENT_S_RES_END;
+ } else {
+ appctx->st0 = HTTPCLIENT_S_RES_BODY;
+ }
+ }
+ break;
+
+ case HTTPCLIENT_S_RES_BODY:
+ /*
+			 * The IO handler removes the HTX blocks from the response buffer and
+			 * pushes them into the hc->res.buf buffer in raw format.
+ */
+ if (!co_data(res))
+ goto out;
+
+ htx = htxbuf(&res->buf);
+ if (htx_is_empty(htx))
+ goto out;
+
+ if (!b_alloc(&hc->res.buf))
+ goto out;
+
+ if (b_full(&hc->res.buf))
+ goto process_data;
+
+			/* de-encapsulate the HTX data into raw data */
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ size_t count = co_data(res);
+ uint32_t blksz = htx_get_blksz(blk);
+ uint32_t room = b_room(&hc->res.buf);
+ uint32_t vlen;
+
+				/* we should try to copy the maximum of output data in a block that
+				 * fits the destination buffer */
+ vlen = MIN(count, blksz);
+ vlen = MIN(vlen, room);
+
+ if (vlen == 0) {
+ htx_to_buf(htx, &res->buf);
+ goto process_data;
+ }
+
+ if (type == HTX_BLK_DATA) {
+ struct ist v = htx_get_blk_value(htx, blk);
+
+ __b_putblk(&hc->res.buf, v.ptr, vlen);
+ c_rew(res, vlen);
+
+ if (vlen == blksz)
+ blk = htx_remove_blk(htx, blk);
+ else
+ htx_cut_data_blk(htx, blk, vlen);
+
+ /* the data must be processed by the caller in the receive phase */
+ if (hc->ops.res_payload)
+ hc->ops.res_payload(hc);
+
+ /* cannot copy everything, need to process */
+ if (vlen != blksz) {
+ htx_to_buf(htx, &res->buf);
+ goto process_data;
+ }
+ } else {
+ if (vlen != blksz) {
+ htx_to_buf(htx, &res->buf);
+ goto process_data;
+ }
+
+ /* remove any block which is not a data block */
+ c_rew(res, blksz);
+ blk = htx_remove_blk(htx, blk);
+ }
+ }
+
+ htx_to_buf(htx, &res->buf);
+
+ /* if not finished, should be called again */
+ if (!(htx_is_empty(htx) && (htx->flags & HTX_FL_EOM)))
+ goto out;
+
+
+ /* end of message, we should quit */
+ appctx->st0 = HTTPCLIENT_S_RES_END;
+ break;
+
+ case HTTPCLIENT_S_RES_END:
+ se_fl_set(appctx->sedesc, SE_FL_EOS);
+ goto out;
+ break;
+ }
+ }
+
+out:
+ return;
+
+process_data:
+ sc_will_read(sc);
+ goto out;
+
+error:
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+ goto out;
+}
+
+int httpclient_applet_init(struct appctx *appctx)
+{
+ struct httpclient *hc = appctx->svcctx;
+ struct stream *s;
+ struct sockaddr_storage *addr = NULL;
+ struct sockaddr_storage ss_url = {};
+ struct sockaddr_storage *ss_dst;
+ enum obj_type *target = NULL;
+ struct ist host = IST_NULL;
+ enum http_scheme scheme;
+ int port;
+ int doresolve = 0;
+
+
+	/* parse the URL and extract the scheme, host and port */
+ if (!httpclient_spliturl(hc->req.url, &scheme, &host, &port))
+ goto out_error;
+
+ if (hc->dst) {
+ /* if httpclient_set_dst() was used, sets the alternative address */
+ ss_dst = hc->dst;
+ } else {
+ /* set the dst using the host, or 0.0.0.0 to resolve */
+ ist2str(trash.area, host);
+ ss_dst = str2ip2(trash.area, &ss_url, 0);
+ if (!ss_dst) { /* couldn't get an IP from that, try to resolve */
+ doresolve = 1;
+ ss_dst = str2ip2("0.0.0.0", &ss_url, 0);
+ }
+ sock_inet_set_port(ss_dst, port);
+ }
+
+ if (!sockaddr_alloc(&addr, ss_dst, sizeof(*ss_dst)))
+ goto out_error;
+
+ /* choose the SSL server or not */
+ switch (scheme) {
+ case SCH_HTTP:
+ target = &hc->srv_raw->obj_type;
+ break;
+ case SCH_HTTPS:
+#ifdef USE_OPENSSL
+ if (hc->srv_ssl) {
+ target = &hc->srv_ssl->obj_type;
+ } else {
+ ha_alert("httpclient: SSL was disabled (wrong verify/ca-file)!\n");
+ goto out_free_addr;
+ }
+#else
+ ha_alert("httpclient: OpenSSL is not available %s:%d.\n", __FUNCTION__, __LINE__);
+ goto out_free_addr;
+#endif
+ break;
+ }
+
+ if (appctx_finalize_startup(appctx, hc->px, &hc->req.buf) == -1) {
+ ha_alert("httpclient: Failed to initialize appctx %s:%d.\n", __FUNCTION__, __LINE__);
+ goto out_free_addr;
+ }
+
+ s = appctx_strm(appctx);
+ s->target = target;
+ /* set the "timeout server" */
+ s->scb->ioto = hc->timeout_server;
+
+ if (doresolve) {
+ /* in order to do the set-dst we need to put the address on the front */
+ s->scf->dst = addr;
+ } else {
+		/* in cases where we don't use resolving, we already have the
+		 * address and must put it on the backend side; some of these
+		 * addresses are not meant to be used on the frontend (sockpair,
+		 * unix socket, etc.) */
+ s->scb->dst = addr;
+ }
+
+ s->scb->flags |= (SC_FL_RCV_ONCE|SC_FL_NOLINGER);
+ s->flags |= SF_ASSIGNED;
+
+ /* applet is waiting for data */
+ applet_need_more_data(appctx);
+ appctx_wakeup(appctx);
+
+ hc->appctx = appctx;
+ hc->flags |= HTTPCLIENT_FS_STARTED;
+
+ /* The request was transferred when the stream was created. So switch
+ * directly to REQ_BODY or RES_STLINE state
+ */
+ appctx->st0 = (hc->ops.req_payload ? HTTPCLIENT_S_REQ_BODY : HTTPCLIENT_S_RES_STLINE);
+ return 0;
+
+ out_free_addr:
+ sockaddr_free(&addr);
+ out_error:
+ return -1;
+}
+
+void httpclient_applet_release(struct appctx *appctx)
+{
+ struct httpclient *hc = appctx->svcctx;
+
+ /* mark the httpclient as ended */
+ hc->flags |= HTTPCLIENT_FS_ENDED;
+ /* the applet is leaving, remove the ptr so we don't try to call it
+ * again from the caller */
+ hc->appctx = NULL;
+
+ if (hc->ops.res_end)
+ hc->ops.res_end(hc);
+
+	/* destroy the httpclient when set to autokill */
+ if (hc->flags & HTTPCLIENT_FA_AUTOKILL) {
+ httpclient_destroy(hc);
+ }
+
+	/* be sure not to use this ptr anymore if the IO handler is called
+	 * one last time */
+ appctx->svcctx = NULL;
+
+ return;
+}
+
+/* HTTP client applet */
+static struct applet httpclient_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<HTTPCLIENT>",
+ .fct = httpclient_applet_io_handler,
+ .init = httpclient_applet_init,
+ .release = httpclient_applet_release,
+};
+
+
+static int httpclient_resolve_init(struct proxy *px)
+{
+ struct act_rule *rule;
+ int i;
+ char *do_resolve = NULL;
+ char *http_rules[][11] = {
+ { "set-var(txn.hc_ip)", "dst", "" },
+ { do_resolve, "hdr(Host),host_only", "if", "{", "var(txn.hc_ip)", "-m", "ip", "0.0.0.0", "}", "" },
+ { "return", "status", "503", "if", "{", "var(txn.hc_ip)", "-m", "ip", "0.0.0.0", "}", "" },
+ { "capture", "var(txn.hc_ip)", "len", "40", "" },
+ { "set-dst", "var(txn.hc_ip)", "" },
+ { "" }
+ };
+
+
+ if (resolvers_disabled)
+ return 0;
+
+ if (!resolvers_id)
+ resolvers_id = strdup("default");
+
+ memprintf(&do_resolve, "do-resolve(txn.hc_ip,%s%s%s)", resolvers_id, resolvers_prefer ? "," : "", resolvers_prefer ? resolvers_prefer : "");
+ http_rules[1][0] = do_resolve;
+
+ /* Try to create the default resolvers section */
+ resolvers_create_default();
+
+ /* if the resolver does not exist and no hard_error was set, simply ignore resolving */
+ if (!find_resolvers_by_id(resolvers_id) && !hard_error_resolvers) {
+ free(do_resolve);
+ return 0;
+ }
+
+
+ for (i = 0; *http_rules[i][0] != '\0'; i++) {
+ rule = parse_http_req_cond((const char **)http_rules[i], "httpclient", 0, px);
+ if (!rule) {
+ free(do_resolve);
+ ha_alert("Couldn't setup the httpclient resolver.\n");
+ return 1;
+ }
+ LIST_APPEND(&px->http_req_rules, &rule->list);
+ }
+
+ free(do_resolve);
+ return 0;
+}
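+
+/* For reference, the rules installed above amount to this configuration
+ * sketch (assuming the "default" resolvers section and no preferred family):
+ *
+ *	http-request set-var(txn.hc_ip) dst
+ *	http-request do-resolve(txn.hc_ip,default) hdr(Host),host_only if { var(txn.hc_ip) -m ip 0.0.0.0 }
+ *	http-request return status 503 if { var(txn.hc_ip) -m ip 0.0.0.0 }
+ *	http-request capture var(txn.hc_ip) len 40
+ *	http-request set-dst var(txn.hc_ip)
+ */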
+
+/*
+ * Creates an internal proxy which will be used for httpclient.
+ * This will allocate 2 servers (raw and ssl) and 1 proxy.
+ *
+ * This function must be called from a precheck callback.
+ *
+ * Return a proxy or NULL.
+ */
+struct proxy *httpclient_create_proxy(const char *id)
+{
+ int err_code = ERR_NONE;
+ char *errmsg = NULL;
+ struct proxy *px = NULL;
+ struct server *srv_raw = NULL;
+#ifdef USE_OPENSSL
+ struct server *srv_ssl = NULL;
+#endif
+
+ if (global.mode & MODE_MWORKER_WAIT)
+		return NULL;
+
+ px = alloc_new_proxy(id, PR_CAP_LISTEN|PR_CAP_INT|PR_CAP_HTTPCLIENT, &errmsg);
+ if (!px) {
+ memprintf(&errmsg, "couldn't allocate proxy.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ px->options |= PR_O_WREQ_BODY;
+ px->retry_type |= PR_RE_CONN_FAILED | PR_RE_DISCONNECTED | PR_RE_TIMEOUT;
+ px->options2 |= PR_O2_INDEPSTR;
+ px->mode = PR_MODE_HTTP;
+ px->maxconn = 0;
+ px->accept = NULL;
+ px->conn_retries = httpclient_retries;
+ px->timeout.connect = httpclient_timeout_connect;
+ px->timeout.client = TICK_ETERNITY;
+	/* The HTTP client uses "option httplog" with the global loggers */
+ px->conf.logformat_string = httpclient_log_format;
+ px->http_needed = 1;
+
+ /* clear HTTP server */
+ srv_raw = new_server(px);
+ if (!srv_raw) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ srv_settings_cpy(srv_raw, &px->defsrv, 0);
+ srv_raw->iweight = 0;
+ srv_raw->uweight = 0;
+ srv_raw->xprt = xprt_get(XPRT_RAW);
+ srv_raw->flags |= SRV_F_MAPPORTS; /* needed to apply the port change with resolving */
+ srv_raw->id = strdup("<HTTPCLIENT>");
+ if (!srv_raw->id) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+#ifdef USE_OPENSSL
+ /* SSL HTTP server */
+ srv_ssl = new_server(px);
+ if (!srv_ssl) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ srv_settings_cpy(srv_ssl, &px->defsrv, 0);
+ srv_ssl->iweight = 0;
+ srv_ssl->uweight = 0;
+ srv_ssl->xprt = xprt_get(XPRT_SSL);
+ srv_ssl->use_ssl = 1;
+ srv_ssl->flags |= SRV_F_MAPPORTS; /* needed to apply the port change with resolving */
+ srv_ssl->id = strdup("<HTTPSCLIENT>");
+ if (!srv_ssl->id) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ if (ssl_sock_parse_alpn("h2,http/1.1", &srv_ssl->ssl_ctx.alpn_str, &srv_ssl->ssl_ctx.alpn_len, &errmsg) != 0) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+#endif
+ srv_ssl->ssl_ctx.verify = httpclient_ssl_verify;
+ /* if the verify is required, try to load the system CA */
+ if (httpclient_ssl_verify == SSL_SOCK_VERIFY_REQUIRED) {
+
+ srv_ssl->ssl_ctx.ca_file = strdup(httpclient_ssl_ca_file ? httpclient_ssl_ca_file : "@system-ca");
+ if (!__ssl_store_load_locations_file(srv_ssl->ssl_ctx.ca_file, 1, CAFILE_CERT, !hard_error_ssl)) {
+			/* if we failed to load the ca-file, only quit with an
+			 * error when hard_error is set; otherwise just disable
+			 * the feature. */
+ if (hard_error_ssl) {
+ memprintf(&errmsg, "cannot initialize SSL verify with 'ca-file \"%s\"'.", srv_ssl->ssl_ctx.ca_file);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ } else {
+ ha_free(&srv_ssl->ssl_ctx.ca_file);
+ srv_drop(srv_ssl);
+ srv_ssl = NULL;
+ }
+ }
+ }
+
+#endif
+
+ /* add the proxy in the proxy list only if everything is successful */
+ px->next = proxies_list;
+ proxies_list = px;
+
+ if (httpclient_resolve_init(px) != 0) {
+ memprintf(&errmsg, "cannot initialize resolvers.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ /* link the 2 servers in the proxy */
+ srv_raw->next = px->srv;
+ px->srv = srv_raw;
+
+#ifdef USE_OPENSSL
+ if (srv_ssl) {
+ srv_ssl->next = px->srv;
+ px->srv = srv_ssl;
+ }
+#endif
+
+
+err:
+ if (err_code & ERR_CODE) {
+ ha_alert("httpclient: cannot initialize: %s\n", errmsg);
+ free(errmsg);
+ srv_drop(srv_raw);
+#ifdef USE_OPENSSL
+ srv_drop(srv_ssl);
+#endif
+ free_proxy(px);
+
+ return NULL;
+ }
+ return px;
+}
+
+/*
+ * Initialize the proxy for the HTTP client with 2 servers, one for raw HTTP,
+ * the other for HTTPS.
+ */
+static int httpclient_precheck()
+{
+ /* initialize the default httpclient_proxy which is used for the CLI and the lua */
+
+ httpclient_proxy = httpclient_create_proxy("<HTTPCLIENT>");
+ if (!httpclient_proxy)
+ return 1;
+
+ return 0;
+}
+
+/* Initialize the logs for every proxy dedicated to the httpclient */
+static int httpclient_postcheck_proxy(struct proxy *curproxy)
+{
+ int err_code = ERR_NONE;
+ struct logger *logger;
+ char *errmsg = NULL;
+#ifdef USE_OPENSSL
+ struct server *srv = NULL;
+ struct server *srv_ssl = NULL;
+#endif
+
+ if (global.mode & MODE_MWORKER_WAIT)
+ return ERR_NONE;
+
+ if (!(curproxy->cap & PR_CAP_HTTPCLIENT))
+ return ERR_NONE; /* nothing to do */
+
+ /* copy logs from "global" log list */
+ list_for_each_entry(logger, &global.loggers, list) {
+ struct logger *node = dup_logger(logger);
+
+ if (!node) {
+ memprintf(&errmsg, "out of memory.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ LIST_APPEND(&curproxy->loggers, &node->list);
+ }
+ if (curproxy->conf.logformat_string) {
+ curproxy->conf.args.ctx = ARGC_LOG;
+ if (!parse_logformat_string(curproxy->conf.logformat_string, curproxy, &curproxy->logformat,
+ LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES,
+ SMP_VAL_FE_LOG_END, &errmsg)) {
+ memprintf(&errmsg, "failed to parse log-format : %s.", errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+ }
+
+#ifdef USE_OPENSSL
+ /* initialize the SNI for the SSL servers */
+
+ for (srv = curproxy->srv; srv != NULL; srv = srv->next) {
+ if (srv->xprt == xprt_get(XPRT_SSL)) {
+ srv_ssl = srv;
+ }
+ }
+ if (srv_ssl && !srv_ssl->sni_expr) {
+ /* init the SNI expression */
+ /* always use the host header as SNI, without the port */
+ srv_ssl->sni_expr = strdup("req.hdr(host),field(1,:)");
+ err_code |= server_parse_sni_expr(srv_ssl, curproxy, &errmsg);
+ if (err_code & ERR_CODE) {
+ memprintf(&errmsg, "failed to configure sni: %s.", errmsg);
+ goto err;
+ }
+ }
+#endif
+
+err:
+ if (err_code & ERR_CODE) {
+ ha_alert("httpclient: failed to initialize: %s\n", errmsg);
+ free(errmsg);
+
+ }
+ return err_code;
+}
+
+/* initialize the proxy and servers for the HTTP client */
+
+REGISTER_PRE_CHECK(httpclient_precheck);
+REGISTER_POST_PROXY_CHECK(httpclient_postcheck_proxy);
+
+static int httpclient_parse_global_resolvers(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* any configuration should set the hard_error flag */
+ hard_error_resolvers = 1;
+
+ free(resolvers_id);
+ resolvers_id = strdup(args[1]);
+
+ return 0;
+}
+
+/* config parser for global "httpclient.resolvers.disabled", accepts "on" or "off" */
+static int httpclient_parse_global_resolvers_disabled(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ resolvers_disabled = 1;
+ else if (strcmp(args[1], "off") == 0)
+ resolvers_disabled = 0;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+static int httpclient_parse_global_prefer(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* any configuration should set the hard_error flag */
+ hard_error_resolvers = 1;
+
+
+ if (strcmp(args[1],"ipv4") == 0)
+ resolvers_prefer = "ipv4";
+ else if (strcmp(args[1],"ipv6") == 0)
+ resolvers_prefer = "ipv6";
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'ipv4' or 'ipv6' as argument.\n", file, line, args[0]);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+#ifdef USE_OPENSSL
+static int httpclient_parse_global_ca_file(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* any configuration should set the hard_error flag */
+ hard_error_ssl = 1;
+
+ free(httpclient_ssl_ca_file);
+ httpclient_ssl_ca_file = strdup(args[1]);
+
+ return 0;
+}
+
+static int httpclient_parse_global_verify(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* any configuration should set the hard_error flag */
+ hard_error_ssl = 1;
+
+ if (strcmp(args[1],"none") == 0)
+ httpclient_ssl_verify = SSL_SOCK_VERIFY_NONE;
+ else if (strcmp(args[1],"required") == 0)
+ httpclient_ssl_verify = SSL_SOCK_VERIFY_REQUIRED;
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'none' or 'required' as argument.\n", file, line, args[0]);
+ return -1;
+ }
+
+ return 0;
+}
+#endif /* USE_OPENSSL */
+
+static int httpclient_parse_global_retries(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, line, args[0]);
+ return -1;
+ }
+ httpclient_retries = atol(args[1]);
+
+ return 0;
+}
+
+static int httpclient_parse_global_timeout_connect(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+ unsigned timeout;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n",
+ file, line, args[0]);
+ return -1;
+ }
+
+ res = parse_time_err(args[1], &timeout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in '%s'", *res, args[0]);
+ return -1;
+ }
+
+ if (*args[2] != 0) {
+ memprintf(err, "'%s' : unexpected extra argument '%s' after value '%s'.", args[0], args[2], args[1]);
+ return -1;
+ }
+
+ httpclient_timeout_connect = MS_TO_TICKS(timeout);
+
+ return 0;
+}
+
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "httpclient.resolvers.disabled", httpclient_parse_global_resolvers_disabled },
+ { CFG_GLOBAL, "httpclient.resolvers.id", httpclient_parse_global_resolvers },
+ { CFG_GLOBAL, "httpclient.resolvers.prefer", httpclient_parse_global_prefer },
+ { CFG_GLOBAL, "httpclient.retries", httpclient_parse_global_retries },
+ { CFG_GLOBAL, "httpclient.timeout.connect", httpclient_parse_global_timeout_connect },
+#ifdef USE_OPENSSL
+ { CFG_GLOBAL, "httpclient.ssl.verify", httpclient_parse_global_verify },
+ { CFG_GLOBAL, "httpclient.ssl.ca-file", httpclient_parse_global_ca_file },
+#endif
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
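+
+/* Illustrative haproxy.cfg usage for the keywords registered above
+ * (editor's sketch; the values are arbitrary examples, not defaults):
+ *
+ *   global
+ *       httpclient.resolvers.prefer ipv4
+ *       httpclient.retries 3
+ *       httpclient.timeout.connect 5s
+ *       httpclient.ssl.verify none    # only in builds with USE_OPENSSL
+ */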
diff --git a/src/http_conv.c b/src/http_conv.c
new file mode 100644
index 0000000..cf515a8
--- /dev/null
+++ b/src/http_conv.c
@@ -0,0 +1,453 @@
+/*
+ * HTTP sample conversion
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/chunk.h>
+#include <haproxy/http.h>
+#include <haproxy/pool.h>
+#include <haproxy/sample.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+static int smp_check_http_date_unit(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ return smp_check_date_unit(args, err);
+}
+
+/* takes a UINT value on input, supposed to represent the time since the EPOCH,
+ * adds an optional offset found in args[0] and emits a string representing
+ * the date in RFC-1123/5322 format. If the optional unit param in args[1] is
+ * provided, the timestamp is decoded in milliseconds ("ms") or microseconds
+ * ("us"), and the relevant output date format is used.
+ */
+static int sample_conv_http_date(const struct arg *args, struct sample *smp, void *private)
+{
+ const char day[7][4] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
+ const char mon[12][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+ struct buffer *temp;
+ struct tm tm;
+ int sec_frac = 0;
+ time_t curr_date;
+
+ /* add offset */
+ if (args[0].type == ARGT_SINT)
+ smp->data.u.sint += args[0].data.sint;
+
+ /* report in milliseconds */
+ if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_MS) {
+ sec_frac = smp->data.u.sint % 1000;
+ smp->data.u.sint /= 1000;
+ }
+ /* report in microseconds */
+ else if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_US) {
+ sec_frac = smp->data.u.sint % 1000000;
+ smp->data.u.sint /= 1000000;
+ }
+
+ /* With high numbers, the date returned can be negative; the 55-bit mask prevents this. */
+ curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
+
+ get_gmtime(curr_date, &tm);
+
+ temp = get_trash_chunk();
+ if (args[1].type == ARGT_SINT && args[1].data.sint != TIME_UNIT_S) {
+ temp->data = snprintf(temp->area, temp->size - temp->data,
+ "%s, %02d %s %04d %02d:%02d:%02d.%d GMT",
+ day[tm.tm_wday], tm.tm_mday, mon[tm.tm_mon],
+ 1900+tm.tm_year,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, sec_frac);
+ } else {
+ temp->data = snprintf(temp->area, temp->size - temp->data,
+ "%s, %02d %s %04d %02d:%02d:%02d GMT",
+ day[tm.tm_wday], tm.tm_mday, mon[tm.tm_mon],
+ 1900+tm.tm_year,
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+ }
+
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
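+
+/* Usage sketch for the "http_date" converter (editor's illustration;
+ * the header names are arbitrary):
+ *
+ *   http-response set-header Expires %[date(3600),http_date]
+ *   http-response set-header X-Now-Ms %[date(0,ms),http_date(0,ms)]
+ */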
+
+/* Arguments: the list of supported values in args[0] (';' separated) and an
+ * optional default value in args[1], returned when no entry matches.
+ */
+static int sample_conv_q_preferred(const struct arg *args, struct sample *smp, void *private)
+{
+ const char *al = smp->data.u.str.area;
+ const char *end = al + smp->data.u.str.data;
+ const char *token;
+ int toklen;
+ int qvalue;
+ const char *str;
+ const char *w;
+ int best_q = 0;
+
+ /* Mark the sample as constant, because the output of the
+ * function will point into the constant configuration string.
+ */
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+
+ /* Parse the accept language */
+ while (1) {
+
+ /* Jump spaces, quit if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ break;
+
+ /* Start of the first word. */
+ token = al;
+
+ /* Look for separator: isspace(), ',' or ';'. Next value if 0 length word. */
+ while (al < end && *al != ';' && *al != ',' && !isspace((unsigned char)*al))
+ al++;
+ if (al == token)
+ goto expect_comma;
+
+ /* Length of the token. */
+ toklen = al - token;
+ qvalue = 1000;
+
+ /* Check if the token exists in the list. If the token does not
+ * exist, jump to the next token.
+ */
+ str = args[0].data.str.area;
+ w = str;
+ while (1) {
+ if (*str == ';' || *str == '\0') {
+ if (http_language_range_match(token, toklen, w, str - w))
+ goto look_for_q;
+ if (*str == '\0')
+ goto expect_comma;
+ w = str + 1;
+ }
+ str++;
+ }
+ goto expect_comma;
+
+look_for_q:
+
+ /* Jump spaces, quit if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ goto process_value;
+
+ /* If ',' is found, process the result */
+ if (*al == ',')
+ goto process_value;
+
+ /* If the character is different from ';', look
+ * for the end of the header part in best effort.
+ */
+ if (*al != ';')
+ goto expect_comma;
+
+ /* Assumes that the char is ';', now expect "q=". */
+ al++;
+
+ /* Jump spaces, process value if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ goto process_value;
+
+ /* Expect 'q'. If no 'q', continue in best effort */
+ if (*al != 'q')
+ goto process_value;
+ al++;
+
+ /* Jump spaces, process value if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ goto process_value;
+
+ /* Expect '='. If no '=', continue in best effort */
+ if (*al != '=')
+ goto process_value;
+ al++;
+
+ /* Jump spaces, process value if the end is detected. */
+ while (al < end && isspace((unsigned char)*al))
+ al++;
+ if (al >= end)
+ goto process_value;
+
+ /* Parse the q value. */
+ qvalue = http_parse_qvalue(al, &al);
+
+process_value:
+
+ /* If the new q value is the best q value, then store the associated
+ * language in the response. If qvalue is the biggest value (1000),
+ * break the process.
+ */
+ if (qvalue > best_q) {
+ smp->data.u.str.area = (char *)w;
+ smp->data.u.str.data = str - w;
+ if (qvalue >= 1000)
+ break;
+ best_q = qvalue;
+ }
+
+expect_comma:
+
+ /* Expect comma or end. If the end is detected, quit the loop. */
+ while (al < end && *al != ',')
+ al++;
+ if (al >= end)
+ break;
+
+ /* Comma is found, jump it and restart the analyzer. */
+ al++;
+ }
+
+ /* Set default value if required. */
+ if (smp->data.u.str.data == 0 && args[1].type == ARGT_STR) {
+ smp->data.u.str.area = args[1].data.str.area;
+ smp->data.u.str.data = args[1].data.str.data;
+ }
+
+ /* Return true only if a matching language was found. */
+ return smp->data.u.str.data != 0;
+}
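+
+/* Usage sketch for the "language" converter (editor's illustration;
+ * backend names are placeholders). The second argument is the optional
+ * default value returned when nothing matches:
+ *
+ *   use_backend bk_es if { req.fhdr(accept-language),language(es;fr;en) -m str es }
+ *   use_backend bk_en if { req.fhdr(accept-language),language(es;fr;en,en) -m str en }
+ */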
+
+/* This converter url-decodes any input string. */
+static int sample_conv_url_dec(const struct arg *args, struct sample *smp, void *private)
+{
+ int in_form = 0;
+ int len;
+
+ /* If the constant flag is set or if no room is available at
+ * the end of the buffer, copy the string to another buffer
+ * before decoding.
+ */
+ if (smp->flags & SMP_F_CONST || smp->data.u.str.size <= smp->data.u.str.data) {
+ struct buffer *str = get_trash_chunk();
+ memcpy(str->area, smp->data.u.str.area, smp->data.u.str.data);
+ smp->data.u.str.area = str->area;
+ smp->data.u.str.size = str->size;
+ smp->flags &= ~SMP_F_CONST;
+ }
+
+ /* Add final \0 required by url_decode(), and convert the input string. */
+ smp->data.u.str.area[smp->data.u.str.data] = '\0';
+
+ if (args[0].type == ARGT_SINT)
+ in_form = !!args[0].data.sint;
+
+ len = url_decode(smp->data.u.str.area, in_form);
+ if (len < 0)
+ return 0;
+ smp->data.u.str.data = len;
+ return 1;
+}
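+
+/* Usage sketch for the "url_dec" converter (editor's illustration; the
+ * variable name is arbitrary). A non-zero argument also turns '+' into
+ * spaces, for form-encoded input:
+ *
+ *   http-request set-var(txn.query) url_param(q),url_dec(1)
+ */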
+
+/* url-encode types and encode maps */
+enum encode_type {
+ ENC_QUERY = 0,
+};
+long query_encode_map[(256 / 8) / sizeof(long)];
+
+/* Check url-encode type */
+static int sample_conv_url_enc_check(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ enum encode_type enc_type;
+
+ if (strcmp(arg->data.str.area, "") == 0)
+ enc_type = ENC_QUERY;
+ else if (strcmp(arg->data.str.area, "query") == 0)
+ enc_type = ENC_QUERY;
+ else {
+ memprintf(err, "Unexpected encode type. "
+ "Allowed value is 'query'");
+ return 0;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->type = ARGT_SINT;
+ arg->data.sint = enc_type;
+ return 1;
+}
+
+/* Initializes some url encode data at boot */
+static void sample_conf_url_enc_init()
+{
+ int i;
+
+ memset(query_encode_map, 0, sizeof(query_encode_map));
+ /* use rfc3986 to determine list of characters to keep unchanged for
+ * query string */
+ for (i = 0; i < 256; i++) {
+ if (!((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')
+ || (i >= '0' && i <= '9') ||
+ i == '-' || i == '.' || i == '_' || i == '~'))
+ ha_bit_set(i, query_encode_map);
+ }
+}
+
+INITCALL0(STG_PREPARE, sample_conf_url_enc_init);
+
+/* This converter url-encodes any input string. Only the query string format is supported for now */
+static int sample_conv_url_enc(const struct arg *args, struct sample *smp, void
+ *private)
+{
+ enum encode_type enc_type;
+ struct buffer *trash = get_trash_chunk();
+ long *encode_map;
+ char *ret;
+
+ enc_type = args->data.sint;
+
+ if (enc_type == ENC_QUERY)
+ encode_map = query_encode_map;
+ else
+ return 0;
+
+ ret = encode_chunk(trash->area, trash->area + trash->size, '%',
+ encode_map, &smp->data.u.str);
+ if (ret == NULL || *ret != '\0')
+ return 0;
+ trash->data = ret - trash->area;
+ smp->data.u.str = *trash;
+ return 1;
+}
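+
+/* Usage sketch for the "url_enc" converter (editor's illustration; names
+ * are arbitrary). "query" is the only supported encode type for now:
+ *
+ *   http-request set-header X-Next-Url %[var(txn.query),url_enc(query)]
+ */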
+
+static int smp_conv_req_capture(const struct arg *args, struct sample *smp, void *private)
+{
+ struct proxy *fe;
+ int idx, i;
+ struct cap_hdr *hdr;
+ int len;
+
+ if (args->type != ARGT_SINT)
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ fe = strm_fe(smp->strm);
+ idx = args->data.sint;
+
+ /* Check the availability of the capture id. */
+ if (idx > fe->nb_req_cap - 1)
+ return 0;
+
+ /* Look for the original configuration. */
+ for (hdr = fe->req_cap, i = fe->nb_req_cap - 1;
+ hdr != NULL && i != idx ;
+ i--, hdr = hdr->next);
+ if (!hdr)
+ return 0;
+
+ /* check for the memory allocation */
+ if (smp->strm->req_cap[hdr->index] == NULL)
+ smp->strm->req_cap[hdr->index] = pool_alloc(hdr->pool);
+ if (smp->strm->req_cap[hdr->index] == NULL)
+ return 0;
+
+ /* Check length. */
+ len = smp->data.u.str.data;
+ if (len > hdr->len)
+ len = hdr->len;
+
+ /* Capture input data. */
+ memcpy(smp->strm->req_cap[idx], smp->data.u.str.area, len);
+ smp->strm->req_cap[idx][len] = '\0';
+
+ return 1;
+}
+
+static int smp_conv_res_capture(const struct arg *args, struct sample *smp, void *private)
+{
+ struct proxy *fe;
+ int idx, i;
+ struct cap_hdr *hdr;
+ int len;
+
+ if (args->type != ARGT_SINT)
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ fe = strm_fe(smp->strm);
+ idx = args->data.sint;
+
+ /* Check the availability of the capture id. */
+ if (idx > fe->nb_rsp_cap - 1)
+ return 0;
+
+ /* Look for the original configuration. */
+ for (hdr = fe->rsp_cap, i = fe->nb_rsp_cap - 1;
+ hdr != NULL && i != idx ;
+ i--, hdr = hdr->next);
+ if (!hdr)
+ return 0;
+
+ /* check for the memory allocation */
+ if (smp->strm->res_cap[hdr->index] == NULL)
+ smp->strm->res_cap[hdr->index] = pool_alloc(hdr->pool);
+ if (smp->strm->res_cap[hdr->index] == NULL)
+ return 0;
+
+ /* Check length. */
+ len = smp->data.u.str.data;
+ if (len > hdr->len)
+ len = hdr->len;
+
+ /* Capture input data. */
+ memcpy(smp->strm->res_cap[idx], smp->data.u.str.area, len);
+ smp->strm->res_cap[idx][len] = '\0';
+
+ return 1;
+}
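+
+/* Usage sketch for the "capture-req"/"capture-res" converters (editor's
+ * illustration). A capture slot must be declared first; the converter
+ * argument is the slot id:
+ *
+ *   declare capture request len 64
+ *   http-request set-var(txn.host) req.hdr(host),capture-req(0)
+ */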
+
+/************************************************************************/
+/* All supported converter keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "http_date", sample_conv_http_date, ARG2(0,SINT,STR), smp_check_http_date_unit, SMP_T_SINT, SMP_T_STR},
+ { "language", sample_conv_q_preferred, ARG2(1,STR,STR), NULL, SMP_T_STR, SMP_T_STR},
+ { "capture-req", smp_conv_req_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
+ { "capture-res", smp_conv_res_capture, ARG1(1,SINT), NULL, SMP_T_STR, SMP_T_STR},
+ { "url_dec", sample_conv_url_dec, ARG1(0,SINT), NULL, SMP_T_STR, SMP_T_STR},
+ { "url_enc", sample_conv_url_enc, ARG1(1,STR), sample_conv_url_enc_check, SMP_T_STR, SMP_T_STR},
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_ext.c b/src/http_ext.c
new file mode 100644
index 0000000..a367519
--- /dev/null
+++ b/src/http_ext.c
@@ -0,0 +1,1881 @@
+/*
+ * HTTP extensions logic and helpers
+ *
+ * Copyright 2022 HAProxy Technologies
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2.1 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/sample.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_ext.h>
+#include <haproxy/chunk.h>
+#include <haproxy/stream.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/arg.h>
+#include <haproxy/initcall.h>
+#include <haproxy/tools.h>
+
+/*
+ * =========== ANALYZE ===========
+ * below are http process/ana helpers
+ */
+
+/* checks if <input> contains rfc7239 compliant port
+ * Returns 1 for success and 0 for failure
+ * if <port> is not NULL, it will be set to the extracted value contained
+ * in <input>
+ * <input> will be consumed accordingly (parsed/extracted characters are
+ * removed from <input>)
+ */
+static inline int http_7239_extract_port(struct ist *input, uint16_t *port)
+{
+ char *start = istptr(*input);
+ uint32_t port_cast = 0;
+ int it = 0;
+
+ /* strtol() does not support non-NUL-terminated strings,
+ * so we extract the port ourselves
+ */
+ while (it < istlen(*input) &&
+ isdigit((unsigned char)start[it])) {
+ port_cast = (port_cast * 10) + (start[it] - '0');
+ if (port_cast > 65535)
+ return 0; /* invalid port */
+ it += 1;
+ }
+ if (!port_cast)
+ return 0; /* invalid port */
+ /* ok */
+ if (port)
+ *port = (uint16_t)port_cast;
+ *input = istadv(*input, it);
+ return 1;
+}
+
+/* check if char is a valid obfuscated identifier char
+ * (according to RFC 7239)
+ * Returns a non-zero value for a valid char
+ */
+static inline int http_7239_valid_obfsc(char c)
+{
+ return (isalnum((unsigned char)c) ||
+ (c == '.' || c == '-' || c == '_'));
+}
+
+/* checks if <input> contains rfc7239 compliant obfuscated identifier
+ * Returns 1 for success and 0 for failure
+ * if <obfs> is not NULL, it will be set to the extracted value contained
+ * in <input>
+ * <input> will be consumed accordingly (parsed/extracted characters are
+ * removed from <input>)
+ */
+static inline int http_7239_extract_obfs(struct ist *input, struct ist *obfs)
+{
+ int it = 0;
+
+ if (obfs)
+ obfs->ptr = input->ptr;
+
+ while (it < istlen(*input) && istptr(*input)[it] != ';') {
+ if (!http_7239_valid_obfsc(istptr(*input)[it]))
+ break; /* end of obfs token */
+ it += 1;
+ }
+ if (obfs)
+ obfs->len = it;
+ *input = istadv(*input, it);
+ return !!it;
+}
+
+/* checks if <input> contains rfc7239 compliant IPV4 address
+ * Returns 1 for success and 0 for failure
+ * if <ip> is not NULL, it will be set to the extracted value contained
+ * in <input>
+ * <input> will be consumed accordingly (parsed/extracted characters are
+ * removed from <input>)
+ */
+static inline int http_7239_extract_ipv4(struct ist *input, struct in_addr *ip)
+{
+ char ip4[INET_ADDRSTRLEN];
+ unsigned char buf[sizeof(struct in_addr)];
+ int it = 0;
+
+ /* extract ipv4 addr */
+ while (it < istlen(*input) && it < (sizeof(ip4) - 1)) {
+ if (!isdigit((unsigned char)istptr(*input)[it]) &&
+ istptr(*input)[it] != '.')
+ break; /* no more ip4 char */
+ ip4[it] = istptr(*input)[it];
+ it += 1;
+ }
+ ip4[it] = 0;
+ if (inet_pton(AF_INET, ip4, buf) != 1)
+ return 0; /* invalid ip4 addr */
+ /* ok */
+ if (ip)
+ memcpy(ip, buf, sizeof(buf));
+ *input = istadv(*input, it);
+ return 1;
+}
+
+/* checks if <input> contains rfc7239 compliant IPV6 address
+ * assuming input.len >= 1 and first char is '['
+ * Returns 1 for success and 0 for failure
+ * if <ip> is not NULL, it will be set to the extracted value contained
+ * in <input>
+ * <input> will be consumed accordingly (parsed/extracted characters are
+ * removed from <input>)
+ */
+static inline int http_7239_extract_ipv6(struct ist *input, struct in6_addr *ip)
+{
+ char ip6[INET6_ADDRSTRLEN];
+ unsigned char buf[sizeof(struct in6_addr)];
+ int it = 0;
+
+ *input = istnext(*input); /* skip '[' leading char */
+ /* extract ipv6 addr */
+ while (it < istlen(*input) &&
+ it < (sizeof(ip6) - 1)) {
+ if (!isalnum((unsigned char)istptr(*input)[it]) &&
+ istptr(*input)[it] != ':')
+ break; /* no more ip6 char */
+ ip6[it] = istptr(*input)[it];
+ it += 1;
+ }
+ ip6[it] = 0;
+ if ((istlen(*input)-it) < 1 || istptr(*input)[it] != ']')
+ return 0; /* missing ending "]" char */
+ it += 1;
+ if (inet_pton(AF_INET6, ip6, buf) != 1)
+ return 0; /* invalid ip6 addr */
+ /* ok */
+ if (ip)
+ memcpy(ip, buf, sizeof(buf));
+ *input = istadv(*input, it);
+ return 1;
+}
+
+/* checks if <input> contains rfc7239 compliant host
+ * <quoted> is used to determine if the current input is being extracted
+ * from a quoted (non zero) or unquoted (zero) token, as the parsing rules
+ * differ depending on whether the input is quoted, according to the RFC.
+ * Returns 1 for success and 0 for failure
+ * if <host> is not NULL, it will be set to the extracted value contained
+ * in <input>
+ * <input> will be consumed accordingly (parsed/extracted characters are
+ * removed from <input>)
+ */
+static inline int http_7239_extract_host(struct ist *input, struct ist *host, int quoted)
+{
+ if (istlen(*input) < 1)
+ return 0; /* invalid input */
+
+ if (host)
+ host->ptr = input->ptr;
+
+ if (quoted && *istptr(*input) == '[') {
+ /* raw ipv6 address */
+ if (!http_7239_extract_ipv6(input, NULL))
+ return 0; /* invalid addr */
+ }
+ else {
+ /* ipv4 or dns */
+ while (istlen(*input)) {
+ if (!isalnum((unsigned char)*istptr(*input)) &&
+ *istptr(*input) != '.')
+ break; /* end of hostname token */
+ *input = istnext(*input);
+ }
+ }
+ if (istlen(*input) < 1 || *istptr(*input) != ':') {
+ goto out; /* no optional port provided */
+ }
+ if (!quoted)
+ return 0; /* not supported */
+ *input = istnext(*input); /* skip ':' */
+ /* validate port */
+ if (!http_7239_extract_port(input, NULL))
+ return 0; /* invalid port */
+ out:
+ if (host)
+ host->len = (input->ptr - host->ptr);
+ return 1;
+}
+
+/* checks if <input> contains rfc7239 compliant nodename
+ * <quoted> is used to determine if the current input is being extracted
+ * from a quoted (non zero) or unquoted (zero) token, as the parsing rules
+ * differ depending on whether the input is quoted, according to the RFC.
+ * Returns 1 for success and 0 for failure
+ * if <nodename> is not NULL, it will be set to the extracted value contained
+ * in <input>
+ * <input> will be consumed accordingly (parsed/extracted characters are
+ * removed from <input>)
+ */
+static inline int http_7239_extract_nodename(struct ist *input, struct forwarded_header_nodename *nodename, int quoted)
+{
+ if (istlen(*input) < 1)
+ return 0; /* invalid input */
+ if (*istptr(*input) == '_') {
+ struct ist *obfs = NULL;
+
+ /* obfuscated nodename */
+ *input = istnext(*input); /* skip '_' */
+ if (nodename) {
+ nodename->type = FORWARDED_HEADER_OBFS;
+ obfs = &nodename->obfs;
+ }
+ if (!http_7239_extract_obfs(input, obfs))
+ return 0; /* invalid obfs */
+ } else if (*istptr(*input) == 'u') {
+ /* "unknown" nodename? */
+ if (istlen(*input) < 7 ||
+ strncmp("unknown", istptr(*input), 7))
+ return 0; /* syntax error */
+ *input = istadv(*input, 7); /* skip "unknown" */
+ if (nodename)
+ nodename->type = FORWARDED_HEADER_UNK;
+ } else if (quoted && *istptr(*input) == '[') {
+ struct in6_addr *ip6 = NULL;
+
+ /* ipv6 address */
+ if (nodename) {
+ struct sockaddr_in6 *addr = (void *)&nodename->ip;
+
+ ip6 = &addr->sin6_addr;
+ addr->sin6_family = AF_INET6;
+ nodename->type = FORWARDED_HEADER_IP;
+ }
+ if (!http_7239_extract_ipv6(input, ip6))
+ return 0; /* invalid ip6 */
+ } else if (*istptr(*input)) {
+ struct in_addr *ip = NULL;
+
+ /* ipv4 address */
+ if (nodename) {
+ struct sockaddr_in *addr = (void *)&nodename->ip;
+
+ ip = &addr->sin_addr;
+ addr->sin_family = AF_INET;
+ nodename->type = FORWARDED_HEADER_IP;
+ }
+ if (!http_7239_extract_ipv4(input, ip))
+ return 0; /* invalid ip */
+ } else
+ return 0; /* unexpected char */
+
+ /* ok */
+ return 1;
+}
+
+/* checks if <input> contains rfc7239 compliant nodeport
+ * <quoted> is used to determine if the current input is being extracted
+ * from a quoted (non zero) or unquoted (zero) token, as the parsing rules
+ * differ whether the input is quoted or not according to the rfc.
+ * Returns 1 for success and 0 for failure
+ * if <nodeport> is not NULL, it will be set to the extracted value contained
+ * in <input>
+ * <input> will be consumed accordingly (parsed/extracted characters are
+ * removed from <input>)
+ */
+static inline int http_7239_extract_nodeport(struct ist *input, struct forwarded_header_nodeport *nodeport)
+{
+ if (*istptr(*input) == '_') {
+ struct ist *obfs = NULL;
+
+ /* obfuscated nodeport */
+ *input = istnext(*input); /* skip '_' */
+ if (nodeport) {
+ nodeport->type = FORWARDED_HEADER_OBFS;
+ obfs = &nodeport->obfs;
+ }
+ if (!http_7239_extract_obfs(input, obfs))
+ return 0; /* invalid obfs */
+ } else {
+ uint16_t *port = NULL;
+
+ /* normal port */
+ if (nodeport) {
+ nodeport->type = FORWARDED_HEADER_PORT;
+ port = &nodeport->port;
+ }
+ if (!http_7239_extract_port(input, port))
+ return 0; /* invalid port */
+ }
+ /* ok */
+ return 1;
+}
+
+/* checks if <input> contains rfc7239 compliant node (nodename:nodeport token)
+ * <quoted> is used to determine if the current input is being extracted
+ * from a quoted (non zero) or unquoted (zero) token, as the parsing rules
+ * differ depending on whether the input is quoted, according to the RFC.
+ * Returns 1 for success and 0 for failure
+ * if <node> is not NULL, it will be set to the extracted value contained
+ * in <input>
+ * <input> will be consumed accordingly (parsed/extracted characters are
+ * removed from <input>)
+ */
+static inline int http_7239_extract_node(struct ist *input, struct forwarded_header_node *node, int quoted)
+{
+ struct forwarded_header_nodename *nodename = NULL;
+ struct forwarded_header_nodeport *nodeport = NULL;
+
+ if (node) {
+ nodename = &node->nodename;
+ nodeport = &node->nodeport;
+ node->raw.ptr = input->ptr;
+ }
+ if (!http_7239_extract_nodename(input, nodename, quoted))
+ return 0; /* invalid nodename */
+ if (istlen(*input) < 1 || *istptr(*input) != ':') {
+ if (node)
+ node->nodeport.type = FORWARDED_HEADER_UNK;
+ goto out; /* no optional port provided */
+ }
+ if (!quoted)
+ return 0; /* not supported */
+ *input = istnext(*input);
+ if (!http_7239_extract_nodeport(input, nodeport))
+ return 0; /* invalid nodeport */
+ out:
+ /* ok */
+ if (node)
+ node->raw.len = input->ptr - node->raw.ptr;
+ return 1;
+}
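+
+/* Examples of node tokens accepted by the extractor above (editor's
+ * illustration derived from the parsing rules; "_SEVKISEK" is taken from
+ * the RFC 7239 examples):
+ *   unknown            - unknown nodename, no port
+ *   _SEVKISEK          - obfuscated nodename
+ *   192.0.2.43         - bare ipv4 nodename
+ *   "192.0.2.43:8080"  - quoting is required when a port is present
+ *   "[2001:db8::1]"    - quoting is required for ipv6 nodenames
+ */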
+
+static inline int _forwarded_header_save_ctx(struct forwarded_header_ctx *ctx, int current_step, int required_steps)
+{
+ return (ctx && (current_step & required_steps));
+}
+
+static inline void _forwarded_header_quote_expected(struct ist *hdr, uint8_t *quoted)
+{
+ if (istlen(*hdr) > 0 && *istptr(*hdr) == '"') {
+ *quoted = 1;
+ /* node is quoted, we must find corresponding
+ * ending quote at the end of the token
+ */
+ *hdr = istnext(*hdr); /* skip quote */
+ }
+}
+
+/* checks if current header <hdr> is RFC 7239 compliant and can be "trusted".
+ * The function will stop parsing as soon as all <required_steps> have
+ * been validated or an error is encountered.
+ * Provide FORWARDED_HEADER_ALL to validate the full header.
+ * You may provide a limited scope to perform quick searches on specific attributes.
+ * If <ctx> is provided (not NULL), parsed attributes will be stored according to
+ * their types, allowing you to extract some useful information from the header.
+ * Returns 0 on failure and the <validated_steps> bitfield on success.
+ */
+int http_validate_7239_header(struct ist hdr, int required_steps, struct forwarded_header_ctx *ctx)
+{
+ int validated_steps = 0;
+ int current_step = 0;
+ uint8_t first = 1;
+ uint8_t quoted = 0;
+
+ while (istlen(hdr) && (required_steps & ~validated_steps)) {
+ if (!first) {
+ if (*istptr(hdr) == ';')
+ hdr = istnext(hdr); /* skip ';' */
+ else
+ goto not_ok; /* unexpected char */
+ }
+ else
+ first = 0;
+
+ if (!(validated_steps & FORWARDED_HEADER_FOR) && istlen(hdr) > 4 &&
+ strncmp("for=", istptr(hdr), 4) == 0) {
+ struct forwarded_header_node *node = NULL;
+
+ /* for parameter */
+ current_step = FORWARDED_HEADER_FOR;
+ hdr = istadv(hdr, 4); /* skip "for=" */
+ _forwarded_header_quote_expected(&hdr, &quoted);
+ if (_forwarded_header_save_ctx(ctx, current_step, required_steps))
+ node = &ctx->nfor;
+ /* validate node */
+ if (!http_7239_extract_node(&hdr, node, quoted))
+ goto not_ok; /* invalid node */
+ }
+ else if (!(validated_steps & FORWARDED_HEADER_BY) && istlen(hdr) > 3 &&
+ strncmp("by=", istptr(hdr), 3) == 0) {
+ struct forwarded_header_node *node = NULL;
+
+ /* by parameter */
+ current_step = FORWARDED_HEADER_BY;
+ hdr = istadv(hdr, 3); /* skip "by=" */
+ _forwarded_header_quote_expected(&hdr, &quoted);
+ if (_forwarded_header_save_ctx(ctx, current_step, required_steps))
+ node = &ctx->nby;
+ /* validate node */
+ if (!http_7239_extract_node(&hdr, node, quoted))
+ goto not_ok; /* invalid node */
+ }
+ else if (!(validated_steps & FORWARDED_HEADER_HOST) && istlen(hdr) > 5 &&
+ strncmp("host=", istptr(hdr), 5) == 0) {
+ struct ist *host = NULL;
+
+ /* host parameter */
+ current_step = FORWARDED_HEADER_HOST;
+ hdr = istadv(hdr, 5); /* skip "host=" */
+ _forwarded_header_quote_expected(&hdr, &quoted);
+ if (_forwarded_header_save_ctx(ctx, current_step, required_steps))
+ host = &ctx->host;
+ /* validate host */
+ if (!http_7239_extract_host(&hdr, host, quoted))
+ goto not_ok; /* invalid host */
+ }
+ else if (!(validated_steps & FORWARDED_HEADER_PROTO) && istlen(hdr) > 6 &&
+ strncmp("proto=", istptr(hdr), 6) == 0) {
+ /* proto parameter */
+ current_step = FORWARDED_HEADER_PROTO;
+ hdr = istadv(hdr, 6); /* skip "proto=" */
+ /* validate proto (only common used http|https are supported for now) */
+ if (istlen(hdr) < 4 || strncmp("http", istptr(hdr), 4))
+ goto not_ok;
+ hdr = istadv(hdr, 4); /* skip "http" */
+ if (istlen(hdr) && *istptr(hdr) == 's') {
+ hdr = istnext(hdr);
+ if (_forwarded_header_save_ctx(ctx, current_step, required_steps))
+ ctx->proto = FORWARDED_HEADER_HTTPS;
+ } else if (_forwarded_header_save_ctx(ctx, current_step, required_steps))
+ ctx->proto = FORWARDED_HEADER_HTTP;
+ /* rfc allows for potential proto quoting, but we don't support
+ * it: it is not common usage
+ */
+ }
+ else {
+ /* not supported
+ * rfc allows for upcoming extensions
+ * but obviously, we can't trust them
+ * as they are not yet standardized
+ */
+
+ goto not_ok;
+ }
+ /* quote check */
+ if (quoted) {
+ if (istlen(hdr) < 1 || *istptr(hdr) != '"') {
+ /* matching ending quote not found */
+ goto not_ok;
+ }
+ hdr = istnext(hdr); /* skip ending quote */
+ quoted = 0; /* reset */
+ }
+ validated_steps |= current_step;
+ }
+
+ return validated_steps;
+
+ not_ok:
+ return 0;
+}
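+
+/* Minimal usage sketch for the validator above (editor's illustration;
+ * the header value is a made-up example):
+ *
+ *   struct forwarded_header_ctx ctx;
+ *   struct ist hdr = ist("proto=https;for=\"[2001:db8::1]:4433\"");
+ *   int steps = http_validate_7239_header(hdr, FORWARDED_HEADER_ALL, &ctx);
+ *
+ * Here <steps> would contain FORWARDED_HEADER_PROTO|FORWARDED_HEADER_FOR,
+ * with ctx.proto set to FORWARDED_HEADER_HTTPS and ctx.nfor describing
+ * the ipv6 node.
+ */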
+
+static inline void _7239_print_ip6(struct buffer *out, struct in6_addr *ip6_addr, int quoted)
+{
+ char pn[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6,
+ ip6_addr,
+ pn, sizeof(pn));
+ if (!quoted)
+ chunk_appendf(out, "\""); /* explicit quoting required for ipv6 */
+ chunk_appendf(out, "[%s]", pn);
+}
+
+static inline void http_build_7239_header_nodename(struct buffer *out,
+ struct stream *s, struct proxy *curproxy,
+ const struct sockaddr_storage *addr,
+ struct http_ext_7239_forby *forby)
+{
+ struct in6_addr *ip6_addr;
+ int quoted = !!forby->np_mode;
+
+ if (forby->nn_mode == HTTP_7239_FORBY_ORIG) {
+ if (addr && addr->ss_family == AF_INET) {
+ unsigned char *pn = (unsigned char *)&((struct sockaddr_in *)addr)->sin_addr;
+
+ chunk_appendf(out, "%d.%d.%d.%d", pn[0], pn[1], pn[2], pn[3]);
+ }
+ else if (addr && addr->ss_family == AF_INET6) {
+ ip6_addr = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ _7239_print_ip6(out, ip6_addr, quoted);
+ }
+ /* else: not supported */
+ }
+ else if (forby->nn_mode == HTTP_7239_FORBY_SMP && forby->nn_expr) {
+ struct sample *smp;
+
+ smp = sample_process(curproxy, s->sess, s,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL, forby->nn_expr, NULL);
+
+ if (smp) {
+ if (smp->data.type == SMP_T_IPV6) {
+ /* smp is valid IP6, print with RFC compliant output */
+ ip6_addr = &smp->data.u.ipv6;
+ _7239_print_ip6(out, ip6_addr, quoted);
+ }
+ else if (sample_casts[smp->data.type][SMP_T_STR] &&
+ sample_casts[smp->data.type][SMP_T_STR](smp)) {
+ struct ist validate_n = ist2(smp->data.u.str.area, smp->data.u.str.data);
+ struct ist validate_o = ist2(smp->data.u.str.area, smp->data.u.str.data);
+ struct forwarded_header_nodename nodename;
+
+ /* validate nodename */
+ if (http_7239_extract_nodename(&validate_n, &nodename, 1) &&
+ !istlen(validate_n)) {
+ if (nodename.type == FORWARDED_HEADER_IP &&
+ nodename.ip.ss_family == AF_INET6) {
+ /* special care needed for valid ip6 nodename (quoting) */
+ ip6_addr = &((struct sockaddr_in6 *)&nodename.ip)->sin6_addr;
+ _7239_print_ip6(out, ip6_addr, quoted);
+ } else {
+ /* no special care needed, input is already rfc compliant,
+ * just print as regular non quoted string
+ */
+ chunk_cat(out, &smp->data.u.str);
+ }
+ }
+ else if (http_7239_extract_obfs(&validate_o, NULL) &&
+ !istlen(validate_o)) {
+ /* raw user input that should be printed as 7239 obfs */
+ chunk_appendf(out, "_%.*s", (int)smp->data.u.str.data, smp->data.u.str.area);
+ }
+ /* else: not compliant */
+ }
+ /* else: cannot be casted to str */
+ }
+ /* else: smp error */
+ }
+}
+
+static inline void http_build_7239_header_nodeport(struct buffer *out,
+ struct stream *s, struct proxy *curproxy,
+ const struct sockaddr_storage *addr,
+ struct http_ext_7239_forby *forby)
+{
+ if (forby->np_mode == HTTP_7239_FORBY_ORIG) {
+ if (addr && addr->ss_family == AF_INET)
+ chunk_appendf(out, "%d", ntohs(((struct sockaddr_in *)addr)->sin_port));
+ else if (addr && addr->ss_family == AF_INET6)
+ chunk_appendf(out, "%d", ntohs(((struct sockaddr_in6 *)addr)->sin6_port));
+ /* else: not supported */
+ }
+ else if (forby->np_mode == HTTP_7239_FORBY_SMP && forby->np_expr) {
+ struct sample *smp;
+
+ smp = sample_fetch_as_type(curproxy, s->sess, s,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL, forby->np_expr, SMP_T_STR);
+ if (smp) {
+ struct ist validate_n = ist2(smp->data.u.str.area, smp->data.u.str.data);
+ struct ist validate_o = ist2(smp->data.u.str.area, smp->data.u.str.data);
+
+ /* validate nodeport */
+ if (http_7239_extract_nodeport(&validate_n, NULL) &&
+ !istlen(validate_n)) {
+ /* no special care needed, input is already rfc compliant,
+ * just print as regular non quoted string
+ */
+ chunk_cat(out, &smp->data.u.str);
+ }
+ else if (http_7239_extract_obfs(&validate_o, NULL) &&
+ !istlen(validate_o)) {
+ /* raw user input that should be printed as 7239 obfs */
+ chunk_appendf(out, "_%.*s", (int)smp->data.u.str.data, smp->data.u.str.area);
+ }
+ /* else: not compliant */
+ }
+ /* else: smp error */
+ }
+}
+
+static inline void http_build_7239_header_node(struct buffer *out,
+ struct stream *s, struct proxy *curproxy,
+ const struct sockaddr_storage *addr,
+ struct http_ext_7239_forby *forby)
+{
+ size_t offset_start;
+ size_t offset_save;
+
+ offset_start = out->data;
+ if (forby->np_mode)
+ chunk_appendf(out, "\"");
+ offset_save = out->data;
+ http_build_7239_header_nodename(out, s, curproxy, addr, forby);
+ if (offset_save == out->data) {
+ /* could not build nodename, either because some
+ * data is not available or user is providing bad input
+ */
+ chunk_appendf(out, "unknown");
+ }
+ if (forby->np_mode) {
+ chunk_appendf(out, ":");
+ offset_save = out->data;
+ http_build_7239_header_nodeport(out, s, curproxy, addr, forby);
+ if (offset_save == out->data) {
+ /* could not build nodeport, either because some data is
+ * not available or user is providing bad input
+ */
+ out->data = offset_save - 1;
+ }
+ }
+ if (out->data != offset_start && out->area[offset_start] == '"')
+ chunk_appendf(out, "\""); /* add matching end quote */
+}
+
+static inline void http_build_7239_header_host(struct buffer *out,
+ struct stream *s, struct proxy *curproxy,
+ struct htx *htx, struct http_ext_7239_host *host)
+{
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ char *str = NULL;
+ int str_len = 0;
+
+ if (host->mode == HTTP_7239_HOST_ORIG &&
+ http_find_header(htx, ist("host"), &ctx, 0)) {
+ str = ctx.value.ptr;
+ str_len = ctx.value.len;
+ print_host:
+ {
+ struct ist validate = ist2(str, str_len);
+ /* host check, to ensure rfc compliant output
+ * (assuming host is quoted/escaped)
+ */
+ if (http_7239_extract_host(&validate, NULL, 1) && !istlen(validate))
+ chunk_memcat(out, str, str_len);
+ /* else: not compliant or partially compliant */
+ }
+
+ }
+ else if (host->mode == HTTP_7239_HOST_SMP && host->expr) {
+ struct sample *smp;
+
+ smp = sample_fetch_as_type(curproxy, s->sess, s,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL, host->expr, SMP_T_STR);
+ if (smp) {
+ str = smp->data.u.str.area;
+ str_len = smp->data.u.str.data;
+ goto print_host;
+ }
+ /* else: smp error */
+ }
+}
+
+/* Tries to build the 7239 header according to <curproxy> parameters and <s>
+ * context. It depends both on <curproxy>->http_ext->fwd for the config and
+ * on <s> for the request context data.
+ * The function will write its output to the <out> buffer.
+ * Returns 1 for success and 0 for error (ie: not enough space in buffer)
+ */
+static int http_build_7239_header(struct buffer *out,
+ struct stream *s, struct proxy *curproxy, struct htx *htx)
+{
+ struct connection *cli_conn = objt_conn(strm_sess(s)->origin);
+
+ if (curproxy->http_ext->fwd->p_proto) {
+ chunk_appendf(out, "%sproto=%s", ((out->data) ? ";" : ""),
+ ((conn_is_ssl(cli_conn)) ? "https" : "http"));
+ }
+ if (curproxy->http_ext->fwd->p_host.mode) {
+ /* always add quotes for host parameter to make output compliance checks simpler */
+ chunk_appendf(out, "%shost=\"", ((out->data) ? ";" : ""));
+ /* the helper currently returns nothing, but a status could be useful some day */
+ http_build_7239_header_host(out, s, curproxy, htx, &curproxy->http_ext->fwd->p_host);
+ chunk_appendf(out, "\"");
+ }
+
+ if (curproxy->http_ext->fwd->p_by.nn_mode) {
+ const struct sockaddr_storage *dst = sc_dst(s->scf);
+
+ chunk_appendf(out, "%sby=", ((out->data) ? ";" : ""));
+ http_build_7239_header_node(out, s, curproxy, dst, &curproxy->http_ext->fwd->p_by);
+ }
+
+ if (curproxy->http_ext->fwd->p_for.nn_mode) {
+ const struct sockaddr_storage *src = sc_src(s->scf);
+
+ chunk_appendf(out, "%sfor=", ((out->data) ? ";" : ""));
+ http_build_7239_header_node(out, s, curproxy, src, &curproxy->http_ext->fwd->p_for);
+ }
+ if (unlikely(out->data == out->size)) {
+ /* not enough space in buffer, error */
+ return 0;
+ }
+ return 1;
+}
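+
+/* Example of a value this function may emit (editor's illustration with
+ * proto, host, by and for all enabled; the addresses are made up):
+ *
+ *   proto=https;host="example.com";by=203.0.113.60;for="[2001:db8::1]:4433"
+ */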
+
+/* This function will try to inject the RFC 7239 forwarded header if
+ * configured on the backend (ignored for frontends).
+ * It will do nothing if the option is not enabled on the proxy.
+ * Returns 1 for success and 0 for failure
+ */
+int http_handle_7239_header(struct stream *s, struct channel *req)
+{
+ struct proxy *curproxy = s->be; /* ignore frontend */
+
+ if (curproxy->http_ext && curproxy->http_ext->fwd) {
+ struct htx *htx = htxbuf(&req->buf);
+ int validate = 1;
+ struct http_hdr_ctx find = { .blk = NULL };
+ struct http_hdr_ctx last = { .blk = NULL};
+ struct ist hdr = ist("forwarded");
+
+ /* ok, let's build forwarded header */
+ chunk_reset(&trash);
+ if (unlikely(!http_build_7239_header(&trash, s, curproxy, htx)))
+ return 0; /* error when building header (bad user conf or memory error) */
+
+ /* validate existing forwarded header (including multiple values),
+ * hard stop if error is encountered
+ */
+ while (http_find_header(htx, hdr, &find, 0)) {
+ /* validate current header chunk */
+ if (!http_validate_7239_header(find.value, FORWARDED_HEADER_ALL, NULL)) {
+ /* at least one error, existing forwarded header not OK, add our own
+ * forwarded header, so that it can be trusted
+ */
+ validate = 0;
+ break;
+ }
+ last = find;
+ }
+ /* no errors, append our data at the end of existing header */
+ if (last.blk && validate) {
+ if (unlikely(!http_append_header_value(htx, &last, ist2(trash.area, trash.data))))
+ return 0; /* htx error */
+ }
+ else {
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ return 0; /* htx error */
+ }
+ }
+ return 1;
+}
+
+/*
+ * add X-Forwarded-For if either the frontend or the backend
+ * asks for it.
+ * Returns 1 for success and 0 for failure
+ */
+int http_handle_xff_header(struct stream *s, struct channel *req)
+{
+ struct session *sess = s->sess;
+ struct http_ext_xff *f_xff = NULL;
+ struct http_ext_xff *b_xff = NULL;
+
+ if (sess->fe->http_ext && sess->fe->http_ext->xff) {
+ /* frontend */
+ f_xff = sess->fe->http_ext->xff;
+ }
+ if (s->be->http_ext && s->be->http_ext->xff) {
+ /* backend */
+ b_xff = s->be->http_ext->xff;
+ }
+
+ if (f_xff || b_xff) {
+ struct htx *htx = htxbuf(&req->buf);
+ const struct sockaddr_storage *src = sc_src(s->scf);
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct ist hdr = ((b_xff) ? b_xff->hdr_name : f_xff->hdr_name);
+
+ if ((!f_xff || f_xff->mode == HTTP_XFF_IFNONE) &&
+ (!b_xff || b_xff->mode == HTTP_XFF_IFNONE) &&
+ http_find_header(htx, hdr, &ctx, 0)) {
+ /* The header is set to be added only if none is present
+ * and we found it, so don't do anything.
+ */
+ }
+ else if (src && src->ss_family == AF_INET) {
+ /* Add an X-Forwarded-For header unless the source IP is
+ * in the 'except' network range.
+ */
+ if ((!f_xff || ipcmp2net(src, &f_xff->except_net)) &&
+ (!b_xff || ipcmp2net(src, &b_xff->except_net))) {
+ unsigned char *pn = (unsigned char *)&((struct sockaddr_in *)src)->sin_addr;
+
+ /* Note: we rely on the backend to get the header name to be used for
+ * x-forwarded-for, because the header is really meant for the backends.
+ * However, if the backend did not specify any option, we have to rely
+ * on the frontend's header name.
+ */
+ chunk_printf(&trash, "%d.%d.%d.%d", pn[0], pn[1], pn[2], pn[3]);
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ return 0;
+ }
+ }
+ else if (src && src->ss_family == AF_INET6) {
+ /* Add an X-Forwarded-For header unless the source IP is
+ * in the 'except' network range.
+ */
+ if ((!f_xff || ipcmp2net(src, &f_xff->except_net)) &&
+ (!b_xff || ipcmp2net(src, &b_xff->except_net))) {
+ char pn[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6,
+ (const void *)&((struct sockaddr_in6 *)(src))->sin6_addr,
+ pn, sizeof(pn));
+
+ /* Note: we rely on the backend to get the header name to be used for
+ * x-forwarded-for, because the header is really meant for the backends.
+ * However, if the backend did not specify any option, we have to rely
+ * on the frontend's header name.
+ */
+ chunk_printf(&trash, "%s", pn);
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+/*
+ * add X-Original-To if either the frontend or the backend
+ * asks for it.
+ * Returns 1 for success and 0 for failure
+ */
+int http_handle_xot_header(struct stream *s, struct channel *req)
+{
+ struct session *sess = s->sess;
+ struct http_ext_xot *f_xot = NULL;
+ struct http_ext_xot *b_xot = NULL;
+
+ if (sess->fe->http_ext && sess->fe->http_ext->xot) {
+ /* frontend */
+ f_xot = sess->fe->http_ext->xot;
+ }
+ if (s->be->http_ext && s->be->http_ext->xot) {
+ /* backend */
+ BUG_ON(!s->be->http_ext);
+ b_xot = s->be->http_ext->xot;
+ }
+
+ if (f_xot || b_xot) {
+ struct htx *htx = htxbuf(&req->buf);
+ const struct sockaddr_storage *dst = sc_dst(s->scf);
+ struct ist hdr = ((b_xot) ? b_xot->hdr_name : f_xot->hdr_name);
+
+ if (dst && dst->ss_family == AF_INET) {
+ /* Add an X-Original-To header unless the destination IP is
+ * in the 'except' network range.
+ */
+ if ((!f_xot || ipcmp2net(dst, &f_xot->except_net)) &&
+ (!b_xot || ipcmp2net(dst, &b_xot->except_net))) {
+ unsigned char *pn = (unsigned char *)&((struct sockaddr_in *)dst)->sin_addr;
+
+ /* Note: we rely on the backend to get the header name to be used for
+ * x-original-to, because the header is really meant for the backends.
+ * However, if the backend did not specify any option, we have to rely
+ * on the frontend's header name.
+ */
+ chunk_printf(&trash, "%d.%d.%d.%d", pn[0], pn[1], pn[2], pn[3]);
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ return 0;
+ }
+ }
+ else if (dst && dst->ss_family == AF_INET6) {
+ /* Add an X-Original-To header unless the destination IP is
+ * in the 'except' network range.
+ */
+ if ((!f_xot || ipcmp2net(dst, &f_xot->except_net)) &&
+ (!b_xot || ipcmp2net(dst, &b_xot->except_net))) {
+ char pn[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6,
+ (const void *)&((struct sockaddr_in6 *)dst)->sin6_addr,
+ pn, sizeof(pn));
+
+ /* Note: we rely on the backend to get the header name to be used for
+ * x-original-to, because the header is really meant for the backends.
+ * However, if the backend did not specify any option, we have to rely
+ * on the frontend's header name.
+ */
+ chunk_printf(&trash, "%s", pn);
+ if (unlikely(!http_add_header(htx, hdr, ist2(trash.area, trash.data))))
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+/*
+ * =========== CONFIG ===========
+ * below are helpers to parse http ext options from the config
+ */
+static int proxy_http_parse_oom(const char *file, int linenum)
+{
+ int err_code = 0;
+
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ return err_code;
+}
+
+static inline int _proxy_http_parse_7239_expr(char **args, int *cur_arg,
+ const char *file, int linenum,
+ char **expr_s)
+{
+ int err_code = 0;
+
+ if (!*args[*cur_arg + 1]) {
+ ha_alert("parsing [%s:%d]: '%s' expects <expr> as argument.\n",
+ file, linenum, args[*cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ *cur_arg += 1;
+ ha_free(expr_s);
+ *expr_s = strdup(args[*cur_arg]);
+ if (!*expr_s)
+ return proxy_http_parse_oom(file, linenum);
+ *cur_arg += 1;
+ out:
+ return err_code;
+}
+
+/* forwarded/7239 RFC: tries to parse "option forwarded" config keyword
+ * Returns a composition of ERR_ABORT, ERR_ALERT, ERR_FATAL, ERR_WARN
+ */
+int proxy_http_parse_7239(char **args, int cur_arg,
+ struct proxy *curproxy, const struct proxy *defpx,
+ const char *file, int linenum)
+{
+ struct http_ext_7239 *fwd;
+ int err_code = 0;
+
+ if (warnifnotcap(curproxy, PR_CAP_BE, file, linenum, "option forwarded", NULL)) {
+ /* option is ignored for frontends */
+ err_code |= ERR_WARN;
+ goto out;
+ }
+
+ if (!http_ext_7239_prepare(curproxy))
+ return proxy_http_parse_oom(file, linenum);
+
+ fwd = curproxy->http_ext->fwd;
+
+ fwd->p_proto = 0;
+ fwd->p_host.mode = 0;
+ fwd->p_for.nn_mode = 0;
+ fwd->p_for.np_mode = 0;
+ fwd->p_by.nn_mode = 0;
+ fwd->p_by.np_mode = 0;
+ ha_free(&fwd->c_file);
+ fwd->c_file = strdup(file);
+ fwd->c_line = linenum;
+
+ /* start at 2, since 0+1 = "option" "forwarded" */
+ cur_arg = 2;
+ if (!*(args[cur_arg])) {
+ /* no optional argument provided, use default settings */
+ fwd->p_for.nn_mode = HTTP_7239_FORBY_ORIG; /* enable for and mimic xff */
+ fwd->p_proto = 1; /* enable proto */
+ goto out;
+ }
+ /* loop to go through optional arguments */
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "proto") == 0) {
+ fwd->p_proto = 1;
+ cur_arg += 1;
+ } else if (strcmp(args[cur_arg], "host") == 0) {
+ fwd->p_host.mode = HTTP_7239_HOST_ORIG;
+ cur_arg += 1;
+ } else if (strcmp(args[cur_arg], "host-expr") == 0) {
+ fwd->p_host.mode = HTTP_7239_HOST_SMP;
+ err_code |= _proxy_http_parse_7239_expr(args, &cur_arg, file, linenum,
+ &fwd->p_host.expr_s);
+ if (err_code & ERR_CODE)
+ goto out;
+ } else if (strcmp(args[cur_arg], "by") == 0) {
+ fwd->p_by.nn_mode = HTTP_7239_FORBY_ORIG;
+ cur_arg += 1;
+ } else if (strcmp(args[cur_arg], "by-expr") == 0) {
+ fwd->p_by.nn_mode = HTTP_7239_FORBY_SMP;
+ err_code |= _proxy_http_parse_7239_expr(args, &cur_arg, file, linenum,
+ &fwd->p_by.nn_expr_s);
+ if (err_code & ERR_CODE)
+ goto out;
+ } else if (strcmp(args[cur_arg], "for") == 0) {
+ fwd->p_for.nn_mode = HTTP_7239_FORBY_ORIG;
+ cur_arg += 1;
+ } else if (strcmp(args[cur_arg], "for-expr") == 0) {
+ fwd->p_for.nn_mode = HTTP_7239_FORBY_SMP;
+ err_code |= _proxy_http_parse_7239_expr(args, &cur_arg, file, linenum,
+ &fwd->p_for.nn_expr_s);
+ if (err_code & ERR_CODE)
+ goto out;
+ } else if (strcmp(args[cur_arg], "by_port") == 0) {
+ fwd->p_by.np_mode = HTTP_7239_FORBY_ORIG;
+ cur_arg += 1;
+ } else if (strcmp(args[cur_arg], "by_port-expr") == 0) {
+ fwd->p_by.np_mode = HTTP_7239_FORBY_SMP;
+ err_code |= _proxy_http_parse_7239_expr(args, &cur_arg, file, linenum,
+ &fwd->p_by.np_expr_s);
+ if (err_code & ERR_CODE)
+ goto out;
+ } else if (strcmp(args[cur_arg], "for_port") == 0) {
+ fwd->p_for.np_mode = HTTP_7239_FORBY_ORIG;
+ cur_arg += 1;
+ } else if (strcmp(args[cur_arg], "for_port-expr") == 0) {
+ fwd->p_for.np_mode = HTTP_7239_FORBY_SMP;
+ err_code |= _proxy_http_parse_7239_expr(args, &cur_arg, file, linenum,
+ &fwd->p_for.np_expr_s);
+ if (err_code & ERR_CODE)
+ goto out;
+ } else {
+ /* unknown suboption - catchall */
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'proto', 'host', "
+ "'host-expr', 'by', 'by-expr', 'by_port', 'by_port-expr', "
+ "'for', 'for-expr', 'for_port' and 'for_port-expr'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } /* end while loop */
+
+ /* consistency check */
+ if (fwd->p_by.np_mode &&
+ !fwd->p_by.nn_mode) {
+ fwd->p_by.np_mode = 0;
+ ha_free(&fwd->p_by.np_expr_s);
+ ha_warning("parsing [%s:%d] : '%s %s' : '%s' will be ignored because both 'by' "
+ "and 'by-expr' are unset\n",
+ file, linenum, args[0], args[1],
+ ((fwd->p_by.np_mode == HTTP_7239_FORBY_ORIG) ? "by_port" : "by_port-expr"));
+ err_code |= ERR_WARN;
+ }
+ if (fwd->p_for.np_mode &&
+ !fwd->p_for.nn_mode) {
+ fwd->p_for.np_mode = 0;
+ ha_free(&fwd->p_for.np_expr_s);
+ ha_warning("parsing [%s:%d] : '%s %s' : '%s' will be ignored because both 'for' "
+ "and 'for-expr' are unset\n",
+ file, linenum, args[0], args[1],
+ ((fwd->p_for.np_mode == HTTP_7239_FORBY_ORIG) ? "for_port" : "for_port-expr"));
+ err_code |= ERR_WARN;
+ }
+
+ out:
+ return err_code;
+}
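+
+/* Illustrative backend configuration for the parser above (editor's
+ * sketch; the proxy name is a placeholder):
+ *
+ *   backend bk_app
+ *       option forwarded proto host for for_port
+ *
+ * or, using sample expressions instead of connection data:
+ *
+ *   backend bk_app
+ *       option forwarded for-expr src for_port-expr int(1337)
+ */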
+
+/* rfc7239 forwarded option needs a postparsing step
+ * to convert parsing hints into runtime usable sample expressions
+ * Returns a composition of ERR_NONE, ERR_FATAL, ERR_ALERT, ERR_WARN
+ */
+int proxy_http_compile_7239(struct proxy *curproxy)
+{
+ struct http_ext_7239 *fwd;
+ int err = ERR_NONE;
+ int loop;
+
+ if (!(curproxy->cap & PR_CAP_BE)) {
+ /* no backend cap: not supported (ie: frontend) */
+ goto out;
+ }
+
+ /* should not happen (test should be performed after BE cap test) */
+ BUG_ON(!curproxy->http_ext || !curproxy->http_ext->fwd);
+
+ curproxy->conf.args.ctx = ARGC_OPT; /* option */
+ curproxy->conf.args.file = curproxy->http_ext->fwd->c_file;
+ curproxy->conf.args.line = curproxy->http_ext->fwd->c_line;
+ fwd = curproxy->http_ext->fwd;
+
+ /* it is important that we keep iterating on error to make sure
+ * all fwd config fields are in the same state (post-parsing state)
+ */
+ for (loop = 0; loop < 5; loop++) {
+ char **expr_str = NULL;
+ struct sample_expr **expr = NULL;
+ struct sample_expr *cur_expr;
+ char *err_str = NULL;
+ int smp = 0;
+ int idx = 0;
+
+ switch (loop) {
+ case 0:
+ /* host */
+ expr_str = &fwd->p_host.expr_s;
+ expr = &fwd->p_host.expr;
+ smp = (fwd->p_host.mode == HTTP_7239_HOST_SMP);
+ break;
+ case 1:
+ /* by->node */
+ expr_str = &fwd->p_by.nn_expr_s;
+ expr = &fwd->p_by.nn_expr;
+ smp = (fwd->p_by.nn_mode == HTTP_7239_FORBY_SMP);
+ break;
+ case 2:
+ /* by->nodeport */
+ expr_str = &fwd->p_by.np_expr_s;
+ expr = &fwd->p_by.np_expr;
+ smp = (fwd->p_by.np_mode == HTTP_7239_FORBY_SMP);
+ break;
+ case 3:
+ /* for->node */
+ expr_str = &fwd->p_for.nn_expr_s;
+ expr = &fwd->p_for.nn_expr;
+ smp = (fwd->p_for.nn_mode == HTTP_7239_FORBY_SMP);
+ break;
+ case 4:
+ /* for->nodeport */
+ expr_str = &fwd->p_for.np_expr_s;
+ expr = &fwd->p_for.np_expr;
+ smp = (fwd->p_for.np_mode == HTTP_7239_FORBY_SMP);
+ break;
+ }
+ if (!smp)
+ continue; /* no expr */
+
+ /* expr and expr_str cannot be NULL past this point */
+ BUG_ON(!expr || !expr_str);
+
+ if (!*expr_str) {
+ /* should not happen unless system memory is exhausted */
+ ha_alert("%s '%s' [%s:%d]: failed to parse 'option forwarded' expression : %s.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ fwd->c_file, fwd->c_line,
+ "memory error");
+ err |= ERR_ALERT | ERR_FATAL;
+ continue;
+ }
+
+ cur_expr =
+ sample_parse_expr((char*[]){*expr_str, NULL}, &idx,
+ fwd->c_file,
+ fwd->c_line,
+ &err_str, &curproxy->conf.args, NULL);
+
+ if (!cur_expr) {
+ ha_alert("%s '%s' [%s:%d]: failed to parse 'option forwarded' expression '%s' in : %s.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ fwd->c_file, fwd->c_line,
+ *expr_str, err_str);
+ ha_free(&err_str);
+ err |= ERR_ALERT | ERR_FATAL;
+ }
+ else if (!(cur_expr->fetch->val & SMP_VAL_BE_HRQ_HDR)) {
+ /* fetch not available in this context: sample expr is resolved
+ * within backend right after headers are processed.
+ * (in http_process_request())
+ * -> we simply warn the user about the misuse
+ */
+ ha_warning("%s '%s' [%s:%d]: in 'option forwarded' sample expression '%s' : "
+ "some args extract information from '%s', "
+ "none of which is available here.\n",
+ proxy_type_str(curproxy), curproxy->id,
+ fwd->c_file, fwd->c_line,
+ *expr_str, sample_ckp_names(cur_expr->fetch->use));
+ err |= ERR_WARN;
+ }
+ /* post parsing individual expr cleanup */
+ ha_free(expr_str);
+
+ /* expr assignment */
+ *expr = cur_expr;
+ }
+ curproxy->conf.args.file = NULL;
+ curproxy->conf.args.line = 0;
+
+ /* post parsing general cleanup */
+ ha_free(&fwd->c_file);
+ fwd->c_line = 0;
+
+ fwd->c_mode = 1; /* parsing completed */
+
+ out:
+ return err;
+}
+
+/* x-forwarded-for: tries to parse "option forwardfor" config keyword
+ * Returns a composition of ERR_NONE, ERR_FATAL, ERR_ALERT
+ */
+int proxy_http_parse_xff(char **args, int cur_arg,
+ struct proxy *curproxy, const struct proxy *defpx,
+ const char *file, int linenum)
+{
+ struct http_ext_xff *xff;
+ int err_code = 0;
+
+ if (!http_ext_xff_prepare(curproxy))
+ return proxy_http_parse_oom(file, linenum);
+
+ xff = curproxy->http_ext->xff;
+
+ /* insert the x-forwarded-for field, except for the IP addresses listed in 'except'.
+ * Set the default options (ie: bitfield, header name, etc)
+ */
+
+ xff->mode = HTTP_XFF_ALWAYS;
+
+ istfree(&xff->hdr_name);
+ xff->hdr_name = istdup(ist(DEF_XFORWARDFOR_HDR));
+ if (!isttest(xff->hdr_name))
+ return proxy_http_parse_oom(file, linenum);
+ xff->except_net.family = AF_UNSPEC;
+
+ /* loop to go through arguments - start at 2, since 0+1 = "option" "forwardfor" */
+ cur_arg = 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "except") == 0) {
+ unsigned char mask;
+ int i;
+
+ /* suboption except - needs additional argument for it */
+ if (*(args[cur_arg+1]) &&
+ str2net(args[cur_arg+1], 1, &xff->except_net.addr.v4.ip, &xff->except_net.addr.v4.mask)) {
+ xff->except_net.family = AF_INET;
+ xff->except_net.addr.v4.ip.s_addr &= xff->except_net.addr.v4.mask.s_addr;
+ }
+ else if (*(args[cur_arg+1]) &&
+ str62net(args[cur_arg+1], &xff->except_net.addr.v6.ip, &mask)) {
+ xff->except_net.family = AF_INET6;
+ len2mask6(mask, &xff->except_net.addr.v6.mask);
+ for (i = 0; i < 16; i++)
+ xff->except_net.addr.v6.ip.s6_addr[i] &= xff->except_net.addr.v6.mask.s6_addr[i];
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <address>[/mask] as argument.\n",
+ file, linenum, args[0], args[1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ /* flush useless bits */
+ cur_arg += 2;
+ } else if (strcmp(args[cur_arg], "header") == 0) {
+ /* suboption header - needs additional argument for it */
+ if (*(args[cur_arg+1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <header_name> as argument.\n",
+ file, linenum, args[0], args[1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ istfree(&xff->hdr_name);
+ xff->hdr_name = istdup(ist(args[cur_arg+1]));
+ if (!isttest(xff->hdr_name))
+ return proxy_http_parse_oom(file, linenum);
+ cur_arg += 2;
+ } else if (strcmp(args[cur_arg], "if-none") == 0) {
+ xff->mode = HTTP_XFF_IFNONE;
+ cur_arg += 1;
+ } else {
+ /* unknown suboption - catchall */
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'except', 'header' and 'if-none'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } /* end while loop */
+ out:
+ return err_code;
+}
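+
+/* Illustrative usage for the parser above (editor's sketch; the proxy
+ * and header names are placeholders):
+ *
+ *   frontend fe_main
+ *       option forwardfor except 127.0.0.0/8 header X-Real-IP if-none
+ */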
+
+/* x-original-to: tries to parse "option originalto" config keyword
+ * Returns a composition of ERR_NONE, ERR_FATAL, ERR_ALERT
+ */
+int proxy_http_parse_xot(char **args, int cur_arg,
+ struct proxy *curproxy, const struct proxy *defpx,
+ const char *file, int linenum)
+{
+ struct http_ext_xot *xot;
+ int err_code = 0;
+
+ if (!http_ext_xot_prepare(curproxy))
+ return proxy_http_parse_oom(file, linenum);
+
+ xot = curproxy->http_ext->xot;
+
+ /* insert the x-original-to field, except for the IP addresses listed in 'except'.
+ * Set the default options (ie: bitfield, header name, etc)
+ */
+
+ istfree(&xot->hdr_name);
+ xot->hdr_name = istdup(ist(DEF_XORIGINALTO_HDR));
+ if (!isttest(xot->hdr_name))
+ return proxy_http_parse_oom(file, linenum);
+ xot->except_net.family = AF_UNSPEC;
+
+ /* loop to go through arguments - start at 2, since 0+1 = "option" "originalto" */
+ cur_arg = 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "except") == 0) {
+ unsigned char mask;
+ int i;
+
+ /* suboption except - needs additional argument for it */
+ if (*(args[cur_arg+1]) &&
+ str2net(args[cur_arg+1], 1, &xot->except_net.addr.v4.ip, &xot->except_net.addr.v4.mask)) {
+ xot->except_net.family = AF_INET;
+ xot->except_net.addr.v4.ip.s_addr &= xot->except_net.addr.v4.mask.s_addr;
+ }
+ else if (*(args[cur_arg+1]) &&
+ str62net(args[cur_arg+1], &xot->except_net.addr.v6.ip, &mask)) {
+ xot->except_net.family = AF_INET6;
+ len2mask6(mask, &xot->except_net.addr.v6.mask);
+ for (i = 0; i < 16; i++)
+ xot->except_net.addr.v6.ip.s6_addr[i] &= xot->except_net.addr.v6.mask.s6_addr[i];
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <address>[/mask] as argument.\n",
+ file, linenum, args[0], args[1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cur_arg += 2;
+ } else if (strcmp(args[cur_arg], "header") == 0) {
+ /* suboption header - needs additional argument for it */
+ if (*(args[cur_arg+1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <header_name> as argument.\n",
+ file, linenum, args[0], args[1], args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ istfree(&xot->hdr_name);
+ xot->hdr_name = istdup(ist(args[cur_arg+1]));
+ if (!isttest(xot->hdr_name))
+ return proxy_http_parse_oom(file, linenum);
+ cur_arg += 2;
+ } else {
+ /* unknown suboption - catchall */
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'except' and 'header'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ } /* end while loop */
+
+ out:
+ return err_code;
+}
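+
+/* Illustrative usage for the parser above (editor's sketch; the proxy
+ * and header names are placeholders):
+ *
+ *   frontend fe_main
+ *       option originalto except 127.0.0.0/8 header X-Client-Dst
+ */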
+
+/*
+ * =========== MGMT ===========
+ * below are helpers to manage http ext options
+ */
+
+/* Ensure http_ext->fwd is properly allocated and
+ * initialized for <curproxy>.
+ * The function will leverage http_ext_prepare() to make
+ * sure http_ext is properly allocated and initialized as well.
+ * Returns 1 for success and 0 for failure (memory error)
+ */
+int http_ext_7239_prepare(struct proxy *curproxy)
+{
+ struct http_ext_7239 *fwd;
+
+ if (!http_ext_prepare(curproxy))
+ return 0;
+ if (curproxy->http_ext->fwd)
+ return 1; /* nothing to do */
+
+ fwd = malloc(sizeof(*fwd));
+ if (!fwd)
+ return 0;
+ /* initialize fwd mandatory fields */
+ fwd->c_mode = 0; /* pre-compile (parse) time */
+ fwd->c_file = NULL;
+ fwd->p_host.expr_s = NULL;
+ fwd->p_by.nn_expr_s = NULL;
+ fwd->p_by.np_expr_s = NULL;
+ fwd->p_for.nn_expr_s = NULL;
+ fwd->p_for.np_expr_s = NULL;
+ /* assign */
+ curproxy->http_ext->fwd = fwd;
+ return 1;
+}
+
+/* Ensure http_ext->xff is properly allocated and
+ * initialized for <curproxy>.
+ * The function will leverage http_ext_prepare() to make
+ * sure http_ext is properly allocated and initialized as well.
+ * Returns 1 for success and 0 for failure (memory error)
+ */
+int http_ext_xff_prepare(struct proxy *curproxy)
+{
+ struct http_ext_xff *xff;
+
+ if (!http_ext_prepare(curproxy))
+ return 0;
+ if (curproxy->http_ext->xff)
+ return 1; /* nothing to do */
+
+ xff = malloc(sizeof(*xff));
+ if (!xff)
+ return 0;
+ /* initialize xff mandatory fields */
+ xff->hdr_name = IST_NULL;
+ /* assign */
+ curproxy->http_ext->xff = xff;
+ return 1;
+}
+
+/* Ensure http_ext->xot is properly allocated and
+ * initialized for <curproxy>.
+ * The function will leverage http_ext_prepare() to make
+ * sure http_ext is properly allocated and initialized as well.
+ * Returns 1 for success and 0 for failure (memory error)
+ */
+int http_ext_xot_prepare(struct proxy *curproxy)
+{
+ struct http_ext_xot *xot;
+
+ if (!http_ext_prepare(curproxy))
+ return 0;
+ if (curproxy->http_ext->xot)
+ return 1; /* nothing to do */
+
+ xot = malloc(sizeof(*xot));
+ if (!xot)
+ return 0;
+ /* initialize xot mandatory fields */
+ xot->hdr_name = IST_NULL;
+ /* assign */
+ curproxy->http_ext->xot = xot;
+ return 1;
+}
+
+/* deep clean http_ext->fwd parameter for <curproxy>
+ * http_ext->fwd will be freed.
+ * The clean behavior differs depending on http_ext->fwd
+ * state: if fwd is in 'parsed' state, parsing hints are
+ * cleaned; else fwd is in 'compiled' state, in which case
+ * the compiled results are cleaned.
+ * This is because parse and compile memory areas are shared in
+ * a single union to optimize struct http_ext_7239 size.
+ */
+void http_ext_7239_clean(struct proxy *curproxy)
+{
+ struct http_ext_7239 *clean;
+
+ if (!curproxy->http_ext)
+ return;
+ clean = curproxy->http_ext->fwd;
+ if (!clean)
+ return; /* nothing to do */
+ if (!clean->c_mode) {
+ /* parsed */
+ ha_free(&clean->c_file);
+ ha_free(&clean->p_host.expr_s);
+ ha_free(&clean->p_by.nn_expr_s);
+ ha_free(&clean->p_by.np_expr_s);
+ ha_free(&clean->p_for.nn_expr_s);
+ ha_free(&clean->p_for.np_expr_s);
+ }
+ else {
+ /* compiled */
+ release_sample_expr(clean->p_host.expr);
+ clean->p_host.expr = NULL;
+ release_sample_expr(clean->p_by.nn_expr);
+ clean->p_by.nn_expr = NULL;
+ release_sample_expr(clean->p_by.np_expr);
+ clean->p_by.np_expr = NULL;
+ release_sample_expr(clean->p_for.nn_expr);
+ clean->p_for.nn_expr = NULL;
+ release_sample_expr(clean->p_for.np_expr);
+ clean->p_for.np_expr = NULL;
+ }
+ /* free fwd */
+ ha_free(&curproxy->http_ext->fwd);
+}
+
+/* deep clean http_ext->xff parameter for <curproxy>
+ * http_ext->xff will be freed
+ */
+void http_ext_xff_clean(struct proxy *curproxy)
+{
+ struct http_ext_xff *clean;
+
+ if (!curproxy->http_ext)
+ return;
+ clean = curproxy->http_ext->xff;
+ if (!clean)
+ return; /* nothing to do */
+ istfree(&clean->hdr_name);
+ /* free xff */
+ ha_free(&curproxy->http_ext->xff);
+}
+
+/* deep clean http_ext->xot parameter for <curproxy>
+ * http_ext->xot will be freed
+ */
+void http_ext_xot_clean(struct proxy *curproxy)
+{
+ struct http_ext_xot *clean;
+
+ if (!curproxy->http_ext)
+ return;
+ clean = curproxy->http_ext->xot;
+ if (!clean)
+ return; /* nothing to do */
+ istfree(&clean->hdr_name);
+ /* free xot */
+ ha_free(&curproxy->http_ext->xot);
+}
+
+/* duplicate http_ext->fwd parameters from <def> to <cpy>
+ * performs the required memory allocation and initialization
+ */
+void http_ext_7239_dup(const struct proxy *def, struct proxy *cpy)
+{
+ struct http_ext_7239 *dest = NULL;
+ struct http_ext_7239 *orig = NULL;
+
+ /* feature requires backend cap */
+ if (!(cpy->cap & PR_CAP_BE))
+ return;
+
+ if (def->http_ext == NULL || def->http_ext->fwd == NULL)
+ return;
+
+ orig = def->http_ext->fwd;
+
+ if (orig->c_mode)
+ return; /* copy not supported once compiled */
+
+ if (!http_ext_7239_prepare(cpy))
+ return;
+
+ dest = cpy->http_ext->fwd;
+
+ if (orig->c_file)
+ dest->c_file = strdup(orig->c_file);
+ dest->c_line = orig->c_line;
+ /* proto */
+ dest->p_proto = orig->p_proto;
+ /* host */
+ dest->p_host.mode = orig->p_host.mode;
+ if (orig->p_host.expr_s)
+ dest->p_host.expr_s = strdup(orig->p_host.expr_s);
+ /* by - nodename */
+ dest->p_by.nn_mode = orig->p_by.nn_mode;
+ if (orig->p_by.nn_expr_s)
+ dest->p_by.nn_expr_s = strdup(orig->p_by.nn_expr_s);
+ /* by - nodeport */
+ dest->p_by.np_mode = orig->p_by.np_mode;
+ if (orig->p_by.np_expr_s)
+ dest->p_by.np_expr_s = strdup(orig->p_by.np_expr_s);
+ /* for - nodename */
+ dest->p_for.nn_mode = orig->p_for.nn_mode;
+ if (orig->p_for.nn_expr_s)
+ dest->p_for.nn_expr_s = strdup(orig->p_for.nn_expr_s);
+ /* for - nodeport */
+ dest->p_for.np_mode = orig->p_for.np_mode;
+ if (orig->p_for.np_expr_s)
+ dest->p_for.np_expr_s = strdup(orig->p_for.np_expr_s);
+}
+
+/* duplicate http_ext->xff parameters from <def> to <cpy>
+ * performs the required memory allocation and initialization
+ */
+void http_ext_xff_dup(const struct proxy *def, struct proxy *cpy)
+{
+ struct http_ext_xff *dest = NULL;
+ struct http_ext_xff *orig = NULL;
+
+ if (def->http_ext == NULL || def->http_ext->xff == NULL ||
+ !http_ext_xff_prepare(cpy))
+ return;
+
+ orig = def->http_ext->xff;
+ dest = cpy->http_ext->xff;
+
+ if (isttest(orig->hdr_name))
+ dest->hdr_name = istdup(orig->hdr_name);
+ dest->mode = orig->mode;
+ dest->except_net = orig->except_net;
+}
+
+/* duplicate http_ext->xot parameters from <def> to <cpy>
+ * performs the required memory allocation and initialization
+ */
+void http_ext_xot_dup(const struct proxy *def, struct proxy *cpy)
+{
+ struct http_ext_xot *dest = NULL;
+ struct http_ext_xot *orig = NULL;
+
+ if (def->http_ext == NULL || def->http_ext->xot == NULL ||
+ !http_ext_xot_prepare(cpy))
+ return;
+
+ orig = def->http_ext->xot;
+ dest = cpy->http_ext->xot;
+
+ if (isttest(orig->hdr_name))
+ dest->hdr_name = istdup(orig->hdr_name);
+ dest->except_net = orig->except_net;
+}
+
+/* Allocate new http_ext and initialize it
+ * if needed
+ * Returns 1 for success and 0 for failure
+ */
+int http_ext_prepare(struct proxy *curproxy)
+{
+ if (curproxy->http_ext)
+ return 1; /* nothing to do */
+
+ curproxy->http_ext = malloc(sizeof(*curproxy->http_ext));
+ if (!curproxy->http_ext)
+ return 0; /* failure */
+ /* first init, set supported ext to NULL */
+ curproxy->http_ext->fwd = NULL;
+ curproxy->http_ext->xff = NULL;
+ curproxy->http_ext->xot = NULL;
+ return 1;
+}
+
+/* duplicate existing http_ext from <defproxy> to <curproxy>
+ */
+void http_ext_dup(const struct proxy *defproxy, struct proxy *curproxy)
+{
+ /* copy defproxy.http_ext members */
+ http_ext_7239_dup(defproxy, curproxy);
+ http_ext_xff_dup(defproxy, curproxy);
+ http_ext_xot_dup(defproxy, curproxy);
+}
+
+/* deep clean http_ext for <curproxy> (if previously allocated)
+ */
+void http_ext_clean(struct proxy *curproxy)
+{
+ if (!curproxy->http_ext)
+ return; /* nothing to do */
+ /* first, free supported ext */
+ http_ext_7239_clean(curproxy);
+ http_ext_xff_clean(curproxy);
+ http_ext_xot_clean(curproxy);
+
+ /* then, free http_ext */
+ ha_free(&curproxy->http_ext);
+}
+
+/* soft clean (only clean http_ext if no more options are used) */
+void http_ext_softclean(struct proxy *curproxy)
+{
+ if (!curproxy->http_ext)
+ return; /* nothing to do */
+ if (!curproxy->http_ext->fwd &&
+ !curproxy->http_ext->xff &&
+ !curproxy->http_ext->xot) {
+ /* no more use for http_ext, all options are disabled */
+ http_ext_clean(curproxy);
+ }
+}
+
+/* Perform some consistency checks on px.http_ext after parsing
+ * is completed.
+ * We make sure to perform a softclean in case some options were
+ * disabled by this check, so that the related memory can be released.
+ * Returns a composition of ERR_NONE, ERR_ALERT, ERR_FATAL, ERR_WARN
+ */
+static int check_http_ext_postconf(struct proxy *px)
+{
+ int err = ERR_NONE;
+
+ if (px->http_ext) {
+ /* consistency check for http_ext */
+ if (px->mode != PR_MODE_HTTP && !(px->options & PR_O_HTTP_UPG)) {
+ /* http is disabled on px, yet it is required by http_ext */
+ if (px->http_ext->fwd) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ "forwarded", proxy_type_str(px), px->id);
+ err |= ERR_WARN;
+ http_ext_7239_clean(px);
+ }
+ if (px->http_ext->xff) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ "forwardfor", proxy_type_str(px), px->id);
+ err |= ERR_WARN;
+ http_ext_xff_clean(px);
+ }
+ if (px->http_ext->xot) {
+ ha_warning("'option %s' ignored for %s '%s' as it requires HTTP mode.\n",
+ "originalto", proxy_type_str(px), px->id);
+ err |= ERR_WARN;
+ http_ext_xot_clean(px);
+ }
+ } else if (px->http_ext->fwd) {
+ /* option "forwarded" may need to compile its expressions */
+ err |= proxy_http_compile_7239(px);
+ }
+ /* http_ext post init early cleanup */
+ http_ext_softclean(px);
+
+ }
+ return err;
+}
+
+REGISTER_POST_PROXY_CHECK(check_http_ext_postconf);
+/*
+ * =========== CONV ===========
+ * related converters
+ */
+
+/* input: string representing 7239 forwarded header single value
+ * does not take arguments
+ * output: 1 if header is RFC compliant, 0 otherwise
+ */
+static int sample_conv_7239_valid(const struct arg *args, struct sample *smp, void *private)
+{
+ struct ist input = ist2(smp->data.u.str.area, smp->data.u.str.data);
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = !!http_validate_7239_header(input, FORWARDED_HEADER_ALL, NULL);
+ return 1;
+}
+
+/* input: string representing 7239 forwarded header single value
+ * argument: parameter name to look for in the header
+ * output: header parameter raw value, as a string
+ */
+static int sample_conv_7239_field(const struct arg *args, struct sample *smp, void *private)
+{
+ struct ist input = ist2(smp->data.u.str.area, smp->data.u.str.data);
+ struct buffer *output;
+ struct forwarded_header_ctx ctx;
+ int validate;
+ int field = 0;
+
+ if (strcmp(args->data.str.area, "proto") == 0)
+ field = FORWARDED_HEADER_PROTO;
+ else if (strcmp(args->data.str.area, "host") == 0)
+ field = FORWARDED_HEADER_HOST;
+ else if (strcmp(args->data.str.area, "for") == 0)
+ field = FORWARDED_HEADER_FOR;
+ else if (strcmp(args->data.str.area, "by") == 0)
+ field = FORWARDED_HEADER_BY;
+
+ validate = http_validate_7239_header(input, FORWARDED_HEADER_ALL, &ctx);
+ if (!(validate & field))
+ return 0; /* invalid header or header does not contain field */
+ output = get_trash_chunk();
+ switch (field) {
+ case FORWARDED_HEADER_PROTO:
+ if (ctx.proto == FORWARDED_HEADER_HTTP)
+ chunk_appendf(output, "http");
+ else if (ctx.proto == FORWARDED_HEADER_HTTPS)
+ chunk_appendf(output, "https");
+ break;
+ case FORWARDED_HEADER_HOST:
+ chunk_istcat(output, ctx.host);
+ break;
+ case FORWARDED_HEADER_FOR:
+ chunk_istcat(output, ctx.nfor.raw);
+ break;
+ case FORWARDED_HEADER_BY:
+ chunk_istcat(output, ctx.nby.raw);
+ break;
+ default:
+ break;
+ }
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *output;
+ return 1;
+}
+
+/* input: substring representing 7239 forwarded header node
+ * output: forwarded header nodename translated to either
+ * ipv4 address, ipv6 address or str
+ * ('_' prefix if obfuscated, or "unknown" if unknown)
+ */
+static int sample_conv_7239_n2nn(const struct arg *args, struct sample *smp, void *private)
+{
+ struct ist input = ist2(smp->data.u.str.area, smp->data.u.str.data);
+ struct forwarded_header_node ctx;
+ struct buffer *output;
+
+ if (http_7239_extract_node(&input, &ctx, 1) == 0)
+ return 0; /* could not extract node */
+ switch (ctx.nodename.type) {
+ case FORWARDED_HEADER_UNK:
+ output = get_trash_chunk();
+ chunk_appendf(output, "unknown");
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *output;
+ break;
+ case FORWARDED_HEADER_OBFS:
+ output = get_trash_chunk();
+ chunk_appendf(output, "_"); /* append obfs prefix */
+ chunk_istcat(output, ctx.nodename.obfs);
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *output;
+ break;
+ case FORWARDED_HEADER_IP:
+ if (ctx.nodename.ip.ss_family == AF_INET) {
+ smp->data.type = SMP_T_IPV4;
+ smp->data.u.ipv4 = ((struct sockaddr_in *)&ctx.nodename.ip)->sin_addr;
+ }
+ else if (ctx.nodename.ip.ss_family == AF_INET6) {
+ smp->data.type = SMP_T_IPV6;
+ smp->data.u.ipv6 = ((struct sockaddr_in6 *)&ctx.nodename.ip)->sin6_addr;
+ }
+ else
+ return 0; /* unsupported */
+ break;
+ default:
+ return 0; /* unsupported */
+ }
+ return 1;
+}
+
+/* input: substring representing 7239 forwarded header node
+ * output: forwarded header nodeport translated to either
+ * integer or str for obfuscated ('_' prefix)
+ */
+static int sample_conv_7239_n2np(const struct arg *args, struct sample *smp, void *private)
+{
+ struct ist input = ist2(smp->data.u.str.area, smp->data.u.str.data);
+ struct forwarded_header_node ctx;
+ struct buffer *output;
+
+ if (http_7239_extract_node(&input, &ctx, 1) == 0)
+ return 0; /* could not extract node */
+
+ switch (ctx.nodeport.type) {
+ case FORWARDED_HEADER_UNK:
+ return 0; /* not provided */
+ case FORWARDED_HEADER_OBFS:
+ output = get_trash_chunk();
+ chunk_appendf(output, "_"); /* append obfs prefix */
+ chunk_istcat(output, ctx.nodeport.obfs);
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *output;
+ break;
+ case FORWARDED_HEADER_PORT:
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ctx.nodeport.port;
+ break;
+ default:
+ return 0; /* unsupported */
+ }
+
+ return 1;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "rfc7239_is_valid", sample_conv_7239_valid, 0, NULL, SMP_T_STR, SMP_T_BOOL},
+ { "rfc7239_field", sample_conv_7239_field, ARG1(1,STR), NULL, SMP_T_STR, SMP_T_STR},
+ { "rfc7239_n2nn", sample_conv_7239_n2nn, 0, NULL, SMP_T_STR, SMP_T_ANY},
+ { "rfc7239_n2np", sample_conv_7239_n2np, 0, NULL, SMP_T_STR, SMP_T_ANY},
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
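+
+/* Usage sketch for the converters registered above (the variable names are
+ * arbitrary examples, not part of this patch):
+ *
+ *   http-request set-var(txn.fwd_ok)  req.hdr(forwarded),rfc7239_is_valid
+ *   http-request set-var(txn.fwd_for) req.hdr(forwarded),rfc7239_field(for)
+ *
+ * the resulting 'for' node can then be fed to rfc7239_n2nn / rfc7239_n2np
+ * to extract its nodename / nodeport parts.
+ */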
diff --git a/src/http_fetch.c b/src/http_fetch.c
new file mode 100644
index 0000000..1f3e4a0
--- /dev/null
+++ b/src/http_fetch.c
@@ -0,0 +1,2368 @@
+/*
+ * HTTP samples fetching
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/base64.h>
+#include <haproxy/channel.h>
+#include <haproxy/chunk.h>
+#include <haproxy/connection.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_fetch.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/pool.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+/* this struct is used between calls to smp_fetch_hdr() or smp_fetch_cookie() */
+static THREAD_LOCAL struct http_hdr_ctx static_http_hdr_ctx;
+/* this is used to convert raw connection buffers to htx */
+static THREAD_LOCAL struct buffer static_raw_htx_chunk;
+static THREAD_LOCAL char *static_raw_htx_buf;
+
+#define SMP_REQ_CHN(smp) (smp->strm ? &smp->strm->req : NULL)
+#define SMP_RES_CHN(smp) (smp->strm ? &smp->strm->res : NULL)
+
+/* This function returns the static htx chunk, where raw connections get
+ * converted to HTX as needed for sampling.
+ */
+struct buffer *get_raw_htx_chunk(void)
+{
+ chunk_reset(&static_raw_htx_chunk);
+ return &static_raw_htx_chunk;
+}
+
+static int alloc_raw_htx_chunk_per_thread()
+{
+ static_raw_htx_buf = malloc(global.tune.bufsize);
+ if (!static_raw_htx_buf)
+ return 0;
+ chunk_init(&static_raw_htx_chunk, static_raw_htx_buf, global.tune.bufsize);
+ return 1;
+}
+
+static void free_raw_htx_chunk_per_thread()
+{
+ ha_free(&static_raw_htx_buf);
+}
+
+REGISTER_PER_THREAD_ALLOC(alloc_raw_htx_chunk_per_thread);
+REGISTER_PER_THREAD_FREE(free_raw_htx_chunk_per_thread);
+
+/*
+ * Returns the data from the Authorization header. The function may be called
+ * more than once, so data is stored in txn->auth_data. When no header is
+ * found or the auth method is unknown, auth_method is set to HTTP_AUTH_WRONG
+ * to avoid searching again for something we are unable to find anyway.
+ * However, if the result is valid, the cache is not reused because we would
+ * risk having the credentials overwritten by another stream in parallel.
+ * The caller is responsible for passing a sample with a valid stream/txn,
+ * and a valid htx.
+ */
+
+static int get_http_auth(struct sample *smp, struct htx *htx)
+{
+ struct stream *s = smp->strm;
+ struct http_txn *txn = s->txn;
+ struct http_hdr_ctx ctx = { .blk = NULL };
+ struct ist hdr;
+ struct buffer auth_method;
+ char *p;
+ int len;
+
+#ifdef DEBUG_AUTH
+ printf("Auth for stream %p: %d\n", s, txn->auth.method);
+#endif
+ if (txn->auth.method == HTTP_AUTH_WRONG)
+ return 0;
+
+ txn->auth.method = HTTP_AUTH_WRONG;
+
+ if (txn->flags & TX_USE_PX_CONN)
+ hdr = ist("Proxy-Authorization");
+ else
+ hdr = ist("Authorization");
+
+ ctx.blk = NULL;
+ if (!http_find_header(htx, hdr, &ctx, 0))
+ return 0;
+
+ p = memchr(ctx.value.ptr, ' ', ctx.value.len);
+ if (!p || p == ctx.value.ptr) /* if no space was found or if the space is the first character */
+ return 0;
+ len = p - ctx.value.ptr;
+
+ if (chunk_initlen(&auth_method, ctx.value.ptr, 0, len) != 1)
+ return 0;
+
+	/* According to RFC7235, there could be multiple spaces between the
+	 * scheme and its value; we must skip all of them.
+	 */
+ while (p < istend(ctx.value) && *p == ' ')
+ ++p;
+
+ chunk_initlen(&txn->auth.method_data, p, 0, istend(ctx.value) - p);
+
+ if (!strncasecmp("Basic", auth_method.area, auth_method.data)) {
+ struct buffer *http_auth = get_trash_chunk();
+
+ len = base64dec(txn->auth.method_data.area,
+ txn->auth.method_data.data,
+ http_auth->area, global.tune.bufsize - 1);
+
+ if (len < 0)
+ return 0;
+
+
+ http_auth->area[len] = '\0';
+
+ p = strchr(http_auth->area, ':');
+
+ if (!p)
+ return 0;
+
+ txn->auth.user = http_auth->area;
+ *p = '\0';
+ txn->auth.pass = p+1;
+
+ txn->auth.method = HTTP_AUTH_BASIC;
+ return 1;
+ } else if (!strncasecmp("Bearer", auth_method.area, auth_method.data)) {
+ txn->auth.method = HTTP_AUTH_BEARER;
+ return 1;
+ }
+
+ return 0;
+}
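+
+/* Worked example (assuming base64dec() behaves as usual): the header
+ * "Authorization: Basic ZGVtbzpwQDU1dzByZA==" decodes to "demo:p@55w0rd",
+ * so txn->auth.user is set to "demo" and txn->auth.pass to "p@55w0rd",
+ * the colon being replaced by a '\0' in the decoded trash buffer.
+ */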
+
+/* This function ensures that the prerequisites for an L7 fetch are ready,
+ * which means that a request or response is ready. If some data is missing,
+ * a parsing attempt is made. This is useful in TCP-based ACLs which are able
+ * to extract data from L7. If <vol> is non-null during a prefetch, another
+ * test is made to ensure the required information is not gone.
+ *
+ * The function returns :
+ * NULL with SMP_F_MAY_CHANGE in the sample flags if some data is missing to
+ * decide whether or not an HTTP message is present ;
+ * NULL if the requested data cannot be fetched or if it is certain that
+ * we'll never have any HTTP message there; this includes null strm or chn.
+ * NULL if the sample's direction does not match the channel's (i.e. the
+ * function was asked to work on the wrong channel)
+ * The HTX message if ready
+ */
+struct htx *smp_prefetch_htx(struct sample *smp, struct channel *chn, struct check *check, int vol)
+{
+ struct stream *s = smp->strm;
+ struct http_txn *txn = NULL;
+ struct htx *htx = NULL;
+ struct http_msg *msg;
+ struct htx_sl *sl;
+
+ if (chn &&
+ (((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ && (chn->flags & CF_ISRESP)) ||
+ ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES && !(chn->flags & CF_ISRESP))))
+		return NULL;
+
+	/* Note: it is possible that <s> is NULL when called before stream
+	 * initialization (e.g.: tcp-request connection), so this function is the
+	 * one responsible for guarding against this case for all HTTP users.
+	 *
+	 * In the health check context, the stream and the channel must be NULL
+	 * and <check> must be set. In this case, only the input buffer,
+	 * corresponding to the response, is considered. It is the caller's
+	 * responsibility to provide <check>.
+	 */
+ BUG_ON(check && (s || chn));
+ if (!s || !chn) {
+ if (check) {
+ htx = htxbuf(&check->bi);
+
+ /* Analyse not yet started */
+ if (htx_is_empty(htx) || htx->first == -1)
+ return NULL;
+
+ sl = http_get_stline(htx);
+ if (vol && !sl) {
+ /* The start-line was already forwarded, it is too late to fetch anything */
+ return NULL;
+ }
+ goto end;
+ }
+
+ return NULL;
+ }
+
+ if (!s->txn && !http_create_txn(s))
+ return NULL;
+ txn = s->txn;
+ msg = (!(chn->flags & CF_ISRESP) ? &txn->req : &txn->rsp);
+
+ if (IS_HTX_STRM(s)) {
+ htx = htxbuf(&chn->buf);
+
+ if (htx->flags & HTX_FL_PARSING_ERROR)
+ return NULL;
+
+ if (msg->msg_state < HTTP_MSG_BODY) {
+ /* Analyse not yet started */
+ if (htx_is_empty(htx) || htx->first == -1) {
+ /* Parsing is done by the mux, just wait */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return NULL;
+ }
+ }
+ sl = http_get_stline(htx);
+ if (vol && !sl) {
+ /* The start-line was already forwarded, it is too late to fetch anything */
+ return NULL;
+ }
+ }
+ else { /* RAW mode */
+ struct buffer *buf;
+ struct h1m h1m;
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ union h1_sl h1sl;
+ unsigned int flags = HTX_FL_NONE;
+ int ret;
+
+ /* no HTTP fetch on the response in TCP mode */
+ if (chn->flags & CF_ISRESP)
+ return NULL;
+
+ /* Now we are working on the request only */
+ buf = &chn->buf;
+ if (b_head(buf) + b_data(buf) > b_wrap(buf))
+ b_slow_realign(buf, trash.area, 0);
+
+ h1m_init_req(&h1m);
+ ret = h1_headers_to_hdr_list(b_head(buf), b_stop(buf),
+ hdrs, sizeof(hdrs)/sizeof(hdrs[0]), &h1m, &h1sl);
+ if (ret <= 0) {
+			/* Invalid or too big */
+ if (ret < 0 || channel_full(&s->req, global.tune.maxrewrite))
+ return NULL;
+
+ /* wait for a full request */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return NULL;
+ }
+
+ /* OK we just got a valid HTTP message. We have to convert it
+ * into an HTX message.
+ */
+ if (unlikely(h1sl.rq.v.len == 0)) {
+ /* try to convert HTTP/0.9 requests to HTTP/1.0 */
+ if (h1sl.rq.meth != HTTP_METH_GET || !h1sl.rq.u.len)
+ return NULL;
+ h1sl.rq.v = ist("HTTP/1.0");
+ }
+
+ /* Set HTX start-line flags */
+ if (h1m.flags & H1_MF_VER_11)
+ flags |= HTX_SL_F_VER_11;
+ if (h1m.flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m.flags & H1_MF_CHNK)
+ flags |= HTX_SL_F_CHNK;
+ else if (h1m.flags & H1_MF_CLEN)
+ flags |= HTX_SL_F_CLEN;
+
+ htx = htx_from_buf(get_raw_htx_chunk());
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, flags, h1sl.rq.m, h1sl.rq.u, h1sl.rq.v);
+ if (!sl || !htx_add_all_headers(htx, hdrs))
+ return NULL;
+ sl->info.req.meth = h1sl.rq.meth;
+ }
+
+ /* OK we just got a valid HTTP message. If not already done by
+ * HTTP analyzers, we have some minor preparation to perform so
+ * that further checks can rely on HTTP tests.
+ */
+ if (sl && msg->msg_state < HTTP_MSG_BODY) {
+ if (!(chn->flags & CF_ISRESP)) {
+ txn->meth = sl->info.req.meth;
+ if (txn->meth == HTTP_METH_GET || txn->meth == HTTP_METH_HEAD)
+ s->flags |= SF_REDIRECTABLE;
+ }
+ else {
+ if (txn->status == -1)
+ txn->status = sl->info.res.status;
+ if (!(htx->flags & HTX_FL_PROXY_RESP) && txn->server_status == -1)
+ txn->server_status = sl->info.res.status;
+ }
+ if (sl->flags & HTX_SL_F_VER_11)
+ msg->flags |= HTTP_MSGF_VER_11;
+ }
+
+ /* everything's OK */
+ end:
+ return htx;
+}
+
+/* This function fetches the method of current HTTP request and stores
+ * it in the global pattern struct as a chunk. There are two possibilities :
+ * - if the method is known (not HTTP_METH_OTHER), its identifier is stored
+ * in <len> and <ptr> is NULL ;
+ * - if the method is unknown (HTTP_METH_OTHER), <ptr> points to the text and
+ * <len> to its length.
+ * This is intended to be used with pat_match_meth() only.
+ */
+static int smp_fetch_meth(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct http_txn *txn;
+ struct htx *htx = NULL;
+ int meth;
+
+ txn = (smp->strm ? smp->strm->txn : NULL);
+ if (!txn)
+ return 0;
+
+ meth = txn->meth;
+ if (meth == HTTP_METH_OTHER) {
+ htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ if (!htx)
+ return 0;
+ meth = txn->meth;
+ }
+
+ smp->data.type = SMP_T_METH;
+ smp->data.u.meth.meth = meth;
+ if (meth == HTTP_METH_OTHER) {
+ struct htx_sl *sl;
+
+ sl = http_get_stline(htx);
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.meth.str.area = HTX_SL_REQ_MPTR(sl);
+ smp->data.u.meth.str.data = HTX_SL_REQ_MLEN(sl);
+ }
+ smp->flags |= SMP_F_VOL_1ST;
+ return 1;
+}
+
+static int smp_fetch_rqver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ char *ptr;
+ int len;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ len = HTX_SL_REQ_VLEN(sl);
+ ptr = HTX_SL_REQ_VPTR(sl);
+
+ while ((len-- > 0) && (*ptr++ != '/'));
+ if (len <= 0)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = ptr;
+ smp->data.u.str.data = len;
+
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_stver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_RES_CHN(smp);
+ struct check *check = objt_check(smp->sess->origin);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct htx_sl *sl;
+ char *ptr;
+ int len;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ len = HTX_SL_RES_VLEN(sl);
+ ptr = HTX_SL_RES_VPTR(sl);
+
+ while ((len-- > 0) && (*ptr++ != '/'));
+ if (len <= 0)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = ptr;
+ smp->data.u.str.data = len;
+
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+/* 3. Check on Status Code. We manipulate integers here. */
+static int smp_fetch_stcode(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_RES_CHN(smp);
+ struct check *check = objt_check(smp->sess->origin);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct htx_sl *sl;
+ char *ptr;
+ int len;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ len = HTX_SL_RES_CLEN(sl);
+ ptr = HTX_SL_RES_CPTR(sl);
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = __strl2ui(ptr, len);
+ smp->flags = SMP_F_VOL_1ST;
+ return 1;
+}
+
+/* It returns the server or the txn status code, depending on the keyword */
+static int smp_fetch_srv_status(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct http_txn *txn;
+ short status;
+
+ txn = (smp->strm ? smp->strm->txn : NULL);
+ if (!txn)
+ return 0;
+
+ status = (kw[0] == 't' ? txn->status : txn->server_status);
+ if (status == -1) {
+ struct channel *chn = SMP_RES_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+
+ if (!htx)
+ return 0;
+
+ status = (kw[0] == 't' ? txn->status : txn->server_status);
+ }
+
+ if (kw[0] != 't')
+ smp->flags = SMP_F_VOL_1ST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = status;
+ return 1;
+}
+
+static int smp_fetch_uniqueid(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct ist unique_id;
+
+ if (LIST_ISEMPTY(&smp->sess->fe->format_unique_id))
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ unique_id = stream_generate_unique_id(smp->strm, &smp->sess->fe->format_unique_id);
+ if (!isttest(unique_id))
+ return 0;
+
+ smp->data.u.str.area = smp->strm->unique_id.ptr;
+ smp->data.u.str.data = smp->strm->unique_id.len;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/* Returns a string block containing all headers, including the
+ * empty line which separates headers from the body. This is useful
+ * for some header analysis.
+ */
+static int smp_fetch_hdrs(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdrs, res.hdrs */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct buffer *temp;
+ int32_t pos;
+
+ if (!htx)
+ return 0;
+ temp = get_trash_chunk();
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_HDR) {
+ struct ist n = htx_get_blk_name(htx, blk);
+ struct ist v = htx_get_blk_value(htx, blk);
+
+ if (!h1_format_htx_hdr(n, v, temp))
+ return 0;
+ }
+ else if (type == HTX_BLK_EOH) {
+ if (!chunk_memcat(temp, "\r\n", 2))
+ return 0;
+ break;
+ }
+ }
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *temp;
+ return 1;
+}
+
+/* Returns the request (or response) headers in a length/value encoded format.
+ * This is useful for exchanges with the SPOE.
+ *
+ * A "length value" is a multibyte code encoding a number. It uses the
+ * SPOE format. The encoding is the following:
+ *
+ * Each "header name" / "header value" pair is encoded
+ * like this:
+ *    "length value" "header name bytes"
+ *    "length value" "header value bytes"
+ * When the last header is reached, the header name and the header
+ * value are empty. Their lengths are 0.
+ */
+static int smp_fetch_hdrs_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdrs_bin, res.hdrs_bin */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct buffer *temp;
+ char *p, *end;
+ int32_t pos;
+ int ret;
+
+ if (!htx)
+ return 0;
+ temp = get_trash_chunk();
+ p = temp->area;
+ end = temp->area + temp->size;
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n, v;
+
+ if (type == HTX_BLK_HDR) {
+ n = htx_get_blk_name(htx,blk);
+ v = htx_get_blk_value(htx, blk);
+
+ /* encode the header name. */
+ ret = encode_varint(n.len, &p, end);
+ if (ret == -1)
+ return 0;
+ if (p + n.len > end)
+ return 0;
+ memcpy(p, n.ptr, n.len);
+ p += n.len;
+
+ /* encode the header value. */
+ ret = encode_varint(v.len, &p, end);
+ if (ret == -1)
+ return 0;
+ if (p + v.len > end)
+ return 0;
+ memcpy(p, v.ptr, v.len);
+ p += v.len;
+
+ }
+ else if (type == HTX_BLK_EOH) {
+ /* encode the end of the header list with empty
+ * header name and header value.
+ */
+ ret = encode_varint(0, &p, end);
+ if (ret == -1)
+ return 0;
+ ret = encode_varint(0, &p, end);
+ if (ret == -1)
+ return 0;
+ break;
+ }
+ }
+
+ /* Initialise sample data which will be filled. */
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str.area = temp->area;
+ smp->data.u.str.data = p - temp->area;
+ smp->data.u.str.size = temp->size;
+ return 1;
+}
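+
+/* Worked example of the encoding above, assuming the usual varint scheme
+ * where lengths below 240 fit in a single byte: the lone header
+ * "Host: example.com" would serialize as
+ *
+ *   \x04 H o s t \x0b e x a m p l e . c o m \x00 \x00
+ *
+ * the trailing pair of zero lengths marking the end of the list.
+ */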
+
+/* returns the longest available part of the body. This requires that the body
+ * has been waited for using http-buffer-request.
+ */
+static int smp_fetch_body(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.body, res.body */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct buffer *temp;
+ int32_t pos;
+ int finished = 0;
+
+ if (!htx)
+ return 0;
+
+ temp = get_trash_chunk();
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT) {
+ finished = 1;
+ break;
+ }
+ if (type == HTX_BLK_DATA) {
+ if (!h1_format_htx_data(htx_get_blk_value(htx, blk), temp, 0))
+ return 0;
+ }
+ }
+
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str = *temp;
+ smp->flags = SMP_F_VOL_TEST;
+
+ if (!finished && (check || (chn && !channel_full(chn, global.tune.maxrewrite) &&
+ !(chn_prod(chn)->flags & (SC_FL_EOI|SC_FL_EOS|SC_FL_ABRT_DONE)))))
+ smp->flags |= SMP_F_MAY_CHANGE;
+
+ return 1;
+}
+
+
+/* returns the available length of the body. This requires that the body
+ * has been waited for using http-buffer-request.
+ */
+static int smp_fetch_body_len(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.body_len, res.body_len */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ int32_t pos;
+ unsigned long long len = 0;
+
+ if (!htx)
+ return 0;
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = len;
+ smp->flags = SMP_F_VOL_TEST;
+ return 1;
+}
+
+
+/* returns the advertised length of the body, or the advertised size of the
+ * chunks available in the buffer. This requires that the body has been waited
+ * for using http-buffer-request.
+ */
+static int smp_fetch_body_size(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.body_size, res.body_size */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ int32_t pos;
+ unsigned long long len = 0;
+
+ if (!htx)
+ return 0;
+
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+ if (htx->extra != HTX_UNKOWN_PAYLOAD_LENGTH)
+ len += htx->extra;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = len;
+ smp->flags = SMP_F_VOL_TEST;
+ return 1;
+}
+
+
+/* 4. Check on URL/URI. A pointer to the URI is stored. */
+static int smp_fetch_url(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+
+ if (!htx)
+ return 0;
+ sl = http_get_stline(htx);
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = HTX_SL_REQ_UPTR(sl);
+ smp->data.u.str.data = HTX_SL_REQ_ULEN(sl);
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_url_ip(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct sockaddr_storage addr;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (!htx)
+ return 0;
+ sl = http_get_stline(htx);
+ if (url2sa(HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl), &addr, NULL) < 0)
+ return 0;
+
+ if (addr.ss_family != AF_INET)
+ return 0;
+
+ smp->data.type = SMP_T_IPV4;
+ smp->data.u.ipv4 = ((struct sockaddr_in *)&addr)->sin_addr;
+ smp->flags = 0;
+ return 1;
+}
+
+static int smp_fetch_url_port(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct sockaddr_storage addr;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (!htx)
+ return 0;
+ sl = http_get_stline(htx);
+ if (url2sa(HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl), &addr, NULL) < 0)
+ return 0;
+
+ if (addr.ss_family != AF_INET)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = get_host_port(&addr);
+ smp->flags = 0;
+ return 1;
+}
+
+/* Fetch an HTTP header. A pointer to the beginning of the value is returned.
+ * Accepts an optional argument of type string containing the header field name,
+ * and an optional argument of type signed or unsigned integer to request an
+ * explicit occurrence of the header. Note that in the event of a missing name,
+ * headers are considered from the first one. It does not stop on commas and
+ * returns full lines instead (useful for User-Agent or Date for example).
+ */
+static int smp_fetch_fhdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.fhdr, res.fhdr */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx *ctx = smp->ctx.a[0];
+ struct ist name;
+ int occ = 0;
+
+ if (!ctx) {
+ /* first call */
+ ctx = &static_http_hdr_ctx;
+ ctx->blk = NULL;
+ smp->ctx.a[0] = ctx;
+ }
+
+ if (args[0].type != ARGT_STR)
+ return 0;
+ name = ist2(args[0].data.str.area, args[0].data.str.data);
+
+ if (args[1].type == ARGT_SINT)
+ occ = args[1].data.sint;
+
+ if (!htx)
+ return 0;
+
+ if (ctx && !(smp->flags & SMP_F_NOT_LAST))
+ /* search for header from the beginning */
+ ctx->blk = NULL;
+
+ if (!occ && !(smp->opt & SMP_OPT_ITERATE))
+ /* no explicit occurrence and single fetch => last header by default */
+ occ = -1;
+
+ if (!occ)
+ /* prepare to report multiple occurrences for ACL fetches */
+ smp->flags |= SMP_F_NOT_LAST;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_HDR | SMP_F_CONST;
+ if (http_get_htx_fhdr(htx, name, occ, ctx, &smp->data.u.str.area, &smp->data.u.str.data))
+ return 1;
+ smp->flags &= ~SMP_F_NOT_LAST;
+ return 0;
+}
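+
+/* Usage sketch (hypothetical header choice): unlike req.hdr, req.fhdr does
+ * not split on commas, which matters for full-line headers like User-Agent:
+ *
+ *   http-request set-header X-UA %[req.fhdr(user-agent)]
+ */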
+
+/* 6. Check on HTTP header count. The number of occurrences is returned.
+ * Accepts an optional argument of type string (the header name; when missing,
+ * all headers are counted). It does not stop on commas and counts full lines
+ * instead (useful for User-Agent or Date for example).
+ */
+static int smp_fetch_fhdr_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.fhdr_cnt, res.fhdr_cnt */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx ctx;
+ struct ist name;
+ int cnt;
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR) {
+ name = ist2(args->data.str.area, args->data.str.data);
+ } else {
+ name = IST_NULL;
+ }
+
+ ctx.blk = NULL;
+ cnt = 0;
+ while (http_find_header(htx, name, &ctx, 1))
+ cnt++;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = cnt;
+ smp->flags = SMP_F_VOL_HDR;
+ return 1;
+}
+
+static int smp_fetch_hdr_names(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdr_names, res.hdr_names */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct buffer *temp;
+ char del = ',';
+
+ int32_t pos;
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR)
+ del = *args[0].data.str.area;
+
+ temp = get_trash_chunk();
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ struct ist n;
+
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+ n = htx_get_blk_name(htx, blk);
+
+ if (temp->data)
+ temp->area[temp->data++] = del;
+ chunk_istcat(temp, n);
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *temp;
+ smp->flags = SMP_F_VOL_HDR;
+ return 1;
+}
+
+/* Fetch an HTTP header. A pointer to the beginning of the value is returned.
+ * Accepts an optional argument of type string containing the header field name,
+ * and an optional argument of type signed or unsigned integer to request an
+ * explicit occurrence of the header. Note that in the event of a missing name,
+ * headers are considered from the first one.
+ */
+static int smp_fetch_hdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdr / hdr, res.hdr / shdr */
+ struct channel *chn = ((kw[0] == 'h' || kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[0] == 's' || kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx *ctx = smp->ctx.a[0];
+ struct ist name;
+ int occ = 0;
+
+ if (!ctx) {
+ /* first call */
+ ctx = &static_http_hdr_ctx;
+ ctx->blk = NULL;
+ smp->ctx.a[0] = ctx;
+ }
+
+ if (args[0].type != ARGT_STR)
+ return 0;
+ name = ist2(args[0].data.str.area, args[0].data.str.data);
+
+ if (args[1].type == ARGT_SINT)
+ occ = args[1].data.sint;
+
+ if (!htx)
+ return 0;
+
+ if (ctx && !(smp->flags & SMP_F_NOT_LAST))
+ /* search for header from the beginning */
+ ctx->blk = NULL;
+
+ if (!occ && !(smp->opt & SMP_OPT_ITERATE))
+ /* no explicit occurrence and single fetch => last header by default */
+ occ = -1;
+
+ if (!occ)
+ /* prepare to report multiple occurrences for ACL fetches */
+ smp->flags |= SMP_F_NOT_LAST;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_HDR | SMP_F_CONST;
+ if (http_get_htx_hdr(htx, name, occ, ctx, &smp->data.u.str.area, &smp->data.u.str.data))
+ return 1;
+
+ smp->flags &= ~SMP_F_NOT_LAST;
+ return 0;
+}
+
+/* Same as smp_fetch_hdr() but only relies on the sample direction to choose
+ * the right channel. So instead of duplicating the code, we just change the
+ * keyword and then fall back on smp_fetch_hdr().
+ */
+static int smp_fetch_chn_hdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ kw = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ ? "req.hdr" : "res.hdr");
+ return smp_fetch_hdr(args, smp, kw, private);
+}
+
+/* 6. Check on HTTP header count. The number of occurrences is returned.
+ * Accepts an optional argument of type string (the header name; when
+ * missing, all headers are counted). Comma-separated values are counted
+ * individually.
+ */
+static int smp_fetch_hdr_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.hdr_cnt / hdr_cnt, res.hdr_cnt / shdr_cnt */
+ struct channel *chn = ((kw[0] == 'h' || kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[0] == 's' || kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx ctx;
+ struct ist name;
+ int cnt;
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR) {
+ name = ist2(args->data.str.area, args->data.str.data);
+ } else {
+ name = IST_NULL;
+ }
+
+ ctx.blk = NULL;
+ cnt = 0;
+ while (http_find_header(htx, name, &ctx, 0))
+ cnt++;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = cnt;
+ smp->flags = SMP_F_VOL_HDR;
+ return 1;
+}
+
+/* Fetch an HTTP header's integer value. The integer value is returned. It
+ * takes a mandatory argument of type string and an optional one of type int
+ * to designate a specific occurrence. It returns a signed integer, which
+ * may or may not be appropriate for everything.
+ */
+static int smp_fetch_hdr_val(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int ret = smp_fetch_hdr(args, smp, kw, private);
+
+ if (ret > 0) {
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = strl2ic(smp->data.u.str.area,
+ smp->data.u.str.data);
+ }
+
+ return ret;
+}
+
+/* Fetch an HTTP header's IP value. Takes a mandatory argument of type string
+ * and an optional one of type int to designate a specific occurrence.
+ * It returns an IPv4 or IPv6 address. Addresses surrounded by invalid chars
+ * are rejected. However IPv4 addresses may be followed by a colon and a
+ * valid port number.
+ */
+static int smp_fetch_hdr_ip(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *temp = get_trash_chunk();
+ int ret, len;
+ int port;
+
+ while ((ret = smp_fetch_hdr(args, smp, kw, private)) > 0) {
+ if (smp->data.u.str.data < temp->size - 1) {
+ memcpy(temp->area, smp->data.u.str.area,
+ smp->data.u.str.data);
+ temp->area[smp->data.u.str.data] = '\0';
+ len = url2ipv4((char *) temp->area, &smp->data.u.ipv4);
+ if (len > 0 && len == smp->data.u.str.data) {
+ /* plain IPv4 address */
+ smp->data.type = SMP_T_IPV4;
+ break;
+ } else if (len > 0 && temp->area[len] == ':' &&
+ strl2irc(temp->area + len + 1, smp->data.u.str.data - len - 1, &port) == 0 &&
+ port >= 0 && port <= 65535) {
+ /* IPv4 address suffixed with ':' followed by a valid port number */
+ smp->data.type = SMP_T_IPV4;
+ break;
+ } else if (temp->area[0] == '[' && temp->area[smp->data.u.str.data-1] == ']') {
+ /* IPv6 address enclosed in square brackets */
+ temp->area[smp->data.u.str.data-1] = '\0';
+ if (inet_pton(AF_INET6, temp->area+1, &smp->data.u.ipv6)) {
+ smp->data.type = SMP_T_IPV6;
+ break;
+ }
+ } else if (inet_pton(AF_INET6, temp->area, &smp->data.u.ipv6)) {
+ /* plain IPv6 address */
+ smp->data.type = SMP_T_IPV6;
+ break;
+ }
+ }
+
+ /* if the header doesn't match an IP address, fetch next one */
+ if (!(smp->flags & SMP_F_NOT_LAST))
+ return 0;
+ }
+ return ret;
+}
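+
+/* For illustration, the forms accepted by the parsing loop above are:
+ *   "192.0.2.1"        plain IPv4
+ *   "192.0.2.1:8080"   IPv4 followed by a valid port number
+ *   "[2001:db8::1]"    IPv6 enclosed in square brackets
+ *   "2001:db8::1"      plain IPv6
+ * anything else makes the fetch move on to the next occurrence.
+ */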
+
+/* 8. Check on URI PATH. A pointer to the PATH is stored. The path starts at
+ * the first '/' after the possible hostname. It ends before the possible '?'
+ * except for the 'pathq' keyword.
+ */
+static int smp_fetch_path(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct ist path;
+ struct http_uri_parser parser;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+
+ if (kw[4] == 'q' && (kw[0] == 'p' || kw[0] == 'b')) // pathq or baseq
+ path = http_parse_path(&parser);
+ else
+ path = iststop(http_parse_path(&parser), '?');
+
+ if (!isttest(path))
+ return 0;
+
+ /* OK, we got the '/' ! */
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = path.ptr;
+ smp->data.u.str.data = path.len;
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
+
+/* This produces a concatenation of the first occurrence of the Host header
+ * followed by the path component if it begins with a slash ('/'). This means
+ * that '*' will not be added, resulting in exactly the first Host entry.
+ * If no Host header is found, then the path is returned as-is. The returned
+ * value is stored in the trash so it does not need to be marked constant.
+ * The returned sample is of type string.
+ */
+static int smp_fetch_base(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct buffer *temp;
+ struct http_hdr_ctx ctx;
+ struct ist path;
+ struct http_uri_parser parser;
+
+ if (!htx)
+ return 0;
+
+ ctx.blk = NULL;
+ if (!http_find_header(htx, ist("Host"), &ctx, 0) || !ctx.value.len)
+ return smp_fetch_path(args, smp, kw, private);
+
+ /* OK we have the header value in ctx.value */
+ temp = get_trash_chunk();
+ chunk_istcat(temp, ctx.value);
+
+ /* now retrieve the path */
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (isttest(path)) {
+ size_t len;
+
+ if (kw[4] == 'q' && kw[0] == 'b') { // baseq
+ len = path.len;
+ } else {
+ for (len = 0; len < path.len && *(path.ptr + len) != '?'; len++)
+ ;
+ }
+
+ if (len && *(path.ptr) == '/')
+ chunk_memcat(temp, path.ptr, len);
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *temp;
+ smp->flags = SMP_F_VOL_1ST;
+ return 1;
+}
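+
+/* For example, with "Host: www.example.com" and the request line
+ * "GET /img/logo.png?v=2 HTTP/1.1", 'base' yields
+ * "www.example.com/img/logo.png" while 'baseq' keeps the query string:
+ * "www.example.com/img/logo.png?v=2".
+ */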
+
+/* This produces a 32-bit hash of the concatenation of the first occurrence of
+ * the Host header followed by the path component if it begins with a slash ('/').
+ * This means that '*' will not be added, resulting in exactly the first Host
+ * entry. If no Host header is found, then the path is used. The concatenation
+ * is hashed with a simple per-character hash, then a full avalanche hash, and
+ * provides a 32-bit integer value. This fetch is useful for tracking per-path
+ * activity on high-traffic sites without having to store whole paths.
+ */
+static int smp_fetch_base32(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ struct http_hdr_ctx ctx;
+ struct ist path;
+ unsigned int hash = 0;
+ struct http_uri_parser parser;
+
+ if (!htx)
+ return 0;
+
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Host"), &ctx, 0)) {
+ /* OK we have the header value in ctx.value */
+ while (ctx.value.len--)
+ hash = *(ctx.value.ptr++) + (hash << 6) + (hash << 16) - hash;
+ }
+
+ /* now retrieve the path */
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (isttest(path)) {
+ size_t len;
+
+ for (len = 0; len < path.len && *(path.ptr + len) != '?'; len++)
+ ;
+
+ if (len && *(path.ptr) == '/') {
+ while (len--)
+ hash = *(path.ptr++) + (hash << 6) + (hash << 16) - hash;
+ }
+ }
+
+ hash = full_hash(hash);
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = hash;
+ smp->flags = SMP_F_VOL_1ST;
+ return 1;
+}
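+
+/* Note: each per-character step above is equivalent to
+ * hash = hash * 65599 + c, since (hash << 6) + (hash << 16) - hash
+ * equals hash * (64 + 65536 - 1); full_hash() then avalanches the result.
+ */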
+
+/* This concatenates the source address with the 32-bit hash of the Host and
+ * path as returned by smp_fetch_base32(). The idea is to have per-source and
+ * per-path counters. The result is a binary block from 8 to 20 bytes depending
+ * on the source address length. The path hash is stored before the address so
+ * that in environments where IPv6 is insignificant, truncating the output to
+ * 8 bytes would still work.
+ */
+static int smp_fetch_base32_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *src = (smp->strm ? sc_src(smp->strm->scf) : NULL);
+ struct buffer *temp;
+
+ if (!src)
+ return 0;
+
+ if (!smp_fetch_base32(args, smp, kw, private))
+ return 0;
+
+ temp = get_trash_chunk();
+ *(unsigned int *) temp->area = htonl(smp->data.u.sint);
+ temp->data += sizeof(unsigned int);
+
+ switch (src->ss_family) {
+ case AF_INET:
+ memcpy(temp->area + temp->data,
+ &((struct sockaddr_in *)src)->sin_addr,
+ 4);
+ temp->data += 4;
+ break;
+ case AF_INET6:
+ memcpy(temp->area + temp->data,
+ &((struct sockaddr_in6 *)src)->sin6_addr,
+ 16);
+ temp->data += 16;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
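+
+/* Resulting layout, for illustration:
+ *   bytes 0..3   base32 hash (network order)
+ *   bytes 4..7   IPv4 source address  (8 bytes total), or
+ *   bytes 4..19  IPv6 source address (20 bytes total)
+ */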
+
+/* Extracts the query string, which comes after the question mark '?'. If no
+ * question mark is found, nothing is returned. Otherwise it returns a sample
+ * of type string carrying the whole query string.
+ */
+static int smp_fetch_query(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+ char *ptr, *end;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ ptr = HTX_SL_REQ_UPTR(sl);
+ end = HTX_SL_REQ_UPTR(sl) + HTX_SL_REQ_ULEN(sl);
+
+ /* look up the '?' */
+ do {
+ if (ptr == end)
+ return 0;
+ } while (*ptr++ != '?');
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = ptr;
+ smp->data.u.str.data = end - ptr;
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ return 1;
+}
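+
+/* For example, "GET /search?q=haproxy&page=2 HTTP/1.1" yields the sample
+ * "q=haproxy&page=2"; a URI without any '?' yields no sample at all.
+ */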
+
+static int smp_fetch_proto_http(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 0);
+
+ if (!htx)
+ return 0;
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* returns a boolean indicating whether the current request is the first one on the connection */
+static int smp_fetch_http_first_req(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = !(smp->strm->txn->flags & TX_NOT_FIRST);
+ return 1;
+}
+
+/* Fetch the authentication method if there is an Authorization header. It
+ * relies on get_http_auth()
+ */
+static int smp_fetch_http_auth_type(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_txn *txn;
+
+ if (!htx)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!get_http_auth(smp, htx))
+ return 0;
+
+ switch (txn->auth.method) {
+ case HTTP_AUTH_BASIC:
+ smp->data.u.str.area = "Basic";
+ smp->data.u.str.data = 5;
+ break;
+ case HTTP_AUTH_DIGEST:
+ /* Unexpected because not supported */
+ smp->data.u.str.area = "Digest";
+ smp->data.u.str.data = 6;
+ break;
+ case HTTP_AUTH_BEARER:
+ smp->data.u.str.area = "Bearer";
+ smp->data.u.str.data = 6;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/* Fetch the user name supplied if there is an Authorization header. It relies
+ * on get_http_auth()
+ */
+static int smp_fetch_http_auth_user(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_txn *txn;
+
+ if (!htx)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!get_http_auth(smp, htx) || txn->auth.method != HTTP_AUTH_BASIC)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = txn->auth.user;
+ smp->data.u.str.data = strlen(txn->auth.user);
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+/* Fetch the password supplied if there is an Authorization header. It relies on
+ * get_http_auth()
+ */
+static int smp_fetch_http_auth_pass(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_txn *txn;
+
+ if (!htx)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!get_http_auth(smp, htx) || txn->auth.method != HTTP_AUTH_BASIC)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = txn->auth.pass;
+ smp->data.u.str.data = strlen(txn->auth.pass);
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_http_auth_bearer(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_txn *txn;
+ struct buffer bearer_val = {};
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR) {
+ struct http_hdr_ctx ctx;
+ struct ist hdr_name = ist2(args->data.str.area, args->data.str.data);
+
+ ctx.blk = NULL;
+ if (http_find_header(htx, hdr_name, &ctx, 0)) {
+ struct ist type = istsplit(&ctx.value, ' ');
+
+ /* There must be "at least" one space character between
+ * the scheme and the following value so ctx.value might
+ * still have leading spaces here (see RFC7235).
+ */
+ ctx.value = istskip(ctx.value, ' ');
+
+ if (isteqi(type, ist("Bearer")) && istlen(ctx.value))
+ chunk_initlen(&bearer_val, istptr(ctx.value), 0, istlen(ctx.value));
+ }
+ }
+ else {
+ txn = smp->strm->txn;
+ if (!get_http_auth(smp, htx) || txn->auth.method != HTTP_AUTH_BEARER)
+ return 0;
+
+ bearer_val = txn->auth.method_data;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = bearer_val;
+ smp->flags = SMP_F_CONST;
+ return 1;
+}
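+
+/* Usage sketch, assuming this fetch is registered under the usual
+ * "http_auth_bearer" keyword as in upstream HAProxy:
+ *
+ *   http-request set-var(txn.bearer) http_auth_bearer
+ *   http-request set-var(txn.bearer) http_auth_bearer(X-Custom-Auth)
+ *
+ * the optional string argument naming an alternate header to read the
+ * token from.
+ */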
+
+/* Accepts exactly 1 argument of type userlist */
+static int smp_fetch_http_auth(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+
+ if (args->type != ARGT_USR)
+ return 0;
+
+ if (!htx)
+ return 0;
+ if (!get_http_auth(smp, htx) || smp->strm->txn->auth.method != HTTP_AUTH_BASIC)
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = check_user(args->data.usr, smp->strm->txn->auth.user,
+ smp->strm->txn->auth.pass);
+ return 1;
+}
+
+/* Accepts exactly 1 argument of type userlist */
+static int smp_fetch_http_auth_grp(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+
+ if (args->type != ARGT_USR)
+ return 0;
+
+ if (!htx)
+ return 0;
+ if (!get_http_auth(smp, htx) || smp->strm->txn->auth.method != HTTP_AUTH_BASIC)
+ return 0;
+
+ /* if the user does not belong to the userlist or has a wrong password,
+ * report that it unconditionally does not match. Otherwise we return
+ * a string containing the username.
+ */
+ if (!check_user(args->data.usr, smp->strm->txn->auth.user,
+ smp->strm->txn->auth.pass))
+ return 0;
+
+ /* pat_match_auth() will need the user list */
+ smp->ctx.a[0] = args->data.usr;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = smp->strm->txn->auth.user;
+ smp->data.u.str.data = strlen(smp->strm->txn->auth.user);
+
+ return 1;
+}
+
+/* Fetch a captured HTTP request header. The index is the position of
+ * the "capture" option in the configuration file
+ */
+static int smp_fetch_capture_req_hdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *fe;
+ int idx;
+
+ if (args->type != ARGT_SINT)
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ fe = strm_fe(smp->strm);
+ idx = args->data.sint;
+
+ if (idx > (fe->nb_req_cap - 1) || smp->strm->req_cap == NULL || smp->strm->req_cap[idx] == NULL)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.area = smp->strm->req_cap[idx];
+ smp->data.u.str.data = strlen(smp->strm->req_cap[idx]);
+
+ return 1;
+}
+
+/* Fetch a captured HTTP response header. The index is the position of
+ * the "capture" option in the configuration file
+ */
+static int smp_fetch_capture_res_hdr(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct proxy *fe;
+ int idx;
+
+ if (args->type != ARGT_SINT)
+ return 0;
+
+ if (!smp->strm)
+ return 0;
+
+ fe = strm_fe(smp->strm);
+ idx = args->data.sint;
+
+ if (idx > (fe->nb_rsp_cap - 1) || smp->strm->res_cap == NULL || smp->strm->res_cap[idx] == NULL)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.area = smp->strm->res_cap[idx];
+ smp->data.u.str.data = strlen(smp->strm->res_cap[idx]);
+
+ return 1;
+}
+
+/* Extracts the METHOD from the HTTP request; txn->uri must be filled before the call */
+static int smp_fetch_capture_req_method(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *temp;
+ struct http_txn *txn;
+ char *ptr;
+
+ if (!smp->strm)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!txn || !txn->uri)
+ return 0;
+
+ ptr = txn->uri;
+
+ while (*ptr != ' ' && *ptr != '\0') /* find first space */
+ ptr++;
+
+ temp = get_trash_chunk();
+ temp->area = txn->uri;
+ temp->data = ptr - txn->uri;
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+
+ return 1;
+
+}
+
+/* Extracts the path from the HTTP request; txn->uri must be filled before the call */
+static int smp_fetch_capture_req_uri(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct http_txn *txn;
+ struct ist path;
+ const char *ptr;
+ struct http_uri_parser parser;
+
+ if (!smp->strm)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!txn || !txn->uri)
+ return 0;
+
+ ptr = txn->uri;
+
+ while (*ptr != ' ' && *ptr != '\0') /* find first space */
+ ptr++;
+
+ if (!*ptr)
+ return 0;
+
+ /* skip the first space and find space after URI */
+ path = ist2(++ptr, 0);
+ while (*ptr != ' ' && *ptr != '\0')
+ ptr++;
+ path.len = ptr - path.ptr;
+
+ parser = http_uri_parser_init(path);
+ path = http_parse_path(&parser);
+ if (!isttest(path))
+ return 0;
+
+ smp->data.u.str.area = path.ptr;
+ smp->data.u.str.data = path.len;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+
+ return 1;
+}
+
+/* Retrieves the HTTP version from the request (either 1.0 or 1.1) and emits it
+ * as a string (either "HTTP/1.0" or "HTTP/1.1").
+ */
+static int smp_fetch_capture_req_ver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct http_txn *txn;
+
+ if (!smp->strm)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!txn || txn->req.msg_state < HTTP_MSG_BODY)
+ return 0;
+
+ if (txn->req.flags & HTTP_MSGF_VER_11)
+ smp->data.u.str.area = "HTTP/1.1";
+ else
+ smp->data.u.str.area = "HTTP/1.0";
+
+ smp->data.u.str.data = 8;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ return 1;
+
+}
+
+/* Retrieves the HTTP version from the response (either 1.0 or 1.1) and emits it
+ * as a string (either "HTTP/1.0" or "HTTP/1.1").
+ */
+static int smp_fetch_capture_res_ver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct http_txn *txn;
+
+ if (!smp->strm)
+ return 0;
+
+ txn = smp->strm->txn;
+ if (!txn || txn->rsp.msg_state < HTTP_MSG_BODY)
+ return 0;
+
+ if (txn->rsp.flags & HTTP_MSGF_VER_11)
+ smp->data.u.str.area = "HTTP/1.1";
+ else
+ smp->data.u.str.area = "HTTP/1.0";
+
+ smp->data.u.str.data = 8;
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ return 1;
+
+}
+
+/* Iterate over all cookies present in a message. The context is stored in
+ * smp->ctx.a[0] for the in-header position, smp->ctx.a[1] for the
+ * end-of-header-value, and smp->ctx.a[2] for the hdr_ctx. Depending on
+ * the direction, multiple cookies may be parsed on the same line or not.
+ * If provided, the searched cookie name is in args, in args->data.str. If
+ * the input options indicate that no iterating is desired, then only the last
+ * value is fetched, if any. If no cookie name is provided, the first cookie
+ * value found is fetched. The returned sample is of type CSTR. It can be used
+ * to parse cookies in other files.
+ */
+static int smp_fetch_cookie(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.cookie / cookie / cook, res.cookie / scook / set-cookie */
+ struct channel *chn = ((kw[0] == 'c' || kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[0] == 's' || kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx *ctx = smp->ctx.a[2];
+ struct ist hdr;
+ char *cook = NULL;
+ size_t cook_l = 0;
+ int found = 0;
+
+ if (args->type == ARGT_STR) {
+ cook = args->data.str.area;
+ cook_l = args->data.str.data;
+ }
+
+ if (!ctx) {
+ /* first call */
+ ctx = &static_http_hdr_ctx;
+ ctx->blk = NULL;
+ smp->ctx.a[2] = ctx;
+ }
+
+ if (!htx)
+ return 0;
+
+ hdr = (!(check || (chn && chn->flags & CF_ISRESP)) ? ist("Cookie") : ist("Set-Cookie"));
+
+	/* OK so basically here, either we want only one value or we want to
+	 * iterate over all of them and we fetch the next one. In the latter
+	 * case the SMP_OPT_ITERATE option is set.
+	 */
+
+ if (!(smp->flags & SMP_F_NOT_LAST)) {
+ /* search for the header from the beginning, we must first initialize
+ * the search parameters.
+ */
+ smp->ctx.a[0] = NULL;
+ ctx->blk = NULL;
+ }
+
+ smp->flags |= SMP_F_VOL_HDR;
+ while (1) {
+ /* Note: smp->ctx.a[0] == NULL every time we need to fetch a new header */
+ if (!smp->ctx.a[0]) {
+ if (!http_find_header(htx, hdr, ctx, 0))
+ goto out;
+
+ if (ctx->value.len < cook_l + 1)
+ continue;
+
+ smp->ctx.a[0] = ctx->value.ptr;
+ smp->ctx.a[1] = smp->ctx.a[0] + ctx->value.len;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->ctx.a[0] = http_extract_cookie_value(smp->ctx.a[0], smp->ctx.a[1],
+ cook, cook_l,
+ (smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ,
+ &smp->data.u.str.area,
+ &smp->data.u.str.data);
+ if (smp->ctx.a[0]) {
+ found = 1;
+ if (smp->opt & SMP_OPT_ITERATE) {
+ /* iterate on cookie value */
+ smp->flags |= SMP_F_NOT_LAST;
+ return 1;
+ }
+ if (args->data.str.data == 0) {
+ /* No cookie name, first occurrence returned */
+ break;
+ }
+ }
+ /* if we're looking for last occurrence, let's loop */
+ }
+
+ /* all cookie headers and values were scanned. If we're looking for the
+ * last occurrence, we may return it now.
+ */
+ out:
+ smp->flags &= ~SMP_F_NOT_LAST;
+ return found;
+}
+
+/* Same as smp_fetch_cookie() but only relies on the sample direction to
+ * choose the right channel. So instead of duplicating the code, we just change
+ * the keyword and then fall back on smp_fetch_cookie().
+ */
+static int smp_fetch_chn_cookie(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ kw = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ ? "req.cook" : "res.cook");
+ return smp_fetch_cookie(args, smp, kw, private);
+}
+
+/* Iterate over all cookies present in a request to count how many occurrences
+ * match the name passed in args->data.str. Depending on the direction,
+ * multiple cookies may be parsed on the same line. The returned sample is of
+ * type SINT. Accepts an optional argument of type string.
+ */
+static int smp_fetch_cookie_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.cook_cnt / cook_cnt, res.cook_cnt / scook_cnt */
+ struct channel *chn = ((kw[0] == 'c' || kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[0] == 's' || kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx ctx;
+ struct ist hdr;
+ char *val_beg, *val_end;
+ char *cook = NULL;
+ size_t cook_l = 0;
+ int cnt;
+
+ if (args->type == ARGT_STR){
+ cook = args->data.str.area;
+ cook_l = args->data.str.data;
+ }
+
+ if (!htx)
+ return 0;
+
+ hdr = (!(check || (chn && chn->flags & CF_ISRESP)) ? ist("Cookie") : ist("Set-Cookie"));
+
+ val_end = val_beg = NULL;
+ ctx.blk = NULL;
+ cnt = 0;
+ while (1) {
+ /* Note: val_beg == NULL every time we need to fetch a new header */
+ if (!val_beg) {
+ if (!http_find_header(htx, hdr, &ctx, 0))
+ break;
+
+ if (ctx.value.len < cook_l + 1)
+ continue;
+
+ val_beg = ctx.value.ptr;
+ val_end = val_beg + ctx.value.len;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ while ((val_beg = http_extract_cookie_value(val_beg, val_end,
+ cook, cook_l,
+ (smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_REQ,
+ &smp->data.u.str.area,
+ &smp->data.u.str.data))) {
+ cnt++;
+ }
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = cnt;
+ smp->flags |= SMP_F_VOL_HDR;
+ return 1;
+}
+
+/* Fetch a cookie's value and convert it to an integer, which is returned. It
+ * takes an optional argument of type string. It relies on smp_fetch_cookie().
+ */
+static int smp_fetch_cookie_val(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int ret = smp_fetch_cookie(args, smp, kw, private);
+
+ if (ret > 0) {
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = strl2ic(smp->data.u.str.area,
+ smp->data.u.str.data);
+ }
+
+ return ret;
+}
+
+/* Iterate over all cookies present in a message and return the list of cookie
+ * names separated by the input argument character. If no input argument is
+ * provided, the default delimiter is ','. The returned sample is of type CSTR.
+ */
+static int smp_fetch_cookie_names(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ /* possible keywords: req.cook_names, res.cook_names */
+ struct channel *chn = ((kw[2] == 'q') ? SMP_REQ_CHN(smp) : SMP_RES_CHN(smp));
+ struct check *check = ((kw[2] == 's') ? objt_check(smp->sess->origin) : NULL);
+ struct htx *htx = smp_prefetch_htx(smp, chn, check, 1);
+ struct http_hdr_ctx ctx;
+ struct ist hdr;
+ struct buffer *temp;
+ char del = ',';
+ char *ptr, *attr_beg, *attr_end;
+ size_t len = 0;
+ int is_req = !(check || (chn && chn->flags & CF_ISRESP));
+
+ if (!htx)
+ return 0;
+
+ if (args->type == ARGT_STR)
+ del = *args[0].data.str.area;
+
+ hdr = (is_req ? ist("Cookie") : ist("Set-Cookie"));
+ temp = get_trash_chunk();
+
+ smp->flags |= SMP_F_VOL_HDR;
+ attr_end = attr_beg = NULL;
+ ctx.blk = NULL;
+ /* Scan through all headers and extract all cookie names from
+ * 1. Cookie header(s) for request channel OR
+ * 2. Set-Cookie header(s) for response channel
+ */
+ while (1) {
+ /* Note: attr_beg == NULL every time we need to fetch a new header */
+ if (!attr_beg) {
+ /* For Set-Cookie, we need to fetch the entire header line (set flag to 1) */
+ if (!http_find_header(htx, hdr, &ctx, !is_req))
+ break;
+ attr_beg = ctx.value.ptr;
+ attr_end = attr_beg + ctx.value.len;
+ }
+
+ while (1) {
+ attr_beg = http_extract_next_cookie_name(attr_beg, attr_end, is_req, &ptr, &len);
+ if (!attr_beg)
+ break;
+
+ /* prepend delimiter if this is not the first cookie name found */
+ if (temp->data)
+ temp->area[temp->data++] = del;
+
+ /* At this point ptr should point to the start of the cookie name and len would be the length of the cookie name */
+ if (!chunk_memcat(temp, ptr, len))
+ return 0;
+ }
+ }
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *temp;
+ return 1;
+}
+
+/************************************************************************/
+/* The code below is dedicated to sample fetches */
+/************************************************************************/
+
+/* This scans a URL-encoded query string. It takes an optionally wrapping
+ * string whose first contiguous chunk has its beginning in ctx->a[0] and end
+ * in ctx->a[1], and the optional second part in (ctx->a[2]..ctx->a[3]). The
+ * pointers are updated for the next iteration before leaving.
+ */
+static int smp_fetch_param(char delim, const char *name, int name_len, const struct arg *args, struct sample *smp, const char *kw, void *private, char insensitive)
+{
+ const char *vstart, *vend;
+ struct buffer *temp;
+ const char **chunks = (const char **)smp->ctx.a;
+
+ if (!http_find_next_url_param(chunks, name, name_len,
+ &vstart, &vend, delim, insensitive))
+ return 0;
+
+	/* Create the sample. If the value is contiguous, return the pointer as
+	 * CONST; if the value is wrapped, copy it into a buffer.
+	 */
+ smp->data.type = SMP_T_STR;
+ if (chunks[2] &&
+ vstart >= chunks[0] && vstart <= chunks[1] &&
+ vend >= chunks[2] && vend <= chunks[3]) {
+ /* Wrapped case. */
+ temp = get_trash_chunk();
+ memcpy(temp->area, vstart, chunks[1] - vstart);
+ memcpy(temp->area + ( chunks[1] - vstart ), chunks[2],
+ vend - chunks[2]);
+ smp->data.u.str.area = temp->area;
+ smp->data.u.str.data = ( chunks[1] - vstart ) + ( vend - chunks[2] );
+ } else {
+ /* Contiguous case. */
+ smp->data.u.str.area = (char *)vstart;
+ smp->data.u.str.data = vend - vstart;
+ smp->flags = SMP_F_VOL_1ST | SMP_F_CONST;
+ }
+
+ /* Update context, check wrapping. */
+ chunks[0] = vend;
+ if (chunks[2] && vend >= chunks[2] && vend <= chunks[3]) {
+ chunks[1] = chunks[3];
+ chunks[2] = NULL;
+ }
+
+ if (chunks[0] < chunks[1])
+ smp->flags |= SMP_F_NOT_LAST;
+
+ return 1;
+}
+
+/* This function iterates over each parameter of the query string. It uses
+ * ctx->a[0] and ctx->a[1] to store the beginning and end of the current
+ * parameter. Since it uses smp_fetch_param(), ctx->a[2..3] are both NULL.
+ * An optional parameter name is passed in args[0], otherwise any parameter is
+ * considered. It supports an optional delimiter argument for the beginning of
+ * the string in args[1], which defaults to "?", and an optional "i" flag in
+ * args[2] to match the parameter name case-insensitively.
+ */
+static int smp_fetch_url_param(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ char delim = '?';
+ const char *name;
+ int name_len;
+ char insensitive = 0;
+
+ if ((args[0].type && args[0].type != ARGT_STR) ||
+ (args[1].type && args[1].type != ARGT_STR) ||
+ (args[2].type && args[2].type != ARGT_STR))
+ return 0;
+
+ name = "";
+ name_len = 0;
+ if (args->type == ARGT_STR) {
+ name = args->data.str.area;
+ name_len = args->data.str.data;
+ }
+
+ if (args[1].type && *args[1].data.str.area)
+ delim = *args[1].data.str.area;
+ if (args[2].type && *args[2].data.str.area == 'i')
+ insensitive = 1;
+
+ if (!smp->ctx.a[0]) { // first call, find the query string
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct htx_sl *sl;
+
+ if (!htx)
+ return 0;
+
+ sl = http_get_stline(htx);
+ smp->ctx.a[0] = http_find_param_list(HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl), delim);
+ if (!smp->ctx.a[0])
+ return 0;
+
+ smp->ctx.a[1] = HTX_SL_REQ_UPTR(sl) + HTX_SL_REQ_ULEN(sl);
+
+ /* Assume that the context is filled with NULL pointer
+ * before the first call.
+ * smp->ctx.a[2] = NULL;
+ * smp->ctx.a[3] = NULL;
+ */
+ }
+
+ return smp_fetch_param(delim, name, name_len, args, smp, kw, private, insensitive);
+}
+
+/* This function iterates over each parameter of the body. This requires
+ * that the body has been waited for using http-buffer-request. It uses
+ * ctx->a[0] and ctx->a[1] to store the beginning and end of the first
+ * contiguous part of the body, and optionally ctx->a[2..3] to reference the
+ * optional second part if the body wraps at the end of the buffer. An optional
+ * parameter name is passed in args[0], otherwise any parameter is considered,
+ * and an optional "i" flag in args[1] matches the name case-insensitively.
+ */
+static int smp_fetch_body_param(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ const char *name;
+ int name_len;
+ char insensitive = 0;
+
+ if ((args[0].type && args[0].type != ARGT_STR) ||
+ (args[1].type && args[1].type != ARGT_STR))
+ return 0;
+
+ name = "";
+ name_len = 0;
+ if (args[0].type == ARGT_STR) {
+ name = args[0].data.str.area;
+ name_len = args[0].data.str.data;
+ }
+
+ if (args[1].type && *args[1].data.str.area == 'i')
+ insensitive = 1;
+
+	if (!smp->ctx.a[0]) { // first call, gather the body
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct buffer *temp;
+ int32_t pos;
+
+ if (!htx)
+ return 0;
+
+ temp = get_trash_chunk();
+ for (pos = htx_get_first(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *blk = htx_get_blk(htx, pos);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA) {
+ if (!h1_format_htx_data(htx_get_blk_value(htx, blk), temp, 0))
+ return 0;
+ }
+ }
+
+ smp->ctx.a[0] = temp->area;
+ smp->ctx.a[1] = temp->area + temp->data;
+
+ /* Assume that the context is filled with NULL pointer
+ * before the first call.
+ * smp->ctx.a[2] = NULL;
+ * smp->ctx.a[3] = NULL;
+ */
+
+ }
+
+ return smp_fetch_param('&', name, name_len, args, smp, kw, private, insensitive);
+}
+
+/* Return the signed integer value for the specified url parameter (see url_param
+ * above).
+ */
+static int smp_fetch_url_param_val(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int ret = smp_fetch_url_param(args, smp, kw, private);
+
+ if (ret > 0) {
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = strl2ic(smp->data.u.str.area,
+ smp->data.u.str.data);
+ }
+
+ return ret;
+}
+
+/* This produces a 32-bit hash of the concatenation of the first occurrence of
+ * the Host header followed by the path component if it begins with a slash ('/').
+ * This means that '*' will not be added, resulting in exactly the first Host
+ * entry. If no Host header is found, then the path is used. The resulting value
+ * is hashed using the url hash followed by a full avalanche hash and provides a
+ * 32-bit integer value. This fetch is useful for tracking per-URL activity on
+ * high-traffic sites without having to store whole paths. It differs from the
+ * base32 functions in that it includes the url parameters as well as the path.
+ */
+static int smp_fetch_url32(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn = SMP_REQ_CHN(smp);
+ struct htx *htx = smp_prefetch_htx(smp, chn, NULL, 1);
+ struct http_hdr_ctx ctx;
+ struct htx_sl *sl;
+ struct ist path;
+ unsigned int hash = 0;
+ struct http_uri_parser parser;
+
+ if (!htx)
+ return 0;
+
+ ctx.blk = NULL;
+ if (http_find_header(htx, ist("Host"), &ctx, 1)) {
+ /* OK we have the header value in ctx.value */
+ while (ctx.value.len--)
+ hash = *(ctx.value.ptr++) + (hash << 6) + (hash << 16) - hash;
+ }
+
+ /* now retrieve the path */
+ sl = http_get_stline(htx);
+ parser = http_uri_parser_init(htx_sl_req_uri(sl));
+ path = http_parse_path(&parser);
+ if (path.len && *(path.ptr) == '/') {
+ while (path.len--)
+ hash = *(path.ptr++) + (hash << 6) + (hash << 16) - hash;
+ }
+
+ hash = full_hash(hash);
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = hash;
+ smp->flags = SMP_F_VOL_1ST;
+ return 1;
+}
+
+/* This concatenates the source address with the 32-bit hash of the Host and
+ * URL as returned by smp_fetch_url32(). The idea is to have per-source and
+ * per-url counters. The result is a binary block from 8 to 20 bytes depending
+ * on the source address length. The URL hash is stored before the address so
+ * that in environments where IPv6 is insignificant, truncating the output to
+ * 8 bytes would still work.
+ */
+static int smp_fetch_url32_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *src = (smp->strm ? sc_src(smp->strm->scf) : NULL);
+ struct buffer *temp;
+
+ if (!src)
+ return 0;
+
+ if (!smp_fetch_url32(args, smp, kw, private))
+ return 0;
+
+ temp = get_trash_chunk();
+ *(unsigned int *) temp->area = htonl(smp->data.u.sint);
+ temp->data += sizeof(unsigned int);
+
+ switch (src->ss_family) {
+ case AF_INET:
+ memcpy(temp->area + temp->data,
+ &((struct sockaddr_in *)src)->sin_addr,
+ 4);
+ temp->data += 4;
+ break;
+ case AF_INET6:
+ memcpy(temp->area + temp->data,
+ &((struct sockaddr_in6 *)src)->sin6_addr,
+ 16);
+ temp->data += 16;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/************************************************************************/
+/* Other utility functions */
+/************************************************************************/
+
+/* This function is used to validate the arguments passed to any "hdr" fetch
+ * keyword. These keywords support an optional positive or negative occurrence
+ * number. We must ensure that the number is not lower than -MAX_HDR_HISTORY. It
+ * is assumed that the types are already the correct ones. Returns 0 on error,
+ * non-zero if OK. If <err> is not NULL, it will be filled with a pointer to an
+ * error message in case of error, that the caller is responsible for freeing.
+ * The initial location must either be freeable or NULL.
+ * Note: this function's pointer is checked from Lua.
+ */
+int val_hdr(struct arg *arg, char **err_msg)
+{
+ if (arg && arg[1].type == ARGT_SINT && arg[1].data.sint < -MAX_HDR_HISTORY) {
+ memprintf(err_msg, "header occurrence must be >= %d", -MAX_HDR_HISTORY);
+ return 0;
+ }
+ return 1;
+}
+
+/************************************************************************/
+/* All supported sample fetch keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "base", smp_fetch_base, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "base32", smp_fetch_base32, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "base32+src", smp_fetch_base32_src, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+ { "baseq", smp_fetch_base, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+
+	/* captures are allocated and are permanent in the stream */
+ { "capture.req.hdr", smp_fetch_capture_req_hdr, ARG1(1,SINT), NULL, SMP_T_STR, SMP_USE_HRQHP },
+
+ /* retrieve these captures from the HTTP logs */
+ { "capture.req.method", smp_fetch_capture_req_method, 0, NULL, SMP_T_STR, SMP_USE_HRQHP },
+ { "capture.req.uri", smp_fetch_capture_req_uri, 0, NULL, SMP_T_STR, SMP_USE_HRQHP },
+ { "capture.req.ver", smp_fetch_capture_req_ver, 0, NULL, SMP_T_STR, SMP_USE_HRQHP },
+
+ { "capture.res.hdr", smp_fetch_capture_res_hdr, ARG1(1,SINT), NULL, SMP_T_STR, SMP_USE_HRSHP },
+ { "capture.res.ver", smp_fetch_capture_res_ver, 0, NULL, SMP_T_STR, SMP_USE_HRQHP },
+
+ /* cookie is valid in both directions (eg: for "stick ...") but cook*
+ * are only here to match the ACL's name, are request-only and are used
+ * for ACL compatibility only.
+ */
+ { "cook", smp_fetch_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "cookie", smp_fetch_chn_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV },
+ { "cook_cnt", smp_fetch_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "cook_val", smp_fetch_cookie_val, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+
+ /* hdr is valid in both directions (eg: for "stick ...") but hdr_* are
+ * only here to match the ACL's name, are request-only and are used for
+ * ACL compatibility only.
+ */
+ { "hdr", smp_fetch_chn_hdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV },
+ { "hdr_cnt", smp_fetch_hdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "hdr_ip", smp_fetch_hdr_ip, ARG2(0,STR,SINT), val_hdr, SMP_T_ADDR, SMP_USE_HRQHV },
+ { "hdr_val", smp_fetch_hdr_val, ARG2(0,STR,SINT), val_hdr, SMP_T_SINT, SMP_USE_HRQHV },
+
+ { "http_auth_type", smp_fetch_http_auth_type, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_auth_user", smp_fetch_http_auth_user, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_auth_pass", smp_fetch_http_auth_pass, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_auth_bearer", smp_fetch_http_auth_bearer, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_auth", smp_fetch_http_auth, ARG1(1,USR), NULL, SMP_T_BOOL, SMP_USE_HRQHV },
+ { "http_auth_group", smp_fetch_http_auth_grp, ARG1(1,USR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "http_first_req", smp_fetch_http_first_req, 0, NULL, SMP_T_BOOL, SMP_USE_HRQHP },
+ { "method", smp_fetch_meth, 0, NULL, SMP_T_METH, SMP_USE_HRQHP },
+ { "path", smp_fetch_path, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "pathq", smp_fetch_path, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "query", smp_fetch_query, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+
+ /* HTTP protocol on the request path */
+ { "req.proto_http", smp_fetch_proto_http, 0, NULL, SMP_T_BOOL, SMP_USE_HRQHP },
+ { "req_proto_http", smp_fetch_proto_http, 0, NULL, SMP_T_BOOL, SMP_USE_HRQHP },
+
+ /* HTTP version on the request path */
+ { "req.ver", smp_fetch_rqver, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "req_ver", smp_fetch_rqver, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+
+ { "req.body", smp_fetch_body, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+ { "req.body_len", smp_fetch_body_len, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.body_size", smp_fetch_body_size, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.body_param", smp_fetch_body_param, ARG2(0,STR,STR), NULL, SMP_T_BIN, SMP_USE_HRQHV },
+
+ { "req.hdrs", smp_fetch_hdrs, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+ { "req.hdrs_bin", smp_fetch_hdrs_bin, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+
+ /* HTTP version on the response path */
+ { "res.ver", smp_fetch_stver, 0, NULL, SMP_T_STR, SMP_USE_HRSHV },
+ { "resp_ver", smp_fetch_stver, 0, NULL, SMP_T_STR, SMP_USE_HRSHV },
+
+ { "res.body", smp_fetch_body, 0, NULL, SMP_T_BIN, SMP_USE_HRSHV },
+ { "res.body_len", smp_fetch_body_len, 0, NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.body_size", smp_fetch_body_size, 0, NULL, SMP_T_SINT, SMP_USE_HRSHV },
+
+ { "res.hdrs", smp_fetch_hdrs, 0, NULL, SMP_T_BIN, SMP_USE_HRSHV },
+ { "res.hdrs_bin", smp_fetch_hdrs_bin, 0, NULL, SMP_T_BIN, SMP_USE_HRSHV },
+
+ /* explicit req.{cook,hdr} are used to force the fetch direction to be request-only */
+ { "req.cook", smp_fetch_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "req.cook_cnt", smp_fetch_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.cook_val", smp_fetch_cookie_val, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.cook_names", smp_fetch_cookie_names, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+
+ { "req.fhdr", smp_fetch_fhdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRQHV },
+ { "req.fhdr_cnt", smp_fetch_fhdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.hdr", smp_fetch_hdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRQHV },
+ { "req.hdr_cnt", smp_fetch_hdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "req.hdr_ip", smp_fetch_hdr_ip, ARG2(0,STR,SINT), val_hdr, SMP_T_ADDR, SMP_USE_HRQHV },
+ { "req.hdr_names", smp_fetch_hdr_names, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "req.hdr_val", smp_fetch_hdr_val, ARG2(0,STR,SINT), val_hdr, SMP_T_SINT, SMP_USE_HRQHV },
+
+	/* explicit res.{cook,hdr} are used to force the fetch direction to be response-only */
+ { "res.cook", smp_fetch_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRSHV },
+ { "res.cook_cnt", smp_fetch_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.cook_val", smp_fetch_cookie_val, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.cook_names", smp_fetch_cookie_names, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRSHV },
+
+ { "res.fhdr", smp_fetch_fhdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRSHV },
+ { "res.fhdr_cnt", smp_fetch_fhdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.hdr", smp_fetch_hdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRSHV },
+ { "res.hdr_cnt", smp_fetch_hdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "res.hdr_ip", smp_fetch_hdr_ip, ARG2(0,STR,SINT), val_hdr, SMP_T_ADDR, SMP_USE_HRSHV },
+ { "res.hdr_names", smp_fetch_hdr_names, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRSHV },
+ { "res.hdr_val", smp_fetch_hdr_val, ARG2(0,STR,SINT), val_hdr, SMP_T_SINT, SMP_USE_HRSHV },
+
+ { "server_status", smp_fetch_srv_status, 0, NULL, SMP_T_SINT, SMP_USE_HRSHP },
+
+ /* scook is valid only on the response and is used for ACL compatibility */
+ { "scook", smp_fetch_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_HRSHV },
+ { "scook_cnt", smp_fetch_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "scook_val", smp_fetch_cookie_val, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+
+ /* shdr is valid only on the response and is used for ACL compatibility */
+ { "shdr", smp_fetch_hdr, ARG2(0,STR,SINT), val_hdr, SMP_T_STR, SMP_USE_HRSHV },
+ { "shdr_cnt", smp_fetch_hdr_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_HRSHV },
+ { "shdr_ip", smp_fetch_hdr_ip, ARG2(0,STR,SINT), val_hdr, SMP_T_ADDR, SMP_USE_HRSHV },
+ { "shdr_val", smp_fetch_hdr_val, ARG2(0,STR,SINT), val_hdr, SMP_T_SINT, SMP_USE_HRSHV },
+
+ { "status", smp_fetch_stcode, 0, NULL, SMP_T_SINT, SMP_USE_HRSHP },
+ { "txn.status", smp_fetch_srv_status, 0, NULL, SMP_T_SINT, SMP_USE_HRSHP },
+ { "unique-id", smp_fetch_uniqueid, 0, NULL, SMP_T_STR, SMP_SRC_L4SRV },
+ { "url", smp_fetch_url, 0, NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "url32", smp_fetch_url32, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "url32+src", smp_fetch_url32_src, 0, NULL, SMP_T_BIN, SMP_USE_HRQHV },
+ { "url_ip", smp_fetch_url_ip, 0, NULL, SMP_T_IPV4, SMP_USE_HRQHV },
+ { "url_port", smp_fetch_url_port, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV },
+ { "url_param", smp_fetch_url_param, ARG3(0,STR,STR,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "urlp" , smp_fetch_url_param, ARG3(0,STR,STR,STR), NULL, SMP_T_STR, SMP_USE_HRQHV },
+ { "urlp_val", smp_fetch_url_param_val, ARG3(0,STR,STR,STR), NULL, SMP_T_SINT, SMP_USE_HRQHV },
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/http_htx.c b/src/http_htx.c
new file mode 100644
index 0000000..004d343
--- /dev/null
+++ b/src/http_htx.c
@@ -0,0 +1,3028 @@
+/*
+ * Functions to manipulate HTTP messages using the internal representation.
+ *
+ * Copyright (C) 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/http.h>
+#include <haproxy/http-hdr.h>
+#include <haproxy/http_fetch.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/log.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/tools.h>
+
+
+struct buffer http_err_chunks[HTTP_ERR_SIZE];
+struct http_reply http_err_replies[HTTP_ERR_SIZE];
+
+struct eb_root http_error_messages = EB_ROOT;
+struct list http_errors_list = LIST_HEAD_INIT(http_errors_list);
+struct list http_replies_list = LIST_HEAD_INIT(http_replies_list);
+
+/* The declaration of an "errorfiles" or "errorfile" directive. Used during
+ * config parsing only. */
+struct conf_errors {
+ char type; /* directive type (0: errorfiles, 1: errorfile) */
+ union {
+ struct {
+ int status; /* the status code associated to this error */
+ struct http_reply *reply; /* the http reply for the errorfile */
+ } errorfile; /* describe an "errorfile" directive */
+ struct {
+ char *name; /* the http-errors section name */
+ char status[HTTP_ERR_SIZE]; /* list of status to import (0: ignore, 1: implicit import, 2: explicit import) */
+ } errorfiles; /* describe an "errorfiles" directive */
+ } info;
+
+ char *file; /* file where the directive appears */
+ int line; /* line where the directive appears */
+
+ struct list list; /* next conf_errors */
+};
+
+/* Returns the next unprocessed start line in the HTX message. It returns NULL
+ * if the start-line is undefined (first == -1). Otherwise, it returns a
+ * pointer to the htx_sl structure.
+ */
+struct htx_sl *http_get_stline(const struct htx *htx)
+{
+ struct htx_blk *blk;
+
+ blk = htx_get_first_blk(htx);
+ if (!blk || (htx_get_blk_type(blk) != HTX_BLK_REQ_SL && htx_get_blk_type(blk) != HTX_BLK_RES_SL))
+ return NULL;
+ return htx_get_blk_ptr(htx, blk);
+}
+
+/* Returns the headers size in the HTX message */
+size_t http_get_hdrs_size(struct htx *htx)
+{
+ struct htx_blk *blk;
+ size_t sz = 0;
+
+ blk = htx_get_first_blk(htx);
+ if (!blk || htx_get_blk_type(blk) > HTX_BLK_EOH)
+ return sz;
+
+ for (; blk; blk = htx_get_next_blk(htx, blk)) {
+ sz += htx_get_blksz(blk);
+ if (htx_get_blk_type(blk) == HTX_BLK_EOH)
+ break;
+ }
+ return sz;
+}
+
+/* Finds the first or next occurrence of header matching <pattern> in the HTX
+ * message <htx> using the context <ctx>. This structure holds everything
+ * necessary to use the header and find next occurrence. If its <blk> member is
+ * NULL, the header is searched from the beginning. Otherwise, the next
+ * occurrence is returned. The function returns 1 when it finds a value, and 0
+ * when there is no more. It is designed to work with headers defined as
+ * comma-separated lists. If the HTTP_FIND_FL_FULL flag is set, it works on
+ * full-line headers, for which the comma is not a delimiter but part of the
+ * syntax. As a special case, if ctx->value is NULL when searching for a new
+ * value of a header, the current header is rescanned. This allows rescanning
+ * after a header deletion.
+ *
+ * The matching method is chosen by checking the flags :
+ *
+ *   * HTTP_FIND_FL_MATCH_REG : <pattern> is a regex. Header names matching
+ *     the regex are evaluated.
+ *   * HTTP_FIND_FL_MATCH_STR : <pattern> is a string. Header names equal to
+ *     the string are evaluated.
+ *   * HTTP_FIND_FL_MATCH_PFX : <pattern> is a string. Header names starting
+ *     with the string are evaluated.
+ *   * HTTP_FIND_FL_MATCH_SFX : <pattern> is a string. Header names ending
+ *     with the string are evaluated.
+ *   * HTTP_FIND_FL_MATCH_SUB : <pattern> is a string. Header names
+ *     containing the string are evaluated.
+ */
+
+#define HTTP_FIND_FL_MATCH_STR 0x0001
+#define HTTP_FIND_FL_MATCH_PFX 0x0002
+#define HTTP_FIND_FL_MATCH_SFX 0x0003
+#define HTTP_FIND_FL_MATCH_SUB 0x0004
+#define HTTP_FIND_FL_MATCH_REG 0x0005
+/* 0x0006..0x000f: for other matching methods */
+#define HTTP_FIND_FL_MATCH_TYPE 0x000F
+#define HTTP_FIND_FL_FULL 0x0010
+
+static int __http_find_header(const struct htx *htx, const void *pattern, struct http_hdr_ctx *ctx, int flags)
+{
+ struct htx_blk *blk = ctx->blk;
+ struct ist n, v;
+ enum htx_blk_type type;
+
+ if (blk) {
+ char *p;
+
+ if (!isttest(ctx->value))
+ goto rescan_hdr;
+ if (flags & HTTP_FIND_FL_FULL)
+ goto next_blk;
+ v = htx_get_blk_value(htx, blk);
+ p = istend(ctx->value) + ctx->lws_after;
+ v.len -= (p - v.ptr);
+ v.ptr = p;
+ if (!v.len)
+ goto next_blk;
+ /* Skip comma */
+ if (*(v.ptr) == ',') {
+ v = istnext(v);
+ }
+
+ goto return_hdr;
+ }
+
+ if (htx_is_empty(htx))
+ return 0;
+
+ for (blk = htx_get_first_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ rescan_hdr:
+ type = htx_get_blk_type(blk);
+ if (type == HTX_BLK_EOH)
+ break;
+ if (type != HTX_BLK_HDR)
+ continue;
+
+ if ((flags & HTTP_FIND_FL_MATCH_TYPE) == HTTP_FIND_FL_MATCH_REG) {
+ const struct my_regex *re = pattern;
+
+ n = htx_get_blk_name(htx, blk);
+ if (!regex_exec2(re, n.ptr, n.len))
+ goto next_blk;
+ }
+ else {
+ const struct ist name = *(const struct ist *)(pattern);
+
+ /* If no name was passed, we want any header. So skip the comparison */
+ if (!istlen(name))
+ goto match;
+
+ n = htx_get_blk_name(htx, blk);
+ switch (flags & HTTP_FIND_FL_MATCH_TYPE) {
+ case HTTP_FIND_FL_MATCH_STR:
+ if (!isteqi(n, name))
+ goto next_blk;
+ break;
+ case HTTP_FIND_FL_MATCH_PFX:
+ if (istlen(n) < istlen(name))
+ goto next_blk;
+
+ n = ist2(istptr(n), istlen(name));
+ if (!isteqi(n, name))
+ goto next_blk;
+ break;
+ case HTTP_FIND_FL_MATCH_SFX:
+ if (istlen(n) < istlen(name))
+ goto next_blk;
+
+ n = ist2(istend(n) - istlen(name),
+ istlen(name));
+ if (!isteqi(n, name))
+ goto next_blk;
+ break;
+ case HTTP_FIND_FL_MATCH_SUB:
+ if (!strnistr(n.ptr, n.len, name.ptr, name.len))
+ goto next_blk;
+ break;
+ default:
+ goto next_blk;
+ break;
+ }
+ }
+ match:
+ v = htx_get_blk_value(htx, blk);
+
+ return_hdr:
+ ctx->lws_before = 0;
+ ctx->lws_after = 0;
+ while (v.len && HTTP_IS_LWS(*v.ptr)) {
+ v = istnext(v);
+ ctx->lws_before++;
+ }
+ if (!(flags & HTTP_FIND_FL_FULL))
+ v.len = http_find_hdr_value_end(v.ptr, istend(v)) - v.ptr;
+
+ while (v.len && HTTP_IS_LWS(*(istend(v) - 1))) {
+ v.len--;
+ ctx->lws_after++;
+ }
+ ctx->blk = blk;
+ ctx->value = v;
+ return 1;
+
+ next_blk:
+ ;
+ }
+
+ ctx->blk = NULL;
+ ctx->value = ist("");
+ ctx->lws_before = ctx->lws_after = 0;
+ return 0;
+}
+
+
+/* Header names must match <name> */
+int http_find_header(const struct htx *htx, const struct ist name, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &name, ctx, HTTP_FIND_FL_MATCH_STR | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+/* Header names must match <name>. Same as http_find_header() */
+int http_find_str_header(const struct htx *htx, const struct ist name, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &name, ctx, HTTP_FIND_FL_MATCH_STR | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+
+/* Header names must start with <prefix> */
+int http_find_pfx_header(const struct htx *htx, const struct ist prefix, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &prefix, ctx, HTTP_FIND_FL_MATCH_PFX | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+/* Header names must end with <suffix> */
+int http_find_sfx_header(const struct htx *htx, const struct ist suffix, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &suffix, ctx, HTTP_FIND_FL_MATCH_SFX | (full ? HTTP_FIND_FL_FULL : 0));
+}
+/* Header names must contain <sub> */
+int http_find_sub_header(const struct htx *htx, const struct ist sub, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, &sub, ctx, HTTP_FIND_FL_MATCH_SUB | (full ? HTTP_FIND_FL_FULL : 0));
+}
+
+/* Header names must match the <re> regex */
+int http_match_header(const struct htx *htx, const struct my_regex *re, struct http_hdr_ctx *ctx, int full)
+{
+ return __http_find_header(htx, re, ctx, HTTP_FIND_FL_MATCH_REG | (full ? HTTP_FIND_FL_FULL : 0));
+}
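+
+/* A minimal usage sketch for the finders above. The helper below is purely
+ * illustrative (hypothetical name, referenced nowhere else): it counts the
+ * comma-separated values carried by the "cache-control" header, driving
+ * http_find_header() with a context whose <blk> member starts at NULL.
+ */
+static inline int example_count_cache_control(const struct htx *htx)
+{
+	struct http_hdr_ctx ctx;
+	int cnt = 0;
+
+	ctx.blk = NULL;
+	while (http_find_header(htx, ist("cache-control"), &ctx, 0))
+		cnt++;
+	return cnt;
+}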
+
+
+/* Adds a header block into the HTX message <htx>, just before the EOH block. It
+ * returns 1 on success, otherwise it returns 0.
+ */
+int http_add_header(struct htx *htx, const struct ist n, const struct ist v)
+{
+ struct htx_blk *blk;
+ struct htx_sl *sl;
+ enum htx_blk_type type = htx_get_tail_type(htx);
+ int32_t prev;
+
+ blk = htx_add_header(htx, n, v);
+ if (!blk)
+ goto fail;
+
+ if (unlikely(type < HTX_BLK_EOH))
+ goto end;
+
+	/* <blk> was appended at the tail; swap it iteratively with its
+	 * predecessor to place it just before the end-of-header block, so
+	 * blocks remain ordered. */
+ for (prev = htx_get_prev(htx, htx->tail); prev != htx->first; prev = htx_get_prev(htx, prev)) {
+ struct htx_blk *pblk = htx_get_blk(htx, prev);
+ enum htx_blk_type type = htx_get_blk_type(pblk);
+
+ /* Swap .addr and .info fields */
+ blk->addr ^= pblk->addr; pblk->addr ^= blk->addr; blk->addr ^= pblk->addr;
+ blk->info ^= pblk->info; pblk->info ^= blk->info; blk->info ^= pblk->info;
+
+ if (blk->addr == pblk->addr)
+ blk->addr += htx_get_blksz(pblk);
+
+ /* Stop when end-of-header is reached */
+ if (type == HTX_BLK_EOH)
+ break;
+
+ blk = pblk;
+ }
+
+ end:
+ sl = http_get_stline(htx);
+ if (sl && (sl->flags & HTX_SL_F_HAS_AUTHORITY) && isteqi(n, ist("host"))) {
+ if (!http_update_authority(htx, sl, v))
+ goto fail;
+ }
+ return 1;
+
+ fail:
+ return 0;
+}
+
+/* Replaces parts of the start-line of the HTX message <htx>. It returns 1 on
+ * success, otherwise it returns 0.
+ */
+int http_replace_stline(struct htx *htx, const struct ist p1, const struct ist p2, const struct ist p3)
+{
+ struct htx_blk *blk;
+
+ blk = htx_get_first_blk(htx);
+ if (!blk || !htx_replace_stline(htx, blk, p1, p2, p3))
+ return 0;
+ return 1;
+}
+
+/* Replace the request method in the HTX message <htx> by <meth>. It returns 1
+ * on success, otherwise 0.
+ */
+int http_replace_req_meth(struct htx *htx, const struct ist meth)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist uri, vsn;
+
+ if (!sl)
+ return 0;
+
+ /* Start by copying old uri and version */
+ chunk_memcat(temp, HTX_SL_REQ_UPTR(sl), HTX_SL_REQ_ULEN(sl)); /* uri */
+ uri = ist2(temp->area, HTX_SL_REQ_ULEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + uri.len, HTX_SL_REQ_VLEN(sl));
+
+ /* create the new start line */
+ sl->info.req.meth = find_http_meth(meth.ptr, meth.len);
+ return http_replace_stline(htx, meth, uri, vsn);
+}
+
+/* Replace the request uri in the HTX message <htx> by <uri>. It returns 1 on
+ * success, otherwise 0.
+ */
+int http_replace_req_uri(struct htx *htx, const struct ist uri)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist meth, vsn;
+
+ if (!sl)
+ goto fail;
+
+ /* Start by copying old method and version */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)); /* meth */
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ /* create the new start line */
+ if (!http_replace_stline(htx, meth, uri, vsn))
+ goto fail;
+
+ sl = http_get_stline(htx);
+ ALREADY_CHECKED(sl); /* the stline exists because http_replace_stline() succeeded */
+
+ sl->flags &= ~HTX_SL_F_NORMALIZED_URI;
+ if (!http_update_host(htx, sl, uri))
+ goto fail;
+
+ return 1;
+ fail:
+ return 0;
+}
+
+/* Replace the request path in the HTX message <htx> by <path>. The host part
+ * is preserved. If <with_qs> is set, the query string is evaluated as part of
+ * the path and replaced. Otherwise, it is preserved too. It returns 1 on
+ * success, otherwise 0.
+ */
+int http_replace_req_path(struct htx *htx, const struct ist path, int with_qs)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist meth, uri, vsn, p;
+ size_t plen = 0;
+ struct http_uri_parser parser;
+
+ if (!sl)
+ return 0;
+
+ uri = htx_sl_req_uri(sl);
+ parser = http_uri_parser_init(uri);
+ p = http_parse_path(&parser);
+ if (!isttest(p))
+ p = uri;
+ if (with_qs)
+ plen = p.len;
+ else {
+ while (plen < p.len && *(p.ptr + plen) != '?')
+ plen++;
+ }
+
+ /* Start by copying old method and version and create the new uri */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)); /* meth */
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ chunk_memcat(temp, uri.ptr, p.ptr - uri.ptr); /* uri: host part */
+ chunk_istcat(temp, path); /* uri: new path */
+ chunk_memcat(temp, p.ptr + plen, p.len - plen); /* uri: QS part */
+ uri = ist2(temp->area + meth.len + vsn.len, uri.len - plen + path.len);
+
+ /* create the new start line */
+ return http_replace_stline(htx, meth, uri, vsn);
+}
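+
+/* Illustrative sketch (hypothetical helper, referenced nowhere else): with
+ * <with_qs> cleared, rewriting the path of "GET /old/a?k=v" to "/new/a"
+ * yields "/new/a?k=v" since the query string is preserved; with <with_qs>
+ * set, the query string would be replaced along with the old path.
+ */
+static inline int example_rewrite_path(struct htx *htx)
+{
+	return http_replace_req_path(htx, ist("/new/a"), 0);
+}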
+
+/* Replace the request query-string in the HTX message <htx> by <query>. The
+ * host part and the path are preserved. It returns 1 on success, otherwise
+ * 0.
+ */
+int http_replace_req_query(struct htx *htx, const struct ist query)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist meth, uri, vsn, q;
+ int offset = 1;
+
+ if (!sl)
+ return 0;
+
+ uri = htx_sl_req_uri(sl);
+ q = uri;
+ while (q.len > 0 && *(q.ptr) != '?') {
+ q = istnext(q);
+ }
+
+	/* skip the question mark or indicate that we must insert it
+	 * (but only if the new query string is not empty).
+	 */
+ if (q.len) {
+ q = istnext(q);
+ }
+ else if (query.len > 1)
+ offset = 0;
+
+ /* Start by copying old method and version and create the new uri */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)); /* meth */
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ chunk_memcat(temp, uri.ptr, q.ptr - uri.ptr); /* uri: host + path part */
+ chunk_memcat(temp, query.ptr + offset, query.len - offset); /* uri: new QS */
+ uri = ist2(temp->area + meth.len + vsn.len, uri.len - q.len + query.len - offset);
+
+ /* create the new start line */
+ return http_replace_stline(htx, meth, uri, vsn);
+}
+
+/* Replace the response status in the HTX message <htx> by <status>, and the
+ * reason by <reason> when it is set; otherwise the old reason is preserved.
+ * It returns 1 on success, otherwise 0.
+ */
+int http_replace_res_status(struct htx *htx, const struct ist status, const struct ist reason)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist vsn, r;
+
+ if (!sl)
+ return 0;
+
+	/* Start by copying the old version */
+ chunk_memcat(temp, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area, HTX_SL_RES_VLEN(sl));
+ r = reason;
+ if (!isttest(r)) {
+ chunk_memcat(temp, HTX_SL_RES_RPTR(sl), HTX_SL_RES_RLEN(sl)); /* reason */
+ r = ist2(temp->area + vsn.len, HTX_SL_RES_RLEN(sl));
+ }
+
+ /* create the new start line */
+ sl->info.res.status = strl2ui(status.ptr, status.len);
+ return http_replace_stline(htx, vsn, status, r);
+}
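+
+/* Illustrative sketch (hypothetical helper, referenced nowhere else): force
+ * the response status to 503 while keeping the existing reason, by passing
+ * an unset ist as <reason>.
+ */
+static inline int example_force_503(struct htx *htx)
+{
+	return http_replace_res_status(htx, ist("503"), IST_NULL);
+}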
+
+/* Replace the response reason in the HTX message <htx> by <reason>. It returns
+ * 1 on success, otherwise 0.
+*/
+int http_replace_res_reason(struct htx *htx, const struct ist reason)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct htx_sl *sl = http_get_stline(htx);
+ struct ist vsn, status;
+
+ if (!sl)
+ return 0;
+
+	/* Start by copying the old version and status code */
+ chunk_memcat(temp, HTX_SL_RES_VPTR(sl), HTX_SL_RES_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area, HTX_SL_RES_VLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl)); /* code */
+ status = ist2(temp->area + vsn.len, HTX_SL_RES_CLEN(sl));
+
+ /* create the new start line */
+ return http_replace_stline(htx, vsn, status, reason);
+}
+
+/* Append the new value <data> after the <ctx> value in the header.
+ * If the header is not empty (at least one value exists):
+ *   - a ',' delimiter is added before <data> is appended
+ *   - <ctx> must be valid and must point to an existing value, else it is an
+ *     error and http_prepend_header_value() should be used instead.
+ *
+ * <ctx> is updated to point to the new value.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int http_append_header_value(struct htx *htx, struct http_hdr_ctx *ctx, const struct ist data)
+{
+ char *start;
+ struct htx_blk *blk = ctx->blk;
+ struct ist v;
+ uint32_t off = 0;
+
+ if (!blk)
+ goto fail;
+
+ v = htx_get_blk_value(htx, blk);
+
+ if (!istlen(v)) {
+ start = v.ptr;
+ goto empty; /* header is empty, append without ',' */
+ }
+ if (unlikely(!istlen(ctx->value)))
+ goto fail; /* invalid: value is empty, not supported */
+
+ start = istend(ctx->value) + ctx->lws_after;
+ off = start - v.ptr;
+
+ blk = htx_replace_blk_value(htx, blk, ist2(start, 0), ist(","));
+ if (!blk)
+ goto fail;
+ off += 1; /* add 1 for ',' */
+ v = htx_get_blk_value(htx, blk);
+ start = v.ptr + off;
+
+ empty:
+ blk = htx_replace_blk_value(htx, blk, ist2(start, 0), data);
+ if (!blk)
+ goto fail;
+ v = htx_get_blk_value(htx, blk);
+
+ ctx->blk = blk;
+ ctx->value = ist2(v.ptr + off, data.len);
+ ctx->lws_before = ctx->lws_after = 0;
+
+ return 1;
+ fail:
+ return 0;
+}
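+
+/* Illustrative sketch (hypothetical helper, referenced nowhere else): append
+ * "no-store" after the first Cache-Control value found, letting the function
+ * above insert the ',' delimiter itself.
+ */
+static inline int example_append_no_store(struct htx *htx)
+{
+	struct http_hdr_ctx ctx;
+
+	ctx.blk = NULL;
+	if (!http_find_header(htx, ist("cache-control"), &ctx, 0))
+		return 0;
+	return http_append_header_value(htx, &ctx, ist("no-store"));
+}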
+
+/* Prepend the new value <data> before the <ctx> value in the header.
+ * If <ctx> is not the first value (at least one value exists), a ','
+ * delimiter is added after <data> is prepended.
+ *
+ * <ctx> is updated to point to the new value.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int http_prepend_header_value(struct htx *htx, struct http_hdr_ctx *ctx, const struct ist data)
+{
+ char *start;
+ struct htx_blk *blk = ctx->blk;
+ struct ist v;
+ uint32_t off = 0;
+ uint8_t first;
+
+ if (!blk)
+ goto fail;
+
+ v = htx_get_blk_value(htx, blk);
+
+ first = !istlen(v);
+ start = first ? v.ptr : istptr(ctx->value) - ctx->lws_before;
+
+ if (unlikely(!istlen(ctx->value)))
+ goto fail; /* invalid: value is empty, not supported */
+
+ off = start - v.ptr;
+
+ blk = htx_replace_blk_value(htx, blk, ist2(start, 0), data);
+ if (!blk)
+ goto fail;
+ v = htx_get_blk_value(htx, blk);
+
+ if (first)
+ goto end; /* header is empty, don't append ',' */
+
+ start = v.ptr + off + data.len;
+
+ blk = htx_replace_blk_value(htx, blk, ist2(start, 0), ist(","));
+ if (!blk)
+ goto fail;
+ v = htx_get_blk_value(htx, blk);
+
+ end:
+ ctx->blk = blk;
+ ctx->value = ist2(v.ptr + off, data.len);
+ ctx->lws_before = ctx->lws_after = 0;
+
+ return 1;
+ fail:
+ return 0;
+}
+
+/* Replaces a part of a header value referenced in the context <ctx> by
+ * <data>. It returns 1 on success, otherwise it returns 0. The context is
+ * updated if necessary.
+ */
+int http_replace_header_value(struct htx *htx, struct http_hdr_ctx *ctx, const struct ist data)
+{
+ struct htx_blk *blk = ctx->blk;
+ struct htx_sl *sl;
+ char *start;
+ struct ist v;
+ uint32_t len, off;
+
+ if (!blk)
+ goto fail;
+
+ v = htx_get_blk_value(htx, blk);
+ start = ctx->value.ptr - ctx->lws_before;
+ len = ctx->lws_before + ctx->value.len + ctx->lws_after;
+ off = start - v.ptr;
+
+ blk = htx_replace_blk_value(htx, blk, ist2(start, len), data);
+ if (!blk)
+ goto fail;
+
+ v = htx_get_blk_value(htx, blk);
+
+ sl = http_get_stline(htx);
+ if (sl && (sl->flags & HTX_SL_F_HAS_AUTHORITY)) {
+ struct ist n = htx_get_blk_name(htx, blk);
+
+ if (isteq(n, ist("host"))) {
+ if (!http_update_authority(htx, sl, v))
+ goto fail;
+ ctx->blk = NULL;
+ http_find_header(htx, ist("host"), ctx, 1);
+ blk = ctx->blk;
+ v = htx_get_blk_value(htx, blk);
+ }
+ }
+
+ ctx->blk = blk;
+ ctx->value = ist2(v.ptr + off, data.len);
+ ctx->lws_before = ctx->lws_after = 0;
+
+ return 1;
+ fail:
+ return 0;
+}
+
+/* Fully replaces a header referenced in the context <ctx> by the name <name>
+ * with the value <value>. It returns 1 on success, otherwise it returns 0. The
+ * context is updated if necessary.
+ */
+int http_replace_header(struct htx *htx, struct http_hdr_ctx *ctx,
+ const struct ist name, const struct ist value)
+{
+ struct htx_blk *blk = ctx->blk;
+ struct htx_sl *sl;
+
+ if (!blk)
+ goto fail;
+
+ blk = htx_replace_header(htx, blk, name, value);
+ if (!blk)
+ goto fail;
+
+ sl = http_get_stline(htx);
+ if (sl && (sl->flags & HTX_SL_F_HAS_AUTHORITY) && isteqi(name, ist("host"))) {
+ if (!http_update_authority(htx, sl, value))
+ goto fail;
+ ctx->blk = NULL;
+ http_find_header(htx, ist("host"), ctx, 1);
+ blk = ctx->blk;
+ }
+
+ ctx->blk = blk;
+ ctx->value = ist(NULL);
+ ctx->lws_before = ctx->lws_after = 0;
+
+ return 1;
+ fail:
+ return 0;
+}
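+
+/* Illustrative sketch (hypothetical helper, referenced nowhere else): the
+ * usual "set header" idiom built on the functions above, replacing the first
+ * full-line occurrence when present and adding the header otherwise.
+ */
+static inline int example_set_header(struct htx *htx, const struct ist n, const struct ist v)
+{
+	struct http_hdr_ctx ctx;
+
+	ctx.blk = NULL;
+	if (http_find_header(htx, n, &ctx, 1))
+		return http_replace_header(htx, &ctx, n, v);
+	return http_add_header(htx, n, v);
+}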
+
+/* Remove one value of a header. This only works on a <ctx> returned by the
+ * http_find_header() function. The value is removed, as well as surrounding
+ * commas if any. If the removed value was alone, the whole header is removed.
+ * The <ctx> is always updated accordingly, as well as the HTX message <htx>.
+ * It returns 1 on success. Otherwise, it returns 0. The <ctx> is always left
+ * in a form that can be handled by http_find_header() to find the next
+ * occurrence.
+ */
+int http_remove_header(struct htx *htx, struct http_hdr_ctx *ctx)
+{
+ struct htx_blk *blk = ctx->blk;
+ char *start;
+ struct ist v;
+ uint32_t len;
+
+ if (!blk)
+ return 0;
+
+ start = ctx->value.ptr - ctx->lws_before;
+ len = ctx->lws_before + ctx->value.len + ctx->lws_after;
+
+ v = htx_get_blk_value(htx, blk);
+ if (len == v.len) {
+ blk = htx_remove_blk(htx, blk);
+ if (blk || htx_is_empty(htx)) {
+ ctx->blk = blk;
+ ctx->value = IST_NULL;
+ ctx->lws_before = ctx->lws_after = 0;
+ }
+ else {
+ ctx->blk = htx_get_blk(htx, htx->tail);
+ ctx->value = htx_get_blk_value(htx, ctx->blk);
+ ctx->lws_before = ctx->lws_after = 0;
+ }
+ return 1;
+ }
+
+ /* This was not the only value of this header. We have to remove the
+ * part pointed by ctx->value. If it is the last entry of the list, we
+ * remove the last separator.
+ */
+ if (start == v.ptr) {
+ /* It's the first header part but not the only one. So remove
+ * the comma after it. */
+ len++;
+ }
+ else {
+ /* There is at least one header part before the removed one. So
+ * remove the comma between them. */
+ start--;
+ len++;
+ }
+ /* Update the block content and its len */
+ memmove(start, start+len, v.len-len);
+ htx_change_blk_value_len(htx, blk, v.len-len);
+
+ /* Finally update the ctx */
+ ctx->value = ist2(start, 0);
+ ctx->lws_before = ctx->lws_after = 0;
+
+ return 1;
+}
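+
+/* Illustrative sketch (hypothetical helper, referenced nowhere else): drop
+ * every occurrence of a header. A full-line lookup pairs with
+ * http_remove_header(), which leaves <ctx> in a state the next
+ * http_find_header() call can resume from.
+ */
+static inline void example_del_header(struct htx *htx, const struct ist name)
+{
+	struct http_hdr_ctx ctx;
+
+	ctx.blk = NULL;
+	while (http_find_header(htx, name, &ctx, 1))
+		http_remove_header(htx, &ctx);
+}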
+
+/* Updates the authority part of the uri with the value <host>. It happens when
+ * the Host header is modified. It returns 0 on failure and 1 on success. It is
+ * the caller's responsibility to provide the start-line and to make sure the
+ * uri contains an authority. Thus, if no authority is found in the uri, an
+ * error is returned.
+ */
+int http_update_authority(struct htx *htx, struct htx_sl *sl, const struct ist host)
+{
+ struct buffer *temp = get_trash_chunk();
+ struct ist meth, vsn, uri, authority;
+ struct http_uri_parser parser;
+
+ uri = htx_sl_req_uri(sl);
+ parser = http_uri_parser_init(uri);
+ authority = http_parse_authority(&parser, 1);
+ if (!authority.len)
+ return 0;
+
+ /* Don't update the uri if there is no change */
+ if (isteq(host, authority))
+ return 1;
+
+ /* Start by copying old method and version */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl)); /* meth */
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl)); /* vsn */
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ chunk_memcat(temp, uri.ptr, authority.ptr - uri.ptr);
+ chunk_istcat(temp, host);
+ chunk_memcat(temp, istend(authority), istend(uri) - istend(authority));
+ uri = ist2(temp->area + meth.len + vsn.len, host.len + uri.len - authority.len); /* uri */
+
+ return http_replace_stline(htx, meth, uri, vsn);
+
+}
+
+/* Update the Host header by extracting the authority of the uri <uri>. Flags
+ * of the start-line are also updated accordingly. For origin-form and
+ * asterisk-form uris, the Host header is not changed and the flag
+ * HTX_SL_F_HAS_AUTHORITY is removed from the flags of the start-line.
+ * Otherwise, this flag is set and the authority is used to set the value of
+ * the Host header. This function returns 0 on failure and 1 on success.
+ */
+int http_update_host(struct htx *htx, struct htx_sl *sl, const struct ist uri)
+{
+ struct ist authority;
+ struct http_hdr_ctx ctx;
+ struct http_uri_parser parser = http_uri_parser_init(uri);
+
+ if (parser.format == URI_PARSER_FORMAT_EMPTY ||
+ parser.format == URI_PARSER_FORMAT_ASTERISK ||
+ parser.format == URI_PARSER_FORMAT_ABSPATH) {
+ sl->flags &= ~HTX_SL_F_HAS_AUTHORITY;
+ }
+ else {
+ sl->flags |= HTX_SL_F_HAS_AUTHORITY;
+ if (sl->info.req.meth != HTTP_METH_CONNECT) {
+ // absolute-form (RFC7230 #5.3.2)
+ sl->flags |= HTX_SL_F_HAS_SCHM;
+ if (uri.len > 4 && (uri.ptr[0] | 0x20) == 'h')
+ sl->flags |= ((uri.ptr[4] == ':') ? HTX_SL_F_SCHM_HTTP : HTX_SL_F_SCHM_HTTPS);
+
+ authority = http_parse_authority(&parser, 1);
+ if (!authority.len)
+ goto fail;
+ }
+ else {
+ // authority-form (RFC7230 #5.3.3)
+ authority = uri;
+ }
+
+ /* Replace header host value */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("host"), &ctx, 1)) {
+ if (!http_replace_header_value(htx, &ctx, authority))
+ goto fail;
+ }
+
+ }
+ return 1;
+ fail:
+ return 0;
+}
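+
+/* Example (illustrative only): for "GET http://example.com/ HTTP/1.1" the
+ * host header becomes "example.com"; for "CONNECT example.com:443 HTTP/1.1"
+ * it becomes "example.com:443"; origin-form ("GET / HTTP/1.1") and
+ * asterisk-form ("OPTIONS * HTTP/1.1") requests leave it untouched.
+ */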
+
+/* Return in <vptr> and <vlen> the pointer and length of occurrence <occ> of
+ * header <hdr>. If <ctx> is null, lookup is performed over the whole headers.
+ * Otherwise it must contain a valid header context, initialised with
+ * ctx->blk=NULL for the first lookup in a series. If <occ> is positive or
+ * null, occurrence #occ from the beginning (or last ctx) is returned. Occ #0
+ * and #1 are equivalent. If <occ> is negative (and no less than
+ * -MAX_HDR_HISTORY), the occurrence is counted from the last one, which is
+ * -1. The value fetch stops at commas, so this function is suited for use
+ * with list headers.
+ * The return value is 0 if nothing was found, or non-zero otherwise.
+ */
+unsigned int http_get_htx_hdr(const struct htx *htx, const struct ist hdr,
+ int occ, struct http_hdr_ctx *ctx, char **vptr, size_t *vlen)
+{
+ struct http_hdr_ctx local_ctx;
+ struct ist val_hist[MAX_HDR_HISTORY];
+ unsigned int hist_idx;
+ int found;
+
+ if (!ctx) {
+ local_ctx.blk = NULL;
+ ctx = &local_ctx;
+ }
+
+ if (occ >= 0) {
+ /* search from the beginning */
+ while (http_find_header(htx, hdr, ctx, 0)) {
+ occ--;
+ if (occ <= 0) {
+ *vptr = ctx->value.ptr;
+ *vlen = ctx->value.len;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ /* negative occurrence, we scan the whole list then walk back */
+ if (-occ > MAX_HDR_HISTORY)
+ return 0;
+
+ found = hist_idx = 0;
+ while (http_find_header(htx, hdr, ctx, 0)) {
+ val_hist[hist_idx] = ctx->value;
+ if (++hist_idx >= MAX_HDR_HISTORY)
+ hist_idx = 0;
+ found++;
+ }
+ if (-occ > found)
+ return 0;
+
+ /* OK now we have the last occurrence in [hist_idx-1], and we need to
+ * find occurrence -occ. 0 <= hist_idx < MAX_HDR_HISTORY, and we have
+ * -10 <= occ <= -1. So we have to check [hist_idx%MAX_HDR_HISTORY+occ]
+ * to remain in the 0..9 range.
+ */
+ hist_idx += occ + MAX_HDR_HISTORY;
+ if (hist_idx >= MAX_HDR_HISTORY)
+ hist_idx -= MAX_HDR_HISTORY;
+ *vptr = val_hist[hist_idx].ptr;
+ *vlen = val_hist[hist_idx].len;
+ return 1;
+}
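+
+/* Example (illustrative only): with "hdr: a, b" and "hdr: c" in the message,
+ * occ=1 returns "a", occ=3 returns "c", and occ=-1 also returns "c" since
+ * negative occurrences are counted from the last value:
+ *
+ *   char *v; size_t vl;
+ *   if (http_get_htx_hdr(htx, ist("hdr"), -1, NULL, &v, &vl))
+ *       ... <v>/<vl> point to the last comma-delimited value ...
+ */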
+
+/* Return in <vptr> and <vlen> the pointer and length of occurrence <occ> of
+ * header <hdr>. If <ctx> is null, lookup is performed over the whole headers.
+ * Otherwise it must contain a valid header context, initialised with
+ * ctx->blk=NULL for the first lookup in a series. If <occ> is positive or
+ * null, occurrence #occ from the beginning (or last ctx) is returned. Occ #0
+ * and #1 are equivalent. If <occ> is negative (and no less than
+ * -MAX_HDR_HISTORY), the occurrence is counted from the last one, which is
+ * -1. This function differs from http_get_htx_hdr() in that it only returns
+ * full-line header values and does not stop at commas.
+ * The return value is 0 if nothing was found, or non-zero otherwise.
+ */
+unsigned int http_get_htx_fhdr(const struct htx *htx, const struct ist hdr,
+ int occ, struct http_hdr_ctx *ctx, char **vptr, size_t *vlen)
+{
+ struct http_hdr_ctx local_ctx;
+ struct ist val_hist[MAX_HDR_HISTORY];
+ unsigned int hist_idx;
+ int found;
+
+ if (!ctx) {
+ local_ctx.blk = NULL;
+ ctx = &local_ctx;
+ }
+
+ if (occ >= 0) {
+ /* search from the beginning */
+ while (http_find_header(htx, hdr, ctx, 1)) {
+ occ--;
+ if (occ <= 0) {
+ *vptr = ctx->value.ptr;
+ *vlen = ctx->value.len;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ /* negative occurrence, we scan the whole list then walk back */
+ if (-occ > MAX_HDR_HISTORY)
+ return 0;
+
+ found = hist_idx = 0;
+ while (http_find_header(htx, hdr, ctx, 1)) {
+ val_hist[hist_idx] = ctx->value;
+ if (++hist_idx >= MAX_HDR_HISTORY)
+ hist_idx = 0;
+ found++;
+ }
+ if (-occ > found)
+ return 0;
+
+ /* OK now we have the last occurrence in [hist_idx-1], and we need to
+ * find occurrence -occ. 0 <= hist_idx < MAX_HDR_HISTORY, and we have
+ * -10 <= occ <= -1. So we have to check [hist_idx%MAX_HDR_HISTORY+occ]
+ * to remain in the 0..9 range.
+ */
+ hist_idx += occ + MAX_HDR_HISTORY;
+ if (hist_idx >= MAX_HDR_HISTORY)
+ hist_idx -= MAX_HDR_HISTORY;
+ *vptr = val_hist[hist_idx].ptr;
+ *vlen = val_hist[hist_idx].len;
+ return 1;
+}
+
+int http_str_to_htx(struct buffer *buf, struct ist raw, char **errmsg)
+{
+ struct htx *htx;
+ struct htx_sl *sl;
+ struct h1m h1m;
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ union h1_sl h1sl;
+ unsigned int flags = HTX_SL_F_IS_RESP;
+ int ret = 0;
+
+ b_reset(buf);
+ if (!raw.len) {
+ buf->size = 0;
+ buf->area = NULL;
+ return 1;
+ }
+
+ buf->size = global.tune.bufsize;
+ buf->area = malloc(buf->size);
+ if (!buf->area)
+ goto error;
+
+ h1m_init_res(&h1m);
+ h1m.flags |= H1_MF_NO_PHDR;
+ ret = h1_headers_to_hdr_list(raw.ptr, istend(raw),
+ hdrs, sizeof(hdrs)/sizeof(hdrs[0]), &h1m, &h1sl);
+ if (ret <= 0) {
+ memprintf(errmsg, "unable to parse headers (error offset: %d)", h1m.err_pos);
+ goto error;
+ }
+
+ if (unlikely(h1sl.st.v.len != 8)) {
+ memprintf(errmsg, "invalid http version (%.*s)", (int)h1sl.st.v.len, h1sl.st.v.ptr);
+ goto error;
+ }
+ if ((*(h1sl.st.v.ptr + 5) > '1') ||
+ ((*(h1sl.st.v.ptr + 5) == '1') && (*(h1sl.st.v.ptr + 7) >= '1')))
+ h1m.flags |= H1_MF_VER_11;
+
+ if (h1sl.st.status < 200 && (h1sl.st.status == 100 || h1sl.st.status >= 102)) {
+ memprintf(errmsg, "invalid http status code for an error message (%u)",
+ h1sl.st.status);
+ goto error;
+ }
+
+ if (h1sl.st.status == 204 || h1sl.st.status == 304) {
+ /* Responses known to have no body. */
+ h1m.flags &= ~(H1_MF_CLEN|H1_MF_CHNK);
+ h1m.flags |= H1_MF_XFER_LEN;
+ h1m.curr_len = h1m.body_len = 0;
+ }
+ else if (h1m.flags & (H1_MF_CLEN|H1_MF_CHNK))
+ h1m.flags |= H1_MF_XFER_LEN;
+
+ if (h1m.flags & H1_MF_VER_11)
+ flags |= HTX_SL_F_VER_11;
+ if (h1m.flags & H1_MF_XFER_ENC)
+ flags |= HTX_SL_F_XFER_ENC;
+ if (h1m.flags & H1_MF_XFER_LEN) {
+ flags |= HTX_SL_F_XFER_LEN;
+ if (h1m.flags & H1_MF_CHNK) {
+ memprintf(errmsg, "chunk-encoded payload not supported");
+ goto error;
+ }
+ else if (h1m.flags & H1_MF_CLEN) {
+ flags |= HTX_SL_F_CLEN;
+ if (h1m.body_len == 0)
+ flags |= HTX_SL_F_BODYLESS;
+ }
+ else
+ flags |= HTX_SL_F_BODYLESS;
+ }
+
+ if ((flags & HTX_SL_F_BODYLESS) && raw.len > ret) {
+ memprintf(errmsg, "message payload not expected");
+ goto error;
+ }
+ if ((flags & HTX_SL_F_CLEN) && h1m.body_len != (raw.len - ret)) {
+ memprintf(errmsg, "payload size does not match the announced content-length (%lu != %lu)",
+ (unsigned long)(raw.len - ret), (unsigned long)h1m.body_len);
+ goto error;
+ }
+
+ htx = htx_from_buf(buf);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, h1sl.st.v, h1sl.st.c, h1sl.st.r);
+ if (!sl || !htx_add_all_headers(htx, hdrs)) {
+ memprintf(errmsg, "unable to add headers into the HTX message");
+ goto error;
+ }
+ sl->info.res.status = h1sl.st.status;
+
+ while (raw.len > ret) {
+ int sent = htx_add_data(htx, ist2(raw.ptr + ret, raw.len - ret));
+ if (!sent) {
+ memprintf(errmsg, "unable to add payload into the HTX message");
+ goto error;
+ }
+ ret += sent;
+ }
+
+ htx->flags |= HTX_FL_EOM;
+
+ return 1;
+
+error:
+ if (buf->size)
+ free(buf->area);
+ return 0;
+}
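+
+/* Usage sketch (illustrative only, error handling elided): converting a raw
+ * HTTP/1 response into an HTX message stored in a local buffer:
+ *
+ *   struct buffer buf = BUF_NULL;
+ *   char *errmsg = NULL;
+ *   if (!http_str_to_htx(&buf, ist("HTTP/1.0 200 OK\r\n\r\n"), &errmsg))
+ *       ... report and free <errmsg> ...
+ */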
+
+void release_http_reply(struct http_reply *http_reply)
+{
+ struct logformat_node *lf, *lfb;
+ struct http_reply_hdr *hdr, *hdrb;
+
+ if (!http_reply)
+ return;
+
+ ha_free(&http_reply->ctype);
+ list_for_each_entry_safe(hdr, hdrb, &http_reply->hdrs, list) {
+ LIST_DELETE(&hdr->list);
+ list_for_each_entry_safe(lf, lfb, &hdr->value, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ istfree(&hdr->name);
+ free(hdr);
+ }
+
+ if (http_reply->type == HTTP_REPLY_ERRFILES) {
+ ha_free(&http_reply->body.http_errors);
+ }
+ else if (http_reply->type == HTTP_REPLY_RAW)
+ chunk_destroy(&http_reply->body.obj);
+ else if (http_reply->type == HTTP_REPLY_LOGFMT) {
+ list_for_each_entry_safe(lf, lfb, &http_reply->body.fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ }
+ free(http_reply);
+}
+
+static int http_htx_init(void)
+{
+ struct buffer chk;
+ struct ist raw;
+ char *errmsg = NULL;
+ int rc;
+ int err_code = 0;
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (!http_err_msgs[rc]) {
+ ha_alert("Internal error: no default message defined for HTTP return code %d", rc);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ continue;
+ }
+
+ raw = ist(http_err_msgs[rc]);
+ if (!http_str_to_htx(&chk, raw, &errmsg)) {
+ ha_alert("Internal error: invalid default message for HTTP return code %d: %s.\n",
+ http_err_codes[rc], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ else if (errmsg) {
+ ha_warning("invalid default message for HTTP return code %d: %s.\n", http_err_codes[rc], errmsg);
+ err_code |= ERR_WARN;
+ }
+
+ /* Reset errmsg */
+ ha_free(&errmsg);
+
+ http_err_chunks[rc] = chk;
+ http_err_replies[rc].type = HTTP_REPLY_ERRMSG;
+ http_err_replies[rc].status = http_err_codes[rc];
+ http_err_replies[rc].ctype = NULL;
+ LIST_INIT(&http_err_replies[rc].hdrs);
+ http_err_replies[rc].body.errmsg = &http_err_chunks[rc];
+ }
+end:
+ return err_code;
+}
+
+static void http_htx_deinit(void)
+{
+ struct http_errors *http_errs, *http_errsb;
+ struct http_reply *http_rep, *http_repb;
+ struct ebpt_node *node, *next;
+ struct http_error_msg *http_errmsg;
+ int rc;
+
+ node = ebpt_first(&http_error_messages);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ http_errmsg = container_of(node, typeof(*http_errmsg), node);
+ chunk_destroy(&http_errmsg->msg);
+ free(node->key);
+ free(http_errmsg);
+ node = next;
+ }
+
+ list_for_each_entry_safe(http_errs, http_errsb, &http_errors_list, list) {
+ free(http_errs->conf.file);
+ free(http_errs->id);
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++)
+ release_http_reply(http_errs->replies[rc]);
+ LIST_DELETE(&http_errs->list);
+ free(http_errs);
+ }
+
+ list_for_each_entry_safe(http_rep, http_repb, &http_replies_list, list) {
+ LIST_DELETE(&http_rep->list);
+ release_http_reply(http_rep);
+ }
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++)
+ chunk_destroy(&http_err_chunks[rc]);
+}
+
+REGISTER_CONFIG_POSTPARSER("http_htx", http_htx_init);
+REGISTER_POST_DEINIT(http_htx_deinit);
+
+/* Reads the content of the error file <file> and converts it into an HTX
+ * message. On success, the HTX message is returned. On error, NULL is
+ * returned and an error message is written into the <errmsg> buffer.
+ */
+struct buffer *http_load_errorfile(const char *file, char **errmsg)
+{
+ struct buffer *buf = NULL;
+ struct buffer chk;
+ struct ebpt_node *node;
+ struct http_error_msg *http_errmsg;
+ struct stat stat;
+ char *err = NULL;
+ int errnum, errlen;
+ int fd = -1;
+
+ /* already loaded */
+ node = ebis_lookup_len(&http_error_messages, file, strlen(file));
+ if (node) {
+ http_errmsg = container_of(node, typeof(*http_errmsg), node);
+ buf = &http_errmsg->msg;
+ goto out;
+ }
+
+ /* Read the error file content */
+ fd = open(file, O_RDONLY);
+ if ((fd < 0) || (fstat(fd, &stat) < 0)) {
+ memprintf(errmsg, "error opening file '%s'.", file);
+ goto out;
+ }
+
+ if (stat.st_size <= global.tune.bufsize)
+ errlen = stat.st_size;
+ else {
+ ha_warning("custom error message file '%s' larger than %d bytes. Truncating.\n",
+ file, global.tune.bufsize);
+ errlen = global.tune.bufsize;
+ }
+
+ err = malloc(errlen);
+ if (!err) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+
+ errnum = read(fd, err, errlen);
+ if (errnum != errlen) {
+ memprintf(errmsg, "error reading file '%s'.", file);
+ goto out;
+ }
+
+ /* Create the node corresponding to the error file */
+ http_errmsg = calloc(1, sizeof(*http_errmsg));
+ if (!http_errmsg) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+ http_errmsg->node.key = strdup(file);
+ if (!http_errmsg->node.key) {
+ memprintf(errmsg, "out of memory.");
+ free(http_errmsg);
+ goto out;
+ }
+
+ /* Convert the error file into an HTX message */
+ if (!http_str_to_htx(&chk, ist2(err, errlen), errmsg)) {
+ memprintf(errmsg, "'%s': %s", file, *errmsg);
+ free(http_errmsg->node.key);
+ free(http_errmsg);
+ goto out;
+ }
+
+ /* Insert the node in the tree and return the HTX message */
+ http_errmsg->msg = chk;
+ ebis_insert(&http_error_messages, &http_errmsg->node);
+ buf = &http_errmsg->msg;
+
+ out:
+ if (fd >= 0)
+ close(fd);
+ free(err);
+ return buf;
+}
+
+/* Convert the raw http message <msg> into an HTX message. On success, the HTX
+ * message is returned. On error, NULL is returned and an error message is
+ * written into the <errmsg> buffer.
+ */
+struct buffer *http_load_errormsg(const char *key, const struct ist msg, char **errmsg)
+{
+ struct buffer *buf = NULL;
+ struct buffer chk;
+ struct ebpt_node *node;
+ struct http_error_msg *http_errmsg;
+
+ /* already loaded */
+ node = ebis_lookup_len(&http_error_messages, key, strlen(key));
+ if (node) {
+ http_errmsg = container_of(node, typeof(*http_errmsg), node);
+ buf = &http_errmsg->msg;
+ goto out;
+ }
+ /* Create the node corresponding to the error file */
+ http_errmsg = calloc(1, sizeof(*http_errmsg));
+ if (!http_errmsg) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+ http_errmsg->node.key = strdup(key);
+ if (!http_errmsg->node.key) {
+ memprintf(errmsg, "out of memory.");
+ free(http_errmsg);
+ goto out;
+ }
+
+ /* Convert the error file into an HTX message */
+ if (!http_str_to_htx(&chk, msg, errmsg)) {
+ memprintf(errmsg, "invalid error message: %s", *errmsg);
+ free(http_errmsg->node.key);
+ free(http_errmsg);
+ goto out;
+ }
+
+ /* Insert the node in the tree and return the HTX message */
+ http_errmsg->msg = chk;
+ ebis_insert(&http_error_messages, &http_errmsg->node);
+ buf = &http_errmsg->msg;
+ out:
+ return buf;
+}
+
+/* This function parses the raw HTTP error file <file> for the status code
+ * <status>. It returns NULL if there is any error, otherwise it returns the
+ * corresponding HTX message.
+ */
+struct buffer *http_parse_errorfile(int status, const char *file, char **errmsg)
+{
+ struct buffer *buf = NULL;
+ int rc;
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == status) {
+ buf = http_load_errorfile(file, errmsg);
+ break;
+ }
+ }
+
+ if (rc >= HTTP_ERR_SIZE)
+ memprintf(errmsg, "status code '%d' not handled.", status);
+ return buf;
+}
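+
+/* Configuration usage (illustrative only; the "errorfile" proxy keyword
+ * parsed below relies on this helper). The file must contain a full raw
+ * HTTP response:
+ *
+ *   errorfile 503 /etc/haproxy/errors/503.http
+ */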
+
+/* This function creates an HTX error message corresponding to a redirect for
+ * the status code <status>. <url> is used as the location url for the
+ * redirect. <errloc> is used to know if it is a 302 or a 303 redirect. It
+ * returns NULL if there is any error, otherwise it returns the corresponding
+ * HTX message.
+ */
+struct buffer *http_parse_errorloc(int errloc, int status, const char *url, char **errmsg)
+{
+ static const char *HTTP_302 =
+ "HTTP/1.1 302 Found\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-length: 0\r\n"
+ "Location: "; /* not terminated since it will be concatenated with the URL */
+ static const char *HTTP_303 =
+ "HTTP/1.1 303 See Other\r\n"
+ "Cache-Control: no-cache\r\n"
+ "Content-length: 0\r\n"
+ "Location: "; /* not terminated since it will be concatenated with the URL */
+
+ struct buffer *buf = NULL;
+ const char *msg;
+ char *key = NULL, *err = NULL;
+ int rc, errlen;
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == status) {
+ /* Create the error key */
+ if (!memprintf(&key, "errorloc%d %s", errloc, url)) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+ /* Create the error message */
+ msg = (errloc == 302 ? HTTP_302 : HTTP_303);
+ errlen = strlen(msg) + strlen(url) + 5;
+ err = malloc(errlen);
+ if (!err) {
+ memprintf(errmsg, "out of memory.");
+ goto out;
+ }
+ errlen = snprintf(err, errlen, "%s%s\r\n\r\n", msg, url);
+
+ /* Load it */
+ buf = http_load_errormsg(key, ist2(err, errlen), errmsg);
+ break;
+ }
+ }
+
+ if (rc >= HTTP_ERR_SIZE)
+ memprintf(errmsg, "status code '%d' not handled.", status);
+out:
+ free(key);
+ free(err);
+ return buf;
+}
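+
+/* Configuration usage (illustrative only): both directives build the small
+ * redirect message above, the only difference being the 302/303 status:
+ *
+ *   errorloc302 503 http://www.example.com/maintenance
+ *   errorloc303 503 http://www.example.com/maintenance
+ */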
+
+/* Check an "http reply" and, for replies referencing an http-errors section,
+ * try to find the right section and the right error message in this section.
+ * If found, the reply is updated. If the http-errors section exists but the
+ * error message is not found, no error message is set, so that the default one
+ * is used as a fallback. Otherwise (unknown section) an error is returned.
+ *
+ * The function returns 1 on success, otherwise it returns 0 and <errmsg> is
+ * filled.
+ */
+int http_check_http_reply(struct http_reply *reply, struct proxy *px, char **errmsg)
+{
+ struct http_errors *http_errs;
+ int ret = 1;
+
+ if (reply->type != HTTP_REPLY_ERRFILES)
+ goto end;
+
+ list_for_each_entry(http_errs, &http_errors_list, list) {
+ if (strcmp(http_errs->id, reply->body.http_errors) == 0) {
+ reply->type = HTTP_REPLY_INDIRECT;
+ free(reply->body.http_errors);
+ reply->body.reply = http_errs->replies[http_get_status_idx(reply->status)];
+ if (!reply->body.reply)
+ ha_warning("Proxy '%s': status '%d' referenced by an http reply "
+ "not declared in http-errors section '%s'.\n",
+ px->id, reply->status, http_errs->id);
+ break;
+ }
+ }
+
+ if (&http_errs->list == &http_errors_list) {
+ memprintf(errmsg, "unknown http-errors section '%s' referenced by an http reply ",
+ reply->body.http_errors);
+ ret = 0;
+ }
+
+ end:
+ return ret;
+}
+
+/* Parse an "http reply". It returns the reply on success or NULL on error. This
+ * function creates one of the following http replies :
+ *
+ * - HTTP_REPLY_EMPTY : dummy response, no payload
+ * - HTTP_REPLY_ERRMSG : implicit error message depending on the status code or explicit one
+ * - HTTP_REPLY_ERRFILES : points on an http-errors section (resolved during post-parsing)
+ * - HTTP_REPLY_RAW : explicit file object ('file' argument)
+ * - HTTP_REPLY_LOGFMT : explicit log-format string ('content' argument)
+ *
+ * The content-type must be defined for non-empty payload. It is ignored for
+ * error messages (implicit or explicit). When an http-errors section is
+ * referenced (HTTP_REPLY_ERRFILES), the real error message should be resolved
+ * during the configuration validity check or dynamically. It is the caller
+ * responsibility to choose. If no status code is configured, <default_status>
+ * is set.
+ */
+struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struct proxy *px,
+ int default_status, char **errmsg)
+{
+ struct logformat_node *lf, *lfb;
+ struct http_reply *reply = NULL;
+ struct http_reply_hdr *hdr, *hdrb;
+ struct stat stat;
+ const char *act_arg = NULL;
+ char *obj = NULL;
+ int cur_arg, cap = 0, objlen = 0, fd = -1;
+
+
+ reply = calloc(1, sizeof(*reply));
+ if (!reply) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ LIST_INIT(&reply->hdrs);
+ reply->type = HTTP_REPLY_EMPTY;
+ reply->status = default_status;
+
+ if (px->conf.args.ctx == ARGC_HERR)
+ cap = (SMP_VAL_REQUEST | SMP_VAL_RESPONSE);
+ else {
+ if (px->cap & PR_CAP_FE)
+ cap |= ((px->conf.args.ctx == ARGC_HRQ) ? SMP_VAL_FE_HRQ_HDR : SMP_VAL_FE_HRS_HDR);
+ if (px->cap & PR_CAP_BE)
+ cap |= ((px->conf.args.ctx == ARGC_HRQ) ? SMP_VAL_BE_HRQ_HDR : SMP_VAL_BE_HRS_HDR);
+ }
+
+ cur_arg = *orig_arg;
+ while (*args[cur_arg]) {
+ if (strcmp(args[cur_arg], "status") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <status_code> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ reply->status = atol(args[cur_arg]);
+ if (reply->status < 200 || reply->status > 599) {
+ memprintf(errmsg, "Unexpected status code '%d'", reply->status);
+ goto error;
+ }
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "content-type") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <ctype> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ free(reply->ctype);
+ reply->ctype = strdup(args[cur_arg]);
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "errorfiles") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <name> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ reply->body.http_errors = strdup(args[cur_arg]);
+ if (!reply->body.http_errors) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ reply->type = HTTP_REPLY_ERRFILES;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "default-errorfiles") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ reply->type = HTTP_REPLY_ERRMSG;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "errorfile") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <fmt> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ reply->body.errmsg = http_load_errorfile(args[cur_arg], errmsg);
+ if (!reply->body.errmsg) {
+ goto error;
+ }
+ reply->type = HTTP_REPLY_ERRMSG;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "file") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <file> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ fd = open(args[cur_arg], O_RDONLY);
+ if ((fd < 0) || (fstat(fd, &stat) < 0)) {
+ memprintf(errmsg, "error opening file '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (stat.st_size > global.tune.bufsize) {
+ memprintf(errmsg, "file '%s' exceeds the buffer size (%lld > %d)",
+ args[cur_arg], (long long)stat.st_size, global.tune.bufsize);
+ goto error;
+ }
+ objlen = stat.st_size;
+ obj = malloc(objlen);
+ if (!obj || read(fd, obj, objlen) != objlen) {
+ memprintf(errmsg, "error reading file '%s'", args[cur_arg]);
+ goto error;
+ }
+ close(fd);
+ fd = -1;
+ reply->type = HTTP_REPLY_RAW;
+ chunk_initlen(&reply->body.obj, obj, global.tune.bufsize, objlen);
+ obj = NULL;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "string") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <str> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ obj = strdup(args[cur_arg]);
+ objlen = strlen(args[cur_arg]);
+ if (!obj) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ reply->type = HTTP_REPLY_RAW;
+ chunk_initlen(&reply->body.obj, obj, global.tune.bufsize, objlen);
+ obj = NULL;
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "lf-file") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <file> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ fd = open(args[cur_arg], O_RDONLY);
+ if ((fd < 0) || (fstat(fd, &stat) < 0)) {
+ memprintf(errmsg, "error opening file '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (stat.st_size > global.tune.bufsize) {
+ memprintf(errmsg, "file '%s' exceeds the buffer size (%lld > %d)",
+ args[cur_arg], (long long)stat.st_size, global.tune.bufsize);
+ goto error;
+ }
+ objlen = stat.st_size;
+ obj = malloc(objlen + 1);
+ if (!obj || read(fd, obj, objlen) != objlen) {
+ memprintf(errmsg, "error reading file '%s'", args[cur_arg]);
+ goto error;
+ }
+ close(fd);
+ fd = -1;
+ obj[objlen] = '\0';
+ reply->type = HTTP_REPLY_LOGFMT;
+ LIST_INIT(&reply->body.fmt);
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "lf-string") == 0) {
+ if (reply->type != HTTP_REPLY_EMPTY) {
+ memprintf(errmsg, "unexpected '%s' argument, '%s' already defined", args[cur_arg], act_arg);
+ goto error;
+ }
+ act_arg = args[cur_arg];
+ cur_arg++;
+ if (!*args[cur_arg]) {
+ memprintf(errmsg, "'%s' expects <fmt> as argument", args[cur_arg-1]);
+ goto error;
+ }
+ obj = strdup(args[cur_arg]);
+ objlen = strlen(args[cur_arg]);
+ if (!obj) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ reply->type = HTTP_REPLY_LOGFMT;
+ LIST_INIT(&reply->body.fmt);
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "hdr") == 0) {
+ cur_arg++;
+ if (!*args[cur_arg] || !*args[cur_arg+1]) {
+ memprintf(errmsg, "'%s' expects <name> and <value> as arguments", args[cur_arg-1]);
+ goto error;
+ }
+ if (strcasecmp(args[cur_arg], "content-length") == 0 ||
+ strcasecmp(args[cur_arg], "transfer-encoding") == 0 ||
+ strcasecmp(args[cur_arg], "content-type") == 0) {
+ ha_warning("parsing [%s:%d] : header '%s' always ignored by the http reply.\n",
+ px->conf.args.file, px->conf.args.line, args[cur_arg]);
+ cur_arg += 2;
+ continue;
+ }
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(errmsg, "'%s' : out of memory", args[cur_arg-1]);
+ goto error;
+ }
+ LIST_APPEND(&reply->hdrs, &hdr->list);
+ LIST_INIT(&hdr->value);
+ hdr->name = ist(strdup(args[cur_arg]));
+ if (!isttest(hdr->name)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ if (!parse_logformat_string(args[cur_arg+1], px, &hdr->value, LOG_OPT_HTTP, cap, errmsg))
+ goto error;
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+ cur_arg += 2;
+ }
+ else
+ break;
+ }
+
+ if (reply->type == HTTP_REPLY_EMPTY) { /* no payload */
+ if (reply->ctype) {
+ ha_warning("parsing [%s:%d] : content-type '%s' ignored by the http reply because"
+ " neither errorfile nor payload defined.\n",
+ px->conf.args.file, px->conf.args.line, reply->ctype);
+ ha_free(&reply->ctype);
+ }
+ }
+ else if (reply->type == HTTP_REPLY_ERRFILES || reply->type == HTTP_REPLY_ERRMSG) { /* errorfiles or errorfile */
+
+ if (reply->type != HTTP_REPLY_ERRMSG || !reply->body.errmsg) {
+ /* default errorfile or errorfiles: check the status */
+ int rc;
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == reply->status)
+ break;
+ }
+
+ if (rc >= HTTP_ERR_SIZE) {
+ memprintf(errmsg, "status code '%d' not handled by default with '%s' argument.",
+ reply->status, act_arg);
+ goto error;
+ }
+ }
+
+ if (reply->ctype) {
+ ha_warning("parsing [%s:%d] : content-type '%s' ignored by the http reply when used "
+ "with an erorrfile.\n",
+ px->conf.args.file, px->conf.args.line, reply->ctype);
+ ha_free(&reply->ctype);
+ }
+ if (!LIST_ISEMPTY(&reply->hdrs)) {
+ ha_warning("parsing [%s:%d] : hdr parameters ignored by the http reply when used "
+ "with an erorrfile.\n",
+ px->conf.args.file, px->conf.args.line);
+ list_for_each_entry_safe(hdr, hdrb, &reply->hdrs, list) {
+ LIST_DELETE(&hdr->list);
+ list_for_each_entry_safe(lf, lfb, &hdr->value, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ istfree(&hdr->name);
+ free(hdr);
+ }
+ }
+ }
+ else if (reply->type == HTTP_REPLY_RAW) { /* explicit payload using 'file' or 'string' parameter */
+ if ((reply->status == 204 || reply->status == 304) && objlen) {
+ memprintf(errmsg, "No body expected for %d responses", reply->status);
+ goto error;
+ }
+ if (!reply->ctype && objlen) {
+ memprintf(errmsg, "a content type must be defined when non-empty payload is configured");
+ goto error;
+ }
+ if (reply->ctype && !b_data(&reply->body.obj)) {
+ ha_warning("parsing [%s:%d] : content-type '%s' ignored by the http reply when used "
+ "with an empty payload.\n",
+ px->conf.args.file, px->conf.args.line, reply->ctype);
+ ha_free(&reply->ctype);
+ }
+ if (b_room(&reply->body.obj) < global.tune.maxrewrite) {
+ ha_warning("parsing [%s:%d] : http reply payload runs over the buffer space reserved to headers rewriting."
+ " It may lead to internal errors if strict rewriting mode is enabled.\n",
+ px->conf.args.file, px->conf.args.line);
+ }
+ }
+ else if (reply->type == HTTP_REPLY_LOGFMT) { /* log-format payload using 'lf-file' or 'lf-string' parameter */
+ LIST_INIT(&reply->body.fmt);
+ if ((reply->status == 204 || reply->status == 304)) {
+ memprintf(errmsg, "No body expected for %d responses", reply->status);
+ goto error;
+ }
+ if (!reply->ctype) {
+ memprintf(errmsg, "a content type must be defined with a log-format payload");
+ goto error;
+ }
+ if (!parse_logformat_string(obj, px, &reply->body.fmt, LOG_OPT_HTTP, cap, errmsg))
+ goto error;
+
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+ }
+
+ free(obj);
+ *orig_arg = cur_arg;
+ return reply;
+
+ error:
+ free(obj);
+ if (fd >= 0)
+ close(fd);
+ release_http_reply(reply);
+ return NULL;
+}
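+
+/* Configuration usage (illustrative only): the same parser backs the
+ * "http-request return" action, e.g.:
+ *
+ *   http-request return status 403 content-type text/plain lf-string "Rejected from %[src]"
+ */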
+
+/* Apply scheme-based normalization as described in RFC 3986 section 6.3.2.
+ * Returns 0 if no error has been found else non-zero.
+ *
+ * The normalization is processed on the target-uri provided that it is in
+ * absolute-form. In the case where the target-uri was normalized, every host
+ * header value found is also replaced by the normalized hostname. This
+ * assumes that the target-uri and the host headers were properly identified
+ * as similar before calling this function.
+ */
+int http_scheme_based_normalize(struct htx *htx)
+{
+ struct http_hdr_ctx ctx;
+ struct htx_sl *sl;
+ struct ist uri, scheme, authority, host, port;
+ struct http_uri_parser parser;
+
+ sl = http_get_stline(htx);
+
+ if (!sl || !(sl->flags & (HTX_SL_F_HAS_SCHM|HTX_SL_F_HAS_AUTHORITY)))
+ return 0;
+
+ uri = htx_sl_req_uri(sl);
+
+ parser = http_uri_parser_init(uri);
+ scheme = http_parse_scheme(&parser);
+ /* if no scheme found, no normalization to proceed */
+ if (!isttest(scheme))
+ return 0;
+
+ /* Extract the port if present in authority */
+ authority = http_parse_authority(&parser, 1);
+ port = http_get_host_port(authority);
+ if (!isttest(port)) {
+ /* if no port found, no normalization to proceed */
+ return 0;
+ }
+ host = isttrim(authority, istlen(authority) - istlen(port) - 1);
+
+ if (http_is_default_port(scheme, port)) {
+ /* reconstruct the uri with removal of the port */
+ struct buffer *temp = get_trash_chunk();
+ struct ist meth, vsn;
+
+ /* meth */
+ chunk_memcat(temp, HTX_SL_REQ_MPTR(sl), HTX_SL_REQ_MLEN(sl));
+ meth = ist2(temp->area, HTX_SL_REQ_MLEN(sl));
+
+ /* vsn */
+ chunk_memcat(temp, HTX_SL_REQ_VPTR(sl), HTX_SL_REQ_VLEN(sl));
+ vsn = ist2(temp->area + meth.len, HTX_SL_REQ_VLEN(sl));
+
+ /* reconstruct uri without port */
+ chunk_memcat(temp, uri.ptr, authority.ptr - uri.ptr);
+ chunk_istcat(temp, host);
+ chunk_memcat(temp, istend(authority), istend(uri) - istend(authority));
+ uri = ist2(temp->area + meth.len + vsn.len, host.len + uri.len - authority.len); /* uri */
+
+ http_replace_stline(htx, meth, uri, vsn);
+
+ /* replace every host headers values by the normalized host */
+ ctx.blk = NULL;
+ while (http_find_header(htx, ist("host"), &ctx, 0)) {
+ if (!http_replace_header_value(htx, &ctx, host))
+ goto fail;
+ }
+ }
+
+ return 0;
+
+ fail:
+ return 1;
+}
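+
+/* Example (illustrative only): an absolute-form request such as
+ * "GET http://example.com:80/ HTTP/1.1" with "Host: example.com:80" is
+ * rewritten to "GET http://example.com/ HTTP/1.1" with "Host: example.com",
+ * since 80 is the default port for the "http" scheme. Non-default ports are
+ * left untouched.
+ */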
+
+/* First step function to merge multiple cookie headers in a single entry.
+ *
+ * Use it for each cookie header at <idx> index over HTTP headers in <list>.
+ * <first> and <last> are state variables used internally and must be
+ * initialized to -1 before the first invocation.
+ */
+void http_cookie_register(struct http_hdr *list, int idx, int *first, int *last)
+{
+ /* Build a linked list of cookie headers. Use the header length to point
+ * to the next one. The last entry will contain -1.
+ */
+
+ /* Caller is responsible to initialize *first and *last to -1 on first
+ * invocation. Both will thus be set to a valid index after it.
+ */
+ BUG_ON(*first > 0 && *last < 0);
+
+ /* Mark the current end of cookie linked list. */
+ list[idx].n.len = -1;
+ if (*first < 0) {
+ /* Save first found cookie for http_cookie_merge call. */
+ *first = idx;
+ }
+ else {
+ /* Update linked list of cookies. */
+ list[*last].n.len = idx;
+ }
+
+ *last = idx;
+}
+
+/* Second step to merge multiple cookie headers in a single entry.
+ *
+ * Use it when looping over HTTP headers is done and <htx> message is built.
+ * This will concatenate each cookie headers present from <list> directly into
+ * <htx> message. <first> is reused from previous http_cookie_register
+ * invocation.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int http_cookie_merge(struct htx *htx, struct http_hdr *list, int first)
+{
+ uint32_t fs; /* free space */
+ uint32_t bs; /* block size */
+ uint32_t vl; /* value len */
+ uint32_t tl; /* total length */
+ struct htx_blk *blk;
+
+ if (first < 0)
+ return 0;
+
+ blk = htx_add_header(htx, ist("cookie"), list[first].v);
+ if (!blk)
+ return 1;
+
+ tl = list[first].v.len;
+ fs = htx_free_data_space(htx);
+ bs = htx_get_blksz(blk);
+
+ /* for each extra cookie, we'll extend the cookie's value and insert
+ * "; " before the new value.
+ */
+ fs += tl; /* first one is already counted */
+
+ /* Loop over cookies linked list built from http_cookie_register. */
+ while ((first = list[first].n.len) >= 0) {
+ vl = list[first].v.len;
+ tl += vl + 2;
+ if (tl > fs)
+ return 1;
+
+ htx_change_blk_value_len(htx, blk, tl);
+ *(char *)(htx_get_blk_ptr(htx, blk) + bs + 0) = ';';
+ *(char *)(htx_get_blk_ptr(htx, blk) + bs + 1) = ' ';
+ memcpy(htx_get_blk_ptr(htx, blk) + bs + 2,
+ list[first].v.ptr, vl);
+ bs += vl + 2;
+ }
+
+ return 0;
+}
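+
+/* Usage sketch (illustrative only; <list> and <hdr_num> are assumed to come
+ * from the caller's header decoding loop, as done in the H2/H3 muxes):
+ *
+ *   int i, first = -1, last = -1;
+ *   for (i = 0; i < hdr_num; i++)
+ *       if (isteq(list[i].n, ist("cookie")))
+ *           http_cookie_register(list, i, &first, &last);
+ *   ... transfer the other headers into <htx> ...
+ *   if (http_cookie_merge(htx, list, first))
+ *       goto fail;
+ */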
+
+/* Parses the "errorloc[302|303]" proxy keyword */
+static int proxy_parse_errorloc(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct conf_errors *conf_err;
+ struct http_reply *reply;
+ struct buffer *msg;
+ int errloc, status;
+ int ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_FE | PR_CAP_BE, file, line, args[0], NULL)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (*(args[1]) == 0 || *(args[2]) == 0) {
+ memprintf(errmsg, "%s : expects <status_code> and <url> as arguments.\n", args[0]);
+ ret = -1;
+ goto out;
+ }
+
+ status = atol(args[1]);
+ errloc = (strcmp(args[0], "errorloc303") == 0 ? 303 : 302);
+ msg = http_parse_errorloc(errloc, status, args[2], errmsg);
+ if (!msg) {
+ memprintf(errmsg, "%s : %s", args[0], *errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ reply = calloc(1, sizeof(*reply));
+ if (!reply) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ ret = -1;
+ goto out;
+ }
+ reply->type = HTTP_REPLY_ERRMSG;
+ reply->status = status;
+ reply->ctype = NULL;
+ LIST_INIT(&reply->hdrs);
+ reply->body.errmsg = msg;
+ LIST_APPEND(&http_replies_list, &reply->list);
+
+ conf_err = calloc(1, sizeof(*conf_err));
+ if (!conf_err) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ free(reply);
+ ret = -1;
+ goto out;
+ }
+ conf_err->type = 1;
+ conf_err->info.errorfile.status = status;
+ conf_err->info.errorfile.reply = reply;
+
+ conf_err->file = strdup(file);
+ conf_err->line = line;
+ LIST_APPEND(&curpx->conf.errors, &conf_err->list);
+
+ /* handle warning message */
+ if (*errmsg)
+ ret = 1;
+ out:
+ return ret;
+
+}
+
+/* Parses the "errorfile" proxy keyword */
+static int proxy_parse_errorfile(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct conf_errors *conf_err;
+ struct http_reply *reply;
+ struct buffer *msg;
+ int status;
+ int ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_FE | PR_CAP_BE, file, line, args[0], NULL)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (*(args[1]) == 0 || *(args[2]) == 0) {
+ memprintf(errmsg, "%s : expects <status_code> and <file> as arguments.\n", args[0]);
+ ret = -1;
+ goto out;
+ }
+
+ status = atol(args[1]);
+ msg = http_parse_errorfile(status, args[2], errmsg);
+ if (!msg) {
+ memprintf(errmsg, "%s : %s", args[0], *errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ reply = calloc(1, sizeof(*reply));
+ if (!reply) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ ret = -1;
+ goto out;
+ }
+ reply->type = HTTP_REPLY_ERRMSG;
+ reply->status = status;
+ reply->ctype = NULL;
+ LIST_INIT(&reply->hdrs);
+ reply->body.errmsg = msg;
+ LIST_APPEND(&http_replies_list, &reply->list);
+
+ conf_err = calloc(1, sizeof(*conf_err));
+ if (!conf_err) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ free(reply);
+ ret = -1;
+ goto out;
+ }
+ conf_err->type = 1;
+ conf_err->info.errorfile.status = status;
+ conf_err->info.errorfile.reply = reply;
+ conf_err->file = strdup(file);
+ conf_err->line = line;
+ LIST_APPEND(&curpx->conf.errors, &conf_err->list);
+
+ /* handle warning message */
+ if (*errmsg)
+ ret = 1;
+ out:
+ return ret;
+
+}
+
+/* Parses the "errorfiles" proxy keyword */
+static int proxy_parse_errorfiles(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct conf_errors *conf_err = NULL;
+ char *name = NULL;
+ int rc, ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_FE | PR_CAP_BE, file, line, args[0], NULL)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (!*(args[1])) {
+ memprintf(err, "%s : expects <name> as argument.", args[0]);
+ ret = -1;
+ goto out;
+ }
+
+ name = strdup(args[1]);
+ conf_err = calloc(1, sizeof(*conf_err));
+ if (!name || !conf_err) {
+ memprintf(err, "%s : out of memory.", args[0]);
+ goto error;
+ }
+ conf_err->type = 0;
+
+ conf_err->info.errorfiles.name = name;
+ if (!*(args[2])) {
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++)
+ conf_err->info.errorfiles.status[rc] = 1;
+ }
+ else {
+ int cur_arg, status;
+ for (cur_arg = 2; *(args[cur_arg]); cur_arg++) {
+ status = atol(args[cur_arg]);
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == status) {
+ conf_err->info.errorfiles.status[rc] = 2;
+ break;
+ }
+ }
+ if (rc >= HTTP_ERR_SIZE) {
+ memprintf(err, "%s : status code '%d' not handled.", args[0], status);
+ goto error;
+ }
+ }
+ }
+ conf_err->file = strdup(file);
+ conf_err->line = line;
+ LIST_APPEND(&curpx->conf.errors, &conf_err->list);
+ out:
+ return ret;
+
+ error:
+ free(name);
+ free(conf_err);
+ ret = -1;
+ goto out;
+}
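+
+/* Configuration usage (illustrative only): import all messages from an
+ * http-errors section, or only some statuses:
+ *
+ *   errorfiles myerrors
+ *   errorfiles myerrors 403 404
+ */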
+
+/* Parses the "http-error" proxy keyword */
+static int proxy_parse_http_error(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct conf_errors *conf_err;
+ struct http_reply *reply = NULL;
+ int rc, cur_arg, ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_FE | PR_CAP_BE, file, line, args[0], NULL)) {
+ ret = 1;
+ goto out;
+ }
+
+ cur_arg = 1;
+ curpx->conf.args.ctx = ARGC_HERR;
+ reply = http_parse_http_reply((const char **)args, &cur_arg, curpx, 0, errmsg);
+ if (!reply) {
+ memprintf(errmsg, "%s : %s", args[0], *errmsg);
+ goto error;
+ }
+ else if (!reply->status) {
+ memprintf(errmsg, "%s : expects at least a <status> as arguments.\n", args[0]);
+ goto error;
+ }
+
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (http_err_codes[rc] == reply->status)
+ break;
+ }
+
+ if (rc >= HTTP_ERR_SIZE) {
+ memprintf(errmsg, "%s: status code '%d' not handled.", args[0], reply->status);
+ goto error;
+ }
+ if (*args[cur_arg]) {
+ memprintf(errmsg, "%s : unknown keyword '%s'.", args[0], args[cur_arg]);
+ goto error;
+ }
+
+ conf_err = calloc(1, sizeof(*conf_err));
+ if (!conf_err) {
+ memprintf(errmsg, "%s : out of memory.", args[0]);
+ goto error;
+ }
+ if (reply->type == HTTP_REPLY_ERRFILES) {
+ int rc = http_get_status_idx(reply->status);
+
+ conf_err->type = 2;
+ conf_err->info.errorfiles.name = reply->body.http_errors;
+ conf_err->info.errorfiles.status[rc] = 2;
+ reply->body.http_errors = NULL;
+ release_http_reply(reply);
+ }
+ else {
+ conf_err->type = 1;
+ conf_err->info.errorfile.status = reply->status;
+ conf_err->info.errorfile.reply = reply;
+ LIST_APPEND(&http_replies_list, &reply->list);
+ }
+ conf_err->file = strdup(file);
+ conf_err->line = line;
+ LIST_APPEND(&curpx->conf.errors, &conf_err->list);
+
+ /* handle warning message */
+ if (*errmsg)
+ ret = 1;
+ out:
+ return ret;
+
+ error:
+ release_http_reply(reply);
+ ret = -1;
+ goto out;
+
+}
+
+/* Check "errorfiles" proxy keyword */
+static int proxy_check_errors(struct proxy *px)
+{
+ struct conf_errors *conf_err, *conf_err_back;
+ struct http_errors *http_errs;
+ int rc, err = ERR_NONE;
+
+ list_for_each_entry_safe(conf_err, conf_err_back, &px->conf.errors, list) {
+ if (conf_err->type == 1) {
+ /* errorfile */
+ rc = http_get_status_idx(conf_err->info.errorfile.status);
+ px->replies[rc] = conf_err->info.errorfile.reply;
+
+ /* For proxy, to rely on default replies, just don't reference a reply */
+ if (px->replies[rc]->type == HTTP_REPLY_ERRMSG && !px->replies[rc]->body.errmsg)
+ px->replies[rc] = NULL;
+ }
+ else {
+ /* errorfiles */
+ list_for_each_entry(http_errs, &http_errors_list, list) {
+ if (strcmp(http_errs->id, conf_err->info.errorfiles.name) == 0)
+ break;
+ }
+
+ /* unknown http-errors section */
+ if (&http_errs->list == &http_errors_list) {
+ ha_alert("proxy '%s': unknown http-errors section '%s' (at %s:%d).\n",
+ px->id, conf_err->info.errorfiles.name, conf_err->file, conf_err->line);
+ err |= ERR_ALERT | ERR_FATAL;
+ free(conf_err->info.errorfiles.name);
+ goto next;
+ }
+
+ free(conf_err->info.errorfiles.name);
+ for (rc = 0; rc < HTTP_ERR_SIZE; rc++) {
+ if (conf_err->info.errorfiles.status[rc] > 0) {
+ if (http_errs->replies[rc])
+ px->replies[rc] = http_errs->replies[rc];
+ else if (conf_err->info.errorfiles.status[rc] == 2)
+ ha_warning("config: proxy '%s' : status '%d' not declared in"
+ " http-errors section '%s' (at %s:%d).\n",
+ px->id, http_err_codes[rc], http_errs->id,
+ conf_err->file, conf_err->line);
+ }
+ }
+ }
+ next:
+ LIST_DELETE(&conf_err->list);
+ free(conf_err->file);
+ free(conf_err);
+ }
+
+ out:
+ return err;
+}
+
+static int post_check_errors()
+{
+ struct ebpt_node *node;
+ struct http_error_msg *http_errmsg;
+ struct htx *htx;
+ int err_code = ERR_NONE;
+
+ node = ebpt_first(&http_error_messages);
+ while (node) {
+ http_errmsg = container_of(node, typeof(*http_errmsg), node);
+ if (b_is_null(&http_errmsg->msg))
+ goto next;
+ htx = htxbuf(&http_errmsg->msg);
+ if (htx_free_data_space(htx) < global.tune.maxrewrite) {
+ ha_warning("config: errorfile '%s' runs over the buffer space"
+ " reserved to headers rewriting. It may lead to internal errors if "
+ " http-after-response rules are evaluated on this message.\n",
+ (char *)node->key);
+ err_code |= ERR_WARN;
+ }
+ next:
+ node = ebpt_next(node);
+ }
+
+ return err_code;
+}
+
+int proxy_dup_default_conf_errors(struct proxy *curpx, const struct proxy *defpx, char **errmsg)
+{
+ struct conf_errors *conf_err, *new_conf_err = NULL;
+ int ret = 0;
+
+ list_for_each_entry(conf_err, &defpx->conf.errors, list) {
+ new_conf_err = calloc(1, sizeof(*new_conf_err));
+ if (!new_conf_err) {
+ memprintf(errmsg, "unable to duplicate default errors (out of memory).");
+ goto out;
+ }
+ new_conf_err->type = conf_err->type;
+ if (conf_err->type == 1) {
+ new_conf_err->info.errorfile.status = conf_err->info.errorfile.status;
+ new_conf_err->info.errorfile.reply = conf_err->info.errorfile.reply;
+ }
+ else {
+ new_conf_err->info.errorfiles.name = strdup(conf_err->info.errorfiles.name);
+ if (!new_conf_err->info.errorfiles.name) {
+ memprintf(errmsg, "unable to duplicate default errors (out of memory).");
+ goto out;
+ }
+ memcpy(&new_conf_err->info.errorfiles.status, &conf_err->info.errorfiles.status,
+ sizeof(conf_err->info.errorfiles.status));
+ }
+ new_conf_err->file = strdup(conf_err->file);
+ new_conf_err->line = conf_err->line;
+ LIST_APPEND(&curpx->conf.errors, &new_conf_err->list);
+ new_conf_err = NULL;
+ }
+ ret = 1;
+
+ out:
+ free(new_conf_err);
+ return ret;
+}
+
+void proxy_release_conf_errors(struct proxy *px)
+{
+ struct conf_errors *conf_err, *conf_err_back;
+
+ list_for_each_entry_safe(conf_err, conf_err_back, &px->conf.errors, list) {
+ if (conf_err->type == 0)
+ free(conf_err->info.errorfiles.name);
+ LIST_DELETE(&conf_err->list);
+ free(conf_err->file);
+ free(conf_err);
+ }
+}
+
+/*
+ * Parse an <http-errors> section.
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the first two can stop processing, the two others are just
+ * indicators.
+ */
+static int cfg_parse_http_errors(const char *file, int linenum, char **args, int kwm)
+{
+ static struct http_errors *curr_errs = NULL;
+ int err_code = 0;
+ const char *err;
+ char *errmsg = NULL;
+
+ if (strcmp(args[0], "http-errors") == 0) { /* new errors section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for http-errors section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(curr_errs, &http_errors_list, list) {
+ /* Error if two errors sections own the same name */
+ if (strcmp(curr_errs->id, args[1]) == 0) {
+ ha_alert("parsing [%s:%d]: http-errors section '%s' already exists (declared at %s:%d).\n",
+ file, linenum, args[1], curr_errs->conf.file, curr_errs->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ if ((curr_errs = calloc(1, sizeof(*curr_errs))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ LIST_APPEND(&http_errors_list, &curr_errs->list);
+ curr_errs->id = strdup(args[1]);
+ curr_errs->conf.file = strdup(file);
+ curr_errs->conf.line = linenum;
+ }
+ else if (strcmp(args[0], "errorfile") == 0) { /* error message from a file */
+ struct http_reply *reply;
+ struct buffer *msg;
+ int status, rc;
+
+ if (*(args[1]) == 0 || *(args[2]) == 0) {
+ ha_alert("parsing [%s:%d] : %s: expects <status_code> and <file> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ status = atol(args[1]);
+ msg = http_parse_errorfile(status, args[2], &errmsg);
+ if (!msg) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (errmsg) {
+ ha_warning("parsing [%s:%d] : %s: %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_WARN;
+ }
+
+ reply = calloc(1, sizeof(*reply));
+ if (!reply) {
+ ha_alert("parsing [%s:%d] : %s : out of memory.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ reply->type = HTTP_REPLY_ERRMSG;
+ reply->status = status;
+ reply->ctype = NULL;
+ LIST_INIT(&reply->hdrs);
+ reply->body.errmsg = msg;
+
+ rc = http_get_status_idx(status);
+ curr_errs->replies[rc] = reply;
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ free(errmsg);
+ return err_code;
+}
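+
+/* Configuration usage (illustrative only): an http-errors section groups
+ * errorfile declarations under a name that the "errorfiles" and "http-error"
+ * proxy keywords can reference:
+ *
+ *   http-errors myerrors
+ *       errorfile 503 /etc/haproxy/errors/custom-503.http
+ */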
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "errorloc", proxy_parse_errorloc },
+ { CFG_LISTEN, "errorloc302", proxy_parse_errorloc },
+ { CFG_LISTEN, "errorloc303", proxy_parse_errorloc },
+ { CFG_LISTEN, "errorfile", proxy_parse_errorfile },
+ { CFG_LISTEN, "errorfiles", proxy_parse_errorfiles },
+ { CFG_LISTEN, "http-error", proxy_parse_http_error },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+REGISTER_POST_PROXY_CHECK(proxy_check_errors);
+REGISTER_POST_CHECK(post_check_errors);
+
+REGISTER_CONFIG_SECTION("http-errors", cfg_parse_http_errors, NULL);
+
+/************************************************************************/
+/* HTX sample fetches */
+/************************************************************************/
+
+/* Returns 1 if a stream is an HTX stream. Otherwise, it returns 0. */
+static int
+smp_fetch_is_htx(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.u.sint = !!IS_HTX_STRM(smp->strm);
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
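+
+/* The fetches below are mainly meant for debugging. Assuming they are
+ * registered under the "internal." prefix (see the sample fetch keyword list
+ * at the end of this file), a typical use is (illustrative only):
+ *
+ *   http-request deny if { internal.htx.data gt 10000 }
+ */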
+
+/* Returns the number of blocks in an HTX message. The channel is chosen
+ * depending on the sample direction. */
+static int
+smp_fetch_htx_nbblks(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx_nbblks(htx);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the size of an HTX message. The channel is chosen depending on the
+ * sample direction. */
+static int
+smp_fetch_htx_size(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx->size;
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the data size of an HTX message. The channel is chosen depending on the
+ * sample direction. */
+static int
+smp_fetch_htx_data(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx->data;
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the used space (data+meta) of an HTX message. The channel is chosen
+ * depending on the sample direction. */
+static int
+smp_fetch_htx_used(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx_used_space(htx);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the free space (size-used) of an HTX message. The channel is chosen
+ * depending on the sample direction. */
+static int
+smp_fetch_htx_free(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx_free_space(htx);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the free space for data (free-sizeof(blk)) of an HTX message. The
+ * channel is chosen depending on the sample direction. */
+static int
+smp_fetch_htx_free_data(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = htx_free_data_space(htx);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns 1 if the HTX message contains EOM flag. Otherwise it returns 0. The
+ * channel is chosen depending on the sample direction.
+ */
+static int
+smp_fetch_htx_has_eom(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+
+ if (!smp->strm)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ smp->data.u.sint = !!(htx->flags & HTX_FL_EOM);
+ smp->data.type = SMP_T_BOOL;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the type of a specific HTX block, if found in the message. Otherwise
+ * HTX_BLK_UNUSED is returned. Any non-negative integer (>= 0) is supported, as
+ * well as "head", "tail" or "first". The channel is chosen depending on the
+ * sample direction. */
+static int
+smp_fetch_htx_blk_type(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ enum htx_blk_type type;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ type = htx_get_head_type(htx);
+ else if (pos == -2)
+ type = htx_get_tail_type(htx);
+ else if (pos == -3)
+ type = htx_get_first_type(htx);
+ else
+ type = ((pos >= htx->head && pos <= htx->tail)
+ ? htx_get_blk_type(htx_get_blk(htx, pos))
+ : HTX_BLK_UNUSED);
+
+ chunk_initstr(&smp->data.u.str, htx_blk_type_str(type));
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST | SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the size of a specific HTX block, if found in the message. Otherwise
+ * 0 is returned. Any non-negative integer (>= 0) is supported, as well as
+ * "head", "tail" or "first". The channel is chosen depending on the sample
+ * direction. */
+static int
+smp_fetch_htx_blk_size(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ smp->data.u.sint = (blk ? htx_get_blksz(blk) : 0);
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the start-line if the selected HTX block exists and is a
+ * start-line. Otherwise an empty string is returned. Any non-negative integer
+ * (>= 0) is supported, as well as "head", "tail" or "first". The channel is
+ * chosen depending on the sample direction. */
+static int
+smp_fetch_htx_blk_stline(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *temp;
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ struct htx_sl *sl;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ if (!blk || (htx_get_blk_type(blk) != HTX_BLK_REQ_SL && htx_get_blk_type(blk) != HTX_BLK_RES_SL)) {
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+ }
+ else {
+ sl = htx_get_blk_ptr(htx, blk);
+
+ temp = get_trash_chunk();
+ chunk_istcat(temp, htx_sl_p1(sl));
+ temp->area[temp->data++] = ' ';
+ chunk_istcat(temp, htx_sl_p2(sl));
+ temp->area[temp->data++] = ' ';
+ chunk_istcat(temp, htx_sl_p3(sl));
+
+ smp->data.u.str = *temp;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the header name if the selected HTX block exists and is a header or
+ * a trailer. Otherwise an empty string is returned. Any non-negative integer
+ * (>= 0) is supported, as well as "head", "tail" or "first". The channel is
+ * chosen depending on the sample direction. */
+static int
+smp_fetch_htx_blk_hdrname(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ if (!blk || (htx_get_blk_type(blk) != HTX_BLK_HDR && htx_get_blk_type(blk) != HTX_BLK_TLR)) {
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+ }
+ else {
+ struct ist name = htx_get_blk_name(htx, blk);
+
+ chunk_initlen(&smp->data.u.str, name.ptr, name.len, name.len);
+ }
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST | SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the header value if the selected HTX block exists and is a header or
+ * a trailer. Otherwise an empty string is returned. Any non-negative integer
+ * (>= 0) is supported, as well as "head", "tail" or "first". The channel is
+ * chosen depending on the sample direction. */
+static int
+smp_fetch_htx_blk_hdrval(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ if (!blk || (htx_get_blk_type(blk) != HTX_BLK_HDR && htx_get_blk_type(blk) != HTX_BLK_TLR)) {
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+ }
+ else {
+ struct ist val = htx_get_blk_value(htx, blk);
+
+ chunk_initlen(&smp->data.u.str, val.ptr, val.len, val.len);
+ }
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST | SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* Returns the value if the selected HTX block exists and is a data
+ * block. Otherwise an empty string is returned. Any non-negative integer
+ * (>= 0) is supported, as well as "head", "tail" or "first". The channel is
+ * chosen depending on the sample direction. */
+static int
+smp_fetch_htx_blk_data(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ struct channel *chn;
+ struct htx *htx;
+ struct htx_blk *blk;
+ int32_t pos;
+
+ if (!smp->strm || !arg_p)
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ htx = smp_prefetch_htx(smp, chn, NULL, 0);
+ if (!htx)
+ return 0;
+
+ pos = arg_p[0].data.sint;
+ if (pos == -1)
+ blk = htx_get_head_blk(htx);
+ else if (pos == -2)
+ blk = htx_get_tail_blk(htx);
+ else if (pos == -3)
+ blk = htx_get_first_blk(htx);
+ else
+ blk = ((pos >= htx->head && pos <= htx->tail) ? htx_get_blk(htx, pos) : NULL);
+
+ if (!blk || htx_get_blk_type(blk) != HTX_BLK_DATA) {
+ smp->data.u.str.size = 0;
+ smp->data.u.str.area = "";
+ smp->data.u.str.data = 0;
+ }
+ else {
+ struct ist val = htx_get_blk_value(htx, blk);
+
+ chunk_initlen(&smp->data.u.str, val.ptr, val.len, val.len);
+ }
+ smp->data.type = SMP_T_BIN;
+ smp->flags = SMP_F_CONST | SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* This function is used to validate the arguments passed to any "htx_blk" fetch
+ * keyword. An argument is expected by these keywords. It must be a non-negative
+ * integer or one of the following strings: "head", "tail" or "first". It
+ * returns 0 on error, and a non-zero value if OK.
+ */
+int val_blk_arg(struct arg *arg, char **err_msg)
+{
+ if (arg[0].type != ARGT_STR || !arg[0].data.str.data) {
+ memprintf(err_msg, "a block position is expected (> 0) or a special block name (head, tail, first)");
+ return 0;
+ }
+ if (arg[0].data.str.data == 4 && !strncmp(arg[0].data.str.area, "head", 4)) {
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = -1;
+ }
+ else if (arg[0].data.str.data == 4 && !strncmp(arg[0].data.str.area, "tail", 4)) {
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = -2;
+ }
+ else if (arg[0].data.str.data == 5 && !strncmp(arg[0].data.str.area, "first", 5)) {
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = -3;
+ }
+ else {
+ int pos;
+
+ for (pos = 0; pos < arg[0].data.str.data; pos++) {
+ if (!isdigit((unsigned char)arg[0].data.str.area[pos])) {
+ memprintf(err_msg, "invalid block position");
+ return 0;
+ }
+ }
+
+ pos = strl2uic(arg[0].data.str.area, arg[0].data.str.data);
+ if (pos < 0) {
+ memprintf(err_msg, "block position must not be negative");
+ return 0;
+ }
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = pos;
+ }
+
+ return 1;
+}
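+
+/* For example (illustrative, derived from the code above): the argument
+ * "head" is converted to -1, "tail" to -2, "first" to -3, and "12" to the
+ * integer 12, so the fetch functions above only have to test a small signed
+ * integer.
+ */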
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: htx sample fetches should only be used for development purposes.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "internal.strm.is_htx", smp_fetch_is_htx, 0, NULL, SMP_T_BOOL, SMP_USE_INTRN },
+
+ { "internal.htx.nbblks", smp_fetch_htx_nbblks, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.size", smp_fetch_htx_size, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.data", smp_fetch_htx_data, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.used", smp_fetch_htx_used, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.free", smp_fetch_htx_free, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.free_data", smp_fetch_htx_free_data, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx.has_eom", smp_fetch_htx_has_eom, 0, NULL, SMP_T_BOOL, SMP_USE_HRQHV|SMP_USE_HRSHV},
+
+ { "internal.htx_blk.type", smp_fetch_htx_blk_type, ARG1(1,STR), val_blk_arg, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.size", smp_fetch_htx_blk_size, ARG1(1,STR), val_blk_arg, SMP_T_SINT, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.start_line", smp_fetch_htx_blk_stline, ARG1(1,STR), val_blk_arg, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.hdrname", smp_fetch_htx_blk_hdrname, ARG1(1,STR), val_blk_arg, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.hdrval", smp_fetch_htx_blk_hdrval, ARG1(1,STR), val_blk_arg, SMP_T_STR, SMP_USE_HRQHV|SMP_USE_HRSHV},
+ { "internal.htx_blk.data", smp_fetch_htx_blk_data, ARG1(1,STR), val_blk_arg, SMP_T_BIN, SMP_USE_HRQHV|SMP_USE_HRSHV},
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
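+
+/* Usage sketch (illustrative; the directives below are standard configuration
+ * keywords, but these exact lines are an assumption, not part of this source):
+ * the internal fetches above can be used while debugging, e.g.:
+ *
+ *   http-request set-var(txn.blk_type) internal.htx_blk.type(head)
+ *   http-request set-var(txn.nbblks) internal.htx.nbblks
+ */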
diff --git a/src/http_rules.c b/src/http_rules.c
new file mode 100644
index 0000000..192f0c7
--- /dev/null
+++ b/src/http_rules.c
@@ -0,0 +1,530 @@
+/*
+ * HTTP rules parsing and registration
+ *
+ * Copyright 2000-2018 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/chunk.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana-t.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+/* List head of all known action keywords for "http-request" */
+struct action_kw_list http_req_keywords = {
+ .list = LIST_HEAD_INIT(http_req_keywords.list)
+};
+
+/* List head of all known action keywords for "http-response" */
+struct action_kw_list http_res_keywords = {
+ .list = LIST_HEAD_INIT(http_res_keywords.list)
+};
+
+/* List head of all known action keywords for "http-after-response" */
+struct action_kw_list http_after_res_keywords = {
+ .list = LIST_HEAD_INIT(http_after_res_keywords.list)
+};
+
+void http_req_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&http_req_keywords.list, &kw_list->list);
+}
+
+void http_res_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&http_res_keywords.list, &kw_list->list);
+}
+
+void http_after_res_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&http_after_res_keywords.list, &kw_list->list);
+}
+
+/*
+ * Return the struct action_kw associated to an "http-request" keyword.
+ */
+struct action_kw *action_http_req_custom(const char *kw)
+{
+ return action_lookup(&http_req_keywords.list, kw);
+}
+
+/*
+ * Return the struct action_kw associated to an "http-response" keyword.
+ */
+struct action_kw *action_http_res_custom(const char *kw)
+{
+ return action_lookup(&http_res_keywords.list, kw);
+}
+
+/*
+ * Return the struct action_kw associated to an "http-after-response" keyword.
+ */
+struct action_kw *action_http_after_res_custom(const char *kw)
+{
+ return action_lookup(&http_after_res_keywords.list, kw);
+}
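+
+/* Illustrative sketch (not part of the source): how a module typically
+ * registers a custom "http-request" action through the helpers above. The
+ * "do-nothing" keyword and its parser are hypothetical; only the pattern
+ * (an action_kw_list registered via INITCALL1) is the real convention.
+ */
+#ifdef HTTP_RULES_EXAMPLE
+static enum act_parse_ret parse_do_nothing(const char **args, int *cur_arg,
+                                           struct proxy *px, struct act_rule *rule,
+                                           char **err)
+{
+	rule->action = ACT_CUSTOM;
+	/* a real action would also set rule->action_ptr to its handler */
+	return ACT_RET_PRS_OK;
+}
+
+static struct action_kw_list example_http_req_actions = {ILH, {
+	{ "do-nothing", parse_do_nothing },
+	{ NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &example_http_req_actions);
+#endif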
+
+/* parse an "http-request" rule */
+struct act_rule *parse_http_req_cond(const char **args, const char *file, int linenum, struct proxy *proxy)
+{
+ struct act_rule *rule;
+ const struct action_kw *custom = NULL;
+ int cur_arg;
+
+ rule = new_act_rule(ACT_F_HTTP_REQ, file, linenum);
+ if (!rule) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ goto out;
+ }
+
+ if (((custom = action_http_req_custom(args[0])) != NULL)) {
+ char *errmsg = NULL;
+
+ cur_arg = 1;
+ /* try in the module list */
+ rule->kw = custom;
+
+ if (custom->flags & KWF_EXPERIMENTAL) {
+ if (!experimental_directives_allowed) {
+ ha_alert("parsing [%s:%d] : '%s' action is experimental, must be allowed via a global 'expose-experimental-directives'\n",
+ file, linenum, custom->kw);
+ goto out_err;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+ }
+
+ if (custom->parse(args, &cur_arg, proxy, rule, &errmsg) == ACT_RET_PRS_ERR) {
+ ha_alert("parsing [%s:%d] : error detected in %s '%s' while parsing 'http-request %s' rule : %s.\n",
+ file, linenum, proxy_type_str(proxy), proxy->id, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ else if (errmsg) {
+ ha_warning("parsing [%s:%d] : %s.\n", file, linenum, errmsg);
+ free(errmsg);
+ }
+ }
+ else {
+ const char *best = action_suggest(args[0], &http_req_keywords.list, NULL);
+
+ action_build_list(&http_req_keywords.list, &trash);
+ ha_alert("parsing [%s:%d]: 'http-request' expects %s, but got '%s'%s.%s%s%s\n",
+ file, linenum, trash.area,
+ args[0], *args[0] ? "" : " (missing argument)",
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ goto out_err;
+ }
+
+ if (strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+ struct acl_cond *cond;
+ char *errmsg = NULL;
+
+ if ((cond = build_acl_cond(file, linenum, &proxy->acl, proxy, args+cur_arg, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing an 'http-request %s' condition : %s.\n",
+ file, linenum, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ rule->cond = cond;
+ }
+ else if (*args[cur_arg]) {
+ ha_alert("parsing [%s:%d]: 'http-request %s' expects"
+ " either 'if' or 'unless' followed by a condition but found '%s'.\n",
+ file, linenum, args[0], args[cur_arg]);
+ goto out_err;
+ }
+
+ return rule;
+ out_err:
+ free_act_rule(rule);
+ out:
+ return NULL;
+}
+
+/* parse an "http-respose" rule */
+struct act_rule *parse_http_res_cond(const char **args, const char *file, int linenum, struct proxy *proxy)
+{
+ struct act_rule *rule;
+ const struct action_kw *custom = NULL;
+ int cur_arg;
+
+ rule = new_act_rule(ACT_F_HTTP_RES, file, linenum);
+ if (!rule) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ goto out;
+ }
+
+ if (((custom = action_http_res_custom(args[0])) != NULL)) {
+ char *errmsg = NULL;
+
+ cur_arg = 1;
+ /* try in the module list */
+ rule->kw = custom;
+
+ if (custom->flags & KWF_EXPERIMENTAL) {
+ if (!experimental_directives_allowed) {
+ ha_alert("parsing [%s:%d] : '%s' action is experimental, must be allowed via a global 'expose-experimental-directives'\n",
+ file, linenum, custom->kw);
+ goto out_err;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+ }
+
+ if (custom->parse(args, &cur_arg, proxy, rule, &errmsg) == ACT_RET_PRS_ERR) {
+ ha_alert("parsing [%s:%d] : error detected in %s '%s' while parsing 'http-response %s' rule : %s.\n",
+ file, linenum, proxy_type_str(proxy), proxy->id, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ else if (errmsg) {
+ ha_warning("parsing [%s:%d] : %s.\n", file, linenum, errmsg);
+ free(errmsg);
+ }
+ }
+ else {
+ const char *best = action_suggest(args[0], &http_res_keywords.list, NULL);
+
+ action_build_list(&http_res_keywords.list, &trash);
+ ha_alert("parsing [%s:%d]: 'http-response' expects %s, but got '%s'%s.%s%s%s\n",
+ file, linenum, trash.area,
+ args[0], *args[0] ? "" : " (missing argument)",
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ goto out_err;
+ }
+
+ if (strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+ struct acl_cond *cond;
+ char *errmsg = NULL;
+
+ if ((cond = build_acl_cond(file, linenum, &proxy->acl, proxy, args+cur_arg, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing an 'http-response %s' condition : %s.\n",
+ file, linenum, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ rule->cond = cond;
+ }
+ else if (*args[cur_arg]) {
+ ha_alert("parsing [%s:%d]: 'http-response %s' expects"
+ " either 'if' or 'unless' followed by a condition but found '%s'.\n",
+ file, linenum, args[0], args[cur_arg]);
+ goto out_err;
+ }
+
+ return rule;
+ out_err:
+ free_act_rule(rule);
+ out:
+ return NULL;
+}
+
+
+/* parse an "http-after-response" rule */
+struct act_rule *parse_http_after_res_cond(const char **args, const char *file, int linenum, struct proxy *proxy)
+{
+ struct act_rule *rule;
+ const struct action_kw *custom = NULL;
+ int cur_arg;
+
+ rule = new_act_rule(ACT_F_HTTP_RES, file, linenum);
+ if (!rule) {
+ ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum);
+ goto out;
+ }
+
+ if (((custom = action_http_after_res_custom(args[0])) != NULL)) {
+ char *errmsg = NULL;
+
+ cur_arg = 1;
+ /* try in the module list */
+ rule->kw = custom;
+ if (custom->parse(args, &cur_arg, proxy, rule, &errmsg) == ACT_RET_PRS_ERR) {
+ ha_alert("parsing [%s:%d] : error detected in %s '%s' while parsing 'http-after-response %s' rule : %s.\n",
+ file, linenum, proxy_type_str(proxy), proxy->id, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ else if (errmsg) {
+ ha_warning("parsing [%s:%d] : %s.\n", file, linenum, errmsg);
+ free(errmsg);
+ }
+ }
+ else {
+ const char *best = action_suggest(args[0], &http_after_res_keywords.list, NULL);
+
+ action_build_list(&http_after_res_keywords.list, &trash);
+ ha_alert("parsing [%s:%d]: 'http-after-response' expects %s, but got '%s'%s.%s%s%s\n",
+ file, linenum, trash.area,
+ args[0], *args[0] ? "" : " (missing argument)",
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ goto out_err;
+ }
+
+ if (strcmp(args[cur_arg], "if") == 0 || strcmp(args[cur_arg], "unless") == 0) {
+ struct acl_cond *cond;
+ char *errmsg = NULL;
+
+ if ((cond = build_acl_cond(file, linenum, &proxy->acl, proxy, args+cur_arg, &errmsg)) == NULL) {
+ ha_alert("parsing [%s:%d] : error detected while parsing an 'http-after-response %s' condition : %s.\n",
+ file, linenum, args[0], errmsg);
+ free(errmsg);
+ goto out_err;
+ }
+ rule->cond = cond;
+ }
+ else if (*args[cur_arg]) {
+ ha_alert("parsing [%s:%d]: 'http-after-response %s' expects"
+ " either 'if' or 'unless' followed by a condition but found '%s'.\n",
+ file, linenum, args[0], args[cur_arg]);
+ goto out_err;
+ }
+
+ return rule;
+ out_err:
+ free_act_rule(rule);
+ out:
+ return NULL;
+}
+
+/* completely free redirect rule */
+void http_free_redirect_rule(struct redirect_rule *rdr)
+{
+ struct logformat_node *lf, *lfb;
+
+ free_acl_cond(rdr->cond);
+ free(rdr->rdr_str);
+ free(rdr->cookie_str);
+ list_for_each_entry_safe(lf, lfb, &rdr->rdr_fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+ free(rdr);
+}
+
+/* Parses a redirect rule. Returns the redirect rule on success or NULL on
+ * error, with <errmsg> filled with the error message. If <use_fmt> is non-zero,
+ * builds a dynamic log-format rule instead of a static string. Parameter <dir>
+ * indicates the direction of the rule, and equals 0 for requests, non-zero for
+ * responses.
+ */
+struct redirect_rule *http_parse_redirect_rule(const char *file, int linenum, struct proxy *curproxy,
+ const char **args, char **errmsg, int use_fmt, int dir)
+{
+ struct redirect_rule *rule = NULL;
+ int cur_arg;
+ int type = REDIRECT_TYPE_NONE;
+ int code = 302;
+ const char *destination = NULL;
+ const char *cookie = NULL;
+ int cookie_set = 0;
+ unsigned int flags = (!dir ? REDIRECT_FLAG_FROM_REQ : REDIRECT_FLAG_NONE);
+ struct acl_cond *cond = NULL;
+
+ cur_arg = 0;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "location") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ type = REDIRECT_TYPE_LOCATION;
+ cur_arg++;
+ destination = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "prefix") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+ type = REDIRECT_TYPE_PREFIX;
+ cur_arg++;
+ destination = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "scheme") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ type = REDIRECT_TYPE_SCHEME;
+ cur_arg++;
+ destination = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "set-cookie") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ cur_arg++;
+ cookie = args[cur_arg];
+ cookie_set = 1;
+ }
+ else if (strcmp(args[cur_arg], "clear-cookie") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ cur_arg++;
+ cookie = args[cur_arg];
+ cookie_set = 0;
+ }
+ else if (strcmp(args[cur_arg], "code") == 0) {
+ if (!*args[cur_arg + 1])
+ goto missing_arg;
+
+ cur_arg++;
+ code = atol(args[cur_arg]);
+ if (code < 301 || code > 308 || (code > 303 && code < 307)) {
+ memprintf(errmsg,
+ "'%s': unsupported HTTP code '%s' (must be one of 301, 302, 303, 307 or 308)",
+ args[cur_arg - 1], args[cur_arg]);
+ goto err;
+ }
+ }
+ else if (strcmp(args[cur_arg], "drop-query") == 0) {
+ flags |= REDIRECT_FLAG_DROP_QS;
+ }
+ else if (strcmp(args[cur_arg], "append-slash") == 0) {
+ flags |= REDIRECT_FLAG_APPEND_SLASH;
+ }
+ else if (strcmp(args[cur_arg], "ignore-empty") == 0) {
+ flags |= REDIRECT_FLAG_IGNORE_EMPTY;
+ }
+ else if (strcmp(args[cur_arg], "if") == 0 ||
+ strcmp(args[cur_arg], "unless") == 0) {
+ cond = build_acl_cond(file, linenum, &curproxy->acl, curproxy, (const char **)args + cur_arg, errmsg);
+ if (!cond) {
+ memprintf(errmsg, "error in condition: %s", *errmsg);
+ goto err;
+ }
+ break;
+ }
+ else {
+ memprintf(errmsg,
+ "expects 'code', 'prefix', 'location', 'scheme', 'set-cookie', 'clear-cookie', 'drop-query', 'ignore-empty' or 'append-slash' (was '%s')",
+ args[cur_arg]);
+ goto err;
+ }
+ cur_arg++;
+ }
+
+ if (type == REDIRECT_TYPE_NONE) {
+ memprintf(errmsg, "redirection type expected ('prefix', 'location', or 'scheme')");
+ goto err;
+ }
+
+ if (dir && type != REDIRECT_TYPE_LOCATION) {
+ memprintf(errmsg, "response only supports redirect type 'location'");
+ goto err;
+ }
+
+ rule = calloc(1, sizeof(*rule));
+ if (!rule)
+ goto out_of_memory;
+ rule->cond = cond;
+ LIST_INIT(&rule->rdr_fmt);
+
+ if (!use_fmt) {
+ /* old-style static redirect rule */
+ rule->rdr_str = strdup(destination);
+ if (!rule->rdr_str)
+ goto out_of_memory;
+ rule->rdr_len = strlen(destination);
+ }
+ else {
+ /* log-format based redirect rule */
+ int cap = 0;
+
+ /* Parse destination. Note that in the REDIRECT_TYPE_PREFIX case,
+ * if prefix == "/", we don't want to add anything, otherwise it
+ * makes it hard for the user to configure a self-redirection.
+ */
+ curproxy->conf.args.ctx = ARGC_RDR;
+ if (curproxy->cap & PR_CAP_FE)
+ cap |= (dir ? SMP_VAL_FE_HRS_HDR : SMP_VAL_FE_HRQ_HDR);
+ if (curproxy->cap & PR_CAP_BE)
+ cap |= (dir ? SMP_VAL_BE_HRS_HDR : SMP_VAL_BE_HRQ_HDR);
+ if (!(type == REDIRECT_TYPE_PREFIX && destination[0] == '/' && destination[1] == '\0')) {
+ if (!parse_logformat_string(destination, curproxy, &rule->rdr_fmt, LOG_OPT_HTTP, cap, errmsg)) {
+ goto err;
+ }
+ free(curproxy->conf.lfs_file);
+ curproxy->conf.lfs_file = strdup(curproxy->conf.args.file);
+ curproxy->conf.lfs_line = curproxy->conf.args.line;
+ }
+ }
+
+ if (cookie) {
+		/* Depending on cookie_set, we either want to set the cookie or to
+		 * clear it. A clear consists in appending "; path=/; Max-Age=0;"
+		 * at the end.
+		 */
+ rule->cookie_len = strlen(cookie);
+ if (cookie_set) {
+ rule->cookie_str = malloc(rule->cookie_len + 10);
+ if (!rule->cookie_str)
+ goto out_of_memory;
+ memcpy(rule->cookie_str, cookie, rule->cookie_len);
+ memcpy(rule->cookie_str + rule->cookie_len, "; path=/;", 10);
+ rule->cookie_len += 9;
+ } else {
+ rule->cookie_str = malloc(rule->cookie_len + 21);
+ if (!rule->cookie_str)
+ goto out_of_memory;
+ memcpy(rule->cookie_str, cookie, rule->cookie_len);
+ memcpy(rule->cookie_str + rule->cookie_len, "; path=/; Max-Age=0;", 21);
+ rule->cookie_len += 20;
+ }
+ }
+ rule->type = type;
+ rule->code = code;
+ rule->flags = flags;
+ LIST_INIT(&rule->list);
+ return rule;
+
+ missing_arg:
+ memprintf(errmsg, "missing argument for '%s'", args[cur_arg]);
+ goto err;
+ out_of_memory:
+ memprintf(errmsg, "parsing [%s:%d]: out of memory.", file, linenum);
+ err:
+ if (rule)
+ http_free_redirect_rule(rule);
+ else if (cond) {
+ /* rule not yet allocated, but cond already is */
+ free_acl_cond(cond);
+ }
+
+ return NULL;
+}
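+
+/* Examples of rules accepted by this parser (illustrative only, not part of
+ * the source):
+ *
+ *   redirect prefix /new code 301 if { path_beg /old }
+ *   redirect scheme https unless { ssl_fc }
+ *   redirect location /login set-cookie SEEN=1 drop-query
+ */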
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/htx.c b/src/htx.c
new file mode 100644
index 0000000..feb7eec
--- /dev/null
+++ b/src/htx.c
@@ -0,0 +1,1099 @@
+/*
+ * internal HTTP message
+ *
+ * Copyright 2018 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/chunk.h>
+#include <haproxy/htx.h>
+#include <haproxy/net_helper.h>
+
+struct htx htx_empty = { .size = 0, .data = 0, .head = -1, .tail = -1, .first = -1 };
+
+/* Tests show that 63% of these calls are for 64-bit chunks, so it's better to
+ * avoid calling memcpy() for those!
+ */
+static inline __attribute__((always_inline)) void htx_memcpy(void *dst, void *src, size_t len)
+{
+ if (likely(len == 8))
+ write_u64(dst, read_u64(src));
+ else
+ memcpy(dst, src, len);
+}
+
+/* Defragments an HTX message. It removes unused blocks and unwraps the payloads
+ * part. A temporary buffer is used to do so. This function never fails. Most of
+ * the time, we need to keep a reference on a specific HTX block. Thus, if <blk>
+ * is set, the pointer to its new position, after defrag, is returned. In
+ * addition, if the size of the block must be altered, <blkinfo> info must be
+ * provided (!= 0). But in this case, it remains the caller's responsibility to
+ * update the block content.
+ */
+/* TODO: merge data blocks into one */
+struct htx_blk *htx_defrag(struct htx *htx, struct htx_blk *blk, uint32_t blkinfo)
+{
+ struct buffer *chunk = get_trash_chunk();
+ struct htx *tmp = htxbuf(chunk);
+ struct htx_blk *newblk, *oldblk;
+ uint32_t new, old, blkpos;
+ uint32_t addr, blksz;
+ int32_t first = -1;
+
+ if (htx->head == -1)
+ return NULL;
+
+ blkpos = -1;
+
+ new = 0;
+ addr = 0;
+ tmp->size = htx->size;
+ tmp->data = 0;
+
+ /* start from the head */
+ for (old = htx_get_head(htx); old != -1; old = htx_get_next(htx, old)) {
+ oldblk = htx_get_blk(htx, old);
+ if (htx_get_blk_type(oldblk) == HTX_BLK_UNUSED)
+ continue;
+
+ blksz = htx_get_blksz(oldblk);
+ htx_memcpy((void *)tmp->blocks + addr, htx_get_blk_ptr(htx, oldblk), blksz);
+
+ /* update the start-line position */
+ if (htx->first == old)
+ first = new;
+
+ newblk = htx_get_blk(tmp, new);
+ newblk->addr = addr;
+ newblk->info = oldblk->info;
+
+ /* if <blk> is defined, save its new position */
+ if (blk != NULL && blk == oldblk) {
+ if (blkinfo)
+ newblk->info = blkinfo;
+ blkpos = new;
+ }
+
+ blksz = htx_get_blksz(newblk);
+ addr += blksz;
+ tmp->data += blksz;
+ new++;
+ }
+
+ htx->data = tmp->data;
+ htx->first = first;
+ htx->head = 0;
+ htx->tail = new - 1;
+ htx->head_addr = htx->end_addr = 0;
+ htx->tail_addr = addr;
+ htx->flags &= ~HTX_FL_FRAGMENTED;
+ htx_memcpy((void *)htx->blocks, (void *)tmp->blocks, htx->size);
+
+ return ((blkpos == -1) ? NULL : htx_get_blk(htx, blkpos));
+}
+
+/* Defragments the blocks of an HTX message. The payload part is kept untouched
+ * here. This function moves all blocks back, starting at position 0, and
+ * removes unused blocks. It must never be called with an empty message.
+ */
+static void htx_defrag_blks(struct htx *htx)
+{
+ int32_t pos, new;
+
+ new = 0;
+ for (pos = htx_get_head(htx); pos != -1; pos = htx_get_next(htx, pos)) {
+ struct htx_blk *posblk, *newblk;
+
+ if (pos == new) {
+ new++;
+ continue;
+ }
+
+ posblk = htx_get_blk(htx, pos);
+ if (htx_get_blk_type(posblk) == HTX_BLK_UNUSED)
+ continue;
+
+ if (htx->first == pos)
+ htx->first = new;
+ newblk = htx_get_blk(htx, new++);
+ newblk->info = posblk->info;
+ newblk->addr = posblk->addr;
+ }
+ BUG_ON(!new);
+ htx->head = 0;
+ htx->tail = new - 1;
+}
+
+/* Reserves a new block in the HTX message <htx> with a content of <blksz>
+ * bytes. If there is not enough space, NULL is returned. Otherwise the reserved
+ * block is returned and the HTX message is updated. Space for this new block is
+ * reserved in the HTX message. But it is the caller's responsibility to set the
+ * right info in the block to reflect the stored data.
+ */
+static struct htx_blk *htx_reserve_nxblk(struct htx *htx, uint32_t blksz)
+{
+ struct htx_blk *blk;
+ uint32_t tail, headroom, tailroom;
+
+ if (blksz > htx_free_data_space(htx))
+ return NULL; /* full */
+
+ if (htx->head == -1) {
+ /* Empty message */
+ htx->head = htx->tail = htx->first = 0;
+ blk = htx_get_blk(htx, htx->tail);
+ blk->addr = 0;
+ htx->data = blksz;
+ htx->tail_addr = blksz;
+ return blk;
+ }
+
+ /* Find the block's position. First, we try to get the next position in
+	 * the message, increasing the tail by one. If this position is not
+	 * available and there are some holes, we try to defrag the blocks
+	 * without touching their payload. If it is impossible, we fully
+	 * defrag the message.
+ */
+ tail = htx->tail + 1;
+ if (htx_pos_to_addr(htx, tail) >= htx->tail_addr)
+ ;
+ else if (htx->head > 0) {
+ htx_defrag_blks(htx);
+ tail = htx->tail + 1;
+ BUG_ON(htx_pos_to_addr(htx, tail) < htx->tail_addr);
+ }
+ else
+ goto defrag;
+
+ /* Now, we have found the block's position. Try to find where to put its
+ * payload. The free space is split in two areas:
+ *
+ * * The free space in front of the blocks table. This one is used if and
+ * only if the other one was not used yet.
+ *
+ * * The free space at the beginning of the message. Once this one is
+ * used, the other one is never used again, until the next defrag.
+ */
+ headroom = (htx->end_addr - htx->head_addr);
+ tailroom = (!htx->head_addr ? htx_pos_to_addr(htx, tail) - htx->tail_addr : 0);
+ BUG_ON((int32_t)headroom < 0);
+ BUG_ON((int32_t)tailroom < 0);
+
+ if (blksz <= tailroom) {
+ blk = htx_get_blk(htx, tail);
+ blk->addr = htx->tail_addr;
+ htx->tail_addr += blksz;
+ }
+ else if (blksz <= headroom) {
+ blk = htx_get_blk(htx, tail);
+ blk->addr = htx->head_addr;
+ htx->head_addr += blksz;
+ }
+ else {
+ defrag:
+ /* need to defragment the message before inserting upfront */
+ htx_defrag(htx, NULL, 0);
+ tail = htx->tail + 1;
+ blk = htx_get_blk(htx, tail);
+ blk->addr = htx->tail_addr;
+ htx->tail_addr += blksz;
+ }
+
+ htx->tail = tail;
+ htx->data += blksz;
+ /* Set first position if not already set */
+ if (htx->first == -1)
+ htx->first = tail;
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+
+ return blk;
+}
+
+/* Prepares the block for an expansion of its payload. The payload will be
+ * expanded by <delta> bytes and we need to find where this expansion will be
+ * performed. It can be a compression if <delta> is negative. This function only
+ * updates the addresses. The caller has the responsibility to perform the
+ * expansion and update the block and the HTX message accordingly. No error must
+ * occur. It returns the following values:
+ *
+ * 0: The expansion cannot be performed, there is not enough space.
+ *
+ * 1: the expansion must be performed in place, there is enough space after
+ * the block's payload to handle it. This is especially true if it is a
+ * compression and not an expansion.
+ *
+ * 2: the block's payload must be moved at the new block address before doing
+ * the expansion.
+ *
+ * 3: the HTX message must be defragmented
+ */
+static int htx_prepare_blk_expansion(struct htx *htx, struct htx_blk *blk, int32_t delta)
+{
+ uint32_t sz, tailroom, headroom;
+ int ret = 3;
+
+ BUG_ON(htx->head == -1);
+
+ headroom = (htx->end_addr - htx->head_addr);
+ tailroom = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr);
+ BUG_ON((int32_t)headroom < 0);
+ BUG_ON((int32_t)tailroom < 0);
+
+ sz = htx_get_blksz(blk);
+ if (delta <= 0) {
+ /* It is a compression, it can be performed in place */
+ if (blk->addr+sz == htx->tail_addr)
+ htx->tail_addr += delta;
+ else if (blk->addr+sz == htx->head_addr)
+ htx->head_addr += delta;
+ ret = 1;
+ }
+ else if (delta > htx_free_space(htx)) {
+ /* There is not enough space to handle the expansion */
+ ret = 0;
+ }
+ else if (blk->addr+sz == htx->tail_addr) {
+ /* The block's payload is just before the tail room */
+ if (delta < tailroom) {
+ /* Expand the block's payload */
+ htx->tail_addr += delta;
+ ret = 1;
+ }
+ else if ((sz + delta) < headroom) {
+ uint32_t oldaddr = blk->addr;
+
+ /* Move the block's payload into the headroom */
+ blk->addr = htx->head_addr;
+ htx->tail_addr -= sz;
+ htx->head_addr += sz + delta;
+ if (oldaddr == htx->end_addr) {
+ if (htx->end_addr == htx->tail_addr) {
+ htx->tail_addr = htx->head_addr;
+ htx->head_addr = htx->end_addr = 0;
+ }
+ else
+ htx->end_addr += sz;
+ }
+ ret = 2;
+ }
+ }
+ else if (blk->addr+sz == htx->head_addr) {
+ /* The block's payload is just before the head room */
+ if (delta < headroom) {
+ /* Expand the block's payload */
+ htx->head_addr += delta;
+ ret = 1;
+ }
+ }
+ else {
+ /* The block's payload is not at the rooms edge */
+ if (!htx->head_addr && sz+delta < tailroom) {
+ /* Move the block's payload into the tailroom */
+ if (blk->addr == htx->end_addr)
+ htx->end_addr += sz;
+ blk->addr = htx->tail_addr;
+ htx->tail_addr += sz + delta;
+ ret = 2;
+ }
+ else if (sz+delta < headroom) {
+ /* Move the block's payload into the headroom */
+ if (blk->addr == htx->end_addr)
+ htx->end_addr += sz;
+ blk->addr = htx->head_addr;
+ htx->head_addr += sz + delta;
+ ret = 2;
+ }
+ }
+ /* Otherwise defrag the HTX message */
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return ret;
+}
+
+/* Adds a new block of type <type> in the HTX message <htx>. Its content size is
+ * passed but it is the caller's responsibility to do the copy.
+ */
+struct htx_blk *htx_add_blk(struct htx *htx, enum htx_blk_type type, uint32_t blksz)
+{
+ struct htx_blk *blk;
+
+ BUG_ON(blksz >= 256 << 20);
+ blk = htx_reserve_nxblk(htx, blksz);
+ if (!blk)
+ return NULL;
+ BUG_ON(blk->addr > htx->size);
+
+ blk->info = (type << 28);
+ return blk;
+}
+
+/* Removes the block <blk> from the HTX message <htx>. The function returns the
+ * block following <blk> or NULL if <blk> is the last block or the last inserted
+ * one.
+ */
+struct htx_blk *htx_remove_blk(struct htx *htx, struct htx_blk *blk)
+{
+ enum htx_blk_type type;
+ uint32_t pos, addr, sz;
+
+ BUG_ON(!blk || htx->head == -1);
+
+ /* This is the last block in use */
+ if (htx->head == htx->tail) {
+ uint32_t flags = (htx->flags & ~HTX_FL_FRAGMENTED); /* Preserve flags except FRAGMENTED */
+
+ htx_reset(htx);
+ htx->flags = flags; /* restore flags */
+ return NULL;
+ }
+
+ type = htx_get_blk_type(blk);
+ pos = htx_get_blk_pos(htx, blk);
+ sz = htx_get_blksz(blk);
+ addr = blk->addr;
+ if (type != HTX_BLK_UNUSED) {
+ /* Mark the block as unused, decrement allocated size */
+ htx->data -= htx_get_blksz(blk);
+ blk->info = ((uint32_t)HTX_BLK_UNUSED << 28);
+ }
+
+	/* There are at least 2 blocks, so tail is always > 0 */
+ if (pos == htx->head) {
+ /* move the head forward */
+ htx->head++;
+ }
+ else if (pos == htx->tail) {
+ /* remove the tail. this was the last inserted block so
+ * return NULL. */
+ htx->tail--;
+ blk = NULL;
+ goto end;
+ }
+ else
+ htx->flags |= HTX_FL_FRAGMENTED;
+
+ blk = htx_get_blk(htx, pos+1);
+
+ end:
+ if (pos == htx->first)
+ htx->first = (blk ? htx_get_blk_pos(htx, blk) : -1);
+
+ if (htx->head == htx->tail) {
+ /* If there is just one block in the HTX message, free space can
+ * be adjusted. This operation could save some defrags. */
+ struct htx_blk *lastblk = htx_get_blk(htx, htx->tail);
+
+ htx->head_addr = 0;
+ htx->end_addr = lastblk->addr;
+ htx->tail_addr = lastblk->addr+htx->data;
+ }
+ else {
+ if (addr+sz == htx->tail_addr)
+ htx->tail_addr = addr;
+ else if (addr+sz == htx->head_addr)
+ htx->head_addr = addr;
+ if (addr == htx->end_addr) {
+ if (htx->tail_addr == htx->end_addr) {
+ htx->tail_addr = htx->head_addr;
+ htx->head_addr = htx->end_addr = 0;
+ }
+ else
+ htx->end_addr += sz;
+ }
+ }
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return blk;
+}
+
+/* Looks for the HTX block containing the offset <offset>, starting at the HTX
+ * message's head. The function returns an htx_ret with the found HTX block and
+ * the position inside this block where the offset is. If the offset <offset> is
+ * outside of the HTX message, htx_ret.blk is set to NULL.
+ */
+struct htx_ret htx_find_offset(struct htx *htx, uint32_t offset)
+{
+ struct htx_blk *blk;
+ struct htx_ret htxret = { .blk = NULL, .ret = 0 };
+
+ if (offset >= htx->data)
+ return htxret;
+
+ for (blk = htx_get_head_blk(htx); blk && offset; blk = htx_get_next_blk(htx, blk)) {
+ uint32_t sz = htx_get_blksz(blk);
+
+ if (offset < sz)
+ break;
+ offset -= sz;
+ }
+ htxret.blk = blk;
+ htxret.ret = offset;
+ return htxret;
+}
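+
+/* Usage sketch (illustrative, not part of the source): return a pointer to
+ * the byte located at the absolute payload offset <ofs> of the message, using
+ * the lookup above.
+ */
+#ifdef HTX_EXAMPLE
+static char *example_byte_at(struct htx *htx, uint32_t ofs)
+{
+	struct htx_ret ret = htx_find_offset(htx, ofs);
+
+	if (!ret.blk)
+		return NULL; /* <ofs> is beyond the end of the message */
+	return (char *)htx_get_blk_ptr(htx, ret.blk) + ret.ret;
+}
+#endif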
+
+/* Removes all blocks after the one containing the offset <offset>. This last
+ * one may be truncated if it is a DATA block.
+ */
+void htx_truncate(struct htx *htx, uint32_t offset)
+{
+ struct htx_blk *blk;
+ struct htx_ret htxret = htx_find_offset(htx, offset);
+
+ blk = htxret.blk;
+ if (blk && htxret.ret && htx_get_blk_type(blk) == HTX_BLK_DATA) {
+ htx_change_blk_value_len(htx, blk, htxret.ret);
+ blk = htx_get_next_blk(htx, blk);
+ }
+ while (blk)
+ blk = htx_remove_blk(htx, blk);
+}
+
+/* Drains <count> bytes from the HTX message <htx>. If the last block is a DATA
+ * block, it will be cut if necessary. Other blocks will be removed at once if
+ * <count> is large enough. The function returns an htx_ret with the first block
+ * remaining in the message and the amount of data drained. If everything is
+ * removed, htx_ret.blk is set to NULL.
+ */
+struct htx_ret htx_drain(struct htx *htx, uint32_t count)
+{
+ struct htx_blk *blk;
+ struct htx_ret htxret = { .blk = NULL, .ret = 0 };
+
+ if (count == htx->data) {
+ uint32_t flags = (htx->flags & ~HTX_FL_FRAGMENTED); /* Preserve flags except FRAGMENTED */
+
+ htx_reset(htx);
+ htx->flags = flags; /* restore flags */
+ htxret.ret = count;
+ return htxret;
+ }
+
+ blk = htx_get_head_blk(htx);
+ while (count && blk) {
+ uint32_t sz = htx_get_blksz(blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ /* Ignore unused block */
+ if (type == HTX_BLK_UNUSED)
+ goto next;
+
+ if (sz > count) {
+ if (type == HTX_BLK_DATA) {
+ htx_cut_data_blk(htx, blk, count);
+ htxret.ret += count;
+ }
+ break;
+ }
+ count -= sz;
+ htxret.ret += sz;
+ next:
+ blk = htx_remove_blk(htx, blk);
+ }
+ htxret.blk = blk;
+
+ return htxret;
+}
+
+/* Tries to append data to the last inserted block, if the type matches and if
+ * there is enough space to take it all. If the space wraps, the buffer is
+ * defragmented and a new block is inserted. If an error occurred, NULL is
+ * returned. Otherwise, on success, the updated block (or the new one) is
+ * returned. Due to its nature this function can be expensive and should be
+ * avoided whenever possible.
+ */
+struct htx_blk *htx_add_data_atonce(struct htx *htx, struct ist data)
+{
+ struct htx_blk *blk, *tailblk;
+ void *ptr;
+ uint32_t len, sz, tailroom, headroom;
+
+ if (htx->head == -1)
+ goto add_new_block;
+
+ /* Not enough space to store data */
+ if (data.len > htx_free_data_space(htx))
+ return NULL;
+
+ /* get the tail block and its size */
+ tailblk = htx_get_tail_blk(htx);
+ if (tailblk == NULL)
+ goto add_new_block;
+ sz = htx_get_blksz(tailblk);
+
+ /* Don't try to append data if the last inserted block is not of the
+ * same type */
+ if (htx_get_blk_type(tailblk) != HTX_BLK_DATA)
+ goto add_new_block;
+
+ /*
+ * Same type and enough space: append data
+ */
+ headroom = (htx->end_addr - htx->head_addr);
+ tailroom = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr);
+ BUG_ON((int32_t)headroom < 0);
+ BUG_ON((int32_t)tailroom < 0);
+
+ len = data.len;
+ if (tailblk->addr+sz == htx->tail_addr) {
+ if (data.len <= tailroom)
+ goto append_data;
+ else if (!htx->head_addr) {
+ len = tailroom;
+ goto append_data;
+ }
+ }
+ else if (tailblk->addr+sz == htx->head_addr && data.len <= headroom)
+ goto append_data;
+
+ goto add_new_block;
+
+ append_data:
+ /* Append data and update the block itself */
+ ptr = htx_get_blk_ptr(htx, tailblk);
+ htx_memcpy(ptr+sz, data.ptr, len);
+ htx_change_blk_value_len(htx, tailblk, sz+len);
+
+ if (data.len == len) {
+ blk = tailblk;
+ goto end;
+ }
+ data = istadv(data, len);
+
+ add_new_block:
+ blk = htx_add_blk(htx, HTX_BLK_DATA, data.len);
+ if (!blk)
+ return NULL;
+
+ blk->info += data.len;
+ htx_memcpy(htx_get_blk_ptr(htx, blk), data.ptr, data.len);
+
+ end:
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return blk;
+}
+
+/* Replaces a value part of a block by a new one. The new part can be smaller or
+ * larger than the old one. This function works for any kind of block with
+ * attached data. It returns the new block on success, otherwise it returns
+ * NULL.
+ */
+struct htx_blk *htx_replace_blk_value(struct htx *htx, struct htx_blk *blk,
+ const struct ist old, const struct ist new)
+{
+ struct ist n, v;
+ int32_t delta;
+ int ret;
+
+ n = htx_get_blk_name(htx, blk);
+ v = htx_get_blk_value(htx, blk);
+ delta = new.len - old.len;
+ ret = htx_prepare_blk_expansion(htx, blk, delta);
+ if (!ret)
+ return NULL; /* not enough space */
+
+ if (ret == 1) { /* Replace in place */
+ if (delta <= 0) {
+ /* compression: copy new data first then move the end */
+ htx_memcpy(old.ptr, new.ptr, new.len);
+ memmove(old.ptr + new.len, istend(old),
+ istend(v) - istend(old));
+ }
+ else {
+ /* expansion: move the end first then copy new data */
+ memmove(old.ptr + new.len, istend(old),
+ istend(v) - istend(old));
+ htx_memcpy(old.ptr, new.ptr, new.len);
+ }
+
+ /* set the new block size and update HTX message */
+ htx_set_blk_value_len(blk, v.len + delta);
+ htx->data += delta;
+ }
+ else if (ret == 2) { /* New address but no defrag */
+ void *ptr = htx_get_blk_ptr(htx, blk);
+
+ /* Copy the name, if any */
+ htx_memcpy(ptr, n.ptr, n.len);
+ ptr += n.len;
+
+ /* Copy value before old part, if any */
+ htx_memcpy(ptr, v.ptr, old.ptr - v.ptr);
+ ptr += old.ptr - v.ptr;
+
+ /* Copy new value */
+ htx_memcpy(ptr, new.ptr, new.len);
+ ptr += new.len;
+
+ /* Copy value after old part, if any */
+ htx_memcpy(ptr, istend(old), istend(v) - istend(old));
+
+ /* set the new block size and update HTX message */
+ htx_set_blk_value_len(blk, v.len + delta);
+ htx->data += delta;
+ }
+	else { /* Do a defrag first (it is always an expansion) */
+ struct htx_blk tmpblk;
+ int32_t offset;
+
+ /* use tmpblk to set new block size before defrag and to compute
+ * the offset after defrag
+ */
+ tmpblk.addr = blk->addr;
+ tmpblk.info = blk->info;
+ htx_set_blk_value_len(&tmpblk, v.len + delta);
+
+ /* htx_defrag() will take care to update the block size and the htx message */
+ blk = htx_defrag(htx, blk, tmpblk.info);
+
+ /* newblk is now the new HTX block. Compute the offset to copy/move payload */
+ offset = blk->addr - tmpblk.addr;
+
+ /* move the end first and copy new data
+ */
+ memmove(old.ptr + offset + new.len, old.ptr + offset + old.len,
+ istend(v) - istend(old));
+ htx_memcpy(old.ptr + offset, new.ptr, new.len);
+ }
+ return blk;
+}
+
+/* Transfer HTX blocks from <src> to <dst>, stopping on the first block of the
+ * type <mark> (typically EOH or EOT) or when <count> bytes were moved
+ * (including payload and meta-data). It returns the number of bytes moved and
+ * the last HTX block inserted in <dst>.
+ */
+struct htx_ret htx_xfer_blks(struct htx *dst, struct htx *src, uint32_t count,
+ enum htx_blk_type mark)
+{
+ struct htx_blk *blk, *dstblk;
+ struct htx_blk *srcref, *dstref;
+ enum htx_blk_type type;
+ uint32_t info, max, sz, ret;
+
+ ret = htx_used_space(dst);
+ srcref = dstref = dstblk = NULL;
+
+ /* blocks are not removed yet from <src> HTX message to be able to
+ * rollback the transfer if all the headers/trailers are not copied.
+ */
+ for (blk = htx_get_head_blk(src); blk && count; blk = htx_get_next_blk(src, blk)) {
+ type = htx_get_blk_type(blk);
+
+ /* Ignore unused block */
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+
+ max = htx_get_max_blksz(dst, count);
+ if (!max)
+ break;
+
+ sz = htx_get_blksz(blk);
+ info = blk->info;
+ if (sz > max) {
+ /* Only DATA blocks can be partially xferred */
+ if (type != HTX_BLK_DATA)
+ break;
+ sz = max;
+ info = (type << 28) + sz;
+ }
+
+ dstblk = htx_reserve_nxblk(dst, sz);
+ if (!dstblk)
+ break;
+ dstblk->info = info;
+ htx_memcpy(htx_get_blk_ptr(dst, dstblk), htx_get_blk_ptr(src, blk), sz);
+
+ count -= sizeof(dstblk) + sz;
+ if (blk->info != info) {
+ /* Partial xfer: don't remove <blk> from <src> but
+ * resize its content */
+ htx_cut_data_blk(src, blk, sz);
+ break;
+ }
+
+ if (type == mark) {
+ blk = htx_get_next_blk(src, blk);
+ srcref = dstref = NULL;
+ break;
+ }
+
+ /* Save <blk> to <srcref> and <dstblk> to <dstref> when we start
+ * to xfer headers or trailers. When EOH/EOT block is reached,
+ * both are reset. It is mandatory to be able to rollback a
+ * partial transfer.
+ */
+ if (!srcref && !dstref &&
+ (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL || type == HTX_BLK_TLR)) {
+ srcref = blk;
+ dstref = dstblk;
+ }
+ else if (type == HTX_BLK_EOH || type == HTX_BLK_EOT)
+ srcref = dstref = NULL;
+ }
+
+ if (unlikely(dstref)) {
+ /* Headers or trailers part was partially xferred, so rollback
+ * the copy by removing all block between <dstref> and <dstblk>,
+ * both included. <dstblk> may be NULL.
+ */
+ while (dstref && dstref != dstblk)
+ dstref = htx_remove_blk(dst, dstref);
+ if (dstblk)
+ htx_remove_blk(dst, dstblk);
+
+ /* <dst> HTX message is empty, it means the headers or trailers
+ * part is too big to be copied at once.
+ */
+ if (htx_is_empty(dst))
+ src->flags |= HTX_FL_PARSING_ERROR;
+ }
+
+ /* Now, remove xferred blocks from <src> htx message */
+ if (!blk && !srcref) {
+ /* End of src reached, all blocks were consumed, drain all data */
+ htx_drain(src, src->data);
+ }
+ else {
+		/* Remove all blocks from the head to <blk>, or <srcref> if defined, excluded */
+ srcref = (srcref ? srcref : blk);
+ for (blk = htx_get_head_blk(src); blk && blk != srcref; blk = htx_remove_blk(src, blk));
+ }
+
+ end:
+ ret = htx_used_space(dst) - ret;
+ return (struct htx_ret){.ret = ret, .blk = dstblk};
+}
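+
+/* Usage sketch (illustrative, not part of the source): transfer at most
+ * <count> bytes from <src> to <dst>, stopping after the end-of-headers block,
+ * as a mux typically does to emit the headers first.
+ */
+#ifdef HTX_EXAMPLE
+static uint32_t example_xfer_headers(struct htx *dst, struct htx *src, uint32_t count)
+{
+	struct htx_ret ret = htx_xfer_blks(dst, src, count, HTX_BLK_EOH);
+
+	return ret.ret; /* bytes (payload + meta-data) actually moved */
+}
+#endif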
+
+/* Replaces a header with a new one. The new header can be smaller or larger
+ * than the old one. It returns the new block on success, otherwise it returns
+ * NULL. The header name is always lower cased.
+ */
+struct htx_blk *htx_replace_header(struct htx *htx, struct htx_blk *blk,
+ const struct ist name, const struct ist value)
+{
+ enum htx_blk_type type;
+ void *ptr;
+ int32_t delta;
+ int ret;
+
+ type = htx_get_blk_type(blk);
+ if (type != HTX_BLK_HDR)
+ return NULL;
+
+ delta = name.len + value.len - htx_get_blksz(blk);
+ ret = htx_prepare_blk_expansion(htx, blk, delta);
+ if (!ret)
+ return NULL; /* not enough space */
+
+
+	/* Replacing in place or at a new address is the same: we replace the
+	 * whole header (name+value). We only take care to defrag the message
+	 * if necessary. */
+ if (ret == 3)
+ blk = htx_defrag(htx, blk, (type << 28) + (value.len << 8) + name.len);
+ else {
+ /* Set the new block size and update HTX message */
+ blk->info = (type << 28) + (value.len << 8) + name.len;
+ htx->data += delta;
+ }
+
+ /* Finally, copy data. */
+ ptr = htx_get_blk_ptr(htx, blk);
+ ist2bin_lc(ptr, name);
+ htx_memcpy(ptr + name.len, value.ptr, value.len);
+ return blk;
+}
+
+/* Replaces the parts of the start-line. It returns the new start-line on
+ * success, otherwise it returns NULL. It is the caller's responsibility to
+ * update sl->info, if necessary.
+ */
+struct htx_sl *htx_replace_stline(struct htx *htx, struct htx_blk *blk, const struct ist p1,
+ const struct ist p2, const struct ist p3)
+{
+ enum htx_blk_type type;
+ struct htx_sl *sl;
+ struct htx_sl tmp; /* used to save sl->info and sl->flags */
+ uint32_t sz;
+ int32_t delta;
+ int ret;
+
+ type = htx_get_blk_type(blk);
+ if (type != HTX_BLK_REQ_SL && type != HTX_BLK_RES_SL)
+ return NULL;
+
+ /* Save start-line info and flags */
+ sl = htx_get_blk_ptr(htx, blk);
+ tmp.info = sl->info;
+ tmp.flags = sl->flags;
+
+ sz = htx_get_blksz(blk);
+ delta = sizeof(*sl) + p1.len + p2.len + p3.len - sz;
+ ret = htx_prepare_blk_expansion(htx, blk, delta);
+ if (!ret)
+ return NULL; /* not enough space */
+
+	/* Replacing in place or at a new address is the same: we replace the
+	 * whole start-line. We only take care to defrag the message if
+	 * necessary. */
+ if (ret == 3) {
+ blk = htx_defrag(htx, blk, (type << 28) + sz + delta);
+ }
+ else {
+ /* Set the new block size and update HTX message */
+ blk->info = (type << 28) + sz + delta;
+ htx->data += delta;
+ }
+
+ /* Restore start-line info and flags and copy parts of the start-line */
+ sl = htx_get_blk_ptr(htx, blk);
+ sl->info = tmp.info;
+ sl->flags = tmp.flags;
+
+ HTX_SL_P1_LEN(sl) = p1.len;
+ HTX_SL_P2_LEN(sl) = p2.len;
+ HTX_SL_P3_LEN(sl) = p3.len;
+
+ htx_memcpy(HTX_SL_P1_PTR(sl), p1.ptr, p1.len);
+ htx_memcpy(HTX_SL_P2_PTR(sl), p2.ptr, p2.len);
+ htx_memcpy(HTX_SL_P3_PTR(sl), p3.ptr, p3.len);
+
+ return sl;
+}
+
+/* Reserves the maximum possible size for an HTX data block, by extending an
+ * existing one or by creating a new one. It returns a compound result with the
+ * HTX block and the position where new data must be inserted (0 for a new
+ * block). If an error occurs or if there is no space left, NULL is returned
+ * instead of a pointer to an HTX block.
+ */
+struct htx_ret htx_reserve_max_data(struct htx *htx)
+{
+ struct htx_blk *blk, *tailblk;
+ uint32_t sz, room;
+ int32_t len = htx_free_data_space(htx);
+
+ if (htx->head == -1)
+ goto rsv_new_block;
+
+ if (!len)
+ return (struct htx_ret){.ret = 0, .blk = NULL};
+
+ /* get the tail and head block */
+ tailblk = htx_get_tail_blk(htx);
+ if (tailblk == NULL)
+ goto rsv_new_block;
+ sz = htx_get_blksz(tailblk);
+
+ /* Don't try to append data if the last inserted block is not of the
+ * same type */
+ if (htx_get_blk_type(tailblk) != HTX_BLK_DATA)
+ goto rsv_new_block;
+
+ /*
+ * Same type and enough space: append data
+ */
+ if (!htx->head_addr) {
+ if (tailblk->addr+sz != htx->tail_addr)
+ goto rsv_new_block;
+ room = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr);
+ }
+ else {
+ if (tailblk->addr+sz != htx->head_addr)
+ goto rsv_new_block;
+ room = (htx->end_addr - htx->head_addr);
+ }
+ BUG_ON((int32_t)room < 0);
+ if (room < len)
+ len = room;
+
+append_data:
+ htx_change_blk_value_len(htx, tailblk, sz+len);
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return (struct htx_ret){.ret = sz, .blk = tailblk};
+
+rsv_new_block:
+ blk = htx_add_blk(htx, HTX_BLK_DATA, len);
+ if (!blk)
+ return (struct htx_ret){.ret = 0, .blk = NULL};
+ blk->info += len;
+ return (struct htx_ret){.ret = 0, .blk = blk};
+}
+
+/* Adds an HTX block of type DATA in <htx>. It first tries to append data if
+ * possible. It returns the number of bytes consumed from <data>, which may be
+ * zero if nothing could be copied.
+ */
+size_t htx_add_data(struct htx *htx, const struct ist data)
+{
+ struct htx_blk *blk, *tailblk;
+ void *ptr;
+ uint32_t sz, room;
+ int32_t len = data.len;
+
+ /* Not enough space to store data */
+ if (len > htx_free_data_space(htx))
+ len = htx_free_data_space(htx);
+
+ if (!len)
+ return 0;
+
+ if (htx->head == -1)
+ goto add_new_block;
+
+ /* get the tail and head block */
+ tailblk = htx_get_tail_blk(htx);
+ if (tailblk == NULL)
+ goto add_new_block;
+ sz = htx_get_blksz(tailblk);
+
+ /* Don't try to append data if the last inserted block is not of the
+ * same type */
+ if (htx_get_blk_type(tailblk) != HTX_BLK_DATA)
+ goto add_new_block;
+
+ /*
+ * Same type and enough space: append data
+ */
+ if (!htx->head_addr) {
+ if (tailblk->addr+sz != htx->tail_addr)
+ goto add_new_block;
+ room = (htx_pos_to_addr(htx, htx->tail) - htx->tail_addr);
+ }
+ else {
+ if (tailblk->addr+sz != htx->head_addr)
+ goto add_new_block;
+ room = (htx->end_addr - htx->head_addr);
+ }
+ BUG_ON((int32_t)room < 0);
+ if (room < len)
+ len = room;
+
+ append_data:
+ /* Append data and update the block itself */
+ ptr = htx_get_blk_ptr(htx, tailblk);
+ htx_memcpy(ptr + sz, data.ptr, len);
+ htx_change_blk_value_len(htx, tailblk, sz+len);
+
+ BUG_ON((int32_t)htx->tail_addr < 0);
+ BUG_ON((int32_t)htx->head_addr < 0);
+ BUG_ON(htx->end_addr > htx->tail_addr);
+ BUG_ON(htx->head_addr > htx->end_addr);
+ return len;
+
+ add_new_block:
+ blk = htx_add_blk(htx, HTX_BLK_DATA, len);
+ if (!blk)
+ return 0;
+
+ blk->info += len;
+ htx_memcpy(htx_get_blk_ptr(htx, blk), data.ptr, len);
+ return len;
+}
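+
+/* Usage sketch (illustrative, not part of the source): feed a raw buffer into
+ * the message, handling the partial copy performed when the message is almost
+ * full.
+ */
+#ifdef HTX_EXAMPLE
+static size_t example_feed(struct htx *htx, const char *buf, size_t len)
+{
+	size_t sent = htx_add_data(htx, ist2(buf, len));
+
+	/* <sent> may be smaller than <len>: the caller must retry later with
+	 * the remaining bytes once some room was freed.
+	 */
+	return sent;
+}
+#endif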
+
+
+/* Adds an HTX block of type DATA in <htx>, just after all other DATA
+ * blocks. Because it relies on htx_add_data_atonce(), the data may be appended
+ * to an existing DATA block if possible. But, if the function succeeds, it will
+ * be the last DATA block in all cases. If an error occurred, NULL is
+ * returned. Otherwise, on success, the updated block (or the new one) is
+ * returned.
+ */
+struct htx_blk *htx_add_last_data(struct htx *htx, struct ist data)
+{
+ struct htx_blk *blk, *pblk;
+
+ blk = htx_add_data_atonce(htx, data);
+ if (!blk)
+ return NULL;
+
+ for (pblk = htx_get_prev_blk(htx, blk); pblk; pblk = htx_get_prev_blk(htx, pblk)) {
+ if (htx_get_blk_type(pblk) <= HTX_BLK_DATA)
+ break;
+
+ /* Swap .addr and .info fields */
+ blk->addr ^= pblk->addr; pblk->addr ^= blk->addr; blk->addr ^= pblk->addr;
+ blk->info ^= pblk->info; pblk->info ^= blk->info; blk->info ^= pblk->info;
+
+ if (blk->addr == pblk->addr)
+ blk->addr += htx_get_blksz(pblk);
+ blk = pblk;
+ }
+
+ return blk;
+}
+
+/* Moves the block <blk> just before the block <ref>. Both blocks must be in the
+ * HTX message <htx> and <blk> must be placed after <ref>. Pointers to these
+ * blocks are updated to remain valid after the move. */
+void htx_move_blk_before(struct htx *htx, struct htx_blk **blk, struct htx_blk **ref)
+{
+ struct htx_blk *cblk, *pblk;
+
+ cblk = *blk;
+ for (pblk = htx_get_prev_blk(htx, cblk); pblk; pblk = htx_get_prev_blk(htx, pblk)) {
+ /* Swap .addr and .info fields */
+ cblk->addr ^= pblk->addr; pblk->addr ^= cblk->addr; cblk->addr ^= pblk->addr;
+ cblk->info ^= pblk->info; pblk->info ^= cblk->info; cblk->info ^= pblk->info;
+
+ if (cblk->addr == pblk->addr)
+ cblk->addr += htx_get_blksz(pblk);
+ if (pblk == *ref)
+ break;
+ cblk = pblk;
+ }
+ *blk = cblk;
+ *ref = pblk;
+}
+
+/* Appends the HTX message <src> to the HTX message <dst>. It returns 1 on
+ * success and 0 on error. Either the whole message is copied or nothing at
+ * all. If an error occurred, all blocks from <src> already appended to <dst>
+ * are truncated.
+ */
+int htx_append_msg(struct htx *dst, const struct htx *src)
+{
+ struct htx_blk *blk, *newblk;
+ enum htx_blk_type type;
+ uint32_t blksz, offset = dst->data;
+
+ for (blk = htx_get_head_blk(src); blk; blk = htx_get_next_blk(src, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ blksz = htx_get_blksz(blk);
+ newblk = htx_add_blk(dst, type, blksz);
+ if (!newblk)
+ goto error;
+ newblk->info = blk->info;
+ htx_memcpy(htx_get_blk_ptr(dst, newblk), htx_get_blk_ptr(src, blk), blksz);
+ }
+
+ return 1;
+
+ error:
+ htx_truncate(dst, offset);
+ return 0;
+}
diff --git a/src/init.c b/src/init.c
new file mode 100644
index 0000000..6367ac5
--- /dev/null
+++ b/src/init.c
@@ -0,0 +1,249 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <haproxy/init.h>
+#include <haproxy/list.h>
+
+/* These functions are called just before a config validity check, which means
+ * they are suited for cases where we need to generate part of the
+ * configuration. They could be used for example to generate a proxy with
+ * multiple servers, using the configuration parser itself. At this step the
+ * trash buffers are allocated.
+ * The functions must return 0 on success, or a combination
+ * of ERR_* flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The two latter cause
+ * an immediate exit, so the function must have emitted any useful error.
+ */
+struct list pre_check_list = LIST_HEAD_INIT(pre_check_list);
+
+/* These functions are called just after the point where the program would exit
+ * after a config validity check, so they are generally suited for resource
+ * allocations and slow initializations that should be skipped during basic
+ * config checks. The functions must return 0 on success, or a combination
+ * of ERR_* flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The two latter cause
+ * an immediate exit, so the function must have emitted any useful error.
+ */
+struct list post_check_list = LIST_HEAD_INIT(post_check_list);
+
+/* These functions are called for each proxy just after the config validity
+ * check. The functions must return 0 on success, or a combination of ERR_*
+ * flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The two latter cause an
+ * immediate exit, so the function must have emitted any useful error.
+ */
+struct list post_proxy_check_list = LIST_HEAD_INIT(post_proxy_check_list);
+
+/* These functions are called for each server just after the config validity
+ * check. The functions must return 0 on success, or a combination of ERR_*
+ * flags (ERR_WARN, ERR_ABORT, ERR_FATAL, ...). The two latter cause an
+ * immediate exit, so the function must have emitted any useful error.
+ */
+struct list post_server_check_list = LIST_HEAD_INIT(post_server_check_list);
+
+/* These functions are called for each thread just after the thread creation
+ * and before running the init functions. They should be used to do per-thread
+ * (re-)allocations that are needed by subsequent functions. They must return 0
+ * if an error occurred. */
+struct list per_thread_alloc_list = LIST_HEAD_INIT(per_thread_alloc_list);
+
+/* These functions are called for each thread just after the thread creation
+ * and before running the scheduler. They should be used to do per-thread
+ * initializations. They must return 0 if an error occurred. */
+struct list per_thread_init_list = LIST_HEAD_INIT(per_thread_init_list);
+
+/* These functions are called when freeing the global sections at the end of
+ * deinit, after everything is stopped. They don't return anything. They should
+ * not release shared resources that are possibly used by other deinit
+ * functions, only close/release what is private. Use the per_thread_free_list
+ * to release shared resources.
+ */
+struct list post_deinit_list = LIST_HEAD_INIT(post_deinit_list);
+
+/* These functions are called when freeing a proxy during the deinit, after
+ * everything is stopped. They don't return anything. They should not release
+ * the proxy itself or any shared resources that are possibly used by other
+ * deinit functions, only close/release what is private.
+ */
+struct list proxy_deinit_list = LIST_HEAD_INIT(proxy_deinit_list);
+
+/* These functions are called when freeing a server during the deinit, after
+ * everything is stopped. They don't return anything. They should not release
+ * the proxy itself or any shared resources that are possibly used by other
+ * deinit functions, only close/release what is private.
+ */
+struct list server_deinit_list = LIST_HEAD_INIT(server_deinit_list);
+
+/* These functions are called when freeing the global sections at the end of
+ * deinit, after the thread deinit functions, to release unneeded memory
+ * allocations. They don't return anything, and they work in best effort mode
+ * as their sole goal is to make valgrind mostly happy.
+ */
+struct list per_thread_free_list = LIST_HEAD_INIT(per_thread_free_list);
+
+/* These functions are called for each thread just after the scheduler loop and
+ * before exiting the thread. They don't return anything and, as for post-deinit
+ * functions, they work in best effort mode as their sole goal is to make
+ * valgrind mostly happy. */
+struct list per_thread_deinit_list = LIST_HEAD_INIT(per_thread_deinit_list);
+
+/* used to register some initialization functions to call before the checks. */
+void hap_register_pre_check(int (*fct)())
+{
+ struct pre_check_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&pre_check_list, &b->list);
+}
+
+/* used to register some initialization functions to call after the checks. */
+void hap_register_post_check(int (*fct)())
+{
+ struct post_check_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&post_check_list, &b->list);
+}
+
+/* used to register some initialization functions to call for each proxy after
+ * the checks.
+ */
+void hap_register_post_proxy_check(int (*fct)(struct proxy *))
+{
+ struct post_proxy_check_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&post_proxy_check_list, &b->list);
+}
+
+/* used to register some initialization functions to call for each server after
+ * the checks.
+ */
+void hap_register_post_server_check(int (*fct)(struct server *))
+{
+ struct post_server_check_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&post_server_check_list, &b->list);
+}
+
+/* used to register some de-initialization functions to call after everything
+ * has stopped.
+ */
+void hap_register_post_deinit(void (*fct)())
+{
+ struct post_deinit_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&post_deinit_list, &b->list);
+}
+
+/* used to register some per proxy de-initialization functions to call after
+ * everything has stopped.
+ */
+void hap_register_proxy_deinit(void (*fct)(struct proxy *))
+{
+ struct proxy_deinit_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&proxy_deinit_list, &b->list);
+}
+
+/* used to register some per server de-initialization functions to call after
+ * everything has stopped.
+ */
+void hap_register_server_deinit(void (*fct)(struct server *))
+{
+ struct server_deinit_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&server_deinit_list, &b->list);
+}
+
+/* used to register some allocation functions to call for each thread. */
+void hap_register_per_thread_alloc(int (*fct)())
+{
+ struct per_thread_alloc_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&per_thread_alloc_list, &b->list);
+}
+
+/* used to register some initialization functions to call for each thread. */
+void hap_register_per_thread_init(int (*fct)())
+{
+ struct per_thread_init_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&per_thread_init_list, &b->list);
+}
+
+/* used to register some de-initialization functions to call for each thread. */
+void hap_register_per_thread_deinit(void (*fct)())
+{
+ struct per_thread_deinit_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&per_thread_deinit_list, &b->list);
+}
+
+/* used to register some free functions to call for each thread. */
+void hap_register_per_thread_free(void (*fct)())
+{
+ struct per_thread_free_fct *b;
+
+ b = calloc(1, sizeof(*b));
+ if (!b) {
+ fprintf(stderr, "out of memory\n");
+ exit(1);
+ }
+ b->fct = fct;
+ LIST_APPEND(&per_thread_free_list, &b->list);
+}
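
As a usage sketch for the registration API above (hypothetical module code;
mymod_post_check() and mymod_init() are invented names), a subsystem would
register a post-check hook from its own init path. In the real tree this is
usually wrapped by INITCALL helpers such as the REGISTER_POST_DEINIT() macro
used in src/jwt.c below.

    /* sketch: run a hook once the configuration has been validated */
    static int mymod_post_check(void)
    {
            /* slow allocations go here; trash buffers are available */
            return ERR_NONE; /* or a combination of ERR_* flags on failure */
    }

    static void mymod_init(void)
    {
            hap_register_post_check(mymod_post_check);
    }
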
diff --git a/src/jwt.c b/src/jwt.c
new file mode 100644
index 0000000..6c4cbd3
--- /dev/null
+++ b/src/jwt.c
@@ -0,0 +1,478 @@
+/*
+ * JSON Web Token (JWT) processing
+ *
+ * Copyright 2021 HAProxy Technologies
+ * Remi Tricot-Le Breton <rlebreton@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <import/ebmbtree.h>
+#include <import/ebsttree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/tools.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/base64.h>
+#include <haproxy/jwt.h>
+#include <haproxy/buf.h>
+
+
+#ifdef USE_OPENSSL
+/* Tree into which the public certificates used to validate JWTs will be stored. */
+static struct eb_root jwt_cert_tree = EB_ROOT_UNIQUE;
+
+/*
+ * The possible algorithm strings that can be found in a JWS's JOSE header are
+ * defined in section 3.1 of RFC7518.
+ */
+enum jwt_alg jwt_parse_alg(const char *alg_str, unsigned int alg_len)
+{
+ enum jwt_alg alg = JWT_ALG_DEFAULT;
+
+ /* Algorithms are all 5 characters long apart from "none". */
+ if (alg_len < sizeof("HS256")-1) {
+ if (alg_len == sizeof("none")-1 && strcmp("none", alg_str) == 0)
+ alg = JWS_ALG_NONE;
+ return alg;
+ }
+
+ if (alg == JWT_ALG_DEFAULT) {
+ switch(*alg_str++) {
+ case 'H':
+ if (strncmp(alg_str, "S256", alg_len-1) == 0)
+ alg = JWS_ALG_HS256;
+ else if (strncmp(alg_str, "S384", alg_len-1) == 0)
+ alg = JWS_ALG_HS384;
+ else if (strncmp(alg_str, "S512", alg_len-1) == 0)
+ alg = JWS_ALG_HS512;
+ break;
+ case 'R':
+ if (strncmp(alg_str, "S256", alg_len-1) == 0)
+ alg = JWS_ALG_RS256;
+ else if (strncmp(alg_str, "S384", alg_len-1) == 0)
+ alg = JWS_ALG_RS384;
+ else if (strncmp(alg_str, "S512", alg_len-1) == 0)
+ alg = JWS_ALG_RS512;
+ break;
+ case 'E':
+ if (strncmp(alg_str, "S256", alg_len-1) == 0)
+ alg = JWS_ALG_ES256;
+ else if (strncmp(alg_str, "S384", alg_len-1) == 0)
+ alg = JWS_ALG_ES384;
+ else if (strncmp(alg_str, "S512", alg_len-1) == 0)
+ alg = JWS_ALG_ES512;
+ break;
+ case 'P':
+ if (strncmp(alg_str, "S256", alg_len-1) == 0)
+ alg = JWS_ALG_PS256;
+ else if (strncmp(alg_str, "S384", alg_len-1) == 0)
+ alg = JWS_ALG_PS384;
+ else if (strncmp(alg_str, "S512", alg_len-1) == 0)
+ alg = JWS_ALG_PS512;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return alg;
+}
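
For example, jwt_parse_alg("RS256", 5) enters the 'R' branch and returns
JWS_ALG_RS256, jwt_parse_alg("none", 4) takes the short-string path and
returns JWS_ALG_NONE, and any unrecognized string falls through and leaves
the result at JWT_ALG_DEFAULT.
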
+
+/*
+ * Split a JWT into its separate dot-separated parts.
+ * Since only JWSes following the Compact Serialization format are managed for
+ * now, we don't need to handle more than three subparts per token.
+ * See section 3.1 of RFC7515 for more information about JWS Compact
+ * Serialization.
+ * Returns 0 in case of success.
+ */
+int jwt_tokenize(const struct buffer *jwt, struct jwt_item *items, unsigned int *item_num)
+{
+ char *ptr = jwt->area;
+ char *jwt_end = jwt->area + jwt->data;
+ unsigned int index = 0;
+ unsigned int length = 0;
+
+ if (index < *item_num) {
+ items[index].start = ptr;
+ items[index].length = 0;
+ }
+
+ while (index < *item_num && ptr < jwt_end) {
+ if (*ptr++ == '.') {
+ items[index++].length = length;
+
+ if (index == *item_num)
+ return -1;
+ items[index].start = ptr;
+ items[index].length = 0;
+ length = 0;
+ } else
+ ++length;
+ }
+
+ if (index < *item_num)
+ items[index].length = length;
+
+ *item_num = (index+1);
+
+ return (ptr != jwt_end);
+}
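
A minimal tokenizing sketch (the buffer contents below are placeholders, not
a real signed token):

    struct buffer tok = { .area = "eyJhbGciOiJIUzI1NiJ9.eyJmb28iOiJiYXIifQ.c2ln",
                          .data = 44 };
    struct jwt_item items[JWT_ELT_MAX] = { { 0 } };
    unsigned int n = JWT_ELT_MAX;

    if (jwt_tokenize(&tok, items, &n) == 0 && n == JWT_ELT_MAX) {
            /* items[JWT_ELT_JOSE], items[JWT_ELT_CLAIMS] and items[JWT_ELT_SIG]
             * now point into tok.area, each with its dot-free length set
             */
    }
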
+
+/*
+ * Parse a public certificate and insert it into the jwt_cert_tree.
+ * Returns 0 in case of success.
+ */
+int jwt_tree_load_cert(char *path, int pathlen, char **err)
+{
+ int retval = -1;
+ struct jwt_cert_tree_entry *entry = NULL;
+ EVP_PKEY *pkey = NULL;
+ BIO *bio = NULL;
+
+ entry = calloc(1, sizeof(*entry) + pathlen + 1);
+ if (!entry) {
+ memprintf(err, "%sunable to allocate memory (jwt_cert_tree_entry).\n", err && *err ? *err : "");
+ return -1;
+ }
+ memcpy(entry->path, path, pathlen + 1);
+
+ if (ebst_insert(&jwt_cert_tree, &entry->node) != &entry->node) {
+ free(entry);
+ return 0; /* Entry already in the tree */
+ }
+
+ bio = BIO_new(BIO_s_file());
+ if (!bio) {
+ memprintf(err, "%sunable to allocate memory (BIO).\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ if (BIO_read_filename(bio, path) == 1) {
+
+ pkey = PEM_read_bio_PUBKEY(bio, NULL, NULL, NULL);
+
+ if (!pkey) {
+ memprintf(err, "%sfile not found (%s)\n", err && *err ? *err : "", path);
+ goto end;
+ }
+
+ entry->pkey = pkey;
+ retval = 0;
+ }
+
+end:
+ if (retval) {
+ /* Some error happened during pkey parsing, remove the already
+ * inserted node from the tree and free it.
+ */
+ ebmb_delete(&entry->node);
+ free(entry);
+ }
+ BIO_free(bio);
+ return retval;
+}
+
+/*
+ * Calculate the HMAC signature of a specific JWT and check that it matches the
+ * one included in the token.
+ * Returns 1 in case of success.
+ */
+static enum jwt_vrfy_status
+jwt_jwsverify_hmac(const struct jwt_ctx *ctx, const struct buffer *decoded_signature)
+{
+ const EVP_MD *evp = NULL;
+ unsigned char signature[EVP_MAX_MD_SIZE];
+ unsigned int signature_length = 0;
+ unsigned char *hmac_res = NULL;
+ enum jwt_vrfy_status retval = JWT_VRFY_KO;
+
+ switch(ctx->alg) {
+ case JWS_ALG_HS256:
+ evp = EVP_sha256();
+ break;
+ case JWS_ALG_HS384:
+ evp = EVP_sha384();
+ break;
+ case JWS_ALG_HS512:
+ evp = EVP_sha512();
+ break;
+ default: break;
+ }
+
+ hmac_res = HMAC(evp, ctx->key, ctx->key_length, (const unsigned char*)ctx->jose.start,
+ ctx->jose.length + ctx->claims.length + 1, signature, &signature_length);
+
+ if (hmac_res && signature_length == decoded_signature->data &&
+ (CRYPTO_memcmp(decoded_signature->area, signature, signature_length) == 0))
+ retval = JWT_VRFY_OK;
+
+ return retval;
+}
+
+/*
+ * Convert a JWT ECDSA signature (R and S parameters concatenated, see section
+ * 3.4 of RFC7518) into an ECDSA_SIG that can be fed back into OpenSSL's digest
+ * verification functions.
+ * Returns 0 in case of success.
+ */
+static int convert_ecdsa_sig(const struct jwt_ctx *ctx, EVP_PKEY *pkey, struct buffer *signature)
+{
+ int retval = 0;
+ ECDSA_SIG *ecdsa_sig = NULL;
+ BIGNUM *ec_R = NULL, *ec_S = NULL;
+ unsigned int bignum_len;
+ unsigned char *p;
+
+ ecdsa_sig = ECDSA_SIG_new();
+ if (!ecdsa_sig) {
+ retval = JWT_VRFY_OUT_OF_MEMORY;
+ goto end;
+ }
+
+ if (b_data(signature) % 2) {
+ retval = JWT_VRFY_INVALID_TOKEN;
+ goto end;
+ }
+
+ bignum_len = b_data(signature) / 2;
+
+ ec_R = BN_bin2bn((unsigned char*)b_orig(signature), bignum_len, NULL);
+ ec_S = BN_bin2bn((unsigned char *)(b_orig(signature) + bignum_len), bignum_len, NULL);
+
+ if (!ec_R || !ec_S) {
+ retval = JWT_VRFY_INVALID_TOKEN;
+ goto end;
+ }
+
+ /* Build ecdsa out of R and S values. */
+ ECDSA_SIG_set0(ecdsa_sig, ec_R, ec_S);
+
+ p = (unsigned char*)signature->area;
+
+ signature->data = i2d_ECDSA_SIG(ecdsa_sig, &p);
+ if (signature->data == 0) {
+ retval = JWT_VRFY_INVALID_TOKEN;
+ goto end;
+ }
+
+end:
+ ECDSA_SIG_free(ecdsa_sig);
+ return retval;
+}
+
+/*
+ * Check that the signature included in a JWT signed via RSA or ECDSA is valid
+ * and can be verified thanks to a given public certificate.
+ * Returns 1 in case of success.
+ */
+static enum jwt_vrfy_status
+jwt_jwsverify_rsa_ecdsa(const struct jwt_ctx *ctx, struct buffer *decoded_signature)
+{
+ const EVP_MD *evp = NULL;
+ EVP_MD_CTX *evp_md_ctx;
+ EVP_PKEY_CTX *pkey_ctx = NULL;
+ enum jwt_vrfy_status retval = JWT_VRFY_KO;
+ struct ebmb_node *eb;
+ struct jwt_cert_tree_entry *entry = NULL;
+ int is_ecdsa = 0;
+ int padding = RSA_PKCS1_PADDING;
+
+ switch(ctx->alg) {
+ case JWS_ALG_RS256:
+ evp = EVP_sha256();
+ break;
+ case JWS_ALG_RS384:
+ evp = EVP_sha384();
+ break;
+ case JWS_ALG_RS512:
+ evp = EVP_sha512();
+ break;
+
+ case JWS_ALG_ES256:
+ evp = EVP_sha256();
+ is_ecdsa = 1;
+ break;
+ case JWS_ALG_ES384:
+ evp = EVP_sha384();
+ is_ecdsa = 1;
+ break;
+ case JWS_ALG_ES512:
+ evp = EVP_sha512();
+ is_ecdsa = 1;
+ break;
+
+ case JWS_ALG_PS256:
+ evp = EVP_sha256();
+ padding = RSA_PKCS1_PSS_PADDING;
+ break;
+ case JWS_ALG_PS384:
+ evp = EVP_sha384();
+ padding = RSA_PKCS1_PSS_PADDING;
+ break;
+ case JWS_ALG_PS512:
+ evp = EVP_sha512();
+ padding = RSA_PKCS1_PSS_PADDING;
+ break;
+ default: break;
+ }
+
+ evp_md_ctx = EVP_MD_CTX_new();
+ if (!evp_md_ctx)
+ return JWT_VRFY_OUT_OF_MEMORY;
+
+ eb = ebst_lookup(&jwt_cert_tree, ctx->key);
+
+ if (!eb) {
+ retval = JWT_VRFY_UNKNOWN_CERT;
+ goto end;
+ }
+
+ entry = ebmb_entry(eb, struct jwt_cert_tree_entry, node);
+
+ if (!entry->pkey) {
+ retval = JWT_VRFY_UNKNOWN_CERT;
+ goto end;
+ }
+
+ /*
+ * ECXXX signatures are a direct concatenation of the (R, S) pair and
+ * need to be converted back to ASN.1 in order for verify operations to
+ * work with OpenSSL.
+ */
+ if (is_ecdsa) {
+ int conv_retval = convert_ecdsa_sig(ctx, entry->pkey, decoded_signature);
+ if (conv_retval != 0) {
+ retval = conv_retval;
+ goto end;
+ }
+ }
+
+ if (EVP_DigestVerifyInit(evp_md_ctx, &pkey_ctx, evp, NULL, entry->pkey) == 1) {
+ if (is_ecdsa || EVP_PKEY_CTX_set_rsa_padding(pkey_ctx, padding) > 0) {
+ if (EVP_DigestVerifyUpdate(evp_md_ctx, (const unsigned char*)ctx->jose.start,
+ ctx->jose.length + ctx->claims.length + 1) == 1 &&
+ EVP_DigestVerifyFinal(evp_md_ctx, (const unsigned char*)decoded_signature->area, decoded_signature->data) == 1) {
+ retval = JWT_VRFY_OK;
+ }
+ }
+ }
+
+end:
+ EVP_MD_CTX_free(evp_md_ctx);
+ return retval;
+}
+
+/*
+ * Check that the <token> that was signed via algorithm <alg> using the <key>
+ * (either an HMAC secret or the path to a public certificate) has a valid
+ * signature.
+ * Returns 1 in case of success.
+ */
+enum jwt_vrfy_status jwt_verify(const struct buffer *token, const struct buffer *alg,
+ const struct buffer *key)
+{
+ struct jwt_item items[JWT_ELT_MAX] = { { 0 } };
+ unsigned int item_num = JWT_ELT_MAX;
+ struct buffer *decoded_sig = NULL;
+ struct jwt_ctx ctx = {};
+ enum jwt_vrfy_status retval = JWT_VRFY_KO;
+ int ret;
+
+ ctx.alg = jwt_parse_alg(alg->area, alg->data);
+
+ if (ctx.alg == JWT_ALG_DEFAULT)
+ return JWT_VRFY_UNKNOWN_ALG;
+
+ if (jwt_tokenize(token, items, &item_num))
+ return JWT_VRFY_INVALID_TOKEN;
+
+ if (item_num != JWT_ELT_MAX)
+ if (ctx.alg != JWS_ALG_NONE || item_num != JWT_ELT_SIG)
+ return JWT_VRFY_INVALID_TOKEN;
+
+ ctx.jose = items[JWT_ELT_JOSE];
+ ctx.claims = items[JWT_ELT_CLAIMS];
+ ctx.signature = items[JWT_ELT_SIG];
+
+	/* When "alg" is "none", the signature must be empty for the JWS to be valid. */
+ if (ctx.alg == JWS_ALG_NONE) {
+ return (ctx.signature.length == 0) ? JWT_VRFY_OK : JWT_VRFY_KO;
+ }
+
+ if (ctx.signature.length == 0)
+ return JWT_VRFY_INVALID_TOKEN;
+
+ decoded_sig = alloc_trash_chunk();
+ if (!decoded_sig)
+ return JWT_VRFY_OUT_OF_MEMORY;
+
+ ret = base64urldec(ctx.signature.start, ctx.signature.length,
+ decoded_sig->area, decoded_sig->size);
+ if (ret == -1) {
+ retval = JWT_VRFY_INVALID_TOKEN;
+ goto end;
+ }
+
+ decoded_sig->data = ret;
+ ctx.key = key->area;
+ ctx.key_length = key->data;
+
+ /* We have all three sections, signature calculation can begin. */
+
+ switch(ctx.alg) {
+
+ case JWS_ALG_HS256:
+ case JWS_ALG_HS384:
+ case JWS_ALG_HS512:
+ /* HMAC + SHA-XXX */
+ retval = jwt_jwsverify_hmac(&ctx, decoded_sig);
+ break;
+ case JWS_ALG_RS256:
+ case JWS_ALG_RS384:
+ case JWS_ALG_RS512:
+ case JWS_ALG_ES256:
+ case JWS_ALG_ES384:
+ case JWS_ALG_ES512:
+ case JWS_ALG_PS256:
+ case JWS_ALG_PS384:
+ case JWS_ALG_PS512:
+ /* RSASSA-PKCS1-v1_5 + SHA-XXX */
+ /* ECDSA using P-XXX and SHA-XXX */
+ /* RSASSA-PSS using SHA-XXX and MGF1 with SHA-XXX */
+ retval = jwt_jwsverify_rsa_ecdsa(&ctx, decoded_sig);
+ break;
+ default:
+ /* Not managed yet */
+ retval = JWT_VRFY_UNMANAGED_ALG;
+ break;
+ }
+
+end:
+ free_trash_chunk(decoded_sig);
+
+ return retval;
+}
+
+static void jwt_deinit(void)
+{
+ struct ebmb_node *node = NULL;
+ struct jwt_cert_tree_entry *entry = NULL;
+
+ node = ebmb_first(&jwt_cert_tree);
+ while (node) {
+ entry = ebmb_entry(node, struct jwt_cert_tree_entry, node);
+ ebmb_delete(node);
+ EVP_PKEY_free(entry->pkey);
+ ha_free(&entry);
+ node = ebmb_first(&jwt_cert_tree);
+ }
+}
+REGISTER_POST_DEINIT(jwt_deinit);
+
+
+#endif /* USE_OPENSSL */
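
Putting the pieces together, a caller-side sketch of the verification entry
point (the buffer values are placeholders; in HAProxy this path is normally
reached through the jwt_verify sample converter):

    struct buffer token = { .area = "<compact jws>", .data = 13 };
    struct buffer alg   = { .area = "HS256",         .data = 5 };
    struct buffer key   = { .area = "my-secret",     .data = 9 };

    if (jwt_verify(&token, &alg, &key) == JWT_VRFY_OK) {
            /* the signature matched: the claims may be trusted */
    }
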
diff --git a/src/lb_chash.c b/src/lb_chash.c
new file mode 100644
index 0000000..4e8fb15
--- /dev/null
+++ b/src/lb_chash.c
@@ -0,0 +1,517 @@
+/*
+ * Consistent Hash implementation
+ * Please consult this very detailed article for more information:
+ * http://www.spiteful.com/2008/03/17/programmers-toolbox-part-3-consistent-hashing/
+ *
+ * Our implementation has to support both weighted hashing and weighted round
+ * robin because we'll use it to replace the previous map-based implementation
+ * which offered both algorithms.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/errors.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+#include <haproxy/tools.h>
+
+/* Return the tree node following <node>, which must still be in the tree or
+ * be NULL. The lookup wraps around from the end to the beginning. If the next
+ * node is the same node, return NULL. This is designed to find a valid next
+ * node before deleting one from the tree.
+ */
+static inline struct eb32_node *chash_skip_node(struct eb_root *root, struct eb32_node *node)
+{
+ struct eb32_node *stop = node;
+
+ if (!node)
+ return NULL;
+ node = eb32_next(node);
+ if (!node)
+ node = eb32_first(root);
+ if (node == stop)
+ return NULL;
+ return node;
+}
+
+/* Remove all of a server's entries from its tree. This may be used when
+ * setting a server down.
+ */
+static inline void chash_dequeue_srv(struct server *s)
+{
+ while (s->lb_nodes_now > 0) {
+ if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
+ s->lb_nodes_now = s->lb_nodes_tot;
+ s->lb_nodes_now--;
+ if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
+ s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
+ eb32_delete(&s->lb_nodes[s->lb_nodes_now].node);
+ }
+}
+
+/* Adjust the number of entries of a server in its tree. The server must appear
+ * as many times as its weight indicates. If it's there too often, we remove
+ * the last occurrences. If it's not there enough, we add more occurrences. To
+ * remove a server from the tree, normally call this with eweight=0.
+ *
+ * The server's lock and the lbprm's lock must be held.
+ */
+static inline void chash_queue_dequeue_srv(struct server *s)
+{
+ while (s->lb_nodes_now > s->next_eweight) {
+ if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
+ s->lb_nodes_now = s->lb_nodes_tot;
+ s->lb_nodes_now--;
+ if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
+ s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
+ eb32_delete(&s->lb_nodes[s->lb_nodes_now].node);
+ }
+
+ /* Attempt to increase the total number of nodes, if the user
+ * increased the weight beyond the original weight
+ */
+ if (s->lb_nodes_tot < s->next_eweight) {
+ struct tree_occ *new_nodes;
+
+ /* First we need to remove all server's entries from its tree
+ * because the realloc will change all nodes pointers */
+ chash_dequeue_srv(s);
+
+ new_nodes = realloc(s->lb_nodes, s->next_eweight * sizeof(*new_nodes));
+ if (new_nodes) {
+ unsigned int j;
+
+ s->lb_nodes = new_nodes;
+ memset(&s->lb_nodes[s->lb_nodes_tot], 0,
+ (s->next_eweight - s->lb_nodes_tot) * sizeof(*s->lb_nodes));
+ for (j = s->lb_nodes_tot; j < s->next_eweight; j++) {
+ s->lb_nodes[j].server = s;
+ s->lb_nodes[j].node.key = full_hash(s->puid * SRV_EWGHT_RANGE + j);
+ }
+ s->lb_nodes_tot = s->next_eweight;
+ }
+ }
+ while (s->lb_nodes_now < s->next_eweight) {
+ if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway
+ break;
+ if (s->proxy->lbprm.chash.last == &s->lb_nodes[s->lb_nodes_now].node)
+ s->proxy->lbprm.chash.last = chash_skip_node(s->lb_tree, s->proxy->lbprm.chash.last);
+ eb32_insert(s->lb_tree, &s->lb_nodes[s->lb_nodes_now].node);
+ s->lb_nodes_now++;
+ }
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm lock will be used.
+ */
+static void chash_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+
+ if (!srv_currently_usable(srv))
+ /* server was already down */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck -= srv->cur_eweight;
+ p->srv_bck--;
+
+ if (srv == p->lbprm.fbck) {
+ /* we lost the first backup server in a single-backup
+ * configuration, we must search another one.
+ */
+ struct server *srv2 = p->lbprm.fbck;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 &&
+ !((srv2->flags & SRV_F_BACKUP) &&
+ srv_willbe_usable(srv2)));
+ p->lbprm.fbck = srv2;
+ }
+ } else {
+ p->lbprm.tot_wact -= srv->cur_eweight;
+ p->srv_act--;
+ }
+
+ chash_dequeue_srv(srv);
+
+out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ out_update_state:
+ srv_lb_commit_status(srv);
+
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm lock will be used.
+ */
+static void chash_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ if (srv_currently_usable(srv))
+ /* server was already up */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck += srv->next_eweight;
+ p->srv_bck++;
+
+ if (!(p->options & PR_O_USE_ALL_BK)) {
+ if (!p->lbprm.fbck) {
+ /* there was no backup server anymore */
+ p->lbprm.fbck = srv;
+ } else {
+ /* we may have restored a backup server prior to fbck,
+ * in which case it should replace it.
+ */
+ struct server *srv2 = srv;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 && (srv2 != p->lbprm.fbck));
+ if (srv2)
+ p->lbprm.fbck = srv;
+ }
+ }
+ } else {
+ p->lbprm.tot_wact += srv->next_eweight;
+ p->srv_act++;
+ }
+
+ /* note that eweight cannot be 0 here */
+ chash_queue_dequeue_srv(srv);
+
+ out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ out_update_state:
+ srv_lb_commit_status(srv);
+
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm lock may be used.
+ */
+static void chash_update_server_weight(struct server *srv)
+{
+ int old_state, new_state;
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ /* If changing the server's weight changes its state, we simply apply
+ * the procedures we already have for status change. If the state
+ * remains down, the server is not in any tree, so it's as easy as
+ * updating its values. If the state remains up with different weights,
+ * there are some computations to perform to find a new place and
+ * possibly a new tree for this server.
+ */
+
+ old_state = srv_currently_usable(srv);
+ new_state = srv_willbe_usable(srv);
+
+ if (!old_state && !new_state) {
+ srv_lb_commit_status(srv);
+ return;
+ }
+ else if (!old_state && new_state) {
+ chash_set_server_status_up(srv);
+ return;
+ }
+ else if (old_state && !new_state) {
+ chash_set_server_status_down(srv);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ /* only adjust the server's presence in the tree */
+ chash_queue_dequeue_srv(srv);
+
+ if (srv->flags & SRV_F_BACKUP)
+ p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
+ else
+ p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;
+
+ update_backend_weight(p);
+ srv_lb_commit_status(srv);
+
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+}
+
+/*
+ * This function implements the "Consistent Hashing with Bounded Loads" algorithm
+ * of Mirrokni, Thorup, and Zadimoghaddam (arxiv:1608.01350), adapted for use with
+ * unequal server weights.
+ */
+int chash_server_is_eligible(struct server *s)
+{
+ /* The total number of slots to allocate is the total number of outstanding requests
+ * (including the one we're about to make) times the load-balance-factor, rounded up.
+ */
+ unsigned tot_slots = ((s->proxy->served + 1) * s->proxy->lbprm.hash_balance_factor + 99) / 100;
+ unsigned slots_per_weight = tot_slots / s->proxy->lbprm.tot_weight;
+ unsigned remainder = tot_slots % s->proxy->lbprm.tot_weight;
+
+ /* Allocate a whole number of slots per weight unit... */
+ unsigned slots = s->cur_eweight * slots_per_weight;
+
+ /* And then distribute the rest among servers proportionally to their weight. */
+ slots += ((s->cumulative_weight + s->cur_eweight) * remainder) / s->proxy->lbprm.tot_weight
+ - (s->cumulative_weight * remainder) / s->proxy->lbprm.tot_weight;
+
+ /* But never leave a server with 0. */
+ if (slots == 0)
+ slots = 1;
+
+ return s->served < slots;
+}
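
A worked example for illustration, taking eweights of 1 for simplicity: with
hash-balance-factor 150, three servers and 4 requests already in flight,
tot_slots = ((4+1)*150+99)/100 = 8, hence slots_per_weight = 2 with a
remainder of 2. The three servers then get 2, 3 and 3 slots respectively, and
each remains eligible only while it is serving fewer requests than its slot
count.
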
+
+/*
+ * This function returns the running server from the CHASH tree, which is at
+ * the closest distance from the value of <hash>. Doing so ensures that even
+ * with a heavily imbalanced hash, if some servers are close to each other, they
+ * will still both receive traffic. If any server is found, it will be returned.
+ * It will also skip server <avoid> if the hash result ends on this one.
+ * If no valid server is found, NULL is returned.
+ *
+ * The lbprm's lock will be used in R/O mode. The server's lock is not used.
+ */
+struct server *chash_get_server_hash(struct proxy *p, unsigned int hash, const struct server *avoid)
+{
+ struct eb32_node *next, *prev;
+ struct server *nsrv, *psrv;
+ struct eb_root *root;
+ unsigned int dn, dp;
+ int loop;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (p->srv_act)
+ root = &p->lbprm.chash.act;
+ else if (p->lbprm.fbck) {
+ nsrv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ root = &p->lbprm.chash.bck;
+ else {
+ nsrv = NULL;
+ goto out;
+ }
+
+ /* find the node after and the node before */
+ next = eb32_lookup_ge(root, hash);
+ if (!next)
+ next = eb32_first(root);
+ if (!next) {
+ nsrv = NULL; /* tree is empty */
+ goto out;
+ }
+
+ prev = eb32_prev(next);
+ if (!prev)
+ prev = eb32_last(root);
+
+ nsrv = eb32_entry(next, struct tree_occ, node)->server;
+ psrv = eb32_entry(prev, struct tree_occ, node)->server;
+
+ /* OK we're located between two servers, let's
+ * compare distances between hash and the two servers
+ * and select the closest server.
+ */
+ dp = hash - prev->key;
+ dn = next->key - hash;
+
+ if (dp <= dn) {
+ next = prev;
+ nsrv = psrv;
+ }
+
+ loop = 0;
+ while (nsrv == avoid || (p->lbprm.hash_balance_factor && !chash_server_is_eligible(nsrv))) {
+ next = eb32_next(next);
+ if (!next) {
+ next = eb32_first(root);
+ if (++loop > 1) // protection against accidental loop
+ break;
+ }
+ nsrv = eb32_entry(next, struct tree_occ, node)->server;
+ }
+
+ out:
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return nsrv;
+}
+
+/* Return next server from the CHASH tree in backend <p>. If the tree is empty,
+ * return NULL. Saturated servers are skipped.
+ *
+ * The lbprm's lock will be used in R/W mode. The server's lock is not used.
+ */
+struct server *chash_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+ struct server *srv, *avoided;
+ struct eb32_node *node, *stop, *avoided_node;
+ struct eb_root *root;
+
+ srv = avoided = NULL;
+ avoided_node = NULL;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ if (p->srv_act)
+ root = &p->lbprm.chash.act;
+ else if (p->lbprm.fbck) {
+ srv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ root = &p->lbprm.chash.bck;
+ else {
+ srv = NULL;
+ goto out;
+ }
+
+ stop = node = p->lbprm.chash.last;
+ do {
+ struct server *s;
+
+ if (node)
+ node = eb32_next(node);
+ if (!node)
+ node = eb32_first(root);
+
+ p->lbprm.chash.last = node;
+ if (!node) {
+ /* no node is available */
+ srv = NULL;
+ goto out;
+ }
+
+ /* Note: if we came here after a down/up cycle with no last
+ * pointer, and after a redispatch (srvtoavoid is set), we
+ * must set stop to non-null otherwise we can loop forever.
+ */
+ if (!stop)
+ stop = node;
+
+ /* OK, we have a server. However, it may be saturated, in which
+ * case we don't want to reconsider it for now, so we'll simply
+ * skip it. Same if it's the server we try to avoid, in which
+ * case we simply remember it for later use if needed.
+ */
+ s = eb32_entry(node, struct tree_occ, node)->server;
+ if (!s->maxconn || (!s->queue.length && s->served < srv_dynamic_maxconn(s))) {
+ if (s != srvtoavoid) {
+ srv = s;
+ break;
+ }
+ avoided = s;
+ avoided_node = node;
+ }
+ } while (node != stop);
+
+ if (!srv) {
+ srv = avoided;
+ p->lbprm.chash.last = avoided_node;
+ }
+
+ out:
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return srv;
+}
+
+/* This function is responsible for building the active and backup trees for
+ * consistent hashing. The servers receive an array of initialized nodes
+ * with their assigned keys. It also sets p->lbprm.wdiv to the eweight to
+ * uweight ratio.
+ * Return 0 in case of success, -1 in case of allocation failure.
+ */
+int chash_init_server_tree(struct proxy *p)
+{
+ struct server *srv;
+ struct eb_root init_head = EB_ROOT;
+ int node;
+
+ p->lbprm.set_server_status_up = chash_set_server_status_up;
+ p->lbprm.set_server_status_down = chash_set_server_status_down;
+ p->lbprm.update_server_eweight = chash_update_server_weight;
+ p->lbprm.server_take_conn = NULL;
+ p->lbprm.server_drop_conn = NULL;
+
+ p->lbprm.wdiv = BE_WEIGHT_SCALE;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+ srv_lb_commit_status(srv);
+ }
+
+ recount_servers(p);
+ update_backend_weight(p);
+
+ p->lbprm.chash.act = init_head;
+ p->lbprm.chash.bck = init_head;
+ p->lbprm.chash.last = NULL;
+
+ /* queue active and backup servers in two distinct groups */
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.chash.bck : &p->lbprm.chash.act;
+ srv->lb_nodes_tot = srv->uweight * BE_WEIGHT_SCALE;
+ srv->lb_nodes_now = 0;
+ srv->lb_nodes = calloc(srv->lb_nodes_tot,
+ sizeof(*srv->lb_nodes));
+ if (!srv->lb_nodes) {
+ ha_alert("failed to allocate lb_nodes for server %s.\n", srv->id);
+ return -1;
+ }
+ for (node = 0; node < srv->lb_nodes_tot; node++) {
+ srv->lb_nodes[node].server = srv;
+ srv->lb_nodes[node].node.key = full_hash(srv->puid * SRV_EWGHT_RANGE + node);
+ }
+
+ if (srv_currently_usable(srv))
+ chash_queue_dequeue_srv(srv);
+ }
+ return 0;
+}
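
A lookup sketch tying this together (hypothetical caller; in HAProxy the hash
is normally computed in backend.c from the configured balance source and
hash-type, and hash_djb2()/full_hash() below are only one possible choice):

    /* pick a server on the ring for this request's hash */
    unsigned int h = full_hash(hash_djb2(uri, uri_len));
    struct server *srv = chash_get_server_hash(px, h, NULL);

    if (!srv) {
            /* no usable server: the request will be queued or rejected */
    }
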
diff --git a/src/lb_fas.c b/src/lb_fas.c
new file mode 100644
index 0000000..d90388b
--- /dev/null
+++ b/src/lb_fas.c
@@ -0,0 +1,348 @@
+/*
+ * First Available Server load balancing algorithm.
+ *
+ * This file implements an algorithm which emerged during a discussion with
+ * Steen Larsen, initially inspired by Anshul Gandhi et al.'s work, now
+ * described as "packing" in section 3.5:
+ *
+ * http://reports-archive.adm.cs.cmu.edu/anon/2012/CMU-CS-12-109.pdf
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+
+
+/* Remove a server from a tree. It must have previously been dequeued. This
+ * function is meant to be called when a server is going down or has its
+ * weight disabled.
+ *
+ * The server's lock and the lbprm's lock must be held.
+ */
+static inline void fas_remove_from_tree(struct server *s)
+{
+ s->lb_tree = NULL;
+}
+
+/* simply removes a server from a tree.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void fas_dequeue_srv(struct server *s)
+{
+ eb32_delete(&s->lb_node);
+}
+
+/* Queue a server in its associated tree, assuming the weight is >0.
+ * Servers are sorted by unique ID so that we send all connections to the first
+ * available server in declaration order (or ID order) until its maxconn is
+ * reached. It is important to understand that the server weight is not used
+ * here.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void fas_queue_srv(struct server *s)
+{
+ s->lb_node.key = s->puid;
+ eb32_insert(s->lb_tree, &s->lb_node);
+}
+
+/* Re-position the server in the FS tree after it has been assigned one
+ * connection or after it has released one. Note that it is possible that
+ * the server has been moved out of the tree due to failed health-checks.
+ * The lbprm's lock will be used.
+ */
+static void fas_srv_reposition(struct server *s)
+{
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+ if (s->lb_tree) {
+ fas_dequeue_srv(s);
+ fas_queue_srv(s);
+ }
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fas_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (!srv_currently_usable(srv))
+ /* server was already down */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck -= srv->cur_eweight;
+ p->srv_bck--;
+
+ if (srv == p->lbprm.fbck) {
+ /* we lost the first backup server in a single-backup
+ * configuration, we must search another one.
+ */
+ struct server *srv2 = p->lbprm.fbck;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 &&
+ !((srv2->flags & SRV_F_BACKUP) &&
+ srv_willbe_usable(srv2)));
+ p->lbprm.fbck = srv2;
+ }
+ } else {
+ p->lbprm.tot_wact -= srv->cur_eweight;
+ p->srv_act--;
+ }
+
+ fas_dequeue_srv(srv);
+ fas_remove_from_tree(srv);
+
+ out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fas_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv_currently_usable(srv))
+ /* server was already up */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ srv->lb_tree = &p->lbprm.fas.bck;
+ p->lbprm.tot_wbck += srv->next_eweight;
+ p->srv_bck++;
+
+ if (!(p->options & PR_O_USE_ALL_BK)) {
+ if (!p->lbprm.fbck) {
+ /* there was no backup server anymore */
+ p->lbprm.fbck = srv;
+ } else {
+ /* we may have restored a backup server prior to fbck,
+ * in which case it should replace it.
+ */
+ struct server *srv2 = srv;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 && (srv2 != p->lbprm.fbck));
+ if (srv2)
+ p->lbprm.fbck = srv;
+ }
+ }
+ } else {
+ srv->lb_tree = &p->lbprm.fas.act;
+ p->lbprm.tot_wact += srv->next_eweight;
+ p->srv_act++;
+ }
+
+ /* note that eweight cannot be 0 here */
+ fas_queue_srv(srv);
+
+ out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fas_update_server_weight(struct server *srv)
+{
+ int old_state, new_state;
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ /* If changing the server's weight changes its state, we simply apply
+ * the procedures we already have for status change. If the state
+ * remains down, the server is not in any tree, so it's as easy as
+ * updating its values. If the state remains up with different weights,
+ * there are some computations to perform to find a new place and
+ * possibly a new tree for this server.
+ */
+
+ old_state = srv_currently_usable(srv);
+ new_state = srv_willbe_usable(srv);
+
+ if (!old_state && !new_state) {
+ srv_lb_commit_status(srv);
+ return;
+ }
+ else if (!old_state && new_state) {
+ fas_set_server_status_up(srv);
+ return;
+ }
+ else if (old_state && !new_state) {
+ fas_set_server_status_down(srv);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv->lb_tree)
+ fas_dequeue_srv(srv);
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
+ srv->lb_tree = &p->lbprm.fas.bck;
+ } else {
+ p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;
+ srv->lb_tree = &p->lbprm.fas.act;
+ }
+
+ fas_queue_srv(srv);
+
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ srv_lb_commit_status(srv);
+}
+
+/* This function is responsible for building the trees for the First
+ * Available Server algorithm. It also sets p->lbprm.wdiv to the eweight to
+ * uweight ratio. Both active and backup groups are initialized.
+ */
+void fas_init_server_tree(struct proxy *p)
+{
+ struct server *srv;
+ struct eb_root init_head = EB_ROOT;
+
+ p->lbprm.set_server_status_up = fas_set_server_status_up;
+ p->lbprm.set_server_status_down = fas_set_server_status_down;
+ p->lbprm.update_server_eweight = fas_update_server_weight;
+ p->lbprm.server_take_conn = fas_srv_reposition;
+ p->lbprm.server_drop_conn = fas_srv_reposition;
+
+ p->lbprm.wdiv = BE_WEIGHT_SCALE;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+ srv_lb_commit_status(srv);
+ }
+
+ recount_servers(p);
+ update_backend_weight(p);
+
+ p->lbprm.fas.act = init_head;
+ p->lbprm.fas.bck = init_head;
+
+ /* queue active and backup servers in two distinct groups */
+ for (srv = p->srv; srv; srv = srv->next) {
+ if (!srv_currently_usable(srv))
+ continue;
+ srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fas.bck : &p->lbprm.fas.act;
+ fas_queue_srv(srv);
+ }
+}
+
+/* Return next server from the FS tree in backend <p>. If the tree is empty,
+ * return NULL. Saturated servers are skipped.
+ *
+ * The lbprm's lock will be used. The server's lock is not used.
+ */
+struct server *fas_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+ struct server *srv, *avoided;
+ struct eb32_node *node;
+
+ srv = avoided = NULL;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ if (p->srv_act)
+ node = eb32_first(&p->lbprm.fas.act);
+ else if (p->lbprm.fbck) {
+ srv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ node = eb32_first(&p->lbprm.fas.bck);
+ else {
+ srv = NULL;
+ goto out;
+ }
+
+ while (node) {
+ /* OK, we have a server. However, it may be saturated, in which
+ * case we don't want to reconsider it for now, so we'll simply
+ * skip it. Same if it's the server we try to avoid, in which
+ * case we simply remember it for later use if needed.
+ */
+ struct server *s;
+
+ s = eb32_entry(node, struct server, lb_node);
+ if (!s->maxconn || (!s->queue.length && s->served < srv_dynamic_maxconn(s))) {
+ if (s != srvtoavoid) {
+ srv = s;
+ break;
+ }
+ avoided = s;
+ }
+ node = eb32_next(node);
+ }
+
+ if (!srv)
+ srv = avoided;
+ out:
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return srv;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
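
To make the "packing" behaviour concrete: with three servers s1, s2 and s3
declared in that order (puid 1 to 3) and maxconn 100 each,
fas_get_next_server() keeps returning s1 for the first 100 concurrent
connections, spills the 101st onto s2, and goes back to s1 as soon as one of
its connections completes. Weights play no role in the ordering; only
declaration (ID) order and saturation do.
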
diff --git a/src/lb_fwlc.c b/src/lb_fwlc.c
new file mode 100644
index 0000000..8e913d4
--- /dev/null
+++ b/src/lb_fwlc.c
@@ -0,0 +1,375 @@
+/*
+ * Fast Weighted Least Connection load balancing algorithm.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+
+
+/* Remove a server from a tree. It must have previously been dequeued. This
+ * function is meant to be called when a server is going down or has its
+ * weight disabled.
+ *
+ * The server's lock and the lbprm's lock must be held.
+ */
+static inline void fwlc_remove_from_tree(struct server *s)
+{
+ s->lb_tree = NULL;
+}
+
+/* simply removes a server from a tree.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void fwlc_dequeue_srv(struct server *s)
+{
+ eb32_delete(&s->lb_node);
+}
+
+/* Queue a server in its associated tree, assuming the <eweight> is >0.
+ * Servers are sorted by (#conns+1)/weight. To ensure maximum accuracy,
+ * we use (#conns+1)*SRV_EWGHT_MAX/eweight as the sorting key. The reason
+ * for using #conns+1 is to sort by weights in case the server is picked
+ * and not before it is picked. This provides a better load accuracy for
+ * low connection counts when weights differ and makes sure the round-robin
+ * applies between servers of highest weight first. However servers with no
+ * connection are always picked first so that under low loads, it's not
+ * always the single server with the highest weight that gets picked.
+ *
+ * NOTE: Depending on the calling context, we use s->next_eweight or
+ * s->cur_eweight. The next value is used when the server state is updated
+ * (because the weight changed for instance). During this step, the server
+ * state is not yet committed. The current value is used to reposition the
+ * server in the tree. This happens when the server is used.
+ *
+ * The lbprm's lock must be held.
+ */
+static inline void fwlc_queue_srv(struct server *s, unsigned int eweight)
+{
+ unsigned int inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queue.length);
+
+ s->lb_node.key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / eweight : 0;
+ eb32_insert(s->lb_tree, &s->lb_node);
+}
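
To illustrate the key formula: at equal inflight counts, a server with twice
the eweight of another gets half the key, e.g. with inflight = 3 on both,
(3+1)*SRV_EWGHT_MAX/(2*w) versus (3+1)*SRV_EWGHT_MAX/w, so the heavier server
sorts first and absorbs the next connection. A server with no connection at
all gets key 0 and is therefore always tried first.
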
+
+/* Re-position the server in the FWLC tree after it has been assigned one
+ * connection or after it has released one. Note that it is possible that
+ * the server has been moved out of the tree due to failed health-checks.
+ * The lbprm's lock will be used.
+ */
+static void fwlc_srv_reposition(struct server *s)
+{
+ unsigned int inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queue.length);
+ unsigned int eweight = _HA_ATOMIC_LOAD(&s->cur_eweight);
+ unsigned int new_key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / (eweight ? eweight : 1) : 0;
+
+	/* some calls will be made for no change (e.g. connect_server() after
+	 * assign_server()). Let's check that first.
+ */
+ if (s->lb_node.node.leaf_p && eweight && s->lb_node.key == new_key)
+ return;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+ if (s->lb_tree) {
+ /* we might have been waiting for a while on the lock above
+ * so it's worth testing again because other threads are very
+ * likely to have released a connection or taken one leading
+ * to our target value (50% of the case in measurements).
+ */
+ inflight = _HA_ATOMIC_LOAD(&s->served) + _HA_ATOMIC_LOAD(&s->queue.length);
+ eweight = _HA_ATOMIC_LOAD(&s->cur_eweight);
+ new_key = inflight ? (inflight + 1) * SRV_EWGHT_MAX / (eweight ? eweight : 1) : 0;
+ if (!s->lb_node.node.leaf_p || s->lb_node.key != new_key) {
+ eb32_delete(&s->lb_node);
+ s->lb_node.key = new_key;
+ eb32_insert(s->lb_tree, &s->lb_node);
+ }
+ }
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &s->proxy->lbprm.lock);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwlc_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (!srv_currently_usable(srv))
+ /* server was already down */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck -= srv->cur_eweight;
+ p->srv_bck--;
+
+ if (srv == p->lbprm.fbck) {
+ /* we lost the first backup server in a single-backup
+ * configuration, we must search another one.
+ */
+ struct server *srv2 = p->lbprm.fbck;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 &&
+ !((srv2->flags & SRV_F_BACKUP) &&
+ srv_willbe_usable(srv2)));
+ p->lbprm.fbck = srv2;
+ }
+ } else {
+ p->lbprm.tot_wact -= srv->cur_eweight;
+ p->srv_act--;
+ }
+
+ fwlc_dequeue_srv(srv);
+ fwlc_remove_from_tree(srv);
+
+out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwlc_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv_currently_usable(srv))
+ /* server was already up */
+ goto out_update_backend;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ srv->lb_tree = &p->lbprm.fwlc.bck;
+ p->lbprm.tot_wbck += srv->next_eweight;
+ p->srv_bck++;
+
+ if (!(p->options & PR_O_USE_ALL_BK)) {
+ if (!p->lbprm.fbck) {
+ /* there was no backup server anymore */
+ p->lbprm.fbck = srv;
+ } else {
+ /* we may have restored a backup server prior to fbck,
+ * in which case it should replace it.
+ */
+ struct server *srv2 = srv;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 && (srv2 != p->lbprm.fbck));
+ if (srv2)
+ p->lbprm.fbck = srv;
+ }
+ }
+ } else {
+ srv->lb_tree = &p->lbprm.fwlc.act;
+ p->lbprm.tot_wact += srv->next_eweight;
+ p->srv_act++;
+ }
+
+ /* note that eweight cannot be 0 here */
+ fwlc_queue_srv(srv, srv->next_eweight);
+
+ out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwlc_update_server_weight(struct server *srv)
+{
+ int old_state, new_state;
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ /* If changing the server's weight changes its state, we simply apply
+ * the procedures we already have for status change. If the state
+ * remains down, the server is not in any tree, so it's as easy as
+ * updating its values. If the state remains up with different weights,
+ * there are some computations to perform to find a new place and
+ * possibly a new tree for this server.
+ */
+
+ old_state = srv_currently_usable(srv);
+ new_state = srv_willbe_usable(srv);
+
+ if (!old_state && !new_state) {
+ srv_lb_commit_status(srv);
+ return;
+ }
+ else if (!old_state && new_state) {
+ fwlc_set_server_status_up(srv);
+ return;
+ }
+ else if (old_state && !new_state) {
+ fwlc_set_server_status_down(srv);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv->lb_tree)
+ fwlc_dequeue_srv(srv);
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck += srv->next_eweight - srv->cur_eweight;
+ srv->lb_tree = &p->lbprm.fwlc.bck;
+ } else {
+ p->lbprm.tot_wact += srv->next_eweight - srv->cur_eweight;
+ srv->lb_tree = &p->lbprm.fwlc.act;
+ }
+
+ fwlc_queue_srv(srv, srv->next_eweight);
+
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ srv_lb_commit_status(srv);
+}
+
+/* This function is responsible for building the trees in case of fast
+ * weighted least-conns. It also sets p->lbprm.wdiv to the eweight to
+ * uweight ratio. Both active and backup groups are initialized.
+ */
+void fwlc_init_server_tree(struct proxy *p)
+{
+ struct server *srv;
+ struct eb_root init_head = EB_ROOT;
+
+ p->lbprm.set_server_status_up = fwlc_set_server_status_up;
+ p->lbprm.set_server_status_down = fwlc_set_server_status_down;
+ p->lbprm.update_server_eweight = fwlc_update_server_weight;
+ p->lbprm.server_take_conn = fwlc_srv_reposition;
+ p->lbprm.server_drop_conn = fwlc_srv_reposition;
+
+ p->lbprm.wdiv = BE_WEIGHT_SCALE;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+ srv_lb_commit_status(srv);
+ }
+
+ recount_servers(p);
+ update_backend_weight(p);
+
+ p->lbprm.fwlc.act = init_head;
+ p->lbprm.fwlc.bck = init_head;
+
+ /* queue active and backup servers in two distinct groups */
+ for (srv = p->srv; srv; srv = srv->next) {
+ if (!srv_currently_usable(srv))
+ continue;
+ srv->lb_tree = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwlc.bck : &p->lbprm.fwlc.act;
+ fwlc_queue_srv(srv, srv->next_eweight);
+ }
+}
+
+/* Return next server from the FWLC tree in backend <p>. If the tree is empty,
+ * return NULL. Saturated servers are skipped.
+ *
+ * The lbprm's lock will be used in R/O mode. The server's lock is not used.
+ */
+struct server *fwlc_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+ struct server *srv, *avoided;
+ struct eb32_node *node;
+
+ srv = avoided = NULL;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ if (p->srv_act)
+ node = eb32_first(&p->lbprm.fwlc.act);
+ else if (p->lbprm.fbck) {
+ srv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ node = eb32_first(&p->lbprm.fwlc.bck);
+ else {
+ srv = NULL;
+ goto out;
+ }
+
+ while (node) {
+ /* OK, we have a server. However, it may be saturated, in which
+ * case we don't want to reconsider it for now, so we'll simply
+ * skip it. Same if it's the server we try to avoid, in which
+ * case we simply remember it for later use if needed.
+ */
+ struct server *s;
+
+ s = eb32_entry(node, struct server, lb_node);
+ if (!s->maxconn || s->served + s->queue.length < srv_dynamic_maxconn(s) + s->maxqueue) {
+ if (s != srvtoavoid) {
+ srv = s;
+ break;
+ }
+ avoided = s;
+ }
+ node = eb32_next(node);
+ }
+
+ if (!srv)
+ srv = avoided;
+ out:
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return srv;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
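
A sketch of how the surrounding LB layer is assumed to drive these hooks over
a connection's lifetime on a "leastconn" backend (simplified; the real call
sites live in backend.c and the stream code):

    struct server *s = fwlc_get_next_server(px, NULL);

    if (s) {
            _HA_ATOMIC_INC(&s->served);
            if (px->lbprm.server_take_conn)
                    px->lbprm.server_take_conn(s);  /* fwlc_srv_reposition() */
            /* ... process the request ... */
            _HA_ATOMIC_DEC(&s->served);
            if (px->lbprm.server_drop_conn)
                    px->lbprm.server_drop_conn(s);  /* fwlc_srv_reposition() */
    }
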
diff --git a/src/lb_fwrr.c b/src/lb_fwrr.c
new file mode 100644
index 0000000..a762623
--- /dev/null
+++ b/src/lb_fwrr.c
@@ -0,0 +1,623 @@
+/*
+ * Fast Weighted Round Robin load balancing algorithm.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+
+
+static inline void fwrr_remove_from_tree(struct server *s);
+static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s);
+static inline void fwrr_dequeue_srv(struct server *s);
+static void fwrr_get_srv(struct server *s);
+static void fwrr_queue_srv(struct server *s);
+
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to down.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely down (the caller may not
+ * know all the variables of a server's state).
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwrr_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+ struct fwrr_group *grp;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (!srv_currently_usable(srv))
+ /* server was already down */
+ goto out_update_backend;
+
+ grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
+ grp->next_weight -= srv->cur_eweight;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
+ p->srv_bck--;
+
+ if (srv == p->lbprm.fbck) {
+ /* we lost the first backup server in a single-backup
+ * configuration, we must search another one.
+ */
+ struct server *srv2 = p->lbprm.fbck;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 &&
+ !((srv2->flags & SRV_F_BACKUP) &&
+ srv_willbe_usable(srv2)));
+ p->lbprm.fbck = srv2;
+ }
+ } else {
+ p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
+ p->srv_act--;
+ }
+
+ fwrr_dequeue_srv(srv);
+ fwrr_remove_from_tree(srv);
+
+out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function updates the server trees according to server <srv>'s new
+ * state. It should be called when server <srv>'s status changes to up.
+ * It is not important whether the server was already down or not. It is not
+ * important either that the new state is completely UP (the caller may not
+ * know all the variables of a server's state). This function will not change
+ * the weight of a server which was already up.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwrr_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+ struct fwrr_group *grp;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ if (srv_currently_usable(srv))
+ /* server was already up */
+ goto out_update_backend;
+
+ grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
+ grp->next_weight += srv->next_eweight;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
+ p->srv_bck++;
+
+ if (!(p->options & PR_O_USE_ALL_BK)) {
+ if (!p->lbprm.fbck) {
+ /* there was no backup server anymore */
+ p->lbprm.fbck = srv;
+ } else {
+ /* we may have restored a backup server prior to fbck,
+ * in which case it should replace it.
+ */
+ struct server *srv2 = srv;
+ do {
+ srv2 = srv2->next;
+ } while (srv2 && (srv2 != p->lbprm.fbck));
+ if (srv2)
+ p->lbprm.fbck = srv;
+ }
+ }
+ } else {
+ p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
+ p->srv_act++;
+ }
+
+ /* note that eweight cannot be 0 here */
+ fwrr_get_srv(srv);
+ srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
+ fwrr_queue_srv(srv);
+
+out_update_backend:
+ /* check/update tot_used, tot_weight */
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function must be called after an update to server <srv>'s effective
+ * weight. It may be called after a state change too.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void fwrr_update_server_weight(struct server *srv)
+{
+ int old_state, new_state;
+ struct proxy *p = srv->proxy;
+ struct fwrr_group *grp;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ /* If changing the server's weight changes its state, we simply apply
+ * the procedures we already have for status change. If the state
+ * remains down, the server is not in any tree, so it's as easy as
+ * updating its values. If the state remains up with different weights,
+ * there are some computations to perform to find a new place and
+ * possibly a new tree for this server.
+ */
+
+ old_state = srv_currently_usable(srv);
+ new_state = srv_willbe_usable(srv);
+
+ if (!old_state && !new_state) {
+ srv_lb_commit_status(srv);
+ return;
+ }
+ else if (!old_state && new_state) {
+ fwrr_set_server_status_up(srv);
+ return;
+ }
+ else if (old_state && !new_state) {
+ fwrr_set_server_status_down(srv);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ grp = (srv->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
+ grp->next_weight = grp->next_weight - srv->cur_eweight + srv->next_eweight;
+
+ p->lbprm.tot_wact = p->lbprm.fwrr.act.next_weight;
+ p->lbprm.tot_wbck = p->lbprm.fwrr.bck.next_weight;
+
+ if (srv->lb_tree == grp->init) {
+ fwrr_dequeue_srv(srv);
+ fwrr_queue_by_weight(grp->init, srv);
+ }
+ else if (!srv->lb_tree) {
+ /* FIXME: server was down. This is not possible right now but
+ * may be needed soon for slowstart or graceful shutdown.
+ */
+ fwrr_dequeue_srv(srv);
+ fwrr_get_srv(srv);
+ srv->npos = grp->curr_pos + (grp->next_weight + grp->curr_weight - grp->curr_pos) / srv->next_eweight;
+ fwrr_queue_srv(srv);
+ } else {
+ /* The server is either active or in the next queue. If it's
+ * still in the active queue and it has not consumed all of its
+ * places, let's adjust its next position.
+ */
+ fwrr_get_srv(srv);
+
+ if (srv->next_eweight > 0) {
+ int prev_next = srv->npos;
+ int step = grp->next_weight / srv->next_eweight;
+
+ srv->npos = srv->lpos + step;
+ srv->rweight = 0;
+
+ if (srv->npos > prev_next)
+ srv->npos = prev_next;
+ if (srv->npos < grp->curr_pos + 2)
+ srv->npos = grp->curr_pos + step;
+ } else {
+ /* push it into the next tree */
+ srv->npos = grp->curr_pos + grp->curr_weight;
+ }
+
+ fwrr_dequeue_srv(srv);
+ fwrr_queue_srv(srv);
+ }
+
+ update_backend_weight(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+
+ srv_lb_commit_status(srv);
+}
+
+/* Remove a server from a tree. It must have previously been dequeued. This
+ * function is meant to be called when a server is going down or has its
+ * weight disabled.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_remove_from_tree(struct server *s)
+{
+ s->lb_tree = NULL;
+}
+
+/* Queue a server in the weight tree <root>, assuming the weight is >0.
+ * We want to sort them by inverted weights, because we need to place
+ * heavy servers first in order to get a smooth distribution.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_queue_by_weight(struct eb_root *root, struct server *s)
+{
+ s->lb_node.key = SRV_EWGHT_MAX - s->next_eweight;
+ eb32_insert(root, &s->lb_node);
+ s->lb_tree = root;
+}
+
+/* This function is responsible for building the weight trees in case of fast
+ * weighted round-robin. It also sets p->lbprm.wdiv to the eweight to uweight
+ * ratio. Both active and backup groups are initialized.
+ */
+void fwrr_init_server_groups(struct proxy *p)
+{
+ struct server *srv;
+ struct eb_root init_head = EB_ROOT;
+
+ p->lbprm.set_server_status_up = fwrr_set_server_status_up;
+ p->lbprm.set_server_status_down = fwrr_set_server_status_down;
+ p->lbprm.update_server_eweight = fwrr_update_server_weight;
+
+ p->lbprm.wdiv = BE_WEIGHT_SCALE;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+ srv_lb_commit_status(srv);
+ }
+
+ recount_servers(p);
+ update_backend_weight(p);
+
+ /* prepare the active servers group */
+ p->lbprm.fwrr.act.curr_pos = p->lbprm.fwrr.act.curr_weight =
+ p->lbprm.fwrr.act.next_weight = p->lbprm.tot_wact;
+ p->lbprm.fwrr.act.curr = p->lbprm.fwrr.act.t0 =
+ p->lbprm.fwrr.act.t1 = init_head;
+ p->lbprm.fwrr.act.init = &p->lbprm.fwrr.act.t0;
+ p->lbprm.fwrr.act.next = &p->lbprm.fwrr.act.t1;
+
+ /* prepare the backup servers group */
+ p->lbprm.fwrr.bck.curr_pos = p->lbprm.fwrr.bck.curr_weight =
+ p->lbprm.fwrr.bck.next_weight = p->lbprm.tot_wbck;
+ p->lbprm.fwrr.bck.curr = p->lbprm.fwrr.bck.t0 =
+ p->lbprm.fwrr.bck.t1 = init_head;
+ p->lbprm.fwrr.bck.init = &p->lbprm.fwrr.bck.t0;
+ p->lbprm.fwrr.bck.next = &p->lbprm.fwrr.bck.t1;
+
+ /* queue active and backup servers in two distinct groups */
+ for (srv = p->srv; srv; srv = srv->next) {
+ if (!srv_currently_usable(srv))
+ continue;
+ fwrr_queue_by_weight((srv->flags & SRV_F_BACKUP) ?
+ p->lbprm.fwrr.bck.init :
+ p->lbprm.fwrr.act.init,
+ srv);
+ }
+}
+
+/* simply removes a server from a weight tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_dequeue_srv(struct server *s)
+{
+ eb32_delete(&s->lb_node);
+}
+
+/* queues a server into the appropriate group and tree depending on its
+ * backup status, and ->npos. If the server is disabled, simply assign
+ * it to the NULL tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static void fwrr_queue_srv(struct server *s)
+{
+ struct proxy *p = s->proxy;
+ struct fwrr_group *grp;
+
+ grp = (s->flags & SRV_F_BACKUP) ? &p->lbprm.fwrr.bck : &p->lbprm.fwrr.act;
+
+ /* Delay everything which does not fit into the window and everything
+ * which does not fit into the theoretical new window.
+ */
+ if (!srv_willbe_usable(s)) {
+ fwrr_remove_from_tree(s);
+ }
+ else if (s->next_eweight <= 0 ||
+ s->npos >= 2 * grp->curr_weight ||
+ s->npos >= grp->curr_weight + grp->next_weight) {
+ /* put into next tree, and readjust npos in case we could
+ * finally take this back to current. */
+ s->npos -= grp->curr_weight;
+ fwrr_queue_by_weight(grp->next, s);
+ }
+ else {
+ /* The sorting key is stored in units of s->npos * user_weight
+ * in order to avoid overflows. As stated in backend.h, the
+ * lower the scale, the rougher the weights modulation, and the
+ * higher the scale, the lower the number of servers without
+ * overflow. With this formula, the result is always positive,
+ * so we can use eb32_insert().
+ */
+ s->lb_node.key = SRV_UWGHT_RANGE * s->npos +
+ (unsigned)(SRV_EWGHT_MAX + s->rweight - s->next_eweight) / BE_WEIGHT_SCALE;
+
+ eb32_insert(&grp->curr, &s->lb_node);
+ s->lb_tree = &grp->curr;
+ }
+}
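+
+/* Hypothetical illustration of the key computed above: two servers queued at
+ * npos=7 share the primary component 7 * SRV_UWGHT_RANGE, and the secondary
+ * term (SRV_EWGHT_MAX + rweight - next_eweight) / BE_WEIGHT_SCALE is smaller
+ * for the heavier server, so at equal positions eb32_first() dequeues the
+ * heavier one first.
+ */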
+
+/* prepares a server when extracting it from the "init" tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_get_srv_init(struct server *s)
+{
+ s->npos = s->rweight = 0;
+}
+
+/* prepares a server when extracting it from the "next" tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_get_srv_next(struct server *s)
+{
+ struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
+ &s->proxy->lbprm.fwrr.bck :
+ &s->proxy->lbprm.fwrr.act;
+
+ s->npos += grp->curr_weight;
+}
+
+/* prepares a server when it was marked down.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_get_srv_down(struct server *s)
+{
+ struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
+ &s->proxy->lbprm.fwrr.bck :
+ &s->proxy->lbprm.fwrr.act;
+
+ s->npos = grp->curr_pos;
+}
+
+/* prepares a server when extracting it from its tree.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static void fwrr_get_srv(struct server *s)
+{
+ struct proxy *p = s->proxy;
+ struct fwrr_group *grp = (s->flags & SRV_F_BACKUP) ?
+ &p->lbprm.fwrr.bck :
+ &p->lbprm.fwrr.act;
+
+ if (s->lb_tree == grp->init) {
+ fwrr_get_srv_init(s);
+ }
+ else if (s->lb_tree == grp->next) {
+ fwrr_get_srv_next(s);
+ }
+ else if (s->lb_tree == NULL) {
+ fwrr_get_srv_down(s);
+ }
+}
+
+/* switches trees "init" and "next" for FWRR group <grp>. "init" should be empty
+ * when this happens, and "next" filled with servers sorted by weights.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static inline void fwrr_switch_trees(struct fwrr_group *grp)
+{
+ struct eb_root *swap;
+ swap = grp->init;
+ grp->init = grp->next;
+ grp->next = swap;
+ grp->curr_weight = grp->next_weight;
+ grp->curr_pos = grp->curr_weight;
+}
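+
+/* Rotation sketch with made-up weights: three servers of eweight 3/2/1 give
+ * next_weight=6, so one cycle of the "curr" tree spans 6 positions; servers
+ * whose npos overflowed the window were pushed into "next". Once "curr" and
+ * "init" are both empty, the swap above turns the former "next" tree into
+ * the new "init" tree and restarts the window at curr_pos = curr_weight = 6.
+ */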
+
+/* return next server from the current tree in FWRR group <grp>, or a server
+ * from the "init" tree if appropriate. If both trees are empty, return NULL.
+ *
+ * The lbprm's lock must be held. The server's lock is not used.
+ */
+static struct server *fwrr_get_server_from_group(struct fwrr_group *grp)
+{
+ struct eb32_node *node1;
+ struct eb32_node *node2;
+ struct server *s1 = NULL;
+ struct server *s2 = NULL;
+
+ node1 = eb32_first(&grp->curr);
+ if (node1) {
+ s1 = eb32_entry(node1, struct server, lb_node);
+ if (s1->cur_eweight && s1->npos <= grp->curr_pos)
+ return s1;
+ }
+
+ /* Either we have no server left, or we have a hole. We'll look in the
+ * init tree for a better proposal. At this point, if <s1> is non-null,
+ * it is guaranteed to remain available as the tree is locked.
+ */
+ node2 = eb32_first(grp->init);
+ if (node2) {
+ s2 = eb32_entry(node2, struct server, lb_node);
+ if (s2->cur_eweight) {
+ fwrr_get_srv_init(s2);
+ return s2;
+ }
+ }
+ return s1;
+}
+
+/* Computes next position of server <s> in the group. Nothing is done if <s>
+ * has a zero weight.
+ *
+ * The lbprm's lock must be held to protect lpos/npos/rweight.
+ */
+static inline void fwrr_update_position(struct fwrr_group *grp, struct server *s)
+{
+ unsigned int eweight = *(volatile unsigned int *)&s->cur_eweight;
+
+ if (!eweight)
+ return;
+
+ if (!s->npos) {
+ /* first time ever for this server */
+ s->npos = grp->curr_pos;
+ }
+
+ s->lpos = s->npos;
+ s->npos += grp->next_weight / eweight;
+ s->rweight += grp->next_weight % eweight;
+
+ if (s->rweight >= eweight) {
+ s->rweight -= eweight;
+ s->npos++;
+ }
+}
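+
+/* Worked example with hypothetical values: for next_weight=6 and eweight=4,
+ * each visit advances npos by 6/4 = 1 and adds 6%4 = 2 to rweight; every
+ * second visit rweight reaches 4, costing one extra position, so the spacing
+ * alternates 1,2,1,2 and averages the expected 6/4 = 1.5.
+ */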
+
+/* Return next server from the current tree in backend <p>, or a server from
+ * the init tree if appropriate. If both trees are empty, return NULL.
+ * Saturated servers are skipped and requeued.
+ *
+ * The lbprm's lock will be used in R/W mode. The server's lock is not used.
+ */
+struct server *fwrr_get_next_server(struct proxy *p, struct server *srvtoavoid)
+{
+ struct server *srv, *full, *avoided;
+ struct fwrr_group *grp;
+ int switched;
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ if (p->srv_act)
+ grp = &p->lbprm.fwrr.act;
+ else if (p->lbprm.fbck) {
+ srv = p->lbprm.fbck;
+ goto out;
+ }
+ else if (p->srv_bck)
+ grp = &p->lbprm.fwrr.bck;
+ else {
+ srv = NULL;
+ goto out;
+ }
+
+ switched = 0;
+ avoided = NULL;
+ full = NULL; /* NULL-terminated list of saturated servers */
+ while (1) {
+ /* if we see an empty group, let's first try to collect weights
+ * which might have recently changed.
+ */
+ if (!grp->curr_weight)
+ grp->curr_pos = grp->curr_weight = grp->next_weight;
+
+ /* get first server from the "current" tree. When the end of
+ * the tree is reached, we may have to switch, but only once.
+ */
+ while (1) {
+ srv = fwrr_get_server_from_group(grp);
+ if (srv)
+ break;
+ if (switched) {
+ if (avoided) {
+ srv = avoided;
+ goto take_this_one;
+ }
+ goto requeue_servers;
+ }
+ switched = 1;
+ fwrr_switch_trees(grp);
+ }
+
+ /* OK, we have a server. However, it may be saturated, in which
+ * case we don't want to reconsider it for now. We'll update
+ * its position and dequeue it anyway, so that we can move it
+ * to a better place afterwards.
+ */
+ fwrr_update_position(grp, srv);
+ fwrr_dequeue_srv(srv);
+ grp->curr_pos++;
+ if (!srv->maxconn || (!srv->queue.length && srv->served < srv_dynamic_maxconn(srv))) {
+ /* make sure it is not the server we are trying to exclude... */
+ if (srv != srvtoavoid || avoided)
+ break;
+
+ avoided = srv; /* ...but remember that it was selected yet avoided */
+ }
+
+ /* the server is saturated or avoided, let's chain it for later reinsertion.
+ */
+ srv->next_full = full;
+ full = srv;
+ }
+
+ take_this_one:
+ /* OK, we got the best server, let's update it */
+ fwrr_queue_srv(srv);
+
+ requeue_servers:
+ /* Requeue all extracted servers. If full==srv then it was
+ * avoided (unsuccessfully) and chained, omit it now. The
+ * only way to get there is by having <avoided>==NULL or
+ * <avoided>==<srv>.
+ */
+ if (unlikely(full != NULL)) {
+ if (switched) {
+ /* the tree has switched, requeue all extracted servers
+ * into "init", because their place was lost, and only
+ * their weight matters.
+ */
+ do {
+ if (likely(full != srv))
+ fwrr_queue_by_weight(grp->init, full);
+ full = full->next_full;
+ } while (full);
+ } else {
+ /* requeue all extracted servers just as if they were consumed
+ * so that they regain their expected place.
+ */
+ do {
+ if (likely(full != srv))
+ fwrr_queue_srv(full);
+ full = full->next_full;
+ } while (full);
+ }
+ }
+ out:
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ return srv;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/lb_map.c b/src/lb_map.c
new file mode 100644
index 0000000..592df91
--- /dev/null
+++ b/src/lb_map.c
@@ -0,0 +1,281 @@
+/*
+ * Map-based load-balancing (RR and HASH)
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/lb_map.h>
+#include <haproxy/queue.h>
+#include <haproxy/server-t.h>
+
+/* this function updates the map according to server <srv>'s new state.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void map_set_server_status_down(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (srv_willbe_usable(srv))
+ goto out_update_state;
+
+ /* FIXME: could be optimized since we know what changed */
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ recount_servers(p);
+ update_backend_weight(p);
+ recalc_server_map(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function updates the map according to server <srv>'s new state.
+ *
+ * The server's lock must be held. The lbprm's lock will be used.
+ */
+static void map_set_server_status_up(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return;
+
+ if (!srv_willbe_usable(srv))
+ goto out_update_state;
+
+ /* FIXME: could be optimized since we know what changed */
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ recount_servers(p);
+ update_backend_weight(p);
+ recalc_server_map(p);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ out_update_state:
+ srv_lb_commit_status(srv);
+}
+
+/* This function recomputes the server map for proxy px. It relies on
+ * px->lbprm.tot_wact, tot_wbck, tot_used, tot_weight, so it must be
+ * called after recount_servers(). It also expects px->lbprm.map.srv
+ * to be allocated with the largest size needed. It updates tot_weight.
+ *
+ * The lbprm's lock must be held.
+ */
+void recalc_server_map(struct proxy *px)
+{
+ int o, tot, flag;
+ struct server *cur, *best;
+
+ switch (px->lbprm.tot_used) {
+ case 0: /* no server */
+ return;
+ default:
+ tot = px->lbprm.tot_weight;
+ break;
+ }
+
+ /* here we *know* that we have some servers */
+ if (px->srv_act)
+ flag = 0;
+ else
+ flag = SRV_F_BACKUP;
+
+ /* this algorithm gives priority to the first server, which means that
+ * it will respect the declaration order for equivalent weights, and
+ * that whatever the weights, the first server called will always be
+ * the first declared. This is an important assumption for the backup
+ * case, where we want the first server only.
+ */
+ for (cur = px->srv; cur; cur = cur->next)
+ cur->wscore = 0;
+
+ for (o = 0; o < tot; o++) {
+ int max = 0;
+ best = NULL;
+ for (cur = px->srv; cur; cur = cur->next) {
+ if ((cur->flags & SRV_F_BACKUP) == flag &&
+ srv_willbe_usable(cur)) {
+ int v;
+
+ /* If we are forced to return only one server, we don't want to
+ * go further, because we would return the wrong one due to
+ * divide overflow.
+ */
+ if (tot == 1) {
+ best = cur;
+ /* note that best->wscore will be wrong but we don't care */
+ break;
+ }
+
+ _HA_ATOMIC_ADD(&cur->wscore, cur->next_eweight);
+ v = (cur->wscore + tot) / tot; /* result between 0 and 3 */
+ if (best == NULL || v > max) {
+ max = v;
+ best = cur;
+ }
+ }
+ }
+ px->lbprm.map.srv[o] = best;
+ if (best)
+ _HA_ATOMIC_SUB(&best->wscore, tot);
+ }
+}
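+
+/* Worked example with hypothetical effective weights: servers A (weight 2)
+ * and B (weight 1) give tot=3. At o=0, A.wscore=2 and B.wscore=1 both yield
+ * v=1, and ties keep the first server, so map[0]=A and A.wscore drops to -1.
+ * At o=1 the same happens (map[1]=A, A.wscore=-2). At o=2, A yields v=1 but
+ * B.wscore=3 yields v=2, so map[2]=B. The resulting map is [A, A, B].
+ */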
+
+/* This function is responsible for building the server MAP for map-based LB
+ * algorithms, allocating the map, and setting p->lbprm.wmult to the GCD of the
+ * weights if applicable. It should be called only once per proxy, at config
+ * time.
+ */
+void init_server_map(struct proxy *p)
+{
+ struct server *srv;
+ int pgcd;
+ int act, bck;
+
+ p->lbprm.set_server_status_up = map_set_server_status_up;
+ p->lbprm.set_server_status_down = map_set_server_status_down;
+ p->lbprm.update_server_eweight = NULL;
+
+ if (!p->srv)
+ return;
+
+ /* We will factor the weights to reduce the table,
+ * using Euclid's greatest common divisor algorithm.
+ * Since we may have zero weights, we have to first
+ * find a non-zero weight server.
+ */
+ pgcd = 1;
+ srv = p->srv;
+ while (srv && !srv->uweight)
+ srv = srv->next;
+
+ if (srv) {
+ pgcd = srv->uweight; /* note: cannot be zero */
+ while (pgcd > 1 && (srv = srv->next)) {
+ int w = srv->uweight;
+ while (w) {
+ int t = pgcd % w;
+ pgcd = w;
+ w = t;
+ }
+ }
+ }
+
+ /* It is sometimes useful to know what factor to apply
+ * to the backend's effective weight to know its real
+ * weight.
+ */
+ p->lbprm.wmult = pgcd;
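+
+ /* For instance, with hypothetical uweights of 40, 60 and 20, the GCD is
+ * 20, so wmult=20; assuming wdiv is left at 1, the effective weights
+ * become 2, 3 and 1 and the map shrinks from 120 entries to 6.
+ */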
+
+ act = bck = 0;
+ for (srv = p->srv; srv; srv = srv->next) {
+ srv->next_eweight = (srv->uweight * p->lbprm.wdiv + p->lbprm.wmult - 1) / p->lbprm.wmult;
+
+ if (srv->flags & SRV_F_BACKUP)
+ bck += srv->next_eweight;
+ else
+ act += srv->next_eweight;
+ srv_lb_commit_status(srv);
+ }
+
+ /* this is the largest map we will ever need for this server list */
+ if (act < bck)
+ act = bck;
+
+ if (!act)
+ act = 1;
+
+ p->lbprm.map.srv = calloc(act, sizeof(*p->lbprm.map.srv));
+ /* recounts servers and their weights */
+ recount_servers(p);
+ update_backend_weight(p);
+ recalc_server_map(p);
+}
+
+/*
+ * This function tries to find a running server with free connection slots for
+ * the proxy <px> following the round-robin method.
+ * If any server is found, it will be returned and px->lbprm.map.rr_idx will be updated
+ * to point to the next server. If no valid server is found, NULL is returned.
+ *
+ * The lbprm's lock will be used.
+ */
+struct server *map_get_server_rr(struct proxy *px, struct server *srvtoavoid)
+{
+ int newidx, avoididx;
+ struct server *srv, *avoided;
+
+ HA_RWLOCK_SKLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ if (px->lbprm.tot_weight == 0) {
+ avoided = NULL;
+ goto out;
+ }
+
+ if (px->lbprm.map.rr_idx < 0 || px->lbprm.map.rr_idx >= px->lbprm.tot_weight)
+ px->lbprm.map.rr_idx = 0;
+ newidx = px->lbprm.map.rr_idx;
+
+ avoided = NULL;
+ avoididx = 0; /* shut a gcc warning */
+ do {
+ srv = px->lbprm.map.srv[newidx++];
+ if (!srv->maxconn || (!srv->queue.length && srv->served < srv_dynamic_maxconn(srv))) {
+ /* make sure it is not the server we are trying to exclude... */
+ /* ...but remember that it was selected yet avoided */
+ avoided = srv;
+ avoididx = newidx;
+ if (srv != srvtoavoid) {
+ px->lbprm.map.rr_idx = newidx;
+ goto out;
+ }
+ }
+ if (newidx == px->lbprm.tot_weight)
+ newidx = 0;
+ } while (newidx != px->lbprm.map.rr_idx);
+
+ if (avoided)
+ px->lbprm.map.rr_idx = avoididx;
+
+ out:
+ HA_RWLOCK_SKUNLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ /* return the chosen server, srvtoavoid if it was the only eligible one, or NULL */
+ return avoided;
+}
+
+/*
+ * This function returns the running server from the map at the location
+ * pointed to by the result of a modulo operation on <hash>. The server map may
+ * be recomputed if required before being looked up. If any server is found, it
+ * will be returned. If no valid server is found, NULL is returned.
+ *
+ * The lbprm's lock will be used.
+ */
+struct server *map_get_server_hash(struct proxy *px, unsigned int hash)
+{
+ struct server *srv = NULL;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ if (px->lbprm.tot_weight)
+ srv = px->lbprm.map.srv[hash % px->lbprm.tot_weight];
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ return srv;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/linuxcap.c b/src/linuxcap.c
new file mode 100644
index 0000000..919086c
--- /dev/null
+++ b/src/linuxcap.c
@@ -0,0 +1,191 @@
+/*
+ * Minimal handling of Linux kernel capabilities
+ *
+ * Copyright 2000-2023 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* Depending on distros, some have capset(), others use the more complicated
+ * libcap. Let's stick to what we need and the kernel documents (capset).
+ * Note that prctl is needed here.
+ */
+#include <linux/capability.h>
+#include <sys/prctl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <syscall.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/tools.h>
+
+/* supported names, zero-terminated */
+static const struct {
+ int cap;
+ const char *name;
+} known_caps[] = {
+#ifdef CAP_NET_RAW
+ { CAP_NET_RAW, "cap_net_raw" },
+#endif
+#ifdef CAP_NET_ADMIN
+ { CAP_NET_ADMIN, "cap_net_admin" },
+#endif
+#ifdef CAP_NET_BIND_SERVICE
+ { CAP_NET_BIND_SERVICE, "cap_net_bind_service" },
+#endif
+ /* must be last */
+ { 0, 0 }
+};
+
+/* provided by sys/capability.h on some distros */
+static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap)
+{
+ return syscall(SYS_capset, hdrp, datap);
+}
+
+/* defaults to zero, i.e. we don't keep any cap after setuid() */
+static uint32_t caplist;
+
+/* try to apply capabilities before switching UID from <from_uid> to <to_uid>.
+ * In practice we need to do this in 4 steps:
+ * - set PR_SET_KEEPCAPS to preserve caps across the final setuid()
+ * - set the effective and permitted caps
+ * - switch euid to non-zero
+ * - set the effective and permitted caps again
+ * - then the caller can safely call setuid()
+ * We don't do this if the current euid is not zero or if the target uid
+ * is zero. Returns >=0 on success, negative on failure. Alerts or warnings
+ * may be emitted.
+ */
+int prepare_caps_for_setuid(int from_uid, int to_uid)
+{
+ struct __user_cap_data_struct cap_data = { };
+ struct __user_cap_header_struct cap_hdr = {
+ .pid = 0, /* current process */
+ .version = _LINUX_CAPABILITY_VERSION_1,
+ };
+
+ if (from_uid != 0)
+ return 0;
+
+ if (!to_uid)
+ return 0;
+
+ if (!caplist)
+ return 0;
+
+ if (prctl(PR_SET_KEEPCAPS, 1) == -1) {
+ ha_alert("Failed to preserve capabilities using prctl(): %s\n", strerror(errno));
+ return -1;
+ }
+
+ cap_data.effective = cap_data.permitted = caplist | (1 << CAP_SETUID);
+ if (capset(&cap_hdr, &cap_data) == -1) {
+ ha_alert("Failed to preset the capabilities to preserve using capset(): %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (seteuid(to_uid) == -1) {
+ ha_alert("Failed to set effective uid to %d: %s\n", to_uid, strerror(errno));
+ return -1;
+ }
+
+ cap_data.effective = cap_data.permitted = caplist | (1 << CAP_SETUID);
+ if (capset(&cap_hdr, &cap_data) == -1) {
+ ha_alert("Failed to set the final capabilities using capset(): %s\n", strerror(errno));
+ return -1;
+ }
+ /* all's good */
+ return 0;
+}
+
+/* finalize the capabilities after setuid(). The most important step is to drop
+ * the CAP_SETUID capability, which would otherwise allow switching back to any
+ * UID and recover everything.
+ */
+int finalize_caps_after_setuid(int from_uid, int to_uid)
+{
+ struct __user_cap_data_struct cap_data = { };
+ struct __user_cap_header_struct cap_hdr = {
+ .pid = 0, /* current process */
+ .version = _LINUX_CAPABILITY_VERSION_1,
+ };
+
+ if (from_uid != 0)
+ return 0;
+
+ if (!to_uid)
+ return 0;
+
+ if (!caplist)
+ return 0;
+
+ cap_data.effective = cap_data.permitted = caplist;
+ if (capset(&cap_hdr, &cap_data) == -1) {
+ ha_alert("Failed to drop the setuid capability using capset(): %s\n", strerror(errno));
+ return -1;
+ }
+ /* all's good */
+ return 0;
+}
+
+/* parse the "setcap" global keyword. Returns -1 on failure, 0 on success. */
+static int cfg_parse_global_setcap(char **args, int section_type,
+ struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line, char **err)
+{
+ char *name = args[1];
+ char *next;
+ uint32_t caps = 0;
+ int id;
+
+ if (!*name) {
+ memprintf(err, "'%s' : missing capability name(s). ", args[0]);
+ goto dump_caps;
+ }
+
+ while (name && *name) {
+ next = strchr(name, ',');
+ if (next)
+ *(next++) = '\0';
+
+ for (id = 0; known_caps[id].cap; id++) {
+ if (strcmp(name, known_caps[id].name) == 0) {
+ caps |= 1U << known_caps[id].cap;
+ break;
+ }
+ }
+
+ if (!known_caps[id].cap) {
+ memprintf(err, "'%s' : unsupported capability '%s'. ", args[0], args[1]);
+ goto dump_caps;
+ }
+ name = next;
+ }
+
+ caplist |= caps;
+ return 0;
+
+
+ dump_caps:
+ memprintf(err, "%s Supported ones are: ", *err);
+
+ for (id = 0; known_caps[id].cap; id++)
+ memprintf(err, "%s%s%s%s", *err,
+ id ? known_caps[id+1].cap ? ", " : " and " : "",
+ known_caps[id].name, known_caps[id+1].cap ? "" : ".");
+ return -1;
+}
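+
+/* Usage sketch: in the global section of the configuration one may write,
+ * for example:
+ *
+ *     global
+ *         setcap cap_net_bind_service
+ *         setcap cap_net_raw,cap_net_admin
+ *
+ * Each occurrence ORs the listed capabilities into <caplist> so that they are
+ * preserved across the setuid() performed when dropping privileges.
+ */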
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "setcap", cfg_parse_global_setcap },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/listener.c b/src/listener.c
new file mode 100644
index 0000000..86d0945
--- /dev/null
+++ b/src/listener.c
@@ -0,0 +1,2487 @@
+/*
+ * Listener management functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli-t.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/sample.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+
+
+/* List head of all known bind keywords */
+struct bind_kw_list bind_keywords = {
+ .list = LIST_HEAD_INIT(bind_keywords.list)
+};
+
+/* list of the temporarily limited listeners because of lack of resource */
+static struct mt_list global_listener_queue = MT_LIST_HEAD_INIT(global_listener_queue);
+static struct task *global_listener_queue_task;
+/* number of times an accepted connection resulted in maxconn being reached */
+ullong maxconn_reached = 0;
+__decl_thread(static HA_RWLOCK_T global_listener_rwlock);
+
+/* listener status for stats */
+const char* li_status_st[LI_STATE_COUNT] = {
+ [LI_STATUS_WAITING] = "WAITING",
+ [LI_STATUS_OPEN] = "OPEN",
+ [LI_STATUS_FULL] = "FULL",
+};
+
+#if defined(USE_THREAD)
+
+struct accept_queue_ring accept_queue_rings[MAX_THREADS] __attribute__((aligned(64))) = { };
+
+/* dequeue and process a pending connection from the local accept queue (single
+ * consumer). Returns the accepted connection or NULL if none was found.
+ */
+struct connection *accept_queue_pop_sc(struct accept_queue_ring *ring)
+{
+ unsigned int pos, next;
+ struct connection *ptr;
+ struct connection **e;
+ uint32_t idx = _HA_ATOMIC_LOAD(&ring->idx); /* (head << 16) + tail */
+
+ pos = idx >> 16;
+ if (pos == (uint16_t)idx)
+ return NULL;
+
+ next = pos + 1;
+ if (next >= ACCEPT_QUEUE_SIZE)
+ next = 0;
+
+ e = &ring->entry[pos];
+
+ /* wait for the producer to update the listener's pointer */
+ while (1) {
+ ptr = *e;
+ __ha_barrier_load();
+ if (ptr)
+ break;
+ pl_cpu_relax();
+ }
+
+ /* release the entry */
+ *e = NULL;
+
+ __ha_barrier_store();
+ do {
+ pos = (next << 16) | (idx & 0xffff);
+ } while (unlikely(!HA_ATOMIC_CAS(&ring->idx, &idx, pos) && __ha_cpu_relax()));
+
+ return ptr;
+}
+
+
+/* tries to push a new accepted connection <conn> into ring <ring>. Returns
+ * non-zero if it succeeds, or zero if the ring is full. Supports multiple
+ * producers.
+ */
+int accept_queue_push_mp(struct accept_queue_ring *ring, struct connection *conn)
+{
+ unsigned int pos, next;
+ uint32_t idx = _HA_ATOMIC_LOAD(&ring->idx); /* (head << 16) + tail */
+
+ do {
+ pos = (uint16_t)idx;
+ next = pos + 1;
+ if (next >= ACCEPT_QUEUE_SIZE)
+ next = 0;
+ if (next == (idx >> 16))
+ return 0; // ring full
+ next |= (idx & 0xffff0000U);
+ } while (unlikely(!_HA_ATOMIC_CAS(&ring->idx, &idx, next) && __ha_cpu_relax()));
+
+ ring->entry[pos] = conn;
+ __ha_barrier_store();
+ return 1;
+}
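+
+/* Illustration of the packed 32-bit index: with idx = 0x00030005, the pop
+ * side's head is idx >> 16 = 3 and the push side's tail is idx & 0xffff = 5,
+ * so entries 3 and 4 are pending. A producer first CASes the tail from 5 to 6
+ * to reserve slot 5, then writes the connection pointer, which is why the
+ * consumer above spins until the reserved entry becomes non-NULL.
+ */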
+
+/* proceed with accepting new connections. Don't mark it static so that it appears
+ * in task dumps.
+ */
+struct task *accept_queue_process(struct task *t, void *context, unsigned int state)
+{
+ struct accept_queue_ring *ring = context;
+ struct connection *conn;
+ struct listener *li;
+ unsigned int max_accept;
+ int ret;
+
+ /* if global.tune.maxaccept is -1, then max_accept is UINT_MAX. It
+ * is not really unlimited, but it is probably enough.
+ */
+ max_accept = global.tune.maxaccept ? global.tune.maxaccept : MAX_ACCEPT;
+ for (; max_accept; max_accept--) {
+ conn = accept_queue_pop_sc(ring);
+ if (!conn)
+ break;
+
+ li = __objt_listener(conn->target);
+ _HA_ATOMIC_INC(&li->thr_conn[ti->ltid]);
+ ret = li->bind_conf->accept(conn);
+ if (ret <= 0) {
+ /* connection was terminated by the application */
+ continue;
+ }
+
+ /* increase the per-process number of cumulated sessions, this
+ * may only be done once l->bind_conf->accept() has accepted the
+ * connection.
+ */
+ if (!(li->bind_conf->options & BC_O_UNLIMITED)) {
+ HA_ATOMIC_UPDATE_MAX(&global.sps_max,
+ update_freq_ctr(&global.sess_per_sec, 1));
+ if (li->bind_conf->options & BC_O_USE_SSL) {
+ HA_ATOMIC_UPDATE_MAX(&global.ssl_max,
+ update_freq_ctr(&global.ssl_per_sec, 1));
+ }
+ }
+ }
+
+ /* ran out of budget? Let's come back here ASAP */
+ if (!max_accept)
+ tasklet_wakeup(ring->tasklet);
+
+ return NULL;
+}
+
+/* Initializes the accept-queues. Returns 0 on success, otherwise ERR_* flags */
+static int accept_queue_init()
+{
+ struct tasklet *t;
+ int i;
+
+ for (i = 0; i < global.nbthread; i++) {
+ t = tasklet_new();
+ if (!t) {
+ ha_alert("Out of memory while initializing accept queue for thread %d\n", i);
+ return ERR_FATAL|ERR_ABORT;
+ }
+ t->tid = i;
+ t->process = accept_queue_process;
+ t->context = &accept_queue_rings[i];
+ accept_queue_rings[i].tasklet = t;
+ }
+ return 0;
+}
+
+REGISTER_CONFIG_POSTPARSER("multi-threaded accept queue", accept_queue_init);
+
+static void accept_queue_deinit()
+{
+ int i;
+
+ for (i = 0; i < global.nbthread; i++) {
+ tasklet_free(accept_queue_rings[i].tasklet);
+ }
+}
+
+REGISTER_POST_DEINIT(accept_queue_deinit);
+
+#endif // USE_THREAD
+
+/* Memory allocation and initialization of the per_thr field (one entry per
+ * bound thread).
+ * Returns 0 if the field has been successfully initialized, -1 on failure.
+ */
+int li_init_per_thr(struct listener *li)
+{
+ int nbthr = MIN(global.nbthread, MAX_THREADS_PER_GROUP);
+ int i;
+
+ /* allocate per-thread elements for listener */
+ li->per_thr = calloc(nbthr, sizeof(*li->per_thr));
+ if (!li->per_thr)
+ return -1;
+
+ for (i = 0; i < nbthr; ++i) {
+ MT_LIST_INIT(&li->per_thr[i].quic_accept.list);
+ MT_LIST_INIT(&li->per_thr[i].quic_accept.conns);
+
+ li->per_thr[i].li = li;
+ }
+
+ return 0;
+}
+
+/* helper to get listener status for stats */
+enum li_status get_li_status(struct listener *l)
+{
+ if (!l->bind_conf->maxconn || l->nbconn < l->bind_conf->maxconn) {
+ if (l->state == LI_LIMITED)
+ return LI_STATUS_WAITING;
+ else
+ return LI_STATUS_OPEN;
+ }
+ return LI_STATUS_FULL;
+}
+
+/* adjust the listener's state and its proxy's listener counters if needed.
+ * It must be called under the listener's lock, but uses atomic ops to change
+ * the proxy's counters so that the proxy lock is not needed.
+ */
+void listener_set_state(struct listener *l, enum li_state st)
+{
+ struct proxy *px = l->bind_conf->frontend;
+
+ if (px) {
+ /* from state */
+ switch (l->state) {
+ case LI_NEW: /* first call */
+ _HA_ATOMIC_INC(&px->li_all);
+ break;
+ case LI_INIT:
+ case LI_ASSIGNED:
+ break;
+ case LI_PAUSED:
+ _HA_ATOMIC_DEC(&px->li_paused);
+ break;
+ case LI_LISTEN:
+ _HA_ATOMIC_DEC(&px->li_bound);
+ break;
+ case LI_READY:
+ case LI_FULL:
+ case LI_LIMITED:
+ _HA_ATOMIC_DEC(&px->li_ready);
+ break;
+ }
+
+ /* to state */
+ switch (st) {
+ case LI_NEW:
+ case LI_INIT:
+ case LI_ASSIGNED:
+ break;
+ case LI_PAUSED:
+ BUG_ON(l->rx.fd == -1);
+ _HA_ATOMIC_INC(&px->li_paused);
+ break;
+ case LI_LISTEN:
+ BUG_ON(l->rx.fd == -1 && !l->rx.rhttp.task);
+ _HA_ATOMIC_INC(&px->li_bound);
+ break;
+ case LI_READY:
+ case LI_FULL:
+ case LI_LIMITED:
+ BUG_ON(l->rx.fd == -1 && !l->rx.rhttp.task);
+ _HA_ATOMIC_INC(&px->li_ready);
+ l->flags |= LI_F_FINALIZED;
+ break;
+ }
+ }
+ l->state = st;
+}
+
+/* This function adds the specified listener's file descriptor to the polling
+ * lists if it is in the LI_LISTEN state. The listener enters LI_READY or
+ * LI_FULL state depending on its number of connections. In daemon mode, we
+ * also support binding only the relevant processes to their respective
+ * listeners. We don't do that in debug mode however.
+ */
+void enable_listener(struct listener *listener)
+{
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &listener->lock);
+
+ /* If this listener is supposed to be only in the master, close it in
+ * the workers. Conversely, if it's supposed to be only in the workers
+ * close it in the master.
+ */
+ if (!!master != !!(listener->rx.flags & RX_F_MWORKER))
+ do_unbind_listener(listener);
+
+ if (listener->state == LI_LISTEN) {
+ BUG_ON(listener->rx.fd == -1 && !listener->rx.rhttp.task);
+ if ((global.mode & (MODE_DAEMON | MODE_MWORKER)) &&
+ (!!master != !!(listener->rx.flags & RX_F_MWORKER))) {
+ /* we don't want to enable this listener and don't
+ * want any fd event to reach it.
+ */
+ do_unbind_listener(listener);
+ }
+ else if (!listener->bind_conf->maxconn || listener->nbconn < listener->bind_conf->maxconn) {
+ listener->rx.proto->enable(listener);
+ listener_set_state(listener, LI_READY);
+ }
+ else {
+ listener_set_state(listener, LI_FULL);
+ }
+ }
+
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &listener->lock);
+}
+
+/*
+ * This function completely stops a listener.
+ * The proxy's listeners count is updated and the proxy is
+ * disabled and woken up after the last one is gone.
+ * It will need to operate under the proxy's lock, the protocol's lock and
+ * the listener's lock. The caller is responsible for indicating in lpx,
+ * lpr, lli whether the respective locks are already held (non-zero) or
+ * not (zero) so that the function picks the missing ones, in this order.
+ */
+void stop_listener(struct listener *l, int lpx, int lpr, int lli)
+{
+ struct proxy *px = l->bind_conf->frontend;
+
+ if (l->bind_conf->options & BC_O_NOSTOP) {
+ /* master-worker sockpairs are never closed but don't count as a
+ * job.
+ */
+ return;
+ }
+
+ if (!lpx && px)
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ if (!lpr)
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+
+ if (!lli)
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+
+ if (l->state > LI_INIT) {
+ do_unbind_listener(l);
+
+ if (l->state >= LI_ASSIGNED)
+ __delete_listener(l);
+
+ if (px)
+ proxy_cond_disable(px);
+ }
+
+ if (!lli)
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!lpr)
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+
+ if (!lpx && px)
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+}
+
+/* This function adds the specified <listener> to the protocol <proto>. It
+ * does nothing if the protocol was already added. The listener's state is
+ * automatically updated from LI_INIT to LI_ASSIGNED. The number of listeners
+ * for the protocol is updated. This must be called with the proto lock held.
+ */
+void default_add_listener(struct protocol *proto, struct listener *listener)
+{
+ if (listener->state != LI_INIT)
+ return;
+ listener_set_state(listener, LI_ASSIGNED);
+ listener->rx.proto = proto;
+ LIST_APPEND(&proto->receivers, &listener->rx.proto_list);
+ proto->nb_receivers++;
+}
+
+/* default function called to suspend a listener: it simply passes the call to
+ * the underlying receiver. This is fine for most socket-based protocols. This
+ * must be called under the listener's lock. It will return < 0 in case of
+ * failure, 0 if the listener was totally stopped, or > 0 if correctly paused.
+ * If no receiver-level suspend is provided, the operation is assumed
+ * to succeed.
+ */
+int default_suspend_listener(struct listener *l)
+{
+ if (!l->rx.proto->rx_suspend)
+ return 1;
+
+ return l->rx.proto->rx_suspend(&l->rx);
+}
+
+
+/* Tries to resume a suspended listener, and returns non-zero on success or
+ * zero on failure. On certain errors, an alert or a warning might be displayed.
+ * It must be called with the listener's lock held. Depending on the listener's
+ * state and protocol, a listen() call might be used to resume operations, or a
+ * call to the receiver's resume() function might be used as well. This is
+ * suitable as a default function for TCP and UDP. This must be called with the
+ * listener's lock held.
+ */
+int default_resume_listener(struct listener *l)
+{
+ int ret = 1;
+
+ if (l->state == LI_ASSIGNED) {
+ char msg[100];
+ char *errmsg;
+ int err;
+
+ /* first, try to bind the receiver */
+ err = l->rx.proto->fam->bind(&l->rx, &errmsg);
+ if (err != ERR_NONE) {
+ if (err & ERR_WARN)
+ ha_warning("Resuming listener: %s\n", errmsg);
+ else if (err & ERR_ALERT)
+ ha_alert("Resuming listener: %s\n", errmsg);
+ ha_free(&errmsg);
+ if (err & (ERR_FATAL | ERR_ABORT)) {
+ ret = 0;
+ goto end;
+ }
+ }
+
+ /* then, try to listen:
+ * for now there's still always a listening function
+ * (same check performed in protocol_bind_all())
+ */
+ BUG_ON(!l->rx.proto->listen);
+ err = l->rx.proto->listen(l, msg, sizeof(msg));
+ if (err & ERR_ALERT)
+ ha_alert("Resuming listener: %s\n", msg);
+ else if (err & ERR_WARN)
+ ha_warning("Resuming listener: %s\n", msg);
+
+ if (err & (ERR_FATAL | ERR_ABORT)) {
+ ret = 0;
+ goto end;
+ }
+ }
+
+ if (l->state < LI_PAUSED) {
+ ret = 0;
+ goto end;
+ }
+
+ if (l->state == LI_PAUSED && l->rx.proto->rx_resume &&
+ l->rx.proto->rx_resume(&l->rx) <= 0)
+ ret = 0;
+ end:
+ return ret;
+}
+
+
+/* This function tries to temporarily disable a listener, depending on the OS
+ * capabilities. Linux unbinds the listen socket after a SHUT_RD, and ignores
+ * SHUT_WR. Solaris refuses either shutdown(). OpenBSD ignores SHUT_RD but
+ * closes upon SHUT_WR and refuses to rebind. So a common validation path
+ * involves SHUT_WR && listen && SHUT_RD. In case of success, the FD's polling
+ * is disabled. It normally returns non-zero, unless an error is reported.
+ * suspend() may totally stop a listener if it doesn't support the PAUSED
+ * state, in which case state will be set to ASSIGNED.
+ * It will need to operate under the proxy's lock and the listener's lock.
+ * The caller is responsible for indicating in lpx, lli whether the respective
+ * locks are already held (non-zero) or not (zero) so that the function picks
+ * the missing ones, in this order.
+ */
+int suspend_listener(struct listener *l, int lpx, int lli)
+{
+ struct proxy *px = l->bind_conf->frontend;
+ int ret = 1;
+
+ if (!lpx && px)
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ if (!lli)
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!(l->flags & LI_F_FINALIZED) || l->state <= LI_PAUSED)
+ goto end;
+
+ if (l->rx.proto->suspend) {
+ ret = l->rx.proto->suspend(l);
+ /* if the suspend() fails, we don't want to change the
+ * current listener state
+ */
+ if (ret < 0)
+ goto end;
+ }
+
+ MT_LIST_DELETE(&l->wait_queue);
+
+ /* ret == 0 means that the suspend() has been turned into
+ * an unbind(), meaning the listener is now stopped (ie: ABNS), we need
+ * to report this state change properly
+ */
+ listener_set_state(l, ((ret) ? LI_PAUSED : LI_ASSIGNED));
+
+ if (px && !(l->flags & LI_F_SUSPENDED))
+ px->li_suspended++;
+ l->flags |= LI_F_SUSPENDED;
+
+ /* at this point, everything is under control, no error should be
+ * returned to calling function
+ */
+ ret = 1;
+
+ if (px && !(px->flags & PR_FL_PAUSED) && !px->li_ready) {
+ /* PROXY_LOCK is required */
+ proxy_cond_pause(px);
+ ha_warning("Paused %s %s.\n", proxy_cap_str(px->cap), px->id);
+ send_log(px, LOG_WARNING, "Paused %s %s.\n", proxy_cap_str(px->cap), px->id);
+ }
+ end:
+ if (!lli)
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!lpx && px)
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ return ret;
+}
+
+/* This function tries to resume a temporarily disabled listener. Paused, full,
+ * limited and disabled listeners are handled, which means that this function
+ * may replace enable_listener(). The resulting state will either be LI_READY
+ * or LI_FULL. 0 is returned in case of failure to resume (eg: dead socket).
+ * Listeners bound to a different process are not woken up unless we're in
+ * foreground mode, and are ignored. If the listener was only in the assigned
+ * state, it's totally rebound. This can happen if a suspend() has completely
+ * stopped it. If the resume fails, 0 is returned and an error might be
+ * displayed.
+ * It will need to operate under the proxy's lock and the listener's lock.
+ * The caller is responsible for indicating in lpx, lli whether the respective
+ * locks are already held (non-zero) or not (zero) so that the function picks
+ * the missing ones, in this order.
+ */
+int resume_listener(struct listener *l, int lpx, int lli)
+{
+ struct proxy *px = l->bind_conf->frontend;
+ int ret = 1;
+
+ if (!lpx && px)
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ if (!lli)
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+
+ /* check that another thread didn't do the job in parallel (e.g. at the
+ * end of listener_accept() while we'd come from dequeue_all_listeners()).
+ */
+ if (MT_LIST_INLIST(&l->wait_queue))
+ goto end;
+
+ if (!(l->flags & LI_F_FINALIZED) || l->state == LI_READY)
+ goto end;
+
+ if (l->rx.proto->resume) {
+ ret = l->rx.proto->resume(l);
+ if (!ret)
+ goto end; /* failure to resume */
+ }
+
+ if (l->bind_conf->maxconn && l->nbconn >= l->bind_conf->maxconn) {
+ l->rx.proto->disable(l);
+ listener_set_state(l, LI_FULL);
+ goto done;
+ }
+
+ l->rx.proto->enable(l);
+ listener_set_state(l, LI_READY);
+
+ done:
+ if (px && (l->flags & LI_F_SUSPENDED))
+ px->li_suspended--;
+ l->flags &= ~LI_F_SUSPENDED;
+
+ if (px && (px->flags & PR_FL_PAUSED) && !px->li_suspended) {
+ /* PROXY_LOCK is required */
+ proxy_cond_resume(px);
+ ha_warning("Resumed %s %s.\n", proxy_cap_str(px->cap), px->id);
+ send_log(px, LOG_WARNING, "Resumed %s %s.\n", proxy_cap_str(px->cap), px->id);
+ }
+ end:
+ if (!lli)
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!lpx && px)
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ return ret;
+}
+
+/* Same as resume_listener(), but will only work to resume from
+ * LI_FULL or LI_LIMITED states because we try to relax listeners that
+ * were temporarily restricted and not to resume inactive listeners that
+ * may have been paused or completely stopped in the meantime.
+ * Returns positive value for success and 0 for failure.
+ * It will need to operate under the proxy's lock and the listener's lock.
+ * The caller is responsible for indicating in lpx, lli whether the respective
+ * locks are already held (non-zero) or not (zero) so that the function picks
+ * the missing ones, in this order.
+ */
+int relax_listener(struct listener *l, int lpx, int lli)
+{
+ struct proxy *px = l->bind_conf->frontend;
+ int ret = 1;
+
+ if (!lpx && px)
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ if (!lli)
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+
+ if (l->state != LI_FULL && l->state != LI_LIMITED)
+ goto end; /* listener may be suspended or even stopped */
+ ret = resume_listener(l, 1, 1);
+
+ end:
+ if (!lli)
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!lpx && px)
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ return ret;
+}
+
+/* Marks a ready listener as full so that the stream code tries to re-enable
+ * it upon next close() using relax_listener().
+ */
+static void listener_full(struct listener *l)
+{
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+ if (l->state >= LI_READY) {
+ MT_LIST_DELETE(&l->wait_queue);
+ if (l->state != LI_FULL) {
+ l->rx.proto->disable(l);
+ listener_set_state(l, LI_FULL);
+ }
+ }
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+}
+
+/* Marks a ready listener as limited so that we only try to re-enable it when
+ * resources are free again. It will be queued into the specified queue.
+ */
+static void limit_listener(struct listener *l, struct mt_list *list)
+{
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &l->lock);
+ if (l->state == LI_READY) {
+ MT_LIST_TRY_APPEND(list, &l->wait_queue);
+ l->rx.proto->disable(l);
+ listener_set_state(l, LI_LIMITED);
+ }
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &l->lock);
+}
+
+/* Dequeues all listeners waiting for a resource in the global wait queue */
+void dequeue_all_listeners()
+{
+ struct listener *listener;
+
+ while ((listener = MT_LIST_POP(&global_listener_queue, struct listener *, wait_queue))) {
+ /* This cannot fail because the listeners are by definition in
+ * the LI_LIMITED state.
+ */
+ relax_listener(listener, 0, 0);
+ }
+}
+
+/* Dequeues all listeners waiting for a resource in proxy <px>'s queue */
+void dequeue_proxy_listeners(struct proxy *px)
+{
+ struct listener *listener;
+
+ while ((listener = MT_LIST_POP(&px->listener_queue, struct listener *, wait_queue))) {
+ /* This cannot fail because the listeners are by definition in
+ * the LI_LIMITED state.
+ */
+ relax_listener(listener, 0, 0);
+ }
+}
+
+
+/* default function used to unbind a listener. This is for use by standard
+ * protocols working on top of accepted sockets. The receiver's rx_unbind()
+ * will automatically be used after the listener is disabled if the socket is
+ * still bound. This must be used under the listener's lock.
+ */
+void default_unbind_listener(struct listener *listener)
+{
+ if (listener->state <= LI_ASSIGNED)
+ goto out_close;
+
+ if (listener->rx.fd == -1) {
+ listener_set_state(listener, LI_ASSIGNED);
+ goto out_close;
+ }
+
+ if (listener->state >= LI_READY) {
+ listener->rx.proto->disable(listener);
+ if (listener->rx.flags & RX_F_BOUND)
+ listener_set_state(listener, LI_LISTEN);
+ }
+
+ out_close:
+ if (listener->rx.flags & RX_F_BOUND)
+ listener->rx.proto->rx_unbind(&listener->rx);
+}
+
+/* This function closes the listening socket for the specified listener,
+ * provided that it's already in a listening state. The protocol's unbind()
+ * is called to put the listener into LI_ASSIGNED or LI_LISTEN and handle
+ * the unbinding tasks. The listener then enters the LI_ASSIGNED state if
+ * the receiver is unbound. Must be called with the lock held.
+ */
+void do_unbind_listener(struct listener *listener)
+{
+ MT_LIST_DELETE(&listener->wait_queue);
+
+ if (listener->rx.proto->unbind)
+ listener->rx.proto->unbind(listener);
+
+ /* we may have to downgrade the listener if the rx was closed */
+ if (!(listener->rx.flags & RX_F_BOUND) && listener->state > LI_ASSIGNED)
+ listener_set_state(listener, LI_ASSIGNED);
+}
+
+/* This function closes the listening socket for the specified listener,
+ * provided that it's already in a listening state. The listener enters the
+ * LI_ASSIGNED state, except if the FD is not closed, in which case it may
+ * remain in LI_LISTEN. This function is intended to be used as a generic
+ * function for standard protocols.
+ */
+void unbind_listener(struct listener *listener)
+{
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &listener->lock);
+ do_unbind_listener(listener);
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &listener->lock);
+}
+
+/* creates one or multiple listeners for bind_conf <bc> on sockaddr <ss> on port
+ * range <portl> to <porth>, and possibly attached to fd <fd> (or -1 for auto
+ * allocation). The address family is taken from ss->ss_family, and the protocol
+ * passed in <proto> must be usable on this family. The protocol's default iocb
+ * is automatically preset as the receivers' iocb. The number of jobs and
+ * listeners is automatically increased by the number of listeners created. It
+ * returns non-zero on success, zero on error with the error message set in <err>.
+ */
+int create_listeners(struct bind_conf *bc, const struct sockaddr_storage *ss,
+ int portl, int porth, int fd, struct protocol *proto, char **err)
+{
+ struct listener *l;
+ int port;
+
+ for (port = portl; port <= porth; port++) {
+ l = calloc(1, sizeof(*l));
+ if (!l) {
+ memprintf(err, "out of memory");
+ return 0;
+ }
+ l->obj_type = OBJ_TYPE_LISTENER;
+ LIST_APPEND(&bc->frontend->conf.listeners, &l->by_fe);
+ LIST_APPEND(&bc->listeners, &l->by_bind);
+ l->bind_conf = bc;
+ l->rx.settings = &bc->settings;
+ l->rx.owner = l;
+ l->rx.iocb = proto->default_iocb;
+ l->rx.fd = fd;
+
+ l->rx.rhttp.task = NULL;
+ l->rx.rhttp.srv = NULL;
+ l->rx.rhttp.pend_conn = NULL;
+
+ memcpy(&l->rx.addr, ss, sizeof(*ss));
+ if (proto->fam->set_port)
+ proto->fam->set_port(&l->rx.addr, port);
+
+ MT_LIST_INIT(&l->wait_queue);
+ listener_set_state(l, LI_INIT);
+
+ proto->add(proto, l);
+
+ if (fd != -1)
+ l->rx.flags |= RX_F_INHERITED;
+
+ l->extra_counters = NULL;
+
+ HA_RWLOCK_INIT(&l->lock);
+ _HA_ATOMIC_INC(&jobs);
+ _HA_ATOMIC_INC(&listeners);
+ }
+ return 1;
+}
+
+/* Optionally allocates a new shard info (if si == NULL) for receiver rx and
+ * assigns it to it, or attaches to an existing one. If the rx already had a
+ * shard_info, it is simply returned. It is illegal to call this function with
+ * an rx that's part of a group that is already attached. Attaching means the
+ * shard_info's thread count and group count are updated so the rx's group is
+ * added to the shard_info's group mask. The rx entries are added to the
+ * members array in attachment order, though the order should not matter. It
+ * is meant for boot time setup
+ * and is not thread safe. NULL is returned on allocation failure.
+ */
+struct shard_info *shard_info_attach(struct receiver *rx, struct shard_info *si)
+{
+ if (rx->shard_info)
+ return rx->shard_info;
+
+ if (!si) {
+ si = calloc(1, sizeof(*si));
+ if (!si)
+ return NULL;
+
+ si->ref = rx;
+ }
+
+ rx->shard_info = si;
+ BUG_ON(si->tgroup_mask & (1UL << (rx->bind_tgroup - 1)));
+ si->tgroup_mask |= 1UL << (rx->bind_tgroup - 1);
+ si->nbgroups = my_popcountl(si->tgroup_mask);
+ si->nbthreads += my_popcountl(rx->bind_thread);
+ si->members[si->nbgroups - 1] = rx;
+ return si;
+}
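+
+/* Example with made-up thread groups: attaching receivers bound to groups 1
+ * and 3 sets tgroup_mask to 0b101, hence nbgroups=2, and each receiver's
+ * bind_thread popcount is accumulated into nbthreads. The second receiver
+ * lands in members[1] since slots are filled in attachment order.
+ */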
+
+/* Detaches the rx from an optional shard_info it may be attached to. If so,
+ * the thread counts, group masks and refcounts are updated. The members list
+ * remains contiguous by replacing the current entry with the last one. The
+ * reference continues to point to the first receiver. If the group count
+ * reaches zero, the shard_info is automatically released.
+ */
+void shard_info_detach(struct receiver *rx)
+{
+ struct shard_info *si = rx->shard_info;
+ uint gr;
+
+ if (!si)
+ return;
+
+ rx->shard_info = NULL;
+
+ /* find the member slot this rx was attached to */
+ for (gr = 0; gr < MAX_TGROUPS && si->members[gr] != rx; gr++)
+ ;
+
+ BUG_ON(gr == MAX_TGROUPS);
+
+ si->nbthreads -= my_popcountl(rx->bind_thread);
+ si->tgroup_mask &= ~(1UL << (rx->bind_tgroup - 1));
+ si->nbgroups = my_popcountl(si->tgroup_mask);
+
+ /* replace the member by the last one. If we removed the reference, we
+ * have to switch to another one. It's always the first entry so we can
+ * simply enforce it upon every removal.
+ */
+ si->members[gr] = si->members[si->nbgroups];
+ si->members[si->nbgroups] = NULL;
+ si->ref = si->members[0];
+
+ if (!si->nbgroups)
+ free(si);
+}
+
+/* clones listener <src> and returns the new one. All dynamically allocated
+ * fields are reallocated (name for now). The new listener is inserted before
+ * the original one in the bind_conf and frontend lists. This allows it to be
+ * duplicated while iterating over the current list. The original listener must
+ * only be in the INIT or ASSIGNED states, and the new listener will only be
+ * placed into the INIT state. The counters are always set to NULL. Maxsock is
+ * updated. Returns NULL on allocation error. The shard_info is never taken so
+ * that the caller can decide what to do with it depending on how it intends to
+ * clone the listener.
+ */
+struct listener *clone_listener(struct listener *src)
+{
+ struct listener *l;
+
+ l = calloc(1, sizeof(*l));
+ if (!l)
+ goto oom1;
+ memcpy(l, src, sizeof(*l));
+
+ if (l->name) {
+ l->name = strdup(l->name);
+ if (!l->name)
+ goto oom2;
+ }
+
+ l->rx.owner = l;
+ l->rx.shard_info = NULL;
+ l->state = LI_INIT;
+ l->counters = NULL;
+ l->extra_counters = NULL;
+
+ LIST_APPEND(&src->by_fe, &l->by_fe);
+ LIST_APPEND(&src->by_bind, &l->by_bind);
+
+ MT_LIST_INIT(&l->wait_queue);
+
+ l->rx.proto->add(l->rx.proto, l);
+
+ HA_RWLOCK_INIT(&l->lock);
+ _HA_ATOMIC_INC(&jobs);
+ _HA_ATOMIC_INC(&listeners);
+ global.maxsock++;
+ return l;
+
+ oom2:
+ free(l);
+ oom1:
+ return NULL;
+}
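+
+/* Illustrative sketch (not part of the original source): duplicating every
+ * listener of a bind_conf while walking its list. Since the clone is inserted
+ * before the original, list_for_each_entry() will not visit it again:
+ *
+ *   struct listener *li, *dup;
+ *
+ *   list_for_each_entry(li, &bind_conf->listeners, by_bind) {
+ *           dup = clone_listener(li);
+ *           if (!dup)
+ *                   break; // allocation failure, to be handled by the caller
+ *   }
+ */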
+
+/* Delete a listener from its protocol's list of listeners. The listener's
+ * state is automatically updated from LI_ASSIGNED to LI_INIT. The protocol's
+ * number of listeners is updated, as well as the global number of listeners
+ * and jobs. Note that the listener must have previously been unbound. This
+ * is a low-level function expected to be called with the proto_lock and the
+ * listener's lock held.
+ */
+void __delete_listener(struct listener *listener)
+{
+ if (listener->state == LI_ASSIGNED) {
+ listener_set_state(listener, LI_INIT);
+ LIST_DELETE(&listener->rx.proto_list);
+ shard_info_detach(&listener->rx);
+ listener->rx.proto->nb_receivers--;
+ _HA_ATOMIC_DEC(&jobs);
+ _HA_ATOMIC_DEC(&listeners);
+ }
+}
+
+/* Delete a listener from its protocol's list of listeners (please check
+ * __delete_listener() above). The proto_lock and the listener's lock will
+ * be grabbed in this order.
+ */
+void delete_listener(struct listener *listener)
+{
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &listener->lock);
+ __delete_listener(listener);
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &listener->lock);
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* Returns a suitable value for a listener's backlog. It uses the listener's
+ * backlog if set, otherwise the frontend's backlog, otherwise the listener's
+ * maxconn, otherwise the frontend's maxconn, and finally defaults to 1024.
+ */
+int listener_backlog(const struct listener *l)
+{
+ if (l->bind_conf->backlog)
+ return l->bind_conf->backlog;
+
+ if (l->bind_conf->frontend->backlog)
+ return l->bind_conf->frontend->backlog;
+
+ if (l->bind_conf->maxconn)
+ return l->bind_conf->maxconn;
+
+ if (l->bind_conf->frontend->maxconn)
+ return l->bind_conf->frontend->maxconn;
+
+ return 1024;
+}
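+
+/* Worked example (illustrative): with "bind :80 maxconn 500" and no backlog
+ * configured on either the bind line or the frontend, listener_backlog()
+ * returns 500; with nothing configured at all, it falls back to 1024.
+ */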
+
+/* Returns true if listener <l> must check maxconn limit prior to accept. */
+static inline int listener_uses_maxconn(const struct listener *l)
+{
+ return !(l->bind_conf->options & (BC_O_UNLIMITED|BC_O_XPRT_MAXCONN));
+}
+
+/* This function is called on a read event from a listening socket, corresponding
+ * to an accept. It tries to accept as many connections as possible, and for each
+ * calls the listener's accept handler (generally the frontend's accept handler).
+ */
+void listener_accept(struct listener *l)
+{
+ struct connection *cli_conn;
+ struct proxy *p;
+ unsigned int max_accept;
+ int next_conn = 0;
+ int next_feconn = 0;
+ int next_actconn = 0;
+ int expire;
+ int ret;
+
+ p = l->bind_conf->frontend;
+
+ /* if l->bind_conf->maxaccept is -1, then max_accept is UINT_MAX. It is
+ * not truly unlimited, but it is probably enough.
+ */
+ max_accept = l->bind_conf->maxaccept ? l->bind_conf->maxaccept : 1;
+
+ if (!(l->bind_conf->options & BC_O_UNLIMITED) && global.sps_lim) {
+ int max = freq_ctr_remain(&global.sess_per_sec, global.sps_lim, 0);
+
+ if (unlikely(!max)) {
+ /* global session rate limit was reached */
+ expire = tick_add(now_ms, next_event_delay(&global.sess_per_sec, global.sps_lim, 0));
+ goto limit_global;
+ }
+
+ if (max_accept > max)
+ max_accept = max;
+ }
+
+ if (!(l->bind_conf->options & BC_O_UNLIMITED) && global.cps_lim) {
+ int max = freq_ctr_remain(&global.conn_per_sec, global.cps_lim, 0);
+
+ if (unlikely(!max)) {
+ /* global connection rate limit was reached */
+ expire = tick_add(now_ms, next_event_delay(&global.conn_per_sec, global.cps_lim, 0));
+ goto limit_global;
+ }
+
+ if (max_accept > max)
+ max_accept = max;
+ }
+#ifdef USE_OPENSSL
+ if (!(l->bind_conf->options & BC_O_UNLIMITED) && global.ssl_lim &&
+ l->bind_conf && l->bind_conf->options & BC_O_USE_SSL) {
+ int max = freq_ctr_remain(&global.ssl_per_sec, global.ssl_lim, 0);
+
+ if (unlikely(!max)) {
+ /* global SSL session rate limit was reached */
+ expire = tick_add(now_ms, next_event_delay(&global.ssl_per_sec, global.ssl_lim, 0));
+ goto limit_global;
+ }
+
+ if (max_accept > max)
+ max_accept = max;
+ }
+#endif
+ if (p && p->fe_sps_lim) {
+ int max = freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0);
+
+ if (unlikely(!max)) {
+ /* frontend accept rate limit was reached */
+ expire = tick_add(now_ms, next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0));
+ goto limit_proxy;
+ }
+
+ if (max_accept > max)
+ max_accept = max;
+ }
+
+ /* Note: if we fail to allocate a connection because of configured
+ * limits, we'll schedule a new attempt at worst 1 second later. If we
+ * fail due to system limits or a temporary resource shortage, we try
+ * again 100ms later in the worst case.
+ */
+ for (; max_accept; next_conn = next_feconn = next_actconn = 0, max_accept--) {
+ unsigned int count;
+ int status;
+ __decl_thread(unsigned long mask);
+
+ /* pre-increase the number of connections without going too far.
+ * We process the listener, then the proxy, then the process.
+ * We know which ones to unroll based on the next_xxx value.
+ */
+ do {
+ count = l->nbconn;
+ if (unlikely(l->bind_conf->maxconn && count >= l->bind_conf->maxconn)) {
+ /* the listener was marked full or another
+ * thread is going to do it.
+ */
+ next_conn = 0;
+ listener_full(l);
+ goto end;
+ }
+ next_conn = count + 1;
+ } while (!_HA_ATOMIC_CAS(&l->nbconn, (int *)(&count), next_conn));
+
+ if (p) {
+ do {
+ count = p->feconn;
+ if (unlikely(count >= p->maxconn)) {
+ /* the frontend was marked full or another
+ * thread is going to do it.
+ */
+ next_feconn = 0;
+ expire = TICK_ETERNITY;
+ goto limit_proxy;
+ }
+ next_feconn = count + 1;
+ } while (!_HA_ATOMIC_CAS(&p->feconn, &count, next_feconn));
+ }
+
+ if (listener_uses_maxconn(l)) {
+ next_actconn = increment_actconn();
+ if (!next_actconn) {
+ /* the process was marked full or another
+ * thread is going to do it.
+ */
+ expire = tick_add(now_ms, 1000); /* try again in 1 second */
+ goto limit_global;
+ }
+ }
+
+ /* be careful below, the listener might be shutting down in
+ * another thread on error and we must not dereference its
+ * FD without a bit of protection.
+ */
+ cli_conn = NULL;
+ status = CO_AC_PERMERR;
+
+ HA_RWLOCK_RDLOCK(LISTENER_LOCK, &l->lock);
+ if (l->rx.flags & RX_F_BOUND)
+ cli_conn = l->rx.proto->accept_conn(l, &status);
+ HA_RWLOCK_RDUNLOCK(LISTENER_LOCK, &l->lock);
+
+ if (!cli_conn) {
+ switch (status) {
+ case CO_AC_DONE:
+ goto end;
+
+ case CO_AC_RETRY: /* likely a signal */
+ _HA_ATOMIC_DEC(&l->nbconn);
+ if (p)
+ _HA_ATOMIC_DEC(&p->feconn);
+ if (listener_uses_maxconn(l))
+ _HA_ATOMIC_DEC(&actconn);
+ continue;
+
+ case CO_AC_YIELD:
+ max_accept = 0;
+ goto end;
+
+ default:
+ goto transient_error;
+ }
+ }
+
+ /* The connection was accepted, it must be counted as such */
+ if (l->counters)
+ HA_ATOMIC_UPDATE_MAX(&l->counters->conn_max, next_conn);
+
+ if (p) {
+ HA_ATOMIC_UPDATE_MAX(&p->fe_counters.conn_max, next_feconn);
+ proxy_inc_fe_conn_ctr(l, p);
+ }
+
+ if (!(l->bind_conf->options & BC_O_UNLIMITED)) {
+ count = update_freq_ctr(&global.conn_per_sec, 1);
+ HA_ATOMIC_UPDATE_MAX(&global.cps_max, count);
+ }
+
+ _HA_ATOMIC_INC(&activity[tid].accepted);
+
+ /* count the number of times an accepted connection resulted in
+ * maxconn being reached.
+ */
+ if (unlikely(_HA_ATOMIC_LOAD(&actconn) + 1 >= global.maxconn))
+ _HA_ATOMIC_INC(&maxconn_reached);
+
+ /* past this point, l->bind_conf->accept() will automatically decrement
+ * l->nbconn, feconn and actconn once done. Setting next_*conn=0
+ * allows the error path not to rollback on nbconn. It's more
+ * convenient than duplicating all exit labels.
+ */
+ next_conn = 0;
+ next_feconn = 0;
+ next_actconn = 0;
+
+#if defined(USE_THREAD)
+ if (!(global.tune.options & GTUNE_LISTENER_MQ_ANY) || stopping)
+ goto local_accept;
+
+ /* we want to perform thread rebalancing if the listener is
+ * bound to more than one thread or if it's part of a shard
+ * with more than one listener.
+ */
+ mask = l->rx.bind_thread & _HA_ATOMIC_LOAD(&tg->threads_enabled);
+ if (l->rx.shard_info || atleast2(mask)) {
+ struct accept_queue_ring *ring;
+ struct listener *new_li;
+ uint r1, r2, t, t1, t2;
+ ulong n0, n1;
+ const struct tgroup_info *g1, *g2;
+ ulong m1, m2;
+ ulong *thr_idx_ptr;
+
+ /* The principle is that we have two running indexes,
+ * each visiting in turn all threads bound to this
+ * listener's shard. The connection will be assigned to
+ * the one with the least connections, and the other
+ * one will be updated. This provides a good fairness
+ * on short connections (round robin) and on long ones
+ * (conn count), without ever missing any idle thread.
+ * Each thread number is encoded as LONGBITS times the
+ * receiver number plus its local thread number from 0
+ * to MAX_THREADS_PER_GROUP - 1. The two indexes are
+ * stored as 10/12 bit numbers in thr_idx, since up to
+ * LONGBITS threads and groups can be represented. They
+ * are represented
+ * like this:
+ * 31:20 19:15 14:10 9:5 4:0
+ * 32b: [ counter | r2num | t2num | r1num | t1num ]
+ *
+ * 63:24 23:18 17:12 11:6 5:0
+ * 64b: [ counter | r2num | t2num | r1num | t1num ]
+ *
+ * The change counter is only used to avoid swapping too
+ * old a value when the value loops back.
+ *
+ * In the loop below we have this for each index:
+ * - n is the thread index
+ * - r is the receiver number
+ * - g is the receiver's thread group
+ * - t is the thread number in this receiver
+ * - m is the receiver's thread mask shifted by the thread number
+ */
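+
+ /* Worked example (illustrative), on a 64-bit platform
+ * with LONGBITS == 64: each field spans 6 bits, so a
+ * value n0 decodes as
+ * t1 = n0 & 63;
+ * r1 = (n0 >> 6) & 63;
+ * t2 = (n0 >> 12) & 63;
+ * r2 = (n0 >> 18) & 63;
+ * with bits 24 and above holding the wrap-around
+ * counter, which is exactly what the divisions by
+ * LONGBITS below compute.
+ */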
+
+ /* keep a copy for the final update. thr_idx is composite
+ * and made of (n2<<16) + n1.
+ */
+ thr_idx_ptr = l->rx.shard_info ? &((struct listener *)(l->rx.shard_info->ref->owner))->thr_idx : &l->thr_idx;
+ while (1) {
+ int q0, q1, q2;
+
+ /* calculate r1/g1/t1 first (ascending idx) */
+ n0 = _HA_ATOMIC_LOAD(thr_idx_ptr);
+ new_li = NULL;
+
+ t1 = (uint)n0 & (LONGBITS - 1);
+ r1 = ((uint)n0 / LONGBITS) & (LONGBITS - 1);
+
+ while (1) {
+ if (l->rx.shard_info) {
+ /* multiple listeners, take the group into account */
+ if (r1 >= l->rx.shard_info->nbgroups)
+ r1 = 0;
+
+ g1 = &ha_tgroup_info[l->rx.shard_info->members[r1]->bind_tgroup - 1];
+ m1 = l->rx.shard_info->members[r1]->bind_thread;
+ } else {
+ /* single listener */
+ r1 = 0;
+ g1 = tg;
+ m1 = l->rx.bind_thread;
+ }
+ m1 &= _HA_ATOMIC_LOAD(&g1->threads_enabled);
+ m1 >>= t1;
+
+ /* find first existing thread */
+ if (unlikely(!(m1 & 1))) {
+ m1 &= ~1UL;
+ if (!m1) {
+ /* no more threads here, switch to
+ * first thread of next group.
+ */
+ t1 = 0;
+ if (l->rx.shard_info)
+ r1++;
+ /* loop again */
+ continue;
+ }
+ t1 += my_ffsl(m1) - 1;
+ }
+ /* done: r1 and t1 are OK */
+ break;
+ }
+
+ /* now r2/g2/t2 (descending idx) */
+ t2 = ((uint)n0 / LONGBITS / LONGBITS) & (LONGBITS - 1);
+ r2 = ((uint)n0 / LONGBITS / LONGBITS / LONGBITS) & (LONGBITS - 1);
+
+ /* if running in round-robin mode ("fair"), we don't need
+ * to go further.
+ */
+ if ((global.tune.options & GTUNE_LISTENER_MQ_ANY) == GTUNE_LISTENER_MQ_FAIR) {
+ t = g1->base + t1;
+ if (l->rx.shard_info && t != tid)
+ new_li = l->rx.shard_info->members[r1]->owner;
+ goto updt_t1;
+ }
+
+ while (1) {
+ if (l->rx.shard_info) {
+ /* multiple listeners, take the group into account */
+ if (r2 >= l->rx.shard_info->nbgroups)
+ r2 = l->rx.shard_info->nbgroups - 1;
+
+ g2 = &ha_tgroup_info[l->rx.shard_info->members[r2]->bind_tgroup - 1];
+ m2 = l->rx.shard_info->members[r2]->bind_thread;
+ } else {
+ /* single listener */
+ r2 = 0;
+ g2 = tg;
+ m2 = l->rx.bind_thread;
+ }
+ m2 &= _HA_ATOMIC_LOAD(&g2->threads_enabled);
+ m2 &= nbits(t2 + 1);
+
+ /* find previous existing thread */
+ if (unlikely(!(m2 & (1UL << t2)) || (g1 == g2 && t1 == t2))) {
+ /* highest bit not set or colliding threads, let's check
+ * if we still have other threads available after this
+ * one.
+ */
+ m2 &= ~(1UL << t2);
+ if (!m2) {
+ /* no more threads here, switch to
+ * last thread of previous group.
+ */
+ t2 = MAX_THREADS_PER_GROUP - 1;
+ if (l->rx.shard_info)
+ r2--;
+ /* loop again */
+ continue;
+ }
+ t2 = my_flsl(m2) - 1;
+ }
+ /* done: r2 and t2 are OK */
+ break;
+ }
+
+ /* tests show that it's worth checking that other threads have not
+ * already changed the index to save the rest of the calculation,
+ * or we'd have to redo it anyway.
+ */
+ if (n0 != _HA_ATOMIC_LOAD(thr_idx_ptr))
+ continue;
+
+ /* here we have (r1,g1,t1) that designate the first receiver, its
+ * thread group and local thread, and (r2,g2,t2) that designate
+ * the second receiver, its thread group and local thread. We'll
+ * also consider the local thread with q0.
+ */
+ q0 = accept_queue_ring_len(&accept_queue_rings[tid]);
+ q1 = accept_queue_ring_len(&accept_queue_rings[g1->base + t1]);
+ q2 = accept_queue_ring_len(&accept_queue_rings[g2->base + t2]);
+
+ /* add to this the currently active connections */
+ q0 += _HA_ATOMIC_LOAD(&l->thr_conn[ti->ltid]);
+ if (l->rx.shard_info) {
+ q1 += _HA_ATOMIC_LOAD(&((struct listener *)l->rx.shard_info->members[r1]->owner)->thr_conn[t1]);
+ q2 += _HA_ATOMIC_LOAD(&((struct listener *)l->rx.shard_info->members[r2]->owner)->thr_conn[t2]);
+ } else {
+ q1 += _HA_ATOMIC_LOAD(&l->thr_conn[t1]);
+ q2 += _HA_ATOMIC_LOAD(&l->thr_conn[t2]);
+ }
+
+ /* we have 3 possibilities now :
+ * q1 < q2 : t1 is less loaded than t2, so we pick it
+ * and update t2 (since t1 might still be
+ * lower than another thread)
+ * q1 > q2 : t2 is less loaded than t1, so we pick it
+ * and update t1 (since t2 might still be
+ * lower than another thread)
+ * q1 = q2 : both are equally loaded, thus we pick t1
+ * and update t1 as it will become more loaded
+ * than t2.
+ * On top of that, if in the end the current thread appears
+ * to be as good a deal, we'll prefer it over a foreign
+ * one as it will improve locality and avoid a migration.
+ */
+
+ if (q1 - q2 < 0) {
+ t = g1->base + t1;
+ if (q0 <= q1)
+ t = tid;
+
+ if (l->rx.shard_info && t != tid)
+ new_li = l->rx.shard_info->members[r1]->owner;
+
+ t2--;
+ if (t2 >= MAX_THREADS_PER_GROUP) {
+ if (l->rx.shard_info)
+ r2--;
+ t2 = MAX_THREADS_PER_GROUP - 1;
+ }
+ }
+ else if (q1 - q2 > 0) {
+ t = g2->base + t2;
+ if (q0 <= q2)
+ t = tid;
+
+ if (l->rx.shard_info && t != tid)
+ new_li = l->rx.shard_info->members[r2]->owner;
+ goto updt_t1;
+ }
+ else { // q1 == q2
+ t = g1->base + t1;
+ if (q0 < q1) // local must be strictly better than both
+ t = tid;
+
+ if (l->rx.shard_info && t != tid)
+ new_li = l->rx.shard_info->members[r1]->owner;
+ updt_t1:
+ t1++;
+ if (t1 >= MAX_THREADS_PER_GROUP) {
+ if (l->rx.shard_info)
+ r1++;
+ t1 = 0;
+ }
+ }
+
+ /* The target thread number is in <t> now. Let's
+ * compute the new index and try to update it.
+ */
+
+ /* take previous counter and increment it */
+ n1 = n0 & -(ulong)(LONGBITS * LONGBITS * LONGBITS * LONGBITS);
+ n1 += LONGBITS * LONGBITS * LONGBITS * LONGBITS;
+ n1 += (((r2 * LONGBITS) + t2) * LONGBITS * LONGBITS);
+ n1 += (r1 * LONGBITS) + t1;
+ if (likely(_HA_ATOMIC_CAS(thr_idx_ptr, &n0, n1)))
+ break;
+
+ /* bah we lost the race, try again */
+ __ha_cpu_relax();
+ } /* end of main while() loop */
+
+ /* we may need to update the listener in the connection
+ * if we switched to another group.
+ */
+ if (new_li)
+ cli_conn->target = &new_li->obj_type;
+
+ /* here we have the target thread number in <t> and we hold a
+ * reservation in the target ring.
+ */
+
+ if (l->rx.proto && l->rx.proto->set_affinity) {
+ if (l->rx.proto->set_affinity(cli_conn, t)) {
+ /* Failed migration, stay on the same thread. */
+ goto local_accept;
+ }
+ }
+
+ /* We successfully selected the best thread "t" for this
+ * connection. We use deferred accepts even if it's the
+ * local thread because tests show that it's the best
+ * performing model, likely due to better cache locality
+ * when processing this loop.
+ */
+ ring = &accept_queue_rings[t];
+ if (accept_queue_push_mp(ring, cli_conn)) {
+ _HA_ATOMIC_INC(&activity[t].accq_pushed);
+ tasklet_wakeup(ring->tasklet);
+ continue;
+ }
+ /* If the ring is full we do a synchronous accept on
+ * the local thread here.
+ */
+ _HA_ATOMIC_INC(&activity[t].accq_full);
+ }
+#endif // USE_THREAD
+
+ local_accept:
+ /* restore the connection's listener in case we failed to migrate above */
+ cli_conn->target = &l->obj_type;
+ _HA_ATOMIC_INC(&l->thr_conn[ti->ltid]);
+ ret = l->bind_conf->accept(cli_conn);
+ if (unlikely(ret <= 0)) {
+ /* The connection was closed by stream_accept(). Either
+ * we just have to ignore it (ret == 0) or it's a critical
+ * error due to a resource shortage, and we must stop the
+ * listener (ret < 0).
+ */
+ if (ret == 0) /* successful termination */
+ continue;
+
+ goto transient_error;
+ }
+
+ /* increase the per-process number of cumulated sessions, this
+ * may only be done once l->bind_conf->accept() has accepted the
+ * connection.
+ */
+ if (!(l->bind_conf->options & BC_O_UNLIMITED)) {
+ count = update_freq_ctr(&global.sess_per_sec, 1);
+ HA_ATOMIC_UPDATE_MAX(&global.sps_max, count);
+ }
+#ifdef USE_OPENSSL
+ if (!(l->bind_conf->options & BC_O_UNLIMITED) &&
+ l->bind_conf && l->bind_conf->options & BC_O_USE_SSL) {
+ count = update_freq_ctr(&global.ssl_per_sec, 1);
+ HA_ATOMIC_UPDATE_MAX(&global.ssl_max, count);
+ }
+#endif
+
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_STUCK); // this thread is still running
+ } /* end of for (max_accept--) */
+
+ end:
+ if (next_conn)
+ _HA_ATOMIC_DEC(&l->nbconn);
+
+ if (p && next_feconn)
+ _HA_ATOMIC_DEC(&p->feconn);
+
+ if (next_actconn)
+ _HA_ATOMIC_DEC(&actconn);
+
+ if ((l->state == LI_FULL && (!l->bind_conf->maxconn || l->nbconn < l->bind_conf->maxconn)) ||
+ (l->state == LI_LIMITED &&
+ ((!p || p->feconn < p->maxconn) && (actconn < global.maxconn) &&
+ (!tick_isset(global_listener_queue_task->expire) ||
+ tick_is_expired(global_listener_queue_task->expire, now_ms))))) {
+ /* at least one thread has to do this when quitting */
+ relax_listener(l, 0, 0);
+
+ /* Dequeues all of the listeners waiting for a resource */
+ dequeue_all_listeners();
+
+ if (p && !MT_LIST_ISEMPTY(&p->listener_queue) &&
+ (!p->fe_sps_lim || freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0) > 0))
+ dequeue_proxy_listeners(p);
+ }
+ return;
+
+ transient_error:
+ /* pause the listener for up to 100 ms */
+ expire = tick_add(now_ms, 100);
+
+ /* This may be a shared socket that was paused by another process.
+ * Let's pause it in this case as well.
+ */
+ if (l->rx.proto && l->rx.proto->rx_listening(&l->rx) == 0) {
+ suspend_listener(l, 0, 0);
+ goto end;
+ }
+
+ limit_global:
+ /* (re-)queue the listener to the global queue and set it to expire no
+ * later than <expire> ahead. The listener turns to LI_LIMITED.
+ */
+ limit_listener(l, &global_listener_queue);
+ HA_RWLOCK_RDLOCK(LISTENER_LOCK, &global_listener_rwlock);
+ task_schedule(global_listener_queue_task, expire);
+ HA_RWLOCK_RDUNLOCK(LISTENER_LOCK, &global_listener_rwlock);
+ goto end;
+
+ limit_proxy:
+ /* (re-)queue the listener to the proxy's queue and set it to expire no
+ * later than <expire> ahead. The listener turns to LI_LIMITED.
+ */
+ limit_listener(l, &p->listener_queue);
+ if (p->task && tick_isset(expire))
+ task_schedule(p->task, expire);
+ goto end;
+}
+
+/* Notify the listener that a connection initiated from it was released. This
+ * is used to keep the connection count consistent and to possibly re-open
+ * listening when it was limited.
+ */
+void listener_release(struct listener *l)
+{
+ struct proxy *fe = l->bind_conf->frontend;
+
+ if (listener_uses_maxconn(l))
+ _HA_ATOMIC_DEC(&actconn);
+ if (fe)
+ _HA_ATOMIC_DEC(&fe->feconn);
+ _HA_ATOMIC_DEC(&l->nbconn);
+ _HA_ATOMIC_DEC(&l->thr_conn[ti->ltid]);
+
+ if (l->state == LI_FULL || l->state == LI_LIMITED)
+ relax_listener(l, 0, 0);
+
+ /* Dequeues all of the listeners waiting for a resource */
+ dequeue_all_listeners();
+
+ if (fe && !MT_LIST_ISEMPTY(&fe->listener_queue) &&
+ (!fe->fe_sps_lim || freq_ctr_remain(&fe->fe_sess_per_sec, fe->fe_sps_lim, 0) > 0))
+ dequeue_proxy_listeners(fe);
+}
+
+/* Initializes the listener queues. Returns 0 on success, otherwise ERR_* flags */
+static int listener_queue_init()
+{
+ global_listener_queue_task = task_new_anywhere();
+ if (!global_listener_queue_task) {
+ ha_alert("Out of memory when initializing global listener queue\n");
+ return ERR_FATAL|ERR_ABORT;
+ }
+ /* very simple initialization, users will queue the task if needed */
+ global_listener_queue_task->context = NULL; /* not even a context! */
+ global_listener_queue_task->process = manage_global_listener_queue;
+ HA_RWLOCK_INIT(&global_listener_rwlock);
+
+ return 0;
+}
+
+static void listener_queue_deinit()
+{
+ task_destroy(global_listener_queue_task);
+ global_listener_queue_task = NULL;
+}
+
+REGISTER_CONFIG_POSTPARSER("multi-threaded listener queue", listener_queue_init);
+REGISTER_POST_DEINIT(listener_queue_deinit);
+
+
+/* This is the global management task for listeners. It enables listeners waiting
+ * for global resources when there are enough free resources, or at least once in
+ * a while. It is designed to be called as a task. It's exported so that it's easy
+ * to spot in "show tasks" or "show profiling".
+ */
+struct task *manage_global_listener_queue(struct task *t, void *context, unsigned int state)
+{
+ /* If there are still too many concurrent connections, let's wait for
+ * some of them to go away. We don't need to re-arm the timer because
+ * each of them will scan the queue anyway.
+ */
+ if (unlikely(actconn >= global.maxconn))
+ goto out;
+
+ /* We should periodically try to enable listeners waiting for a global
+ * resource here, because it is possible, though very unlikely, that
+ * they have been blocked by a temporary lack of global resource such
+ * as a file descriptor or memory and that the temporary condition has
+ * disappeared.
+ */
+ dequeue_all_listeners();
+
+ out:
+ HA_RWLOCK_WRLOCK(LISTENER_LOCK, &global_listener_rwlock);
+ t->expire = TICK_ETERNITY;
+ HA_RWLOCK_WRUNLOCK(LISTENER_LOCK, &global_listener_rwlock);
+ return t;
+}
+
+/* Applies the thread mask, shards etc to the bind_conf. It normally returns 0
+ * otherwise the number of errors. Upon error it may set error codes (ERR_*) in
+ * err_code. It is supposed to be called only once very late in the boot process
+ * after the bind_conf's thread_set is fixed. The function may emit warnings and
+ * alerts. Extra listeners may be created on the fly.
+ */
+int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code)
+{
+ struct proxy *fe = bind_conf->frontend;
+ struct listener *li, *new_li, *ref;
+ struct thread_set new_ts;
+ int shard, shards, todo, done, grp, dups;
+ ulong mask, gmask, bit;
+ int cfgerr = 0;
+ char *err;
+
+ err = NULL;
+ if (thread_resolve_group_mask(&bind_conf->thread_set, 0, &err) < 0) {
+ ha_alert("%s '%s': %s in 'bind %s' at [%s:%d].\n",
+ proxy_type_str(fe),
+ fe->id, err, bind_conf->arg, bind_conf->file, bind_conf->line);
+ free(err);
+ cfgerr++;
+ return cfgerr;
+ }
+
+ /* apply thread masks and groups to all receivers */
+ list_for_each_entry(li, &bind_conf->listeners, by_bind) {
+ shards = bind_conf->settings.shards;
+ todo = thread_set_count(&bind_conf->thread_set);
+
+ /* special values: -1 = "by-thread", -2 = "by-group" */
+ if (shards == -1) {
+ if (protocol_supports_flag(li->rx.proto, PROTO_F_REUSEPORT_SUPPORTED))
+ shards = todo;
+ else {
+ if (fe != global.cli_fe)
+ ha_diag_warning("[%s:%d]: Disabling per-thread sharding for listener in"
+ " %s '%s' because SO_REUSEPORT is disabled\n",
+ bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id);
+ shards = 1;
+ }
+ }
+ else if (shards == -2)
+ shards = protocol_supports_flag(li->rx.proto, PROTO_F_REUSEPORT_SUPPORTED) ? my_popcountl(bind_conf->thread_set.grps) : 1;
+
+ /* no more shards than total threads */
+ if (shards > todo)
+ shards = todo;
+
+ /* We also need to check if an explicit shards count was set and cannot be honored */
+ if (shards > 1 && !protocol_supports_flag(li->rx.proto, PROTO_F_REUSEPORT_SUPPORTED)) {
+ ha_warning("[%s:%d]: Disabling sharding for listener in %s '%s' because SO_REUSEPORT is disabled\n",
+ bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id);
+ shards = 1;
+ }
+
+ shard = done = grp = bit = mask = 0;
+ new_li = li;
+
+ while (shard < shards) {
+ memset(&new_ts, 0, sizeof(new_ts));
+ while (grp < global.nbtgroups && done < todo) {
+ /* enlarge mask to cover next bit of bind_thread till we
+ * have enough bits for one shard. We restart from the
+ * current grp+bit.
+ */
+
+ /* first let's find the first non-empty group starting at <mask> */
+ if (!(bind_conf->thread_set.rel[grp] & ha_tgroup_info[grp].threads_enabled & ~mask)) {
+ grp++;
+ mask = 0;
+ continue;
+ }
+
+ /* take next unassigned bit */
+ bit = (bind_conf->thread_set.rel[grp] & ~mask) & -(bind_conf->thread_set.rel[grp] & ~mask);
+ new_ts.rel[grp] |= bit;
+ mask |= bit;
+ new_ts.grps |= 1UL << grp;
+
+ done += shards;
+ }
+
+ BUG_ON(!new_ts.grps); // no more bits left unassigned
+
+ /* Create all required listeners for all bound groups. If more than one group is
+ * needed, the first receiver serves as a reference, and subsequent ones point to
+ * it. We already have a listener available in <new_li> so we only allocate a new
+ * one if we're not on the last one. We count the remaining groups by copying their
+ * mask into <gmask> and dropping the lowest bit at the end of the loop until none
+ * is left. Ah yes, it's not pretty :-/
+ */
+ ref = new_li;
+ gmask = new_ts.grps;
+ for (dups = 0; gmask; dups++) {
+ /* assign the first (and only) thread and group */
+ new_li->rx.bind_thread = thread_set_nth_tmask(&new_ts, dups);
+ new_li->rx.bind_tgroup = thread_set_nth_group(&new_ts, dups);
+
+ if (dups) {
+ /* it has been allocated already in the previous round */
+ shard_info_attach(&new_li->rx, ref->rx.shard_info);
+ new_li->rx.flags |= RX_F_MUST_DUP;
+ }
+
+ gmask &= gmask - 1; // drop lowest bit
+ if (gmask) {
+ /* yet another listener expected in this shard, let's
+ * chain it.
+ */
+ struct listener *tmp_li = clone_listener(new_li);
+
+ if (!tmp_li) {
+ ha_alert("Out of memory while trying to allocate extra listener for group %u of shard %d in %s %s\n",
+ new_li->rx.bind_tgroup, shard, proxy_type_str(fe), fe->id);
+ cfgerr++;
+ *err_code |= ERR_FATAL | ERR_ALERT;
+ return cfgerr;
+ }
+
+ /* if we're forced to create at least two listeners, we have to
+ * allocate a shared shard_info that's linked to from the reference
+ * and each other listener, so we'll create it here.
+ */
+ if (!shard_info_attach(&ref->rx, NULL)) {
+ ha_alert("Out of memory while trying to allocate shard_info for listener for group %u of shard %d in %s %s\n",
+ new_li->rx.bind_tgroup, shard, proxy_type_str(fe), fe->id);
+ cfgerr++;
+ *err_code |= ERR_FATAL | ERR_ALERT;
+ return cfgerr;
+ }
+ new_li = tmp_li;
+ }
+ }
+ done -= todo;
+
+ shard++;
+ if (shard >= shards)
+ break;
+
+ /* create another listener for new shards */
+ new_li = clone_listener(li);
+ if (!new_li) {
+ ha_alert("Out of memory while trying to allocate extra listener for shard %d in %s %s\n",
+ shard, proxy_type_str(fe), fe->id);
+ cfgerr++;
+ *err_code |= ERR_FATAL | ERR_ALERT;
+ return cfgerr;
+ }
+ }
+ }
+
+ /* success */
+ return cfgerr;
+}
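+
+/* Worked example (illustrative): with "thread 1-8 shards 3" on a single
+ * thread group, todo is 8 and shards is 3, so the loop above hands out
+ * slices of 3, 3 and 2 threads to shards 0, 1 and 2 respectively, cloning
+ * one extra listener per additional shard.
+ */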
+
+/*
+ * Registers the bind keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void bind_register_keywords(struct bind_kw_list *kwl)
+{
+ LIST_APPEND(&bind_keywords.list, &kwl->list);
+}
+
+/* Return a pointer to the bind keyword <kw>, or NULL if not found. If the
+ * keyword is found with a NULL ->parse() function, then an attempt is made to
+ * find one with a valid ->parse() function. This way it is possible to declare
+ * platform-dependent, known keywords as NULL, then only declare them as valid
+ * if some options are met. Note that if the requested keyword contains an
+ * opening parenthesis, everything from this point is ignored.
+ */
+struct bind_kw *bind_find_kw(const char *kw)
+{
+ int index;
+ const char *kwend;
+ struct bind_kw_list *kwl;
+ struct bind_kw *ret = NULL;
+
+ kwend = strchr(kw, '(');
+ if (!kwend)
+ kwend = kw + strlen(kw);
+
+ list_for_each_entry(kwl, &bind_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
+ kwl->kw[index].kw[kwend-kw] == 0) {
+ if (kwl->kw[index].parse)
+ return &kwl->kw[index]; /* found it !*/
+ else
+ ret = &kwl->kw[index]; /* may be OK */
+ }
+ }
+ }
+ return ret;
+}
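+
+/* Example (illustrative, with a hypothetical keyword name): a lookup such as
+ * bind_find_kw("foo(arg)") matches a registered keyword "foo", since
+ * everything starting at the opening parenthesis is ignored.
+ */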
+
+/* Dumps all registered "bind" keywords to the <out> string pointer. The
+ * unsupported keywords are only dumped if their supported form was not
+ * found.
+ */
+void bind_dump_kws(char **out)
+{
+ struct bind_kw_list *kwl;
+ int index;
+
+ if (!out)
+ return;
+
+ *out = NULL;
+ list_for_each_entry(kwl, &bind_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].parse ||
+ bind_find_kw(kwl->kw[index].kw) == &kwl->kw[index]) {
+ memprintf(out, "%s[%4s] %s%s%s\n", *out ? *out : "",
+ kwl->scope,
+ kwl->kw[index].kw,
+ kwl->kw[index].skip ? " <arg>" : "",
+ kwl->kw[index].parse ? "" : " (not supported)");
+ }
+ }
+ }
+}
+
+/* Try to find among the registered bind keywords the word that looks closest
+ * to <word> by counting transitions between letters, digits and other
+ * characters. Will return the best matching word if found, otherwise NULL.
+ */
+const char *bind_find_best_kw(const char *word)
+{
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ const struct bind_kw_list *kwl;
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+ int index;
+
+ make_word_fingerprint(word_sig, word);
+ list_for_each_entry(kwl, &bind_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ make_word_fingerprint(list_sig, kwl->kw[index].kw);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = kwl->kw[index].kw;
+ }
+ }
+ }
+
+ if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
+ best_ptr = NULL;
+
+ return best_ptr;
+}
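+
+/* Example (illustrative): a config typo such as "backlgo" should fingerprint
+ * closely enough to "backlog" to be suggested, while a completely unrelated
+ * word exceeds the 2*strlen() cutoff above and yields NULL.
+ */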
+
+/* allocate a bind_conf struct for a bind line, and chain it to the frontend <fe>.
+ * If <arg> is not NULL, it is duplicated into ->arg to store useful config
+ * information for error reporting. NULL is returned on error.
+ */
+struct bind_conf *bind_conf_alloc(struct proxy *fe, const char *file,
+ int line, const char *arg, struct xprt_ops *xprt)
+{
+ struct bind_conf *bind_conf = calloc(1, sizeof(*bind_conf));
+
+ if (!bind_conf)
+ goto err;
+
+ bind_conf->file = strdup(file);
+ if (!bind_conf->file)
+ goto err;
+ bind_conf->line = line;
+ if (arg) {
+ bind_conf->arg = strdup(arg);
+ if (!bind_conf->arg)
+ goto err;
+ }
+
+ LIST_APPEND(&fe->conf.bind, &bind_conf->by_fe);
+ bind_conf->settings.ux.uid = -1;
+ bind_conf->settings.ux.gid = -1;
+ bind_conf->settings.ux.mode = 0;
+ bind_conf->settings.shards = global.tune.default_shards;
+ bind_conf->xprt = xprt;
+ bind_conf->frontend = fe;
+ bind_conf->analysers = fe->fe_req_ana;
+ bind_conf->severity_output = CLI_SEVERITY_NONE;
+#ifdef USE_OPENSSL
+ HA_RWLOCK_INIT(&bind_conf->sni_lock);
+ bind_conf->sni_ctx = EB_ROOT;
+ bind_conf->sni_w_ctx = EB_ROOT;
+#endif
+#ifdef USE_QUIC
+ /* Use connection socket for QUIC by default. */
+ bind_conf->quic_mode = QUIC_SOCK_MODE_CONN;
+ bind_conf->max_cwnd =
+ global.tune.bufsize * global.tune.quic_streams_buf;
+#endif
+ LIST_INIT(&bind_conf->listeners);
+
+ bind_conf->rhttp_srvname = NULL;
+
+ return bind_conf;
+
+ err:
+ if (bind_conf) {
+ ha_free(&bind_conf->file);
+ ha_free(&bind_conf->arg);
+ }
+ ha_free(&bind_conf);
+ return NULL;
+}
+
+const char *listener_state_str(const struct listener *l)
+{
+ static const char *states[8] = {
+ "NEW", "INI", "ASS", "PAU", "LIS", "RDY", "FUL", "LIM",
+ };
+ unsigned int st = l->state;
+
+ if (st >= sizeof(states) / sizeof(*states))
+ return "INVALID";
+ return states[st];
+}
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* set temp integer to the number of connections to the same listening socket */
+static int
+smp_fetch_dconn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->sess->listener->nbconn;
+ return 1;
+}
+
+/* set temp integer to the id of the socket (listener) */
+static int
+smp_fetch_so_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->sess->listener->luid;
+ return 1;
+}
+
+/* set temp string to the name of the listening socket */
+static int
+smp_fetch_so_name(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.str.area = smp->sess->listener->name;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ return 1;
+}
+
+/* parse the "accept-proxy" bind keyword */
+static int bind_parse_accept_proxy(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ conf->options |= BC_O_ACC_PROXY;
+ return 0;
+}
+
+/* parse the "accept-netscaler-cip" bind keyword */
+static int bind_parse_accept_netscaler_cip(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ uint32_t val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+ if (val <= 0) {
+ memprintf(err, "'%s' : invalid value %d, must be > 0", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->options |= BC_O_ACC_CIP;
+ conf->ns_cip_magic = val;
+ return 0;
+}
+
+/* parse the "backlog" bind keyword */
+static int bind_parse_backlog(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+ if (val < 0) {
+ memprintf(err, "'%s' : invalid value %d, must be >= 0", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->backlog = val;
+ return 0;
+}
+
+/* parse the "id" bind keyword */
+static int bind_parse_id(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct eb32_node *node;
+ struct listener *l, *new;
+ char *error;
+
+ if (conf->listeners.n != conf->listeners.p) {
+ memprintf(err, "'%s' can only be used with a single socket", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : expects an integer argument", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ new = LIST_NEXT(&conf->listeners, struct listener *, by_bind);
+ new->luid = strtol(args[cur_arg + 1], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "'%s' : expects an integer argument, found '%s'", args[cur_arg], args[cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ new->conf.id.key = new->luid;
+
+ if (new->luid <= 0) {
+ memprintf(err, "'%s' : custom id has to be > 0", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ node = eb32_lookup(&px->conf.used_listener_id, new->luid);
+ if (node) {
+ l = container_of(node, struct listener, conf.id);
+ memprintf(err, "'%s' : custom id %d already used at %s:%d ('bind %s')",
+ args[cur_arg], l->luid, l->bind_conf->file, l->bind_conf->line,
+ l->bind_conf->arg);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ eb32_insert(&px->conf.used_listener_id, &new->conf.id);
+ return 0;
+}
+
+/* Complete a bind_conf by parsing the args after the address. <args> is the
+ * arguments array, <cur_arg> is the first one to be considered. <section> is
+ * the section name to report in error messages, and <file> and <linenum> are
+ * the file name and line number respectively. Note that args[0..1] are used
+ * in error messages to provide some context. The return value is an error
+ * code, zero on success or an OR of ERR_{FATAL,ABORT,ALERT,WARN}.
+ */
+int bind_parse_args_list(struct bind_conf *bind_conf, char **args, int cur_arg, const char *section, const char *file, int linenum)
+{
+ int err_code = 0;
+
+ while (*(args[cur_arg])) {
+ struct bind_kw *kw;
+ const char *best;
+
+ kw = bind_find_kw(args[cur_arg]);
+ if (kw) {
+ char *err = NULL;
+ int code;
+
+ if (!kw->parse) {
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : '%s' option is not implemented in this version (check build options).\n",
+ file, linenum, args[0], args[1], section, args[cur_arg]);
+ cur_arg += 1 + kw->skip;
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((bind_conf->options & BC_O_REVERSE_HTTP) && !kw->rhttp_ok) {
+ ha_alert("'%s' option is not accepted for reverse HTTP\n",
+ args[cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ code = kw->parse(args, cur_arg, bind_conf->frontend, bind_conf, &err);
+ err_code |= code;
+
+ if (code) {
+ if (err && *err) {
+ indent_msg(&err, 2);
+ if (((code & (ERR_WARN|ERR_ALERT)) == ERR_WARN))
+ ha_warning("parsing [%s:%d] : '%s %s' in section '%s' : %s\n", file, linenum, args[0], args[1], section, err);
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : %s\n", file, linenum, args[0], args[1], section, err);
+ }
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : error encountered while processing '%s'.\n",
+ file, linenum, args[0], args[1], section, args[cur_arg]);
+ if (code & ERR_FATAL) {
+ free(err);
+ cur_arg += 1 + kw->skip;
+ goto out;
+ }
+ }
+ free(err);
+ cur_arg += 1 + kw->skip;
+ continue;
+ }
+
+ best = bind_find_best_kw(args[cur_arg]);
+ if (best)
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s': unknown keyword '%s'; did you mean '%s' maybe ?\n",
+ file, linenum, args[0], args[1], section, args[cur_arg], best);
+ else
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s': unknown keyword '%s'.\n",
+ file, linenum, args[0], args[1], section, args[cur_arg]);
+
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((bind_conf->options & (BC_O_USE_SOCK_DGRAM|BC_O_USE_SOCK_STREAM)) == (BC_O_USE_SOCK_DGRAM|BC_O_USE_SOCK_STREAM) ||
+ (bind_conf->options & (BC_O_USE_XPRT_DGRAM|BC_O_USE_XPRT_STREAM)) == (BC_O_USE_XPRT_DGRAM|BC_O_USE_XPRT_STREAM)) {
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : cannot mix datagram and stream protocols.\n",
+ file, linenum, args[0], args[1], section);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* The transport layer automatically switches to QUIC when QUIC is
+ * selected, regardless of bind_conf settings. We then need to
+ * initialize QUIC params.
+ */
+ if ((bind_conf->options & (BC_O_USE_SOCK_DGRAM|BC_O_USE_XPRT_STREAM)) == (BC_O_USE_SOCK_DGRAM|BC_O_USE_XPRT_STREAM)) {
+#ifdef USE_QUIC
+ bind_conf->xprt = xprt_get(XPRT_QUIC);
+ if (!(bind_conf->options & BC_O_USE_SSL)) {
+ bind_conf->options |= BC_O_USE_SSL;
+ ha_warning("parsing [%s:%d] : '%s %s' in section '%s' : QUIC protocol detected, enabling ssl. Use 'ssl' to shut this warning.\n",
+ file, linenum, args[0], args[1], section);
+ }
+ quic_transport_params_init(&bind_conf->quic_params, 1);
+#else
+ ha_alert("parsing [%s:%d] : '%s %s' in section '%s' : QUIC protocol selected but support not compiled in (check build options).\n",
+ file, linenum, args[0], args[1], section);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+#endif
+ }
+ else if (bind_conf->options & BC_O_USE_SSL) {
+ bind_conf->xprt = xprt_get(XPRT_SSL);
+ }
+
+ out:
+ return err_code;
+}
+
+/* parse the "maxconn" bind keyword */
+static int bind_parse_maxconn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+ if (val < 0) {
+ memprintf(err, "'%s' : invalid value %d, must be >= 0", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->maxconn = val;
+ return 0;
+}
+
+/* parse the "name" bind keyword */
+static int bind_parse_name(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct listener *l;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing name", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ list_for_each_entry(l, &conf->listeners, by_bind)
+ l->name = strdup(args[cur_arg + 1]);
+
+ return 0;
+}
+
+/* parse the "nbconn" bind keyword */
+static int bind_parse_nbconn(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int val;
+ const struct listener *l;
+
+ /* TODO duplicated code from check_kw_experimental() */
+ if (!experimental_directives_allowed) {
+ memprintf(err, "'%s' is experimental, must be allowed via a global 'expose-experimental-directives'",
+ args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+
+ l = LIST_NEXT(&conf->listeners, struct listener *, by_bind);
+ if (l->rx.addr.ss_family != AF_CUST_RHTTP_SRV) {
+ memprintf(err, "'%s' : only valid for reverse HTTP listeners.", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value.", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+ if (val <= 0) {
+ memprintf(err, "'%s' : invalid value %d, must be > 0.", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->rhttp_nbconn = val;
+ return 0;
+}
+
+/* parse the "nice" bind keyword */
+static int bind_parse_nice(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ val = atol(args[cur_arg + 1]);
+ if (val < -1024 || val > 1024) {
+ memprintf(err, "'%s' : invalid value %d, allowed range is -1024..1024", args[cur_arg], val);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ conf->nice = val;
+ return 0;
+}
+
+/* parse the "process" bind keyword */
+static int bind_parse_process(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ memprintf(err, "'process %s' on 'bind' lines is not supported anymore, please use 'thread' instead.", args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+}
+
+/* parse the "proto" bind keyword */
+static int bind_parse_proto(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ struct ist proto;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ proto = ist(args[cur_arg + 1]);
+ conf->mux_proto = get_mux_proto(proto);
+ if (!conf->mux_proto) {
+ memprintf(err, "'%s' : unknown MUX protocol '%s'", args[cur_arg], args[cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+/* parse the "shards" bind keyword. Takes an integer, "by-thread", or "by-group" */
+static int bind_parse_shards(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ int val;
+
+ if (!*args[cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[cur_arg + 1], "by-thread") == 0) {
+ val = -1; /* -1 = "by-thread", will be fixed in check_config_validity() */
+ } else if (strcmp(args[cur_arg + 1], "by-group") == 0) {
+ val = -2; /* -2 = "by-group", will be fixed in check_config_validity() */
+ } else {
+ val = atol(args[cur_arg + 1]);
+ if (val < 1 || val > MAX_THREADS) {
+ memprintf(err, "'%s' : invalid value %d, allowed range is %d..%d, 'by-thread' or 'by-group'", args[cur_arg], val, 1, MAX_THREADS);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ conf->settings.shards = val;
+ return 0;
+}
+
+/* parse the "thread" bind keyword. This will replace any preset thread_set */
+static int bind_parse_thread(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+ const struct listener *l;
+
+ /* note that the thread set is zeroed before first call, and we don't
+ * want to reset it so that it remains possible to chain multiple
+ * "thread" directives.
+ */
+ if (parse_thread_set(args[cur_arg+1], &conf->thread_set, err) < 0)
+ return ERR_ALERT | ERR_FATAL;
+
+ l = LIST_NEXT(&conf->listeners, struct listener *, by_bind);
+ if (l->rx.addr.ss_family == AF_CUST_RHTTP_SRV &&
+ atleast2(conf->thread_set.grps)) {
+ memprintf(err, "'%s' : reverse HTTP bind cannot span multiple thread groups.", args[cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
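+
+/* Example (illustrative) of the resulting configuration syntax, assuming two
+ * thread groups are defined:
+ *
+ *   bind :443 thread 1/1-4
+ *   bind :444 thread 2/all
+ */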
+
+/* config parser for global "tune.listener.default-shards" */
+static int cfg_parse_tune_listener_shards(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "by-thread") == 0)
+ global.tune.default_shards = -1;
+ else if (strcmp(args[1], "by-group") == 0)
+ global.tune.default_shards = -2;
+ else if (strcmp(args[1], "by-process") == 0)
+ global.tune.default_shards = 1;
+ else {
+ memprintf(err, "'%s' expects either 'by-process', 'by-group', or 'by-thread' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.listener.multi-queue", accepts "on", "fair" or "off" */
+static int cfg_parse_tune_listener_mq(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options = (global.tune.options & ~GTUNE_LISTENER_MQ_ANY) | GTUNE_LISTENER_MQ_OPT;
+ else if (strcmp(args[1], "fair") == 0)
+ global.tune.options = (global.tune.options & ~GTUNE_LISTENER_MQ_ANY) | GTUNE_LISTENER_MQ_FAIR;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_LISTENER_MQ_ANY;
+ else {
+ memprintf(err, "'%s' expects either 'on', 'fair', or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
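+
+/* Example (illustrative) global section using the two directives above:
+ *
+ *   global
+ *           tune.listener.default-shards by-group
+ *           tune.listener.multi-queue fair
+ */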
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "dst_conn", smp_fetch_dconn, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "so_id", smp_fetch_so_id, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "so_name", smp_fetch_so_name, 0, NULL, SMP_T_STR, SMP_USE_FTEND, },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ */
+static struct bind_kw_list bind_kws = { "ALL", { }, {
+ { "accept-netscaler-cip", bind_parse_accept_netscaler_cip, 1, 0 }, /* enable NetScaler Client IP insertion protocol */
+ { "accept-proxy", bind_parse_accept_proxy, 0, 0 }, /* enable PROXY protocol */
+ { "backlog", bind_parse_backlog, 1, 0 }, /* set backlog of listening socket */
+ { "id", bind_parse_id, 1, 1 }, /* set id of listening socket */
+ { "maxconn", bind_parse_maxconn, 1, 0 }, /* set maxconn of listening socket */
+ { "name", bind_parse_name, 1, 1 }, /* set name of listening socket */
+ { "nbconn", bind_parse_nbconn, 1, 1 }, /* set number of connections for active preconnect */
+ { "nice", bind_parse_nice, 1, 0 }, /* set nice of listening socket */
+ { "process", bind_parse_process, 1, 0 }, /* set list of allowed processes for this socket */
+ { "proto", bind_parse_proto, 1, 0 }, /* set the proto to use for all incoming connections */
+ { "shards", bind_parse_shards, 1, 0 }, /* set number of shards */
+ { "thread", bind_parse_thread, 1, 1 }, /* set list of allowed threads for this socket */
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, bind_register_keywords, &bind_kws);
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.listener.default-shards", cfg_parse_tune_listener_shards },
+ { CFG_GLOBAL, "tune.listener.multi-queue", cfg_parse_tune_listener_mq },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..010ace9
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,4659 @@
+/*
+ * General logging functions.
+ *
+ * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <time.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <sys/time.h>
+#include <sys/uio.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/sink.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/action.h>
+#include <haproxy/time.h>
+#include <haproxy/hash.h>
+#include <haproxy/tools.h>
+
+/* global recv logs counter */
+int cum_log_messages;
+
+/* log forward proxy list */
+struct proxy *cfg_log_forward;
+
+struct log_fmt_st {
+ char *name;
+};
+
+static const struct log_fmt_st log_formats[LOG_FORMATS] = {
+ [LOG_FORMAT_LOCAL] = {
+ .name = "local",
+ },
+ [LOG_FORMAT_RFC3164] = {
+ .name = "rfc3164",
+ },
+ [LOG_FORMAT_RFC5424] = {
+ .name = "rfc5424",
+ },
+ [LOG_FORMAT_PRIO] = {
+ .name = "priority",
+ },
+ [LOG_FORMAT_SHORT] = {
+ .name = "short",
+ },
+ [LOG_FORMAT_TIMED] = {
+ .name = "timed",
+ },
+ [LOG_FORMAT_ISO] = {
+ .name = "iso",
+ },
+ [LOG_FORMAT_RAW] = {
+ .name = "raw",
+ },
+};
+
+/*
+ * These maps are used with the ha_bit_* macros to check whether a particular
+ * bit is set or not. Each bit represents an ASCII code. ha_bit_set() marks
+ * those bytes which should be escaped. When ha_bit_test() returns non-zero,
+ * it means that the byte should be escaped. Be careful to only pass byte
+ * values from 0 to 255 to the macros.
+ */
+long rfc5424_escape_map[(256/8) / sizeof(long)];
+long hdr_encode_map[(256/8) / sizeof(long)];
+long url_encode_map[(256/8) / sizeof(long)];
+long http_encode_map[(256/8) / sizeof(long)];
+
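+/* Illustrative sketch (assumed helper usage, not from the original source):
+ * marking the double-quote byte for escaping in the RFC5424 map, then testing
+ * an arbitrary byte <c>:
+ *
+ *   ha_bit_set('"', rfc5424_escape_map);
+ *   if (ha_bit_test((unsigned char)c, rfc5424_escape_map))
+ *           // escape <c> before emitting it
+ */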
+
+const char *log_facilities[NB_LOG_FACILITIES] = {
+ "kern", "user", "mail", "daemon",
+ "auth", "syslog", "lpr", "news",
+ "uucp", "cron", "auth2", "ftp",
+ "ntp", "audit", "alert", "cron2",
+ "local0", "local1", "local2", "local3",
+ "local4", "local5", "local6", "local7"
+};
+
+const char *log_levels[NB_LOG_LEVELS] = {
+ "emerg", "alert", "crit", "err",
+ "warning", "notice", "info", "debug"
+};
+
+const char sess_term_cond[16] = "-LcCsSPRIDKUIIII"; /* normal, Local, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal, Down, Killed, Up, -- */
+const char sess_fin_state[8] = "-RCHDLQT"; /* cliRequest, srvConnect, srvHeader, Data, Last, Queue, Tarpit */
+
+
+/* log_format */
+struct logformat_type {
+ char *name;
+ int type;
+ int mode;
+ int lw; /* logwait bitsfield */
+ int (*config_callback)(struct logformat_node *node, struct proxy *curproxy);
+};
+
+int prepare_addrsource(struct logformat_node *node, struct proxy *curproxy);
+
+/* log_format variable names */
+static const struct logformat_type logformat_keywords[] = {
+ { "o", LOG_FMT_GLOBAL, PR_MODE_TCP, 0, NULL }, /* global option */
+
+ /* please keep these lines sorted ! */
+ { "B", LOG_FMT_BYTES, PR_MODE_TCP, LW_BYTES, NULL }, /* bytes from server to client */
+ { "CC", LOG_FMT_CCLIENT, PR_MODE_HTTP, LW_REQHDR, NULL }, /* client cookie */
+ { "CS", LOG_FMT_CSERVER, PR_MODE_HTTP, LW_RSPHDR, NULL }, /* server cookie */
+ { "H", LOG_FMT_HOSTNAME, PR_MODE_TCP, LW_INIT, NULL }, /* Hostname */
+ { "ID", LOG_FMT_UNIQUEID, PR_MODE_TCP, LW_BYTES, NULL }, /* Unique ID */
+ { "ST", LOG_FMT_STATUS, PR_MODE_TCP, LW_RESP, NULL }, /* status code */
+ { "T", LOG_FMT_DATEGMT, PR_MODE_TCP, LW_INIT, NULL }, /* date GMT */
+ { "Ta", LOG_FMT_Ta, PR_MODE_HTTP, LW_BYTES, NULL }, /* Time active (tr to end) */
+ { "Tc", LOG_FMT_TC, PR_MODE_TCP, LW_BYTES, NULL }, /* Tc */
+ { "Th", LOG_FMT_Th, PR_MODE_TCP, LW_BYTES, NULL }, /* Time handshake */
+ { "Ti", LOG_FMT_Ti, PR_MODE_HTTP, LW_BYTES, NULL }, /* Time idle */
+ { "Tl", LOG_FMT_DATELOCAL, PR_MODE_TCP, LW_INIT, NULL }, /* date local timezone */
+ { "Tq", LOG_FMT_TQ, PR_MODE_HTTP, LW_BYTES, NULL }, /* Tq=Th+Ti+TR */
+ { "Tr", LOG_FMT_Tr, PR_MODE_HTTP, LW_BYTES, NULL }, /* Tr */
+ { "TR", LOG_FMT_TR, PR_MODE_HTTP, LW_BYTES, NULL }, /* Time to receive a valid request */
+ { "Td", LOG_FMT_TD, PR_MODE_TCP, LW_BYTES, NULL }, /* Td = Tt - (Tq + Tw + Tc + Tr) */
+ { "Ts", LOG_FMT_TS, PR_MODE_TCP, LW_INIT, NULL }, /* timestamp GMT */
+ { "Tt", LOG_FMT_TT, PR_MODE_TCP, LW_BYTES, NULL }, /* Tt */
+ { "Tu", LOG_FMT_TU, PR_MODE_TCP, LW_BYTES, NULL }, /* Tu = Tt -Ti */
+ { "Tw", LOG_FMT_TW, PR_MODE_TCP, LW_BYTES, NULL }, /* Tw */
+ { "U", LOG_FMT_BYTES_UP, PR_MODE_TCP, LW_BYTES, NULL }, /* bytes from client to server */
+ { "ac", LOG_FMT_ACTCONN, PR_MODE_TCP, LW_BYTES, NULL }, /* actconn */
+ { "b", LOG_FMT_BACKEND, PR_MODE_TCP, LW_INIT, NULL }, /* backend */
+ { "bc", LOG_FMT_BECONN, PR_MODE_TCP, LW_BYTES, NULL }, /* beconn */
+ { "bi", LOG_FMT_BACKENDIP, PR_MODE_TCP, LW_BCKIP, prepare_addrsource }, /* backend source ip */
+ { "bp", LOG_FMT_BACKENDPORT, PR_MODE_TCP, LW_BCKIP, prepare_addrsource }, /* backend source port */
+ { "bq", LOG_FMT_BCKQUEUE, PR_MODE_TCP, LW_BYTES, NULL }, /* backend_queue */
+ { "ci", LOG_FMT_CLIENTIP, PR_MODE_TCP, LW_CLIP | LW_XPRT, NULL }, /* client ip */
+ { "cp", LOG_FMT_CLIENTPORT, PR_MODE_TCP, LW_CLIP | LW_XPRT, NULL }, /* client port */
+ { "f", LOG_FMT_FRONTEND, PR_MODE_TCP, LW_INIT, NULL }, /* frontend */
+ { "fc", LOG_FMT_FECONN, PR_MODE_TCP, LW_BYTES, NULL }, /* feconn */
+ { "fi", LOG_FMT_FRONTENDIP, PR_MODE_TCP, LW_FRTIP | LW_XPRT, NULL }, /* frontend ip */
+ { "fp", LOG_FMT_FRONTENDPORT, PR_MODE_TCP, LW_FRTIP | LW_XPRT, NULL }, /* frontend port */
+ { "ft", LOG_FMT_FRONTEND_XPRT, PR_MODE_TCP, LW_INIT, NULL }, /* frontend with transport mode */
+ { "hr", LOG_FMT_HDRREQUEST, PR_MODE_TCP, LW_REQHDR, NULL }, /* header request */
+ { "hrl", LOG_FMT_HDRREQUESTLIST, PR_MODE_TCP, LW_REQHDR, NULL }, /* header request list */
+ { "hs", LOG_FMT_HDRRESPONS, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header response */
+ { "hsl", LOG_FMT_HDRRESPONSLIST, PR_MODE_TCP, LW_RSPHDR, NULL }, /* header response list */
+ { "HM", LOG_FMT_HTTP_METHOD, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP method */
+ { "HP", LOG_FMT_HTTP_PATH, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP relative or absolute path */
+ { "HPO", LOG_FMT_HTTP_PATH_ONLY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP path only (without host nor query string) */
+ { "HQ", LOG_FMT_HTTP_QUERY, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP query */
+ { "HU", LOG_FMT_HTTP_URI, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP full URI */
+ { "HV", LOG_FMT_HTTP_VERSION, PR_MODE_HTTP, LW_REQ, NULL }, /* HTTP version */
+ { "lc", LOG_FMT_LOGCNT, PR_MODE_TCP, LW_INIT, NULL }, /* log counter */
+ { "ms", LOG_FMT_MS, PR_MODE_TCP, LW_INIT, NULL }, /* accept date millisecond */
+ { "pid", LOG_FMT_PID, PR_MODE_TCP, LW_INIT, NULL }, /* log pid */
+ { "r", LOG_FMT_REQ, PR_MODE_HTTP, LW_REQ, NULL }, /* request */
+ { "rc", LOG_FMT_RETRIES, PR_MODE_TCP, LW_BYTES, NULL }, /* retries */
+ { "rt", LOG_FMT_COUNTER, PR_MODE_TCP, LW_REQ, NULL }, /* request counter (HTTP or TCP session) */
+ { "s", LOG_FMT_SERVER, PR_MODE_TCP, LW_SVID, NULL }, /* server */
+ { "sc", LOG_FMT_SRVCONN, PR_MODE_TCP, LW_BYTES, NULL }, /* srv_conn */
+ { "si", LOG_FMT_SERVERIP, PR_MODE_TCP, LW_SVIP, NULL }, /* server destination ip */
+ { "sp", LOG_FMT_SERVERPORT, PR_MODE_TCP, LW_SVIP, NULL }, /* server destination port */
+ { "sq", LOG_FMT_SRVQUEUE, PR_MODE_TCP, LW_BYTES, NULL }, /* srv_queue */
+ { "sslc", LOG_FMT_SSL_CIPHER, PR_MODE_TCP, LW_XPRT, NULL }, /* client-side SSL ciphers */
+ { "sslv", LOG_FMT_SSL_VERSION, PR_MODE_TCP, LW_XPRT, NULL }, /* client-side SSL protocol version */
+ { "t", LOG_FMT_DATE, PR_MODE_TCP, LW_INIT, NULL }, /* date */
+ { "tr", LOG_FMT_tr, PR_MODE_HTTP, LW_INIT, NULL }, /* date of start of request */
+ { "trg",LOG_FMT_trg, PR_MODE_HTTP, LW_INIT, NULL }, /* date of start of request, GMT */
+ { "trl",LOG_FMT_trl, PR_MODE_HTTP, LW_INIT, NULL }, /* date of start of request, local */
+ { "ts", LOG_FMT_TERMSTATE, PR_MODE_TCP, LW_BYTES, NULL },/* termination state */
+ { "tsc", LOG_FMT_TERMSTATE_CK, PR_MODE_TCP, LW_INIT, NULL },/* termination state */
+ { 0, 0, 0, 0, NULL }
+};
+
+char httpclient_log_format[] = "%ci:%cp [%tr] %ft -/- %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r";
+char default_http_log_format[] = "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r"; // default format
+char default_https_log_format[] = "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r %[fc_err]/%[ssl_fc_err,hex]/%[ssl_c_err]/%[ssl_c_ca_err]/%[ssl_fc_is_resumed] %[ssl_fc_sni]/%sslv/%sslc";
+char clf_http_log_format[] = "%{+Q}o %{-Q}ci - - [%trg] %r %ST %B \"\" \"\" %cp %ms %ft %b %s %TR %Tw %Tc %Tr %Ta %tsc %ac %fc %bc %sc %rc %sq %bq %CC %CS %hrl %hsl";
+char default_tcp_log_format[] = "%ci:%cp [%t] %ft %b/%s %Tw/%Tc/%Tt %B %ts %ac/%fc/%bc/%sc/%rc %sq/%bq";
+char *log_format = NULL;
+
+/* Default string used for structured-data part in RFC5424 formatted
+ * syslog messages.
+ */
+char default_rfc5424_sd_log_format[] = "- ";
+
+/* total number of dropped logs */
+unsigned int dropped_logs = 0;
+
+/* This is a per-thread syslog message buffer, common to all outgoing
+ * messages. It contains only the data part.
+ */
+THREAD_LOCAL char *logline = NULL;
+
+/* A per-thread syslog message buffer, common to all RFC5424 syslog
+ * messages. Currently, it is used for generating the structured-data part.
+ */
+THREAD_LOCAL char *logline_rfc5424 = NULL;
+
+struct logformat_var_args {
+ char *name;
+ int mask;
+};
+
+struct logformat_var_args var_args_list[] = {
+// global
+ { "M", LOG_OPT_MANDATORY },
+ { "Q", LOG_OPT_QUOTE },
+ { "X", LOG_OPT_HEXA },
+ { "E", LOG_OPT_ESC },
+ { 0, 0 }
+};
+
+/*
+ * callback used to configure addr source retrieval
+ */
+int prepare_addrsource(struct logformat_node *node, struct proxy *curproxy)
+{
+ curproxy->options2 |= PR_O2_SRC_ADDR;
+
+ return 0;
+}
+
+
+/*
+ * Parse args in a logformat_var. Returns 0 on error,
+ * otherwise 1.
+ */
+int parse_logformat_var_args(char *args, struct logformat_node *node, char **err)
+{
+ int i = 0;
+ int end = 0;
+ int flags = 0; // 0 = none, 1 = '+', 2 = '-'
+ char *sp = NULL; // start pointer
+
+ if (args == NULL) {
+ memprintf(err, "internal error: parse_logformat_var_args() expects non null 'args'");
+ return 0;
+ }
+
+ while (1) {
+ if (*args == '\0')
+ end = 1;
+
+ if (*args == '+') {
+ // add flag
+ sp = args + 1;
+ flags = 1;
+ }
+ if (*args == '-') {
+ // delete flag
+ sp = args + 1;
+ flags = 2;
+ }
+
+ if (*args == '\0' || *args == ',') {
+ *args = '\0';
+ for (i = 0; sp && var_args_list[i].name; i++) {
+ if (strcmp(sp, var_args_list[i].name) == 0) {
+ if (flags == 1) {
+ node->options |= var_args_list[i].mask;
+ break;
+ } else if (flags == 2) {
+ node->options &= ~var_args_list[i].mask;
+ break;
+ }
+ }
+ }
+ sp = NULL;
+ if (end)
+ break;
+ }
+ args++;
+ }
+ return 1;
+}
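+
+/* Illustrative sketch (not part of the original source): for the node built
+ * from '%{+Q,-E}r', this function receives args = "+Q,-E" and applies the
+ * flags one comma-separated element at a time, which is roughly equivalent
+ * to:
+ *
+ *   node->options |= LOG_OPT_QUOTE;  // "+Q"
+ *   node->options &= ~LOG_OPT_ESC;   // "-E"
+ *
+ * This is how the default formats above use '%{+Q}r' to emit the request
+ * line quoted.
+ */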
+
+/*
+ * Parse a variable '%varname' or '%{args}varname' in log-format. The caller
+ * must pass the args part in the <arg> pointer with its length in <arg_len>,
+ * and varname with its length in <var> and <var_len> respectively. <arg> is
+ * ignored when <arg_len> is 0. Neither <var> nor <var_len> may be null.
+ * Returns 0 on error, in which case <err> is filled, otherwise returns 1.
+ */
+int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct proxy *curproxy, struct list *list_format, int *defoptions, char **err)
+{
+ int j;
+ struct logformat_node *node = NULL;
+
+ for (j = 0; logformat_keywords[j].name; j++) { // search a log type
+ if (strlen(logformat_keywords[j].name) == var_len &&
+ strncmp(var, logformat_keywords[j].name, var_len) == 0) {
+ if (logformat_keywords[j].mode != PR_MODE_HTTP || curproxy->mode == PR_MODE_HTTP) {
+ node = calloc(1, sizeof(*node));
+ if (!node) {
+ memprintf(err, "out of memory error");
+ goto error_free;
+ }
+ node->type = logformat_keywords[j].type;
+ node->options = *defoptions;
+ if (arg_len) {
+ node->arg = my_strndup(arg, arg_len);
+ if (!parse_logformat_var_args(node->arg, node, err))
+ goto error_free;
+ }
+ if (node->type == LOG_FMT_GLOBAL) {
+ *defoptions = node->options;
+ free(node->arg);
+ free(node);
+ } else {
+ if (logformat_keywords[j].config_callback &&
+ logformat_keywords[j].config_callback(node, curproxy) != 0) {
+ goto error_free;
+ }
+ curproxy->to_log |= logformat_keywords[j].lw;
+ LIST_APPEND(list_format, &node->list);
+ }
+ return 1;
+ } else {
+ memprintf(err, "format variable '%s' is reserved for HTTP mode",
+ logformat_keywords[j].name);
+ goto error_free;
+ }
+ }
+ }
+
+ j = var[var_len];
+ var[var_len] = 0;
+ memprintf(err, "no such format variable '%s'. If you wanted to emit the '%%' character verbatim, you need to use '%%%%'", var);
+ var[var_len] = j;
+
+ error_free:
+ if (node) {
+ free(node->arg);
+ free(node);
+ }
+ return 0;
+}
+
+/*
+ * Push a text or separator node to the logformat linked list.
+ *
+ * start: start pointer
+ * end: end text pointer (excluded from the copy)
+ * type: LF_TEXT or LF_SEPARATOR
+ * list_format: destination list
+ *
+ * For LF_TEXT, the characters from <start> to <end> (excluded) are copied.
+ * Returns 0 on allocation failure (with <err> filled), otherwise 1.
+ */
+int add_to_logformat_list(char *start, char *end, int type, struct list *list_format, char **err)
+{
+ char *str;
+
+ if (type == LF_TEXT) { /* type text */
+ struct logformat_node *node = calloc(1, sizeof(*node));
+ if (!node) {
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+ str = calloc(1, end - start + 1);
+ if (!str) {
+ free(node);
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+ strncpy(str, start, end - start);
+ str[end - start] = '\0';
+ node->arg = str;
+ node->type = LOG_FMT_TEXT; // type string
+ LIST_APPEND(list_format, &node->list);
+ } else if (type == LF_SEPARATOR) {
+ struct logformat_node *node = calloc(1, sizeof(*node));
+ if (!node) {
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+ node->type = LOG_FMT_SEPARATOR;
+ LIST_APPEND(list_format, &node->list);
+ }
+ return 1;
+}
+
+/*
+ * Parse the sample fetch expression <text> and add a node to <list_format> upon
+ * success. At the moment, sample converters are not yet supported but fetch arguments
+ * should work. The curpx->conf.args.ctx must be set by the caller. If an end pointer
+ * is passed in <endptr>, it will be updated with the pointer to the first character
+ * not part of the sample expression.
+ *
+ * The function returns 0 on error, otherwise it returns 1.
+ */
+int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct proxy *curpx, struct list *list_format, int options, int cap, char **err, char **endptr)
+{
+ char *cmd[2];
+ struct sample_expr *expr = NULL;
+ struct logformat_node *node = NULL;
+ int cmd_arg;
+
+ cmd[0] = text;
+ cmd[1] = "";
+ cmd_arg = 0;
+
+ expr = sample_parse_expr(cmd, &cmd_arg, curpx->conf.args.file, curpx->conf.args.line, err,
+ &curpx->conf.args, endptr);
+ if (!expr) {
+ memprintf(err, "failed to parse sample expression <%s> : %s", text, *err);
+ goto error_free;
+ }
+
+ node = calloc(1, sizeof(*node));
+ if (!node) {
+ memprintf(err, "out of memory error");
+ goto error_free;
+ }
+ node->type = LOG_FMT_EXPR;
+ node->expr = expr;
+ node->options = options;
+
+ if (arg_len) {
+ node->arg = my_strndup(arg, arg_len);
+ if (!parse_logformat_var_args(node->arg, node, err))
+ goto error_free;
+ }
+ if (expr->fetch->val & cap & SMP_VAL_REQUEST)
+ node->options |= LOG_OPT_REQ_CAP; /* fetch method is request-compatible */
+
+ if (expr->fetch->val & cap & SMP_VAL_RESPONSE)
+ node->options |= LOG_OPT_RES_CAP; /* fetch method is response-compatible */
+
+ if (!(expr->fetch->val & cap)) {
+ memprintf(err, "sample fetch <%s> may not be reliably used here because it needs '%s' which is not available here",
+ text, sample_src_names(expr->fetch->use));
+ goto error_free;
+ }
+
+ if ((options & LOG_OPT_HTTP) && (expr->fetch->use & (SMP_USE_L6REQ|SMP_USE_L6RES))) {
+ ha_warning("parsing [%s:%d] : L6 sample fetch <%s> ignored in HTTP log-format string.\n",
+ curpx->conf.args.file, curpx->conf.args.line, text);
+ }
+
+ /* check if we need to allocate an http_txn struct for HTTP parsing */
+ /* Note, we may also need to set curpx->to_log with certain fetches */
+ curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+
+ /* FIXME: temporary workaround for missing LW_XPRT and LW_REQ flags
+ * needed with some sample fetches (eg: ssl*). We always set them for
+ * now, but this workaround will go away once sample capabilities carry
+ * this information.
+ */
+ curpx->to_log |= LW_XPRT;
+ if (curpx->http_needed)
+ curpx->to_log |= LW_REQ;
+ LIST_APPEND(list_format, &node->list);
+ return 1;
+
+ error_free:
+ release_sample_expr(expr);
+ if (node) {
+ free(node->arg);
+ free(node);
+ }
+ return 0;
+}
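+
+/* Illustrative sketch (not part of the original source): the default HTTPS
+ * format above contains '%[ssl_fc_sni]'; that token reaches this function
+ * with text = "ssl_fc_sni" and no argument, sample_parse_expr() compiles it,
+ * and a LOG_FMT_EXPR node is appended. A minimal caller would look like:
+ *
+ *   struct list fmt = LIST_HEAD_INIT(fmt);
+ *   char *err = NULL;
+ *
+ *   if (!parse_logformat_string("%[ssl_fc_sni]", curproxy, &fmt,
+ *                               LOG_OPT_MANDATORY, SMP_VAL_FE_LOG_END, &err))
+ *       ha_alert("%s\n", err);
+ *
+ * (SMP_VAL_FE_LOG_END is the cap commonly passed for frontend log formats.)
+ */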
+
+/*
+ * Parse the log_format string and fill a linked list.
+ * Variable names are preceded by '%' and composed of characters [a-zA-Z0-9]*: %varname.
+ * Arguments can be set using braces: %{many arguments}varname.
+ * The curproxy->conf.args.ctx must be set by the caller.
+ *
+ * fmt: the string to parse
+ * curproxy: the proxy affected
+ * list_format: the destination list
+ * options: LOG_OPT_* to force on every node
+ * cap: all SMP_VAL_* flags supported by the consumer
+ *
+ * The function returns 1 on success, otherwise it returns 0 and <err> is filled.
+ */
+int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list *list_format, int options, int cap, char **err)
+{
+ char *sp, *str, *backfmt; /* start pointer for text parts */
+ char *arg = NULL; /* start pointer for args */
+ char *var = NULL; /* start pointer for vars */
+ int arg_len = 0;
+ int var_len = 0;
+ int cformat; /* current token format */
+ int pformat; /* previous token format */
+ struct logformat_node *tmplf, *back;
+
+ sp = str = backfmt = strdup(fmt);
+ if (!str) {
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+ curproxy->to_log |= LW_INIT;
+
+ /* flush the list first. */
+ list_for_each_entry_safe(tmplf, back, list_format, list) {
+ LIST_DELETE(&tmplf->list);
+ release_sample_expr(tmplf->expr);
+ free(tmplf->arg);
+ free(tmplf);
+ }
+
+ for (cformat = LF_INIT; cformat != LF_END; str++) {
+ pformat = cformat;
+
+ if (!*str)
+ cformat = LF_END; // preset it to save all states from doing this
+
+ /* The principle of the two-step state machine below is to first detect a change, and
+ * second have all common paths processed at one place. The common paths are the ones
+ * encountered in text areas (LF_INIT, LF_TEXT, LF_SEPARATOR) and at the end (LF_END).
+ * We use the common LF_INIT state to dispatch to the different final states.
+ */
+ switch (pformat) {
+ case LF_STARTVAR: // text immediately following a '%'
+ arg = NULL; var = NULL;
+ arg_len = var_len = 0;
+ if (*str == '{') { // optional argument
+ cformat = LF_STARG;
+ arg = str + 1;
+ }
+ else if (*str == '[') {
+ cformat = LF_STEXPR;
+ var = str + 1; // store expr in variable name
+ }
+ else if (isalpha((unsigned char)*str)) { // variable name
+ cformat = LF_VAR;
+ var = str;
+ }
+ else if (*str == '%')
+ cformat = LF_TEXT; // convert this character to a literal (useful for '%')
+ else if (isdigit((unsigned char)*str) || *str == ' ' || *str == '\t') {
+ /* single '%' followed by blank or digit, send them both */
+ cformat = LF_TEXT;
+ pformat = LF_TEXT; /* finally we include the previous char as well */
+ sp = str - 1; /* send both the '%' and the current char */
+ memprintf(err, "unexpected variable name near '%c' at position %d line : '%s'. Maybe you want to write a single '%%', use the syntax '%%%%'",
+ *str, (int)(str - backfmt), fmt);
+ goto fail;
+
+ }
+ else
+ cformat = LF_INIT; // handle other cases of literals
+ break;
+
+ case LF_STARG: // text immediately following '%{'
+ if (*str == '}') { // end of arg
+ cformat = LF_EDARG;
+ arg_len = str - arg;
+ *str = 0; // used for reporting errors
+ }
+ break;
+
+ case LF_EDARG: // text immediately following '%{arg}'
+ if (*str == '[') {
+ cformat = LF_STEXPR;
+ var = str + 1; // store expr in variable name
+ break;
+ }
+ else if (isalnum((unsigned char)*str)) { // variable name
+ cformat = LF_VAR;
+ var = str;
+ break;
+ }
+ memprintf(err, "parse argument modifier without variable name near '%%{%s}'", arg);
+ goto fail;
+
+ case LF_STEXPR: // text immediately following '%['
+ /* the whole sample expression is parsed at once,
+ * returning the pointer to the first character not
+ * part of the expression, which MUST be the closing
+ * square bracket.
+ */
+ if (!add_sample_to_logformat_list(var, arg, arg_len, curproxy, list_format, options, cap, err, &str))
+ goto fail;
+
+ if (*str == ']') {
+ // end of arg, go on with next state
+ cformat = pformat = LF_EDEXPR;
+ sp = str;
+ }
+ else {
+ char c = *str;
+ *str = 0;
+ if (isprint((unsigned char)c))
+ memprintf(err, "expected ']' after '%s', but found '%c'", var, c);
+ else
+ memprintf(err, "missing ']' after '%s'", var);
+ goto fail;
+ }
+ break;
+
+ case LF_VAR: // text part of a variable name
+ var_len = str - var;
+ if (!isalnum((unsigned char)*str))
+ cformat = LF_INIT; // not variable name anymore
+ break;
+
+ default: // LF_INIT, LF_TEXT, LF_SEPARATOR, LF_END, LF_EDEXPR
+ cformat = LF_INIT;
+ }
+
+ if (cformat == LF_INIT) { /* resynchronize state to text/sep/startvar */
+ switch (*str) {
+ case '%': cformat = LF_STARTVAR; break;
+ case 0 : cformat = LF_END; break;
+ case ' ':
+ if (options & LOG_OPT_MERGE_SPACES) {
+ cformat = LF_SEPARATOR;
+ break;
+ }
+ __fallthrough;
+ default : cformat = LF_TEXT; break;
+ }
+ }
+
+ if (cformat != pformat || pformat == LF_SEPARATOR) {
+ switch (pformat) {
+ case LF_VAR:
+ if (!parse_logformat_var(arg, arg_len, var, var_len, curproxy, list_format, &options, err))
+ goto fail;
+ break;
+ case LF_TEXT:
+ case LF_SEPARATOR:
+ if (!add_to_logformat_list(sp, str, pformat, list_format, err))
+ goto fail;
+ break;
+ }
+ sp = str; /* new start of text at every state switch and at every separator */
+ }
+ }
+
+ if (pformat == LF_STARTVAR || pformat == LF_STARG || pformat == LF_STEXPR) {
+ memprintf(err, "truncated line after '%s'", var ? var : arg ? arg : "%");
+ goto fail;
+ }
+ free(backfmt);
+
+ return 1;
+ fail:
+ free(backfmt);
+ return 0;
+}
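+
+/* Worked example (illustrative, not part of the original source): parsing
+ * "%ci:%cp [%tr]" with the state machine above yields the node list:
+ *
+ *   LOG_FMT_CLIENTIP                 ("%ci")
+ *   LOG_FMT_TEXT ":"
+ *   LOG_FMT_CLIENTPORT               ("%cp")
+ *   LOG_FMT_TEXT " ["                (a LOG_FMT_SEPARATOR then "[" when
+ *                                     LOG_OPT_MERGE_SPACES is set)
+ *   LOG_FMT_tr                       ("%tr")
+ *   LOG_FMT_TEXT "]"
+ */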
+
+/*
+ * Parse the first range of indexes from a string made of a list of comma-separated
+ * ranges of indexes. Note that a single index is treated as a particular range
+ * whose high limit equals its low limit.
+ */
+int get_logger_smp_range(unsigned int *low, unsigned int *high, char **arg, char **err)
+{
+ char *end, *p;
+
+ *low = *high = 0;
+
+ p = *arg;
+ end = strchr(p, ',');
+ if (!end)
+ end = p + strlen(p);
+
+ *high = *low = read_uint((const char **)&p, end);
+ if (!*low || (p != end && *p != '-'))
+ goto err;
+
+ if (p == end)
+ goto done;
+
+ p++;
+ *high = read_uint((const char **)&p, end);
+ if (!*high || *high <= *low || p != end)
+ goto err;
+
+ done:
+ if (*end == ',')
+ end++;
+ *arg = end;
+ return 1;
+
+ err:
+ memprintf(err, "wrong sample range '%s'", *arg);
+ return 0;
+}
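+
+/* Illustrative example (not part of the original source): with *arg set to
+ * "1-4,8", a first call returns low=1/high=4 and advances *arg past the
+ * comma to "8"; a second call returns low=high=8, a single index being a
+ * range whose high limit equals its low limit.
+ */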
+
+/*
+ * Returns 1 if the range defined by <low> and <high> overlaps
+ * one of the ranges in the <rgs> array, with <sz> the size of this
+ * array, 0 if not.
+ */
+int smp_log_ranges_overlap(struct smp_log_range *rgs, size_t sz,
+ unsigned int low, unsigned int high, char **err)
+{
+ size_t i;
+
+ for (i = 0; i < sz; i++) {
+ if ((low >= rgs[i].low && low <= rgs[i].high) ||
+ (high >= rgs[i].low && high <= rgs[i].high)) {
+ memprintf(err, "ranges are overlapping");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int smp_log_range_cmp(const void *a, const void *b)
+{
+ const struct smp_log_range *rg_a = a;
+ const struct smp_log_range *rg_b = b;
+
+ if (rg_a->high < rg_b->low)
+ return -1;
+ else if (rg_a->low > rg_b->high)
+ return 1;
+
+ return 0;
+}
+
+/* helper function: resets a log target to its default (empty) state */
+static inline void init_log_target(struct log_target *target)
+{
+ target->type = 0;
+ target->flags = LOG_TARGET_FL_NONE;
+ target->addr = NULL;
+ target->resolv_name = NULL;
+}
+
+void deinit_log_target(struct log_target *target)
+{
+ ha_free(&target->addr);
+ if (!(target->flags & LOG_TARGET_FL_RESOLVED))
+ ha_free(&target->resolv_name);
+}
+
+/* returns 0 on failure and positive value on success */
+static int dup_log_target(struct log_target *def, struct log_target *cpy)
+{
+ BUG_ON((def->flags & LOG_TARGET_FL_RESOLVED)); /* postparsing already done, invalid use */
+ init_log_target(cpy);
+ if (def->addr) {
+ cpy->addr = malloc(sizeof(*cpy->addr));
+ if (!cpy->addr)
+ goto error;
+ *cpy->addr = *def->addr;
+ }
+ if (def->resolv_name) {
+ cpy->resolv_name = strdup(def->resolv_name);
+ if (!cpy->resolv_name)
+ goto error;
+ }
+ cpy->type = def->type;
+ return 1;
+ error:
+ deinit_log_target(cpy);
+ return 0;
+}
+
+/* must be called under the lbprm lock */
+static void _log_backend_srv_queue(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ /* queue the server in the proxy lb array to make it easily searchable by
+ * log-balance algorithms. Here we use the srv array as a general server
+ * pool of in-use servers, lookup is done using a relative positional id
+ * (array is contiguous)
+ *
+ * We use the avail server list to quickly enumerate available servers
+ * (those that are UP).
+ */
+ if (srv->flags & SRV_F_BACKUP) {
+ if (!p->srv_act)
+ p->lbprm.log.srv[p->srv_bck] = srv;
+ p->srv_bck++;
+ }
+ else {
+ if (!p->srv_act) {
+ /* we will be switching to the act servers in the LB logic, thus we
+ * need to reset the lastid
+ */
+ HA_ATOMIC_STORE(&p->lbprm.log.lastid, 0);
+ }
+ p->lbprm.log.srv[p->srv_act] = srv;
+ p->srv_act++;
+ }
+ /* append the server to the list of available servers */
+ LIST_APPEND(&p->lbprm.log.avail, &srv->lb_list);
+
+ p->lbprm.tot_weight = (p->srv_act) ? p->srv_act : p->srv_bck;
+}
+
+static void log_backend_srv_up(struct server *srv)
+{
+ struct proxy *p __maybe_unused = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return; /* nothing to do */
+ if (srv_currently_usable(srv) || !srv_willbe_usable(srv))
+ return; /* false alarm */
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ _log_backend_srv_queue(srv);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+}
+
+/* must be called under lbprm lock */
+static void _log_backend_srv_recalc(struct proxy *p)
+{
+ unsigned int it = 0;
+ struct server *cur_srv;
+
+ list_for_each_entry(cur_srv, &p->lbprm.log.avail, lb_list) {
+ uint8_t backup = cur_srv->flags & SRV_F_BACKUP;
+
+ if ((!p->srv_act && backup) ||
+ (p->srv_act && !backup))
+ p->lbprm.log.srv[it++] = cur_srv;
+ }
+}
+
+/* must be called under the lbprm lock */
+static void _log_backend_srv_dequeue(struct server *srv)
+{
+ struct proxy *p = srv->proxy;
+
+ if (srv->flags & SRV_F_BACKUP) {
+ p->srv_bck--;
+ }
+ else {
+ p->srv_act--;
+ if (!p->srv_act) {
+ /* we will be switching to the bck servers in the LB logic, thus we
+ * need to reset the lastid
+ */
+ HA_ATOMIC_STORE(&p->lbprm.log.lastid, 0);
+ }
+ }
+
+ /* remove the srv from the list of available (UP) servers */
+ LIST_DELETE(&srv->lb_list);
+
+ /* reconstruct the array of usable servers */
+ _log_backend_srv_recalc(p);
+
+ p->lbprm.tot_weight = (p->srv_act) ? p->srv_act : p->srv_bck;
+}
+
+static void log_backend_srv_down(struct server *srv)
+{
+ struct proxy *p __maybe_unused = srv->proxy;
+
+ if (!srv_lb_status_changed(srv))
+ return; /* nothing to do */
+ if (!srv_currently_usable(srv) || srv_willbe_usable(srv))
+ return; /* false alarm */
+
+ HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock);
+ _log_backend_srv_dequeue(srv);
+ HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock);
+}
+
+/* check that current configuration is compatible with "mode log" */
+static int _postcheck_log_backend_compat(struct proxy *be)
+{
+ int err_code = ERR_NONE;
+
+ if (!LIST_ISEMPTY(&be->tcp_req.inspect_rules) ||
+ !LIST_ISEMPTY(&be->tcp_req.l4_rules) ||
+ !LIST_ISEMPTY(&be->tcp_req.l5_rules)) {
+ ha_warning("Cannot use tcp-request rules with 'mode log' in %s '%s'. They will be ignored.\n",
+ proxy_type_str(be), be->id);
+
+ err_code |= ERR_WARN;
+ free_act_rules(&be->tcp_req.inspect_rules);
+ free_act_rules(&be->tcp_req.l4_rules);
+ free_act_rules(&be->tcp_req.l5_rules);
+ }
+ if (!LIST_ISEMPTY(&be->tcp_rep.inspect_rules)) {
+ ha_warning("Cannot use tcp-response rules with 'mode log' in %s '%s'. They will be ignored.\n",
+ proxy_type_str(be), be->id);
+
+ err_code |= ERR_WARN;
+ free_act_rules(&be->tcp_rep.inspect_rules);
+ }
+ if (be->table) {
+ ha_warning("Cannot use stick table with 'mode log' in %s '%s'. It will be ignored.\n",
+ proxy_type_str(be), be->id);
+
+ err_code |= ERR_WARN;
+ stktable_deinit(be->table);
+ ha_free(&be->table);
+ }
+ if (!LIST_ISEMPTY(&be->storersp_rules) ||
+ !LIST_ISEMPTY(&be->sticking_rules)) {
+ ha_warning("Cannot use sticking rules with 'mode log' in %s '%s'. They will be ignored.\n",
+ proxy_type_str(be), be->id);
+
+ err_code |= ERR_WARN;
+ free_stick_rules(&be->storersp_rules);
+ free_stick_rules(&be->sticking_rules);
+ }
+ if (isttest(be->server_id_hdr_name)) {
+ ha_warning("Cannot set \"server_id_hdr_name\" with 'mode log' in %s '%s'. It will be ignored.\n",
+ proxy_type_str(be), be->id);
+
+ err_code |= ERR_WARN;
+ istfree(&be->server_id_hdr_name);
+ }
+ if (be->dyncookie_key) {
+ ha_warning("Cannot set \"dynamic-cookie-key\" with 'mode log' in %s '%s'. It will be ignored.\n",
+ proxy_type_str(be), be->id);
+
+ err_code |= ERR_WARN;
+ ha_free(&be->dyncookie_key);
+ }
+ if (!LIST_ISEMPTY(&be->server_rules)) {
+ ha_warning("Cannot use \"use-server\" rules with 'mode log' in %s '%s'. They will be ignored.\n",
+ proxy_type_str(be), be->id);
+
+ err_code |= ERR_WARN;
+ free_server_rules(&be->server_rules);
+ }
+ return err_code;
+}
+
+static int postcheck_log_backend(struct proxy *be)
+{
+ char *msg = NULL;
+ struct server *srv;
+ int err_code = ERR_NONE;
+ int target_type = -1; // -1 is unused in log_tgt enum
+
+ if (be->mode != PR_MODE_SYSLOG ||
+ (be->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))
+ return ERR_NONE; /* nothing to do */
+
+ err_code |= _postcheck_log_backend_compat(be);
+ if (err_code & ERR_CODE)
+ return err_code;
+
+ /* First time encountering this log backend, perform some init
+ */
+ be->lbprm.set_server_status_up = log_backend_srv_up;
+ be->lbprm.set_server_status_down = log_backend_srv_down;
+ be->lbprm.log.lastid = 0; /* initial value */
+ LIST_INIT(&be->lbprm.log.avail);
+
+ /* alloc srv array (it will be used for active and backup server lists
+ * in turn, so we ensure that the longest list will fit)
+ */
+ be->lbprm.log.srv = calloc(MAX(be->srv_act, be->srv_bck),
+ sizeof(*be->lbprm.log.srv));
+
+ if (!be->lbprm.log.srv) {
+ memprintf(&msg, "memory error when allocating server array (%d entries)",
+ MAX(be->srv_act, be->srv_bck));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* reinit srv counters, lbprm queueing will recount */
+ be->srv_act = 0;
+ be->srv_bck = 0;
+
+ /* "log-balance hash" needs to compile its expression */
+ if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LH) {
+ struct sample_expr *expr;
+ char *expr_str = NULL;
+ char *err_str = NULL;
+ int idx = 0;
+
+ /* only map-based hash method is supported for now */
+ if ((be->lbprm.algo & BE_LB_HASH_TYPE) != BE_LB_HASH_MAP) {
+ memprintf(&msg, "unsupported hash method (from \"hash-type\")");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* a little bit of explanation about what we're going to do here:
+ * as the user gave us a list of converters, instead of the fetch+conv list
+ * tuple as we're used to, we need to insert a dummy fetch at the start of
+ * the converter list so that sample_parse_expr() is able to properly parse
+ * the expr. We're explicitly using str() as dummy fetch, since the input
+ * sample that will be passed to the converter list at runtime will be a
+ * string (the log message about to be sent). Doing so allows sample_parse_expr()
+ * to ensure that the provided converters will be compatible with string type.
+ */
+ memprintf(&expr_str, "str(dummy),%s", be->lbprm.arg_str);
+ if (!expr_str) {
+ memprintf(&msg, "memory error during converter list argument parsing (from \"log-balance hash\")");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ expr = sample_parse_expr((char*[]){expr_str, NULL}, &idx,
+ be->conf.file,
+ be->conf.line,
+ &err_str, NULL, NULL);
+ if (!expr) {
+ memprintf(&msg, "%s (from converter list argument in \"log-balance hash\")", err_str);
+ ha_free(&err_str);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ ha_free(&expr_str);
+ goto end;
+ }
+
+ /* We expect the log_message->conv_list expr to resolve as a binary-compatible
+ * value because its output will be passed to gen_hash() to compute the hash.
+ *
+ * So we check the last converter's output type to ensure that it can be
+ * converted into the expected type. Invalid output type will result in an
+ * error to prevent unexpected results during runtime.
+ */
+ if (sample_casts[smp_expr_output_type(expr)][SMP_T_BIN] == NULL) {
+ memprintf(&msg, "invalid output type at the end of converter list for \"log-balance hash\" directive");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ release_sample_expr(expr);
+ ha_free(&expr_str);
+ goto end;
+ }
+ ha_free(&expr_str);
+ be->lbprm.expr = expr;
+ }
+
+ /* finish the initialization of proxy's servers */
+ srv = be->srv;
+ while (srv) {
+ BUG_ON(srv->log_target);
+ BUG_ON(srv->addr_type.proto_type != PROTO_TYPE_DGRAM &&
+ srv->addr_type.proto_type != PROTO_TYPE_STREAM);
+
+ srv->log_target = malloc(sizeof(*srv->log_target));
+ if (!srv->log_target) {
+ memprintf(&msg, "memory error when allocating log server '%s'\n", srv->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ init_log_target(srv->log_target);
+ if (srv->addr_type.proto_type == PROTO_TYPE_DGRAM) {
+ srv->log_target->type = LOG_TARGET_DGRAM;
+ /* Try to allocate log target addr (only used in DGRAM mode) */
+ srv->log_target->addr = calloc(1, sizeof(*srv->log_target->addr));
+ if (!srv->log_target->addr) {
+ memprintf(&msg, "memory error when allocating log server '%s'\n", srv->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ /* We must initialize it with known addr:svc_port, it will then
+ * be updated automatically by the server API for runtime changes
+ */
+ ipcpy(&srv->addr, srv->log_target->addr);
+ set_host_port(srv->log_target->addr, srv->svc_port);
+ }
+ else {
+ /* for now the BUFFER type only supports TCP servers, so it's
+ * almost explicit
+ */
+ srv->log_target->type = LOG_TARGET_BUFFER;
+ srv->log_target->sink = sink_new_from_srv(srv, "log backend");
+ if (!srv->log_target->sink) {
+ memprintf(&msg, "error when creating sink from '%s' log server", srv->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+
+ if (target_type == -1)
+ target_type = srv->log_target->type;
+
+ if (target_type != srv->log_target->type) {
+ memprintf(&msg, "cannot mix server types within a log backend, '%s' srv's network type differs from previous server", srv->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ srv->log_target->flags |= LOG_TARGET_FL_RESOLVED;
+ srv->cur_eweight = 1; /* ignore weights, all servers have the same weight */
+ _log_backend_srv_queue(srv);
+ srv = srv->next;
+ }
+ end:
+ if (err_code & ERR_CODE) {
+ ha_free(&be->lbprm.log.srv); /* free log servers array */
+ ha_alert("log backend '%s': failed to initialize: %s.\n", be->id, msg);
+ ha_free(&msg);
+ }
+
+ return err_code;
+}
+
+/* resolves a single logger entry (it is expected to be called
+ * at postparsing stage)
+ *
+ * <logger> is parent logger used for implicit settings
+ *
+ * Returns err_code which defaults to ERR_NONE and can be set to a combination
+ * of ERR_WARN, ERR_ALERT, ERR_FATAL and ERR_ABORT in case of errors.
+ * <msg> could be set at any time (it will usually be set on error, but
+ * could also be set when no error occurred to report a diag warning), thus it
+ * is up to the caller to check it and to free it.
+ */
+int resolve_logger(struct logger *logger, char **msg)
+{
+ struct log_target *target = &logger->target;
+ int err_code = ERR_NONE;
+
+ if (target->type == LOG_TARGET_BUFFER)
+ err_code = sink_resolve_logger_buffer(logger, msg);
+ else if (target->type == LOG_TARGET_BACKEND) {
+ struct proxy *be;
+
+ /* special case */
+ be = proxy_find_by_name(target->be_name, PR_CAP_BE, 0);
+ if (!be) {
+ memprintf(msg, "uses unknown log backend '%s'", target->be_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ else if (be->mode != PR_MODE_SYSLOG) {
+ memprintf(msg, "uses incompatible log backend '%s'", target->be_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ ha_free(&target->be_name); /* backend is resolved and will replace name hint */
+ target->be = be;
+ }
+
+ end:
+ target->flags |= LOG_TARGET_FL_RESOLVED;
+
+ return err_code;
+}
+
+/* tries to duplicate <def> logger
+ *
+ * Returns the newly allocated and duplicated logger or NULL
+ * in case of error.
+ */
+struct logger *dup_logger(struct logger *def)
+{
+ struct logger *cpy = malloc(sizeof(*cpy));
+
+ /* copy everything that can be easily copied */
+ memcpy(cpy, def, sizeof(*cpy));
+
+ /* default values */
+ cpy->conf.file = NULL;
+ LIST_INIT(&cpy->list);
+
+ /* special members */
+ if (dup_log_target(&def->target, &cpy->target) == 0)
+ goto error;
+ if (def->conf.file) {
+ cpy->conf.file = strdup(def->conf.file);
+ if (!cpy->conf.file)
+ goto error;
+ }
+
+ /* inherit from original reference if set */
+ cpy->ref = (def->ref) ? def->ref : def;
+
+ return cpy;
+
+ error:
+ free_logger(cpy);
+ return NULL;
+}
+
+/* frees <logger> after freeing all of its allocated fields. The
+ * logger must not belong to a list anymore. <logger> may be NULL, which is
+ * silently ignored.
+ */
+void free_logger(struct logger *logger)
+{
+ if (!logger)
+ return;
+
+ BUG_ON(LIST_INLIST(&logger->list));
+ ha_free(&logger->conf.file);
+ deinit_log_target(&logger->target);
+ free(logger);
+}
+
+/* Parse single log target
+ * Returns 0 on failure and positive value on success
+ */
+static int parse_log_target(char *raw, struct log_target *target, char **err)
+{
+ int port1, port2, fd;
+ struct protocol *proto;
+ struct sockaddr_storage *sk;
+
+ init_log_target(target);
+ // target addr is NULL at this point
+
+ if (strncmp(raw, "ring@", 5) == 0) {
+ target->type = LOG_TARGET_BUFFER;
+ target->ring_name = strdup(raw + 5);
+ goto done;
+ }
+ else if (strncmp(raw, "backend@", 8) == 0) {
+ target->type = LOG_TARGET_BACKEND;
+ target->be_name = strdup(raw + 8);
+ goto done;
+ }
+
+ /* try to allocate log target addr */
+ target->addr = malloc(sizeof(*target->addr));
+ if (!target->addr) {
+ memprintf(err, "memory error");
+ goto error;
+ }
+
+ target->type = LOG_TARGET_DGRAM; // default type
+
+ /* parse the target address */
+ sk = str2sa_range(raw, NULL, &port1, &port2, &fd, &proto, NULL,
+ err, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_RAW_FD | PA_O_DGRAM | PA_O_STREAM | PA_O_DEFAULT_DGRAM);
+ if (!sk)
+ goto error;
+ if (fd != -1)
+ target->type = LOG_TARGET_FD;
+ *target->addr = *sk;
+
+ if (sk->ss_family == AF_INET || sk->ss_family == AF_INET6) {
+ if (!port1)
+ set_host_port(target->addr, SYSLOG_PORT);
+ }
+
+ if (proto && proto->xprt_type == PROTO_TYPE_STREAM) {
+ static unsigned long ring_ids;
+
+ /* Implicit sink buffer will be initialized in post_check
+ * (target->addr is set in this case)
+ */
+ target->type = LOG_TARGET_BUFFER;
+ /* compute unique name for the ring */
+ memprintf(&target->ring_name, "ring#%lu", ++ring_ids);
+ }
+
+ done:
+ return 1;
+ error:
+ deinit_log_target(target);
+ return 0;
+}
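+
+/* Illustrative examples (not part of the original source) of strings
+ * accepted by parse_log_target():
+ *
+ *   "ring@myring"    -> LOG_TARGET_BUFFER, ring_name = "myring"
+ *   "backend@mylog"  -> LOG_TARGET_BACKEND, be_name = "mylog"
+ *   "fd@1"           -> LOG_TARGET_FD (existing file descriptor)
+ *   "127.0.0.1:514"  -> LOG_TARGET_DGRAM (UDP syslog)
+ *   "127.0.0.1"      -> LOG_TARGET_DGRAM, port defaulted to SYSLOG_PORT
+ *
+ * A stream address such as "tcp@127.0.0.1:514" is turned into an implicit
+ * ring named "ring#<id>" and handled as LOG_TARGET_BUFFER.
+ */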
+
+/*
+ * Parse "log" keyword and update <loggers> list accordingly.
+ *
+ * When <do_del> is set, it means the "no log" line was parsed, so all log
+ * servers in <loggers> are released.
+ *
+ * Otherwise, we try to parse the "log" line. First of all, when the list is not
+ * the global one, we look for the parameter "global". If we find it,
+ * global.loggers is copied. Else we parse each arguments.
+ *
+ * The function returns 1 in success case, otherwise, it returns 0 and err is
+ * filled.
+ */
+int parse_logger(char **args, struct list *loggers, int do_del, const char *file, int linenum, char **err)
+{
+ struct smp_log_range *smp_rgs = NULL;
+ struct logger *logger = NULL;
+ int cur_arg;
+
+ /*
+ * "no log": delete previous herited or defined syslog
+ * servers.
+ */
+ if (do_del) {
+ struct logger *back;
+
+ if (*(args[1]) != 0) {
+ memprintf(err, "'no log' does not expect arguments");
+ goto error;
+ }
+
+ list_for_each_entry_safe(logger, back, loggers, list) {
+ LIST_DEL_INIT(&logger->list);
+ free_logger(logger);
+ }
+ return 1;
+ }
+
+ /*
+ * "log global": copy global.loggers linked list to the end of loggers
+ * list. But first, we check (loggers != global.loggers).
+ */
+ if (*(args[1]) && *(args[2]) == 0 && strcmp(args[1], "global") == 0) {
+ if (loggers == &global.loggers) {
+ memprintf(err, "'global' is not supported for a global syslog server");
+ goto error;
+ }
+ list_for_each_entry(logger, &global.loggers, list) {
+ struct logger *node;
+
+ list_for_each_entry(node, loggers, list) {
+ if (node->ref == logger)
+ goto skip_logger;
+ }
+
+ /* duplicate logger from global */
+ node = dup_logger(logger);
+ if (!node) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+
+ /* manually override some values */
+ ha_free(&node->conf.file);
+ node->conf.file = strdup(file);
+ node->conf.line = linenum;
+
+ /* add to list */
+ LIST_APPEND(loggers, &node->list);
+
+ skip_logger:
+ continue;
+ }
+ return 1;
+ }
+
+ /*
+ * "log <address> ...: parse a syslog server line
+ */
+ if (*(args[1]) == 0 || *(args[2]) == 0) {
+ memprintf(err, "expects <address> and <facility> %s as arguments",
+ ((loggers == &global.loggers) ? "" : "or global"));
+ goto error;
+ }
+
+ /* take care of "stdout" and "stderr" as regular aliases for fd@1 / fd@2 */
+ if (strcmp(args[1], "stdout") == 0)
+ args[1] = "fd@1";
+ else if (strcmp(args[1], "stderr") == 0)
+ args[1] = "fd@2";
+
+ logger = calloc(1, sizeof(*logger));
+ if (!logger) {
+ memprintf(err, "out of memory");
+ goto error;
+ }
+ LIST_INIT(&logger->list);
+ logger->conf.file = strdup(file);
+ logger->conf.line = linenum;
+
+ /* skip address for now, it will be parsed at the end */
+ cur_arg = 2;
+
+ /* just after the address, a length may be specified */
+ logger->maxlen = MAX_SYSLOG_LEN;
+ if (strcmp(args[cur_arg], "len") == 0) {
+ int len = atoi(args[cur_arg+1]);
+ if (len < 80 || len > 65535) {
+ memprintf(err, "invalid log length '%s', must be between 80 and 65535",
+ args[cur_arg+1]);
+ goto error;
+ }
+ logger->maxlen = len;
+ cur_arg += 2;
+ }
+ if (logger->maxlen > global.max_syslog_len)
+ global.max_syslog_len = logger->maxlen;
+
+ /* after the length, a format may be specified */
+ if (strcmp(args[cur_arg], "format") == 0) {
+ logger->format = get_log_format(args[cur_arg+1]);
+ if (logger->format == LOG_FORMAT_UNSPEC) {
+ memprintf(err, "unknown log format '%s'", args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg += 2;
+ }
+
+ if (strcmp(args[cur_arg], "sample") == 0) {
+ unsigned low, high;
+ char *p, *beg, *end, *smp_sz_str;
+ size_t smp_rgs_sz = 0, smp_sz = 0, new_smp_sz;
+
+ p = args[cur_arg+1];
+ smp_sz_str = strchr(p, ':');
+ if (!smp_sz_str) {
+ memprintf(err, "Missing sample size");
+ goto error;
+ }
+
+ *smp_sz_str++ = '\0';
+
+ end = p + strlen(p);
+
+ while (p != end) {
+ if (!get_logger_smp_range(&low, &high, &p, err))
+ goto error;
+
+ if (smp_rgs && smp_log_ranges_overlap(smp_rgs, smp_rgs_sz, low, high, err))
+ goto error;
+
+ smp_rgs = my_realloc2(smp_rgs, (smp_rgs_sz + 1) * sizeof *smp_rgs);
+ if (!smp_rgs) {
+ memprintf(err, "out of memory error");
+ goto error;
+ }
+
+ smp_rgs[smp_rgs_sz].low = low;
+ smp_rgs[smp_rgs_sz].high = high;
+ smp_rgs[smp_rgs_sz].sz = high - low + 1;
+ if (smp_rgs[smp_rgs_sz].high > smp_sz)
+ smp_sz = smp_rgs[smp_rgs_sz].high;
+ smp_rgs_sz++;
+ }
+
+ if (smp_rgs == NULL) {
+ memprintf(err, "no sampling ranges given");
+ goto error;
+ }
+
+ beg = smp_sz_str;
+ end = beg + strlen(beg);
+ new_smp_sz = read_uint((const char **)&beg, end);
+ if (!new_smp_sz || beg != end) {
+ memprintf(err, "wrong sample size '%s' for sample range '%s'",
+ smp_sz_str, args[cur_arg+1]);
+ goto error;
+ }
+
+ if (new_smp_sz < smp_sz) {
+ memprintf(err, "sample size %zu should be greater or equal to "
+ "%zu the maximum of the high ranges limits",
+ new_smp_sz, smp_sz);
+ goto error;
+ }
+ smp_sz = new_smp_sz;
+
+ /* Let's order <smp_rgs> array. */
+ qsort(smp_rgs, smp_rgs_sz, sizeof(struct smp_log_range), smp_log_range_cmp);
+
+ logger->lb.smp_rgs = smp_rgs;
+ logger->lb.smp_rgs_sz = smp_rgs_sz;
+ logger->lb.smp_sz = smp_sz;
+
+ cur_arg += 2;
+ }
+
+ /* parse the facility */
+ logger->facility = get_log_facility(args[cur_arg]);
+ if (logger->facility < 0) {
+ memprintf(err, "unknown log facility '%s'", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+
+ /* parse the max syslog level (default: debug) */
+ logger->level = 7;
+ if (*(args[cur_arg])) {
+ logger->level = get_log_level(args[cur_arg]);
+ if (logger->level < 0) {
+ memprintf(err, "unknown optional log level '%s'", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ /* parse the limit syslog level (default: emerg) */
+ logger->minlvl = 0;
+ if (*(args[cur_arg])) {
+ logger->minlvl = get_log_level(args[cur_arg]);
+ if (logger->minlvl < 0) {
+ memprintf(err, "unknown optional minimum log level '%s'", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ /* Too many args */
+ if (*(args[cur_arg])) {
+ memprintf(err, "cannot handle unexpected argument '%s'", args[cur_arg]);
+ goto error;
+ }
+
+ /* now, back to the log target */
+ if (!parse_log_target(args[1], &logger->target, err))
+ goto error;
+
+ done:
+ LIST_APPEND(loggers, &logger->list);
+ return 1;
+
+ error:
+ free(smp_rgs);
+ free_logger(logger);
+ return 0;
+}
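+
+/* Illustrative configuration lines (not part of the original source) that
+ * this parser accepts, assuming the usual "log" keyword wiring:
+ *
+ *   log global
+ *   log stdout format raw local0
+ *   log 127.0.0.1:514 len 4096 format rfc5424 sample 1-3,5:6 local0 info
+ *
+ * The last line only emits the messages at positions 1-3 and 5 within each
+ * group of 6 consecutive logs, truncates them to 4096 bytes, and limits the
+ * level to 'info'.
+ */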
+
+
+/*
+ * returns the log format, or LOG_FORMAT_UNSPEC if not found.
+ */
+enum log_fmt get_log_format(const char *fmt)
+{
+ enum log_fmt format;
+
+ format = LOG_FORMATS - 1;
+ while (format > 0 && log_formats[format].name
+ && strcmp(log_formats[format].name, fmt) != 0)
+ format--;
+
+ /* Note: 0 is LOG_FORMAT_UNSPEC */
+ return format;
+}
+
+/*
+ * returns log level for <lev> or -1 if not found.
+ */
+int get_log_level(const char *lev)
+{
+ int level;
+
+ level = NB_LOG_LEVELS - 1;
+ while (level >= 0 && strcmp(log_levels[level], lev) != 0)
+ level--;
+
+ return level;
+}
+
+/*
+ * returns log facility for <fac> or -1 if not found.
+ */
+int get_log_facility(const char *fac)
+{
+ int facility;
+
+ facility = NB_LOG_FACILITIES - 1;
+ while (facility >= 0 && strcmp(log_facilities[facility], fac) != 0)
+ facility--;
+
+ return facility;
+}
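+
+/* Illustrative example (not part of the original source): these three
+ * helpers do a plain reverse scan of their respective string tables, so
+ * with the standard syslog numbering used here, get_log_facility("local0")
+ * returns 16 and get_log_level("info") returns 6, while an unknown name
+ * returns -1 (or LOG_FORMAT_UNSPEC for get_log_format()).
+ */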
+
+/*
+ * Encode the string.
+ *
+ * When using the +E log format option, it escapes the '"', '\' and ']'
+ * characters by prefixing them with '\'. The same prefix should not be used as
+ * <escape>.
+ */
+static char *lf_encode_string(char *start, char *stop,
+ const char escape, const long *map,
+ const char *string,
+ struct logformat_node *node)
+{
+ if (node->options & LOG_OPT_ESC) {
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && *string != '\0') {
+ if (!ha_bit_test((unsigned char)(*string), map)) {
+ if (!ha_bit_test((unsigned char)(*string), rfc5424_escape_map))
+ *start++ = *string;
+ else {
+ if (start + 2 >= stop)
+ break;
+ *start++ = '\\';
+ *start++ = *string;
+ }
+ }
+ else {
+ if (start + 3 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = hextab[(*string >> 4) & 15];
+ *start++ = hextab[*string & 15];
+ }
+ string++;
+ }
+ *start = '\0';
+ }
+ }
+ else {
+ return encode_string(start, stop, escape, map, string);
+ }
+
+ return start;
+}
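+
+/* Illustrative example (not part of the original source): with LOG_OPT_ESC
+ * set, a character flagged in rfc5424_escape_map ('"', '\' or ']') is
+ * emitted with a '\' prefix, so the input sd"val] becomes sd\"val\], while
+ * a character flagged in <map> is emitted as <escape> followed by its two
+ * hex digits.
+ */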
+
+/*
+ * Encode the chunk.
+ *
+ * When using the +E log format option, it escapes the '"', '\' and ']'
+ * characters by prefixing them with '\'. The same prefix should not be used as
+ * <escape>.
+ */
+static char *lf_encode_chunk(char *start, char *stop,
+ const char escape, const long *map,
+ const struct buffer *chunk,
+ struct logformat_node *node)
+{
+ char *str, *end;
+
+ if (node->options & LOG_OPT_ESC) {
+ if (start < stop) {
+ str = chunk->area;
+ end = chunk->area + chunk->data;
+
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && str < end) {
+ if (!ha_bit_test((unsigned char)(*str), map)) {
+ if (!ha_bit_test((unsigned char)(*str), rfc5424_escape_map))
+ *start++ = *str;
+ else {
+ if (start + 2 >= stop)
+ break;
+ *start++ = '\\';
+ *start++ = *str;
+ }
+ }
+ else {
+ if (start + 3 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = hextab[(*str >> 4) & 15];
+ *start++ = hextab[*str & 15];
+ }
+ str++;
+ }
+ *start = '\0';
+ }
+ }
+ else {
+ return encode_chunk(start, stop, escape, map, chunk);
+ }
+
+ return start;
+}
+
+/*
+ * Write a string into the log buffer.
+ * Takes care of the quote and escape options.
+ *
+ * Return the address of the \0 character, or NULL on error
+ */
+char *lf_text_len(char *dst, const char *src, size_t len, size_t size, const struct logformat_node *node)
+{
+ if (size < 2)
+ return NULL;
+
+ if (node->options & LOG_OPT_QUOTE) {
+ *(dst++) = '"';
+ size--;
+ }
+
+ if (src && len) {
+ /* escape_string and strlcpy2 will both try to add a terminating NUL byte
+ * to dst, so we need to make sure that extra byte will fit into dst
+ * before calling them
+ */
+ if (node->options & LOG_OPT_ESC) {
+ char *ret;
+
+ ret = escape_string(dst, (dst + size - 1), '\\', rfc5424_escape_map, src, src + len);
+ if (ret == NULL || *ret != '\0')
+ return NULL;
+ len = ret - dst;
+ }
+ else {
+ if (++len > size)
+ len = size;
+ len = strlcpy2(dst, src, len);
+ }
+
+ size -= len;
+ dst += len;
+ }
+ else if ((node->options & (LOG_OPT_QUOTE|LOG_OPT_MANDATORY)) == LOG_OPT_MANDATORY) {
+ if (size < 2)
+ return NULL;
+ *(dst++) = '-';
+ size -= 1;
+ }
+
+ if (node->options & LOG_OPT_QUOTE) {
+ if (size < 2)
+ return NULL;
+ *(dst++) = '"';
+ }
+
+ *dst = '\0';
+ return dst;
+}
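+
+/* Illustrative example (not part of the original source): with
+ * node->options = LOG_OPT_QUOTE, lf_text_len(dst, "GET /", 5, size, node)
+ * writes "GET /" surrounded by double quotes and returns the address of the
+ * trailing '\0'; with LOG_OPT_MANDATORY and an empty source, a single '-'
+ * is written instead.
+ */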
+
+static inline char *lf_text(char *dst, const char *src, size_t size, const struct logformat_node *node)
+{
+ return lf_text_len(dst, src, size, size, node);
+}
+
+/*
+ * Write an IP address to the log buffer.
+ * The +X option writes it in hexadecimal notation, most significant byte on the left.
+ */
+char *lf_ip(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node)
+{
+ char *ret = dst;
+ int iret;
+ char pn[INET6_ADDRSTRLEN];
+
+ if (node->options & LOG_OPT_HEXA) {
+ unsigned char *addr = NULL;
+ switch (sockaddr->sa_family) {
+ case AF_INET:
+ addr = (unsigned char *)&((struct sockaddr_in *)sockaddr)->sin_addr.s_addr;
+ iret = snprintf(dst, size, "%02X%02X%02X%02X", addr[0], addr[1], addr[2], addr[3]);
+ break;
+ case AF_INET6:
+ addr = (unsigned char *)&((struct sockaddr_in6 *)sockaddr)->sin6_addr.s6_addr;
+ iret = snprintf(dst, size, "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X",
+ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7],
+ addr[8], addr[9], addr[10], addr[11], addr[12], addr[13], addr[14], addr[15]);
+ break;
+ default:
+ return NULL;
+ }
+ if (iret < 0 || iret >= size) /* error or truncated output */
+ return NULL;
+ ret += iret;
+ } else {
+ addr_to_str((struct sockaddr_storage *)sockaddr, pn, sizeof(pn));
+ ret = lf_text(dst, pn, size, node);
+ if (ret == NULL)
+ return NULL;
+ }
+ return ret;
+}
+
+/*
+ * Write a port to the log buffer.
+ * The +X option writes it in hexadecimal notation, most significant byte on the left.
+ */
+char *lf_port(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node)
+{
+ char *ret = dst;
+ int iret;
+
+ if (node->options & LOG_OPT_HEXA) {
+ const unsigned char *port = (const unsigned char *)&((struct sockaddr_in *)sockaddr)->sin_port;
+ iret = snprintf(dst, size, "%02X%02X", port[0], port[1]);
+ if (iret < 0 || iret >= size) /* error or truncated output */
+ return NULL;
+ ret += iret;
+ } else {
+ ret = ltoa_o(get_host_port((struct sockaddr_storage *)sockaddr), dst, size);
+ if (ret == NULL)
+ return NULL;
+ }
+ return ret;
+}
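+
+/* Illustrative example (not part of the original source): with LOG_OPT_HEXA
+ * set, an IPv4 peer 127.0.0.1:8080 is rendered by lf_ip() as "7F000001" and
+ * by lf_port() as "1F90" (most significant byte on the left); without the
+ * option, the usual "127.0.0.1" and "8080" text forms are produced.
+ */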
+
+
+/*
+ * This function sends the syslog message using a printf format string. It
+ * expects an LF-terminated message.
+ */
+void send_log(struct proxy *p, int level, const char *format, ...)
+{
+ va_list argp;
+ int data_len;
+
+ if (level < 0 || format == NULL || logline == NULL)
+ return;
+
+ va_start(argp, format);
+ data_len = vsnprintf(logline, global.max_syslog_len, format, argp);
+ if (data_len < 0 || data_len > global.max_syslog_len)
+ data_len = global.max_syslog_len;
+ va_end(argp);
+
+ __send_log((p ? &p->loggers : NULL), (p ? &p->log_tag : NULL), level,
+ logline, data_len, default_rfc5424_sd_log_format, 2);
+}
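+
+/* Illustrative usage (not part of the original source): callers pass a
+ * proxy (or NULL to use the default loggers) and an LF-terminated printf
+ * format, e.g.:
+ *
+ *   send_log(p, LOG_WARNING, "Server %s/%s is DOWN.\n", p->id, srv->id);
+ */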
+/*
+ * This function builds a log header according to <hdr> settings.
+ *
+ * If hdr.format is set to LOG_FORMAT_UNSPEC, the format is determined
+ * from hdr.metadata. This is useful for log forwarding, to be able to
+ * forward any format without explicit settings.
+ *
+ * This function returns a struct ist array containing the header
+ * elements; <nbelem> is set to the number of available elements.
+ * It currently returns a maximum of NB_LOG_HDR_MAX_ELEMENTS
+ * elements.
+ */
+struct ist *build_log_header(struct log_header hdr, size_t *nbelem)
+{
+ static THREAD_LOCAL struct {
+ struct ist ist_vector[NB_LOG_HDR_MAX_ELEMENTS];
+ char timestamp_buffer[LOG_LEGACYTIME_LEN+1+1];
+ time_t cur_legacy_time;
+ char priority_buffer[6];
+ } hdr_ctx = { .priority_buffer = "<<<<>" };
+
+ struct tm logtime;
+ int len;
+ int fac_level = 0;
+ time_t time = date.tv_sec;
+ struct ist *metadata = hdr.metadata;
+ enum log_fmt format = hdr.format;
+ int facility = hdr.facility;
+ int level = hdr.level;
+
+ *nbelem = 0;
+
+
+ if (format == LOG_FORMAT_UNSPEC) {
+ format = LOG_FORMAT_RAW;
+ if (metadata) {
+ /* If a hostname is set, it appears we want to perform syslog
+ * because only rfc5424 or rfc3164 support a hostname.
+ */
+ if (metadata[LOG_META_HOST].len) {
+ /* If a rfc5424 compliant timestamp is used we consider
+ * that output format is rfc5424, else legacy format
+ * is used as specified default for local logs
+ * in documentation.
+ */
+ if ((metadata[LOG_META_TIME].len == 1 && metadata[LOG_META_TIME].ptr[0] == '-')
+ || (metadata[LOG_META_TIME].len >= LOG_ISOTIME_MINLEN))
+ format = LOG_FORMAT_RFC5424;
+ else
+ format = LOG_FORMAT_RFC3164;
+ }
+ else if (metadata[LOG_META_TAG].len) {
+ /* Tag is present but no hostname, we should
+ * consider we try to emit a local log
+ * in legacy format (analog to RFC3164 but
+ * with stripped hostname).
+ */
+ format = LOG_FORMAT_LOCAL;
+ }
+ else if (metadata[LOG_META_PRIO].len) {
+ /* the source seems a parsed message
+ * offering a valid level/prio prefix
+ * so we consider this format.
+ */
+ format = LOG_FORMAT_PRIO;
+ }
+ }
+ }
+
+ /* prepare priority, stored into 1 single elem */
+ switch (format) {
+ case LOG_FORMAT_LOCAL:
+ case LOG_FORMAT_RFC3164:
+ case LOG_FORMAT_RFC5424:
+ case LOG_FORMAT_PRIO:
+ fac_level = facility << 3;
+ /* the remaining formats ignore the facility */
+ __fallthrough;
+ case LOG_FORMAT_TIMED:
+ case LOG_FORMAT_SHORT:
+ fac_level += level;
+ hdr_ctx.ist_vector[*nbelem].ptr = &hdr_ctx.priority_buffer[3]; /* last digit of the priority value */
+ do {
+ *hdr_ctx.ist_vector[*nbelem].ptr = '0' + fac_level % 10;
+ fac_level /= 10;
+ hdr_ctx.ist_vector[*nbelem].ptr--;
+ } while (fac_level && hdr_ctx.ist_vector[*nbelem].ptr > &hdr_ctx.priority_buffer[0]);
+ *hdr_ctx.ist_vector[*nbelem].ptr = '<';
+ hdr_ctx.ist_vector[(*nbelem)++].len = &hdr_ctx.priority_buffer[5] - hdr_ctx.ist_vector[0].ptr;
+ break;
+ case LOG_FORMAT_ISO:
+ case LOG_FORMAT_RAW:
+ break;
+ case LOG_FORMAT_UNSPEC:
+ case LOG_FORMATS:
+ ABORT_NOW();
+ }
+
+
+ /* prepare timestamp, stored into a max of 4 elems */
+ switch (format) {
+ case LOG_FORMAT_LOCAL:
+ case LOG_FORMAT_RFC3164:
+ /* rfc3164 ex: 'Jan  1 00:00:00 ' */
+ if (metadata && metadata[LOG_META_TIME].len == LOG_LEGACYTIME_LEN) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TIME];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ /* time is set, break immediately */
+ break;
+ }
+ else if (metadata && metadata[LOG_META_TIME].len >= LOG_ISOTIME_MINLEN) {
+ int month;
+ char *timestamp = metadata[LOG_META_TIME].ptr;
+
+ /* iso time always begins like this: '1970-01-01T00:00:00' */
+
+ /* compute month */
+ month = 10*(timestamp[5] - '0') + (timestamp[6] - '0');
+ if (month)
+ month--;
+ if (month <= 11) {
+ /* builds log prefix ex: 'Jan 1 ' */
+ len = snprintf(hdr_ctx.timestamp_buffer, sizeof(hdr_ctx.timestamp_buffer),
+ "%s %c%c ", monthname[month],
+ timestamp[8] != '0' ? timestamp[8] : ' ',
+ timestamp[9]);
+ /* we reused the timestamp_buffer, signal that it does not
+ * contain local time anymore
+ */
+ hdr_ctx.cur_legacy_time = 0;
+ if (len == 7) {
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&hdr_ctx.timestamp_buffer[0], len);
+ /* adds 'HH:MM:SS' from iso time */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&timestamp[11], 8);
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ /* we successfully reuse iso time, we can break */
+ break;
+ }
+ }
+ /* Failed to reuse isotime time, fallback to local legacy time */
+ }
+
+ if (unlikely(time != hdr_ctx.cur_legacy_time)) {
+ /* re-builds timestamp from the current local time */
+ get_localtime(time, &logtime);
+
+ len = snprintf(hdr_ctx.timestamp_buffer, sizeof(hdr_ctx.timestamp_buffer),
+ "%s %2d %02d:%02d:%02d ",
+ monthname[logtime.tm_mon],
+ logtime.tm_mday, logtime.tm_hour, logtime.tm_min, logtime.tm_sec);
+ if (len != LOG_LEGACYTIME_LEN+1)
+ hdr_ctx.cur_legacy_time = 0;
+ else
+ hdr_ctx.cur_legacy_time = time;
+ }
+ if (likely(hdr_ctx.cur_legacy_time))
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&hdr_ctx.timestamp_buffer[0], LOG_LEGACYTIME_LEN+1);
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("Jan 1 00:00:00 ", LOG_LEGACYTIME_LEN+1);
+ break;
+ case LOG_FORMAT_RFC5424:
+ /* adds the rfc5424 version prefix */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("1 ", 2);
+ if (metadata && metadata[LOG_META_TIME].len == 1 && metadata[LOG_META_TIME].ptr[0] == '-') {
+ /* submitted time is the NILVALUE, which is a valid timestamp for rfc5424 */
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TIME];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ break;
+ }
+ /* otherwise fall through to the 'timed' and 'iso' handling for a usual timestamp */
+ __fallthrough;
+ case LOG_FORMAT_TIMED:
+ case LOG_FORMAT_ISO:
+ /* ISO format ex: '1900-01-01T12:00:00.123456Z'
+ * '1900-01-01T14:00:00+02:00'
+ * '1900-01-01T10:00:00.123456-02:00'
+ */
+ if (metadata && metadata[LOG_META_TIME].len >= LOG_ISOTIME_MINLEN) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TIME];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ /* time is set, break immediately */
+ break;
+ }
+ else if (metadata && metadata[LOG_META_TIME].len == LOG_LEGACYTIME_LEN) {
+ int month;
+ char *timestamp = metadata[LOG_META_TIME].ptr;
+
+ for (month = 0; month < 12; month++)
+ if (!memcmp(monthname[month], timestamp, 3))
+ break;
+
+ if (month < 12) {
+
+ /* get local time to retrieve year */
+ get_localtime(time, &logtime);
+
+ /* the year seems to have changed since the log was emitted */
+ if (logtime.tm_mon < month)
+ logtime.tm_year--;
+
+ /* builds rfc5424 prefix ex: '1900-01-01T' */
+ len = snprintf(hdr_ctx.timestamp_buffer, sizeof(hdr_ctx.timestamp_buffer),
+ "%4d-%02d-%c%cT",
+ logtime.tm_year+1900, month+1,
+ timestamp[4] != ' ' ? timestamp[4] : '0',
+ timestamp[5]);
+
+ /* we reused the timestamp_buffer, signal that it does not
+ * contain local time anymore
+ */
+ hdr_ctx.cur_legacy_time = 0;
+ if (len == 11) {
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&hdr_ctx.timestamp_buffer[0], len);
+ /* adds HH:MM:SS from legacy timestamp */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(&timestamp[7], 8);
+ /* skip the fractional seconds because they are optional */
+ /* according to rfc: -00:00 means we don't know the timezone */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("-00:00 ", 7);
+ /* we successfully reuse legacy time, we can break */
+ break;
+ }
+ }
+ /* Failed to reuse legacy time, fallback to local iso time */
+ }
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(timeofday_as_iso_us(1), LOG_ISOTIME_MAXLEN + 1);
+ break;
+ case LOG_FORMAT_PRIO:
+ case LOG_FORMAT_SHORT:
+ case LOG_FORMAT_RAW:
+ break;
+ case LOG_FORMAT_UNSPEC:
+ case LOG_FORMATS:
+ ABORT_NOW();
+ }
+
+ /* prepare other meta data, stored into a max of 10 elems */
+ switch (format) {
+ case LOG_FORMAT_RFC3164:
+ if (metadata && metadata[LOG_META_HOST].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_HOST];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else /* the caller MUST fill the hostname, this field is mandatory */
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("localhost ", 10);
+ __fallthrough;
+ case LOG_FORMAT_LOCAL:
+ if (!metadata || !metadata[LOG_META_TAG].len)
+ break;
+
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TAG];
+ if (metadata[LOG_META_PID].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("[", 1);
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_PID];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("]", 1);
+ }
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(": ", 2);
+ break;
+ case LOG_FORMAT_RFC5424:
+ if (metadata && metadata[LOG_META_HOST].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_HOST];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+
+ if (metadata && metadata[LOG_META_TAG].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_TAG];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+
+ if (metadata && metadata[LOG_META_PID].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_PID];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+
+ if (metadata && metadata[LOG_META_MSGID].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_MSGID];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+
+ if (metadata && metadata[LOG_META_STDATA].len) {
+ hdr_ctx.ist_vector[(*nbelem)++] = metadata[LOG_META_STDATA];
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2(" ", 1);
+ }
+ else
+ hdr_ctx.ist_vector[(*nbelem)++] = ist2("- ", 2);
+ break;
+ case LOG_FORMAT_PRIO:
+ case LOG_FORMAT_SHORT:
+ case LOG_FORMAT_TIMED:
+ case LOG_FORMAT_ISO:
+ case LOG_FORMAT_RAW:
+ break;
+ case LOG_FORMAT_UNSPEC:
+ case LOG_FORMATS:
+ ABORT_NOW();
+ }
+
+ return hdr_ctx.ist_vector;
+}
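+
+/* Illustrative example (not part of the original source): for
+ * hdr.format = LOG_FORMAT_RFC3164 with facility 16 (local0) and level 6
+ * (info), the returned elements concatenate to something like:
+ *
+ *   <134>Jan  1 00:00:00 localhost haproxy[1234]:
+ *
+ * i.e. the priority (16 << 3 | 6 = 134), a legacy timestamp, the hostname,
+ * then "tag[pid]: " taken from the metadata when present.
+ */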
+
+/*
+ * This function sends a syslog message.
+ * <target> is the actual log target where the log will be sent.
+ *
+ * The message will be prefixed by a header according to the <hdr> setting.
+ * The final message will be truncated to <maxlen> bytes and will be
+ * terminated with an LF character.
+ *
+ * Does not return any error.
+ */
+static inline void __do_send_log(struct log_target *target, struct log_header hdr,
+ int nblogger, size_t maxlen,
+ char *message, size_t size)
+{
+ static THREAD_LOCAL struct iovec iovec[NB_LOG_HDR_MAX_ELEMENTS+1+1] = { }; /* header elements + message + LF */
+ static THREAD_LOCAL struct msghdr msghdr = {
+ //.msg_iov = iovec,
+ .msg_iovlen = NB_LOG_HDR_MAX_ELEMENTS+2
+ };
+ static THREAD_LOCAL int logfdunix = -1; /* syslog to AF_UNIX socket */
+ static THREAD_LOCAL int logfdinet = -1; /* syslog to AF_INET socket */
+ int *plogfd;
+ int sent;
+ size_t nbelem;
+ struct ist *msg_header = NULL;
+
+ msghdr.msg_iov = iovec;
+
+ /* historically some messages used to already contain the trailing LF
+ * or NUL. Let's remove all trailing LFs and NULs
+ */
+ while (size && (message[size-1] == '\n' || (message[size-1] == 0)))
+ size--;
+
+ if (target->type == LOG_TARGET_BUFFER) {
+ plogfd = NULL;
+ goto send;
+ }
+ else if (target->addr->ss_family == AF_CUST_EXISTING_FD) {
+ /* the socket's address is a file descriptor */
+ plogfd = (int *)&((struct sockaddr_in *)target->addr)->sin_addr.s_addr;
+ }
+ else if (target->addr->ss_family == AF_UNIX)
+ plogfd = &logfdunix;
+ else
+ plogfd = &logfdinet;
+
+ if (plogfd && unlikely(*plogfd < 0)) {
+ /* socket not successfully initialized yet */
+ if ((*plogfd = socket(target->addr->ss_family, SOCK_DGRAM,
+ (target->addr->ss_family == AF_UNIX) ? 0 : IPPROTO_UDP)) < 0) {
+ static char once;
+
+ if (!once) {
+ once = 1; /* note: no need for atomic ops here */
+ ha_alert("socket() failed in logger #%d: %s (errno=%d)\n",
+ nblogger, strerror(errno), errno);
+ }
+ return;
+ } else {
+ /* we don't want to receive anything on this socket */
+ setsockopt(*plogfd, SOL_SOCKET, SO_RCVBUF, &zero, sizeof(zero));
+ /* we may want to adjust the output buffer (tune.sndbuf.backend) */
+ if (global.tune.backend_sndbuf)
+ setsockopt(*plogfd, SOL_SOCKET, SO_SNDBUF, &global.tune.backend_sndbuf, sizeof(global.tune.backend_sndbuf));
+ /* does nothing under Linux, maybe needed for others */
+ shutdown(*plogfd, SHUT_RD);
+ fd_set_cloexec(*plogfd);
+ }
+ }
+
+ msg_header = build_log_header(hdr, &nbelem);
+ send:
+ if (target->type == LOG_TARGET_BUFFER) {
+ struct ist msg;
+ size_t e_maxlen = maxlen;
+
+ msg = ist2(message, size);
+
+ /* make room for the final '\n' which may be forcefully inserted
+ * by tcp forwarder applet (sink_forward_io_handler)
+ */
+ e_maxlen -= 1;
+
+ sent = sink_write(target->sink, hdr, e_maxlen, &msg, 1);
+ }
+ else if (target->addr->ss_family == AF_CUST_EXISTING_FD) {
+ struct ist msg;
+
+ msg = ist2(message, size);
+
+ sent = fd_write_frag_line(*plogfd, maxlen, msg_header, nbelem, &msg, 1, 1);
+ }
+ else {
+ int i = 0;
+ int totlen = maxlen - 1; /* save space for the final '\n' */
+
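+		/* fill the iovec with the header elements first, consuming the
+		 * <totlen> budget; if the budget runs out in the middle of the
+		 * header, the current element is truncated and the message is
+		 * skipped, but the trailing '\n' is always emitted.
+		 */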
+ for (i = 0 ; i < nbelem ; i++ ) {
+ iovec[i].iov_base = msg_header[i].ptr;
+ iovec[i].iov_len = msg_header[i].len;
+ if (totlen <= iovec[i].iov_len) {
+ iovec[i].iov_len = totlen;
+ totlen = 0;
+ break;
+ }
+ totlen -= iovec[i].iov_len;
+ }
+ if (totlen) {
+ iovec[i].iov_base = message;
+ iovec[i].iov_len = size;
+ if (totlen <= iovec[i].iov_len)
+ iovec[i].iov_len = totlen;
+ i++;
+ }
+ iovec[i].iov_base = "\n"; /* insert a \n at the end of the message */
+ iovec[i].iov_len = 1;
+ i++;
+
+ msghdr.msg_iovlen = i;
+ msghdr.msg_name = (struct sockaddr *)target->addr;
+ msghdr.msg_namelen = get_addr_len(target->addr);
+
+ sent = sendmsg(*plogfd, &msghdr, MSG_DONTWAIT | MSG_NOSIGNAL);
+ }
+
+ if (sent < 0) {
+ static char once;
+
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ _HA_ATOMIC_INC(&dropped_logs);
+ else if (!once) {
+ once = 1; /* note: no need for atomic ops here */
+ ha_alert("sendmsg()/writev() failed in logger #%d: %s (errno=%d)\n",
+ nblogger, strerror(errno), errno);
+ }
+ }
+}
+
+/* Does the same as __do_send_log() for a single target, except that here the
+ * log is sent to a server selected according to the log backend's lb settings.
+ * The function leverages __do_send_log() to actually send the log messages.
+ */
+static inline void __do_send_log_backend(struct proxy *be, struct log_header hdr,
+ int nblogger, size_t maxlen,
+ char *message, size_t size)
+{
+ struct server *srv;
+ uint32_t targetid = ~0; /* default value to check if it was explicitly assigned */
+ uint32_t nb_srv;
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &be->lbprm.lock);
+
+ if (be->srv_act) {
+ nb_srv = be->srv_act;
+ }
+ else if (be->srv_bck) {
+ /* no more active servers but backup ones are, switch to backup farm */
+ nb_srv = be->srv_bck;
+ if (!(be->options & PR_O_USE_ALL_BK)) {
+ /* log balancing disabled on backup farm */
+ targetid = 0; /* use first server */
+ goto skip_lb;
+ }
+ }
+ else {
+ /* no srv available, can't log */
+ goto drop;
+ }
+
+ /* log-balancing logic: */
+
+ if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_RR) {
+		/* Atomically load and update lastid since it's not protected
+		 * by any write lock.
+		 *
+		 * Wrapping is expected and could lead to an unexpected ID reset
+		 * in the middle of a cycle, but since this only happens once
+		 * every 4 billion messages it is quite negligible.
+		 */
+ targetid = HA_ATOMIC_FETCH_ADD(&be->lbprm.log.lastid, 1) % nb_srv;
+ }
+ else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LS) {
+		/* sticky mode: use the first server in the pool, which always stays
+		 * first across dequeuing and requeuing, unless it becomes unavailable,
+		 * in which case it is replaced by another one
+		 */
+ targetid = 0;
+ }
+ else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_RND) {
+ /* random mode */
+ targetid = statistical_prng() % nb_srv;
+ }
+ else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LH) {
+ struct sample result;
+
+ /* log-balance hash */
+ memset(&result, 0, sizeof(result));
+ result.data.type = SMP_T_STR;
+ result.flags = SMP_F_CONST;
+ result.data.u.str.area = message;
+ result.data.u.str.data = size;
+ result.data.u.str.size = size + 1; /* with terminating NULL byte */
+ if (sample_process_cnv(be->lbprm.expr, &result)) {
+			/* gen_hash takes binary input, so ensure we provide such a value */
+ if (result.data.type == SMP_T_BIN || sample_casts[result.data.type][SMP_T_BIN]) {
+ sample_casts[result.data.type][SMP_T_BIN](&result);
+ targetid = gen_hash(be, result.data.u.str.area, result.data.u.str.data) % nb_srv;
+ }
+ }
+ }
+
+ skip_lb:
+
+ if (targetid == ~0) {
+ /* no target assigned, nothing to do */
+ goto drop;
+ }
+
+ /* find server based on targetid */
+ srv = be->lbprm.log.srv[targetid];
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &be->lbprm.lock);
+
+ __do_send_log(srv->log_target, hdr, nblogger, maxlen, message, size);
+ return;
+
+ drop:
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &be->lbprm.lock);
+ _HA_ATOMIC_INC(&dropped_logs);
+}
+
+/*
+ * This function sends a syslog message.
+ * It doesn't care about errors nor does it report them.
+ * The argument <metadata> MUST be an array of LOG_META_FIELDS
+ * struct ist entries containing the data to build the header.
+ */
+void process_send_log(struct list *loggers, int level, int facility,
+ struct ist *metadata, char *message, size_t size)
+{
+ struct logger *logger;
+ int nblogger;
+
+ /* Send log messages to syslog server. */
+ nblogger = 0;
+ list_for_each_entry(logger, loggers, list) {
+ int in_range = 1;
+
+ /* we can filter the level of the messages that are sent to each logger */
+ if (level > logger->level)
+ continue;
+
+ if (logger->lb.smp_rgs) {
+ struct smp_log_range *smp_rg;
+ uint next_idx, curr_rg;
+ ullong curr_rg_idx, next_rg_idx;
+
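+			/* curr_rg_idx packs the current range index in its high
+			 * 32 bits and the message counter in its low 32 bits;
+			 * the CAS loop below consumes one index at a time
+			 * without taking any lock.
+			 */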
+ curr_rg_idx = _HA_ATOMIC_LOAD(&logger->lb.curr_rg_idx);
+ do {
+ next_idx = (curr_rg_idx & 0xFFFFFFFFU) + 1;
+ curr_rg = curr_rg_idx >> 32;
+ smp_rg = &logger->lb.smp_rgs[curr_rg];
+
+ /* check if the index we're going to take is within range */
+ in_range = smp_rg->low <= next_idx && next_idx <= smp_rg->high;
+ if (in_range) {
+ /* Let's consume this range. */
+ if (next_idx == smp_rg->high) {
+ /* If consumed, let's select the next range. */
+ curr_rg = (curr_rg + 1) % logger->lb.smp_rgs_sz;
+ }
+ }
+
+ next_idx = next_idx % logger->lb.smp_sz;
+ next_rg_idx = ((ullong)curr_rg << 32) + next_idx;
+ } while (!_HA_ATOMIC_CAS(&logger->lb.curr_rg_idx, &curr_rg_idx, next_rg_idx) &&
+ __ha_cpu_relax());
+ }
+ if (in_range) {
+ struct log_header hdr;
+
+ hdr.level = MAX(level, logger->minlvl);
+ hdr.facility = (facility == -1) ? logger->facility : facility;
+ hdr.format = logger->format;
+ hdr.metadata = metadata;
+
+ nblogger += 1;
+ if (logger->target.type == LOG_TARGET_BACKEND) {
+ __do_send_log_backend(logger->target.be, hdr, nblogger, logger->maxlen, message, size);
+ }
+ else {
+ /* normal target */
+ __do_send_log(&logger->target, hdr, nblogger, logger->maxlen, message, size);
+ }
+ }
+ }
+}
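+
+/* Illustrative sketch (not part of this patch): a minimal caller would
+ * prepare the metadata array and invoke the function as follows, passing
+ * facility == -1 so that each logger's configured facility applies:
+ *
+ *	struct ist metadata[LOG_META_FIELDS] = { };
+ *
+ *	metadata[LOG_META_TAG] = ist("haproxy");
+ *	process_send_log(&global.loggers, LOG_INFO, -1, metadata, msg, msglen);
+ *
+ * where <msg> and <msglen> are assumed to hold the formatted message.
+ */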
+
+/*
+ * This function sends a syslog message.
+ * It doesn't care about errors nor does it report them.
+ * The arguments <sd> and <sd_size> are used for the structured-data part
+ * in RFC5424 formatted syslog messages.
+ */
+void __send_log(struct list *loggers, struct buffer *tagb, int level,
+ char *message, size_t size, char *sd, size_t sd_size)
+{
+ static THREAD_LOCAL pid_t curr_pid;
+ static THREAD_LOCAL char pidstr[16];
+ static THREAD_LOCAL struct ist metadata[LOG_META_FIELDS];
+
+ if (loggers == NULL) {
+ if (!LIST_ISEMPTY(&global.loggers)) {
+ loggers = &global.loggers;
+ }
+ }
+ if (!loggers || LIST_ISEMPTY(loggers))
+ return;
+
+ if (!metadata[LOG_META_HOST].len) {
+ if (global.log_send_hostname)
+ metadata[LOG_META_HOST] = ist(global.log_send_hostname);
+ }
+
+ if (!tagb || !tagb->area)
+ tagb = &global.log_tag;
+
+ if (tagb)
+ metadata[LOG_META_TAG] = ist2(tagb->area, tagb->data);
+
+ if (unlikely(curr_pid != getpid()))
+ metadata[LOG_META_PID].len = 0;
+
+ if (!metadata[LOG_META_PID].len) {
+ curr_pid = getpid();
+ ltoa_o(curr_pid, pidstr, sizeof(pidstr));
+ metadata[LOG_META_PID] = ist2(pidstr, strlen(pidstr));
+ }
+
+ metadata[LOG_META_STDATA] = ist2(sd, sd_size);
+
+	/* Remove the trailing space from the structured data */
+ while (metadata[LOG_META_STDATA].len && metadata[LOG_META_STDATA].ptr[metadata[LOG_META_STDATA].len-1] == ' ')
+ metadata[LOG_META_STDATA].len--;
+
+ return process_send_log(loggers, level, -1, metadata, message, size);
+}
+
+const char sess_cookie[8] = "NIDVEOU7"; /* No cookie, Invalid cookie, cookie for a Down server, Valid cookie, Expired cookie, Old cookie, Unused, unknown */
+const char sess_set_cookie[8] = "NPDIRU67"; /* No set-cookie, Set-cookie found and left unchanged (passive),
+ Set-cookie Deleted, Set-Cookie Inserted, Set-cookie Rewritten,
+ Set-cookie Updated, unknown, unknown */
+
+/*
+ * try to write a character if there is enough space, or goto out
+ */
+#define LOGCHAR(x) do { \
+ if (tmplog < dst + maxsize - 1) { \
+ *(tmplog++) = (x); \
+ } else { \
+ goto out; \
+ } \
+ } while(0)
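+
+/* Note: LOGCHAR() relies on <tmplog>, <dst> and <maxsize> being in scope,
+ * as well as an "out:" label to jump to when the buffer is full, as done in
+ * sess_build_logline() below.
+ */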
+
+
+/* Initializes some log data at boot */
+static void init_log()
+{
+ char *tmp;
+ int i;
+
+	/* Initialize the escape map for RFC5424 structured-data: the characters
+	 * '"', '\' and ']' inside a PARAM-VALUE must be escaped with '\' as
+	 * prefix. See https://tools.ietf.org/html/rfc5424#section-6.3.3 for
+	 * more details.
+	 */
+ memset(rfc5424_escape_map, 0, sizeof(rfc5424_escape_map));
+
+ tmp = "\"\\]";
+ while (*tmp) {
+ ha_bit_set(*tmp, rfc5424_escape_map);
+ tmp++;
+ }
+
+	/* initialize the log header encoding map: '{', '|', '}', '"' and '#'
+	 * must be encoded with '#' as prefix, as well as non-printable
+	 * characters (< 32 or >= 127). URL encoding only requires '"' and '#'
+	 * to be encoded, in addition to the non-printable characters above.
+	 */
+ memset(hdr_encode_map, 0, sizeof(hdr_encode_map));
+ memset(url_encode_map, 0, sizeof(url_encode_map));
+ for (i = 0; i < 32; i++) {
+ ha_bit_set(i, hdr_encode_map);
+ ha_bit_set(i, url_encode_map);
+ }
+ for (i = 127; i < 256; i++) {
+ ha_bit_set(i, hdr_encode_map);
+ ha_bit_set(i, url_encode_map);
+ }
+
+ tmp = "\"#{|}";
+ while (*tmp) {
+ ha_bit_set(*tmp, hdr_encode_map);
+ tmp++;
+ }
+
+ tmp = "\"#";
+ while (*tmp) {
+ ha_bit_set(*tmp, url_encode_map);
+ tmp++;
+ }
+
+	/* initialize the http header encoding map. The httpbis draft defines
+	 * the header content as:
+ *
+ * HTTP-message = start-line
+ * *( header-field CRLF )
+ * CRLF
+ * [ message-body ]
+ * header-field = field-name ":" OWS field-value OWS
+ * field-value = *( field-content / obs-fold )
+ * field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+ * obs-fold = CRLF 1*( SP / HTAB )
+ * field-vchar = VCHAR / obs-text
+ * VCHAR = %x21-7E
+ * obs-text = %x80-FF
+ *
+ * All the chars are encoded except "VCHAR", "obs-text", SP and HTAB.
+	 * The encoded chars range from 0x00 to 0x08, 0x0a to 0x1f and 0x7f. The
+	 * "obs-fold" is voluntarily ignored because haproxy removes it.
+ */
+ memset(http_encode_map, 0, sizeof(http_encode_map));
+ for (i = 0x00; i <= 0x08; i++)
+ ha_bit_set(i, http_encode_map);
+ for (i = 0x0a; i <= 0x1f; i++)
+ ha_bit_set(i, http_encode_map);
+ ha_bit_set(0x7f, http_encode_map);
+}
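+
+/* The maps built above are later passed to lf_encode_string() and
+ * lf_encode_chunk(), which encode any byte flagged in the map using the
+ * given escape character.
+ */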
+
+INITCALL0(STG_PREPARE, init_log);
+
+/* Initialize log buffers used for syslog messages */
+int init_log_buffers()
+{
+ logline = my_realloc2(logline, global.max_syslog_len + 1);
+ logline_rfc5424 = my_realloc2(logline_rfc5424, global.max_syslog_len + 1);
+ if (!logline || !logline_rfc5424)
+ return 0;
+ return 1;
+}
+
+/* Deinitialize log buffers used for syslog messages */
+void deinit_log_buffers()
+{
+ free(logline);
+ free(logline_rfc5424);
+ logline = NULL;
+ logline_rfc5424 = NULL;
+}
+
+/* Deinitialize log forwarder proxies used for syslog messages */
+void deinit_log_forward()
+{
+ struct proxy *p, *p0;
+
+ p = cfg_log_forward;
+	/* we need to manually clean the cfg_log_forward proxy list */
+ while (p) {
+ p0 = p;
+ p = p->next;
+ free_proxy(p0);
+ }
+}
+
+/* Builds a log line in <dst> based on <list_format>, and stops before reaching
+ * <maxsize> characters. Returns the size of the output string in characters,
+ * not counting the trailing zero which is always added if the resulting size
+ * is not zero. It requires a valid session and optionally a stream. If the
+ * stream is NULL, default values will be assumed for the stream part.
+ */
+int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct list *list_format)
+{
+ struct proxy *fe = sess->fe;
+ struct proxy *be;
+ struct http_txn *txn;
+ const struct strm_logs *logs;
+ struct connection *fe_conn, *be_conn;
+ unsigned int s_flags;
+ unsigned int uniq_id;
+ struct buffer chunk;
+ char *uri;
+ char *spc;
+ char *qmark;
+ char *end;
+ struct tm tm;
+ int t_request;
+ int hdr;
+ int last_isspace = 1;
+ int nspaces = 0;
+ char *tmplog;
+ char *ret;
+ int iret;
+ int status;
+ struct logformat_node *tmp;
+ struct timeval tv;
+ struct strm_logs tmp_strm_log;
+ struct ist path;
+ struct http_uri_parser parser;
+
+ /* FIXME: let's limit ourselves to frontend logging for now. */
+
+ if (likely(s)) {
+ be = s->be;
+ txn = s->txn;
+ be_conn = sc_conn(s->scb);
+ status = (txn ? txn->status : 0);
+ s_flags = s->flags;
+ uniq_id = s->uniq_id;
+ logs = &s->logs;
+ } else {
+ /* we have no stream so we first need to initialize a few
+ * things that are needed later. We do increment the request
+ * ID so that it's uniquely assigned to this request just as
+ * if the request had reached the point of being processed.
+		 * A request error is reported since it is the only element we
+		 * have here that justifies emitting such a log.
+ */
+ be = ((obj_type(sess->origin) == OBJ_TYPE_CHECK) ? __objt_check(sess->origin)->proxy : fe);
+ txn = NULL;
+ fe_conn = objt_conn(sess->origin);
+ be_conn = ((obj_type(sess->origin) == OBJ_TYPE_CHECK) ? sc_conn(__objt_check(sess->origin)->sc) : NULL);
+ status = 0;
+ s_flags = SF_ERR_PRXCOND | SF_FINST_R;
+ uniq_id = _HA_ATOMIC_FETCH_ADD(&global.req_count, 1);
+
+ /* prepare a valid log structure */
+ tmp_strm_log.accept_ts = sess->accept_ts;
+ tmp_strm_log.accept_date = sess->accept_date;
+ tmp_strm_log.t_handshake = sess->t_handshake;
+ tmp_strm_log.t_idle = (sess->t_idle >= 0 ? sess->t_idle : 0);
+ tmp_strm_log.request_ts = 0;
+ tmp_strm_log.t_queue = -1;
+ tmp_strm_log.t_connect = -1;
+ tmp_strm_log.t_data = -1;
+ tmp_strm_log.t_close = ns_to_ms(now_ns - sess->accept_ts);
+ tmp_strm_log.bytes_in = 0;
+ tmp_strm_log.bytes_out = 0;
+ tmp_strm_log.prx_queue_pos = 0;
+ tmp_strm_log.srv_queue_pos = 0;
+
+ logs = &tmp_strm_log;
+
+ if ((fe->mode == PR_MODE_HTTP) && fe_conn && fe_conn->mux && fe_conn->mux->ctl) {
+ enum mux_exit_status es = fe_conn->mux->ctl(fe_conn, MUX_CTL_EXIT_STATUS, &status);
+
+ switch (es) {
+ case MUX_ES_SUCCESS:
+ break;
+ case MUX_ES_INVALID_ERR:
+ status = (status ? status : 400);
+ if ((fe_conn->flags & CO_FL_ERROR) || conn_xprt_read0_pending(fe_conn))
+ s_flags = SF_ERR_CLICL | SF_FINST_R;
+ else
+ s_flags = SF_ERR_PRXCOND | SF_FINST_R;
+ break;
+ case MUX_ES_TOUT_ERR:
+ status = (status ? status : 408);
+ s_flags = SF_ERR_CLITO | SF_FINST_R;
+ break;
+ case MUX_ES_NOTIMPL_ERR:
+ status = (status ? status : 501);
+ s_flags = SF_ERR_PRXCOND | SF_FINST_R;
+ break;
+ case MUX_ES_INTERNAL_ERR:
+ status = (status ? status : 500);
+ s_flags = SF_ERR_INTERNAL | SF_FINST_R;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ t_request = -1;
+ if ((llong)(logs->request_ts - logs->accept_ts) >= 0)
+ t_request = ns_to_ms(logs->request_ts - logs->accept_ts);
+
+ tmplog = dst;
+
+ /* fill logbuffer */
+ if (LIST_ISEMPTY(list_format))
+ return 0;
+
+ list_for_each_entry(tmp, list_format, list) {
+#ifdef USE_OPENSSL
+ struct connection *conn;
+#endif
+ const struct sockaddr_storage *addr;
+ const char *src = NULL;
+ struct sample *key;
+ const struct buffer empty = { };
+
+ switch (tmp->type) {
+ case LOG_FMT_SEPARATOR:
+ if (!last_isspace) {
+ LOGCHAR(' ');
+ last_isspace = 1;
+ }
+ break;
+
+ case LOG_FMT_TEXT: // text
+ src = tmp->arg;
+ iret = strlcpy2(tmplog, src, dst + maxsize - tmplog);
+ if (iret == 0)
+ goto out;
+ tmplog += iret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_EXPR: // sample expression, may be request or response
+ key = NULL;
+ if (tmp->options & LOG_OPT_REQ_CAP)
+ key = sample_fetch_as_type(be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, tmp->expr, SMP_T_STR);
+
+ if (!key && (tmp->options & LOG_OPT_RES_CAP))
+ key = sample_fetch_as_type(be, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, tmp->expr, SMP_T_STR);
+
+ if (!key && !(tmp->options & (LOG_OPT_REQ_CAP|LOG_OPT_RES_CAP))) // cfg, cli
+ key = sample_fetch_as_type(be, sess, s, SMP_OPT_FINAL, tmp->expr, SMP_T_STR);
+
+ if (tmp->options & LOG_OPT_HTTP)
+ ret = lf_encode_chunk(tmplog, dst + maxsize,
+ '%', http_encode_map, key ? &key->data.u.str : &empty, tmp);
+ else
+ ret = lf_text_len(tmplog,
+ key ? key->data.u.str.area : NULL,
+ key ? key->data.u.str.data : 0,
+ dst + maxsize - tmplog,
+ tmp);
+ if (ret == 0)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_CLIENTIP: // %ci
+ addr = (s ? sc_src(s->scf) : sess_src(sess));
+ if (addr)
+ ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_CLIENTPORT: // %cp
+ addr = (s ? sc_src(s->scf) : sess_src(sess));
+ if (addr) {
+				/* sess->listener is always defined when the session's owner is an inbound connection */
+ if (addr->ss_family == AF_UNIX)
+ ret = ltoa_o(sess->listener->luid, tmplog, dst + maxsize - tmplog);
+ else
+ ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp);
+ }
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_FRONTENDIP: // %fi
+ addr = (s ? sc_dst(s->scf) : sess_dst(sess));
+ if (addr)
+ ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_FRONTENDPORT: // %fp
+ addr = (s ? sc_dst(s->scf) : sess_dst(sess));
+ if (addr) {
+				/* sess->listener is always defined when the session's owner is an inbound connection */
+ if (addr->ss_family == AF_UNIX)
+ ret = ltoa_o(sess->listener->luid, tmplog, dst + maxsize - tmplog);
+ else
+ ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp);
+ }
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BACKENDIP: // %bi
+ if (be_conn && conn_get_src(be_conn))
+ ret = lf_ip(tmplog, (const struct sockaddr *)be_conn->src, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BACKENDPORT: // %bp
+ if (be_conn && conn_get_src(be_conn))
+ ret = lf_port(tmplog, (struct sockaddr *)be_conn->src, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SERVERIP: // %si
+ if (be_conn && conn_get_dst(be_conn))
+ ret = lf_ip(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SERVERPORT: // %sp
+ if (be_conn && conn_get_dst(be_conn))
+ ret = lf_port(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp);
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_DATE: // %t = accept date
+ get_localtime(logs->accept_date.tv_sec, &tm);
+ ret = date2str_log(tmplog, &tm, &logs->accept_date, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_tr: // %tr = start of request date
+ /* Note that the timers are valid if we get here */
+ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0);
+ get_localtime(tv.tv_sec, &tm);
+ ret = date2str_log(tmplog, &tm, &tv, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_DATEGMT: // %T = accept date, GMT
+ get_gmtime(logs->accept_date.tv_sec, &tm);
+ ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_trg: // %trg = start of request date, GMT
+ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0);
+ get_gmtime(tv.tv_sec, &tm);
+ ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_DATELOCAL: // %Tl = accept date, local
+ get_localtime(logs->accept_date.tv_sec, &tm);
+ ret = localdate2str_log(tmplog, logs->accept_date.tv_sec, &tm, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_trl: // %trl = start of request date, local
+ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0);
+ get_localtime(tv.tv_sec, &tm);
+ ret = localdate2str_log(tmplog, tv.tv_sec, &tm, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TS: // %Ts
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", (unsigned int)logs->accept_date.tv_sec);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ ret = ltoa_o(logs->accept_date.tv_sec, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_MS: // %ms
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%02X",(unsigned int)logs->accept_date.tv_usec/1000);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ if ((dst + maxsize - tmplog) < 4)
+ goto out;
+ ret = utoa_pad((unsigned int)logs->accept_date.tv_usec/1000,
+ tmplog, 4);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_FRONTEND: // %f
+ src = fe->id;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_FRONTEND_XPRT: // %ft
+ src = fe->id;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ iret = strlcpy2(tmplog, src, dst + maxsize - tmplog);
+ if (iret == 0)
+ goto out;
+ tmplog += iret;
+
+ /* sess->listener may be undefined if the session's owner is a health-check */
+ if (sess->listener && sess->listener->bind_conf->xprt->get_ssl_sock_ctx)
+ LOGCHAR('~');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+#ifdef USE_OPENSSL
+ case LOG_FMT_SSL_CIPHER: // %sslc
+ src = NULL;
+ conn = objt_conn(sess->origin);
+ if (conn) {
+ src = ssl_sock_get_cipher_name(conn);
+ }
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SSL_VERSION: // %sslv
+ src = NULL;
+ conn = objt_conn(sess->origin);
+ if (conn) {
+ src = ssl_sock_get_proto_version(conn);
+ }
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+#endif
+ case LOG_FMT_BACKEND: // %b
+ src = be->id;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SERVER: // %s
+ switch (obj_type(s ? s->target : sess->origin)) {
+ case OBJ_TYPE_SERVER:
+ src = __objt_server(s->target)->id;
+ break;
+ case OBJ_TYPE_APPLET:
+ src = __objt_applet(s->target)->name;
+ break;
+ case OBJ_TYPE_CHECK:
+ src = (__objt_check(sess->origin)->server
+ ? __objt_check(sess->origin)->server->id
+ : "<NOSRV>");
+ break;
+ default:
+ src = "<NOSRV>";
+ break;
+ }
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_Th: // %Th = handshake time
+ ret = ltoa_o(logs->t_handshake, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_Ti: // %Ti = HTTP idle time
+ ret = ltoa_o(logs->t_idle, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TR: // %TR = HTTP request time
+ ret = ltoa_o((t_request >= 0) ? t_request - logs->t_idle - logs->t_handshake : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TQ: // %Tq = Th + Ti + TR
+ ret = ltoa_o(t_request, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TW: // %Tw
+ ret = ltoa_o((logs->t_queue >= 0) ? logs->t_queue - t_request : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TC: // %Tc
+ ret = ltoa_o((logs->t_connect >= 0) ? logs->t_connect - logs->t_queue : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_Tr: // %Tr
+ ret = ltoa_o((logs->t_data >= 0) ? logs->t_data - logs->t_connect : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TD: // %Td
+ if (be->mode == PR_MODE_HTTP)
+ ret = ltoa_o((logs->t_data >= 0) ? logs->t_close - logs->t_data : -1,
+ tmplog, dst + maxsize - tmplog);
+ else
+ ret = ltoa_o((logs->t_connect >= 0) ? logs->t_close - logs->t_connect : -1,
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_Ta: // %Ta = active time = Tt - Th - Ti
+ if (!(fe->to_log & LW_BYTES))
+ LOGCHAR('+');
+ ret = ltoa_o(logs->t_close - (logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0),
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TT: // %Tt = total time
+ if (!(fe->to_log & LW_BYTES))
+ LOGCHAR('+');
+ ret = ltoa_o(logs->t_close, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TU: // %Tu = total time seen by user = Tt - Ti
+ if (!(fe->to_log & LW_BYTES))
+ LOGCHAR('+');
+ ret = ltoa_o(logs->t_close - (logs->t_idle >= 0 ? logs->t_idle : 0),
+ tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_STATUS: // %ST
+ ret = ltoa_o(status, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BYTES: // %B
+ if (!(fe->to_log & LW_BYTES))
+ LOGCHAR('+');
+ ret = lltoa(logs->bytes_out, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BYTES_UP: // %U
+ ret = lltoa(logs->bytes_in, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_CCLIENT: // %CC
+ src = txn ? txn->cli_cookie : NULL;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_CSERVER: // %CS
+ src = txn ? txn->srv_cookie : NULL;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TERMSTATE: // %ts
+ LOGCHAR(sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]);
+ LOGCHAR(sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]);
+ *tmplog = '\0';
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_TERMSTATE_CK: // %tsc, same as TS with cookie state (for mode HTTP)
+ LOGCHAR(sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]);
+ LOGCHAR(sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]);
+ LOGCHAR((txn && (be->ck_opts & PR_CK_ANY)) ? sess_cookie[(txn->flags & TX_CK_MASK) >> TX_CK_SHIFT] : '-');
+ LOGCHAR((txn && (be->ck_opts & PR_CK_ANY)) ? sess_set_cookie[(txn->flags & TX_SCK_MASK) >> TX_SCK_SHIFT] : '-');
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_ACTCONN: // %ac
+ ret = ltoa_o(actconn, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_FECONN: // %fc
+ ret = ltoa_o(fe->feconn, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BECONN: // %bc
+ ret = ltoa_o(be->beconn, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SRVCONN: // %sc
+ switch (obj_type(s ? s->target : sess->origin)) {
+ case OBJ_TYPE_SERVER:
+ ret = ultoa_o(__objt_server(s->target)->cur_sess,
+ tmplog, dst + maxsize - tmplog);
+ break;
+ case OBJ_TYPE_CHECK:
+ ret = ultoa_o(__objt_check(sess->origin)->server
+ ? __objt_check(sess->origin)->server->cur_sess
+ : 0, tmplog, dst + maxsize - tmplog);
+ break;
+ default:
+ ret = ultoa_o(0, tmplog, dst + maxsize - tmplog);
+ break;
+ }
+
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_RETRIES: // %rc
+ if (s_flags & SF_REDISP)
+ LOGCHAR('+');
+ ret = ltoa_o((s ? s->conn_retries : 0), tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_SRVQUEUE: // %sq
+ ret = ltoa_o(logs->srv_queue_pos, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_BCKQUEUE: // %bq
+ ret = ltoa_o(logs->prx_queue_pos, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HDRREQUEST: // %hr
+ /* request header */
+ if (fe->nb_req_cap && s && s->req_cap) {
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ LOGCHAR('{');
+ for (hdr = 0; hdr < fe->nb_req_cap; hdr++) {
+ if (hdr)
+ LOGCHAR('|');
+ if (s->req_cap[hdr] != NULL) {
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', hdr_encode_map, s->req_cap[hdr], tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ }
+ }
+ LOGCHAR('}');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_HDRREQUESTLIST: // %hrl
+ /* request header list */
+ if (fe->nb_req_cap && s && s->req_cap) {
+ for (hdr = 0; hdr < fe->nb_req_cap; hdr++) {
+ if (hdr > 0)
+ LOGCHAR(' ');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ if (s->req_cap[hdr] != NULL) {
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', hdr_encode_map, s->req_cap[hdr], tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ } else if (!(tmp->options & LOG_OPT_QUOTE))
+ LOGCHAR('-');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ }
+ }
+ break;
+
+ case LOG_FMT_HDRRESPONS: // %hs
+ /* response header */
+ if (fe->nb_rsp_cap && s && s->res_cap) {
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ LOGCHAR('{');
+ for (hdr = 0; hdr < fe->nb_rsp_cap; hdr++) {
+ if (hdr)
+ LOGCHAR('|');
+ if (s->res_cap[hdr] != NULL) {
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', hdr_encode_map, s->res_cap[hdr], tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ }
+ }
+ LOGCHAR('}');
+ last_isspace = 0;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ }
+ break;
+
+ case LOG_FMT_HDRRESPONSLIST: // %hsl
+ /* response header list */
+ if (fe->nb_rsp_cap && s && s->res_cap) {
+ for (hdr = 0; hdr < fe->nb_rsp_cap; hdr++) {
+ if (hdr > 0)
+ LOGCHAR(' ');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ if (s->res_cap[hdr] != NULL) {
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', hdr_encode_map, s->res_cap[hdr], tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ } else if (!(tmp->options & LOG_OPT_QUOTE))
+ LOGCHAR('-');
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ }
+ }
+ break;
+
+ case LOG_FMT_REQ: // %r
+ /* Request */
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+ ret = lf_encode_string(tmplog, dst + maxsize,
+ '#', url_encode_map, uri, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_PATH: // %HP
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+ // look for the first whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri)) {
+ uri++; nspaces++;
+ }
+
+ // look for first space or question mark after url
+ spc = uri;
+ while (spc < end && *spc != '?' && !HTTP_IS_SPHT(*spc))
+ spc++;
+
+ if (!txn || !txn->uri || nspaces == 0) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ chunk.area = uri;
+ chunk.data = spc - uri;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_PATH_ONLY: // %HPO
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+
+ // look for the first whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri)) {
+ uri++; nspaces++;
+ }
+
+ // look for first space after url
+ spc = uri;
+ while (spc < end && !HTTP_IS_SPHT(*spc))
+ spc++;
+
+ path = ist2(uri, spc - uri);
+
+ // extract relative path without query params from url
+ parser = http_uri_parser_init(path);
+ path = iststop(http_parse_path(&parser), '?');
+ if (!txn || !txn->uri || nspaces == 0) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ chunk.area = path.ptr;
+ chunk.data = path.len;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_QUERY: // %HQ
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ if (!txn || !txn->uri) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ uri = txn->uri;
+ end = uri + strlen(uri);
+ // look for the first question mark
+ while (uri < end && *uri != '?')
+ uri++;
+
+ qmark = uri;
+ // look for first space or question mark after url
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ chunk.area = qmark;
+ chunk.data = uri - qmark;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_URI: // %HU
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+ // look for the first whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri)) {
+ uri++; nspaces++;
+ }
+
+ // look for first space after url
+ spc = uri;
+ while (spc < end && !HTTP_IS_SPHT(*spc))
+ spc++;
+
+ if (!txn || !txn->uri || nspaces == 0) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ chunk.area = uri;
+ chunk.data = spc - uri;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_METHOD: // %HM
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+ // look for the first whitespace character
+ spc = uri;
+ while (spc < end && !HTTP_IS_SPHT(*spc))
+ spc++;
+
+ if (spc == end) { // odd case, we have txn->uri, but we only got a verb
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else {
+ chunk.area = uri;
+ chunk.data = spc - uri;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_HTTP_VERSION: // %HV
+ uri = txn && txn->uri ? txn->uri : "<BADREQ>";
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ end = uri + strlen(uri);
+ // look for the first whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri)) {
+ uri++; nspaces++;
+ }
+
+ // look for the next whitespace character
+ while (uri < end && !HTTP_IS_SPHT(*uri))
+ uri++;
+
+ // keep advancing past multiple spaces
+ while (uri < end && HTTP_IS_SPHT(*uri))
+ uri++;
+
+ if (!txn || !txn->uri || nspaces == 0) {
+ chunk.area = "<BADREQ>";
+ chunk.data = strlen("<BADREQ>");
+ } else if (uri == end) {
+ chunk.area = "HTTP/0.9";
+ chunk.data = strlen("HTTP/0.9");
+ } else {
+ chunk.area = uri;
+ chunk.data = end - uri;
+ }
+
+ ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp);
+ if (ret == NULL || *ret != '\0')
+ goto out;
+
+ tmplog = ret;
+ if (tmp->options & LOG_OPT_QUOTE)
+ LOGCHAR('"');
+
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_COUNTER: // %rt
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", uniq_id);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ ret = ltoa_o(uniq_id, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_LOGCNT: // %lc
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", fe->log_count);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ ret = ultoa_o(fe->log_count, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_HOSTNAME: // %H
+ src = hostname;
+ ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ case LOG_FMT_PID: // %pid
+ if (tmp->options & LOG_OPT_HEXA) {
+ iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", pid);
+ if (iret < 0 || iret > dst + maxsize - tmplog)
+ goto out;
+ last_isspace = 0;
+ tmplog += iret;
+ } else {
+ ret = ltoa_o(pid, tmplog, dst + maxsize - tmplog);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ }
+ break;
+
+ case LOG_FMT_UNIQUEID: // %ID
+ ret = NULL;
+ if (s)
+ ret = lf_text_len(tmplog, s->unique_id.ptr, s->unique_id.len, maxsize - (tmplog - dst), tmp);
+ else
+ ret = lf_text_len(tmplog, NULL, 0, maxsize - (tmplog - dst), tmp);
+ if (ret == NULL)
+ goto out;
+ tmplog = ret;
+ last_isspace = 0;
+ break;
+
+ }
+ }
+
+out:
+	/* *tmplog is an unused character */
+ *tmplog = '\0';
+ return tmplog - dst;
+
+}
+
+/*
+ * send a log for the stream when we have enough info about it.
+ * Will not log if the frontend has no log defined.
+ */
+void strm_log(struct stream *s)
+{
+ struct session *sess = s->sess;
+ int size, err, level;
+ int sd_size = 0;
+
+ /* if we don't want to log normal traffic, return now */
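+	/* redispatched streams, errors beyond local ones, streams that needed
+	 * retries and HTTP 5xx responses are all considered errors here
+	 */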
+ err = (s->flags & SF_REDISP) ||
+ ((s->flags & SF_ERR_MASK) > SF_ERR_LOCAL) ||
+ (((s->flags & SF_ERR_MASK) == SF_ERR_NONE) && s->conn_retries) ||
+ ((sess->fe->mode == PR_MODE_HTTP) && s->txn && s->txn->status >= 500);
+
+ if (!err && (sess->fe->options2 & PR_O2_NOLOGNORM))
+ return;
+
+ if (LIST_ISEMPTY(&sess->fe->loggers))
+ return;
+
+ if (s->logs.level) { /* loglevel was overridden */
+ if (s->logs.level == -1) {
+ s->logs.logwait = 0; /* logs disabled */
+ return;
+ }
+ level = s->logs.level - 1;
+ }
+ else {
+ level = LOG_INFO;
+ if (err && (sess->fe->options2 & PR_O2_LOGERRORS))
+ level = LOG_ERR;
+ }
+
+ /* if unique-id was not generated */
+ if (!isttest(s->unique_id) && !LIST_ISEMPTY(&sess->fe->format_unique_id)) {
+ stream_generate_unique_id(s, &sess->fe->format_unique_id);
+ }
+
+ if (!LIST_ISEMPTY(&sess->fe->logformat_sd)) {
+ sd_size = build_logline(s, logline_rfc5424, global.max_syslog_len,
+ &sess->fe->logformat_sd);
+ }
+
+ size = build_logline(s, logline, global.max_syslog_len, &sess->fe->logformat);
+ if (size > 0) {
+ _HA_ATOMIC_INC(&sess->fe->log_count);
+ __send_log(&sess->fe->loggers, &sess->fe->log_tag, level,
+ logline, size + 1, logline_rfc5424, sd_size);
+ s->logs.logwait = 0;
+ }
+}
+
+/*
+ * send a minimalist log for the session. Will not log if the frontend has no
+ * log defined. It is assumed that this is only used to report anomalies that
+ * cannot lead to the creation of a regular stream. Because of this the log
+ * level is LOG_INFO or LOG_ERR depending on the "log-separate-errors" setting
+ * in the frontend. The caller must simply know that it should not call this
+ * function to report unimportant events. It is safe to call this function with
+ * sess==NULL (will not do anything).
+ */
+void sess_log(struct session *sess)
+{
+ int size, level;
+ int sd_size = 0;
+
+ if (!sess)
+ return;
+
+ if (LIST_ISEMPTY(&sess->fe->loggers))
+ return;
+
+ level = LOG_INFO;
+ if (sess->fe->options2 & PR_O2_LOGERRORS)
+ level = LOG_ERR;
+
+ if (!LIST_ISEMPTY(&sess->fe->logformat_sd)) {
+ sd_size = sess_build_logline(sess, NULL,
+ logline_rfc5424, global.max_syslog_len,
+ &sess->fe->logformat_sd);
+ }
+
+ if (!LIST_ISEMPTY(&sess->fe->logformat_error))
+ size = sess_build_logline(sess, NULL, logline, global.max_syslog_len, &sess->fe->logformat_error);
+ else
+ size = sess_build_logline(sess, NULL, logline, global.max_syslog_len, &sess->fe->logformat);
+ if (size > 0) {
+ _HA_ATOMIC_INC(&sess->fe->log_count);
+ __send_log(&sess->fe->loggers, &sess->fe->log_tag, level,
+ logline, size + 1, logline_rfc5424, sd_size);
+ }
+}
+
+void app_log(struct list *loggers, struct buffer *tag, int level, const char *format, ...)
+{
+ va_list argp;
+ int data_len;
+
+ if (level < 0 || format == NULL || logline == NULL)
+ return;
+
+ va_start(argp, format);
+ data_len = vsnprintf(logline, global.max_syslog_len, format, argp);
+ if (data_len < 0 || data_len > global.max_syslog_len)
+ data_len = global.max_syslog_len;
+ va_end(argp);
+
+ __send_log(loggers, tag, level, logline, data_len, default_rfc5424_sd_log_format, 2);
+}
+
+/*
+ * This function parses a received log message <buf> of size <buflen>.
+ * It fills <level>, <facility> and <metadata> depending on the detected
+ * header format, and <message> will point to the remaining payload of
+ * size <size>.
+ *
+ * <metadata> must point to a preallocated array of LOG_META_FIELDS
+ * struct ist entries. A field's len will be set to 0 if the field is
+ * not found, and <level> and <facility> will be set to -1 if not found.
+ */
+void parse_log_message(char *buf, size_t buflen, int *level, int *facility,
+ struct ist *metadata, char **message, size_t *size)
+{
+
+ char *p;
+ int fac_level = 0;
+
+ *level = *facility = -1;
+
+ *message = buf;
+ *size = buflen;
+
+ memset(metadata, 0, LOG_META_FIELDS*sizeof(struct ist));
+
+ p = buf;
+ if (*size < 2 || *p != '<')
+ return;
+
+ p++;
+ while (*p != '>') {
+ if (*p > '9' || *p < '0')
+ return;
+ fac_level = 10*fac_level + (*p - '0');
+ p++;
+ if ((p - buf) > buflen)
+ return;
+ }
+
+ *facility = fac_level >> 3;
+ *level = fac_level & 0x7;
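+	/* e.g. "<134>" yields fac_level 134: facility 134 >> 3 = 16 (local0)
+	 * and level 134 & 0x7 = 6 (info)
+	 */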
+ p++;
+
+ metadata[LOG_META_PRIO] = ist2(buf, p - buf);
+
+ buflen -= p - buf;
+ buf = p;
+
+ *size = buflen;
+ *message = buf;
+
+ /* for rfc5424, prio is always followed by '1' and ' ' */
+ if ((*size > 2) && (p[0] == '1') && (p[1] == ' ')) {
+ /* format is always '1 TIMESTAMP HOSTNAME TAG PID MSGID STDATA '
+ * followed by message.
+ * Each header field can present NILVALUE: '-'
+ */
+
+ p += 2;
+ *size -= 2;
+ /* timestamp is NILVALUE '-' */
+ if (*size > 2 && (p[0] == '-') && p[1] == ' ') {
+ metadata[LOG_META_TIME] = ist2(p, 1);
+ p++;
+ }
+ else if (*size > LOG_ISOTIME_MINLEN) {
+ metadata[LOG_META_TIME].ptr = p;
+
+ /* check if optional secfrac is present
+ * in timestamp.
+			 * possible formats are:
+ * ex: '1970-01-01T00:00:00.000000Z'
+ * '1970-01-01T00:00:00.000000+00:00'
+ * '1970-01-01T00:00:00.000000-00:00'
+ * '1970-01-01T00:00:00Z'
+ * '1970-01-01T00:00:00+00:00'
+ * '1970-01-01T00:00:00-00:00'
+ */
+ p += 19;
+ if (*p == '.') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ while (*p != 'Z' && *p != '+' && *p != '-') {
+ if ((unsigned char)(*p - '0') > 9)
+ goto bad_format;
+
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ }
+
+ if (*p == 'Z')
+ p++;
+ else
+				p += 6; /* case of '+00:00' or '-00:00' */
+
+ if ((p - buf) >= buflen || *p != ' ')
+ goto bad_format;
+ metadata[LOG_META_TIME].len = p - metadata[LOG_META_TIME].ptr;
+ }
+ else
+ goto bad_format;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_HOST].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_HOST].len = p - metadata[LOG_META_HOST].ptr;
+ if (metadata[LOG_META_HOST].len == 1 && metadata[LOG_META_HOST].ptr[0] == '-')
+ metadata[LOG_META_HOST].len = 0;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_TAG].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_TAG].len = p - metadata[LOG_META_TAG].ptr;
+ if (metadata[LOG_META_TAG].len == 1 && metadata[LOG_META_TAG].ptr[0] == '-')
+ metadata[LOG_META_TAG].len = 0;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_PID].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_PID].len = p - metadata[LOG_META_PID].ptr;
+ if (metadata[LOG_META_PID].len == 1 && metadata[LOG_META_PID].ptr[0] == '-')
+ metadata[LOG_META_PID].len = 0;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_MSGID].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_MSGID].len = p - metadata[LOG_META_MSGID].ptr;
+ if (metadata[LOG_META_MSGID].len == 1 && metadata[LOG_META_MSGID].ptr[0] == '-')
+ metadata[LOG_META_MSGID].len = 0;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ /* structured data format is:
+ * ex:
+ * '[key1=value1 key2=value2][key3=value3]'
+ *
+			 * a space is invalid outside [] because it is
+			 * considered as the end of the structured-data field
+ */
+ metadata[LOG_META_STDATA].ptr = p;
+ if (*p == '[') {
+ int elem = 0;
+
+ while (1) {
+ if (elem) {
+					/* according to the RFC, this char is escaped in param values */
+ if (*p == ']' && *(p-1) != '\\')
+ elem = 0;
+ }
+ else {
+ if (*p == '[')
+ elem = 1;
+ else if (*p == ' ')
+ break;
+ else
+ goto bad_format;
+ }
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ }
+ else if (*p == '-') {
+ /* case of NILVALUE */
+ p++;
+ if ((p - buf) >= buflen || *p != ' ')
+ goto bad_format;
+ }
+ else
+ goto bad_format;
+
+ metadata[LOG_META_STDATA].len = p - metadata[LOG_META_STDATA].ptr;
+ if (metadata[LOG_META_STDATA].len == 1 && metadata[LOG_META_STDATA].ptr[0] == '-')
+ metadata[LOG_META_STDATA].len = 0;
+
+ p++;
+
+ buflen -= p - buf;
+ buf = p;
+
+ *size = buflen;
+ *message = p;
+ }
+ else if (*size > LOG_LEGACYTIME_LEN) {
+ int m;
+
+ /* supported header format according to rfc3164.
+ * ex:
+ * 'Jan 1 00:00:00 HOSTNAME TAG[PID]: '
+ * or 'Jan 1 00:00:00 HOSTNAME TAG: '
+ * or 'Jan 1 00:00:00 HOSTNAME '
+			 * Note: HOSTNAME is mandatory, and the day
+			 * of month is prefixed with a single space
+			 * if less than 10, to ensure the hour offset
+			 * is always the same.
+ */
+
+			/* Check the month to see if it corresponds to an rfc3164
+			 * header, ex: 'Jan 1 00:00:00' */
+ for (m = 0; m < 12; m++)
+ if (!memcmp(monthname[m], p, 3))
+ break;
+ /* Month not found */
+ if (m == 12)
+ goto bad_format;
+
+ metadata[LOG_META_TIME] = ist2(p, LOG_LEGACYTIME_LEN);
+
+ p += LOG_LEGACYTIME_LEN;
+ if ((p - buf) >= buflen || *p != ' ')
+ goto bad_format;
+
+ p++;
+ if ((p - buf) >= buflen || *p == ' ')
+ goto bad_format;
+
+ metadata[LOG_META_HOST].ptr = p;
+ while (*p != ' ') {
+ p++;
+ if ((p - buf) >= buflen)
+ goto bad_format;
+ }
+ metadata[LOG_META_HOST].len = p - metadata[LOG_META_HOST].ptr;
+
+			/* TAG does not seem to be mandatory */
+ p++;
+
+ buflen -= p - buf;
+ buf = p;
+
+ *size = buflen;
+ *message = buf;
+
+ if (!buflen)
+ return;
+
+ while (((p - buf) < buflen) && *p != ' ' && *p != ':')
+ p++;
+
+			/* a tag must be followed by a trailing ':' */
+ if (((p - buf) >= buflen) || *p != ':')
+ return;
+ p++;
+ /* followed by a space */
+ if (((p - buf) >= buflen) || *p != ' ')
+ return;
+
+ /* rewind to parse tag and pid */
+ p = buf;
+ metadata[LOG_META_TAG].ptr = p;
+			/* we have the guarantee that ':' will be reached before the size limit */
+ while (*p != ':') {
+ if (*p == '[') {
+ metadata[LOG_META_TAG].len = p - metadata[LOG_META_TAG].ptr;
+ metadata[LOG_META_PID].ptr = p + 1;
+ }
+ else if (*p == ']' && isttest(metadata[LOG_META_PID])) {
+ if (p[1] != ':')
+ return;
+ metadata[LOG_META_PID].len = p - metadata[LOG_META_PID].ptr;
+ }
+ p++;
+ }
+ if (!metadata[LOG_META_TAG].len)
+ metadata[LOG_META_TAG].len = p - metadata[LOG_META_TAG].ptr;
+
+			/* skip ':' and ' '; we still have the guarantee that the size is large enough */
+ p += 2;
+
+ buflen -= p - buf;
+ buf = p;
+
+ *size = buflen;
+ *message = buf;
+ }
+
+ return;
+
+bad_format:
+	/* bad syslog format: we reset all parsed syslog fields,
+	 * but the priority is kept because we are able to rebuild
+	 * this message using LOG_FORMAT_PRIO.
+	 */
+ metadata[LOG_META_TIME].len = 0;
+ metadata[LOG_META_HOST].len = 0;
+ metadata[LOG_META_TAG].len = 0;
+ metadata[LOG_META_PID].len = 0;
+ metadata[LOG_META_MSGID].len = 0;
+ metadata[LOG_META_STDATA].len = 0;
+
+ return;
+}
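+
+/* Illustrative sketch (not part of this patch): given the RFC5424 input
+ *   "<134>1 2024-01-01T00:00:00Z host tag 123 ID47 - payload"
+ * this parser would set facility=16 and level=6, fill LOG_META_TIME, HOST,
+ * TAG, PID and MSGID, leave LOG_META_STDATA empty (NILVALUE '-'), and make
+ * <message> point to "payload".
+ */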
+
+/*
+ * UDP syslog fd handler
+ */
+void syslog_fd_handler(int fd)
+{
+ static THREAD_LOCAL struct ist metadata[LOG_META_FIELDS];
+ ssize_t ret = 0;
+ struct buffer *buf = get_trash_chunk();
+ size_t size;
+ char *message;
+ int level;
+ int facility;
+ struct listener *l = objt_listener(fdtab[fd].owner);
+ int max_accept;
+
+ BUG_ON(!l);
+
+ if (fdtab[fd].state & FD_POLL_IN) {
+
+ if (!fd_recv_ready(fd))
+ return;
+
+ max_accept = l->bind_conf->maxaccept ? l->bind_conf->maxaccept : 1;
+
+ do {
+ /* Source address */
+ struct sockaddr_storage saddr = {0};
+ socklen_t saddrlen;
+
+ saddrlen = sizeof(saddr);
+
+ ret = recvfrom(fd, buf->area, buf->size, 0, (struct sockaddr *)&saddr, &saddrlen);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ fd_cant_recv(fd);
+ goto out;
+ }
+ buf->data = ret;
+
+ /* update counters */
+ _HA_ATOMIC_INC(&cum_log_messages);
+ proxy_inc_fe_req_ctr(l, l->bind_conf->frontend, 0);
+
+ parse_log_message(buf->area, buf->data, &level, &facility, metadata, &message, &size);
+
+ process_send_log(&l->bind_conf->frontend->loggers, level, facility, metadata, message, size);
+
+ } while (--max_accept);
+ }
+
+out:
+ return;
+}
+
+/*
+ * IO Handler to handle message exchange with a syslog tcp client
+ */
+static void syslog_io_handler(struct appctx *appctx)
+{
+ static THREAD_LOCAL struct ist metadata[LOG_META_FIELDS];
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct proxy *frontend = strm_fe(s);
+ struct listener *l = strm_li(s);
+ struct buffer *buf = get_trash_chunk();
+ int max_accept;
+ int to_skip;
+ int facility;
+ int level;
+ char *message;
+ size_t size;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ goto out;
+ }
+
+ max_accept = l->bind_conf->maxaccept ? l->bind_conf->maxaccept : 1;
+ while (1) {
+ char c;
+
+ if (max_accept <= 0)
+ goto missing_budget;
+ max_accept--;
+
+ to_skip = co_getchar(sc_oc(sc), &c);
+ if (!to_skip)
+ goto missing_data;
+ else if (to_skip < 0)
+ goto cli_abort;
+
+ if (c == '<') {
+ /* rfc-6587, Non-Transparent-Framing: messages separated by
+ * a trailing LF or CR LF
+ */
+ to_skip = co_getline(sc_oc(sc), buf->area, buf->size);
+ if (!to_skip)
+ goto missing_data;
+ else if (to_skip < 0)
+ goto cli_abort;
+
+ if (buf->area[to_skip - 1] != '\n')
+ goto parse_error;
+
+ buf->data = to_skip - 1;
+
+				/* according to rfc-6587, some devices add a CR before the LF */
+ if (buf->data && buf->area[buf->data - 1] == '\r')
+ buf->data--;
+
+ }
+ else if ((unsigned char)(c - '1') <= 8) {
+			/* rfc-6587, Octet-Counting: message length in ASCII
+			 * (the first digit cannot be zero), followed by a space
+			 * and the message itself
+			 */
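+			/* e.g. the frame "9 <14>hello" announces the 9-octet
+			 * message "<14>hello"
+			 */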
+ char *p = NULL;
+ int msglen;
+
+ to_skip = co_getword(sc_oc(sc), buf->area, buf->size, ' ');
+ if (!to_skip)
+ goto missing_data;
+ else if (to_skip < 0)
+ goto cli_abort;
+
+ if (buf->area[to_skip - 1] != ' ')
+ goto parse_error;
+
+ msglen = strtol(buf->area, &p, 10);
+ if (!msglen || p != &buf->area[to_skip - 1])
+ goto parse_error;
+
+ /* message seems too large */
+ if (msglen > buf->size)
+ goto parse_error;
+
+ msglen = co_getblk(sc_oc(sc), buf->area, msglen, to_skip);
+ if (!msglen)
+ goto missing_data;
+ else if (msglen < 0)
+ goto cli_abort;
+
+ buf->data = msglen;
+ to_skip += msglen;
+ }
+ else
+ goto parse_error;
+
+ co_skip(sc_oc(sc), to_skip);
+
+ /* update counters */
+ _HA_ATOMIC_INC(&cum_log_messages);
+ proxy_inc_fe_req_ctr(l, frontend, 0);
+
+ parse_log_message(buf->area, buf->data, &level, &facility, metadata, &message, &size);
+
+ process_send_log(&frontend->loggers, level, facility, metadata, message, size);
+
+ }
+
+missing_data:
+ /* we need more data to read */
+ applet_need_more_data(appctx);
+ return;
+
+missing_budget:
+	/* some work may remain to be done, let's retry later */
+ appctx_wakeup(appctx);
+ return;
+
+parse_error:
+ if (l->counters)
+ _HA_ATOMIC_INC(&l->counters->failed_req);
+ _HA_ATOMIC_INC(&frontend->fe_counters.failed_req);
+
+ goto error;
+
+cli_abort:
+ if (l->counters)
+ _HA_ATOMIC_INC(&l->counters->cli_aborts);
+ _HA_ATOMIC_INC(&frontend->fe_counters.cli_aborts);
+
+error:
+ se_fl_set(appctx->sedesc, SE_FL_ERROR);
+
+out:
+ return;
+}
+
+static struct applet syslog_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<SYSLOG>", /* used for logging */
+ .fct = syslog_io_handler,
+ .release = NULL,
+};
+
+/*
+ * Parse a "log-forward" section and create the corresponding proxy.
+ *
+ * The function returns 0 on success, otherwise it returns error
+ * flags.
+ */
+int cfg_parse_log_forward(const char *file, int linenum, char **args, int kwm)
+{
+ int err_code = ERR_NONE;
+ struct proxy *px;
+ char *errmsg = NULL;
+ const char *err = NULL;
+
+ if (strcmp(args[0], "log-forward") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for log-forward section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ px = log_forward_by_name(args[1]);
+ if (px) {
+ ha_alert("Parsing [%s:%d]: log-forward section '%s' has the same name as another log-forward section declared at %s:%d.\n",
+ file, linenum, args[1], px->conf.file, px->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ px = proxy_find_by_name(args[1], 0, 0);
+ if (px) {
+ ha_alert("Parsing [%s:%d]: log forward section '%s' has the same name as %s '%s' declared at %s:%d.\n",
+ file, linenum, args[1], proxy_type_str(px),
+ px->id, px->conf.file, px->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ px = calloc(1, sizeof *px);
+ if (!px) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ init_new_proxy(px);
+ px->next = cfg_log_forward;
+ cfg_log_forward = px;
+ px->conf.file = strdup(file);
+ px->conf.line = linenum;
+ px->mode = PR_MODE_SYSLOG;
+ px->last_change = ns_to_sec(now_ns);
+ px->cap = PR_CAP_FE;
+ px->maxconn = 10;
+ px->timeout.client = TICK_ETERNITY;
+ px->accept = frontend_accept;
+ px->default_target = &syslog_applet.obj_type;
+ px->id = strdup(args[1]);
+ }
+ else if (strcmp(args[0], "maxconn") == 0) { /* maxconn */
+ if (warnifnotcap(cfg_log_forward, PR_CAP_FE, file, linenum, args[0], " Maybe you want 'fullconn' instead ?"))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cfg_log_forward->maxconn = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "backlog") == 0) { /* backlog */
+ if (warnifnotcap(cfg_log_forward, PR_CAP_FE, file, linenum, args[0], NULL))
+ err_code |= ERR_WARN;
+
+ if (*(args[1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cfg_log_forward->backlog = atol(args[1]);
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+ }
+ else if (strcmp(args[0], "bind") == 0) {
+ int cur_arg;
+ struct bind_conf *bind_conf;
+ struct listener *l;
+ int ret;
+
+ cur_arg = 1;
+
+ bind_conf = bind_conf_alloc(cfg_log_forward, file, linenum,
+ NULL, xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ ha_alert("parsing [%s:%d] : out of memory error.", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ bind_conf->maxaccept = global.tune.maxaccept ? global.tune.maxaccept : MAX_ACCEPT;
+ bind_conf->accept = session_accept_fd;
+
+ if (!str2listener(args[1], cfg_log_forward, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s' : error encountered while parsing listening address %s.\n",
+ file, linenum, args[0], args[1], args[2]);
+ }
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ global.maxsock++;
+ }
+ cur_arg++;
+
+ ret = bind_parse_args_list(bind_conf, args, cur_arg, cursection, file, linenum);
+ err_code |= ret;
+ if (ret != 0) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "dgram-bind") == 0) {
+ int cur_arg;
+ struct bind_conf *bind_conf;
+ struct bind_kw *kw;
+ struct listener *l;
+
+ cur_arg = 1;
+
+ bind_conf = bind_conf_alloc(cfg_log_forward, file, linenum,
+ NULL, xprt_get(XPRT_RAW));
+ if (!bind_conf) {
+ ha_alert("parsing [%s:%d] : out of memory error.", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ bind_conf->maxaccept = global.tune.maxaccept ? global.tune.maxaccept : MAX_ACCEPT;
+
+ if (!str2receiver(args[1], cfg_log_forward, bind_conf, file, linenum, &errmsg)) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s' : error encountered while parsing listening address %s.\n",
+ file, linenum, args[0], args[1], args[2]);
+ }
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ list_for_each_entry(l, &bind_conf->listeners, by_bind) {
+ /* the fact that the sockets are of type dgram is guaranteed by str2receiver() */
+ l->rx.iocb = syslog_fd_handler;
+ global.maxsock++;
+ }
+ cur_arg++;
+
+ while (*args[cur_arg] && (kw = bind_find_kw(args[cur_arg]))) {
+ int ret;
+
+ ret = kw->parse(args, cur_arg, cfg_log_forward, bind_conf, &errmsg);
+ err_code |= ret;
+ if (ret) {
+ if (errmsg && *errmsg) {
+ indent_msg(&errmsg, 2);
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ }
+ else
+ ha_alert("parsing [%s:%d]: error encountered while processing '%s'\n",
+ file, linenum, args[cur_arg]);
+ if (ret & ERR_FATAL)
+ goto out;
+ }
+ cur_arg += 1 + kw->skip;
+ }
+ if (*args[cur_arg] != 0) {
+ const char *best = bind_find_best_kw(args[cur_arg]);
+ if (best)
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section; did you mean '%s' maybe ?\n",
+ file, linenum, args[cur_arg], cursection, best);
+ else
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section.\n",
+ file, linenum, args[cur_arg], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "log") == 0) {
+ if (!parse_logger(args, &cfg_log_forward->loggers, (kwm == KWM_NO), file, linenum, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "timeout") == 0) {
+ const char *res;
+ unsigned timeout;
+
+ if (strcmp(args[1], "client") != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s %s' in log-forward section.\n", file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (*args[2] == 0) {
+ ha_alert("parsing [%s:%d] : missing timeout client value.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &timeout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(&errmsg, "timer overflow in argument '%s' to 'timeout client' (maximum value is 2147483647 ms or ~24.8 days)", args[2]);
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(&errmsg, "timer underflow in argument '%s' to 'timeout client' (minimum non-null value is 1 ms)", args[2]);
+ }
+ else if (res) {
+ memprintf(&errmsg, "unexpected character '%c' in 'timeout client'", *res);
+ }
+
+ if (res) {
+ ha_alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ cfg_log_forward->timeout.client = MS_TO_TICKS(timeout);
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in log-forward section.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+out:
+ ha_free(&errmsg);
+ return err_code;
+}
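+
+/* Illustration only (not part of the build): a minimal "log-forward" section
+ * accepted by the parser above. Addresses, ports and the ring name are
+ * made-up examples.
+ *
+ *   log-forward syslog-lb
+ *       dgram-bind 127.0.0.1:1514
+ *       bind       127.0.0.1:1514
+ *       timeout client 30s
+ *       log ring@myring local0
+ */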
+
+/* function: post-resolve a single list of loggers
+ *
+ * Returns err_code which defaults to ERR_NONE and can be set to a combination
+ * of ERR_WARN, ERR_ALERT, ERR_FATAL and ERR_ABORT in case of errors.
+ */
+int postresolve_logger_list(struct list *loggers, const char *section, const char *section_name)
+{
+ int err_code = ERR_NONE;
+ struct logger *logger;
+
+ list_for_each_entry(logger, loggers, list) {
+ int cur_code;
+ char *msg = NULL;
+
+ cur_code = resolve_logger(logger, &msg);
+ if (msg) {
+ void (*e_func)(const char *fmt, ...) = NULL;
+
+ if (cur_code & ERR_ALERT)
+ e_func = ha_alert;
+ else if (cur_code & ERR_WARN)
+ e_func = ha_warning;
+ else
+ e_func = ha_diag_warning;
+ if (!section)
+ e_func("global log directive declared in file %s at line '%d' %s.\n",
+ logger->conf.file, logger->conf.line, msg);
+ else
+ e_func("log directive declared in %s section '%s' in file '%s' at line %d %s.\n",
+ section, section_name, logger->conf.file, logger->conf.line, msg);
+ ha_free(&msg);
+ }
+ err_code |= cur_code;
+ }
+ return err_code;
+}
+
+/* Resolve default log directives at end of config. Returns 0 on success,
+ * otherwise error flags.
+ */
+static int postresolve_loggers()
+{
+ struct proxy *px;
+ int err_code = ERR_NONE;
+
+ /* global log directives */
+ err_code |= postresolve_logger_list(&global.loggers, NULL, NULL);
+ /* proxy log directives */
+ for (px = proxies_list; px; px = px->next)
+ err_code |= postresolve_logger_list(&px->loggers, "proxy", px->id);
+ /* log-forward log directives */
+ for (px = cfg_log_forward; px; px = px->next)
+ err_code |= postresolve_logger_list(&px->loggers, "log-forward", px->id);
+
+ return err_code;
+}
+
+
+/* config parsers for this section */
+REGISTER_CONFIG_SECTION("log-forward", cfg_parse_log_forward, NULL);
+REGISTER_POST_CHECK(postresolve_loggers);
+REGISTER_POST_PROXY_CHECK(postcheck_log_backend);
+
+REGISTER_PER_THREAD_ALLOC(init_log_buffers);
+REGISTER_PER_THREAD_FREE(deinit_log_buffers);
+
+REGISTER_POST_DEINIT(deinit_log_forward);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/lru.c b/src/lru.c
new file mode 100644
index 0000000..07ef50c
--- /dev/null
+++ b/src/lru.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2015 Willy Tarreau <w@1wt.eu>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <import/lru.h>
+
+/* Minimal list manipulation macros for lru64_list */
+#define LIST_INSERT(lh, el) ({ (el)->n = (lh)->n; (el)->n->p = (lh)->n = (el); (el)->p = (lh); })
+#define LIST_DELETE(el) ({ (el)->n->p = (el)->p; (el)->p->n = (el)->n; })
+
+
+/* Lookup key <key> in LRU cache <lru> for use with domain <domain> whose data's
+ * current version is <revision>. It differs from lru64_get as it does not
+ * create missing keys. The function returns NULL if an error or a cache miss
+ * occurs. */
+struct lru64 *lru64_lookup(unsigned long long key, struct lru64_head *lru,
+ void *domain, unsigned long long revision)
+{
+ struct eb64_node *node;
+ struct lru64 *elem;
+
+ node = __eb64_lookup(&lru->keys, key);
+ elem = container_of(node, typeof(*elem), node);
+ if (elem) {
+ /* Existing entry found, check validity then move it to the
+ * head of the LRU list.
+ */
+ if (elem->domain == domain && elem->revision == revision) {
+ LIST_DELETE(&elem->lru);
+ LIST_INSERT(&lru->list, &elem->lru);
+ return elem;
+ }
+ }
+ return NULL;
+}
+
+/* Get key <key> from LRU cache <lru> for use with domain <domain> whose data's
+ * current revision is <revision>. If the key doesn't exist it's first created
+ * with ->domain = NULL. The caller detects this situation by checking ->domain
+ * and must perform the operation to be cached then call lru64_commit() to
+ * complete the operation. A lock (mutex or spinlock) may be added around the
+ * function to permit use in a multi-threaded environment. The function may
+ * return NULL upon memory allocation failure.
+ */
+struct lru64 *lru64_get(unsigned long long key, struct lru64_head *lru,
+ void *domain, unsigned long long revision)
+{
+ struct eb64_node *node;
+ struct lru64 *elem;
+
+ if (!lru->spare) {
+ if (!lru->cache_size)
+ return NULL;
+ lru->spare = malloc(sizeof(*lru->spare));
+ if (!lru->spare)
+ return NULL;
+ lru->spare->domain = NULL;
+ }
+
+ /* Lookup or insert */
+ lru->spare->node.key = key;
+ node = __eb64_insert(&lru->keys, &lru->spare->node);
+ elem = container_of(node, typeof(*elem), node);
+
+ if (elem != lru->spare) {
+ /* Existing entry found, check validity then move it to the
+ * head of the LRU list.
+ */
+ if (elem->domain == domain && elem->revision == revision) {
+ LIST_DELETE(&elem->lru);
+ LIST_INSERT(&lru->list, &elem->lru);
+ return elem;
+ }
+
+ if (!elem->domain)
+ return NULL; // currently locked
+
+ /* recycle this entry */
+ LIST_DELETE(&elem->lru);
+ }
+ else {
+ /* New entry inserted, initialize and move to the head of the
+ * LRU list, and lock it until commit.
+ */
+ lru->cache_usage++;
+ lru->spare = NULL; // used, need a new one next time
+ }
+
+ elem->domain = NULL;
+ LIST_INSERT(&lru->list, &elem->lru);
+
+ if (lru->cache_usage > lru->cache_size) {
+ /* try to kill oldest entry */
+ struct lru64 *old;
+
+ old = container_of(lru->list.p, typeof(*old), lru);
+ if (old->domain) {
+ /* not locked */
+ LIST_DELETE(&old->lru);
+ __eb64_delete(&old->node);
+ if (old->data && old->free)
+ old->free(old->data);
+ if (!lru->spare)
+ lru->spare = old;
+ else {
+ free(old);
+ }
+ lru->cache_usage--;
+ }
+ }
+ return elem;
+}
+
+/* Commit element <elem> with data <data>, domain <domain> and revision
+ * <revision>. <elem> is checked for NULL so that it's possible to call it
+ * with the result from a call to lru64_get(). The caller might lock it using a
+ * spinlock or mutex shared with the one around lru64_get().
+ */
+void lru64_commit(struct lru64 *elem, void *data, void *domain,
+ unsigned long long revision, void (*free)(void *))
+{
+ if (!elem)
+ return;
+
+ elem->data = data;
+ elem->revision = revision;
+ elem->domain = domain;
+ elem->free = free;
+}
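+
+/* Usage sketch (illustrative only): typical get/commit sequence, assuming a
+ * caller-provided spinlock <cache_lock> and a costly compute() function,
+ * both of which are assumptions and not part of this file:
+ *
+ *   HA_SPIN_LOCK(OTHER_LOCK, &cache_lock);
+ *   item = lru64_get(key, cache, domain, revision);
+ *   HA_SPIN_UNLOCK(OTHER_LOCK, &cache_lock);
+ *   if (item && item->domain)
+ *           return item->data;              // hit: entry is valid
+ *   data = compute(key);                    // miss: do the expensive work
+ *   if (item) {                             // new entry, locked until commit
+ *           HA_SPIN_LOCK(OTHER_LOCK, &cache_lock);
+ *           lru64_commit(item, data, domain, revision, free_fn);
+ *           HA_SPIN_UNLOCK(OTHER_LOCK, &cache_lock);
+ *   }
+ */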
+
+/* Create a new LRU cache of <size> entries. Returns the new cache or NULL in
+ * case of allocation failure.
+ */
+struct lru64_head *lru64_new(int size)
+{
+ struct lru64_head *lru;
+
+ lru = malloc(sizeof(*lru));
+ if (lru) {
+ lru->list.p = lru->list.n = &lru->list;
+ lru->keys = EB_ROOT_UNIQUE;
+ lru->spare = NULL;
+ lru->cache_size = size;
+ lru->cache_usage = 0;
+ }
+ return lru;
+}
+
+/* Tries to destroy the LRU cache <lru>. Returns the number of locked entries
+ * that prevent it from being destroyed, or zero meaning everything was done.
+ */
+int lru64_destroy(struct lru64_head *lru)
+{
+ struct lru64 *elem, *next;
+
+ if (!lru)
+ return 0;
+
+ elem = container_of(lru->list.p, typeof(*elem), lru);
+ while (&elem->lru != &lru->list) {
+ next = container_of(elem->lru.p, typeof(*next), lru);
+ if (elem->domain) {
+ /* not locked */
+ LIST_DELETE(&elem->lru);
+ eb64_delete(&elem->node);
+ if (elem->data && elem->free)
+ elem->free(elem->data);
+ free(elem);
+ lru->cache_usage--;
+ lru->cache_size--;
+ }
+ elem = next;
+ }
+
+ if (lru->cache_usage)
+ return lru->cache_usage;
+
+ free(lru);
+ return 0;
+}
+
+/* kill the <nb> least recently used entries from the <lru> cache */
+void lru64_kill_oldest(struct lru64_head *lru, unsigned long int nb)
+{
+ struct lru64 *elem, *next;
+
+ for (elem = container_of(lru->list.p, typeof(*elem), lru);
+ nb && (&elem->lru != &lru->list);
+ elem = next) {
+ next = container_of(elem->lru.p, typeof(*next), lru);
+ if (!elem->domain)
+ continue; /* locked entry */
+
+ LIST_DELETE(&elem->lru);
+ eb64_delete(&elem->node);
+ if (elem->data && elem->free)
+ elem->free(elem->data);
+ if (!lru->spare)
+ lru->spare = elem;
+ else
+ free(elem);
+ lru->cache_usage--;
+ nb--;
+ }
+}
+
+/* The code below is just for validation and performance testing. It's an
+ * example of a function taking some time to return results that could be
+ * cached.
+ */
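+/* A build command along these lines should work (the exact paths are an
+ * assumption, this is not a documented build target):
+ *
+ *   gcc -O2 -Iinclude -DSTANDALONE -o lru64-test src/lru.c src/eb64tree.c src/ebtree.c
+ *
+ * e.g. "./lru64-test 1000000 4096" then runs one million lookups with a
+ * 4096-entry cache.
+ */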
+#ifdef STANDALONE
+
+#include <stdio.h>
+
+static unsigned int misses;
+
+static unsigned long long sum(unsigned long long x)
+{
+#ifndef TEST_LRU_FAST_OPERATION
+ if (x < 1)
+ return 0;
+ return x + sum(x * 99 / 100 - 1);
+#else
+ return (x << 16) - (x << 8) - 1;
+#endif
+}
+
+static long get_value(struct lru64_head *lru, long a)
+{
+ struct lru64 *item = NULL;
+
+ if (lru) {
+ item = lru64_get(a, lru, lru, 0);
+ if (item && item->domain)
+ return (long)item->data;
+ }
+ misses++;
+ /* do the painful work here */
+ a = sum(a);
+ if (item)
+ lru64_commit(item, (void *)a, lru, 1, 0);
+ return a;
+}
+
+static inline unsigned int statistical_prng()
+{
+ static unsigned int statistical_prng_state = 0x12345678;
+ unsigned int x = statistical_prng_state;
+
+ x ^= x << 13;
+ x ^= x >> 17;
+ x ^= x << 5;
+ return statistical_prng_state = x;
+}
+
+/* pass the number of loops in argv[1] and an LRU cache size in argv[2] to enable the LRU */
+int main(int argc, char **argv)
+{
+ struct lru64_head *lru = NULL;
+ long long ret;
+ int total, loops;
+
+ if (argc < 2) {
+ printf("Need a number of rounds and optionally an LRU cache size (0..65536)\n");
+ exit(1);
+ }
+
+ total = atoi(argv[1]);
+
+ if (argc > 2) /* cache size */
+ lru = lru64_new(atoi(argv[2]));
+
+ ret = 0;
+ for (loops = 0; loops < total; loops++) {
+ ret += get_value(lru, statistical_prng() & 65535);
+ }
+ /* just for accuracy control */
+ printf("ret=%llx, hits=%u, misses=%u (%d %% hits)\n", ret, (unsigned)(total-misses), misses, (int)((float)(total-misses) * 100.0 / total));
+
+ while (lru64_destroy(lru));
+
+ return 0;
+}
+
+#endif
diff --git a/src/mailers.c b/src/mailers.c
new file mode 100644
index 0000000..c09e73c
--- /dev/null
+++ b/src/mailers.c
@@ -0,0 +1,329 @@
+/*
+ * Mailer management.
+ *
+ * Copyright 2015 Horms Solutions Ltd, Simon Horman <horms@verge.net.au>
+ * Copyright 2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/check.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/mailers.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/server-t.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+
+struct mailers *mailers = NULL;
+
+/* Set to 1 to disable email sending through checks even if the
+ * mailers are configured to do so (e.g. when disabled from Lua).
+ */
+int send_email_disabled = 0;
+
+DECLARE_STATIC_POOL(pool_head_email_alert, "email_alert", sizeof(struct email_alert));
+
+/****************************** Email alerts ******************************/
+/* NOTE: It may be pertinent to use an applet to handle email alerts */
+/* instead of a tcp-check ruleset */
+/**************************************************************************/
+void email_alert_free(struct email_alert *alert)
+{
+ struct tcpcheck_rule *rule, *back;
+
+ if (!alert)
+ return;
+
+ if (alert->rules.list) {
+ list_for_each_entry_safe(rule, back, alert->rules.list, list) {
+ LIST_DELETE(&rule->list);
+ free_tcpcheck(rule, 1);
+ }
+ free_tcpcheck_vars(&alert->rules.preset_vars);
+ ha_free(&alert->rules.list);
+ }
+ pool_free(pool_head_email_alert, alert);
+}
+
+static struct task *process_email_alert(struct task *t, void *context, unsigned int state)
+{
+ struct check *check = context;
+ struct email_alertq *q;
+ struct email_alert *alert;
+
+ q = container_of(check, typeof(*q), check);
+
+ HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
+ while (1) {
+ if (!(check->state & CHK_ST_ENABLED)) {
+ if (LIST_ISEMPTY(&q->email_alerts)) {
+ /* All alerts processed, queue the task */
+ t->expire = TICK_ETERNITY;
+ task_queue(t);
+ goto end;
+ }
+
+ alert = LIST_NEXT(&q->email_alerts, typeof(alert), list);
+ LIST_DELETE(&alert->list);
+ t->expire = now_ms;
+ check->tcpcheck_rules = &alert->rules;
+ check->status = HCHK_STATUS_INI;
+ check->state |= CHK_ST_ENABLED;
+ }
+
+ process_chk(t, context, state);
+ if (check->state & CHK_ST_INPROGRESS)
+ break;
+
+ alert = container_of(check->tcpcheck_rules, typeof(*alert), rules);
+ email_alert_free(alert);
+ check->tcpcheck_rules = NULL;
+ check->server = NULL;
+ check->state &= ~CHK_ST_ENABLED;
+ }
+ end:
+ HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
+ return t;
+}
+
+/* Initializes mailer alerts for the proxy <p> using <mls> parameters.
+ *
+ * The function returns 0 on success, otherwise it returns 1 and <err> is
+ * filled.
+ */
+int init_email_alert(struct mailers *mls, struct proxy *p, char **err)
+{
+ struct mailer *mailer;
+ struct email_alertq *queues;
+ const char *err_str;
+ int i = 0;
+
+ if ((queues = calloc(mls->count, sizeof(*queues))) == NULL) {
+ memprintf(err, "out of memory while allocating mailer alerts queues");
+ goto fail_no_queue;
+ }
+
+ for (mailer = mls->mailer_list; mailer; i++, mailer = mailer->next) {
+ struct email_alertq *q = &queues[i];
+ struct check *check = &q->check;
+ struct task *t;
+
+ LIST_INIT(&q->email_alerts);
+ HA_SPIN_INIT(&q->lock);
+ check->obj_type = OBJ_TYPE_CHECK;
+ check->inter = mls->timeout.mail;
+ check->rise = DEF_AGENT_RISETIME;
+ check->proxy = p;
+ check->fall = DEF_AGENT_FALLTIME;
+ if ((err_str = init_check(check, PR_O2_TCPCHK_CHK))) {
+ memprintf(err, "%s", err_str);
+ goto error;
+ }
+
+ check->xprt = mailer->xprt;
+ check->addr = mailer->addr;
+ check->port = get_host_port(&mailer->addr);
+
+ if ((t = task_new_anywhere()) == NULL) {
+ memprintf(err, "out of memory while allocating mailer alerts task");
+ goto error;
+ }
+
+ check->task = t;
+ t->process = process_email_alert;
+ t->context = check;
+
+ /* no initial expiration, the task is woken up when an alert is enqueued */
+ t->expire = TICK_ETERNITY;
+ check->start = now_ns;
+ task_queue(t);
+ }
+
+ mls->users++;
+ free(p->email_alert.mailers.name);
+ p->email_alert.mailers.m = mls;
+ p->email_alert.queues = queues;
+ return 0;
+
+ error:
+ for (i = 0; i < mls->count; i++) {
+ struct email_alertq *q = &queues[i];
+ struct check *check = &q->check;
+
+ free_check(check);
+ }
+ free(queues);
+ fail_no_queue:
+ return 1;
+}
+
+static int enqueue_one_email_alert(struct proxy *p, struct server *s,
+ struct email_alertq *q, const char *msg)
+{
+ struct email_alert *alert;
+ struct tcpcheck_rule *tcpcheck;
+ struct check *check = &q->check;
+
+ if ((alert = pool_alloc(pool_head_email_alert)) == NULL)
+ goto error;
+ LIST_INIT(&alert->list);
+ alert->rules.flags = TCPCHK_RULES_TCP_CHK;
+ alert->rules.list = calloc(1, sizeof(*alert->rules.list));
+ if (!alert->rules.list)
+ goto error;
+ LIST_INIT(alert->rules.list);
+ LIST_INIT(&alert->rules.preset_vars); /* unused for email alerts */
+ alert->srv = s;
+
+ if ((tcpcheck = pool_zalloc(pool_head_tcpcheck_rule)) == NULL)
+ goto error;
+ tcpcheck->action = TCPCHK_ACT_CONNECT;
+ tcpcheck->comment = NULL;
+
+ LIST_APPEND(alert->rules.list, &tcpcheck->list);
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "220 "))
+ goto error;
+
+ {
+ const char * const strs[4] = { "HELO ", p->email_alert.myhostname, "\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
+ goto error;
+
+ {
+ const char * const strs[4] = { "MAIL FROM:<", p->email_alert.from, ">\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
+ goto error;
+
+ {
+ const char * const strs[4] = { "RCPT TO:<", p->email_alert.to, ">\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
+ goto error;
+
+ {
+ const char * const strs[2] = { "DATA\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "354 "))
+ goto error;
+
+ {
+ struct tm tm;
+ char datestr[48];
+ const char * const strs[18] = {
+ "From: ", p->email_alert.from, "\r\n",
+ "To: ", p->email_alert.to, "\r\n",
+ "Date: ", datestr, "\r\n",
+ "Subject: [HAProxy Alert] ", msg, "\r\n",
+ "\r\n",
+ msg, "\r\n",
+ "\r\n",
+ ".\r\n",
+ NULL
+ };
+
+ get_localtime(date.tv_sec, &tm);
+
+ if (strftime(datestr, sizeof(datestr), "%a, %d %b %Y %T %z (%Z)", &tm) == 0) {
+ goto error;
+ }
+
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "250 "))
+ goto error;
+
+ {
+ const char * const strs[2] = { "QUIT\r\n" };
+ if (!add_tcpcheck_send_strs(&alert->rules, strs))
+ goto error;
+ }
+
+ if (!add_tcpcheck_expect_str(&alert->rules, "221 "))
+ goto error;
+
+ HA_SPIN_LOCK(EMAIL_ALERTS_LOCK, &q->lock);
+ task_wakeup(check->task, TASK_WOKEN_MSG);
+ LIST_APPEND(&q->email_alerts, &alert->list);
+ HA_SPIN_UNLOCK(EMAIL_ALERTS_LOCK, &q->lock);
+ return 1;
+
+error:
+ email_alert_free(alert);
+ return 0;
+}
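+
+/* Illustration only (not part of the build): the SMTP dialogue produced by
+ * the rules built above, with made-up addresses. 'C:' lines are sent by
+ * HAProxy, 'S:' lines come from the mailer and only their leading status
+ * code is checked:
+ *
+ *   S: 220 mail.example.com ESMTP
+ *   C: HELO lb1.example.com
+ *   S: 250 OK
+ *   C: MAIL FROM:<haproxy@example.com>
+ *   S: 250 OK
+ *   C: RCPT TO:<admin@example.com>
+ *   S: 250 OK
+ *   C: DATA
+ *   S: 354 Go ahead
+ *   C: From/To/Date/Subject headers, the alert message, then "."
+ *   S: 250 OK
+ *   C: QUIT
+ *   S: 221 Bye
+ */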
+
+static void enqueue_email_alert(struct proxy *p, struct server *s, const char *msg)
+{
+ int i;
+ struct mailer *mailer;
+
+ for (i = 0, mailer = p->email_alert.mailers.m->mailer_list;
+ i < p->email_alert.mailers.m->count; i++, mailer = mailer->next) {
+ if (!enqueue_one_email_alert(p, s, &p->email_alert.queues[i], msg)) {
+ ha_alert("Email alert [%s] could not be enqueued: out of memory\n", p->id);
+ return;
+ }
+ }
+
+ return;
+}
+
+/*
+ * Send email alert if configured.
+ */
+void send_email_alert(struct server *s, int level, const char *format, ...)
+{
+ va_list argp;
+ char buf[1024];
+ int len;
+ struct proxy *p = s->proxy;
+
+ if (send_email_disabled)
+ return;
+
+ if (!p->email_alert.mailers.m || level > p->email_alert.level || format == NULL)
+ return;
+
+ va_start(argp, format);
+ len = vsnprintf(buf, sizeof(buf), format, argp);
+ va_end(argp);
+
+ if (len < 0 || len >= sizeof(buf)) {
+ ha_alert("Email alert [%s] could not format message\n", p->id);
+ return;
+ }
+
+ enqueue_email_alert(p, s, buf);
+}
diff --git a/src/map.c b/src/map.c
new file mode 100644
index 0000000..ba7fd81
--- /dev/null
+++ b/src/map.c
@@ -0,0 +1,1232 @@
+/*
+ * MAP management functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <syslog.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/cli.h>
+#include <haproxy/map.h>
+#include <haproxy/pattern.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+
+/* Parse an IPv4 or IPv6 address and store it into the sample.
+ * The output type is IPv4 or IPv6.
+ */
+int map_parse_ip(const char *text, struct sample_data *data)
+{
+ int len = strlen(text);
+
+ if (buf2ip(text, len, &data->u.ipv4)) {
+ data->type = SMP_T_IPV4;
+ return 1;
+ }
+ if (buf2ip6(text, len, &data->u.ipv6)) {
+ data->type = SMP_T_IPV6;
+ return 1;
+ }
+ return 0;
+}
+
+/* Parse a string and store a pointer to it into the sample. The original
+ * string must be left in memory because we return a direct memory reference.
+ * The output type is SMP_T_STR. There is no risk that the data will be
+ * overwritten because sample_conv_map() makes a const sample with this
+ * output.
+ */
+int map_parse_str(const char *text, struct sample_data *data)
+{
+ data->u.str.area = (char *)text;
+ data->u.str.data = strlen(text);
+ data->u.str.size = data->u.str.data + 1;
+ data->type = SMP_T_STR;
+ return 1;
+}
+
+/* Parse an integer and convert it to a sample of type SINT. The function
+ * returns zero (error) if the string does not entirely parse as a number,
+ * i.e. if characters remain after the parsed value.
+ */
+int map_parse_int(const char *text, struct sample_data *data)
+{
+ data->type = SMP_T_SINT;
+ data->u.sint = read_int64(&text, text + strlen(text));
+ if (*text != '\0')
+ return 0;
+ return 1;
+}
+
+/* This creates and initializes a map descriptor.
+ * Returns NULL in case of an out-of-memory error.
+ */
+static struct map_descriptor *map_create_descriptor(struct sample_conv *conv)
+{
+ struct map_descriptor *desc;
+
+ desc = calloc(1, sizeof(*desc));
+ if (!desc)
+ return NULL;
+
+ desc->conv = conv;
+
+ return desc;
+}
+
+/* This function loads the map file according to the data type declared in
+ * the "struct sample_conv".
+ *
+ * It chooses the indexing type (ebtree or list) according to the type of
+ * match needed.
+ */
+int sample_load_map(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ struct map_descriptor *desc;
+
+ if (!(global.mode & MODE_STARTING)) {
+ memprintf(err, "map: cannot load map at runtime");
+ return 0;
+ }
+
+ /* create new map descriptor */
+ desc = map_create_descriptor(conv);
+ if (!desc) {
+ memprintf(err, "out of memory");
+ return 0;
+ }
+
+ /* Initialize pattern */
+ pattern_init_head(&desc->pat);
+
+ /* This is the original pattern, it must be freed */
+ desc->do_free = 1;
+
+ /* Set the match method. */
+ desc->pat.match = pat_match_fcts[(long)conv->private];
+ desc->pat.parse = pat_parse_fcts[(long)conv->private];
+ desc->pat.index = pat_index_fcts[(long)conv->private];
+ desc->pat.prune = pat_prune_fcts[(long)conv->private];
+ desc->pat.expect_type = pat_match_types[(long)conv->private];
+
+ /* Set the output parse method. */
+ switch (desc->conv->out_type) {
+ case SMP_T_STR: desc->pat.parse_smp = map_parse_str; break;
+ case SMP_T_SINT: desc->pat.parse_smp = map_parse_int; break;
+ case SMP_T_ADDR: desc->pat.parse_smp = map_parse_ip; break;
+ default:
+ memprintf(err, "map: internal haproxy error: no default parse case for the input type <%d>.",
+ conv->out_type);
+ free(desc);
+ return 0;
+ }
+
+ /* Load map. */
+ if (!pattern_read_from_file(&desc->pat, PAT_REF_MAP, arg[0].data.str.area, PAT_MF_NO_DNS,
+ 1, err, file, line))
+ return 0;
+
+ /* Maps of type IP support a string as the default value. This
+ * string can be an IPv4 or an IPv6 address, so we must convert it.
+ */
+ if (arg[1].type != ARGT_STOP && desc->conv->out_type == SMP_T_ADDR) {
+ struct sample_data data;
+ if (!map_parse_ip(arg[1].data.str.area, &data)) {
+ memprintf(err, "map: cannot parse default ip <%s>.",
+ arg[1].data.str.area);
+ return 0;
+ }
+ chunk_destroy(&arg[1].data.str);
+ if (data.type == SMP_T_IPV4) {
+ arg[1].type = ARGT_IPV4;
+ arg[1].data.ipv4 = data.u.ipv4;
+ } else {
+ arg[1].type = ARGT_IPV6;
+ arg[1].data.ipv6 = data.u.ipv6;
+ }
+ }
+
+ /* replace the first argument by this definition */
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_MAP;
+ arg[0].data.map = desc;
+
+ return 1;
+}
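+
+/* Illustration only (not part of the build): how this loader is typically
+ * reached from a configuration. The file name, its content and the backend
+ * names are made-up examples.
+ *
+ *   # hosts.map contains one "<key> <value>" pair per line:
+ *   #   example.com    be_example
+ *   #   static.example be_static
+ *   use_backend %[req.hdr(host),lower,map(hosts.map,be_default)]
+ */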
+
+static int sample_conv_map(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct map_descriptor *desc;
+ struct pattern *pat;
+ struct buffer *str;
+
+ /* get config */
+ desc = arg_p[0].data.map;
+
+ /* Execute the match function. */
+ pat = pattern_exec_match(&desc->pat, smp, 1);
+
+ /* Match case. */
+ if (pat) {
+ if (pat->data) {
+ /* In the regm case, merge the sample with the input. */
+ if ((long)private == PAT_MATCH_REGM) {
+ struct buffer *tmptrash;
+ int len;
+
+ /* Copy the content of the sample because it could be
+ * scratched by the upcoming call to get_trash_chunk() */
+ tmptrash = alloc_trash_chunk();
+ if (!tmptrash)
+ return 0;
+
+ tmptrash->data = smp->data.u.str.data;
+ if (tmptrash->data > (tmptrash->size-1))
+ tmptrash->data = tmptrash->size-1;
+
+ memcpy(tmptrash->area, smp->data.u.str.area, tmptrash->data);
+ tmptrash->area[tmptrash->data] = 0;
+
+ str = get_trash_chunk();
+ len = exp_replace(str->area, str->size,
+ tmptrash->area,
+ pat->data->u.str.area,
+ (regmatch_t *)smp->ctx.a[0]);
+ free_trash_chunk(tmptrash);
+
+ if (len == -1)
+ return 0;
+
+ str->data = len;
+ smp->data.u.str = *str;
+ return 1;
+ }
+ /* Copy sample. */
+ smp->data = *pat->data;
+ smp->flags |= SMP_F_CONST;
+ return 1;
+ }
+
+ /* Return just an int sample containing 1. */
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 1;
+ return 1;
+ }
+
+ /* If no default value available, the converter fails. */
+ if (arg_p[1].type == ARGT_STOP)
+ return 0;
+
+ /* Return the default value. */
+ switch (desc->conv->out_type) {
+
+ case SMP_T_STR:
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str = arg_p[1].data.str;
+ break;
+
+ case SMP_T_SINT:
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = arg_p[1].data.sint;
+ break;
+
+ case SMP_T_ADDR:
+ if (arg_p[1].type == ARGT_IPV4) {
+ smp->data.type = SMP_T_IPV4;
+ smp->data.u.ipv4 = arg_p[1].data.ipv4;
+ } else {
+ smp->data.type = SMP_T_IPV6;
+ smp->data.u.ipv6 = arg_p[1].data.ipv6;
+ }
+ break;
+ }
+
+ return 1;
+}
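+
+/* Illustration only (not part of the build): the PAT_MATCH_REGM branch above
+ * expands capture groups from the matched key into the returned value. With
+ * a made-up rewrite.map containing:
+ *
+ *   ^/old/(.*)$ /new/\1
+ *
+ * a configuration line such as:
+ *
+ *   http-request set-path %[path,map_regm(rewrite.map)]
+ *
+ * would rewrite "/old/img.png" into "/new/img.png".
+ */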
+
+/* This function is used for map and ACL management. It allows browsing
+ * each reference. The variable <getnext> must contain the current node,
+ * <end> points to the root node, and <flags> filters the required
+ * nodes.
+ */
+static inline
+struct pat_ref *pat_list_get_next(struct pat_ref *getnext, struct list *end,
+ unsigned int flags)
+{
+ struct pat_ref *ref = getnext;
+
+ while (1) {
+
+ /* Get next list entry. */
+ ref = LIST_NEXT(&ref->list, struct pat_ref *, list);
+
+ /* If the entry is the last of the list, return NULL. */
+ if (&ref->list == end)
+ return NULL;
+
+ /* If the entry matches the flags, return it. */
+ if (ref->flags & flags)
+ return ref;
+ }
+}
+
+static inline
+struct pat_ref *pat_ref_lookup_ref(const char *reference)
+{
+ int id;
+ char *error;
+
+ /* If the reference starts with a '#', it is a numeric id. */
+ if (reference[0] == '#') {
+ /* Try to convert the numeric id. If the conversion fails, the lookup fails. */
+ id = strtol(reference + 1, &error, 10);
+ if (*error != '\0')
+ return NULL;
+
+ /* Perform the unique id lookup. */
+ return pat_ref_lookupid(id);
+ }
+
+ /* Perform the string lookup. */
+ return pat_ref_lookup(reference);
+}
+
+/* This function is used for map and ACL management. It allows browsing
+ * the pattern expressions attached to a reference.
+ */
+static inline
+struct pattern_expr *pat_expr_get_next(struct pattern_expr *getnext, struct list *end)
+{
+ struct pattern_expr *expr;
+ expr = LIST_NEXT(&getnext->list, struct pattern_expr *, list);
+ if (&expr->list == end)
+ return NULL;
+ return expr;
+}
+
+/* appctx context for the "{show|get|add|del|*} {map|acl}" commands. This is
+ * used even by commands that only have a parser and no I/O handler because
+ * it provides a unified way to manipulate some fields and will allow to
+ * expand some of them more easily later if needed.
+ */
+struct show_map_ctx {
+ struct pat_ref *ref;
+ struct bref bref; /* back-reference from the pat_ref_elt being dumped */
+ struct pattern_expr *expr;
+ struct buffer chunk;
+ unsigned int display_flags;
+ unsigned int curr_gen; /* current/latest generation, for show/clear */
+ unsigned int prev_gen; /* prev generation, for clear */
+ enum {
+ STATE_INIT = 0, /* initialize list and backrefs */
+ STATE_LIST, /* list entries */
+ STATE_DONE, /* finished */
+ } state; /* state of the dump */
+};
+
+/* expects the current generation ID in ctx->curr_gen */
+static int cli_io_handler_pat_list(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct pat_ref_elt *elt;
+
+ /* FIXME: Don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) {
+ /* If we're forced to shut down, we might have to remove our
+ * reference to the last ref_elt being dumped.
+ */
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ LIST_DEL_INIT(&ctx->bref.users);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+ return 1;
+ }
+
+ switch (ctx->state) {
+ case STATE_INIT:
+ ctx->state = STATE_LIST;
+ __fallthrough;
+
+ case STATE_LIST:
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ LIST_DELETE(&ctx->bref.users);
+ LIST_INIT(&ctx->bref.users);
+ } else {
+ ctx->bref.ref = ctx->ref->head.n;
+ }
+
+ while (ctx->bref.ref != &ctx->ref->head) {
+ chunk_reset(&trash);
+
+ elt = LIST_ELEM(ctx->bref.ref, struct pat_ref_elt *, list);
+
+ if (elt->gen_id != ctx->curr_gen)
+ goto skip;
+
+ /* build messages */
+ if (elt->sample)
+ chunk_appendf(&trash, "%p %s %s\n",
+ elt, elt->pattern,
+ elt->sample);
+ else
+ chunk_appendf(&trash, "%p %s\n",
+ elt, elt->pattern);
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream. We add ourselves into
+ * this stream's users so that it can remove us upon termination.
+ */
+ LIST_APPEND(&elt->back_refs, &ctx->bref.users);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ return 0;
+ }
+ skip:
+ /* get next list entry and check the end of the list */
+ ctx->bref.ref = elt->list.n;
+ }
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ __fallthrough;
+
+ default:
+ ctx->state = STATE_DONE;
+ return 1;
+ }
+}
+
+static int cli_io_handler_pats_list(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+
+ switch (ctx->state) {
+ case STATE_INIT:
+ /* Display the column headers. If the message cannot be sent,
+ * quit the function with returning 0. The function is called
+ * later and restarted at the state "STATE_INIT".
+ */
+ chunk_reset(&trash);
+ chunk_appendf(&trash, "# id (file) description\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ /* Now, we start the browsing of the references lists.
+ * Note that the following call to LIST_ELEM returns a bad pointer. The only
+ * available field of this pointer is <list>. It is used with the function
+ * pat_list_get_next() for returning the first available entry
+ */
+ ctx->ref = LIST_ELEM(&pattern_reference, struct pat_ref *, list);
+ ctx->ref = pat_list_get_next(ctx->ref, &pattern_reference,
+ ctx->display_flags);
+ ctx->state = STATE_LIST;
+ __fallthrough;
+
+ case STATE_LIST:
+ while (ctx->ref) {
+ chunk_reset(&trash);
+
+ /* Build messages. If the reference is used by another category than
+ * the listed categories, display the information in the message.
+ */
+ chunk_appendf(&trash, "%d (%s) %s. curr_ver=%u next_ver=%u entry_cnt=%llu\n", ctx->ref->unique_id,
+ ctx->ref->reference ? ctx->ref->reference : "",
+ ctx->ref->display, ctx->ref->curr_gen, ctx->ref->next_gen,
+ ctx->ref->entry_cnt);
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream. We add ourselves into
+ * this stream's users so that it can remove us upon termination.
+ */
+ return 0;
+ }
+
+ /* get next list entry and check the end of the list */
+ ctx->ref = pat_list_get_next(ctx->ref, &pattern_reference,
+ ctx->display_flags);
+ }
+
+ __fallthrough;
+
+ default:
+ ctx->state = STATE_DONE;
+ return 1;
+ }
+ return 0;
+}
+
+static int cli_io_handler_map_lookup(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+ struct sample sample;
+ struct pattern *pat;
+ int match_method;
+
+ switch (ctx->state) {
+ case STATE_INIT:
+ /* Init to the first entry. The list cannot be changed */
+ ctx->expr = LIST_ELEM(&ctx->ref->pat, struct pattern_expr *, list);
+ ctx->expr = pat_expr_get_next(ctx->expr, &ctx->ref->pat);
+ ctx->state = STATE_LIST;
+ __fallthrough;
+
+ case STATE_LIST:
+ HA_RWLOCK_RDLOCK(PATREF_LOCK, &ctx->ref->lock);
+ /* for each lookup type */
+ while (ctx->expr) {
+ /* initialise chunk to build new message */
+ chunk_reset(&trash);
+
+ /* execute pattern matching */
+ sample.data.type = SMP_T_STR;
+ sample.flags = SMP_F_CONST;
+ sample.data.u.str.data = ctx->chunk.data;
+ sample.data.u.str.area = ctx->chunk.area;
+
+ if (ctx->expr->pat_head->match &&
+ sample_convert(&sample, ctx->expr->pat_head->expect_type))
+ pat = ctx->expr->pat_head->match(&sample, ctx->expr, 1);
+ else
+ pat = NULL;
+
+ /* build return message: set type of match */
+ for (match_method=0; match_method<PAT_MATCH_NUM; match_method++)
+ if (ctx->expr->pat_head->match == pat_match_fcts[match_method])
+ break;
+ if (match_method >= PAT_MATCH_NUM)
+ chunk_appendf(&trash, "type=unknown(%p)", ctx->expr->pat_head->match);
+ else
+ chunk_appendf(&trash, "type=%s", pat_match_names[match_method]);
+
+ /* case sensitive */
+ if (ctx->expr->mflags & PAT_MF_IGNORE_CASE)
+ chunk_appendf(&trash, ", case=insensitive");
+ else
+ chunk_appendf(&trash, ", case=sensitive");
+
+ /* Display no match, and set default value */
+ if (!pat) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ chunk_appendf(&trash, ", found=no");
+ else
+ chunk_appendf(&trash, ", match=no");
+ }
+
+ /* Display match and match info */
+ else {
+ /* display match */
+ if (ctx->display_flags == PAT_REF_MAP)
+ chunk_appendf(&trash, ", found=yes");
+ else
+ chunk_appendf(&trash, ", match=yes");
+
+ /* display index mode */
+ if (pat->sflags & PAT_SF_TREE)
+ chunk_appendf(&trash, ", idx=tree");
+ else
+ chunk_appendf(&trash, ", idx=list");
+
+ /* display pattern */
+ if (ctx->display_flags == PAT_REF_MAP) {
+ if (pat->ref)
+ chunk_appendf(&trash, ", key=\"%s\"", pat->ref->pattern);
+ else
+ chunk_appendf(&trash, ", key=unknown");
+ }
+ else {
+ if (pat->ref)
+ chunk_appendf(&trash, ", pattern=\"%s\"", pat->ref->pattern);
+ else
+ chunk_appendf(&trash, ", pattern=unknown");
+ }
+
+ /* display return value */
+ if (ctx->display_flags == PAT_REF_MAP) {
+ if (pat->data && pat->ref && pat->ref->sample)
+ chunk_appendf(&trash, ", value=\"%s\", type=\"%s\"", pat->ref->sample,
+ smp_to_type[pat->data->type]);
+ else
+ chunk_appendf(&trash, ", value=none");
+ }
+ }
+
+ chunk_appendf(&trash, "\n");
+
+ /* display response */
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream. We add ourselves into
+ * this stream's users so that it can remove us upon termination.
+ */
+ HA_RWLOCK_RDUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ return 0;
+ }
+
+ /* get next entry */
+ ctx->expr = pat_expr_get_next(ctx->expr,
+ &ctx->ref->pat);
+ }
+ HA_RWLOCK_RDUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ __fallthrough;
+
+ default:
+ ctx->state = STATE_DONE;
+ return 1;
+ }
+}
+
+static void cli_release_mlook(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+
+ ha_free(&ctx->chunk.area);
+}
+
+
+static int cli_parse_get_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 || strcmp(args[1], "acl") == 0) {
+ /* Set flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* Missing parameters. */
+ if (!*args[2] || !*args[3]) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Missing map identifier and/or key.\n");
+ else
+ return cli_err(appctx, "Missing ACL identifier and/or key.\n");
+ }
+
+ /* lookup into the maps */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ /* copy input string. The string must be allocated because
+ * it may be used over multiple iterations. It's released
+ * at the end and upon abort anyway.
+ */
+ ctx->chunk.data = strlen(args[3]);
+ ctx->chunk.size = ctx->chunk.data + 1;
+ ctx->chunk.area = strdup(args[3]);
+ if (!ctx->chunk.area)
+ return cli_err(appctx, "Out of memory error.\n");
+
+ return 0;
+ }
+ return 1;
+}
+
+static int cli_parse_prepare_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 ||
+ strcmp(args[1], "acl") == 0) {
+ uint next_gen;
+ char *msg = NULL;
+
+ /* Set ACL or MAP flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* lookup into the refs and check the map flag */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+ next_gen = pat_ref_newgen(ctx->ref);
+ return cli_dynmsg(appctx, LOG_INFO, memprintf(&msg, "New version created: %u\n", next_gen));
+ }
+
+ return 0;
+}
+
+static void cli_release_show_map(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ LIST_DEL_INIT(&ctx->bref.users);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+}
+
+static int cli_parse_show_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 ||
+ strcmp(args[1], "acl") == 0) {
+ const char *gen = NULL;
+
+ /* Set ACL or MAP flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* no parameter: display all available maps */
+ if (!*args[2]) {
+ appctx->io_handler = cli_io_handler_pats_list;
+ return 0;
+ }
+
+ /* For both "map" and "acl" we may have an optional generation
+ * number specified using a "@" character before the pattern
+ * file name.
+ */
+ if (*args[2] == '@') {
+ gen = args[2] + 1;
+ args++;
+ }
+
+ /* lookup into the refs and check the map flag */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ /* set the desired generation id in curr_gen */
+ if (gen)
+ ctx->curr_gen = str2uic(gen);
+ else
+ ctx->curr_gen = ctx->ref->curr_gen;
+
+ LIST_INIT(&ctx->bref.users);
+ appctx->io_handler = cli_io_handler_pat_list;
+ appctx->io_release = cli_release_show_map;
+ return 0;
+ }
+
+ return 0;
+}
+
+static int cli_parse_set_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0) {
+ char *err;
+
+ /* Set flags. */
+ ctx->display_flags = PAT_REF_MAP;
+
+ /* Expect three parameters: map name, key and new value. */
+ if (!*args[2] || !*args[3] || !*args[4])
+ return cli_err(appctx, "'set map' expects three parameters: map identifier, key and value.\n");
+
+ /* Lookup the reference in the maps. */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+
+ /* If the entry identifier starts with a '#', it is considered as a
+ * pointer id
+ */
+ if (args[3][0] == '#' && args[3][1] == '0' && args[3][2] == 'x') {
+ struct pat_ref_elt *ref;
+ long long int conv;
+ char *error;
+
+ /* Convert argument to integer value. */
+ conv = strtoll(&args[3][1], &error, 16);
+ if (*error != '\0')
+ return cli_err(appctx, "Malformed identifier. Please use #<id> or <file>.\n");
+
+ /* Convert and check integer to pointer. */
+ ref = (struct pat_ref_elt *)(long)conv;
+ if ((long long int)(long)ref != conv)
+ return cli_err(appctx, "Malformed identifier. Please use #<id> or <file>.\n");
+
+ /* Try to modify the entry. */
+ err = NULL;
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (!pat_ref_set_by_id(ctx->ref, ref, args[4], &err)) {
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (err)
+ return cli_dynerr(appctx, memprintf(&err, "%s.\n", err));
+ else
+ return cli_err(appctx, "Failed to update an entry.\n");
+ }
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+ else {
+ /* Else, use the entry identifier as pattern
+ * string, and update the value.
+ */
+ err = NULL;
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (!pat_ref_set(ctx->ref, args[3], args[4], &err, NULL)) {
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (err)
+ return cli_dynerr(appctx, memprintf(&err, "%s.\n", err));
+ else
+ return cli_err(appctx, "Failed to update an entry.\n");
+ }
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+
+ /* The set is done, send message. */
+ appctx->st0 = CLI_ST_PROMPT;
+ return 0;
+ }
+ return 1;
+}
+
+static int cli_parse_add_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 ||
+ strcmp(args[1], "acl") == 0) {
+ const char *gen = NULL;
+ uint genid = 0;
+ int ret;
+ char *err;
+
+ /* Set flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* For both "map" and "acl" we may have an optional generation
+ * number specified using a "@" character before the pattern
+ * file name.
+ */
+ if (*args[2] == '@') {
+ gen = args[2] + 1;
+ args++;
+ }
+
+ /* If the keyword is "map", we expect:
+ * - three parameters if there is no payload
+ * - one parameter if there is a payload
+ * If it is "acl", we expect only two parameters
+ */
+ if (ctx->display_flags == PAT_REF_MAP) {
+ if ((!payload && (!*args[2] || !*args[3] || !*args[4])) ||
+ (payload && !*args[2]))
+ return cli_err(appctx,
+ "'add map' expects three parameters (map identifier, key and value)"
+ " or one parameter (map identifier) and a payload\n");
+ }
+ else if (!*args[2] || !*args[3])
+ return cli_err(appctx, "'add acl' expects two parameters: ACL identifier and pattern.\n");
+
+ /* Lookup for the reference. */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ if (gen) {
+ genid = str2uic(gen);
+ if ((int)(genid - ctx->ref->next_gen) > 0) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Version number in the future, please use 'prepare map' before.\n");
+ else
+ return cli_err(appctx, "Version number in the future, please use 'prepare acl' before.\n");
+ }
+ }
+
+ /* The command "add acl" is prohibited if the reference
+ * uses samples.
+ */
+ if ((ctx->display_flags & PAT_REF_ACL) &&
+ (ctx->ref->flags & PAT_REF_SMP)) {
+ return cli_err(appctx,
+ "This ACL is shared with a map containing samples. "
+ "You must use the command 'add map' to add values.\n");
+ }
+
+ /* Add value(s). If no payload is used, key and value are read
+ * from the command line and only one key is set. If a payload
+ * is passed, one key/value pair is read per line till the end
+ * of the payload is reached.
+ */
+ err = NULL;
+
+ do {
+ char *key = args[3];
+ char *value = args[4];
+ size_t l;
+
+ if (payload) {
+ /* key and value passed as payload, one pair per line */
+ if (!*payload)
+ break;
+
+ key = payload;
+ l = strcspn(key, " \t");
+ payload += l;
+
+ if (!*payload && ctx->display_flags == PAT_REF_MAP)
+ return cli_dynerr(appctx, memprintf(&err, "Missing value for key '%s'.\n", key));
+
+ key[l] = 0;
+ payload++;
+
+ /* value */
+ payload += strspn(payload, " \t");
+ value = payload;
+ l = strcspn(value, "\n");
+ payload += l;
+ if (*payload)
+ payload++;
+ value[l] = 0;
+ }
+
+ if (ctx->display_flags != PAT_REF_MAP)
+ value = NULL;
+
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ ret = !!pat_ref_load(ctx->ref, gen ? genid : ctx->ref->curr_gen, key, value, -1, &err);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+
+ if (!ret) {
+ if (err)
+ return cli_dynerr(appctx, memprintf(&err, "%s.\n", err));
+ else
+ return cli_err(appctx, "Failed to add a key.\n");
+ }
+ } while (payload && *payload);
+
+ /* The add is done, send message. */
+ appctx->st0 = CLI_ST_PROMPT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int cli_parse_del_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* Expect two parameters: map name and key. */
+ if (!*args[2] || !*args[3]) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "This command expects two parameters: map identifier and key.\n");
+ else
+ return cli_err(appctx, "This command expects two parameters: ACL identifier and key.\n");
+ }
+
+ /* Lookup the reference in the maps. */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags))
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+
+ /* If the entry identifier starts with a '#', it is considered as a
+ * pointer id
+ */
+ if (args[3][0] == '#' && args[3][1] == '0' && args[3][2] == 'x') {
+ struct pat_ref_elt *ref;
+ long long int conv;
+ char *error;
+
+ /* Convert argument to integer value. */
+ conv = strtoll(&args[3][1], &error, 16);
+ if (*error != '\0')
+ return cli_err(appctx, "Malformed identifier. Please use #<id> or <file>.\n");
+
+ /* Convert and check integer to pointer. */
+ ref = (struct pat_ref_elt *)(long)conv;
+ if ((long long int)(long)ref != conv)
+ return cli_err(appctx, "Malformed identifier. Please use #<id> or <file>.\n");
+
+ /* Try to delete the entry. */
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (!pat_ref_delete_by_id(ctx->ref, ref)) {
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ /* The entry is not found, send message. */
+ return cli_err(appctx, "Key not found.\n");
+ }
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+ else {
+ /* Else, use the entry identifier as pattern
+ * string and try to delete the entry.
+ */
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (!pat_ref_delete(ctx->ref, args[3])) {
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ /* The entry is not found, send message. */
+ return cli_err(appctx, "Key not found.\n");
+ }
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+ }
+
+ /* The deletion is done, send message. */
+ appctx->st0 = CLI_ST_PROMPT;
+ return 1;
+}
+
+/* continue to clear a map which was started in the parser. The range of
+ * generations this applies to is taken from ctx->curr_gen for the oldest
+ * and ctx->prev_gen for the latest.
+ */
+static int cli_io_handler_clear_map(struct appctx *appctx)
+{
+ struct show_map_ctx *ctx = appctx->svcctx;
+ int finished;
+
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ finished = pat_ref_purge_range(ctx->ref, ctx->curr_gen, ctx->prev_gen, 100);
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+
+ if (!finished) {
+ /* let's come back later */
+ applet_have_more_data(appctx);
+ return 0;
+ }
+
+ trim_all_pools();
+ return 1;
+}
+
+/* note: sets ctx->curr_gen and ctx->prev_gen to the oldest and
+ * latest generations to clear, respectively, and will call the clear_map
+ * handler.
+ */
+static int cli_parse_clear_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 || strcmp(args[1], "acl") == 0) {
+ const char *gen = NULL;
+
+ /* Set ACL or MAP flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ /* For both "map" and "acl" we may have an optional generation
+ * number specified using a "@" character before the pattern
+ * file name.
+ */
+ if (*args[2] == '@') {
+ gen = args[2] + 1;
+ args++;
+ }
+
+ /* no parameter */
+ if (!*args[2]) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Missing map identifier.\n");
+ else
+ return cli_err(appctx, "Missing ACL identifier.\n");
+ }
+
+ /* lookup into the refs and check the map flag */
+ ctx->ref = pat_ref_lookup_ref(args[2]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ /* set the desired generation id in curr_gen/prev_gen */
+ if (gen)
+ ctx->prev_gen = ctx->curr_gen = str2uic(gen);
+ else
+ ctx->prev_gen = ctx->curr_gen = ctx->ref->curr_gen;
+
+ /* delegate the clearing to the I/O handler which can yield */
+ return 0;
+ }
+ return 1;
+}
+
+/* note: sets ctx->curr_gen and ctx->prev_gen to the oldest and
+ * latest generations to clear, respectively, and will call the clear_map
+ * handler.
+ */
+static int cli_parse_commit_map(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_map_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[1], "map") == 0 || strcmp(args[1], "acl") == 0) {
+ const char *gen = NULL;
+ uint genid;
+ uint ret;
+
+ /* Set ACL or MAP flags. */
+ if (args[1][0] == 'm')
+ ctx->display_flags = PAT_REF_MAP;
+ else
+ ctx->display_flags = PAT_REF_ACL;
+
+ if (*args[2] != '@')
+ return cli_err(appctx, "Missing version number.\n");
+
+ /* The generation number is mandatory for a commit. The range
+ * of generations that get trashed by a commit starts from the
+ * opposite of the current one and ends at the previous one.
+ */
+ gen = args[2] + 1;
+ genid = str2uic(gen);
+ ctx->prev_gen = genid - 1;
+ ctx->curr_gen = ctx->prev_gen - ((~0U) >> 1);
+
+ /* no parameter */
+ if (!*args[3]) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Missing map identifier.\n");
+ else
+ return cli_err(appctx, "Missing ACL identifier.\n");
+ }
+
+ /* lookup into the refs and check the map flag */
+ ctx->ref = pat_ref_lookup_ref(args[3]);
+ if (!ctx->ref ||
+ !(ctx->ref->flags & ctx->display_flags)) {
+ if (ctx->display_flags == PAT_REF_MAP)
+ return cli_err(appctx, "Unknown map identifier. Please use #<id> or <file>.\n");
+ else
+ return cli_err(appctx, "Unknown ACL identifier. Please use #<id> or <file>.\n");
+ }
+
+ HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock);
+ if (genid - (ctx->ref->curr_gen + 1) <
+ ctx->ref->next_gen - ctx->ref->curr_gen)
+ ret = pat_ref_commit(ctx->ref, genid);
+ else
+ ret = 1;
+ HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock);
+
+ if (ret != 0)
+ return cli_err(appctx, "Version number out of range.\n");
+
+ /* delegate the clearing to the I/O handler which can yield */
+ return 0;
+ }
+ return 1;
+}
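+
+/* Illustrative note (not from the original sources): the range check above
+ * accepts <genid> only when it lies in the window (curr_gen, next_gen]. For
+ * example, with curr_gen=4 and next_gen=6, "commit map @5" computes
+ * 5 - (4 + 1) = 0 < 6 - 4 = 2 and proceeds, while "commit map @7" computes
+ * 2 < 2, which fails with "Version number out of range". Using unsigned
+ * subtraction keeps the comparison correct across counter wrap-around.
+ */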
+
+/* register cli keywords */
+
+static struct cli_kw_list cli_kws = {{ },{
+ { { "add", "acl", NULL }, "add acl [@<ver>] <acl> <pattern> : add an acl entry", cli_parse_add_map, NULL },
+ { { "clear", "acl", NULL }, "clear acl [@<ver>] <acl> : clear the contents of this acl", cli_parse_clear_map, cli_io_handler_clear_map, NULL },
+ { { "commit","acl", NULL }, "commit acl @<ver> <acl> : commit the ACL at this version", cli_parse_commit_map, cli_io_handler_clear_map, NULL },
+ { { "del", "acl", NULL }, "del acl <acl> [<key>|#<ref>] : delete acl entries matching <key>", cli_parse_del_map, NULL },
+ { { "get", "acl", NULL }, "get acl <acl> <value> : report the patterns matching a sample for an ACL", cli_parse_get_map, cli_io_handler_map_lookup, cli_release_mlook },
+ { { "prepare","acl",NULL }, "prepare acl <acl> : prepare a new version for atomic ACL replacement", cli_parse_prepare_map, NULL },
+ { { "show", "acl", NULL }, "show acl [@<ver>] <acl>] : report available acls or dump an acl's contents", cli_parse_show_map, NULL },
+ { { "add", "map", NULL }, "add map [@<ver>] <map> <key> <val> : add a map entry (payload supported instead of key/val)", cli_parse_add_map, NULL },
+ { { "clear", "map", NULL }, "clear map [@<ver>] <map> : clear the contents of this map", cli_parse_clear_map, cli_io_handler_clear_map, NULL },
+ { { "commit","map", NULL }, "commit map @<ver> <map> : commit the map at this version", cli_parse_commit_map, cli_io_handler_clear_map, NULL },
+ { { "del", "map", NULL }, "del map <map> [<key>|#<ref>] : delete map entries matching <key>", cli_parse_del_map, NULL },
+ { { "get", "map", NULL }, "get map <acl> <value> : report the keys and values matching a sample for a map", cli_parse_get_map, cli_io_handler_map_lookup, cli_release_mlook },
+ { { "prepare","map",NULL }, "prepare map <acl> : prepare a new version for atomic map replacement", cli_parse_prepare_map, NULL },
+ { { "set", "map", NULL }, "set map <map> [<key>|#<ref>] <value> : modify a map entry", cli_parse_set_map, NULL },
+ { { "show", "map", NULL }, "show map [@ver] [map] : report available maps or dump a map's contents", cli_parse_show_map, NULL },
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
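+
+/* Usage sketch (illustrative, not part of the sources): an atomic map update
+ * over the CLI typically chains the keywords registered above:
+ *
+ *   prepare map /etc/haproxy/host2be.map        (reports the new version)
+ *   add map @2 /etc/haproxy/host2be.map example.org be_www
+ *   commit map @2 /etc/haproxy/host2be.map
+ *
+ * where "@2" is assumed to be the version returned by "prepare"; "commit"
+ * then lets cli_io_handler_clear_map() purge the older generations.
+ */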
+
+/* Note: must not be declared <const> as its list will be overwritten
+ *
+ * For the map_*_int keywords, the output is declared as SMP_T_SINT, but the converter function
+ * can provide SMP_T_SINT or SMP_T_BOOL depending on how the patterns found in the
+ * file can be parsed.
+ *
+ * For the map_*_ip keywords, the output is declared as SMP_T_ADDR, but the converter function
+ * can provide SMP_T_IPV4 or SMP_T_IPV6 depending on the patterns found in the file.
+ *
+ * The map_* keywords only emit strings.
+ *
+ * The output type is only used during configuration parsing, to detect
+ * compatibility problems.
+ *
+ * The arguments are: <file>[,<default value>]
+ */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "map", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_STR },
+ { "map_str", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_STR },
+ { "map_beg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_BEG },
+ { "map_sub", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_SUB },
+ { "map_dir", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DIR },
+ { "map_dom", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DOM },
+ { "map_end", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_END },
+ { "map_reg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REG },
+ { "map_regm", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REGM},
+ { "map_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_STR, (void *)PAT_MATCH_INT },
+ { "map_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, (void *)PAT_MATCH_IP },
+
+ { "map_str_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_STR },
+ { "map_beg_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_BEG },
+ { "map_sub_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_SUB },
+ { "map_dir_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_DIR },
+ { "map_dom_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_DOM },
+ { "map_end_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_END },
+ { "map_reg_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_STR, SMP_T_SINT, (void *)PAT_MATCH_REG },
+ { "map_int_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_SINT, SMP_T_SINT, (void *)PAT_MATCH_INT },
+ { "map_ip_int", sample_conv_map, ARG2(1,STR,SINT), sample_load_map, SMP_T_ADDR, SMP_T_SINT, (void *)PAT_MATCH_IP },
+
+ { "map_str_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_STR },
+ { "map_beg_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_BEG },
+ { "map_sub_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_SUB },
+ { "map_dir_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_DIR },
+ { "map_dom_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_DOM },
+ { "map_end_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_END },
+ { "map_reg_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_ADDR, (void *)PAT_MATCH_REG },
+ { "map_int_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_ADDR, (void *)PAT_MATCH_INT },
+ { "map_ip_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_ADDR, (void *)PAT_MATCH_IP },
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
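+
+/* Configuration sketch (illustrative): the converters registered above are
+ * typically used from a proxy section, here with a hypothetical map file and
+ * a default value as described in the comment above:
+ *
+ *   use_backend %[req.hdr(host),lower,map(/etc/haproxy/host2be.map,be_default)]
+ */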
diff --git a/src/mjson.c b/src/mjson.c
new file mode 100644
index 0000000..73b7a57
--- /dev/null
+++ b/src/mjson.c
@@ -0,0 +1,1048 @@
+// Copyright (c) 2018-2020 Cesanta Software Limited
+// All rights reserved
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+#include <float.h>
+#include <math.h>
+
+#include <import/mjson.h>
+
+#if defined(_MSC_VER)
+#define alloca(x) _alloca(x)
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER < 1700
+#define va_copy(x, y) (x) = (y)
+#define isinf(x) !_finite(x)
+#define isnan(x) _isnan(x)
+#endif
+
+static double mystrtod(const char *str, char **end);
+
+static int mjson_esc(int c, int esc) {
+ const char *p, *esc1 = "\b\f\n\r\t\\\"", *esc2 = "bfnrt\\\"";
+ for (p = esc ? esc1 : esc2; *p != '\0'; p++) {
+ if (*p == c) return esc ? esc2[p - esc1] : esc1[p - esc2];
+ }
+ return 0;
+}
+
+static int mjson_escape(int c) {
+ return mjson_esc(c, 1);
+}
+
+static int mjson_pass_string(const char *s, int len) {
+ int i;
+ for (i = 0; i < len; i++) {
+ if (s[i] == '\\' && i + 1 < len && mjson_escape(s[i + 1])) {
+ i++;
+ } else if (s[i] == '\0') {
+ return MJSON_ERROR_INVALID_INPUT;
+ } else if (s[i] == '"') {
+ return i;
+ }
+ }
+ return MJSON_ERROR_INVALID_INPUT;
+}
+
+int mjson(const char *s, int len, mjson_cb_t cb, void *ud) {
+ enum { S_VALUE, S_KEY, S_COLON, S_COMMA_OR_EOO } expecting = S_VALUE;
+ unsigned char nesting[MJSON_MAX_DEPTH];
+ int i, depth = 0;
+#define MJSONCALL(ev) \
+ if (cb != NULL && cb(ev, s, start, i - start + 1, ud)) return i + 1;
+
+// In the ASCII table, the distance between `[` and `]` is 2.
+// Ditto for `{` and `}`. Hence the +2 in the code below.
+#define MJSONEOO() \
+ do { \
+ if (c != nesting[depth - 1] + 2) return MJSON_ERROR_INVALID_INPUT; \
+ depth--; \
+ if (depth == 0) { \
+ MJSONCALL(tok); \
+ return i + 1; \
+ } \
+ } while (0)
+
+ for (i = 0; i < len; i++) {
+ int start = i;
+ unsigned char c = ((unsigned char *) s)[i];
+ int tok = c;
+ if (c == ' ' || c == '\t' || c == '\n' || c == '\r') continue;
+ // printf("- %c [%.*s] %d %d\n", c, i, s, depth, expecting);
+ switch (expecting) {
+ case S_VALUE:
+ if (c == '{') {
+ if (depth >= (int) sizeof(nesting)) return MJSON_ERROR_TOO_DEEP;
+ nesting[depth++] = c;
+ expecting = S_KEY;
+ break;
+ } else if (c == '[') {
+ if (depth >= (int) sizeof(nesting)) return MJSON_ERROR_TOO_DEEP;
+ nesting[depth++] = c;
+ break;
+ } else if (c == ']' && depth > 0) { // Empty array
+ MJSONEOO();
+ } else if (c == 't' && i + 3 < len && memcmp(&s[i], "true", 4) == 0) {
+ i += 3;
+ tok = MJSON_TOK_TRUE;
+ } else if (c == 'n' && i + 3 < len && memcmp(&s[i], "null", 4) == 0) {
+ i += 3;
+ tok = MJSON_TOK_NULL;
+ } else if (c == 'f' && i + 4 < len && memcmp(&s[i], "false", 5) == 0) {
+ i += 4;
+ tok = MJSON_TOK_FALSE;
+ } else if (c == '-' || ((c >= '0' && c <= '9'))) {
+ char *end = NULL;
+ mystrtod(&s[i], &end);
+ if (end != NULL) i += (int) (end - &s[i] - 1);
+ tok = MJSON_TOK_NUMBER;
+ } else if (c == '"') {
+ int n = mjson_pass_string(&s[i + 1], len - i - 1);
+ if (n < 0) return n;
+ i += n + 1;
+ tok = MJSON_TOK_STRING;
+ } else {
+ return MJSON_ERROR_INVALID_INPUT;
+ }
+ if (depth == 0) {
+ MJSONCALL(tok);
+ return i + 1;
+ }
+ expecting = S_COMMA_OR_EOO;
+ break;
+
+ case S_KEY:
+ if (c == '"') {
+ int n = mjson_pass_string(&s[i + 1], len - i - 1);
+ if (n < 0) return n;
+ i += n + 1;
+ tok = MJSON_TOK_KEY;
+ expecting = S_COLON;
+ } else if (c == '}') { // Empty object
+ MJSONEOO();
+ expecting = S_COMMA_OR_EOO;
+ } else {
+ return MJSON_ERROR_INVALID_INPUT;
+ }
+ break;
+
+ case S_COLON:
+ if (c == ':') {
+ expecting = S_VALUE;
+ } else {
+ return MJSON_ERROR_INVALID_INPUT;
+ }
+ break;
+
+ case S_COMMA_OR_EOO:
+ if (depth <= 0) return MJSON_ERROR_INVALID_INPUT;
+ if (c == ',') {
+ expecting = (nesting[depth - 1] == '{') ? S_KEY : S_VALUE;
+ } else if (c == ']' || c == '}') {
+ MJSONEOO();
+ } else {
+ return MJSON_ERROR_INVALID_INPUT;
+ }
+ break;
+ }
+ MJSONCALL(tok);
+ }
+ return MJSON_ERROR_INVALID_INPUT;
+}
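+
+/* Illustrative sketch (not part of upstream mjson): a minimal callback for
+ * the parser above. Events are single characters ('{', '}', '[', ']', ',',
+ * ':') or MJSON_TOK_* values; returning non-zero stops the traversal.
+ *
+ *   static int dump_cb(int ev, const char *s, int off, int len, void *ud) {
+ *     printf("ev=%d tok=[%.*s]\n", ev, len, s + off);
+ *     return 0;  // keep parsing
+ *   }
+ *   // mjson("{\"a\":[1,2]}", 11, dump_cb, NULL);
+ */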
+
+struct mjson_get_data {
+ const char *path; // Lookup json path
+ int pos; // Current path index
+ int d1; // Current depth of traversal
+ int d2; // Expected depth of traversal
+ int i1; // Index in an array
+ int i2; // Expected index in an array
+ int obj; // If the value is array/object, offset where it starts
+ const char **tokptr; // Destination
+ int *toklen; // Destination length
+ int tok; // Returned token
+};
+
+#include <stdio.h>
+
+static int plen1(const char *s) {
+ int i = 0, n = 0;
+ while (s[i] != '\0' && s[i] != '.' && s[i] != '[')
+ n++, i += s[i] == '\\' ? 2 : 1;
+ // printf("PLEN: s: [%s], [%.*s] => %d\n", s, i, s, n);
+ return n;
+}
+
+static int plen2(const char *s) {
+ int i = 0, __attribute__((unused)) n = 0;
+ while (s[i] != '\0' && s[i] != '.' && s[i] != '[')
+ n++, i += s[i] == '\\' ? 2 : 1;
+ // printf("PLEN: s: [%s], [%.*s] => %d\n", s, i, s, n);
+ return i;
+}
+
+static int kcmp(const char *a, const char *b, int n) {
+ int i = 0, j = 0, r = 0;
+ for (i = 0, j = 0; j < n; i++, j++) {
+ if (b[i] == '\\') i++;
+ if ((r = a[j] - b[i]) != 0) return r;
+ }
+ // printf("KCMP: a: [%.*s], b:[%.*s] ==> %d\n", n, a, i, b, r);
+ return r;
+}
+
+static int mjson_get_cb(int tok, const char *s, int off, int len, void *ud) {
+ struct mjson_get_data *data = (struct mjson_get_data *) ud;
+ // printf("--> %2x %2d %2d %2d %2d\t'%s'\t'%.*s'\t\t'%.*s'\n", tok, data->d1,
+ // data->d2, data->i1, data->i2, data->path + data->pos, off, s, len,
+ // s + off);
+ if (data->tok != MJSON_TOK_INVALID) return 1; // Found
+
+ if (tok == '{') {
+ if (!data->path[data->pos] && data->d1 == data->d2) data->obj = off;
+ data->d1++;
+ } else if (tok == '[') {
+ if (data->d1 == data->d2 && data->path[data->pos] == '[') {
+ data->i1 = 0;
+ data->i2 = (int) mystrtod(&data->path[data->pos + 1], NULL);
+ if (data->i1 == data->i2) {
+ data->d2++;
+ data->pos += 3;
+ }
+ }
+ if (!data->path[data->pos] && data->d1 == data->d2) data->obj = off;
+ data->d1++;
+ } else if (tok == ',') {
+ if (data->d1 == data->d2 + 1) {
+ data->i1++;
+ if (data->i1 == data->i2) {
+ while (data->path[data->pos] != ']') data->pos++;
+ data->pos++;
+ data->d2++;
+ }
+ }
+ } else if (tok == MJSON_TOK_KEY && data->d1 == data->d2 + 1 &&
+ data->path[data->pos] == '.' && s[off] == '"' &&
+ s[off + len - 1] == '"' &&
+ plen1(&data->path[data->pos + 1]) == len - 2 &&
+ kcmp(s + off + 1, &data->path[data->pos + 1], len - 2) == 0) {
+ data->d2++;
+ data->pos += plen2(&data->path[data->pos + 1]) + 1;
+ } else if (tok == MJSON_TOK_KEY && data->d1 == data->d2) {
+ return 1; // Exhausted path, not found
+ } else if (tok == '}' || tok == ']') {
+ data->d1--;
+ // data->d2--;
+ if (!data->path[data->pos] && data->d1 == data->d2 && data->obj != -1) {
+ data->tok = tok - 2;
+ if (data->tokptr) *data->tokptr = s + data->obj;
+ if (data->toklen) *data->toklen = off - data->obj + 1;
+ return 1;
+ }
+ } else if (MJSON_TOK_IS_VALUE(tok)) {
+ // printf("TOK --> %d\n", tok);
+ if (data->d1 == data->d2 && !data->path[data->pos]) {
+ data->tok = tok;
+ if (data->tokptr) *data->tokptr = s + off;
+ if (data->toklen) *data->toklen = len;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+enum mjson_tok mjson_find(const char *s, int len, const char *jp,
+ const char **tokptr, int *toklen) {
+ struct mjson_get_data data = {jp, 1, 0, 0, 0,
+ 0, -1, tokptr, toklen, MJSON_TOK_INVALID};
+ if (jp[0] != '$') return MJSON_TOK_INVALID;
+ if (mjson(s, len, mjson_get_cb, &data) < 0) return MJSON_TOK_INVALID;
+ return (enum mjson_tok) data.tok;
+}
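+
+/* Illustrative sketch: lookups start at '$' (the root) and descend with
+ * ".key" and "[index]" components, e.g., assuming the declarations above:
+ *
+ *   const char *p; int n;
+ *   // mjson_find("{\"a\":{\"b\":[7]}}", 15, "$.a.b[0]", &p, &n)
+ *   // returns MJSON_TOK_NUMBER with p pointing at "7" and n == 1.
+ */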
+
+int mjson_get_number(const char *s, int len, const char *path, double *v) {
+ const char *p;
+ int tok, n;
+ if ((tok = mjson_find(s, len, path, &p, &n)) == MJSON_TOK_NUMBER) {
+ if (v != NULL) *v = mystrtod(p, NULL);
+ }
+ return tok == MJSON_TOK_NUMBER ? 1 : 0;
+}
+
+int mjson_get_bool(const char *s, int len, const char *path, int *v) {
+ int tok = mjson_find(s, len, path, NULL, NULL);
+ if (tok == MJSON_TOK_TRUE && v != NULL) *v = 1;
+ if (tok == MJSON_TOK_FALSE && v != NULL) *v = 0;
+ return tok == MJSON_TOK_TRUE || tok == MJSON_TOK_FALSE ? 1 : 0;
+}
+
+static unsigned char mjson_unhex_nimble(const char *s) {
+ unsigned char i, v = 0;
+ for (i = 0; i < 2; i++) {
+ int c = s[i];
+ if (i > 0) v <<= 4;
+ v |= (c >= '0' && c <= '9') ? c - '0'
+ : (c >= 'A' && c <= 'F') ? c - '7' : c - 'W';
+ }
+ return v;
+}
+
+static int mjson_unescape(const char *s, int len, char *to, int n) {
+ int i, j;
+ for (i = 0, j = 0; i < len && j < n; i++, j++) {
+ if (s[i] == '\\' && i + 5 < len && s[i + 1] == 'u') {
+ // \uXXXX escape. We can only process simple one-byte chars
+ // \u00xx from the ASCII range; more complex chars would require
+ // dragging in a UTF8 library, which is too much for us
+ if (s[i + 2] != '0' || s[i + 3] != '0') return -1; // Too much, give up
+ to[j] = mjson_unhex_nimble(s + i + 4);
+ i += 5;
+ } else if (s[i] == '\\' && i + 1 < len) {
+ int c = mjson_esc(s[i + 1], 0);
+ if (c == 0) return -1;
+ to[j] = c;
+ i++;
+ } else {
+ to[j] = s[i];
+ }
+ }
+ if (j >= n) return -1;
+ if (n > 0) to[j] = '\0';
+ return j;
+}
+
+int mjson_get_string(const char *s, int len, const char *path, char *to,
+ int n) {
+ const char *p;
+ int sz;
+ if (mjson_find(s, len, path, &p, &sz) != MJSON_TOK_STRING) return -1;
+ return mjson_unescape(p + 1, sz - 2, to, n);
+}
+
+int mjson_get_hex(const char *s, int len, const char *x, char *to, int n) {
+ const char *p;
+ int i, j, sz;
+ if (mjson_find(s, len, x, &p, &sz) != MJSON_TOK_STRING) return -1;
+ for (i = j = 0; i < sz - 3 && j < n; i += 2, j++) {
+ ((unsigned char *) to)[j] = mjson_unhex_nimble(p + i + 1);
+ }
+ if (j < n) to[j] = '\0';
+ return j;
+}
+
+#if MJSON_ENABLE_BASE64
+static int mjson_base64rev(int c) {
+ if (c >= 'A' && c <= 'Z') {
+ return c - 'A';
+ } else if (c >= 'a' && c <= 'z') {
+ return c + 26 - 'a';
+ } else if (c >= '0' && c <= '9') {
+ return c + 52 - '0';
+ } else if (c == '+') {
+ return 62;
+ } else if (c == '/') {
+ return 63;
+ } else {
+ return 64;
+ }
+}
+
+int mjson_base64_dec(const char *src, int n, char *dst, int dlen) {
+ const char *end = src + n;
+ int len = 0;
+ while (src + 3 < end && len < dlen) {
+ int a = mjson_base64rev(src[0]), b = mjson_base64rev(src[1]),
+ c = mjson_base64rev(src[2]), d = mjson_base64rev(src[3]);
+ dst[len++] = (a << 2) | (b >> 4);
+ if (src[2] != '=' && len < dlen) {
+ dst[len++] = (b << 4) | (c >> 2);
+ if (src[3] != '=' && len < dlen) {
+ dst[len++] = (c << 6) | d;
+ }
+ }
+ src += 4;
+ }
+ if (len < dlen) dst[len] = '\0';
+ return len;
+}
+
+int mjson_get_base64(const char *s, int len, const char *path, char *to,
+ int n) {
+ const char *p;
+ int sz;
+ if (mjson_find(s, len, path, &p, &sz) != MJSON_TOK_STRING) return 0;
+ return mjson_base64_dec(p + 1, sz - 2, to, n);
+}
+#endif // MJSON_ENABLE_BASE64
+
+#if MJSON_ENABLE_NEXT
+struct nextdata {
+ int off, len, depth, t, vo, arrayindex;
+ int *koff, *klen, *voff, *vlen, *vtype;
+};
+
+static int next_cb(int tok, const char *s, int off, int len, void *ud) {
+ struct nextdata *d = (struct nextdata *) ud;
+ // int i;
+ switch (tok) {
+ case '{':
+ case '[':
+ if (d->depth == 0 && tok == '[') d->arrayindex = 0;
+ if (d->depth == 1 && off > d->off) {
+ d->vo = off;
+ d->t = tok == '{' ? MJSON_TOK_OBJECT : MJSON_TOK_ARRAY;
+ if (d->voff) *d->voff = off;
+ if (d->vtype) *d->vtype = d->t;
+ }
+ d->depth++;
+ break;
+ case '}':
+ case ']':
+ d->depth--;
+ if (d->depth == 1 && d->vo) {
+ d->len = off + len;
+ if (d->vlen) *d->vlen = d->len - d->vo;
+ if (d->arrayindex >= 0) {
+ if (d->koff) *d->koff = d->arrayindex; // koff holds array index
+ if (d->klen) *d->klen = 0; // klen holds 0
+ }
+ return 1;
+ }
+ if (d->depth == 1 && d->arrayindex >= 0) d->arrayindex++;
+ break;
+ case ',':
+ case ':':
+ break;
+ case MJSON_TOK_KEY:
+ if (d->depth == 1 && d->off < off) {
+ if (d->koff) *d->koff = off; // And report back to the user
+ if (d->klen) *d->klen = len; // If we have to
+ }
+ break;
+ default:
+ if (d->depth != 1) break;
+ // If we're iterating over the array
+ if (off > d->off) {
+ d->len = off + len;
+ if (d->vlen) *d->vlen = len; // value length
+ if (d->voff) *d->voff = off; // value offset
+ if (d->vtype) *d->vtype = tok; // value type
+ if (d->arrayindex >= 0) {
+ if (d->koff) *d->koff = d->arrayindex; // koff holds array index
+ if (d->klen) *d->klen = 0; // klen holds 0
+ }
+ return 1;
+ }
+ if (d->arrayindex >= 0) d->arrayindex++;
+ break;
+ }
+ (void) s;
+ return 0;
+}
+
+int mjson_next(const char *s, int n, int off, int *koff, int *klen, int *voff,
+ int *vlen, int *vtype) {
+ struct nextdata d = {off, 0, 0, 0, 0, -1, koff, klen, voff, vlen, vtype};
+ mjson(s, n, next_cb, &d);
+ return d.len;
+}
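+
+/* Illustrative sketch: mjson_next() is designed to be called in a loop,
+ * feeding each return value back in as <off> until it returns 0, exactly as
+ * mjson_merge() does below:
+ *
+ *   int koff, klen, voff, vlen, vtype, off = 0;
+ *   while ((off = mjson_next(s, n, off, &koff, &klen,
+ *                            &voff, &vlen, &vtype)) != 0) {
+ *     // s+koff/klen: key (or array index), s+voff/vlen: value, vtype: token
+ *   }
+ */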
+#endif
+
+#if MJSON_ENABLE_PRINT
+int mjson_print_fixed_buf(const char *ptr, int len, void *fndata) {
+ struct mjson_fixedbuf *fb = (struct mjson_fixedbuf *) fndata;
+ int i, left = fb->size - 1 - fb->len;
+ if (left < len) len = left;
+ for (i = 0; i < len; i++) fb->ptr[fb->len + i] = ptr[i];
+ fb->len += len;
+ fb->ptr[fb->len] = '\0';
+ return len;
+}
+
+// This function allocates memory in chunks of size MJSON_DYNBUF_CHUNK
+// to reduce memory fragmentation when many small writes are performed,
+// e.g. when printing a base64 string or a hex string.
+int mjson_print_dynamic_buf(const char *ptr, int len, void *fndata) {
+ char *s, *buf = *(char **) fndata;
+ size_t curlen = buf == NULL ? 0 : strlen(buf);
+ size_t new_size = curlen + len + 1 + MJSON_DYNBUF_CHUNK;
+ new_size -= new_size % MJSON_DYNBUF_CHUNK;
+
+ if ((s = (char *) realloc(buf, new_size)) == NULL) {
+ return 0;
+ } else {
+ memcpy(s + curlen, ptr, len);
+ s[curlen + len] = '\0';
+ *(char **) fndata = s;
+ return len;
+ }
+}
+
+int mjson_print_null(const char *ptr, int len, void *userdata) {
+ (void) ptr;
+ (void) userdata;
+ return len;
+}
+
+int mjson_print_buf(mjson_print_fn_t fn, void *fnd, const char *buf, int len) {
+ return fn(buf, len, fnd);
+}
+
+int mjson_print_long(mjson_print_fn_t fn, void *fnd, long val, int is_signed) {
+ unsigned long v = val, s = 0, n, i;
+ char buf[20], t;
+ if (is_signed && val < 0) {
+ buf[s++] = '-', v = -val;
+ }
+ // Repeated division by 10 yields the digits least-significant first, so this
+ // loop builds the string in reverse order; the reversal loop below then
+ // swaps the digits back into place.
+ for (n = 0; v > 0; v /= 10) buf[s + n++] = "0123456789"[v % 10];
+ // Reverse a string
+ for (i = 0; i < n / 2; i++)
+ t = buf[s + i], buf[s + i] = buf[s + n - i - 1], buf[s + n - i - 1] = t;
+ if (val == 0) buf[n++] = '0'; // Handle special case
+ return fn(buf, s + n, fnd);
+}
+
+int mjson_print_int(mjson_print_fn_t fn, void *fnd, int v, int s) {
+ return mjson_print_long(fn, fnd, s ? (long) v : (unsigned) v, s);
+}
+
+static int addexp(char *buf, int e, int sign) {
+ int n = 0;
+ buf[n++] = 'e';
+ buf[n++] = sign;
+ if (e > 400) return 0;
+ if (e < 10) buf[n++] = '0';
+ if (e >= 100) buf[n++] = (e / 100) + '0', e -= 100 * (e / 100);
+ if (e >= 10) buf[n++] = (e / 10) + '0', e -= 10 * (e / 10);
+ buf[n++] = e + '0';
+ return n;
+}
+
+int mjson_print_dbl(mjson_print_fn_t fn, void *fnd, double d, int width) {
+ char buf[40];
+ int i, s = 0, n = 0, e = 0;
+ double t, mul, saved;
+ if (d == 0.0) return fn("0", 1, fnd);
+ if (isinf(d)) return fn(d > 0 ? "inf" : "-inf", d > 0 ? 3 : 4, fnd);
+ if (isnan(d)) return fn("nan", 3, fnd);
+ if (d < 0.0) d = -d, buf[s++] = '-';
+
+ // Round
+ saved = d;
+ mul = 1.0;
+ while (d >= 10.0 && d / mul >= 10.0) mul *= 10.0;
+ while (d <= 1.0 && d / mul <= 1.0) mul /= 10.0;
+ for (i = 0, t = mul * 5; i < width; i++) t /= 10.0;
+ d += t;
+ // Calculate exponent, and 'mul' for scientific representation
+ mul = 1.0;
+ while (d >= 10.0 && d / mul >= 10.0) mul *= 10.0, e++;
+ while (d < 1.0 && d / mul < 1.0) mul /= 10.0, e--;
+ // printf(" --> %g %d %g %g\n", saved, e, t, mul);
+
+ if (e >= width) {
+ struct mjson_fixedbuf fb = {buf + s, (int) sizeof(buf) - s, 0};
+ n = mjson_print_dbl(mjson_print_fixed_buf, &fb, saved / mul, width);
+ // printf(" --> %.*g %d [%.*s]\n", 10, d / t, e, fb.len, fb.ptr);
+ n += addexp(buf + s + n, e, '+');
+ return fn(buf, s + n, fnd);
+ } else if (e <= -width) {
+ struct mjson_fixedbuf fb = {buf + s, (int) sizeof(buf) - s, 0};
+ n = mjson_print_dbl(mjson_print_fixed_buf, &fb, saved / mul, width);
+ // printf(" --> %.*g %d [%.*s]\n", 10, d / mul, e, fb.len, fb.ptr);
+ n += addexp(buf + s + n, -e, '-');
+ return fn(buf, s + n, fnd);
+ } else {
+ for (i = 0, t = mul; d >= 1.0 && s + n < (int) sizeof(buf); i++) {
+ int ch = (int) (d / t);
+ if (n > 0 || ch > 0) buf[s + n++] = ch + '0';
+ d -= ch * t;
+ t /= 10.0;
+ }
+ // printf(" --> [%g] -> %g %g (%d) [%.*s]\n", saved, d, t, n, s + n, buf);
+ if (n == 0) buf[s++] = '0';
+ while (t >= 1.0 && n + s < (int) sizeof(buf)) buf[n++] = '0', t /= 10.0;
+ if (s + n < (int) sizeof(buf)) buf[n + s++] = '.';
+ // printf(" 1--> [%g] -> [%.*s]\n", saved, s + n, buf);
+ for (i = 0, t = 0.1; s + n < (int) sizeof(buf) && n < width; i++) {
+ int ch = (int) (d / t);
+ buf[s + n++] = ch + '0';
+ d -= ch * t;
+ t /= 10.0;
+ }
+ }
+ while (n > 0 && buf[s + n - 1] == '0') n--; // Trim trailing zeros
+ if (n > 0 && buf[s + n - 1] == '.') n--; // Trim trailing dot
+ return fn(buf, s + n, fnd);
+}
+
+int mjson_print_str(mjson_print_fn_t fn, void *fnd, const char *s, int len) {
+ int i, n = fn("\"", 1, fnd);
+ for (i = 0; i < len; i++) {
+ char c = mjson_escape(s[i]);
+ if (c) {
+ n += fn("\\", 1, fnd);
+ n += fn(&c, 1, fnd);
+ } else {
+ n += fn(&s[i], 1, fnd);
+ }
+ }
+ return n + fn("\"", 1, fnd);
+}
+
+#if MJSON_ENABLE_BASE64
+int mjson_print_b64(mjson_print_fn_t fn, void *fnd, const unsigned char *s,
+ int n) {
+ const char *t =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+ int i, len = fn("\"", 1, fnd);
+ for (i = 0; i < n; i += 3) {
+ int a = s[i], b = i + 1 < n ? s[i + 1] : 0, c = i + 2 < n ? s[i + 2] : 0;
+ char buf[4] = {t[a >> 2], t[(a & 3) << 4 | (b >> 4)], '=', '='};
+ if (i + 1 < n) buf[2] = t[(b & 15) << 2 | (c >> 6)];
+ if (i + 2 < n) buf[3] = t[c & 63];
+ len += fn(buf, sizeof(buf), fnd);
+ }
+ return len + fn("\"", 1, fnd);
+}
+#endif /* MJSON_ENABLE_BASE64 */
+
+int mjson_vprintf(mjson_print_fn_t fn, void *fnd, const char *fmt,
+ va_list xap) {
+ int i = 0, n = 0;
+ va_list ap;
+ va_copy(ap, xap);
+ while (fmt[i] != '\0') {
+ if (fmt[i] == '%') {
+ char fc = fmt[++i];
+ int is_long = 0;
+ if (fc == 'l') {
+ is_long = 1;
+ fc = fmt[i + 1];
+ }
+ if (fc == 'Q') {
+ char *buf = va_arg(ap, char *);
+ n += mjson_print_str(fn, fnd, buf ? buf : "",
+ buf ? (int) strlen(buf) : 0);
+ } else if (strncmp(&fmt[i], ".*Q", 3) == 0) {
+ int len = va_arg(ap, int);
+ char *buf = va_arg(ap, char *);
+ n += mjson_print_str(fn, fnd, buf, len);
+ i += 2;
+ } else if (fc == 'd' || fc == 'u') {
+ int is_signed = (fc == 'd');
+ if (is_long) {
+ long val = va_arg(ap, long);
+ n += mjson_print_long(fn, fnd, val, is_signed);
+ i++;
+ } else {
+ int val = va_arg(ap, int);
+ n += mjson_print_int(fn, fnd, val, is_signed);
+ }
+ } else if (fc == 'B') {
+ const char *s = va_arg(ap, int) ? "true" : "false";
+ n += mjson_print_buf(fn, fnd, s, (int) strlen(s));
+ } else if (fc == 's') {
+ char *buf = va_arg(ap, char *);
+ n += mjson_print_buf(fn, fnd, buf, (int) strlen(buf));
+ } else if (strncmp(&fmt[i], ".*s", 3) == 0) {
+ int len = va_arg(ap, int);
+ char *buf = va_arg(ap, char *);
+ n += mjson_print_buf(fn, fnd, buf, len);
+ i += 2;
+ } else if (fc == 'g') {
+ n += mjson_print_dbl(fn, fnd, va_arg(ap, double), 6);
+ } else if (strncmp(&fmt[i], ".*g", 3) == 0) {
+ int width = va_arg(ap, int);
+ n += mjson_print_dbl(fn, fnd, va_arg(ap, double), width);
+ i += 2;
+#if MJSON_ENABLE_BASE64
+ } else if (fc == 'V') {
+ int len = va_arg(ap, int);
+ const char *buf = va_arg(ap, const char *);
+ n += mjson_print_b64(fn, fnd, (unsigned char *) buf, len);
+#endif
+ } else if (fc == 'H') {
+ const char *hex = "0123456789abcdef";
+ int i, len = va_arg(ap, int);
+ const unsigned char *p = va_arg(ap, const unsigned char *);
+ n += fn("\"", 1, fnd);
+ for (i = 0; i < len; i++) {
+ n += fn(&hex[(p[i] >> 4) & 15], 1, fnd);
+ n += fn(&hex[p[i] & 15], 1, fnd);
+ }
+ n += fn("\"", 1, fnd);
+ } else if (fc == 'M') {
+ mjson_vprint_fn_t vfn = va_arg(ap, mjson_vprint_fn_t);
+ n += vfn(fn, fnd, &ap);
+ }
+ i++;
+ } else {
+ n += mjson_print_buf(fn, fnd, &fmt[i++], 1);
+ }
+ }
+ va_end(xap);
+ va_end(ap);
+ return n;
+}
+
+int mjson_printf(mjson_print_fn_t fn, void *fnd, const char *fmt, ...) {
+ va_list ap;
+ int len;
+ va_start(ap, fmt);
+ len = mjson_vprintf(fn, fnd, fmt, ap);
+ va_end(ap);
+ return len;
+}
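+
+/* Illustrative sketch of the mini-format language implemented by
+ * mjson_vprintf() above: %Q prints a quoted/escaped string, %d/%u integers
+ * (with an optional 'l' prefix), %B a boolean, %g a double, %s a raw string,
+ * %H hex, %V base64, and %M delegates to a sub-printer:
+ *
+ *   char *buf = NULL;
+ *   // mjson_printf(mjson_print_dynamic_buf, &buf, "{%Q:%d,%Q:%B}",
+ *   //              "a", 1, "b", 0);
+ *   // buf now holds {"a":1,"b":false}; free(buf) when done.
+ */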
+#endif /* MJSON_ENABLE_PRINT */
+
+static int is_digit(int c) {
+ return c >= '0' && c <= '9';
+}
+
+/* NOTE: strtod() implementation by Yasuhiro Matsumoto. */
+static double mystrtod(const char *str, char **end) {
+ double d = 0.0;
+ int sign = 1, __attribute__((unused)) n = 0;
+ const char *p = str, *a = str;
+
+ /* decimal part */
+ if (*p == '-') {
+ sign = -1;
+ ++p;
+ } else if (*p == '+') {
+ ++p;
+ }
+ if (is_digit(*p)) {
+ d = (double) (*p++ - '0');
+ while (*p && is_digit(*p)) {
+ d = d * 10.0 + (double) (*p - '0');
+ ++p;
+ ++n;
+ }
+ a = p;
+ } else if (*p != '.') {
+ goto done;
+ }
+ d *= sign;
+
+ /* fraction part */
+ if (*p == '.') {
+ double f = 0.0;
+ double base = 0.1;
+ ++p;
+
+ if (is_digit(*p)) {
+ while (*p && is_digit(*p)) {
+ f += base * (*p - '0');
+ base /= 10.0;
+ ++p;
+ ++n;
+ }
+ }
+ d += f * sign;
+ a = p;
+ }
+
+ /* exponential part */
+ if ((*p == 'E') || (*p == 'e')) {
+ int i, e = 0, neg = 0;
+ p++;
+ if (*p == '-') p++, neg++;
+ if (*p == '+') p++;
+ while (is_digit(*p)) e = e * 10 + *p++ - '0';
+ if (neg) e = -e;
+#if 0
+ if (d == 2.2250738585072011 && e == -308) {
+ d = 0.0;
+ a = p;
+ goto done;
+ }
+ if (d == 2.2250738585072012 && e <= -308) {
+ d *= 1.0e-308;
+ a = p;
+ goto done;
+ }
+#endif
+ for (i = 0; i < e; i++) d *= 10;
+ for (i = 0; i < -e; i++) d /= 10;
+ a = p;
+ } else if (p > str && !is_digit(*(p - 1))) {
+ a = str;
+ goto done;
+ }
+
+done:
+ if (end) *end = (char *) a;
+ return d;
+}
+
+#if MJSON_ENABLE_MERGE
+int mjson_merge(const char *s, int n, const char *s2, int n2,
+ mjson_print_fn_t fn, void *userdata) {
+ int koff, klen, voff, vlen, t, t2, k, off = 0, len = 0, comma = 0;
+ if (n < 2) return len;
+ len += fn("{", 1, userdata);
+ while ((off = mjson_next(s, n, off, &koff, &klen, &voff, &vlen, &t)) != 0) {
+ char *path = (char *) alloca(klen + 1);
+ const char *val;
+ memcpy(path, "$.", 2);
+ memcpy(path + 2, s + koff + 1, klen - 2);
+ path[klen] = '\0';
+ if ((t2 = mjson_find(s2, n2, path, &val, &k)) != MJSON_TOK_INVALID) {
+ if (t2 == MJSON_TOK_NULL) continue; // null deletes the key
+ } else {
+ val = s + voff; // Key is not found in the update. Copy the old value.
+ }
+ if (comma) len += fn(",", 1, userdata);
+ len += fn(s + koff, klen, userdata);
+ len += fn(":", 1, userdata);
+ if (t == MJSON_TOK_OBJECT && t2 == MJSON_TOK_OBJECT) {
+ len += mjson_merge(s + voff, vlen, val, k, fn, userdata);
+ } else {
+ if (t2 != MJSON_TOK_INVALID) vlen = k;
+ len += fn(val, vlen, userdata);
+ }
+ comma = 1;
+ }
+ // Add missing keys
+ off = 0;
+ while ((off = mjson_next(s2, n2, off, &koff, &klen, &voff, &vlen, &t)) != 0) {
+ char *path = (char *) alloca(klen + 1);
+ const char *val;
+ if (t == MJSON_TOK_NULL) continue;
+ memcpy(path, "$.", 2);
+ memcpy(path + 2, s2 + koff + 1, klen - 2);
+ path[klen] = '\0';
+ if (mjson_find(s, n, path, &val, &vlen) != MJSON_TOK_INVALID) continue;
+ if (comma) len += fn(",", 1, userdata);
+ len += fn(s2 + koff, klen, userdata);
+ len += fn(":", 1, userdata);
+ len += fn(s2 + voff, vlen, userdata);
+ comma = 1;
+ }
+ len += fn("}", 1, userdata);
+ return len;
+}
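+
+/* Illustrative note: per the logic above, keys present in the update override
+ * the original, a null value deletes the key, and keys missing from the
+ * update are copied unchanged (JSON Merge Patch semantics, RFC 7386):
+ *
+ *   // merging {"a":1,"b":2} with {"b":null,"c":3} emits {"a":1,"c":3}
+ */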
+#endif // MJSON_ENABLE_MERGE
+
+#if MJSON_ENABLE_PRETTY
+struct prettydata {
+ int level;
+ int len;
+ int prev;
+ const char *pad;
+ int padlen;
+ mjson_print_fn_t fn;
+ void *userdata;
+};
+
+static int pretty_cb(int ev, const char *s, int off, int len, void *ud) {
+ struct prettydata *d = (struct prettydata *) ud;
+ int i;
+ switch (ev) {
+ case '{':
+ case '[':
+ d->level++;
+ d->len += d->fn(s + off, len, d->userdata);
+ break;
+ case '}':
+ case ']':
+ d->level--;
+ if (d->prev != '[' && d->prev != '{' && d->padlen > 0) {
+ d->len += d->fn("\n", 1, d->userdata);
+ for (i = 0; i < d->level; i++)
+ d->len += d->fn(d->pad, d->padlen, d->userdata);
+ }
+ d->len += d->fn(s + off, len, d->userdata);
+ break;
+ case ',':
+ d->len += d->fn(s + off, len, d->userdata);
+ if (d->padlen > 0) {
+ d->len += d->fn("\n", 1, d->userdata);
+ for (i = 0; i < d->level; i++)
+ d->len += d->fn(d->pad, d->padlen, d->userdata);
+ }
+ break;
+ case ':':
+ d->len += d->fn(s + off, len, d->userdata);
+ if (d->padlen > 0) d->len += d->fn(" ", 1, d->userdata);
+ break;
+ case MJSON_TOK_KEY:
+ if (d->prev == '{' && d->padlen > 0) {
+ d->len += d->fn("\n", 1, d->userdata);
+ for (i = 0; i < d->level; i++)
+ d->len += d->fn(d->pad, d->padlen, d->userdata);
+ }
+ d->len += d->fn(s + off, len, d->userdata);
+ break;
+ default:
+ if (d->prev == '[' && d->padlen > 0) {
+ d->len += d->fn("\n", 1, d->userdata);
+ for (i = 0; i < d->level; i++)
+ d->len += d->fn(d->pad, d->padlen, d->userdata);
+ }
+ d->len += d->fn(s + off, len, d->userdata);
+ break;
+ }
+ d->prev = ev;
+ return 0;
+}
+
+int mjson_pretty(const char *s, int n, const char *pad, mjson_print_fn_t fn,
+ void *userdata) {
+ struct prettydata d = {0, 0, 0, pad, (int) strlen(pad), fn, userdata};
+ if (mjson(s, n, pretty_cb, &d) < 0) return -1;
+ return d.len;
+}
+#endif // MJSON_ENABLE_PRETTY
+
+#if MJSON_ENABLE_RPC
+struct jsonrpc_ctx jsonrpc_default_context;
+
+int mjson_globmatch(const char *s1, int n1, const char *s2, int n2) {
+ int i = 0, j = 0, ni = 0, nj = 0;
+ while (i < n1 || j < n2) {
+ if (i < n1 && j < n2 && (s1[i] == '?' || s2[j] == s1[i])) {
+ i++, j++;
+ } else if (i < n1 && (s1[i] == '*' || s1[i] == '#')) {
+ ni = i, nj = j + 1, i++;
+ } else if (nj > 0 && nj <= n2 && (s1[i - 1] == '#' || s2[j] != '/')) {
+ i = ni, j = nj;
+ } else {
+ return 0;
+ }
+ }
+ return 1;
+}
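+
+/* Illustrative note: in the matcher above, '?' matches any single character,
+ * '*' matches any run that does not cross a '/', and '#' also matches
+ * across '/':
+ *
+ *   // mjson_globmatch("rpc.*", 5, "rpc.list", 8) -> 1
+ *   // mjson_globmatch("a/*", 3, "a/b/c", 5)      -> 0 ('*' stops at '/')
+ *   // mjson_globmatch("a/#", 3, "a/b/c", 5)      -> 1
+ */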
+
+void jsonrpc_return_errorv(struct jsonrpc_request *r, int code,
+ const char *message, const char *data_fmt,
+ va_list ap) {
+ if (r->id_len == 0) return;
+ mjson_printf(r->fn, r->fndata,
+ "{\"id\":%.*s,\"error\":{\"code\":%d,\"message\":%Q", r->id_len,
+ r->id, code, message == NULL ? "" : message);
+ if (data_fmt != NULL) {
+ mjson_printf(r->fn, r->fndata, ",\"data\":");
+ mjson_vprintf(r->fn, r->fndata, data_fmt, ap);
+ }
+ mjson_printf(r->fn, r->fndata, "}}\n");
+}
+
+void jsonrpc_return_error(struct jsonrpc_request *r, int code,
+ const char *message, const char *data_fmt, ...) {
+ va_list ap;
+ va_start(ap, data_fmt);
+ jsonrpc_return_errorv(r, code, message, data_fmt, ap);
+ va_end(ap);
+}
+
+void jsonrpc_return_successv(struct jsonrpc_request *r, const char *result_fmt,
+ va_list ap) {
+ if (r->id_len == 0) return;
+ mjson_printf(r->fn, r->fndata, "{\"id\":%.*s,\"result\":", r->id_len, r->id);
+ if (result_fmt != NULL) {
+ mjson_vprintf(r->fn, r->fndata, result_fmt, ap);
+ } else {
+ mjson_printf(r->fn, r->fndata, "%s", "null");
+ }
+ mjson_printf(r->fn, r->fndata, "}\n");
+}
+
+void jsonrpc_return_success(struct jsonrpc_request *r, const char *result_fmt,
+ ...) {
+ va_list ap;
+ va_start(ap, result_fmt);
+ jsonrpc_return_successv(r, result_fmt, ap);
+ va_end(ap);
+}
+
+void jsonrpc_ctx_process(struct jsonrpc_ctx *ctx, const char *buf, int len,
+ mjson_print_fn_t fn, void *fndata, void *ud) {
+ const char *result = NULL, *error = NULL;
+ int result_sz = 0, error_sz = 0;
+ struct jsonrpc_method *m = NULL;
+ struct jsonrpc_request r = {ctx, buf, len, 0, 0, 0, 0, 0, 0, fn, fndata, ud};
+
+ // Is it a response frame?
+ mjson_find(buf, len, "$.result", &result, &result_sz);
+ if (result == NULL) mjson_find(buf, len, "$.error", &error, &error_sz);
+ if (result_sz > 0 || error_sz > 0) {
+ if (ctx->response_cb) ctx->response_cb(buf, len, ctx->response_cb_data);
+ return;
+ }
+
+ // Method must exist and must be a string
+ if (mjson_find(buf, len, "$.method", &r.method, &r.method_len) !=
+ MJSON_TOK_STRING) {
+ mjson_printf(fn, fndata, "{\"error\":{\"code\":-32700,\"message\":%.*Q}}\n",
+ len, buf);
+ return;
+ }
+
+ // id and params are optional
+ mjson_find(buf, len, "$.id", &r.id, &r.id_len);
+ mjson_find(buf, len, "$.params", &r.params, &r.params_len);
+
+ for (m = ctx->methods; m != NULL; m = m->next) {
+ if (mjson_globmatch(m->method, m->method_sz, r.method + 1,
+ r.method_len - 2) > 0) {
+ if (r.params == NULL) r.params = "";
+ m->cb(&r);
+ break;
+ }
+ }
+ if (m == NULL) {
+ jsonrpc_return_error(&r, JSONRPC_ERROR_NOT_FOUND, "method not found", NULL);
+ }
+}
+
+static int jsonrpc_print_methods(mjson_print_fn_t fn, void *fndata,
+ va_list *ap) {
+ struct jsonrpc_ctx *ctx = va_arg(*ap, struct jsonrpc_ctx *);
+ struct jsonrpc_method *m;
+ int len = 0;
+ for (m = ctx->methods; m != NULL; m = m->next) {
+ if (m != ctx->methods) len += mjson_print_buf(fn, fndata, ",", 1);
+ len += mjson_print_str(fn, fndata, m->method, (int) strlen(m->method));
+ }
+ return len;
+}
+
+static void rpclist(struct jsonrpc_request *r) {
+ jsonrpc_return_success(r, "[%M]", jsonrpc_print_methods, r->ctx);
+}
+
+void jsonrpc_ctx_init(struct jsonrpc_ctx *ctx, mjson_print_fn_t response_cb,
+ void *response_cb_data) {
+ ctx->response_cb = response_cb;
+ ctx->response_cb_data = response_cb_data;
+ jsonrpc_ctx_export(ctx, MJSON_RPC_LIST_NAME, rpclist);
+}
+
+void jsonrpc_init(mjson_print_fn_t response_cb, void *userdata) {
+ jsonrpc_ctx_init(&jsonrpc_default_context, response_cb, userdata);
+}
+#endif // MJSON_ENABLE_RPC
diff --git a/src/mqtt.c b/src/mqtt.c
new file mode 100644
index 0000000..5688296
--- /dev/null
+++ b/src/mqtt.c
@@ -0,0 +1,1281 @@
+/*
+ * MQTT Protocol
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/chunk.h>
+#include <haproxy/mqtt.h>
+
+uint8_t mqtt_cpt_flags[MQTT_CPT_ENTRIES] = {
+ [MQTT_CPT_INVALID] = 0x00,
+ [MQTT_CPT_CONNECT] = 0x00,
+ [MQTT_CPT_CONNACK] = 0x00,
+
+ /* MQTT_CPT_PUBLISH flags can have different values (DUP, QoS, RETAIN) and must be
+ * checked more carefully
+ */
+ [MQTT_CPT_PUBLISH] = 0x0F,
+
+ [MQTT_CPT_PUBACK] = 0x00,
+ [MQTT_CPT_PUBREC] = 0x00,
+ [MQTT_CPT_PUBREL] = 0x02,
+ [MQTT_CPT_PUBCOMP] = 0x00,
+ [MQTT_CPT_SUBSCRIBE] = 0x02,
+ [MQTT_CPT_SUBACK] = 0x00,
+ [MQTT_CPT_UNSUBSCRIBE] = 0x02,
+ [MQTT_CPT_UNSUBACK] = 0x00,
+ [MQTT_CPT_PINGREQ] = 0x00,
+ [MQTT_CPT_PINGRESP] = 0x00,
+ [MQTT_CPT_DISCONNECT] = 0x00,
+ [MQTT_CPT_AUTH] = 0x00,
+};
+
+const struct ist mqtt_fields_string[MQTT_FN_ENTRIES] = {
+ [MQTT_FN_INVALID] = IST(""),
+
+ /* in MQTT 3.1, 3.1.1 and 5.0, these fields have no unique id, so we use strings */
+ [MQTT_FN_FLAGS] = IST("flags"),
+ [MQTT_FN_REASON_CODE] = IST("reason_code"), /* MQTT 3.1 and 3.1.1: return_code */
+ [MQTT_FN_PROTOCOL_NAME] = IST("protocol_name"),
+ [MQTT_FN_PROTOCOL_VERSION] = IST("protocol_version"), /* MQTT 3.1.1: protocol_level */
+ [MQTT_FN_CLIENT_IDENTIFIER] = IST("client_identifier"),
+ [MQTT_FN_WILL_TOPIC] = IST("will_topic"),
+ [MQTT_FN_WILL_PAYLOAD] = IST("will_payload"), /* MQTT 3.1 and 3.1.1: will_message */
+ [MQTT_FN_USERNAME] = IST("username"),
+ [MQTT_FN_PASSWORD] = IST("password"),
+ [MQTT_FN_KEEPALIVE] = IST("keepalive"),
+ /* from here, it's MQTT 5.0 only */
+ [MQTT_FN_PAYLOAD_FORMAT_INDICATOR] = IST("1"),
+ [MQTT_FN_MESSAGE_EXPIRY_INTERVAL] = IST("2"),
+ [MQTT_FN_CONTENT_TYPE] = IST("3"),
+ [MQTT_FN_RESPONSE_TOPIC] = IST("8"),
+ [MQTT_FN_CORRELATION_DATA] = IST("9"),
+ [MQTT_FN_SUBSCRIPTION_IDENTIFIER] = IST("11"),
+ [MQTT_FN_SESSION_EXPIRY_INTERVAL] = IST("17"),
+ [MQTT_FN_ASSIGNED_CLIENT_IDENTIFIER] = IST("18"),
+ [MQTT_FN_SERVER_KEEPALIVE] = IST("19"),
+ [MQTT_FN_AUTHENTICATION_METHOD] = IST("21"),
+ [MQTT_FN_AUTHENTICATION_DATA] = IST("22"),
+ [MQTT_FN_REQUEST_PROBLEM_INFORMATION] = IST("23"),
+ [MQTT_FN_DELAY_INTERVAL] = IST("24"),
+ [MQTT_FN_REQUEST_RESPONSE_INFORMATION] = IST("25"),
+ [MQTT_FN_RESPONSE_INFORMATION] = IST("26"),
+ [MQTT_FN_SERVER_REFERENCE] = IST("28"),
+ [MQTT_FN_REASON_STRING] = IST("31"),
+ [MQTT_FN_RECEIVE_MAXIMUM] = IST("33"),
+ [MQTT_FN_TOPIC_ALIAS_MAXIMUM] = IST("34"),
+ [MQTT_FN_TOPIC_ALIAS] = IST("35"),
+ [MQTT_FN_MAXIMUM_QOS] = IST("36"),
+ [MQTT_FN_RETAIN_AVAILABLE] = IST("37"),
+ [MQTT_FN_USER_PROPERTY] = IST("38"),
+ [MQTT_FN_MAXIMUM_PACKET_SIZE] = IST("39"),
+ [MQTT_FN_WILDCARD_SUBSCRIPTION_AVAILABLE] = IST("40"),
+ [MQTT_FN_SUBSCRIPTION_IDENTIFIERS_AVAILABLE] = IST("41"),
+ [MQTT_FN_SHARED_SUBSCRIPTION_AVAILABLE] = IST("42"),
+};
+
+/* list of supported capturable field names for each MQTT control packet type */
+const uint64_t mqtt_fields_per_packet[MQTT_CPT_ENTRIES] = {
+ [MQTT_CPT_INVALID] = 0,
+
+ [MQTT_CPT_CONNECT] = MQTT_FN_BIT_PROTOCOL_NAME | MQTT_FN_BIT_PROTOCOL_VERSION |
+ MQTT_FN_BIT_FLAGS | MQTT_FN_BIT_KEEPALIVE |
+ MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL | MQTT_FN_BIT_RECEIVE_MAXIMUM |
+ MQTT_FN_BIT_MAXIMUM_PACKET_SIZE | MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM |
+ MQTT_FN_BIT_REQUEST_RESPONSE_INFORMATION | MQTT_FN_BIT_REQUEST_PROBLEM_INFORMATION |
+ MQTT_FN_BIT_USER_PROPERTY | MQTT_FN_BIT_AUTHENTICATION_METHOD |
+ MQTT_FN_BIT_AUTHENTICATION_DATA | MQTT_FN_BIT_CLIENT_IDENTIFIER |
+ MQTT_FN_BIT_DELAY_INTERVAL | MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR |
+ MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL | MQTT_FN_BIT_CONTENT_TYPE |
+ MQTT_FN_BIT_RESPONSE_TOPIC | MQTT_FN_BIT_CORRELATION_DATA |
+ MQTT_FN_BIT_USER_PROPERTY | MQTT_FN_BIT_WILL_TOPIC |
+ MQTT_FN_BIT_WILL_PAYLOAD | MQTT_FN_BIT_USERNAME |
+ MQTT_FN_BIT_PASSWORD,
+
+ [MQTT_CPT_CONNACK] = MQTT_FN_BIT_FLAGS | MQTT_FN_BIT_PROTOCOL_VERSION |
+ MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL |
+ MQTT_FN_BIT_RECEIVE_MAXIMUM | MQTT_FN_BIT_MAXIMUM_QOS |
+ MQTT_FN_BIT_RETAIN_AVAILABLE | MQTT_FN_BIT_MAXIMUM_PACKET_SIZE |
+ MQTT_FN_BIT_ASSIGNED_CLIENT_IDENTIFIER | MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM |
+ MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_WILDCARD_SUBSCRIPTION_AVAILABLE |
+ MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIERS_AVAILABLE| MQTT_FN_BIT_SHARED_SUBSCRIPTION_AVAILABLE |
+ MQTT_FN_BIT_SERVER_KEEPALIVE | MQTT_FN_BIT_RESPONSE_INFORMATION |
+ MQTT_FN_BIT_SERVER_REFERENCE | MQTT_FN_BIT_USER_PROPERTY |
+ MQTT_FN_BIT_AUTHENTICATION_METHOD | MQTT_FN_BIT_AUTHENTICATION_DATA,
+
+ [MQTT_CPT_PUBLISH] = MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR | MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL |
+ MQTT_FN_BIT_CONTENT_TYPE | MQTT_FN_BIT_RESPONSE_TOPIC |
+ MQTT_FN_BIT_CORRELATION_DATA | MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER |
+ MQTT_FN_BIT_TOPIC_ALIAS | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PUBACK] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PUBREC] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PUBREL] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PUBCOMP] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_SUBSCRIBE] = MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_SUBACK] = MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_UNSUBSCRIBE] = MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_UNSUBACK] = MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_PINGREQ] = 0,
+
+ [MQTT_CPT_PINGRESP] = 0,
+
+ [MQTT_CPT_DISCONNECT] = MQTT_FN_BIT_REASON_CODE | MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL |
+ MQTT_FN_BIT_SERVER_REFERENCE | MQTT_FN_BIT_REASON_STRING |
+ MQTT_FN_BIT_USER_PROPERTY,
+
+ [MQTT_CPT_AUTH] = MQTT_FN_BIT_AUTHENTICATION_METHOD | MQTT_FN_BIT_AUTHENTICATION_DATA |
+ MQTT_FN_BIT_REASON_STRING | MQTT_FN_BIT_USER_PROPERTY,
+};
+
+/* Checks the first byte of a message to read the fixed header and extract the
+ * packet type and flags. <parser> is supposed to point to the fixed header byte.
+ *
+ * The fixed header looks like:
+ * +-------+-----------+-----------+-----------+---------+----------+----------+---------+------------+
+ * | bit | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+ * +-------+-----------+-----------+-----------+---------+----------+----------+---------+------------+
+ * | field | MQTT Control Packet Type | Flags specific to each Control Packet type |
+ * +-------+---------------------------------------------+--------------------------------------------+
+ *
+ * On success, <pkt> is updated with the packet type and flags and the new parser
+ * state is returned. On error, IST_NULL is returned.
+ */
+static inline struct ist mqtt_read_fixed_hdr(struct ist parser, struct mqtt_pkt *pkt)
+{
+ uint8_t type = (uint8_t)*istptr(parser);
+ uint8_t ptype = (type & 0xF0) >> 4;
+ uint8_t flags = type & 0x0F;
+
+ if (ptype == MQTT_CPT_INVALID || ptype >= MQTT_CPT_ENTRIES || flags != mqtt_cpt_flags[ptype])
+ return IST_NULL;
+
+ pkt->fixed_hdr.type = ptype;
+ pkt->fixed_hdr.flags = flags;
+ return istnext(parser);
+}
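+
+/* Illustrative note: a first byte of 0x10 yields packet type 1 (CONNECT)
+ * with flags 0x0, matching mqtt_cpt_flags[MQTT_CPT_CONNECT]; a first byte of
+ * 0x62 yields type 6 (PUBREL) with flags 0x2, the only flag value accepted
+ * for PUBREL.
+ */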
+
+/* Reads a one-byte integer. More information here:
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901007
+ *
+ * <parser> is supposed to point to the first byte of the integer. On success
+ * the integer is stored in <*i>, if provided, and the new parser state is returned. On
+ * error, IST_NULL is returned.
+*/
+static inline struct ist mqtt_read_1byte_int(struct ist parser, uint8_t *i)
+{
+ if (istlen(parser) < 1)
+ return IST_NULL;
+ if (i)
+ *i = (uint8_t)*istptr(parser);
+ parser = istnext(parser);
+ return parser;
+}
+
+/* Reads a two-byte integer. More information here:
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901008
+ *
+ * <parser> is supposed to point to the first byte of the integer. On success
+ * the integer is stored in <*i>, if provided, and the new parser state is returned. On
+ * error, IST_NULL is returned.
+*/
+static inline struct ist mqtt_read_2byte_int(struct ist parser, uint16_t *i)
+{
+ if (istlen(parser) < 2)
+ return IST_NULL;
+ if (i) {
+ *i = (uint8_t)*istptr(parser) << 8;
+ *i += (uint8_t)*(istptr(parser) + 1);
+ }
+ parser = istadv(parser, 2);
+ return parser;
+}
+
+/* Reads a four-byte integer. More information here:
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901009
+ *
+ * <parser> is supposed to point to the first byte of the integer. On success
+ * the integer is stored in <*i>, if provided, and the new parser state is returned. On
+ * error, IST_NULL is returned.
+*/
+static inline struct ist mqtt_read_4byte_int(struct ist parser, uint32_t *i)
+{
+ if (istlen(parser) < 4)
+ return IST_NULL;
+ if (i) {
+ *i = (uint8_t)*istptr(parser) << 24;
+ *i += (uint8_t)*(istptr(parser) + 1) << 16;
+ *i += (uint8_t)*(istptr(parser) + 2) << 8;
+ *i += (uint8_t)*(istptr(parser) + 3);
+ }
+ parser = istadv(parser, 4);
+ return parser;
+}
+
+/* Reads a variable byte integer. More information here:
+ * https://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718023
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901011
+ *
+ * It is encoded using a variable length encoding scheme which uses a single
+ * byte for values up to 127. Larger values are handled as follows. The least
+ * significant seven bits of each byte encode the data, and the most significant
+ * bit is used to indicate that there are following bytes in the representation.
+ * Thus each byte encodes 128 values and a "continuation bit".
+ *
+ * The maximum number of bytes in the Remaining Length field is four
+ * (MQTT_REMAINING_LENGHT_MAX_SIZE).
+ *
+ * <parser> is supposed to point to the first byte of the integer. On success
+ * the integer is stored in <*i> and the new parser state is returned. On
+ * error, IST_NULL is returned.
+ */
+static inline struct ist mqtt_read_varint(struct ist parser, uint32_t *i)
+{
+ int off, m;
+
+ off = m = 0;
+ if (i)
+ *i = 0;
+ for (off = 0; off < MQTT_REMAINING_LENGHT_MAX_SIZE && istlen(parser); off++) {
+ uint8_t byte = (uint8_t)*istptr(parser);
+
+ if (i) {
+ *i += (byte & 127) << m;
+ m += 7; /* preparing <m> for next byte */
+ }
+ parser = istnext(parser);
+
+ /* we have read the last byte of the remaining length field */
+ if (byte <= 127)
+ break;
+ }
+
+ if (off == MQTT_REMAINING_LENGHT_MAX_SIZE)
+ return IST_NULL;
+ return parser;
+}
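+
+/* Illustrative note: the two bytes 0xC1 0x02 decode to
+ * (0xC1 & 127) + (0x02 << 7) = 65 + 256 = 321; the set MSB of 0xC1 marks a
+ * continuation byte, while 0x02 <= 127 terminates the sequence.
+ */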
+
+/* Reads an MQTT string. More information here:
+ * http://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718016
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901010
+ *
+ * In MQTT, strings are prefixed by their size, encoded over 2 bytes:
+ * byte 1: length MSB
+ * byte 2: length LSB
+ * byte 3: string
+ * ...
+ *
+ * string size is MSB * 256 + LSB
+ *
+ * <parser> is supposed to point to the first byte of the string. On success the
+ * string is stored in <*str>, if provided, and the new parser state is
+ * returned. On error, IST_NULL is returned.
+ */
+static inline struct ist mqtt_read_string(struct ist parser, struct ist *str)
+{
+ uint16_t len = 0;
+
+ /* read and compute the string length */
+ if (istlen(parser) < 2)
+ goto error;
+
+ parser = mqtt_read_2byte_int(parser, &len);
+ if (!isttest(parser) || istlen(parser) < len)
+ goto error;
+
+ if (str) {
+ str->ptr = istptr(parser);
+ str->len = len;
+ }
+
+ return istadv(parser, len);
+
+ error:
+ return IST_NULL;
+}
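+
+/* Illustrative note: the bytes 0x00 0x04 'M' 'Q' 'T' 'T' decode to the
+ * 4-byte string "MQTT", which is exactly the protocol name found in a
+ * CONNECT variable header.
+ */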
+
+/* Helper function to convert an unsigned integer to a string. The result is
+ * written in <buf>. On success, the written size is returned, otherwise, on
+ * error, 0 is returned.
+ */
+static inline size_t mqtt_uint2str(struct buffer *buf, uint32_t i)
+{
+ char *end;
+
+ end = ultoa_o(i, buf->area, buf->size);
+ if (!end)
+ return 0;
+ buf->data = end - buf->area;
+ return buf->data;
+}
+
+/* Extracts the value of a <fieldname_id> of type <type> from a given MQTT
+ * message <msg>. IST_NULL is returned if an error occurred while parsing or if
+ * the field could not be found. If more data are required, an ist with a
+ * length of 0 is returned. If the field is found, the value is returned
+ * as a struct ist.
+ */
+struct ist mqtt_field_value(struct ist msg, int type, int fieldname_id)
+{
+ struct buffer *trash = get_trash_chunk();
+ struct mqtt_pkt mpkt;
+ struct ist res;
+
+ switch (mqtt_validate_message(msg, &mpkt)) {
+ case MQTT_VALID_MESSAGE:
+ if (mpkt.fixed_hdr.type != type)
+ goto not_found_or_invalid;
+ break;
+ case MQTT_NEED_MORE_DATA:
+ goto need_more;
+ case MQTT_INVALID_MESSAGE:
+ goto not_found_or_invalid;
+ }
+
+ switch (type) {
+ case MQTT_CPT_CONNECT:
+ switch (fieldname_id) {
+ case MQTT_FN_FLAGS:
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.flags))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_PROTOCOL_NAME:
+ if (!istlen(mpkt.data.connect.var_hdr.protocol_name))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.var_hdr.protocol_name;
+ goto end;
+
+ case MQTT_FN_PROTOCOL_VERSION:
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.protocol_version))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_CLIENT_IDENTIFIER:
+ if (!istlen(mpkt.data.connect.payload.client_identifier))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.client_identifier;
+ goto end;
+
+ case MQTT_FN_WILL_TOPIC:
+ if (!istlen(mpkt.data.connect.payload.will_topic))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_topic;
+ goto end;
+
+ case MQTT_FN_WILL_PAYLOAD:
+ if (!istlen(mpkt.data.connect.payload.will_payload))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_payload;
+ goto end;
+
+ case MQTT_FN_USERNAME:
+ if (!istlen(mpkt.data.connect.payload.username))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.username;
+ goto end;
+
+ case MQTT_FN_PASSWORD:
+ if (!istlen(mpkt.data.connect.payload.password))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.password;
+ goto end;
+
+ case MQTT_FN_KEEPALIVE:
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.keepalive))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_PAYLOAD_FORMAT_INDICATOR:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.payload.will_props.payload_format_indicator))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_MESSAGE_EXPIRY_INTERVAL:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.payload.will_props.message_expiry_interval))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_CONTENT_TYPE:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.payload.will_props.content_type))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_props.content_type;
+ goto end;
+
+ case MQTT_FN_RESPONSE_TOPIC:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.payload.will_props.response_topic))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_props.response_topic;
+ goto end;
+
+ case MQTT_FN_CORRELATION_DATA:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.payload.will_props.correlation_data))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.payload.will_props.correlation_data;
+ goto end;
+
+ case MQTT_FN_SESSION_EXPIRY_INTERVAL:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.session_expiry_interval))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_AUTHENTICATION_METHOD:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.var_hdr.props.authentication_method))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.var_hdr.props.authentication_method;
+ goto end;
+
+ case MQTT_FN_AUTHENTICATION_DATA:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connect.var_hdr.props.authentication_data))
+ goto not_found_or_invalid;
+ res = mpkt.data.connect.var_hdr.props.authentication_data;
+ goto end;
+
+ case MQTT_FN_REQUEST_PROBLEM_INFORMATION:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.request_problem_information))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_DELAY_INTERVAL:
+ if ((mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0) ||
+ !(mpkt.data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL))
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.payload.will_props.delay_interval))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_REQUEST_RESPONSE_INFORMATION:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.request_response_information))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_RECEIVE_MAXIMUM:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.receive_maximum))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_TOPIC_ALIAS_MAXIMUM:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.topic_alias_maximum))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_MAXIMUM_PACKET_SIZE:
+ if (mpkt.data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connect.var_hdr.props.maximum_packet_size))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ default:
+ goto not_found_or_invalid;
+ }
+ break;
+
+ case MQTT_CPT_CONNACK:
+ switch (fieldname_id) {
+ case MQTT_FN_FLAGS:
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.flags))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_REASON_CODE:
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.reason_code))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_PROTOCOL_VERSION:
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.protocol_version))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_SESSION_EXPIRY_INTERVAL:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.session_expiry_interval))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_ASSIGNED_CLIENT_IDENTIFIER:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.assigned_client_identifier))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.assigned_client_identifier;
+ goto end;
+
+ case MQTT_FN_SERVER_KEEPALIVE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.server_keepalive))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_AUTHENTICATION_METHOD:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.authentication_method))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.authentication_method;
+ goto end;
+
+ case MQTT_FN_AUTHENTICATION_DATA:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.authentication_data))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.authentication_data;
+ goto end;
+
+ case MQTT_FN_RESPONSE_INFORMATION:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.response_information))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.response_information;
+ goto end;
+
+ case MQTT_FN_SERVER_REFERENCE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.server_reference))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.server_reference;
+ goto end;
+
+ case MQTT_FN_REASON_STRING:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!istlen(mpkt.data.connack.var_hdr.props.reason_string))
+ goto not_found_or_invalid;
+ res = mpkt.data.connack.var_hdr.props.reason_string;
+ goto end;
+
+ case MQTT_FN_RECEIVE_MAXIMUM:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.receive_maximum))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_TOPIC_ALIAS_MAXIMUM:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.topic_alias_maximum))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_MAXIMUM_QOS:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.maximum_qos))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_RETAIN_AVAILABLE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.retain_available))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_MAXIMUM_PACKET_SIZE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.maximum_packet_size))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_WILDCARD_SUBSCRIPTION_AVAILABLE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.wildcard_subscription_available))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_SUBSCRIPTION_IDENTIFIERS_AVAILABLE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.subscription_identifiers_available))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ case MQTT_FN_SHARED_SUBSCRIPTION_AVAILABLE:
+ if (mpkt.data.connack.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto not_found_or_invalid;
+ if (!mqtt_uint2str(trash, mpkt.data.connack.var_hdr.props.shared_subsription_available))
+ goto not_found_or_invalid;
+ res = ist2(trash->area, trash->data);
+ goto end;
+
+ default:
+ goto not_found_or_invalid;
+ }
+ break;
+
+ default:
+ goto not_found_or_invalid;
+ }
+
+ end:
+ return res;
+
+ need_more:
+ return ist2(istptr(msg), 0);
+
+ not_found_or_invalid:
+ return IST_NULL;
+}
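+
+/* Note for callers of the function above: its three outcomes are
+ * distinguishable with the ist API (a sketch, assuming <res> holds the
+ * returned ist):
+ *
+ *	if (!isttest(res))      -> field not found or message invalid
+ *	else if (!istlen(res))  -> more data needed to parse the message
+ *	else                    -> field value available in <res>
+ */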
+
+/* Parses a CONNECT packet :
+ * https://public.dhe.ibm.com/software/dw/webservices/ws-mqtt/mqtt-v3r1.html#connect
+ * https://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718028
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901033
+ *
+ * <parser> should point right after the MQTT fixed header. The remaining length
+ * was already checked, thus missing data is an error. On success, the result of
+ * the parsing is stored in <mpkt>.
+ *
+ * Returns:
+ * MQTT_INVALID_MESSAGE if the CONNECT message is invalid
+ * MQTT_VALID_MESSAGE if the CONNECT message looks valid
+ */
+static int mqtt_parse_connect(struct ist parser, struct mqtt_pkt *mpkt)
+{
+	/* The parser length is stored to make sure we consume exactly the
+	 * announced remaining length. */
+ size_t orig_len = istlen(parser);
+ int ret = MQTT_INVALID_MESSAGE;
+
+ /*
+ * parsing variable header
+ */
+ /* read protocol_name */
+ parser = mqtt_read_string(parser, &mpkt->data.connect.var_hdr.protocol_name);
+ if (!isttest(parser) || !(isteqi(mpkt->data.connect.var_hdr.protocol_name, ist("MQTT")) || isteqi(mpkt->data.connect.var_hdr.protocol_name, ist("MQIsdp"))))
+ goto end;
+
+ /* read protocol_version */
+ parser = mqtt_read_1byte_int(parser, &mpkt->data.connect.var_hdr.protocol_version);
+ if (!isttest(parser))
+ goto end;
+ if (mpkt->data.connect.var_hdr.protocol_version != MQTT_VERSION_3_1 &&
+ mpkt->data.connect.var_hdr.protocol_version != MQTT_VERSION_3_1_1 &&
+ mpkt->data.connect.var_hdr.protocol_version != MQTT_VERSION_5_0)
+ goto end;
+
+ /* read flags */
+ /* bit 1 is 'reserved' and must be set to 0 in CONNECT message flags */
+ parser = mqtt_read_1byte_int(parser, &mpkt->data.connect.var_hdr.flags);
+ if (!isttest(parser) || (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_RESERVED))
+ goto end;
+
+	/* the WILL flag must be set for the WILL_QOS or WILL_RETAIN flags to be set */
+ if ((mpkt->data.connect.var_hdr.flags & (MQTT_CONNECT_FL_WILL|MQTT_CONNECT_FL_WILL_QOS|MQTT_CONNECT_FL_WILL_RETAIN)) == MQTT_CONNECT_FL_WILL_QOS)
+ goto end;
+
+ /* read keepalive */
+ parser = mqtt_read_2byte_int(parser, &mpkt->data.connect.var_hdr.keepalive);
+ if (!isttest(parser))
+ goto end;
+
+ /* read properties, only available in MQTT_VERSION_5_0 */
+ if (mpkt->data.connect.var_hdr.protocol_version == MQTT_VERSION_5_0) {
+ struct ist props;
+ unsigned int user_prop_idx = 0;
+ uint64_t fields = 0;
+ uint32_t plen = 0;
+
+ parser = mqtt_read_varint(parser, &plen);
+ if (!isttest(parser) || istlen(parser) < plen)
+ goto end;
+ props = ist2(istptr(parser), plen);
+ parser = istadv(parser, props.len);
+
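+		/* Each MQTT 5.0 property is encoded as a one-byte identifier
+		 * followed by a value whose format depends on that identifier
+		 * (1/2/4-byte integer, varint, or length-prefixed UTF-8
+		 * string), which is what the switch below decodes.
+		 */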
+ while (istlen(props) > 0) {
+ switch (*istptr(props)) {
+ case MQTT_PROP_SESSION_EXPIRY_INTERVAL:
+ if (fields & MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.session_expiry_interval);
+ fields |= MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL;
+ break;
+
+ case MQTT_PROP_RECEIVE_MAXIMUM:
+ if (fields & MQTT_FN_BIT_RECEIVE_MAXIMUM)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.receive_maximum);
+ /* cannot be 0 */
+ if (!mpkt->data.connect.var_hdr.props.receive_maximum)
+ goto end;
+ fields |= MQTT_FN_BIT_RECEIVE_MAXIMUM;
+ break;
+
+ case MQTT_PROP_MAXIMUM_PACKET_SIZE:
+ if (fields & MQTT_FN_BIT_MAXIMUM_PACKET_SIZE)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.maximum_packet_size);
+ /* cannot be 0 */
+ if (!mpkt->data.connect.var_hdr.props.maximum_packet_size)
+ goto end;
+ fields |= MQTT_FN_BIT_MAXIMUM_PACKET_SIZE;
+ break;
+
+ case MQTT_PROP_TOPIC_ALIAS_MAXIMUM:
+ if (fields & MQTT_FN_BIT_TOPIC_ALIAS)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.topic_alias_maximum);
+ fields |= MQTT_FN_BIT_TOPIC_ALIAS;
+ break;
+
+ case MQTT_PROP_REQUEST_RESPONSE_INFORMATION:
+ if (fields & MQTT_FN_BIT_REQUEST_RESPONSE_INFORMATION)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.request_response_information);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connect.var_hdr.props.request_response_information > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_REQUEST_RESPONSE_INFORMATION;
+ break;
+
+ case MQTT_PROP_REQUEST_PROBLEM_INFORMATION:
+ if (fields & MQTT_FN_BIT_REQUEST_PROBLEM_INFORMATION)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connect.var_hdr.props.request_problem_information);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connect.var_hdr.props.request_problem_information > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_REQUEST_PROBLEM_INFORMATION;
+ break;
+
+ case MQTT_PROP_USER_PROPERTIES:
+ /* if we reached MQTT_PROP_USER_PROPERTY_ENTRIES already, then
+ * we start writing over the first property */
+ if (user_prop_idx >= MQTT_PROP_USER_PROPERTY_ENTRIES)
+ user_prop_idx = 0;
+
+ /* read user property name and value */
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.var_hdr.props.user_props[user_prop_idx].name);
+ if (!isttest(props))
+ goto end;
+ props = mqtt_read_string(props, &mpkt->data.connect.var_hdr.props.user_props[user_prop_idx].value);
+ ++user_prop_idx;
+ break;
+
+ case MQTT_PROP_AUTHENTICATION_METHOD:
+ if (fields & MQTT_FN_BIT_AUTHENTICATION_METHOD)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.var_hdr.props.authentication_method);
+ fields |= MQTT_FN_BIT_AUTHENTICATION_METHOD;
+ break;
+
+ case MQTT_PROP_AUTHENTICATION_DATA:
+ if (fields & MQTT_FN_BIT_AUTHENTICATION_DATA)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.var_hdr.props.authentication_data);
+ fields |= MQTT_FN_BIT_AUTHENTICATION_DATA;
+ break;
+
+ default:
+ goto end;
+ }
+
+ if (!isttest(props))
+ goto end;
+ }
+ }
+
+ /* cannot have auth data without auth method */
+ if (!istlen(mpkt->data.connect.var_hdr.props.authentication_method) &&
+ istlen(mpkt->data.connect.var_hdr.props.authentication_data))
+ goto end;
+
+ /* parsing payload
+ *
+	 * The payload content depends on the flags parsed above, and the field order is pre-defined:
+ * Client Identifier, Will Topic, Will Message, User Name, Password
+ */
+ /* read client identifier */
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.client_identifier);
+ if (!isttest(parser))
+ goto end;
+
+ /* read Will Properties, for MQTT v5 only
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901060
+ */
+ if ((mpkt->data.connect.var_hdr.protocol_version == MQTT_VERSION_5_0) &&
+ (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL)) {
+ struct ist props;
+ unsigned int user_prop_idx = 0;
+ uint64_t fields = 0;
+ uint32_t plen = 0;
+
+ parser = mqtt_read_varint(parser, &plen);
+ if (!isttest(parser) || istlen(parser) < plen)
+ goto end;
+ props = ist2(istptr(parser), plen);
+ parser = istadv(parser, props.len);
+
+ while (istlen(props) > 0) {
+ switch (*istptr(props)) {
+ case MQTT_PROP_WILL_DELAY_INTERVAL:
+ if (fields & MQTT_FN_BIT_DELAY_INTERVAL)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connect.payload.will_props.delay_interval);
+ fields |= MQTT_FN_BIT_DELAY_INTERVAL;
+ break;
+
+ case MQTT_PROP_PAYLOAD_FORMAT_INDICATOR:
+ if (fields & MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connect.payload.will_props.payload_format_indicator);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connect.payload.will_props.payload_format_indicator > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_PAYLOAD_FORMAT_INDICATOR;
+ break;
+
+ case MQTT_PROP_MESSAGE_EXPIRY_INTERVAL:
+ if (fields & MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connect.payload.will_props.message_expiry_interval);
+ fields |= MQTT_FN_BIT_MESSAGE_EXPIRY_INTERVAL;
+ break;
+
+ case MQTT_PROP_CONTENT_TYPE:
+ if (fields & MQTT_FN_BIT_CONTENT_TYPE)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.payload.will_props.content_type);
+ fields |= MQTT_FN_BIT_CONTENT_TYPE;
+ break;
+
+ case MQTT_PROP_RESPONSE_TOPIC:
+ if (fields & MQTT_FN_BIT_RESPONSE_TOPIC)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.payload.will_props.response_topic);
+ fields |= MQTT_FN_BIT_RESPONSE_TOPIC;
+ break;
+
+ case MQTT_PROP_CORRELATION_DATA:
+ if (fields & MQTT_FN_BIT_CORRELATION_DATA)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.payload.will_props.correlation_data);
+ fields |= MQTT_FN_BIT_CORRELATION_DATA;
+ break;
+
+ case MQTT_PROP_USER_PROPERTIES:
+ /* if we reached MQTT_PROP_USER_PROPERTY_ENTRIES already, then
+ * we start writing over the first property */
+ if (user_prop_idx >= MQTT_PROP_USER_PROPERTY_ENTRIES)
+ user_prop_idx = 0;
+
+ /* read user property name and value */
+ props = mqtt_read_string(istnext(props), &mpkt->data.connect.payload.will_props.user_props[user_prop_idx].name);
+ if (!isttest(props))
+ goto end;
+ props = mqtt_read_string(props, &mpkt->data.connect.payload.will_props.user_props[user_prop_idx].value);
+ ++user_prop_idx;
+ break;
+
+ default:
+ goto end;
+ }
+
+ if (!isttest(props))
+ goto end;
+ }
+ }
+
+ /* read Will Topic and Will Message (MQTT 3.1.1) or Payload (MQTT 5.0) */
+ if (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_WILL) {
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.will_topic);
+ if (!isttest(parser))
+ goto end;
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.will_payload);
+ if (!isttest(parser))
+ goto end;
+ }
+
+ /* read User Name */
+ if (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_USERNAME) {
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.username);
+ if (!isttest(parser))
+ goto end;
+ }
+
+ /* read Password */
+ if (mpkt->data.connect.var_hdr.flags & MQTT_CONNECT_FL_PASSWORD) {
+ parser = mqtt_read_string(parser, &mpkt->data.connect.payload.password);
+ if (!isttest(parser))
+ goto end;
+ }
+
+ if ((orig_len - istlen(parser)) == mpkt->fixed_hdr.remaining_length)
+ ret = MQTT_VALID_MESSAGE;
+
+ end:
+ return ret;
+}
+
+/* Parses a CONNACK packet :
+ * https://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718033
+ * https://docs.oasis-open.org/mqtt/mqtt/v5.0/os/mqtt-v5.0-os.html#_Toc3901074
+ *
+ * <parser> should point right after the MQTT fixed header. The remaining length
+ * was already checked, thus missing data is an error. On success, the result of
+ * the parsing is stored in <mpkt>.
+ *
+ * Returns:
+ *   MQTT_INVALID_MESSAGE if the CONNACK message is invalid
+ *   MQTT_VALID_MESSAGE   if the CONNACK message looks valid
+ */
+static int mqtt_parse_connack(struct ist parser, struct mqtt_pkt *mpkt)
+{
+	/* The parser length is stored to make sure we consume exactly the
+	 * announced remaining length. */
+ size_t orig_len = istlen(parser);
+ int ret = MQTT_INVALID_MESSAGE;
+
+ if (istlen(parser) < 2)
+ goto end;
+ else if (istlen(parser) == 2)
+ mpkt->data.connack.var_hdr.protocol_version = MQTT_VERSION_3_1_1;
+ else
+ mpkt->data.connack.var_hdr.protocol_version = MQTT_VERSION_5_0;
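+	/* (a 3.1.1 CONNACK variable header is exactly 2 bytes, the flags and
+	 * the return code; 5.0 appends at least a properties length byte)
+	 */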
+
+ /*
+ * parsing variable header
+ */
+ /* read flags */
+ /* bits 7 to 1 on flags are reserved and must be 0 */
+ parser = mqtt_read_1byte_int(parser, &mpkt->data.connack.var_hdr.flags);
+ if (!isttest(parser) || (mpkt->data.connack.var_hdr.flags & 0xFE))
+ goto end;
+
+ /* read reason_code */
+ parser = mqtt_read_1byte_int(parser, &mpkt->data.connack.var_hdr.reason_code);
+ if (!isttest(parser))
+ goto end;
+
+ /* we can leave here for MQTT 3.1.1 */
+ if (mpkt->data.connack.var_hdr.protocol_version == MQTT_VERSION_3_1_1) {
+ if ((orig_len - istlen(parser)) == mpkt->fixed_hdr.remaining_length)
+ ret = MQTT_VALID_MESSAGE;
+ goto end;
+ }
+
+ /* read properties, only available in MQTT_VERSION_5_0 */
+ if (mpkt->data.connack.var_hdr.protocol_version == MQTT_VERSION_5_0) {
+ struct ist props;
+ unsigned int user_prop_idx = 0;
+ uint64_t fields = 0;
+ uint32_t plen = 0;
+
+ parser = mqtt_read_varint(parser, &plen);
+ if (!isttest(parser) || istlen(parser) < plen)
+ goto end;
+ props = ist2(istptr(parser), plen);
+ parser = istadv(parser, props.len);
+
+ while (istlen(props) > 0) {
+ switch (*istptr(props)) {
+ case MQTT_PROP_SESSION_EXPIRY_INTERVAL:
+ if (fields & MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.session_expiry_interval);
+ fields |= MQTT_FN_BIT_SESSION_EXPIRY_INTERVAL;
+ break;
+
+ case MQTT_PROP_RECEIVE_MAXIMUM:
+ if (fields & MQTT_FN_BIT_RECEIVE_MAXIMUM)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.receive_maximum);
+ /* cannot be 0 */
+ if (!mpkt->data.connack.var_hdr.props.receive_maximum)
+ goto end;
+ fields |= MQTT_FN_BIT_RECEIVE_MAXIMUM;
+ break;
+
+ case MQTT_PROP_MAXIMUM_QOS:
+ if (fields & MQTT_FN_BIT_MAXIMUM_QOS)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.maximum_qos);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.maximum_qos > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_MAXIMUM_QOS;
+ break;
+
+ case MQTT_PROP_RETAIN_AVAILABLE:
+ if (fields & MQTT_FN_BIT_RETAIN_AVAILABLE)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.retain_available);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.retain_available > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_RETAIN_AVAILABLE;
+ break;
+
+ case MQTT_PROP_MAXIMUM_PACKET_SIZE:
+ if (fields & MQTT_FN_BIT_MAXIMUM_PACKET_SIZE)
+ goto end;
+ props = mqtt_read_4byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.maximum_packet_size);
+ /* cannot be 0 */
+ if (!mpkt->data.connack.var_hdr.props.maximum_packet_size)
+ goto end;
+ fields |= MQTT_FN_BIT_MAXIMUM_PACKET_SIZE;
+ break;
+
+ case MQTT_PROP_ASSIGNED_CLIENT_IDENTIFIER:
+ if (fields & MQTT_FN_BIT_ASSIGNED_CLIENT_IDENTIFIER)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.assigned_client_identifier);
+ if (!istlen(mpkt->data.connack.var_hdr.props.assigned_client_identifier))
+ goto end;
+ fields |= MQTT_FN_BIT_ASSIGNED_CLIENT_IDENTIFIER;
+ break;
+
+ case MQTT_PROP_TOPIC_ALIAS_MAXIMUM:
+ if (fields & MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.topic_alias_maximum);
+ fields |= MQTT_FN_BIT_TOPIC_ALIAS_MAXIMUM;
+ break;
+
+ case MQTT_PROP_REASON_STRING:
+ if (fields & MQTT_FN_BIT_REASON_STRING)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.reason_string);
+ fields |= MQTT_FN_BIT_REASON_STRING;
+ break;
+
+ case MQTT_PROP_WILDCARD_SUBSCRIPTION_AVAILABLE:
+ if (fields & MQTT_FN_BIT_WILDCARD_SUBSCRIPTION_AVAILABLE)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.wildcard_subscription_available);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.wildcard_subscription_available > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_WILDCARD_SUBSCRIPTION_AVAILABLE;
+ break;
+
+ case MQTT_PROP_SUBSCRIPTION_IDENTIFIERS_AVAILABLE:
+ if (fields & MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.subscription_identifiers_available);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.subscription_identifiers_available > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_SUBSCRIPTION_IDENTIFIER;
+ break;
+
+ case MQTT_PROP_SHARED_SUBSRIPTION_AVAILABLE:
+ if (fields & MQTT_FN_BIT_SHARED_SUBSCRIPTION_AVAILABLE)
+ goto end;
+ props = mqtt_read_1byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.shared_subsription_available);
+ /* can have only 2 values: 0 or 1 */
+ if (mpkt->data.connack.var_hdr.props.shared_subsription_available > 1)
+ goto end;
+ fields |= MQTT_FN_BIT_SHARED_SUBSCRIPTION_AVAILABLE;
+ break;
+
+ case MQTT_PROP_SERVER_KEEPALIVE:
+ if (fields & MQTT_FN_BIT_SERVER_KEEPALIVE)
+ goto end;
+ props = mqtt_read_2byte_int(istnext(props), &mpkt->data.connack.var_hdr.props.server_keepalive);
+ fields |= MQTT_FN_BIT_SERVER_KEEPALIVE;
+ break;
+
+ case MQTT_PROP_RESPONSE_INFORMATION:
+ if (fields & MQTT_FN_BIT_RESPONSE_INFORMATION)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.response_information);
+ fields |= MQTT_FN_BIT_RESPONSE_INFORMATION;
+ break;
+
+ case MQTT_PROP_SERVER_REFERENCE:
+ if (fields & MQTT_FN_BIT_SERVER_REFERENCE)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.server_reference);
+ fields |= MQTT_FN_BIT_SERVER_REFERENCE;
+ break;
+
+ case MQTT_PROP_USER_PROPERTIES:
+ /* if we reached MQTT_PROP_USER_PROPERTY_ENTRIES already, then
+ * we start writing over the first property */
+ if (user_prop_idx >= MQTT_PROP_USER_PROPERTY_ENTRIES)
+ user_prop_idx = 0;
+
+ /* read user property name and value */
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.user_props[user_prop_idx].name);
+ if (!isttest(props))
+ goto end;
+ props = mqtt_read_string(props, &mpkt->data.connack.var_hdr.props.user_props[user_prop_idx].value);
+ ++user_prop_idx;
+ break;
+
+ case MQTT_PROP_AUTHENTICATION_METHOD:
+ if (fields & MQTT_FN_BIT_AUTHENTICATION_METHOD)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.authentication_method);
+ fields |= MQTT_FN_BIT_AUTHENTICATION_METHOD;
+ break;
+
+ case MQTT_PROP_AUTHENTICATION_DATA:
+ if (fields & MQTT_FN_BIT_AUTHENTICATION_DATA)
+ goto end;
+ props = mqtt_read_string(istnext(props), &mpkt->data.connack.var_hdr.props.authentication_data);
+ fields |= MQTT_FN_BIT_AUTHENTICATION_DATA;
+ break;
+
+ default:
+				goto end;
+ }
+
+ if (!isttest(props))
+ goto end;
+ }
+ }
+
+ if ((orig_len - istlen(parser)) == mpkt->fixed_hdr.remaining_length)
+ ret = MQTT_VALID_MESSAGE;
+ end:
+ return ret;
+}
+
+
+/* Parses and validates a MQTT packet
+ * https://docs.oasis-open.org/mqtt/mqtt/v3.1.1/os/mqtt-v3.1.1-os.html#_Toc398718028
+ *
+ * For now, due to an HAProxy limitation, only validation of CONNECT and
+ * CONNACK packets is supported.
+ *
+ * - check FIXED_HDR
+ * - check remaining length
+ * - check variable headers and payload
+ *
+ * if <mpkt> is not NULL, then this structure will be filled up as well. An
+ * unsupported packet type is considered as invalid. It is not a problem for now
+ * because only the first packet on each side can be parsed (CONNECT for the
+ * client and CONNACK for the server).
+ *
+ * Returns:
+ * MQTT_INVALID_MESSAGE if the message is invalid
+ * MQTT_NEED_MORE_DATA if we need more data to fully validate the message
+ * MQTT_VALID_MESSAGE if the message looks valid
+ */
+int mqtt_validate_message(const struct ist msg, struct mqtt_pkt *mpkt)
+{
+ struct ist parser;
+ struct mqtt_pkt tmp_mpkt;
+ int ret = MQTT_INVALID_MESSAGE;
+
+ if (!mpkt)
+ mpkt = &tmp_mpkt;
+ memset(mpkt, 0, sizeof(*mpkt));
+
+ parser = msg;
+ if (istlen(msg) < MQTT_MIN_PKT_SIZE) {
+ ret = MQTT_NEED_MORE_DATA;
+ goto end;
+ }
+
+ /* parse the MQTT fixed header */
+ parser = mqtt_read_fixed_hdr(parser, mpkt);
+ if (!isttest(parser)) {
+ ret = MQTT_INVALID_MESSAGE;
+ goto end;
+ }
+
+ /* Now parsing "remaining length" field */
+ parser = mqtt_read_varint(parser, &mpkt->fixed_hdr.remaining_length);
+ if (!isttest(parser)) {
+ ret = MQTT_INVALID_MESSAGE;
+ goto end;
+ }
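+	/* Note: the "remaining length" is MQTT's variable-length integer
+	 * encoding: 7 bits of payload per byte, with the most significant bit
+	 * set on all bytes but the last one. For instance 321 is encoded as
+	 * 0xC1 0x02 (65 + 2 * 128).
+	 */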
+
+ if (istlen(parser) < mpkt->fixed_hdr.remaining_length)
+ return MQTT_NEED_MORE_DATA;
+
+ /* Now parsing the variable header and payload, which is based on the packet type */
+ switch (mpkt->fixed_hdr.type) {
+ case MQTT_CPT_CONNECT:
+ ret = mqtt_parse_connect(parser, mpkt);
+ break;
+ case MQTT_CPT_CONNACK:
+ ret = mqtt_parse_connack(parser, mpkt);
+ break;
+ default:
+ break;
+ }
+
+ end:
+ return ret;
+}
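+
+/* Usage sketch (hypothetical caller, not part of this file): a content
+ * inspection rule buffering the beginning of the stream would typically
+ * retry while the parser asks for more data:
+ *
+ *	struct mqtt_pkt mpkt;
+ *
+ *	switch (mqtt_validate_message(ist2(buf, len), &mpkt)) {
+ *	case MQTT_VALID_MESSAGE:  accept and inspect <mpkt>;
+ *	case MQTT_NEED_MORE_DATA: wait for more bytes and retry;
+ *	default:                  reject (MQTT_INVALID_MESSAGE);
+ *	}
+ */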
diff --git a/src/mux_fcgi.c b/src/mux_fcgi.c
new file mode 100644
index 0000000..0230e6b
--- /dev/null
+++ b/src/mux_fcgi.c
@@ -0,0 +1,4268 @@
+/*
+ * FastCGI mux-demux for connections
+ *
+ * Copyright (C) 2019 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/ist.h>
+#include <import/eb32tree.h>
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/errors.h>
+#include <haproxy/fcgi-app.h>
+#include <haproxy/fcgi.h>
+#include <haproxy/h1.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/mux_fcgi-t.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/trace.h>
+#include <haproxy/version.h>
+
+/* 32 buffers: one for the ring's root, rest for the mbuf itself */
+#define FCGI_C_MBUF_CNT 32
+
+/* Size for a record header (also size of empty record) */
+#define FCGI_RECORD_HEADER_SZ 8
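+/* For reference, those 8 bytes are laid out as defined by the FastCGI spec:
+ * version(1), type(1), request-id(2, big endian), content-length(2, big
+ * endian), padding-length(1), reserved(1).
+ */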
+
+/* FCGI connection descriptor */
+struct fcgi_conn {
+ struct connection *conn;
+
+ enum fcgi_conn_st state; /* FCGI connection state */
+ int16_t max_id; /* highest ID known on this connection, <0 before mgmt records */
+ uint32_t streams_limit; /* maximum number of concurrent streams the peer supports */
+ uint32_t flags; /* Connection flags: FCGI_CF_* */
+
+	int16_t dsi; /* demux stream ID (<0 = idle) */
+ uint16_t drl; /* demux record length (if dsi >= 0) */
+ uint8_t drt; /* demux record type (if dsi >= 0) */
+ uint8_t drp; /* demux record padding (if dsi >= 0) */
+
+ struct buffer dbuf; /* demux buffer */
+ struct buffer mbuf[FCGI_C_MBUF_CNT]; /* mux buffers (ring) */
+
+ int timeout; /* idle timeout duration in ticks */
+ int shut_timeout; /* idle timeout duration in ticks after shutdown */
+ unsigned int nb_streams; /* number of streams in the tree */
+ unsigned int nb_sc; /* number of attached stream connectors */
+ unsigned int nb_reserved; /* number of reserved streams */
+ unsigned int stream_cnt; /* total number of streams seen */
+
+ struct proxy *proxy; /* the proxy this connection was created for */
+ struct fcgi_app *app; /* FCGI application used by this mux */
+ struct task *task; /* timeout management task */
+ struct eb_root streams_by_id; /* all active streams by their ID */
+
+ struct list send_list; /* list of blocked streams requesting to send */
+
+ struct buffer_wait buf_wait; /* Wait list for buffer allocation */
+ struct wait_event wait_event; /* To be used if we're waiting for I/Os */
+};
+
+
+/* FCGI stream descriptor */
+struct fcgi_strm {
+ struct sedesc *sd;
+ struct session *sess;
+ struct fcgi_conn *fconn;
+
+ int32_t id; /* stream ID */
+
+	uint32_t flags; /* Stream flags: FCGI_SF_* */
+ enum fcgi_strm_st state; /* FCGI stream state */
+ int proto_status; /* FCGI_PS_* */
+
+ struct h1m h1m; /* response parser state for H1 */
+
+ struct buffer rxbuf; /* receive buffer, always valid (buf_empty or real buffer) */
+
+ struct eb32_node by_id; /* place in fcgi_conn's streams_by_id */
+ struct wait_event *subs; /* Address of the wait_event the stream connector associated is waiting on */
+ struct list send_list; /* To be used when adding in fcgi_conn->send_list */
+	struct tasklet *shut_tl; /* deferred shutdown tasklet, to retry to close after a failure due to lack of space */
+};
+
+/* Flags representing all default FCGI parameters */
+#define FCGI_SP_CGI_GATEWAY 0x00000001
+#define FCGI_SP_DOC_ROOT 0x00000002
+#define FCGI_SP_SCRIPT_NAME 0x00000004
+#define FCGI_SP_PATH_INFO 0x00000008
+#define FCGI_SP_REQ_URI 0x00000010
+#define FCGI_SP_REQ_METH 0x00000020
+#define FCGI_SP_REQ_QS 0x00000040
+#define FCGI_SP_SRV_PORT 0x00000080
+#define FCGI_SP_SRV_PROTO 0x00000100
+#define FCGI_SP_SRV_NAME 0x00000200
+#define FCGI_SP_REM_ADDR 0x00000400
+#define FCGI_SP_REM_PORT 0x00000800
+#define FCGI_SP_SCRIPT_FILE 0x00001000
+#define FCGI_SP_PATH_TRANS 0x00002000
+#define FCGI_SP_CONT_LEN 0x00004000
+#define FCGI_SP_HTTPS 0x00008000
+#define FCGI_SP_SRV_SOFT 0x00010000
+#define FCGI_SP_MASK 0x0001FFFF
+#define FCGI_SP_URI_MASK (FCGI_SP_SCRIPT_NAME|FCGI_SP_PATH_INFO|FCGI_SP_REQ_QS)
+
+/* FCGI parameters used when PARAMS record is sent */
+struct fcgi_strm_params {
+ uint32_t mask;
+ struct ist docroot;
+ struct ist scriptname;
+ struct ist pathinfo;
+ struct ist meth;
+ struct ist uri;
+ struct ist vsn;
+ struct ist qs;
+ struct ist srv_name;
+ struct ist srv_port;
+ struct ist rem_addr;
+ struct ist rem_port;
+ struct ist cont_len;
+ struct ist srv_soft;
+ int https;
+ struct buffer *p;
+};
+
+/* Maximum amount of data we're OK with re-aligning for buffer optimizations */
+#define MAX_DATA_REALIGN 1024
+
+/* trace source and events */
+static void fcgi_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * fconn - internal FCGI connection
+ * fstrm - internal FCGI stream
+ * strm - application layer
+ * rx - data receipt
+ * tx - data transmission
+ * rsp - response parsing
+ */
+static const struct trace_event fcgi_trace_events[] = {
+#define FCGI_EV_FCONN_NEW (1ULL << 0)
+ { .mask = FCGI_EV_FCONN_NEW, .name = "fconn_new", .desc = "new FCGI connection" },
+#define FCGI_EV_FCONN_RECV (1ULL << 1)
+ { .mask = FCGI_EV_FCONN_RECV, .name = "fconn_recv", .desc = "Rx on FCGI connection" },
+#define FCGI_EV_FCONN_SEND (1ULL << 2)
+ { .mask = FCGI_EV_FCONN_SEND, .name = "fconn_send", .desc = "Tx on FCGI connection" },
+#define FCGI_EV_FCONN_BLK (1ULL << 3)
+ { .mask = FCGI_EV_FCONN_BLK, .name = "fconn_blk", .desc = "FCGI connection blocked" },
+#define FCGI_EV_FCONN_WAKE (1ULL << 4)
+ { .mask = FCGI_EV_FCONN_WAKE, .name = "fconn_wake", .desc = "FCGI connection woken up" },
+#define FCGI_EV_FCONN_END (1ULL << 5)
+ { .mask = FCGI_EV_FCONN_END, .name = "fconn_end", .desc = "FCGI connection terminated" },
+#define FCGI_EV_FCONN_ERR (1ULL << 6)
+ { .mask = FCGI_EV_FCONN_ERR, .name = "fconn_err", .desc = "error on FCGI connection" },
+
+#define FCGI_EV_RX_FHDR (1ULL << 7)
+ { .mask = FCGI_EV_RX_FHDR, .name = "rx_fhdr", .desc = "FCGI record header received" },
+#define FCGI_EV_RX_RECORD (1ULL << 8)
+ { .mask = FCGI_EV_RX_RECORD, .name = "rx_record", .desc = "receipt of any FCGI record" },
+#define FCGI_EV_RX_EOI (1ULL << 9)
+ { .mask = FCGI_EV_RX_EOI, .name = "rx_eoi", .desc = "receipt of end of FCGI input" },
+#define FCGI_EV_RX_GETVAL (1ULL << 10)
+ { .mask = FCGI_EV_RX_GETVAL, .name = "rx_get_values", .desc = "receipt of FCGI GET_VALUES_RESULT record" },
+#define FCGI_EV_RX_STDOUT (1ULL << 11)
+ { .mask = FCGI_EV_RX_STDOUT, .name = "rx_stdout", .desc = "receipt of FCGI STDOUT record" },
+#define FCGI_EV_RX_STDERR (1ULL << 12)
+ { .mask = FCGI_EV_RX_STDERR, .name = "rx_stderr", .desc = "receipt of FCGI STDERR record" },
+#define FCGI_EV_RX_ENDREQ (1ULL << 13)
+ { .mask = FCGI_EV_RX_ENDREQ, .name = "rx_end_req", .desc = "receipt of FCGI END_REQUEST record" },
+
+#define FCGI_EV_TX_RECORD (1ULL << 14)
+ { .mask = FCGI_EV_TX_RECORD, .name = "tx_record", .desc = "transmission of any FCGI record" },
+#define FCGI_EV_TX_EOI (1ULL << 15)
+ { .mask = FCGI_EV_TX_EOI, .name = "tx_eoi", .desc = "transmission of FCGI end of input" },
+#define FCGI_EV_TX_BEGREQ (1ULL << 16)
+ { .mask = FCGI_EV_TX_BEGREQ, .name = "tx_begin_request", .desc = "transmission of FCGI BEGIN_REQUEST record" },
+#define FCGI_EV_TX_GETVAL (1ULL << 17)
+ { .mask = FCGI_EV_TX_GETVAL, .name = "tx_get_values", .desc = "transmission of FCGI GET_VALUES record" },
+#define FCGI_EV_TX_PARAMS (1ULL << 18)
+ { .mask = FCGI_EV_TX_PARAMS, .name = "tx_params", .desc = "transmission of FCGI PARAMS record" },
+#define FCGI_EV_TX_STDIN (1ULL << 19)
+	{ .mask = FCGI_EV_TX_STDIN, .name = "tx_stdin", .desc = "transmission of FCGI STDIN record" },
+#define FCGI_EV_TX_ABORT (1ULL << 20)
+ { .mask = FCGI_EV_TX_ABORT, .name = "tx_abort", .desc = "transmission of FCGI ABORT record" },
+
+#define FCGI_EV_RSP_DATA (1ULL << 21)
+ { .mask = FCGI_EV_RSP_DATA, .name = "rsp_data", .desc = "parse any data of H1 response" },
+#define FCGI_EV_RSP_EOM (1ULL << 22)
+ { .mask = FCGI_EV_RSP_EOM, .name = "rsp_eom", .desc = "reach the end of message of H1 response" },
+#define FCGI_EV_RSP_HDRS (1ULL << 23)
+ { .mask = FCGI_EV_RSP_HDRS, .name = "rsp_headers", .desc = "parse headers of H1 response" },
+#define FCGI_EV_RSP_BODY (1ULL << 24)
+ { .mask = FCGI_EV_RSP_BODY, .name = "rsp_body", .desc = "parse body part of H1 response" },
+#define FCGI_EV_RSP_TLRS (1ULL << 25)
+	{ .mask = FCGI_EV_RSP_TLRS, .name = "rsp_trailers", .desc = "parse trailers of H1 response" },
+
+#define FCGI_EV_FSTRM_NEW (1ULL << 26)
+ { .mask = FCGI_EV_FSTRM_NEW, .name = "fstrm_new", .desc = "new FCGI stream" },
+#define FCGI_EV_FSTRM_BLK (1ULL << 27)
+ { .mask = FCGI_EV_FSTRM_BLK, .name = "fstrm_blk", .desc = "FCGI stream blocked" },
+#define FCGI_EV_FSTRM_END (1ULL << 28)
+ { .mask = FCGI_EV_FSTRM_END, .name = "fstrm_end", .desc = "FCGI stream terminated" },
+#define FCGI_EV_FSTRM_ERR (1ULL << 29)
+ { .mask = FCGI_EV_FSTRM_ERR, .name = "fstrm_err", .desc = "error on FCGI stream" },
+
+#define FCGI_EV_STRM_NEW (1ULL << 30)
+ { .mask = FCGI_EV_STRM_NEW, .name = "strm_new", .desc = "app-layer stream creation" },
+#define FCGI_EV_STRM_RECV (1ULL << 31)
+ { .mask = FCGI_EV_STRM_RECV, .name = "strm_recv", .desc = "receiving data for stream" },
+#define FCGI_EV_STRM_SEND (1ULL << 32)
+ { .mask = FCGI_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" },
+#define FCGI_EV_STRM_FULL (1ULL << 33)
+ { .mask = FCGI_EV_STRM_FULL, .name = "strm_full", .desc = "stream buffer full" },
+#define FCGI_EV_STRM_WAKE (1ULL << 34)
+ { .mask = FCGI_EV_STRM_WAKE, .name = "strm_wake", .desc = "stream woken up" },
+#define FCGI_EV_STRM_SHUT (1ULL << 35)
+ { .mask = FCGI_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" },
+#define FCGI_EV_STRM_END (1ULL << 36)
+ { .mask = FCGI_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" },
+#define FCGI_EV_STRM_ERR (1ULL << 37)
+ { .mask = FCGI_EV_STRM_ERR, .name = "strm_err", .desc = "stream error" },
+
+ { }
+};
+
+static const struct name_desc fcgi_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="fstrm", .desc="FCGI stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+
+static const struct name_desc fcgi_trace_decoding[] = {
+#define FCGI_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define FCGI_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only fconn/fstrm state and flags, no real decoding" },
+#define FCGI_VERB_SIMPLE 3
+ { .name="simple", .desc="add request/response status line or htx info when available" },
+#define FCGI_VERB_ADVANCED 4
+ { .name="advanced", .desc="add header fields or record decoding when available" },
+#define FCGI_VERB_COMPLETE 5
+ { .name="complete", .desc="add full data dump when available" },
+ { /* end */ }
+};
+
+static struct trace_source trace_fcgi __read_mostly = {
+ .name = IST("fcgi"),
+ .desc = "FastCGI multiplexer",
+ .arg_def = TRC_ARG1_CONN, // TRACE()'s first argument is always a connection
+ .default_cb = fcgi_trace,
+ .known_events = fcgi_trace_events,
+ .lockon_args = fcgi_trace_lockon_args,
+ .decoding = fcgi_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_fcgi
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+/* FCGI connection and stream pools */
+DECLARE_STATIC_POOL(pool_head_fcgi_conn, "fcgi_conn", sizeof(struct fcgi_conn));
+DECLARE_STATIC_POOL(pool_head_fcgi_strm, "fcgi_strm", sizeof(struct fcgi_strm));
+
+struct task *fcgi_timeout_task(struct task *t, void *context, unsigned int state);
+static int fcgi_process(struct fcgi_conn *fconn);
+/* fcgi_io_cb is exported to see it resolved in "show fd" */
+struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state);
+static inline struct fcgi_strm *fcgi_conn_st_by_id(struct fcgi_conn *fconn, int id);
+struct task *fcgi_deferred_shut(struct task *t, void *ctx, unsigned int state);
+static struct fcgi_strm *fcgi_stconn_new(struct fcgi_conn *fconn, struct stconn *sc, struct session *sess);
+static void fcgi_strm_notify_recv(struct fcgi_strm *fstrm);
+static void fcgi_strm_notify_send(struct fcgi_strm *fstrm);
+static void fcgi_strm_alert(struct fcgi_strm *fstrm);
+static int fcgi_strm_send_abort(struct fcgi_conn *fconn, struct fcgi_strm *fstrm);
+
+/* a dummy closed endpoint */
+static const struct sedesc closed_ep = {
+ .sc = NULL,
+ .flags = SE_FL_DETACHED,
+};
+
+/* a dummy management stream */
+static const struct fcgi_strm *fcgi_mgmt_stream = &(const struct fcgi_strm){
+ .sd = (struct sedesc*)&closed_ep,
+ .fconn = NULL,
+ .state = FCGI_SS_CLOSED,
+ .flags = FCGI_SF_NONE,
+ .id = 0,
+};
+
+/* and a dummy idle stream for use with any unknown stream */
+static const struct fcgi_strm *fcgi_unknown_stream = &(const struct fcgi_strm){
+ .sd = (struct sedesc*)&closed_ep,
+ .fconn = NULL,
+ .state = FCGI_SS_IDLE,
+ .flags = FCGI_SF_NONE,
+ .id = 0,
+};
+
+/* returns the stconn associated to the FCGI stream */
+static forceinline struct stconn *fcgi_strm_sc(const struct fcgi_strm *fstrm)
+{
+ return fstrm->sd->sc;
+}
+
+
+/* the FCGI traces always expect that arg1, if non-null, is of type connection
+ * (from which we can derive fconn), that arg2, if non-null, is of type fstrm,
+ * and that arg3, if non-null, is a htx for rx/tx headers.
+ */
+static void fcgi_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ struct fcgi_conn *fconn = conn ? conn->ctx : NULL;
+ const struct fcgi_strm *fstrm = a2;
+ const struct htx *htx = a3;
+ const size_t *val = a4;
+
+ if (!fconn)
+ fconn = (fstrm ? fstrm->fconn : NULL);
+
+ if (!fconn || src->verbosity < FCGI_VERB_CLEAN)
+ return;
+
+ /* Display the response state if fstrm is defined */
+ if (fstrm)
+ chunk_appendf(&trace_buf, " [rsp:%s]", h1m_state_str(fstrm->h1m.state));
+
+ if (src->verbosity == FCGI_VERB_CLEAN)
+ return;
+
+	/* Display the value of the 4th argument (level > STATE) */
+ if (src->level > TRACE_LEVEL_STATE && val)
+ chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
+
+ /* Display status-line if possible (verbosity > MINIMAL) */
+ if (src->verbosity > FCGI_VERB_MINIMAL && htx && htx_nbblks(htx)) {
+ const struct htx_blk *blk = __htx_get_head_blk(htx);
+ const struct htx_sl *sl = htx_get_blk_ptr(htx, blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL)
+ chunk_appendf(&trace_buf, " - \"%.*s %.*s %.*s\"",
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ }
+
+ /* Display fconn info and, if defined, fstrm info */
+ chunk_appendf(&trace_buf, " - fconn=%p(%s,0x%08x)", fconn, fconn_st_to_str(fconn->state), fconn->flags);
+ if (fstrm)
+ chunk_appendf(&trace_buf, " fstrm=%p(%d,%s,0x%08x)", fstrm, fstrm->id, fstrm_st_to_str(fstrm->state), fstrm->flags);
+
+ if (!fstrm || fstrm->id <= 0)
+ chunk_appendf(&trace_buf, " dsi=%d", fconn->dsi);
+ if (fconn->dsi >= 0 && (mask & FCGI_EV_RX_FHDR))
+ chunk_appendf(&trace_buf, " drt=%s", fcgi_rt_str(fconn->drt));
+
+ if (src->verbosity == FCGI_VERB_MINIMAL)
+ return;
+
+ /* Display mbuf and dbuf info (level > USER & verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_USER) {
+ if (src->verbosity == FCGI_VERB_COMPLETE ||
+ (src->verbosity == FCGI_VERB_ADVANCED && (mask & (FCGI_EV_FCONN_RECV|FCGI_EV_RX_RECORD))))
+ chunk_appendf(&trace_buf, " dbuf=%u@%p+%u/%u",
+ (unsigned int)b_data(&fconn->dbuf), b_orig(&fconn->dbuf),
+ (unsigned int)b_head_ofs(&fconn->dbuf), (unsigned int)b_size(&fconn->dbuf));
+ if (src->verbosity == FCGI_VERB_COMPLETE ||
+ (src->verbosity == FCGI_VERB_ADVANCED && (mask & (FCGI_EV_FCONN_SEND|FCGI_EV_TX_RECORD)))) {
+ struct buffer *hmbuf = br_head(fconn->mbuf);
+ struct buffer *tmbuf = br_tail(fconn->mbuf);
+
+ chunk_appendf(&trace_buf, " .mbuf=[%u..%u|%u],h=[%u@%p+%u/%u],t=[%u@%p+%u/%u]",
+ br_head_idx(fconn->mbuf), br_tail_idx(fconn->mbuf), br_size(fconn->mbuf),
+ (unsigned int)b_data(hmbuf), b_orig(hmbuf),
+ (unsigned int)b_head_ofs(hmbuf), (unsigned int)b_size(hmbuf),
+ (unsigned int)b_data(tmbuf), b_orig(tmbuf),
+ (unsigned int)b_head_ofs(tmbuf), (unsigned int)b_size(tmbuf));
+ }
+
+ if (fstrm && (src->verbosity == FCGI_VERB_COMPLETE ||
+ (src->verbosity == FCGI_VERB_ADVANCED && (mask & (FCGI_EV_STRM_RECV|FCGI_EV_RSP_DATA)))))
+ chunk_appendf(&trace_buf, " rxbuf=%u@%p+%u/%u",
+ (unsigned int)b_data(&fstrm->rxbuf), b_orig(&fstrm->rxbuf),
+ (unsigned int)b_head_ofs(&fstrm->rxbuf), (unsigned int)b_size(&fstrm->rxbuf));
+ }
+
+ /* Display htx info if defined (level > USER) */
+ if (src->level > TRACE_LEVEL_USER && htx) {
+ int full = 0;
+
+ /* Full htx info (level > STATE && verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_STATE) {
+ if (src->verbosity == FCGI_VERB_COMPLETE)
+ full = 1;
+ else if (src->verbosity == FCGI_VERB_ADVANCED && (mask & (FCGI_EV_RSP_HDRS|FCGI_EV_TX_PARAMS)))
+ full = 1;
+ }
+
+ chunk_memcat(&trace_buf, "\n\t", 2);
+ htx_dump(&trace_buf, htx, full);
+ }
+}
+
+/*****************************************************/
+/* functions below are for dynamic buffer management */
+/*****************************************************/
+
+/* Indicates whether or not we may call the fcgi_recv() function to attempt
+ * to receive data into the buffer and/or demux pending data. The condition is
+ * a bit complex due to some API limits for now. The rules are the following :
+ * - if an error or a shutdown was detected on the connection and the buffer
+ * is empty, we must not attempt to receive
+ * - if the demux buf failed to be allocated, we must not try to receive and
+ * we know there is nothing pending
+ * - if no flag indicates a blocking condition, we may attempt to receive,
+ * regardless of whether the demux buffer is full or not, so that only
+ *     the demux part decides whether or not to block. This is needed because
+ * the connection API indeed prevents us from re-enabling receipt that is
+ * already enabled in a polled state, so we must always immediately stop
+ * as soon as the demux can't proceed so as never to hit an end of read
+ * with data pending in the buffers.
+ *   - otherwise we may not attempt to receive
+ */
+static inline int fcgi_recv_allowed(const struct fcgi_conn *fconn)
+{
+ if (fconn->flags & (FCGI_CF_EOS|FCGI_CF_ERROR))
+ return 0;
+
+ if (b_data(&fconn->dbuf) == 0 && fconn->state == FCGI_CS_CLOSED)
+ return 0;
+
+ if (!(fconn->flags & FCGI_CF_DEM_DALLOC) &&
+ !(fconn->flags & FCGI_CF_DEM_BLOCK_ANY))
+ return 1;
+
+ return 0;
+}
+
+/* Restarts reading on the connection if it was not enabled */
+static inline void fcgi_conn_restart_reading(const struct fcgi_conn *fconn, int consider_buffer)
+{
+ if (!fcgi_recv_allowed(fconn))
+ return;
+ if ((!consider_buffer || !b_data(&fconn->dbuf)) &&
+ (fconn->wait_event.events & SUB_RETRY_RECV))
+ return;
+ tasklet_wakeup(fconn->wait_event.tasklet);
+}
+
+
+/* Tries to grab a buffer and to re-enable processing on mux <target>. The
+ * fcgi_conn flags are used to figure what buffer was requested. It returns 1 if
+ * the allocation succeeds, in which case the connection is woken up, or 0 if
+ * it's impossible to wake up and we prefer to be woken up later.
+ */
+static int fcgi_buf_available(void *target)
+{
+ struct fcgi_conn *fconn = target;
+ struct fcgi_strm *fstrm;
+
+ if ((fconn->flags & FCGI_CF_DEM_DALLOC) && b_alloc(&fconn->dbuf)) {
+ TRACE_STATE("unblocking fconn, dbuf allocated", FCGI_EV_FCONN_RECV|FCGI_EV_FCONN_BLK|FCGI_EV_FCONN_WAKE, fconn->conn);
+ fconn->flags &= ~FCGI_CF_DEM_DALLOC;
+ fcgi_conn_restart_reading(fconn, 1);
+ return 1;
+ }
+
+ if ((fconn->flags & FCGI_CF_MUX_MALLOC) && b_alloc(br_tail(fconn->mbuf))) {
+ TRACE_STATE("unblocking fconn, mbuf allocated", FCGI_EV_FCONN_SEND|FCGI_EV_FCONN_BLK|FCGI_EV_FCONN_WAKE, fconn->conn);
+ fconn->flags &= ~FCGI_CF_MUX_MALLOC;
+ if (fconn->flags & FCGI_CF_DEM_MROOM) {
+ fconn->flags &= ~FCGI_CF_DEM_MROOM;
+ fcgi_conn_restart_reading(fconn, 1);
+ }
+ return 1;
+ }
+
+ if ((fconn->flags & FCGI_CF_DEM_SALLOC) &&
+ (fstrm = fcgi_conn_st_by_id(fconn, fconn->dsi)) && fcgi_strm_sc(fstrm) &&
+ b_alloc(&fstrm->rxbuf)) {
+ TRACE_STATE("unblocking fstrm, rxbuf allocated", FCGI_EV_STRM_RECV|FCGI_EV_FSTRM_BLK|FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ fconn->flags &= ~FCGI_CF_DEM_SALLOC;
+ fcgi_conn_restart_reading(fconn, 1);
+ fcgi_strm_notify_recv(fstrm);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline struct buffer *fcgi_get_buf(struct fcgi_conn *fconn, struct buffer *bptr)
+{
+ struct buffer *buf = NULL;
+
+ if (likely(!LIST_INLIST(&fconn->buf_wait.list)) &&
+ unlikely((buf = b_alloc(bptr)) == NULL)) {
+ fconn->buf_wait.target = fconn;
+ fconn->buf_wait.wakeup_cb = fcgi_buf_available;
+ LIST_APPEND(&th_ctx->buffer_wq, &fconn->buf_wait.list);
+ }
+ return buf;
+}
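+
+/* Typical usage pattern (a sketch, not a call site from this file): a caller
+ * tries fcgi_get_buf() and, on a NULL return, simply stops processing; since
+ * the connection was queued in the buffer wait list with fcgi_buf_available()
+ * as the wakeup callback, it is restarted once buffers are released:
+ *
+ *	if (!fcgi_get_buf(fconn, &fconn->dbuf)) {
+ *		fconn->flags |= FCGI_CF_DEM_DALLOC;
+ *		return;  // woken up later by fcgi_buf_available()
+ *	}
+ */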
+
+static inline void fcgi_release_buf(struct fcgi_conn *fconn, struct buffer *bptr)
+{
+ if (bptr->size) {
+ b_free(bptr);
+ offer_buffers(NULL, 1);
+ }
+}
+
+static inline void fcgi_release_mbuf(struct fcgi_conn *fconn)
+{
+ struct buffer *buf;
+ unsigned int count = 0;
+
+ while (b_size(buf = br_head_pick(fconn->mbuf))) {
+ b_free(buf);
+ count++;
+ }
+ if (count)
+ offer_buffers(NULL, count);
+}
+
+/* Returns the number of allocatable outgoing streams for the connection,
+ * taking the number of reserved streams into account.
+ */
+static inline int fcgi_streams_left(const struct fcgi_conn *fconn)
+{
+ int ret;
+
+ ret = (unsigned int)(0x7FFF - fconn->max_id) - fconn->nb_reserved - 1;
+ if (ret < 0)
+ ret = 0;
+ return ret;
+}
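+
+/* Example: on a fresh connection (max_id == -1, nb_reserved == 0) this
+ * yields 0x7FFF - (-1) - 0 - 1 = 32767 allocatable streams.
+ */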
+
+/* Returns the number of streams in use on a connection to figure if it's
+ * idle or not. We check nb_sc and not nb_streams as the caller will want
+ * to know if it was the last one after a detach().
+ */
+static int fcgi_used_streams(struct connection *conn)
+{
+ struct fcgi_conn *fconn = conn->ctx;
+
+ return fconn->nb_sc;
+}
+
+/* Returns the number of concurrent streams available on the connection */
+static int fcgi_avail_streams(struct connection *conn)
+{
+ struct server *srv = objt_server(conn->target);
+ struct fcgi_conn *fconn = conn->ctx;
+ int ret1, ret2;
+
+ /* Don't open new stream if the connection is closed */
+ if (fconn->state == FCGI_CS_CLOSED)
+ return 0;
+
+ /* May be negative if this setting has changed */
+ ret1 = (fconn->streams_limit - fconn->nb_streams);
+
+ /* we must also consider the limit imposed by stream IDs */
+ ret2 = fcgi_streams_left(fconn);
+ ret1 = MIN(ret1, ret2);
+ if (ret1 > 0 && srv && srv->max_reuse >= 0) {
+ ret2 = ((fconn->stream_cnt <= srv->max_reuse) ? srv->max_reuse - fconn->stream_cnt + 1: 0);
+ ret1 = MIN(ret1, ret2);
+ }
+ return ret1;
+}
+
+/*****************************************************************/
+/* functions below are dedicated to the mux setup and management */
+/*****************************************************************/
+
+/* Initializes the mux once it's attached. Only outgoing connections are
+ * supported. So the context is already initialized before installing the
+ * mux. <input> is always used as the input buffer and may contain data. It is
+ * the caller's responsibility not to reuse it anymore. Returns < 0 on error.
+ */
+static int fcgi_init(struct connection *conn, struct proxy *px, struct session *sess,
+ struct buffer *input)
+{
+ struct fcgi_conn *fconn;
+ struct fcgi_strm *fstrm;
+ struct fcgi_app *app = get_px_fcgi_app(px);
+ struct task *t = NULL;
+ void *conn_ctx = conn->ctx;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_NEW);
+
+ if (!app) {
+ TRACE_ERROR("No FCGI app found, don't create fconn", FCGI_EV_FCONN_NEW|FCGI_EV_FCONN_END|FCGI_EV_FCONN_ERR);
+ goto fail_conn;
+ }
+
+ fconn = pool_alloc(pool_head_fcgi_conn);
+ if (!fconn) {
+ TRACE_ERROR("fconn allocation failure", FCGI_EV_FCONN_NEW|FCGI_EV_FCONN_END|FCGI_EV_FCONN_ERR);
+ goto fail_conn;
+ }
+
+ fconn->shut_timeout = fconn->timeout = px->timeout.server;
+ if (tick_isset(px->timeout.serverfin))
+ fconn->shut_timeout = px->timeout.serverfin;
+
+ fconn->flags = FCGI_CF_NONE;
+
+ /* Retrieve useful info from the FCGI app */
+ if (app->flags & FCGI_APP_FL_KEEP_CONN)
+ fconn->flags |= FCGI_CF_KEEP_CONN;
+ if (app->flags & FCGI_APP_FL_GET_VALUES)
+ fconn->flags |= FCGI_CF_GET_VALUES;
+ if (app->flags & FCGI_APP_FL_MPXS_CONNS)
+ fconn->flags |= FCGI_CF_MPXS_CONNS;
+
+ fconn->proxy = px;
+ fconn->app = app;
+ fconn->task = NULL;
+ if (tick_isset(fconn->timeout)) {
+ t = task_new_here();
+ if (!t) {
+ TRACE_ERROR("fconn task allocation failure", FCGI_EV_FCONN_NEW|FCGI_EV_FCONN_END|FCGI_EV_FCONN_ERR);
+ goto fail;
+ }
+
+ fconn->task = t;
+ t->process = fcgi_timeout_task;
+ t->context = fconn;
+ t->expire = tick_add(now_ms, fconn->timeout);
+ }
+
+ fconn->wait_event.tasklet = tasklet_new();
+ if (!fconn->wait_event.tasklet)
+ goto fail;
+ fconn->wait_event.tasklet->process = fcgi_io_cb;
+ fconn->wait_event.tasklet->context = fconn;
+ fconn->wait_event.events = 0;
+
+ /* Initialise the context. */
+ fconn->state = FCGI_CS_INIT;
+ fconn->conn = conn;
+ fconn->streams_limit = app->maxreqs;
+ fconn->max_id = -1;
+ fconn->nb_streams = 0;
+ fconn->nb_sc = 0;
+ fconn->nb_reserved = 0;
+ fconn->stream_cnt = 0;
+
+ fconn->dbuf = *input;
+ fconn->dsi = -1;
+
+ br_init(fconn->mbuf, sizeof(fconn->mbuf) / sizeof(fconn->mbuf[0]));
+ fconn->streams_by_id = EB_ROOT;
+ LIST_INIT(&fconn->send_list);
+ LIST_INIT(&fconn->buf_wait.list);
+
+ conn->ctx = fconn;
+
+ if (t)
+ task_queue(t);
+
+ /* FIXME: this is temporary, for outgoing connections we need to
+ * immediately allocate a stream until the code is modified so that the
+ * caller calls ->attach(). For now the outgoing sc is stored as
+ * conn->ctx by the caller and saved in conn_ctx.
+ */
+ fstrm = fcgi_stconn_new(fconn, conn_ctx, sess);
+ if (!fstrm)
+ goto fail;
+
+
+	/* Prepare to read something */
+ fcgi_conn_restart_reading(fconn, 1);
+ TRACE_LEAVE(FCGI_EV_FCONN_NEW, conn);
+ return 0;
+
+ fail:
+ task_destroy(t);
+ tasklet_free(fconn->wait_event.tasklet);
+ pool_free(pool_head_fcgi_conn, fconn);
+ fail_conn:
+ conn->ctx = conn_ctx; // restore saved ctx
+ TRACE_DEVEL("leaving in error", FCGI_EV_FCONN_NEW|FCGI_EV_FCONN_END|FCGI_EV_FCONN_ERR);
+ return -1;
+}
+
+/* Returns the next allocatable outgoing stream ID for the FCGI connection, or
+ * -1 if no more is allocatable.
+ */
+static inline int32_t fcgi_conn_get_next_sid(const struct fcgi_conn *fconn)
+{
+ int32_t id = (fconn->max_id + 1) | 1;
+
+ if ((id & 0x80000000U))
+ id = -1;
+ return id;
+}
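+
+/* Example: with max_id == -1 (no stream allocated yet) the next ID is 1,
+ * then with max_id == 1 it is 3, so outgoing stream IDs are always odd.
+ */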
+
+/* Returns the stream associated with id <id> or NULL if not found */
+static inline struct fcgi_strm *fcgi_conn_st_by_id(struct fcgi_conn *fconn, int id)
+{
+ struct eb32_node *node;
+
+ if (id == 0)
+ return (struct fcgi_strm *)fcgi_mgmt_stream;
+
+ if (id > fconn->max_id)
+ return (struct fcgi_strm *)fcgi_unknown_stream;
+
+ node = eb32_lookup(&fconn->streams_by_id, id);
+ if (!node)
+ return (struct fcgi_strm *)fcgi_unknown_stream;
+ return container_of(node, struct fcgi_strm, by_id);
+}
+
+
+/* Release function. This one should be called to free all resources allocated
+ * to the mux.
+ */
+static void fcgi_release(struct fcgi_conn *fconn)
+{
+ struct connection *conn = fconn->conn;
+
+ TRACE_POINT(FCGI_EV_FCONN_END);
+
+ if (LIST_INLIST(&fconn->buf_wait.list))
+ LIST_DEL_INIT(&fconn->buf_wait.list);
+
+ fcgi_release_buf(fconn, &fconn->dbuf);
+ fcgi_release_mbuf(fconn);
+
+ if (fconn->task) {
+ fconn->task->context = NULL;
+ task_wakeup(fconn->task, TASK_WOKEN_OTHER);
+ fconn->task = NULL;
+ }
+ tasklet_free(fconn->wait_event.tasklet);
+ if (conn && fconn->wait_event.events != 0)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx, fconn->wait_event.events,
+ &fconn->wait_event);
+
+ pool_free(pool_head_fcgi_conn, fconn);
+
+ if (conn) {
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ TRACE_DEVEL("freeing conn", FCGI_EV_FCONN_END, conn);
+
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+}
+
+/* Detect a pending read0 for a FCGI connection. It happens if a read0 is
+ * pending on the connection AND if there is no more data in the demux
+ * buffer. The function returns 1 to report a read0 or 0 otherwise.
+ */
+static int fcgi_conn_read0_pending(struct fcgi_conn *fconn)
+{
+ if ((fconn->flags & FCGI_CF_EOS) && !b_data(&fconn->dbuf))
+ return 1;
+ return 0;
+}
+
+
+/* Returns true if the FCGI connection must be released */
+static inline int fcgi_conn_is_dead(struct fcgi_conn *fconn)
+{
+ if (eb_is_empty(&fconn->streams_by_id) && /* don't close if streams exist */
+ (!(fconn->flags & FCGI_CF_KEEP_CONN) || /* don't keep the connection alive */
+ (fconn->flags & FCGI_CF_ERROR) || /* errors close immediately */
+	     (fconn->state == FCGI_CS_CLOSED && !fconn->task) || /* a timeout struck earlier */
+ (!(fconn->conn->owner)) || /* Nobody's left to take care of the connection, drop it now */
+ (!br_data(fconn->mbuf) && /* mux buffer empty, also process clean events below */
+ (fconn->flags & FCGI_CF_EOS))))
+ return 1;
+ return 0;
+}
+
+
+/********************************************************/
+/* functions below are for the FCGI protocol processing */
+/********************************************************/
+
+/* Marks an error on the stream. */
+static inline void fcgi_strm_error(struct fcgi_strm *fstrm)
+{
+ if (fstrm->id && fstrm->state != FCGI_SS_ERROR) {
+ TRACE_POINT(FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ if (fstrm->state < FCGI_SS_ERROR) {
+ fstrm->state = FCGI_SS_ERROR;
+ TRACE_STATE("switching to ERROR", FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ }
+ se_fl_set_error(fstrm->sd);
+ }
+}
+
+/* Attempts to notify the data layer of recv availability */
+static void fcgi_strm_notify_recv(struct fcgi_strm *fstrm)
+{
+ if (fstrm->subs && (fstrm->subs->events & SUB_RETRY_RECV)) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ tasklet_wakeup(fstrm->subs->tasklet);
+ fstrm->subs->events &= ~SUB_RETRY_RECV;
+ if (!fstrm->subs->events)
+ fstrm->subs = NULL;
+ }
+}
+
+/* Attempts to notify the data layer of send availability */
+static void fcgi_strm_notify_send(struct fcgi_strm *fstrm)
+{
+ if (fstrm->subs && (fstrm->subs->events & SUB_RETRY_SEND)) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ fstrm->flags |= FCGI_SF_NOTIFIED;
+ tasklet_wakeup(fstrm->subs->tasklet);
+ fstrm->subs->events &= ~SUB_RETRY_SEND;
+ if (!fstrm->subs->events)
+ fstrm->subs = NULL;
+ }
+ else if (fstrm->flags & (FCGI_SF_WANT_SHUTR | FCGI_SF_WANT_SHUTW)) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ tasklet_wakeup(fstrm->shut_tl);
+ }
+}
+
+/* Alerts the data layer, trying to wake it up by all means, following
+ * this sequence :
+ * - if the fcgi stream's data layer is subscribed to recv, then it's woken up
+ *   for recv
+ * - if it's subscribed to send, then it's woken up for send
+ * - if it was subscribed to neither, its ->wake() callback is called
+ * It is safe to call this function with a closed stream which doesn't have a
+ * stream connector anymore.
+ */
+static void fcgi_strm_alert(struct fcgi_strm *fstrm)
+{
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ if (fstrm->subs ||
+ (fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW))) {
+ fcgi_strm_notify_recv(fstrm);
+ fcgi_strm_notify_send(fstrm);
+ }
+ else if (fcgi_strm_sc(fstrm) && fcgi_strm_sc(fstrm)->app_ops->wake != NULL) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fstrm->fconn->conn, fstrm);
+ fcgi_strm_sc(fstrm)->app_ops->wake(fcgi_strm_sc(fstrm));
+ }
+}
+
+/* Writes the 16-bit record size <len> at address <record> */
+static inline void fcgi_set_record_size(void *record, uint16_t len)
+{
+ uint8_t *out = (record + 4);
+
+ *out = (len >> 8);
+ *(out + 1) = (len & 0xff);
+}
+
+/* Writes the 16-bit stream id <id> at address <record> */
+static inline void fcgi_set_record_id(void *record, uint16_t id)
+{
+ uint8_t *out = (record + 2);
+
+ *out = (id >> 8);
+ *(out + 1) = (id & 0xff);
+}
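+
+/* For reference, both helpers above patch fields of the fixed 8-byte FCGI
+ * record header (FastCGI spec, section 3.3):
+ *
+ *   offset 0: version         offset 1: type
+ *   offset 2: requestId B1    offset 3: requestId B0
+ *   offset 4: contentLength B1    offset 5: contentLength B0
+ *   offset 6: paddingLength   offset 7: reserved
+ *
+ * requestId and contentLength are 16-bit big-endian, hence the manual byte
+ * splits above instead of a raw 16-bit store.
+ */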
+
+/* Marks a FCGI stream as CLOSED and decrements the number of active streams for
+ * its connection if the stream was not yet closed. Please use this exclusively
+ * before closing a stream to ensure stream count is well maintained.
+ */
+static inline void fcgi_strm_close(struct fcgi_strm *fstrm)
+{
+ if (fstrm->state != FCGI_SS_CLOSED) {
+ TRACE_ENTER(FCGI_EV_FSTRM_END, fstrm->fconn->conn, fstrm);
+ fstrm->fconn->nb_streams--;
+ if (!fstrm->id)
+ fstrm->fconn->nb_reserved--;
+ if (fcgi_strm_sc(fstrm)) {
+ if (!se_fl_test(fstrm->sd, SE_FL_EOS) && !b_data(&fstrm->rxbuf))
+ fcgi_strm_notify_recv(fstrm);
+ }
+ fstrm->state = FCGI_SS_CLOSED;
+ TRACE_STATE("switching to CLOSED", FCGI_EV_FSTRM_END, fstrm->fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_FSTRM_END, fstrm->fconn->conn, fstrm);
+ }
+}
+
+/* Detaches a FCGI stream from its FCGI connection and releases it to the
+ * fcgi_strm pool.
+ */
+static void fcgi_strm_destroy(struct fcgi_strm *fstrm)
+{
+ struct connection *conn = fstrm->fconn->conn;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_END, conn, fstrm);
+
+ fcgi_strm_close(fstrm);
+ eb32_delete(&fstrm->by_id);
+ if (b_size(&fstrm->rxbuf)) {
+ b_free(&fstrm->rxbuf);
+ offer_buffers(NULL, 1);
+ }
+ if (fstrm->subs)
+ fstrm->subs->events = 0;
+ /* There's no need to explicitly call unsubscribe here, the only
+ * reference left would be in the fconn send_list/fctl_list, and if
+ * we're in it, we're getting out anyway
+ */
+ LIST_DEL_INIT(&fstrm->send_list);
+ tasklet_free(fstrm->shut_tl);
+ BUG_ON(fstrm->sd && !se_fl_test(fstrm->sd, SE_FL_ORPHAN));
+ sedesc_free(fstrm->sd);
+ pool_free(pool_head_fcgi_strm, fstrm);
+
+ TRACE_LEAVE(FCGI_EV_FSTRM_END, conn);
+}
+
+/* Allocates a new stream <id> for connection <fconn> and adds it into fconn's
+ * stream tree. In case of error, nothing is added and NULL is returned. The
+ * causes of errors can be any failed memory allocation. The caller is
+ * responsible for checking if the connection may support an extra stream prior
+ * to calling this function.
+ */
+static struct fcgi_strm *fcgi_strm_new(struct fcgi_conn *fconn, int id)
+{
+ struct fcgi_strm *fstrm;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_NEW, fconn->conn);
+
+ fstrm = pool_alloc(pool_head_fcgi_strm);
+ if (!fstrm) {
+ TRACE_ERROR("fstrm allocation failure", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_ERR|FCGI_EV_FSTRM_END, fconn->conn);
+ goto out;
+ }
+
+ fstrm->shut_tl = tasklet_new();
+ if (!fstrm->shut_tl) {
+ TRACE_ERROR("fstrm shut tasklet allocation failure", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_ERR|FCGI_EV_FSTRM_END, fconn->conn);
+ pool_free(pool_head_fcgi_strm, fstrm);
+ goto out;
+ }
+ fstrm->subs = NULL;
+ fstrm->shut_tl->process = fcgi_deferred_shut;
+ fstrm->shut_tl->context = fstrm;
+ LIST_INIT(&fstrm->send_list);
+ fstrm->fconn = fconn;
+ fstrm->sd = NULL;
+ fstrm->flags = FCGI_SF_NONE;
+ fstrm->proto_status = 0;
+ fstrm->state = FCGI_SS_IDLE;
+ fstrm->rxbuf = BUF_NULL;
+
+ h1m_init_res(&fstrm->h1m);
+ fstrm->h1m.err_pos = -1; // don't care about errors on the request path
+ fstrm->h1m.flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+
+ fstrm->by_id.key = fstrm->id = id;
+ if (id > 0)
+ fconn->max_id = id;
+ else
+ fconn->nb_reserved++;
+
+ eb32_insert(&fconn->streams_by_id, &fstrm->by_id);
+ fconn->nb_streams++;
+ fconn->stream_cnt++;
+
+ TRACE_LEAVE(FCGI_EV_FSTRM_NEW, fconn->conn, fstrm);
+ return fstrm;
+
+ out:
+ TRACE_DEVEL("leaving in error", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_ERR|FCGI_EV_FSTRM_END, fconn->conn);
+ return NULL;
+}
+
+/* Allocates a new stream associated with the stream connector <sc> on the FCGI connection
+ * <fconn> and returns it, or NULL in case of memory allocation error or if the
+ * highest possible stream ID was reached.
+ */
+static struct fcgi_strm *fcgi_stconn_new(struct fcgi_conn *fconn, struct stconn *sc,
+ struct session *sess)
+{
+ struct fcgi_strm *fstrm = NULL;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_NEW, fconn->conn);
+ if (fconn->nb_streams >= fconn->streams_limit) {
+ TRACE_ERROR("streams_limit reached", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_END|FCGI_EV_FSTRM_ERR, fconn->conn);
+ goto out;
+ }
+
+ if (fcgi_streams_left(fconn) < 1) {
+ TRACE_ERROR("!streams_left", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_END|FCGI_EV_FSTRM_ERR, fconn->conn);
+ goto out;
+ }
+
+ /* Defer choosing the ID until we send the first message to create the stream */
+ fstrm = fcgi_strm_new(fconn, 0);
+ if (!fstrm) {
+ TRACE_ERROR("fstream allocation failure", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_END|FCGI_EV_FSTRM_ERR, fconn->conn);
+ goto out;
+ }
+ if (sc_attach_mux(sc, fstrm, fconn->conn) < 0)
+ goto out;
+ fstrm->sd = sc->sedesc;
+ fstrm->sess = sess;
+ fconn->nb_sc++;
+
+ TRACE_LEAVE(FCGI_EV_FSTRM_NEW, fconn->conn, fstrm);
+ return fstrm;
+
+ out:
+ TRACE_DEVEL("leaving on error", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_END|FCGI_EV_FSTRM_ERR, fconn->conn);
+ fcgi_strm_destroy(fstrm);
+ return NULL;
+}
+
+/* Wakes a specific stream and assigns its stream connector some SE_FL_* flags among
+ * SE_FL_ERR_PENDING and SE_FL_ERROR if needed. The stream's state is
+ * automatically updated accordingly. If the stream is orphaned, it is
+ * destroyed.
+ */
+static void fcgi_strm_wake_one_stream(struct fcgi_strm *fstrm)
+{
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ TRACE_ENTER(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+
+ if (!fcgi_strm_sc(fstrm)) {
+ /* this stream was already orphaned */
+ fcgi_strm_destroy(fstrm);
+ TRACE_DEVEL("leaving with no fstrm", FCGI_EV_STRM_WAKE, fconn->conn);
+ return;
+ }
+
+ if (fcgi_conn_read0_pending(fconn)) {
+ if (fstrm->state == FCGI_SS_OPEN) {
+ fstrm->state = FCGI_SS_HREM;
+ TRACE_STATE("switching to HREM", FCGI_EV_STRM_WAKE|FCGI_EV_FSTRM_END, fconn->conn, fstrm);
+ }
+ else if (fstrm->state == FCGI_SS_HLOC)
+ fcgi_strm_close(fstrm);
+ }
+
+ if (fconn->state == FCGI_CS_CLOSED || (fconn->flags & (FCGI_CF_ERR_PENDING|FCGI_CF_ERROR))) {
+ se_fl_set_error(fstrm->sd);
+
+ if (fstrm->state < FCGI_SS_ERROR) {
+ fstrm->state = FCGI_SS_ERROR;
+ TRACE_STATE("switching to ERROR", FCGI_EV_STRM_WAKE|FCGI_EV_FSTRM_END, fconn->conn, fstrm);
+ }
+ }
+
+ fcgi_strm_alert(fstrm);
+
+ TRACE_LEAVE(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+}
+
+/* Wakes unassigned streams (ID == 0) attached to the connection. */
+static void fcgi_wake_unassigned_streams(struct fcgi_conn *fconn)
+{
+ struct eb32_node *node;
+ struct fcgi_strm *fstrm;
+
+ node = eb32_lookup(&fconn->streams_by_id, 0);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ if (fstrm->id > 0)
+ break;
+ node = eb32_next(node);
+ fcgi_strm_wake_one_stream(fstrm);
+ }
+}
+
+/* Wakes the streams attached to the connection, whose id is greater than <last>
+ * or unassigned.
+ */
+static void fcgi_wake_some_streams(struct fcgi_conn *fconn, int last)
+{
+ struct eb32_node *node;
+ struct fcgi_strm *fstrm;
+
+ TRACE_ENTER(FCGI_EV_STRM_WAKE, fconn->conn);
+
+ /* Wake all streams with ID > last */
+ node = eb32_lookup_ge(&fconn->streams_by_id, last + 1);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ node = eb32_next(node);
+ fcgi_strm_wake_one_stream(fstrm);
+ }
+ fcgi_wake_unassigned_streams(fconn);
+
+ TRACE_LEAVE(FCGI_EV_STRM_WAKE, fconn->conn);
+}
+
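+/* Fills <params> with default values for all FCGI parameters the request did
+ * not explicitly provide (via the ":fcgi-" headers handled by the caller).
+ * The strings are built into the <params->p> chunk. Returns 1 on success and
+ * 0 on error.
+ */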
+static int fcgi_set_default_param(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct htx *htx, struct htx_sl *sl,
+ struct fcgi_strm_params *params)
+{
+ struct connection *cli_conn = objt_conn(fstrm->sess->origin);
+ const struct sockaddr_storage *src = (sc_check(fcgi_strm_sc(fstrm)) ? conn_src(fconn->conn) : sc_src(sc_opposite(fcgi_strm_sc(fstrm))));
+ const struct sockaddr_storage *dst = (sc_check(fcgi_strm_sc(fstrm)) ? conn_dst(fconn->conn) : sc_dst(sc_opposite(fcgi_strm_sc(fstrm))));
+ struct ist p;
+
+ if (!sl)
+ goto error;
+
+ if (!(params->mask & FCGI_SP_DOC_ROOT))
+ params->docroot = fconn->app->docroot;
+
+ if (!(params->mask & FCGI_SP_REQ_METH)) {
+ p = htx_sl_req_meth(sl);
+ params->meth = ist2(b_tail(params->p), p.len);
+ chunk_istcat(params->p, p);
+ }
+ if (!(params->mask & FCGI_SP_REQ_URI)) {
+ p = h1_get_uri(sl);
+ params->uri = ist2(b_tail(params->p), p.len);
+ chunk_istcat(params->p, p);
+ }
+ if (!(params->mask & FCGI_SP_SRV_PROTO)) {
+ p = htx_sl_req_vsn(sl);
+ params->vsn = ist2(b_tail(params->p), p.len);
+ chunk_istcat(params->p, p);
+ }
+ if (!(params->mask & FCGI_SP_SRV_PORT)) {
+ char *end;
+ int port = 0;
+ if (dst)
+ port = get_host_port(dst);
+ end = ultoa_o(port, b_tail(params->p), b_room(params->p));
+ if (!end)
+ goto error;
+ params->srv_port = ist2(b_tail(params->p), end - b_tail(params->p));
+ params->p->data += params->srv_port.len;
+ }
+ if (!(params->mask & FCGI_SP_SRV_NAME)) {
+		/* If no Host header was found, use the server address to fill
+ * srv_name */
+ if (!istlen(params->srv_name)) {
+ char *ptr = NULL;
+
+ if (dst)
+ if (addr_to_str(dst, b_tail(params->p), b_room(params->p)) != -1)
+ ptr = b_tail(params->p);
+ if (ptr) {
+ params->srv_name = ist(ptr);
+ params->p->data += params->srv_name.len;
+ }
+ }
+ }
+ if (!(params->mask & FCGI_SP_REM_ADDR)) {
+ char *ptr = NULL;
+
+ if (src)
+ if (addr_to_str(src, b_tail(params->p), b_room(params->p)) != -1)
+ ptr = b_tail(params->p);
+ if (ptr) {
+ params->rem_addr = ist(ptr);
+ params->p->data += params->rem_addr.len;
+ }
+ }
+ if (!(params->mask & FCGI_SP_REM_PORT)) {
+ char *end;
+ int port = 0;
+ if (src)
+ port = get_host_port(src);
+ end = ultoa_o(port, b_tail(params->p), b_room(params->p));
+ if (!end)
+ goto error;
+ params->rem_port = ist2(b_tail(params->p), end - b_tail(params->p));
+ params->p->data += params->rem_port.len;
+ }
+ if (!(params->mask & FCGI_SP_CONT_LEN)) {
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ char *end;
+ size_t len = 0;
+
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA)
+ len += htx_get_blksz(blk);
+ }
+ end = ultoa_o(len, b_tail(params->p), b_room(params->p));
+ if (!end)
+ goto error;
+ params->cont_len = ist2(b_tail(params->p), end - b_tail(params->p));
+ params->p->data += params->cont_len.len;
+ }
+
+ if (!(params->mask & FCGI_SP_HTTPS)) {
+ if (cli_conn)
+ params->https = conn_is_ssl(cli_conn);
+ }
+
+ if ((params->mask & FCGI_SP_URI_MASK) != FCGI_SP_URI_MASK) {
+		/* one of scriptname, pathinfo or query_string is not set */
+ struct http_uri_parser parser = http_uri_parser_init(params->uri);
+ struct ist path = http_parse_path(&parser);
+ int len;
+
+		/* No script_name set and no valid path ==> error */
+ if (!(params->mask & FCGI_SP_SCRIPT_NAME) && !istlen(path))
+ goto error;
+
+		/* If there is a query-string, set it if not already set */
+ if (!(params->mask & FCGI_SP_REQ_QS)) {
+ struct ist qs = istfind(path, '?');
+
+ /* Update the path length */
+ path.len -= qs.len;
+
+ /* Set the query-string skipping the '?', if any */
+ if (istlen(qs))
+ params->qs = istnext(qs);
+ }
+
+ /* If the script_name is set, don't try to deduce the path_info
+ * too. The opposite is not true.
+ */
+ if (params->mask & FCGI_SP_SCRIPT_NAME) {
+ params->mask |= FCGI_SP_PATH_INFO;
+ goto end;
+ }
+
+		/* Decode the path. It must first be copied to keep the URI
+ * untouched.
+ */
+ chunk_istcat(params->p, path);
+ path.ptr = b_tail(params->p) - path.len;
+ len = url_decode(ist0(path), 0);
+ if (len < 0)
+ goto error;
+ path.len = len;
+
+ /* script_name not set, preset it with the path for now */
+ params->scriptname = path;
+
+		/* If there is no regex to match the pathinfo, jump to the last
+ * part and see if the index must be used.
+ */
+ if (!fconn->app->pathinfo_re)
+ goto check_index;
+
+ /* If some special characters are found in the decoded path (\n
+		 * or \0), the PATH_INFO regex cannot match. It is theoretically
+		 * valid, but probably unexpected, to have such characters. So,
+ * to avoid any surprises, an error is triggered in this
+ * case.
+ */
+ if (istchr(path, '\n') || istchr(path, '\0'))
+ goto error;
+
+		/* If the regex does not match, jump to the last part and see if
+ * the index must be used.
+ */
+ if (!regex_exec_match2(fconn->app->pathinfo_re, path.ptr, len, MAX_MATCH, pmatch, 0))
+ goto check_index;
+
+ /* We must have at least 1 capture for the script name,
+ * otherwise we do nothing and jump to the last part.
+ */
+ if (pmatch[1].rm_so == -1 || pmatch[1].rm_eo == -1)
+ goto check_index;
+
+ /* Finally we can set the script_name and the path_info. The
+ * path_info is set if not already defined, and if it was
+ * captured
+ */
+ params->scriptname = ist2(path.ptr + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so);
+ if (!(params->mask & FCGI_SP_PATH_INFO) && !(pmatch[2].rm_so == -1 || pmatch[2].rm_eo == -1))
+ params->pathinfo = ist2(path.ptr + pmatch[2].rm_so, pmatch[2].rm_eo - pmatch[2].rm_so);
+
+ check_index:
+ len = params->scriptname.len;
+		/* If the script_name ends with a '/', we can add the index
+ * part, if any.
+ */
+ if (istlen(fconn->app->index) && params->scriptname.ptr[len-1] == '/') {
+ struct ist sn = params->scriptname;
+
+ params->scriptname = ist2(b_tail(params->p), len+fconn->app->index.len);
+ chunk_istcat(params->p, sn);
+ chunk_istcat(params->p, fconn->app->index);
+ }
+ }
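+
+	/* Illustration (hypothetical values): with a path-info regex such as
+	 * "^(/.+\.php)(/.*)?$" and a decoded path "/app/index.php/extra",
+	 * capture 1 gives SCRIPT_NAME="/app/index.php" and capture 2 gives
+	 * PATH_INFO="/extra". Without a regex or without a match, the whole
+	 * path stays in SCRIPT_NAME, possibly completed with the configured
+	 * index when it ends with a '/'.
+	 */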
+
+ if (!(params->mask & FCGI_SP_SRV_SOFT)) {
+ params->srv_soft = ist2(b_tail(params->p), 0);
+ chunk_appendf(params->p, "HAProxy %s", haproxy_version);
+ params->srv_soft.len = b_tail(params->p) - params->srv_soft.ptr;
+ }
+
+ end:
+ return 1;
+ error:
+ return 0;
+}
+
+static int fcgi_encode_default_param(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct fcgi_strm_params *params, struct buffer *outbuf, int flag)
+{
+ struct fcgi_param p;
+
+ if (params->mask & flag)
+ return 1;
+
+ chunk_reset(&trash);
+
+ switch (flag) {
+ case FCGI_SP_CGI_GATEWAY:
+ p.n = ist("GATEWAY_INTERFACE");
+ p.v = ist("CGI/1.1");
+ goto encode;
+ case FCGI_SP_DOC_ROOT:
+ p.n = ist("DOCUMENT_ROOT");
+ p.v = params->docroot;
+ goto encode;
+ case FCGI_SP_SCRIPT_NAME:
+ p.n = ist("SCRIPT_NAME");
+ p.v = params->scriptname;
+ goto encode;
+ case FCGI_SP_PATH_INFO:
+ p.n = ist("PATH_INFO");
+ p.v = params->pathinfo;
+ goto encode;
+ case FCGI_SP_REQ_URI:
+ p.n = ist("REQUEST_URI");
+ p.v = params->uri;
+ goto encode;
+ case FCGI_SP_REQ_METH:
+ p.n = ist("REQUEST_METHOD");
+ p.v = params->meth;
+ goto encode;
+ case FCGI_SP_REQ_QS:
+ p.n = ist("QUERY_STRING");
+ p.v = params->qs;
+ goto encode;
+ case FCGI_SP_SRV_NAME:
+ p.n = ist("SERVER_NAME");
+ p.v = params->srv_name;
+ goto encode;
+ case FCGI_SP_SRV_PORT:
+ p.n = ist("SERVER_PORT");
+ p.v = params->srv_port;
+ goto encode;
+ case FCGI_SP_SRV_PROTO:
+ p.n = ist("SERVER_PROTOCOL");
+ p.v = params->vsn;
+ goto encode;
+ case FCGI_SP_REM_ADDR:
+ p.n = ist("REMOTE_ADDR");
+ p.v = params->rem_addr;
+ goto encode;
+ case FCGI_SP_REM_PORT:
+ p.n = ist("REMOTE_PORT");
+ p.v = params->rem_port;
+ goto encode;
+ case FCGI_SP_SCRIPT_FILE:
+ p.n = ist("SCRIPT_FILENAME");
+ chunk_istcat(&trash, params->docroot);
+ chunk_istcat(&trash, params->scriptname);
+ p.v = ist2(b_head(&trash), b_data(&trash));
+ goto encode;
+ case FCGI_SP_PATH_TRANS:
+ if (!istlen(params->pathinfo))
+ goto skip;
+ p.n = ist("PATH_TRANSLATED");
+ chunk_istcat(&trash, params->docroot);
+ chunk_istcat(&trash, params->pathinfo);
+ p.v = ist2(b_head(&trash), b_data(&trash));
+ goto encode;
+ case FCGI_SP_CONT_LEN:
+ p.n = ist("CONTENT_LENGTH");
+ p.v = params->cont_len;
+ goto encode;
+ case FCGI_SP_HTTPS:
+ if (!params->https)
+ goto skip;
+ p.n = ist("HTTPS");
+ p.v = ist("on");
+ goto encode;
+ case FCGI_SP_SRV_SOFT:
+ p.n = ist("SERVER_SOFTWARE");
+ p.v = params->srv_soft;
+ goto encode;
+ default:
+ goto skip;
+ }
+
+ encode:
+ if (!istlen(p.v))
+ goto skip;
+ if (!fcgi_encode_param(outbuf, &p))
+ return 0;
+ skip:
+ params->mask |= flag;
+ return 1;
+}
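+
+/* For reference, fcgi_encode_param() (fcgi.c) serializes each K/V pair using
+ * the FastCGI name-value format: each length is one byte when lower than 128,
+ * otherwise four bytes with the high bit set, followed by the raw name and
+ * value bytes. For example ("QUERY_STRING", "a=1") is encoded as:
+ *
+ *   0x0C 0x03 'Q' 'U' 'E' 'R' 'Y' '_' 'S' 'T' 'R' 'I' 'N' 'G' 'a' '=' '1'
+ */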
+
+/* Sends a GET_VALUES record. Returns > 0 on success, 0 if it couldn't do
+ * anything. It is highly unexpected, but if the record is larger than a buffer
+ * and cannot be encoded in one time, an error is triggered and the connection is
+ * closed. GET_VALUES record cannot be split.
+ */
+static int fcgi_conn_send_get_values(struct fcgi_conn *fconn)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct fcgi_param max_reqs = { .n = ist("FCGI_MAX_REQS"), .v = ist("")};
+ struct fcgi_param mpxs_conns = { .n = ist("FCGI_MPXS_CONNS"), .v = ist("")};
+ int ret = 0;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL, fconn->conn);
+
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fconn->flags |= FCGI_CF_DEM_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FCONN_BLK, fconn->conn);
+ ret = 0;
+ goto end;
+ }
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
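+
+	/* The loop above retries after b_slow_realign() has made the free
+	 * space contiguous again; if even a defragmented buffer cannot hold a
+	 * record header, we fall through to the "full" handling below.
+	 */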
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: (9)FCGI_GET_VALUES, id: 0x0000,
+ * len: 0x0000 (fill later), padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x09\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ /* Note: Don't send the param FCGI_MAX_CONNS because its value cannot be
+ * handled by HAProxy.
+ */
+ if (!fcgi_encode_param(&outbuf, &max_reqs) || !fcgi_encode_param(&outbuf, &mpxs_conns))
+ goto full;
+
+ /* update the record's size now */
+ TRACE_PROTO("FCGI GET_VALUES record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL, fconn->conn, 0, 0, (size_t[]){outbuf.data-8});
+ fcgi_set_record_size(outbuf.area, outbuf.data - FCGI_RECORD_HEADER_SZ);
+ b_add(mbuf, outbuf.data);
+ ret = 1;
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL, fconn->conn);
+ return ret;
+ full:
+ /* Too large to be encoded. For GET_VALUES records, it is an error */
+ if (!b_data(mbuf)) {
+ TRACE_ERROR("GET_VALUES record too large", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ goto fail;
+ }
+
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fconn->flags |= FCGI_CF_DEM_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FCONN_BLK, fconn->conn);
+ ret = 0;
+ goto end;
+ fail:
+ fconn->state = FCGI_CS_CLOSED;
+ TRACE_STATE("switching to CLOSED", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL|FCGI_EV_FCONN_END, fconn->conn);
+ TRACE_DEVEL("leaving on error", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ return 0;
+}
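+
+/* On the wire, the record built above looks like this (lengths per the
+ * FastCGI name-value encoding, record length filled in afterwards):
+ *
+ *   01 09 00 00 00 20 00 00       header: GET_VALUES, id 0, len 32
+ *   0d 00 "FCGI_MAX_REQS"         name-len 13, value-len 0
+ *   0f 00 "FCGI_MPXS_CONNS"       name-len 15, value-len 0
+ *
+ * Empty values ask the application to report its own limits back in a
+ * GET_VALUES_RESULT record.
+ */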
+
+/* Processes a GET_VALUES_RESULT record. Returns > 0 on success, 0 if it
+ * couldn't do anything. It is highly unexpected, but if the record is larger
+ * than a buffer and cannot be decoded at once, an error is triggered and
+ * the connection is closed. GET_VALUES_RESULT record cannot be split.
+ */
+static int fcgi_conn_handle_values_result(struct fcgi_conn *fconn)
+{
+ struct buffer inbuf;
+ struct buffer *dbuf;
+ size_t offset;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+
+ dbuf = &fconn->dbuf;
+
+ /* Record too large to be fully decoded */
+ if (b_size(dbuf) < (fconn->drl + fconn->drp))
+ goto fail;
+
+ /* process full record only */
+ if (b_data(dbuf) < (fconn->drl + fconn->drp)) {
+ TRACE_DEVEL("leaving on missing data", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ return 0;
+ }
+
+ if (unlikely(b_contig_data(dbuf, b_head_ofs(dbuf)) < fconn->drl)) {
+		/* Realign the demux buffer if the record wraps. It is unexpected
+ * at this stage because it should be the first record received
+ * from the FCGI application.
+ */
+ b_slow_realign_ofs(dbuf, trash.area, 0);
+ }
+
+ inbuf = b_make(b_head(dbuf), b_data(dbuf), 0, fconn->drl);
+
+ for (offset = 0; offset < b_data(&inbuf); ) {
+ struct fcgi_param p;
+ size_t ret;
+
+ ret = fcgi_aligned_decode_param(&inbuf, offset, &p);
+ if (!ret) {
+ /* name or value too large to be decoded at once */
+ TRACE_ERROR("error decoding GET_VALUES_RESULT param", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ goto fail;
+ }
+ offset += ret;
+
+ if (isteqi(p.n, ist("FCGI_MPXS_CONNS"))) {
+ if (isteq(p.v, ist("1"))) {
+ TRACE_STATE("set mpxs param", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn, 0, 0, (size_t[]){1});
+ fconn->flags |= FCGI_CF_MPXS_CONNS;
+ }
+ else {
+ TRACE_STATE("set mpxs param", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn, 0, 0, (size_t[]){0});
+ fconn->flags &= ~FCGI_CF_MPXS_CONNS;
+ }
+ }
+ else if (isteqi(p.n, ist("FCGI_MAX_REQS"))) {
+ fconn->streams_limit = strl2ui(p.v.ptr, p.v.len);
+ TRACE_STATE("set streams_limit", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn, 0, 0, (size_t[]){fconn->streams_limit});
+ }
+ /*
+ * Ignore all other params
+ */
+ }
+
+ /* Reset the number of concurrent streams supported if the FCGI
+ * application does not support connection multiplexing
+ */
+ if (!(fconn->flags & FCGI_CF_MPXS_CONNS)) {
+ fconn->streams_limit = 1;
+		TRACE_STATE("no mpxs, streams_limit forced to 1", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ }
+
+ /* We must be sure to have read exactly the announced record length, no
+ * more no less
+ */
+ if (offset != fconn->drl) {
+ TRACE_ERROR("invalid GET_VALUES_RESULT record length", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ goto fail;
+ }
+
+ TRACE_PROTO("FCGI GET_VALUES_RESULT record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn, 0, 0, (size_t[]){fconn->drl});
+ b_del(&fconn->dbuf, fconn->drl + fconn->drp);
+ fconn->drl = 0;
+ fconn->drp = 0;
+ fconn->state = FCGI_CS_RECORD_H;
+ fcgi_wake_unassigned_streams(fconn);
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ return 1;
+ fail:
+ fconn->state = FCGI_CS_CLOSED;
+ TRACE_STATE("switching to CLOSED", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ TRACE_DEVEL("leaving on error", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ return 0;
+}
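+
+/* A typical GET_VALUES_RESULT payload decoded above could be (values are
+ * application-dependent, shown here for illustration):
+ *
+ *   0f 01 "FCGI_MPXS_CONNS" "1"    -> multiplexing supported
+ *   0d 02 "FCGI_MAX_REQS" "50"     -> at most 50 concurrent requests
+ */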
+
+/* Sends an ABORT_REQUEST record for each active stream. Closed streams are
+ * excluded, as are streams which already received the end-of-stream. It returns
+ * > 0 if the record was sent to all streams. Otherwise it returns 0.
+ */
+static int fcgi_conn_send_aborts(struct fcgi_conn *fconn)
+{
+ struct eb32_node *node;
+ struct fcgi_strm *fstrm;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD, fconn->conn);
+
+ node = eb32_lookup_ge(&fconn->streams_by_id, 1);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ node = eb32_next(node);
+ if (fstrm->state != FCGI_SS_CLOSED &&
+ !(fstrm->flags & (FCGI_SF_ES_RCVD|FCGI_SF_ABRT_SENT)) &&
+ !fcgi_strm_send_abort(fconn, fstrm))
+ return 0;
+ }
+ fconn->flags |= FCGI_CF_ABRTS_SENT;
+ TRACE_STATE("aborts sent to all fstrms", FCGI_EV_TX_RECORD, fconn->conn);
+ TRACE_LEAVE(FCGI_EV_TX_RECORD, fconn->conn);
+ return 1;
+}
+
+/* Sends a BEGIN_REQUEST record. It returns > 0 on success, 0 if it couldn't do
+ * anything. BEGIN_REQUEST record cannot be split. So we wait to have enough
+ * space to proceed. It is small enough to be encoded in an empty buffer.
+ */
+static int fcgi_strm_send_begin_request(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct fcgi_begin_request rec = { .role = FCGI_RESPONDER, .flags = 0};
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ ret = 0;
+ goto end;
+ }
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: (1)FCGI_BEGIN_REQUEST, id: fstrm->id,
+ * len: 0x0008, padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x01\x00\x00\x00\x08\x00\x00", FCGI_RECORD_HEADER_SZ);
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ if (fconn->flags & FCGI_CF_KEEP_CONN) {
+ TRACE_STATE("keep connection opened", FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+ rec.flags |= FCGI_KEEP_CONN;
+ }
+ if (!fcgi_encode_begin_request(&outbuf, &rec))
+ goto full;
+
+ /* commit the record */
+ TRACE_PROTO("FCGI BEGIN_REQUEST record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm, 0, (size_t[]){0});
+ b_add(mbuf, outbuf.data);
+ fstrm->flags |= FCGI_SF_BEGIN_SENT;
+ fstrm->state = FCGI_SS_OPEN;
+ TRACE_STATE("switching to OPEN", FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+ ret = 1;
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn);
+ ret = 0;
+ goto end;
+}
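+
+/* The BEGIN_REQUEST body encoded by fcgi_encode_begin_request() is the fixed
+ * 8-byte structure from the FastCGI spec:
+ *
+ *   role (2 bytes, big-endian) : 1 = FCGI_RESPONDER here
+ *   flags (1 byte)             : 0x01 = FCGI_KEEP_CONN to keep the connection
+ *                                open past the end of the request
+ *   reserved (5 bytes)
+ */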
+
+/* Sends an empty record of type <rtype>. It returns > 0 on success, 0 if it
+ * couldn't do anything. Empty record cannot be split. So we wait to have enough
+ * space to proceed. It is small enough to be encoded in an empty buffer.
+ */
+static int fcgi_strm_send_empty_record(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ enum fcgi_record_type rtype)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD, fconn->conn, fstrm);
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ ret = 0;
+ goto end;
+ }
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: rtype, id: fstrm->id,
+ * len: 0x0000, padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x05\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ outbuf.area[1] = rtype;
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ /* commit the record */
+ b_add(mbuf, outbuf.data);
+ ret = 1;
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD, fconn->conn, fstrm);
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ ret = 0;
+ goto end;
+}
+
+
+/* Sends an empty PARAMS record. It relies on fcgi_strm_send_empty_record(). It
+ * marks the end of params.
+ */
+static int fcgi_strm_send_empty_params(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ int ret;
+
+ TRACE_POINT(FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm);
+ ret = fcgi_strm_send_empty_record(fconn, fstrm, FCGI_PARAMS);
+ if (ret) {
+ fstrm->flags |= FCGI_SF_EP_SENT;
+		TRACE_PROTO("FCGI PARAMS record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, 0, (size_t[]){0});
+ }
+ return ret;
+}
+
+/* Sends an empty STDIN record. It relies on fcgi_strm_send_empty_record(). It
+ * marks the end of input. On success, the whole request was successfully sent.
+ */
+static int fcgi_strm_send_empty_stdin(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ int ret;
+
+ TRACE_POINT(FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN|FCGI_EV_TX_EOI, fconn->conn, fstrm);
+ ret = fcgi_strm_send_empty_record(fconn, fstrm, FCGI_STDIN);
+ if (ret) {
+ fstrm->flags |= FCGI_SF_ES_SENT;
+ TRACE_PROTO("FCGI STDIN record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, 0, (size_t[]){0});
+ TRACE_USER("FCGI request fully xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN|FCGI_EV_TX_EOI, fconn->conn, fstrm);
+ TRACE_STATE("stdin data fully sent", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN|FCGI_EV_TX_EOI, fconn->conn, fstrm);
+ }
+ return ret;
+}
+
+/* Sends an ABORT_REQUEST record. It relies on fcgi_strm_send_empty_record(). It
+ * stops the request processing.
+ */
+static int fcgi_strm_send_abort(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ int ret;
+
+ TRACE_POINT(FCGI_EV_TX_RECORD|FCGI_EV_TX_ABORT, fconn->conn, fstrm);
+ ret = fcgi_strm_send_empty_record(fconn, fstrm, FCGI_ABORT_REQUEST);
+ if (ret) {
+ fstrm->flags |= FCGI_SF_ABRT_SENT;
+ TRACE_PROTO("FCGI ABORT record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_ABORT, fconn->conn, fstrm, 0, (size_t[]){0});
+ TRACE_USER("FCGI request aborted", FCGI_EV_TX_RECORD|FCGI_EV_TX_ABORT, fconn->conn, fstrm);
+ TRACE_STATE("abort sent", FCGI_EV_TX_RECORD|FCGI_EV_TX_ABORT, fconn->conn, fstrm);
+ }
+ return ret;
+}
+
+/* Sends a PARAMS record. Returns > 0 on success, 0 if it couldn't do
+ * anything. If there are too many K/V params to be encoded in a PARAMS record,
+ * several records are sent. However, a K/V param cannot be split between 2
+ * records.
+ */
+static size_t fcgi_strm_send_params(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct htx *htx)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct htx_blk *blk;
+ struct htx_sl *sl = NULL;
+ struct fcgi_strm_params params;
+ size_t total = 0;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, htx);
+
+ memset(&params, 0, sizeof(params));
+ params.p = get_trash_chunk();
+
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ goto end;
+ }
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: (4)FCGI_PARAMS, id: fstrm->id,
+ * len: 0x0000 (fill later), padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x04\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ enum htx_blk_type type;
+ uint32_t size = htx_get_blksz(blk);
+ struct fcgi_param p;
+
+ type = htx_get_blk_type(blk);
+ switch (type) {
+ case HTX_BLK_REQ_SL:
+ sl = htx_get_blk_ptr(htx, blk);
+ if (sl->info.req.meth == HTTP_METH_HEAD)
+ fstrm->h1m.flags |= H1_MF_METH_HEAD;
+ if (sl->flags & HTX_SL_F_VER_11)
+ fstrm->h1m.flags |= H1_MF_VER_11;
+ break;
+
+ case HTX_BLK_HDR:
+ p.n = htx_get_blk_name(htx, blk);
+ p.v = htx_get_blk_value(htx, blk);
+
+ if (istmatch(p.n, ist(":fcgi-"))) {
+ p.n = istadv(p.n, 6);
+ if (isteq(p.n, ist("gateway_interface")))
+ params.mask |= FCGI_SP_CGI_GATEWAY;
+ else if (isteq(p.n, ist("document_root"))) {
+ params.mask |= FCGI_SP_DOC_ROOT;
+ params.docroot = p.v;
+ }
+ else if (isteq(p.n, ist("script_name"))) {
+ params.mask |= FCGI_SP_SCRIPT_NAME;
+ params.scriptname = p.v;
+ }
+ else if (isteq(p.n, ist("path_info"))) {
+ params.mask |= FCGI_SP_PATH_INFO;
+ params.pathinfo = p.v;
+ }
+ else if (isteq(p.n, ist("request_uri"))) {
+ params.mask |= FCGI_SP_REQ_URI;
+ params.uri = p.v;
+ }
+ else if (isteq(p.n, ist("request_meth")))
+ params.mask |= FCGI_SP_REQ_METH;
+ else if (isteq(p.n, ist("query_string")))
+ params.mask |= FCGI_SP_REQ_QS;
+ else if (isteq(p.n, ist("server_name")))
+ params.mask |= FCGI_SP_SRV_NAME;
+ else if (isteq(p.n, ist("server_port")))
+ params.mask |= FCGI_SP_SRV_PORT;
+ else if (isteq(p.n, ist("server_protocol")))
+ params.mask |= FCGI_SP_SRV_PROTO;
+ else if (isteq(p.n, ist("remote_addr")))
+ params.mask |= FCGI_SP_REM_ADDR;
+ else if (isteq(p.n, ist("remote_port")))
+ params.mask |= FCGI_SP_REM_PORT;
+ else if (isteq(p.n, ist("script_filename")))
+ params.mask |= FCGI_SP_SCRIPT_FILE;
+ else if (isteq(p.n, ist("path_translated")))
+ params.mask |= FCGI_SP_PATH_TRANS;
+ else if (isteq(p.n, ist("https")))
+ params.mask |= FCGI_SP_HTTPS;
+ else if (isteq(p.n, ist("server_software")))
+ params.mask |= FCGI_SP_SRV_SOFT;
+ }
+ else if (isteq(p.n, ist("content-length"))) {
+ p.n = ist("CONTENT_LENGTH");
+ params.mask |= FCGI_SP_CONT_LEN;
+ }
+ else if (isteq(p.n, ist("content-type")))
+ p.n = ist("CONTENT_TYPE");
+ else {
+ struct ist n;
+
+ if (isteq(p.n, ist("host")))
+ params.srv_name = p.v;
+ else if (isteq(p.n, ist("te"))) {
+ /* "te" may only be sent with "trailers" if this value
+ * is present, otherwise it must be deleted.
+ */
+ p.v = istist(p.v, ist("trailers"));
+ if (!isttest(p.v) || (p.v.len > 8 && p.v.ptr[8] != ','))
+ break;
+ p.v = ist("trailers");
+ }
+
+ /* Skip header if same name is used to add the server name */
+ if (isttest(fconn->proxy->server_id_hdr_name) && isteq(p.n, fconn->proxy->server_id_hdr_name))
+ break;
+
+ n = ist2(trash.area, 0);
+ istcat(&n, ist("http_"), trash.size);
+ istcat(&n, p.n, trash.size);
+ p.n = n;
+ }
+
+ if (!fcgi_encode_param(&outbuf, &p)) {
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ if (outbuf.data == FCGI_RECORD_HEADER_SZ)
+ goto full;
+ goto done;
+ }
+ break;
+
+ case HTX_BLK_EOH:
+ if (isttest(fconn->proxy->server_id_hdr_name)) {
+ struct server *srv = objt_server(fconn->conn->target);
+
+ if (!srv)
+ goto done;
+
+ p.n = ist2(trash.area, 0);
+ istcat(&p.n, ist("http_"), trash.size);
+ istcat(&p.n, fconn->proxy->server_id_hdr_name, trash.size);
+ p.v = ist(srv->id);
+
+ if (!fcgi_encode_param(&outbuf, &p)) {
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ if (outbuf.data == FCGI_RECORD_HEADER_SZ)
+ goto full;
+ }
+ TRACE_STATE("add server name header", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm);
+ }
+ goto done;
+
+ default:
+ break;
+ }
+ total += size;
+ blk = htx_remove_blk(htx, blk);
+ }
+
+ done:
+ if (!fcgi_set_default_param(fconn, fstrm, htx, sl, &params)) {
+ TRACE_ERROR("error setting default params", FCGI_EV_TX_RECORD|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ goto error;
+ }
+
+ if (!fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_CGI_GATEWAY) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_DOC_ROOT) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SCRIPT_NAME) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_PATH_INFO) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REQ_URI) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REQ_METH) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REQ_QS) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SRV_NAME) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SRV_PORT) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SRV_PROTO) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REM_ADDR) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_REM_PORT) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SCRIPT_FILE) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_PATH_TRANS) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_CONT_LEN) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_SRV_SOFT) ||
+ !fcgi_encode_default_param(fconn, fstrm, &params, &outbuf, FCGI_SP_HTTPS)) {
+ TRACE_ERROR("error encoding default params", FCGI_EV_TX_RECORD|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ goto error;
+ }
+
+ /* update the record's size */
+ TRACE_PROTO("FCGI PARAMS record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, 0, (size_t[]){outbuf.data - FCGI_RECORD_HEADER_SZ});
+ fcgi_set_record_size(outbuf.area, outbuf.data - FCGI_RECORD_HEADER_SZ);
+ b_add(mbuf, outbuf.data);
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, htx, (size_t[]){total});
+ return total;
+ full:
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ if (total)
+ goto error;
+ goto end;
+
+ error:
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ TRACE_ERROR("processing error sending PARAMS record", FCGI_EV_TX_RECORD|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ goto end;
+}
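+
+/* Header-to-param mapping example: a request header "X-Custom: 42" takes the
+ * default branch above and is prefixed with "http_"; per the CGI convention
+ * (RFC 3875), the name is expected to reach the application upper-cased with
+ * dashes mapped to underscores, i.e. HTTP_X_CUSTOM=42, once serialized by
+ * fcgi_encode_param(). Headers starting with ":fcgi-" instead override the
+ * corresponding default parameter.
+ */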
+
+/* Sends a STDIN record. Returns > 0 on success, 0 if it couldn't do
+ * anything. STDIN records contain the request body.
+ */
+static size_t fcgi_strm_send_stdin(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct htx *htx, size_t count, struct buffer *buf)
+{
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ uint32_t size, extra_bytes;
+ size_t total = 0;
+
+ extra_bytes = 0;
+
+ TRACE_ENTER(FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx, (size_t[]){count});
+ if (!count)
+ goto end;
+
+ mbuf = br_tail(fconn->mbuf);
+ retry:
+ if (!fcgi_get_buf(fconn, mbuf)) {
+ fconn->flags |= FCGI_CF_MUX_MALLOC;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("waiting for fconn mbuf ring allocation", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ goto end;
+ }
+
+ /* Perform some optimizations to reduce the number of buffer copies.
+ * First, if the mux's buffer is empty and the htx area contains exactly
+ * one data block of the same size as the requested count, and this
+ * count fits within the record size, then it's possible to simply swap
+ * the caller's buffer with the mux's output buffer and adjust offsets
+ * and length to match the entire DATA HTX block in the middle. In this
+ * case we perform a true zero-copy operation from end-to-end. This is
+ * the situation that happens all the time with large files. Second, if
+ * this is not possible, but the mux's output buffer is empty, we still
+ * have an opportunity to avoid the copy to the intermediary buffer, by
+ * making the intermediary buffer's area point to the output buffer's
+ * area. In this case we want to skip the HTX header to make sure that
+ * copies remain aligned and that this operation remains possible all
+ * the time. This goes for headers, data blocks and any data extracted
+ * from the HTX blocks.
+ */
+ blk = htx_get_head_blk(htx);
+ if (!blk)
+ goto end;
+ type = htx_get_blk_type(blk);
+ size = htx_get_blksz(blk);
+ if (unlikely(size == count && htx_nbblks(htx) == 1 && type == HTX_BLK_DATA)) {
+ void *old_area = mbuf->area;
+ int eom = (htx->flags & HTX_FL_EOM);
+
+ /* Last block of the message: Reserve the size for the empty stdin record */
+ if (eom)
+ extra_bytes = FCGI_RECORD_HEADER_SZ;
+
+ if (b_data(mbuf)) {
+ /* Too bad there are data left there. We're willing to memcpy/memmove
+ * up to 1/4 of the buffer, which means that it's OK to copy a large
+ * record into a buffer containing few data if it needs to be realigned,
+ * and that it's also OK to copy few data without realigning. Otherwise
+ * we'll pretend the mbuf is full and wait for it to become empty.
+ */
+ if (size + FCGI_RECORD_HEADER_SZ + extra_bytes <= b_room(mbuf) &&
+ (b_data(mbuf) <= b_size(mbuf) / 4 ||
+ (size <= b_size(mbuf) / 4 && size + FCGI_RECORD_HEADER_SZ + extra_bytes <= b_contig_space(mbuf))))
+ goto copy;
+ goto full;
+ }
+
+		TRACE_PROTO("sending stdin data (zero-copy)", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx, (size_t[]){size});
+ /* map a FCGI record to the HTX block so that we can put the
+ * record header there.
+ */
+ *mbuf = b_make(buf->area, buf->size, sizeof(struct htx) + blk->addr - FCGI_RECORD_HEADER_SZ, size + FCGI_RECORD_HEADER_SZ);
+ outbuf.area = b_head(mbuf);
+
+ /* prepend a FCGI record header just before the DATA block */
+ memcpy(outbuf.area, "\x01\x05\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ fcgi_set_record_size(outbuf.area, size);
+
+ /* and exchange with our old area */
+ buf->area = old_area;
+ buf->data = buf->head = 0;
+ total += size;
+
+ htx = (struct htx *)buf->area;
+ htx_reset(htx);
+ if (eom)
+ goto empty_stdin;
+ goto end;
+ }
+
+ copy:
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= FCGI_RECORD_HEADER_SZ + extra_bytes || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < FCGI_RECORD_HEADER_SZ + extra_bytes)
+ goto full;
+
+ /* vsn: 1(FCGI_VERSION), type: (5)FCGI_STDIN, id: fstrm->id,
+ * len: 0x0000 (fill later), padding: 0x00, rsv: 0x00 */
+ memcpy(outbuf.area, "\x01\x05\x00\x00\x00\x00\x00\x00", FCGI_RECORD_HEADER_SZ);
+ fcgi_set_record_id(outbuf.area, fstrm->id);
+ outbuf.data = FCGI_RECORD_HEADER_SZ;
+
+ blk = htx_get_head_blk(htx);
+ while (blk && count) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+ uint32_t size = htx_get_blksz(blk);
+ struct ist v;
+
+ switch (type) {
+ case HTX_BLK_DATA:
+			TRACE_PROTO("sending stdin data", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx, (size_t[]){size});
+ v = htx_get_blk_value(htx, blk);
+
+ if (htx_is_unique_blk(htx, blk) && (htx->flags & HTX_FL_EOM))
+ extra_bytes = FCGI_RECORD_HEADER_SZ; /* Last block of the message */
+
+ if (v.len > count) {
+ v.len = count;
+ extra_bytes = 0;
+ }
+
+ if (v.len + FCGI_RECORD_HEADER_SZ + extra_bytes > b_room(&outbuf)) {
+				/* It doesn't fit at once. If it at least fits after a
+				 * realign and the amount of data to move is low, let's
+				 * defragment the buffer now.
+				 */
+ if (b_space_wraps(mbuf) &&
+ b_data(&outbuf) + v.len + extra_bytes <= b_room(mbuf) &&
+ b_data(mbuf) <= MAX_DATA_REALIGN)
+ goto realign_again;
+ v.len = (FCGI_RECORD_HEADER_SZ + extra_bytes > b_room(&outbuf)
+ ? 0
+ : b_room(&outbuf) - FCGI_RECORD_HEADER_SZ - extra_bytes);
+ }
+ if (!v.len || !chunk_memcat(&outbuf, v.ptr, v.len)) {
+ if (outbuf.data == FCGI_RECORD_HEADER_SZ)
+ goto full;
+ goto done;
+ }
+ if (v.len != size) {
+ total += v.len;
+ count -= v.len;
+ htx_cut_data_blk(htx, blk, v.len);
+ goto done;
+ }
+ break;
+
+ default:
+ break;
+ }
+ total += size;
+ count -= size;
+ blk = htx_remove_blk(htx, blk);
+ }
+
+ done:
+ /* update the record's size */
+ TRACE_PROTO("FCGI STDIN record xferred", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, 0, (size_t[]){outbuf.data - FCGI_RECORD_HEADER_SZ});
+ fcgi_set_record_size(outbuf.area, outbuf.data - FCGI_RECORD_HEADER_SZ);
+ b_add(mbuf, outbuf.data);
+
+	/* Send the empty stdin record here to finish the message */
+ if (htx_is_empty(htx) && (htx->flags & HTX_FL_EOM)) {
+ empty_stdin:
+ TRACE_PROTO("sending FCGI STDIN record", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx);
+ if (!fcgi_strm_send_empty_stdin(fconn, fstrm)) {
+ /* bytes already reserved for this record. It should not fail */
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ TRACE_ERROR("processing error sending empty STDIN record", FCGI_EV_TX_RECORD|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ }
+ }
+
+ end:
+ TRACE_LEAVE(FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx, (size_t[]){total});
+ return total;
+ full:
+ if ((mbuf = br_tail_add(fconn->mbuf)) != NULL)
+ goto retry;
+ fconn->flags |= FCGI_CF_MUX_MFULL;
+ fstrm->flags |= FCGI_SF_BLK_MROOM;
+ TRACE_STATE("mbuf ring full", FCGI_EV_TX_RECORD|FCGI_EV_FSTRM_BLK|FCGI_EV_FCONN_BLK, fconn->conn, fstrm);
+ goto end;
+}
+
+/* Processes a STDOUT record. Returns > 0 on success, 0 if it couldn't do
+ * anything. STDOUT records contain the entire response. All the content is
+ * copied in the stream's rxbuf. The parsing will be handled in fcgi_rcv_buf().
+ */
+static int fcgi_strm_handle_stdout(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ struct buffer *dbuf;
+ size_t ret;
+ size_t max;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+
+ dbuf = &fconn->dbuf;
+
+ /* Only padding remains */
+ if (fconn->state == FCGI_CS_RECORD_P)
+ goto end_transfer;
+
+ if (b_data(dbuf) < (fconn->drl + fconn->drp) &&
+ b_size(dbuf) > (fconn->drl + fconn->drp) &&
+ buf_room_for_htx_data(dbuf))
+ goto fail; // incomplete record
+
+ if (!fcgi_get_buf(fconn, &fstrm->rxbuf)) {
+ fconn->flags |= FCGI_CF_DEM_SALLOC;
+ TRACE_STATE("waiting for fstrm rxbuf allocation", FCGI_EV_RX_RECORD|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ goto fail;
+ }
+
+ /*max = MIN(b_room(&fstrm->rxbuf), fconn->drl);*/
+ max = buf_room_for_htx_data(&fstrm->rxbuf);
+ if (!b_data(&fstrm->rxbuf))
+ fstrm->rxbuf.head = sizeof(struct htx);
+ if (max > fconn->drl)
+ max = fconn->drl;
+
+ ret = b_xfer(&fstrm->rxbuf, dbuf, max);
+ if (!ret)
+ goto fail;
+ fconn->drl -= ret;
+ TRACE_DATA("move some data to fstrm rxbuf", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm, 0, (size_t[]){ret});
+ TRACE_PROTO("FCGI STDOUT record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm, 0, (size_t[]){ret});
+
+ if (!buf_room_for_htx_data(&fstrm->rxbuf)) {
+ fconn->flags |= FCGI_CF_DEM_SFULL;
+ TRACE_STATE("fstrm rxbuf full", FCGI_EV_RX_RECORD|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ }
+
+ if (fconn->drl)
+ goto fail;
+
+ end_transfer:
+ fconn->state = FCGI_CS_RECORD_P;
+ fconn->drl += fconn->drp;
+ fconn->drp = 0;
+ ret = MIN(b_data(&fconn->dbuf), fconn->drl);
+ b_del(&fconn->dbuf, ret);
+ fconn->drl -= ret;
+ if (fconn->drl)
+ goto fail;
+
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ return 1;
+ fail:
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ return 0;
+}
+
+
+/* Processes an empty STDOUT record. Returns > 0 on success, 0 if it couldn't do
+ * anything. In fact it only skips the padding, as there is no payload for such
+ * records. It marks the end of the response.
+ */
+static int fcgi_strm_handle_empty_stdout(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+
+ fconn->state = FCGI_CS_RECORD_P;
+ TRACE_STATE("switching to RECORD_P", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ fconn->drl += fconn->drp;
+ fconn->drp = 0;
+ ret = MIN(b_data(&fconn->dbuf), fconn->drl);
+ b_del(&fconn->dbuf, ret);
+ fconn->drl -= ret;
+ if (fconn->drl) {
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ return 0;
+ }
+ fconn->state = FCGI_CS_RECORD_H;
+ fstrm->flags |= FCGI_SF_ES_RCVD;
+ TRACE_PROTO("FCGI STDOUT record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm, 0, (size_t[]){0});
+	TRACE_STATE("stdout data fully rcvd, switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR|FCGI_EV_RX_EOI, fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ return 1;
+}
+
+/* Processes a STDERR record. Returns > 0 on success, 0 if it couldn't do
+ * anything.
+ */
+static int fcgi_strm_handle_stderr(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ struct buffer *dbuf;
+ struct buffer tag;
+ size_t ret;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm);
+ dbuf = &fconn->dbuf;
+
+ /* Only padding remains */
+ if (fconn->state == FCGI_CS_RECORD_P || !fconn->drl)
+ goto end_transfer;
+
+ if (b_data(dbuf) < (fconn->drl + fconn->drp) &&
+ b_size(dbuf) > (fconn->drl + fconn->drp) &&
+ buf_room_for_htx_data(dbuf))
+ goto fail; // incomplete record
+
+ chunk_reset(&trash);
+ ret = b_force_xfer(&trash, dbuf, MIN(b_room(&trash), fconn->drl));
+ if (!ret)
+ goto fail;
+ fconn->drl -= ret;
+ TRACE_PROTO("FCGI STDERR record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm, 0, (size_t[]){ret});
+
+ trash.area[ret] = '\n';
+ trash.area[ret+1] = '\0';
+ tag.area = fconn->app->name; tag.data = strlen(fconn->app->name);
+ app_log(&fconn->app->loggers, &tag, LOG_ERR, "%s", trash.area);
+
+ if (fconn->drl)
+ goto fail;
+
+ end_transfer:
+ fconn->state = FCGI_CS_RECORD_P;
+ fconn->drl += fconn->drp;
+ fconn->drp = 0;
+ ret = MIN(b_data(&fconn->dbuf), fconn->drl);
+ b_del(&fconn->dbuf, ret);
+ fconn->drl -= ret;
+ if (fconn->drl)
+ goto fail;
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm);
+ return 1;
+ fail:
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm);
+ return 0;
+}
+
+/* Processes an END_REQUEST record. Returns > 0 on success, 0 if it couldn't do
+ * anything. If the empty STDOUT record is not already received, this one marks
+ * the end of the response. It is highly unexpected, but if the record is larger
+ * than a buffer and cannot be decoded at once, an error is triggered and
+ * the connection is closed. END_REQUEST record cannot be split.
+ */
+static int fcgi_strm_handle_end_request(struct fcgi_conn *fconn, struct fcgi_strm *fstrm)
+{
+ struct buffer inbuf;
+ struct buffer *dbuf;
+ struct fcgi_end_request endreq;
+
+ TRACE_ENTER(FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn, fstrm);
+ dbuf = &fconn->dbuf;
+
+ /* Record too large to be fully decoded */
+ if (b_size(dbuf) < (fconn->drl + fconn->drp)) {
+ TRACE_ERROR("END_REQUEST record too large", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_FSTRM_ERR, fconn->conn, fstrm);
+ goto fail;
+ }
+
+ /* process full record only */
+ if (b_data(dbuf) < (fconn->drl + fconn->drp)) {
+ TRACE_DEVEL("leaving on missing data", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn);
+ return 0;
+ }
+
+ if (unlikely(b_contig_data(dbuf, b_head_ofs(dbuf)) < fconn->drl)) {
+		/* Realign the demux buffer if the record wraps, so that the
+		 * whole END_REQUEST record can be decoded at once.
+ */
+ b_slow_realign_ofs(dbuf, trash.area, 0);
+ }
+
+ inbuf = b_make(b_head(dbuf), b_data(dbuf), 0, fconn->drl);
+
+ if (!fcgi_decode_end_request(&inbuf, 0, &endreq)) {
+ TRACE_ERROR("END_REQUEST record decoding failure", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_FSTRM_ERR, fconn->conn, fstrm);
+ goto fail;
+ }
+
+ fstrm->flags |= FCGI_SF_ES_RCVD;
+ TRACE_STATE("end of script reported", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_RX_EOI, fconn->conn, fstrm);
+ TRACE_PROTO("FCGI END_REQUEST record rcvd", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn, fstrm, 0, (size_t[]){fconn->drl});
+ fstrm->proto_status = endreq.errcode;
+ fcgi_strm_close(fstrm);
+
+ b_del(&fconn->dbuf, fconn->drl + fconn->drp);
+ fconn->drl = 0;
+ fconn->drp = 0;
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn, fstrm);
+ TRACE_LEAVE(FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn, fstrm);
+ return 1;
+
+ fail:
+ fcgi_strm_error(fstrm);
+ TRACE_DEVEL("leaving on error", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_FSTRM_ERR, fconn->conn, fstrm);
+ return 0;
+}
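+
+/* The END_REQUEST body decoded by fcgi_decode_end_request() is the fixed
+ * 8-byte structure from the FastCGI spec:
+ *
+ *   appStatus (4 bytes, big-endian) : the application-level exit code
+ *   protocolStatus (1 byte)         : 0=REQUEST_COMPLETE, 1=CANT_MPX_CONN,
+ *                                     2=OVERLOADED, 3=UNKNOWN_ROLE
+ *   reserved (3 bytes)
+ *
+ * Only the protocol status is kept here, in <proto_status>.
+ */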
+
+/* process Rx records to be demultiplexed */
+static void fcgi_process_demux(struct fcgi_conn *fconn)
+{
+ struct fcgi_strm *fstrm = NULL, *tmp_fstrm;
+ struct fcgi_header hdr;
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_FCONN_WAKE, fconn->conn);
+
+ if (fconn->state == FCGI_CS_CLOSED)
+ return;
+
+ if (unlikely(fconn->state < FCGI_CS_RECORD_H)) {
+ if (fconn->state == FCGI_CS_INIT) {
+ TRACE_STATE("waiting FCGI GET_VALUES to be sent", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR|FCGI_EV_RX_GETVAL, fconn->conn);
+ return;
+ }
+ if (fconn->state == FCGI_CS_SETTINGS) {
+ /* ensure that what is pending is a valid GET_VALUES_RESULT record. */
+ TRACE_STATE("receiving FCGI record header", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ ret = fcgi_decode_record_hdr(&fconn->dbuf, 0, &hdr);
+ if (!ret) {
+ TRACE_ERROR("header record decoding failure", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ|FCGI_EV_FSTRM_ERR, fconn->conn, fstrm);
+ goto fail;
+ }
+ b_del(&fconn->dbuf, ret);
+
+ if (hdr.id || (hdr.type != FCGI_GET_VALUES_RESULT && hdr.type != FCGI_UNKNOWN_TYPE)) {
+ fconn->state = FCGI_CS_CLOSED;
+ TRACE_ERROR("unexpected record type or flags", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ TRACE_STATE("switching to CLOSED", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR|FCGI_EV_RX_GETVAL|FCGI_EV_FCONN_ERR, fconn->conn);
+ goto fail;
+ }
+ goto new_record;
+ }
+ }
+
+ /* process as many incoming records as possible below */
+ while (1) {
+ if (!b_data(&fconn->dbuf)) {
+ TRACE_DEVEL("no more Rx data", FCGI_EV_RX_RECORD, fconn->conn);
+ break;
+ }
+
+ if (fconn->state == FCGI_CS_CLOSED) {
+ TRACE_STATE("end of connection reported", FCGI_EV_RX_RECORD|FCGI_EV_RX_EOI, fconn->conn);
+ break;
+ }
+
+ if (fconn->state == FCGI_CS_RECORD_H) {
+ TRACE_PROTO("receiving FCGI record header", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ ret = fcgi_decode_record_hdr(&fconn->dbuf, 0, &hdr);
+ if (!ret)
+ break;
+ b_del(&fconn->dbuf, ret);
+
+ new_record:
+ fconn->dsi = hdr.id;
+ fconn->drt = hdr.type;
+ fconn->drl = hdr.len;
+ fconn->drp = hdr.padding;
+ fconn->state = FCGI_CS_RECORD_D;
+ TRACE_STATE("FCGI record header rcvd, switching to RECORD_D", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ }
+
+ /* Only FCGI_CS_RECORD_D or FCGI_CS_RECORD_P */
+ tmp_fstrm = fcgi_conn_st_by_id(fconn, fconn->dsi);
+
+ if (tmp_fstrm != fstrm && fstrm && fcgi_strm_sc(fstrm) &&
+ (b_data(&fstrm->rxbuf) ||
+ fcgi_conn_read0_pending(fconn) ||
+ fstrm->state == FCGI_SS_CLOSED ||
+ (fstrm->flags & FCGI_SF_ES_RCVD) ||
+ se_fl_test(fstrm->sd, SE_FL_ERROR | SE_FL_ERR_PENDING | SE_FL_EOS))) {
+ /* we may have to signal the upper layers */
+ TRACE_DEVEL("notifying stream before switching SID", FCGI_EV_RX_RECORD|FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ se_fl_set(fstrm->sd, SE_FL_RCV_MORE);
+ fcgi_strm_notify_recv(fstrm);
+ }
+ fstrm = tmp_fstrm;
+
+ if (fstrm->state == FCGI_SS_CLOSED && fconn->dsi != 0) {
+			/* ignore all records for closed streams */
+ goto ignore_record;
+ }
+ if (fstrm->state == FCGI_SS_IDLE) {
+			/* ignore all records for unknown streams */
+ goto ignore_record;
+ }
+
+ switch (fconn->drt) {
+ case FCGI_GET_VALUES_RESULT:
+ TRACE_PROTO("receiving FCGI GET_VALUES_RESULT record", FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ ret = fcgi_conn_handle_values_result(fconn);
+ break;
+
+ case FCGI_STDOUT:
+ if (fstrm->flags & FCGI_SF_ES_RCVD)
+ goto ignore_record;
+
+ TRACE_PROTO("receiving FCGI STDOUT record", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDOUT, fconn->conn, fstrm);
+ if (fconn->drl)
+ ret = fcgi_strm_handle_stdout(fconn, fstrm);
+ else
+ ret = fcgi_strm_handle_empty_stdout(fconn, fstrm);
+ break;
+
+ case FCGI_STDERR:
+ TRACE_PROTO("receiving FCGI STDERR record", FCGI_EV_RX_RECORD|FCGI_EV_RX_STDERR, fconn->conn, fstrm);
+ ret = fcgi_strm_handle_stderr(fconn, fstrm);
+ break;
+
+ case FCGI_END_REQUEST:
+ TRACE_PROTO("receiving FCGI END_REQUEST record", FCGI_EV_RX_RECORD|FCGI_EV_RX_ENDREQ, fconn->conn, fstrm);
+ ret = fcgi_strm_handle_end_request(fconn, fstrm);
+ break;
+
+ /* implement all extra record types here */
+ default:
+ ignore_record:
+ /* drop records that we ignore. They may be
+ * larger than the buffer so we drain all of
+ * their contents until we reach the end.
+ */
+ fconn->state = FCGI_CS_RECORD_P;
+ fconn->drl += fconn->drp;
+ fconn->drp = 0;
+ ret = MIN(b_data(&fconn->dbuf), fconn->drl);
+ TRACE_PROTO("receiving FCGI ignored record", FCGI_EV_RX_RECORD, fconn->conn, fstrm, 0, (size_t[]){ret});
+ TRACE_STATE("switching to RECORD_P", FCGI_EV_RX_RECORD, fconn->conn, fstrm);
+ b_del(&fconn->dbuf, ret);
+ fconn->drl -= ret;
+ ret = (fconn->drl == 0);
+ }
+
+ /* error or missing data condition met above ? */
+ if (ret <= 0) {
+ TRACE_DEVEL("insufficient data to proceed", FCGI_EV_RX_RECORD, fconn->conn, fstrm);
+ break;
+ }
+
+ if (fconn->state != FCGI_CS_RECORD_H && !(fconn->drl+fconn->drp)) {
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ }
+ }
+
+ fail:
+ /* we can go here on missing data, blocked response or error */
+ if (fstrm && fcgi_strm_sc(fstrm) &&
+ (b_data(&fstrm->rxbuf) ||
+ fcgi_conn_read0_pending(fconn) ||
+ fstrm->state == FCGI_SS_CLOSED ||
+ (fstrm->flags & FCGI_SF_ES_RCVD) ||
+ se_fl_test(fstrm->sd, SE_FL_ERROR | SE_FL_ERR_PENDING | SE_FL_EOS))) {
+ /* we may have to signal the upper layers */
+ TRACE_DEVEL("notifying stream before switching SID", FCGI_EV_RX_RECORD|FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ se_fl_set(fstrm->sd, SE_FL_RCV_MORE);
+ fcgi_strm_notify_recv(fstrm);
+ }
+
+ fcgi_conn_restart_reading(fconn, 0);
+}
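+
+/* Illustrative walk through the demux loop above for a single STDOUT record
+ * (sizes are hypothetical): an 8-byte header announcing len=1024 and
+ * padding=8 is decoded in RECORD_H, filling dsi/drt/drl/drp and switching to
+ * RECORD_D; fcgi_strm_handle_stdout() then consumes the payload, and once
+ * drl and drp are fully consumed the state goes back to RECORD_H for the
+ * next record. Records that must be skipped take the RECORD_P path instead,
+ * where payload and padding are drained without being decoded.
+ */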
+
+/* process Tx records from streams to be multiplexed. Returns > 0 if it reached
+ * the end.
+ */
+static int fcgi_process_mux(struct fcgi_conn *fconn)
+{
+ struct fcgi_strm *fstrm, *fstrm_back;
+
+ TRACE_ENTER(FCGI_EV_FCONN_WAKE, fconn->conn);
+
+ if (unlikely(fconn->state < FCGI_CS_RECORD_H)) {
+ if (unlikely(fconn->state == FCGI_CS_INIT)) {
+ if (!(fconn->flags & FCGI_CF_GET_VALUES)) {
+ fconn->state = FCGI_CS_RECORD_H;
+ TRACE_STATE("switching to RECORD_H", FCGI_EV_TX_RECORD|FCGI_EV_RX_RECORD|FCGI_EV_RX_FHDR, fconn->conn);
+ fcgi_wake_unassigned_streams(fconn);
+ goto mux;
+ }
+ TRACE_PROTO("sending FCGI GET_VALUES record", FCGI_EV_TX_RECORD|FCGI_EV_TX_GETVAL, fconn->conn);
+ if (unlikely(!fcgi_conn_send_get_values(fconn)))
+ goto fail;
+ fconn->state = FCGI_CS_SETTINGS;
+ TRACE_STATE("switching to SETTINGS", FCGI_EV_TX_RECORD|FCGI_EV_RX_RECORD|FCGI_EV_RX_GETVAL, fconn->conn);
+ }
+ /* need to wait for the other side */
+ if (fconn->state < FCGI_CS_RECORD_H)
+ goto done;
+ }
+
+ mux:
+ list_for_each_entry_safe(fstrm, fstrm_back, &fconn->send_list, send_list) {
+ if (fconn->state == FCGI_CS_CLOSED || fconn->flags & FCGI_CF_MUX_BLOCK_ANY)
+ break;
+
+ if (fstrm->flags & FCGI_SF_NOTIFIED)
+ continue;
+
+ /* If the sender changed his mind and unsubscribed, let's just
+ * remove the stream from the send_list.
+ */
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)) &&
+ (!fstrm->subs || !(fstrm->subs->events & SUB_RETRY_SEND))) {
+ LIST_DEL_INIT(&fstrm->send_list);
+ continue;
+ }
+
+ if (fstrm->subs && fstrm->subs->events & SUB_RETRY_SEND) {
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ fstrm->flags &= ~FCGI_SF_BLK_ANY;
+ fstrm->flags |= FCGI_SF_NOTIFIED;
+ tasklet_wakeup(fstrm->subs->tasklet);
+ fstrm->subs->events &= ~SUB_RETRY_SEND;
+ if (!fstrm->subs->events)
+ fstrm->subs = NULL;
+ } else {
+ /* it's the shut request that was queued */
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ tasklet_wakeup(fstrm->shut_tl);
+ }
+ }
+
+ fail:
+ if (fconn->state == FCGI_CS_CLOSED) {
+ if (fconn->stream_cnt - fconn->nb_reserved > 0) {
+ fcgi_conn_send_aborts(fconn);
+ if (fconn->flags & FCGI_CF_MUX_BLOCK_ANY) {
+ TRACE_DEVEL("leaving in blocked situation", FCGI_EV_FCONN_WAKE|FCGI_EV_FCONN_BLK, fconn->conn);
+ return 0;
+ }
+ }
+ }
+
+ done:
+ TRACE_LEAVE(FCGI_EV_FCONN_WAKE, fconn->conn);
+ return 1;
+}
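+
+/* For reference: the GET_VALUES record sent above queries the application's
+ * capabilities, most notably FCGI_MPXS_CONNS (whether several requests may
+ * be multiplexed over a single connection). This is only a summary: the
+ * exact exchange is implemented by fcgi_conn_send_get_values() and
+ * fcgi_conn_handle_values_result(), defined earlier in this file.
+ */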
+
+
+/* Attempt to read data, and subscribe if none available.
+ * The function returns 1 if data has been received, otherwise zero.
+ */
+static int fcgi_recv(struct fcgi_conn *fconn)
+{
+ struct connection *conn = fconn->conn;
+ struct buffer *buf;
+ int max;
+ size_t ret;
+
+ TRACE_ENTER(FCGI_EV_FCONN_RECV, conn);
+
+ if (fconn->wait_event.events & SUB_RETRY_RECV) {
+ TRACE_DEVEL("leaving on sub_recv", FCGI_EV_FCONN_RECV, conn);
+ return (b_data(&fconn->dbuf));
+ }
+
+ if (!fcgi_recv_allowed(fconn)) {
+ TRACE_DEVEL("leaving on !recv_allowed", FCGI_EV_FCONN_RECV, conn);
+ return 1;
+ }
+
+ buf = fcgi_get_buf(fconn, &fconn->dbuf);
+ if (!buf) {
+ TRACE_DEVEL("waiting for fconn dbuf allocation", FCGI_EV_FCONN_RECV|FCGI_EV_FCONN_BLK, conn);
+ fconn->flags |= FCGI_CF_DEM_DALLOC;
+ return 0;
+ }
+
+ if (!b_data(buf)) {
+		/* try to pre-align the buffer the same way the rxbufs will
+		 * be aligned, to optimize memory copies. We'll make sure
+		 * that the record header lands at the end of the HTX block
+		 * so it can be aliased upon recv. We cannot use the head
+		 * because rcv_buf() will realign the buffer if it's empty.
+		 * Thus we cheat and pretend we already have a few bytes
+		 * there.
+		 */
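+		/* Hypothetical example: with the 8-byte FCGI record header,
+		 * head is set to sizeof(struct htx) - 8, so the header
+		 * received first fills that gap and the record payload starts
+		 * exactly at sizeof(struct htx), i.e. where the HTX payload
+		 * area begins once the buffer is cast to an HTX message.
+		 */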
+ max = buf_room_for_htx_data(buf) + (fconn->state == FCGI_CS_RECORD_H ? FCGI_RECORD_HEADER_SZ : 0);
+ buf->head = sizeof(struct htx) - (fconn->state == FCGI_CS_RECORD_H ? FCGI_RECORD_HEADER_SZ : 0);
+ }
+ else
+ max = buf_room_for_htx_data(buf);
+
+ ret = max ? conn->xprt->rcv_buf(conn, conn->xprt_ctx, buf, max, 0) : 0;
+
+ if (max && !ret && fcgi_recv_allowed(fconn)) {
+ TRACE_DATA("failed to receive data, subscribing", FCGI_EV_FCONN_RECV, conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_RECV, &fconn->wait_event);
+ }
+ else
+ TRACE_DATA("recv data", FCGI_EV_FCONN_RECV, conn, 0, 0, (size_t[]){ret});
+
+ if (conn_xprt_read0_pending(conn)) {
+ TRACE_DATA("received read0", FCGI_EV_FCONN_RECV, conn);
+ fconn->flags |= FCGI_CF_EOS;
+ }
+ if (conn->flags & CO_FL_ERROR) {
+ TRACE_DATA("connection error", FCGI_EV_FCONN_RECV, conn);
+ fconn->flags |= FCGI_CF_ERROR;
+ }
+
+ if (!b_data(buf)) {
+ fcgi_release_buf(fconn, &fconn->dbuf);
+ goto end;
+ }
+
+ if (ret == max) {
+ TRACE_DEVEL("fconn dbuf full", FCGI_EV_FCONN_RECV|FCGI_EV_FCONN_BLK, conn);
+ fconn->flags |= FCGI_CF_DEM_DFULL;
+ }
+
+end:
+ TRACE_LEAVE(FCGI_EV_FCONN_RECV, conn);
+ return !!ret || (fconn->flags & (FCGI_CF_EOS|FCGI_CF_ERROR));
+}
+
+
+/* Try to send data if possible.
+ * The function returns 1 if data has been sent, otherwise zero.
+ */
+static int fcgi_send(struct fcgi_conn *fconn)
+{
+ struct connection *conn = fconn->conn;
+ int done;
+ int sent = 0;
+
+ TRACE_ENTER(FCGI_EV_FCONN_SEND, conn);
+
+ if (fconn->flags & (FCGI_CF_ERROR|FCGI_CF_ERR_PENDING)) {
+ TRACE_DEVEL("leaving on connection error", FCGI_EV_FCONN_SEND, conn);
+ if (fconn->flags & FCGI_CF_EOS)
+ fconn->flags |= FCGI_CF_ERROR;
+ b_reset(br_tail(fconn->mbuf));
+ return 1;
+ }
+
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ /* a handshake was requested */
+ goto schedule;
+ }
+
+ /* This loop is quite simple : it tries to fill as much as it can from
+ * pending streams into the existing buffer until it's reportedly full
+ * or the end of send requests is reached. Then it tries to send this
+ * buffer's contents out, marks it not full if at least one byte could
+ * be sent, and tries again.
+ *
+ * The snd_buf() function normally takes a "flags" argument which may
+ * be made of a combination of CO_SFL_MSG_MORE to indicate that more
+ * data immediately comes and CO_SFL_STREAMER to indicate that the
+ * connection is streaming lots of data (used to increase TLS record
+ * size at the expense of latency). The former can be sent any time
+ * there's a buffer full flag, as it indicates at least one stream
+ * attempted to send and failed so there are pending data. An
+ * alternative would be to set it as long as there's an active stream
+ * but that would be problematic for ACKs until we have an absolute
+ * guarantee that all waiters have at least one byte to send. The
+ * latter should possibly not be set for now.
+ */
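+	/* Concrete (illustrative) case: if a previous pass filled the mbuf
+	 * and raised FCGI_CF_MUX_MFULL, the snd_buf() calls below are made
+	 * with CO_SFL_MSG_MORE set, hinting the transport layer that more
+	 * bytes immediately follow the ones being pushed out.
+	 */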
+
+ done = 0;
+ while (!done) {
+ unsigned int flags = 0;
+ unsigned int released = 0;
+ struct buffer *buf;
+
+ /* fill as much as we can into the current buffer */
+ while (((fconn->flags & (FCGI_CF_MUX_MFULL|FCGI_CF_MUX_MALLOC)) == 0) && !done)
+ done = fcgi_process_mux(fconn);
+
+ if (fconn->flags & FCGI_CF_MUX_MALLOC)
+ done = 1; // we won't go further without extra buffers
+
+ if (conn->flags & CO_FL_ERROR)
+ break;
+
+ if (fconn->flags & (FCGI_CF_MUX_MFULL | FCGI_CF_DEM_MROOM))
+ flags |= CO_SFL_MSG_MORE;
+
+ for (buf = br_head(fconn->mbuf); b_size(buf); buf = br_del_head(fconn->mbuf)) {
+ if (b_data(buf)) {
+ int ret;
+
+ ret = conn->xprt->snd_buf(conn, conn->xprt_ctx, buf, b_data(buf), flags);
+ if (!ret) {
+ done = 1;
+ break;
+ }
+ sent = 1;
+ TRACE_DATA("send data", FCGI_EV_FCONN_SEND, conn, 0, 0, (size_t[]){ret});
+ b_del(buf, ret);
+ if (b_data(buf)) {
+ done = 1;
+ break;
+ }
+ }
+ b_free(buf);
+ released++;
+ }
+
+ if (released)
+ offer_buffers(NULL, released);
+
+ /* wrote at least one byte, the buffer is not full anymore */
+ if (fconn->flags & (FCGI_CF_MUX_MFULL | FCGI_CF_DEM_MROOM))
+			TRACE_STATE("fconn mbuf ring not full anymore", FCGI_EV_FCONN_SEND|FCGI_EV_FCONN_BLK, conn);
+ fconn->flags &= ~(FCGI_CF_MUX_MFULL | FCGI_CF_DEM_MROOM);
+ }
+
+ if (conn->flags & CO_FL_ERROR) {
+ fconn->flags |= FCGI_CF_ERR_PENDING;
+ if (fconn->flags & FCGI_CF_EOS)
+ fconn->flags |= FCGI_CF_ERROR;
+ b_reset(br_tail(fconn->mbuf));
+ }
+
+	/* We're not full anymore, so we can wake any tasks that are waiting
+ * for us.
+ */
+ if (!(fconn->flags & (FCGI_CF_MUX_MFULL | FCGI_CF_DEM_MROOM)) && fconn->state >= FCGI_CS_RECORD_H) {
+ struct fcgi_strm *fstrm;
+
+ list_for_each_entry(fstrm, &fconn->send_list, send_list) {
+ if (fconn->state == FCGI_CS_CLOSED || fconn->flags & FCGI_CF_MUX_BLOCK_ANY)
+ break;
+
+ if (fstrm->flags & FCGI_SF_NOTIFIED)
+ continue;
+
+ /* If the sender changed his mind and unsubscribed, let's just
+ * remove the stream from the send_list.
+ */
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)) &&
+ (!fstrm->subs || !(fstrm->subs->events & SUB_RETRY_SEND))) {
+ LIST_DEL_INIT(&fstrm->send_list);
+ continue;
+ }
+
+ if (fstrm->subs && fstrm->subs->events & SUB_RETRY_SEND) {
+ TRACE_DEVEL("waking up pending stream", FCGI_EV_FCONN_SEND|FCGI_EV_STRM_WAKE, conn, fstrm);
+ fstrm->flags &= ~FCGI_SF_BLK_ANY;
+ fstrm->flags |= FCGI_SF_NOTIFIED;
+ tasklet_wakeup(fstrm->subs->tasklet);
+ fstrm->subs->events &= ~SUB_RETRY_SEND;
+ if (!fstrm->subs->events)
+ fstrm->subs = NULL;
+ } else {
+ /* it's the shut request that was queued */
+ TRACE_POINT(FCGI_EV_STRM_WAKE, fconn->conn, fstrm);
+ tasklet_wakeup(fstrm->shut_tl);
+ }
+ }
+ }
+ /* We're done, no more to send */
+ if (!br_data(fconn->mbuf)) {
+ TRACE_DEVEL("leaving with everything sent", FCGI_EV_FCONN_SEND, conn);
+ goto end;
+ }
+schedule:
+ if (!(conn->flags & CO_FL_ERROR) && !(fconn->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_STATE("more data to send, subscribing", FCGI_EV_FCONN_SEND, conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_SEND, &fconn->wait_event);
+ }
+
+ TRACE_DEVEL("leaving with some data left to send", FCGI_EV_FCONN_SEND, conn);
+end:
+ return sent || (fconn->flags & (FCGI_CF_ERR_PENDING|FCGI_CF_ERROR));
+}
+
+/* this is the tasklet referenced in fconn->wait_event.tasklet */
+struct task *fcgi_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct connection *conn;
+ struct fcgi_conn *fconn = ctx;
+ struct tasklet *tl = (struct tasklet *)t;
+ int conn_in_list;
+ int ret = 0;
+
+ if (state & TASK_F_USR1) {
+ /* the tasklet was idling on an idle connection, it might have
+ * been stolen, let's be careful!
+ */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (tl->context == NULL) {
+ /* The connection has been taken over by another thread,
+ * we're no longer responsible for it, so just free the
+ * tasklet, and do nothing.
+ */
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ tasklet_free(tl);
+ return NULL;
+ }
+ conn = fconn->conn;
+ TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
+
+ conn_in_list = conn->flags & CO_FL_LIST_MASK;
+ if (conn_in_list)
+ conn_delete_from_tree(conn);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ } else {
+ /* we're certain the connection was not in an idle list */
+ conn = fconn->conn;
+ TRACE_ENTER(FCGI_EV_FCONN_WAKE, conn);
+ conn_in_list = 0;
+ }
+
+ if (!(fconn->wait_event.events & SUB_RETRY_SEND))
+ ret = fcgi_send(fconn);
+ if (!(fconn->wait_event.events & SUB_RETRY_RECV))
+ ret |= fcgi_recv(fconn);
+ if (ret || b_data(&fconn->dbuf))
+ ret = fcgi_process(fconn);
+
+ /* If we were in an idle list, we want to add it back into it,
+	 * unless fcgi_process() returned -1, which means it has destroyed
+	 * the connection (testing !ret is enough: if fcgi_process() wasn't
+	 * called then ret will be 0 anyway).
+ */
+ if (ret < 0)
+ t = NULL;
+
+ if (!ret && conn_in_list) {
+ struct server *srv = objt_server(conn->target);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ _srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ return t;
+}
+
+/* callback called on any event by the connection handler.
+ * It applies changes and returns zero, or < 0 if it wants immediate
+ * destruction of the connection (which normally does not happen in FCGI).
+ */
+static int fcgi_process(struct fcgi_conn *fconn)
+{
+ struct connection *conn = fconn->conn;
+
+ TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
+
+ if (b_data(&fconn->dbuf) && !(fconn->flags & FCGI_CF_DEM_BLOCK_ANY)) {
+ fcgi_process_demux(fconn);
+
+ if (fconn->state == FCGI_CS_CLOSED || (fconn->flags & FCGI_CF_ERROR))
+ b_reset(&fconn->dbuf);
+
+ if (buf_room_for_htx_data(&fconn->dbuf))
+ fconn->flags &= ~FCGI_CF_DEM_DFULL;
+ }
+ fcgi_send(fconn);
+
+ if (unlikely(fconn->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+		/* the proxy is stopping, reload likely in progress, let's try
+ * to announce a graceful shutdown if not yet done. We don't
+ * care if it fails, it will be tried again later.
+ */
+ TRACE_STATE("proxy stopped, sending ABORT to all streams", FCGI_EV_FCONN_WAKE|FCGI_EV_TX_RECORD, conn);
+ if (!(fconn->flags & (FCGI_CF_ABRTS_SENT|FCGI_CF_ABRTS_FAILED))) {
+ if (fconn->stream_cnt - fconn->nb_reserved > 0)
+ fcgi_conn_send_aborts(fconn);
+ }
+ }
+
+ /*
+ * If we received early data, and the handshake is done, wake
+ * any stream that was waiting for it.
+ */
+ if (!(fconn->flags & FCGI_CF_WAIT_FOR_HS) &&
+ (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_WAIT_XPRT | CO_FL_EARLY_DATA)) == CO_FL_EARLY_DATA) {
+ struct eb32_node *node;
+ struct fcgi_strm *fstrm;
+
+ fconn->flags |= FCGI_CF_WAIT_FOR_HS;
+ node = eb32_lookup_ge(&fconn->streams_by_id, 1);
+
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ if (fcgi_strm_sc(fstrm) && se_fl_test(fstrm->sd, SE_FL_WAIT_FOR_HS))
+ fcgi_strm_notify_recv(fstrm);
+ node = eb32_next(node);
+ }
+ }
+
+ if ((fconn->flags & FCGI_CF_ERROR) || fcgi_conn_read0_pending(fconn) ||
+ fconn->state == FCGI_CS_CLOSED || (fconn->flags & FCGI_CF_ABRTS_FAILED) ||
+ eb_is_empty(&fconn->streams_by_id)) {
+ fcgi_wake_some_streams(fconn, 0);
+
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ /* no more stream, kill the connection now */
+ fcgi_release(fconn);
+ TRACE_DEVEL("leaving after releasing the connection", FCGI_EV_FCONN_WAKE);
+ return -1;
+ }
+ }
+
+ if (!b_data(&fconn->dbuf))
+ fcgi_release_buf(fconn, &fconn->dbuf);
+
+ if (fconn->state == FCGI_CS_CLOSED || (fconn->flags & FCGI_CF_ABRTS_FAILED) ||
+ (!br_data(fconn->mbuf) && ((fconn->flags & FCGI_CF_MUX_BLOCK_ANY) || LIST_ISEMPTY(&fconn->send_list))))
+ fcgi_release_mbuf(fconn);
+
+ if (fconn->task) {
+ fconn->task->expire = tick_add(now_ms, (fconn->state == FCGI_CS_CLOSED ? fconn->shut_timeout : fconn->timeout));
+ task_queue(fconn->task);
+ }
+
+ fcgi_send(fconn);
+ TRACE_LEAVE(FCGI_EV_FCONN_WAKE, conn);
+ return 0;
+}
+
+
+/* wake-up function called by the connection layer (mux_ops.wake) */
+static int fcgi_wake(struct connection *conn)
+{
+ struct fcgi_conn *fconn = conn->ctx;
+
+ TRACE_POINT(FCGI_EV_FCONN_WAKE, conn);
+ return (fcgi_process(fconn));
+}
+
+
+static int fcgi_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output)
+{
+ int ret = 0;
+ switch (mux_ctl) {
+ case MUX_CTL_STATUS:
+ if (!(conn->flags & CO_FL_WAIT_XPRT))
+ ret |= MUX_STATUS_READY;
+ return ret;
+ case MUX_CTL_EXIT_STATUS:
+ return MUX_ES_UNKNOWN;
+ default:
+ return -1;
+ }
+}
+
+static int fcgi_sctl(struct stconn *sc, enum mux_sctl_type mux_sctl, void *output)
+{
+ int ret = 0;
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+
+ switch (mux_sctl) {
+ case MUX_SCTL_SID:
+ if (output)
+ *((int64_t *)output) = fstrm->id;
+ return ret;
+
+ default:
+ return -1;
+ }
+}
+
+/* Connection timeout management. The principle is that if there's no receipt
+ * nor sending for a certain amount of time, the connection is closed. If the
+ * MUX buffer still has data lying in it or is not allocatable, the connection
+ * is immediately killed. If it's allocatable and empty, we attempt to send
+ * ABORT records.
+ */
+struct task *fcgi_timeout_task(struct task *t, void *context, unsigned int state)
+{
+ struct fcgi_conn *fconn = context;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(FCGI_EV_FCONN_WAKE, (fconn ? fconn->conn : NULL));
+
+ if (fconn) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* Somebody already stole the connection from us, so we should not
+ * free it, we just have to free the task.
+ */
+ if (!t->context) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ fconn = NULL;
+ goto do_leave;
+ }
+
+ if (!expired) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ TRACE_DEVEL("leaving (not expired)", FCGI_EV_FCONN_WAKE, fconn->conn);
+ return t;
+ }
+
+ /* We're about to destroy the connection, so make sure nobody attempts
+ * to steal it from us.
+ */
+ if (fconn->conn->flags & CO_FL_LIST_MASK)
+ conn_delete_from_tree(fconn->conn);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+do_leave:
+ task_destroy(t);
+
+ if (!fconn) {
+ /* resources were already deleted */
+		TRACE_DEVEL("leaving (no more fconn)", FCGI_EV_FCONN_WAKE);
+ return NULL;
+ }
+
+ fconn->task = NULL;
+ fconn->state = FCGI_CS_CLOSED;
+ fcgi_wake_some_streams(fconn, 0);
+
+ if (br_data(fconn->mbuf)) {
+ /* don't even try to send aborts, the buffer is stuck */
+ fconn->flags |= FCGI_CF_ABRTS_FAILED;
+ goto end;
+ }
+
+ /* try to send but no need to insist */
+ if (!fcgi_conn_send_aborts(fconn))
+ fconn->flags |= FCGI_CF_ABRTS_FAILED;
+
+ if (br_data(fconn->mbuf) && !(fconn->flags & FCGI_CF_ABRTS_FAILED) &&
+ conn_xprt_ready(fconn->conn)) {
+ unsigned int released = 0;
+ struct buffer *buf;
+
+ for (buf = br_head(fconn->mbuf); b_size(buf); buf = br_del_head(fconn->mbuf)) {
+ if (b_data(buf)) {
+ int ret = fconn->conn->xprt->snd_buf(fconn->conn, fconn->conn->xprt_ctx,
+ buf, b_data(buf), 0);
+ if (!ret)
+ break;
+ b_del(buf, ret);
+ if (b_data(buf))
+ break;
+ b_free(buf);
+ released++;
+ }
+ }
+
+ if (released)
+ offer_buffers(NULL, released);
+ }
+
+ end:
+ /* either we can release everything now or it will be done later once
+ * the last stream closes.
+ */
+ if (eb_is_empty(&fconn->streams_by_id))
+ fcgi_release(fconn);
+
+ TRACE_LEAVE(FCGI_EV_FCONN_WAKE);
+ return NULL;
+}
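+
+/* Illustrative timeline, assuming a 30s idle timeout applies to this
+ * connection: the task above fires 30 seconds after the last refresh done
+ * in fcgi_process(); it then marks the connection closed, tries once to
+ * emit ABORT records if the mbuf is empty, and releases everything as soon
+ * as the last stream is gone.
+ */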
+
+
+/*******************************************/
+/* functions below are used by the streams */
+/*******************************************/
+
+/* Append the description of what is present in error snapshot <es> into <out>.
+ * The description must be small enough to always fit in a buffer. The output
+ * buffer may be the trash so the trash must not be used inside this function.
+ */
+static void fcgi_show_error_snapshot(struct buffer *out, const struct error_snapshot *es)
+{
+ chunk_appendf(out,
+ " FCGI connection flags 0x%08x, FCGI stream flags 0x%08x\n"
+ " H1 msg state %s(%d), H1 msg flags 0x%08x\n"
+ " H1 chunk len %lld bytes, H1 body len %lld bytes :\n",
+ es->ctx.h1.c_flags, es->ctx.h1.s_flags,
+ h1m_state_str(es->ctx.h1.state), es->ctx.h1.state,
+ es->ctx.h1.m_flags, es->ctx.h1.m_clen, es->ctx.h1.m_blen);
+}
+/*
+ * Capture a bad response and archive it in the proxy's structure. By default
+ * it tries to report the error position as h1m->err_pos. However if this one is
+ * not set, it will then report h1m->next, which is the last known parsing
+ * point. The function is able to deal with wrapping buffers. It always displays
+ * buffers as a contiguous area starting at buf->p. The direction is determined
+ * thanks to the h1m's flags.
+ */
+static void fcgi_strm_capture_bad_message(struct fcgi_conn *fconn, struct fcgi_strm *fstrm,
+ struct h1m *h1m, struct buffer *buf)
+{
+ struct session *sess = fstrm->sess;
+ struct proxy *proxy = fconn->proxy;
+ struct proxy *other_end;
+ union error_snapshot_ctx ctx;
+
+ if (fcgi_strm_sc(fstrm) && sc_strm(fcgi_strm_sc(fstrm))) {
+ if (sess == NULL)
+ sess = __sc_strm(fcgi_strm_sc(fstrm))->sess;
+ if (!(h1m->flags & H1_MF_RESP))
+ other_end = __sc_strm(fcgi_strm_sc(fstrm))->be;
+ else
+ other_end = sess->fe;
+ } else
+ other_end = NULL;
+ /* http-specific part now */
+ ctx.h1.state = h1m->state;
+ ctx.h1.c_flags = fconn->flags;
+ ctx.h1.s_flags = fstrm->flags;
+ ctx.h1.m_flags = h1m->flags;
+ ctx.h1.m_clen = h1m->curr_len;
+ ctx.h1.m_blen = h1m->body_len;
+
+ proxy_capture_error(proxy, 1, other_end, fconn->conn->target, sess, buf, 0, 0,
+ (h1m->err_pos >= 0) ? h1m->err_pos : h1m->next,
+ &ctx, fcgi_show_error_snapshot);
+}
+
+static size_t fcgi_strm_parse_headers(struct fcgi_strm *fstrm, struct h1m *h1m, struct htx *htx,
+ struct buffer *buf, size_t *ofs, size_t max)
+{
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fstrm->fconn->conn, fstrm, 0, (size_t[]){max});
+ ret = h1_parse_msg_hdrs(h1m, NULL, htx, buf, *ofs, max);
+ if (ret <= 0) {
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fstrm->fconn->conn, fstrm);
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ TRACE_ERROR("parsing error, reject H1 response", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS|FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ fcgi_strm_capture_bad_message(fstrm->fconn, fstrm, h1m, buf);
+ }
+ ret = 0;
+ goto end;
+ }
+
+	/* Reject any message with an unknown transfer-encoding, i.e. any
+	 * encoding other than "chunked". A 422-Unprocessable-Content is
+ * returned for an invalid request, a 502-Bad-Gateway for an invalid
+ * response.
+ */
+ if (h1m->flags & H1_MF_TE_OTHER) {
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ TRACE_ERROR("Unknown transfer-encoding", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS|FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ fcgi_strm_capture_bad_message(fstrm->fconn, fstrm, h1m, buf);
+ ret = 0;
+ goto end;
+ }
+
+ *ofs += ret;
+ end:
+ TRACE_LEAVE(FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fstrm->fconn->conn, fstrm, 0, (size_t[]){ret});
+ return ret;
+}
+
+static size_t fcgi_strm_parse_data(struct fcgi_strm *fstrm, struct h1m *h1m, struct htx **htx,
+ struct buffer *buf, size_t *ofs, size_t max, struct buffer *htxbuf)
+{
+ size_t ret;
+
+ TRACE_ENTER(FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fstrm->fconn->conn, fstrm, 0, (size_t[]){max});
+ ret = h1_parse_msg_data(h1m, htx, buf, *ofs, max, htxbuf);
+ if (!ret) {
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fstrm->fconn->conn, fstrm);
+ if ((*htx)->flags & HTX_FL_PARSING_ERROR) {
+ TRACE_ERROR("parsing error, reject H1 response", FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY|FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ fcgi_strm_capture_bad_message(fstrm->fconn, fstrm, h1m, buf);
+ }
+ goto end;
+ }
+ *ofs += ret;
+ end:
+ TRACE_LEAVE(FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fstrm->fconn->conn, fstrm, 0, (size_t[]){ret});
+ return ret;
+}
+
+static size_t fcgi_strm_parse_trailers(struct fcgi_strm *fstrm, struct h1m *h1m, struct htx *htx,
+ struct buffer *buf, size_t *ofs, size_t max)
+{
+ int ret;
+
+ TRACE_ENTER(FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fstrm->fconn->conn, fstrm, 0, (size_t[]){max});
+ ret = h1_parse_msg_tlrs(h1m, htx, buf, *ofs, max);
+ if (ret <= 0) {
+ TRACE_DEVEL("leaving on missing data or error", FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fstrm->fconn->conn, fstrm);
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ TRACE_ERROR("parsing error, reject H1 response", FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS|FCGI_EV_FSTRM_ERR, fstrm->fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ fcgi_strm_capture_bad_message(fstrm->fconn, fstrm, h1m, buf);
+ }
+ ret = 0;
+ goto end;
+ }
+ *ofs += ret;
+ end:
+ TRACE_LEAVE(FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fstrm->fconn->conn, fstrm, 0, (size_t[]){ret});
+ return ret;
+}
+
+static size_t fcgi_strm_parse_response(struct fcgi_strm *fstrm, struct buffer *buf, size_t count)
+{
+ struct fcgi_conn *fconn = fstrm->fconn;
+ struct htx *htx;
+ struct h1m *h1m = &fstrm->h1m;
+ size_t ret, data, total = 0;
+
+ htx = htx_from_buf(buf);
+ TRACE_ENTER(FCGI_EV_RSP_DATA, fconn->conn, fstrm, htx, (size_t[]){count});
+
+ data = htx->data;
+ if (fstrm->state == FCGI_SS_ERROR)
+ goto end;
+
+ do {
+ size_t used = htx_used_space(htx);
+
+ if (h1m->state <= H1_MSG_LAST_LF) {
+ TRACE_PROTO("parsing response headers", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fconn->conn, fstrm);
+ ret = fcgi_strm_parse_headers(fstrm, h1m, htx, &fstrm->rxbuf, &total, count);
+ if (!ret)
+ break;
+
+ TRACE_USER("rcvd H1 response headers", FCGI_EV_RSP_DATA|FCGI_EV_RSP_HDRS, fconn->conn, fstrm, htx);
+
+ if ((h1m->flags & (H1_MF_VER_11|H1_MF_XFER_LEN)) == H1_MF_VER_11) {
+ struct htx_blk *blk = htx_get_head_blk(htx);
+ struct htx_sl *sl;
+
+ if (!blk)
+ break;
+ sl = htx_get_blk_ptr(htx, blk);
+ sl->flags |= HTX_SL_F_XFER_LEN;
+ htx->extra = 0;
+ }
+ }
+ else if (h1m->state < H1_MSG_TRAILERS) {
+ TRACE_PROTO("parsing response payload", FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fconn->conn, fstrm);
+ fcgi_strm_parse_data(fstrm, h1m, &htx, &fstrm->rxbuf, &total, count, buf);
+
+ if (!(h1m->flags & H1_MF_XFER_LEN) && fstrm->state != FCGI_SS_ERROR &&
+ (fstrm->flags & FCGI_SF_ES_RCVD) && b_data(&fstrm->rxbuf) == total) {
+ TRACE_DEVEL("end of data", FCGI_EV_RSP_DATA, fconn->conn, fstrm);
+ if (htx_is_empty(htx) && !htx_add_endof(htx, HTX_BLK_EOT))
+ break;
+ htx->flags |= HTX_FL_EOM;
+ h1m->state = H1_MSG_DONE;
+ TRACE_USER("H1 response fully rcvd", FCGI_EV_RSP_DATA|FCGI_EV_RSP_EOM, fconn->conn, fstrm, htx);
+ }
+
+ if (h1m->state < H1_MSG_TRAILERS)
+ break;
+
+ TRACE_PROTO("rcvd response payload data", FCGI_EV_RSP_DATA|FCGI_EV_RSP_BODY, fconn->conn, fstrm, htx);
+ }
+ else if (h1m->state == H1_MSG_TRAILERS) {
+ TRACE_PROTO("parsing response trailers", FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fconn->conn, fstrm);
+ fcgi_strm_parse_trailers(fstrm, h1m, htx, &fstrm->rxbuf, &total, count);
+ if (h1m->state != H1_MSG_DONE)
+ break;
+
+ TRACE_PROTO("rcvd H1 response trailers", FCGI_EV_RSP_DATA|FCGI_EV_RSP_TLRS, fconn->conn, fstrm, htx);
+ }
+ else if (h1m->state == H1_MSG_DONE) {
+ TRACE_USER("H1 response fully rcvd", FCGI_EV_RSP_DATA|FCGI_EV_RSP_EOM, fconn->conn, fstrm, htx);
+ if (b_data(&fstrm->rxbuf) > total) {
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ TRACE_PROTO("too much data, parsing error", FCGI_EV_RSP_DATA, fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ }
+ break;
+ }
+ else {
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ TRACE_ERROR("unexpected processing error", FCGI_EV_RSP_DATA|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ fcgi_strm_error(fstrm);
+ break;
+ }
+
+ count -= htx_used_space(htx) - used;
+ } while (fstrm->state != FCGI_SS_ERROR);
+
+ if (fstrm->state == FCGI_SS_ERROR) {
+ b_reset(&fstrm->rxbuf);
+ htx_to_buf(htx, buf);
+ TRACE_DEVEL("leaving on error", FCGI_EV_RSP_DATA|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ return 0;
+ }
+
+ b_del(&fstrm->rxbuf, total);
+
+ end:
+ htx_to_buf(htx, buf);
+ ret = htx->data - data;
+ TRACE_LEAVE(FCGI_EV_RSP_DATA, fconn->conn, fstrm, htx, (size_t[]){ret});
+ return ret;
+}
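+
+/* Illustrative sequence for a well-formed response, following the h1m
+ * states used above: fcgi_strm_parse_headers() runs while the state is at
+ * most H1_MSG_LAST_LF, fcgi_strm_parse_data() consumes the payload up to
+ * H1_MSG_TRAILERS, fcgi_strm_parse_trailers() finishes chunked messages,
+ * and H1_MSG_DONE ends the loop. Any leftover bytes seen in this last state
+ * are reported as a parsing error.
+ */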
+
+/*
+ * Attach a new stream to a connection
+ * (Used for outgoing connections)
+ */
+static int fcgi_attach(struct connection *conn, struct sedesc *sd, struct session *sess)
+{
+ struct fcgi_strm *fstrm;
+ struct fcgi_conn *fconn = conn->ctx;
+
+ TRACE_ENTER(FCGI_EV_FSTRM_NEW, conn);
+ fstrm = fcgi_stconn_new(fconn, sd->sc, sess);
+ if (!fstrm)
+ goto err;
+
+ /* the connection is not idle anymore, let's mark this */
+ HA_ATOMIC_AND(&fconn->wait_event.tasklet->state, ~TASK_F_USR1);
+ xprt_set_used(conn, conn->xprt, conn->xprt_ctx);
+
+ TRACE_LEAVE(FCGI_EV_FSTRM_NEW, conn, fstrm);
+ return 0;
+
+ err:
+ TRACE_DEVEL("leaving on error", FCGI_EV_FSTRM_NEW|FCGI_EV_FSTRM_ERR, conn);
+ return -1;
+}
+
+/* Retrieves the first valid stream connector from this connection, or returns NULL.
+ * We have to scan because we may have some orphan streams. It might be
+ * beneficial to scan backwards from the end to reduce the likelihood of
+ * finding orphans.
+ */
+static struct stconn *fcgi_get_first_sc(const struct connection *conn)
+{
+ struct fcgi_conn *fconn = conn->ctx;
+ struct fcgi_strm *fstrm;
+ struct eb32_node *node;
+
+ node = eb32_first(&fconn->streams_by_id);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ if (fcgi_strm_sc(fstrm))
+ return fcgi_strm_sc(fstrm);
+ node = eb32_next(node);
+ }
+ return NULL;
+}
+
+/*
+ * Destroy the mux and the associated connection, if it is no longer used
+ */
+static void fcgi_destroy(void *ctx)
+{
+ struct fcgi_conn *fconn = ctx;
+
+ TRACE_POINT(FCGI_EV_FCONN_END, fconn->conn);
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ BUG_ON(fconn->conn->ctx != fconn);
+ fcgi_release(fconn);
+ }
+}
+
+/*
+ * Detach the stream from the connection and possibly release the connection.
+ */
+static void fcgi_detach(struct sedesc *sd)
+{
+ struct fcgi_strm *fstrm = sd->se;
+ struct fcgi_conn *fconn;
+ struct session *sess;
+
+ TRACE_ENTER(FCGI_EV_STRM_END, (fstrm ? fstrm->fconn->conn : NULL), fstrm);
+
+ if (!fstrm) {
+ TRACE_LEAVE(FCGI_EV_STRM_END);
+ return;
+ }
+
+	/* there's no txbuf so we're certain not to be able to send anything */
+ fstrm->flags &= ~FCGI_SF_NOTIFIED;
+
+ sess = fstrm->sess;
+ fconn = fstrm->fconn;
+ fconn->nb_sc--;
+
+ if (fstrm->proto_status == FCGI_PS_CANT_MPX_CONN) {
+ fconn->flags &= ~FCGI_CF_MPXS_CONNS;
+ fconn->streams_limit = 1;
+ }
+ else if (fstrm->proto_status == FCGI_PS_OVERLOADED ||
+ fstrm->proto_status == FCGI_PS_UNKNOWN_ROLE) {
+ fconn->flags &= ~FCGI_CF_KEEP_CONN;
+ fconn->state = FCGI_CS_CLOSED;
+ }
+
+ /* this stream may be blocked waiting for some data to leave, so orphan
+ * it in this case.
+ */
+ if (!(fconn->flags & (FCGI_CF_ERR_PENDING|FCGI_CF_ERROR)) && // FIXME: Be sure for ERR_PENDING
+ (fconn->state != FCGI_CS_CLOSED) &&
+ (fstrm->flags & (FCGI_SF_BLK_MBUSY|FCGI_SF_BLK_MROOM)) &&
+ (fstrm->subs || (fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)))) {
+ TRACE_DEVEL("leaving on stream blocked", FCGI_EV_STRM_END|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ return;
+ }
+
+ if ((fconn->flags & FCGI_CF_DEM_BLOCK_ANY && fstrm->id == fconn->dsi)) {
+ /* unblock the connection if it was blocked on this stream. */
+ fconn->flags &= ~FCGI_CF_DEM_BLOCK_ANY;
+ fcgi_conn_restart_reading(fconn, 1);
+ }
+
+ fcgi_strm_destroy(fstrm);
+
+ if (!(fconn->flags & (FCGI_CF_EOS|FCGI_CF_ERR_PENDING|FCGI_CF_ERROR)) &&
+ (fconn->flags & FCGI_CF_KEEP_CONN)) {
+ if (fconn->conn->flags & CO_FL_PRIVATE) {
+ /* Add the connection in the session serverlist, if not already done */
+ if (!session_add_conn(sess, fconn->conn, fconn->conn->target)) {
+ fconn->conn->owner = NULL;
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ /* let's kill the connection right away */
+ fconn->conn->mux->destroy(fconn);
+ TRACE_DEVEL("outgoing connection killed", FCGI_EV_STRM_END|FCGI_EV_FCONN_ERR);
+ return;
+ }
+ }
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ if (session_check_idle_conn(fconn->conn->owner, fconn->conn) != 0) {
+ /* The connection is destroyed, let's leave */
+ TRACE_DEVEL("outgoing connection killed", FCGI_EV_STRM_END|FCGI_EV_FCONN_ERR);
+ return;
+ }
+ }
+ }
+ else {
+ if (eb_is_empty(&fconn->streams_by_id)) {
+ /* If the connection is owned by the session, first remove it
+ * from its list
+ */
+ if (fconn->conn->owner) {
+ session_unown_conn(fconn->conn->owner, fconn->conn);
+ fconn->conn->owner = NULL;
+ }
+
+ /* mark that the tasklet may lose its context to another thread and
+ * that the handler needs to check it under the idle conns lock.
+ */
+ HA_ATOMIC_OR(&fconn->wait_event.tasklet->state, TASK_F_USR1);
+ xprt_set_idle(fconn->conn, fconn->conn->xprt, fconn->conn->xprt_ctx);
+
+ if (!srv_add_to_idle_list(objt_server(fconn->conn->target), fconn->conn, 1)) {
+ /* The server doesn't want it, let's kill the connection right away */
+ fconn->conn->mux->destroy(fconn);
+ TRACE_DEVEL("outgoing connection killed", FCGI_EV_STRM_END|FCGI_EV_FCONN_ERR);
+ return;
+ }
+ /* At this point, the connection has been added to the
+ * server idle list, so another thread may already have
+ * hijacked it, so we can't do anything with it.
+ */
+ TRACE_DEVEL("reusable idle connection", FCGI_EV_STRM_END, fconn->conn);
+ return;
+ }
+ else if (!fconn->conn->hash_node->node.node.leaf_p &&
+ fcgi_avail_streams(fconn->conn) > 0 && objt_server(fconn->conn->target) &&
+ !LIST_INLIST(&fconn->conn->session_list)) {
+ srv_add_to_avail_list(__objt_server(fconn->conn->target), fconn->conn);
+ }
+ }
+ }
+
+ /* We don't want to close right now unless we're removing the last
+ * stream and the connection is in error.
+ */
+ if (fcgi_conn_is_dead(fconn)) {
+ /* no more stream will come, kill it now */
+ TRACE_DEVEL("leaving, killing dead connection", FCGI_EV_STRM_END, fconn->conn);
+ fcgi_release(fconn);
+ }
+ else if (fconn->task) {
+ fconn->task->expire = tick_add(now_ms, (fconn->state == FCGI_CS_CLOSED ? fconn->shut_timeout : fconn->timeout));
+ task_queue(fconn->task);
+ TRACE_DEVEL("leaving, refreshing connection's timeout", FCGI_EV_STRM_END, fconn->conn);
+ }
+ else
+ TRACE_DEVEL("leaving", FCGI_EV_STRM_END, fconn->conn);
+}
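+
+/* Summary of the reuse decision made in fcgi_detach() above (for reference
+ * only): a private connection stays attached to its session; otherwise,
+ * once the last stream leaves, the connection is handed to the server's
+ * idle list via srv_add_to_idle_list(), after which another thread may
+ * legitimately steal it, which is why nothing may be touched past that
+ * point.
+ */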
+
+
+/* Performs a synchronous or asynchronous shutr(). */
+static void fcgi_do_shutr(struct fcgi_strm *fstrm)
+{
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ TRACE_ENTER(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+
+ if (fstrm->state == FCGI_SS_CLOSED)
+ goto done;
+
+ /* a connstream may require us to immediately kill the whole connection
+ * for example because of a "tcp-request content reject" rule that is
+ * normally used to limit abuse.
+ */
+ if (se_fl_test(fstrm->sd, SE_FL_KILL_CONN) &&
+ !(fconn->flags & (FCGI_CF_ABRTS_SENT|FCGI_CF_ABRTS_FAILED))) {
+ TRACE_STATE("stream wants to kill the connection", FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ fconn->state = FCGI_CS_CLOSED;
+ }
+ else if (fstrm->flags & FCGI_SF_BEGIN_SENT) {
+ TRACE_STATE("no headers sent yet, trying a retryable abort", FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ if (!(fstrm->flags & (FCGI_SF_ES_SENT|FCGI_SF_ABRT_SENT)) &&
+ !fcgi_strm_send_abort(fconn, fstrm))
+ goto add_to_list;
+ }
+
+ fcgi_strm_close(fstrm);
+
+ if (!(fconn->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(fconn->wait_event.tasklet);
+ done:
+ fstrm->flags &= ~FCGI_SF_WANT_SHUTR;
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ return;
+
+ add_to_list:
+ /* Let the handler know we want to shutr, and add ourselves to the
+ * send list if not yet done. fcgi_deferred_shut() will be
+ * automatically called via the shut_tl tasklet when there's room
+ * again.
+ */
+ if (!LIST_INLIST(&fstrm->send_list)) {
+ if (fstrm->flags & (FCGI_SF_BLK_MBUSY|FCGI_SF_BLK_MROOM)) {
+ LIST_APPEND(&fconn->send_list, &fstrm->send_list);
+ }
+ }
+ fstrm->flags |= FCGI_SF_WANT_SHUTR;
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ return;
+}
+
+/* Performs a synchronous or asynchronous shutw(). */
+static void fcgi_do_shutw(struct fcgi_strm *fstrm)
+{
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ TRACE_ENTER(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+
+	if (fstrm->state == FCGI_SS_HLOC || fstrm->state == FCGI_SS_CLOSED)
+ goto done;
+
+ if (fstrm->state != FCGI_SS_ERROR && (fstrm->flags & FCGI_SF_BEGIN_SENT)) {
+ if (!(fstrm->flags & (FCGI_SF_ES_SENT|FCGI_SF_ABRT_SENT)) &&
+ !fcgi_strm_send_abort(fconn, fstrm))
+ goto add_to_list;
+
+ if (fstrm->state == FCGI_SS_HREM)
+ fcgi_strm_close(fstrm);
+ else
+ fstrm->state = FCGI_SS_HLOC;
+ } else {
+ /* a connstream may require us to immediately kill the whole connection
+ * for example because of a "tcp-request content reject" rule that is
+ * normally used to limit abuse.
+ */
+ if (se_fl_test(fstrm->sd, SE_FL_KILL_CONN) &&
+ !(fconn->flags & (FCGI_CF_ABRTS_SENT|FCGI_CF_ABRTS_FAILED))) {
+ TRACE_STATE("stream wants to kill the connection", FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ fconn->state = FCGI_CS_CLOSED;
+ }
+
+ fcgi_strm_close(fstrm);
+ }
+
+ if (!(fconn->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(fconn->wait_event.tasklet);
+ done:
+ fstrm->flags &= ~FCGI_SF_WANT_SHUTW;
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ return;
+
+ add_to_list:
+	/* Let the handler know we want to shutw, and add ourselves to the
+ * send list if not yet done. fcgi_deferred_shut() will be
+ * automatically called via the shut_tl tasklet when there's room
+ * again.
+ */
+ if (!LIST_INLIST(&fstrm->send_list)) {
+ if (fstrm->flags & (FCGI_SF_BLK_MBUSY|FCGI_SF_BLK_MROOM)) {
+ LIST_APPEND(&fconn->send_list, &fstrm->send_list);
+ }
+ }
+ fstrm->flags |= FCGI_SF_WANT_SHUTW;
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+ return;
+}
+
+/* This is the tasklet referenced in fstrm->shut_tl, it is used for
+ * deferred shutdowns when the fcgi_detach() was done but the mux buffer was full
+ * and prevented the last record from being emitted.
+ */
+struct task *fcgi_deferred_shut(struct task *t, void *ctx, unsigned int state)
+{
+ struct fcgi_strm *fstrm = ctx;
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ TRACE_ENTER(FCGI_EV_STRM_SHUT, fconn->conn, fstrm);
+
+ if (fstrm->flags & FCGI_SF_NOTIFIED) {
+ /* some data processing remains to be done first */
+ goto end;
+ }
+
+ if (fstrm->flags & FCGI_SF_WANT_SHUTW)
+ fcgi_do_shutw(fstrm);
+
+ if (fstrm->flags & FCGI_SF_WANT_SHUTR)
+ fcgi_do_shutr(fstrm);
+
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW))) {
+ /* We're done trying to send, remove ourself from the send_list */
+ LIST_DEL_INIT(&fstrm->send_list);
+
+ if (!fcgi_strm_sc(fstrm)) {
+ fcgi_strm_destroy(fstrm);
+ if (fcgi_conn_is_dead(fconn))
+ fcgi_release(fconn);
+ }
+ }
+ end:
+ TRACE_LEAVE(FCGI_EV_STRM_SHUT);
+ return NULL;
+}
+
+/* shutr() called by the stream connector (mux_ops.shutr) */
+static void fcgi_shutr(struct stconn *sc, enum co_shr_mode mode)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+
+ TRACE_POINT(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm);
+ if (!mode)
+ return;
+ fcgi_do_shutr(fstrm);
+}
+
+/* shutw() called by the stream connector (mux_ops.shutw) */
+static void fcgi_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+
+ TRACE_POINT(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm);
+ fcgi_do_shutw(fstrm);
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int fcgi_subscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(fstrm->subs && fstrm->subs != es);
+
+ es->events |= event_type;
+ fstrm->subs = es;
+
+ if (event_type & SUB_RETRY_RECV)
+		TRACE_DEVEL("subscribe(recv)", FCGI_EV_STRM_RECV, fconn->conn, fstrm);
+
+ if (event_type & SUB_RETRY_SEND) {
+		TRACE_DEVEL("subscribe(send)", FCGI_EV_STRM_SEND, fconn->conn, fstrm);
+ if (!LIST_INLIST(&fstrm->send_list))
+ LIST_APPEND(&fconn->send_list, &fstrm->send_list);
+ }
+ return 0;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>
+ * (undo fcgi_subscribe). The <es> pointer is not allowed to differ from the one
+ * passed to the subscribe() call. It always returns zero.
+ */
+static int fcgi_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+ struct fcgi_conn *fconn = fstrm->fconn;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(fstrm->subs && fstrm->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ fstrm->subs = NULL;
+
+ if (event_type & SUB_RETRY_RECV)
+		TRACE_DEVEL("unsubscribe(recv)", FCGI_EV_STRM_RECV, fconn->conn, fstrm);
+
+ if (event_type & SUB_RETRY_SEND) {
+		TRACE_DEVEL("unsubscribe(send)", FCGI_EV_STRM_SEND, fconn->conn, fstrm);
+ fstrm->flags &= ~FCGI_SF_NOTIFIED;
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)))
+ LIST_DEL_INIT(&fstrm->send_list);
+ }
+ return 0;
+}
+
+/* Called from the upper layer, to receive data
+ *
+ * The caller is responsible for defragmenting <buf> if necessary. But <flags>
+ * must be tested to know the calling context. If CO_RFL_BUF_FLUSH is set, it
+ * means the caller wants to flush input data (from the mux buffer and the
+ * channel buffer) to be able to use kernel splicing or any kind of mux-to-mux
+ * xfer. If CO_RFL_KEEP_RECV is set, the mux must always subscribe for read
+ * events before giving back. CO_RFL_BUF_WET is set if <buf> is congested with
+ * data scheduled for leaving soon. CO_RFL_BUF_NOT_STUCK is set to instruct the
+ * mux it may optimize the data copy to <buf> if necessary. Otherwise, it should
+ * copy as much data as possible.
+ */
+static size_t fcgi_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+ struct fcgi_conn *fconn = fstrm->fconn;
+ size_t ret = 0;
+
+ TRACE_ENTER(FCGI_EV_STRM_RECV, fconn->conn, fstrm);
+
+ if (!(fconn->flags & FCGI_CF_DEM_SALLOC))
+ ret = fcgi_strm_parse_response(fstrm, buf, count);
+ else
+ TRACE_STATE("fstrm rxbuf not allocated", FCGI_EV_STRM_RECV|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+
+ if (b_data(&fstrm->rxbuf)) {
+ /* If the channel buffer is not empty, consider the mux is
+ * blocked because it needs more room. But if the channel buffer
+ * is empty, it means partial data were received and the mux
+ * needs to receive more data to be able to parse it.
+ */
+ if (b_data(buf))
+ se_fl_set(fstrm->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ }
+ else {
+ se_fl_clr(fstrm->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (fstrm->state == FCGI_SS_ERROR || (fstrm->h1m.state == H1_MSG_DONE)) {
+ se_fl_set(fstrm->sd, SE_FL_EOI);
+ if (!(fstrm->h1m.flags & (H1_MF_VER_11|H1_MF_XFER_LEN)))
+ se_fl_set(fstrm->sd, SE_FL_EOS);
+ }
+ if (fcgi_conn_read0_pending(fconn)) {
+ se_fl_set(fstrm->sd, SE_FL_EOS);
+ if (!se_fl_test(fstrm->sd, SE_FL_EOI))
+ se_fl_set(fstrm->sd, SE_FL_ERROR);
+ }
+ if (se_fl_test(fstrm->sd, SE_FL_ERR_PENDING))
+ se_fl_set(fstrm->sd, SE_FL_ERROR);
+ fcgi_release_buf(fconn, &fstrm->rxbuf);
+ }
+
+ if (ret && fconn->dsi == fstrm->id) {
+ /* demux is blocking on this stream's buffer */
+ fconn->flags &= ~FCGI_CF_DEM_SFULL;
+ fcgi_conn_restart_reading(fconn, 1);
+ }
+
+ TRACE_LEAVE(FCGI_EV_STRM_RECV, fconn->conn, fstrm);
+ return ret;
+}
+
+
+/* Called from the upper layer, to send data from buffer <buf> for no more than
+ * <count> bytes. Returns the number of bytes effectively sent. Some status
+ * flags may be updated on the stream connector.
+ */
+static size_t fcgi_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct fcgi_strm *fstrm = __sc_mux_strm(sc);
+ struct fcgi_conn *fconn = fstrm->fconn;
+ size_t total = 0;
+ size_t ret;
+ struct htx *htx = NULL;
+ struct htx_sl *sl;
+ struct htx_blk *blk;
+ uint32_t bsize;
+
+ TRACE_ENTER(FCGI_EV_STRM_SEND, fconn->conn, fstrm, 0, (size_t[]){count});
+
+ /* If we were not just woken because we wanted to send but couldn't,
+ * and there's somebody else that is waiting to send, do nothing,
+ * we will subscribe later and be put at the end of the list
+ */
+ if (!(fstrm->flags & FCGI_SF_NOTIFIED) && !LIST_ISEMPTY(&fconn->send_list)) {
+ TRACE_STATE("other streams already waiting, going to the queue and leaving", FCGI_EV_STRM_SEND|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ return 0;
+ }
+ fstrm->flags &= ~FCGI_SF_NOTIFIED;
+
+ if (fconn->state < FCGI_CS_RECORD_H) {
+ TRACE_STATE("connection not ready, leaving", FCGI_EV_STRM_SEND|FCGI_EV_FSTRM_BLK, fconn->conn, fstrm);
+ return 0;
+ }
+
+ htx = htxbuf(buf);
+ if (fstrm->id == 0) {
+ int32_t id = fcgi_conn_get_next_sid(fconn);
+
+ if (id < 0) {
+ fcgi_strm_close(fstrm);
+ se_fl_set(fstrm->sd, SE_FL_ERROR);
+ TRACE_DEVEL("couldn't get a stream ID, leaving in error", FCGI_EV_STRM_SEND|FCGI_EV_FSTRM_ERR|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ return 0;
+ }
+
+ eb32_delete(&fstrm->by_id);
+ fstrm->by_id.key = fstrm->id = id;
+ fconn->max_id = id;
+ fconn->nb_reserved--;
+ eb32_insert(&fconn->streams_by_id, &fstrm->by_id);
+
+ /* Check if length of the body is known or if the message is
+ * full. Otherwise, the request is invalid.
+ */
+ sl = http_get_stline(htx);
+ if (!sl || (!(sl->flags & HTX_SL_F_CLEN) && !(htx->flags & HTX_FL_EOM))) {
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ fcgi_strm_error(fstrm);
+ goto done;
+ }
+ }
+
+ if (!(fstrm->flags & FCGI_SF_BEGIN_SENT)) {
+ TRACE_PROTO("sending FCGI BEGIN_REQUEST record", FCGI_EV_TX_RECORD|FCGI_EV_TX_BEGREQ, fconn->conn, fstrm);
+ if (!fcgi_strm_send_begin_request(fconn, fstrm))
+ goto done;
+ }
+
+ if (!(fstrm->flags & FCGI_SF_OUTGOING_DATA) && count)
+ fstrm->flags |= FCGI_SF_OUTGOING_DATA;
+
+ while (fstrm->state < FCGI_SS_HLOC && !(fstrm->flags & FCGI_SF_BLK_ANY) &&
+ count && !htx_is_empty(htx)) {
+ blk = htx_get_head_blk(htx);
+ ALREADY_CHECKED(blk);
+ bsize = htx_get_blksz(blk);
+
+ switch (htx_get_blk_type(blk)) {
+ case HTX_BLK_REQ_SL:
+ case HTX_BLK_HDR:
+ TRACE_USER("sending FCGI PARAMS record", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, htx);
+ ret = fcgi_strm_send_params(fconn, fstrm, htx);
+ if (!ret) {
+ goto done;
+ }
+ total += ret;
+ count -= ret;
+ break;
+
+ case HTX_BLK_EOH:
+ if (!(fstrm->flags & FCGI_SF_EP_SENT)) {
+ TRACE_PROTO("sending FCGI PARAMS record", FCGI_EV_TX_RECORD|FCGI_EV_TX_PARAMS, fconn->conn, fstrm, htx);
+ ret = fcgi_strm_send_empty_params(fconn, fstrm);
+ if (!ret)
+ goto done;
+ }
+ if (htx_is_unique_blk(htx, blk) && (htx->flags & HTX_FL_EOM)) {
+ TRACE_PROTO("sending FCGI STDIN record", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx);
+ ret = fcgi_strm_send_empty_stdin(fconn, fstrm);
+ if (!ret)
+ goto done;
+ }
+ goto remove_blk;
+
+ case HTX_BLK_DATA:
+ TRACE_PROTO("sending FCGI STDIN record", FCGI_EV_TX_RECORD|FCGI_EV_TX_STDIN, fconn->conn, fstrm, htx);
+ ret = fcgi_strm_send_stdin(fconn, fstrm, htx, count, buf);
+ if (ret > 0) {
+ htx = htx_from_buf(buf);
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ default:
+ remove_blk:
+ htx_remove_blk(htx, blk);
+ total += bsize;
+ count -= bsize;
+ break;
+ }
+ }
+
+ done:
+ if (fstrm->state >= FCGI_SS_HLOC) {
+ /* trim any possibly pending data after we close (extra CR-LF,
+ * unprocessed trailers, abnormal extra data, ...)
+ */
+ total += count;
+ count = 0;
+ }
+
+ if (fstrm->state == FCGI_SS_ERROR) {
+ TRACE_DEVEL("reporting error to the app-layer stream", FCGI_EV_STRM_SEND|FCGI_EV_FSTRM_ERR|FCGI_EV_STRM_ERR, fconn->conn, fstrm);
+ se_fl_set_error(fstrm->sd);
+ if (!(fstrm->flags & FCGI_SF_BEGIN_SENT) || fcgi_strm_send_abort(fconn, fstrm))
+ fcgi_strm_close(fstrm);
+ }
+
+ if (htx)
+ htx_to_buf(htx, buf);
+
+ if (total > 0) {
+ if (!(fconn->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_DEVEL("data queued, waking up fconn sender", FCGI_EV_STRM_SEND|FCGI_EV_FCONN_SEND|FCGI_EV_FCONN_WAKE, fconn->conn, fstrm);
+ tasklet_wakeup(fconn->wait_event.tasklet);
+ }
+
+ /* Ok we managed to send something, leave the send_list */
+ if (!(fstrm->flags & (FCGI_SF_WANT_SHUTR|FCGI_SF_WANT_SHUTW)))
+ LIST_DEL_INIT(&fstrm->send_list);
+ }
+
+ TRACE_LEAVE(FCGI_EV_STRM_SEND, fconn->conn, fstrm, htx, (size_t[]){total});
+ return total;
+}
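+
+/* For reference, the HTX-to-FCGI mapping performed above: the start line
+ * and headers become PARAMS records, the end-of-headers block triggers an
+ * empty PARAMS record (which terminates the parameter stream in FastCGI)
+ * and possibly an empty STDIN record when the message carries no payload,
+ * and each DATA block is sent as a STDIN record.
+ */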
+
+/* for debugging with CLI's "show fd" command */
+static int fcgi_show_fd(struct buffer *msg, struct connection *conn)
+{
+ struct fcgi_conn *fconn = conn->ctx;
+ struct fcgi_strm *fstrm = NULL;
+ struct eb32_node *node;
+ int send_cnt = 0;
+ int tree_cnt = 0;
+ int orph_cnt = 0;
+ struct buffer *hmbuf, *tmbuf;
+
+ if (!fconn)
+ return 0;
+
+ list_for_each_entry(fstrm, &fconn->send_list, send_list)
+ send_cnt++;
+
+ fstrm = NULL;
+ node = eb32_first(&fconn->streams_by_id);
+ while (node) {
+ fstrm = container_of(node, struct fcgi_strm, by_id);
+ tree_cnt++;
+ if (!fcgi_strm_sc(fstrm))
+ orph_cnt++;
+ node = eb32_next(node);
+ }
+
+ hmbuf = br_head(fconn->mbuf);
+ tmbuf = br_tail(fconn->mbuf);
+ chunk_appendf(msg, " fconn.st0=%d .maxid=%d .flg=0x%04x .nbst=%u"
+ " .nbcs=%u .send_cnt=%d .tree_cnt=%d .orph_cnt=%d .sub=%d "
+ ".dsi=%d .dbuf=%u@%p+%u/%u .mbuf=[%u..%u|%u],h=[%u@%p+%u/%u],t=[%u@%p+%u/%u]",
+ fconn->state, fconn->max_id, fconn->flags,
+ fconn->nb_streams, fconn->nb_sc, send_cnt, tree_cnt, orph_cnt,
+ fconn->wait_event.events, fconn->dsi,
+ (unsigned int)b_data(&fconn->dbuf), b_orig(&fconn->dbuf),
+ (unsigned int)b_head_ofs(&fconn->dbuf), (unsigned int)b_size(&fconn->dbuf),
+ br_head_idx(fconn->mbuf), br_tail_idx(fconn->mbuf), br_size(fconn->mbuf),
+ (unsigned int)b_data(hmbuf), b_orig(hmbuf),
+ (unsigned int)b_head_ofs(hmbuf), (unsigned int)b_size(hmbuf),
+ (unsigned int)b_data(tmbuf), b_orig(tmbuf),
+ (unsigned int)b_head_ofs(tmbuf), (unsigned int)b_size(tmbuf));
+
+ if (fstrm) {
+ chunk_appendf(msg, " last_fstrm=%p .id=%d .flg=0x%04x .rxbuf=%u@%p+%u/%u .sc=%p",
+ fstrm, fstrm->id, fstrm->flags,
+ (unsigned int)b_data(&fstrm->rxbuf), b_orig(&fstrm->rxbuf),
+ (unsigned int)b_head_ofs(&fstrm->rxbuf), (unsigned int)b_size(&fstrm->rxbuf),
+ fcgi_strm_sc(fstrm));
+
+ chunk_appendf(msg, " .sd.flg=0x%08x", se_fl_get(fstrm->sd));
+ if (!se_fl_test(fstrm->sd, SE_FL_ORPHAN))
+ chunk_appendf(msg, " .sc.flg=0x%08x .sc.app=%p",
+ fcgi_strm_sc(fstrm)->flags, fcgi_strm_sc(fstrm)->app);
+
+ chunk_appendf(msg, " .subs=%p", fstrm->subs);
+ if (fstrm->subs) {
+ chunk_appendf(msg, "(ev=%d tl=%p", fstrm->subs->events, fstrm->subs->tasklet);
+ chunk_appendf(msg, " tl.calls=%d tl.ctx=%p tl.fct=",
+ fstrm->subs->tasklet->calls,
+ fstrm->subs->tasklet->context);
+ resolve_sym_name(msg, NULL, fstrm->subs->tasklet->process);
+ chunk_appendf(msg, ")");
+ }
+ }
+ return 0;
+}
+
+/* Migrate the connection to the current thread.
+ * Return 0 if successful, non-zero otherwise.
+ * Expected to be called with the old thread lock held.
+ */
+static int fcgi_takeover(struct connection *conn, int orig_tid)
+{
+ struct fcgi_conn *fcgi = conn->ctx;
+ struct task *task;
+ struct task *new_task;
+ struct tasklet *new_tasklet;
+
+ /* Pre-allocate tasks so that we don't have to roll back after the xprt
+ * has been migrated.
+ */
+ new_task = task_new_here();
+ new_tasklet = tasklet_new();
+ if (!new_task || !new_tasklet)
+ goto fail;
+
+ if (fd_takeover(conn->handle.fd, conn) != 0)
+ goto fail;
+
+ if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) {
+ /* We failed to takeover the xprt, even if the connection may
+ * still be valid, flag it as error'd, as we have already
+ * taken over the fd, and wake the tasklet, so that it will
+ * destroy it.
+ */
+ conn->flags |= CO_FL_ERROR;
+ tasklet_wakeup_on(fcgi->wait_event.tasklet, orig_tid);
+ goto fail;
+ }
+
+ if (fcgi->wait_event.events)
+ fcgi->conn->xprt->unsubscribe(fcgi->conn, fcgi->conn->xprt_ctx,
+ fcgi->wait_event.events, &fcgi->wait_event);
+
+ task = fcgi->task;
+ if (task) {
+ /* only assign a task if there was already one, otherwise
+ * the preallocated new task will be released.
+ */
+ task->context = NULL;
+ fcgi->task = NULL;
+ __ha_barrier_store();
+ task_kill(task);
+
+ fcgi->task = new_task;
+ new_task = NULL;
+ fcgi->task->process = fcgi_timeout_task;
+ fcgi->task->context = fcgi;
+ }
+
+ /* To let the tasklet know it should free itself, and do nothing else,
+ * set its context to NULL;
+ */
+ fcgi->wait_event.tasklet->context = NULL;
+ tasklet_wakeup_on(fcgi->wait_event.tasklet, orig_tid);
+
+ fcgi->wait_event.tasklet = new_tasklet;
+ fcgi->wait_event.tasklet->process = fcgi_io_cb;
+ fcgi->wait_event.tasklet->context = fcgi;
+ fcgi->conn->xprt->subscribe(fcgi->conn, fcgi->conn->xprt_ctx,
+ SUB_RETRY_RECV, &fcgi->wait_event);
+
+ if (new_task)
+ __task_free(new_task);
+ return 0;
+ fail:
+ if (new_task)
+ __task_free(new_task);
+ tasklet_free(new_tasklet);
+ return -1;
+}
+
+/****************************************/
+/* MUX initialization and instantiation */
+/****************************************/
+
+/* The mux operations */
+static const struct mux_ops mux_fcgi_ops = {
+ .init = fcgi_init,
+ .wake = fcgi_wake,
+ .attach = fcgi_attach,
+ .get_first_sc = fcgi_get_first_sc,
+ .detach = fcgi_detach,
+ .destroy = fcgi_destroy,
+ .avail_streams = fcgi_avail_streams,
+ .used_streams = fcgi_used_streams,
+ .rcv_buf = fcgi_rcv_buf,
+ .snd_buf = fcgi_snd_buf,
+ .subscribe = fcgi_subscribe,
+ .unsubscribe = fcgi_unsubscribe,
+ .shutr = fcgi_shutr,
+ .shutw = fcgi_shutw,
+ .ctl = fcgi_ctl,
+ .sctl = fcgi_sctl,
+ .show_fd = fcgi_show_fd,
+ .takeover = fcgi_takeover,
+ .flags = MX_FL_HTX|MX_FL_HOL_RISK|MX_FL_NO_UPG,
+ .name = "FCGI",
+};
+
+
+/* this mux registers FCGI proto */
+static struct mux_proto_list mux_proto_fcgi =
+{ .token = IST("fcgi"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_BE, .mux = &mux_fcgi_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_fcgi);
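+
+/* Hypothetical configuration relying on this mux (names and addresses are
+ * examples only):
+ *
+ *     backend be_php
+ *         mode http
+ *         use-fcgi-app php-fpm
+ *         server app1 127.0.0.1:9000 proto fcgi
+ */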
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/mux_h1.c b/src/mux_h1.c
new file mode 100644
index 0000000..455ebeb
--- /dev/null
+++ b/src/mux_h1.c
@@ -0,0 +1,5374 @@
+/*
+ * HTTP/1 mux-demux for connections
+ *
+ * Copyright 2018 Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <import/ebistree.h>
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/h1.h>
+#include <haproxy/h1_htx.h>
+#include <haproxy/h2.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/log.h>
+#include <haproxy/mux_h1-t.h>
+#include <haproxy/pipe.h>
+#include <haproxy/proxy.h>
+#include <haproxy/session-t.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/trace.h>
+#include <haproxy/xref.h>
+
+/* H1 connection descriptor */
+struct h1c {
+ struct connection *conn;
+ struct h1s *h1s; /* H1 stream descriptor */
+ struct task *task; /* timeout management task */
+
+ uint32_t flags; /* Connection flags: H1C_F_* */
+ enum h1_cs state; /* Connection state */
+
+
+ struct buffer ibuf; /* Input buffer to store data before parsing */
+ struct buffer obuf; /* Output buffer to store data after reformatting */
+ struct proxy *px;
+
+ unsigned int errcode; /* Status code when an error occurred at the H1 connection level */
+
+ int idle_exp; /* idle expiration date (http-keep-alive or http-request timeout) */
+ int timeout; /* client/server timeout duration */
+ int shut_timeout; /* client-fin/server-fin timeout duration */
+
+ unsigned int req_count; /* The number of requests handled by this H1 connection */
+
+ struct h1_counters *px_counters; /* h1 counters attached to proxy */
+ struct buffer_wait buf_wait; /* Wait list for buffer allocation */
+ struct wait_event wait_event; /* To be used if we're waiting for I/Os */
+};
+
+/* H1 stream descriptor */
+struct h1s {
+ struct h1c *h1c;
+ struct sedesc *sd;
+ uint32_t flags; /* Connection flags: H1S_F_* */
+
+ struct wait_event *subs; /* Address of the wait_event the stream connector associated is waiting on */
+
+ struct session *sess; /* Associated session */
+ struct buffer rxbuf; /* receive buffer, always valid (buf_empty or real buffer) */
+ struct h1m req;
+ struct h1m res;
+
+ enum http_meth_t meth; /* HTTP request method */
+ uint16_t status; /* HTTP response status */
+
+ char ws_key[25]; /* websocket handshake key */
+};
+
+/* Map of headers used to convert outgoing headers */
+struct h1_hdrs_map {
+ char *name;
+ struct eb_root map;
+};
+
+/* An entry in a headers map */
+struct h1_hdr_entry {
+ struct ist name;
+ struct ebpt_node node;
+};
+
+/* Declare the headers map */
+static struct h1_hdrs_map hdrs_map = { .name = NULL, .map = EB_ROOT };
+static int accept_payload_with_any_method = 0;
+
+/* trace source and events */
+static void h1_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * h1c - internal H1 connection
+ * h1s - internal H1 stream
+ * strm - application layer
+ * rx - data receipt
+ * tx - data transmission
+ *
+ */
+static const struct trace_event h1_trace_events[] = {
+#define H1_EV_H1C_NEW (1ULL << 0)
+ { .mask = H1_EV_H1C_NEW, .name = "h1c_new", .desc = "new H1 connection" },
+#define H1_EV_H1C_RECV (1ULL << 1)
+ { .mask = H1_EV_H1C_RECV, .name = "h1c_recv", .desc = "Rx on H1 connection" },
+#define H1_EV_H1C_SEND (1ULL << 2)
+ { .mask = H1_EV_H1C_SEND, .name = "h1c_send", .desc = "Tx on H1 connection" },
+#define H1_EV_H1C_BLK (1ULL << 3)
+ { .mask = H1_EV_H1C_BLK, .name = "h1c_blk", .desc = "H1 connection blocked" },
+#define H1_EV_H1C_WAKE (1ULL << 4)
+ { .mask = H1_EV_H1C_WAKE, .name = "h1c_wake", .desc = "H1 connection woken up" },
+#define H1_EV_H1C_END (1ULL << 5)
+ { .mask = H1_EV_H1C_END, .name = "h1c_end", .desc = "H1 connection terminated" },
+#define H1_EV_H1C_ERR (1ULL << 6)
+ { .mask = H1_EV_H1C_ERR, .name = "h1c_err", .desc = "error on H1 connection" },
+
+#define H1_EV_RX_DATA (1ULL << 7)
+ { .mask = H1_EV_RX_DATA, .name = "rx_data", .desc = "receipt of any H1 data" },
+#define H1_EV_RX_EOI (1ULL << 8)
+ { .mask = H1_EV_RX_EOI, .name = "rx_eoi", .desc = "receipt of end of H1 input" },
+#define H1_EV_RX_HDRS (1ULL << 9)
+ { .mask = H1_EV_RX_HDRS, .name = "rx_headers", .desc = "receipt of H1 headers" },
+#define H1_EV_RX_BODY (1ULL << 10)
+ { .mask = H1_EV_RX_BODY, .name = "rx_body", .desc = "receipt of H1 body" },
+#define H1_EV_RX_TLRS (1ULL << 11)
+ { .mask = H1_EV_RX_TLRS, .name = "rx_trailers", .desc = "receipt of H1 trailers" },
+
+#define H1_EV_TX_DATA (1ULL << 12)
+ { .mask = H1_EV_TX_DATA, .name = "tx_data", .desc = "transmission of any H1 data" },
+#define H1_EV_TX_EOI (1ULL << 13)
+ { .mask = H1_EV_TX_EOI, .name = "tx_eoi", .desc = "transmission of end of H1 input" },
+#define H1_EV_TX_HDRS (1ULL << 14)
+ { .mask = H1_EV_TX_HDRS, .name = "tx_headers", .desc = "transmission of all headers" },
+#define H1_EV_TX_BODY (1ULL << 15)
+ { .mask = H1_EV_TX_BODY, .name = "tx_body", .desc = "transmission of H1 body" },
+#define H1_EV_TX_TLRS (1ULL << 16)
+ { .mask = H1_EV_TX_TLRS, .name = "tx_trailers", .desc = "transmission of H1 trailers" },
+
+#define H1_EV_H1S_NEW (1ULL << 17)
+ { .mask = H1_EV_H1S_NEW, .name = "h1s_new", .desc = "new H1 stream" },
+#define H1_EV_H1S_BLK (1ULL << 18)
+ { .mask = H1_EV_H1S_BLK, .name = "h1s_blk", .desc = "H1 stream blocked" },
+#define H1_EV_H1S_END (1ULL << 19)
+ { .mask = H1_EV_H1S_END, .name = "h1s_end", .desc = "H1 stream terminated" },
+#define H1_EV_H1S_ERR (1ULL << 20)
+ { .mask = H1_EV_H1S_ERR, .name = "h1s_err", .desc = "error on H1 stream" },
+
+#define H1_EV_STRM_NEW (1ULL << 21)
+ { .mask = H1_EV_STRM_NEW, .name = "strm_new", .desc = "app-layer stream creation" },
+#define H1_EV_STRM_RECV (1ULL << 22)
+ { .mask = H1_EV_STRM_RECV, .name = "strm_recv", .desc = "receiving data for stream" },
+#define H1_EV_STRM_SEND (1ULL << 23)
+ { .mask = H1_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" },
+#define H1_EV_STRM_WAKE (1ULL << 24)
+ { .mask = H1_EV_STRM_WAKE, .name = "strm_wake", .desc = "stream woken up" },
+#define H1_EV_STRM_SHUT (1ULL << 25)
+ { .mask = H1_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" },
+#define H1_EV_STRM_END (1ULL << 26)
+ { .mask = H1_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" },
+#define H1_EV_STRM_ERR (1ULL << 27)
+ { .mask = H1_EV_STRM_ERR, .name = "strm_err", .desc = "stream error" },
+
+ { }
+};
+
+static const struct name_desc h1_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="h1s", .desc="H1 stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc h1_trace_decoding[] = {
+#define H1_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define H1_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only h1c/h1s state and flags, no real decoding" },
+#define H1_VERB_SIMPLE 3
+ { .name="simple", .desc="add request/response status line or htx info when available" },
+#define H1_VERB_ADVANCED 4
+ { .name="advanced", .desc="add header fields or frame decoding when available" },
+#define H1_VERB_COMPLETE 5
+ { .name="complete", .desc="add full data dump when available" },
+ { /* end */ }
+};
+
+static struct trace_source trace_h1 __read_mostly = {
+ .name = IST("h1"),
+ .desc = "HTTP/1 multiplexer",
+ .arg_def = TRC_ARG1_CONN, // TRACE()'s first argument is always a connection
+ .default_cb = h1_trace,
+ .known_events = h1_trace_events,
+ .lockon_args = h1_trace_lockon_args,
+ .decoding = h1_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_h1
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+
+/* h1 stats module */
+enum {
+ H1_ST_OPEN_CONN,
+ H1_ST_OPEN_STREAM,
+ H1_ST_TOTAL_CONN,
+ H1_ST_TOTAL_STREAM,
+
+ H1_ST_BYTES_IN,
+ H1_ST_BYTES_OUT,
+#if defined(USE_LINUX_SPLICE)
+ H1_ST_SPLICED_BYTES_IN,
+ H1_ST_SPLICED_BYTES_OUT,
+#endif
+ H1_STATS_COUNT /* must be the last member of the enum */
+};
+
+
+static struct name_desc h1_stats[] = {
+ [H1_ST_OPEN_CONN] = { .name = "h1_open_connections",
+ .desc = "Count of currently open connections" },
+ [H1_ST_OPEN_STREAM] = { .name = "h1_open_streams",
+ .desc = "Count of currently open streams" },
+ [H1_ST_TOTAL_CONN] = { .name = "h1_total_connections",
+ .desc = "Total number of connections" },
+ [H1_ST_TOTAL_STREAM] = { .name = "h1_total_streams",
+ .desc = "Total number of streams" },
+
+ [H1_ST_BYTES_IN] = { .name = "h1_bytes_in",
+ .desc = "Total number of bytes received" },
+ [H1_ST_BYTES_OUT] = { .name = "h1_bytes_out",
+ .desc = "Total number of bytes send" },
+#if defined(USE_LINUX_SPLICE)
+ [H1_ST_SPLICED_BYTES_IN] = { .name = "h1_spliced_bytes_in",
+ .desc = "Total number of bytes received using kernel splicing" },
+ [H1_ST_SPLICED_BYTES_OUT] = { .name = "h1_spliced_bytes_out",
+ .desc = "Total number of bytes sendusing kernel splicing" },
+#endif
+
+};
+
+static struct h1_counters {
+ long long open_conns; /* count of currently open connections */
+ long long open_streams; /* count of currently open streams */
+ long long total_conns; /* total number of connections */
+ long long total_streams; /* total number of streams */
+
+ long long bytes_in; /* number of bytes received */
+ long long bytes_out; /* number of bytes sent */
+#if defined(USE_LINUX_SPLICE)
+ long long spliced_bytes_in; /* number of bytes received using kernel splicing */
+ long long spliced_bytes_out; /* number of bytes sent using kernel splicing */
+#endif
+} h1_counters;
+
+static void h1_fill_stats(void *data, struct field *stats)
+{
+ struct h1_counters *counters = data;
+
+ stats[H1_ST_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->open_conns);
+ stats[H1_ST_OPEN_STREAM] = mkf_u64(FN_GAUGE, counters->open_streams);
+ stats[H1_ST_TOTAL_CONN] = mkf_u64(FN_COUNTER, counters->total_conns);
+ stats[H1_ST_TOTAL_STREAM] = mkf_u64(FN_COUNTER, counters->total_streams);
+
+ stats[H1_ST_BYTES_IN] = mkf_u64(FN_COUNTER, counters->bytes_in);
+ stats[H1_ST_BYTES_OUT] = mkf_u64(FN_COUNTER, counters->bytes_out);
+#if defined(USE_LINUX_SPLICE)
+ stats[H1_ST_SPLICED_BYTES_IN] = mkf_u64(FN_COUNTER, counters->spliced_bytes_in);
+ stats[H1_ST_SPLICED_BYTES_OUT] = mkf_u64(FN_COUNTER, counters->spliced_bytes_out);
+#endif
+}
+
+static struct stats_module h1_stats_module = {
+ .name = "h1",
+ .fill_stats = h1_fill_stats,
+ .stats = h1_stats,
+ .stats_count = H1_STATS_COUNT,
+ .counters = &h1_counters,
+ .counters_size = sizeof(h1_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE|STATS_PX_CAP_BE),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &h1_stats_module);
+
+
+/* the h1c and h1s pools */
+DECLARE_STATIC_POOL(pool_head_h1c, "h1c", sizeof(struct h1c));
+DECLARE_STATIC_POOL(pool_head_h1s, "h1s", sizeof(struct h1s));
+
+static int h1_recv(struct h1c *h1c);
+static int h1_send(struct h1c *h1c);
+static int h1_process(struct h1c *h1c);
+/* h1_io_cb is exported to see it resolved in "show fd" */
+struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state);
+struct task *h1_timeout_task(struct task *t, void *context, unsigned int state);
+static void h1_shutw_conn(struct connection *conn);
+static void h1_wake_stream_for_recv(struct h1s *h1s);
+static void h1_wake_stream_for_send(struct h1s *h1s);
+static void h1s_destroy(struct h1s *h1s);
+
+/* returns the stconn associated to the H1 stream */
+static forceinline struct stconn *h1s_sc(const struct h1s *h1s)
+{
+ return h1s->sd->sc;
+}
+
+/* the H1 traces always expect that arg1, if non-null, is of type connection
+ * (from which we can derive h1c), that arg2, if non-null, is of type h1s, and
+ * that arg3, if non-null, is a htx for rx/tx headers.
+ */
+static void h1_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct h1c *h1c = conn ? conn->ctx : NULL;
+ const struct h1s *h1s = a2;
+ const struct htx *htx = a3;
+ const size_t *val = a4;
+
+ if (!h1c)
+ h1c = (h1s ? h1s->h1c : NULL);
+
+ if (!h1c || src->verbosity < H1_VERB_CLEAN)
+ return;
+
+ /* Display frontend/backend info by default */
+ chunk_appendf(&trace_buf, " : [%c,%s]", ((h1c->flags & H1C_F_IS_BACK) ? 'B' : 'F'), h1c_st_to_str(h1c->state));
+
+ /* Display request and response states if h1s is defined */
+ if (h1s) {
+ chunk_appendf(&trace_buf, " [%s, %s]",
+ h1m_state_str(h1s->req.state), h1m_state_str(h1s->res.state));
+
+ if (src->verbosity > H1_VERB_SIMPLE) {
+ chunk_appendf(&trace_buf, " - req=(.fl=0x%08x .curr_len=%lu .body_len=%lu)",
+ h1s->req.flags, (unsigned long)h1s->req.curr_len, (unsigned long)h1s->req.body_len);
+ chunk_appendf(&trace_buf, " res=(.fl=0x%08x .curr_len=%lu .body_len=%lu)",
+ h1s->res.flags, (unsigned long)h1s->res.curr_len, (unsigned long)h1s->res.body_len);
+ }
+
+ }
+
+ if (src->verbosity == H1_VERB_CLEAN)
+ return;
+
+ /* Display the value of the 4th argument (level > STATE) */
+ if (src->level > TRACE_LEVEL_STATE && val)
+ chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
+
+ /* Display status-line if possible (verbosity > MINIMAL) */
+ if (src->verbosity > H1_VERB_MINIMAL && htx && htx_nbblks(htx)) {
+ const struct htx_blk *blk = htx_get_head_blk(htx);
+ const struct htx_sl *sl = htx_get_blk_ptr(htx, blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL)
+ chunk_appendf(&trace_buf, " - \"%.*s %.*s %.*s\"",
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ }
+
+ /* Display h1c info and, if defined, h1s info (pointer + flags) */
+ chunk_appendf(&trace_buf, " - h1c=%p(0x%08x)", h1c, h1c->flags);
+ if (h1c->conn)
+ chunk_appendf(&trace_buf, " conn=%p(0x%08x)", h1c->conn, h1c->conn->flags);
+ if (h1s) {
+ chunk_appendf(&trace_buf, " h1s=%p(0x%08x)", h1s, h1s->flags);
+ if (h1s->sd)
+ chunk_appendf(&trace_buf, " sd=%p(0x%08x)", h1s->sd, se_fl_get(h1s->sd));
+ if (h1s->sd && h1s_sc(h1s))
+ chunk_appendf(&trace_buf, " sc=%p(0x%08x)", h1s_sc(h1s), h1s_sc(h1s)->flags);
+ }
+
+ if (src->verbosity == H1_VERB_MINIMAL)
+ return;
+
+ /* Display input and output buffer info (level > USER & verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_USER) {
+ if (src->verbosity == H1_VERB_COMPLETE ||
+ (src->verbosity == H1_VERB_ADVANCED && (mask & (H1_EV_H1C_RECV|H1_EV_STRM_RECV))))
+ chunk_appendf(&trace_buf, " ibuf=%u@%p+%u/%u",
+ (unsigned int)b_data(&h1c->ibuf), b_orig(&h1c->ibuf),
+ (unsigned int)b_head_ofs(&h1c->ibuf), (unsigned int)b_size(&h1c->ibuf));
+ if (src->verbosity == H1_VERB_COMPLETE ||
+ (src->verbosity == H1_VERB_ADVANCED && (mask & (H1_EV_H1C_SEND|H1_EV_STRM_SEND))))
+ chunk_appendf(&trace_buf, " obuf=%u@%p+%u/%u",
+ (unsigned int)b_data(&h1c->obuf), b_orig(&h1c->obuf),
+ (unsigned int)b_head_ofs(&h1c->obuf), (unsigned int)b_size(&h1c->obuf));
+ }
+
+ /* Display htx info if defined (level > USER) */
+ if (src->level > TRACE_LEVEL_USER && htx) {
+ int full = 0;
+
+ /* Full htx info (level > STATE && verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_STATE) {
+ if (src->verbosity == H1_VERB_COMPLETE)
+ full = 1;
+ else if (src->verbosity == H1_VERB_ADVANCED && (mask & (H1_EV_RX_HDRS|H1_EV_TX_HDRS)))
+ full = 1;
+ }
+
+ chunk_memcat(&trace_buf, "\n\t", 2);
+ htx_dump(&trace_buf, htx, full);
+ }
+}
+
+
+/*****************************************************/
+/* functions below are for dynamic buffer management */
+/*****************************************************/
+/*
+ * Indicates whether or not we may receive data. The rules are the following :
+ * - if an error or a shutdown for reads was detected on the H1 connection we
+ * must not attempt to receive
+ * - if we are waiting for the connection establishment, we must not attempt
+ * to receive
+ * - if reads are explicitly disabled, we must not attempt to receive
+ * - if the input buffer failed to be allocated or is full, we must not try
+ * to receive
+ * - if the mux is blocked on an input condition, we may not attempt to
+ * receive
+ * - otherwise we may attempt to receive
+ */
+static inline int h1_recv_allowed(const struct h1c *h1c)
+{
+ if (h1c->flags & (H1C_F_EOS|H1C_F_ERROR)) {
+ TRACE_DEVEL("recv not allowed because of (eos|error) on h1c", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ if (h1c->conn->flags & (CO_FL_WAIT_L4_CONN|CO_FL_WAIT_L6_CONN)) {
+ TRACE_DEVEL("recv not allowed because of (waitl4|waitl6) on connection", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ if ((h1c->flags & (H1C_F_IN_ALLOC|H1C_F_IN_FULL|H1C_F_IN_SALLOC))) {
+ TRACE_DEVEL("recv not allowed because input is blocked", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Tries to grab a buffer and to re-enable processing on mux <target>. The h1
+ * flags are used to figure out what buffer was requested. It returns 1 if the
+ * allocation succeeds, in which case the connection is woken up, or 0 if it's
+ * impossible to wake up and we prefer to be woken up later.
+ */
+static int h1_buf_available(void *target)
+{
+ struct h1c *h1c = target;
+
+ if ((h1c->flags & H1C_F_IN_ALLOC) && b_alloc(&h1c->ibuf)) {
+ TRACE_STATE("unblocking h1c, ibuf allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn);
+ h1c->flags &= ~H1C_F_IN_ALLOC;
+ if (h1_recv_allowed(h1c))
+ tasklet_wakeup(h1c->wait_event.tasklet);
+ return 1;
+ }
+
+ if ((h1c->flags & H1C_F_OUT_ALLOC) && b_alloc(&h1c->obuf)) {
+ TRACE_STATE("unblocking h1s, obuf allocated", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1c->h1s);
+ h1c->flags &= ~H1C_F_OUT_ALLOC;
+ if (h1c->h1s)
+ h1_wake_stream_for_send(h1c->h1s);
+ return 1;
+ }
+
+ if ((h1c->flags & H1C_F_IN_SALLOC) && h1c->h1s && b_alloc(&h1c->h1s->rxbuf)) {
+ TRACE_STATE("unblocking h1c, stream rxbuf allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn);
+ h1c->flags &= ~H1C_F_IN_SALLOC;
+ tasklet_wakeup(h1c->wait_event.tasklet);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Allocate a buffer. If it fails, it adds the mux to the buffer wait queue.
+ */
+static inline struct buffer *h1_get_buf(struct h1c *h1c, struct buffer *bptr)
+{
+ struct buffer *buf = NULL;
+
+ if (likely(!LIST_INLIST(&h1c->buf_wait.list)) &&
+ unlikely((buf = b_alloc(bptr)) == NULL)) {
+ h1c->buf_wait.target = h1c;
+ h1c->buf_wait.wakeup_cb = h1_buf_available;
+ LIST_APPEND(&th_ctx->buffer_wq, &h1c->buf_wait.list);
+ }
+ return buf;
+}
+
+/*
+ * Release a buffer, if any, and try to wake up entities waiting in the buffer
+ * wait queue.
+ */
+static inline void h1_release_buf(struct h1c *h1c, struct buffer *bptr)
+{
+ if (bptr->size) {
+ b_free(bptr);
+ offer_buffers(h1c->buf_wait.target, 1);
+ }
+}
+
+/* Returns 1 if the H1 connection is alive (IDLE, EMBRYONIC, UPGRADING or
+ * RUNNING). Otherwise 0 is returned.
+ */
+static inline int h1_is_alive(const struct h1c *h1c)
+{
+ return (h1c->state <= H1_CS_RUNNING);
+}
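+
+/* Note on the comparison above (assuming the h1_cs enum is declared in
+ * ascending lifecycle order):
+ *
+ *     IDLE < EMBRYONIC < UPGRADING < RUNNING < CLOSING < CLOSED
+ *
+ * so "state <= H1_CS_RUNNING" is true exactly for the four live states.
+ */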
+
+/* Switch the H1 connection to CLOSING or CLOSED mode, depending on the output
+ * buffer state and on whether there is still an H1 stream or not. If there are
+ * still pending outgoing data or if there is still an H1 stream, it is set to
+ * CLOSING state. Otherwise it is set to CLOSED mode. */
+static inline void h1_close(struct h1c *h1c)
+{
+ h1c->state = ((h1c->h1s || b_data(&h1c->obuf)) ? H1_CS_CLOSING : H1_CS_CLOSED);
+}
+
+/* returns the number of streams in use on a connection to figure out if it's idle
+ * or not. We rely on H1C state to know if the connection is in-use or not. It
+ * is IDLE only when no H1 stream is attached and when the previous stream, if
+ * any, was fully terminated without any error and in K/A mode.
+ */
+static int h1_used_streams(struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+
+ return ((h1c->state == H1_CS_IDLE) ? 0 : 1);
+}
+
+/* returns the number of streams still available on a connection */
+static int h1_avail_streams(struct connection *conn)
+{
+ return 1 - h1_used_streams(conn);
+}
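+
+/* Since an H1 connection carries at most one stream, availability is just the
+ * complement of usage: an IDLE connection reports used=0/avail=1, any other
+ * state reports used=1/avail=0.
+ */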
+
+/* Refresh the h1c task timeout if necessary */
+static void h1_refresh_timeout(struct h1c *h1c)
+{
+ int is_idle_conn = 0;
+
+ if (h1c->task) {
+ if (!h1_is_alive(h1c)) {
+ /* half-closed or dead connections : switch to clientfin/serverfin
+ * timeouts so that we don't hang too long on clients that have
+ * gone away (especially in tunnel mode).
+ */
+ h1c->task->expire = tick_add(now_ms, h1c->shut_timeout);
+ TRACE_DEVEL("refreshing connection's timeout (dead or half-closed)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ is_idle_conn = 1;
+ }
+ else if (b_data(&h1c->obuf)) {
+ /* alive connection with pending outgoing data, need a timeout (server or client). */
+ h1c->task->expire = tick_add(now_ms, h1c->timeout);
+ TRACE_DEVEL("refreshing connection's timeout (pending outgoing data)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ }
+ else if (!(h1c->flags & H1C_F_IS_BACK) && (h1c->state == H1_CS_IDLE)) {
+ /* idle front connections. */
+ h1c->task->expire = (tick_isset(h1c->idle_exp) ? h1c->idle_exp : tick_add(now_ms, h1c->timeout));
+ TRACE_DEVEL("refreshing connection's timeout (idle front h1c)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ is_idle_conn = 1;
+ }
+ else if (!(h1c->flags & H1C_F_IS_BACK) && (h1c->state != H1_CS_RUNNING)) {
+ /* alive front connections waiting for a fully usable stream need a timeout. */
+ h1c->task->expire = tick_add(now_ms, h1c->timeout);
+ TRACE_DEVEL("refreshing connection's timeout (alive front h1c but not ready)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ /* A frontend connection not yet ready could be treated the same way as an idle
+ * one in case of soft-close.
+ */
+ is_idle_conn = 1;
+ }
+ else {
+ /* alive back connections of front connections with a stream connector attached */
+ h1c->task->expire = TICK_ETERNITY;
+ TRACE_DEVEL("no connection timeout (alive back h1c or front h1c with an SC)", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ }
+
+ /* Finally set the idle expiration date if shorter */
+ h1c->task->expire = tick_first(h1c->task->expire, h1c->idle_exp);
+
+ if ((h1c->px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) &&
+ is_idle_conn && tick_isset(global.close_spread_end)) {
+ /* If a soft-stop is in progress and a close-spread-time
+ * is set, we want to spread idle connection closing roughly
+ * evenly across the defined window. This should only
+ * act on idle frontend connections.
+ * If the window end is already in the past, we wake the
+ * timeout task up immediately so that it can be closed.
+ */
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* We don't need to reset the expire if it would
+ * already happen before the close window end.
+ */
+ if (tick_is_le(global.close_spread_end, h1c->task->expire)) {
+ /* Set an expire value shorter than the current value
+ * because the close spread window end comes earlier.
+ */
+ h1c->task->expire = tick_add(now_ms, statistical_prng_range(remaining_window));
+ TRACE_DEVEL("connection timeout set to value before close-spread window end", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn);
+ }
+ }
+ else {
+ /* We are past the soft close window end, wake the timeout
+ * task up immediately.
+ */
+ task_wakeup(h1c->task, TASK_WOKEN_TIMER);
+ }
+ }
+ TRACE_DEVEL("new expiration date", H1_EV_H1C_SEND|H1_EV_H1C_RECV, h1c->conn, 0, 0, (size_t[]){h1c->task->expire});
+ task_queue(h1c->task);
+ }
+}
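+
+/* Worked example for the close-spread logic above (values are illustrative):
+ * with "close-spread-time 10s" and a soft-stop started at t0, an idle front
+ * connection whose timeout is refreshed at t0+4s has remaining_window = 6s
+ * and gets:
+ *
+ *     expire = now + statistical_prng_range(6s);  // uniform over the window
+ *
+ * so, across many connections, closings are spread roughly evenly over the
+ * remaining window instead of all firing at once.
+ */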
+
+static void h1_set_idle_expiration(struct h1c *h1c)
+{
+ if (h1c->flags & H1C_F_IS_BACK || !h1c->task) {
+ TRACE_DEVEL("no idle expiration (backend connection || no task)", H1_EV_H1C_RECV, h1c->conn);
+ h1c->idle_exp = TICK_ETERNITY;
+ return;
+ }
+ if (h1c->state == H1_CS_IDLE) {
+ if (!tick_isset(h1c->idle_exp)) {
+ if ((h1c->flags & H1C_F_WAIT_NEXT_REQ) && /* Not the first request */
+ !b_data(&h1c->ibuf) && /* No input data */
+ tick_isset(h1c->px->timeout.httpka)) { /* K-A timeout set */
+ h1c->idle_exp = tick_add_ifset(now_ms, h1c->px->timeout.httpka);
+ TRACE_DEVEL("set idle expiration (keep-alive timeout)", H1_EV_H1C_RECV, h1c->conn);
+ }
+ else {
+ h1c->idle_exp = tick_add_ifset(now_ms, h1c->px->timeout.httpreq);
+ TRACE_DEVEL("set idle expiration (http-request timeout)", H1_EV_H1C_RECV, h1c->conn);
+ }
+ }
+ }
+ else if (h1c->state < H1_CS_RUNNING) {
+ if (!tick_isset(h1c->idle_exp)) {
+ h1c->idle_exp = tick_add_ifset(now_ms, h1c->px->timeout.httpreq);
+ TRACE_DEVEL("set idle expiration (http-request timeout)", H1_EV_H1C_RECV, h1c->conn);
+ }
+ }
+ else {
+ h1c->idle_exp = TICK_ETERNITY;
+ TRACE_DEVEL("unset idle expiration (running or closing)", H1_EV_H1C_RECV, h1c->conn);
+ }
+}
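+
+/* Example of the selection above (config values are illustrative): with
+ * "timeout http-keep-alive 2s" and "timeout http-request 5s", an IDLE
+ * connection that already served a request and has no buffered input gets
+ * idle_exp = now + 2s, while a connection still waiting for its first
+ * complete request gets idle_exp = now + 5s.
+ */
+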
+/*****************************************************************/
+/* functions below are dedicated to the mux setup and management */
+/*****************************************************************/
+
+/* returns non-zero if there are input data pending for stream h1s. */
+static inline size_t h1s_data_pending(const struct h1s *h1s)
+{
+ const struct h1m *h1m;
+
+ h1m = ((h1s->h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req);
+ return ((h1m->state == H1_MSG_DONE) ? 0 : b_data(&h1s->h1c->ibuf));
+}
+
+/* Creates a new stream connector and the associated stream. <input> is used as
+ * input buffer for the stream. On success, it is transferred to the stream and
+ * the mux is no longer responsible for it. On error, <input> is unchanged, thus
+ * the mux must still take care of it. However, there is nothing special to do
+ * because, on success, <input> is updated to point to BUF_NULL. Thus, calling
+ * b_free() on it is always safe. This function returns the stream connector on
+ * success or NULL on error. */
+static struct stconn *h1s_new_sc(struct h1s *h1s, struct buffer *input)
+{
+ struct h1c *h1c = h1s->h1c;
+
+ TRACE_ENTER(H1_EV_STRM_NEW, h1c->conn, h1s);
+
+ if (h1s->flags & H1S_F_NOT_FIRST)
+ se_fl_set(h1s->sd, SE_FL_NOT_FIRST);
+ if (h1s->req.flags & H1_MF_UPG_WEBSOCKET)
+ se_fl_set(h1s->sd, SE_FL_WEBSOCKET);
+
+ if (!sc_new_from_endp(h1s->sd, h1c->conn->owner, input)) {
+ TRACE_ERROR("SC allocation failure", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, h1c->conn, h1s);
+ goto err;
+ }
+
+ h1c->state = H1_CS_RUNNING;
+ TRACE_LEAVE(H1_EV_STRM_NEW, h1c->conn, h1s);
+ return h1s_sc(h1s);
+
+ err:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1c->conn, h1s);
+ return NULL;
+}
+
+static struct stconn *h1s_upgrade_sc(struct h1s *h1s, struct buffer *input)
+{
+ TRACE_ENTER(H1_EV_STRM_NEW, h1s->h1c->conn, h1s);
+
+ if (stream_upgrade_from_sc(h1s_sc(h1s), input) < 0) {
+ TRACE_ERROR("stream upgrade failure", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, h1s->h1c->conn, h1s);
+ goto err;
+ }
+
+ h1s->h1c->state = H1_CS_RUNNING;
+ TRACE_LEAVE(H1_EV_STRM_NEW, h1s->h1c->conn, h1s);
+ return h1s_sc(h1s);
+
+ err:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1s->h1c->conn, h1s);
+ return NULL;
+}
+
+static struct h1s *h1s_new(struct h1c *h1c)
+{
+ struct h1s *h1s;
+
+ TRACE_ENTER(H1_EV_H1S_NEW, h1c->conn);
+
+ h1s = pool_alloc(pool_head_h1s);
+ if (!h1s) {
+ TRACE_ERROR("H1S allocation failure", H1_EV_H1S_NEW|H1_EV_H1S_END|H1_EV_H1S_ERR, h1c->conn);
+ goto fail;
+ }
+ h1s->h1c = h1c;
+ h1c->h1s = h1s;
+ h1s->sess = NULL;
+ h1s->sd = NULL;
+ h1s->flags = H1S_F_WANT_KAL;
+ h1s->subs = NULL;
+ h1s->rxbuf = BUF_NULL;
+ memset(h1s->ws_key, 0, sizeof(h1s->ws_key));
+
+ h1m_init_req(&h1s->req);
+ h1s->req.flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+
+ h1m_init_res(&h1s->res);
+ h1s->res.flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+
+ h1s->status = 0;
+ h1s->meth = HTTP_METH_OTHER;
+
+ if (h1c->flags & H1C_F_WAIT_NEXT_REQ)
+ h1s->flags |= H1S_F_NOT_FIRST;
+ h1s->h1c->state = H1_CS_EMBRYONIC;
+ h1s->h1c->flags &= ~H1C_F_WAIT_NEXT_REQ;
+ TRACE_LEAVE(H1_EV_H1S_NEW, h1c->conn, h1s);
+ return h1s;
+
+ fail:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1c->conn);
+ return NULL;
+}
+
+static struct h1s *h1c_frt_stream_new(struct h1c *h1c, struct stconn *sc, struct session *sess)
+{
+ struct h1s *h1s;
+
+ TRACE_ENTER(H1_EV_H1S_NEW, h1c->conn);
+
+ h1s = h1s_new(h1c);
+ if (!h1s)
+ goto fail;
+
+ if (sc) {
+ if (sc_attach_mux(sc, h1s, h1c->conn) < 0)
+ goto fail;
+ h1s->sd = sc->sedesc;
+ }
+ else {
+ h1s->sd = sedesc_new();
+ if (!h1s->sd)
+ goto fail;
+ h1s->sd->se = h1s;
+ h1s->sd->conn = h1c->conn;
+ se_fl_set(h1s->sd, SE_FL_T_MUX | SE_FL_ORPHAN);
+ }
+ /* When a request starts, the H1S does not expect data while the request
+ * is not finished. It does not mean the response must not be received,
+ * especially if headers were already forwarded. But it is not
+ * mandatory.
+ */
+ if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_H1_SND))
+ se_fl_set(h1s->sd, SE_FL_MAY_FASTFWD_CONS);
+ se_expect_no_data(h1s->sd);
+ h1s->sess = sess;
+
+ if (h1c->px->options2 & PR_O2_REQBUG_OK)
+ h1s->req.err_pos = -1;
+
+ HA_ATOMIC_INC(&h1c->px_counters->open_streams);
+ HA_ATOMIC_INC(&h1c->px_counters->total_streams);
+
+ h1c->idle_exp = TICK_ETERNITY;
+ h1_set_idle_expiration(h1c);
+ TRACE_LEAVE(H1_EV_H1S_NEW, h1c->conn, h1s);
+ return h1s;
+
+ fail:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1c->conn);
+ h1s_destroy(h1s);
+ return NULL;
+}
+
+static struct h1s *h1c_bck_stream_new(struct h1c *h1c, struct stconn *sc, struct session *sess)
+{
+ struct h1s *h1s;
+
+ TRACE_ENTER(H1_EV_H1S_NEW, h1c->conn);
+
+ h1s = h1s_new(h1c);
+ if (!h1s)
+ goto fail;
+
+ if (sc_attach_mux(sc, h1s, h1c->conn) < 0)
+ goto fail;
+
+ h1s->flags |= H1S_F_RX_BLK;
+ h1s->sd = sc->sedesc;
+ h1s->sess = sess;
+
+ if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_H1_SND))
+ se_fl_set(h1s->sd, SE_FL_MAY_FASTFWD_CONS);
+ h1c->state = H1_CS_RUNNING;
+
+ if (h1c->px->options2 & PR_O2_RSPBUG_OK)
+ h1s->res.err_pos = -1;
+
+ HA_ATOMIC_INC(&h1c->px_counters->open_streams);
+ HA_ATOMIC_INC(&h1c->px_counters->total_streams);
+
+ TRACE_LEAVE(H1_EV_H1S_NEW, h1c->conn, h1s);
+ return h1s;
+
+ fail:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_ERR, h1c->conn);
+ h1s_destroy(h1s);
+ return NULL;
+}
+
+static void h1s_destroy(struct h1s *h1s)
+{
+ if (h1s) {
+ struct h1c *h1c = h1s->h1c;
+
+ TRACE_POINT(H1_EV_H1S_END, h1c->conn, h1s);
+ h1c->h1s = NULL;
+
+ if (h1s->subs)
+ h1s->subs->events = 0;
+
+ h1_release_buf(h1c, &h1s->rxbuf);
+
+ h1c->flags &= ~(H1C_F_WANT_FASTFWD|
+ H1C_F_OUT_FULL|H1C_F_OUT_ALLOC|H1C_F_IN_SALLOC|
+ H1C_F_CO_MSG_MORE|H1C_F_CO_STREAMER);
+
+ if (!(h1c->flags & (H1C_F_EOS|H1C_F_ERR_PENDING|H1C_F_ERROR|H1C_F_ABRT_PENDING|H1C_F_ABRTED)) && /* No error/read0/abort */
+ h1_is_alive(h1c) && /* still alive */
+ (h1s->flags & H1S_F_WANT_KAL) && /* K/A possible */
+ h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE) { /* req/res in DONE state */
+ h1c->state = H1_CS_IDLE;
+ h1c->flags |= H1C_F_WAIT_NEXT_REQ;
+ h1c->req_count++;
+ TRACE_STATE("set idle mode on h1c, waiting for the next request", H1_EV_H1C_ERR, h1c->conn, h1s);
+ }
+ else {
+ h1_close(h1c);
+ TRACE_STATE("close h1c", H1_EV_H1S_END, h1c->conn, h1s);
+ }
+
+ HA_ATOMIC_DEC(&h1c->px_counters->open_streams);
+ BUG_ON(h1s->sd && !se_fl_test(h1s->sd, SE_FL_ORPHAN));
+ sedesc_free(h1s->sd);
+ pool_free(pool_head_h1s, h1s);
+ }
+}
+
+/*
+ * Initialize the mux once it's attached. It is expected that conn->ctx points
+ * to the existing stream connector (for outgoing connections or for incoming
+ * ones during a mux upgrade) or NULL (for incoming ones during the connection
+ * establishment). <input> is always used as Input buffer and may contain
+ * data. It is the caller's responsibility not to reuse it anymore. Returns < 0 on
+ * error.
+ */
+static int h1_init(struct connection *conn, struct proxy *proxy, struct session *sess,
+ struct buffer *input)
+{
+ struct h1c *h1c;
+ struct task *t = NULL;
+ void *conn_ctx = conn->ctx;
+
+ TRACE_ENTER(H1_EV_H1C_NEW);
+
+ h1c = pool_alloc(pool_head_h1c);
+ if (!h1c) {
+ TRACE_ERROR("H1C allocation failure", H1_EV_H1C_NEW|H1_EV_H1C_END|H1_EV_H1C_ERR);
+ goto fail_h1c;
+ }
+ h1c->conn = conn;
+ h1c->px = proxy;
+
+ h1c->state = H1_CS_IDLE;
+ h1c->flags = H1C_F_NONE;
+ h1c->errcode = 0;
+ h1c->ibuf = *input;
+ h1c->obuf = BUF_NULL;
+ h1c->h1s = NULL;
+ h1c->task = NULL;
+ h1c->req_count = 0;
+
+ LIST_INIT(&h1c->buf_wait.list);
+ h1c->wait_event.tasklet = tasklet_new();
+ if (!h1c->wait_event.tasklet)
+ goto fail;
+ h1c->wait_event.tasklet->process = h1_io_cb;
+ h1c->wait_event.tasklet->context = h1c;
+ h1c->wait_event.events = 0;
+ h1c->idle_exp = TICK_ETERNITY;
+
+ if (conn_is_back(conn)) {
+ h1c->flags |= H1C_F_IS_BACK;
+ h1c->shut_timeout = h1c->timeout = proxy->timeout.server;
+ if (tick_isset(proxy->timeout.serverfin))
+ h1c->shut_timeout = proxy->timeout.serverfin;
+
+ h1c->px_counters = EXTRA_COUNTERS_GET(proxy->extra_counters_be,
+ &h1_stats_module);
+ } else {
+ h1c->shut_timeout = h1c->timeout = proxy->timeout.client;
+ if (tick_isset(proxy->timeout.clientfin))
+ h1c->shut_timeout = proxy->timeout.clientfin;
+
+ h1c->px_counters = EXTRA_COUNTERS_GET(proxy->extra_counters_fe,
+ &h1_stats_module);
+
+ LIST_APPEND(&mux_stopping_data[tid].list,
+ &h1c->conn->stopping_list);
+ }
+ if (tick_isset(h1c->timeout)) {
+ t = task_new_here();
+ if (!t) {
+ TRACE_ERROR("H1C task allocation failure", H1_EV_H1C_NEW|H1_EV_H1C_END|H1_EV_H1C_ERR);
+ goto fail;
+ }
+
+ h1c->task = t;
+ t->process = h1_timeout_task;
+ t->context = h1c;
+
+ t->expire = tick_add(now_ms, h1c->timeout);
+ }
+
+ conn->ctx = h1c;
+
+ if (h1c->flags & H1C_F_IS_BACK) {
+ /* Create a new H1S now for backend connection only */
+ if (!h1c_bck_stream_new(h1c, conn_ctx, sess))
+ goto fail;
+ }
+ else if (conn_ctx) {
+ /* Upgraded frontend connection (from TCP) */
+ if (!h1c_frt_stream_new(h1c, conn_ctx, h1c->conn->owner))
+ goto fail;
+
+ /* Attach the SC but Not ready yet */
+ h1c->state = H1_CS_UPGRADING;
+ TRACE_DEVEL("Inherit the SC from TCP connection to perform an upgrade",
+ H1_EV_H1C_NEW|H1_EV_STRM_NEW, h1c->conn, h1c->h1s);
+ }
+
+ if (t) {
+ h1_set_idle_expiration(h1c);
+ t->expire = tick_first(t->expire, h1c->idle_exp);
+ task_queue(t);
+ }
+
+ /* prepare to read something */
+ if (b_data(&h1c->ibuf))
+ tasklet_wakeup(h1c->wait_event.tasklet);
+ else if (h1_recv_allowed(h1c))
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+
+ if (!conn_is_back(conn))
+ proxy_inc_fe_cum_sess_ver_ctr(sess->listener, proxy, 1);
+ HA_ATOMIC_INC(&h1c->px_counters->open_conns);
+ HA_ATOMIC_INC(&h1c->px_counters->total_conns);
+
+ /* mux->wake will be called soon to complete the operation */
+ TRACE_LEAVE(H1_EV_H1C_NEW, conn, h1c->h1s);
+ return 0;
+
+ fail:
+ task_destroy(t);
+ tasklet_free(h1c->wait_event.tasklet);
+ pool_free(pool_head_h1c, h1c);
+ fail_h1c:
+ if (!conn_is_back(conn))
+ LIST_DEL_INIT(&conn->stopping_list);
+ conn->ctx = conn_ctx; // restore saved context
+ TRACE_DEVEL("leaving in error", H1_EV_H1C_NEW|H1_EV_H1C_END|H1_EV_H1C_ERR);
+ return -1;
+}
+
+/* release function. This one should be called to free all resources allocated
+ * to the mux.
+ */
+static void h1_release(struct h1c *h1c)
+{
+ struct connection *conn = NULL;
+
+ TRACE_POINT(H1_EV_H1C_END);
+
+ /* The connection must be attached to this mux to be released */
+ if (h1c->conn && h1c->conn->ctx == h1c)
+ conn = h1c->conn;
+
+ if (conn && h1c->flags & H1C_F_UPG_H2C) {
+ TRACE_DEVEL("upgrading H1 to H2", H1_EV_H1C_END, conn);
+ /* Make sure we're no longer subscribed to anything */
+ if (h1c->wait_event.events)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx,
+ h1c->wait_event.events, &h1c->wait_event);
+ if (conn_upgrade_mux_fe(conn, NULL, &h1c->ibuf, ist("h2"), PROTO_MODE_HTTP) != -1) {
+ /* connection successfully upgraded to H2, this
+ * mux was already released */
+ return;
+ }
+ TRACE_ERROR("h2 upgrade failed", H1_EV_H1C_END|H1_EV_H1C_ERR, conn);
+ sess_log(conn->owner); /* Log if the upgrade failed */
+ }
+
+
+ if (LIST_INLIST(&h1c->buf_wait.list))
+ LIST_DEL_INIT(&h1c->buf_wait.list);
+
+ h1_release_buf(h1c, &h1c->ibuf);
+ h1_release_buf(h1c, &h1c->obuf);
+
+ if (h1c->task) {
+ h1c->task->context = NULL;
+ task_wakeup(h1c->task, TASK_WOKEN_OTHER);
+ h1c->task = NULL;
+ }
+
+ if (h1c->wait_event.tasklet) {
+ tasklet_free(h1c->wait_event.tasklet);
+ h1c->wait_event.tasklet = NULL;
+ }
+
+ h1s_destroy(h1c->h1s);
+ if (conn) {
+ if (h1c->wait_event.events != 0)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx, h1c->wait_event.events,
+ &h1c->wait_event);
+ h1_shutw_conn(conn);
+ }
+
+ HA_ATOMIC_DEC(&h1c->px_counters->open_conns);
+ pool_free(pool_head_h1c, h1c);
+
+ if (conn) {
+ if (!conn_is_back(conn))
+ LIST_DEL_INIT(&conn->stopping_list);
+
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ TRACE_DEVEL("freeing conn", H1_EV_H1C_END, conn);
+
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+}
+
+/******************************************************/
+/* functions below are for the H1 protocol processing */
+/******************************************************/
+/* Parse the request version and set H1_MF_VER_11 on <h1m> if the version is
+ * greater than or equal to 1.1
+ */
+static void h1_parse_req_vsn(struct h1m *h1m, const struct htx_sl *sl)
+{
+ const char *p = HTX_SL_REQ_VPTR(sl);
+
+ if ((HTX_SL_REQ_VLEN(sl) == 8) &&
+ (*(p + 5) > '1' ||
+ (*(p + 5) == '1' && *(p + 7) >= '1')))
+ h1m->flags |= H1_MF_VER_11;
+}
+
+/* Parse the response version and set H1_MF_VER_11 on <h1m> if the version is
+ * greater than or equal to 1.1
+ */
+static void h1_parse_res_vsn(struct h1m *h1m, const struct htx_sl *sl)
+{
+ const char *p = HTX_SL_RES_VPTR(sl);
+
+ if ((HTX_SL_RES_VLEN(sl) == 8) &&
+ (*(p + 5) > '1' ||
+ (*(p + 5) == '1' && *(p + 7) >= '1')))
+ h1m->flags |= H1_MF_VER_11;
+}
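+
+/* Byte layout assumed by the two version parsers above: an 8-char version
+ * string such as "HTTP/1.1" has its major digit at p+5 and its minor digit
+ * at p+7:
+ *
+ *     H  T  T  P  /  1  .  1
+ *     0  1  2  3  4  5  6  7
+ *
+ * so the test sets H1_MF_VER_11 for "HTTP/1.1" and above but not for
+ * "HTTP/1.0".
+ */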
+
+/* Deduce the connection mode of the client connection, depending on the
+ * configuration and the H1 message flags. This function is called twice, the
+ * first time when the request is parsed and the second time when the response
+ * is parsed.
+ */
+static void h1_set_cli_conn_mode(struct h1s *h1s, struct h1m *h1m)
+{
+ struct proxy *fe = h1s->h1c->px;
+
+ if (h1m->flags & H1_MF_RESP) {
+ /* Output direction: second pass */
+ if ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) ||
+ h1s->status == 101) {
+ /* Either we've established an explicit tunnel, or we're
+ * switching the protocol. In both cases, we're very unlikely to
+ * understand the next protocols. We have to switch to tunnel
+ * mode, so that we transfer the request and responses then let
+ * this protocol pass unmodified. When we later implement
+ * specific parsers for such protocols, we'll want to check the
+ * Upgrade header which contains information about that protocol
+ * for responses with status 101 (eg: see RFC2817 about TLS).
+ */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_TUN;
+ TRACE_STATE("set tunnel mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if (h1s->flags & H1S_F_WANT_KAL) {
+ /* By default the client is in KAL mode. CLOSE mode means
+ * it is imposed by the client itself. So only change
+ * KAL mode here. */
+ if (!(h1m->flags & H1_MF_XFER_LEN) || (h1m->flags & H1_MF_CONN_CLO)) {
+ /* no length known or explicit close => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("detect close mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if (!(h1m->flags & H1_MF_CONN_KAL) &&
+ (fe->options & PR_O_HTTP_MODE) == PR_O_HTTP_CLO) {
+ /* no explicit keep-alive and option httpclose => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+ }
+ else {
+ /* Input direction: first pass */
+ if (!(h1m->flags & (H1_MF_VER_11|H1_MF_CONN_KAL)) || h1m->flags & H1_MF_CONN_CLO) {
+ /* no explicit keep-alive in HTTP/1.0 or explicit close => close*/
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("detect close mode (req)", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+
+ /* If KAL, check if the frontend is stopping. If yes, switch in CLO mode
+ * unless a 'close-spread-time' option is set (either to define a
+ * soft-close window or to disable active closing (close-spread-time
+ * option set to 0).
+ */
+ if (h1s->flags & H1S_F_WANT_KAL && (fe->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ int want_clo = 1;
+ /* If a close-spread-time option is set, we want to avoid
+ * closing all the active HTTP connections at once so we add a
+ * random factor that will spread the closing.
+ */
+ if (tick_isset(global.close_spread_end)) {
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* This should increase the closing rate the further along
+ * the window we are.
+ */
+ want_clo = (remaining_window <= statistical_prng_range(global.close_spread_time));
+ }
+ }
+ else if (global.tune.options & GTUNE_DISABLE_ACTIVE_CLOSE)
+ want_clo = 0;
+
+ if (want_clo) {
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("stopping, set close mode", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+}
+
+/* Deduce the connection mode of the server connection, depending on the
+ * configuration and the H1 message flags. This function is called twice, the
+ * first time when the request is parsed and the second time when the response
+ * is parsed.
+ */
+static void h1_set_srv_conn_mode(struct h1s *h1s, struct h1m *h1m)
+{
+ struct session *sess = h1s->sess;
+ struct proxy *be = h1s->h1c->px;
+ int fe_flags = sess ? sess->fe->options : 0;
+
+ if (h1m->flags & H1_MF_RESP) {
+ /* Input direction: second pass */
+ if ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) ||
+ h1s->status == 101) {
+ /* Either we've established an explicit tunnel, or we're
+ * switching the protocol. In both cases, we're very unlikely to
+ * understand the next protocols. We have to switch to tunnel
+ * mode, so that we transfer the request and responses then let
+ * this protocol pass unmodified. When we later implement
+ * specific parsers for such protocols, we'll want to check the
+ * Upgrade header which contains information about that protocol
+ * for responses with status 101 (eg: see RFC2817 about TLS).
+ */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_TUN;
+ TRACE_STATE("set tunnel mode (resp)", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if (h1s->flags & H1S_F_WANT_KAL) {
+ /* By default the server is in KAL mode. CLOSE mode means
+ * it is imposed by haproxy itself. So only change KAL
+ * mode here. */
+ if (!(h1m->flags & H1_MF_XFER_LEN) || h1m->flags & H1_MF_CONN_CLO ||
+ !(h1m->flags & (H1_MF_VER_11|H1_MF_CONN_KAL))){
+ /* no length known or explicit close or no explicit keep-alive in HTTP/1.0 => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("detect close mode (resp)", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+ }
+ else {
+ /* Output direction: first pass */
+ if (h1m->flags & H1_MF_CONN_CLO) {
+ /* explicit close => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("detect close mode (req)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if (!(h1m->flags & H1_MF_CONN_KAL) &&
+ ((fe_flags & PR_O_HTTP_MODE) == PR_O_HTTP_SCL ||
+ (be->options & PR_O_HTTP_MODE) == PR_O_HTTP_SCL ||
+ (be->options & PR_O_HTTP_MODE) == PR_O_HTTP_CLO)) {
+ /* no explicit keep-alive and option httpclose/server-close => close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (req)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ }
+
+ /* If KAL, check if the backend is stopping. If yes, switch in CLO mode */
+ if (h1s->flags & H1S_F_WANT_KAL && (be->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("stopping, set close mode", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+}
+
+static void h1_update_req_conn_value(struct h1s *h1s, struct h1m *h1m, struct ist *conn_val)
+{
+ struct proxy *px = h1s->h1c->px;
+
+ /* Don't update "Connection:" header in TUNNEL mode or if "Upgrage"
+ * token is found
+ */
+ if (h1s->flags & H1S_F_WANT_TUN || h1m->flags & H1_MF_CONN_UPG)
+ return;
+
+ if (h1s->flags & H1S_F_WANT_KAL || px->options2 & PR_O2_FAKE_KA) {
+ if (!(h1m->flags & H1_MF_VER_11)) {
+ TRACE_STATE("add \"Connection: keep-alive\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ *conn_val = ist("keep-alive");
+ }
+ }
+ else { /* H1S_F_WANT_CLO && !PR_O2_FAKE_KA */
+ if (h1m->flags & H1_MF_VER_11) {
+ TRACE_STATE("add \"Connection: close\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ *conn_val = ist("close");
+ }
+ }
+}
+
+static void h1_update_res_conn_value(struct h1s *h1s, struct h1m *h1m, struct ist *conn_val)
+{
+ /* Don't update "Connection:" header in TUNNEL mode or if "Upgrage"
+ * token is found
+ */
+ if (h1s->flags & H1S_F_WANT_TUN || h1m->flags & H1_MF_CONN_UPG)
+ return;
+
+ if (h1s->flags & H1S_F_WANT_KAL) {
+ if (!(h1m->flags & H1_MF_VER_11) ||
+ !((h1m->flags & h1s->req.flags) & H1_MF_VER_11)) {
+ TRACE_STATE("add \"Connection: keep-alive\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ *conn_val = ist("keep-alive");
+ }
+ }
+ else { /* H1S_F_WANT_CLO */
+ if (h1m->flags & H1_MF_VER_11) {
+ TRACE_STATE("add \"Connection: close\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ *conn_val = ist("close");
+ }
+ }
+}
+
+static void h1_process_input_conn_mode(struct h1s *h1s, struct h1m *h1m, struct htx *htx)
+{
+ if (!(h1s->h1c->flags & H1C_F_IS_BACK))
+ h1_set_cli_conn_mode(h1s, h1m);
+ else
+ h1_set_srv_conn_mode(h1s, h1m);
+}
+
+static void h1_process_output_conn_mode(struct h1s *h1s, struct h1m *h1m, struct ist *conn_val)
+{
+ if (!(h1s->h1c->flags & H1C_F_IS_BACK))
+ h1_set_cli_conn_mode(h1s, h1m);
+ else
+ h1_set_srv_conn_mode(h1s, h1m);
+
+ if (!(h1m->flags & H1_MF_RESP))
+ h1_update_req_conn_value(h1s, h1m, conn_val);
+ else
+ h1_update_res_conn_value(h1s, h1m, conn_val);
+}
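+
+/* Simplified summary of the "Connection:" header rewriting performed by the
+ * two update functions above (the header itself is emitted later by the
+ * output code from <conn_val>):
+ *
+ *     keep-alive wanted + HTTP/1.0 peer  => "Connection: keep-alive" added
+ *     close wanted      + HTTP/1.1 peer  => "Connection: close" added
+ *     otherwise                          => header left untouched
+ */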
+
+/* Try to adjust the case of the message header name using the global map
+ * <hdrs_map>.
+ */
+static void h1_adjust_case_outgoing_hdr(struct h1s *h1s, struct h1m *h1m, struct ist *name)
+{
+ struct ebpt_node *node;
+ struct h1_hdr_entry *entry;
+
+ /* No entry in the map, do nothing */
+ if (eb_is_empty(&hdrs_map.map))
+ return;
+
+ /* No conversion for the request headers */
+ if (!(h1m->flags & H1_MF_RESP) && !(h1s->h1c->px->options2 & PR_O2_H1_ADJ_BUGSRV))
+ return;
+
+ /* No conversion for the response headers */
+ if ((h1m->flags & H1_MF_RESP) && !(h1s->h1c->px->options2 & PR_O2_H1_ADJ_BUGCLI))
+ return;
+
+ node = ebis_lookup_len(&hdrs_map.map, name->ptr, name->len);
+ if (!node)
+ return;
+ entry = container_of(node, struct h1_hdr_entry, node);
+ name->ptr = entry->name.ptr;
+ name->len = entry->name.len;
+}
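+
+/* The map consulted above is typically populated from the global
+ * "h1-case-adjust" directives; e.g. with
+ *
+ *     global
+ *         h1-case-adjust content-length Content-Length
+ *
+ * and "option h1-case-adjust-bogus-server" set on the backend, an outgoing
+ * "content-length" request header is rewritten as "Content-Length".
+ */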
+
+/* Append the description of what is present in error snapshot <es> into <out>.
+ * The description must be small enough to always fit in a buffer. The output
+ * buffer may be the trash so the trash must not be used inside this function.
+ */
+static void h1_show_error_snapshot(struct buffer *out, const struct error_snapshot *es)
+{
+ chunk_appendf(out,
+ " H1 connection flags 0x%08x, H1 stream flags 0x%08x\n"
+ " H1 msg state %s(%d), H1 msg flags 0x%08x\n"
+ " H1 chunk len %lld bytes, H1 body len %lld bytes :\n",
+ es->ctx.h1.c_flags, es->ctx.h1.s_flags,
+ h1m_state_str(es->ctx.h1.state), es->ctx.h1.state,
+ es->ctx.h1.m_flags, es->ctx.h1.m_clen, es->ctx.h1.m_blen);
+}
+/*
+ * Capture a bad request or response and archive it in the proxy's structure.
+ * By default it tries to report the error position as h1m->err_pos. However if
+ * this one is not set, it will then report h1m->next, which is the last known
+ * parsing point. The function is able to deal with wrapping buffers. It always
+ * displays buffers as a contiguous area starting at buf->p. The direction is
+ * determined thanks to the h1m's flags.
+ */
+static void h1_capture_bad_message(struct h1c *h1c, struct h1s *h1s,
+ struct h1m *h1m, struct buffer *buf)
+{
+ struct session *sess = h1s->sess;
+ struct proxy *proxy = h1c->px;
+ struct proxy *other_end;
+ union error_snapshot_ctx ctx;
+
+ if (h1c->state == H1_CS_UPGRADING || h1c->state == H1_CS_RUNNING) {
+ if (sess == NULL)
+ sess = __sc_strm(h1s_sc(h1s))->sess;
+ if (!(h1m->flags & H1_MF_RESP))
+ other_end = __sc_strm(h1s_sc(h1s))->be;
+ else
+ other_end = sess->fe;
+ } else
+ other_end = NULL;
+
+ /* http-specific part now */
+ ctx.h1.state = h1m->state;
+ ctx.h1.c_flags = h1c->flags;
+ ctx.h1.s_flags = h1s->flags;
+ ctx.h1.m_flags = h1m->flags;
+ ctx.h1.m_clen = h1m->curr_len;
+ ctx.h1.m_blen = h1m->body_len;
+
+ proxy_capture_error(proxy, !!(h1m->flags & H1_MF_RESP), other_end,
+ h1c->conn->target, sess, buf, 0, 0,
+ (h1m->err_pos >= 0) ? h1m->err_pos : h1m->next,
+ &ctx, h1_show_error_snapshot);
+}
+
+/* Emit the chunksize followed by a CRLF in front of data of the buffer
+ * <buf>. It goes backwards and starts with the byte before the buffer's
+ * head. The caller is responsible for ensuring there is enough room left before
+ * the buffer's head for the string.
+ */
+static void h1_prepend_chunk_size(struct buffer *buf, size_t chksz)
+{
+ char *beg, *end;
+
+ beg = end = b_head(buf);
+ *--beg = '\n';
+ *--beg = '\r';
+ do {
+ *--beg = hextab[chksz & 0xF];
+ } while (chksz >>= 4);
+ buf->head -= (end - beg);
+ b_add(buf, end - beg);
+}
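+
+/* Worked example: with "hello!" (6 bytes) already at the head of <buf>,
+ * h1_prepend_chunk_size(buf, 6) writes "6\r\n" backwards just before the
+ * head, yielding "6\r\nhello!". A size of 0x1a2b would likewise yield the
+ * prefix "1a2b\r\n".
+ */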
+
+/* Emit the chunksize followed by a CRLF after the data of the buffer
+ * <buf>. Returns 0 on error.
+ */
+static int h1_append_chunk_size(struct buffer *buf, size_t chksz)
+{
+ char tmp[10];
+ char *beg, *end;
+
+ beg = end = tmp+10;
+ *--beg = '\n';
+ *--beg = '\r';
+ do {
+ *--beg = hextab[chksz & 0xF];
+ } while (chksz >>= 4);
+
+ return chunk_memcat(buf, beg, end - beg);
+}
+
+/* Emit a CRLF in front of data of the buffer <buf>. It goes backwards and
+ * starts with the byte before the buffer's head. The caller is responsible for
+ * ensuring there is enough room left before the buffer's head for the string.
+ */
+static void h1_prepend_chunk_crlf(struct buffer *buf)
+{
+ char *head;
+
+ head = b_head(buf);
+ *--head = '\n';
+ *--head = '\r';
+ buf->head -= 2;
+ b_add(buf, 2);
+}
+
+
+/* Emit a CRLF after the data of the buffer <buf>. The caller is responsible for
+ * ensuring there is enough room left in the buffer for the string. */
+static void h1_append_chunk_crlf(struct buffer *buf)
+{
+ *(b_peek(buf, b_data(buf))) = '\r';
+ *(b_peek(buf, b_data(buf) + 1)) = '\n';
+ b_add(buf, 2);
+}
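+
+/* Taken together, the helpers above produce standard HTTP/1.1 chunked
+ * framing ("<size-hex> CRLF <data> CRLF"): prepending the size to a buffer
+ * holding "hello world" (11 bytes) and appending the trailing CRLF yields:
+ *
+ *     b\r\nhello world\r\n
+ */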
+
+/*
+ * Switch the stream to tunnel mode. This function must only be called on 2xx
+ * (successful) replies to CONNECT requests or on 101 (switching protocol).
+ */
+static void h1_set_tunnel_mode(struct h1s *h1s)
+{
+ struct h1c *h1c = h1s->h1c;
+
+ h1s->req.state = H1_MSG_TUNNEL;
+ h1s->req.flags &= ~(H1_MF_XFER_LEN|H1_MF_CLEN|H1_MF_CHNK);
+
+ h1s->res.state = H1_MSG_TUNNEL;
+ h1s->res.flags &= ~(H1_MF_XFER_LEN|H1_MF_CLEN|H1_MF_CHNK);
+
+ TRACE_STATE("switch H1 stream in tunnel mode", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+
+ if (h1s->flags & H1S_F_RX_BLK) {
+ h1s->flags &= ~H1S_F_RX_BLK;
+ h1_wake_stream_for_recv(h1s);
+ TRACE_STATE("Re-enable input processing", H1_EV_RX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+ if (h1s->flags & H1S_F_TX_BLK) {
+ h1s->flags &= ~H1S_F_TX_BLK;
+ h1_wake_stream_for_send(h1s);
+ TRACE_STATE("Re-enable output processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+}
+
+/* Search for a websocket key header. The message should have been identified
+ * as a valid websocket handshake.
+ *
+ * On the request side, if found the key is stored in the session. It might be
+ * needed to calculate response key if the server side is using http/2.
+ *
+ * On the response side, the key might be verified if haproxy has been
+ * responsible for the generation of a key. This happens when a h2 client is
+ * interfaced with a h1 server.
+ *
+ * Returns 0 if no key found or invalid key
+ */
+static int h1_search_websocket_key(struct h1s *h1s, struct h1m *h1m, struct htx *htx)
+{
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ struct ist n, v;
+ int ws_key_found = 0, idx;
+
+ idx = htx_get_head(htx); // returns the SL that we skip
+ while ((idx = htx_get_next(htx, idx)) != -1) {
+ blk = htx_get_blk(htx, idx);
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type != HTX_BLK_HDR)
+ break;
+
+ n = htx_get_blk_name(htx, blk);
+ v = htx_get_blk_value(htx, blk);
+
+ /* Websocket key is the base64 encoding of 16 bytes */
+ if (isteqi(n, ist("sec-websocket-key")) && v.len == 24 &&
+ !(h1m->flags & H1_MF_RESP)) {
+ /* Copy the key on request side
+ * we might need it if the server is using h2 and does
+ * not provide the response
+ */
+ memcpy(h1s->ws_key, v.ptr, 24);
+ ws_key_found = 1;
+ break;
+ }
+ else if (isteqi(n, ist("sec-websocket-accept")) &&
+ h1m->flags & H1_MF_RESP) {
+ /* Need to verify the response key if the input was
+ * generated by haproxy
+ */
+ if (h1s->ws_key[0]) {
+ char key[29];
+ h1_calculate_ws_output_key(h1s->ws_key, key);
+ if (!isteqi(ist(key), v))
+ break;
+ }
+ ws_key_found = 1;
+ break;
+ }
+ }
+
+ /* missing websocket key, reject the message */
+ if (!ws_key_found) {
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ return 0;
+ }
+
+ return 1;
+}
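+
+/* For reference, the verification above follows RFC 6455: the expected
+ * Sec-WebSocket-Accept value is the base64 encoding of the SHA-1 hash of the
+ * request key concatenated with the GUID
+ * "258EAFA5-E914-47DA-95CA-C5AB0DC85B11", a 28-char string which
+ * h1_calculate_ws_output_key() is expected to produce (hence the 29-byte
+ * buffer, including the trailing NUL).
+ */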
+
+/*
+ * Parse HTTP/1 headers. It returns the number of bytes parsed if > 0, or 0 if
+ * it couldn't proceed. Parsing errors are reported by setting H1S_F_*_ERROR
+ * flag. If more room is requested, the H1S_F_RX_CONGESTED flag is set. It
+ * relies on the function h1_parse_msg_hdrs() to do the parsing.
+ */
+static size_t h1_handle_headers(struct h1s *h1s, struct h1m *h1m, struct htx *htx,
+ struct buffer *buf, size_t *ofs, size_t max)
+{
+ union h1_sl h1sl;
+ int ret = 0;
+
+ TRACE_ENTER(H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s, 0, (size_t[]){max});
+
+ if (h1s->meth == HTTP_METH_CONNECT)
+ h1m->flags |= H1_MF_METH_CONNECT;
+ if (h1s->meth == HTTP_METH_HEAD)
+ h1m->flags |= H1_MF_METH_HEAD;
+
+ ret = h1_parse_msg_hdrs(h1m, &h1sl, htx, buf, *ofs, max);
+ if (ret <= 0) {
+ TRACE_DEVEL("leaving on missing data or error", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ if (ret == -1) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ TRACE_ERROR("parsing error, reject H1 message", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ }
+ else if (ret == -2) {
+ TRACE_STATE("RX path congested, waiting for more space", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_BLK, h1s->h1c->conn, h1s);
+ h1s->flags |= H1S_F_RX_CONGESTED;
+ }
+ ret = 0;
+ goto end;
+ }
+
+
+ /* Reject HTTP/1.0 GET/HEAD/DELETE requests with a payload except if
+ * the accept_payload_with_any_method global option is set.
+ * There is a payload if the content-length is not null or the payload
+ * is chunk-encoded. A parsing error is reported but a
+ * 413-Payload-Too-Large is returned instead of a 400-Bad-Request.
+ */
+ if (!accept_payload_with_any_method &&
+ !(h1m->flags & (H1_MF_RESP|H1_MF_VER_11)) &&
+ (((h1m->flags & H1_MF_CLEN) && h1m->body_len) || (h1m->flags & H1_MF_CHNK)) &&
+ (h1sl.rq.meth == HTTP_METH_GET || h1sl.rq.meth == HTTP_METH_HEAD || h1sl.rq.meth == HTTP_METH_DELETE)) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ h1s->h1c->errcode = 413;
+ TRACE_ERROR("HTTP/1.0 GET/HEAD/DELETE request with a payload forbidden", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ ret = 0;
+ goto end;
+ }
+
+ /* Reject any message with an unknown transfer-encoding, in fact any
+ * encoding other than "chunked". A 422-Unprocessable-Content is
+ * returned for an invalid request, a 502-Bad-Gateway for an invalid
+ * response.
+ */
+ if (h1m->flags & H1_MF_TE_OTHER) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ if (!(h1m->flags & H1_MF_RESP))
+ h1s->h1c->errcode = 422;
+ TRACE_ERROR("Unknown transfer-encoding", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ ret = 0;
+ goto end;
+ }
+
+ /* If websocket handshake, search for the websocket key */
+ if ((h1m->flags & (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) ==
+ (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) {
+ int ws_ret = h1_search_websocket_key(h1s, h1m, htx);
+ if (!ws_ret) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ TRACE_ERROR("missing/invalid websocket key, reject H1 message", H1_EV_RX_DATA|H1_EV_RX_HDRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+
+ ret = 0;
+ goto end;
+ }
+ }
+
+ if (h1m->err_pos >= 0) {
+ /* Maybe we found an error during the parsing while we were
+ * configured not to block on that, so we have to capture it
+ * now.
+ */
+ TRACE_STATE("Ignored parsing error", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ }
+
+ if (!(h1m->flags & H1_MF_RESP)) {
+ h1s->meth = h1sl.rq.meth;
+ if (h1s->meth == HTTP_METH_HEAD)
+ h1s->flags |= H1S_F_BODYLESS_RESP;
+ }
+ else {
+ h1s->status = h1sl.st.status;
+ if (h1s->status == 204 || h1s->status == 304)
+ h1s->flags |= H1S_F_BODYLESS_RESP;
+ }
+ h1_process_input_conn_mode(h1s, h1m, htx);
+ *ofs += ret;
+
+ end:
+ TRACE_LEAVE(H1_EV_RX_DATA|H1_EV_RX_HDRS, h1s->h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+/*
+ * Parse HTTP/1 body. It returns the number of bytes parsed if > 0, or 0 if it
+ * couldn't proceed. Parsing errors are reported by setting an H1S_F_*_ERROR
+ * flag. It relies on the function h1_parse_msg_data() to do the parsing.
+ */
+static size_t h1_handle_data(struct h1s *h1s, struct h1m *h1m, struct htx **htx,
+ struct buffer *buf, size_t *ofs, size_t max,
+ struct buffer *htxbuf)
+{
+ size_t ret;
+
+ TRACE_ENTER(H1_EV_RX_DATA|H1_EV_RX_BODY, h1s->h1c->conn, h1s, 0, (size_t[]){max});
+ ret = h1_parse_msg_data(h1m, htx, buf, *ofs, max, htxbuf);
+ if (!ret) {
+ TRACE_DEVEL("leaving on missing data or error", H1_EV_RX_DATA|H1_EV_RX_BODY, h1s->h1c->conn, h1s);
+ if ((*htx)->flags & HTX_FL_PARSING_ERROR) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ TRACE_ERROR("parsing error, reject H1 message", H1_EV_RX_DATA|H1_EV_RX_BODY|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ }
+ goto end;
+ }
+
+ *ofs += ret;
+
+ end:
+ if (b_data(buf) != *ofs && (h1m->state == H1_MSG_DATA || h1m->state == H1_MSG_TUNNEL)) {
+ TRACE_STATE("RX path congested, waiting for more space", H1_EV_RX_DATA|H1_EV_RX_BODY|H1_EV_H1S_BLK, h1s->h1c->conn, h1s);
+ h1s->flags |= H1S_F_RX_CONGESTED;
+ }
+
+ TRACE_LEAVE(H1_EV_RX_DATA|H1_EV_RX_BODY, h1s->h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+/*
+ * Parse HTTP/1 trailers. It returns the number of bytes parsed if > 0, or 0 if
+ * it couldn't proceed. Parsing errors are reported by setting an
+ * H1S_F_*_ERROR flag and filling the h1s->err_pos and h1s->err_state fields.
+ * This function is responsible for updating the parser state <h1m>. If more
+ * room is requested, the H1S_F_RX_CONGESTED flag is set.
+ */
+static size_t h1_handle_trailers(struct h1s *h1s, struct h1m *h1m, struct htx *htx,
+ struct buffer *buf, size_t *ofs, size_t max)
+{
+ int ret;
+
+ TRACE_ENTER(H1_EV_RX_DATA|H1_EV_RX_TLRS, h1s->h1c->conn, h1s, 0, (size_t[]){max});
+ ret = h1_parse_msg_tlrs(h1m, htx, buf, *ofs, max);
+ if (ret <= 0) {
+ TRACE_DEVEL("leaving on missing data or error", H1_EV_RX_DATA|H1_EV_RX_BODY, h1s->h1c->conn, h1s);
+ if (ret == -1) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ TRACE_ERROR("parsing error, reject H1 message", H1_EV_RX_DATA|H1_EV_RX_TLRS|H1_EV_H1S_ERR, h1s->h1c->conn, h1s);
+ h1_capture_bad_message(h1s->h1c, h1s, h1m, buf);
+ }
+ else if (ret == -2) {
+ TRACE_STATE("RX path congested, waiting for more space", H1_EV_RX_DATA|H1_EV_RX_TLRS|H1_EV_H1S_BLK, h1s->h1c->conn, h1s);
+ h1s->flags |= H1S_F_RX_CONGESTED;
+ }
+ ret = 0;
+ goto end;
+ }
+
+ *ofs += ret;
+
+ end:
+ TRACE_LEAVE(H1_EV_RX_DATA|H1_EV_RX_TLRS, h1s->h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+/*
+ * Process incoming data. It parses data and transfers them from h1c->ibuf into
+ * <buf>. It returns the number of bytes parsed and transferred if > 0, or 0 if
+ * it couldn't proceed.
+ *
+ * WARNING: H1S_F_RX_CONGESTED flag must be removed before processing input data.
+ */
+static size_t h1_process_demux(struct h1c *h1c, struct buffer *buf, size_t count)
+{
+ struct h1s *h1s = h1c->h1s;
+ struct h1m *h1m;
+ struct htx *htx;
+ size_t data;
+ size_t ret = 0;
+ size_t total = 0;
+
+ htx = htx_from_buf(buf);
+ TRACE_ENTER(H1_EV_RX_DATA, h1c->conn, h1s, htx, (size_t[]){count});
+
+ h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->req : &h1s->res);
+ data = htx->data;
+
+ if (h1s->flags & (H1S_F_INTERNAL_ERROR|H1S_F_PARSING_ERROR|H1S_F_NOT_IMPL_ERROR))
+ goto end;
+
+ if (h1s->flags & H1S_F_RX_BLK)
+ goto out;
+
+ /* Always remove congestion flags and try to process more input data */
+ h1s->flags &= ~H1S_F_RX_CONGESTED;
+
+ do {
+ size_t used = htx_used_space(htx);
+
+ if (h1m->state <= H1_MSG_LAST_LF) {
+ TRACE_PROTO("parsing message headers", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1c->conn, h1s);
+ ret = h1_handle_headers(h1s, h1m, htx, &h1c->ibuf, &total, count);
+ if (!ret)
+ break;
+
+ TRACE_USER((!(h1m->flags & H1_MF_RESP) ? "rcvd H1 request headers" : "rcvd H1 response headers"),
+ H1_EV_RX_DATA|H1_EV_RX_HDRS, h1c->conn, h1s, htx, (size_t[]){ret});
+
+ if ((h1m->flags & H1_MF_RESP) &&
+ h1s->status < 200 && (h1s->status == 100 || h1s->status >= 102)) {
+ h1m_init_res(&h1s->res);
+ h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+ TRACE_STATE("1xx response rcvd", H1_EV_RX_DATA|H1_EV_RX_HDRS, h1c->conn, h1s);
+ }
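+ /* (e.g. a "100 Continue" or "103 Early Hints" interim response: the
+ * final response will follow on the same connection, so response
+ * parsing is restarted from scratch. 101 is excluded above because it
+ * switches protocols.)
+ */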
+ }
+ else if (h1m->state < H1_MSG_TRAILERS) {
+ TRACE_PROTO("parsing message payload", H1_EV_RX_DATA|H1_EV_RX_BODY, h1c->conn, h1s);
+ ret = h1_handle_data(h1s, h1m, &htx, &h1c->ibuf, &total, count, buf);
+ if (h1m->state < H1_MSG_TRAILERS)
+ break;
+
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "rcvd H1 request payload data" : "rcvd H1 response payload data"),
+ H1_EV_RX_DATA|H1_EV_RX_BODY, h1c->conn, h1s, htx, (size_t[]){ret});
+ }
+ else if (h1m->state == H1_MSG_TRAILERS) {
+ TRACE_PROTO("parsing message trailers", H1_EV_RX_DATA|H1_EV_RX_TLRS, h1c->conn, h1s);
+ ret = h1_handle_trailers(h1s, h1m, htx, &h1c->ibuf, &total, count);
+ if (h1m->state != H1_MSG_DONE)
+ break;
+
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "rcvd H1 request trailers" : "rcvd H1 response trailers"),
+ H1_EV_RX_DATA|H1_EV_RX_TLRS, h1c->conn, h1s, htx, (size_t[]){ret});
+ }
+ else if (h1m->state == H1_MSG_DONE) {
+ TRACE_USER((!(h1m->flags & H1_MF_RESP) ? "H1 request fully rcvd" : "H1 response fully rcvd"),
+ H1_EV_RX_DATA|H1_EV_RX_EOI, h1c->conn, h1s, htx);
+
+ if (!(h1c->flags & H1C_F_IS_BACK)) {
+ /* The request was fully received. It means the H1S now
+ * expects data from the opposite side.
+ */
+ se_expect_data(h1s->sd);
+ }
+
+ if ((h1m->flags & H1_MF_RESP) &&
+ ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) || h1s->status == 101))
+ h1_set_tunnel_mode(h1s);
+ else {
+ if (h1s->req.state < H1_MSG_DONE || h1s->res.state < H1_MSG_DONE) {
+ /* Unfinished transaction: block this input side waiting the end of the output side */
+ h1s->flags |= H1S_F_RX_BLK;
+ TRACE_STATE("Disable input processing", H1_EV_RX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ }
+ if (h1s->flags & H1S_F_TX_BLK) {
+ h1s->flags &= ~H1S_F_TX_BLK;
+ h1_wake_stream_for_send(h1s);
+ TRACE_STATE("Re-enable output processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+ break;
+ }
+ }
+ else if (h1m->state == H1_MSG_TUNNEL) {
+ TRACE_PROTO("parsing tunneled data", H1_EV_RX_DATA, h1c->conn, h1s);
+ ret = h1_handle_data(h1s, h1m, &htx, &h1c->ibuf, &total, count, buf);
+ if (!ret)
+ break;
+
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "rcvd H1 request tunneled data" : "rcvd H1 response tunneled data"),
+ H1_EV_RX_DATA|H1_EV_RX_EOI, h1c->conn, h1s, htx, (size_t[]){ret});
+ }
+ else {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ break;
+ }
+
+ count -= htx_used_space(htx) - used;
+ } while (!(h1s->flags & (H1S_F_PARSING_ERROR|H1S_F_NOT_IMPL_ERROR|H1S_F_RX_BLK|H1S_F_RX_CONGESTED)));
+
+ if (h1s->flags & (H1S_F_PARSING_ERROR|H1S_F_NOT_IMPL_ERROR)) {
+ TRACE_ERROR("parsing or not-implemented error", H1_EV_RX_DATA|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto err;
+ }
+
+ b_del(&h1c->ibuf, total);
+
+ TRACE_DEVEL("incoming data parsed", H1_EV_RX_DATA, h1c->conn, h1s, htx, (size_t[]){ret});
+
+ ret = htx->data - data;
+ if ((h1c->flags & H1C_F_IN_FULL) && buf_room_for_htx_data(&h1c->ibuf)) {
+ h1c->flags &= ~H1C_F_IN_FULL;
+ TRACE_STATE("h1c ibuf not full anymore", H1_EV_RX_DATA|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn, h1s);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+
+ if (!b_data(&h1c->ibuf))
+ h1_release_buf(h1c, &h1c->ibuf);
+
+ if (h1m->state <= H1_MSG_LAST_LF)
+ goto out;
+
+ if (h1c->state < H1_CS_RUNNING) {
+ /* The H1 connection is not ready. Most of the time, there is no SC
+ * attached, except for a TCP>H1 upgrade from a TCP frontend. In both
+ * cases, it is only possible on the client side.
+ */
+ BUG_ON(h1c->flags & H1C_F_IS_BACK);
+
+ if (h1c->state == H1_CS_EMBRYONIC) {
+ TRACE_DEVEL("request headers fully parsed, create and attach the SC", H1_EV_RX_DATA, h1c->conn, h1s);
+ BUG_ON(h1s_sc(h1s));
+ if (!h1s_new_sc(h1s, buf)) {
+ h1s->flags |= H1S_F_INTERNAL_ERROR;
+ goto err;
+ }
+ }
+ else {
+ TRACE_DEVEL("request headers fully parsed, upgrade the inherited SC", H1_EV_RX_DATA, h1c->conn, h1s);
+ BUG_ON(h1s_sc(h1s) == NULL);
+ if (!h1s_upgrade_sc(h1s, buf)) {
+ h1s->flags |= H1S_F_INTERNAL_ERROR;
+ TRACE_ERROR("H1S upgrade failure", H1_EV_RX_DATA|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto err;
+ }
+ }
+ }
+
+ /* Here h1s_sc(h1s) is always defined */
+ if (!(h1c->flags & H1C_F_CANT_FASTFWD) &&
+ (!(h1m->flags & H1_MF_RESP) || !(h1s->flags & H1S_F_BODYLESS_RESP)) &&
+ (h1m->state == H1_MSG_DATA || h1m->state == H1_MSG_TUNNEL) &&
+ !(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_H1_RCV)) {
+ TRACE_STATE("notify the mux can use fast-forward", H1_EV_RX_DATA|H1_EV_RX_BODY, h1c->conn, h1s);
+ se_fl_set(h1s->sd, SE_FL_MAY_FASTFWD_PROD);
+ }
+ else {
+ TRACE_STATE("notify the mux can't use fast-forward anymore", H1_EV_RX_DATA|H1_EV_RX_BODY, h1c->conn, h1s);
+ se_fl_clr(h1s->sd, SE_FL_MAY_FASTFWD_PROD);
+ h1c->flags &= ~H1C_F_WANT_FASTFWD;
+ }
+
+ /* Set EOI on the stream connector in DONE state iff:
+ *  - it is a response
+ *  - it is a request but neither a protocol upgrade nor a CONNECT
+ *
+ * If not set, wait for the response to decide whether to do so or not,
+ * depending on the status code.
+ */
+ if (((h1m->state == H1_MSG_DONE) && (h1m->flags & H1_MF_RESP)) ||
+ ((h1m->state == H1_MSG_DONE) && (h1s->meth != HTTP_METH_CONNECT) && !(h1m->flags & H1_MF_CONN_UPG)))
+ se_fl_set(h1s->sd, SE_FL_EOI);
+
+ out:
+ /* When input data are pending for this message, notify the upper layer
+ * that the mux needs more space in the HTX buffer to continue, if:
+ *
+ * - The parser is blocked in MSG_DATA or MSG_TUNNEL state
+ * - Headers or trailers are pending to be copied.
+ */
+ if (h1s->flags & (H1S_F_RX_CONGESTED)) {
+ se_fl_set(h1s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ TRACE_STATE("waiting for more room", H1_EV_RX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ }
+ else {
+ se_fl_clr(h1s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (h1c->flags & H1C_F_EOS) {
+ se_fl_set(h1s->sd, SE_FL_EOS);
+ TRACE_STATE("report EOS to SE", H1_EV_RX_DATA, h1c->conn, h1s);
+ if (h1m->state >= H1_MSG_DONE || (h1m->state > H1_MSG_LAST_LF && !(h1m->flags & H1_MF_XFER_LEN))) {
+ /* DONE or TUNNEL or SHUTR without XFER_LEN, set
+ * EOI on the stream connector */
+ se_fl_set(h1s->sd, SE_FL_EOI);
+ TRACE_STATE("report EOI to SE", H1_EV_RX_DATA, h1c->conn, h1s);
+ }
+ else if (h1m->state < H1_MSG_DONE) {
+ if (h1m->state <= H1_MSG_LAST_LF && b_data(&h1c->ibuf))
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("message aborted, set error on SC", H1_EV_RX_DATA|H1_EV_H1S_ERR, h1c->conn, h1s);
+ }
+
+ if (h1s->flags & H1S_F_TX_BLK) {
+ h1s->flags &= ~H1S_F_TX_BLK;
+ h1_wake_stream_for_send(h1s);
+ TRACE_STATE("Re-enable output processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+ }
+ if (h1c->flags & H1C_F_ERROR) {
+ /* Report a terminal error to the SE if a previous read error was detected */
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_STATE("report ERROR to SE", H1_EV_RX_DATA|H1_EV_H1S_ERR, h1c->conn, h1s);
+ }
+ }
+
+ end:
+ htx_to_buf(htx, buf);
+ TRACE_LEAVE(H1_EV_RX_DATA, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+
+ err:
+ htx_to_buf(htx, buf);
+ se_fl_set(h1s->sd, SE_FL_EOI);
+ if (h1c->state < H1_CS_RUNNING) {
+ h1c->flags |= H1C_F_EOS;
+ se_fl_set(h1s->sd, SE_FL_EOS);
+ }
+ TRACE_DEVEL("leaving on error", H1_EV_RX_DATA|H1_EV_STRM_ERR, h1c->conn, h1s);
+ return 0;
+}
+
+/* Try to send the request line from the HTX message <htx> for the stream
+ * <h1s>. It returns the number of bytes consumed or zero if nothing was done or
+ * if an error occurred. No more than <count> bytes can be sent.
+ */
+static size_t h1_make_reqline(struct h1s *h1s, struct h1m *h1m, struct htx *htx, size_t count)
+{
+ struct h1c *h1c = h1s->h1c;
+ struct htx_blk *blk;
+ struct htx_sl *sl;
+ enum htx_blk_type type;
+ uint32_t sz;
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx, (size_t[]){count});
+
+ while (1) {
+ blk = htx_get_head_blk(htx);
+ if (!blk)
+ goto end;
+ type = htx_get_blk_type(blk);
+ sz = htx_get_blksz(blk);
+ if (type == HTX_BLK_UNUSED)
+ continue;
+ if (type != HTX_BLK_REQ_SL || sz > count)
+ goto error;
+ break;
+ }
+
+ TRACE_USER("sending request headers", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx);
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+
+ sl = htx_get_blk_ptr(htx, blk);
+ if (!h1_format_htx_reqline(sl, &h1c->obuf))
+ goto full;
+
+ h1s->meth = sl->info.req.meth;
+ h1_parse_req_vsn(h1m, sl);
+
+ h1m->flags |= H1_MF_XFER_LEN;
+ if (sl->flags & HTX_SL_F_CHNK)
+ h1m->flags |= H1_MF_CHNK;
+ else if (sl->flags & HTX_SL_F_CLEN)
+ h1m->flags |= H1_MF_CLEN;
+ if (sl->flags & HTX_SL_F_XFER_ENC)
+ h1m->flags |= H1_MF_XFER_ENC;
+
+ if (sl->flags & HTX_SL_F_BODYLESS && !(h1m->flags & H1_MF_CLEN)) {
+ h1m->flags = (h1m->flags & ~H1_MF_CHNK) | H1_MF_CLEN;
+ h1s->flags |= H1S_F_HAVE_CLEN;
+ }
+ if ((sl->flags & HTX_SL_F_BODYLESS_RESP) || h1s->meth == HTTP_METH_HEAD)
+ h1s->flags |= H1S_F_BODYLESS_RESP;
+
+ if (h1s->flags & H1S_F_RX_BLK) {
+ h1s->flags &= ~H1S_F_RX_BLK;
+ h1_wake_stream_for_recv(h1s);
+ TRACE_STATE("Re-enable input processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+
+ h1m->state = H1_MSG_HDR_NAME;
+ ret += sz;
+ htx_remove_blk(htx, blk);
+
+ end:
+ TRACE_LEAVE(H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+
+ full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto end;
+
+ error:
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("processing error on request start-line",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+}
+
+/* Try to send the status line from the HTX message <htx> for the stream
+ * <h1s>. It returns the number of bytes consumed or zero if nothing was done or
+ * if an error occurred. No more than <count> bytes can be sent.
+ */
+static size_t h1_make_stline(struct h1s *h1s, struct h1m *h1m, struct htx *htx, size_t count)
+{
+ struct h1c *h1c = h1s->h1c;
+ struct htx_blk *blk;
+ struct htx_sl *sl;
+ enum htx_blk_type type;
+ uint32_t sz;
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx, (size_t[]){count});
+
+ while (1) {
+ blk = htx_get_head_blk(htx);
+ if (!blk)
+ goto end;
+
+ type = htx_get_blk_type(blk);
+ sz = htx_get_blksz(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+ if (type != HTX_BLK_RES_SL || sz > count)
+ goto error;
+ break;
+ }
+
+ TRACE_USER("sending response headers", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx);
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+
+ sl = htx_get_blk_ptr(htx, blk);
+ if (!h1_format_htx_stline(sl, &h1c->obuf))
+ goto full;
+
+ h1s->status = sl->info.res.status;
+ h1_parse_res_vsn(h1m, sl);
+
+ if (sl->flags & HTX_SL_F_XFER_LEN) {
+ h1m->flags |= H1_MF_XFER_LEN;
+ if (sl->flags & HTX_SL_F_CHNK)
+ h1m->flags |= H1_MF_CHNK;
+ else if (sl->flags & HTX_SL_F_CLEN)
+ h1m->flags |= H1_MF_CLEN;
+ if (sl->flags & HTX_SL_F_XFER_ENC)
+ h1m->flags |= H1_MF_XFER_ENC;
+ }
+ if (h1s->status < 200)
+ h1s->flags |= H1S_F_HAVE_O_CONN;
+ else if ((sl->flags & HTX_SL_F_BODYLESS_RESP) || h1s->status == 204 || h1s->status == 304)
+ h1s->flags |= H1S_F_BODYLESS_RESP;
+
+ h1m->state = H1_MSG_HDR_NAME;
+ ret += sz;
+ htx_remove_blk(htx, blk);
+
+ end:
+ TRACE_LEAVE(H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+
+ full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto end;
+
+ error:
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("processing error on response start-line",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+}
+
+/* Try to send the message headers from the HTX message <htx> for the stream
+ * <h1s>. It returns the number of bytes consumed or zero if nothing was done or
+ * if an error occurred. No more than <count> bytes can be sent.
+ */
+static size_t h1_make_headers(struct h1s *h1s, struct h1m *h1m, struct htx *htx, size_t count)
+{
+ struct h1c *h1c = h1s->h1c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ enum htx_blk_type type;
+ struct ist n, v;
+ uint32_t sz;
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx, (size_t[]){count});
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+ outbuf = b_make(b_tail(&h1c->obuf), b_contig_space(&h1c->obuf), 0, 0);
+
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ sz = htx_get_blksz(blk);
+
+ if (type == HTX_BLK_HDR) {
+ if (sz > count)
+ goto error;
+
+ n = htx_get_blk_name(htx, blk);
+ v = htx_get_blk_value(htx, blk);
+
+ /* Skip all pseudo-headers */
+ if (*(n.ptr) == ':')
+ goto nextblk;
+
+ if (isteq(n, ist("transfer-encoding"))) {
+ if ((h1m->flags & H1_MF_RESP) && (h1s->status < 200 || h1s->status == 204))
+ goto nextblk;
+ if (!(h1m->flags & H1_MF_CHNK))
+ goto nextblk;
+ if (h1_parse_xfer_enc_header(h1m, v) < 0)
+ goto error;
+ h1s->flags |= H1S_F_HAVE_CHNK;
+ }
+ else if (isteq(n, ist("content-length"))) {
+ if ((h1m->flags & H1_MF_RESP) && (h1s->status < 200 || h1s->status == 204))
+ goto nextblk;
+ if (!(h1m->flags & H1_MF_CLEN))
+ goto nextblk;
+ if (!(h1s->flags & H1S_F_HAVE_CLEN))
+ h1m->flags &= ~H1_MF_CLEN;
+ /* Only skip C-L header with invalid value. */
+ if (h1_parse_cont_len_header(h1m, &v) < 0)
+ goto error;
+ if (h1s->flags & H1S_F_HAVE_CLEN)
+ goto nextblk;
+ h1s->flags |= H1S_F_HAVE_CLEN;
+ }
+ else if (isteq(n, ist("connection"))) {
+ h1_parse_connection_header(h1m, &v);
+ if (!v.len)
+ goto nextblk;
+ }
+ else if (isteq(n, ist("upgrade"))) {
+ h1_parse_upgrade_header(h1m, v);
+ }
+ else if ((isteq(n, ist("sec-websocket-accept")) && h1m->flags & H1_MF_RESP) ||
+ (isteq(n, ist("sec-websocket-key")) && !(h1m->flags & H1_MF_RESP))) {
+ h1s->flags |= H1S_F_HAVE_WS_KEY;
+ }
+ else if (isteq(n, ist("te"))) {
+ /* "te" may only be sent with "trailers" if this value
+ * is present, otherwise it must be deleted.
+ */
+ v = istist(v, ist("trailers"));
+ if (!isttest(v) || (v.len > 8 && v.ptr[8] != ','))
+ goto nextblk;
+ v = ist("trailers");
+ }
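+ /* (Illustrative: "TE: trailers, deflate;q=0.5" is thus rewritten to
+ * "te: trailers", while a value not containing "trailers" gets the
+ * header dropped entirely.)
+ */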
+
+ /* Skip header if same name is used to add the server name */
+ if (!(h1m->flags & H1_MF_RESP) && isttest(h1c->px->server_id_hdr_name) &&
+ isteqi(n, h1c->px->server_id_hdr_name))
+ goto nextblk;
+
+ /* Try to adjust the case of the header name */
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &outbuf))
+ goto full;
+ }
+ else if (type == HTX_BLK_EOH) {
+ h1m->state = H1_MSG_LAST_LF;
+ break; /* Do not consume this block */
+ }
+ else if (type == HTX_BLK_UNUSED)
+ goto nextblk;
+ else
+ goto error;
+
+ nextblk:
+ ret += sz;
+ count -= sz;
+ blk = htx_remove_blk(htx, blk);
+ }
+
+ copy:
+ b_add(&h1c->obuf, outbuf.data);
+
+ end:
+ TRACE_LEAVE(H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+
+ full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto copy;
+
+ error:
+ ret = 0;
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("processing error on message headers",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+}
+
+/* Handle the EOH and perform the last processing before sending the data. It
+ * returns the number of bytes consumed or zero if nothing was done or if an
+ * error occurred. No more than <count> bytes can be sent.
+ */
+static size_t h1_make_eoh(struct h1s *h1s, struct h1m *h1m, struct htx *htx, size_t count)
+{
+ struct h1c *h1c = h1s->h1c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ enum htx_blk_type type;
+ struct ist n, v;
+ uint32_t sz;
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx, (size_t[]){count});
+
+ while (1) {
+ blk = htx_get_head_blk(htx);
+ if (!blk)
+ goto end;
+
+ type = htx_get_blk_type(blk);
+ sz = htx_get_blksz(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+ if (type != HTX_BLK_EOH || sz > count)
+ goto error;
+ break;
+ }
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+ outbuf = b_make(b_tail(&h1c->obuf), b_contig_space(&h1c->obuf), 0, 0);
+
+ /* Deal with "Connection" header */
+ if (!(h1s->flags & H1S_F_HAVE_O_CONN)) {
+ if ((htx->flags & HTX_FL_PROXY_RESP) && h1s->req.state != H1_MSG_DONE) {
+ /* If the reply comes from haproxy while the request is
+ * not finished, we force the connection close. */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if ((h1m->flags & (H1_MF_XFER_ENC|H1_MF_CLEN)) == (H1_MF_XFER_ENC|H1_MF_CLEN)) {
+ /* T-E + C-L: force close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ h1m->flags &= ~H1_MF_CLEN;
+ TRACE_STATE("force close mode (T-E + C-L)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+ else if ((h1m->flags & (H1_MF_VER_11|H1_MF_XFER_ENC)) == H1_MF_XFER_ENC) {
+ /* T-E + HTTP/1.0: force close */
+ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO;
+ TRACE_STATE("force close mode (T-E + HTTP/1.0)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s);
+ }
+
+ /* the conn_mode must be processed. So do it */
+ n = ist("connection");
+ v = ist("");
+ h1_process_output_conn_mode(h1s, h1m, &v);
+ if (v.len) {
+ /* Try to adjust the case of the header name */
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &outbuf))
+ goto full;
+ }
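+ /* (h1_process_output_conn_mode() is assumed to leave <v> empty when
+ * the version's default already matches the wanted mode, and to set
+ * "close" or "keep-alive" otherwise.)
+ */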
+ h1s->flags |= H1S_F_HAVE_O_CONN;
+ }
+
+ /* Deal with "Transfer-Encoding" header */
+ if ((h1s->meth != HTTP_METH_CONNECT &&
+ (h1m->flags & (H1_MF_VER_11|H1_MF_RESP|H1_MF_CLEN|H1_MF_CHNK|H1_MF_XFER_LEN)) ==
+ (H1_MF_VER_11|H1_MF_XFER_LEN)) ||
+ (h1s->status >= 200 && !(h1s->flags & H1S_F_BODYLESS_RESP) &&
+ !(h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) &&
+ (h1m->flags & (H1_MF_VER_11|H1_MF_RESP|H1_MF_CLEN|H1_MF_CHNK|H1_MF_XFER_LEN)) ==
+ (H1_MF_VER_11|H1_MF_RESP|H1_MF_XFER_LEN)))
+ h1m->flags |= H1_MF_CHNK;
+
+ if ((h1m->flags & H1_MF_CHNK) && !(h1s->flags & H1S_F_HAVE_CHNK)) {
+ /* chunking needed but header not seen */
+ n = ist("transfer-encoding");
+ v = ist("chunked");
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &outbuf))
+ goto full;
+ TRACE_STATE("add \"Transfer-Encoding: chunked\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ h1s->flags |= H1S_F_HAVE_CHNK;
+ }
+
+ /* Deal with "Content-Length header */
+ if ((h1m->flags & H1_MF_CLEN) && !(h1s->flags & H1S_F_HAVE_CLEN)) {
+ char *end;
+
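+ /* (The EOH block of size <sz> is still accounted for in htx->data at
+ * this point, hence the subtraction; htx->extra is assumed to carry
+ * the announced payload length not yet present in the HTX message.)
+ */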
+ h1m->curr_len = h1m->body_len = htx->data + htx->extra - sz;
+ end = DISGUISE(ulltoa(h1m->body_len, trash.area, b_size(&trash)));
+
+ n = ist("content-length");
+ v = ist2(trash.area, end-trash.area);
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &outbuf))
+ goto full;
+ TRACE_STATE("add \"Content-Length: <LEN>\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ h1s->flags |= H1S_F_HAVE_CLEN;
+ }
+
+ /* Add the server name to a header (if requested) */
+ if (!(h1s->flags & H1S_F_HAVE_SRV_NAME) &&
+ !(h1m->flags & H1_MF_RESP) && isttest(h1c->px->server_id_hdr_name)) {
+ struct server *srv = objt_server(h1c->conn->target);
+
+ if (srv) {
+ n = h1c->px->server_id_hdr_name;
+ v = ist(srv->id);
+
+ /* Try to adjust the case of the header name */
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &outbuf))
+ goto full;
+ }
+ TRACE_STATE("add server name header", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ h1s->flags |= H1S_F_HAVE_SRV_NAME;
+ }
+
+ /* Add websocket handshake key if needed */
+ if (!(h1s->flags & H1S_F_HAVE_WS_KEY) &&
+ (h1m->flags & (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) == (H1_MF_CONN_UPG|H1_MF_UPG_WEBSOCKET)) {
+ if (!(h1m->flags & H1_MF_RESP)) {
+ /* generate a random websocket key
+ * stored in the session to
+ * verify it on the response side
+ */
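+ /* (Per RFC 6455, the key is 16 random bytes encoded in base64,
+ * e.g. "dGhlIHNhbXBsZSBub25jZQ==", to which the peer replies
+ * with base64(SHA-1(key + fixed GUID)).)
+ */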
+ h1_generate_random_ws_input_key(h1s->ws_key);
+
+ if (!h1_format_htx_hdr(ist("Sec-Websocket-Key"),
+ ist(h1s->ws_key),
+ &outbuf)) {
+ goto full;
+ }
+ }
+ else {
+ /* add the response header key */
+ char key[29];
+
+ h1_calculate_ws_output_key(h1s->ws_key, key);
+ if (!h1_format_htx_hdr(ist("Sec-Websocket-Accept"),
+ ist(key),
+ &outbuf)) {
+ goto full;
+ }
+ }
+ h1s->flags |= H1S_F_HAVE_WS_KEY;
+ }
+
+ /*
+ * All headers were sent, now process the EOH
+ */
+ if (!(h1m->flags & H1_MF_RESP) && h1s->meth == HTTP_METH_CONNECT) {
+ if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto full;
+ /* a CONNECT request was sent. Output processing is now blocked
+ * waiting for the server response.
+ */
+ h1m->state = H1_MSG_DONE;
+ h1s->flags |= H1S_F_TX_BLK;
+ TRACE_STATE("CONNECT request waiting for tunnel mode", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ }
+ else if ((h1m->flags & H1_MF_RESP) &&
+ ((h1s->meth == HTTP_METH_CONNECT && h1s->status >= 200 && h1s->status < 300) || h1s->status == 101)) {
+ if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto full;
+ /* a successful reply to a CONNECT or a protocol switch is sent
+ * to the client. Switch the response to tunnel mode.
+ */
+ h1_set_tunnel_mode(h1s);
+ }
+ else if ((h1m->flags & H1_MF_RESP) &&
+ h1s->status < 200 && (h1s->status == 100 || h1s->status >= 102)) {
+ if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto full;
+ /* 1xx response was sent, reset response processing */
+ h1m_init_res(h1m);
+ h1m->flags |= (H1_MF_NO_PHDR|H1_MF_CLEAN_CONN_HDR);
+ h1s->flags &= ~H1S_F_HAVE_O_CONN;
+ TRACE_STATE("1xx response xferred", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ }
+ else if (htx_is_unique_blk(htx, blk) &&
+ ((htx->flags & HTX_FL_EOM) || ((h1m->flags & H1_MF_CLEN) && !h1m->curr_len))) {
+ /* The EOM flag is set and it is the last block, or there is no
+ * payload. It cannot be removed now: we must emit the end of
+ * the message first, to be sure the output buffer is not full.
+ */
+ if ((h1m->flags & H1_MF_CHNK) && !(h1s->flags & H1S_F_BODYLESS_RESP)) {
+ if (!chunk_memcat(&outbuf, "\r\n0\r\n\r\n", 7))
+ goto full;
+ }
+ else if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto full;
+ h1m->state = ((htx->flags & HTX_FL_EOM) ? H1_MSG_DONE : H1_MSG_TRAILERS);
+ }
+ else {
+ if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto full;
+ h1m->state = ((h1m->flags & H1_MF_CHNK) ? H1_MSG_CHUNK_SIZE: H1_MSG_DATA);
+ }
+
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request headers xferred" : "H1 response headers xferred"),
+ H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s);
+ ret += sz;
+ htx_remove_blk(htx, blk);
+
+ copy:
+ b_add(&h1c->obuf, outbuf.data);
+ end:
+ TRACE_LEAVE(H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+
+ full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto copy;
+
+ error:
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("processing error on message EOH",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+}
+
+/* Try to send the message payload from the HTX message <htx> for the stream
+ * <h1s>. In this case, we are not in TUNNEL mode. It returns the number of
+ * bytes consumed or zero if nothing was done or if an error occurred. No more
+ * than <count> bytes can be sent.
+ */
+static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, size_t count)
+{
+ struct h1c *h1c = h1s->h1c;
+ struct htx *htx = htx_from_buf(buf);
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ enum htx_blk_type type;
+ struct ist v;
+ uint32_t sz;
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx, (size_t[]){count});
+ blk = htx_get_head_blk(htx);
+
+ /* Perform some optimizations to reduce the number of buffer copies. If
+ * the mux's buffer is empty and the htx area contains exactly one data
+ * block of the same size as the requested count, then it's possible to
+ * simply swap the caller's buffer with the mux's output buffer and
+ * adjust offsets and length to match the entire DATA HTX block in the
+ * middle. In this case we perform a true zero-copy operation from
+ * end-to-end. This is the situation that happens all the time with
+ * large files.
+ */
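+ /* (Sketch of the swap performed below: h1c->obuf adopts the caller's
+ * storage with its head placed right at the DATA payload, while <buf>
+ * takes the mux's old, empty area back and is reset as an empty HTX.)
+ */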
+ if ((!(h1m->flags & H1_MF_RESP) || !(h1s->flags & H1S_F_BODYLESS_RESP)) &&
+ !b_data(&h1c->obuf) &&
+ (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_CHNK) && (!h1m->curr_len || count == h1m->curr_len))) &&
+ htx_nbblks(htx) == 1 &&
+ htx_get_blk_type(blk) == HTX_BLK_DATA &&
+ htx_get_blk_value(htx, blk).len == count) {
+ void *old_area;
+ uint64_t extra;
+ int eom = (htx->flags & HTX_FL_EOM);
+
+ extra = htx->extra;
+ old_area = h1c->obuf.area;
+ h1c->obuf.area = buf->area;
+ h1c->obuf.head = sizeof(struct htx) + blk->addr;
+ h1c->obuf.data = count;
+
+ buf->area = old_area;
+ buf->data = buf->head = 0;
+
+ htx = (struct htx *)buf->area;
+ htx_reset(htx);
+ htx->extra = extra;
+
+ if (h1m->flags & H1_MF_CLEN) {
+ if (count > h1m->curr_len) {
+ TRACE_ERROR("more payload than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error;
+ }
+ h1m->curr_len -= count;
+ if (!h1m->curr_len)
+ h1m->state = (eom ? H1_MSG_DONE : H1_MSG_TRAILERS);
+ }
+ else if (h1m->flags & H1_MF_CHNK) {
+ /* The message is chunked. We need to check if we must
+ * emit the chunk size, the CRLF marking the end of the
+ * current chunk and eventually the CRLF marking the end
+ * of the previous chunk (because of fast-forwarding).
+ * If it is the end of the message, we must
+ * also emit the last chunk.
+ *
+ * We have at least the size of the struct htx to write
+ * the chunk envelope. It should be enough.
+ */
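+ /* (Illustrative wire format: a 5-byte chunk followed by the
+ * last chunk is emitted as "5\r\nhello\r\n0\r\n\r\n", with the
+ * size in hexadecimal.)
+ */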
+
+ /* If it is a new chunk, prepend the chunk size */
+ if (h1m->state == H1_MSG_CHUNK_CRLF || h1m->state == H1_MSG_CHUNK_SIZE) {
+ if (h1m->curr_len) {
+ TRACE_ERROR("chunk bigger than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error;
+ }
+ h1m->curr_len = count + (htx->extra != HTX_UNKOWN_PAYLOAD_LENGTH ? htx->extra : 0);
+
+ /* Because chunk meta-data are prepended, the chunk size of the current chunk
+ * must be handled before the end of the previous chunk.
+ */
+ h1_prepend_chunk_size(&h1c->obuf, h1m->curr_len);
+ if (h1m->state == H1_MSG_CHUNK_CRLF)
+ h1_prepend_chunk_crlf(&h1c->obuf);
+
+ h1m->state = H1_MSG_DATA;
+ }
+
+ h1m->curr_len -= count;
+
+ /* It is the end of the chunk, append the CRLF */
+ if (!h1m->curr_len) {
+ h1_append_chunk_crlf(&h1c->obuf);
+ h1m->state = H1_MSG_CHUNK_SIZE;
+ }
+
+ /* It is the end of the message, add the last chunk with the extra CRLF */
+ if (eom) {
+ if (h1m->curr_len) {
+ TRACE_ERROR("chunk smaller than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error;
+ }
+ /* Emit the last chunk too at the buffer's end */
+ b_putblk(&h1c->obuf, "0\r\n\r\n", 5);
+ h1m->state = H1_MSG_DONE;
+ }
+ }
+ /* Nothing to do if XFER len is unknown */
+
+ ret = count;
+ TRACE_PROTO("H1 message payload data xferred (zero-copy)", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){ret});
+ goto end;
+ }
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+ outbuf = b_make(b_tail(&h1c->obuf), b_contig_space(&h1c->obuf), 0, 0);
+
+ /* Handle now the case of the CRLF at the end of a chunk. */
+ if ((h1m->flags & H1_MF_CHNK) && h1m->state == H1_MSG_CHUNK_CRLF) {
+ if (h1m->curr_len) {
+ TRACE_ERROR("chunk bigger than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error;
+ }
+ if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto full;
+ h1m->state = H1_MSG_CHUNK_SIZE;
+ }
+
+ while (blk && count) {
+ uint32_t vlen, chklen;
+ int last_data = 0;
+
+ type = htx_get_blk_type(blk);
+ sz = htx_get_blksz(blk);
+ vlen = sz;
+ if (type == HTX_BLK_DATA) {
+ if (vlen > count) {
+ /* Get the maximum amount of data we can transfer */
+ vlen = count;
+ }
+ else if (htx_is_unique_blk(htx, blk) && (htx->flags & HTX_FL_EOM)) {
+ /* It is the last block of this message. After this one,
+ * only tunneled data may be forwarded. */
+ TRACE_DEVEL("last message block", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s);
+ last_data = 1;
+ }
+
+ if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)) {
+ TRACE_PROTO("Skip data for bodyless response", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx);
+ goto nextblk;
+ }
+
+ chklen = 0;
+ if (h1m->flags & H1_MF_CHNK) {
+ /* If it is a new chunk, prepend the chunk size */
+ if (h1m->state == H1_MSG_CHUNK_SIZE) {
+ h1m->curr_len = (htx->extra && htx->extra != HTX_UNKOWN_PAYLOAD_LENGTH ? htx->data + htx->extra : vlen);
+ if (!h1_append_chunk_size(&outbuf, h1m->curr_len)) {
+ h1m->curr_len = 0;
+ goto full;
+ }
+ h1m->state = H1_MSG_DATA;
+ }
+
+ if (vlen > h1m->curr_len) {
+ vlen = h1m->curr_len;
+ last_data = 0;
+ }
+
+ chklen = 0;
+ if (h1m->curr_len == vlen)
+ chklen += 2;
+ if (last_data)
+ chklen += 5;
+ }
+
+ if (vlen + chklen > b_room(&outbuf)) {
+ /* too large for the buffer */
+ if (chklen >= b_room(&outbuf))
+ goto full;
+ vlen = b_room(&outbuf) - chklen;
+ last_data = 0;
+ }
+
+ v = htx_get_blk_value(htx, blk);
+ v.len = vlen;
+ if (!h1_format_htx_data(v, &outbuf, 0))
+ goto full;
+
+ if (h1m->flags & H1_MF_CLEN) {
+ if (vlen > h1m->curr_len) {
+ TRACE_ERROR("more payload than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error;
+ }
+ h1m->curr_len -= vlen;
+ }
+ else if (h1m->flags & H1_MF_CHNK) {
+ h1m->curr_len -= vlen;
+ /* Space already reserved, so it must succeed */
+ if (!h1m->curr_len) {
+ if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto error;
+ h1m->state = H1_MSG_CHUNK_SIZE;
+ }
+ if (last_data) {
+ if (h1m->curr_len) {
+ TRACE_ERROR("chunk smaller than announced",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto error;
+ }
+ if (!chunk_memcat(&outbuf, "0\r\n\r\n", 5))
+ goto error;
+ }
+ }
+ }
+ else if (type == HTX_BLK_EOT || type == HTX_BLK_TLR) {
+ if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)) {
+ /* Do nothing: the payload must be skipped
+ * because it is a bodyless response.
+ */
+ }
+ else if (h1m->flags & H1_MF_CHNK) {
+ /* Emit last chunk for chunked messages only */
+ if (!chunk_memcat(&outbuf, "0\r\n", 3))
+ goto full;
+ }
+ h1m->state = H1_MSG_TRAILERS;
+ break;
+ }
+ else if (type == HTX_BLK_UNUSED)
+ goto nextblk;
+ else
+ goto error;
+
+ nextblk:
+ ret += vlen;
+ count -= vlen;
+ if (sz == vlen)
+ blk = htx_remove_blk(htx, blk);
+ else {
+ htx_cut_data_blk(htx, blk, vlen);
+ if (!b_room(&outbuf))
+ goto full;
+ }
+
+ if (last_data)
+ h1m->state = H1_MSG_DONE;
+ }
+
+ copy:
+ TRACE_PROTO("H1 message payload data xferred", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){ret});
+ b_add(&h1c->obuf, outbuf.data);
+ end:
+ TRACE_LEAVE(H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+ full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto copy;
+ error:
+ ret = 0;
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("processing error on message payload",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+}
+
+/* Try to send the tunneled data from the HTX message <htx> for the stream
+ * <h1s>. In this case, we are in TUNNEL mode. It returns the number of bytes
+ * consumed or zero if nothing was done or if an error occurred. No more than
+ * <count> bytes can be sent.
+ */
+static size_t h1_make_tunnel(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, size_t count)
+{
+ struct h1c *h1c = h1s->h1c;
+ struct htx *htx = htx_from_buf(buf);
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ enum htx_blk_type type;
+ struct ist v;
+ uint32_t sz;
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx, (size_t[]){count});
+
+ blk = htx_get_head_blk(htx);
+
+ /* Perform some optimizations to reduce the number of buffer copies. If
+ * the mux's buffer is empty and the htx area contains exactly one data
+ * block of the same size as the requested count, then it's possible to
+ * simply swap the caller's buffer with the mux's output buffer and
+ * adjust offsets and length to match the entire DATA HTX block in the
+ * middle. In this case we perform a true zero-copy operation from
+ * end-to-end. This is the situation that happens all the time with
+ * large files.
+ */
+ if (!b_data(&h1c->obuf) &&
+ htx_nbblks(htx) == 1 &&
+ htx_get_blk_type(blk) == HTX_BLK_DATA &&
+ htx_get_blksz(blk) == count) {
+ void *old_area;
+
+ old_area = h1c->obuf.area;
+ h1c->obuf.area = buf->area;
+ h1c->obuf.head = sizeof(struct htx) + blk->addr;
+ h1c->obuf.data = count;
+
+ buf->area = old_area;
+ buf->data = buf->head = 0;
+
+ htx = (struct htx *)buf->area;
+ htx_reset(htx);
+
+ ret = count;
+ TRACE_PROTO("H1 tunneled data xferred (zero-copy)", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){ret});
+ goto end;
+ }
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+ outbuf = b_make(b_tail(&h1c->obuf), b_contig_space(&h1c->obuf), 0, 0);
+
+ while (blk) {
+ uint32_t vlen;
+
+ type = htx_get_blk_type(blk);
+ sz = htx_get_blksz(blk);
+ vlen = sz;
+
+ if (type == HTX_BLK_DATA) {
+ if (vlen > count) {
+ /* Get the maximum amount of data we can transfer */
+ vlen = count;
+ }
+
+ if (vlen > b_room(&outbuf)) {
+ /* too large for the buffer */
+ vlen = b_room(&outbuf);
+ }
+
+ v = htx_get_blk_value(htx, blk);
+ v.len = vlen;
+ if (!h1_format_htx_data(v, &outbuf, 0))
+ goto full;
+ }
+ else if (type == HTX_BLK_UNUSED)
+ goto nextblk;
+ else
+ goto error;
+
+ nextblk:
+ ret += vlen;
+ count -= vlen;
+ if (sz == vlen)
+ blk = htx_remove_blk(htx, blk);
+ else {
+ htx_cut_data_blk(htx, blk, vlen);
+ break;
+ }
+ }
+
+ copy:
+ TRACE_PROTO("H1 tunneled data xferred", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){ret});
+ b_add(&h1c->obuf, outbuf.data);
+
+ end:
+ TRACE_LEAVE(H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+
+ full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto copy;
+
+ error:
+ ret = 0;
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("processing error on tunneled",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+}
+
+/* Try to send the trailers from the HTX message <htx> for the stream <h1s>. It
+ * returns the number of bytes consumed or zero if nothing was done or if an
+ * error occurred. No more than <count> bytes can be sent.
+ */
+static size_t h1_make_trailers(struct h1s *h1s, struct h1m *h1m, struct htx *htx, size_t count)
+{
+ struct h1c *h1c = h1s->h1c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ enum htx_blk_type type;
+ struct ist n, v;
+ uint32_t sz;
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_TLRS, h1c->conn, h1s, htx, (size_t[]){count});
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+ chunk_reset(&outbuf);
+ outbuf = b_make(b_tail(&h1c->obuf), b_contig_space(&h1c->obuf), 0, 0);
+
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ sz = htx_get_blksz(blk);
+
+ if (type == HTX_BLK_TLR) {
+ if (sz > count)
+ goto error;
+
+ if (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)))
+ goto nextblk;
+
+ n = htx_get_blk_name(htx, blk);
+ v = htx_get_blk_value(htx, blk);
+
+ /* Try to adjust the case of the header name */
+ if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV))
+ h1_adjust_case_outgoing_hdr(h1s, h1m, &n);
+ if (!h1_format_htx_hdr(n, v, &outbuf))
+ goto full;
+ }
+ else if (type == HTX_BLK_EOT) {
+ if (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) {
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request trailers skipped" : "H1 response trailers skipped"),
+ H1_EV_TX_DATA|H1_EV_TX_TLRS, h1c->conn, h1s);
+ }
+ else {
+ if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto full;
+ TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request trailers xferred" : "H1 response trailers xferred"),
+ H1_EV_TX_DATA|H1_EV_TX_TLRS, h1c->conn, h1s);
+ }
+ h1m->state = H1_MSG_DONE;
+ }
+ else if (type == HTX_BLK_UNUSED)
+ goto nextblk;
+ else
+ goto error;
+
+ nextblk:
+ ret += sz;
+ count -= sz;
+ blk = htx_remove_blk(htx, blk);
+ }
+
+ copy:
+ b_add(&h1c->obuf, outbuf.data);
+
+ end:
+ TRACE_LEAVE(H1_EV_TX_DATA|H1_EV_TX_TLRS, h1c->conn, h1s, htx, (size_t[]){ret});
+ return ret;
+
+ full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto copy;
+
+ error:
+ ret = 0;
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("processing error on message trailers",
+ H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+}
+
+/* Try to send the header for a chunk of <len> bytes. It returns the number of
+ * bytes consumed or zero if nothing was done or if an error occurred.
+ */
+static size_t h1_make_chunk(struct h1s *h1s, struct h1m * h1m, size_t len)
+{
+ struct h1c *h1c = h1s->h1c;
+ struct buffer outbuf;
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s);
+
+ if (!h1_get_buf(h1c, &h1c->obuf)) {
+ h1c->flags |= H1C_F_OUT_ALLOC;
+ TRACE_STATE("waiting for h1c obuf allocation", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ goto end;
+ }
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+ outbuf = b_make(b_tail(&h1c->obuf), b_contig_space(&h1c->obuf), 0, 0);
+
+ if (h1m->state == H1_MSG_CHUNK_CRLF) {
+ if (!chunk_memcat(&outbuf, "\r\n", 2))
+ goto full;
+ h1m->state = H1_MSG_CHUNK_SIZE;
+ }
+ if (!h1_append_chunk_size(&outbuf, len))
+ goto full;
+
+ h1m->state = H1_MSG_DATA;
+
+ TRACE_PROTO("H1 chunk info xferred", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, 0, (size_t[]){ret});
+ b_add(&h1c->obuf, outbuf.data);
+ ret = outbuf.data;
+
+end:
+ TRACE_LEAVE(H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, NULL, (size_t[]){ret});
+ return ret;
+full:
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ goto end;
+}
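+/* (Usage note, illustrative: in H1_MSG_CHUNK_SIZE state, h1_make_chunk() for a
+ * 26-byte chunk emits "1a\r\n"; in H1_MSG_CHUNK_CRLF state, the "\r\n" closing
+ * the previous chunk is emitted first.)
+ */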
+
+/*
+ * Process outgoing data. It parses data and transfers them from the channel buffer into
+ * h1c->obuf. It returns the number of bytes parsed and transferred if > 0, or
+ * 0 if it couldn't proceed.
+ */
+static size_t h1_process_mux(struct h1c *h1c, struct buffer *buf, size_t count)
+{
+ struct h1s *h1s = h1c->h1s;
+ struct h1m *h1m;
+ struct htx *htx;
+ size_t ret, total = 0;
+
+ htx = htxbuf(buf);
+ TRACE_ENTER(H1_EV_TX_DATA, h1c->conn, h1s, htx, (size_t[]){count});
+
+ if (htx_is_empty(htx))
+ goto end;
+
+ if (h1s->flags & (H1S_F_INTERNAL_ERROR|H1S_F_PROCESSING_ERROR|H1S_F_TX_BLK))
+ goto end;
+
+ if (!h1_get_buf(h1c, &h1c->obuf)) {
+ h1c->flags |= H1C_F_OUT_ALLOC;
+ TRACE_STATE("waiting for h1c obuf allocation", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ goto end;
+ }
+ h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req);
+
+ while (!(h1c->flags & H1C_F_OUT_FULL) &&
+ !(h1s->flags & (H1S_F_PROCESSING_ERROR|H1S_F_TX_BLK)) &&
+ !htx_is_empty(htx) && count) {
+ switch (h1m->state) {
+ case H1_MSG_RQBEFORE:
+ ret = h1_make_reqline(h1s, h1m, htx, count);
+ break;
+
+ case H1_MSG_RPBEFORE:
+ ret = h1_make_stline(h1s, h1m, htx, count);
+ break;
+
+ case H1_MSG_HDR_NAME:
+ ret = h1_make_headers(h1s, h1m, htx, count);
+ if (unlikely(h1m->state == H1_MSG_LAST_LF)) // in case of no header
+ ret += h1_make_eoh(h1s, h1m, htx, count);
+ break;
+
+ case H1_MSG_LAST_LF:
+ ret = h1_make_eoh(h1s, h1m, htx, count);
+ break;
+
+ case H1_MSG_CHUNK_SIZE:
+ case H1_MSG_CHUNK_CRLF:
+ case H1_MSG_DATA:
+ ret = h1_make_data(h1s, h1m, buf, count);
+ if (ret > 0)
+ htx = htx_from_buf(buf);
+ if (unlikely(h1m->state == H1_MSG_TRAILERS)) // in case of no data
+ ret += h1_make_trailers(h1s, h1m, htx, count);
+ break;
+
+ case H1_MSG_TUNNEL:
+ ret = h1_make_tunnel(h1s, h1m, buf, count);
+ if (ret > 0)
+ htx = htx_from_buf(buf);
+ break;
+
+ case H1_MSG_TRAILERS:
+ ret = h1_make_trailers(h1s, h1m, htx, count);
+ break;
+
+ case H1_MSG_DONE:
+ TRACE_STATE("unexpected data xferred in done state", H1_EV_TX_DATA|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ __fallthrough;
+
+ default:
+ ret = 0;
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("processing error", H1_EV_TX_DATA|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ break;
+ }
+
+ if (!ret)
+ break;
+ total += ret;
+ count -= ret;
+
+ if (h1m->state == H1_MSG_DONE) {
+ TRACE_USER((!(h1m->flags & H1_MF_RESP) ? "H1 request fully xferred" : "H1 response fully xferred"),
+ H1_EV_TX_DATA, h1c->conn, h1s);
+
+ if (h1s->flags & H1S_F_RX_BLK) {
+ h1s->flags &= ~H1S_F_RX_BLK;
+ h1_wake_stream_for_recv(h1s);
+ TRACE_STATE("Re-enable input processing", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1s);
+ }
+ }
+ }
+
+ htx_to_buf(htx, buf);
+ if (!buf_room_for_htx_data(&h1c->obuf)) {
+ TRACE_STATE("h1c obuf full", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s);
+ h1c->flags |= H1C_F_OUT_FULL;
+ }
+
+ end:
+
+ /* Both the request and the response reached the DONE state. So set the EOI
+ * flag on the conn-stream. Most of the time, the flag will already be set,
+ * except for protocol upgrades. Report an error if data remains blocked
+ * in the output buffer.
+ */
+ if (h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE) {
+ se_fl_set(h1s->sd, SE_FL_EOI);
+ if (!htx_is_empty(htx)) {
+ htx->flags |= HTX_FL_PROCESSING_ERROR;
+ h1s->flags |= H1S_F_PROCESSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("txn done but data waiting to be sent, set error on h1c", H1_EV_H1C_ERR, h1c->conn, h1s);
+ }
+ }
+
+ TRACE_LEAVE(H1_EV_TX_DATA, h1c->conn, h1s, htx, (size_t[]){total});
+ return total;
+}
+
+/*********************************************************/
+/* functions below are I/O callbacks from the connection */
+/*********************************************************/
+static void h1_wake_stream_for_recv(struct h1s *h1s)
+{
+ if (h1s && h1s->subs && h1s->subs->events & SUB_RETRY_RECV) {
+ TRACE_POINT(H1_EV_STRM_WAKE, h1s->h1c->conn, h1s);
+ tasklet_wakeup(h1s->subs->tasklet);
+ h1s->subs->events &= ~SUB_RETRY_RECV;
+ if (!h1s->subs->events)
+ h1s->subs = NULL;
+ }
+}
+static void h1_wake_stream_for_send(struct h1s *h1s)
+{
+ if (h1s && h1s->subs && h1s->subs->events & SUB_RETRY_SEND) {
+ TRACE_POINT(H1_EV_STRM_WAKE, h1s->h1c->conn, h1s);
+ tasklet_wakeup(h1s->subs->tasklet);
+ h1s->subs->events &= ~SUB_RETRY_SEND;
+ if (!h1s->subs->events)
+ h1s->subs = NULL;
+ }
+}
+
+/* alerts the data layer following this sequence:
+ *  - if the h1s' data layer is subscribed to recv, then it's woken up for recv
+ *  - if it's subscribed to send, then it's woken up for send
+ * - if it was subscribed to neither, its ->wake() callback is called
+ */
+static void h1_alert(struct h1s *h1s)
+{
+ if (h1s->subs) {
+ h1_wake_stream_for_recv(h1s);
+ h1_wake_stream_for_send(h1s);
+ }
+ else if (h1s_sc(h1s) && h1s_sc(h1s)->app_ops->wake != NULL) {
+ TRACE_POINT(H1_EV_STRM_WAKE, h1s->h1c->conn, h1s);
+ h1s_sc(h1s)->app_ops->wake(h1s_sc(h1s));
+ }
+}
+
+/* Try to send an HTTP error with h1c->errcode status code. It returns 1 on success
+ * and 0 on error. The flag H1C_F_ABRT_PENDING is set on the H1 connection for
+ * retryable errors (allocation error or buffer full). On success, the error is
+ * copied in the output buffer.
+ */
+static int h1_send_error(struct h1c *h1c)
+{
+ int rc = http_get_status_idx(h1c->errcode);
+ int ret = 0;
+
+ TRACE_ENTER(H1_EV_H1C_ERR, h1c->conn, 0, 0, (size_t[]){h1c->errcode});
+
+ /* Verify if the error is mapped on /dev/null or any empty file */
+ // XXX: turn this into a function!
+ if (h1c->px->replies[rc] &&
+ h1c->px->replies[rc]->type == HTTP_REPLY_ERRMSG &&
+ h1c->px->replies[rc]->body.errmsg &&
+ b_is_null(h1c->px->replies[rc]->body.errmsg)) {
+ /* Empty error, so claim a success */
+ ret = 1;
+ goto out;
+ }
+
+ if (h1c->flags & (H1C_F_OUT_ALLOC|H1C_F_OUT_FULL)) {
+ h1c->flags |= H1C_F_ABRT_PENDING;
+ goto out;
+ }
+
+ if (!h1_get_buf(h1c, &h1c->obuf)) {
+ h1c->flags |= (H1C_F_OUT_ALLOC|H1C_F_ABRT_PENDING);
+ TRACE_STATE("waiting for h1c obuf allocation", H1_EV_H1C_ERR|H1_EV_H1C_BLK, h1c->conn);
+ goto out;
+ }
+ ret = b_istput(&h1c->obuf, ist(http_err_msgs[rc]));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ h1c->flags |= (H1C_F_OUT_FULL|H1C_F_ABRT_PENDING);
+ TRACE_STATE("h1c obuf full", H1_EV_H1C_ERR|H1_EV_H1C_BLK, h1c->conn);
+ goto out;
+ }
+ else {
+ /* we cannot report this error, so claim a success */
+ ret = 1;
+ }
+ }
+
+ if (h1c->state == H1_CS_EMBRYONIC) {
+ BUG_ON(h1c->h1s == NULL || h1s_sc(h1c->h1s) != NULL);
+ TRACE_DEVEL("Abort embryonic H1S", H1_EV_H1C_ERR, h1c->conn, h1c->h1s);
+ h1s_destroy(h1c->h1s);
+ }
+
+ h1c->flags = (h1c->flags & ~(H1C_F_WAIT_NEXT_REQ|H1C_F_ABRT_PENDING)) | H1C_F_ABRTED;
+ h1_close(h1c);
+ out:
+ TRACE_LEAVE(H1_EV_H1C_ERR, h1c->conn);
+ return ret;
+}
+
+/* Try to send a 500 internal error. It relies on h1_send_error to send the
+ * error. This function takes care of incrementing stats and tracked counters.
+ */
+static int h1_handle_internal_err(struct h1c *h1c)
+{
+ struct session *sess = h1c->conn->owner;
+ int ret = 0;
+
+ session_inc_http_req_ctr(sess);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe, 1);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[5]);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+
+ h1c->errcode = 500;
+ ret = h1_send_error(h1c);
+ sess_log(sess);
+ return ret;
+}
+
+/* Try to send an error because of a parsing error. By default a 400 bad request
+ * error is returned. But the status code may be specified by setting
+ * h1c->errcode. It relies on h1_send_error to send the error. This function
+ * takes care of incrementing stats and tracked counters.
+ */
+static int h1_handle_parsing_error(struct h1c *h1c)
+{
+ struct session *sess = h1c->conn->owner;
+ int ret = 0;
+
+ if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB))) {
+ h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED;
+ h1_close(h1c);
+ goto end;
+ }
+
+ session_inc_http_req_ctr(sess);
+ session_inc_http_err_ctr(sess);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe, 1);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[4]);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+
+ if (!h1c->errcode)
+ h1c->errcode = 400;
+ ret = h1_send_error(h1c);
+ if (b_data(&h1c->ibuf) || !(sess->fe->options & PR_O_NULLNOLOG))
+ sess_log(sess);
+
+ end:
+ return ret;
+}
+
+/* Try to send a 501 not implemented error. It relies on h1_send_error to send
+ * the error. This function takes care of incrementing stats and tracked
+ * counters.
+ */
+static int h1_handle_not_impl_err(struct h1c *h1c)
+{
+ struct session *sess = h1c->conn->owner;
+ int ret = 0;
+
+ if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB))) {
+ h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED;
+ h1_close(h1c);
+ goto end;
+ }
+
+ session_inc_http_req_ctr(sess);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe, 1);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[4]);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+
+ h1c->errcode = 501;
+ ret = h1_send_error(h1c);
+ if (b_data(&h1c->ibuf) || !(sess->fe->options & PR_O_NULLNOLOG))
+ sess_log(sess);
+
+ end:
+ return ret;
+}
+
+/* Try to send a 408 timeout error. It relies on h1_send_error to send the
+ * error. This function takes care of incrementing stats and tracked counters.
+ */
+static int h1_handle_req_tout(struct h1c *h1c)
+{
+ struct session *sess = h1c->conn->owner;
+ int ret = 0;
+
+ if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB))) {
+ h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED;
+ h1_close(h1c);
+ goto end;
+ }
+
+ session_inc_http_req_ctr(sess);
+ proxy_inc_fe_req_ctr(sess->listener, sess->fe, 1);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[4]);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+
+ h1c->errcode = 408;
+ ret = h1_send_error(h1c);
+ if (b_data(&h1c->ibuf) || !(sess->fe->options & PR_O_NULLNOLOG))
+ sess_log(sess);
+
+ end:
+ return ret;
+}
+
+
+/*
+ * Attempt to read data, and subscribe if none available
+ */
+static int h1_recv(struct h1c *h1c)
+{
+ struct connection *conn = h1c->conn;
+ size_t ret = 0, max;
+ int flags = 0;
+
+ TRACE_ENTER(H1_EV_H1C_RECV, h1c->conn);
+
+ if (h1c->wait_event.events & SUB_RETRY_RECV) {
+ TRACE_DEVEL("leaving on sub_recv", H1_EV_H1C_RECV, h1c->conn);
+ return (b_data(&h1c->ibuf));
+ }
+
+ if ((h1c->flags & H1C_F_WANT_FASTFWD) || !h1_recv_allowed(h1c)) {
+ TRACE_DEVEL("leaving on (want_fastfwde|!recv_allowed)", H1_EV_H1C_RECV, h1c->conn);
+ return 1;
+ }
+
+ if (!h1_get_buf(h1c, &h1c->ibuf)) {
+ h1c->flags |= H1C_F_IN_ALLOC;
+ TRACE_STATE("waiting for h1c ibuf allocation", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ /*
+ * If we only have a small amount of data, realign it,
+ * it's probably cheaper than doing 2 recv() calls.
+ */
+ if (b_data(&h1c->ibuf) > 0 && b_data(&h1c->ibuf) < 128)
+ b_slow_realign_ofs(&h1c->ibuf, trash.area, sizeof(struct htx));
+
+ max = buf_room_for_htx_data(&h1c->ibuf);
+
+ /* avoid useless reads after first responses */
+ if (!h1c->h1s ||
+ (!(h1c->flags & H1C_F_IS_BACK) && h1c->h1s->req.state == H1_MSG_RQBEFORE) ||
+ ((h1c->flags & H1C_F_IS_BACK) && h1c->h1s->res.state == H1_MSG_RPBEFORE)) {
+ flags |= CO_RFL_READ_ONCE;
+
+ /* we know that the first read will be constrained to a smaller
+ * read by the stream layer in order to respect the reserve.
+ * Reading too much will result in global.tune.maxrewrite being
+ * left at the end of the buffer, and in a very small read
+ * being performed again to complete them (typically 16 bytes
+ * freed in the index after headers were consumed) before
+ * another larger read. Instead, given that we know we're
+ * waiting for a header and we'll be limited, let's perform a
+ * shorter first read that the upper layer can retrieve by just
+ * a pointer swap and the next read will be doable at once in
+ * an empty buffer.
+ */
+ if (max > global.tune.bufsize - global.tune.maxrewrite)
+ max = global.tune.bufsize - global.tune.maxrewrite;
+ }
+
+ if (max) {
+ if (h1c->flags & H1C_F_IN_FULL) {
+ h1c->flags &= ~H1C_F_IN_FULL;
+ TRACE_STATE("h1c ibuf not full anymore", H1_EV_H1C_RECV|H1_EV_H1C_BLK);
+ }
+
+ if (!b_data(&h1c->ibuf)) {
+ /* try to pre-align the buffer like the rxbufs will be
+ * to optimize memory copies.
+ */
+ h1c->ibuf.head = sizeof(struct htx);
+ }
+ ret = conn->xprt->rcv_buf(conn, conn->xprt_ctx, &h1c->ibuf, max, flags);
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_in, ret);
+ }
+
+ if (conn_xprt_read0_pending(conn)) {
+ TRACE_DEVEL("read0 on connection", H1_EV_H1C_RECV, h1c->conn);
+ h1c->flags |= H1C_F_EOS;
+ }
+ if (h1c->conn->flags & CO_FL_ERROR) {
+ TRACE_DEVEL("connection error", H1_EV_H1C_RECV, h1c->conn);
+ h1c->flags |= H1C_F_ERROR;
+ }
+
+ if (max && !ret && h1_recv_allowed(h1c)) {
+ TRACE_STATE("failed to receive data, subscribing", H1_EV_H1C_RECV, h1c->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ else {
+ TRACE_DATA("data received or pending or connection error", H1_EV_H1C_RECV, h1c->conn, 0, 0, (size_t[]){ret});
+ h1_wake_stream_for_recv(h1c->h1s);
+ }
+
+ if (!b_data(&h1c->ibuf))
+ h1_release_buf(h1c, &h1c->ibuf);
+ else if (!buf_room_for_htx_data(&h1c->ibuf)) {
+ h1c->flags |= H1C_F_IN_FULL;
+ TRACE_STATE("h1c ibuf full", H1_EV_H1C_RECV|H1_EV_H1C_BLK);
+ }
+
+ TRACE_LEAVE(H1_EV_H1C_RECV, h1c->conn);
+ return !!ret || (h1c->flags & (H1C_F_EOS|H1C_F_ERROR));
+}
+
+
+/*
+ * Try to send data if possible
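+ * Returns non-zero if some data was sent, if an error is pending or was
+ * reported, or if the connection was closed; otherwise 0.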
+ */
+static int h1_send(struct h1c *h1c)
+{
+ struct connection *conn = h1c->conn;
+ unsigned int flags = 0;
+ size_t ret;
+ int sent = 0;
+
+ TRACE_ENTER(H1_EV_H1C_SEND, h1c->conn);
+
+ if (h1c->flags & (H1C_F_ERROR|H1C_F_ERR_PENDING)) {
+ TRACE_DEVEL("leaving on H1C error|err_pending", H1_EV_H1C_SEND, h1c->conn);
+ b_reset(&h1c->obuf);
+ if (h1c->flags & H1C_F_EOS)
+ h1c->flags |= H1C_F_ERROR;
+ return 1;
+ }
+
+ if (!b_data(&h1c->obuf))
+ goto end;
+
+ if (h1c->flags & H1C_F_CO_MSG_MORE)
+ flags |= CO_SFL_MSG_MORE;
+ if (h1c->flags & H1C_F_CO_STREAMER)
+ flags |= CO_SFL_STREAMER;
+
+ ret = conn->xprt->snd_buf(conn, conn->xprt_ctx, &h1c->obuf, b_data(&h1c->obuf), flags);
+ if (ret > 0) {
+ TRACE_DATA("data sent", H1_EV_H1C_SEND, h1c->conn, 0, 0, (size_t[]){ret});
+ if (h1c->flags & H1C_F_OUT_FULL) {
+ h1c->flags &= ~H1C_F_OUT_FULL;
+ TRACE_STATE("h1c obuf not full anymore", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn);
+ }
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_out, ret);
+ b_del(&h1c->obuf, ret);
+ sent = 1;
+ }
+
+ if (conn->flags & CO_FL_ERROR) {
+ /* connection error, nothing to send, clear the buffer to release it */
+ TRACE_DEVEL("connection error", H1_EV_H1C_SEND, h1c->conn);
+ h1c->flags |= H1C_F_ERR_PENDING;
+ if (h1c->flags & H1C_F_EOS)
+ h1c->flags |= H1C_F_ERROR;
+ else if (!(h1c->wait_event.events & SUB_RETRY_RECV)) {
+ /* EOS not seen, so subscribe for reads to be able to
+ * catch the error on the reading path. It is especially
+ * important if EOI was reached.
+ */
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ b_reset(&h1c->obuf);
+ }
+
+ end:
+ if (!(h1c->flags & (H1C_F_OUT_FULL|H1C_F_OUT_ALLOC)))
+ h1_wake_stream_for_send(h1c->h1s);
+
+ /* We're done, no more to send */
+ if (!b_data(&h1c->obuf)) {
+ TRACE_DEVEL("leaving with everything sent", H1_EV_H1C_SEND, h1c->conn);
+ h1_release_buf(h1c, &h1c->obuf);
+ if (h1c->state == H1_CS_CLOSING) {
+ TRACE_STATE("process pending shutdown for writes", H1_EV_H1C_SEND, h1c->conn);
+ h1_shutw_conn(conn);
+ }
+ }
+ else if (!(h1c->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_STATE("more data to send, subscribing", H1_EV_H1C_SEND, h1c->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_SEND, &h1c->wait_event);
+ }
+
+ TRACE_LEAVE(H1_EV_H1C_SEND, h1c->conn);
+ return sent || (h1c->flags & (H1C_F_ERR_PENDING|H1C_F_ERROR)) || (h1c->state == H1_CS_CLOSED);
+}
+
+/* callback called on any event by the connection handler.
+ * It applies changes and returns zero, or < 0 if it wants immediate
+ * destruction of the connection.
+ */
+static int h1_process(struct h1c * h1c)
+{
+ struct connection *conn = h1c->conn;
+
+ TRACE_ENTER(H1_EV_H1C_WAKE, conn);
+
+ /* Try to parse now the first block of a request, creating the H1 stream if necessary */
+ if (b_data(&h1c->ibuf) && /* Input data to be processed */
+ (h1c->state < H1_CS_RUNNING) && /* IDLE, EMBRYONIC or UPGRADING */
+ !(h1c->flags & (H1C_F_IN_SALLOC|H1C_F_ABRT_PENDING))) { /* No allocation failure on the stream rxbuf and no ERROR on the H1C */
+ struct h1s *h1s = h1c->h1s;
+ struct buffer *buf;
+ size_t count;
+
+ /* When it happens for a backend connection, we may release it (it is probably a 408) */
+ if (h1c->flags & H1C_F_IS_BACK)
+ goto release;
+
+ /* First of all handle H1 to H2 upgrade (no need to create the H1 stream) */
+ if (!(h1c->flags & H1C_F_WAIT_NEXT_REQ) && /* First request */
+ !(h1c->px->options2 & PR_O2_NO_H2_UPGRADE) && /* H2 upgrade supported by the proxy */
+ !(conn->mux->flags & MX_FL_NO_UPG)) { /* the current mux supports upgrades */
+ /* Try to match H2 preface before parsing the request headers. */
+ if (b_isteq(&h1c->ibuf, 0, b_data(&h1c->ibuf), ist(H2_CONN_PREFACE)) > 0) {
+ h1c->flags |= H1C_F_UPG_H2C;
+ if (h1c->state == H1_CS_UPGRADING) {
+ BUG_ON(!h1s);
+ se_fl_set(h1s->sd, SE_FL_EOI|SE_FL_EOS); /* Set EOS here to release the SC */
+ }
+				TRACE_STATE("release h1c to perform H2 upgrade", H1_EV_RX_DATA|H1_EV_H1C_WAKE);
+ goto release;
+ }
+ }
+
+ /* Create the H1 stream if not already there */
+ if (!h1s) {
+ h1s = h1c_frt_stream_new(h1c, NULL, h1c->conn->owner);
+ if (!h1s) {
+ b_reset(&h1c->ibuf);
+ h1_handle_internal_err(h1c);
+ TRACE_ERROR("alloc error", H1_EV_H1C_WAKE|H1_EV_H1C_ERR);
+ goto no_parsing;
+ }
+ }
+
+ if (h1s->sess->t_idle == -1)
+ h1s->sess->t_idle = ns_to_ms(now_ns - h1s->sess->accept_ts) - h1s->sess->t_handshake;
+
+ /* Get the stream rxbuf */
+ buf = h1_get_buf(h1c, &h1s->rxbuf);
+ if (!buf) {
+ h1c->flags |= H1C_F_IN_SALLOC;
+ TRACE_STATE("waiting for stream rxbuf allocation", H1_EV_H1C_WAKE|H1_EV_H1C_BLK, h1c->conn);
+ return 0;
+ }
+
+ count = (buf->size - sizeof(struct htx) - global.tune.maxrewrite);
+ h1_process_demux(h1c, buf, count);
+ h1_release_buf(h1c, &h1s->rxbuf);
+ h1_set_idle_expiration(h1c);
+ if (h1c->state < H1_CS_RUNNING) {
+ if (h1s->flags & H1S_F_INTERNAL_ERROR) {
+ h1_handle_internal_err(h1c);
+ TRACE_ERROR("internal error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR);
+ }
+ else if (h1s->flags & H1S_F_NOT_IMPL_ERROR) {
+ h1_handle_not_impl_err(h1c);
+ TRACE_ERROR("not-implemented error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR);
+ }
+ else if (h1s->flags & H1S_F_PARSING_ERROR || se_fl_test(h1s->sd, SE_FL_ERROR)) {
+ h1_handle_parsing_error(h1c);
+ TRACE_ERROR("parsing error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR);
+ }
+ else {
+ TRACE_STATE("Incomplete message, subscribing", H1_EV_RX_DATA|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn, h1s);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ }
+ }
+
+ no_parsing:
+ h1_send(h1c);
+
+ /* H1 connection must be released ASAP if:
+ * - an error occurred on the H1C or
+ * - a read0 was received or
+ * - a silent shutdown was emitted and all outgoing data sent
+ */
+ if ((h1c->flags & (H1C_F_EOS|H1C_F_ERROR|H1C_F_ABRT_PENDING|H1C_F_ABRTED)) ||
+ (h1c->state >= H1_CS_CLOSING && (h1c->flags & H1C_F_SILENT_SHUT) && !b_data(&h1c->obuf))) {
+ if (h1c->state != H1_CS_RUNNING) {
+ /* No stream connector or upgrading */
+ if (h1c->state < H1_CS_RUNNING && !(h1c->flags & (H1C_F_IS_BACK|H1C_F_ABRT_PENDING))) {
+ /* shutdown for reads and no error on the frontend connection: Send an error */
+ if (h1_handle_parsing_error(h1c))
+ h1_send(h1c);
+ }
+ else if (h1c->flags & H1C_F_ABRT_PENDING) {
+ /* Handle pending error, if any (only possible on frontend connection) */
+ BUG_ON(h1c->flags & H1C_F_IS_BACK);
+ if (h1_send_error(h1c))
+ h1_send(h1c);
+ }
+ else {
+ h1_close(h1c);
+ TRACE_STATE("close h1c", H1_EV_H1S_END, h1c->conn);
+ }
+
+ /* If there is some pending outgoing data or error, just wait */
+ if (h1c->state == H1_CS_CLOSING || (h1c->flags & H1C_F_ABRT_PENDING))
+ goto end;
+
+ /* Otherwise we can release the H1 connection */
+ goto release;
+ }
+ else {
+ struct h1s *h1s = h1c->h1s;
+
+ /* Here there is still a H1 stream with a stream connector.
+ * Report an error at the stream level and wake up the stream
+ */
+ BUG_ON(!h1s);
+
+ if (h1c->flags & (H1C_F_ERR_PENDING|H1C_F_ERROR)) {
+ se_fl_set_error(h1s->sd);
+ TRACE_STATE("report (ERR_PENDING|ERROR) to SE", H1_EV_H1C_RECV, conn, h1s);
+ }
+ TRACE_POINT(H1_EV_STRM_WAKE, h1c->conn, h1s);
+ h1_alert(h1s);
+ }
+ }
+
+ if (!b_data(&h1c->ibuf))
+ h1_release_buf(h1c, &h1c->ibuf);
+
+ /* Check if a soft-stop is in progress.
+	 * Release idle frontend connections if this is the case.
+ */
+ if (!(h1c->flags & H1C_F_IS_BACK)) {
+ if (unlikely(h1c->px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ if (!(h1c->px->options & PR_O_IDLE_CLOSE_RESP) &&
+ h1c->flags & H1C_F_WAIT_NEXT_REQ) {
+
+ int send_close = 1;
+ /* If a close-spread-time option is set, we want to avoid
+				 * closing all the active HTTP connections at once, so we add a
+ * random factor that will spread the closing.
+ */
+ if (tick_isset(global.close_spread_end)) {
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* This should increase the closing rate the
+ * further along the window we are.
+ */
+ send_close = (remaining_window <= statistical_prng_range(global.close_spread_time));
+ }
+ }
+ else if (global.tune.options & GTUNE_DISABLE_ACTIVE_CLOSE)
+					send_close = 0; /* let the client close its connection itself */
+ if (send_close)
+ goto release;
+ }
+ }
+ }
+
+ if (h1c->state == H1_CS_RUNNING && (h1c->flags & H1C_F_WANT_FASTFWD) && !h1s_data_pending(h1c->h1s)) {
+ TRACE_DEVEL("xprt rcv_buf blocked (want_fastfwd), notify h1s for recv", H1_EV_H1C_RECV, h1c->conn);
+ h1_wake_stream_for_recv(h1c->h1s);
+ }
+
+ end:
+ h1_refresh_timeout(h1c);
+ TRACE_LEAVE(H1_EV_H1C_WAKE, conn);
+ return 0;
+
+ release:
+ if (h1c->state == H1_CS_UPGRADING) {
+ struct h1s *h1s = h1c->h1s;
+
+ /* Don't release the H1 connection right now, we must destroy
+ * the attached SC first */
+ BUG_ON(!h1s);
+
+ if (h1c->flags & H1C_F_EOS) {
+ se_fl_set(h1s->sd, SE_FL_EOI|SE_FL_EOS);
+ TRACE_STATE("report EOS to SE", H1_EV_H1C_RECV, conn, h1s);
+ }
+ if (h1c->flags & (H1C_F_ERR_PENDING|H1C_F_ERROR)) {
+ se_fl_set_error(h1s->sd);
+ TRACE_STATE("report (ERR_PENDING|ERROR) to SE", H1_EV_H1C_RECV, conn, h1s);
+ }
+ h1_alert(h1s);
+ TRACE_DEVEL("waiting to release the SC before releasing the connection", H1_EV_H1C_WAKE);
+ }
+ else {
+ h1_release(h1c);
+ TRACE_DEVEL("leaving after releasing the connection", H1_EV_H1C_WAKE);
+ }
+ return -1;
+}
+
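+/* I/O tasklet handler of the H1 connection. It performs the pending send and
+ * receive, then processes the resulting events. Special care is taken for
+ * idle connections that may have been stolen by another thread (TASK_F_USR1).
+ * Returns the tasklet, or NULL if it was freed or if the connection was
+ * destroyed.
+ */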
+struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct connection *conn;
+ struct tasklet *tl = (struct tasklet *)t;
+ int conn_in_list;
+ struct h1c *h1c = ctx;
+ int ret = 0;
+
+ if (state & TASK_F_USR1) {
+ /* the tasklet was idling on an idle connection, it might have
+ * been stolen, let's be careful!
+ */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (tl->context == NULL) {
+ /* The connection has been taken over by another thread,
+ * we're no longer responsible for it, so just free the
+ * tasklet, and do nothing.
+ */
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ tasklet_free(tl);
+ return NULL;
+ }
+ conn = h1c->conn;
+ TRACE_POINT(H1_EV_H1C_WAKE, conn);
+
+ /* Remove the connection from the list, to be sure nobody attempts
+ * to use it while we handle the I/O events
+ */
+ conn_in_list = conn->flags & CO_FL_LIST_MASK;
+ if (conn_in_list)
+ conn_delete_from_tree(conn);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ } else {
+ /* we're certain the connection was not in an idle list */
+ conn = h1c->conn;
+ TRACE_ENTER(H1_EV_H1C_WAKE, conn);
+ conn_in_list = 0;
+ }
+
+ if (!(h1c->wait_event.events & SUB_RETRY_SEND))
+ ret = h1_send(h1c);
+ if (!(h1c->wait_event.events & SUB_RETRY_RECV))
+ ret |= h1_recv(h1c);
+ if (ret || b_data(&h1c->ibuf))
+ ret = h1_process(h1c);
+
+ /* If we were in an idle list, we want to add it back into it,
+	 * unless h1_process() returned -1, which means it has destroyed
+	 * the connection (testing !ret is enough: if h1_process() wasn't
+	 * called then ret will be 0 anyway).
+ */
+ if (ret < 0)
+ t = NULL;
+
+ if (!ret && conn_in_list) {
+ struct server *srv = objt_server(conn->target);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ _srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ return t;
+}
+
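+/* Callback called by the transport layer on connection-level events. It
+ * flushes pending outgoing data, processes the connection and, when a stream
+ * is still attached, wakes it up. Returns the h1_process() status, so -1
+ * means the connection was released.
+ */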
+static int h1_wake(struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+ int ret;
+
+ TRACE_POINT(H1_EV_H1C_WAKE, conn);
+
+ h1_send(h1c);
+ ret = h1_process(h1c);
+ if (ret == 0) {
+ struct h1s *h1s = h1c->h1s;
+
+ if (h1c->state == H1_CS_UPGRADING || h1c->state == H1_CS_RUNNING)
+ h1_alert(h1s);
+ }
+ return ret;
+}
+
+/* Connection timeout management. The principle is that if there is neither
+ * reception nor sending for a certain amount of time, the connection is closed.
+ */
+struct task *h1_timeout_task(struct task *t, void *context, unsigned int state)
+{
+ struct h1c *h1c = context;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(H1_EV_H1C_WAKE, h1c ? h1c->conn : NULL);
+
+ if (h1c) {
+ /* Make sure nobody stole the connection from us */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* Somebody already stole the connection from us, so we should not
+ * free it, we just have to free the task.
+ */
+ if (!t->context) {
+ h1c = NULL;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ goto do_leave;
+ }
+
+ if (!expired) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ TRACE_DEVEL("leaving (not expired)", H1_EV_H1C_WAKE, h1c->conn, h1c->h1s);
+ return t;
+ }
+
+		/* If a stream connector is still attached to the mux and ready,
+		 * wait for the stream's timeout
+ */
+ if (h1c->state == H1_CS_RUNNING) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ t->expire = TICK_ETERNITY;
+ TRACE_DEVEL("leaving (SC still attached)", H1_EV_H1C_WAKE, h1c->conn, h1c->h1s);
+ return t;
+ }
+
+ /* Try to send an error to the client */
+ if (h1c->state != H1_CS_CLOSING && !(h1c->flags & (H1C_F_IS_BACK|H1C_F_ERROR|H1C_F_ABRT_PENDING))) {
+ TRACE_DEVEL("timeout error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR, h1c->conn, h1c->h1s);
+ if (h1_handle_req_tout(h1c))
+ h1_send(h1c);
+ if (b_data(&h1c->obuf) || (h1c->flags & H1C_F_ABRT_PENDING)) {
+ h1_refresh_timeout(h1c);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ return t;
+ }
+ }
+
+ if (h1c->h1s && !se_fl_test(h1c->h1s->sd, SE_FL_ORPHAN)) {
+ /* Don't release the H1 connection right now, we must destroy the
+ * attached SC first. */
+ se_fl_set(h1c->h1s->sd, SE_FL_EOS | SE_FL_ERROR);
+ h1_alert(h1c->h1s);
+ h1_refresh_timeout(h1c);
+			HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ TRACE_DEVEL("waiting to release the SC before releasing the connection", H1_EV_H1C_WAKE);
+ return t;
+ }
+
+ /* We're about to destroy the connection, so make sure nobody attempts
+ * to steal it from us.
+ */
+ if (h1c->conn->flags & CO_FL_LIST_MASK)
+ conn_delete_from_tree(h1c->conn);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ do_leave:
+ task_destroy(t);
+
+ if (!h1c) {
+ /* resources were already deleted */
+		TRACE_DEVEL("leaving (no more h1c)", H1_EV_H1C_WAKE);
+ return NULL;
+ }
+
+ h1c->task = NULL;
+ h1_release(h1c);
+ TRACE_LEAVE(H1_EV_H1C_WAKE);
+ return NULL;
+}
+
+/*******************************************/
+/* functions below are used by the streams */
+/*******************************************/
+
+/*
+ * Attach a new stream to a connection
+ * (Used for outgoing connections)
+ */
+static int h1_attach(struct connection *conn, struct sedesc *sd, struct session *sess)
+{
+ struct h1c *h1c = conn->ctx;
+ struct h1s *h1s;
+
+	/* this connection is no longer idle (if it ever was) */
+ h1c->flags &= ~H1C_F_SILENT_SHUT;
+
+ TRACE_ENTER(H1_EV_STRM_NEW, conn);
+ if (h1c->flags & (H1C_F_ERR_PENDING|H1C_F_ERROR)) {
+ TRACE_ERROR("h1c on error", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, conn);
+ goto err;
+ }
+
+ h1s = h1c_bck_stream_new(h1c, sd->sc, sess);
+ if (h1s == NULL) {
+ TRACE_ERROR("h1s creation failure", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, conn);
+ goto err;
+ }
+
+ /* the connection is not idle anymore, let's mark this */
+ HA_ATOMIC_AND(&h1c->wait_event.tasklet->state, ~TASK_F_USR1);
+ xprt_set_used(conn, conn->xprt, conn->xprt_ctx);
+
+ TRACE_LEAVE(H1_EV_STRM_NEW, conn, h1s);
+ return 0;
+ err:
+ TRACE_DEVEL("leaving on error", H1_EV_STRM_NEW|H1_EV_STRM_END|H1_EV_STRM_ERR, conn);
+ return -1;
+}
+
+/* Retrieves a valid stream connector from this connection, or returns NULL.
+ * For this mux, it's easy as we can only store a single stream connector.
+ */
+static struct stconn *h1_get_first_sc(const struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+ struct h1s *h1s = h1c->h1s;
+
+ if (h1s)
+ return h1s_sc(h1s);
+
+ return NULL;
+}
+
+static void h1_destroy(void *ctx)
+{
+ struct h1c *h1c = ctx;
+
+ TRACE_POINT(H1_EV_H1C_END, h1c->conn);
+ if (!h1c->h1s || h1c->conn->ctx != h1c)
+ h1_release(h1c);
+}
+
+/*
+ * Detach the stream from the connection and possibly release the connection.
+ */
+static void h1_detach(struct sedesc *sd)
+{
+ struct h1s *h1s = sd->se;
+ struct h1c *h1c;
+ struct session *sess;
+ int is_not_first;
+
+ TRACE_ENTER(H1_EV_STRM_END, h1s ? h1s->h1c->conn : NULL, h1s);
+
+ if (!h1s) {
+ TRACE_LEAVE(H1_EV_STRM_END);
+ return;
+ }
+
+ sess = h1s->sess;
+ h1c = h1s->h1c;
+
+ sess->accept_date = date;
+ sess->accept_ts = now_ns;
+ sess->t_handshake = 0;
+ sess->t_idle = -1;
+
+ is_not_first = h1s->flags & H1S_F_NOT_FIRST;
+ h1s_destroy(h1s);
+
+ if (h1c->state == H1_CS_IDLE && (h1c->flags & H1C_F_IS_BACK)) {
+ /* this connection may be killed at any moment, we want it to
+ * die "cleanly" (i.e. only an RST).
+ */
+ h1c->flags |= H1C_F_SILENT_SHUT;
+
+		/* If there is any excess server data in the input buffer,
+		 * release it and close the connection ASAP (some data may
+		 * remain in the output buffer). This happens if a server sends
+		 * invalid responses, in which case we don't want to reuse
+		 * the connection.
+		 */
+ if (b_data(&h1c->ibuf)) {
+ h1_release_buf(h1c, &h1c->ibuf);
+ h1_close(h1c);
+ TRACE_DEVEL("remaining data on detach, kill connection", H1_EV_STRM_END|H1_EV_H1C_END);
+ goto release;
+ }
+
+ if (h1c->conn->flags & CO_FL_PRIVATE) {
+ /* Add the connection in the session server list, if not already done */
+ if (!session_add_conn(sess, h1c->conn, h1c->conn->target)) {
+ h1c->conn->owner = NULL;
+ h1c->conn->mux->destroy(h1c);
+ goto end;
+ }
+ /* Always idle at this step */
+ if (session_check_idle_conn(sess, h1c->conn)) {
+ /* The connection got destroyed, let's leave */
+ TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END);
+ goto end;
+ }
+ }
+ else {
+ if (h1c->conn->owner == sess)
+ h1c->conn->owner = NULL;
+
+ /* mark that the tasklet may lose its context to another thread and
+ * that the handler needs to check it under the idle conns lock.
+ */
+ HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ xprt_set_idle(h1c->conn, h1c->conn->xprt, h1c->conn->xprt_ctx);
+
+ if (!srv_add_to_idle_list(objt_server(h1c->conn->target), h1c->conn, is_not_first)) {
+ /* The server doesn't want it, let's kill the connection right away */
+ h1c->conn->mux->destroy(h1c);
+ TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END);
+ goto end;
+ }
+ /* At this point, the connection has been added to the
+ * server idle list, so another thread may already have
+ * hijacked it, so we can't do anything with it.
+ */
+ return;
+ }
+ }
+
+ release:
+ /* We don't want to close right now unless the connection is in error or shut down for writes */
+ if ((h1c->flags & H1C_F_ERROR) ||
+ (h1c->state == H1_CS_CLOSED) ||
+ (h1c->state == H1_CS_CLOSING && !b_data(&h1c->obuf)) ||
+ !h1c->conn->owner) {
+ TRACE_DEVEL("killing dead connection", H1_EV_STRM_END, h1c->conn);
+ h1_release(h1c);
+ }
+ else {
+ if (h1c->state == H1_CS_IDLE) {
+ /* If we have a new request, process it immediately or
+ * subscribe for reads waiting for new data
+ */
+ if (unlikely(b_data(&h1c->ibuf))) {
+ if (h1_process(h1c) == -1)
+ goto end;
+ }
+ else
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ h1_set_idle_expiration(h1c);
+ h1_refresh_timeout(h1c);
+ }
+ end:
+ TRACE_LEAVE(H1_EV_STRM_END);
+}
+
+
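+/* Shutdown for reads notification coming from the stream. Nothing is done at
+ * the connection level here, the event is only traced.
+ */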
+static void h1_shutr(struct stconn *sc, enum co_shr_mode mode)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c;
+
+ if (!h1s)
+ return;
+ h1c = h1s->h1c;
+
+ TRACE_POINT(H1_EV_STRM_SHUT, h1c->conn, h1s, 0, (size_t[]){mode});
+}
+
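+/* Shutdown for writes notification coming from the stream. The connection is
+ * closed (and possibly shut down at the transport level) unless it must stay
+ * alive for keep-alive or because an upgrade is in progress.
+ */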
+static void h1_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c;
+
+ if (!h1s)
+ return;
+ h1c = h1s->h1c;
+
+ TRACE_ENTER(H1_EV_STRM_SHUT, h1c->conn, h1s, 0, (size_t[]){mode});
+
+ if (se_fl_test(h1s->sd, SE_FL_KILL_CONN)) {
+ TRACE_STATE("stream wants to kill the connection", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto do_shutw;
+ }
+ if (h1c->state == H1_CS_CLOSING || (h1c->flags & (H1C_F_EOS|H1C_F_ERR_PENDING|H1C_F_ERROR))) {
+ TRACE_STATE("shutdown on connection (EOS || CLOSING || ERROR)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto do_shutw;
+ }
+
+ if (h1c->state == H1_CS_UPGRADING) {
+ TRACE_STATE("keep connection alive (UPGRADING)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto end;
+ }
+ if (((h1s->flags & H1S_F_WANT_KAL) && h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE)) {
+ TRACE_STATE("keep connection alive (want_kal)", H1_EV_STRM_SHUT, h1c->conn, h1s);
+ goto end;
+ }
+
+ do_shutw:
+ h1_close(h1c);
+ if (mode != CO_SHW_NORMAL)
+ h1c->flags |= H1C_F_SILENT_SHUT;
+
+ if (!b_data(&h1c->obuf))
+ h1_shutw_conn(h1c->conn);
+ end:
+ TRACE_LEAVE(H1_EV_STRM_SHUT, h1c->conn, h1s);
+}
+
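+/* Performs the transport-level shutdown for writes, unless it was already
+ * done. The shutdown is silent or not depending on H1C_F_SILENT_SHUT, and the
+ * tasklet is woken up if it is not subscribed to any event.
+ */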
+static void h1_shutw_conn(struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+
+ TRACE_ENTER(H1_EV_H1C_END, conn);
+ h1_close(h1c);
+ if (conn->flags & CO_FL_SOCK_WR_SH)
+ return;
+
+ conn_xprt_shutw(conn);
+ conn_sock_shutw(conn, !(h1c->flags & H1C_F_SILENT_SHUT));
+
+ if (h1c->wait_event.tasklet && !h1c->wait_event.events)
+ tasklet_wakeup(h1c->wait_event.tasklet);
+
+ TRACE_LEAVE(H1_EV_H1C_END, conn);
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int h1_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+
+ if (!h1s)
+ return 0;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(h1s->subs && h1s->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ h1s->subs = NULL;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("unsubscribe(recv)", H1_EV_STRM_RECV, h1s->h1c->conn, h1s);
+
+ if (event_type & SUB_RETRY_SEND)
+ TRACE_DEVEL("unsubscribe(send)", H1_EV_STRM_SEND, h1s->h1c->conn, h1s);
+
+ return 0;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0, unless
+ * the stream connector <sc> was already detached, in which case it will return
+ * -1.
+ */
+static int h1_subscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c;
+
+ if (!h1s)
+ return -1;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(h1s->subs && h1s->subs != es);
+
+ es->events |= event_type;
+ h1s->subs = es;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", H1_EV_STRM_RECV, h1s->h1c->conn, h1s);
+
+
+ if (event_type & SUB_RETRY_SEND) {
+ TRACE_DEVEL("subscribe(send)", H1_EV_STRM_SEND, h1s->h1c->conn, h1s);
+ /*
+ * If the stconn attempts to subscribe, and the
+ * mux isn't subscribed to the connection, then it
+ * probably means the connection wasn't established
+ * yet, so we have to subscribe.
+ */
+ h1c = h1s->h1c;
+ if (!(h1c->wait_event.events & SUB_RETRY_SEND))
+ h1c->conn->xprt->subscribe(h1c->conn,
+ h1c->conn->xprt_ctx,
+ SUB_RETRY_SEND,
+ &h1c->wait_event);
+ }
+ return 0;
+}
+
+/* Called from the upper layer, to receive data.
+ *
+ * The caller is responsible for defragmenting <buf> if necessary. But <flags>
+ * must be tested to know the calling context. If CO_RFL_BUF_FLUSH is set, it
+ * means the caller wants to flush input data (from the mux buffer and the
+ * channel buffer) to be able to use fast-forwarding.
+ * If CO_RFL_KEEP_RECV is set, the mux must always subscribe for read
+ * events before giving back. CO_RFL_BUF_WET is set if <buf> is congested with
+ * data scheduled for leaving soon. CO_RFL_BUF_NOT_STUCK is set to instruct the
+ * mux it may optimize the data copy to <buf> if necessary. Otherwise, it should
+ * copy as much data as possible.
+ */
+static size_t h1_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c = h1s->h1c;
+ struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->req : &h1s->res);
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){count});
+
+ /* Do nothing for now if not RUNNING (implies UPGRADING) */
+ if (h1c->state < H1_CS_RUNNING) {
+ TRACE_DEVEL("h1c not ready yet", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+ goto end;
+ }
+
+ if (!(h1c->flags & H1C_F_IN_ALLOC))
+ ret = h1_process_demux(h1c, buf, count);
+ else
+ TRACE_DEVEL("h1c ibuf not allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn);
+
+ if ((flags & CO_RFL_BUF_FLUSH) && se_fl_test(h1s->sd, SE_FL_MAY_FASTFWD_PROD)) {
+ h1c->flags |= H1C_F_WANT_FASTFWD;
+ TRACE_STATE("Block xprt rcv_buf to flush stream's buffer (want_fastfwd)", H1_EV_STRM_RECV, h1c->conn, h1s);
+ }
+ else {
+ if (((flags & CO_RFL_KEEP_RECV) || (h1m->state != H1_MSG_DONE)) && !(h1c->wait_event.events & SUB_RETRY_RECV))
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+
+ end:
+ TRACE_LEAVE(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
+
+/* Called from the upper layer, to send data */
+static size_t h1_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c;
+ size_t total = 0;
+
+ if (!h1s)
+ return 0;
+ h1c = h1s->h1c;
+
+ TRACE_ENTER(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){count});
+
+ /* If we're not connected yet, or we're waiting for a handshake, stop
+ * now, as we don't want to remove everything from the channel buffer
+ * before we're sure we can send it.
+ */
+ if (h1c->conn->flags & CO_FL_WAIT_XPRT) {
+ TRACE_LEAVE(H1_EV_STRM_SEND, h1c->conn, h1s);
+ return 0;
+ }
+
+ if (h1c->flags & (H1C_F_ERR_PENDING|H1C_F_ERROR)) {
+ se_fl_set_error(h1s->sd);
+ TRACE_ERROR("H1C on error, leaving in error", H1_EV_STRM_SEND|H1_EV_H1C_ERR|H1_EV_H1S_ERR|H1_EV_STRM_ERR, h1c->conn, h1s);
+ return 0;
+ }
+
+ /* Inherit some flags from the upper layer */
+ h1c->flags &= ~(H1C_F_CO_MSG_MORE|H1C_F_CO_STREAMER);
+ if (flags & CO_SFL_MSG_MORE)
+ h1c->flags |= H1C_F_CO_MSG_MORE;
+ if (flags & CO_SFL_STREAMER)
+ h1c->flags |= H1C_F_CO_STREAMER;
+
+ while (count) {
+ size_t ret = 0;
+
+ if (!(h1c->flags & (H1C_F_OUT_FULL|H1C_F_OUT_ALLOC)))
+ ret = h1_process_mux(h1c, buf, count);
+ else
+ TRACE_DEVEL("h1c obuf not allocated", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn, h1s);
+
+ if (!ret)
+ break;
+
+ if ((count - ret) > 0)
+ h1c->flags |= H1C_F_CO_MSG_MORE;
+
+ total += ret;
+ count -= ret;
+
+ if ((h1c->wait_event.events & SUB_RETRY_SEND) || !h1_send(h1c))
+ break;
+
+ if ((h1c->conn->flags & (CO_FL_ERROR|CO_FL_SOCK_WR_SH)))
+ break;
+ }
+
+ if (h1c->flags & (H1C_F_ERR_PENDING|H1C_F_ERROR)) {
+ // FIXME: following test was removed :
+ // ((h1c->conn->flags & CO_FL_ERROR) && (se_fl_test(h1s->sd, SE_FL_EOI | SE_FL_EOS) || !b_data(&h1c->ibuf)))) {
+ se_fl_set_error(h1s->sd);
+ TRACE_ERROR("reporting error to the app-layer stream", H1_EV_STRM_SEND|H1_EV_H1S_ERR|H1_EV_STRM_ERR, h1c->conn, h1s);
+ }
+
+ h1_refresh_timeout(h1c);
+ TRACE_LEAVE(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){total});
+ return total;
+}
+
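+/* Returns the stream endpoint descriptor of the opposite side, retrieved via
+ * the xref peer, or NULL if there is no opposite endpoint.
+ */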
+static inline struct sedesc *h1s_opposite_sd(struct h1s *h1s)
+{
+ struct xref *peer;
+ struct sedesc *sdo;
+
+ peer = xref_get_peer_and_lock(&h1s->sd->xref);
+ if (!peer)
+ return NULL;
+
+ sdo = container_of(peer, struct sedesc, xref);
+ xref_unlock(&h1s->sd->xref, peer);
+ return sdo;
+}
+
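+/* Negotiates a zero-copy fast-forward with the producer: depending on the
+ * situation, room is reserved in the output buffer or a kernel pipe is
+ * allocated, taking care of pending chunk framing first. Returns the number
+ * of bytes that may be forwarded, or 0 with an iobuf flag explaining why it
+ * is blocked.
+ */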
+static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c = h1s->h1c;
+ struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req);
+ size_t ret = 0;
+
+ TRACE_ENTER(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){count});
+
+
+ if (global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_H1_SND) {
+ h1s->sd->iobuf.flags |= IOBUF_FL_NO_FF;
+ goto out;
+ }
+
+ /* TODO: add check on curr_len if CLEN */
+
+ if (h1m->flags & H1_MF_CHNK) {
+ if (h1m->curr_len) {
+ BUG_ON(h1m->state != H1_MSG_DATA);
+ if (count > h1m->curr_len)
+ count = h1m->curr_len;
+ }
+ else {
+ BUG_ON(h1m->state != H1_MSG_CHUNK_CRLF && h1m->state != H1_MSG_CHUNK_SIZE);
+ if (!h1_make_chunk(h1s, h1m, count))
+ goto out;
+ h1m->curr_len = count;
+ }
+ }
+
+ /* Use kernel splicing if it is supported by the sender and if there
+ * are no input data _AND_ no output data.
+ *
+ * TODO: It may be good to add a flag to send obuf data first if any,
+ * and then data in pipe, or the opposite. For now, it is not
+ * supported to mix data.
+ */
+ if (!b_data(input) && !b_data(&h1c->obuf) && may_splice) {
+#if defined(USE_LINUX_SPLICE)
+ if (h1c->conn->xprt->snd_pipe && (h1s->sd->iobuf.pipe || (pipes_used < global.maxpipes && (h1s->sd->iobuf.pipe = get_pipe())))) {
+ h1s->sd->iobuf.offset = 0;
+ h1s->sd->iobuf.data = 0;
+ ret = count;
+ goto out;
+ }
+#endif
+ h1s->sd->iobuf.flags |= IOBUF_FL_NO_SPLICING;
+ TRACE_DEVEL("Unable to allocate pipe for splicing, fallback to buffer", H1_EV_STRM_SEND, h1c->conn, h1s);
+ }
+
+ if (!h1_get_buf(h1c, &h1c->obuf)) {
+ h1c->flags |= H1C_F_OUT_ALLOC;
+ TRACE_STATE("waiting for opposite h1c obuf allocation", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn, h1s);
+ goto out;
+ }
+
+ if (b_space_wraps(&h1c->obuf))
+ b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf));
+
+ h1s->sd->iobuf.buf = &h1c->obuf;
+ h1s->sd->iobuf.offset = 0;
+ h1s->sd->iobuf.data = 0;
+
+ /* Cannot forward more than available room in output buffer */
+ if (count > b_room(&h1c->obuf))
+ count = b_room(&h1c->obuf);
+
+ if (!count) {
+ h1c->flags |= H1C_F_OUT_FULL;
+ h1s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ TRACE_STATE("output buffer full", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn, h1s);
+ goto out;
+ }
+
+ /* forward remaining input data */
+ if (b_data(input)) {
+ size_t xfer = count;
+
+ if (xfer > b_data(input))
+ xfer = b_data(input);
+ h1s->sd->iobuf.data = b_xfer(&h1c->obuf, input, xfer);
+
+ /* Cannot forward more data, wait for room */
+ if (b_data(input))
+ goto out;
+ }
+
+ ret = count - h1s->sd->iobuf.data;
+
+ out:
+ TRACE_LEAVE(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
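+/* Completes a fast-forward previously negotiated with h1_nego_ff(): data
+ * placed in the pipe or in the output buffer is pushed to the transport layer
+ * and the H1 message state is updated accordingly. Returns the number of
+ * bytes considered as forwarded.
+ */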
+static size_t h1_done_ff(struct stconn *sc)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c = h1s->h1c;
+ struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req);
+ struct sedesc *sd = h1s->sd;
+ size_t total = 0;
+
+ TRACE_ENTER(H1_EV_STRM_SEND, h1c->conn, h1s);
+
+#if defined(USE_LINUX_SPLICE)
+ if (sd->iobuf.pipe) {
+ total = h1c->conn->xprt->snd_pipe(h1c->conn, h1c->conn->xprt_ctx, sd->iobuf.pipe, sd->iobuf.pipe->data);
+ if (total > 0)
+ HA_ATOMIC_ADD(&h1c->px_counters->spliced_bytes_out, total);
+ if (!sd->iobuf.pipe->data) {
+ put_pipe(sd->iobuf.pipe);
+ sd->iobuf.pipe = NULL;
+ }
+ goto out;
+ }
+#endif
+ if (!sd->iobuf.pipe) {
+ if (b_room(&h1c->obuf) == sd->iobuf.offset)
+ h1c->flags |= H1C_F_OUT_FULL;
+
+ total = sd->iobuf.data;
+ sd->iobuf.buf = NULL;
+ sd->iobuf.offset = 0;
+ sd->iobuf.data = 0;
+
+ if (sd->iobuf.flags & IOBUF_FL_EOI)
+ h1c->flags &= ~H1C_F_CO_MSG_MORE;
+
+ /* Perform a synchronous send but in all cases, consider
+ * everything was already sent from the SC point of view.
+ */
+ h1_send(h1c);
+ }
+
+ out:
+ if (h1m->curr_len)
+ h1m->curr_len -= total;
+
+ if (!h1m->curr_len && (h1m->flags & H1_MF_CLEN))
+ h1m->state = ((sd->iobuf.flags & IOBUF_FL_EOI) ? H1_MSG_DONE : H1_MSG_TRAILERS);
+ else if (!h1m->curr_len && (h1m->flags & H1_MF_CHNK)) {
+ if (h1m->state == H1_MSG_DATA)
+ h1m->state = H1_MSG_CHUNK_CRLF;
+ }
+
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_out, total);
+
+ // TODO: should we call h1_process() instead ?
+ if (h1c->conn->flags & CO_FL_ERROR) {
+ h1c->flags = (h1c->flags & ~H1C_F_WANT_FASTFWD) | H1C_F_ERR_PENDING;
+ if (h1c->flags & H1C_F_EOS)
+ h1c->flags |= H1C_F_ERROR;
+ else if (!(h1c->wait_event.events & SUB_RETRY_RECV)) {
+ /* EOS not seen, so subscribe for reads to be able to
+ * catch the error on the reading path. It is especially
+ * important if EOI was reached.
+ */
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ se_fl_set_error(h1s->sd);
+ TRACE_DEVEL("connection error", H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ }
+
+ TRACE_LEAVE(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){total});
+ return total;
+}
+
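+/* Fast-forwards payload data received on the connection directly to the
+ * opposite endpoint (into its buffer or a kernel pipe), bypassing the channel
+ * buffers. Message framing (content-length or chunks) and the EOI/EOS/error
+ * states are handled on the fly. Returns the number of bytes forwarded (0
+ * when blocked or when fast-forwarding is disabled).
+ */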
+static int h1_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c = h1s->h1c;
+ struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->req : &h1s->res);
+ struct sedesc *sdo = NULL;
+ size_t total = 0, try = 0;
+ int ret = 0;
+
+ TRACE_ENTER(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){count});
+
+ if (h1m->state != H1_MSG_DATA && h1m->state != H1_MSG_TUNNEL) {
+ h1c->flags &= ~H1C_F_WANT_FASTFWD;
+		TRACE_STATE("Cannot fast-forward data now !(msg_data|msg_tunnel)", H1_EV_STRM_RECV, h1c->conn, h1s);
+ goto end;
+ }
+
+ se_fl_clr(h1s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ h1c->conn->flags &= ~CO_FL_WAIT_ROOM;
+ h1c->flags |= H1C_F_WANT_FASTFWD;
+
+ if (h1c->flags & (H1C_F_EOS|H1C_F_ERROR)) {
+ h1c->flags &= ~H1C_F_WANT_FASTFWD;
+ TRACE_DEVEL("leaving on (EOS|ERROR)", H1_EV_STRM_RECV, h1c->conn, h1s);
+ goto end;
+ }
+
+ sdo = h1s_opposite_sd(h1s);
+ if (!sdo) {
+ TRACE_STATE("Opposite endpoint not available yet", H1_EV_STRM_RECV, h1c->conn, h1s);
+ goto out;
+ }
+
+ retry:
+ ret = 0;
+
+ if (h1m->state == H1_MSG_DATA && (h1m->flags & (H1_MF_CHNK|H1_MF_CLEN)) && count > h1m->curr_len)
+ count = h1m->curr_len;
+
+ try = se_nego_ff(sdo, &h1c->ibuf, count, h1c->conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING));
+ if (b_room(&h1c->ibuf) && (h1c->flags & H1C_F_IN_FULL)) {
+ h1c->flags &= ~H1C_F_IN_FULL;
+ TRACE_STATE("h1c ibuf not full anymore", H1_EV_STRM_RECV|H1_EV_H1C_BLK);
+ }
+ if (!b_data(&h1c->ibuf))
+ h1_release_buf(h1c, &h1c->ibuf);
+
+ if (sdo->iobuf.flags & IOBUF_FL_NO_FF) {
+ /* Fast forwarding is not supported by the consumer */
+ h1c->flags = (h1c->flags & ~H1C_F_WANT_FASTFWD) | H1C_F_CANT_FASTFWD;
+ TRACE_DEVEL("Fast-forwarding not supported by opposite endpoint, disable it", H1_EV_STRM_RECV, h1c->conn, h1s);
+ goto end;
+ }
+ if (sdo->iobuf.flags & IOBUF_FL_FF_BLOCKED) {
+ se_fl_set(h1s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ TRACE_STATE("waiting for more room", H1_EV_STRM_RECV|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto out;
+ }
+
+ total += sdo->iobuf.data;
+ count -= sdo->iobuf.data;
+#if defined(USE_LINUX_SPLICE)
+ if (sdo->iobuf.pipe) {
+		/* Here, no data was xferred yet, receive it into the pipe */
+ ret = h1c->conn->xprt->rcv_pipe(h1c->conn, h1c->conn->xprt_ctx, sdo->iobuf.pipe, try);
+ if (ret < 0) {
+ h1c->flags = (h1c->flags & ~H1C_F_WANT_FASTFWD) | H1C_F_CANT_FASTFWD;
+ TRACE_ERROR("Error when trying to fast-forward data, disable it and abort",
+ H1_EV_STRM_RECV|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ BUG_ON(sdo->iobuf.pipe->data);
+ put_pipe(sdo->iobuf.pipe);
+ sdo->iobuf.pipe = NULL;
+ goto end;
+ }
+ total += ret;
+ count -= ret;
+ if (!ret) {
+ TRACE_STATE("failed to receive data, subscribing", H1_EV_STRM_RECV, h1c->conn);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ HA_ATOMIC_ADD(&h1c->px_counters->spliced_bytes_in, ret);
+ }
+#endif
+ if (!sdo->iobuf.pipe) {
+ b_add(sdo->iobuf.buf, sdo->iobuf.offset);
+ ret = h1c->conn->xprt->rcv_buf(h1c->conn, h1c->conn->xprt_ctx, sdo->iobuf.buf, try, flags);
+ if (ret < try) {
+ TRACE_STATE("failed to receive data, subscribing", H1_EV_STRM_RECV, h1c->conn);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ b_sub(sdo->iobuf.buf, sdo->iobuf.offset);
+ total += ret;
+ count -= ret;
+ sdo->iobuf.data += ret;
+ }
+
+	/* So far we forwarded less than a full buffer, so we can immediately
+	 * retry to fast-forward more data. Instruct the consumer it is an
+	 * interim fast-forward. It is of course only possible if there is
+	 * still data to fast-forward (count > 0), if the previous attempt was
+	 * a full success (ret == try > 0) and if we are not splicing
+	 * (iobuf.buf != NULL).
+	 */
+ if (ret > 0 && ret == try && count && sdo->iobuf.buf && total < b_size(sdo->iobuf.buf)) {
+ sdo->iobuf.flags |= IOBUF_FL_INTERIM_FF;
+ se_done_ff(sdo);
+ goto retry;
+ }
+
+ out:
+ if (h1m->state == H1_MSG_DATA && (h1m->flags & (H1_MF_CHNK|H1_MF_CLEN))) {
+ if (total > h1m->curr_len) {
+ h1s->flags |= H1S_F_PARSING_ERROR;
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ TRACE_ERROR("too much payload, more than announced",
+ H1_EV_STRM_RECV|H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ goto end;
+ }
+ h1m->curr_len -= total;
+ if (!h1m->curr_len) {
+ if (h1m->flags & H1_MF_CLEN) {
+ h1m->state = H1_MSG_DONE;
+ se_fl_set(h1s->sd, SE_FL_EOI); /* TODO: this line is tricky and must be evaluated first
+ * Its purpose is to avoid to set CO_SFL_MSG_MORE on the
+ * next calls to ->complete_fastfwd().
+ */
+ }
+ else
+ h1m->state = H1_MSG_CHUNK_CRLF;
+ h1c->flags &= ~H1C_F_WANT_FASTFWD;
+
+ if (!(h1c->flags & H1C_F_IS_BACK)) {
+ /* The request was fully received. It means the H1S now
+ * expect data from the opposite side
+ */
+ se_expect_data(h1s->sd);
+ }
+
+ TRACE_STATE("payload fully received", H1_EV_STRM_RECV, h1c->conn, h1s);
+ }
+ }
+
+ if (conn_xprt_read0_pending(h1c->conn)) {
+ se_fl_set(h1s->sd, SE_FL_EOS);
+ TRACE_STATE("report EOS to SE", H1_EV_STRM_RECV, h1c->conn, h1s);
+ if (h1m->state >= H1_MSG_DONE || !(h1m->flags & H1_MF_XFER_LEN)) {
+ /* DONE or TUNNEL or SHUTR without XFER_LEN, set
+ * EOI on the stream connector */
+ se_fl_set(h1s->sd, SE_FL_EOI);
+ TRACE_STATE("report EOI to SE", H1_EV_STRM_RECV, h1c->conn, h1s);
+ }
+ else {
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ h1c->flags = (h1c->flags & ~H1C_F_WANT_FASTFWD) | H1C_F_ERROR;
+ TRACE_ERROR("message aborted, set error on SC", H1_EV_STRM_RECV|H1_EV_H1S_ERR, h1c->conn, h1s);
+ }
+ h1c->flags = (h1c->flags & ~H1C_F_WANT_FASTFWD) | H1C_F_EOS;
+ TRACE_STATE("Allow xprt rcv_buf on read0", H1_EV_STRM_RECV, h1c->conn, h1s);
+ }
+ if (h1c->conn->flags & CO_FL_ERROR) {
+ se_fl_set(h1s->sd, SE_FL_ERROR);
+ h1c->flags = (h1c->flags & ~H1C_F_WANT_FASTFWD) | H1C_F_ERROR;
+ TRACE_DEVEL("connection error", H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ }
+
+
+ sdo->iobuf.flags &= ~IOBUF_FL_INTERIM_FF;
+ if (se_fl_test(h1s->sd, SE_FL_EOI)) {
+ sdo->iobuf.flags |= IOBUF_FL_EOI; /* TODO: it may be good to have a flag to be sure we can
+						   * forward the EOI to the consumer side
+ */
+ }
+ se_done_ff(sdo);
+
+ ret = total;
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_in, total);
+
+ if (sdo->iobuf.pipe) {
+ se_fl_set(h1s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ }
+
+ end:
+
+ if (!(h1c->flags & H1C_F_WANT_FASTFWD)) {
+ TRACE_STATE("notify the mux can't use fast-forward anymore", H1_EV_STRM_RECV, h1c->conn, h1s);
+ se_fl_clr(h1s->sd, SE_FL_MAY_FASTFWD_PROD);
+ if (!(h1c->wait_event.events & SUB_RETRY_RECV)) {
+ TRACE_STATE("restart receiving data, subscribing", H1_EV_STRM_RECV, h1c->conn, h1s);
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ }
+
+ TRACE_LEAVE(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
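+/* Resumes a blocked send-side fast-forward by flushing the data remaining in
+ * the kernel pipe, if any, and updating the H1 message state. Returns the
+ * number of bytes sent this way.
+ */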
+static int h1_resume_fastfwd(struct stconn *sc, unsigned int flags)
+{
+ struct h1s *h1s = __sc_mux_strm(sc);
+ struct h1c *h1c = h1s->h1c;
+ int ret = 0;
+
+ TRACE_ENTER(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){flags});
+
+#if defined(USE_LINUX_SPLICE)
+ if (h1s->sd->iobuf.pipe) {
+ struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req);
+ struct sedesc *sd = h1s->sd;
+
+ ret = h1c->conn->xprt->snd_pipe(h1c->conn, h1c->conn->xprt_ctx, sd->iobuf.pipe, sd->iobuf.pipe->data);
+ if (ret > 0)
+ HA_ATOMIC_ADD(&h1c->px_counters->spliced_bytes_out, ret);
+ if (!sd->iobuf.pipe->data) {
+ put_pipe(sd->iobuf.pipe);
+ sd->iobuf.pipe = NULL;
+ }
+
+ h1m->curr_len -= ret;
+
+ if (!h1m->curr_len && (h1m->flags & H1_MF_CLEN))
+ h1m->state = H1_MSG_DONE;
+ else if (!h1m->curr_len && (h1m->flags & H1_MF_CHNK)) {
+ if (h1m->state == H1_MSG_DATA)
+ h1m->state = H1_MSG_CHUNK_CRLF;
+ }
+
+ HA_ATOMIC_ADD(&h1c->px_counters->bytes_out, ret);
+ }
+#endif
+
+ // TODO: should we call h1_process() instead ?
+ if (h1c->conn->flags & CO_FL_ERROR) {
+ h1c->flags = (h1c->flags & ~H1C_F_WANT_FASTFWD) | H1C_F_ERR_PENDING;
+ if (h1c->flags & H1C_F_EOS)
+ h1c->flags |= H1C_F_ERROR;
+ else if (!(h1c->wait_event.events & SUB_RETRY_RECV)) {
+ /* EOS not seen, so subscribe for reads to be able to
+ * catch the error on the reading path. It is especially
+ * important if EOI was reached.
+ */
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ }
+ se_fl_set_error(h1s->sd);
+ TRACE_DEVEL("connection error", H1_EV_STRM_ERR|H1_EV_H1C_ERR|H1_EV_H1S_ERR, h1c->conn, h1s);
+ }
+
+ TRACE_LEAVE(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){ret});
+ return ret;
+}
+
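+/* Connection-level control callback. It reports the mux readiness, maps the
+ * internal HTTP error code to a MUX_ES_* exit status, or performs a receive
+ * subscription. Returns -1 for unsupported commands.
+ */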
+static int h1_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output)
+{
+ struct h1c *h1c = conn->ctx;
+ int ret = 0;
+
+ switch (mux_ctl) {
+ case MUX_CTL_STATUS:
+ if (!(conn->flags & CO_FL_WAIT_XPRT))
+ ret |= MUX_STATUS_READY;
+ return ret;
+ case MUX_CTL_EXIT_STATUS:
+ if (output)
+ *((int *)output) = h1c->errcode;
+ ret = (h1c->errcode == 408 ? MUX_ES_TOUT_ERR :
+ (h1c->errcode == 501 ? MUX_ES_NOTIMPL_ERR :
+ (h1c->errcode == 500 ? MUX_ES_INTERNAL_ERR :
+ ((h1c->errcode >= 400 && h1c->errcode <= 499) ? MUX_ES_INVALID_ERR :
+ MUX_ES_SUCCESS))));
+ return ret;
+ case MUX_CTL_SUBS_RECV:
+ if (!(h1c->wait_event.events & SUB_RETRY_RECV))
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event);
+ return 0;
+ default:
+ return -1;
+ }
+}
+
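+/* Stream-level control callback. It only supports MUX_SCTL_SID, reporting
+ * the request count of the connection as the stream identifier. Returns -1
+ * for unsupported commands.
+ */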
+static int h1_sctl(struct stconn *sc, enum mux_sctl_type mux_sctl, void *output)
+{
+ int ret = 0;
+ struct h1s *h1s = __sc_mux_strm(sc);
+
+ switch (mux_sctl) {
+ case MUX_SCTL_SID:
+ if (output)
+ *((int64_t *)output) = h1s->h1c->req_count;
+ return ret;
+
+ default:
+ return -1;
+ }
+}
+
+/* appends some info about connection <h1c> to buffer <msg>, or does nothing if
+ * <h1c> is NULL. Returns non-zero if the connection is considered suspicious.
+ * May emit multiple lines, each new one being prefixed with <pfx>, if <pfx> is
+ * not NULL, otherwise a single line is used.
+ */
+static int h1_dump_h1c_info(struct buffer *msg, struct h1c *h1c, const char *pfx)
+{
+ int ret = 0;
+
+ if (!h1c)
+ return ret;
+
+ chunk_appendf(msg, " h1c.flg=0x%x .sub=%d .ibuf=%u@%p+%u/%u .obuf=%u@%p+%u/%u",
+ h1c->flags, h1c->wait_event.events,
+ (unsigned int)b_data(&h1c->ibuf), b_orig(&h1c->ibuf),
+ (unsigned int)b_head_ofs(&h1c->ibuf), (unsigned int)b_size(&h1c->ibuf),
+ (unsigned int)b_data(&h1c->obuf), b_orig(&h1c->obuf),
+ (unsigned int)b_head_ofs(&h1c->obuf), (unsigned int)b_size(&h1c->obuf));
+
+ chunk_appendf(msg, " .task=%p", h1c->task);
+ if (h1c->task) {
+ chunk_appendf(msg, " .exp=%s",
+ h1c->task->expire ? tick_is_expired(h1c->task->expire, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(h1c->task->expire - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ }
+
+ return ret;
+}
+
+/* appends some info about stream <h1s> to buffer <msg>, or does nothing if
+ * <h1s> is NULL. Returns non-zero if the stream is considered suspicious. May
+ * emit multiple lines, each new one being prefixed with <pfx>, if <pfx> is not
+ * NULL, otherwise a single line is used.
+ */
+static int h1_dump_h1s_info(struct buffer *msg, const struct h1s *h1s, const char *pfx)
+{
+ const char *method;
+ int ret = 0;
+
+ if (!h1s)
+ return ret;
+
+ if (h1s->meth < HTTP_METH_OTHER)
+ method = http_known_methods[h1s->meth].ptr;
+ else
+ method = "UNKNOWN";
+
+ chunk_appendf(msg, " h1s=%p h1s.flg=0x%x .sd.flg=0x%x .req.state=%s .res.state=%s",
+ h1s, h1s->flags, se_fl_get(h1s->sd),
+ h1m_state_str(h1s->req.state), h1m_state_str(h1s->res.state));
+
+ if (pfx)
+ chunk_appendf(msg, "\n%s", pfx);
+
+ chunk_appendf(msg, " .meth=%s status=%d",
+ method, h1s->status);
+
+ chunk_appendf(msg, " .sd.flg=0x%08x", se_fl_get(h1s->sd));
+ if (!se_fl_test(h1s->sd, SE_FL_ORPHAN))
+ chunk_appendf(msg, " .sc.flg=0x%08x .sc.app=%p",
+ h1s_sc(h1s)->flags, h1s_sc(h1s)->app);
+
+ if (pfx && h1s->subs)
+ chunk_appendf(msg, "\n%s", pfx);
+
+ chunk_appendf(msg, " .subs=%p", h1s->subs);
+ if (h1s->subs) {
+ chunk_appendf(msg, "(ev=%d tl=%p", h1s->subs->events, h1s->subs->tasklet);
+ chunk_appendf(msg, " tl.calls=%d tl.ctx=%p tl.fct=",
+ h1s->subs->tasklet->calls,
+ h1s->subs->tasklet->context);
+ if (h1s->subs->tasklet->calls >= 1000000)
+ ret = 1;
+ resolve_sym_name(msg, NULL, h1s->subs->tasklet->process);
+ chunk_appendf(msg, ")");
+ }
+ return ret;
+}
+
+/* for debugging with CLI's "show fd" command */
+static int h1_show_fd(struct buffer *msg, struct connection *conn)
+{
+ struct h1c *h1c = conn->ctx;
+ struct h1s *h1s = h1c->h1s;
+ int ret = 0;
+
+ ret |= h1_dump_h1c_info(msg, h1c, NULL);
+
+ if (h1s)
+ ret |= h1_dump_h1s_info(msg, h1s, NULL);
+
+ return ret;
+}
+
+/* for debugging with CLI's "show sess" command. May emit multiple lines, each
+ * new one being prefixed with <pfx>, if <pfx> is not NULL, otherwise a single
+ * line is used. Each field starts with a space so it's safe to print it after
+ * existing fields.
+ */
+static int h1_show_sd(struct buffer *msg, struct sedesc *sd, const char *pfx)
+{
+ struct h1s *h1s = sd->se;
+ int ret = 0;
+
+ if (!h1s)
+ return ret;
+
+ ret |= h1_dump_h1s_info(msg, h1s, pfx);
+ if (pfx)
+ chunk_appendf(msg, "\n%s", pfx);
+ chunk_appendf(msg, " h1c=%p", h1s->h1c);
+ ret |= h1_dump_h1c_info(msg, h1s->h1c, pfx);
+ return ret;
+}
+
+
+/* Add an entry in the headers map. Returns -1 on error and 0 on success. */
+static int add_hdr_case_adjust(const char *from, const char *to, char **err)
+{
+ struct h1_hdr_entry *entry;
+
+ /* Be sure there is a non-empty <to> */
+ if (!strlen(to)) {
+ memprintf(err, "expect <to>");
+ return -1;
+ }
+
+ /* Be sure only the case differs between <from> and <to> */
+ if (strcasecmp(from, to) != 0) {
+		memprintf(err, "<from> and <to> must not differ except for the case");
+ return -1;
+ }
+
+	/* Be sure <from> does not already exist in the tree */
+ if (ebis_lookup(&hdrs_map.map, from)) {
+ memprintf(err, "duplicate entry '%s'", from);
+ return -1;
+ }
+
+ /* Create the entry and insert it in the tree */
+ entry = malloc(sizeof(*entry));
+ if (!entry) {
+ memprintf(err, "out of memory");
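+/* Releases the H1 connection, unless a stream is still attached and the
+ * connection context still points to this mux (e.g. when waiting for the
+ * stream to detach).
+ */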
+ return -1;
+ }
+
+ entry->node.key = strdup(from);
+ entry->name = ist(strdup(to));
+ if (!entry->node.key || !isttest(entry->name)) {
+ free(entry->node.key);
+ istfree(&entry->name);
+ free(entry);
+ memprintf(err, "out of memory");
+ return -1;
+ }
+ ebis_insert(&hdrs_map.map, &entry->node);
+ return 0;
+}
+
+/* Migrate the connection to the current thread.
+ * Return 0 if successful, non-zero otherwise.
+ * Expected to be called with the old thread lock held.
+ */
+static int h1_takeover(struct connection *conn, int orig_tid)
+{
+ struct h1c *h1c = conn->ctx;
+ struct task *task;
+ struct task *new_task;
+ struct tasklet *new_tasklet;
+
+ /* Pre-allocate tasks so that we don't have to roll back after the xprt
+ * has been migrated.
+ */
+ new_task = task_new_here();
+ new_tasklet = tasklet_new();
+ if (!new_task || !new_tasklet)
+ goto fail;
+
+ if (fd_takeover(conn->handle.fd, conn) != 0)
+ goto fail;
+
+ if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) {
+ /* We failed to takeover the xprt, even if the connection may
+ * still be valid, flag it as error'd, as we have already
+ * taken over the fd, and wake the tasklet, so that it will
+ * destroy it.
+ */
+ conn->flags |= CO_FL_ERROR;
+ tasklet_wakeup_on(h1c->wait_event.tasklet, orig_tid);
+ goto fail;
+ }
+
+ if (h1c->wait_event.events)
+ h1c->conn->xprt->unsubscribe(h1c->conn, h1c->conn->xprt_ctx,
+ h1c->wait_event.events, &h1c->wait_event);
+
+ task = h1c->task;
+ if (task) {
+ /* only assign a task if there was already one, otherwise
+ * the preallocated new task will be released.
+ */
+ task->context = NULL;
+ h1c->task = NULL;
+ __ha_barrier_store();
+ task_kill(task);
+
+ h1c->task = new_task;
+ new_task = NULL;
+ h1c->task->process = h1_timeout_task;
+ h1c->task->context = h1c;
+ }
+
+ /* To let the tasklet know it should free itself, and do nothing else,
+ * set its context to NULL.
+ */
+ h1c->wait_event.tasklet->context = NULL;
+ tasklet_wakeup_on(h1c->wait_event.tasklet, orig_tid);
+
+ h1c->wait_event.tasklet = new_tasklet;
+ h1c->wait_event.tasklet->process = h1_io_cb;
+ h1c->wait_event.tasklet->context = h1c;
+ h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx,
+ SUB_RETRY_RECV, &h1c->wait_event);
+
+ if (new_task)
+ __task_free(new_task);
+ return 0;
+ fail:
+ if (new_task)
+ __task_free(new_task);
+ tasklet_free(new_tasklet);
+ return -1;
+}
+
+
+static void h1_headers_case_adjust_deinit()
+{
+ struct ebpt_node *node, *next;
+ struct h1_hdr_entry *entry;
+
+ node = ebpt_first(&hdrs_map.map);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ entry = container_of(node, struct h1_hdr_entry, node);
+ free(entry->node.key);
+ istfree(&entry->name);
+ free(entry);
+ node = next;
+ }
+ free(hdrs_map.name);
+}
+
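+/* Post-parser callback: loads the configured case-adjust map file, if any,
+ * and registers one entry per "<from> <to>" pair found in it.
+ */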
+static int cfg_h1_headers_case_adjust_postparser()
+{
+ FILE *file = NULL;
+ char *c, *key_beg, *key_end, *value_beg, *value_end;
+ char *err;
+ int rc, line = 0, err_code = 0;
+
+ if (!hdrs_map.name)
+ goto end;
+
+ file = fopen(hdrs_map.name, "r");
+ if (!file) {
+ ha_alert("h1-headers-case-adjust-file '%s': failed to open file.\n",
+ hdrs_map.name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+	/* now parse all lines. The file may only contain two header names per
+	 * line, separated by spaces. All leading and trailing spaces will be
+	 * ignored. Lines starting with a '#' are ignored.
+ */
+ while (fgets(trash.area, trash.size, file) != NULL) {
+ line++;
+ c = trash.area;
+
+ /* strip leading spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+		/* ignore empty lines, or lines beginning with a '#' */
+ if (*c == '#' || *c == '\0' || *c == '\r' || *c == '\n')
+ continue;
+
+ /* look for the end of the key */
+ key_beg = c;
+ while (*c != '\0' && *c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
+ c++;
+ key_end = c;
+
+ /* strip middle spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+ /* look for the end of the value, it is the end of the line */
+ value_beg = c;
+ while (*c && *c != '\n' && *c != '\r')
+ c++;
+ value_end = c;
+
+ /* trim possibly trailing spaces and tabs */
+ while (value_end > value_beg && (value_end[-1] == ' ' || value_end[-1] == '\t'))
+ value_end--;
+
+ /* set final \0 and check entries */
+ *key_end = '\0';
+ *value_end = '\0';
+
+ err = NULL;
+ rc = add_hdr_case_adjust(key_beg, value_beg, &err);
+ if (rc < 0) {
+ ha_alert("h1-headers-case-adjust-file '%s' : %s at line %d.\n",
+ hdrs_map.name, err, line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(err);
+ goto end;
+ }
+ if (rc > 0) {
+ ha_warning("h1-headers-case-adjust-file '%s' : %s at line %d.\n",
+ hdrs_map.name, err, line);
+ err_code |= ERR_WARN;
+ free(err);
+ }
+ }
+
+ end:
+ if (file)
+ fclose(file);
+	hap_register_post_deinit(h1_headers_case_adjust_deinit);
+ return err_code;
+}
+
+/* config parser for global "h1-accept-payload-with-any-method" */
+static int cfg_parse_h1_accept_payload_with_any_method(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+ accept_payload_with_any_method = 1;
+ return 0;
+}
+
+
+/* config parser for global "h1-case-adjust" */
+static int cfg_parse_h1_header_case_adjust(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(2, args, err, NULL))
+ return -1;
+ if (!*(args[1]) || !*(args[2])) {
+ memprintf(err, "'%s' expects <from> and <to> as argument.", args[0]);
+ return -1;
+ }
+ return add_hdr_case_adjust(args[1], args[2], err);
+}
+
+/* config parser for global "h1-case-adjust-file" */
+static int cfg_parse_h1_headers_case_adjust_file(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+ if (!*(args[1])) {
+ memprintf(err, "'%s' expects <file> as argument.", args[0]);
+ return -1;
+ }
+ free(hdrs_map.name);
+ hdrs_map.name = strdup(args[1]);
+ return 0;
+}
+
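+/* Illustrative global-section usage of the two case-adjust keywords above
+ * (the map file path is only an example):
+ *   h1-case-adjust content-length Content-Length
+ *   h1-case-adjust-file /etc/haproxy/h1-headers.map
+ */
+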
+/* config parser for global "tune.h1.zero-copy-fwd-recv" */
+static int cfg_parse_h1_zero_copy_fwd_rcv(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_H1_RCV;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_H1_RCV;
+ else {
+ memprintf(err, "'%s' expects 'on' or 'off'.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.h1.zero-copy-fwd-send" */
+static int cfg_parse_h1_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_H1_SND;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_H1_SND;
+ else {
+ memprintf(err, "'%s' expects 'on' or 'off'.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {{ }, {
+ { CFG_GLOBAL, "h1-accept-payload-with-any-method", cfg_parse_h1_accept_payload_with_any_method },
+ { CFG_GLOBAL, "h1-case-adjust", cfg_parse_h1_header_case_adjust },
+ { CFG_GLOBAL, "h1-case-adjust-file", cfg_parse_h1_headers_case_adjust_file },
+ { CFG_GLOBAL, "tune.h1.zero-copy-fwd-recv", cfg_parse_h1_zero_copy_fwd_rcv },
+ { CFG_GLOBAL, "tune.h1.zero-copy-fwd-send", cfg_parse_h1_zero_copy_fwd_snd },
+ { 0, NULL, NULL },
+ }
+};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+REGISTER_CONFIG_POSTPARSER("h1-headers-map", cfg_h1_headers_case_adjust_postparser);
+
+
+/****************************************/
+/* MUX initialization and instantiation */
+/****************************************/
+
+/* The mux operations for the default HTTP mux, with H2 upgrades allowed */
+static const struct mux_ops mux_http_ops = {
+ .init = h1_init,
+ .wake = h1_wake,
+ .attach = h1_attach,
+ .get_first_sc = h1_get_first_sc,
+ .detach = h1_detach,
+ .destroy = h1_destroy,
+ .avail_streams = h1_avail_streams,
+ .used_streams = h1_used_streams,
+ .rcv_buf = h1_rcv_buf,
+ .snd_buf = h1_snd_buf,
+ .nego_fastfwd = h1_nego_ff,
+ .done_fastfwd = h1_done_ff,
+ .fastfwd = h1_fastfwd,
+ .resume_fastfwd = h1_resume_fastfwd,
+ .subscribe = h1_subscribe,
+ .unsubscribe = h1_unsubscribe,
+ .shutr = h1_shutr,
+ .shutw = h1_shutw,
+ .show_fd = h1_show_fd,
+ .show_sd = h1_show_sd,
+ .ctl = h1_ctl,
+ .sctl = h1_sctl,
+ .takeover = h1_takeover,
+ .flags = MX_FL_HTX,
+ .name = "H1",
+};
+
+static const struct mux_ops mux_h1_ops = {
+ .init = h1_init,
+ .wake = h1_wake,
+ .attach = h1_attach,
+ .get_first_sc = h1_get_first_sc,
+ .detach = h1_detach,
+ .destroy = h1_destroy,
+ .avail_streams = h1_avail_streams,
+ .used_streams = h1_used_streams,
+ .rcv_buf = h1_rcv_buf,
+ .snd_buf = h1_snd_buf,
+ .nego_fastfwd = h1_nego_ff,
+ .done_fastfwd = h1_done_ff,
+ .fastfwd = h1_fastfwd,
+ .resume_fastfwd = h1_resume_fastfwd,
+ .subscribe = h1_subscribe,
+ .unsubscribe = h1_unsubscribe,
+ .shutr = h1_shutr,
+ .shutw = h1_shutw,
+ .show_fd = h1_show_fd,
+ .show_sd = h1_show_sd,
+ .ctl = h1_ctl,
+ .sctl = h1_sctl,
+ .takeover = h1_takeover,
+ .flags = MX_FL_HTX|MX_FL_NO_UPG,
+ .name = "H1",
+};
+
+/* this mux registers the default HTX proto but also the "h1" proto (to be referenced in the conf) */
+static struct mux_proto_list mux_proto_h1 =
+ { .token = IST("h1"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_BOTH, .mux = &mux_h1_ops };
+static struct mux_proto_list mux_proto_http =
+ { .token = IST(""), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_BOTH, .mux = &mux_http_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_h1);
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_http);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/mux_h2.c b/src/mux_h2.c
new file mode 100644
index 0000000..273e1f5
--- /dev/null
+++ b/src/mux_h2.c
@@ -0,0 +1,7598 @@
+/*
+ * HTTP/2 mux-demux for connections
+ *
+ * Copyright 2017 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/eb32tree.h>
+#include <import/ebmbtree.h>
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/h2.h>
+#include <haproxy/hpack-dec.h>
+#include <haproxy/hpack-enc.h>
+#include <haproxy/hpack-tbl.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/log.h>
+#include <haproxy/mux_h2-t.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/proxy.h>
+#include <haproxy/server.h>
+#include <haproxy/session-t.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/trace.h>
+#include <haproxy/xref.h>
+
+
+/* dummy streams returned for closed, error, refused and idle states */
+static const struct h2s *h2_closed_stream;
+static const struct h2s *h2_error_stream;
+static const struct h2s *h2_refused_stream;
+static const struct h2s *h2_idle_stream;
+
+
+/**** H2 connection descriptor ****/
+struct h2c {
+ struct connection *conn;
+
+ enum h2_cs st0; /* mux state */
+ enum h2_err errcode; /* H2 err code (H2_ERR_*) */
+
+ /* 16 bit hole here */
+ uint32_t flags; /* connection flags: H2_CF_* */
+ uint32_t streams_limit; /* maximum number of concurrent streams the peer supports */
+ int32_t max_id; /* highest ID known on this connection, <0 before preface */
+ uint32_t rcvd_c; /* newly received data to ACK for the connection */
+ uint32_t rcvd_s; /* newly received data to ACK for the current stream (dsi) or zero */
+
+ /* states for the demux direction */
+ struct hpack_dht *ddht; /* demux dynamic header table */
+ struct buffer dbuf; /* demux buffer */
+
+ int32_t dsi; /* demux stream ID (<0 = idle) */
+ int32_t dfl; /* demux frame length (if dsi >= 0) */
+ int8_t dft; /* demux frame type (if dsi >= 0) */
+ int8_t dff; /* demux frame flags (if dsi >= 0) */
+ uint8_t dpl; /* demux pad length (part of dfl), init to 0 */
+ /* 8 bit hole here */
+ int32_t last_sid; /* last processed stream ID for GOAWAY, <0 before preface */
+
+ /* states for the mux direction */
+ struct buffer mbuf[H2C_MBUF_CNT]; /* mux buffers (ring) */
+ int32_t miw; /* mux initial window size for all new streams */
+ int32_t mws; /* mux window size. Can be negative. */
+ int32_t mfs; /* mux's max frame size */
+
+ int timeout; /* idle timeout duration in ticks */
+ int shut_timeout; /* idle timeout duration in ticks after GOAWAY was sent */
+ int idle_start; /* date of the last time the connection went idle (no stream + empty mbuf), or the start of current http req */
+ /* 32-bit hole here */
+ unsigned int nb_streams; /* number of streams in the tree */
+ unsigned int nb_sc; /* number of attached stream connectors */
+ unsigned int nb_reserved; /* number of reserved streams */
+ unsigned int stream_cnt; /* total number of streams seen */
+ struct proxy *proxy; /* the proxy this connection was created for */
+ struct task *task; /* timeout management task */
+ struct h2_counters *px_counters; /* h2 counters attached to proxy */
+ struct eb_root streams_by_id; /* all active streams by their ID */
+ struct list send_list; /* list of blocked streams requesting to send */
+ struct list fctl_list; /* list of streams blocked by connection's fctl */
+ struct list blocked_list; /* list of streams blocked for other reasons (e.g. sfctl, dep) */
+ struct buffer_wait buf_wait; /* wait list for buffer allocations */
+ struct wait_event wait_event; /* To be used if we're waiting for I/Os */
+};
+
+
+/* H2 stream descriptor, describing the stream as it appears in the H2C, and as
+ * it is being processed in the internal HTTP representation (HTX).
+ */
+struct h2s {
+ struct sedesc *sd;
+ struct session *sess;
+ struct h2c *h2c;
+ struct eb32_node by_id; /* place in h2c's streams_by_id */
+ int32_t id; /* stream ID */
+ uint32_t flags; /* H2_SF_* */
+ int sws; /* stream window size, to be added to the mux's initial window size */
+ enum h2_err errcode; /* H2 err code (H2_ERR_*) */
+ enum h2_ss st;
+ uint16_t status; /* HTTP response status */
+ unsigned long long body_len; /* remaining body length according to content-length if H2_SF_DATA_CLEN */
+ struct buffer rxbuf; /* receive buffer, always valid (buf_empty or real buffer) */
+ struct wait_event *subs; /* recv wait_event the stream connector associated is waiting on (via h2_subscribe) */
+	struct list list; /* To be used when adding in h2c->send_list or h2c->fctl_list */
+ struct tasklet *shut_tl; /* deferred shutdown tasklet, to retry to send an RST after we failed to,
+ * in case there's no other subscription to do it */
+
+ char upgrade_protocol[16]; /* rfc 8441: requested protocol on Extended CONNECT */
+};
+
+/* descriptor for an h2 frame header */
+struct h2_fh {
+ uint32_t len; /* length, host order, 24 bits */
+ uint32_t sid; /* stream id, host order, 31 bits */
+ uint8_t ft; /* frame type */
+ uint8_t ff; /* frame flags */
+};
+
+/* trace source and events */
+static void h2_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this :
+ * strm - application layer
+ * h2s - internal H2 stream
+ * h2c - internal H2 connection
+ * conn - external connection
+ *
+ */
+static const struct trace_event h2_trace_events[] = {
+#define H2_EV_H2C_NEW (1ULL << 0)
+ { .mask = H2_EV_H2C_NEW, .name = "h2c_new", .desc = "new H2 connection" },
+#define H2_EV_H2C_RECV (1ULL << 1)
+ { .mask = H2_EV_H2C_RECV, .name = "h2c_recv", .desc = "Rx on H2 connection" },
+#define H2_EV_H2C_SEND (1ULL << 2)
+ { .mask = H2_EV_H2C_SEND, .name = "h2c_send", .desc = "Tx on H2 connection" },
+#define H2_EV_H2C_FCTL (1ULL << 3)
+ { .mask = H2_EV_H2C_FCTL, .name = "h2c_fctl", .desc = "H2 connection flow-controlled" },
+#define H2_EV_H2C_BLK (1ULL << 4)
+ { .mask = H2_EV_H2C_BLK, .name = "h2c_blk", .desc = "H2 connection blocked" },
+#define H2_EV_H2C_WAKE (1ULL << 5)
+ { .mask = H2_EV_H2C_WAKE, .name = "h2c_wake", .desc = "H2 connection woken up" },
+#define H2_EV_H2C_END (1ULL << 6)
+ { .mask = H2_EV_H2C_END, .name = "h2c_end", .desc = "H2 connection terminated" },
+#define H2_EV_H2C_ERR (1ULL << 7)
+ { .mask = H2_EV_H2C_ERR, .name = "h2c_err", .desc = "error on H2 connection" },
+#define H2_EV_RX_FHDR (1ULL << 8)
+ { .mask = H2_EV_RX_FHDR, .name = "rx_fhdr", .desc = "H2 frame header received" },
+#define H2_EV_RX_FRAME (1ULL << 9)
+ { .mask = H2_EV_RX_FRAME, .name = "rx_frame", .desc = "receipt of any H2 frame" },
+#define H2_EV_RX_EOI (1ULL << 10)
+ { .mask = H2_EV_RX_EOI, .name = "rx_eoi", .desc = "receipt of end of H2 input (ES or RST)" },
+#define H2_EV_RX_PREFACE (1ULL << 11)
+ { .mask = H2_EV_RX_PREFACE, .name = "rx_preface", .desc = "receipt of H2 preface" },
+#define H2_EV_RX_DATA (1ULL << 12)
+ { .mask = H2_EV_RX_DATA, .name = "rx_data", .desc = "receipt of H2 DATA frame" },
+#define H2_EV_RX_HDR (1ULL << 13)
+ { .mask = H2_EV_RX_HDR, .name = "rx_hdr", .desc = "receipt of H2 HEADERS frame" },
+#define H2_EV_RX_PRIO (1ULL << 14)
+ { .mask = H2_EV_RX_PRIO, .name = "rx_prio", .desc = "receipt of H2 PRIORITY frame" },
+#define H2_EV_RX_RST (1ULL << 15)
+ { .mask = H2_EV_RX_RST, .name = "rx_rst", .desc = "receipt of H2 RST_STREAM frame" },
+#define H2_EV_RX_SETTINGS (1ULL << 16)
+ { .mask = H2_EV_RX_SETTINGS, .name = "rx_settings", .desc = "receipt of H2 SETTINGS frame" },
+#define H2_EV_RX_PUSH (1ULL << 17)
+ { .mask = H2_EV_RX_PUSH, .name = "rx_push", .desc = "receipt of H2 PUSH_PROMISE frame" },
+#define H2_EV_RX_PING (1ULL << 18)
+ { .mask = H2_EV_RX_PING, .name = "rx_ping", .desc = "receipt of H2 PING frame" },
+#define H2_EV_RX_GOAWAY (1ULL << 19)
+ { .mask = H2_EV_RX_GOAWAY, .name = "rx_goaway", .desc = "receipt of H2 GOAWAY frame" },
+#define H2_EV_RX_WU (1ULL << 20)
+ { .mask = H2_EV_RX_WU, .name = "rx_wu", .desc = "receipt of H2 WINDOW_UPDATE frame" },
+#define H2_EV_RX_CONT (1ULL << 21)
+ { .mask = H2_EV_RX_CONT, .name = "rx_cont", .desc = "receipt of H2 CONTINUATION frame" },
+#define H2_EV_TX_FRAME (1ULL << 22)
+ { .mask = H2_EV_TX_FRAME, .name = "tx_frame", .desc = "transmission of any H2 frame" },
+#define H2_EV_TX_EOI (1ULL << 23)
+ { .mask = H2_EV_TX_EOI, .name = "tx_eoi", .desc = "transmission of H2 end of input (ES or RST)" },
+#define H2_EV_TX_PREFACE (1ULL << 24)
+ { .mask = H2_EV_TX_PREFACE, .name = "tx_preface", .desc = "transmission of H2 preface" },
+#define H2_EV_TX_DATA (1ULL << 25)
+ { .mask = H2_EV_TX_DATA, .name = "tx_data", .desc = "transmission of H2 DATA frame" },
+#define H2_EV_TX_HDR (1ULL << 26)
+ { .mask = H2_EV_TX_HDR, .name = "tx_hdr", .desc = "transmission of H2 HEADERS frame" },
+#define H2_EV_TX_PRIO (1ULL << 27)
+ { .mask = H2_EV_TX_PRIO, .name = "tx_prio", .desc = "transmission of H2 PRIORITY frame" },
+#define H2_EV_TX_RST (1ULL << 28)
+ { .mask = H2_EV_TX_RST, .name = "tx_rst", .desc = "transmission of H2 RST_STREAM frame" },
+#define H2_EV_TX_SETTINGS (1ULL << 29)
+ { .mask = H2_EV_TX_SETTINGS, .name = "tx_settings", .desc = "transmission of H2 SETTINGS frame" },
+#define H2_EV_TX_PUSH (1ULL << 30)
+ { .mask = H2_EV_TX_PUSH, .name = "tx_push", .desc = "transmission of H2 PUSH_PROMISE frame" },
+#define H2_EV_TX_PING (1ULL << 31)
+ { .mask = H2_EV_TX_PING, .name = "tx_ping", .desc = "transmission of H2 PING frame" },
+#define H2_EV_TX_GOAWAY (1ULL << 32)
+ { .mask = H2_EV_TX_GOAWAY, .name = "tx_goaway", .desc = "transmission of H2 GOAWAY frame" },
+#define H2_EV_TX_WU (1ULL << 33)
+ { .mask = H2_EV_TX_WU, .name = "tx_wu", .desc = "transmission of H2 WINDOW_UPDATE frame" },
+#define H2_EV_TX_CONT (1ULL << 34)
+ { .mask = H2_EV_TX_CONT, .name = "tx_cont", .desc = "transmission of H2 CONTINUATION frame" },
+#define H2_EV_H2S_NEW (1ULL << 35)
+ { .mask = H2_EV_H2S_NEW, .name = "h2s_new", .desc = "new H2 stream" },
+#define H2_EV_H2S_RECV (1ULL << 36)
+ { .mask = H2_EV_H2S_RECV, .name = "h2s_recv", .desc = "Rx for H2 stream" },
+#define H2_EV_H2S_SEND (1ULL << 37)
+ { .mask = H2_EV_H2S_SEND, .name = "h2s_send", .desc = "Tx for H2 stream" },
+#define H2_EV_H2S_FCTL (1ULL << 38)
+ { .mask = H2_EV_H2S_FCTL, .name = "h2s_fctl", .desc = "H2 stream flow-controlled" },
+#define H2_EV_H2S_BLK (1ULL << 39)
+ { .mask = H2_EV_H2S_BLK, .name = "h2s_blk", .desc = "H2 stream blocked" },
+#define H2_EV_H2S_WAKE (1ULL << 40)
+ { .mask = H2_EV_H2S_WAKE, .name = "h2s_wake", .desc = "H2 stream woken up" },
+#define H2_EV_H2S_END (1ULL << 41)
+ { .mask = H2_EV_H2S_END, .name = "h2s_end", .desc = "H2 stream terminated" },
+#define H2_EV_H2S_ERR (1ULL << 42)
+ { .mask = H2_EV_H2S_ERR, .name = "h2s_err", .desc = "error on H2 stream" },
+#define H2_EV_STRM_NEW (1ULL << 43)
+ { .mask = H2_EV_STRM_NEW, .name = "strm_new", .desc = "app-layer stream creation" },
+#define H2_EV_STRM_RECV (1ULL << 44)
+ { .mask = H2_EV_STRM_RECV, .name = "strm_recv", .desc = "receiving data for stream" },
+#define H2_EV_STRM_SEND (1ULL << 45)
+ { .mask = H2_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" },
+#define H2_EV_STRM_FULL (1ULL << 46)
+ { .mask = H2_EV_STRM_FULL, .name = "strm_full", .desc = "stream buffer full" },
+#define H2_EV_STRM_WAKE (1ULL << 47)
+ { .mask = H2_EV_STRM_WAKE, .name = "strm_wake", .desc = "stream woken up" },
+#define H2_EV_STRM_SHUT (1ULL << 48)
+ { .mask = H2_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" },
+#define H2_EV_STRM_END (1ULL << 49)
+ { .mask = H2_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" },
+#define H2_EV_STRM_ERR (1ULL << 50)
+ { .mask = H2_EV_STRM_ERR, .name = "strm_err", .desc = "stream error" },
+#define H2_EV_PROTO_ERR (1ULL << 51)
+ { .mask = H2_EV_PROTO_ERR, .name = "proto_err", .desc = "protocol error" },
+ { }
+};
+
+static const struct name_desc h2_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="h2s", .desc="H2 stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc h2_trace_decoding[] = {
+#define H2_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define H2_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only h2c/h2s state and flags, no real decoding" },
+#define H2_VERB_SIMPLE 3
+ { .name="simple", .desc="add request/response status line or frame info when available" },
+#define H2_VERB_ADVANCED 4
+ { .name="advanced", .desc="add header fields or frame decoding when available" },
+#define H2_VERB_COMPLETE 5
+ { .name="complete", .desc="add full data dump when available" },
+ { /* end */ }
+};
+
+static struct trace_source trace_h2 __read_mostly = {
+ .name = IST("h2"),
+ .desc = "HTTP/2 multiplexer",
+ .arg_def = TRC_ARG1_CONN, // TRACE()'s first argument is always a connection
+ .default_cb = h2_trace,
+ .known_events = h2_trace_events,
+ .lockon_args = h2_trace_lockon_args,
+ .decoding = h2_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_h2
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+/* h2 stats module */
+enum {
+ H2_ST_HEADERS_RCVD,
+ H2_ST_DATA_RCVD,
+ H2_ST_SETTINGS_RCVD,
+ H2_ST_RST_STREAM_RCVD,
+ H2_ST_GOAWAY_RCVD,
+
+ H2_ST_CONN_PROTO_ERR,
+ H2_ST_STRM_PROTO_ERR,
+ H2_ST_RST_STREAM_RESP,
+ H2_ST_GOAWAY_RESP,
+
+ H2_ST_OPEN_CONN,
+ H2_ST_OPEN_STREAM,
+ H2_ST_TOTAL_CONN,
+ H2_ST_TOTAL_STREAM,
+
+ H2_STATS_COUNT /* must be the last member of the enum */
+};
+
+static struct name_desc h2_stats[] = {
+ [H2_ST_HEADERS_RCVD] = { .name = "h2_headers_rcvd",
+ .desc = "Total number of received HEADERS frames" },
+ [H2_ST_DATA_RCVD] = { .name = "h2_data_rcvd",
+ .desc = "Total number of received DATA frames" },
+ [H2_ST_SETTINGS_RCVD] = { .name = "h2_settings_rcvd",
+ .desc = "Total number of received SETTINGS frames" },
+ [H2_ST_RST_STREAM_RCVD] = { .name = "h2_rst_stream_rcvd",
+ .desc = "Total number of received RST_STREAM frames" },
+ [H2_ST_GOAWAY_RCVD] = { .name = "h2_goaway_rcvd",
+ .desc = "Total number of received GOAWAY frames" },
+
+ [H2_ST_CONN_PROTO_ERR] = { .name = "h2_detected_conn_protocol_errors",
+ .desc = "Total number of connection protocol errors" },
+ [H2_ST_STRM_PROTO_ERR] = { .name = "h2_detected_strm_protocol_errors",
+ .desc = "Total number of stream protocol errors" },
+ [H2_ST_RST_STREAM_RESP] = { .name = "h2_rst_stream_resp",
+ .desc = "Total number of RST_STREAM sent on detected error" },
+ [H2_ST_GOAWAY_RESP] = { .name = "h2_goaway_resp",
+ .desc = "Total number of GOAWAY sent on detected error" },
+
+ [H2_ST_OPEN_CONN] = { .name = "h2_open_connections",
+ .desc = "Count of currently open connections" },
+ [H2_ST_OPEN_STREAM] = { .name = "h2_backend_open_streams",
+ .desc = "Count of currently open streams" },
+ [H2_ST_TOTAL_CONN] = { .name = "h2_total_connections",
+ .desc = "Total number of connections" },
+ [H2_ST_TOTAL_STREAM] = { .name = "h2_backend_total_streams",
+ .desc = "Total number of streams" },
+};
+
+static struct h2_counters {
+ long long headers_rcvd; /* total number of HEADERS frame received */
+ long long data_rcvd; /* total number of DATA frame received */
+ long long settings_rcvd; /* total number of SETTINGS frame received */
+ long long rst_stream_rcvd; /* total number of RST_STREAM frame received */
+ long long goaway_rcvd; /* total number of GOAWAY frame received */
+
+ long long conn_proto_err; /* total number of protocol errors detected */
+ long long strm_proto_err; /* total number of protocol errors detected */
+ long long rst_stream_resp; /* total number of RST_STREAM frame sent on error */
+ long long goaway_resp; /* total number of GOAWAY frame sent on error */
+
+ long long open_conns; /* count of currently open connections */
+ long long open_streams; /* count of currently open streams */
+ long long total_conns; /* total number of connections */
+ long long total_streams; /* total number of streams */
+} h2_counters;
+
+static void h2_fill_stats(void *data, struct field *stats)
+{
+ struct h2_counters *counters = data;
+
+ stats[H2_ST_HEADERS_RCVD] = mkf_u64(FN_COUNTER, counters->headers_rcvd);
+ stats[H2_ST_DATA_RCVD] = mkf_u64(FN_COUNTER, counters->data_rcvd);
+ stats[H2_ST_SETTINGS_RCVD] = mkf_u64(FN_COUNTER, counters->settings_rcvd);
+ stats[H2_ST_RST_STREAM_RCVD] = mkf_u64(FN_COUNTER, counters->rst_stream_rcvd);
+ stats[H2_ST_GOAWAY_RCVD] = mkf_u64(FN_COUNTER, counters->goaway_rcvd);
+
+ stats[H2_ST_CONN_PROTO_ERR] = mkf_u64(FN_COUNTER, counters->conn_proto_err);
+ stats[H2_ST_STRM_PROTO_ERR] = mkf_u64(FN_COUNTER, counters->strm_proto_err);
+ stats[H2_ST_RST_STREAM_RESP] = mkf_u64(FN_COUNTER, counters->rst_stream_resp);
+ stats[H2_ST_GOAWAY_RESP] = mkf_u64(FN_COUNTER, counters->goaway_resp);
+
+ stats[H2_ST_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->open_conns);
+ stats[H2_ST_OPEN_STREAM] = mkf_u64(FN_GAUGE, counters->open_streams);
+ stats[H2_ST_TOTAL_CONN] = mkf_u64(FN_COUNTER, counters->total_conns);
+ stats[H2_ST_TOTAL_STREAM] = mkf_u64(FN_COUNTER, counters->total_streams);
+}
+
+static struct stats_module h2_stats_module = {
+ .name = "h2",
+ .fill_stats = h2_fill_stats,
+ .stats = h2_stats,
+ .stats_count = H2_STATS_COUNT,
+ .counters = &h2_counters,
+ .counters_size = sizeof(h2_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE|STATS_PX_CAP_BE),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &h2_stats_module);
+
+/* the h2c connection pool */
+DECLARE_STATIC_POOL(pool_head_h2c, "h2c", sizeof(struct h2c));
+
+/* the h2s stream pool */
+DECLARE_STATIC_POOL(pool_head_h2s, "h2s", sizeof(struct h2s));
+
+/* The default connection window size is 65535; it may only be enlarged using
+ * a WINDOW_UPDATE message. Since the window must never be larger than 2G-1,
+ * we'll pretend we already received the difference between the two to send
+ * an equivalent window update to enlarge it to 2G-1.
+ */
+#define H2_INITIAL_WINDOW_INCREMENT ((1U<<31)-1 - 65535)
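+
+/* i.e. (2^31-1) - 65535 = 2147483647 - 65535 = 2147418112: advertising this
+ * increment on top of the default 65535-byte window yields exactly 2^31-1.
+ */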
+
+/* maximum amount of data we're OK with re-aligning for buffer optimizations */
+#define MAX_DATA_REALIGN 1024
+
+/* a few settings from the global section */
+static int h2_settings_header_table_size = 4096; /* initial value */
+static int h2_settings_initial_window_size = 65536; /* default initial value */
+static int h2_be_settings_initial_window_size = 0; /* backend's default initial value */
+static int h2_fe_settings_initial_window_size = 0; /* frontend's default initial value */
+static unsigned int h2_settings_max_concurrent_streams = 100; /* default value */
+static unsigned int h2_be_settings_max_concurrent_streams = 0; /* backend value */
+static unsigned int h2_fe_settings_max_concurrent_streams = 0; /* frontend value */
+static int h2_settings_max_frame_size = 0; /* unset */
+
+/* other non-protocol settings */
+static unsigned int h2_fe_max_total_streams = 0; /* frontend value */
+
+/* a dummy closed endpoint */
+static const struct sedesc closed_ep = {
+ .sc = NULL,
+ .flags = SE_FL_DETACHED,
+};
+
+/* a dummy closed stream */
+static const struct h2s *h2_closed_stream = &(const struct h2s){
+ .sd = (struct sedesc *)&closed_ep,
+ .h2c = NULL,
+ .st = H2_SS_CLOSED,
+ .errcode = H2_ERR_STREAM_CLOSED,
+ .flags = H2_SF_RST_RCVD,
+ .id = 0,
+};
+
+/* a dummy closed stream returning a PROTOCOL_ERROR error */
+static const struct h2s *h2_error_stream = &(const struct h2s){
+ .sd = (struct sedesc *)&closed_ep,
+ .h2c = NULL,
+ .st = H2_SS_CLOSED,
+ .errcode = H2_ERR_PROTOCOL_ERROR,
+ .flags = 0,
+ .id = 0,
+};
+
+/* a dummy closed stream returning a REFUSED_STREAM error */
+static const struct h2s *h2_refused_stream = &(const struct h2s){
+ .sd = (struct sedesc *)&closed_ep,
+ .h2c = NULL,
+ .st = H2_SS_CLOSED,
+ .errcode = H2_ERR_REFUSED_STREAM,
+ .flags = 0,
+ .id = 0,
+};
+
+/* and a dummy idle stream for use with any unannounced stream */
+static const struct h2s *h2_idle_stream = &(const struct h2s){
+ .sd = (struct sedesc *)&closed_ep,
+ .h2c = NULL,
+ .st = H2_SS_IDLE,
+ .errcode = H2_ERR_STREAM_CLOSED,
+ .id = 0,
+};
+
+
+struct task *h2_timeout_task(struct task *t, void *context, unsigned int state);
+static int h2_send(struct h2c *h2c);
+static int h2_recv(struct h2c *h2c);
+static int h2_process(struct h2c *h2c);
+/* h2_io_cb is exported to see it resolved in "show fd" */
+struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state);
+static inline struct h2s *h2c_st_by_id(struct h2c *h2c, int id);
+static int h2c_dec_hdrs(struct h2c *h2c, struct buffer *rxbuf, uint32_t *flags, unsigned long long *body_len, char *upgrade_protocol);
+static int h2_frt_transfer_data(struct h2s *h2s);
+struct task *h2_deferred_shut(struct task *t, void *ctx, unsigned int state);
+static struct h2s *h2c_bck_stream_new(struct h2c *h2c, struct stconn *sc, struct session *sess);
+static void h2s_alert(struct h2s *h2s);
+static inline void h2_remove_from_list(struct h2s *h2s);
+
+/* returns the stconn associated to the H2 stream */
+static forceinline struct stconn *h2s_sc(const struct h2s *h2s)
+{
+ return h2s->sd->sc;
+}
+
+/* the H2 traces always expect that arg1, if non-null, is of type connection
+ * (from which we can derive h2c), that arg2, if non-null, is of type h2s, and
+ * that arg3, if non-null, is either of type htx for tx headers, or of type
+ * buffer for everything else.
+ */
+static void h2_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct h2c *h2c = conn ? conn->ctx : NULL;
+ const struct h2s *h2s = a2;
+ const struct buffer *buf = a3;
+ const struct htx *htx;
+ int pos;
+
+ if (!h2c) // nothing to add
+ return;
+
+ if (src->verbosity > H2_VERB_CLEAN) {
+ chunk_appendf(&trace_buf, " : h2c=%p(%c,%s)", h2c, conn_is_back(conn) ? 'B' : 'F', h2c_st_to_str(h2c->st0));
+
+ if (mask & H2_EV_H2C_NEW) // inside h2_init, otherwise it's hard to match conn & h2c
+ conn_append_debug_info(&trace_buf, conn, " : ");
+
+ if (h2c->errcode)
+ chunk_appendf(&trace_buf, " err=%s/%02x", h2_err_str(h2c->errcode), h2c->errcode);
+
+ if (h2c->flags & H2_CF_DEM_IN_PROGRESS && // frame processing has started, type and length are valid
+ (mask & (H2_EV_RX_FRAME|H2_EV_RX_FHDR)) == (H2_EV_RX_FRAME|H2_EV_RX_FHDR)) {
+ chunk_appendf(&trace_buf, " dft=%s/%02x dfl=%d", h2_ft_str(h2c->dft), h2c->dff, h2c->dfl);
+ }
+
+ if (h2s) {
+ if (h2s->id <= 0)
+ chunk_appendf(&trace_buf, " dsi=%d", h2c->dsi);
+ if (h2s == h2_idle_stream)
+ chunk_appendf(&trace_buf, " h2s=IDL");
+ else if (h2s != h2_closed_stream && h2s != h2_refused_stream && h2s != h2_error_stream)
+ chunk_appendf(&trace_buf, " h2s=%p(%d,%s)", h2s, h2s->id, h2s_st_to_str(h2s->st));
+ else if (h2c->dsi > 0) // don't show that before sid is known
+ chunk_appendf(&trace_buf, " h2s=CLO");
+ if (h2s->id && h2s->errcode)
+ chunk_appendf(&trace_buf, " err=%s/%02x", h2_err_str(h2s->errcode), h2s->errcode);
+ }
+ }
+
+ /* Let's dump decoded requests and responses right after parsing. They
+ * are traced at level USER with a few recognizable flags.
+ */
+ if ((mask == (H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW) ||
+ mask == (H2_EV_RX_FRAME|H2_EV_RX_HDR)) && buf)
+ htx = htxbuf(buf); // recv req/res
+ else if (mask == (H2_EV_TX_FRAME|H2_EV_TX_HDR))
+ htx = a3; // send req/res
+ else
+ htx = NULL;
+
+ if (level == TRACE_LEVEL_USER && src->verbosity != H2_VERB_MINIMAL && htx && (pos = htx_get_head(htx)) != -1) {
+ const struct htx_blk *blk = htx_get_blk(htx, pos);
+ const struct htx_sl *sl = htx_get_blk_ptr(htx, blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_REQ_SL)
+ chunk_appendf(&trace_buf, " : [%d] H2 REQ: %.*s %.*s %.*s",
+ h2s ? h2s->id : h2c->dsi,
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ else if (type == HTX_BLK_RES_SL)
+ chunk_appendf(&trace_buf, " : [%d] H2 RES: %.*s %.*s %.*s",
+ h2s ? h2s->id : h2c->dsi,
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ }
+}
+
+
+/* Detect a pending read0 for a H2 connection. It happens if a read0 was
+ * already reported on a previous xprt->rcvbuf() AND a frame parser failed
+ * to parse pending data, confirming no more progress is possible because
+ * we're facing a truncated frame. The function returns 1 to report a read0
+ * or 0 otherwise.
+ */
+static inline int h2c_read0_pending(struct h2c *h2c)
+{
+ return !!(h2c->flags & H2_CF_END_REACHED);
+}
+
+/* returns true if the connection is allowed to expire, false otherwise. A
+ * connection may expire when it has no attached streams. As long as streams
+ * are attached, the application layer is responsible for timeout management,
+ * and each layer will detach when it doesn't want to wait anymore. When the
+ * last one leaves, the connection must take over timeout management.
+ */
+static inline int h2c_may_expire(const struct h2c *h2c)
+{
+ return !h2c->nb_sc;
+}
+
+/* returns the number of max concurrent streams permitted on a connection,
+ * depending on its side (frontend or backend), falling back to the default
+ * h2_settings_max_concurrent_streams. It may even be zero.
+ */
+static inline int h2c_max_concurrent_streams(const struct h2c *h2c)
+{
+ int ret;
+
+ ret = (h2c->flags & H2_CF_IS_BACK) ?
+ h2_be_settings_max_concurrent_streams :
+ h2_fe_settings_max_concurrent_streams;
+
+ ret = ret ? ret : h2_settings_max_concurrent_streams;
+ return ret;
+}
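+
+/* For example, a frontend connection with h2_fe_settings_max_concurrent_streams
+ * left at 0 falls back to h2_settings_max_concurrent_streams, i.e. 100 streams
+ * by default.
+ */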
+
+
+/* update h2c timeout if needed */
+static void h2c_update_timeout(struct h2c *h2c)
+{
+ int is_idle_conn = 0;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ if (!h2c->task)
+ goto leave;
+
+ if (h2c_may_expire(h2c)) {
+ /* no more streams attached */
+ if (br_data(h2c->mbuf)) {
+ /* pending output data: always the regular data timeout */
+ h2c->task->expire = tick_add_ifset(now_ms, h2c->timeout);
+ } else {
+ /* no stream, no output data */
+ if (!(h2c->flags & H2_CF_IS_BACK)) {
+ int to;
+
+ if (h2c->max_id > 0 && !b_data(&h2c->dbuf) &&
+ tick_isset(h2c->proxy->timeout.httpka)) {
+ /* idle after having seen one stream => keep-alive */
+ to = h2c->proxy->timeout.httpka;
+ } else {
+ /* before first request, or started to deserialize a
+ * new req => http-request.
+ */
+ to = h2c->proxy->timeout.httpreq;
+ }
+
+ h2c->task->expire = tick_add_ifset(h2c->idle_start, to);
+ is_idle_conn = 1;
+ }
+
+ if (h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED)) {
+ /* GOAWAY sent (or failed), closing in progress */
+ int exp = tick_add_ifset(now_ms, h2c->shut_timeout);
+
+ h2c->task->expire = tick_first(h2c->task->expire, exp);
+ is_idle_conn = 1;
+ }
+
+ /* if a timeout above was not set, fall back to the default one */
+ if (!tick_isset(h2c->task->expire))
+ h2c->task->expire = tick_add_ifset(now_ms, h2c->timeout);
+ }
+
+ if ((h2c->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) &&
+ is_idle_conn && tick_isset(global.close_spread_end)) {
+ /* If a soft-stop is in progress and a close-spread-time
+ * is set, we want to spread idle connection closing roughly
+ * evenly across the defined window. This should only
+ * act on idle frontend connections.
+ * If the window end is already in the past, we wake the
+ * timeout task up immediately so that it can be closed.
+ */
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* We don't need to reset the expire if it would
+ * already happen before the close window end.
+ */
+ if (tick_isset(h2c->task->expire) &&
+ tick_is_le(global.close_spread_end, h2c->task->expire)) {
+ /* Set an expire value shorter than the current value
+ * because the close spread window end comes earlier.
+ */
+ h2c->task->expire = tick_add(now_ms, statistical_prng_range(remaining_window));
+ }
+ }
+ else {
+ /* We are past the soft close window end, wake the timeout
+ * task up immediately.
+ */
+ task_wakeup(h2c->task, TASK_WOKEN_TIMER);
+ }
+ }
+
+ } else {
+ h2c->task->expire = TICK_ETERNITY;
+ }
+ task_queue(h2c->task);
+ leave:
+ TRACE_LEAVE(H2_EV_H2C_WAKE);
+}
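+
+/* In short, for an idle frontend connection: once at least one stream was
+ * seen (max_id > 0) and no new request started arriving, the keep-alive
+ * timeout applies when set; otherwise the http-request timeout is used, and
+ * when neither is set the regular h2c timeout remains the fallback.
+ */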
+
+static __inline int
+h2c_is_dead(const struct h2c *h2c)
+{
+ if (eb_is_empty(&h2c->streams_by_id) && /* don't close if streams exist */
+ ((h2c->flags & H2_CF_ERROR) || /* errors close immediately */
+ (h2c->flags & H2_CF_ERR_PENDING && h2c->st0 < H2_CS_FRAME_H) || /* early error during connect */
+ (h2c->st0 >= H2_CS_ERROR && !h2c->task) || /* a timeout stroke earlier */
+ (!(h2c->conn->owner) && !conn_is_reverse(h2c->conn)) || /* Nobody's left to take care of the connection, drop it now */
+ (!br_data(h2c->mbuf) && /* mux buffer empty, also process clean events below */
+ ((h2c->flags & H2_CF_RCVD_SHUT) ||
+ (h2c->last_sid >= 0 && h2c->max_id >= h2c->last_sid)))))
+ return 1;
+
+ return 0;
+}
+
+/*****************************************************/
+/* functions below are for dynamic buffer management */
+/*****************************************************/
+
+/* indicates whether or not we may call the h2_recv() function to attempt
+ * to receive data into the buffer and/or demux pending data. The condition is
+ * a bit complex due to some API limits for now. The rules are the following :
+ * - if an error or a shutdown was detected on the connection and the buffer
+ *   is empty, we must not attempt to receive
+ * - if the demux buf failed to be allocated, we must not try to receive and
+ *   we know there is nothing pending
+ * - if no flag indicates a blocking condition, we may attempt to receive,
+ *   regardless of whether the demux buffer is full or not, so that only the
+ *   demux part decides whether or not to block. This is needed because the
+ *   connection API indeed prevents us from re-enabling receipt that is
+ *   already enabled in a polled state, so we must always immediately stop as
+ *   soon as the demux can't proceed so as never to hit an end of read with
+ *   data pending in the buffers.
+ * - otherwise we may not attempt to receive
+ */
+static inline int h2_recv_allowed(const struct h2c *h2c)
+{
+ if (b_data(&h2c->dbuf) == 0 &&
+ ((h2c->flags & (H2_CF_RCVD_SHUT|H2_CF_ERROR)) || h2c->st0 >= H2_CS_ERROR))
+ return 0;
+
+ if (!(h2c->flags & H2_CF_DEM_DALLOC) &&
+ !(h2c->flags & H2_CF_DEM_BLOCK_ANY))
+ return 1;
+
+ return 0;
+}
+
+/* restarts reading on the connection if it was not enabled */
+static inline void h2c_restart_reading(const struct h2c *h2c, int consider_buffer)
+{
+ if (!h2_recv_allowed(h2c))
+ return;
+ if ((!consider_buffer || !b_data(&h2c->dbuf))
+ && (h2c->wait_event.events & SUB_RETRY_RECV))
+ return;
+ tasklet_wakeup(h2c->wait_event.tasklet);
+}
+
+
+/* returns true if the front connection has too many stream connectors attached */
+static inline int h2_frt_has_too_many_sc(const struct h2c *h2c)
+{
+ return h2c->nb_sc > h2c_max_concurrent_streams(h2c) ||
+ unlikely(conn_reverse_in_preconnect(h2c->conn));
+}
+
+/* Tries to grab a buffer and to re-enable processing on mux <target>. The h2c
+ * flags are used to figure out which buffer was requested. It returns 1 if the
+ * allocation succeeds, in which case the connection is woken up, or 0 if it's
+ * impossible to wake up and we prefer to be woken up later.
+ */
+static int h2_buf_available(void *target)
+{
+ struct h2c *h2c = target;
+ struct h2s *h2s;
+
+ if ((h2c->flags & H2_CF_DEM_DALLOC) && b_alloc(&h2c->dbuf)) {
+ h2c->flags &= ~H2_CF_DEM_DALLOC;
+ h2c_restart_reading(h2c, 1);
+ return 1;
+ }
+
+ if ((h2c->flags & H2_CF_MUX_MALLOC) && b_alloc(br_tail(h2c->mbuf))) {
+ h2c->flags &= ~H2_CF_MUX_MALLOC;
+
+ if (h2c->flags & H2_CF_DEM_MROOM) {
+ h2c->flags &= ~H2_CF_DEM_MROOM;
+ h2c_restart_reading(h2c, 1);
+ }
+ return 1;
+ }
+
+ if ((h2c->flags & H2_CF_DEM_SALLOC) &&
+ (h2s = h2c_st_by_id(h2c, h2c->dsi)) && h2s_sc(h2s) &&
+ b_alloc(&h2s->rxbuf)) {
+ h2c->flags &= ~H2_CF_DEM_SALLOC;
+ h2c_restart_reading(h2c, 1);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline struct buffer *h2_get_buf(struct h2c *h2c, struct buffer *bptr)
+{
+ struct buffer *buf = NULL;
+
+ if (likely(!LIST_INLIST(&h2c->buf_wait.list)) &&
+ unlikely((buf = b_alloc(bptr)) == NULL)) {
+ h2c->buf_wait.target = h2c;
+ h2c->buf_wait.wakeup_cb = h2_buf_available;
+ LIST_APPEND(&th_ctx->buffer_wq, &h2c->buf_wait.list);
+ }
+ return buf;
+}
+
+static inline void h2_release_buf(struct h2c *h2c, struct buffer *bptr)
+{
+ if (bptr->size) {
+ b_free(bptr);
+ offer_buffers(NULL, 1);
+ }
+}
+
+static inline void h2_release_mbuf(struct h2c *h2c)
+{
+ struct buffer *buf;
+ unsigned int count = 0;
+
+ while (b_size(buf = br_head_pick(h2c->mbuf))) {
+ b_free(buf);
+ count++;
+ }
+ if (count)
+ offer_buffers(NULL, count);
+}
+
+/* returns the number of allocatable outgoing streams for the connection taking
+ * the last_sid and the reserved ones into account.
+ */
+static inline int h2_streams_left(const struct h2c *h2c)
+{
+ int ret;
+
+ /* consider the number of outgoing streams we're allowed to create before
+ * reaching the last GOAWAY frame seen. max_id is the last assigned id,
+ * nb_reserved is the number of streams which don't yet have an ID.
+ */
+ ret = (h2c->last_sid >= 0) ? h2c->last_sid : 0x7FFFFFFF;
+ ret = (unsigned int)(ret - h2c->max_id) / 2 - h2c->nb_reserved - 1;
+ if (ret < 0)
+ ret = 0;
+ return ret;
+}
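+
+/* Example: on a fresh backend connection (last_sid < 0, max_id = -1,
+ * nb_reserved = 0), this gives (0x7FFFFFFF - (-1)) / 2 - 0 - 1 = 0x3FFFFFFF
+ * allocatable streams.
+ */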
+
+/* returns the number of streams in use on a connection to figure out whether
+ * it's idle or not. We check nb_sc and not nb_streams as the caller will want
+ * to know if it was the last one after a detach().
+ */
+static int h2_used_streams(struct connection *conn)
+{
+ struct h2c *h2c = conn->ctx;
+
+ return h2c->nb_sc;
+}
+
+/* returns the number of concurrent streams available on the connection */
+static int h2_avail_streams(struct connection *conn)
+{
+ struct server *srv = objt_server(conn->target);
+ struct h2c *h2c = conn->ctx;
+ int ret1, ret2;
+
+ /* RFC7540#6.8: Receivers of a GOAWAY frame MUST NOT open additional
+ * streams on the connection.
+ */
+ if (h2c->last_sid >= 0)
+ return 0;
+
+ if (h2c->st0 >= H2_CS_ERROR)
+ return 0;
+
+ /* note: may be negative if a SETTINGS frame changes the limit */
+ ret1 = h2c->streams_limit - h2c->nb_streams;
+
+ /* we must also consider the limit imposed by stream IDs */
+ ret2 = h2_streams_left(h2c);
+ ret1 = MIN(ret1, ret2);
+ if (ret1 > 0 && srv && srv->max_reuse >= 0) {
+ ret2 = h2c->stream_cnt <= srv->max_reuse ? srv->max_reuse - h2c->stream_cnt + 1: 0;
+ ret1 = MIN(ret1, ret2);
+ }
+ return ret1;
+}
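+
+/* Example: with streams_limit=100 and nb_streams=2, up to 98 more streams may
+ * be opened; but a server configured with "max-reuse 3" whose connection has
+ * already carried stream_cnt=2 streams caps the result to 3 - 2 + 1 = 2.
+ */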
+
+/* Unconditionally produces a trace of the header. Please do not call this one
+ * directly; use h2_trace_header() instead, which first checks if traces are
+ * enabled.
+ */
+void _h2_trace_header(const struct ist hn, const struct ist hv,
+ uint64_t mask, const struct ist trc_loc, const char *func,
+ const struct h2c *h2c, const struct h2s *h2s)
+{
+ struct ist n_ist, v_ist;
+ const char *c_str, *s_str;
+
+ chunk_reset(&trash);
+ c_str = chunk_newstr(&trash);
+ if (h2c) {
+ chunk_appendf(&trash, "h2c=%p(%c,%s) ",
+ h2c, (h2c->flags & H2_CF_IS_BACK) ? 'B' : 'F', h2c_st_to_str(h2c->st0));
+ }
+
+ s_str = chunk_newstr(&trash);
+ if (h2s) {
+ if (h2s->id <= 0)
+ chunk_appendf(&trash, "dsi=%d ", h2s->h2c->dsi);
+ chunk_appendf(&trash, "h2s=%p(%d,%s) ", h2s, h2s->id, h2s_st_to_str(h2s->st));
+ }
+ else if (h2c)
+ chunk_appendf(&trash, "dsi=%d ", h2c->dsi);
+
+ n_ist = ist2(chunk_newstr(&trash), 0);
+ istscpy(&n_ist, hn, 256);
+ trash.data += n_ist.len;
+ if (n_ist.len != hn.len)
+ chunk_appendf(&trash, " (... +%ld)", (long)(hn.len - n_ist.len));
+
+ v_ist = ist2(chunk_newstr(&trash), 0);
+ istscpy(&v_ist, hv, 1024);
+ trash.data += v_ist.len;
+ if (v_ist.len != hv.len)
+ chunk_appendf(&trash, " (... +%ld)", (long)(hv.len - v_ist.len));
+
+ TRACE_PRINTF_LOC(TRACE_LEVEL_USER, mask, trc_loc, func,
+ (h2c ? h2c->conn : 0), 0, 0, 0,
+ "%s%s%s %s: %s", c_str, s_str,
+ (mask & H2_EV_TX_HDR) ? "sndh" : "rcvh",
+ n_ist.ptr, v_ist.ptr);
+}
+
+/* produce a trace of the header after checking that tracing is enabled */
+static inline void h2_trace_header(const struct ist hn, const struct ist hv,
+ uint64_t mask, const struct ist trc_loc, const char *func,
+ const struct h2c *h2c, const struct h2s *h2s)
+{
+ if ((TRACE_SOURCE)->verbosity >= H2_VERB_ADVANCED &&
+ TRACE_ENABLED(TRACE_LEVEL_USER, mask, h2c ? h2c->conn : 0, h2s, 0, 0))
+ _h2_trace_header(hn, hv, mask, trc_loc, func, h2c, h2s);
+}
+
+/* hpack-encode header name <hn> and value <hv>, possibly emitting a trace if
+ * currently enabled. This is done on behalf of function <func> at <trc_loc>
+ * passed as ist(TRC_LOC), h2c <h2c>, and h2s <h2s>, all of which may be NULL.
+ * The trace is only emitted if the header is emitted (in which case non-zero
+ * is returned). The trash is modified. In the traces, the header's name will
+ * be truncated to 256 chars and the header's value to 1024 chars.
+ */
+static inline int h2_encode_header(struct buffer *buf, const struct ist hn, const struct ist hv,
+ uint64_t mask, const struct ist trc_loc, const char *func,
+ const struct h2c *h2c, const struct h2s *h2s)
+{
+ int ret;
+
+ ret = hpack_encode_header(buf, hn, hv);
+ if (ret)
+ h2_trace_header(hn, hv, mask, trc_loc, func, h2c, h2s);
+
+ return ret;
+}
+
+/*****************************************************************/
+/* functions below are dedicated to the mux setup and management */
+/*****************************************************************/
+
+/* Initialize the mux once it's attached. For outgoing connections, the context
+ * is already initialized before installing the mux, so we detect incoming
+ * connections from the fact that the context is still NULL (even during mux
+ * upgrades). <input> is always used as the input buffer and may contain data.
+ * It is the caller's responsibility not to reuse it afterwards. Returns < 0 on
+ * error.
+ */
+static int h2_init(struct connection *conn, struct proxy *prx, struct session *sess,
+ struct buffer *input)
+{
+ struct h2c *h2c;
+ struct task *t = NULL;
+ void *conn_ctx = conn->ctx;
+
+ TRACE_ENTER(H2_EV_H2C_NEW);
+
+ h2c = pool_alloc(pool_head_h2c);
+ if (!h2c)
+ goto fail_no_h2c;
+
+ if (conn_is_back(conn)) {
+ h2c->flags = H2_CF_IS_BACK;
+ h2c->shut_timeout = h2c->timeout = prx->timeout.server;
+ if (tick_isset(prx->timeout.serverfin))
+ h2c->shut_timeout = prx->timeout.serverfin;
+
+ h2c->px_counters = EXTRA_COUNTERS_GET(prx->extra_counters_be,
+ &h2_stats_module);
+ } else {
+ h2c->flags = H2_CF_NONE;
+ h2c->shut_timeout = h2c->timeout = prx->timeout.client;
+ if (tick_isset(prx->timeout.clientfin))
+ h2c->shut_timeout = prx->timeout.clientfin;
+
+ h2c->px_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe,
+ &h2_stats_module);
+ }
+
+ h2c->proxy = prx;
+ h2c->task = NULL;
+ h2c->wait_event.tasklet = NULL;
+ h2c->idle_start = now_ms;
+ if (tick_isset(h2c->timeout)) {
+ t = task_new_here();
+ if (!t)
+ goto fail;
+
+ h2c->task = t;
+ t->process = h2_timeout_task;
+ t->context = h2c;
+ t->expire = tick_add(now_ms, h2c->timeout);
+ }
+
+ h2c->wait_event.tasklet = tasklet_new();
+ if (!h2c->wait_event.tasklet)
+ goto fail;
+ h2c->wait_event.tasklet->process = h2_io_cb;
+ h2c->wait_event.tasklet->context = h2c;
+ h2c->wait_event.events = 0;
+ if (!conn_is_back(conn)) {
+ /* Connection might already be in the stopping_list if subject
+ * to h1->h2 upgrade.
+ */
+ if (!LIST_INLIST(&conn->stopping_list)) {
+ LIST_APPEND(&mux_stopping_data[tid].list,
+ &conn->stopping_list);
+ }
+ }
+
+ h2c->ddht = hpack_dht_alloc();
+ if (!h2c->ddht)
+ goto fail;
+
+ /* Initialise the context. */
+ h2c->st0 = H2_CS_PREFACE;
+ h2c->conn = conn;
+ h2c->streams_limit = h2c_max_concurrent_streams(h2c);
+ h2c->max_id = -1;
+ h2c->errcode = H2_ERR_NO_ERROR;
+ h2c->rcvd_c = 0;
+ h2c->rcvd_s = 0;
+ h2c->nb_streams = 0;
+ h2c->nb_sc = 0;
+ h2c->nb_reserved = 0;
+ h2c->stream_cnt = 0;
+
+ h2c->dbuf = *input;
+ h2c->dsi = -1;
+
+ h2c->last_sid = -1;
+
+ br_init(h2c->mbuf, sizeof(h2c->mbuf) / sizeof(h2c->mbuf[0]));
+ h2c->miw = 65535; /* mux initial window size */
+ h2c->mws = 65535; /* mux window size */
+ h2c->mfs = 16384; /* initial max frame size */
+ h2c->streams_by_id = EB_ROOT;
+ LIST_INIT(&h2c->send_list);
+ LIST_INIT(&h2c->fctl_list);
+ LIST_INIT(&h2c->blocked_list);
+ LIST_INIT(&h2c->buf_wait.list);
+
+ conn->ctx = h2c;
+
+ TRACE_USER("new H2 connection", H2_EV_H2C_NEW, conn);
+
+ if (t)
+ task_queue(t);
+
+ if (h2c->flags & H2_CF_IS_BACK && likely(!conn_is_reverse(h2c->conn))) {
+ /* FIXME: this is temporary, for outgoing connections we need
+ * to immediately allocate a stream until the code is modified
+ * so that the caller calls ->attach(). For now the outgoing sc
+ * is stored as conn->ctx by the caller and saved in conn_ctx.
+ */
+ struct h2s *h2s;
+
+ h2s = h2c_bck_stream_new(h2c, conn_ctx, sess);
+ if (!h2s)
+ goto fail_stream;
+ }
+
+ if (sess)
+ proxy_inc_fe_cum_sess_ver_ctr(sess->listener, prx, 2);
+ HA_ATOMIC_INC(&h2c->px_counters->open_conns);
+ HA_ATOMIC_INC(&h2c->px_counters->total_conns);
+
+ /* prepare to read something */
+ h2c_restart_reading(h2c, 1);
+ TRACE_LEAVE(H2_EV_H2C_NEW, conn);
+ return 0;
+ fail_stream:
+ hpack_dht_free(h2c->ddht);
+ fail:
+ task_destroy(t);
+ tasklet_free(h2c->wait_event.tasklet);
+ pool_free(pool_head_h2c, h2c);
+ fail_no_h2c:
+ if (!conn_is_back(conn))
+ LIST_DEL_INIT(&conn->stopping_list);
+ conn->ctx = conn_ctx; /* restore saved ctx */
+ TRACE_DEVEL("leaving in error", H2_EV_H2C_NEW|H2_EV_H2C_END|H2_EV_H2C_ERR);
+ return -1;
+}
+
+/* returns the next allocatable outgoing stream ID for the H2 connection, or
+ * -1 if no more can be allocated.
+ */
+static inline int32_t h2c_get_next_sid(const struct h2c *h2c)
+{
+ int32_t id = (h2c->max_id + 1) | 1;
+
+ if ((id & 0x80000000U) || (h2c->last_sid >= 0 && id > h2c->last_sid))
+ id = -1;
+ return id;
+}
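+
+/* Example: max_id = -1 yields (0 | 1) = 1 for the first outgoing stream and
+ * max_id = 3 yields (4 | 1) = 5, keeping client-initiated IDs odd as required
+ * by RFC7540#5.1.1.
+ */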
+
+/* returns the stream associated with id <id> or NULL if not found */
+static inline struct h2s *h2c_st_by_id(struct h2c *h2c, int id)
+{
+ struct eb32_node *node;
+
+ if (id == 0)
+ return (struct h2s *)h2_closed_stream;
+
+ if (id > h2c->max_id)
+ return (struct h2s *)h2_idle_stream;
+
+ node = eb32_lookup(&h2c->streams_by_id, id);
+ if (!node)
+ return (struct h2s *)h2_closed_stream;
+
+ return container_of(node, struct h2s, by_id);
+}
+
+/* release function. This one should be called to free all resources allocated
+ * to the mux.
+ */
+static void h2_release(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+
+ TRACE_ENTER(H2_EV_H2C_END);
+
+ hpack_dht_free(h2c->ddht);
+
+ if (LIST_INLIST(&h2c->buf_wait.list))
+ LIST_DEL_INIT(&h2c->buf_wait.list);
+
+ h2_release_buf(h2c, &h2c->dbuf);
+ h2_release_mbuf(h2c);
+
+ if (h2c->task) {
+ h2c->task->context = NULL;
+ task_wakeup(h2c->task, TASK_WOKEN_OTHER);
+ h2c->task = NULL;
+ }
+ tasklet_free(h2c->wait_event.tasklet);
+ if (conn && h2c->wait_event.events != 0)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx, h2c->wait_event.events,
+ &h2c->wait_event);
+
+ HA_ATOMIC_DEC(&h2c->px_counters->open_conns);
+
+ pool_free(pool_head_h2c, h2c);
+
+ if (conn) {
+ if (!conn_is_back(conn))
+ LIST_DEL_INIT(&conn->stopping_list);
+
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ TRACE_DEVEL("freeing conn", H2_EV_H2C_END, conn);
+
+ conn_stop_tracking(conn);
+
+ /* there might be a GOAWAY frame still pending in the TCP
+ * stack, and if the peer continues to send (i.e. window
+ * updates etc), this can result in losing the GOAWAY. For
+ * this reason we try to drain anything received in between.
+ */
+ conn->flags |= CO_FL_WANT_DRAIN;
+
+ conn_xprt_shutw(conn);
+ conn_xprt_close(conn);
+ conn_sock_shutw(conn, !conn_is_back(conn));
+ conn_ctrl_close(conn);
+
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_END);
+}
+
+
+/******************************************************/
+/* functions below are for the H2 protocol processing */
+/******************************************************/
+
+/* returns the stream id of stream <h2s> or 0 if <h2s> is NULL */
+static inline __maybe_unused int h2s_id(const struct h2s *h2s)
+{
+ return h2s ? h2s->id : 0;
+}
+
+/* returns the sum of the stream's own window size and the mux's initial
+ * window, which together form the stream's effective window size.
+ */
+static inline int h2s_mws(const struct h2s *h2s)
+{
+ return h2s->sws + h2s->h2c->miw;
+}
+
+/* marks an error on the connection. Before settings are sent, we must not send
+ * a GOAWAY frame, and the error state will prevent h2c_send_goaway_error()
+ * from verifying this so we set H2_CF_GOAWAY_FAILED to make sure it will not
+ * even try.
+ */
+static inline __maybe_unused void h2c_error(struct h2c *h2c, enum h2_err err)
+{
+ TRACE_POINT(H2_EV_H2C_ERR, h2c->conn, 0, 0, (void *)(long)(err));
+ h2c->errcode = err;
+ if (h2c->st0 < H2_CS_SETTINGS1)
+ h2c->flags |= H2_CF_GOAWAY_FAILED;
+ h2c->st0 = H2_CS_ERROR;
+}
+
+/* marks an error on the stream. It may also update an already closed stream
+ * (e.g. to report an error after an RST was received).
+ */
+static inline __maybe_unused void h2s_error(struct h2s *h2s, enum h2_err err)
+{
+ if (h2s->id && h2s->st != H2_SS_ERROR) {
+ TRACE_POINT(H2_EV_H2S_ERR, h2s->h2c->conn, h2s, 0, (void *)(long)(err));
+ h2s->errcode = err;
+ if (h2s->st < H2_SS_ERROR)
+ h2s->st = H2_SS_ERROR;
+ se_fl_set_error(h2s->sd);
+ }
+}
+
+/* attempt to notify the data layer of recv availability */
+static void __maybe_unused h2s_notify_recv(struct h2s *h2s)
+{
+ if (h2s->subs && h2s->subs->events & SUB_RETRY_RECV) {
+ TRACE_POINT(H2_EV_STRM_WAKE, h2s->h2c->conn, h2s);
+ tasklet_wakeup(h2s->subs->tasklet);
+ h2s->subs->events &= ~SUB_RETRY_RECV;
+ if (!h2s->subs->events)
+ h2s->subs = NULL;
+ }
+}
+
+/* attempt to notify the data layer of send availability */
+static void __maybe_unused h2s_notify_send(struct h2s *h2s)
+{
+ if (h2s->subs && h2s->subs->events & SUB_RETRY_SEND) {
+ TRACE_POINT(H2_EV_STRM_WAKE, h2s->h2c->conn, h2s);
+ h2s->flags |= H2_SF_NOTIFIED;
+ tasklet_wakeup(h2s->subs->tasklet);
+ h2s->subs->events &= ~SUB_RETRY_SEND;
+ if (!h2s->subs->events)
+ h2s->subs = NULL;
+ }
+ else if (h2s->flags & (H2_SF_WANT_SHUTR | H2_SF_WANT_SHUTW)) {
+ TRACE_POINT(H2_EV_STRM_WAKE, h2s->h2c->conn, h2s);
+ tasklet_wakeup(h2s->shut_tl);
+ }
+}
+
+/* alerts the data layer, trying to wake it up by all means, following
+ * this sequence :
+ * - if the h2s' data layer is subscribed to recv, then it's woken up for recv
+ * - if it's subscribed to send, then it's woken up for send
+ * - if it was subscribed to neither, its ->wake() callback is called
+ * It is safe to call this function with a closed stream which doesn't have a
+ * stream connector anymore.
+ */
+static void __maybe_unused h2s_alert(struct h2s *h2s)
+{
+ TRACE_ENTER(H2_EV_H2S_WAKE, h2s->h2c->conn, h2s);
+
+ if (h2s->subs ||
+ (h2s->flags & (H2_SF_WANT_SHUTR | H2_SF_WANT_SHUTW))) {
+ h2s_notify_recv(h2s);
+ h2s_notify_send(h2s);
+ }
+ else if (h2s_sc(h2s) && h2s_sc(h2s)->app_ops->wake != NULL) {
+ TRACE_POINT(H2_EV_STRM_WAKE, h2s->h2c->conn, h2s);
+ h2s_sc(h2s)->app_ops->wake(h2s_sc(h2s));
+ }
+
+ TRACE_LEAVE(H2_EV_H2S_WAKE, h2s->h2c->conn, h2s);
+}
+
+/* writes the 24-bit frame size <len> at address <frame> */
+static inline __maybe_unused void h2_set_frame_size(void *frame, uint32_t len)
+{
+ uint8_t *out = frame;
+
+ *out = len >> 16;
+ write_n16(out + 1, len);
+}
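+
+/* e.g. h2_set_frame_size(frame, 12) stores the three bytes 00 00 0c, the
+ * 24-bit big-endian length field of a 12-byte frame payload.
+ */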
+
+/* reads <bytes> bytes from buffer <b> starting at relative offset <o> from the
+ * current pointer, dealing with wrapping, and stores the result in <dst>. It's
+ * the caller's responsibility to verify that there are at least <bytes> bytes
+ * available in the buffer's input prior to calling this function. The buffer
+ * is assumed not to hold any output data.
+ */
+static inline __maybe_unused void h2_get_buf_bytes(void *dst, size_t bytes,
+ const struct buffer *b, int o)
+{
+ readv_bytes(dst, bytes, b_peek(b, o), b_wrap(b) - b_peek(b, o), b_orig(b));
+}
+
+static inline __maybe_unused uint16_t h2_get_n16(const struct buffer *b, int o)
+{
+ return readv_n16(b_peek(b, o), b_wrap(b) - b_peek(b, o), b_orig(b));
+}
+
+static inline __maybe_unused uint32_t h2_get_n32(const struct buffer *b, int o)
+{
+ return readv_n32(b_peek(b, o), b_wrap(b) - b_peek(b, o), b_orig(b));
+}
+
+static inline __maybe_unused uint64_t h2_get_n64(const struct buffer *b, int o)
+{
+ return readv_n64(b_peek(b, o), b_wrap(b) - b_peek(b, o), b_orig(b));
+}
+
+
+/* Peeks an H2 frame header from offset <o> of buffer <b> into descriptor <h>.
+ * The algorithm is not obvious. It turns out that H2 headers are neither
+ * aligned nor do they use regular sizes. And to add to the trouble, the buffer
+ * may wrap so each byte read must be checked. The header is formed like this :
+ *
+ * b0 b1 b2 b3 b4 b5..b8
+ * +----------+---------+--------+----+----+----------------------+
+ * |len[23:16]|len[15:8]|len[7:0]|type|flag|sid[31:0] (big endian)|
+ * +----------+---------+--------+----+----+----------------------+
+ *
+ * Here we read a big-endian 64 bit word covering bytes b1..b8. This way, in a
+ * single read we get the sid properly aligned and ordered, and 16 bits of len
+ * properly ordered as well. The type and flags can be extracted using bit
+ * shifts from the word, and only one extra read is needed to fetch len[23:16].
+ * Returns zero if some bytes are missing, otherwise non-zero on success. The
+ * buffer is assumed not to contain any output data.
+ */
+static __maybe_unused int h2_peek_frame_hdr(const struct buffer *b, int o, struct h2_fh *h)
+{
+ uint64_t w;
+
+ if (b_data(b) < o + 9)
+ return 0;
+
+ w = h2_get_n64(b, o + 1);
+ h->len = *(uint8_t*)b_peek(b, o) << 16;
+ h->sid = w & 0x7FFFFFFF; /* RFC7540#4.1: R bit must be ignored */
+ h->ff = w >> 32;
+ h->ft = w >> 40;
+ h->len += w >> 48;
+ return 1;
+}
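+
+/* Worked example on the header of a 12-byte SETTINGS frame with no flags on
+ * stream 0, i.e. bytes 00 00 0c 04 00 00 00 00 00: w = 0x000c040000000000,
+ * hence len = (0x00 << 16) + (w >> 48) = 12, ft = 0x04 (SETTINGS), ff = 0x00
+ * and sid = 0.
+ */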
+
+/* skip the next 9 bytes corresponding to the frame header possibly parsed by
+ * h2_peek_frame_hdr() above.
+ */
+static inline __maybe_unused void h2_skip_frame_hdr(struct buffer *b)
+{
+ b_del(b, 9);
+}
+
+/* same as above, automatically advances the buffer on success */
+static inline __maybe_unused int h2_get_frame_hdr(struct buffer *b, struct h2_fh *h)
+{
+ int ret;
+
+ ret = h2_peek_frame_hdr(b, 0, h);
+ if (ret > 0)
+ h2_skip_frame_hdr(b);
+ return ret;
+}
+
+
+/* try to fragment the headers frame present at the beginning of buffer <b>,
+ * enforcing a limit of <mfs> bytes per frame. Returns 0 on failure, 1 on
+ * success. Typical causes of failure include a buffer not large enough to
+ * add extra frame headers. The existing frame size is read from the current
+ * frame header. Its EH flag will be cleared if CONTINUATION frames need to
+ * be added, and its length will be adjusted. The stream ID for continuation
+ * frames will be copied from the initial frame's.
+ */
+static int h2_fragment_headers(struct buffer *b, uint32_t mfs)
+{
+ size_t remain = b->data - 9;
+ int extra_frames = (remain - 1) / mfs;
+ size_t fsize;
+ char *fptr;
+ int frame;
+
+ if (b->data <= mfs + 9)
+ return 1;
+
+ /* Too large a frame, we need to fragment it using CONTINUATION
+ * frames. We start from the end and move tails as needed.
+ */
+ if (b->data + extra_frames * 9 > b->size)
+ return 0;
+
+ for (frame = extra_frames; frame; frame--) {
+ fsize = ((remain - 1) % mfs) + 1;
+ remain -= fsize;
+
+ /* move data */
+ fptr = b->area + 9 + remain + (frame - 1) * 9;
+ memmove(fptr + 9, b->area + 9 + remain, fsize);
+ b->data += 9;
+
+ /* write new frame header */
+ h2_set_frame_size(fptr, fsize);
+ fptr[3] = H2_FT_CONTINUATION;
+ fptr[4] = (frame == extra_frames) ? H2_F_HEADERS_END_HEADERS : 0;
+ write_n32(fptr + 5, read_n32(b->area + 5));
+ }
+
+ b->area[4] &= ~H2_F_HEADERS_END_HEADERS;
+ h2_set_frame_size(b->area, remain);
+ return 1;
+}
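+
+/* Example: with mfs = 16384, a 40000-byte HEADERS payload is rewritten as a
+ * 16384-byte HEADERS frame followed by CONTINUATION frames of 16384 and 7232
+ * bytes; only the last one keeps the END_HEADERS flag.
+ */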
+
+
+/* marks stream <h2s> as CLOSED and decrements the number of active streams for
+ * its connection if the stream was not yet closed. Please use this exclusively
+ * before closing a stream to ensure stream count is well maintained. Note that
+ * it does explicitly support being called with a partially initialized h2s
+ * (e.g. sd==NULL).
+ */
+static inline void h2s_close(struct h2s *h2s)
+{
+ if (h2s->st != H2_SS_CLOSED) {
+ TRACE_ENTER(H2_EV_H2S_END, h2s->h2c->conn, h2s);
+ h2s->h2c->nb_streams--;
+ if (!h2s->id)
+ h2s->h2c->nb_reserved--;
+ if (h2s->sd && h2s_sc(h2s)) {
+ if (!se_fl_test(h2s->sd, SE_FL_EOS) && !b_data(&h2s->rxbuf))
+ h2s_notify_recv(h2s);
+ }
+ HA_ATOMIC_DEC(&h2s->h2c->px_counters->open_streams);
+
+ TRACE_LEAVE(H2_EV_H2S_END, h2s->h2c->conn, h2s);
+ }
+ h2s->st = H2_SS_CLOSED;
+}
+
+/* Check h2c and h2s flags to evaluate if EOI/EOS/ERR_PENDING/ERROR flags must
+ * be set on the SE.
+ */
+static inline void h2s_propagate_term_flags(struct h2c *h2c, struct h2s *h2s)
+{
+ if (h2s->flags & H2_SF_ES_RCVD) {
+ se_fl_set(h2s->sd, SE_FL_EOI);
+ /* Add EOS flag for tunnel */
+ if (h2s->flags & H2_SF_BODY_TUNNEL)
+ se_fl_set(h2s->sd, SE_FL_EOS);
+ }
+ if (h2c_read0_pending(h2c) || h2s->st == H2_SS_CLOSED) {
+ se_fl_set(h2s->sd, SE_FL_EOS);
+ if (!se_fl_test(h2s->sd, SE_FL_EOI))
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+ }
+ if (se_fl_test(h2s->sd, SE_FL_ERR_PENDING))
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+}
+
+/* Detaches an H2 stream from its H2C and releases it to the H2S pool.
+ * h2s_destroy should only ever be called by the thread that owns the stream,
+ * which means that a tasklet should be used if we want to destroy the h2s
+ * from another thread.
+ */
+static void h2s_destroy(struct h2s *h2s)
+{
+ struct connection *conn = h2s->h2c->conn;
+
+ TRACE_ENTER(H2_EV_H2S_END, conn, h2s);
+
+ h2s_close(h2s);
+ eb32_delete(&h2s->by_id);
+ if (b_size(&h2s->rxbuf)) {
+ b_free(&h2s->rxbuf);
+ offer_buffers(NULL, 1);
+ }
+
+ if (h2s->subs)
+ h2s->subs->events = 0;
+
+ /* There's no need to explicitly call unsubscribe here, the only
+ * reference left would be in the h2c send_list/fctl_list, and if
+ * we're in it, we're getting out anyway
+ */
+ h2_remove_from_list(h2s);
+
+ /* ditto, calling tasklet_free() here should be ok */
+ tasklet_free(h2s->shut_tl);
+ BUG_ON(h2s->sd && !se_fl_test(h2s->sd, SE_FL_ORPHAN));
+ sedesc_free(h2s->sd);
+ pool_free(pool_head_h2s, h2s);
+
+ TRACE_LEAVE(H2_EV_H2S_END, conn);
+}
+
+/* allocates a new stream <id> for connection <h2c> and adds it into h2c's
+ * stream tree. In case of error, nothing is added and NULL is returned. The
+ * causes of errors can be any failed memory allocation. The caller is
+ * responsible for checking if the connection may support an extra stream
+ * prior to calling this function.
+ */
+static struct h2s *h2s_new(struct h2c *h2c, int id)
+{
+ struct h2s *h2s;
+
+ TRACE_ENTER(H2_EV_H2S_NEW, h2c->conn);
+
+ h2s = pool_alloc(pool_head_h2s);
+ if (!h2s)
+ goto out;
+
+ h2s->shut_tl = tasklet_new();
+ if (!h2s->shut_tl) {
+ pool_free(pool_head_h2s, h2s);
+ goto out;
+ }
+ h2s->subs = NULL;
+ h2s->shut_tl->process = h2_deferred_shut;
+ h2s->shut_tl->context = h2s;
+ LIST_INIT(&h2s->list);
+ h2s->h2c = h2c;
+ h2s->sd = NULL;
+ h2s->sws = 0;
+ h2s->flags = H2_SF_NONE;
+ h2s->errcode = H2_ERR_NO_ERROR;
+ h2s->st = H2_SS_IDLE;
+ h2s->status = 0;
+ h2s->body_len = 0;
+ h2s->rxbuf = BUF_NULL;
+ memset(h2s->upgrade_protocol, 0, sizeof(h2s->upgrade_protocol));
+
+ h2s->by_id.key = h2s->id = id;
+ if (id > 0)
+ h2c->max_id = id;
+ else
+ h2c->nb_reserved++;
+
+ eb32_insert(&h2c->streams_by_id, &h2s->by_id);
+ h2c->nb_streams++;
+
+ HA_ATOMIC_INC(&h2c->px_counters->open_streams);
+ HA_ATOMIC_INC(&h2c->px_counters->total_streams);
+
+ TRACE_LEAVE(H2_EV_H2S_NEW, h2c->conn, h2s);
+ return h2s;
+ out:
+ TRACE_DEVEL("leaving in error", H2_EV_H2S_ERR|H2_EV_H2S_END, h2c->conn);
+ return NULL;
+}
+
+/* creates a new stream <id> on the h2c connection and returns it, or NULL in
+ * case of memory allocation error. <input> is used as input buffer for the new
+ * stream. On success, it is transferred to the stream and the mux is no longer
+ * responsible for it. On error, <input> is unchanged, thus the mux must still
+ * take care of it.
+ */
+static struct h2s *h2c_frt_stream_new(struct h2c *h2c, int id, struct buffer *input, uint32_t flags)
+{
+ struct session *sess = h2c->conn->owner;
+ struct h2s *h2s;
+
+ TRACE_ENTER(H2_EV_H2S_NEW, h2c->conn);
+
+ /* Cannot handle stream if an active reversed connection is not yet accepted. */
+ BUG_ON(conn_reverse_in_preconnect(h2c->conn));
+
+ if (h2c->nb_streams >= h2c_max_concurrent_streams(h2c)) {
+ TRACE_ERROR("HEADERS frame causing MAX_CONCURRENT_STREAMS to be exceeded", H2_EV_H2S_NEW|H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+ session_inc_http_req_ctr(sess);
+ session_inc_http_err_ctr(sess);
+ goto out;
+ }
+
+ h2s = h2s_new(h2c, id);
+ if (!h2s)
+ goto out_alloc;
+
+ h2s->sd = sedesc_new();
+ if (!h2s->sd)
+ goto out_close;
+ h2s->sd->se = h2s;
+ h2s->sd->conn = h2c->conn;
+ se_fl_set(h2s->sd, SE_FL_T_MUX | SE_FL_ORPHAN | SE_FL_NOT_FIRST);
+
+ if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_H2_SND))
+ se_fl_set(h2s->sd, SE_FL_MAY_FASTFWD_CONS);
+
+ /* The request is not finished, don't expect data from the opposite side
+ * yet
+ */
+ if (!(h2c->dff & (H2_F_HEADERS_END_STREAM | H2_F_DATA_END_STREAM)) && !(flags & H2_SF_BODY_TUNNEL))
+ se_expect_no_data(h2s->sd);
+
+ /* FIXME: wrong analogy between ext-connect and websocket, this needs
+ * to be refined.
+ */
+ if (flags & H2_SF_EXT_CONNECT_RCVD)
+ se_fl_set(h2s->sd, SE_FL_WEBSOCKET);
+
+ /* The stream will record the request's accept date (which is either the
+ * end of the connection's or the date immediately after the previous
+ * request) and the idle time, which is the delay since the previous
+ * request. We can set the value now, it will be copied by stream_new().
+ */
+ sess->t_idle = ns_to_ms(now_ns - sess->accept_ts) - sess->t_handshake;
+
+ if (!sc_new_from_endp(h2s->sd, sess, input))
+ goto out_close;
+
+ h2c->nb_sc++;
+
+ /* We want the accept date presented to the next stream to be the one
+ * we have now, the handshake time to be null (since the next stream
+ * is not delayed by a handshake), and the idle time to count since
+ * right now.
+ */
+ sess->accept_date = date;
+ sess->accept_ts = now_ns;
+ sess->t_handshake = 0;
+ sess->t_idle = 0;
+
+ /* OK done, the stream lives its own life now */
+ if (h2_frt_has_too_many_sc(h2c))
+ h2c->flags |= H2_CF_DEM_TOOMANY;
+ TRACE_LEAVE(H2_EV_H2S_NEW, h2c->conn);
+ return h2s;
+
+ out_close:
+ h2s_destroy(h2s);
+ out_alloc:
+ TRACE_ERROR("Failed to allocate a new stream", H2_EV_H2S_NEW|H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+ out:
+ sess_log(sess);
+ TRACE_LEAVE(H2_EV_H2S_NEW|H2_EV_H2S_ERR|H2_EV_H2S_END, h2c->conn);
+ return NULL;
+}
+
+/* allocates a new stream associated to stream connector <sc> on the h2c
+ * connection and returns it, or NULL in case of memory allocation error or if
+ * the highest possible stream ID was reached.
+ */
+static struct h2s *h2c_bck_stream_new(struct h2c *h2c, struct stconn *sc, struct session *sess)
+{
+ struct h2s *h2s = NULL;
+
+ TRACE_ENTER(H2_EV_H2S_NEW, h2c->conn);
+
+ /* Cannot handle stream if the connection is waiting to be reversed. */
+ BUG_ON(conn_reverse_in_preconnect(h2c->conn));
+
+ if (h2c->nb_streams >= h2c->streams_limit) {
+ TRACE_ERROR("Aborting stream since negotiated limit is too low", H2_EV_H2S_NEW, h2c->conn);
+ goto out;
+ }
+
+ if (h2_streams_left(h2c) < 1) {
+ TRACE_ERROR("Aborting stream since no more streams left", H2_EV_H2S_NEW, h2c->conn);
+ goto out;
+ }
+
+ /* Defer choosing the ID until we send the first message to create the stream */
+ h2s = h2s_new(h2c, 0);
+ if (!h2s) {
+ TRACE_ERROR("Failed to allocate a new stream", H2_EV_H2S_NEW, h2c->conn);
+ goto out;
+ }
+
+ if (sc_attach_mux(sc, h2s, h2c->conn) < 0) {
+ TRACE_ERROR("Failed to allocate a new stream", H2_EV_H2S_NEW, h2c->conn);
+ h2s_destroy(h2s);
+ h2s = NULL;
+ goto out;
+ }
+ h2s->sd = sc->sedesc;
+ h2s->sess = sess;
+ h2c->nb_sc++;
+
+ if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_H2_SND))
+ se_fl_set(h2s->sd, SE_FL_MAY_FASTFWD_CONS);
+ /* on the backend we can afford to only count total streams upon success */
+ h2c->stream_cnt++;
+
+ out:
+ if (likely(h2s))
+ TRACE_LEAVE(H2_EV_H2S_NEW, h2c->conn, h2s);
+ else
+ TRACE_LEAVE(H2_EV_H2S_NEW|H2_EV_H2S_ERR|H2_EV_H2S_END, h2c->conn, h2s);
+ return h2s;
+}
+
+/* try to send a settings frame on the connection. Returns > 0 on success, 0 if
+ * it couldn't do anything. It may return an error in h2c. See RFC7540#11.3 for
+ * the various settings codes.
+ */
+static int h2c_send_settings(struct h2c *h2c)
+{
+ struct buffer *res;
+ char buf_data[100]; // enough for 15 settings
+ struct buffer buf;
+ int iws;
+ int mfs;
+ int mcs;
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn);
+
+ chunk_init(&buf, buf_data, sizeof(buf_data));
+ chunk_memcpy(&buf,
+ "\x00\x00\x00" /* length : 0 for now */
+ "\x04\x00" /* type : 4 (settings), flags : 0 */
+ "\x00\x00\x00\x00", /* stream ID : 0 */
+ 9);
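+ /* this is the 9-byte frame header described in RFC7540#4.1: a 24-bit
+ * length, an 8-bit type, 8-bit flags, then a reserved bit and a
+ * 31-bit stream ID. The length is fixed up below once all settings
+ * are appended.
+ */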
+
+ if (h2c->flags & H2_CF_IS_BACK) {
+ /* send settings_enable_push=0 */
+ chunk_memcat(&buf, "\x00\x02\x00\x00\x00\x00", 6);
+ }
+
+ /* rfc 8441 #3 SETTINGS_ENABLE_CONNECT_PROTOCOL=1,
+ * sent automatically unless disabled in the global config */
+ if (!(global.tune.options & GTUNE_DISABLE_H2_WEBSOCKET))
+ chunk_memcat(&buf, "\x00\x08\x00\x00\x00\x01", 6);
+
+ if (h2_settings_header_table_size != 4096) {
+ char str[6] = "\x00\x01"; /* header_table_size */
+
+ write_n32(str + 2, h2_settings_header_table_size);
+ chunk_memcat(&buf, str, 6);
+ }
+
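+ /* the per-side initial window size takes precedence over the common
+ * default when set
+ */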
+ iws = (h2c->flags & H2_CF_IS_BACK) ?
+ h2_be_settings_initial_window_size:
+ h2_fe_settings_initial_window_size;
+ iws = iws ? iws : h2_settings_initial_window_size;
+
+ if (iws != 65535) {
+ char str[6] = "\x00\x04"; /* initial_window_size */
+
+ write_n32(str + 2, iws);
+ chunk_memcat(&buf, str, 6);
+ }
+
+ mcs = h2c_max_concurrent_streams(h2c);
+ if (mcs != 0) {
+ char str[6] = "\x00\x03"; /* max_concurrent_streams */
+
+ /* Note: 0 means "unlimited" for haproxy's config but not for
+ * the protocol, so never send this value!
+ */
+ write_n32(str + 2, mcs);
+ chunk_memcat(&buf, str, 6);
+ }
+
+ mfs = h2_settings_max_frame_size;
+ if (mfs > global.tune.bufsize)
+ mfs = global.tune.bufsize;
+
+ if (!mfs)
+ mfs = global.tune.bufsize;
+
+ if (mfs != 16384) {
+ char str[6] = "\x00\x05"; /* max_frame_size */
+
+ /* note: similarly we could also emit MAX_HEADER_LIST_SIZE to
+ * match bufsize - rewrite size, but at the moment it seems
+ * that clients don't take care of it.
+ */
+ write_n32(str + 2, mfs);
+ chunk_memcat(&buf, str, 6);
+ }
+
+ h2_set_frame_size(buf.area, buf.data - 9);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(buf.area, buf.data));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn);
+ return ret;
+}
+
+/* Try to receive a connection preface, then upon success try to send our
+ * preface which is a SETTINGS frame. Returns > 0 on success or zero on
+ * missing data. It may return an error in h2c.
+ */
+static int h2c_frt_recv_preface(struct h2c *h2c)
+{
+ int ret1;
+ int ret2;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_PREFACE, h2c->conn);
+
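+ /* b_isteq() returns the matched length on success, zero when more
+ * data is needed (only a prefix matched so far), and < 0 on mismatch
+ */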
+ ret1 = b_isteq(&h2c->dbuf, 0, b_data(&h2c->dbuf), ist(H2_CONN_PREFACE));
+
+ if (unlikely(ret1 <= 0)) {
+ if (!ret1)
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ if (ret1 < 0 || (h2c->flags & H2_CF_RCVD_SHUT)) {
+ TRACE_ERROR("I/O error or short read", H2_EV_RX_FRAME|H2_EV_RX_PREFACE, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ if (b_data(&h2c->dbuf) ||
+ !(((const struct session *)h2c->conn->owner)->fe->options & PR_O_IGNORE_PRB))
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ }
+ ret2 = 0;
+ goto out;
+ }
+
+ ret2 = h2c_send_settings(h2c);
+ if (ret2 > 0)
+ b_del(&h2c->dbuf, ret1);
+ out:
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_PREFACE, h2c->conn);
+ return ret2;
+}
+
+/* Try to send the connection preface, then upon success try to send our
+ * SETTINGS frame, which completes our preface. Returns > 0 on success or
+ * zero if nothing could be sent (e.g. missing room). It may return an
+ * error in h2c.
+ */
+static int h2c_bck_send_preface(struct h2c *h2c)
+{
+ struct buffer *res;
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_PREFACE, h2c->conn);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ if (!b_data(res)) {
+ /* preface not yet sent */
+ ret = b_istput(res, ist(H2_CONN_PREFACE));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto out;
+ }
+ }
+ }
+ ret = h2c_send_settings(h2c);
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_PREFACE, h2c->conn);
+ return ret;
+}
+
+/* try to send a GOAWAY frame on the connection to report an error or a graceful
+ * shutdown, with h2c->errcode as the error code. Returns > 0 on success or zero
+ * if nothing was done. It uses h2c->last_sid as the advertised ID, or copies it
+ * from h2c->max_id if it's not set yet (<0). In case of lack of room to write
+ * the message, it subscribes the requester (either <h2s> or <h2c>) to future
+ * notifications. It sets H2_CF_GOAWAY_SENT on success, and H2_CF_GOAWAY_FAILED
+ * on unrecoverable failure. It will not attempt to send one again in this last
+ * case, nor will it send one if settings were not sent (e.g. still waiting for
+ * a preface) so that it is safe to use h2c_error() to report such errors.
+ */
+static int h2c_send_goaway_error(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer *res;
+ char str[17];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_GOAWAY, h2c->conn);
+
+ if ((h2c->flags & H2_CF_GOAWAY_FAILED) || h2c->st0 < H2_CS_SETTINGS1) {
+ ret = 1; // claim that it worked
+ goto out;
+ }
+
+ /* len: 8, type: 7, flags: none, sid: 0 */
+ memcpy(str, "\x00\x00\x08\x07\x00\x00\x00\x00\x00", 9);
+
+ if (h2c->last_sid < 0)
+ h2c->last_sid = h2c->max_id;
+
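+ /* the 8-byte GOAWAY payload carries the 31-bit last stream ID
+ * followed by the 32-bit error code (RFC7540#6.8); no extra debug
+ * data is sent
+ */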
+ write_n32(str + 9, h2c->last_sid);
+ write_n32(str + 13, h2c->errcode);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ if (h2s)
+ h2s->flags |= H2_SF_BLK_MROOM;
+ else
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 17));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ if (h2s)
+ h2s->flags |= H2_SF_BLK_MROOM;
+ else
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+ else {
+ /* we cannot report this error using GOAWAY, so we mark
+ * it and claim a success.
+ */
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ h2c->flags |= H2_CF_GOAWAY_FAILED;
+ ret = 1;
+ goto out;
+ }
+ }
+ h2c->flags |= H2_CF_GOAWAY_SENT;
+
+ /* some codes are not for real errors, just attempts to close cleanly */
+ switch (h2c->errcode) {
+ case H2_ERR_NO_ERROR:
+ case H2_ERR_ENHANCE_YOUR_CALM:
+ case H2_ERR_REFUSED_STREAM:
+ case H2_ERR_CANCEL:
+ break;
+ default:
+ HA_ATOMIC_INC(&h2c->px_counters->goaway_resp);
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_GOAWAY, h2c->conn);
+ return ret;
+}
+
+/* Try to send an RST_STREAM frame on the connection for the indicated stream
+ * during mux operations. This stream must be valid and cannot be closed
+ * already. h2s->id will be used for the stream ID and h2s->errcode will be
+ * used for the error code. h2s->st will be updated to H2_SS_CLOSED if it
+ * was not already.
+ *
+ * Returns > 0 on success or zero if nothing was done. In case of lack of room
+ * to write the message, it subscribes the stream to future notifications.
+ */
+static int h2s_send_rst_stream(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer *res;
+ char str[13];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_RST, h2c->conn, h2s);
+
+ if (!h2s || h2s->st == H2_SS_CLOSED) {
+ ret = 1;
+ goto out;
+ }
+
+ /* RFC7540#5.4.2: To avoid looping, an endpoint MUST NOT send a
+ * RST_STREAM in response to a RST_STREAM frame.
+ */
+ if (h2c->dsi == h2s->id && h2c->dft == H2_FT_RST_STREAM) {
+ ret = 1;
+ goto ignore;
+ }
+
+ /* len: 4, type: 3, flags: none */
+ memcpy(str, "\x00\x00\x04\x03\x00", 5);
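+ /* the stream ID fills offset 5 of the header and the 4-byte error
+ * code payload starts at offset 9, for 13 bytes total (RFC7540#6.4)
+ */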
+ write_n32(str + 5, h2s->id);
+ write_n32(str + 9, h2s->errcode);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 13));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ goto out;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ignore:
+ h2s->flags |= H2_SF_RST_SENT;
+ h2s_close(h2s);
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_RST, h2c->conn, h2s);
+ return ret;
+}
+
+/* Try to send an RST_STREAM frame on the connection for the stream being
+ * demuxed using h2c->dsi for the stream ID. It will use h2s->errcode as the
+ * error code, even if the stream is one of the dummy ones, and will update
+ * h2s->st to H2_SS_CLOSED if it was not already.
+ *
+ * Returns > 0 on success or zero if nothing was done. In case of lack of room
+ * to write the message, it blocks the demuxer and subscribes it to future
+ * notifications. It's worth mentioning that an RST may even be sent for a
+ * closed stream.
+ */
+static int h2c_send_rst_stream(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer *res;
+ char str[13];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_RST, h2c->conn, h2s);
+
+ /* RFC7540#5.4.2: To avoid looping, an endpoint MUST NOT send a
+ * RST_STREAM in response to a RST_STREAM frame.
+ */
+ if (h2c->dft == H2_FT_RST_STREAM) {
+ ret = 1;
+ goto ignore;
+ }
+
+ /* len: 4, type: 3, flags: none */
+ memcpy(str, "\x00\x00\x04\x03\x00", 5);
+
+ write_n32(str + 5, h2c->dsi);
+ write_n32(str + 9, h2s->errcode);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 13));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ ignore:
+ if (h2s->id) {
+ h2s->flags |= H2_SF_RST_SENT;
+ h2s_close(h2s);
+ }
+
+ out:
+ HA_ATOMIC_INC(&h2c->px_counters->rst_stream_resp);
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_RST, h2c->conn, h2s);
+ return ret;
+}
+
+/* try to send an empty DATA frame with the ES flag set to notify about the
+ * end of stream and match a shutdown(write). If an ES was already sent as
+ * indicated by HLOC/ERROR/RESET/CLOSED states, nothing is done. Returns > 0
+ * on success or zero if nothing was done. In case of lack of room to write the
+ * message, it subscribes the requesting stream to future notifications.
+ */
+static int h2_send_empty_data_es(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+ struct buffer *res;
+ char str[9];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_TX_EOI, h2c->conn, h2s);
+
+ if (h2s->st == H2_SS_HLOC || h2s->st == H2_SS_ERROR || h2s->st == H2_SS_CLOSED) {
+ ret = 1;
+ goto out;
+ }
+
+ /* len: 0x000000, type: 0(DATA), flags: ES=1 */
+ memcpy(str, "\x00\x00\x00\x00\x01", 5);
+ write_n32(str + 5, h2s->id);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 9));
+ if (likely(ret > 0)) {
+ h2s->flags |= H2_SF_ES_SENT;
+ }
+ else if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_TX_EOI, h2c->conn, h2s);
+ return ret;
+}
+
+/* wake a specific stream and assign its stream connector some SE_FL_* flags
+ * among SE_FL_ERR_PENDING and SE_FL_ERROR if needed. The stream's state
+ * is automatically updated accordingly. If the stream is orphaned, it is
+ * destroyed.
+ */
+static void h2s_wake_one_stream(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+
+ TRACE_ENTER(H2_EV_H2S_WAKE, h2c->conn, h2s);
+
+ if (!h2s_sc(h2s)) {
+ /* this stream was already orphaned */
+ h2s_destroy(h2s);
+ TRACE_DEVEL("leaving with no h2s", H2_EV_H2S_WAKE, h2c->conn);
+ return;
+ }
+
+ if (h2c_read0_pending(h2s->h2c)) {
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HREM;
+ else if (h2s->st == H2_SS_HLOC)
+ h2s_close(h2s);
+ }
+
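+ /* report an error if the connection is broken, or if a last_sid
+ * was set by a GOAWAY and excludes this stream (a stream with an
+ * unassigned ID can never be used past that point)
+ */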
+ if ((h2s->st != H2_SS_CLOSED) &&
+ (h2s->h2c->st0 >= H2_CS_ERROR || (h2s->h2c->flags & H2_CF_ERROR) ||
+ (h2s->h2c->last_sid > 0 && (!h2s->id || h2s->id > h2s->h2c->last_sid)))) {
+ se_fl_set_error(h2s->sd);
+
+ if (h2s->st < H2_SS_ERROR)
+ h2s->st = H2_SS_ERROR;
+ }
+
+ h2s_alert(h2s);
+ TRACE_LEAVE(H2_EV_H2S_WAKE, h2c->conn);
+}
+
+/* wake the streams attached to the connection, whose id is greater than <last>
+ * or unassigned.
+ */
+static void h2_wake_some_streams(struct h2c *h2c, int last)
+{
+ struct eb32_node *node;
+ struct h2s *h2s;
+
+ TRACE_ENTER(H2_EV_H2S_WAKE, h2c->conn);
+
+ /* Wake all streams with ID > last */
+ node = eb32_lookup_ge(&h2c->streams_by_id, last + 1);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ node = eb32_next(node);
+ h2s_wake_one_stream(h2s);
+ }
+
+ /* Wake all streams with unassigned ID (ID == 0) */
+ node = eb32_lookup(&h2c->streams_by_id, 0);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ if (h2s->id > 0)
+ break;
+ node = eb32_next(node);
+ h2s_wake_one_stream(h2s);
+ }
+
+ TRACE_LEAVE(H2_EV_H2S_WAKE, h2c->conn);
+}
+
+/* Wake up all blocked streams whose window size has become positive after the
+ * mux's initial window was adjusted. This should be done after having processed
+ * SETTINGS frames which have updated the mux's initial window size.
+ */
+static void h2c_unblock_sfctl(struct h2c *h2c)
+{
+ struct h2s *h2s;
+ struct eb32_node *node;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ node = eb32_first(&h2c->streams_by_id);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ if (h2s->flags & H2_SF_BLK_SFCTL && h2s_mws(h2s) > 0) {
+ h2s->flags &= ~H2_SF_BLK_SFCTL;
+ LIST_DEL_INIT(&h2s->list);
+ if ((h2s->subs && h2s->subs->events & SUB_RETRY_SEND) ||
+ h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ node = eb32_next(node);
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_WAKE, h2c->conn);
+}
+
+/* processes a SETTINGS frame whose payload lies in h2c->dbuf for h2c->dfl
+ * bytes, and ACKs it if needed. Returns > 0 on success or zero on missing
+ * data. It may
+ * return an error in h2c. The caller must have already verified frame length
+ * and stream ID validity. Described in RFC7540#6.5.
+ */
+static int h2c_handle_settings(struct h2c *h2c)
+{
+ unsigned int offset;
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+
+ if (h2c->dff & H2_F_SETTINGS_ACK) {
+ if (h2c->dfl) {
+ error = H2_ERR_FRAME_SIZE_ERROR;
+ goto fail;
+ }
+ goto done;
+ }
+
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out0;
+ }
+
+ /* parse the frame */
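+ /* each setting is a 6-byte record made of a 16-bit identifier
+ * followed by a 32-bit value (RFC7540#6.5.1), hence the loop
+ * advancing by 6
+ */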
+ for (offset = 0; offset < h2c->dfl; offset += 6) {
+ uint16_t type = h2_get_n16(&h2c->dbuf, offset);
+ int32_t arg = h2_get_n32(&h2c->dbuf, offset + 2);
+
+ switch (type) {
+ case H2_SETTINGS_INITIAL_WINDOW_SIZE:
+ /* we need to update all existing streams with the
+ * difference from the previous iws.
+ */
+ if (arg < 0) { // RFC7540#6.5.2
+ error = H2_ERR_FLOW_CONTROL_ERROR;
+ goto fail;
+ }
+ h2c->miw = arg;
+ break;
+ case H2_SETTINGS_MAX_FRAME_SIZE:
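+ /* 16384 is 2^14 (the initial value), 16777215 is 2^24-1 */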
+ if (arg < 16384 || arg > 16777215) { // RFC7540#6.5.2
+ TRACE_ERROR("MAX_FRAME_SIZE out of range", H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+ h2c->mfs = arg;
+ break;
+ case H2_SETTINGS_HEADER_TABLE_SIZE:
+ h2c->flags |= H2_CF_SHTS_UPDATED;
+ break;
+ case H2_SETTINGS_ENABLE_PUSH:
+ if (arg < 0 || arg > 1) { // RFC7540#6.5.2
+ TRACE_ERROR("ENABLE_PUSH out of range", H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+ break;
+ case H2_SETTINGS_MAX_CONCURRENT_STREAMS:
+ if (h2c->flags & H2_CF_IS_BACK) {
+ /* the limit is only for the backend; for the frontend it is our limit */
+ if ((unsigned int)arg > h2c_max_concurrent_streams(h2c))
+ arg = h2c_max_concurrent_streams(h2c);
+ h2c->streams_limit = arg;
+ }
+ break;
+ case H2_SETTINGS_ENABLE_CONNECT_PROTOCOL:
+ if (arg == 1)
+ h2c->flags |= H2_CF_RCVD_RFC8441;
+ break;
+ }
+ }
+
+ /* need to ACK this frame now */
+ h2c->st0 = H2_CS_FRAME_A;
+ done:
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+ return 1;
+ fail:
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ h2c_error(h2c, error);
+ out0:
+ TRACE_DEVEL("leaving with missing data or error", H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn);
+ return 0;
+}
+
+/* try to send an ACK for a settings frame on the connection. Returns > 0 on
+ * success or zero if nothing was done (e.g. missing room). It may return an
+ * error in h2c.
+ */
+static int h2c_ack_settings(struct h2c *h2c)
+{
+ struct buffer *res;
+ char str[9];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn);
+
+ memcpy(str,
+ "\x00\x00\x00" /* length : 0 (no data) */
+ "\x04" "\x01" /* type : 4, flags : ACK */
+ "\x00\x00\x00\x00" /* stream ID */, 9);
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 9));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn);
+ return ret;
+}
+
+/* processes a PING frame and schedules an ACK if needed. The payload is not
+ * consumed here; it is read from h2c->dbuf later when the ACK is built, so
+ * this function always succeeds (returns > 0). The caller must have already
+ * verified frame length and stream ID validity.
+ */
+static int h2c_handle_ping(struct h2c *h2c)
+{
+ /* schedule a response */
+ if (!(h2c->dff & H2_F_PING_ACK))
+ h2c->st0 = H2_CS_FRAME_A;
+ return 1;
+}
+
+/* Try to send a window update for stream id <sid> and value <increment>.
+ * Returns > 0 on success or zero on missing room or failure. It may return an
+ * error in h2c.
+ */
+static int h2c_send_window_update(struct h2c *h2c, int sid, uint32_t increment)
+{
+ struct buffer *res;
+ char str[13];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+
+ /* length: 4, type: 8, flags: none */
+ memcpy(str, "\x00\x00\x04\x08\x00", 5);
+ write_n32(str + 5, sid);
+ write_n32(str + 9, increment);
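+ /* the 4-byte payload just written is the 31-bit window size
+ * increment, valid from 1 to 2^31-1 (RFC7540#6.9); callers only
+ * pass strictly positive values here
+ */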
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 13));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ return ret;
+}
+
+/* try to send pending window update for the connection. It's safe to call it
+ * with no pending updates. Returns > 0 on success or zero on missing room or
+ * failure. It may return an error in h2c.
+ */
+static int h2c_send_conn_wu(struct h2c *h2c)
+{
+ int ret = 1;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+
+ if (h2c->rcvd_c <= 0)
+ goto out;
+
+ if (!(h2c->flags & H2_CF_WINDOW_OPENED)) {
+ /* increase the advertised connection window to 2G on
+ * first update.
+ */
+ h2c->flags |= H2_CF_WINDOW_OPENED;
+ h2c->rcvd_c += H2_INITIAL_WINDOW_INCREMENT;
+ }
+
+ /* send WU for the connection */
+ ret = h2c_send_window_update(h2c, 0, h2c->rcvd_c);
+ if (ret > 0)
+ h2c->rcvd_c = 0;
+
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ return ret;
+}
+
+/* try to send pending window update for the current dmux stream. It's safe to
+ * call it with no pending updates. Returns > 0 on success or zero on missing
+ * room or failure. It may return an error in h2c.
+ */
+static int h2c_send_strm_wu(struct h2c *h2c)
+{
+ int ret = 1;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+
+ if (h2c->rcvd_s <= 0)
+ goto out;
+
+ /* send WU for the stream */
+ ret = h2c_send_window_update(h2c, h2c->dsi, h2c->rcvd_s);
+ if (ret > 0)
+ h2c->rcvd_s = 0;
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ return ret;
+}
+
+/* try to send an ACK for a ping frame on the connection. Returns > 0 on
+ * success, or zero on missing data or missing room. It may return an error
+ * in h2c.
+ */
+static int h2c_ack_ping(struct h2c *h2c)
+{
+ struct buffer *res;
+ char str[17];
+ int ret = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_PING, h2c->conn);
+
+ if (b_data(&h2c->dbuf) < 8)
+ goto out;
+
+ memcpy(str,
+ "\x00\x00\x08" /* length : 8 (same payload) */
+ "\x06" "\x01" /* type : 6, flags : ACK */
+ "\x00\x00\x00\x00" /* stream ID */, 9);
+
+ /* copy the original payload */
+ h2_get_buf_bytes(str + 9, 8, &h2c->dbuf, 0);
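+ /* str now holds the full 17-byte PING ACK: the 9-byte header plus
+ * the peer's 8-byte opaque payload echoed back (RFC7540#6.7)
+ */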
+
+ res = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, res)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ goto out;
+ }
+
+ ret = b_istput(res, ist2(str, 17));
+ if (unlikely(ret <= 0)) {
+ if (!ret) {
+ if ((res = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2c->flags |= H2_CF_DEM_MROOM;
+ }
+ else {
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ }
+ }
+ out:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_PING, h2c->conn);
+ return ret;
+}
+
+/* processes a WINDOW_UPDATE frame whose payload lies in h2c->dbuf for
+ * h2c->dfl bytes. Returns > 0 on success or zero on missing data. It may
+ * return an error in
+ * h2c or h2s. The caller must have already verified frame length and stream ID
+ * validity. Described in RFC7540#6.9.
+ */
+static int h2c_handle_window_update(struct h2c *h2c, struct h2s *h2s)
+{
+ int32_t inc;
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out0;
+ }
+
+ inc = h2_get_n32(&h2c->dbuf, 0);
+
+ if (h2c->dsi != 0) {
+ /* stream window update */
+
+ /* it's not an error to receive WU on a closed stream */
+ if (h2s->st == H2_SS_CLOSED)
+ goto done;
+
+ if (!inc) {
+ TRACE_ERROR("stream WINDOW_UPDATE inc=0", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn, h2s);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+
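+ /* RFC7540#6.9.1: the flow-control window must never exceed
+ * 2^31-1, so a positive window that would wrap when adding
+ * <inc> is a flow control error
+ */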
+ if (h2s_mws(h2s) >= 0 && h2s_mws(h2s) + inc < 0) {
+ TRACE_ERROR("stream WINDOW_UPDATE inc<0", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn, h2s);
+ error = H2_ERR_FLOW_CONTROL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+
+ h2s->sws += inc;
+ if (h2s_mws(h2s) > 0 && (h2s->flags & H2_SF_BLK_SFCTL)) {
+ h2s->flags &= ~H2_SF_BLK_SFCTL;
+ LIST_DEL_INIT(&h2s->list);
+ if ((h2s->subs && h2s->subs->events & SUB_RETRY_SEND) ||
+ h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ }
+ else {
+ /* connection window update */
+ if (!inc) {
+ TRACE_ERROR("conn WINDOW_UPDATE inc=0", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto conn_err;
+ }
+
+ if (h2c->mws >= 0 && h2c->mws + inc < 0) {
+ TRACE_ERROR("conn WINDOW_UPDATE inc<0", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ error = H2_ERR_FLOW_CONTROL_ERROR;
+ goto conn_err;
+ }
+
+ h2c->mws += inc;
+ }
+
+ done:
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ return 1;
+
+ conn_err:
+ h2c_error(h2c, error);
+ out0:
+ TRACE_DEVEL("leaving on missing data or error", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ return 0;
+
+ strm_err:
+ h2s_error(h2s, error);
+ h2c->st0 = H2_CS_FRAME_E;
+ TRACE_DEVEL("leaving on stream error", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ return 0;
+}
+
+/* processes a GOAWAY frame, and signals all streams whose ID is greater than
+ * the last ID. Returns > 0 on success or zero on missing data. The caller must
+ * have already verified frame length and stream ID validity. Described in
+ * RFC7540#6.8.
+ */
+static int h2c_handle_goaway(struct h2c *h2c)
+{
+ int last;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_GOAWAY, h2c->conn);
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ TRACE_DEVEL("leaving on missing data", H2_EV_RX_FRAME|H2_EV_RX_GOAWAY, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ return 0;
+ }
+
+ last = h2_get_n32(&h2c->dbuf, 0);
+ h2c->errcode = h2_get_n32(&h2c->dbuf, 4);
+ if (h2c->last_sid < 0)
+ h2c->last_sid = last;
+ h2_wake_some_streams(h2c, last);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_GOAWAY, h2c->conn);
+ return 1;
+}
+
+/* processes a PRIORITY frame, and either skips it or rejects it if it is
+ * invalid. Returns > 0 on success or zero on missing data. It may return an
+ * error in h2c. The caller must have already verified frame length and stream
+ * ID validity. Described in RFC7540#6.3.
+ */
+static int h2c_handle_priority(struct h2c *h2c)
+{
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn);
+
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ TRACE_DEVEL("leaving on missing data", H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ return 0;
+ }
+
+ if (h2_get_n32(&h2c->dbuf, 0) == h2c->dsi) {
+ /* 7540#5.3 : can't depend on itself */
+ TRACE_ERROR("PRIORITY depends on itself", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ TRACE_DEVEL("leaving on error", H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn);
+ return 0;
+ }
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn);
+ return 1;
+}
+
+/* processes an RST_STREAM frame, and sets the 32-bit error code on the stream.
+ * Returns > 0 on success or zero on missing data. The caller must have already
+ * verified frame length and stream ID validity. Described in RFC7540#6.4.
+ */
+static int h2c_handle_rst_stream(struct h2c *h2c, struct h2s *h2s)
+{
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+
+ /* process full frame only */
+ if (b_data(&h2c->dbuf) < h2c->dfl) {
+ TRACE_DEVEL("leaving on missing data", H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ return 0;
+ }
+
+ /* late RST, already handled */
+ if (h2s->st == H2_SS_CLOSED) {
+ TRACE_DEVEL("leaving on stream closed", H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+ return 1;
+ }
+
+ h2s->errcode = h2_get_n32(&h2c->dbuf, 0);
+ h2s_close(h2s);
+
+ if (h2s_sc(h2s)) {
+ se_fl_set_error(h2s->sd);
+ h2s_alert(h2s);
+ }
+
+ h2s->flags |= H2_SF_RST_RCVD;
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+ return 1;
+}
+
+/* processes a HEADERS frame. Returns h2s on success or NULL on missing data.
+ * It may return an error in h2c or h2s. The caller must consider that the
+ * return value is the new h2s in case one was allocated (most common case).
+ * Described in RFC7540#6.2. Most of the errors here are reported as
+ * connection errors since it's impossible to recover from such errors
+ * after the compression context has been altered.
+ */
+static struct h2s *h2c_frt_handle_headers(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer rxbuf = BUF_NULL;
+ unsigned long long body_len = 0;
+ uint32_t flags = 0;
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+
+ if (!b_size(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out; // empty buffer
+ }
+
+ if (b_data(&h2c->dbuf) < h2c->dfl && !b_full(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out; // incomplete frame
+ }
+
+ /* now either the frame is complete or the buffer is full */
+ if (h2s->st != H2_SS_IDLE) {
+ /* The stream exists/existed, this must be a trailers frame */
+ if (h2s->st != H2_SS_CLOSED) {
+ error = h2c_dec_hdrs(h2c, &h2s->rxbuf, &h2s->flags, &body_len, NULL);
+ /* unrecoverable error ? */
+ if (h2c->st0 >= H2_CS_ERROR) {
+ TRACE_USER("Unrecoverable error decoding H2 trailers", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ sess_log(h2c->conn->owner);
+ goto out;
+ }
+
+ if (error == 0) {
+ /* Demux not blocked because of the stream, it is an incomplete frame */
+ if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY))
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out; // missing data
+ }
+
+ if (error < 0) {
+ /* Failed to decode this frame (e.g. too large request)
+ * but the HPACK decompressor is still synchronized.
+ */
+ sess_log(h2c->conn->owner);
+ h2s_error(h2s, H2_ERR_INTERNAL_ERROR);
+ TRACE_USER("Stream error decoding H2 trailers", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ h2c->st0 = H2_CS_FRAME_E;
+ goto out;
+ }
+ goto done;
+ }
+ /* the stream was already killed by an RST, let's consume
+ * the data and send another RST.
+ */
+ error = h2c_dec_hdrs(h2c, &rxbuf, &flags, &body_len, NULL);
+ sess_log(h2c->conn->owner);
+ h2s = (struct h2s*)h2_error_stream;
+ TRACE_USER("rcvd H2 trailers on closed stream", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, h2s, &rxbuf);
+ goto send_rst;
+ }
+ else if (h2c->dsi <= h2c->max_id || !(h2c->dsi & 1)) {
+ /* RFC7540#5.1.1 stream id > prev ones, and must be odd here */
+ error = H2_ERR_PROTOCOL_ERROR;
+ TRACE_ERROR("HEADERS on invalid stream ID", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ sess_log(h2c->conn->owner);
+ session_inc_http_req_ctr(h2c->conn->owner);
+ session_inc_http_err_ctr(h2c->conn->owner);
+ goto conn_err;
+ }
+ else if (h2c->flags & H2_CF_DEM_TOOMANY) {
+ goto out; // IDLE but too many sc still present
+ }
+ else if (h2_fe_max_total_streams &&
+ h2c->stream_cnt >= h2_fe_max_total_streams + h2c_max_concurrent_streams(h2c)) {
+ /* We've already told this client we were going to close a
+ * while ago and apparently it didn't care, so it's time to
+ * stop processing its requests for real.
+ */
+ error = H2_ERR_ENHANCE_YOUR_CALM;
+ TRACE_STATE("Stream limit violated", H2_EV_STRM_SHUT, h2c->conn);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ sess_log(h2c->conn->owner);
+ session_inc_http_req_ctr(h2c->conn->owner);
+ session_inc_http_err_ctr(h2c->conn->owner);
+ goto conn_err;
+ }
+
+ error = h2c_dec_hdrs(h2c, &rxbuf, &flags, &body_len, NULL);
+
+ if (error == 0) {
+ /* No error but missing data for demuxing, it is an incomplete frame */
+ if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY))
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto out;
+ }
+
+ /* Now we cannot roll back and we won't come back here anymore for this
+ * stream, so this stream ID is open from a protocol perspective, even
+ * if incomplete or broken, we want to count it as attempted.
+ */
+ if (h2c->dsi > h2c->max_id)
+ h2c->max_id = h2c->dsi;
+ h2c->stream_cnt++;
+
+ if (error < 0) {
+ /* Failed to decode this stream. This might be due to a
+ * recoverable error affecting only the stream (e.g. too large
+ * request for buffer, that leaves the HPACK decompressor still
+ * synchronized), or a non-recoverable error such as an invalid
+ * frame type sequence (e.g. other frame type interleaved with
+ * CONTINUATION), in which case h2c_dec_hdrs() has already set the
+ * error code in the connection and counted it in the relevant
+ * stats. We still count a req error in both cases.
+ */
+ sess_log(h2c->conn->owner);
+ session_inc_http_req_ctr(h2c->conn->owner);
+ session_inc_http_err_ctr(h2c->conn->owner);
+
+ if (h2c->st0 >= H2_CS_ERROR) {
+ TRACE_USER("Unrecoverable error decoding H2 request", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ goto out;
+ }
+
+ /* recoverable stream error (e.g. too large request) */
+ TRACE_USER("rcvd unparsable H2 request", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, h2s, &rxbuf);
+ goto strm_err;
+ }
+
+ TRACE_USER("rcvd H2 request ", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW, h2c->conn, 0, &rxbuf);
+
+ /* Note: we don't emit any other logs below because if we return
+ * positively from h2c_frt_stream_new(), the stream will report the error,
+ * and if we return in error, h2c_frt_stream_new() will emit the error.
+ *
+ * Xfer the rxbuf to the stream. On success, the new stream owns the
+ * rxbuf. On error, it is released here.
+ */
+ h2s = h2c_frt_stream_new(h2c, h2c->dsi, &rxbuf, flags);
+ if (!h2s) {
+ h2s = (struct h2s*)h2_refused_stream;
+ TRACE_USER("refused H2 req. ", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, h2s, &rxbuf);
+ goto send_rst;
+ }
+
+ h2s->st = H2_SS_OPEN;
+ h2s->flags |= flags;
+ h2s->body_len = body_len;
+ h2s_propagate_term_flags(h2c, h2s);
+
+ done:
+ if (h2s->flags & H2_SF_ES_RCVD) {
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HREM;
+ else
+ h2s_close(h2s);
+ }
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ goto leave;
+
+ conn_err:
+ h2c_error(h2c, error);
+ out:
+ h2_release_buf(h2c, &rxbuf);
+ TRACE_DEVEL("leaving on missing data or error", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ h2s = NULL;
+ goto leave;
+
+ strm_err:
+ h2s = (struct h2s*)h2_error_stream;
+
+ send_rst:
+ /* make the demux send an RST for the current stream. We may only
+ * do this if we're certain that the HEADERS frame was properly
+ * decompressed so that the HPACK decoder is still kept up to date.
+ */
+ h2_release_buf(h2c, &rxbuf);
+ h2c->st0 = H2_CS_FRAME_E;
+
+ TRACE_DEVEL("leaving on error", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+
+ leave:
+ if (h2_fe_max_total_streams && h2c->stream_cnt >= h2_fe_max_total_streams) {
+ /* we've had enough streams on this connection, time to renew it.
+ * In order to gracefully do this, we'll advertise a stream limit
+ * of the current one plus the max concurrent streams value in the
+ * GOAWAY frame, so that we're certain that the client is aware of
+ * the limit before creating a new stream, but knows we won't harm
+ * the streams in flight. Remember that client stream IDs are odd
+ * so we apply twice the concurrent streams value to the current
+ * ID.
+ */
+ if (h2c->last_sid <= 0 ||
+ h2c->last_sid > h2c->max_id + 2 * h2c_max_concurrent_streams(h2c)) {
+ /* not set yet or was too high */
+ h2c->last_sid = h2c->max_id + 2 * h2c_max_concurrent_streams(h2c);
+ h2c_send_goaway_error(h2c, NULL);
+ }
+ }
+
+ return h2s;
+}
+
+/* processes a HEADERS frame. Returns h2s on success or NULL on missing data.
+ * It may return an error in h2c or h2s. Described in RFC7540#6.2. Most of the
+ * errors here are reported as connection errors since it's impossible to
+ * recover from such errors after the compression context has been altered.
+ */
+static struct h2s *h2c_bck_handle_headers(struct h2c *h2c, struct h2s *h2s)
+{
+ struct buffer rxbuf = BUF_NULL;
+ unsigned long long body_len = 0;
+ uint32_t flags = 0;
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+
+ if (!b_size(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // empty buffer
+ }
+
+ if (b_data(&h2c->dbuf) < h2c->dfl && !b_full(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // incomplete frame
+ }
+
+ if (h2s->st != H2_SS_CLOSED) {
+ error = h2c_dec_hdrs(h2c, &h2s->rxbuf, &h2s->flags, &h2s->body_len, h2s->upgrade_protocol);
+ }
+ else {
+ /* the stream was already killed by an RST, let's consume
+ * the data and send another RST.
+ */
+ error = h2c_dec_hdrs(h2c, &rxbuf, &flags, &body_len, NULL);
+ h2s = (struct h2s*)h2_error_stream;
+ h2c->st0 = H2_CS_FRAME_E;
+ goto send_rst;
+ }
+
+ /* unrecoverable error ? */
+ if (h2c->st0 >= H2_CS_ERROR) {
+ TRACE_USER("Unrecoverable error decoding H2 HEADERS", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ if (h2s->st != H2_SS_OPEN && h2s->st != H2_SS_HLOC) {
+ /* RFC7540#5.1 */
+ TRACE_ERROR("response HEADERS in invalid state", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_STREAM_CLOSED);
+ h2c->st0 = H2_CS_FRAME_E;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto fail;
+ }
+
+ if (error <= 0) {
+ if (error == 0) {
+ /* Demux not blocked because of the stream, it is an incomplete frame */
+ if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY))
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // missing data
+ }
+
+ /* stream error : send RST_STREAM */
+ TRACE_ERROR("couldn't decode response HEADERS", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_PROTOCOL_ERROR);
+ h2c->st0 = H2_CS_FRAME_E;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto fail;
+ }
+
+ if (se_fl_test(h2s->sd, SE_FL_ERROR) && h2s->st < H2_SS_ERROR)
+ h2s->st = H2_SS_ERROR;
+ else if (h2s->flags & H2_SF_ES_RCVD) {
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HREM;
+ else if (h2s->st == H2_SS_HLOC)
+ h2s_close(h2s);
+ }
+
+ /* Unblock busy server h2s waiting for the response headers to validate
+ * the tunnel establishment, or for the end of the response of an aborted
+ * tunnel.
+ */
+ if ((h2s->flags & (H2_SF_BODY_TUNNEL|H2_SF_BLK_MBUSY)) == (H2_SF_BODY_TUNNEL|H2_SF_BLK_MBUSY) ||
+ (h2s->flags & (H2_SF_TUNNEL_ABRT|H2_SF_ES_RCVD|H2_SF_BLK_MBUSY)) == (H2_SF_TUNNEL_ABRT|H2_SF_ES_RCVD|H2_SF_BLK_MBUSY)) {
+ TRACE_STATE("Unblock h2s blocked on tunnel establishment/abort", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ h2s->flags &= ~H2_SF_BLK_MBUSY;
+ }
+
+ TRACE_USER("rcvd H2 response ", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, 0, &h2s->rxbuf);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return h2s;
+ fail:
+ TRACE_DEVEL("leaving on missing data or error", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return NULL;
+
+ send_rst:
+ /* make the demux send an RST for the current stream. We may only
+ * do this if we're certain that the HEADERS frame was properly
+ * decompressed so that the HPACK decoder is still kept up to date.
+ */
+ h2_release_buf(h2c, &rxbuf);
+ h2c->st0 = H2_CS_FRAME_E;
+
+ TRACE_USER("rejected H2 response", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_STRM_NEW|H2_EV_STRM_END, h2c->conn, 0, &rxbuf);
+ TRACE_DEVEL("leaving on error", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ return h2s;
+}
+
+/* processes a DATA frame. Returns > 0 on success or zero on missing data.
+ * It may return an error in h2c or h2s. Described in RFC7540#6.1.
+ */
+static int h2c_handle_data(struct h2c *h2c, struct h2s *h2s)
+{
+ int error;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+
+ /* note that empty DATA frames are perfectly valid and sometimes used
+ * to signal an end of stream (with the ES flag).
+ */
+
+ if (!b_size(&h2c->dbuf) && h2c->dfl) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // empty buffer
+ }
+
+ if (b_data(&h2c->dbuf) < h2c->dfl && !b_full(&h2c->dbuf)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ goto fail; // incomplete frame
+ }
+
+ /* now either the frame is complete or the buffer is full */
+
+ if (h2s->st != H2_SS_OPEN && h2s->st != H2_SS_HLOC) {
+ /* RFC7540#6.1 */
+ error = H2_ERR_STREAM_CLOSED;
+ goto strm_err;
+ }
+
+ if (!(h2s->flags & H2_SF_HEADERS_RCVD)) {
+ /* RFC9113#8.1: The header section must be received before the message content */
+ TRACE_ERROR("Unexpected DATA frame before the message headers", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+ if ((h2s->flags & H2_SF_DATA_CLEN) && (h2c->dfl - h2c->dpl) > h2s->body_len) {
+ /* RFC7540#8.1.2 */
+ TRACE_ERROR("DATA frame larger than content-length", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+ if (!(h2c->flags & H2_CF_IS_BACK) &&
+ (h2s->flags & (H2_SF_TUNNEL_ABRT|H2_SF_ES_SENT)) == (H2_SF_TUNNEL_ABRT|H2_SF_ES_SENT) &&
+ ((h2c->dfl - h2c->dpl) || !(h2c->dff & H2_F_DATA_END_STREAM))) {
+ /* a tunnel attempt was aborted but the client still tries to send some
+ * raw data. Thus the stream is closed with the CANCEL error. Care is
+ * taken not to match an empty DATA frame with the ES flag. The error is
+ * only handled if ES was already sent to the client because, depending
+ * on the scheduling, these data may have been sent before the server
+ * response but not handled here.
+ */
+ TRACE_ERROR("Request DATA frame for aborted tunnel", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ error = H2_ERR_CANCEL;
+ goto strm_err;
+ }
+
+ if (!h2_frt_transfer_data(h2s))
+ goto fail;
+
+ /* call the upper layers to process the frame, then let the upper layer
+ * notify the stream about any change.
+ */
+ if (!h2s_sc(h2s)) {
+ /* The upper layer has already closed, this may happen on
+ * 4xx/redirects during POST, or when receiving a response
+ * from an H2 server after the client has aborted.
+ */
+ error = H2_ERR_CANCEL;
+ goto strm_err;
+ }
+
+ if (h2c->st0 >= H2_CS_ERROR)
+ goto fail;
+
+ if (h2s->st >= H2_SS_ERROR) {
+ /* stream error : send RST_STREAM */
+ h2c->st0 = H2_CS_FRAME_E;
+ }
+
+ /* check for completion : the callee will change this to FRAME_A or
+ * FRAME_H once done.
+ */
+ if (h2c->st0 == H2_CS_FRAME_P)
+ goto fail;
+
+ /* last frame */
+ if (h2c->dff & H2_F_DATA_END_STREAM) {
+ h2s->flags |= H2_SF_ES_RCVD;
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HREM;
+ else
+ h2s_close(h2s);
+
+ if (h2s->flags & H2_SF_DATA_CLEN && h2s->body_len) {
+ /* RFC7540#8.1.2 */
+ TRACE_ERROR("ES on DATA frame before content-length", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ error = H2_ERR_PROTOCOL_ERROR;
+ HA_ATOMIC_INC(&h2c->px_counters->strm_proto_err);
+ goto strm_err;
+ }
+ }
+
+ /* Unblock busy server h2s waiting for the end of the response for an
+ * aborted tunnel
+ */
+ if ((h2c->flags & H2_CF_IS_BACK) &&
+ (h2s->flags & (H2_SF_TUNNEL_ABRT|H2_SF_ES_RCVD|H2_SF_BLK_MBUSY)) == (H2_SF_TUNNEL_ABRT|H2_SF_ES_RCVD|H2_SF_BLK_MBUSY)) {
+ TRACE_STATE("Unblock h2s blocked on tunnel abort", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ h2s->flags &= ~H2_SF_BLK_MBUSY;
+ }
+
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ return 1;
+
+ strm_err:
+ h2s_error(h2s, error);
+ h2c->st0 = H2_CS_FRAME_E;
+ fail:
+ TRACE_DEVEL("leaving on missing data or error", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ return 0;
+}
+
+/* check that the current frame described in h2c->{dsi,dft,dfl,dff,...} is
+ * valid for the current stream state. This is needed only after parsing the
+ * frame header but in practice it can be performed at any time during
+ * H2_CS_FRAME_P since no state transition happens there. Returns >0 on success
+ * or 0 in case of error, in which case either h2s or h2c will carry an error.
+ */
+static int h2_frame_check_vs_state(struct h2c *h2c, struct h2s *h2s)
+{
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+
+ if (h2s->st == H2_SS_IDLE &&
+ h2c->dft != H2_FT_HEADERS && h2c->dft != H2_FT_PRIORITY) {
+ /* RFC7540#5.1: any frame other than HEADERS or PRIORITY in
+ * this state MUST be treated as a connection error
+ */
+ TRACE_ERROR("invalid frame type for IDLE state", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ if (!h2c->nb_streams && !(h2c->flags & H2_CF_IS_BACK)) {
+ /* only log if no other stream can report the error */
+ sess_log(h2c->conn->owner);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ TRACE_DEVEL("leaving in error (idle&!hdrs&!prio)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ if (h2s->st == H2_SS_IDLE && (h2c->flags & H2_CF_IS_BACK)) {
+ /* only PUSH_PROMISE would be permitted here */
+ TRACE_ERROR("invalid frame type for IDLE state (back)", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ TRACE_DEVEL("leaving in error (idle&back)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ if (h2s->st == H2_SS_HREM && h2c->dft != H2_FT_WINDOW_UPDATE &&
+ h2c->dft != H2_FT_RST_STREAM && h2c->dft != H2_FT_PRIORITY) {
+ /* RFC7540#5.1: any frame other than WU/PRIO/RST in
+ * this state MUST be treated as a stream error.
+ * 6.2, 6.6 and 6.10 further mandate that HEADERS/
+ * PUSH_PROMISE/CONTINUATION cause connection errors.
+ */
+ if (h2_ft_bit(h2c->dft) & H2_FT_HDR_MASK) {
+ TRACE_ERROR("invalid frame type for HREM state", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ }
+ else {
+ h2s_error(h2s, H2_ERR_STREAM_CLOSED);
+ }
+ TRACE_DEVEL("leaving in error (hrem&!wu&!rst&!prio)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ /* Below the management of frames received in closed state is a
+ * bit hackish because the spec makes strong differences between
+ * streams closed by receiving RST, sending RST, and seeing ES
+ * in both directions. In addition to this, the creation of a
+ * new stream reusing the identifier of a closed one will be
+ * detected here. Given that we cannot keep track of all closed
+ * streams forever, we consider that unknown closed streams were
+ * closed on RST received, which allows us to respond with an
+ * RST without breaking the connection (eg: to abort a transfer).
+ * Some frames have to be silently ignored as well.
+ */
+ if (h2s->st == H2_SS_CLOSED && h2c->dsi) {
+ if (!(h2c->flags & H2_CF_IS_BACK) && h2_ft_bit(h2c->dft) & H2_FT_HDR_MASK) {
+ /* #5.1.1: The identifier of a newly
+ * established stream MUST be numerically
+ * greater than all streams that the initiating
+ * endpoint has opened or reserved. This
+ * governs streams that are opened using a
+ * HEADERS frame and streams that are reserved
+ * using PUSH_PROMISE. An endpoint that
+ * receives an unexpected stream identifier
+ * MUST respond with a connection error.
+ */
+ h2c_error(h2c, H2_ERR_STREAM_CLOSED);
+ TRACE_DEVEL("leaving in error (closed&hdrmask)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ if (h2s->flags & H2_SF_RST_RCVD &&
+ !(h2_ft_bit(h2c->dft) & (H2_FT_HDR_MASK | H2_FT_RST_STREAM_BIT | H2_FT_PRIORITY_BIT | H2_FT_WINDOW_UPDATE_BIT))) {
+ /* RFC7540#5.1:closed: an endpoint that
+ * receives any frame other than PRIORITY after
+ * receiving a RST_STREAM MUST treat that as a
+ * stream error of type STREAM_CLOSED.
+ *
+ * Note that old streams fall into this category
+ * and will lead to an RST being sent.
+ *
+ * However, we cannot generalize this to all frame types. Those
+ * carrying compression state must still be processed before
+ * being dropped or we'll desynchronize the decoder. This can
+ * happen with request trailers received after sending an
+ * RST_STREAM, or with header/trailers responses received after
+ * sending RST_STREAM (aborted stream).
+ *
+ * In addition, since our CLOSED streams always carry the
+ * RST_RCVD bit, we don't want to accidentally catch valid
+ * frames for a closed stream, i.e. RST/PRIO/WU.
+ */
+ h2s_error(h2s, H2_ERR_STREAM_CLOSED);
+ h2c->st0 = H2_CS_FRAME_E;
+ TRACE_DEVEL("leaving in error (rst_rcvd&!hdrmask)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+
+ /* RFC7540#5.1:closed: if this state is reached as a
+ * result of sending a RST_STREAM frame, the peer that
+ * receives the RST_STREAM might have already sent
+ * frames on the stream that cannot be withdrawn. An
+ * endpoint MUST ignore frames that it receives on
+ * closed streams after it has sent a RST_STREAM
+ * frame. An endpoint MAY choose to limit the period
+ * over which it ignores frames and treat frames that
+ * arrive after this time as being in error.
+ */
+ if (h2s->id && !(h2s->flags & H2_SF_RST_SENT)) {
+ /* RFC7540#5.1:closed: any frame other than
+ * PRIO/WU/RST in this state MUST be treated as
+ * a connection error
+ */
+ if (h2c->dft != H2_FT_RST_STREAM &&
+ h2c->dft != H2_FT_PRIORITY &&
+ h2c->dft != H2_FT_WINDOW_UPDATE) {
+ h2c_error(h2c, H2_ERR_STREAM_CLOSED);
+ TRACE_DEVEL("leaving in error (rst_sent&!rst&!prio&!wu)", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ return 0;
+ }
+ }
+ }
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn, h2s);
+ return 1;
+}
+
+/* Reverse the connection <h2c>. Common operations are done for both active and
+ * passive reversal. Timeouts are inverted and H2_CF_IS_BACK is set or unset
+ * depending on the reversal direction.
+ *
+ * For active reversal, only minor steps are required. The connection should
+ * then be accepted by its listener before being able to use it for transfers.
+ *
+ * For passive reversal, the connection is inserted into its target server's
+ * idle pool. It can thus be reused immediately for future transfers on this
+ * server.
+ *
+ * Returns 1 on success else 0.
+ */
+static int h2_conn_reverse(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ if (conn_reverse(conn)) {
+ TRACE_ERROR("reverse connection failed", H2_EV_H2C_WAKE, conn);
+ goto err;
+ }
+
+ TRACE_USER("reverse connection", H2_EV_H2C_WAKE, conn);
+
+ /* Check the connection's new side after reversal. */
+ if (conn_is_back(conn)) {
+ struct server *srv = __objt_server(h2c->conn->target);
+ struct proxy *prx = srv->proxy;
+
+ h2c->flags |= H2_CF_IS_BACK;
+
+ h2c->shut_timeout = h2c->timeout = prx->timeout.server;
+ if (tick_isset(prx->timeout.serverfin))
+ h2c->shut_timeout = prx->timeout.serverfin;
+
+ h2c->px_counters = EXTRA_COUNTERS_GET(prx->extra_counters_be,
+ &h2_stats_module);
+
+ HA_ATOMIC_OR(&h2c->wait_event.tasklet->state, TASK_F_USR1);
+ xprt_set_idle(conn, conn->xprt, conn->xprt_ctx);
+ if (!srv_add_to_idle_list(srv, conn, 1))
+ goto err;
+ }
+ else {
+ struct listener *l = __objt_listener(h2c->conn->target);
+ struct proxy *prx = l->bind_conf->frontend;
+
+ h2c->flags &= ~H2_CF_IS_BACK;
+
+ h2c->shut_timeout = h2c->timeout = prx->timeout.client;
+ if (tick_isset(prx->timeout.clientfin))
+ h2c->shut_timeout = prx->timeout.clientfin;
+
+ h2c->px_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe,
+ &h2_stats_module);
+
+ proxy_inc_fe_cum_sess_ver_ctr(l, prx, 2);
+
+ BUG_ON(LIST_INLIST(&h2c->conn->stopping_list));
+ LIST_APPEND(&mux_stopping_data[tid].list,
+ &h2c->conn->stopping_list);
+ }
+
+ /* Check if stream creation is initially forbidden. This is the case
+ * for active preconnect until reversal is done.
+ */
+ if (conn_reverse_in_preconnect(h2c->conn)) {
+ TRACE_DEVEL("prevent stream demux until accept is done", H2_EV_H2C_WAKE, conn);
+ h2c->flags |= H2_CF_DEM_TOOMANY;
+ }
+
+ /* If only the new side has a defined timeout, the task must be
+ * allocated. Conversely, if only the old side has a timeout, it must
+ * be freed.
+ */
+ if (!h2c->task && tick_isset(h2c->timeout)) {
+ h2c->task = task_new_here();
+ if (!h2c->task)
+ goto err;
+
+ h2c->task->process = h2_timeout_task;
+ h2c->task->context = h2c;
+ }
+ else if (!tick_isset(h2c->timeout)) {
+ task_destroy(h2c->task);
+ h2c->task = NULL;
+ }
+
+ /* Requeue the task, if instantiated, with the new timeout value. */
+ if (h2c->task) {
+ h2c->task->expire = tick_add(now_ms, h2c->timeout);
+ task_queue(h2c->task);
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_WAKE, h2c->conn);
+ return 1;
+
+ err:
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ TRACE_DEVEL("leaving on error", H2_EV_H2C_WAKE);
+ return 0;
+}
+
+/* process Rx frames to be demultiplexed */
+static void h2_process_demux(struct h2c *h2c)
+{
+ struct h2s *h2s = NULL, *tmp_h2s;
+ struct h2_fh hdr;
+ unsigned int padlen = 0;
+ int32_t old_iw = h2c->miw;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ if (h2c->st0 >= H2_CS_ERROR)
+ goto out;
+
+ if (unlikely(h2c->st0 < H2_CS_FRAME_H)) {
+ if (h2c->st0 == H2_CS_PREFACE) {
+ TRACE_STATE("expecting preface", H2_EV_RX_PREFACE, h2c->conn);
+ if (h2c->flags & H2_CF_IS_BACK)
+ goto out;
+
+ if (unlikely(h2c_frt_recv_preface(h2c) <= 0)) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ if (h2c->st0 == H2_CS_ERROR) {
+ TRACE_PROTO("failed to receive preface", H2_EV_RX_PREFACE|H2_EV_PROTO_ERR, h2c->conn);
+ h2c->st0 = H2_CS_ERROR2;
+ if (b_data(&h2c->dbuf) ||
+ !(((const struct session *)h2c->conn->owner)->fe->options & (PR_O_NULLNOLOG|PR_O_IGNORE_PRB)))
+ sess_log(h2c->conn->owner);
+ }
+ goto done;
+ }
+ TRACE_PROTO("received preface", H2_EV_RX_PREFACE, h2c->conn);
+
+ h2c->max_id = 0;
+ TRACE_STATE("switching to SETTINGS1", H2_EV_RX_PREFACE, h2c->conn);
+ h2c->st0 = H2_CS_SETTINGS1;
+ }
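+
+ /* Reminder (RFC7540#3.5): the client preface is the fixed
+ * 24-byte string "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n", and it
+ * must be followed by a SETTINGS frame, which is checked
+ * just below.
+ */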
+
+ if (h2c->st0 == H2_CS_SETTINGS1) {
+ /* ensure that what is pending is a valid SETTINGS frame
+ * without an ACK.
+ */
+ TRACE_STATE("expecting settings", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_SETTINGS, h2c->conn);
+ if (!h2_get_frame_hdr(&h2c->dbuf, &hdr)) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ if (h2c->st0 == H2_CS_ERROR) {
+ TRACE_ERROR("failed to receive settings", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_SETTINGS|H2_EV_PROTO_ERR, h2c->conn);
+ h2c->st0 = H2_CS_ERROR2;
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ }
+ goto done;
+ }
+
+ if (hdr.sid || hdr.ft != H2_FT_SETTINGS || hdr.ff & H2_F_SETTINGS_ACK) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ TRACE_ERROR("unexpected frame type or flags", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_SETTINGS|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ h2c->st0 = H2_CS_ERROR2;
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+ }
+
+ if ((int)hdr.len < 0 || (int)hdr.len > global.tune.bufsize) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ TRACE_ERROR("invalid settings frame length", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_SETTINGS|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ h2c->st0 = H2_CS_ERROR2;
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ goto done;
+ }
+
+ /* that's OK, switch to FRAME_P to process it. This is
+ * a SETTINGS frame whose header has already been
+ * deleted above.
+ */
+ padlen = 0;
+ HA_ATOMIC_INC(&h2c->px_counters->settings_rcvd);
+ goto new_frame;
+ }
+ }
+
+ /* process as many incoming frames as possible below */
+ while (1) {
+ int ret = 0;
+
+ if (!b_data(&h2c->dbuf)) {
+ TRACE_DEVEL("no more Rx data", H2_EV_RX_FRAME, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ break;
+ }
+
+ if (h2c->st0 >= H2_CS_ERROR) {
+ TRACE_STATE("end of connection reported", H2_EV_RX_FRAME|H2_EV_RX_EOI, h2c->conn);
+ break;
+ }
+
+ if (h2c->st0 == H2_CS_FRAME_H) {
+ TRACE_STATE("expecting H2 frame header", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn);
+ if (!h2_peek_frame_hdr(&h2c->dbuf, 0, &hdr)) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ break;
+ }
+
+ if ((int)hdr.len < 0 || (int)hdr.len > global.tune.bufsize) {
+ TRACE_ERROR("invalid H2 frame length", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ if (!h2c->nb_streams && !(h2c->flags & H2_CF_IS_BACK)) {
+ /* only log if no other stream can report the error */
+ sess_log(h2c->conn->owner);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ break;
+ }
+
+ if (h2c->rcvd_s && h2c->dsi != hdr.sid) {
+ /* changed stream with a pending WU, need to
+ * send it now.
+ */
+ TRACE_PROTO("sending stream WINDOW_UPDATE frame on stream switch", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ ret = h2c_send_strm_wu(h2c);
+ if (ret <= 0)
+ break;
+ }
+
+ padlen = 0;
+ if (h2_ft_bit(hdr.ft) & H2_FT_PADDED_MASK && hdr.ff & H2_F_PADDED) {
+ /* If the frame is padded (HEADERS, PUSH_PROMISE or DATA),
+ * we read the pad length and drop it from the remaining
+ * payload (the one pad length byte plus the 9-byte frame
+ * header = 10 bytes removed in total), so we have a frame
+ * payload starting after the pad len. Flow controlled
+ * frames (DATA) also count the padlen in the flow control,
+ * so it must be adjusted.
+ */
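+ /* On-wire layout of a padded frame (RFC7540#6.1), as an
+ * illustration:
+ *
+ *  <-- 9-byte header --><-1-><- hdr.len-1-padlen -><-padlen->
+ *  |len|typ|flg|stream-id|PadL|      payload       | padding |
+ *
+ * only the PadL byte is consumed here; the padding stays
+ * counted in the payload length and is skipped with it.
+ */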
+ if (hdr.len < 1) {
+ TRACE_ERROR("invalid H2 padded frame length", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+ }
+ hdr.len--;
+
+ if (b_data(&h2c->dbuf) < 10) {
+ h2c->flags |= H2_CF_DEM_SHORT_READ;
+ break; // missing padlen
+ }
+
+ padlen = *(uint8_t *)b_peek(&h2c->dbuf, 9);
+
+ if (padlen > hdr.len) {
+ TRACE_ERROR("invalid H2 padding length", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn);
+ /* RFC7540#6.1 : pad length = length of
+ * frame payload or greater => error.
+ */
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+ }
+
+ if (h2_ft_bit(hdr.ft) & H2_FT_FC_MASK) {
+ h2c->rcvd_c++;
+ h2c->rcvd_s++;
+ }
+ b_del(&h2c->dbuf, 1);
+ }
+ h2_skip_frame_hdr(&h2c->dbuf);
+
+ new_frame:
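+ /* latch the parsed header into the demux context:
+ * dfl = frame payload length, dsi = stream id,
+ * dft = frame type, dff = frame flags, dpl = pad length.
+ */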
+ h2c->dfl = hdr.len;
+ h2c->dsi = hdr.sid;
+ h2c->dft = hdr.ft;
+ h2c->dff = hdr.ff;
+ h2c->dpl = padlen;
+ h2c->flags |= H2_CF_DEM_IN_PROGRESS;
+ TRACE_STATE("rcvd H2 frame header, switching to FRAME_P state", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn);
+ h2c->st0 = H2_CS_FRAME_P;
+
+ /* check for minimum basic frame format validity */
+ ret = h2_frame_check(h2c->dft, 1, h2c->dsi, h2c->dfl, global.tune.bufsize);
+ if (ret != H2_ERR_NO_ERROR) {
+ TRACE_ERROR("received invalid H2 frame header", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, ret);
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+ }
+
+ /* the transition to a HEADERS frame ends the keep-alive
+ * idle period and starts the http-request idle delay;
+ * both are measured from the idle_start timestamp.
+ */
+ if (hdr.ft == H2_FT_HEADERS)
+ h2c->idle_start = now_ms;
+ }
+
+ /* Only H2_CS_FRAME_P, H2_CS_FRAME_A and H2_CS_FRAME_E here.
+ * H2_CS_FRAME_P indicates an incomplete previous operation
+ * (most often the first attempt) and requires some validity
+ * checks for the frame and the current state. The two other
+ * ones are set after completion (or abortion) and must skip
+ * validity checks.
+ */
+ tmp_h2s = h2c_st_by_id(h2c, h2c->dsi);
+
+ if (tmp_h2s != h2s && h2s && h2s_sc(h2s) &&
+ (b_data(&h2s->rxbuf) ||
+ h2c_read0_pending(h2c) ||
+ h2s->st == H2_SS_CLOSED ||
+ (h2s->flags & H2_SF_ES_RCVD) ||
+ se_fl_test(h2s->sd, SE_FL_ERROR | SE_FL_ERR_PENDING | SE_FL_EOS))) {
+ /* we may have to signal the upper layers */
+ TRACE_DEVEL("notifying stream before switching SID", H2_EV_RX_FRAME|H2_EV_STRM_WAKE, h2c->conn, h2s);
+ se_fl_set(h2s->sd, SE_FL_RCV_MORE);
+ h2s_notify_recv(h2s);
+ }
+ h2s = tmp_h2s;
+
+ if (h2c->st0 == H2_CS_FRAME_E ||
+ (h2c->st0 == H2_CS_FRAME_P && !h2_frame_check_vs_state(h2c, h2s))) {
+ TRACE_PROTO("stream error reported", H2_EV_RX_FRAME|H2_EV_PROTO_ERR, h2c->conn, h2s);
+ goto strm_err;
+ }
+
+ switch (h2c->dft) {
+ case H2_FT_SETTINGS:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 SETTINGS frame", H2_EV_RX_FRAME|H2_EV_RX_SETTINGS, h2c->conn, h2s);
+ ret = h2c_handle_settings(h2c);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->settings_rcvd);
+
+ if (h2c->st0 == H2_CS_FRAME_A) {
+ TRACE_PROTO("sending H2 SETTINGS ACK frame", H2_EV_TX_FRAME|H2_EV_RX_SETTINGS, h2c->conn, h2s);
+ ret = h2c_ack_settings(h2c);
+
+ if (ret > 0 && conn_is_reverse(h2c->conn)) {
+ /* Initiate connection reversal after SETTINGS reception. */
+ ret = h2_conn_reverse(h2c);
+ }
+ }
+ break;
+
+ case H2_FT_PING:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 PING frame", H2_EV_RX_FRAME|H2_EV_RX_PING, h2c->conn, h2s);
+ ret = h2c_handle_ping(h2c);
+ }
+
+ if (h2c->st0 == H2_CS_FRAME_A) {
+ TRACE_PROTO("sending H2 PING ACK frame", H2_EV_TX_FRAME|H2_EV_TX_SETTINGS, h2c->conn, h2s);
+ ret = h2c_ack_ping(h2c);
+ }
+ break;
+
+ case H2_FT_WINDOW_UPDATE:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 WINDOW_UPDATE frame", H2_EV_RX_FRAME|H2_EV_RX_WU, h2c->conn, h2s);
+ ret = h2c_handle_window_update(h2c, h2s);
+ }
+ break;
+
+ case H2_FT_CONTINUATION:
+ /* RFC7540#6.10: CONTINUATION may only be preceded by
+ * a HEADERS/PUSH_PROMISE/CONTINUATION frame. These
+ * frames' parsers consume all following CONTINUATION
+ * frames so this one is out of sequence.
+ */
+ TRACE_ERROR("received unexpected H2 CONTINUATION frame", H2_EV_RX_FRAME|H2_EV_RX_CONT|H2_EV_H2C_ERR, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ if (!(h2c->flags & H2_CF_IS_BACK))
+ sess_log(h2c->conn->owner);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto done;
+
+ case H2_FT_HEADERS:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 HEADERS frame", H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, h2s);
+ if (h2c->flags & H2_CF_IS_BACK)
+ tmp_h2s = h2c_bck_handle_headers(h2c, h2s);
+ else
+ tmp_h2s = h2c_frt_handle_headers(h2c, h2s);
+ if (tmp_h2s) {
+ h2s = tmp_h2s;
+ ret = 1;
+ }
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->headers_rcvd);
+ break;
+
+ case H2_FT_DATA:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 DATA frame", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ ret = h2c_handle_data(h2c, h2s);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->data_rcvd);
+
+ if (h2c->st0 == H2_CS_FRAME_A) {
+ /* rcvd_s will suffice to trigger the sending of a WU */
+ h2c->st0 = H2_CS_FRAME_H;
+ }
+ break;
+
+ case H2_FT_PRIORITY:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 PRIORITY frame", H2_EV_RX_FRAME|H2_EV_RX_PRIO, h2c->conn, h2s);
+ ret = h2c_handle_priority(h2c);
+ }
+ break;
+
+ case H2_FT_RST_STREAM:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 RST_STREAM frame", H2_EV_RX_FRAME|H2_EV_RX_RST|H2_EV_RX_EOI, h2c->conn, h2s);
+ ret = h2c_handle_rst_stream(h2c, h2s);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->rst_stream_rcvd);
+ break;
+
+ case H2_FT_GOAWAY:
+ if (h2c->st0 == H2_CS_FRAME_P) {
+ TRACE_PROTO("receiving H2 GOAWAY frame", H2_EV_RX_FRAME|H2_EV_RX_GOAWAY, h2c->conn, h2s);
+ ret = h2c_handle_goaway(h2c);
+ }
+ HA_ATOMIC_INC(&h2c->px_counters->goaway_rcvd);
+ break;
+
+ /* implement all extra frame types here */
+ default:
+ TRACE_PROTO("receiving H2 ignored frame", H2_EV_RX_FRAME, h2c->conn, h2s);
+ /* drop frames that we ignore. They may be larger than
+ * the buffer so we drain all of their contents until
+ * we reach the end.
+ */
+ ret = MIN(b_data(&h2c->dbuf), h2c->dfl);
+ b_del(&h2c->dbuf, ret);
+ h2c->dfl -= ret;
+ ret = h2c->dfl == 0;
+ }
+
+ strm_err:
+ /* We may have to send an RST if not done yet */
+ if (h2s->st == H2_SS_ERROR) {
+ TRACE_STATE("stream error, switching to FRAME_E", H2_EV_RX_FRAME|H2_EV_H2S_ERR, h2c->conn, h2s);
+ h2c->st0 = H2_CS_FRAME_E;
+ }
+
+ if (h2c->st0 == H2_CS_FRAME_E) {
+ TRACE_PROTO("sending H2 RST_STREAM frame", H2_EV_TX_FRAME|H2_EV_TX_RST|H2_EV_TX_EOI, h2c->conn, h2s);
+ ret = h2c_send_rst_stream(h2c, h2s);
+ }
+
+ /* error or missing data condition met above ? */
+ if (ret <= 0)
+ break;
+
+ if (h2c->st0 != H2_CS_FRAME_H) {
+ if (h2c->dfl)
+ TRACE_DEVEL("skipping remaining frame payload", H2_EV_RX_FRAME, h2c->conn, h2s);
+ ret = MIN(b_data(&h2c->dbuf), h2c->dfl);
+ b_del(&h2c->dbuf, ret);
+ h2c->dfl -= ret;
+ if (!h2c->dfl) {
+ h2c->flags &= ~H2_CF_DEM_IN_PROGRESS;
+ TRACE_STATE("switching to FRAME_H", H2_EV_RX_FRAME|H2_EV_RX_FHDR, h2c->conn);
+ h2c->st0 = H2_CS_FRAME_H;
+ }
+ }
+ }
+
+ if (h2c->rcvd_s > 0 &&
+ !(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MROOM))) {
+ TRACE_PROTO("sending stream WINDOW_UPDATE frame", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn, h2s);
+ h2c_send_strm_wu(h2c);
+ }
+
+ if (h2c->rcvd_c > 0 &&
+ !(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MROOM))) {
+ TRACE_PROTO("sending H2 WINDOW_UPDATE frame", H2_EV_TX_FRAME|H2_EV_TX_WU, h2c->conn);
+ h2c_send_conn_wu(h2c);
+ }
+
+ done:
+ if (h2c->st0 >= H2_CS_ERROR || (h2c->flags & H2_CF_DEM_SHORT_READ)) {
+ if (h2c->flags & H2_CF_RCVD_SHUT)
+ h2c->flags |= H2_CF_END_REACHED;
+ }
+
+ if (h2s && h2s_sc(h2s) &&
+ (b_data(&h2s->rxbuf) ||
+ h2c_read0_pending(h2c) ||
+ h2s->st == H2_SS_CLOSED ||
+ (h2s->flags & H2_SF_ES_RCVD) ||
+ se_fl_test(h2s->sd, SE_FL_ERROR | SE_FL_ERR_PENDING | SE_FL_EOS))) {
+ /* we may have to signal the upper layers */
+ TRACE_DEVEL("notifying stream before switching SID", H2_EV_RX_FRAME|H2_EV_H2S_WAKE, h2c->conn, h2s);
+ se_fl_set(h2s->sd, SE_FL_RCV_MORE);
+ h2s_notify_recv(h2s);
+ }
+
+ if (old_iw != h2c->miw) {
+ TRACE_STATE("notifying streams about SFCTL increase", H2_EV_RX_FRAME|H2_EV_H2S_WAKE, h2c->conn);
+ h2c_unblock_sfctl(h2c);
+ }
+
+ h2c_restart_reading(h2c, 0);
+ out:
+ TRACE_LEAVE(H2_EV_H2C_WAKE, h2c->conn);
+ return;
+}
+
+/* resume each h2s eligible for sending in list head <head> */
+static void h2_resume_each_sending_h2s(struct h2c *h2c, struct list *head)
+{
+ struct h2s *h2s, *h2s_back;
+
+ TRACE_ENTER(H2_EV_H2C_SEND|H2_EV_H2S_WAKE, h2c->conn);
+
+ list_for_each_entry_safe(h2s, h2s_back, head, list) {
+ if (h2c->mws <= 0 ||
+ h2c->flags & H2_CF_MUX_BLOCK_ANY ||
+ h2c->st0 >= H2_CS_ERROR)
+ break;
+
+ h2s->flags &= ~H2_SF_BLK_ANY;
+
+ if (h2s->flags & H2_SF_NOTIFIED)
+ continue;
+
+ /* If the sender changed his mind and unsubscribed, let's just
+ * remove the stream from the send_list.
+ */
+ if (!(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW)) &&
+ (!h2s->subs || !(h2s->subs->events & SUB_RETRY_SEND))) {
+ LIST_DEL_INIT(&h2s->list);
+ continue;
+ }
+
+ if (h2s->subs && h2s->subs->events & SUB_RETRY_SEND) {
+ h2s->flags |= H2_SF_NOTIFIED;
+ tasklet_wakeup(h2s->subs->tasklet);
+ h2s->subs->events &= ~SUB_RETRY_SEND;
+ if (!h2s->subs->events)
+ h2s->subs = NULL;
+ }
+ else if (h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW)) {
+ tasklet_wakeup(h2s->shut_tl);
+ }
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_SEND|H2_EV_H2S_WAKE, h2c->conn);
+}
+
+/* removes a stream from the list it may be in. If a stream has recently been
+ * appended to the send_list, it might have been waiting on this one when
+ * entering h2_snd_buf(), expecting it to complete before starting to send in
+ * turn. For this reason we check (and clear) H2_CF_WAIT_INLIST to detect this
+ * condition, and we try to resume sending streams if it happens. Note that we
+ * don't need to do it for fctl_list, as that list is only consulted after a
+ * window update on the connection and not because of any competition with
+ * other streams.
+ */
+static inline void h2_remove_from_list(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+
+ if (!LIST_INLIST(&h2s->list))
+ return;
+
+ LIST_DEL_INIT(&h2s->list);
+ if (h2c->flags & H2_CF_WAIT_INLIST) {
+ h2c->flags &= ~H2_CF_WAIT_INLIST;
+ h2_resume_each_sending_h2s(h2c, &h2c->send_list);
+ }
+}
+
+/* process Tx frames from streams to be multiplexed. Returns > 0 if it reached
+ * the end.
+ */
+static int h2_process_mux(struct h2c *h2c)
+{
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c->conn);
+
+ if (unlikely(h2c->st0 < H2_CS_FRAME_H)) {
+ if (unlikely(h2c->st0 == H2_CS_PREFACE && (h2c->flags & H2_CF_IS_BACK))) {
+ if (unlikely(h2c_bck_send_preface(h2c) <= 0)) {
+ /* RFC7540#3.5: a GOAWAY frame MAY be omitted */
+ if (h2c->st0 == H2_CS_ERROR)
+ h2c->st0 = H2_CS_ERROR2;
+ goto fail;
+ }
+ h2c->st0 = H2_CS_SETTINGS1;
+ }
+ /* need to wait for the other side */
+ if (h2c->st0 < H2_CS_FRAME_H)
+ goto done;
+ }
+
+ /* start by sending possibly pending window updates */
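+ /* rcvd_s/rcvd_c track the DATA payload bytes received and not yet
+ * acknowledged at the stream/connection level respectively; any
+ * positive value calls for a WINDOW_UPDATE to replenish the peer's
+ * send window.
+ */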
+ if (h2c->rcvd_s > 0 &&
+ !(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_MUX_MALLOC)) &&
+ h2c_send_strm_wu(h2c) < 0)
+ goto fail;
+
+ if (h2c->rcvd_c > 0 &&
+ !(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_MUX_MALLOC)) &&
+ h2c_send_conn_wu(h2c) < 0)
+ goto fail;
+
+ /* First we always process the flow control list because the streams
+ * waiting there were already elected for immediate emission but were
+ * blocked just on this.
+ */
+ h2c->flags &= ~H2_CF_WAIT_INLIST;
+ h2_resume_each_sending_h2s(h2c, &h2c->fctl_list);
+ h2_resume_each_sending_h2s(h2c, &h2c->send_list);
+
+ fail:
+ if (unlikely(h2c->st0 >= H2_CS_ERROR)) {
+ if (h2c->st0 == H2_CS_ERROR) {
+ if (h2c->max_id >= 0) {
+ h2c_send_goaway_error(h2c, NULL);
+ if (h2c->flags & H2_CF_MUX_BLOCK_ANY)
+ goto out0;
+ }
+
+ h2c->st0 = H2_CS_ERROR2; // sent (or failed hard) !
+ }
+ }
+ done:
+ TRACE_LEAVE(H2_EV_H2C_WAKE, h2c->conn);
+ return 1;
+ out0:
+ TRACE_DEVEL("leaving in blocked situation", H2_EV_H2C_WAKE, h2c->conn);
+ return 0;
+}
+
+
+/* Attempt to read data, and subscribe if none available.
+ * The function returns 1 if data has been received, otherwise zero.
+ */
+static int h2_recv(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+ struct buffer *buf;
+ int max;
+ size_t ret;
+
+ TRACE_ENTER(H2_EV_H2C_RECV, h2c->conn);
+
+ if (h2c->wait_event.events & SUB_RETRY_RECV) {
+ TRACE_DEVEL("leaving on sub_recv", H2_EV_H2C_RECV, h2c->conn);
+ return (b_data(&h2c->dbuf));
+ }
+
+ if (!h2_recv_allowed(h2c)) {
+ TRACE_DEVEL("leaving on !recv_allowed", H2_EV_H2C_RECV, h2c->conn);
+ return 1;
+ }
+
+ buf = h2_get_buf(h2c, &h2c->dbuf);
+ if (!buf) {
+ h2c->flags |= H2_CF_DEM_DALLOC;
+ TRACE_DEVEL("leaving on !alloc", H2_EV_H2C_RECV, h2c->conn);
+ return 0;
+ }
+
+ if (!b_data(buf)) {
+ /* try to pre-align the buffer the same way the rxbufs
+ * will be, to optimize memory copies. We'll make sure
+ * that the frame header lands at the end of the HTX
+ * block so it can be aliased upon recv. We cannot use
+ * the head because rcv_buf() would realign the buffer
+ * if it's empty. Thus we cheat and pretend we already
+ * have a few bytes there.
+ */
+ max = buf_room_for_htx_data(buf) + 9;
+ buf->head = sizeof(struct htx) - 9;
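+ /* layout sketch: the 9-byte frame header now ends exactly
+ * where the HTX payload area begins, so the DATA payload
+ * can later be aliased in place:
+ *
+ *   area: [ sizeof(htx) - 9 ][ frame header (9) ][ payload... ]
+ */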
+ }
+ else
+ max = b_room(buf);
+
+ ret = max ? conn->xprt->rcv_buf(conn, conn->xprt_ctx, buf, max, 0) : 0;
+
+ if (max && !ret && h2_recv_allowed(h2c)) {
+ TRACE_DATA("failed to receive data, subscribing", H2_EV_H2C_RECV, h2c->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_RECV, &h2c->wait_event);
+ } else if (ret) {
+ TRACE_DATA("received data", H2_EV_H2C_RECV, h2c->conn, 0, 0, (void*)(long)ret);
+ h2c->flags &= ~H2_CF_DEM_SHORT_READ;
+ }
+
+ if (conn_xprt_read0_pending(h2c->conn)) {
+ TRACE_DATA("received read0", H2_EV_H2C_RECV, h2c->conn);
+ h2c->flags |= H2_CF_RCVD_SHUT;
+ }
+ if (h2c->conn->flags & CO_FL_ERROR && !b_data(&h2c->dbuf)) {
+ TRACE_DATA("connection error", H2_EV_H2C_RECV, h2c->conn);
+ h2c->flags |= H2_CF_ERROR;
+ }
+
+ if (!b_data(buf)) {
+ h2_release_buf(h2c, &h2c->dbuf);
+ goto end;
+ }
+
+ if (b_data(buf) == buf->size) {
+ h2c->flags |= H2_CF_DEM_DFULL;
+ TRACE_STATE("demux buffer full", H2_EV_H2C_RECV|H2_EV_H2C_BLK, h2c->conn);
+ }
+
+ end:
+ TRACE_LEAVE(H2_EV_H2C_RECV, h2c->conn);
+ return !!ret || (h2c->flags & (H2_CF_RCVD_SHUT|H2_CF_ERROR));
+}
+
+/* Try to send data if possible.
+ * The function returns 1 if data has been sent, otherwise zero.
+ */
+static int h2_send(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+ int done;
+ int sent = 0;
+
+ TRACE_ENTER(H2_EV_H2C_SEND, h2c->conn);
+
+ if (h2c->flags & (H2_CF_ERROR|H2_CF_ERR_PENDING)) {
+ TRACE_DEVEL("leaving on error", H2_EV_H2C_SEND, h2c->conn);
+ if (h2c->flags & H2_CF_END_REACHED)
+ h2c->flags |= H2_CF_ERROR;
+ b_reset(br_tail(h2c->mbuf));
+ h2c->idle_start = now_ms;
+ return 1;
+ }
+
+ /* This loop is quite simple : it tries to fill as much as it can from
+ * pending streams into the existing buffer until it's reportedly full
+ * or the end of send requests is reached. Then it tries to send this
+ * buffer's contents out, marks it not full if at least one byte could
+ * be sent, and tries again.
+ *
+ * The snd_buf() function normally takes a "flags" argument which may
+ * be made of a combination of CO_SFL_MSG_MORE to indicate that more
+ * data immediately comes and CO_SFL_STREAMER to indicate that the
+ * connection is streaming lots of data (used to increase TLS record
+ * size at the expense of latency). The former can be sent any time
+ * there's a buffer full flag, as it indicates at least one stream
+ * attempted to send and failed so there are pending data. An
+ * alternative would be to set it as long as there's an active stream
+ * but that would be problematic for ACKs until we have an absolute
+ * guarantee that all waiters have at least one byte to send. The
+ * latter should possibly not be set for now.
+ */
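+ /* For plain TCP sockets, CO_SFL_MSG_MORE is typically propagated
+ * as MSG_MORE by the transport layer (an assumption about the
+ * lower layers), delaying the emission of partial segments;
+ * CO_SFL_STREAMER drives TLS record sizing as noted above.
+ */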
+
+ done = 0;
+ while (!(conn->flags & CO_FL_WAIT_XPRT) && !done) {
+ unsigned int flags = 0;
+ unsigned int released = 0;
+ struct buffer *buf;
+ uint to_send;
+
+ /* fill as much as we can into the current buffer */
+ while (((h2c->flags & (H2_CF_MUX_MFULL|H2_CF_MUX_MALLOC)) == 0) && !done)
+ done = h2_process_mux(h2c);
+
+ if (h2c->flags & H2_CF_MUX_MALLOC)
+ done = 1; // we won't go further without extra buffers
+
+ if ((conn->flags & (CO_FL_SOCK_WR_SH|CO_FL_ERROR)) ||
+ (h2c->flags & H2_CF_GOAWAY_FAILED))
+ break;
+
+ if (h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MROOM))
+ flags |= CO_SFL_MSG_MORE;
+
+ to_send = br_count(h2c->mbuf);
+ if (to_send > 1) {
+ /* usually we want to emit small TLS records to speed
+ * up the decoding on the client. That's what is being
+ * done by default. However if there is more than one
+ * buffer being allocated, we're streaming large data
+ * so we stick to large records.
+ */
+ flags |= CO_SFL_STREAMER;
+ }
+
+ for (buf = br_head(h2c->mbuf); b_size(buf); buf = br_del_head(h2c->mbuf)) {
+ if (b_data(buf)) {
+ int ret = conn->xprt->snd_buf(conn, conn->xprt_ctx, buf, b_data(buf),
+ flags | (to_send > 1 ? CO_SFL_MSG_MORE : 0));
+ if (!ret) {
+ done = 1;
+ break;
+ }
+ sent = 1;
+ to_send--;
+ TRACE_DATA("sent data", H2_EV_H2C_SEND, h2c->conn, 0, buf, (void*)(long)ret);
+ b_del(buf, ret);
+ if (b_data(buf)) {
+ done = 1;
+ break;
+ }
+ }
+ b_free(buf);
+ released++;
+ }
+
+ if (released)
+ offer_buffers(NULL, released);
+
+ /* Normally if we wrote at least one byte, the buffer is not full
+ * anymore. However, if it was marked full because all of its
+ * buffers were used, we don't want to instantly wake up many
+ * streams because we'd create a thundering herd effect, notably
+ * when data are flushed in small chunks. Instead we wait for
+ * the buffer to be decongested again before allowing to send
+ * again. It also has the added benefit of not pumping more
+ * data from the other side when it's known that this one is
+ * still congested.
+ */
+ if (sent && br_single(h2c->mbuf))
+ h2c->flags &= ~(H2_CF_MUX_MFULL | H2_CF_DEM_MROOM);
+ }
+
+ if (conn->flags & CO_FL_ERROR) {
+ h2c->flags |= H2_CF_ERR_PENDING;
+ if (h2c->flags & H2_CF_END_REACHED)
+ h2c->flags |= H2_CF_ERROR;
+ b_reset(br_tail(h2c->mbuf));
+ }
+
+ /* We're not full anymore, so we can wake any tasks that are waiting
+ * for us.
+ */
+ if (!(h2c->flags & (H2_CF_MUX_MFULL | H2_CF_DEM_MROOM)) && h2c->st0 >= H2_CS_FRAME_H) {
+ h2c->flags &= ~H2_CF_WAIT_INLIST;
+ h2_resume_each_sending_h2s(h2c, &h2c->send_list);
+ }
+
+ /* We're done, no more to send */
+ if (!(conn->flags & CO_FL_WAIT_XPRT) && !br_data(h2c->mbuf)) {
+ TRACE_DEVEL("leaving with everything sent", H2_EV_H2C_SEND, h2c->conn);
+ if (h2c->flags & H2_CF_MBUF_HAS_DATA && !h2c->nb_sc) {
+ h2c->flags &= ~H2_CF_MBUF_HAS_DATA;
+ h2c->idle_start = now_ms;
+ }
+ goto end;
+ }
+
+ if (!(conn->flags & CO_FL_ERROR) && !(h2c->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_STATE("more data to send, subscribing", H2_EV_H2C_SEND, h2c->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_SEND, &h2c->wait_event);
+ }
+ TRACE_DEVEL("leaving with some data left to send", H2_EV_H2C_SEND, h2c->conn);
+end:
+ return sent || (h2c->flags & (H2_CF_ERR_PENDING|H2_CF_ERROR));
+}
+
+/* this is the tasklet referenced in h2c->wait_event.tasklet */
+struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct connection *conn;
+ struct tasklet *tl = (struct tasklet *)t;
+ int conn_in_list;
+ struct h2c *h2c = ctx;
+ int ret = 0;
+
+ if (state & TASK_F_USR1) {
+ /* the tasklet was idling on an idle connection, it might have
+ * been stolen, let's be careful!
+ */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (t->context == NULL) {
+ /* The connection has been taken over by another thread,
+ * we're no longer responsible for it, so just free the
+ * tasklet, and do nothing.
+ */
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ tasklet_free(tl);
+ t = NULL;
+ goto leave;
+ }
+ conn = h2c->conn;
+ TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+
+ /* Remove the connection from the list, to be sure nobody attempts
+ * to use it while we handle the I/O events
+ */
+ conn_in_list = conn->flags & CO_FL_LIST_MASK;
+ if (conn_in_list)
+ conn_delete_from_tree(conn);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ } else {
+ /* we're certain the connection was not in an idle list */
+ conn = h2c->conn;
+ TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+ conn_in_list = 0;
+ }
+
+ if (!(h2c->wait_event.events & SUB_RETRY_SEND))
+ ret = h2_send(h2c);
+ if (!(h2c->wait_event.events & SUB_RETRY_RECV))
+ ret |= h2_recv(h2c);
+ if (ret || b_data(&h2c->dbuf))
+ ret = h2_process(h2c);
+
+ /* If we were in an idle list, we want to add it back into it,
+ * unless h2_process() returned -1, which means it has destroyed
+ * the connection (testing !ret is enough, since if h2_process()
+ * wasn't called then ret will be 0 anyway).
+ */
+ if (ret < 0)
+ t = NULL;
+
+ if (!ret && conn_in_list) {
+ struct server *srv = objt_server(conn->target);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ _srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+leave:
+ TRACE_LEAVE(H2_EV_H2C_WAKE);
+ return t;
+}
+
+/* callback called on any event by the connection handler.
+ * It applies changes and returns zero, or < 0 if it wants immediate
+ * destruction of the connection (which normally does not happen in h2).
+ */
+static int h2_process(struct h2c *h2c)
+{
+ struct connection *conn = h2c->conn;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+
+ if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY) &&
+ (b_data(&h2c->dbuf) || (h2c->flags & H2_CF_RCVD_SHUT))) {
+ h2_process_demux(h2c);
+
+ if (h2c->st0 >= H2_CS_ERROR || (h2c->flags & H2_CF_ERROR))
+ b_reset(&h2c->dbuf);
+
+ if (!b_full(&h2c->dbuf))
+ h2c->flags &= ~H2_CF_DEM_DFULL;
+ }
+ h2_send(h2c);
+
+ if (unlikely(h2c->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && !(h2c->flags & H2_CF_IS_BACK)) {
+ int send_goaway = 1;
+ /* If a close-spread-time option is set, we want to avoid
+ * closing all the active HTTP2 connections at once so we add a
+ * random factor that will spread the closing.
+ */
+ if (tick_isset(global.close_spread_end)) {
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* This should increase the closing rate the
+ * further along the window we are. */
+ send_goaway = (remaining_window <= statistical_prng_range(global.close_spread_time));
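+ /* e.g. with close-spread-time 10s and 2s left
+ * in the window, a GOAWAY is sent with ~80%
+ * probability (illustrative figures, assuming a
+ * uniform PRNG range).
+ */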
+ }
+ }
+ else if (global.tune.options & GTUNE_DISABLE_ACTIVE_CLOSE)
+ send_goaway = 0; /* let the client close the connection itself */
+ /* frontend is stopping, reload likely in progress, let's try
+ * to announce a graceful shutdown if not yet done. We don't
+ * care if it fails, it will be tried again later.
+ */
+ if (send_goaway) {
+ TRACE_STATE("proxy stopped, sending GOAWAY", H2_EV_H2C_WAKE|H2_EV_TX_FRAME, conn);
+ if (!(h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED))) {
+ if (h2c->last_sid < 0)
+ h2c->last_sid = (1U << 31) - 1;
+ h2c_send_goaway_error(h2c, NULL);
+ }
+ }
+ }
+
+ /*
+ * If we received early data, and the handshake is done, wake
+ * any stream that was waiting for it.
+ */
+ if (!(h2c->flags & H2_CF_WAIT_FOR_HS) &&
+ (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_WAIT_XPRT | CO_FL_EARLY_DATA)) == CO_FL_EARLY_DATA) {
+ struct eb32_node *node;
+ struct h2s *h2s;
+
+ h2c->flags |= H2_CF_WAIT_FOR_HS;
+ node = eb32_lookup_ge(&h2c->streams_by_id, 1);
+
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ if (se_fl_test(h2s->sd, SE_FL_WAIT_FOR_HS))
+ h2s_notify_recv(h2s);
+ node = eb32_next(node);
+ }
+ }
+
+ if ((h2c->flags & H2_CF_ERROR) || h2c_read0_pending(h2c) ||
+ h2c->st0 == H2_CS_ERROR2 || h2c->flags & H2_CF_GOAWAY_FAILED ||
+ (eb_is_empty(&h2c->streams_by_id) && h2c->last_sid >= 0 &&
+ h2c->max_id >= h2c->last_sid)) {
+ h2_wake_some_streams(h2c, 0);
+
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ /* no more stream, kill the connection now */
+ h2_release(h2c);
+ TRACE_DEVEL("leaving after releasing the connection", H2_EV_H2C_WAKE);
+ return -1;
+ }
+
+ /* connections in error must be removed from the idle lists */
+ if (conn->flags & CO_FL_LIST_MASK) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(conn);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ }
+ else if (h2c->st0 == H2_CS_ERROR) {
+ /* connections in error must be removed from the idle lists */
+ if (conn->flags & CO_FL_LIST_MASK) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(conn);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ }
+
+ if (!b_data(&h2c->dbuf))
+ h2_release_buf(h2c, &h2c->dbuf);
+
+ if (h2c->st0 == H2_CS_ERROR2 || (h2c->flags & H2_CF_GOAWAY_FAILED) ||
+ (h2c->st0 != H2_CS_ERROR &&
+ !br_data(h2c->mbuf) &&
+ (h2c->mws <= 0 || LIST_ISEMPTY(&h2c->fctl_list)) &&
+ ((h2c->flags & H2_CF_MUX_BLOCK_ANY) || LIST_ISEMPTY(&h2c->send_list))))
+ h2_release_mbuf(h2c);
+
+ h2c_update_timeout(h2c);
+ h2_send(h2c);
+ TRACE_LEAVE(H2_EV_H2C_WAKE, conn);
+ return 0;
+}
+
+/* wake-up function called by the connection layer (mux_ops.wake) */
+static int h2_wake(struct connection *conn)
+{
+ struct h2c *h2c = conn->ctx;
+ int ret;
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, conn);
+ ret = h2_process(h2c);
+ if (ret >= 0) {
+ h2_wake_some_streams(h2c, 0);
+
+ /* For active reverse connection, an explicit check is required if an
+ * error is pending to propagate the error as demux process is blocked
+ * until reversal. This allows to quickly close the connection and
+ * prepare a new one.
+ */
+ if (unlikely(conn_reverse_in_preconnect(conn)) && h2c_is_dead(h2c)) {
+ TRACE_DEVEL("leaving and killing dead connection", H2_EV_STRM_END, h2c->conn);
+ h2_release(h2c);
+ }
+ }
+
+ TRACE_LEAVE(H2_EV_H2C_WAKE);
+ return ret;
+}
+
+/* Connection timeout management. The principle is that if there's no receipt
+ * nor sending for a certain amount of time, the connection is closed. If the
+ * MUX buffer still holds pending data or cannot be allocated, the connection
+ * is immediately killed. If it's allocatable and empty, we attempt to send a
+ * GOAWAY frame.
+ */
+struct task *h2_timeout_task(struct task *t, void *context, unsigned int state)
+{
+ struct h2c *h2c = context;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(H2_EV_H2C_WAKE, h2c ? h2c->conn : NULL);
+
+ if (h2c) {
+ /* Make sure nobody stole the connection from us */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+
+ /* Somebody already stole the connection from us, so we should not
+ * free it, we just have to free the task.
+ */
+ if (!t->context) {
+ h2c = NULL;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ goto do_leave;
+ }
+
+
+ if (!expired) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ TRACE_DEVEL("leaving (not expired)", H2_EV_H2C_WAKE, h2c->conn);
+ return t;
+ }
+
+ if (!h2c_may_expire(h2c)) {
+ /* we do still have streams but all of them are idle, waiting
+ * for the data layer, so we must not enforce the timeout here.
+ */
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ t->expire = TICK_ETERNITY;
+ return t;
+ }
+
+ /* We're about to destroy the connection, so make sure nobody attempts
+ * to steal it from us.
+ */
+ if (h2c->conn->flags & CO_FL_LIST_MASK)
+ conn_delete_from_tree(h2c->conn);
+
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+do_leave:
+ task_destroy(t);
+
+ if (!h2c) {
+ /* resources were already deleted */
+ TRACE_DEVEL("leaving (not more h2c)", H2_EV_H2C_WAKE);
+ return NULL;
+ }
+
+ h2c->task = NULL;
+ h2c_error(h2c, H2_ERR_NO_ERROR);
+ h2_wake_some_streams(h2c, 0);
+
+ if (br_data(h2c->mbuf)) {
+ /* don't even try to send a GOAWAY, the buffer is stuck */
+ h2c->flags |= H2_CF_GOAWAY_FAILED;
+ }
+
+ /* try to send but no need to insist */
+ h2c->last_sid = h2c->max_id;
+ if (h2c_send_goaway_error(h2c, NULL) <= 0)
+ h2c->flags |= H2_CF_GOAWAY_FAILED;
+
+ if (br_data(h2c->mbuf) && !(h2c->flags & H2_CF_GOAWAY_FAILED) && conn_xprt_ready(h2c->conn)) {
+ unsigned int released = 0;
+ struct buffer *buf;
+
+ for (buf = br_head(h2c->mbuf); b_size(buf); buf = br_del_head(h2c->mbuf)) {
+ if (b_data(buf)) {
+ int ret = h2c->conn->xprt->snd_buf(h2c->conn, h2c->conn->xprt_ctx, buf, b_data(buf), 0);
+ if (!ret)
+ break;
+ b_del(buf, ret);
+ if (b_data(buf))
+ break;
+ b_free(buf);
+ released++;
+ }
+ }
+
+ if (released)
+ offer_buffers(NULL, released);
+ }
+
+ /* in any case this connection must not be considered idle anymore */
+ if (h2c->conn->flags & CO_FL_LIST_MASK) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(h2c->conn);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+
+ /* either we can release everything now or it will be done later once
+ * the last stream closes.
+ */
+ if (eb_is_empty(&h2c->streams_by_id))
+ h2_release(h2c);
+
+ TRACE_LEAVE(H2_EV_H2C_WAKE);
+ return NULL;
+}
+
+
+/*******************************************/
+/* functions below are used by the streams */
+/*******************************************/
+
+/*
+ * Attach a new stream to a connection
+ * (Used for outgoing connections)
+ */
+static int h2_attach(struct connection *conn, struct sedesc *sd, struct session *sess)
+{
+ struct h2s *h2s;
+ struct h2c *h2c = conn->ctx;
+
+ TRACE_ENTER(H2_EV_H2S_NEW, conn);
+ h2s = h2c_bck_stream_new(h2c, sd->sc, sess);
+ if (!h2s) {
+ TRACE_DEVEL("leaving on stream creation failure", H2_EV_H2S_NEW|H2_EV_H2S_ERR, conn);
+ return -1;
+ }
+
+ /* the connection is not idle anymore, let's mark this */
+ HA_ATOMIC_AND(&h2c->wait_event.tasklet->state, ~TASK_F_USR1);
+ xprt_set_used(h2c->conn, h2c->conn->xprt, h2c->conn->xprt_ctx);
+
+ TRACE_LEAVE(H2_EV_H2S_NEW, conn, h2s);
+ return 0;
+}
+
+/* Retrieves the first valid stream connector from this connection, or returns
+ * NULL. We have to scan because we may have some orphan streams. It might be
+ * beneficial to scan backwards from the end to reduce the likelihood of
+ * finding orphans.
+ */
+static struct stconn *h2_get_first_sc(const struct connection *conn)
+{
+ struct h2c *h2c = conn->ctx;
+ struct h2s *h2s;
+ struct eb32_node *node;
+
+ node = eb32_first(&h2c->streams_by_id);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ if (h2s_sc(h2s))
+ return h2s_sc(h2s);
+ node = eb32_next(node);
+ }
+ return NULL;
+}
+
+static int h2_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output)
+{
+ int ret = 0;
+ struct h2c *h2c = conn->ctx;
+
+ switch (mux_ctl) {
+ case MUX_CTL_STATUS:
+ /* Only consider the mux to be ready if we're done with
+ * the preface and settings, and we had no error.
+ */
+ if (h2c->st0 >= H2_CS_FRAME_H && h2c->st0 < H2_CS_ERROR)
+ ret |= MUX_STATUS_READY;
+ return ret;
+ case MUX_CTL_EXIT_STATUS:
+ return MUX_ES_UNKNOWN;
+
+ case MUX_CTL_REVERSE_CONN:
+ BUG_ON(h2c->flags & H2_CF_IS_BACK);
+
+ TRACE_DEVEL("connection reverse done, restart demux", H2_EV_H2C_WAKE, h2c->conn);
+ h2c->flags &= ~H2_CF_DEM_TOOMANY;
+ tasklet_wakeup(h2c->wait_event.tasklet);
+ return 0;
+
+ default:
+ return -1;
+ }
+}
+
+static int h2_sctl(struct stconn *sc, enum mux_sctl_type mux_sctl, void *output)
+{
+ int ret = 0;
+ struct h2s *h2s = __sc_mux_strm(sc);
+
+ switch (mux_sctl) {
+ case MUX_SCTL_SID:
+ if (output)
+ *((int64_t *)output) = h2s->id;
+ return ret;
+
+ default:
+ return -1;
+ }
+}
+
+/*
+ * Destroy the mux and the associated connection, if it is no longer used
+ */
+static void h2_destroy(void *ctx)
+{
+ struct h2c *h2c = ctx;
+
+ TRACE_ENTER(H2_EV_H2C_END, h2c->conn);
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ BUG_ON(h2c->conn->ctx != h2c);
+ h2_release(h2c);
+ }
+ TRACE_LEAVE(H2_EV_H2C_END);
+}
+
+/*
+ * Detach the stream from the connection and possibly release the connection.
+ */
+static void h2_detach(struct sedesc *sd)
+{
+ struct h2s *h2s = sd->se;
+ struct h2c *h2c;
+ struct session *sess;
+
+ TRACE_ENTER(H2_EV_STRM_END, h2s ? h2s->h2c->conn : NULL, h2s);
+
+ if (!h2s) {
+ TRACE_LEAVE(H2_EV_STRM_END);
+ return;
+ }
+
+ /* there's no txbuf so we're certain we cannot send anything */
+ h2s->flags &= ~H2_SF_NOTIFIED;
+
+ sess = h2s->sess;
+ h2c = h2s->h2c;
+ h2c->nb_sc--;
+ if (!h2c->nb_sc && !br_data(h2c->mbuf))
+ h2c->idle_start = now_ms;
+
+ if ((h2c->flags & (H2_CF_IS_BACK|H2_CF_DEM_TOOMANY)) == H2_CF_DEM_TOOMANY &&
+ !h2_frt_has_too_many_sc(h2c)) {
+ /* frontend connection was blocking new streams creation */
+ h2c->flags &= ~H2_CF_DEM_TOOMANY;
+ h2c_restart_reading(h2c, 1);
+ }
+
+ /* this stream may be blocked waiting for some data to leave (possibly
+ * an ES or RST frame), so orphan it in this case.
+ */
+ if (!(h2c->flags & (H2_CF_ERR_PENDING|H2_CF_ERROR)) &&
+ (h2c->st0 < H2_CS_ERROR) &&
+ (h2s->flags & (H2_SF_BLK_MBUSY | H2_SF_BLK_MROOM | H2_SF_BLK_MFCTL)) &&
+ ((h2s->flags & (H2_SF_WANT_SHUTR | H2_SF_WANT_SHUTW)) || h2s->subs)) {
+ TRACE_DEVEL("leaving on stream blocked", H2_EV_STRM_END|H2_EV_H2S_BLK, h2c->conn, h2s);
+ /* refresh the timeout if none was active, so that the last
+ * leaving stream may arm it.
+ */
+ if (h2c->task && !tick_isset(h2c->task->expire))
+ h2c_update_timeout(h2c);
+ return;
+ }
+
+ if ((h2c->flags & H2_CF_DEM_BLOCK_ANY && h2s->id == h2c->dsi)) {
+ /* unblock the connection if it was blocked on this
+ * stream.
+ */
+ h2c->flags &= ~H2_CF_DEM_BLOCK_ANY;
+ h2c->flags &= ~H2_CF_MUX_BLOCK_ANY;
+ h2c_restart_reading(h2c, 1);
+ }
+
+ h2s_destroy(h2s);
+
+ if (h2c->flags & H2_CF_IS_BACK) {
+ if (!(h2c->flags & (H2_CF_RCVD_SHUT|H2_CF_ERR_PENDING|H2_CF_ERROR))) {
+ if (h2c->conn->flags & CO_FL_PRIVATE) {
+ /* Add the connection in the session server list, if not already done */
+ if (!session_add_conn(sess, h2c->conn, h2c->conn->target)) {
+ h2c->conn->owner = NULL;
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ h2c->conn->mux->destroy(h2c);
+ TRACE_DEVEL("leaving on error after killing outgoing connection", H2_EV_STRM_END|H2_EV_H2C_ERR);
+ return;
+ }
+ }
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ if (session_check_idle_conn(h2c->conn->owner, h2c->conn) != 0) {
+ /* At this point either the connection is destroyed, or it's been added to the server idle list, just stop */
+ TRACE_DEVEL("leaving without reusable idle connection", H2_EV_STRM_END);
+ return;
+ }
+ }
+ }
+ else {
+ if (eb_is_empty(&h2c->streams_by_id)) {
+ /* If the connection is owned by the session, first remove it
+ * from its list
+ */
+ if (h2c->conn->owner) {
+ session_unown_conn(h2c->conn->owner, h2c->conn);
+ h2c->conn->owner = NULL;
+ }
+
+ /* mark that the tasklet may lose its context to another thread and
+ * that the handler needs to check it under the idle conns lock.
+ */
+ HA_ATOMIC_OR(&h2c->wait_event.tasklet->state, TASK_F_USR1);
+ xprt_set_idle(h2c->conn, h2c->conn->xprt, h2c->conn->xprt_ctx);
+
+ if (!srv_add_to_idle_list(objt_server(h2c->conn->target), h2c->conn, 1)) {
+ /* The server doesn't want it, let's kill the connection right away */
+ h2c->conn->mux->destroy(h2c);
+ TRACE_DEVEL("leaving on error after killing outgoing connection", H2_EV_STRM_END|H2_EV_H2C_ERR);
+ return;
+ }
+ /* At this point, the connection has been added to the
+ * server idle list, so another thread may already have
+ * hijacked it, so we can't do anything with it.
+ */
+ TRACE_DEVEL("reusable idle connection", H2_EV_STRM_END);
+ return;
+
+ }
+ else if (!h2c->conn->hash_node->node.node.leaf_p &&
+ h2_avail_streams(h2c->conn) > 0 && objt_server(h2c->conn->target) &&
+ !LIST_INLIST(&h2c->conn->session_list)) {
+ srv_add_to_avail_list(__objt_server(h2c->conn->target), h2c->conn);
+ }
+ }
+ }
+ }
+
+ /* We don't want to close right now unless we're removing the
+ * last stream, and either the connection is in error, or it
+ * reached the ID already specified in a GOAWAY frame received
+ * or sent (as seen by last_sid >= 0).
+ */
+ if (h2c_is_dead(h2c)) {
+ /* no more stream will come, kill it now */
+ TRACE_DEVEL("leaving and killing dead connection", H2_EV_STRM_END, h2c->conn);
+ h2_release(h2c);
+ }
+ else if (h2c->task) {
+ h2c_update_timeout(h2c);
+ TRACE_DEVEL("leaving, refreshing connection's timeout", H2_EV_STRM_END, h2c->conn);
+ }
+ else
+ TRACE_DEVEL("leaving", H2_EV_STRM_END, h2c->conn);
+}
+
+/* Performs a synchronous or asynchronous shutr(). */
+static void h2_do_shutr(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+
+ if (h2s->st == H2_SS_CLOSED)
+ goto done;
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2c->conn, h2s);
+
+ if (h2s->flags & H2_SF_WANT_SHUTW)
+ goto add_to_list;
+
+ /* a connstream may require us to immediately kill the whole connection
+ * for example because of a "tcp-request content reject" rule that is
+ * normally used to limit abuse. In this case we schedule a goaway to
+ * close the connection.
+ */
+ if (se_fl_test(h2s->sd, SE_FL_KILL_CONN) &&
+ !(h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED))) {
+ TRACE_STATE("stream wants to kill the connection", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM);
+ h2s_error(h2s, H2_ERR_ENHANCE_YOUR_CALM);
+ }
+ else if (!(h2s->flags & H2_SF_HEADERS_SENT)) {
+ /* Nothing was ever sent for this stream, so reset with
+ * REFUSED_STREAM error to let the client retry the
+ * request.
+ */
+ TRACE_STATE("no headers sent yet, trying a retryable abort", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_REFUSED_STREAM);
+ }
+ else {
+ /* a final response was already provided, we don't want this
+ * stream anymore. This may happen when the server responds
+ * before the end of an upload and closes quickly (redirect,
+ * deny, ...)
+ */
+ h2s_error(h2s, H2_ERR_CANCEL);
+ }
+
+ if (!(h2s->flags & H2_SF_RST_SENT) &&
+ h2s_send_rst_stream(h2c, h2s) <= 0)
+ goto add_to_list;
+
+ if (!(h2c->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(h2c->wait_event.tasklet);
+ h2s_close(h2s);
+ done:
+ h2s->flags &= ~H2_SF_WANT_SHUTR;
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2c->conn, h2s);
+ return;
+add_to_list:
+ /* Let the handler know we want to shutr, and add ourselves to the
+ * most relevant list if not yet done. h2_deferred_shut() will be
+ * automatically called via the shut_tl tasklet when there's room
+ * again.
+ */
+ h2s->flags |= H2_SF_WANT_SHUTR;
+ if (!LIST_INLIST(&h2s->list)) {
+ if (h2s->flags & H2_SF_BLK_MFCTL)
+ LIST_APPEND(&h2c->fctl_list, &h2s->list);
+ else if (h2s->flags & (H2_SF_BLK_MBUSY|H2_SF_BLK_MROOM))
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2c->conn, h2s);
+ return;
+}
+
+/* Performs a synchronous or asynchronous shutw(). */
+static void h2_do_shutw(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+
+ if (h2s->st == H2_SS_HLOC || h2s->st == H2_SS_CLOSED)
+ goto done;
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2c->conn, h2s);
+
+ if (h2s->st != H2_SS_ERROR &&
+ (h2s->flags & (H2_SF_HEADERS_SENT | H2_SF_MORE_HTX_DATA)) == H2_SF_HEADERS_SENT) {
+ /* we can cleanly close using an empty data frame only after headers
+ * and if no more data is expected to be sent.
+ */
+ if (!(h2s->flags & (H2_SF_ES_SENT|H2_SF_RST_SENT)) &&
+ h2_send_empty_data_es(h2s) <= 0)
+ goto add_to_list;
+
+ if (h2s->st == H2_SS_HREM)
+ h2s_close(h2s);
+ else
+ h2s->st = H2_SS_HLOC;
+ } else {
+ /* a connstream may require us to immediately kill the whole connection
+ * for example because of a "tcp-request content reject" rule that is
+ * normally used to limit abuse. In this case we schedule a goaway to
+ * close the connection.
+ */
+ if (se_fl_test(h2s->sd, SE_FL_KILL_CONN) &&
+ !(h2c->flags & (H2_CF_GOAWAY_SENT|H2_CF_GOAWAY_FAILED))) {
+ TRACE_STATE("stream wants to kill the connection", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM);
+ h2s_error(h2s, H2_ERR_ENHANCE_YOUR_CALM);
+ }
+ else if (h2s->flags & H2_SF_MORE_HTX_DATA) {
+ /* some unsent data were pending (e.g. abort during an upload),
+ * let's send a CANCEL.
+ */
+ TRACE_STATE("shutw before end of data, sending CANCEL", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_CANCEL);
+ }
+ else {
+ /* Nothing was ever sent for this stream, so reset with
+ * REFUSED_STREAM error to let the client retry the
+ * request.
+ */
+ TRACE_STATE("no headers sent yet, trying a retryable abort", H2_EV_STRM_SHUT, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_REFUSED_STREAM);
+ }
+
+ if (!(h2s->flags & H2_SF_RST_SENT) &&
+ h2s_send_rst_stream(h2c, h2s) <= 0)
+ goto add_to_list;
+
+ h2s_close(h2s);
+ }
+
+ if (!(h2c->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(h2c->wait_event.tasklet);
+
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2c->conn, h2s);
+
+ done:
+ h2s->flags &= ~H2_SF_WANT_SHUTW;
+ return;
+
+ add_to_list:
+ /* Let the handler know we want to shutw, and add ourselves to the
+ * most relevant list if not yet done. h2_deferred_shut() will be
+ * automatically called via the shut_tl tasklet when there's room
+ * again.
+ */
+ h2s->flags |= H2_SF_WANT_SHUTW;
+ if (!LIST_INLIST(&h2s->list)) {
+ if (h2s->flags & H2_SF_BLK_MFCTL)
+ LIST_APPEND(&h2c->fctl_list, &h2s->list);
+ else if (h2s->flags & (H2_SF_BLK_MBUSY|H2_SF_BLK_MROOM))
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2c->conn, h2s);
+ return;
+}
+
+/* This is the tasklet referenced in h2s->shut_tl, it is used for
+ * deferred shutdowns when the h2_detach() was done but the mux buffer was full
+ * and prevented the last frame from being emitted.
+ */
+struct task *h2_deferred_shut(struct task *t, void *ctx, unsigned int state)
+{
+ struct h2s *h2s = ctx;
+ struct h2c *h2c = h2s->h2c;
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2c->conn, h2s);
+
+ if (h2s->flags & H2_SF_NOTIFIED) {
+ /* some data processing remains to be done first */
+ goto end;
+ }
+
+ if (h2s->flags & H2_SF_WANT_SHUTW)
+ h2_do_shutw(h2s);
+
+ if (h2s->flags & H2_SF_WANT_SHUTR)
+ h2_do_shutr(h2s);
+
+ if (!(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))) {
+ /* We're done trying to send, remove ourself from the send_list */
+ h2_remove_from_list(h2s);
+
+ if (!h2s_sc(h2s)) {
+ h2s_destroy(h2s);
+ if (h2c_is_dead(h2c)) {
+ h2_release(h2c);
+ t = NULL;
+ }
+ }
+ }
+ end:
+ TRACE_LEAVE(H2_EV_STRM_SHUT);
+ return t;
+}
+
+/* shutr() called by the stream connector (mux_ops.shutr) */
+static void h2_shutr(struct stconn *sc, enum co_shr_mode mode)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s);
+ if (mode)
+ h2_do_shutr(h2s);
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s);
+}
+
+/* shutw() called by the stream connector (mux_ops.shutw) */
+static void h2_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+
+ TRACE_ENTER(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s);
+ h2_do_shutw(h2s);
+ TRACE_LEAVE(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s);
+}
+
+/* Decode the payload of a HEADERS frame and produce the HTX request or response
+ * depending on the connection's side. Returns a positive value on success, a
+ * negative value on failure, or 0 if it couldn't proceed. May report connection
+ * errors in h2c->errcode if the frame is non-decodable and the connection
+ * unrecoverable. In absence of connection error when a failure is reported, the
+ * caller must assume a stream error.
+ *
+ * The function may fold CONTINUATION frames into the initial HEADERS frame
+ * by removing padding and the next frame header, then moving the CONTINUATION
+ * frame's payload and adjusting h2c->dfl to match the new aggregated frame,
+ * leaving a hole between the main frame and the beginning of the next one.
+ * Any incomplete or subsequent frame remaining at the end may be moved to
+ * fill the hole if the aggregated frame is not deleted. Wrapped HEADERS
+ * frames are unwrapped into a temporary buffer before decoding.
+ *
+ * A buffer at the beginning of processing may look like this :
+ *
+ * ,---.---------.-----.--------------.--------------.------.---.
+ * |///| HEADERS | PAD | CONTINUATION | CONTINUATION | DATA |///|
+ * `---^---------^-----^--------------^--------------^------^---'
+ * | | <-----> | |
+ * area | dpl | wrap
+ * |<--------------> |
+ * | dfl |
+ * |<-------------------------------------------------->|
+ * head data
+ *
+ * Padding is automatically overwritten when folding, participating to the
+ * hole size after dfl :
+ *
+ * ,---.------------------------.-----.--------------.------.---.
+ * |///| HEADERS : CONTINUATION |/////| CONTINUATION | DATA |///|
+ * `---^------------------------^-----^--------------^------^---'
+ * | | <-----> | |
+ * area | hole | wrap
+ * |<-----------------------> |
+ * | dfl |
+ * |<-------------------------------------------------->|
+ * head data
+ *
+ * Please note that the HEADERS frame is always stripped of its PADLEN byte;
+ * however it may start with the 5 stream-dep+weight bytes if the PRIORITY
+ * bit is set.
+ *
+ * The <flags> field must point to either the stream's flags or to a copy of it
+ * so that the function can update the following flags :
+ * - H2_SF_DATA_CLEN when content-length is seen
+ * - H2_SF_HEADERS_RCVD once the frame is successfully decoded
+ *
+ * The H2_SF_HEADERS_RCVD flag is also looked at in the <flags> field prior to
+ * decoding, in order to detect if we're dealing with a headers or a trailers
+ * block (the trailers block appears after H2_SF_HEADERS_RCVD was seen).
+ */
+static int h2c_dec_hdrs(struct h2c *h2c, struct buffer *rxbuf, uint32_t *flags, unsigned long long *body_len, char *upgrade_protocol)
+{
+ const uint8_t *hdrs = (uint8_t *)b_head(&h2c->dbuf);
+ struct buffer *tmp = get_trash_chunk();
+ struct http_hdr list[global.tune.max_http_hdr * 2];
+ struct buffer *copy = NULL;
+ unsigned int msgf;
+ struct htx *htx = NULL;
+ int flen; // header frame len
+ int hole = 0;
+ int ret = 0;
+ int outlen;
+ int wrap;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+
+next_frame:
+ if (b_data(&h2c->dbuf) - hole < h2c->dfl)
+ goto leave; // incomplete input frame
+
+ /* No END_HEADERS means there's one or more CONTINUATION frames. In
+ * this case, we'll try to paste it immediately after the initial
+ * HEADERS frame payload and kill any possible padding. The initial
+ * frame's length will be increased to represent the concatenation
+ * of the two frames. The next frame's payload is read from right after
+ * the current aggregated frame and is written at position <flen> (minus
+ * padding if some is present).
+ */
+ if (unlikely(!(h2c->dff & H2_F_HEADERS_END_HEADERS))) {
+ struct h2_fh hdr;
+ int clen; // CONTINUATION frame's payload length
+
+ TRACE_STATE("EH missing, expecting continuation frame", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_HDR, h2c->conn);
+ if (!h2_peek_frame_hdr(&h2c->dbuf, h2c->dfl + hole, &hdr)) {
+ /* no more data, the buffer may be full, either due to
+ * too large a frame or because of too large a hole that
+ * we're going to compact at the end.
+ */
+ goto leave;
+ }
+
+ if (hdr.ft != H2_FT_CONTINUATION) {
+ /* RFC7540#6.10: frame of unexpected type */
+ TRACE_STATE("not continuation!", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_HDR|H2_EV_RX_CONT|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+
+ if (hdr.sid != h2c->dsi) {
+ /* RFC7540#6.10: frame of different stream */
+ TRACE_STATE("different stream ID!", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_HDR|H2_EV_RX_CONT|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+
+ if ((unsigned)hdr.len > (unsigned)global.tune.bufsize) {
+ /* RFC7540#4.2: invalid frame length */
+ TRACE_STATE("too large frame!", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_HDR|H2_EV_RX_CONT|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ goto fail;
+ }
+
+ /* detect when we must stop aggregating frames */
+ h2c->dff |= hdr.ff & H2_F_HEADERS_END_HEADERS;
+
+ /* Take as much as we can of the CONTINUATION frame's payload */
+ clen = b_data(&h2c->dbuf) - (h2c->dfl + hole + 9);
+ if (clen > hdr.len)
+ clen = hdr.len;
+
+ /* Move the frame's payload over the padding, hole and frame
+ * header. At least one of hole or dpl is null (see diagrams
+ * above). The hole moves after the new aggregated frame.
+ */
+ b_move(&h2c->dbuf, b_peek_ofs(&h2c->dbuf, h2c->dfl + hole + 9), clen, -(h2c->dpl + hole + 9));
+ h2c->dfl += hdr.len - h2c->dpl;
+ hole += h2c->dpl + 9;
+ h2c->dpl = 0;
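+ /* worked example (illustrative numbers): with dfl=100, dpl=5,
+ * hole=0 and a complete CONTINUATION of hdr.len=30, the payload
+ * at offset 109 moves back by 14 bytes to offset 95, overwriting
+ * the padding and the frame header; dfl becomes 125 and hole
+ * becomes 14.
+ */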
+ TRACE_STATE("waiting for next continuation frame", H2_EV_RX_FRAME|H2_EV_RX_FHDR|H2_EV_RX_CONT|H2_EV_RX_HDR, h2c->conn);
+ goto next_frame;
+ }
+
+ flen = h2c->dfl - h2c->dpl;
+
+ /* if the input buffer wraps, take a temporary copy of it (rare) */
+ wrap = b_wrap(&h2c->dbuf) - b_head(&h2c->dbuf);
+ if (wrap < h2c->dfl) {
+ copy = alloc_trash_chunk();
+ if (!copy) {
+ TRACE_DEVEL("failed to allocate temporary buffer", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ goto fail;
+ }
+ memcpy(copy->area, b_head(&h2c->dbuf), wrap);
+ memcpy(copy->area + wrap, b_orig(&h2c->dbuf), h2c->dfl - wrap);
+ hdrs = (uint8_t *) copy->area;
+ }
+
+ /* Skip StreamDep and weight for now (we don't support PRIORITY) */
+ if (h2c->dff & H2_F_HEADERS_PRIORITY) {
+ if (read_n32(hdrs) == h2c->dsi) {
+ /* RFC7540#5.3.1 : stream dep may not depend on itself */
+ TRACE_STATE("invalid stream dependency!", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+
+ if (flen < 5) {
+ TRACE_STATE("frame too short for priority!", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_FRAME_SIZE_ERROR);
+ goto fail;
+ }
+
+ hdrs += 5; // stream dep = 4, weight = 1
+ flen -= 5;
+ }
+
+ if (!h2_get_buf(h2c, rxbuf)) {
+ TRACE_STATE("waiting for h2c rxbuf allocation", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_BLK, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SALLOC;
+ goto leave;
+ }
+
+ /* we can't retry a failed decompression operation so we must be very
+ * careful not to take any risks. In practice the output buffer is
+ * always empty except maybe for trailers, in which case we simply have
+ * to wait for the upper layer to finish consuming what is available.
+ */
+ htx = htx_from_buf(rxbuf);
+ if (!htx_is_empty(htx)) {
+ TRACE_STATE("waiting for room in h2c rxbuf", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_BLK, h2c->conn);
+ h2c->flags |= H2_CF_DEM_SFULL;
+ goto leave;
+ }
+
+ /* past this point we cannot roll back in case of error */
+ outlen = hpack_decode_frame(h2c->ddht, hdrs, flen, list,
+ sizeof(list)/sizeof(list[0]), tmp);
+
+ if (outlen > 0 &&
+ (TRACE_SOURCE)->verbosity >= H2_VERB_ADVANCED &&
+ TRACE_ENABLED(TRACE_LEVEL_USER, H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn, 0, 0, 0)) {
+ struct ist n;
+ int i;
+
+ for (i = 0; list[i].n.len; i++) {
+ n = list[i].n;
+
+ if (!isttest(n)) {
+ /* this is in fact a pseudo header whose number is in n.len */
+ n = h2_phdr_to_ist(n.len);
+ }
+
+ h2_trace_header(n, list[i].v, H2_EV_RX_FRAME|H2_EV_RX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, NULL);
+ }
+ }
+
+ if (outlen < 0) {
+ TRACE_STATE("failed to decompress HPACK", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_COMPRESSION_ERROR);
+ goto fail;
+ }
+
+ /* The HPACK decompressor was updated, let's update the input buffer and
+ * the parser's state to commit these changes and allow us to later
+ * fail solely on the stream if needed.
+ */
+ b_del(&h2c->dbuf, h2c->dfl + hole);
+ h2c->dfl = hole = 0;
+ h2c->st0 = H2_CS_FRAME_H;
+
+ /* OK now we have our header list in <list> */
+ msgf = (h2c->dff & H2_F_HEADERS_END_STREAM) ? 0 : H2_MSGF_BODY;
+ msgf |= (*flags & H2_SF_BODY_TUNNEL) ? H2_MSGF_BODY_TUNNEL: 0;
+ /* If an Extended CONNECT has been sent on this stream, set message flag
+ * to convert 200 response to 101 htx response */
+ msgf |= (*flags & H2_SF_EXT_CONNECT_SENT) ? H2_MSGF_EXT_CONNECT: 0;
+
+ if (*flags & H2_SF_HEADERS_RCVD)
+ goto trailers;
+
+ /* This is the first HEADERS frame so it's a headers block */
+ if (h2c->flags & H2_CF_IS_BACK)
+ outlen = h2_make_htx_response(list, htx, &msgf, body_len, upgrade_protocol);
+ else
+ outlen = h2_make_htx_request(list, htx, &msgf, body_len,
+ !!(((const struct session *)h2c->conn->owner)->fe->options2 & PR_O2_REQBUG_OK));
+
+ if (outlen < 0 || htx_free_space(htx) < global.tune.maxrewrite) {
+ /* too large headers? this is a stream error only */
+ TRACE_STATE("message headers too large or invalid", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2S_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ htx->flags |= HTX_FL_PARSING_ERROR;
+ goto fail;
+ }
+
+ if (msgf & H2_MSGF_BODY) {
+ /* a payload is present */
+ if (msgf & H2_MSGF_BODY_CL) {
+ *flags |= H2_SF_DATA_CLEN;
+ htx->extra = *body_len;
+ }
+ }
+ if (msgf & H2_MSGF_BODYLESS_RSP)
+ *flags |= H2_SF_BODYLESS_RESP;
+
+ if (msgf & H2_MSGF_BODY_TUNNEL)
+ *flags |= H2_SF_BODY_TUNNEL;
+ else {
+ /* Abort the tunnel attempt, if any */
+ if (*flags & H2_SF_BODY_TUNNEL)
+ *flags |= H2_SF_TUNNEL_ABRT;
+ *flags &= ~H2_SF_BODY_TUNNEL;
+ }
+
+ done:
+ /* indicate that a HEADERS frame was received for this stream, except
+ * for 1xx responses. For 1xx responses, another HEADERS frame is
+ * expected.
+ */
+ if (!(msgf & H2_MSGF_RSP_1XX))
+ *flags |= H2_SF_HEADERS_RCVD;
+
+ if (h2c->dff & H2_F_HEADERS_END_STREAM) {
+ if (msgf & H2_MSGF_RSP_1XX) {
+ /* RFC9113#8.1 : HEADERS frame with the ES flag set that carries an informational status code is malformed */
+ TRACE_STATE("invalid interim response with ES flag!", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ goto fail;
+ }
+ /* no more data are expected for this message */
+ htx->flags |= HTX_FL_EOM;
+ *flags |= H2_SF_ES_RCVD;
+ }
+
+ if (msgf & H2_MSGF_EXT_CONNECT)
+ *flags |= H2_SF_EXT_CONNECT_RCVD;
+
+ /* success */
+ ret = 1;
+
+ leave:
+ /* If there is a hole left and it's not at the end, we are forced to
+ * move the remaining data over it.
+ */
+ if (hole) {
+ if (b_data(&h2c->dbuf) > h2c->dfl + hole)
+ b_move(&h2c->dbuf, b_peek_ofs(&h2c->dbuf, h2c->dfl + hole),
+ b_data(&h2c->dbuf) - (h2c->dfl + hole), -hole);
+ b_sub(&h2c->dbuf, hole);
+ }
+
+ if (b_full(&h2c->dbuf) && h2c->dfl && (!htx || htx_is_empty(htx))) {
+ /* too large frames */
+ h2c_error(h2c, H2_ERR_INTERNAL_ERROR);
+ ret = -1;
+ }
+
+ if (htx)
+ htx_to_buf(htx, rxbuf);
+ free_trash_chunk(copy);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_HDR, h2c->conn);
+ return ret;
+
+ fail:
+ ret = -1;
+ goto leave;
+
+ trailers:
+ /* This is the last HEADERS frame hence a trailer */
+ if (!(h2c->dff & H2_F_HEADERS_END_STREAM)) {
+ /* It's a trailer but it's missing ES flag */
+		TRACE_STATE("missing ES on trailers frame", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2C_ERR|H2_EV_PROTO_ERR, h2c->conn);
+ h2c_error(h2c, H2_ERR_PROTOCOL_ERROR);
+ HA_ATOMIC_INC(&h2c->px_counters->conn_proto_err);
+ goto fail;
+ }
+
+ /* Trailers terminate a DATA sequence */
+ if (h2_make_htx_trailers(list, htx) <= 0) {
+ TRACE_STATE("failed to append HTX trailers into rxbuf", H2_EV_RX_FRAME|H2_EV_RX_HDR|H2_EV_H2S_ERR, h2c->conn);
+ goto fail;
+ }
+ *flags |= H2_SF_ES_RCVD;
+ goto done;
+}
+
+/* Transfer the payload of a DATA frame to the HTTP/1 side. The HTTP/2 frame
+ * parser state is automatically updated. Returns > 0 if it could completely
+ * send the current frame, 0 if it couldn't complete, in which case
+ * SE_FL_RCV_MORE must be checked to know if some data remain pending (an empty
+ * DATA frame can return 0 as a valid result). Stream errors are reported in
+ * h2s->errcode and connection errors in h2c->errcode. The caller must already
+ * have checked the frame header and ensured that the frame was complete or the
+ * buffer full. It changes the frame state to FRAME_A once done.
+ */
+static int h2_frt_transfer_data(struct h2s *h2s)
+{
+ struct h2c *h2c = h2s->h2c;
+ int block;
+ unsigned int flen = 0;
+ struct htx *htx = NULL;
+ struct buffer *scbuf;
+ unsigned int sent;
+
+ TRACE_ENTER(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+
+ h2c->flags &= ~H2_CF_DEM_SFULL;
+
+ scbuf = h2_get_buf(h2c, &h2s->rxbuf);
+ if (!scbuf) {
+ h2c->flags |= H2_CF_DEM_SALLOC;
+ TRACE_STATE("waiting for an h2s rxbuf", H2_EV_RX_FRAME|H2_EV_RX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto fail;
+ }
+ htx = htx_from_buf(scbuf);
+
+try_again:
+ flen = h2c->dfl - h2c->dpl;
+ if (!flen)
+ goto end_transfer;
+
+ if (flen > b_data(&h2c->dbuf)) {
+ flen = b_data(&h2c->dbuf);
+ if (!flen)
+ goto fail;
+ }
+
+ block = htx_free_data_space(htx);
+ if (!block) {
+ h2c->flags |= H2_CF_DEM_SFULL;
+ TRACE_STATE("h2s rxbuf is full", H2_EV_RX_FRAME|H2_EV_RX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto fail;
+ }
+ if (flen > block)
+ flen = block;
+
+ /* here, flen is the max we can copy into the output buffer */
+ block = b_contig_data(&h2c->dbuf, 0);
+ if (flen > block)
+ flen = block;
+
+ sent = htx_add_data(htx, ist2(b_head(&h2c->dbuf), flen));
+ TRACE_DATA("move some data to h2s rxbuf", H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s, 0, (void *)(long)sent);
+
+ b_del(&h2c->dbuf, sent);
+ h2c->dfl -= sent;
+ h2c->rcvd_c += sent;
+ h2c->rcvd_s += sent; // warning, this can also affect the closed streams!
+
+ if (h2s->flags & H2_SF_DATA_CLEN) {
+ h2s->body_len -= sent;
+ htx->extra = h2s->body_len;
+ }
+
+ if (sent < flen) {
+ h2c->flags |= H2_CF_DEM_SFULL;
+ TRACE_STATE("h2s rxbuf is full", H2_EV_RX_FRAME|H2_EV_RX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto fail;
+ }
+
+ goto try_again;
+
+ end_transfer:
+ /* here we're done with the frame, all the payload (except padding) was
+ * transferred.
+ */
+
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL) && (h2c->dff & H2_F_DATA_END_STREAM)) {
+		/* no more data are expected for this message. This adds the EOM
+		 * flag but only on the response path or if no tunnel attempt
+		 * was aborted. Otherwise (request path + tunnel aborted), the
+ * EOM was already reported.
+ */
+ if ((h2c->flags & H2_CF_IS_BACK) || !(h2s->flags & H2_SF_TUNNEL_ABRT)) {
+ /* htx may be empty if receiving an empty DATA frame. */
+ if (!htx_set_eom(htx))
+ goto fail;
+ }
+ }
+
+ h2c->rcvd_c += h2c->dpl;
+ h2c->rcvd_s += h2c->dpl;
+ h2c->dpl = 0;
+ h2c->st0 = H2_CS_FRAME_A; // send the corresponding window update
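+
+	/* rcvd_c/rcvd_s now account for the whole frame, padding
+	 * included; these counters are later returned to the peer as
+	 * connection-level and stream-level WINDOW_UPDATE increments.
+	 */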
+ htx_to_buf(htx, scbuf);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ return 1;
+ fail:
+ if (htx)
+ htx_to_buf(htx, scbuf);
+ TRACE_LEAVE(H2_EV_RX_FRAME|H2_EV_RX_DATA, h2c->conn, h2s);
+ return 0;
+}
+
+/* Try to send a HEADERS frame matching HTX response present in HTX message
+ * <htx> for the H2 stream <h2s>. Returns the number of bytes sent. The caller
+ * must check the stream's status to detect any error which might have happened
+ * subsequently to a successful send. The htx blocks are automatically removed
+ * from the message. The htx message is assumed to be valid since produced from
+ * the internal code, hence it contains a start line, an optional series of
+ * header blocks and an end of header, otherwise an invalid frame could be
+ * emitted and the resulting htx message could be left in an inconsistent state.
+ */
+static size_t h2s_snd_fhdrs(struct h2s *h2s, struct htx *htx)
+{
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct h2c *h2c = h2s->h2c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct htx_sl *sl;
+ enum htx_blk_type type;
+ int es_now = 0;
+ int ret = 0;
+ int hdr;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+
+ /* get the start line (we do have one) and the rest of the headers,
+ * that we dump starting at header 0 */
+ sl = NULL;
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOH)
+ break;
+
+ if (type == HTX_BLK_HDR) {
+			BUG_ON(!sl); /* The start-line must be defined before any headers */
+ if (unlikely(hdr >= sizeof(list)/sizeof(list[0]) - 1)) {
+ TRACE_ERROR("too many headers", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+ hdr++;
+ }
+ else if (type == HTX_BLK_RES_SL) {
+ BUG_ON(sl); /* Only one start-line expected */
+ sl = htx_get_blk_ptr(htx, blk);
+ h2s->status = sl->info.res.status;
+ if ((sl->flags & HTX_SL_F_BODYLESS_RESP) || h2s->status == 204 || h2s->status == 304)
+ h2s->flags |= H2_SF_BODYLESS_RESP;
+ if (h2s->status < 100 || h2s->status > 999) {
+ TRACE_ERROR("will not encode an invalid status code", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ else if (h2s->status == 101) {
+ if (unlikely(h2s->flags & H2_SF_EXT_CONNECT_RCVD)) {
+ /* If an Extended CONNECT has been received, we need to convert 101 to 200 */
+ h2s->status = 200;
+ h2s->flags &= ~H2_SF_EXT_CONNECT_RCVD;
+ }
+ else {
+					/* Otherwise, 101 responses are not supported in H2, so return an error (RFC7540#8.1.1) */
+ TRACE_ERROR("will not encode an invalid status code", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+ else if ((h2s->flags & H2_SF_BODY_TUNNEL) && h2s->status >= 300) {
+ /* Abort the tunnel attempt */
+ h2s->flags &= ~H2_SF_BODY_TUNNEL;
+ h2s->flags |= H2_SF_TUNNEL_ABRT;
+ }
+ }
+ else {
+ TRACE_ERROR("will not encode unexpected htx block", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+
+	/* The start-line must be defined */
+ BUG_ON(!sl);
+
+ /* marker for end of headers */
+ list[hdr].n = ist("");
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ return 0;
+ }
+
+ chunk_reset(&outbuf);
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= 9 || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < 9)
+ goto full;
+
+ /* len: 0x000000 (fill later), type: 1(HEADERS), flags: ENDH=4 */
+ memcpy(outbuf.area, "\x00\x00\x00\x01\x04", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ outbuf.data = 9;
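+
+	/* For reference, the 9 bytes just built follow the frame header
+	 * layout from RFC7540#4.1:
+	 *
+	 *  +-----------------------------------------------+
+	 *  |                 Length (24)                   |
+	 *  +---------------+---------------+---------------+
+	 *  |   Type (8)    |   Flags (8)   |
+	 *  +-+-------------+---------------+-------------------------------+
+	 *  |R|                 Stream Identifier (31)                      |
+	 *  +=+=============================================================+
+	 */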
+
+ if ((h2c->flags & (H2_CF_SHTS_UPDATED|H2_CF_DTSU_EMITTED)) == H2_CF_SHTS_UPDATED) {
+ /* SETTINGS_HEADER_TABLE_SIZE changed, we must send an HPACK
+ * dynamic table size update so that some clients are not
+ * confused. In practice we only need to send the DTSU when the
+ * advertised size is lower than the current one, and since we
+ * don't use it and don't care about the default 4096 bytes,
+ * we only ack it with a zero size thus we at most have to deal
+ * with this once. See RFC7541#4.2 and #6.3 for the spec, and
+ * below for the whole context and interoperability risks:
+ * https://lists.w3.org/Archives/Public/ietf-http-wg/2021OctDec/0235.html
+ */
+ if (b_room(&outbuf) < 1)
+ goto full;
+ outbuf.area[outbuf.data++] = 0x20; // HPACK DTSU 0 bytes
+
+ /* let's not update the flags now but only once the buffer is
+ * really committed.
+ */
+ }
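+
+	/* Note: the byte above follows RFC7541#6.3: a '001' prefix plus
+	 * the new maximum size on 5 bits, so 0x20 encodes "dynamic table
+	 * size update to 0".
+	 */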
+
+ /* encode status, which necessarily is the first one */
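+	/* (hpack_encode_int_status() can typically emit a single indexed
+	 * byte for the status codes present in the HPACK static table:
+	 * 200, 204, 206, 304, 400, 404 and 500; other codes fall back to
+	 * a longer literal representation.)
+	 */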
+ if (!hpack_encode_int_status(&outbuf, h2s->status)) {
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ if ((TRACE_SOURCE)->verbosity >= H2_VERB_ADVANCED) {
+ char sts[4];
+
+ h2_trace_header(ist(":status"), ist(ultoa_r(h2s->status, sts, sizeof(sts))),
+ H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC), __FUNCTION__,
+ h2c, h2s);
+ }
+
+ /* encode all headers, stop at empty name */
+ for (hdr = 0; hdr < sizeof(list)/sizeof(list[0]); hdr++) {
+ /* these ones do not exist in H2 and must be dropped. */
+ if (isteq(list[hdr].n, ist("connection")) ||
+ isteq(list[hdr].n, ist("proxy-connection")) ||
+ isteq(list[hdr].n, ist("keep-alive")) ||
+ isteq(list[hdr].n, ist("upgrade")) ||
+ isteq(list[hdr].n, ist("transfer-encoding")))
+ continue;
+
+ /* Skip all pseudo-headers */
+ if (*(list[hdr].n.ptr) == ':')
+ continue;
+
+ if (isteq(list[hdr].n, ist("")))
+ break; // end
+
+ if (!h2_encode_header(&outbuf, list[hdr].n, list[hdr].v, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ /* update the frame's size */
+ h2_set_frame_size(outbuf.area, outbuf.data - 9);
+
+ if (outbuf.data > h2c->mfs + 9) {
+ if (!h2_fragment_headers(&outbuf, h2c->mfs)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ TRACE_USER("sent H2 response ", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s, htx);
+
+ /* remove all header blocks including the EOH and compute the
+ * corresponding size.
+ */
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ /* The removed block is the EOH */
+ if (type == HTX_BLK_EOH)
+ break;
+ }
+
+ if (!h2s_sc(h2s) || se_fl_test(h2s->sd, SE_FL_SHW)) {
+ /* Response already closed: add END_STREAM */
+ es_now = 1;
+ }
+ else if ((htx->flags & HTX_FL_EOM) && htx_is_empty(htx) && h2s->status >= 200) {
+ /* EOM+empty: we may need to add END_STREAM except for 1xx
+ * responses and tunneled response.
+ */
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL) || h2s->status >= 300)
+ es_now = 1;
+ }
+
+ if (es_now)
+ outbuf.area[4] |= H2_F_HEADERS_END_STREAM;
+
+ /* commit the H2 response */
+ b_add(mbuf, outbuf.data);
+ h2c->flags |= H2_CF_MBUF_HAS_DATA;
+
+ /* indicates the HEADERS frame was sent, except for 1xx responses. For
+ * 1xx responses, another HEADERS frame is expected.
+ */
+ if (h2s->status >= 200)
+ h2s->flags |= H2_SF_HEADERS_SENT;
+
+ if (h2c->flags & H2_CF_SHTS_UPDATED) {
+ /* was sent above */
+ h2c->flags |= H2_CF_DTSU_EMITTED;
+ h2c->flags &= ~H2_CF_SHTS_UPDATED;
+ }
+
+ if (es_now) {
+ h2s->flags |= H2_SF_ES_SENT;
+ TRACE_PROTO("setting ES on HEADERS frame", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s, htx);
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HLOC;
+ else
+ h2s_close(h2s);
+ }
+
+ /* OK we could properly deliver the response */
+ end:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ ret = 0;
+ TRACE_STATE("mux buffer full", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ fail:
+ /* unparsable HTX messages, too large ones to be produced in the local
+ * list etc go here (unrecoverable errors).
+ */
+ h2s_error(h2s, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto end;
+}
+
+/* Try to send a HEADERS frame matching HTX request present in HTX message
+ * <htx> for the H2 stream <h2s>. Returns the number of bytes sent. The caller
+ * must check the stream's status to detect any error which might have happened
+ * subsequently to a successful send. The htx blocks are automatically removed
+ * from the message. The htx message is assumed to be valid since produced from
+ * the internal code, hence it contains a start line, an optional series of
+ * header blocks and an end of header, otherwise an invalid frame could be
+ * emitted and the resulting htx message could be left in an inconsistent state.
+ */
+static size_t h2s_snd_bhdrs(struct h2s *h2s, struct htx *htx)
+{
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct h2c *h2c = h2s->h2c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ struct htx_sl *sl;
+ struct ist meth, uri, auth, host = IST_NULL;
+ enum htx_blk_type type;
+ int es_now = 0;
+ int ret = 0;
+ int hdr;
+ int extended_connect = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+
+ /* get the start line (we do have one) and the rest of the headers,
+ * that we dump starting at header 0 */
+ sl = NULL;
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOH)
+ break;
+
+ if (type == HTX_BLK_HDR) {
+			BUG_ON(!sl); /* The start-line must be defined before any headers */
+ if (unlikely(hdr >= sizeof(list)/sizeof(list[0]) - 1)) {
+ TRACE_ERROR("too many headers", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+
+ /* Skip header if same name is used to add the server name */
+ if ((h2c->flags & H2_CF_IS_BACK) && isttest(h2c->proxy->server_id_hdr_name) &&
+ isteq(list[hdr].n, h2c->proxy->server_id_hdr_name))
+ continue;
+
+ /* Convert connection: upgrade to Extended connect from rfc 8441 */
+ if ((sl->flags & HTX_SL_F_CONN_UPG) && isteqi(list[hdr].n, ist("connection"))) {
+ /* rfc 7230 #6.1 Connection = list of tokens */
+ struct ist connection_ist = list[hdr].v;
+ do {
+ if (isteqi(iststop(connection_ist, ','),
+ ist("upgrade"))) {
+ if (!(h2c->flags & H2_CF_RCVD_RFC8441)) {
+ TRACE_STATE("reject upgrade because of no RFC8441 support", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ TRACE_STATE("convert upgrade to extended connect method", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ h2s->flags |= (H2_SF_BODY_TUNNEL|H2_SF_EXT_CONNECT_SENT);
+ sl->info.req.meth = HTTP_METH_CONNECT;
+ meth = ist("CONNECT");
+
+ extended_connect = 1;
+ break;
+ }
+
+ connection_ist = istadv(istfind(connection_ist, ','), 1);
+ } while (istlen(connection_ist));
+ }
+
+ if ((sl->flags & HTX_SL_F_CONN_UPG) && isteq(list[hdr].n, ist("upgrade"))) {
+ /* rfc 7230 #6.7 Upgrade = list of protocols
+ * rfc 8441 #4 Extended connect = :protocol is single-valued
+ *
+ * only first HTTP/1 protocol is preserved
+ */
+ const struct ist protocol = iststop(list[hdr].v, ',');
+ /* upgrade_protocol field is 16 bytes long in h2s */
+ istpad(h2s->upgrade_protocol, isttrim(protocol, 15));
+ }
+
+ if (isteq(list[hdr].n, ist("host")))
+ host = list[hdr].v;
+
+ hdr++;
+ }
+ else if (type == HTX_BLK_REQ_SL) {
+ BUG_ON(sl); /* Only one start-line expected */
+ sl = htx_get_blk_ptr(htx, blk);
+ meth = htx_sl_req_meth(sl);
+ uri = htx_sl_req_uri(sl);
+ if ((sl->flags & HTX_SL_F_BODYLESS_RESP) || sl->info.req.meth == HTTP_METH_HEAD)
+ h2s->flags |= H2_SF_BODYLESS_RESP;
+ if (unlikely(uri.len == 0)) {
+ TRACE_ERROR("no URI in HTX request", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+ else {
+ TRACE_ERROR("will not encode unexpected htx block", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+
+	/* The start-line must be defined */
+ BUG_ON(!sl);
+
+ /* Now add the server name to a header (if requested) */
+ if ((h2c->flags & H2_CF_IS_BACK) && isttest(h2c->proxy->server_id_hdr_name)) {
+ struct server *srv = objt_server(h2c->conn->target);
+
+ if (srv) {
+ list[hdr].n = h2c->proxy->server_id_hdr_name;
+ list[hdr].v = ist(srv->id);
+ hdr++;
+ }
+ }
+
+ /* marker for end of headers */
+ list[hdr].n = ist("");
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ return 0;
+ }
+
+ chunk_reset(&outbuf);
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= 9 || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < 9)
+ goto full;
+
+ /* len: 0x000000 (fill later), type: 1(HEADERS), flags: ENDH=4 */
+ memcpy(outbuf.area, "\x00\x00\x00\x01\x04", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ outbuf.data = 9;
+
+ /* encode the method, which necessarily is the first one */
+ if (!hpack_encode_method(&outbuf, sl->info.req.meth, meth)) {
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ h2_trace_header(ist(":method"), meth, H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC), __FUNCTION__, h2c, h2s);
+
+ auth = ist(NULL);
+
+ /* RFC7540 #8.3: the CONNECT method must have :
+ * - :authority set to the URI part (host:port)
+ * - :method set to CONNECT
+ * - :scheme and :path omitted
+ *
+ * Note that this is not applicable in case of the Extended CONNECT
+ * protocol from rfc 8441.
+ */
+ if (unlikely(sl->info.req.meth == HTTP_METH_CONNECT) && !extended_connect) {
+ auth = uri;
+
+ if (!h2_encode_header(&outbuf, ist(":authority"), auth, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ h2s->flags |= H2_SF_BODY_TUNNEL;
+ } else {
+ /* other methods need a :scheme. If an authority is known from
+ * the request line, it must be sent, otherwise only host is
+ * sent. Host is never sent as the authority.
+ *
+ * This code is also applicable for Extended CONNECT protocol
+ * from rfc 8441.
+ */
+ struct ist scheme = { };
+
+ if (uri.ptr[0] != '/' && uri.ptr[0] != '*') {
+ /* the URI seems to start with a scheme */
+ int len = 1;
+
+ while (len < uri.len && uri.ptr[len] != ':')
+ len++;
+
+ if (len + 2 < uri.len && uri.ptr[len + 1] == '/' && uri.ptr[len + 2] == '/') {
+ /* make the uri start at the authority now */
+ scheme = ist2(uri.ptr, len);
+ uri = istadv(uri, len + 3);
+
+ /* find the auth part of the URI */
+ auth = ist2(uri.ptr, 0);
+ while (auth.len < uri.len && auth.ptr[auth.len] != '/')
+ auth.len++;
+
+ uri = istadv(uri, auth.len);
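+
+				/* e.g. "https://example.com/index.html" is
+				 * split here into scheme="https",
+				 * auth="example.com" and uri="/index.html"
+				 * (illustrative values only).
+				 */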
+ }
+ }
+
+ /* For Extended CONNECT, the :authority must be present.
+ * Use host value for it.
+ */
+ if (unlikely(extended_connect) && isttest(host))
+ auth = host;
+
+ if (!scheme.len) {
+ /* no explicit scheme, we're using an origin-form URI,
+ * probably from an H1 request transcoded to H2 via an
+ * external layer, then received as H2 without authority.
+ * So we have to look up the scheme from the HTX flags.
+ * In such a case only http and https are possible, and
+ * https is the default (sent by browsers).
+ */
+ if ((sl->flags & (HTX_SL_F_HAS_SCHM|HTX_SL_F_SCHM_HTTP)) == (HTX_SL_F_HAS_SCHM|HTX_SL_F_SCHM_HTTP))
+ scheme = ist("http");
+ else
+ scheme = ist("https");
+ }
+
+ if (!hpack_encode_scheme(&outbuf, scheme)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ if (auth.len &&
+ !h2_encode_header(&outbuf, ist(":authority"), auth, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ /* encode the path. RFC7540#8.1.2.3: if path is empty it must
+ * be sent as '/' or '*'.
+ */
+ if (unlikely(!uri.len)) {
+ if (sl->info.req.meth == HTTP_METH_OPTIONS)
+ uri = ist("*");
+ else
+ uri = ist("/");
+ }
+
+ if (!hpack_encode_path(&outbuf, uri)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+
+ h2_trace_header(ist(":path"), uri, H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC), __FUNCTION__, h2c, h2s);
+
+ /* encode the pseudo-header protocol from rfc8441 if using
+ * Extended CONNECT method.
+ */
+ if (unlikely(extended_connect)) {
+ const struct ist protocol = ist(h2s->upgrade_protocol);
+ if (isttest(protocol)) {
+ if (!h2_encode_header(&outbuf, ist(":protocol"), protocol, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+ }
+ }
+
+ /* encode all headers, stop at empty name. Host is only sent if we
+ * do not provide an authority.
+ */
+ for (hdr = 0; hdr < sizeof(list)/sizeof(list[0]); hdr++) {
+ struct ist n = list[hdr].n;
+ struct ist v = list[hdr].v;
+
+ /* these ones do not exist in H2 and must be dropped. */
+ if (isteq(n, ist("connection")) ||
+ (auth.len && isteq(n, ist("host"))) ||
+ isteq(n, ist("proxy-connection")) ||
+ isteq(n, ist("keep-alive")) ||
+ isteq(n, ist("upgrade")) ||
+ isteq(n, ist("transfer-encoding")))
+ continue;
+
+ if (isteq(n, ist("te"))) {
+ /* "te" may only be sent with "trailers" if this value
+ * is present, otherwise it must be deleted.
+ */
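+			/* e.g. "te: trailers" and "te: trailers, deflate"
+			 * are both reduced to "te: trailers", while a
+			 * plain "te: gzip" is dropped entirely.
+			 */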
+ v = istist(v, ist("trailers"));
+ if (!isttest(v) || (v.len > 8 && v.ptr[8] != ','))
+ continue;
+ v = ist("trailers");
+ }
+
+ /* Skip all pseudo-headers */
+ if (*(n.ptr) == ':')
+ continue;
+
+ if (isteq(n, ist("")))
+ break; // end
+
+ if (!h2_encode_header(&outbuf, n, v, H2_EV_TX_FRAME|H2_EV_TX_HDR, ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ /* update the frame's size */
+ h2_set_frame_size(outbuf.area, outbuf.data - 9);
+
+ if (outbuf.data > h2c->mfs + 9) {
+ if (!h2_fragment_headers(&outbuf, h2c->mfs)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ TRACE_USER("sent H2 request ", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s, htx);
+
+ /* remove all header blocks including the EOH and compute the
+ * corresponding size.
+ */
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ /* The removed block is the EOH */
+ if (type == HTX_BLK_EOH)
+ break;
+ }
+
+ if (!h2s_sc(h2s) || se_fl_test(h2s->sd, SE_FL_SHW)) {
+ /* Request already closed: add END_STREAM */
+ es_now = 1;
+ }
+ if ((htx->flags & HTX_FL_EOM) && htx_is_empty(htx)) {
+ /* EOM+empty: we may need to add END_STREAM (except for CONNECT
+ * request)
+ */
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL))
+ es_now = 1;
+ }
+
+ if (es_now)
+ outbuf.area[4] |= H2_F_HEADERS_END_STREAM;
+
+	/* commit the H2 request */
+ b_add(mbuf, outbuf.data);
+ h2c->flags |= H2_CF_MBUF_HAS_DATA;
+ h2s->flags |= H2_SF_HEADERS_SENT;
+ h2s->st = H2_SS_OPEN;
+
+ if (es_now) {
+ TRACE_PROTO("setting ES on HEADERS frame", H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s, htx);
+ // trim any possibly pending data (eg: inconsistent content-length)
+ h2s->flags |= H2_SF_ES_SENT;
+ h2s->st = H2_SS_HLOC;
+ }
+
+ end:
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ ret = 0;
+ TRACE_STATE("mux buffer full", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ fail:
+ /* unparsable HTX messages, too large ones to be produced in the local
+ * list etc go here (unrecoverable errors).
+ */
+ h2s_error(h2s, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto end;
+}
+
+/* Try to send a DATA frame matching HTTP response present in HTX structure
+ * present in <buf>, for stream <h2s>. The caller must check the stream's
+ * status to detect any error which might have happened subsequently to a
+ * successful send. Returns the number of data bytes consumed, or zero if
+ * nothing done.
+ */
+static size_t h2s_make_data(struct h2s *h2s, struct buffer *buf, size_t count)
+{
+ struct h2c *h2c = h2s->h2c;
+ struct htx *htx;
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ size_t total = 0;
+ int es_now = 0;
+ int bsize; /* htx block size */
+ int fsize; /* h2 frame size */
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ int trunc_out; /* non-zero if truncated on out buf */
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+
+ htx = htx_from_buf(buf);
+
+ /* We only come here with HTX_BLK_DATA blocks */
+
+ new_frame:
+ if (!count || htx_is_empty(htx))
+ goto end;
+
+ if ((h2c->flags & H2_CF_IS_BACK) &&
+ (h2s->flags & (H2_SF_HEADERS_RCVD|H2_SF_BODY_TUNNEL)) == H2_SF_BODY_TUNNEL) {
+		/* The response HEADERS frame has not been received yet. Thus the tunnel
+ * is not fully established yet. In this situation, we block
+ * data sending.
+ */
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ TRACE_STATE("Request DATA frame blocked waiting for tunnel establishment", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+ else if ((h2c->flags & H2_CF_IS_BACK) && (h2s->flags & H2_SF_TUNNEL_ABRT)) {
+		/* a tunnel attempt was aborted but there is pending raw data to xfer to the server.
+		 * Thus the stream is closed with the CANCEL error. The error will be reported to
+		 * the upper layer as a server abort. But at this stage there is nothing more we can
+ * do. We just wait for the end of the response to be sure to not truncate it.
+ */
+ if (!(h2s->flags & H2_SF_ES_RCVD)) {
+ TRACE_STATE("Request DATA frame blocked waiting end of aborted tunnel", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ h2s->flags |= H2_SF_BLK_MBUSY;
+ }
+ else {
+			TRACE_ERROR("Request DATA frame for aborted tunnel", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ h2s_error(h2s, H2_ERR_CANCEL);
+ }
+ goto end;
+ }
+
+ blk = htx_get_head_blk(htx);
+ type = htx_get_blk_type(blk);
+ bsize = htx_get_blksz(blk);
+ fsize = bsize;
+ trunc_out = 0;
+ if (type != HTX_BLK_DATA)
+ goto end;
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (br_count(h2c->mbuf) > h2c->nb_streams) {
+ /* more buffers than streams allocated, pointless
+ * to continue, we'd use more RAM for no reason.
+ */
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
+ /* Perform some optimizations to reduce the number of buffer copies.
+ * First, if the mux's buffer is empty and the htx area contains
+ * exactly one data block of the same size as the requested count, and
+ * this count fits within the frame size, the stream's window size, and
+ * the connection's window size, then it's possible to simply swap the
+ * caller's buffer with the mux's output buffer and adjust offsets and
+ * length to match the entire DATA HTX block in the middle. In this
+ * case we perform a true zero-copy operation from end-to-end. This is
+ * the situation that happens all the time with large files. Second, if
+ * this is not possible, but the mux's output buffer is empty, we still
+ * have an opportunity to avoid the copy to the intermediary buffer, by
+ * making the intermediary buffer's area point to the output buffer's
+ * area. In this case we want to skip the HTX header to make sure that
+ * copies remain aligned and that this operation remains possible all
+ * the time. This goes for headers, data blocks and any data extracted
+ * from the HTX blocks.
+ */
+ if (unlikely(fsize == count &&
+ htx_nbblks(htx) == 1 && type == HTX_BLK_DATA &&
+ fsize <= h2s_mws(h2s) && fsize <= h2c->mws && fsize <= h2c->mfs)) {
+ void *old_area = mbuf->area;
+
+ if (b_data(mbuf)) {
+ /* Too bad there are data left there. We're willing to memcpy/memmove
+ * up to 1/4 of the buffer, which means that it's OK to copy a large
+ * frame into a buffer containing few data if it needs to be realigned,
+ * and that it's also OK to copy few data without realigning. Otherwise
+ * we'll pretend the mbuf is full and wait for it to become empty.
+ */
+ if (fsize + 9 <= b_room(mbuf) &&
+ (b_data(mbuf) <= b_size(mbuf) / 4 ||
+ (fsize <= b_size(mbuf) / 4 && fsize + 9 <= b_contig_space(mbuf)))) {
+ TRACE_STATE("small data present in output buffer, appending", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto copy;
+ }
+
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("too large data present in output buffer, waiting for emptiness", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (htx->flags & HTX_FL_EOM) {
+ /* EOM+empty: we may need to add END_STREAM (except for tunneled
+ * message)
+ */
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL))
+ es_now = 1;
+ }
+ /* map an H2 frame to the HTX block so that we can put the
+ * frame header there.
+ */
+ *mbuf = b_make(buf->area, buf->size, sizeof(struct htx) + blk->addr - 9, fsize + 9);
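+
+		/* The caller's buffer is now mapped as below; the 9 bytes
+		 * right before the DATA payload are about to receive the
+		 * frame header:
+		 *
+		 *  area
+		 *   |<- sizeof(htx) + blk->addr - 9 ->|<- 9 ->|<- fsize ->|
+		 *   [ htx metadata / leading space    |  hdr  |  payload  ]
+		 */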
+ outbuf.area = b_head(mbuf);
+
+ /* prepend an H2 DATA frame header just before the DATA block */
+ memcpy(outbuf.area, "\x00\x00\x00\x00\x00", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ if (es_now)
+ outbuf.area[4] |= H2_F_DATA_END_STREAM;
+ h2_set_frame_size(outbuf.area, fsize);
+
+ /* update windows */
+ h2s->sws -= fsize;
+ h2c->mws -= fsize;
+
+ /* and exchange with our old area */
+ buf->area = old_area;
+ buf->data = buf->head = 0;
+ total += fsize;
+ fsize = 0;
+ h2c->flags |= H2_CF_MBUF_HAS_DATA;
+
+ TRACE_PROTO("sent H2 DATA frame (zero-copy)", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto out;
+ }
+
+ copy:
+ /* for DATA and EOM we'll have to emit a frame, even if empty */
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= 9 || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < 9) {
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("output buffer full", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+
+ /* len: 0x000000 (fill later), type: 0(DATA), flags: none=0 */
+ memcpy(outbuf.area, "\x00\x00\x00\x00\x00", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ outbuf.data = 9;
+
+ /* we have in <fsize> the exact number of bytes we need to copy from
+ * the HTX buffer. We need to check this against the connection's and
+ * the stream's send windows, and to ensure that this fits in the max
+ * frame size and in the buffer's available space minus 9 bytes (for
+ * the frame header). The connection's flow control is applied last so
+ * that we can use a separate list of streams which are immediately
+ * unblocked on window opening. Note: we don't implement padding.
+ */
+
+ if (!fsize)
+ goto send_empty;
+
+ if (h2s_mws(h2s) <= 0) {
+ h2s->flags |= H2_SF_BLK_SFCTL;
+ if (LIST_INLIST(&h2s->list))
+ h2_remove_from_list(h2s);
+ LIST_APPEND(&h2c->blocked_list, &h2s->list);
+ TRACE_STATE("stream window <=0, flow-controlled", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_H2S_FCTL, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (fsize > count)
+ fsize = count;
+
+ if (fsize > h2s_mws(h2s))
+ fsize = h2s_mws(h2s); // >0
+
+ if (h2c->mfs && fsize > h2c->mfs)
+ fsize = h2c->mfs; // >0
+
+ if (fsize + 9 > outbuf.size) {
+		/* It doesn't fit in the contiguous area at once. If it fits
+		 * in the buffer's total room and the amount of data to move
+		 * is low, let's defragment the buffer now.
+ */
+ if (b_space_wraps(mbuf) &&
+ (fsize + 9 <= b_room(mbuf)) &&
+ b_data(mbuf) <= MAX_DATA_REALIGN)
+ goto realign_again;
+ fsize = outbuf.size - 9;
+ trunc_out = 1;
+
+ if (fsize <= 0) {
+ /* no need to send an empty frame here */
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("output buffer full", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto end;
+ }
+ }
+
+ if (h2c->mws <= 0) {
+ h2s->flags |= H2_SF_BLK_MFCTL;
+ TRACE_STATE("connection window <=0, stream flow-controlled", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_H2C_FCTL, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (fsize > h2c->mws)
+ fsize = h2c->mws;
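+
+	/* at this point fsize is effectively
+	 * MIN(bsize, count, h2s_mws(h2s), h2c->mfs (when set), h2c->mws, outbuf.size - 9)
+	 * and is strictly positive.
+	 */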
+
+	/* now let's copy this into the output buffer */
+ memcpy(outbuf.area + 9, htx_get_blk_ptr(htx, blk), fsize);
+ h2s->sws -= fsize;
+ h2c->mws -= fsize;
+ count -= fsize;
+
+ send_empty:
+ /* update the frame's size */
+ h2_set_frame_size(outbuf.area, fsize);
+
+ /* consume incoming HTX block */
+ total += fsize;
+ if (fsize == bsize) {
+ htx_remove_blk(htx, blk);
+ if ((htx->flags & HTX_FL_EOM) && htx_is_empty(htx)) {
+ /* EOM+empty: we may need to add END_STREAM (except for tunneled
+ * message)
+ */
+ if (!(h2s->flags & H2_SF_BODY_TUNNEL))
+ es_now = 1;
+ }
+ }
+ else {
+ /* we've truncated this block */
+ htx_cut_data_blk(htx, blk, fsize);
+ }
+
+ if (es_now)
+ outbuf.area[4] |= H2_F_DATA_END_STREAM;
+
+	/* commit the H2 DATA frame */
+ b_add(mbuf, fsize + 9);
+ h2c->flags |= H2_CF_MBUF_HAS_DATA;
+
+ out:
+ if (es_now) {
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HLOC;
+ else
+ h2s_close(h2s);
+
+ h2s->flags |= H2_SF_ES_SENT;
+ TRACE_PROTO("ES flag set on outgoing frame", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_TX_EOI, h2c->conn, h2s);
+ }
+ else if (fsize) {
+ if (fsize == bsize) {
+ TRACE_DEVEL("more data may be available, trying to send another frame", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ goto new_frame;
+ }
+ else if (trunc_out) {
+ /* we've truncated this block */
+ goto new_frame;
+ }
+ }
+
+ end:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ return total;
+}
+
+/* Skip the message payload (DATA blocks) and emit an empty DATA frame with the
+ * ES flag set for stream <h2s>. This function is called for responses known to
+ * have no payload. Only DATA blocks are skipped. This means the trailers are
+ * still emitted. The caller must check the stream's status to detect any error
+ * which might have happened subsequently to a successful send. Returns the
+ * number of data bytes consumed, or zero if nothing done.
+ */
+static size_t h2s_skip_data(struct h2s *h2s, struct buffer *buf, size_t count)
+{
+ struct h2c *h2c = h2s->h2c;
+ struct htx *htx;
+ int bsize; /* htx block size */
+ int fsize; /* h2 frame size */
+ struct htx_blk *blk;
+ enum htx_blk_type type;
+ size_t total = 0;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+
+ htx = htx_from_buf(buf);
+
+ next_data:
+ if (!count || htx_is_empty(htx))
+ goto end;
+ blk = htx_get_head_blk(htx);
+ type = htx_get_blk_type(blk);
+ bsize = htx_get_blksz(blk);
+ fsize = bsize;
+ if (type != HTX_BLK_DATA)
+ goto end;
+
+ if (fsize > count)
+ fsize = count;
+
+ if (fsize != bsize)
+ goto skip_data;
+
+ if (!(htx->flags & HTX_FL_EOM) || !htx_is_unique_blk(htx, blk))
+ goto skip_data;
+
+ /* Here, it is the last block and it is also the end of the message. So
+ * we can emit an empty DATA frame with the ES flag set
+ */
+ if (h2_send_empty_data_es(h2s) <= 0)
+ goto end;
+
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HLOC;
+ else
+ h2s_close(h2s);
+
+ skip_data:
+ /* consume incoming HTX block */
+ total += fsize;
+ if (fsize == bsize) {
+ TRACE_DEVEL("more data may be available, trying to skip another frame", H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ htx_remove_blk(htx, blk);
+ goto next_data;
+ }
+ else {
+ /* we've truncated this block */
+ htx_cut_data_blk(htx, blk, fsize);
+ }
+
+ end:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_DATA, h2c->conn, h2s);
+ return total;
+}
+
+/* Try to send a HEADERS frame matching HTX_BLK_TLR series of blocks present in
+ * HTX message <htx> for the H2 stream <h2s>. Returns the number of bytes
+ * processed. The caller must check the stream's status to detect any error
+ * which might have happened subsequently to a successful send. The htx blocks
+ * are automatically removed from the message. The htx message is assumed to be
+ * valid since produced from the internal code. Processing stops when meeting
+ * the EOT, which *is* removed. All trailers are processed at once and sent as a
+ * single frame. The ES flag is always set.
+ */
+static size_t h2s_make_trailers(struct h2s *h2s, struct htx *htx)
+{
+ struct http_hdr list[global.tune.max_http_hdr];
+ struct h2c *h2c = h2s->h2c;
+ struct htx_blk *blk;
+ struct buffer outbuf;
+ struct buffer *mbuf;
+ enum htx_blk_type type;
+ int ret = 0;
+ int hdr;
+ int idx;
+
+ TRACE_ENTER(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+
+ /* get trailers. */
+ hdr = 0;
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_UNUSED)
+ continue;
+
+ if (type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_TLR) {
+ if (unlikely(hdr >= sizeof(list)/sizeof(list[0]) - 1)) {
+ TRACE_ERROR("too many headers", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+
+ list[hdr].n = htx_get_blk_name(htx, blk);
+ list[hdr].v = htx_get_blk_value(htx, blk);
+ hdr++;
+ }
+ else {
+ TRACE_ERROR("will not encode unexpected htx block", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_ERR, h2c->conn, h2s);
+ goto fail;
+ }
+ }
+
+ /* marker for end of trailers */
+ list[hdr].n = ist("");
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
+ chunk_reset(&outbuf);
+
+ while (1) {
+ outbuf = b_make(b_tail(mbuf), b_contig_space(mbuf), 0, 0);
+ if (outbuf.size >= 9 || !b_space_wraps(mbuf))
+ break;
+ realign_again:
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (outbuf.size < 9)
+ goto full;
+
+ /* len: 0x000000 (fill later), type: 1(HEADERS), flags: ENDH=4,ES=1 */
+ memcpy(outbuf.area, "\x00\x00\x00\x01\x05", 5);
+ write_n32(outbuf.area + 5, h2s->id); // 4 bytes
+ outbuf.data = 9;
+
+ /* encode all headers */
+ for (idx = 0; idx < hdr; idx++) {
+ /* these ones do not exist in H2 or must not appear in
+ * trailers and must be dropped.
+ */
+ if (isteq(list[idx].n, ist("host")) ||
+ isteq(list[idx].n, ist("content-length")) ||
+ isteq(list[idx].n, ist("connection")) ||
+ isteq(list[idx].n, ist("proxy-connection")) ||
+ isteq(list[idx].n, ist("keep-alive")) ||
+ isteq(list[idx].n, ist("upgrade")) ||
+ isteq(list[idx].n, ist("te")) ||
+ isteq(list[idx].n, ist("transfer-encoding")))
+ continue;
+
+ /* Skip all pseudo-headers */
+ if (*(list[idx].n.ptr) == ':')
+ continue;
+
+ if (!h2_encode_header(&outbuf, list[idx].n, list[idx].v, H2_EV_TX_FRAME|H2_EV_TX_HDR,
+ ist(TRC_LOC), __FUNCTION__, h2c, h2s)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+ if (outbuf.data == 9) {
+ /* here we have a problem, we have nothing to emit (either we
+		 * received an empty trailers block or we removed all of its
+ * contents above). Because of this we can't send a HEADERS
+ * frame, so we have to cheat and instead send an empty DATA
+ * frame conveying the ES flag.
+ */
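+		/* Concretely, byte 3 (type) switches from HEADERS (0x01) to
+		 * DATA (0x00) and byte 4 (flags) from END_HEADERS|END_STREAM
+		 * (0x05) to END_STREAM alone (0x01).
+		 */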
+ outbuf.area[3] = H2_FT_DATA;
+ outbuf.area[4] = H2_F_DATA_END_STREAM;
+ }
+
+ /* update the frame's size */
+ h2_set_frame_size(outbuf.area, outbuf.data - 9);
+
+ if (outbuf.data > h2c->mfs + 9) {
+ if (!h2_fragment_headers(&outbuf, h2c->mfs)) {
+ /* output full */
+ if (b_space_wraps(mbuf))
+ goto realign_again;
+ goto full;
+ }
+ }
+
+	/* commit the H2 trailers frame */
+ TRACE_PROTO("sent H2 trailers HEADERS frame", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_TX_EOI, h2c->conn, h2s);
+ b_add(mbuf, outbuf.data);
+ h2c->flags |= H2_CF_MBUF_HAS_DATA;
+ h2s->flags |= H2_SF_ES_SENT;
+
+ if (h2s->st == H2_SS_OPEN)
+ h2s->st = H2_SS_HLOC;
+ else
+ h2s_close(h2s);
+
+ /* OK we could properly deliver the response */
+ done:
+ /* remove all header blocks till the end and compute the corresponding size. */
+ ret = 0;
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ type = htx_get_blk_type(blk);
+ ret += htx_get_blksz(blk);
+ blk = htx_remove_blk(htx, blk);
+ /* The removed block is the EOT */
+ if (type == HTX_BLK_EOT)
+ break;
+ }
+
+ end:
+ TRACE_LEAVE(H2_EV_TX_FRAME|H2_EV_TX_HDR, h2c->conn, h2s);
+ return ret;
+ full:
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ ret = 0;
+ TRACE_STATE("mux buffer full", H2_EV_TX_FRAME|H2_EV_TX_HDR|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ fail:
+ /* unparsable HTX messages, too large ones to be produced in the local
+ * list etc go here (unrecoverable errors).
+ */
+ h2s_error(h2s, H2_ERR_INTERNAL_ERROR);
+ ret = 0;
+ goto end;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int h2_subscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+ struct h2c *h2c = h2s->h2c;
+
+ TRACE_ENTER(H2_EV_STRM_SEND|H2_EV_STRM_RECV, h2c->conn, h2s);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(h2s->subs && h2s->subs != es);
+
+ es->events |= event_type;
+ h2s->subs = es;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", H2_EV_STRM_RECV, h2c->conn, h2s);
+
+ if (event_type & SUB_RETRY_SEND) {
+ TRACE_DEVEL("subscribe(send)", H2_EV_STRM_SEND, h2c->conn, h2s);
+ if (!(h2s->flags & H2_SF_BLK_SFCTL) &&
+ !LIST_INLIST(&h2s->list)) {
+ if (h2s->flags & H2_SF_BLK_MFCTL) {
+ TRACE_DEVEL("Adding to fctl list", H2_EV_STRM_SEND, h2c->conn, h2s);
+ LIST_APPEND(&h2c->fctl_list, &h2s->list);
+ }
+ else {
+ TRACE_DEVEL("Adding to send list", H2_EV_STRM_SEND, h2c->conn, h2s);
+ LIST_APPEND(&h2c->send_list, &h2s->list);
+ }
+ }
+ }
+ TRACE_LEAVE(H2_EV_STRM_SEND|H2_EV_STRM_RECV, h2c->conn, h2s);
+ return 0;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int h2_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+
+ TRACE_ENTER(H2_EV_STRM_SEND|H2_EV_STRM_RECV, h2s->h2c->conn, h2s);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(h2s->subs && h2s->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ h2s->subs = NULL;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("unsubscribe(recv)", H2_EV_STRM_RECV, h2s->h2c->conn, h2s);
+
+ if (event_type & SUB_RETRY_SEND) {
+ TRACE_DEVEL("unsubscribe(send)", H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+ h2s->flags &= ~H2_SF_NOTIFIED;
+ if (!(h2s->flags & (H2_SF_WANT_SHUTR | H2_SF_WANT_SHUTW)))
+ h2_remove_from_list(h2s);
+ }
+
+ TRACE_LEAVE(H2_EV_STRM_SEND|H2_EV_STRM_RECV, h2s->h2c->conn, h2s);
+ return 0;
+}
+
+
+/* Called from the upper layer, to receive data
+ *
+ * The caller is responsible for defragmenting <buf> if necessary. But <flags>
+ * must be tested to know the calling context. If CO_RFL_BUF_FLUSH is set, it
+ * means the caller wants to flush input data (from the mux buffer and the
+ * channel buffer) to be able to use kernel splicing or any kind of mux-to-mux
+ * xfer. If CO_RFL_KEEP_RECV is set, the mux must always subscribe for read
+ * events before giving back. CO_RFL_BUF_WET is set if <buf> is congested with
+ * data scheduled for leaving soon. CO_RFL_BUF_NOT_STUCK is set to instruct the
+ * mux it may optimize the data copy to <buf> if necessary. Otherwise, it should
+ * copy as much data as possible.
+ */
+static size_t h2_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+ struct h2c *h2c = h2s->h2c;
+ struct htx *h2s_htx = NULL;
+ struct htx *buf_htx = NULL;
+ size_t ret = 0;
+
+ TRACE_ENTER(H2_EV_STRM_RECV, h2c->conn, h2s);
+
+ /* transfer possibly pending data to the upper layer */
+ h2s_htx = htx_from_buf(&h2s->rxbuf);
+ if (htx_is_empty(h2s_htx) && !(h2s_htx->flags & HTX_FL_PARSING_ERROR)) {
+ /* Here htx_to_buf() will set buffer data to 0 because
+ * the HTX is empty.
+ */
+ htx_to_buf(h2s_htx, &h2s->rxbuf);
+ goto end;
+ }
+ ret = h2s_htx->data;
+ buf_htx = htx_from_buf(buf);
+
+ /* <buf> is empty and the message is small enough, swap the
+ * buffers. */
+ if (htx_is_empty(buf_htx) && htx_used_space(h2s_htx) <= count) {
+ htx_to_buf(buf_htx, buf);
+ htx_to_buf(h2s_htx, &h2s->rxbuf);
+ b_xfer(buf, &h2s->rxbuf, b_data(&h2s->rxbuf));
+ goto end;
+ }
+
+ htx_xfer_blks(buf_htx, h2s_htx, count, HTX_BLK_UNUSED);
+
+ if (h2s_htx->flags & HTX_FL_PARSING_ERROR) {
+ buf_htx->flags |= HTX_FL_PARSING_ERROR;
+ if (htx_is_empty(buf_htx))
+ se_fl_set(h2s->sd, SE_FL_EOI);
+ }
+ else if (htx_is_empty(h2s_htx)) {
+ buf_htx->flags |= (h2s_htx->flags & HTX_FL_EOM);
+ }
+
+ buf_htx->extra = (h2s_htx->extra ? (h2s_htx->data + h2s_htx->extra) : 0);
+ htx_to_buf(buf_htx, buf);
+ htx_to_buf(h2s_htx, &h2s->rxbuf);
+ ret -= h2s_htx->data;
+
+ end:
+ if (b_data(&h2s->rxbuf))
+ se_fl_set(h2s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ else {
+ if (!(h2c->flags & H2_CF_IS_BACK) && (h2s->flags & (H2_SF_BODY_TUNNEL|H2_SF_ES_RCVD))) {
+ /* If request ES is reported to the upper layer, it means the
+ * H2S now expects data from the opposite side.
+ */
+ se_expect_data(h2s->sd);
+ }
+
+ se_fl_clr(h2s->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ h2s_propagate_term_flags(h2c, h2s);
+ if (b_size(&h2s->rxbuf)) {
+ b_free(&h2s->rxbuf);
+ offer_buffers(NULL, 1);
+ }
+ }
+
+ if (ret && h2c->dsi == h2s->id) {
+ /* demux is blocking on this stream's buffer */
+ h2c->flags &= ~H2_CF_DEM_SFULL;
+ h2c_restart_reading(h2c, 1);
+ }
+
+ TRACE_LEAVE(H2_EV_STRM_RECV, h2c->conn, h2s);
+ return ret;
+}
+
+
+/* Called from the upper layer, to send data from buffer <buf> for no more than
+ * <count> bytes. Returns the number of bytes effectively sent. Some status
+ * flags may be updated on the stream connector.
+ */
+static size_t h2_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+ size_t total = 0;
+ size_t ret;
+ struct htx *htx;
+ struct htx_blk *blk;
+ enum htx_blk_type btype;
+ uint32_t bsize;
+ int32_t idx;
+
+ TRACE_ENTER(H2_EV_H2S_SEND|H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+
+ /* If we were not just woken because we wanted to send but couldn't,
+ * and there's somebody else that is waiting to send, do nothing,
+ * we will subscribe later and be put at the end of the list
+ */
+ if (!(h2s->flags & H2_SF_NOTIFIED) &&
+ (!LIST_ISEMPTY(&h2s->h2c->send_list) || !LIST_ISEMPTY(&h2s->h2c->fctl_list))) {
+ if (LIST_INLIST(&h2s->list))
+ TRACE_DEVEL("stream already waiting, leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s);
+ else {
+ TRACE_DEVEL("other streams already waiting, going to the queue and leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s);
+ h2s->h2c->flags |= H2_CF_WAIT_INLIST;
+ }
+ return 0;
+ }
+ h2s->flags &= ~H2_SF_NOTIFIED;
+
+ if (h2s->h2c->st0 < H2_CS_FRAME_H) {
+ TRACE_DEVEL("connection not ready, leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s);
+ return 0;
+ }
+
+ if (h2s->h2c->st0 >= H2_CS_ERROR) {
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+ TRACE_DEVEL("connection is in error, leaving in error", H2_EV_H2S_SEND|H2_EV_H2S_BLK|H2_EV_H2S_ERR|H2_EV_STRM_ERR, h2s->h2c->conn, h2s);
+ return 0;
+ }
+
+ htx = htx_from_buf(buf);
+
+ if (!(h2s->flags & H2_SF_OUTGOING_DATA) && count)
+ h2s->flags |= H2_SF_OUTGOING_DATA;
+
+ if (htx->extra && htx->extra != HTX_UNKOWN_PAYLOAD_LENGTH)
+ h2s->flags |= H2_SF_MORE_HTX_DATA;
+ else
+ h2s->flags &= ~H2_SF_MORE_HTX_DATA;
+
+ if (h2s->id == 0) {
+ int32_t id = h2c_get_next_sid(h2s->h2c);
+
+ if (id < 0) {
+ se_fl_set(h2s->sd, SE_FL_ERROR);
+ TRACE_DEVEL("couldn't get a stream ID, leaving in error", H2_EV_H2S_SEND|H2_EV_H2S_BLK|H2_EV_H2S_ERR|H2_EV_STRM_ERR, h2s->h2c->conn, h2s);
+ return 0;
+ }
+
+ eb32_delete(&h2s->by_id);
+ h2s->by_id.key = h2s->id = id;
+ h2s->h2c->max_id = id;
+ h2s->h2c->nb_reserved--;
+ eb32_insert(&h2s->h2c->streams_by_id, &h2s->by_id);
+ }
+
+ while (h2s->st < H2_SS_HLOC && !(h2s->flags & H2_SF_BLK_ANY) &&
+ count && !htx_is_empty(htx)) {
+ idx = htx_get_head(htx);
+ blk = htx_get_blk(htx, idx);
+ btype = htx_get_blk_type(blk);
+ bsize = htx_get_blksz(blk);
+
+ switch (btype) {
+ case HTX_BLK_REQ_SL:
+ /* start-line before headers */
+ ret = h2s_snd_bhdrs(h2s, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ case HTX_BLK_RES_SL:
+ /* start-line before headers */
+ ret = h2s_snd_fhdrs(h2s, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ case HTX_BLK_DATA:
+ /* all these cause the emission of a DATA frame (possibly empty) */
+ if (!(h2s->h2c->flags & H2_CF_IS_BACK) &&
+ (h2s->flags & (H2_SF_BODY_TUNNEL|H2_SF_BODYLESS_RESP)) == H2_SF_BODYLESS_RESP)
+ ret = h2s_skip_data(h2s, buf, count);
+ else
+ ret = h2s_make_data(h2s, buf, count);
+ if (ret > 0) {
+ htx = htx_from_buf(buf);
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ case HTX_BLK_TLR:
+ case HTX_BLK_EOT:
+ /* This is the first trailers block, all the subsequent ones */
+ ret = h2s_make_trailers(h2s, htx);
+ if (ret > 0) {
+ total += ret;
+ count -= ret;
+ if (ret < bsize)
+ goto done;
+ }
+ break;
+
+ default:
+ htx_remove_blk(htx, blk);
+ total += bsize;
+ count -= bsize;
+ break;
+ }
+ }
+
+ done:
+ if (h2s->st >= H2_SS_HLOC) {
+ /* trim any possibly pending data after we close (extra CR-LF,
+ * unprocessed trailers, abnormal extra data, ...)
+ */
+ total += count;
+ count = 0;
+ }
+
+ /* RST are sent similarly to frame acks */
+ if (h2s->st == H2_SS_ERROR || h2s->flags & H2_SF_RST_RCVD) {
+ TRACE_DEVEL("reporting RST/error to the app-layer stream", H2_EV_H2S_SEND|H2_EV_H2S_ERR|H2_EV_STRM_ERR, h2s->h2c->conn, h2s);
+ se_fl_set_error(h2s->sd);
+ if (h2s_send_rst_stream(h2s->h2c, h2s) > 0)
+ h2s_close(h2s);
+ }
+
+ htx_to_buf(htx, buf);
+
+ if (total > 0) {
+ if (!(h2s->h2c->wait_event.events & SUB_RETRY_SEND)) {
+ TRACE_DEVEL("data queued, waking up h2c sender", H2_EV_H2S_SEND|H2_EV_H2C_SEND, h2s->h2c->conn, h2s);
+ if (h2_send(h2s->h2c))
+ tasklet_wakeup(h2s->h2c->wait_event.tasklet);
+ }
+
+ }
+ /* If we're waiting for flow control, and we got a shutr on the
+ * connection, we will never be unlocked, so add an error on
+ * the stream connector.
+ */
+ if ((h2s->h2c->flags & H2_CF_RCVD_SHUT) &&
+ !b_data(&h2s->h2c->dbuf) &&
+ (h2s->flags & (H2_SF_BLK_SFCTL | H2_SF_BLK_MFCTL))) {
+ TRACE_DEVEL("fctl with shutr, reporting error to app-layer", H2_EV_H2S_SEND|H2_EV_STRM_SEND|H2_EV_STRM_ERR, h2s->h2c->conn, h2s);
+ se_fl_set_error(h2s->sd);
+ }
+
+ if (total > 0 && !(h2s->flags & H2_SF_BLK_SFCTL) &&
+ !(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))) {
+ /* Ok we managed to send something, leave the send_list if we were still there */
+ h2_remove_from_list(h2s);
+ TRACE_DEVEL("Removed from h2s list", H2_EV_H2S_SEND|H2_EV_H2C_SEND, h2s->h2c->conn, h2s);
+ }
+
+ TRACE_LEAVE(H2_EV_H2S_SEND|H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+ return total;
+}
+
+static size_t h2_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+ struct h2c *h2c = h2s->h2c;
+ struct buffer *mbuf;
+	size_t sz, ret = 0;
+
+ TRACE_ENTER(H2_EV_H2S_SEND|H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+
+ /* If we were not just woken because we wanted to send but couldn't,
+ * and there's somebody else that is waiting to send, do nothing,
+ * we will subscribe later and be put at the end of the list
+ *
+	 * WARNING: h2_done_ff() is responsible for removing the H2_SF_NOTIFIED flag
+ * depending on iobuf flags.
+ */
+ if (!(h2s->flags & H2_SF_NOTIFIED) &&
+ (!LIST_ISEMPTY(&h2c->send_list) || !LIST_ISEMPTY(&h2c->fctl_list))) {
+ if (LIST_INLIST(&h2s->list))
+ TRACE_DEVEL("stream already waiting, leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s);
+ else {
+ TRACE_DEVEL("other streams already waiting, going to the queue and leaving", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2s->h2c->conn, h2s);
+ h2s->h2c->flags |= H2_CF_WAIT_INLIST;
+ }
+ h2s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ goto end;
+ }
+
+ if (h2s_mws(h2s) <= 0) {
+ h2s->flags |= H2_SF_BLK_SFCTL;
+ if (LIST_INLIST(&h2s->list))
+ LIST_DEL_INIT(&h2s->list);
+ LIST_APPEND(&h2c->blocked_list, &h2s->list);
+ h2s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ TRACE_STATE("stream window <=0, flow-controlled", H2_EV_H2S_SEND|H2_EV_H2S_FCTL, h2c->conn, h2s);
+ goto end;
+ }
+ if (h2c->mws <= 0) {
+ h2s->flags |= H2_SF_BLK_MFCTL;
+ h2s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ TRACE_STATE("connection window <=0, stream flow-controlled", H2_EV_H2S_SEND|H2_EV_H2C_FCTL, h2c->conn, h2s);
+ goto end;
+ }
+
+ sz = count;
+ if (sz > h2s_mws(h2s))
+ sz = h2s_mws(h2s);
+ if (h2c->mfs && sz > h2c->mfs)
+ sz = h2c->mfs; // >0
+ if (sz > h2c->mws)
+ sz = h2c->mws;
+
+ if (count > sz)
+ count = sz;
+
+ mbuf = br_tail(h2c->mbuf);
+ retry:
+ if (br_count(h2c->mbuf) > h2c->nb_streams) {
+ /* more buffers than streams allocated, pointless
+ * to continue, we'd use more RAM for no reason.
+ */
+ h2s->flags |= H2_SF_BLK_MROOM;
+ h2s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_TX_FRAME|H2_EV_TX_DATA|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (!h2_get_buf(h2c, mbuf)) {
+ h2c->flags |= H2_CF_MUX_MALLOC;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ h2s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ TRACE_STATE("waiting for room in output buffer", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
+ if (b_room(mbuf) < sz && b_room(mbuf) < b_size(mbuf) / 4) {
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ h2s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ TRACE_STATE("too large data present in output buffer, waiting for emptiness", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
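+	/* realign the buffer if the 9-byte frame header cannot be stored
+	 * contiguously at the end
+	 */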
+ while (1) {
+ if (b_contig_space(mbuf) >= 9 || !b_space_wraps(mbuf))
+ break;
+ b_slow_realign(mbuf, trash.area, b_data(mbuf));
+ }
+
+ if (b_contig_space(mbuf) <= 9) {
+ if ((mbuf = br_tail_add(h2c->mbuf)) != NULL)
+ goto retry;
+ h2c->flags |= H2_CF_MUX_MFULL;
+ h2s->flags |= H2_SF_BLK_MROOM;
+ h2s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED;
+ TRACE_STATE("output buffer full", H2_EV_H2S_SEND|H2_EV_H2S_BLK, h2c->conn, h2s);
+ goto end;
+ }
+
+ /* Cannot forward more than available room in output buffer */
+ sz = b_contig_space(mbuf) - 9;
+ if (count > sz)
+ count = sz;
+
+ /* len: 0x000000 (fill later), type: 0(DATA), flags: none=0 */
+ memcpy(b_tail(mbuf), "\x00\x00\x00\x00\x00", 5);
+ write_n32(b_tail(mbuf) + 5, h2s->id); // 4 bytes
+
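+	/* hand the mux buffer over to the stream endpoint; the 9 bytes written
+	 * above are reserved for the frame header, whose length field will be
+	 * completed by h2_done_ff() once the payload size is known
+	 */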
+ h2s->sd->iobuf.buf = mbuf;
+ h2s->sd->iobuf.offset = 9;
+ h2s->sd->iobuf.data = 0;
+
+ /* forward remaining input data */
+ if (b_data(input)) {
+ size_t xfer = count;
+
+ if (xfer > b_data(input))
+ xfer = b_data(input);
+ b_add(mbuf, 9);
+ h2s->sd->iobuf.data = b_xfer(mbuf, input, xfer);
+ b_sub(mbuf, 9);
+
+ /* Cannot forward more data, wait for room */
+ if (b_data(input))
+ goto end;
+ }
+
+ ret = count - h2s->sd->iobuf.data;
+ end:
+ if (h2s->sd->iobuf.flags & IOBUF_FL_FF_BLOCKED)
+ h2s->flags &= ~H2_SF_NOTIFIED;
+ TRACE_LEAVE(H2_EV_H2S_SEND|H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+ return ret;
+}
+
+static size_t h2_done_ff(struct stconn *sc)
+{
+ struct h2s *h2s = __sc_mux_strm(sc);
+ struct h2c *h2c = h2s->h2c;
+ struct sedesc *sd = h2s->sd;
+ struct buffer *mbuf;
+ char *head;
+ size_t total = 0;
+
+ TRACE_ENTER(H2_EV_H2S_SEND|H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+
+ mbuf = sd->iobuf.buf;
+ if (!mbuf)
+ goto end;
+ head = b_peek(mbuf, b_data(mbuf) - sd->iobuf.data);
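+	/* <head> points to the start of the 9-byte frame header reserved by
+	 * h2_nego_ff() in front of the payload
+	 */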
+
+ if (sd->iobuf.flags & IOBUF_FL_EOI)
+ h2s->flags &= ~H2_SF_MORE_HTX_DATA;
+
+ if (!(sd->iobuf.flags & IOBUF_FL_FF_BLOCKED) &&
+ !(h2s->flags & H2_SF_BLK_SFCTL) &&
+ !(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))) {
+ /* Ok we managed to send something, leave the send_list if we were still there */
+ h2_remove_from_list(h2s);
+ }
+
+ if (!sd->iobuf.data)
+ goto end;
+
+ /* Perform a synchronous send but in all cases, consider
+ * everything was already sent from the SC point of view.
+ */
+ total = sd->iobuf.data;
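+	/* back-fill the DATA frame length, account for the 9 header bytes and
+	 * deduce the payload from both flow-control windows
+	 */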
+ h2_set_frame_size(head, total);
+ b_add(mbuf, 9);
+ h2s->sws -= total;
+ h2c->mws -= total;
+ if (h2_send(h2s->h2c))
+ tasklet_wakeup(h2s->h2c->wait_event.tasklet);
+
+ end:
+ sd->iobuf.buf = NULL;
+ sd->iobuf.offset = 0;
+ sd->iobuf.data = 0;
+
+ if (!(sd->iobuf.flags & IOBUF_FL_INTERIM_FF))
+ h2s->flags &= ~H2_SF_NOTIFIED;
+
+ TRACE_LEAVE(H2_EV_H2S_SEND|H2_EV_STRM_SEND, h2s->h2c->conn, h2s);
+ return total;
+}
+
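+/* resume_fastfwd callback: there is nothing to resume for the H2 mux, so it
+ * always returns 0.
+ */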
+static int h2_resume_ff(struct stconn *sc, unsigned int flags)
+{
+ return 0;
+}
+
+/* appends some info about stream <h2s> to buffer <msg>, or does nothing if
+ * <h2s> is NULL. Returns non-zero if the stream is considered suspicious. May
+ * emit multiple lines, each new one being prefixed with <pfx>, if <pfx> is not
+ * NULL, otherwise a single line is used.
+ */
+static int h2_dump_h2s_info(struct buffer *msg, const struct h2s *h2s, const char *pfx)
+{
+ int ret = 0;
+
+ if (!h2s)
+ return ret;
+
+ chunk_appendf(msg, " h2s.id=%d .st=%s .flg=0x%04x .rxbuf=%u@%p+%u/%u",
+ h2s->id, h2s_st_to_str(h2s->st), h2s->flags,
+ (unsigned int)b_data(&h2s->rxbuf), b_orig(&h2s->rxbuf),
+ (unsigned int)b_head_ofs(&h2s->rxbuf), (unsigned int)b_size(&h2s->rxbuf));
+
+ if (pfx)
+ chunk_appendf(msg, "\n%s", pfx);
+
+ chunk_appendf(msg, " .sc=%p", h2s_sc(h2s));
+ if (h2s_sc(h2s))
+ chunk_appendf(msg, "(.flg=0x%08x .app=%p)",
+ h2s_sc(h2s)->flags, h2s_sc(h2s)->app);
+
+ chunk_appendf(msg, " .sd=%p", h2s->sd);
+ chunk_appendf(msg, "(.flg=0x%08x)", se_fl_get(h2s->sd));
+
+ if (pfx)
+ chunk_appendf(msg, "\n%s", pfx);
+
+ chunk_appendf(msg, " .subs=%p", h2s->subs);
+ if (h2s->subs) {
+ chunk_appendf(msg, "(ev=%d tl=%p", h2s->subs->events, h2s->subs->tasklet);
+ chunk_appendf(msg, " tl.calls=%d tl.ctx=%p tl.fct=",
+ h2s->subs->tasklet->calls,
+ h2s->subs->tasklet->context);
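+		/* a tasklet called a million times or more is considered suspicious */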
+ if (h2s->subs->tasklet->calls >= 1000000)
+ ret = 1;
+ resolve_sym_name(msg, NULL, h2s->subs->tasklet->process);
+ chunk_appendf(msg, ")");
+ }
+ return ret;
+}
+
+/* appends some info about connection <h2c> to buffer <msg>, or does nothing if
+ * <h2c> is NULL. Returns non-zero if the connection is considered suspicious.
+ * May emit multiple lines, each new one being prefixed with <pfx>, if <pfx> is
+ * not NULL, otherwise a single line is used.
+ */
+static int h2_dump_h2c_info(struct buffer *msg, struct h2c *h2c, const char *pfx)
+{
+ const struct buffer *hmbuf, *tmbuf;
+ const struct h2s *h2s = NULL;
+ struct eb32_node *node;
+ int fctl_cnt = 0;
+ int send_cnt = 0;
+ int tree_cnt = 0;
+ int orph_cnt = 0;
+ int ret = 0;
+
+ if (!h2c)
+ return ret;
+
+ list_for_each_entry(h2s, &h2c->fctl_list, list)
+ fctl_cnt++;
+
+ list_for_each_entry(h2s, &h2c->send_list, list)
+ send_cnt++;
+
+ node = eb32_first(&h2c->streams_by_id);
+ while (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ tree_cnt++;
+ if (!h2s_sc(h2s))
+ orph_cnt++;
+ node = eb32_next(node);
+ }
+
+ hmbuf = br_head(h2c->mbuf);
+ tmbuf = br_tail(h2c->mbuf);
+ chunk_appendf(msg, " h2c.st0=%s .err=%d .maxid=%d .lastid=%d .flg=0x%04x"
+ " .nbst=%u .nbsc=%u",
+ h2c_st_to_str(h2c->st0), h2c->errcode, h2c->max_id, h2c->last_sid, h2c->flags,
+ h2c->nb_streams, h2c->nb_sc);
+
+ if (pfx)
+ chunk_appendf(msg, "\n%s", pfx);
+
+ chunk_appendf(msg, " .fctl_cnt=%d .send_cnt=%d .tree_cnt=%d"
+ " .orph_cnt=%d .sub=%d .dsi=%d .dbuf=%u@%p+%u/%u",
+ fctl_cnt, send_cnt, tree_cnt, orph_cnt,
+ h2c->wait_event.events, h2c->dsi,
+ (unsigned int)b_data(&h2c->dbuf), b_orig(&h2c->dbuf),
+ (unsigned int)b_head_ofs(&h2c->dbuf), (unsigned int)b_size(&h2c->dbuf));
+
+ if (pfx)
+ chunk_appendf(msg, "\n%s", pfx);
+
+ chunk_appendf(msg, " .mbuf=[%u..%u|%u],h=[%u@%p+%u/%u],t=[%u@%p+%u/%u]",
+ br_head_idx(h2c->mbuf), br_tail_idx(h2c->mbuf), br_size(h2c->mbuf),
+ (unsigned int)b_data(hmbuf), b_orig(hmbuf),
+ (unsigned int)b_head_ofs(hmbuf), (unsigned int)b_size(hmbuf),
+ (unsigned int)b_data(tmbuf), b_orig(tmbuf),
+ (unsigned int)b_head_ofs(tmbuf), (unsigned int)b_size(tmbuf));
+
+ chunk_appendf(msg, " .task=%p", h2c->task);
+ if (h2c->task) {
+ chunk_appendf(msg, " .exp=%s",
+ h2c->task->expire ? tick_is_expired(h2c->task->expire, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(h2c->task->expire - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ }
+
+ return ret;
+}
+
+/* for debugging with CLI's "show fd" command */
+static int h2_show_fd(struct buffer *msg, struct connection *conn)
+{
+ struct h2c *h2c = conn->ctx;
+ const struct h2s *h2s;
+ struct eb32_node *node;
+ int ret = 0;
+
+ if (!h2c)
+ return ret;
+
+ ret |= h2_dump_h2c_info(msg, h2c, NULL);
+
+ node = eb32_last(&h2c->streams_by_id);
+ if (node) {
+ h2s = container_of(node, struct h2s, by_id);
+ chunk_appendf(msg, " last_h2s=%p", h2s);
+ ret |= h2_dump_h2s_info(msg, h2s, NULL);
+ }
+
+ return ret;
+}
+
+/* for debugging with CLI's "show sess" command. May emit multiple lines, each
+ * new one being prefixed with <pfx>, if <pfx> is not NULL, otherwise a single
+ * line is used. Each field starts with a space so it's safe to print it after
+ * existing fields.
+ */
+static int h2_show_sd(struct buffer *msg, struct sedesc *sd, const char *pfx)
+{
+ struct h2s *h2s = sd->se;
+ int ret = 0;
+
+ if (!h2s)
+ return ret;
+
+ chunk_appendf(msg, " h2s=%p", h2s);
+ ret |= h2_dump_h2s_info(msg, h2s, pfx);
+ if (pfx)
+ chunk_appendf(msg, "\n%s", pfx);
+ chunk_appendf(msg, " h2c=%p", h2s->h2c);
+ ret |= h2_dump_h2c_info(msg, h2s->h2c, pfx);
+ return ret;
+}
+
+/* Migrate the connection to the current thread.
+ * Return 0 if successful, non-zero otherwise.
+ * Expected to be called with the old thread lock held.
+ */
+static int h2_takeover(struct connection *conn, int orig_tid)
+{
+ struct h2c *h2c = conn->ctx;
+ struct task *task;
+ struct task *new_task;
+ struct tasklet *new_tasklet;
+
+ /* Pre-allocate tasks so that we don't have to roll back after the xprt
+ * has been migrated.
+ */
+ new_task = task_new_here();
+ new_tasklet = tasklet_new();
+ if (!new_task || !new_tasklet)
+ goto fail;
+
+ if (fd_takeover(conn->handle.fd, conn) != 0)
+ goto fail;
+
+ if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) {
+		/* We failed to take over the xprt. Even though the connection
+		 * may still be valid, flag it as in error since we have already
+		 * taken over the fd, and wake the tasklet so that it destroys
+		 * it.
+		 */
+ conn->flags |= CO_FL_ERROR;
+ tasklet_wakeup_on(h2c->wait_event.tasklet, orig_tid);
+ goto fail;
+ }
+
+ if (h2c->wait_event.events)
+ h2c->conn->xprt->unsubscribe(h2c->conn, h2c->conn->xprt_ctx,
+ h2c->wait_event.events, &h2c->wait_event);
+
+ task = h2c->task;
+ if (task) {
+ /* only assign a task if there was already one, otherwise
+ * the preallocated new task will be released.
+ */
+ task->context = NULL;
+ h2c->task = NULL;
+ __ha_barrier_store();
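+		/* let other threads see the NULL context before the task is killed */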
+ task_kill(task);
+
+ h2c->task = new_task;
+ new_task = NULL;
+ h2c->task->process = h2_timeout_task;
+ h2c->task->context = h2c;
+ }
+
+ /* To let the tasklet know it should free itself, and do nothing else,
+ * set its context to NULL.
+ */
+ h2c->wait_event.tasklet->context = NULL;
+ tasklet_wakeup_on(h2c->wait_event.tasklet, orig_tid);
+
+ h2c->wait_event.tasklet = new_tasklet;
+ h2c->wait_event.tasklet->process = h2_io_cb;
+ h2c->wait_event.tasklet->context = h2c;
+ h2c->conn->xprt->subscribe(h2c->conn, h2c->conn->xprt_ctx,
+ SUB_RETRY_RECV, &h2c->wait_event);
+
+ if (new_task)
+ __task_free(new_task);
+ return 0;
+ fail:
+ if (new_task)
+ __task_free(new_task);
+ tasklet_free(new_tasklet);
+ return -1;
+}
+
+/*******************************************************/
+/* functions below are dedicated to the config parsers */
+/*******************************************************/
+
+/* config parser for global "tune.h2.header-table-size" */
+static int h2_parse_header_table_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ h2_settings_header_table_size = atoi(args[1]);
+ if (h2_settings_header_table_size < 4096 || h2_settings_header_table_size > 65536) {
+ memprintf(err, "'%s' expects a numeric value between 4096 and 65536.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.h2.{be.,fe.,}initial-window-size" */
+static int h2_parse_initial_window_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int *vptr;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* backend/frontend/default */
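+	/* (args[0][8] is the first character after the "tune.h2." prefix:
+	 * 'b' for "be.", 'f' for "fe.", anything else selects the default)
+	 */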
+ vptr = (args[0][8] == 'b') ? &h2_be_settings_initial_window_size :
+ (args[0][8] == 'f') ? &h2_fe_settings_initial_window_size :
+ &h2_settings_initial_window_size;
+
+ *vptr = atoi(args[1]);
+ if (*vptr < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.h2.{be.,fe.,}max-concurrent-streams" */
+static int h2_parse_max_concurrent_streams(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ uint *vptr;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* backend/frontend/default */
+ vptr = (args[0][8] == 'b') ? &h2_be_settings_max_concurrent_streams :
+ (args[0][8] == 'f') ? &h2_fe_settings_max_concurrent_streams :
+ &h2_settings_max_concurrent_streams;
+
+ *vptr = atoi(args[1]);
+ if ((int)*vptr < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.h2.fe.max-total-streams" */
+static int h2_parse_max_total_streams(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ uint *vptr;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ /* frontend only for now */
+ vptr = &h2_fe_max_total_streams;
+
+ *vptr = atoi(args[1]);
+ if ((int)*vptr < 0) {
+ memprintf(err, "'%s' expects a positive numeric value.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.h2.max-frame-size" */
+static int h2_parse_max_frame_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ h2_settings_max_frame_size = atoi(args[1]);
+ if (h2_settings_max_frame_size < 16384 || h2_settings_max_frame_size > 16777215) {
+ memprintf(err, "'%s' expects a numeric value between 16384 and 16777215.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+
+/* config parser for global "tune.h2.zero-copy-fwd-send" */
+static int h2_parse_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_H2_SND;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_H2_SND;
+ else {
+ memprintf(err, "'%s' expects 'on' or 'off'.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/****************************************/
+/* MUX initialization and instantiation */
+/****************************************/
+
+/* The mux operations */
+static const struct mux_ops h2_ops = {
+ .init = h2_init,
+ .wake = h2_wake,
+ .snd_buf = h2_snd_buf,
+ .rcv_buf = h2_rcv_buf,
+ .nego_fastfwd = h2_nego_ff,
+ .done_fastfwd = h2_done_ff,
+ .resume_fastfwd = h2_resume_ff,
+ .subscribe = h2_subscribe,
+ .unsubscribe = h2_unsubscribe,
+ .attach = h2_attach,
+ .get_first_sc = h2_get_first_sc,
+ .detach = h2_detach,
+ .destroy = h2_destroy,
+ .avail_streams = h2_avail_streams,
+ .used_streams = h2_used_streams,
+ .shutr = h2_shutr,
+ .shutw = h2_shutw,
+ .ctl = h2_ctl,
+ .sctl = h2_sctl,
+ .show_fd = h2_show_fd,
+ .show_sd = h2_show_sd,
+ .takeover = h2_takeover,
+ .flags = MX_FL_HTX|MX_FL_HOL_RISK|MX_FL_NO_UPG|MX_FL_REVERSABLE,
+ .name = "H2",
+};
+
+static struct mux_proto_list mux_proto_h2 =
+ { .token = IST("h2"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_BOTH, .mux = &h2_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_h2);
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.h2.be.initial-window-size", h2_parse_initial_window_size },
+ { CFG_GLOBAL, "tune.h2.be.max-concurrent-streams", h2_parse_max_concurrent_streams },
+ { CFG_GLOBAL, "tune.h2.fe.initial-window-size", h2_parse_initial_window_size },
+ { CFG_GLOBAL, "tune.h2.fe.max-concurrent-streams", h2_parse_max_concurrent_streams },
+ { CFG_GLOBAL, "tune.h2.fe.max-total-streams", h2_parse_max_total_streams },
+ { CFG_GLOBAL, "tune.h2.header-table-size", h2_parse_header_table_size },
+ { CFG_GLOBAL, "tune.h2.initial-window-size", h2_parse_initial_window_size },
+ { CFG_GLOBAL, "tune.h2.max-concurrent-streams", h2_parse_max_concurrent_streams },
+ { CFG_GLOBAL, "tune.h2.max-frame-size", h2_parse_max_frame_size },
+ { CFG_GLOBAL, "tune.h2.zero-copy-fwd-send", h2_parse_zero_copy_fwd_snd },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
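+
+/* Purely illustrative example of a "global" section using some of the
+ * keywords registered above, with values accepted by the parsers:
+ *
+ *     global
+ *         tune.h2.header-table-size    4096
+ *         tune.h2.initial-window-size  65536
+ *         tune.h2.max-frame-size       16384
+ *         tune.h2.zero-copy-fwd-send   on
+ */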
+
+/* initialize internal structs after the config is parsed.
+ * Returns zero on success, non-zero on error.
+ */
+static int init_h2()
+{
+ pool_head_hpack_tbl = create_pool("hpack_tbl",
+ h2_settings_header_table_size,
+ MEM_F_SHARED|MEM_F_EXACT);
+ if (!pool_head_hpack_tbl) {
+ ha_alert("failed to allocate hpack_tbl memory pool\n");
+ return (ERR_ALERT | ERR_FATAL);
+ }
+ return ERR_NONE;
+}
+
+REGISTER_POST_CHECK(init_h2);
diff --git a/src/mux_pt.c b/src/mux_pt.c
new file mode 100644
index 0000000..3cca6a1
--- /dev/null
+++ b/src/mux_pt.c
@@ -0,0 +1,904 @@
+/*
+ * Pass-through mux-demux for connections
+ *
+ * Copyright 2017 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/connection.h>
+#include <haproxy/pipe.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/trace.h>
+#include <haproxy/xref.h>
+
+struct mux_pt_ctx {
+ struct sedesc *sd;
+ struct connection *conn;
+ struct wait_event wait_event;
+};
+
+DECLARE_STATIC_POOL(pool_head_pt_ctx, "mux_pt", sizeof(struct mux_pt_ctx));
+
+/* trace source and events */
+static void pt_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this:
+ * pt_ctx - internal PT context
+ * strm - application layer
+ */
+static const struct trace_event pt_trace_events[] = {
+#define PT_EV_CONN_NEW (1ULL << 0)
+ { .mask = PT_EV_CONN_NEW, .name = "pt_conn_new", .desc = "new PT connection" },
+#define PT_EV_CONN_WAKE (1ULL << 1)
+ { .mask = PT_EV_CONN_WAKE, .name = "pt_conn_wake", .desc = "PT connection woken up" },
+#define PT_EV_CONN_END (1ULL << 2)
+ { .mask = PT_EV_CONN_END, .name = "pt_conn_end", .desc = "PT connection terminated" },
+#define PT_EV_CONN_ERR (1ULL << 3)
+ { .mask = PT_EV_CONN_ERR, .name = "pt_conn_err", .desc = "error on PT connection" },
+#define PT_EV_STRM_NEW (1ULL << 4)
+ { .mask = PT_EV_STRM_NEW, .name = "strm_new", .desc = "app-layer stream creation" },
+#define PT_EV_STRM_SHUT (1ULL << 5)
+ { .mask = PT_EV_STRM_SHUT, .name = "strm_shut", .desc = "stream shutdown" },
+#define PT_EV_STRM_END (1ULL << 6)
+ { .mask = PT_EV_STRM_END, .name = "strm_end", .desc = "detaching app-layer stream" },
+#define PT_EV_STRM_ERR (1ULL << 7)
+ { .mask = PT_EV_STRM_ERR, .name = "strm_err", .desc = "stream error" },
+#define PT_EV_RX_DATA (1ULL << 8)
+ { .mask = PT_EV_RX_DATA, .name = "pt_rx_data", .desc = "Rx on PT connection" },
+#define PT_EV_TX_DATA (1ULL << 9)
+ { .mask = PT_EV_TX_DATA, .name = "pt_tx_data", .desc = "Tx on PT connection" },
+
+ {}
+};
+
+
+static const struct name_desc pt_trace_decoding[] = {
+#define PT_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define PT_VERB_MINIMAL 2
+	{ .name="minimal", .desc="report only pt_ctx state and flags, no real decoding" },
+#define PT_VERB_SIMPLE 3
+ { .name="simple", .desc="add request/response status line or htx info when available" },
+#define PT_VERB_ADVANCED 4
+ { .name="advanced", .desc="add header fields or frame decoding when available" },
+#define PT_VERB_COMPLETE 5
+ { .name="complete", .desc="add full data dump when available" },
+ { /* end */ }
+};
+
+static struct trace_source trace_pt __read_mostly = {
+ .name = IST("pt"),
+ .desc = "Passthrough multiplexer",
+ .arg_def = TRC_ARG1_CONN, // TRACE()'s first argument is always a connection
+ .default_cb = pt_trace,
+ .known_events = pt_trace_events,
+ .lockon_args = NULL,
+ .decoding = pt_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_pt
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+/* returns the stconn associated to the stream */
+static forceinline struct stconn *pt_sc(const struct mux_pt_ctx *pt)
+{
+ return pt->sd->sc;
+}
+
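+/* dumps at most <len> bytes from buffer <buf> starting at offset <ofs>, in up
+ * to two blocks when the data wraps at the end of the buffer
+ */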
+static inline void pt_trace_buf(const struct buffer *buf, size_t ofs, size_t len)
+{
+ size_t block1, block2;
+ int line, ptr, newptr;
+
+ block1 = b_contig_data(buf, ofs);
+ block2 = 0;
+ if (block1 > len)
+ block1 = len;
+ block2 = len - block1;
+
+ ofs = b_peek_ofs(buf, ofs);
+
+ line = 0;
+ ptr = ofs;
+ while (ptr < ofs + block1) {
+ newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), ofs + block1, &line, ptr);
+ if (newptr == ptr)
+ break;
+ ptr = newptr;
+ }
+
+ line = ptr = 0;
+ while (ptr < block2) {
+ newptr = dump_text_line(&trace_buf, b_orig(buf), b_size(buf), block2, &line, ptr);
+ if (newptr == ptr)
+ break;
+ ptr = newptr;
+ }
+}
+
+/* the PT traces always expect that arg1, if non-null, is of type connection
+ * (from which we can derive the pt context), that arg2, if non-null, is a
+ * stream connector, and that arg3, if non-null, is a buffer.
+ */
+static void pt_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct mux_pt_ctx *ctx = conn ? conn->ctx : NULL;
+ const struct stconn *sc = a2;
+ const struct buffer *buf = a3;
+ const size_t *val = a4;
+
+ if (!ctx || src->verbosity < PT_VERB_CLEAN)
+ return;
+
+ /* Display frontend/backend info by default */
+ chunk_appendf(&trace_buf, " : [%c]", (conn_is_back(conn) ? 'B' : 'F'));
+
+ if (src->verbosity == PT_VERB_CLEAN)
+ return;
+
+ if (!sc)
+ sc = pt_sc(ctx);
+
+	/* Display the value of the 4th argument (level > STATE) */
+ if (src->level > TRACE_LEVEL_STATE && val)
+ chunk_appendf(&trace_buf, " - VAL=%lu", (long)*val);
+
+ /* Display conn and sc info, if defined (pointer + flags) */
+ chunk_appendf(&trace_buf, " - conn=%p(0x%08x)", conn, conn->flags);
+ chunk_appendf(&trace_buf, " sd=%p(0x%08x)", ctx->sd, se_fl_get(ctx->sd));
+ if (sc)
+ chunk_appendf(&trace_buf, " sc=%p(0x%08x)", sc, sc->flags);
+
+ if (src->verbosity == PT_VERB_MINIMAL)
+ return;
+
+ /* Display buffer info, if defined (level > USER & verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_USER && buf) {
+ int full = 0, max = 3000, chunk = 1024;
+
+ /* Full info (level > STATE && verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_STATE) {
+ if (src->verbosity == PT_VERB_COMPLETE)
+ full = 1;
+ else if (src->verbosity == PT_VERB_ADVANCED) {
+ full = 1;
+ max = 256;
+ chunk = 64;
+ }
+ }
+
+ chunk_appendf(&trace_buf, " buf=%u@%p+%u/%u",
+ (unsigned int)b_data(buf), b_orig(buf),
+ (unsigned int)b_head_ofs(buf), (unsigned int)b_size(buf));
+
+ if (b_data(buf) && full) {
+ chunk_memcat(&trace_buf, "\n", 1);
+ if (b_data(buf) < max)
+ pt_trace_buf(buf, 0, b_data(buf));
+ else {
+ pt_trace_buf(buf, 0, chunk);
+ chunk_memcat(&trace_buf, " ...\n", 6);
+ pt_trace_buf(buf, b_data(buf) - chunk, chunk);
+ }
+ }
+ }
+}
+
+static void mux_pt_destroy(struct mux_pt_ctx *ctx)
+{
+ struct connection *conn = NULL;
+
+ TRACE_POINT(PT_EV_CONN_END);
+
+ /* The connection must be attached to this mux to be released */
+ if (ctx->conn && ctx->conn->ctx == ctx)
+ conn = ctx->conn;
+
+ tasklet_free(ctx->wait_event.tasklet);
+
+ if (conn && ctx->wait_event.events != 0)
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx, ctx->wait_event.events,
+ &ctx->wait_event);
+ BUG_ON(ctx->sd && !se_fl_test(ctx->sd, SE_FL_ORPHAN));
+ sedesc_free(ctx->sd);
+ pool_free(pool_head_pt_ctx, ctx);
+
+ if (conn) {
+ conn->mux = NULL;
+ conn->ctx = NULL;
+ TRACE_DEVEL("freeing conn", PT_EV_CONN_END, conn);
+
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+}
+
+/* Callback, used when we get I/Os while in idle mode. This one is exported so
+ * that "show fd" can resolve it.
+ */
+struct task *mux_pt_io_cb(struct task *t, void *tctx, unsigned int status)
+{
+ struct mux_pt_ctx *ctx = tctx;
+
+ TRACE_ENTER(PT_EV_CONN_WAKE, ctx->conn);
+ if (!se_fl_test(ctx->sd, SE_FL_ORPHAN)) {
+ /* There's a small race condition.
+ * mux_pt_io_cb() is only supposed to be called if we have no
+ * stream attached. However, maybe the tasklet got woken up,
+ * and this connection was then attached to a new stream.
+ * If this happened, just wake the tasklet up if anybody
+ * subscribed to receive events, and otherwise call the wake
+ * method, to make sure the event is noticed.
+ */
+ if (ctx->conn->subs) {
+ ctx->conn->subs->events = 0;
+ tasklet_wakeup(ctx->conn->subs->tasklet);
+ ctx->conn->subs = NULL;
+ } else if (pt_sc(ctx)->app_ops->wake)
+ pt_sc(ctx)->app_ops->wake(pt_sc(ctx));
+ TRACE_DEVEL("leaving waking up SC", PT_EV_CONN_WAKE, ctx->conn);
+ return t;
+ }
+ conn_ctrl_drain(ctx->conn);
+ if (ctx->conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH)) {
+ TRACE_DEVEL("leaving destroying pt context", PT_EV_CONN_WAKE, ctx->conn);
+ mux_pt_destroy(ctx);
+ t = NULL;
+ }
+ else {
+ ctx->conn->xprt->subscribe(ctx->conn, ctx->conn->xprt_ctx, SUB_RETRY_RECV,
+ &ctx->wait_event);
+ TRACE_DEVEL("leaving subscribing for reads", PT_EV_CONN_WAKE, ctx->conn);
+ }
+
+ return t;
+}
+
+/* Initialize the mux once it's attached. It is expected that conn->ctx points
+ * to the existing stream connector (for outgoing connections) or NULL (for
+ * incoming ones, in which case one will be allocated and a new stream will be
+ * instantiated). Returns < 0 on error.
+ */
+static int mux_pt_init(struct connection *conn, struct proxy *prx, struct session *sess,
+ struct buffer *input)
+{
+ struct stconn *sc = conn->ctx;
+ struct mux_pt_ctx *ctx = pool_alloc(pool_head_pt_ctx);
+
+ TRACE_ENTER(PT_EV_CONN_NEW);
+
+ if (!ctx) {
+ TRACE_ERROR("PT context allocation failure", PT_EV_CONN_NEW|PT_EV_CONN_END|PT_EV_CONN_ERR);
+ goto fail;
+ }
+
+ ctx->wait_event.tasklet = tasklet_new();
+ if (!ctx->wait_event.tasklet)
+ goto fail_free_ctx;
+ ctx->wait_event.tasklet->context = ctx;
+ ctx->wait_event.tasklet->process = mux_pt_io_cb;
+ ctx->wait_event.events = 0;
+ ctx->conn = conn;
+
+ if (!sc) {
+ ctx->sd = sedesc_new();
+ if (!ctx->sd) {
+ TRACE_ERROR("SC allocation failure", PT_EV_STRM_NEW|PT_EV_STRM_END|PT_EV_STRM_ERR, conn);
+ goto fail_free_ctx;
+ }
+ ctx->sd->se = ctx;
+ ctx->sd->conn = conn;
+ se_fl_set(ctx->sd, SE_FL_T_MUX | SE_FL_ORPHAN);
+
+ sc = sc_new_from_endp(ctx->sd, sess, input);
+ if (!sc) {
+ TRACE_ERROR("SC allocation failure", PT_EV_STRM_NEW|PT_EV_STRM_END|PT_EV_STRM_ERR, conn);
+ goto fail_free_sd;
+ }
+ TRACE_POINT(PT_EV_STRM_NEW, conn, sc);
+ }
+ else {
+ if (sc_attach_mux(sc, ctx, conn) < 0)
+ goto fail_free_ctx;
+ ctx->sd = sc->sedesc;
+ }
+ conn->ctx = ctx;
+ se_fl_set(ctx->sd, SE_FL_RCV_MORE);
+ if ((global.tune.options & GTUNE_USE_SPLICE) && !(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_PT))
+ se_fl_set(ctx->sd, SE_FL_MAY_FASTFWD_PROD|SE_FL_MAY_FASTFWD_CONS);
+
+ TRACE_LEAVE(PT_EV_CONN_NEW, conn);
+ return 0;
+
+ fail_free_sd:
+ sedesc_free(ctx->sd);
+ fail_free_ctx:
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(pool_head_pt_ctx, ctx);
+ fail:
+ TRACE_DEVEL("leaving in error", PT_EV_CONN_NEW|PT_EV_CONN_END|PT_EV_CONN_ERR);
+ return -1;
+}
+
+/* callback to be used by default for the pass-through mux. It calls the data
+ * layer wake() callback if it is set, otherwise it returns 0.
+ */
+static int mux_pt_wake(struct connection *conn)
+{
+ struct mux_pt_ctx *ctx = conn->ctx;
+ int ret = 0;
+
+ TRACE_ENTER(PT_EV_CONN_WAKE, ctx->conn);
+ if (!se_fl_test(ctx->sd, SE_FL_ORPHAN)) {
+ ret = pt_sc(ctx)->app_ops->wake ? pt_sc(ctx)->app_ops->wake(pt_sc(ctx)) : 0;
+
+ if (ret < 0) {
+ TRACE_DEVEL("leaving waking up SC", PT_EV_CONN_WAKE, ctx->conn);
+ return ret;
+ }
+ } else {
+ conn_ctrl_drain(conn);
+ if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH)) {
+ TRACE_DEVEL("leaving destroying PT context", PT_EV_CONN_WAKE, ctx->conn);
+ mux_pt_destroy(ctx);
+ return -1;
+ }
+ }
+
+ /* If we had early data, and we're done with the handshake
+ * then we know the data are safe, and we can remove the flag.
+ */
+ if ((conn->flags & (CO_FL_EARLY_DATA | CO_FL_EARLY_SSL_HS | CO_FL_WAIT_XPRT)) ==
+ CO_FL_EARLY_DATA)
+ conn->flags &= ~CO_FL_EARLY_DATA;
+
+ TRACE_LEAVE(PT_EV_CONN_WAKE, ctx->conn);
+ return ret;
+}
+
+/*
+ * Attach a new stream to a connection
+ * (Used for outgoing connections)
+ */
+static int mux_pt_attach(struct connection *conn, struct sedesc *sd, struct session *sess)
+{
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ TRACE_ENTER(PT_EV_STRM_NEW, conn);
+ if (ctx->wait_event.events)
+ conn->xprt->unsubscribe(ctx->conn, conn->xprt_ctx, SUB_RETRY_RECV, &ctx->wait_event);
+ if (sc_attach_mux(sd->sc, ctx, conn) < 0)
+ return -1;
+ ctx->sd = sd;
+ se_fl_set(ctx->sd, SE_FL_RCV_MORE);
+ if ((global.tune.options & GTUNE_USE_SPLICE) && !(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_PT))
+ se_fl_set(ctx->sd, SE_FL_MAY_FASTFWD_PROD|SE_FL_MAY_FASTFWD_CONS);
+
+ TRACE_LEAVE(PT_EV_STRM_NEW, conn, sd->sc);
+ return 0;
+}
+
+/* Retrieves a valid stream connector from this connection, or returns NULL.
+ * For this mux, it's easy as we can only store a single stream connector.
+ */
+static struct stconn *mux_pt_get_first_sc(const struct connection *conn)
+{
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ return pt_sc(ctx);
+}
+
+/* Destroy the mux and the associated connection if still attached to this mux
+ * and no longer used */
+static void mux_pt_destroy_meth(void *ctx)
+{
+ struct mux_pt_ctx *pt = ctx;
+
+ TRACE_POINT(PT_EV_CONN_END, pt->conn, pt_sc(pt));
+ if (se_fl_test(pt->sd, SE_FL_ORPHAN) || pt->conn->ctx != pt) {
+ if (pt->conn->ctx != pt) {
+ pt->sd = NULL;
+ }
+ mux_pt_destroy(pt);
+ }
+}
+
+/*
+ * Detach the stream from the connection and possibly release the connection.
+ */
+static void mux_pt_detach(struct sedesc *sd)
+{
+ struct connection *conn = sd->conn;
+ struct mux_pt_ctx *ctx;
+
+ TRACE_ENTER(PT_EV_STRM_END, conn, sd->sc);
+
+ ctx = conn->ctx;
+
+ /* Subscribe, to know if we got disconnected */
+ if (!conn_is_back(conn) && conn->owner != NULL &&
+ !(conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH))) {
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_RECV, &ctx->wait_event);
+ } else {
+ /* There's no session attached to that connection, destroy it */
+ TRACE_DEVEL("killing dead connection", PT_EV_STRM_END, conn, sd->sc);
+ mux_pt_destroy(ctx);
+ }
+
+ TRACE_LEAVE(PT_EV_STRM_END);
+}
+
+/* returns the number of streams in use on a connection */
+static int mux_pt_used_streams(struct connection *conn)
+{
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ return (!se_fl_test(ctx->sd, SE_FL_ORPHAN) ? 1 : 0);
+}
+
+/* returns the number of streams still available on a connection */
+static int mux_pt_avail_streams(struct connection *conn)
+{
+ return 1 - mux_pt_used_streams(conn);
+}
+
+static void mux_pt_shutr(struct stconn *sc, enum co_shr_mode mode)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ TRACE_ENTER(PT_EV_STRM_SHUT, conn, sc);
+
+ se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (conn_xprt_ready(conn) && conn->xprt->shutr)
+ conn->xprt->shutr(conn, conn->xprt_ctx,
+ (mode == CO_SHR_DRAIN));
+ else if (mode == CO_SHR_DRAIN)
+ conn_ctrl_drain(conn);
+ if (se_fl_test(ctx->sd, SE_FL_SHW))
+ conn_full_close(conn);
+
+ TRACE_LEAVE(PT_EV_STRM_SHUT, conn, sc);
+}
+
+static void mux_pt_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+
+ TRACE_ENTER(PT_EV_STRM_SHUT, conn, sc);
+
+ if (conn_xprt_ready(conn) && conn->xprt->shutw)
+ conn->xprt->shutw(conn, conn->xprt_ctx,
+ (mode == CO_SHW_NORMAL));
+ if (!se_fl_test(ctx->sd, SE_FL_SHR))
+ conn_sock_shutw(conn, (mode == CO_SHW_NORMAL));
+ else
+ conn_full_close(conn);
+
+ TRACE_LEAVE(PT_EV_STRM_SHUT, conn, sc);
+}
+
+/*
+ * Called from the upper layer, to get more data
+ *
+ * The caller is responsible for defragmenting <buf> if necessary. But <flags>
+ * must be tested to know the calling context. If CO_RFL_BUF_FLUSH is set, it
+ * means the caller wants to flush input data (from the mux buffer and the
+ * channel buffer) to be able to use kernel splicing or any kind of mux-to-mux
+ * xfer. If CO_RFL_KEEP_RECV is set, the mux must always subscribe for read
+ * events before giving back. CO_RFL_BUF_WET is set if <buf> is congested with
+ * data scheduled for leaving soon. CO_RFL_BUF_NOT_STUCK is set to instruct
+ * the mux that it may optimize the data copy to <buf> if necessary.
+ * Otherwise, it should copy as much data as possible.
+ */
+static size_t mux_pt_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ size_t ret = 0;
+
+ TRACE_ENTER(PT_EV_RX_DATA, conn, sc, buf, (size_t[]){count});
+
+ if (!count) {
+ se_fl_set(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ goto end;
+ }
+ b_realign_if_empty(buf);
+ ret = conn->xprt->rcv_buf(conn, conn->xprt_ctx, buf, count, flags);
+ if (conn->flags & CO_FL_ERROR) {
+ se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ if (conn_xprt_read0_pending(conn))
+ se_fl_set(ctx->sd, SE_FL_EOS);
+ se_fl_set(ctx->sd, SE_FL_ERROR);
+ TRACE_DEVEL("error on connection", PT_EV_RX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+ else if (conn_xprt_read0_pending(conn)) {
+ se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ se_fl_set(ctx->sd, (SE_FL_EOI|SE_FL_EOS));
+ TRACE_DEVEL("read0 on connection", PT_EV_RX_DATA, conn, sc);
+ }
+ end:
+ TRACE_LEAVE(PT_EV_RX_DATA, conn, sc, buf, (size_t[]){ret});
+ return ret;
+}
+
+/* Called from the upper layer, to send data */
+static size_t mux_pt_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ size_t ret;
+
+ TRACE_ENTER(PT_EV_TX_DATA, conn, sc, buf, (size_t[]){count});
+
+ ret = conn->xprt->snd_buf(conn, conn->xprt_ctx, buf, count, flags);
+
+ if (ret > 0)
+ b_del(buf, ret);
+
+ if (conn->flags & CO_FL_ERROR) {
+ if (conn_xprt_read0_pending(conn))
+ se_fl_set(ctx->sd, SE_FL_EOS);
+ se_fl_set_error(ctx->sd);
+ TRACE_DEVEL("error on connection", PT_EV_TX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+
+ TRACE_LEAVE(PT_EV_TX_DATA, conn, sc, buf, (size_t[]){ret});
+ return ret;
+}
+
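+/* returns the stream endpoint descriptor of the opposite side through the
+ * sedesc xref peer link, or NULL if the other side is not attached
+ */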
+static inline struct sedesc *mux_pt_opposite_sd(struct mux_pt_ctx *ctx)
+{
+ struct xref *peer;
+ struct sedesc *sdo;
+
+ peer = xref_get_peer_and_lock(&ctx->sd->xref);
+ if (!peer)
+ return NULL;
+
+ sdo = container_of(peer, struct sedesc, xref);
+ xref_unlock(&ctx->sd->xref, peer);
+ return sdo;
+}
+
+static size_t mux_pt_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ size_t ret = 0;
+
+ TRACE_ENTER(PT_EV_TX_DATA, conn, sc, 0, (size_t[]){count});
+
+ /* Use kernel splicing if it is supported by the sender and if there
+ * are no input data _AND_ no output data.
+ *
+ * TODO: It may be good to add a flag to send obuf data first if any,
+ * and then data in pipe, or the opposite. For now, it is not
+ * supported to mix data.
+ */
+ if (!b_data(input) && may_splice) {
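+		/* reuse the existing pipe if any, otherwise try to grab a new
+		 * one within the global limit
+		 */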
+ if (conn->xprt->snd_pipe && (ctx->sd->iobuf.pipe || (pipes_used < global.maxpipes && (ctx->sd->iobuf.pipe = get_pipe())))) {
+ ctx->sd->iobuf.offset = 0;
+ ctx->sd->iobuf.data = 0;
+ ret = count;
+ goto out;
+ }
+ ctx->sd->iobuf.flags |= IOBUF_FL_NO_SPLICING;
+ TRACE_DEVEL("Unable to allocate pipe for splicing, fallback to buffer", PT_EV_TX_DATA, conn, sc);
+ }
+
+ /* No buffer case */
+
+ out:
+ TRACE_LEAVE(PT_EV_TX_DATA, conn, sc, 0, (size_t[]){ret});
+ return ret;
+}
+
+static size_t mux_pt_done_ff(struct stconn *sc)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ struct sedesc *sd = ctx->sd;
+ size_t total = 0;
+
+ TRACE_ENTER(PT_EV_TX_DATA, conn, sc);
+
+ if (sd->iobuf.pipe) {
+ total = conn->xprt->snd_pipe(conn, conn->xprt_ctx, sd->iobuf.pipe, sd->iobuf.pipe->data);
+ if (!sd->iobuf.pipe->data) {
+ put_pipe(sd->iobuf.pipe);
+ sd->iobuf.pipe = NULL;
+ }
+ }
+ else {
+ BUG_ON(sd->iobuf.buf);
+ }
+
+ out:
+ if (conn->flags & CO_FL_ERROR) {
+ if (conn_xprt_read0_pending(conn))
+ se_fl_set(ctx->sd, SE_FL_EOS);
+ se_fl_set_error(ctx->sd);
+ TRACE_DEVEL("error on connection", PT_EV_TX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+
+ TRACE_LEAVE(PT_EV_TX_DATA, conn, sc, 0, (size_t[]){total});
+ return total;
+}
+
+static int mux_pt_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ struct sedesc *sdo = NULL;
+ size_t total = 0, try = 0;
+ int ret = 0;
+
+ TRACE_ENTER(PT_EV_RX_DATA, conn, sc, 0, (size_t[]){count});
+
+ se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ conn->flags &= ~CO_FL_WAIT_ROOM;
+ sdo = mux_pt_opposite_sd(ctx);
+ if (!sdo) {
+ TRACE_STATE("Opposite endpoint not available yet", PT_EV_RX_DATA, conn, sc);
+ goto out;
+ }
+
+ try = se_nego_ff(sdo, &BUF_NULL, count, conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING));
+ if (sdo->iobuf.flags & IOBUF_FL_NO_FF) {
+ /* Fast forwarding is not supported by the consumer */
+ se_fl_clr(ctx->sd, SE_FL_MAY_FASTFWD_PROD);
+ TRACE_DEVEL("Fast-forwarding not supported by opposite endpoint, disable it", PT_EV_RX_DATA, conn, sc);
+ goto end;
+ }
+ if (sdo->iobuf.flags & IOBUF_FL_FF_BLOCKED) {
+ se_fl_set(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ TRACE_STATE("waiting for more room", PT_EV_RX_DATA|PT_EV_STRM_ERR, conn, sc);
+ goto out;
+ }
+
+ total += sdo->iobuf.data;
+
+ if (sdo->iobuf.pipe) {
+		/* Here, no data was xferred yet */
+ ret = conn->xprt->rcv_pipe(conn, conn->xprt_ctx, sdo->iobuf.pipe, try);
+ if (ret < 0) {
+ TRACE_ERROR("Error when trying to fast-forward data, disable it and abort",
+ PT_EV_RX_DATA|PT_EV_STRM_ERR|PT_EV_CONN_ERR, conn, sc);
+ se_fl_clr(ctx->sd, SE_FL_MAY_FASTFWD_PROD);
+ BUG_ON(sdo->iobuf.pipe->data);
+ put_pipe(sdo->iobuf.pipe);
+ sdo->iobuf.pipe = NULL;
+ goto end;
+ }
+ total += ret;
+ }
+ else {
+ BUG_ON(sdo->iobuf.buf);
+ ret = -1; /* abort splicing for now and fallback to buffer mode */
+ goto end;
+ }
+
+ ret = total;
+ se_done_ff(sdo);
+
+ if (sdo->iobuf.pipe) {
+ se_fl_set(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ }
+
+ TRACE_DEVEL("Data fast-forwarded", PT_EV_RX_DATA, conn, sc, 0, (size_t[]){ret});
+
+ out:
+ if (conn->flags & CO_FL_ERROR) {
+ if (conn_xprt_read0_pending(conn))
+ se_fl_set(ctx->sd, SE_FL_EOS);
+ se_fl_set(ctx->sd, SE_FL_ERROR);
+ TRACE_DEVEL("error on connection", PT_EV_RX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+ else if (conn_xprt_read0_pending(conn)) {
+ se_fl_set(ctx->sd, (SE_FL_EOS|SE_FL_EOI));
+ TRACE_DEVEL("read0 on connection", PT_EV_RX_DATA, conn, sc);
+ }
+ end:
+ TRACE_LEAVE(PT_EV_RX_DATA, conn, sc, 0, (size_t[]){ret});
+ return ret;
+}
+
+static int mux_pt_resume_fastfwd(struct stconn *sc, unsigned int flags)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct mux_pt_ctx *ctx = conn->ctx;
+ struct sedesc *sd = ctx->sd;
+ size_t total = 0;
+
+ TRACE_ENTER(PT_EV_TX_DATA, conn, sc, 0, (size_t[]){flags});
+
+ if (sd->iobuf.pipe) {
+ total = conn->xprt->snd_pipe(conn, conn->xprt_ctx, sd->iobuf.pipe, sd->iobuf.pipe->data);
+ if (!sd->iobuf.pipe->data) {
+ put_pipe(sd->iobuf.pipe);
+ sd->iobuf.pipe = NULL;
+ }
+ }
+ else {
+ BUG_ON(sd->iobuf.buf);
+ }
+
+ out:
+ if (conn->flags & CO_FL_ERROR) {
+ if (conn_xprt_read0_pending(conn))
+ se_fl_set(ctx->sd, SE_FL_EOS);
+ se_fl_set_error(ctx->sd);
+ TRACE_DEVEL("error on connection", PT_EV_TX_DATA|PT_EV_CONN_ERR, conn, sc);
+ }
+
+ TRACE_LEAVE(PT_EV_TX_DATA, conn, sc, 0, (size_t[]){total});
+ return total;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int mux_pt_subscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct connection *conn = __sc_conn(sc);
+
+ TRACE_POINT(PT_EV_RX_DATA|PT_EV_TX_DATA, conn, sc, 0, (size_t[]){event_type});
+ return conn->xprt->subscribe(conn, conn->xprt_ctx, event_type, es);
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int mux_pt_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct connection *conn = __sc_conn(sc);
+
+ TRACE_POINT(PT_EV_RX_DATA|PT_EV_TX_DATA, conn, sc, 0, (size_t[]){event_type});
+ return conn->xprt->unsubscribe(conn, conn->xprt_ctx, event_type, es);
+}
+
+static int mux_pt_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output)
+{
+ int ret = 0;
+ switch (mux_ctl) {
+ case MUX_CTL_STATUS:
+ if (!(conn->flags & CO_FL_WAIT_XPRT))
+ ret |= MUX_STATUS_READY;
+ return ret;
+ case MUX_CTL_EXIT_STATUS:
+ return MUX_ES_UNKNOWN;
+ default:
+ return -1;
+ }
+}
+
+static int mux_pt_sctl(struct stconn *sc, enum mux_sctl_type mux_sctl, void *output)
+{
+ int ret = 0;
+
+ switch (mux_sctl) {
+ case MUX_SCTL_SID:
+ if (output)
+ *((int64_t *)output) = 0;
+ return ret;
+
+ default:
+ return -1;
+ }
+}
+
+/* config parser for global "tune.pt.zero-copy-forwarding" */
+static int cfg_parse_pt_zero_copy_fwd(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_PT;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_PT;
+ else {
+ memprintf(err, "'%s' expects 'on' or 'off'.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.pt.zero-copy-forwarding", cfg_parse_pt_zero_copy_fwd },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
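+
+/* Purely illustrative example:
+ *
+ *     global
+ *         tune.pt.zero-copy-forwarding on
+ */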
+
+
+/* The mux operations */
+const struct mux_ops mux_tcp_ops = {
+ .init = mux_pt_init,
+ .wake = mux_pt_wake,
+ .rcv_buf = mux_pt_rcv_buf,
+ .snd_buf = mux_pt_snd_buf,
+ .nego_fastfwd = mux_pt_nego_ff,
+ .done_fastfwd = mux_pt_done_ff,
+ .fastfwd = mux_pt_fastfwd,
+ .resume_fastfwd = mux_pt_resume_fastfwd,
+ .subscribe = mux_pt_subscribe,
+ .unsubscribe = mux_pt_unsubscribe,
+ .attach = mux_pt_attach,
+ .get_first_sc = mux_pt_get_first_sc,
+ .detach = mux_pt_detach,
+ .avail_streams = mux_pt_avail_streams,
+ .used_streams = mux_pt_used_streams,
+ .destroy = mux_pt_destroy_meth,
+ .ctl = mux_pt_ctl,
+ .sctl = mux_pt_sctl,
+ .shutr = mux_pt_shutr,
+ .shutw = mux_pt_shutw,
+ .flags = MX_FL_NONE,
+ .name = "PASS",
+};
+
+
+const struct mux_ops mux_pt_ops = {
+ .init = mux_pt_init,
+ .wake = mux_pt_wake,
+ .rcv_buf = mux_pt_rcv_buf,
+ .snd_buf = mux_pt_snd_buf,
+ .nego_fastfwd = mux_pt_nego_ff,
+ .done_fastfwd = mux_pt_done_ff,
+ .fastfwd = mux_pt_fastfwd,
+ .resume_fastfwd = mux_pt_resume_fastfwd,
+ .subscribe = mux_pt_subscribe,
+ .unsubscribe = mux_pt_unsubscribe,
+ .attach = mux_pt_attach,
+ .get_first_sc = mux_pt_get_first_sc,
+ .detach = mux_pt_detach,
+ .avail_streams = mux_pt_avail_streams,
+ .used_streams = mux_pt_used_streams,
+ .destroy = mux_pt_destroy_meth,
+ .ctl = mux_pt_ctl,
+ .sctl = mux_pt_sctl,
+ .shutr = mux_pt_shutr,
+ .shutw = mux_pt_shutw,
+ .flags = MX_FL_NONE|MX_FL_NO_UPG,
+ .name = "PASS",
+};
+
+/* PROT selection: default mux has empty name */
+static struct mux_proto_list mux_proto_none =
+ { .token = IST("none"), .mode = PROTO_MODE_TCP, .side = PROTO_SIDE_BOTH, .mux = &mux_pt_ops };
+static struct mux_proto_list mux_proto_tcp =
+ { .token = IST(""), .mode = PROTO_MODE_TCP, .side = PROTO_SIDE_BOTH, .mux = &mux_tcp_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_none);
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_tcp);
diff --git a/src/mux_quic.c b/src/mux_quic.c
new file mode 100644
index 0000000..de87368
--- /dev/null
+++ b/src/mux_quic.c
@@ -0,0 +1,3067 @@
+#include <haproxy/mux_quic.h>
+
+#include <import/eb64tree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/h3.h>
+#include <haproxy/list.h>
+#include <haproxy/ncbuf.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/qmux_http.h>
+#include <haproxy/qmux_trace.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_frame.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/quic_stream.h>
+#include <haproxy/quic_tp-t.h>
+#include <haproxy/ssl_sock-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/time.h>
+#include <haproxy/trace.h>
+#include <haproxy/xref.h>
+
+DECLARE_POOL(pool_head_qcc, "qcc", sizeof(struct qcc));
+DECLARE_POOL(pool_head_qcs, "qcs", sizeof(struct qcs));
+
+static void qcs_free_ncbuf(struct qcs *qcs, struct ncbuf *ncbuf)
+{
+ struct buffer buf;
+
+ if (ncb_is_null(ncbuf))
+ return;
+
+ buf = b_make(ncbuf->area, ncbuf->size, 0, 0);
+ b_free(&buf);
+ offer_buffers(NULL, 1);
+
+ *ncbuf = NCBUF_NULL;
+
+ /* Reset DEM_FULL as buffer is released. This ensures mux is not woken
+ * up from rcv_buf stream callback when demux was previously blocked.
+ */
+ qcs->flags &= ~QC_SF_DEM_FULL;
+}
+
+/* Free <qcs> instance. This function is reserved for internal usage: it must
+ * only be called on qcs alloc error or on connection shutdown. Otherwise,
+ * qcs_destroy must be preferred to handle QUIC flow-control increase.
+ */
+static void qcs_free(struct qcs *qcs)
+{
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_QCS_END, qcc->conn, qcs);
+
+ /* Safe to use even if already removed from the list. */
+ LIST_DEL_INIT(&qcs->el_opening);
+ LIST_DEL_INIT(&qcs->el_send);
+
+ /* Release stream endpoint descriptor. */
+ BUG_ON(qcs->sd && !se_fl_test(qcs->sd, SE_FL_ORPHAN));
+ sedesc_free(qcs->sd);
+
+ /* Release app-layer context. */
+ if (qcs->ctx && qcc->app_ops->detach)
+ qcc->app_ops->detach(qcs);
+
+ /* Release qc_stream_desc buffer from quic-conn layer. */
+ qc_stream_desc_release(qcs->stream, qcs->tx.sent_offset);
+
+ /* Free Rx/Tx buffers. */
+ qcs_free_ncbuf(qcs, &qcs->rx.ncbuf);
+ b_free(&qcs->tx.buf);
+
+ /* Remove qcs from qcc tree. */
+ eb64_delete(&qcs->by_id);
+
+ pool_free(pool_head_qcs, qcs);
+
+ TRACE_LEAVE(QMUX_EV_QCS_END, qcc->conn);
+}
+
+/* Allocate a new QUIC stream with id <id> and type <type>. */
+static struct qcs *qcs_new(struct qcc *qcc, uint64_t id, enum qcs_type type)
+{
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCS_NEW, qcc->conn);
+
+ qcs = pool_alloc(pool_head_qcs);
+ if (!qcs) {
+ TRACE_ERROR("alloc failure", QMUX_EV_QCS_NEW, qcc->conn);
+ return NULL;
+ }
+
+ qcs->stream = NULL;
+ qcs->qcc = qcc;
+ qcs->sd = NULL;
+ qcs->flags = QC_SF_NONE;
+ qcs->st = QC_SS_IDLE;
+ qcs->ctx = NULL;
+
+ /* App callback attach may register the stream for http-request wait.
+	 * These fields must be initialized before that.
+ */
+ LIST_INIT(&qcs->el_opening);
+ LIST_INIT(&qcs->el_send);
+ qcs->start = TICK_ETERNITY;
+
+ /* store transport layer stream descriptor in qcc tree */
+ qcs->id = qcs->by_id.key = id;
+ eb64_insert(&qcc->streams_by_id, &qcs->by_id);
+
+ /* If stream is local, use peer remote-limit, or else the opposite. */
+ if (quic_stream_is_bidi(id)) {
+ qcs->tx.msd = quic_stream_is_local(qcc, id) ? qcc->rfctl.msd_bidi_r :
+ qcc->rfctl.msd_bidi_l;
+ }
+ else if (quic_stream_is_local(qcc, id)) {
+ qcs->tx.msd = qcc->rfctl.msd_uni_l;
+ }
+
+	/* Properly set flow-control blocking if the initial MSD is zero. */
+ if (!qcs->tx.msd)
+ qcs->flags |= QC_SF_BLK_SFCTL;
+
+ qcs->rx.ncbuf = NCBUF_NULL;
+ qcs->rx.app_buf = BUF_NULL;
+ qcs->rx.offset = qcs->rx.offset_max = 0;
+
+ if (quic_stream_is_bidi(id)) {
+ qcs->rx.msd = quic_stream_is_local(qcc, id) ? qcc->lfctl.msd_bidi_l :
+ qcc->lfctl.msd_bidi_r;
+ }
+ else if (quic_stream_is_remote(qcc, id)) {
+ qcs->rx.msd = qcc->lfctl.msd_uni_r;
+ }
+ qcs->rx.msd_init = qcs->rx.msd;
+
+ qcs->tx.buf = BUF_NULL;
+ qcs->tx.offset = 0;
+ qcs->tx.sent_offset = 0;
+
+ qcs->wait_event.tasklet = NULL;
+ qcs->wait_event.events = 0;
+ qcs->subs = NULL;
+
+ qcs->err = 0;
+
+ /* Allocate transport layer stream descriptor. Only needed for TX. */
+ if (!quic_stream_is_uni(id) || !quic_stream_is_remote(qcc, id)) {
+ struct quic_conn *qc = qcc->conn->handle.qc;
+ qcs->stream = qc_stream_desc_new(id, type, qcs, qc);
+ if (!qcs->stream) {
+ TRACE_ERROR("qc_stream_desc alloc failure", QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ goto err;
+ }
+ }
+
+ if (qcc->app_ops->attach && qcc->app_ops->attach(qcs, qcc->ctx)) {
+ TRACE_ERROR("app proto failure", QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ goto err;
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ return qcs;
+
+ err:
+ qcs_free(qcs);
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn);
+ return NULL;
+}
+
+static forceinline struct stconn *qcs_sc(const struct qcs *qcs)
+{
+ return qcs->sd ? qcs->sd->sc : NULL;
+}
+
+/* Reset the <qcc> inactivity timeout for http-keep-alive timeout. */
+static forceinline void qcc_reset_idle_start(struct qcc *qcc)
+{
+ qcc->idle_start = now_ms;
+}
+
+/* Decrement <qcc> sc. */
+static forceinline void qcc_rm_sc(struct qcc *qcc)
+{
+ BUG_ON(!qcc->nb_sc); /* Ensure sc count is always valid (ie >=0). */
+ --qcc->nb_sc;
+
+ /* Reset qcc idle start for http-keep-alive timeout. Timeout will be
+ * refreshed after this on stream detach.
+ */
+ if (!qcc->nb_sc && !qcc->nb_hreq)
+ qcc_reset_idle_start(qcc);
+}
+
+/* Decrement <qcc> hreq. */
+static forceinline void qcc_rm_hreq(struct qcc *qcc)
+{
+ BUG_ON(!qcc->nb_hreq); /* Ensure http req count is always valid (ie >=0). */
+ --qcc->nb_hreq;
+
+ /* Reset qcc idle start for http-keep-alive timeout. Timeout will be
+ * refreshed after this on I/O handler.
+ */
+ if (!qcc->nb_sc && !qcc->nb_hreq)
+ qcc_reset_idle_start(qcc);
+}
+
+static inline int qcc_is_dead(const struct qcc *qcc)
+{
+ /* Maintain connection if stream endpoints are still active. */
+ if (qcc->nb_sc)
+ return 0;
+
+ /* Connection considered dead if either :
+ * - remote error detected at transport level
+ * - error detected locally
+ * - MUX timeout expired
+ */
+ if (qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL_DONE) ||
+ !qcc->task) {
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Return true if the mux timeout should be armed. */
+static inline int qcc_may_expire(struct qcc *qcc)
+{
+ return !qcc->nb_sc;
+}
+
+/* Refresh the timeout on <qcc> if needed depending on its state. */
+static void qcc_refresh_timeout(struct qcc *qcc)
+{
+ const struct proxy *px = qcc->proxy;
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc->conn);
+
+ if (!qcc->task) {
+ TRACE_DEVEL("already expired", QMUX_EV_QCC_WAKE, qcc->conn);
+ goto leave;
+ }
+
+	/* Check if the upper layer is responsible for timeout management. */
+ if (!qcc_may_expire(qcc)) {
+ TRACE_DEVEL("not eligible for timeout", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = TICK_ETERNITY;
+ task_queue(qcc->task);
+ goto leave;
+ }
+
+ /* Frontend timeout management
+ * - shutdown done -> timeout client-fin
+ * - detached streams with data left to send -> default timeout
+ * - stream waiting on incomplete request or no stream yet activated -> timeout http-request
+ * - idle after stream processing -> timeout http-keep-alive
+ *
+	 * If a proxy soft-stop is in progress, an immediate or spread close
+	 * will be performed if the shutdown is already done or the connection
+	 * is idle.
+ */
+ if (!conn_is_back(qcc->conn)) {
+ if (qcc->nb_hreq && !(qcc->flags & QC_CF_APP_SHUT)) {
+ TRACE_DEVEL("one or more requests still in progress", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = tick_add_ifset(now_ms, qcc->timeout);
+ task_queue(qcc->task);
+ goto leave;
+ }
+
+ if ((!LIST_ISEMPTY(&qcc->opening_list) || unlikely(!qcc->largest_bidi_r)) &&
+ !(qcc->flags & QC_CF_APP_SHUT)) {
+ int timeout = px->timeout.httpreq;
+ struct qcs *qcs = NULL;
+ int base_time;
+
+			/* Use the start time of the first stream waiting on HTTP,
+			 * or the qcc idle start if no stream was used yet.
+			 */
+ if (likely(!LIST_ISEMPTY(&qcc->opening_list)))
+ qcs = LIST_ELEM(qcc->opening_list.n, struct qcs *, el_opening);
+ base_time = qcs ? qcs->start : qcc->idle_start;
+
+ TRACE_DEVEL("waiting on http request", QMUX_EV_QCC_WAKE, qcc->conn, qcs);
+ qcc->task->expire = tick_add_ifset(base_time, timeout);
+ }
+ else {
+ if (qcc->flags & QC_CF_APP_SHUT) {
+ TRACE_DEVEL("connection in closing", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = tick_add_ifset(now_ms,
+ qcc->shut_timeout);
+ }
+ else {
+ /* Use http-request timeout if keep-alive timeout not set */
+ int timeout = tick_isset(px->timeout.httpka) ?
+ px->timeout.httpka : px->timeout.httpreq;
+ TRACE_DEVEL("at least one request achieved but none currently in progress", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = tick_add_ifset(qcc->idle_start, timeout);
+ }
+
+ /* If proxy soft-stop in progress and connection is
+ * inactive, close the connection immediately. If a
+ * close-spread-time is configured, randomly spread the
+ * timer over a closing window.
+ */
+ if ((qcc->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) &&
+ !(global.tune.options & GTUNE_DISABLE_ACTIVE_CLOSE)) {
+
+ /* Wake timeout task immediately if window already expired. */
+ int remaining_window = tick_isset(global.close_spread_end) ?
+ tick_remain(now_ms, global.close_spread_end) : 0;
+
+ TRACE_DEVEL("proxy disabled, prepare connection soft-stop", QMUX_EV_QCC_WAKE, qcc->conn);
+ if (remaining_window) {
+ /* We don't need to reset the expire if it would
+ * already happen before the close window end.
+ */
+ if (!tick_isset(qcc->task->expire) ||
+ tick_is_le(global.close_spread_end, qcc->task->expire)) {
+ /* Set an expire value shorter than the current value
+ * because the close spread window end comes earlier.
+ */
+ qcc->task->expire = tick_add(now_ms,
+ statistical_prng_range(remaining_window));
+ }
+ }
+ else {
+ /* We are past the soft close window end, wake the timeout
+ * task up immediately.
+ */
+ qcc->task->expire = now_ms;
+ task_wakeup(qcc->task, TASK_WOKEN_TIMER);
+ }
+ }
+ }
+ }
+
+	/* Fall back to the default timeout if the frontend-specific one is
+	 * undefined, or for backend connections.
+	 */
+ if (!tick_isset(qcc->task->expire)) {
+ TRACE_DEVEL("fallback to default timeout", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->task->expire = tick_add_ifset(now_ms, qcc->timeout);
+ }
+
+ task_queue(qcc->task);
+
+ leave:
+	TRACE_LEAVE(QMUX_EV_QCC_WAKE, qcc->conn);
+}
+
+/* Mark a stream as open if it was idle. This can be used on every
+ * successful emission/reception operation to update the stream state.
+ */
+static void qcs_idle_open(struct qcs *qcs)
+{
+ /* This operation must not be used if the stream is already closed. */
+ BUG_ON_HOT(qcs->st == QC_SS_CLO);
+
+ if (qcs->st == QC_SS_IDLE) {
+ TRACE_STATE("opening stream", QMUX_EV_QCS_NEW, qcs->qcc->conn, qcs);
+ qcs->st = QC_SS_OPEN;
+ }
+}
+
+/* Close the local channel of <qcs> instance. */
+static void qcs_close_local(struct qcs *qcs)
+{
+ TRACE_STATE("closing stream locally", QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+
+ /* The stream must have already been opened. */
+ BUG_ON_HOT(qcs->st == QC_SS_IDLE);
+
+ /* This operation cannot be used multiple times. */
+ BUG_ON_HOT(qcs->st == QC_SS_HLOC || qcs->st == QC_SS_CLO);
+
+ if (quic_stream_is_bidi(qcs->id)) {
+ qcs->st = (qcs->st == QC_SS_HREM) ? QC_SS_CLO : QC_SS_HLOC;
+
+ if (qcs->flags & QC_SF_HREQ_RECV)
+ qcc_rm_hreq(qcs->qcc);
+ }
+ else {
+ /* Only local uni streams are valid for this operation. */
+ BUG_ON_HOT(quic_stream_is_remote(qcs->qcc, qcs->id));
+ qcs->st = QC_SS_CLO;
+ }
+}
+
+/* Close the remote channel of <qcs> instance. */
+static void qcs_close_remote(struct qcs *qcs)
+{
+ TRACE_STATE("closing stream remotely", QMUX_EV_QCS_RECV, qcs->qcc->conn, qcs);
+
+ /* The stream must have already been opened. */
+ BUG_ON_HOT(qcs->st == QC_SS_IDLE);
+
+ /* This operation cannot be used multiple times. */
+ BUG_ON_HOT(qcs->st == QC_SS_HREM || qcs->st == QC_SS_CLO);
+
+ if (quic_stream_is_bidi(qcs->id)) {
+ qcs->st = (qcs->st == QC_SS_HLOC) ? QC_SS_CLO : QC_SS_HREM;
+ }
+ else {
+ /* Only remote uni streams are valid for this operation. */
+ BUG_ON_HOT(quic_stream_is_local(qcs->qcc, qcs->id));
+ qcs->st = QC_SS_CLO;
+ }
+}
+
+int qcs_is_close_local(struct qcs *qcs)
+{
+ return qcs->st == QC_SS_HLOC || qcs->st == QC_SS_CLO;
+}
+
+int qcs_is_close_remote(struct qcs *qcs)
+{
+ return qcs->st == QC_SS_HREM || qcs->st == QC_SS_CLO;
+}
+
+/* Allocate buffer <bptr> for stream <qcs> if not already done.
+ *
+ * Returns the buffer instance or NULL on allocation failure.
+ */
+struct buffer *qcs_get_buf(struct qcs *qcs, struct buffer *bptr)
+{
+ return b_alloc(bptr);
+}
+
+/* Allocate buffer <ncbuf> for stream <qcs> if not already done.
+ *
+ * Returns the buffer instance or NULL on allocation failure.
+ */
+static struct ncbuf *qcs_get_ncbuf(struct qcs *qcs, struct ncbuf *ncbuf)
+{
+ struct buffer buf = BUF_NULL;
+
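+ /* The ncbuf is overlaid on a plain buffer area: allocate a regular
+ * buffer and build the non-contiguous bookkeeping on top of it.
+ */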
+ if (ncb_is_null(ncbuf)) {
+ if (!b_alloc(&buf))
+ return NULL;
+
+ *ncbuf = ncb_make(buf.area, buf.size, 0);
+ ncb_init(ncbuf, 0);
+ }
+
+ return ncbuf;
+}
+
+/* Notify a possible subscriber on <qcs>, or else wake up the stconn layer if
+ * initialized.
+ */
+static void qcs_alert(struct qcs *qcs)
+{
+ if (qcs->subs) {
+ qcs_notify_recv(qcs);
+ qcs_notify_send(qcs);
+ }
+ else if (qcs_sc(qcs) && qcs->sd->sc->app_ops->wake) {
+ TRACE_POINT(QMUX_EV_STRM_WAKE, qcs->qcc->conn, qcs);
+ qcs->sd->sc->app_ops->wake(qcs->sd->sc);
+ }
+}
+
+int qcs_subscribe(struct qcs *qcs, int event_type, struct wait_event *es)
+{
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND|QMUX_EV_STRM_RECV, qcc->conn, qcs);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(qcs->subs && qcs->subs != es);
+
+ es->events |= event_type;
+ qcs->subs = es;
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", QMUX_EV_STRM_RECV, qcc->conn, qcs);
+
+ if (event_type & SUB_RETRY_SEND)
+ TRACE_DEVEL("subscribe(send)", QMUX_EV_STRM_SEND, qcc->conn, qcs);
+
+ TRACE_LEAVE(QMUX_EV_STRM_SEND|QMUX_EV_STRM_RECV, qcc->conn, qcs);
+
+ return 0;
+}
+
+void qcs_notify_recv(struct qcs *qcs)
+{
+ if (qcs->subs && qcs->subs->events & SUB_RETRY_RECV) {
+ TRACE_POINT(QMUX_EV_STRM_WAKE, qcs->qcc->conn, qcs);
+ tasklet_wakeup(qcs->subs->tasklet);
+ qcs->subs->events &= ~SUB_RETRY_RECV;
+ if (!qcs->subs->events)
+ qcs->subs = NULL;
+ }
+}
+
+void qcs_notify_send(struct qcs *qcs)
+{
+ if (qcs->subs && qcs->subs->events & SUB_RETRY_SEND) {
+ TRACE_POINT(QMUX_EV_STRM_WAKE, qcs->qcc->conn, qcs);
+ tasklet_wakeup(qcs->subs->tasklet);
+ qcs->subs->events &= ~SUB_RETRY_SEND;
+ if (!qcs->subs->events)
+ qcs->subs = NULL;
+ }
+}
+
+/* A fatal error is detected locally for <qcc> connection. It should be closed
+ * with a CONNECTION_CLOSE using <err> code. Set <app> to true to indicate that
+ * the code must be considered as an application level error. This function
+ * must not be called more than once per connection.
+ */
+void qcc_set_error(struct qcc *qcc, int err, int app)
+{
+ /* This must not be called multiple times per connection. */
+ BUG_ON(qcc->flags & QC_CF_ERRL);
+
+ TRACE_STATE("connection on error", QMUX_EV_QCC_ERR, qcc->conn);
+
+ qcc->flags |= QC_CF_ERRL;
+ qcc->err = app ? quic_err_app(err) : quic_err_transport(err);
+
+ /* TODO
+ * Ensure qcc_io_send() will be called to convert QC_CF_ERRL into
+ * QC_CF_ERRL_DONE with a CONNECTION_CLOSE frame emission. This may be
+ * unnecessary if we are currently in the MUX tasklet context, but for
+ * now it is simpler than guaranteeing that a wakeup is never forgotten
+ * outside of this function.
+ */
+ tasklet_wakeup(qcc->wait_event.tasklet);
+}
+
+/* Open a locally initiated stream for the connection <qcc>. Set <bidi> for a
+ * bidirectional stream, else a unidirectional stream is opened. The next
+ * available ID on the connection will be used according to the stream type.
+ *
+ * Returns the allocated stream instance or NULL on error.
+ */
+struct qcs *qcc_init_stream_local(struct qcc *qcc, int bidi)
+{
+ struct qcs *qcs;
+ enum qcs_type type;
+ uint64_t *next;
+
+ TRACE_ENTER(QMUX_EV_QCS_NEW, qcc->conn);
+
+ if (bidi) {
+ next = &qcc->next_bidi_l;
+ type = conn_is_back(qcc->conn) ? QCS_CLT_BIDI : QCS_SRV_BIDI;
+ }
+ else {
+ next = &qcc->next_uni_l;
+ type = conn_is_back(qcc->conn) ? QCS_CLT_UNI : QCS_SRV_UNI;
+ }
+
+ /* TODO ensure that we won't overflow the remote peer's flow-control
+ * limit on streams. Otherwise, we should emit a STREAMS_BLOCKED frame.
+ */
+
+ qcs = qcs_new(qcc, *next, type);
+ if (!qcs) {
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn);
+ qcc_set_error(qcc, QC_ERR_INTERNAL_ERROR, 0);
+ return NULL;
+ }
+
+ TRACE_PROTO("opening local stream", QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ *next += 4;
+
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ return qcs;
+}
+
+/* Open a remote initiated stream for the connection <qcc> with ID <id>. The
+ * caller is responsible for ensuring that a stream with the same ID was not
+ * already opened. This function will also create all intermediary streams
+ * with a smaller ID that were not already opened.
+ *
+ * Returns the allocated stream instance or NULL on error.
+ */
+static struct qcs *qcc_init_stream_remote(struct qcc *qcc, uint64_t id)
+{
+ struct qcs *qcs = NULL;
+ enum qcs_type type;
+ uint64_t *largest, max_id;
+
+ TRACE_ENTER(QMUX_EV_QCS_NEW, qcc->conn);
+
+ /* Function reserved for remote stream IDs. */
+ BUG_ON(quic_stream_is_local(qcc, id));
+
+ if (quic_stream_is_bidi(id)) {
+ largest = &qcc->largest_bidi_r;
+ type = conn_is_back(qcc->conn) ? QCS_SRV_BIDI : QCS_CLT_BIDI;
+ }
+ else {
+ largest = &qcc->largest_uni_r;
+ type = conn_is_back(qcc->conn) ? QCS_SRV_UNI : QCS_CLT_UNI;
+ }
+
+ /* RFC 9000 4.6. Controlling Concurrency
+ *
+ * An endpoint that receives a frame with a stream ID exceeding the
+ * limit it has sent MUST treat this as a connection error of type
+ * STREAM_LIMIT_ERROR
+ */
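+ /* Stream IDs of a given type being spaced 4 apart, a limit of <ms>
+ * streams maps to IDs strictly below ms * 4.
+ */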
+ max_id = quic_stream_is_bidi(id) ? qcc->lfctl.ms_bidi * 4 :
+ qcc->lfctl.ms_uni * 4;
+ if (id >= max_id) {
+ TRACE_ERROR("flow control error", QMUX_EV_QCS_NEW|QMUX_EV_PROTO_ERR, qcc->conn);
+ qcc_set_error(qcc, QC_ERR_STREAM_LIMIT_ERROR, 0);
+ goto err;
+ }
+
+ /* Only stream IDs not already opened can be used. */
+ BUG_ON(id < *largest);
+
+ while (id >= *largest) {
+ const char *str = *largest < id ? "initializing intermediary remote stream" :
+ "initializing remote stream";
+
+ qcs = qcs_new(qcc, *largest, type);
+ if (!qcs) {
+ TRACE_ERROR("stream fallocation failure", QMUX_EV_QCS_NEW, qcc->conn);
+ qcc_set_error(qcc, QC_ERR_INTERNAL_ERROR, 0);
+ goto err;
+ }
+
+ TRACE_PROTO(str, QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ *largest += 4;
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn, qcs);
+ return qcs;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCS_NEW, qcc->conn);
+ return NULL;
+}
+
+struct stconn *qcs_attach_sc(struct qcs *qcs, struct buffer *buf, char fin)
+{
+ struct qcc *qcc = qcs->qcc;
+ struct session *sess = qcc->conn->owner;
+
+ qcs->sd = sedesc_new();
+ if (!qcs->sd)
+ return NULL;
+
+ qcs->sd->se = qcs;
+ qcs->sd->conn = qcc->conn;
+ se_fl_set(qcs->sd, SE_FL_T_MUX | SE_FL_ORPHAN | SE_FL_NOT_FIRST);
+ se_expect_no_data(qcs->sd);
+
+ if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_QUIC_SND))
+ se_fl_set(qcs->sd, SE_FL_MAY_FASTFWD_CONS);
+
+ /* TODO duplicated from mux_h2 */
+ sess->t_idle = ns_to_ms(now_ns - sess->accept_ts) - sess->t_handshake;
+
+ if (!sc_new_from_endp(qcs->sd, sess, buf))
+ return NULL;
+
+ /* QC_SF_HREQ_RECV must be set only once per stream. Otherwise, the
+ * nb_hreq counter will be incorrect for the connection.
+ */
+ BUG_ON_HOT(qcs->flags & QC_SF_HREQ_RECV);
+ qcs->flags |= QC_SF_HREQ_RECV;
+ ++qcc->nb_sc;
+ ++qcc->nb_hreq;
+
+ /* TODO duplicated from mux_h2 */
+ sess->accept_date = date;
+ sess->accept_ts = now_ns;
+ sess->t_handshake = 0;
+ sess->t_idle = 0;
+
+ /* A stream must have been registered for HTTP wait before attaching
+ * it to sedesc. See <qcs_wait_http_req> for more info.
+ */
+ BUG_ON_HOT(!LIST_INLIST(&qcs->el_opening));
+ LIST_DEL_INIT(&qcs->el_opening);
+
+ if (fin) {
+ TRACE_STATE("report end-of-input", QMUX_EV_STRM_RECV, qcc->conn, qcs);
+ se_fl_set(qcs->sd, SE_FL_EOI);
+ }
+
+ /* A QCS can already be locally closed before stream layer
+ * instantiation. This notably happens if STOP_SENDING was the first
+ * frame received for this instance. In this case, an error is
+ * immediately reported to the stream layer to prevent transmission.
+ *
+ * TODO it could be better not to instantiate the stream layer at all.
+ * However, extra care is required to ensure the QCS instance is released.
+ */
+ if (unlikely(qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET))) {
+ TRACE_STATE("report early error", QMUX_EV_STRM_RECV, qcc->conn, qcs);
+ se_fl_set_error(qcs->sd);
+ }
+
+ return qcs->sd->sc;
+}
+
+/* Use this function for a stream ID <id> which is not in the <qcc> stream tree. It
+ * returns true if the associated stream is closed.
+ */
+static int qcc_stream_id_is_closed(struct qcc *qcc, uint64_t id)
+{
+ uint64_t *largest;
+
+ /* This function must only be used for streams not present in the stream tree. */
+ BUG_ON_HOT(eb64_lookup(&qcc->streams_by_id, id));
+
+ if (quic_stream_is_local(qcc, id)) {
+ largest = quic_stream_is_uni(id) ? &qcc->next_uni_l :
+ &qcc->next_bidi_l;
+ }
+ else {
+ largest = quic_stream_is_uni(id) ? &qcc->largest_uni_r :
+ &qcc->largest_bidi_r;
+ }
+
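+ /* <largest> is the next ID of this type to be opened: any smaller ID
+ * absent from the tree was thus already released.
+ */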
+ return id < *largest;
+}
+
+/* Retrieve the stream instance from <id> ID. This can be used when receiving
+ * STREAM, STREAM_DATA_BLOCKED, RESET_STREAM, MAX_STREAM_DATA or STOP_SENDING
+ * frames. Set <receive_only> or <send_only> to false if the corresponding
+ * stream type is not allowed. If the stream instance is found, it is stored in
+ * <out>.
+ *
+ * Returns 0 on success else non-zero. On error, a RESET_STREAM or a
+ * CONNECTION_CLOSE is automatically emitted. Beware that <out> may be NULL
+ * on success if the stream has already been closed.
+ */
+int qcc_get_qcs(struct qcc *qcc, uint64_t id, int receive_only, int send_only,
+ struct qcs **out)
+{
+ struct eb64_node *node;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+ *out = NULL;
+
+ if (!receive_only && quic_stream_is_uni(id) && quic_stream_is_remote(qcc, id)) {
+ TRACE_ERROR("receive-only stream not allowed", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS|QMUX_EV_PROTO_ERR, qcc->conn, NULL, &id);
+ qcc_set_error(qcc, QC_ERR_STREAM_STATE_ERROR, 0);
+ goto err;
+ }
+
+ if (!send_only && quic_stream_is_uni(id) && quic_stream_is_local(qcc, id)) {
+ TRACE_ERROR("send-only stream not allowed", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS|QMUX_EV_PROTO_ERR, qcc->conn, NULL, &id);
+ qcc_set_error(qcc, QC_ERR_STREAM_STATE_ERROR, 0);
+ goto err;
+ }
+
+ /* Search the stream in the connection tree. */
+ node = eb64_lookup(&qcc->streams_by_id, id);
+ if (node) {
+ *out = eb64_entry(node, struct qcs, by_id);
+ TRACE_DEVEL("using stream from connection tree", QMUX_EV_QCC_RECV, qcc->conn, *out);
+ goto out;
+ }
+
+ /* Check if stream is already closed. */
+ if (qcc_stream_id_is_closed(qcc, id)) {
+ TRACE_DATA("already closed stream", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS, qcc->conn, NULL, &id);
+ /* Consider this as a success even if <out> is left NULL. */
+ goto out;
+ }
+
+ /* Create the stream. This is only valid for remote-initiated ones. A
+ * local stream must have already been explicitly created by the
+ * application protocol layer.
+ */
+ if (quic_stream_is_local(qcc, id)) {
+ /* RFC 9000 19.8. STREAM Frames
+ *
+ * An endpoint MUST terminate the connection with error
+ * STREAM_STATE_ERROR if it receives a STREAM frame for a locally
+ * initiated stream that has not yet been created, or for a send-only
+ * stream.
+ */
+ TRACE_ERROR("locally initiated stream not yet created", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS|QMUX_EV_PROTO_ERR, qcc->conn, NULL, &id);
+ qcc_set_error(qcc, QC_ERR_STREAM_STATE_ERROR, 0);
+ goto err;
+ }
+ else {
+ /* Remote stream not found - try to open it. */
+ *out = qcc_init_stream_remote(qcc, id);
+ if (!*out) {
+ TRACE_ERROR("stream creation error", QMUX_EV_QCC_RECV|QMUX_EV_QCC_NQCS, qcc->conn, NULL, &id);
+ goto err;
+ }
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn, *out);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+}
+
+/* Build a regular buffer view over the contiguous data at the head of ncbuf <b>. */
+static inline struct buffer qcs_b_dup(const struct ncbuf *b)
+{
+ return b_make(ncb_orig(b), b->size, b->head, ncb_data(b, 0));
+}
+
+/* Remove <bytes> from <qcs> Rx buffer. New flow-control credit for received
+ * offsets may be advertised to the peer if needed.
+ */
+static void qcs_consume(struct qcs *qcs, uint64_t bytes)
+{
+ struct qcc *qcc = qcs->qcc;
+ struct quic_frame *frm;
+ struct ncbuf *buf = &qcs->rx.ncbuf;
+ enum ncb_ret ret;
+
+ TRACE_ENTER(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+
+ ret = ncb_advance(buf, bytes);
+ if (ret) {
+ ABORT_NOW(); /* should not happen: we only advance within buffered data */
+ }
+
+ if (ncb_is_empty(buf))
+ qcs_free_ncbuf(qcs, buf);
+
+ qcs->rx.offset += bytes;
+ /* Not necessary to emit a MAX_STREAM_DATA if all data received. */
+ if (qcs->flags & QC_SF_SIZE_KNOWN)
+ goto conn_fctl;
+
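+ /* Credit refresh strategy: once less than half of the initial window
+ * remains, re-open a full window above the consumed offset. E.g. with
+ * a 64k initial window, new credit is advertised as soon as less than
+ * 32k remains.
+ */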
+ if (qcs->rx.msd - qcs->rx.offset < qcs->rx.msd_init / 2) {
+ TRACE_DATA("increase stream credit via MAX_STREAM_DATA", QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ frm = qc_frm_alloc(QUIC_FT_MAX_STREAM_DATA);
+ if (!frm) {
+ qcc_set_error(qcc, QC_ERR_INTERNAL_ERROR, 0);
+ return;
+ }
+
+ qcs->rx.msd = qcs->rx.offset + qcs->rx.msd_init;
+
+ frm->max_stream_data.id = qcs->id;
+ frm->max_stream_data.max_stream_data = qcs->rx.msd;
+
+ LIST_APPEND(&qcc->lfctl.frms, &frm->list);
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+
+ conn_fctl:
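+ /* Same half-window refresh strategy at the connection level with
+ * MAX_DATA.
+ */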
+ qcc->lfctl.offsets_consume += bytes;
+ if (qcc->lfctl.md - qcc->lfctl.offsets_consume < qcc->lfctl.md_init / 2) {
+ TRACE_DATA("increase conn credit via MAX_DATA", QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ frm = qc_frm_alloc(QUIC_FT_MAX_DATA);
+ if (!frm) {
+ qcc_set_error(qcc, QC_ERR_INTERNAL_ERROR, 0);
+ return;
+ }
+
+ qcc->lfctl.md = qcc->lfctl.offsets_consume + qcc->lfctl.md_init;
+
+ frm->max_data.max_data = qcc->lfctl.md;
+
+ LIST_APPEND(&qcs->qcc->lfctl.frms, &frm->list);
+ tasklet_wakeup(qcs->qcc->wait_event.tasklet);
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+}
+
+/* Decode the content of STREAM frames already received on the stream instance
+ * <qcs>.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int qcc_decode_qcs(struct qcc *qcc, struct qcs *qcs)
+{
+ struct buffer b;
+ ssize_t ret;
+ int fin = 0;
+
+ TRACE_ENTER(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+
+ b = qcs_b_dup(&qcs->rx.ncbuf);
+
+ /* Signal FIN to application if STREAM FIN received with all data. */
+ if (qcs_is_close_remote(qcs))
+ fin = 1;
+
+ if (!(qcs->flags & QC_SF_READ_ABORTED)) {
+ ret = qcc->app_ops->decode_qcs(qcs, &b, fin);
+ if (ret < 0) {
+ TRACE_ERROR("decoding error", QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto err;
+ }
+
+ if (qcs->flags & QC_SF_TO_RESET) {
+ if (qcs_sc(qcs) && !se_fl_test(qcs->sd, SE_FL_ERROR|SE_FL_ERR_PENDING)) {
+ se_fl_set_error(qcs->sd);
+ qcs_alert(qcs);
+ }
+ }
+ }
+ else {
+ TRACE_DATA("ignore read on stream", QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ ret = b_data(&b);
+ }
+
+ if (ret)
+ qcs_consume(qcs, ret);
+ if (ret || (!b_data(&b) && fin))
+ qcs_notify_recv(qcs);
+
+ TRACE_LEAVE(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ return 1;
+}
+
+/* Prepare for the emission of RESET_STREAM on <qcs> with error code <err>. */
+void qcc_reset_stream(struct qcs *qcs, int err)
+{
+ struct qcc *qcc = qcs->qcc;
+
+ if ((qcs->flags & QC_SF_TO_RESET) || qcs_is_close_local(qcs))
+ return;
+
+ TRACE_STATE("reset stream", QMUX_EV_QCS_END, qcc->conn, qcs);
+ qcs->flags |= QC_SF_TO_RESET;
+ qcs->err = err;
+
+ /* Remove prepared stream data from the connection flow-control calculation. */
+ if (qcs->tx.offset > qcs->tx.sent_offset) {
+ const uint64_t diff = qcs->tx.offset - qcs->tx.sent_offset;
+ BUG_ON(qcc->tx.offsets - diff < qcc->tx.sent_offsets);
+ qcc->tx.offsets -= diff;
+ /* Reset qcs offset to prevent BUG_ON() on qcs_destroy(). */
+ qcs->tx.offset = qcs->tx.sent_offset;
+ }
+
+ /* Report send error to stream-endpoint layer. */
+ if (qcs_sc(qcs)) {
+ se_fl_set_error(qcs->sd);
+ qcs_alert(qcs);
+ }
+
+ qcc_send_stream(qcs, 1);
+ tasklet_wakeup(qcc->wait_event.tasklet);
+}
+
+/* Register <qcs> stream for emission of STREAM, STOP_SENDING or RESET_STREAM.
+ * Set <urg> to 1 if the stream content should be given priority over
+ * other streams.
+ */
+void qcc_send_stream(struct qcs *qcs, int urg)
+{
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ /* Cannot send if already closed. */
+ BUG_ON(qcs_is_close_local(qcs));
+
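+ /* Urgent streams are inserted at the front of <send_list> so that
+ * their STOP_SENDING/RESET_STREAM frames are emitted before the
+ * STREAM data of other streams.
+ */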
+ if (urg) {
+ LIST_DEL_INIT(&qcs->el_send);
+ LIST_INSERT(&qcc->send_list, &qcs->el_send);
+ }
+ else {
+ if (!LIST_INLIST(&qcs->el_send))
+ LIST_APPEND(&qcs->qcc->send_list, &qcs->el_send);
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+}
+
+/* Prepare for the emission of STOP_SENDING on <qcs>. */
+void qcc_abort_stream_read(struct qcs *qcs)
+{
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_QCC_NEW, qcc->conn, qcs);
+
+ if ((qcs->flags & QC_SF_TO_STOP_SENDING) || qcs_is_close_remote(qcs))
+ goto end;
+
+ TRACE_STATE("abort stream read", QMUX_EV_QCS_END, qcc->conn, qcs);
+ qcs->flags |= (QC_SF_TO_STOP_SENDING|QC_SF_READ_ABORTED);
+
+ qcc_send_stream(qcs, 1);
+ tasklet_wakeup(qcc->wait_event.tasklet);
+
+ end:
+ TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn, qcs);
+}
+
+/* Install the <app_ops> applicative layer of a QUIC connection on mux <qcc>.
+ * Returns 0 on success else non-zero.
+ */
+int qcc_install_app_ops(struct qcc *qcc, const struct qcc_app_ops *app_ops)
+{
+ TRACE_ENTER(QMUX_EV_QCC_NEW, qcc->conn);
+
+ if (app_ops->init && !app_ops->init(qcc)) {
+ TRACE_ERROR("app ops init error", QMUX_EV_QCC_NEW, qcc->conn);
+ goto err;
+ }
+
+ TRACE_PROTO("application layer initialized", QMUX_EV_QCC_NEW, qcc->conn);
+ qcc->app_ops = app_ops;
+
+ /* RFC 9114 7.2.4.2. Initialization
+ *
+ * Endpoints MUST NOT require any data to be
+ * received from the peer prior to sending the SETTINGS frame;
+ * settings MUST be sent as soon as the transport is ready to
+ * send data.
+ */
+ if (qcc->app_ops->finalize) {
+ if (qcc->app_ops->finalize(qcc->ctx)) {
+ TRACE_ERROR("app ops finalize error", QMUX_EV_QCC_NEW, qcc->conn);
+ goto err;
+ }
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn);
+ return 1;
+}
+
+/* Handle a new STREAM frame for the stream with id <id>. The payload is
+ * pointed to by <data>, with length <len>, at offset <offset>. <fin> is set if
+ * the QUIC frame FIN bit is set.
+ *
+ * Returns 0 on success else non-zero. On error, the received frame should not
+ * be acknowledged.
+ */
+int qcc_recv(struct qcc *qcc, uint64_t id, uint64_t len, uint64_t offset,
+ char fin, char *data)
+{
+ struct qcs *qcs;
+ enum ncb_ret ret;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_ERRL) {
+ TRACE_DATA("connection on error", QMUX_EV_QCC_RECV, qcc->conn);
+ goto err;
+ }
+
+ /* RFC 9000 19.8. STREAM Frames
+ *
+ * An endpoint MUST terminate the connection with error
+ * STREAM_STATE_ERROR if it receives a STREAM frame for a locally
+ * initiated stream that has not yet been created, or for a send-only
+ * stream.
+ */
+ if (qcc_get_qcs(qcc, id, 1, 0, &qcs)) {
+ TRACE_DATA("qcs retrieval error", QMUX_EV_QCC_RECV, qcc->conn);
+ goto err;
+ }
+
+ if (!qcs) {
+ TRACE_DATA("already closed stream", QMUX_EV_QCC_RECV, qcc->conn);
+ goto out;
+ }
+
+ /* RFC 9000 4.5. Stream Final Size
+ *
+ * Once a final size for a stream is known, it cannot change. If a
+ * RESET_STREAM or STREAM frame is received indicating a change in the
+ * final size for the stream, an endpoint SHOULD respond with an error
+ * of type FINAL_SIZE_ERROR; see Section 11 for details on error
+ * handling.
+ */
+ if (qcs->flags & QC_SF_SIZE_KNOWN &&
+ (offset + len > qcs->rx.offset_max || (fin && offset + len < qcs->rx.offset_max))) {
+ TRACE_ERROR("final size error", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV|QMUX_EV_PROTO_ERR, qcc->conn, qcs);
+ qcc_set_error(qcc, QC_ERR_FINAL_SIZE_ERROR, 0);
+ goto err;
+ }
+
+ if (qcs_is_close_remote(qcs)) {
+ TRACE_DATA("skipping STREAM for remotely closed", QMUX_EV_QCC_RECV, qcc->conn);
+ goto out;
+ }
+
+ if (offset + len < qcs->rx.offset ||
+ (offset + len == qcs->rx.offset && (!fin || (qcs->flags & QC_SF_SIZE_KNOWN)))) {
+ TRACE_DATA("already received offset", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto out;
+ }
+
+ TRACE_PROTO("receiving STREAM", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ qcs_idle_open(qcs);
+
+ if (offset + len > qcs->rx.offset_max) {
+ uint64_t diff = offset + len - qcs->rx.offset_max;
+ qcs->rx.offset_max = offset + len;
+ qcc->lfctl.offsets_recv += diff;
+
+ if (offset + len > qcs->rx.msd ||
+ qcc->lfctl.offsets_recv > qcc->lfctl.md) {
+ /* RFC 9000 4.1. Data Flow Control
+ *
+ * A receiver MUST close the connection with an error
+ * of type FLOW_CONTROL_ERROR if the sender violates
+ * the advertised connection or stream data limits
+ */
+ TRACE_ERROR("flow control error", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV|QMUX_EV_PROTO_ERR,
+ qcc->conn, qcs);
+ qcc_set_error(qcc, QC_ERR_FLOW_CONTROL_ERROR, 0);
+ goto err;
+ }
+ }
+
+ if (!qcs_get_ncbuf(qcs, &qcs->rx.ncbuf) || ncb_is_null(&qcs->rx.ncbuf)) {
+ TRACE_ERROR("receive ncbuf alloc failure", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ qcc_set_error(qcc, QC_ERR_INTERNAL_ERROR, 0);
+ goto err;
+ }
+
+ TRACE_DATA("newly received offset", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ if (offset < qcs->rx.offset) {
+ size_t diff = qcs->rx.offset - offset;
+
+ len -= diff;
+ data += diff;
+ offset = qcs->rx.offset;
+ }
+
+ if (len) {
+ ret = ncb_add(&qcs->rx.ncbuf, offset - qcs->rx.offset, data, len, NCB_ADD_COMPARE);
+ switch (ret) {
+ case NCB_RET_OK:
+ break;
+
+ case NCB_RET_DATA_REJ:
+ /* RFC 9000 2.2. Sending and Receiving Data
+ *
+ * An endpoint could receive data for a stream at the
+ * same stream offset multiple times. Data that has
+ * already been received can be discarded. The data at
+ * a given offset MUST NOT change if it is sent
+ * multiple times; an endpoint MAY treat receipt of
+ * different data at the same offset within a stream as
+ * a connection error of type PROTOCOL_VIOLATION.
+ */
+ TRACE_ERROR("overlapping data rejected", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV|QMUX_EV_PROTO_ERR,
+ qcc->conn, qcs);
+ qcc_set_error(qcc, QC_ERR_PROTOCOL_VIOLATION, 0);
+ return 1;
+
+ case NCB_RET_GAP_SIZE:
+ TRACE_DATA("cannot bufferize frame due to gap size limit", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV,
+ qcc->conn, qcs);
+ return 1;
+ }
+ }
+
+ if (fin)
+ qcs->flags |= QC_SF_SIZE_KNOWN;
+
+ if (qcs->flags & QC_SF_SIZE_KNOWN &&
+ qcs->rx.offset_max == qcs->rx.offset + ncb_data(&qcs->rx.ncbuf, 0)) {
+ qcs_close_remote(qcs);
+ }
+
+ if ((ncb_data(&qcs->rx.ncbuf, 0) && !(qcs->flags & QC_SF_DEM_FULL)) || fin) {
+ qcc_decode_qcs(qcc, qcs);
+ qcc_refresh_timeout(qcc);
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+}
+
+/* Handle a new MAX_DATA frame. <max> must contain the maximum data field of
+ * the frame.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qcc_recv_max_data(struct qcc *qcc, uint64_t max)
+{
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ TRACE_PROTO("receiving MAX_DATA", QMUX_EV_QCC_RECV, qcc->conn);
+ if (qcc->rfctl.md < max) {
+ qcc->rfctl.md = max;
+ TRACE_DATA("increase remote max-data", QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_BLK_MFCTL) {
+ qcc->flags &= ~QC_CF_BLK_MFCTL;
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+}
+
+/* Handle a new MAX_STREAM_DATA frame. <max> must contain the maximum data
+ * field of the frame and <id> is the identifier of the QUIC stream.
+ *
+ * Returns 0 on success else non-zero. On error, the received frame should not
+ * be acknowledged.
+ */
+int qcc_recv_max_stream_data(struct qcc *qcc, uint64_t id, uint64_t max)
+{
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_ERRL) {
+ TRACE_DATA("connection on error", QMUX_EV_QCC_RECV, qcc->conn);
+ goto err;
+ }
+
+ /* RFC 9000 19.10. MAX_STREAM_DATA Frames
+ *
+ * Receiving a MAX_STREAM_DATA frame for a locally
+ * initiated stream that has not yet been created MUST be treated as a
+ * connection error of type STREAM_STATE_ERROR. An endpoint that
+ * receives a MAX_STREAM_DATA frame for a receive-only stream MUST
+ * terminate the connection with error STREAM_STATE_ERROR.
+ */
+ if (qcc_get_qcs(qcc, id, 0, 1, &qcs))
+ goto err;
+
+ if (qcs) {
+ TRACE_PROTO("receiving MAX_STREAM_DATA", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ if (max > qcs->tx.msd) {
+ qcs->tx.msd = max;
+ TRACE_DATA("increase remote max-stream-data", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+
+ if (qcs->flags & QC_SF_BLK_SFCTL) {
+ qcs->flags &= ~QC_SF_BLK_SFCTL;
+ /* TODO optim: only wake up the IO-CB if the stream has data to send. */
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+ }
+ }
+
+ if (qcc_may_expire(qcc) && !qcc->nb_hreq)
+ qcc_refresh_timeout(qcc);
+
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_DEVEL("leaving on error", QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+}
+
+/* Handle a new RESET_STREAM frame from stream ID <id> with error code <err>
+ * and final stream size <final_size>.
+ *
+ * Returns 0 on success else non-zero. On error, the received frame should not
+ * be acknowledged.
+ */
+int qcc_recv_reset_stream(struct qcc *qcc, uint64_t id, uint64_t err, uint64_t final_size)
+{
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_ERRL) {
+ TRACE_DATA("connection on error", QMUX_EV_QCC_RECV, qcc->conn);
+ goto err;
+ }
+
+ /* RFC 9000 19.4. RESET_STREAM Frames
+ *
+ * An endpoint that receives a RESET_STREAM frame for a send-only stream
+ * MUST terminate the connection with error STREAM_STATE_ERROR.
+ */
+ if (qcc_get_qcs(qcc, id, 1, 0, &qcs)) {
+ TRACE_ERROR("RESET_STREAM for send-only stream received", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto err;
+ }
+
+ /* RFC 9000 3.2. Receiving Stream States
+ *
+ * A RESET_STREAM signal might be suppressed or withheld
+ * if stream data is completely received and is buffered to be read by
+ * the application. If the RESET_STREAM is suppressed, the receiving
+ * part of the stream remains in "Data Recvd".
+ */
+ if (!qcs || qcs_is_close_remote(qcs))
+ goto out;
+
+ TRACE_PROTO("receiving RESET_STREAM", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ qcs_idle_open(qcs);
+
+ /* Ensure stream closure is not forbidden by application protocol. */
+ if (qcc->app_ops->close) {
+ if (qcc->app_ops->close(qcs, QCC_APP_OPS_CLOSE_SIDE_RD)) {
+ TRACE_ERROR("closure rejected by app layer", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto out;
+ }
+ }
+
+ if (qcs->rx.offset_max > final_size ||
+ ((qcs->flags & QC_SF_SIZE_KNOWN) && qcs->rx.offset_max != final_size)) {
+ TRACE_ERROR("final size error on RESET_STREAM", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ qcc_set_error(qcc, QC_ERR_FINAL_SIZE_ERROR, 0);
+ goto err;
+ }
+
+ /* RFC 9000 3.2. Receiving Stream States
+ *
+ * An
+ * implementation MAY interrupt delivery of stream data, discard any
+ * data that was not consumed, and signal the receipt of the
+ * RESET_STREAM.
+ */
+ qcs->flags |= QC_SF_SIZE_KNOWN|QC_SF_RECV_RESET;
+ qcs_close_remote(qcs);
+ qcs_free_ncbuf(qcs, &qcs->rx.ncbuf);
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+}
+
+/* Handle a new STOP_SENDING frame for stream ID <id>. The error code should be
+ * specified in <err>.
+ *
+ * Returns 0 on success else non-zero. On error, the received frame should not
+ * be acknowledged.
+ */
+int qcc_recv_stop_sending(struct qcc *qcc, uint64_t id, uint64_t err)
+{
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_ERRL) {
+ TRACE_DATA("connection on error", QMUX_EV_QCC_RECV, qcc->conn);
+ goto err;
+ }
+
+ /* RFC 9000 19.5. STOP_SENDING Frames
+ *
+ * Receiving a STOP_SENDING frame for a
+ * locally initiated stream that has not yet been created MUST be
+ * treated as a connection error of type STREAM_STATE_ERROR. An
+ * endpoint that receives a STOP_SENDING frame for a receive-only stream
+ * MUST terminate the connection with error STREAM_STATE_ERROR.
+ */
+ if (qcc_get_qcs(qcc, id, 0, 1, &qcs))
+ goto err;
+
+ if (!qcs)
+ goto out;
+
+ TRACE_PROTO("receiving STOP_SENDING", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+
+ /* RFC 9000 3.5. Solicited State Transitions
+ *
+ * An endpoint is expected to send another STOP_SENDING frame if a
+ * packet containing a previous STOP_SENDING is lost. However, once
+ * either all stream data or a RESET_STREAM frame has been received for
+ * the stream -- that is, the stream is in any state other than "Recv"
+ * or "Size Known" -- sending a STOP_SENDING frame is unnecessary.
+ */
+
+ /* TODO thanks to the previous RFC clause, STOP_SENDING is ignored if the
+ * stream has already been closed locally. This is useful to avoid emitting
+ * multiple RESET_STREAM for a single stream. This works if the stream was
+ * locally closed because all data was transmitted, but in this case the RFC
+ * advises to use an explicit RESET_STREAM.
+ */
+ if (qcs_is_close_local(qcs)) {
+ TRACE_STATE("ignoring STOP_SENDING", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto out;
+ }
+
+ qcs_idle_open(qcs);
+
+ if (qcc->app_ops->close) {
+ if (qcc->app_ops->close(qcs, QCC_APP_OPS_CLOSE_SIDE_WR)) {
+ TRACE_ERROR("closure rejected by app layer", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs);
+ goto out;
+ }
+ }
+
+ /* If FIN already reached, future RESET_STREAMS will be ignored.
+ * Manually set EOS in this case.
+ */
+ if (qcs_sc(qcs) && se_fl_test(qcs->sd, SE_FL_EOI)) {
+ se_fl_set(qcs->sd, SE_FL_EOS);
+ qcs_alert(qcs);
+ }
+
+ /* RFC 9000 3.5. Solicited State Transitions
+ *
+ * An endpoint that receives a STOP_SENDING frame
+ * MUST send a RESET_STREAM frame if the stream is in the "Ready" or
+ * "Send" state. If the stream is in the "Data Sent" state, the
+ * endpoint MAY defer sending the RESET_STREAM frame until the packets
+ * containing outstanding data are acknowledged or declared lost. If
+ * any outstanding data is declared lost, the endpoint SHOULD send a
+ * RESET_STREAM frame instead of retransmitting the data.
+ *
+ * An endpoint SHOULD copy the error code from the STOP_SENDING frame to
+ * the RESET_STREAM frame it sends, but it can use any application error
+ * code.
+ */
+ qcc_reset_stream(qcs, err);
+
+ if (qcc_may_expire(qcc) && !qcc->nb_hreq)
+ qcc_refresh_timeout(qcc);
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_DEVEL("leaving on error", QMUX_EV_QCC_RECV, qcc->conn);
+ return 1;
+}
+
+/* Signal the closing of the remote stream with id <id>. Flow-control credit
+ * for new streams may be granted to the peer if needed.
+ */
+static int qcc_release_remote_stream(struct qcc *qcc, uint64_t id)
+{
+ struct quic_frame *frm;
+
+ TRACE_ENTER(QMUX_EV_QCS_END, qcc->conn);
+
+ if (quic_stream_is_bidi(id)) {
+ ++qcc->lfctl.cl_bidi_r;
+ if (qcc->lfctl.cl_bidi_r > qcc->lfctl.ms_bidi_init / 2) {
+ TRACE_DATA("increase max stream limit with MAX_STREAMS_BIDI", QMUX_EV_QCC_SEND, qcc->conn);
+ frm = qc_frm_alloc(QUIC_FT_MAX_STREAMS_BIDI);
+ if (!frm) {
+ qcc_set_error(qcc, QC_ERR_INTERNAL_ERROR, 0);
+ goto err;
+ }
+
+ frm->max_streams_bidi.max_streams = qcc->lfctl.ms_bidi +
+ qcc->lfctl.cl_bidi_r;
+ LIST_APPEND(&qcc->lfctl.frms, &frm->list);
+ tasklet_wakeup(qcc->wait_event.tasklet);
+
+ qcc->lfctl.ms_bidi += qcc->lfctl.cl_bidi_r;
+ qcc->lfctl.cl_bidi_r = 0;
+ }
+ }
+ else {
+ /* TODO unidirectional stream flow control with MAX_STREAMS_UNI
+ * emission not implemented. It should be unnecessary for
+ * HTTP/3 but may be required if other application protocols
+ * are supported.
+ */
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCS_END, qcc->conn);
+
+ return 0;
+
+ err:
+ TRACE_DEVEL("leaving on error", QMUX_EV_QCS_END, qcc->conn);
+ return 1;
+}
+
+/* detaches the QUIC stream from its QCC and releases it to the QCS pool. */
+static void qcs_destroy(struct qcs *qcs)
+{
+ struct qcc *qcc = qcs->qcc;
+ struct connection *conn = qcc->conn;
+ const uint64_t id = qcs->id;
+
+ TRACE_ENTER(QMUX_EV_QCS_END, conn, qcs);
+
+ /* A stream with prepared data left to send MUST NOT be removed. This
+ * is to keep the connection flow-control accounting consistent.
+ */
+ BUG_ON(qcs->tx.offset < qcs->tx.sent_offset);
+
+ if (!(qcc->flags & QC_CF_ERRL)) {
+ if (quic_stream_is_remote(qcc, id))
+ qcc_release_remote_stream(qcc, id);
+ }
+
+ qcs_free(qcs);
+
+ TRACE_LEAVE(QMUX_EV_QCS_END, conn);
+}
+
+/* Transfer as much data as possible on <qcs> from <in> to <out>. This is done
+ * while respecting the available flow-control at stream and connection levels.
+ *
+ * Returns the total bytes of transferred data or a negative error code.
+ */
+static int qcs_xfer_data(struct qcs *qcs, struct buffer *out, struct buffer *in)
+{
+ struct qcc *qcc = qcs->qcc;
+ int left, to_xfer;
+ int total = 0;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ if (!qcs_get_buf(qcs, out)) {
+ TRACE_ERROR("buffer alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ goto err;
+ }
+
+ /*
+ * QCS out buffer diagram
+ * head left to_xfer
+ * -------------> ----------> ----->
+ * --------------------------------------------------
+ * |...............|xxxxxxxxxxx|<<<<<
+ * --------------------------------------------------
+ * ^ ack-off ^ sent-off ^ off
+ *
+ * STREAM frame
+ * ^ ^
+ * |xxxxxxxxxxxxxxxxx|
+ */
+
+ BUG_ON_HOT(qcs->tx.sent_offset < qcs->stream->ack_offset);
+ BUG_ON_HOT(qcs->tx.offset < qcs->tx.sent_offset);
+ BUG_ON_HOT(qcc->tx.offsets < qcc->tx.sent_offsets);
+
+ left = qcs->tx.offset - qcs->tx.sent_offset;
+ to_xfer = QUIC_MIN(b_data(in), b_room(out));
+
+ BUG_ON_HOT(qcs->tx.offset > qcs->tx.msd);
+ /* do not exceed flow control limit */
+ if (qcs->tx.offset + to_xfer > qcs->tx.msd) {
+ TRACE_DATA("do not exceed stream flow control", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ to_xfer = qcs->tx.msd - qcs->tx.offset;
+ }
+
+ BUG_ON_HOT(qcc->tx.offsets > qcc->rfctl.md);
+ /* do not exceed the connection flow control limit */
+ if (qcc->tx.offsets + to_xfer > qcc->rfctl.md) {
+ TRACE_DATA("do not exceed conn flow control", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ to_xfer = qcc->rfctl.md - qcc->tx.offsets;
+ }
+
+ if (!left && !to_xfer)
+ goto out;
+
+ total = b_force_xfer(out, in, to_xfer);
+
+ out:
+ {
+ struct qcs_xfer_data_trace_arg arg = {
+ .prep = b_data(out), .xfer = total,
+ };
+ TRACE_LEAVE(QMUX_EV_QCS_SEND|QMUX_EV_QCS_XFER_DATA,
+ qcc->conn, qcs, &arg);
+ }
+
+ return total;
+
+ err:
+ TRACE_DEVEL("leaving on error", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ return -1;
+}
+
+/* Prepare a STREAM frame for <qcs> instance using <out> as payload. The frame
+ * is appended in <frm_list>. Set <fin> if this is supposed to be the last
+ * stream frame. If <out> is NULL an empty STREAM frame is built: this may be
+ * useful if FIN needs to be sent without any data left.
+ *
+ * Returns the payload length of the STREAM frame or a negative error code.
+ */
+static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin,
+ struct list *frm_list)
+{
+ struct qcc *qcc = qcs->qcc;
+ struct quic_frame *frm;
+ int head, total;
+ uint64_t base_off;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ /* if ack_offset < buf_offset, it points to an older buffer. */
+ base_off = MAX(qcs->stream->buf_offset, qcs->stream->ack_offset);
+ BUG_ON(qcs->tx.sent_offset < base_off);
+
+ head = qcs->tx.sent_offset - base_off;
+ total = out ? b_data(out) - head : 0;
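+ /* <head> covers bytes already sent but still buffered awaiting
+ * acknowledgment; only the remainder constitutes new payload.
+ */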
+ BUG_ON(total < 0);
+
+ if (!total && !fin) {
+ /* No need to send anything if total is zero and there is no FIN to signal. */
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ return 0;
+ }
+ BUG_ON((!total && qcs->tx.sent_offset > qcs->tx.offset) ||
+ (total && qcs->tx.sent_offset >= qcs->tx.offset));
+ BUG_ON(qcs->tx.sent_offset + total > qcs->tx.offset);
+ BUG_ON(qcc->tx.sent_offsets + total > qcc->rfctl.md);
+
+ TRACE_PROTO("sending STREAM frame", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ frm = qc_frm_alloc(QUIC_FT_STREAM_8);
+ if (!frm) {
+ TRACE_ERROR("frame alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ goto err;
+ }
+
+ frm->stream.stream = qcs->stream;
+ frm->stream.id = qcs->id;
+ frm->stream.offset.key = 0;
+ frm->stream.dup = 0;
+
+ if (total) {
+ frm->stream.buf = out;
+ frm->stream.data = (unsigned char *)b_peek(out, head);
+ }
+ else {
+ /* Empty STREAM frame. */
+ frm->stream.buf = NULL;
+ frm->stream.data = NULL;
+ }
+
+ /* FIN is positioned only when the buffer has been totally emptied. */
+ if (fin)
+ frm->type |= QUIC_STREAM_FRAME_TYPE_FIN_BIT;
+
+ if (qcs->tx.sent_offset) {
+ frm->type |= QUIC_STREAM_FRAME_TYPE_OFF_BIT;
+ frm->stream.offset.key = qcs->tx.sent_offset;
+ }
+
+ /* Always set the length bit as we do not know whether frames will
+ * remain in the final packet after this STREAM.
+ */
+ frm->type |= QUIC_STREAM_FRAME_TYPE_LEN_BIT;
+ frm->stream.len = total;
+
+ LIST_APPEND(frm_list, &frm->list);
+
+ out:
+ {
+ struct qcs_build_stream_trace_arg arg = {
+ .len = frm->stream.len, .fin = fin,
+ .offset = frm->stream.offset.key,
+ };
+ TRACE_LEAVE(QMUX_EV_QCS_SEND|QMUX_EV_QCS_BUILD_STRM,
+ qcc->conn, qcs, &arg);
+ }
+
+ return total;
+
+ err:
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ return -1;
+}
+
+/* Check after transferring data from qcs.tx.buf if FIN must be set on the next
+ * STREAM frame for <qcs>.
+ *
+ * Returns true if FIN must be set else false.
+ */
+static int qcs_stream_fin(struct qcs *qcs)
+{
+ return qcs->flags & QC_SF_FIN_STREAM && !b_data(&qcs->tx.buf);
+}
+
+/* Return true if <qcs> has data to send in new STREAM frames. */
+static forceinline int qcs_need_sending(struct qcs *qcs)
+{
+ return b_data(&qcs->tx.buf) || qcs->tx.sent_offset < qcs->tx.offset ||
+ qcs_stream_fin(qcs);
+}
+
+/* This function must be called by the upper layer to inform about the sending
+ * of a STREAM frame for <qcs> instance. The frame is of length <data> at
+ * offset <offset>.
+ */
+void qcc_streams_sent_done(struct qcs *qcs, uint64_t data, uint64_t offset)
+{
+ struct qcc *qcc = qcs->qcc;
+ uint64_t diff;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ BUG_ON(offset > qcs->tx.sent_offset);
+ BUG_ON(offset + data > qcs->tx.offset);
+
+ /* Check if the STREAM frame has already been notified. This can
+ * happen in case of retransmission.
+ */
+ if (offset + data < qcs->tx.sent_offset) {
+ TRACE_DEVEL("offset already notified", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ goto out;
+ }
+
+ qcs_idle_open(qcs);
+
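+ /* Only the part beyond the current sent offset counts as new
+ * progress: a retransmitted prefix was already accounted for.
+ */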
+ diff = offset + data - qcs->tx.sent_offset;
+ if (diff) {
+ /* increase offset sum on connection */
+ qcc->tx.sent_offsets += diff;
+ BUG_ON_HOT(qcc->tx.sent_offsets > qcc->rfctl.md);
+ if (qcc->tx.sent_offsets == qcc->rfctl.md) {
+ qcc->flags |= QC_CF_BLK_MFCTL;
+ TRACE_STATE("connection flow-control reached", QMUX_EV_QCS_SEND, qcc->conn);
+ }
+
+ /* increase offset on stream */
+ qcs->tx.sent_offset += diff;
+ BUG_ON_HOT(qcs->tx.sent_offset > qcs->tx.msd);
+ BUG_ON_HOT(qcs->tx.sent_offset > qcs->tx.offset);
+ if (qcs->tx.sent_offset == qcs->tx.msd) {
+ qcs->flags |= QC_SF_BLK_SFCTL;
+ TRACE_STATE("stream flow-control reached", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ }
+
+ /* If qcs.stream.buf is full, release it to the lower layer. */
+ if (qcs->tx.offset == qcs->tx.sent_offset &&
+ b_full(&qcs->stream->buf->buf)) {
+ qc_stream_buf_release(qcs->stream);
+ }
+
+ /* Add measurement for send rate. This is done at the MUX layer
+ * to account only for STREAM frames without retransmission.
+ */
+ increment_send_rate(diff, 0);
+ }
+
+ if (qcs->tx.offset == qcs->tx.sent_offset && !b_data(&qcs->tx.buf)) {
+ /* Remove stream from send_list if all was sent. */
+ LIST_DEL_INIT(&qcs->el_send);
+ TRACE_STATE("stream sent done", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ if (qcs->flags & (QC_SF_FIN_STREAM|QC_SF_DETACH)) {
+ /* Close stream locally. */
+ qcs_close_local(qcs);
+ /* Reset flag to not emit multiple FIN STREAM frames. */
+ qcs->flags &= ~QC_SF_FIN_STREAM;
+ }
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+}
+
+/* Returns true if the send subscription is set, false otherwise. */
+static int qcc_subscribe_send(struct qcc *qcc)
+{
+ struct connection *conn = qcc->conn;
+
+ /* Do not subscribe if lower layer in error. */
+ if (conn->flags & CO_FL_ERROR)
+ return 0;
+
+ if (qcc->wait_event.events & SUB_RETRY_SEND)
+ return 1;
+
+ TRACE_DEVEL("subscribe for send", QMUX_EV_QCC_SEND, qcc->conn);
+ conn->xprt->subscribe(conn, conn->xprt_ctx, SUB_RETRY_SEND, &qcc->wait_event);
+ return 1;
+}
+
+/* Wrapper for send on transport layer. Send a list of frames <frms> for the
+ * connection <qcc>.
+ *
+ * Returns 0 if all data sent with success else non-zero.
+ */
+static int qcc_send_frames(struct qcc *qcc, struct list *frms)
+{
+ TRACE_ENTER(QMUX_EV_QCC_SEND, qcc->conn);
+
+ if (LIST_ISEMPTY(frms)) {
+ TRACE_DEVEL("no frames to send", QMUX_EV_QCC_SEND, qcc->conn);
+ goto err;
+ }
+
+ if (!qc_send_mux(qcc->conn->handle.qc, frms)) {
+ TRACE_DEVEL("error on sending", QMUX_EV_QCC_SEND, qcc->conn);
+ qcc_subscribe_send(qcc);
+ goto err;
+ }
+
+ /* If frames are left at this stage, the transport layer is blocked.
+ * Subscribe to it to retry later.
+ */
+ if (!LIST_ISEMPTY(frms)) {
+ TRACE_DEVEL("remaining frames to send", QMUX_EV_QCC_SEND, qcc->conn);
+ qcc_subscribe_send(qcc);
+ goto err;
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_SEND, qcc->conn);
+ return 0;
+
+ err:
+ TRACE_DEVEL("leaving on error", QMUX_EV_QCC_SEND, qcc->conn);
+ return 1;
+}
+
+/* Emit a RESET_STREAM on <qcs>.
+ *
+ * Returns 0 if the frame has been successfully sent else non-zero.
+ */
+static int qcs_send_reset(struct qcs *qcs)
+{
+ struct list frms = LIST_HEAD_INIT(frms);
+ struct quic_frame *frm;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+
+ frm = qc_frm_alloc(QUIC_FT_RESET_STREAM);
+ if (!frm) {
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 1;
+ }
+
+ frm->reset_stream.id = qcs->id;
+ frm->reset_stream.app_error_code = qcs->err;
+ frm->reset_stream.final_size = qcs->tx.sent_offset;
+
+ LIST_APPEND(&frms, &frm->list);
+ if (qcc_send_frames(qcs->qcc, &frms)) {
+ if (!LIST_ISEMPTY(&frms))
+ qc_frm_free(qcs->qcc->conn->handle.qc, &frm);
+ TRACE_DEVEL("cannot send RESET_STREAM", QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 1;
+ }
+
+ qcs_close_local(qcs);
+ qcs->flags &= ~QC_SF_TO_RESET;
+
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 0;
+}
+
+/* Emit a STOP_SENDING on <qcs>.
+ *
+ * Returns 0 if the frame has been successfully sent else non-zero.
+ */
+static int qcs_send_stop_sending(struct qcs *qcs)
+{
+ struct list frms = LIST_HEAD_INIT(frms);
+ struct quic_frame *frm;
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+
+ /* RFC 9000 3.3. Permitted Frame Types
+ *
+ * A
+ * receiver MAY send a STOP_SENDING frame in any state where it has not
+ * received a RESET_STREAM frame -- that is, states other than "Reset
+ * Recvd" or "Reset Read". However, there is little value in sending a
+ * STOP_SENDING frame in the "Data Recvd" state, as all stream data has
+ * been received. A sender could receive either of these two types of
+ * frames in any state as a result of delayed delivery of packets.
+ */
+ if (qcs_is_close_remote(qcs)) {
+ TRACE_STATE("skip STOP_SENDING on remote already closed", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ goto done;
+ }
+
+ frm = qc_frm_alloc(QUIC_FT_STOP_SENDING);
+ if (!frm) {
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 1;
+ }
+
+ frm->stop_sending.id = qcs->id;
+ frm->stop_sending.app_error_code = qcs->err;
+
+ LIST_APPEND(&frms, &frm->list);
+ if (qcc_send_frames(qcs->qcc, &frms)) {
+ if (!LIST_ISEMPTY(&frms))
+ qc_frm_free(qcc->conn->handle.qc, &frm);
+ TRACE_DEVEL("cannot send STOP_SENDING", QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 1;
+ }
+
+ done:
+ qcs->flags &= ~QC_SF_TO_STOP_SENDING;
+
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcs->qcc->conn, qcs);
+ return 0;
+}
+
+/* Used internally by qcc_io_send function. Proceed to send for <qcs>. This will
+ * transfer data from qcs buffer to its quic_stream counterpart. A STREAM frame
+ * is then generated and inserted in <frms> list.
+ *
+ * Returns the total bytes transferred between qcs and quic_stream buffers. It
+ * can be zero if the out buffer cannot be allocated. On error a negative
+ * error code is returned.
+ */
+static int qcs_send(struct qcs *qcs, struct list *frms)
+{
+ struct qcc *qcc = qcs->qcc;
+ struct buffer *buf = &qcs->tx.buf;
+ struct buffer *out = qc_stream_buf_get(qcs->stream);
+ int xfer = 0, buf_avail;
+ char fin = 0;
+
+ TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+
+ /* Cannot send STREAM on remote unidirectional streams. */
+ BUG_ON(quic_stream_is_uni(qcs->id) && quic_stream_is_remote(qcc, qcs->id));
+
+ if (b_data(buf)) {
+ /* Allocate <out> buffer if not already done. */
+ if (!out) {
+ if (qcc->flags & QC_CF_CONN_FULL)
+ goto out;
+
+ out = qc_stream_buf_alloc(qcs->stream, qcs->tx.offset,
+ &buf_avail);
+ if (!out) {
+ if (buf_avail) {
+ TRACE_ERROR("stream desc alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ goto err;
+ }
+
+ TRACE_STATE("hitting stream desc buffer limit", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ qcc->flags |= QC_CF_CONN_FULL;
+ goto out;
+ }
+ }
+
+ /* Transfer data from <buf> to <out>. */
+ xfer = qcs_xfer_data(qcs, out, buf);
+ if (xfer < 0)
+ goto err;
+
+ if (xfer > 0) {
+ qcs_notify_send(qcs);
+ qcs->flags &= ~QC_SF_BLK_MROOM;
+ }
+
+ qcs->tx.offset += xfer;
+ BUG_ON_HOT(qcs->tx.offset > qcs->tx.msd);
+ qcc->tx.offsets += xfer;
+ BUG_ON_HOT(qcc->tx.offsets > qcc->rfctl.md);
+
+ /* out buffer cannot be empty if qcs offsets differ. */
+ BUG_ON(!b_data(out) && qcs->tx.sent_offset != qcs->tx.offset);
+ }
+
+ /* FIN is set if all incoming data were transferred. */
+ fin = qcs_stream_fin(qcs);
+
+ /* Build a new STREAM frame with <out> buffer. */
+ if (qcs->tx.sent_offset != qcs->tx.offset || fin) {
+ /* Skip STREAM frame allocation if already subscribed for send.
+ * Happens on sendto transient error or network congestion.
+ */
+ if (qcc->wait_event.events & SUB_RETRY_SEND) {
+ TRACE_DEVEL("already subscribed for sending",
+ QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ goto err;
+ }
+
+ if (qcs_build_stream_frm(qcs, out, fin, frms) < 0)
+ goto err;
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ return xfer;
+
+ err:
+ TRACE_DEVEL("leaving on error", QMUX_EV_QCS_SEND, qcc->conn, qcs);
+ return -1;
+}
+
+/* Proceed to sending. Loop through all available streams for the <qcc>
+ * instance and try to send as much as possible.
+ *
+ * Returns the total number of bytes sent to the transport layer.
+ */
+static int qcc_io_send(struct qcc *qcc)
+{
+ struct list frms = LIST_HEAD_INIT(frms);
+ /* Temporary list for QCS on error. */
+ struct list qcs_failed = LIST_HEAD_INIT(qcs_failed);
+ struct qcs *qcs, *qcs_tmp, *first_qcs = NULL;
+ int ret, total = 0;
+
+ TRACE_ENTER(QMUX_EV_QCC_SEND, qcc->conn);
+
+ /* TODO if socket in transient error, sending should be temporarily
+ * disabled for all frames. However, checking for send subscription is
+ * not valid as this may be caused by a congestion error which only
+ * apply for STREAM frames.
+ */
+
+ /* Check for transport error. */
+ if (qcc->flags & QC_CF_ERR_CONN || qcc->conn->flags & CO_FL_ERROR) {
+ TRACE_DEVEL("connection on error", QMUX_EV_QCC_SEND, qcc->conn);
+ goto out;
+ }
+
+ /* Check for locally detected connection error. */
+ if (qcc->flags & QC_CF_ERRL) {
+ /* Prepare a CONNECTION_CLOSE if not already done. */
+ if (!(qcc->flags & QC_CF_ERRL_DONE)) {
+ TRACE_DATA("report a connection error", QMUX_EV_QCC_SEND|QMUX_EV_QCC_ERR, qcc->conn);
+ quic_set_connection_close(qcc->conn->handle.qc, qcc->err);
+ qcc->flags |= QC_CF_ERRL_DONE;
+ }
+ goto out;
+ }
+
+ if (qcc->conn->flags & CO_FL_SOCK_WR_SH) {
+ qcc->conn->flags |= CO_FL_ERROR;
+ TRACE_DEVEL("connection on error", QMUX_EV_QCC_SEND, qcc->conn);
+ goto out;
+ }
+
+ if (!LIST_ISEMPTY(&qcc->lfctl.frms)) {
+ if (qcc_send_frames(qcc, &qcc->lfctl.frms)) {
+ TRACE_DEVEL("flow-control frames rejected by transport, aborting send", QMUX_EV_QCC_SEND, qcc->conn);
+ goto out;
+ }
+ }
+
+ /* Send STREAM/STOP_SENDING/RESET_STREAM data for registered streams. */
+ list_for_each_entry_safe(qcs, qcs_tmp, &qcc->send_list, el_send) {
+ /* Check if all QCS were processed. */
+ if (qcs == first_qcs)
+ break;
+
+ /* Stream must not be present in send_list if it has nothing to send. */
+ BUG_ON(!(qcs->flags & (QC_SF_TO_STOP_SENDING|QC_SF_TO_RESET)) &&
+ !qcs_need_sending(qcs));
+
+ /* Each STOP_SENDING/RESET_STREAM frame is sent individually to
+ * guarantee its emission.
+ *
+ * TODO multiplex several frames in same datagram to optimize sending
+ */
+ if (qcs->flags & QC_SF_TO_STOP_SENDING) {
+ if (qcs_send_stop_sending(qcs))
+ goto sent_done;
+
+ /* Remove stream from send_list if it had only STOP_SENDING
+ * to send.
+ */
+ if (!(qcs->flags & QC_SF_TO_RESET) && !qcs_need_sending(qcs)) {
+ LIST_DEL_INIT(&qcs->el_send);
+ continue;
+ }
+ }
+
+ if (qcs->flags & QC_SF_TO_RESET) {
+ if (qcs_send_reset(qcs))
+ goto sent_done;
+
+ /* RFC 9000 3.3. Permitted Frame Types
+ *
+ * A sender MUST NOT send
+ * a STREAM or STREAM_DATA_BLOCKED frame for a stream in the
+ * "Reset Sent" state or any terminal state -- that is, after
+ * sending a RESET_STREAM frame.
+ */
+ LIST_DEL_INIT(&qcs->el_send);
+ continue;
+ }
+
+ if (!(qcc->flags & QC_CF_BLK_MFCTL) &&
+ !(qcs->flags & QC_SF_BLK_SFCTL)) {
+ if ((ret = qcs_send(qcs, &frms)) < 0) {
+ /* Temporarily remove QCS from send-list. */
+ LIST_DEL_INIT(&qcs->el_send);
+ LIST_APPEND(&qcs_failed, &qcs->el_send);
+ continue;
+ }
+
+ total += ret;
+ if (ret) {
+ /* Move QCS with some bytes transferred at the
+ * end of send-list for next iterations.
+ */
+ LIST_DEL_INIT(&qcs->el_send);
+ LIST_APPEND(&qcc->send_list, &qcs->el_send);
+ /* Remember first moved QCS as checkpoint to interrupt loop */
+ if (!first_qcs)
+ first_qcs = qcs;
+ }
+ }
+ }
+
+ /* Retry sending until there is no frame left to send, data is
+ * rejected, or the connection flow-control limit is reached.
+ */
+ while (qcc_send_frames(qcc, &frms) == 0 && !(qcc->flags & QC_CF_BLK_MFCTL)) {
+ /* Reloop over <qcc.send_list>. Useful for streams which have
+ * filled their qc_stream_desc buffer and have now released it.
+ */
+ list_for_each_entry_safe(qcs, qcs_tmp, &qcc->send_list, el_send) {
+ /* Only streams blocked on flow-control or waiting on a
+ * new qc_stream_desc should be present in send_list as
+ * long as transport layer can handle all data.
+ */
+ BUG_ON(qcs->stream->buf && !(qcs->flags & QC_SF_BLK_SFCTL));
+
+ if (!(qcs->flags & QC_SF_BLK_SFCTL)) {
+ if ((ret = qcs_send(qcs, &frms)) < 0) {
+ LIST_DEL_INIT(&qcs->el_send);
+ LIST_APPEND(&qcs_failed, &qcs->el_send);
+ continue;
+ }
+
+ total += ret;
+ }
+ }
+ }
+
+ sent_done:
+ /* Deallocate frames that the transport layer has rejected. */
+ if (!LIST_ISEMPTY(&frms)) {
+ struct quic_frame *frm, *frm2;
+
+ list_for_each_entry_safe(frm, frm2, &frms, list)
+ qc_frm_free(qcc->conn->handle.qc, &frm);
+ }
+
+ /* Re-insert on-error QCS at the end of the send-list. */
+ if (!LIST_ISEMPTY(&qcs_failed)) {
+ list_for_each_entry_safe(qcs, qcs_tmp, &qcs_failed, el_send) {
+ LIST_DEL_INIT(&qcs->el_send);
+ LIST_APPEND(&qcc->send_list, &qcs->el_send);
+ }
+
+ if (!(qcc->flags & QC_CF_BLK_MFCTL))
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+
+ out:
+ if (qcc->conn->flags & CO_FL_ERROR && !(qcc->flags & QC_CF_ERR_CONN)) {
+ TRACE_ERROR("error reported by transport layer",
+ QMUX_EV_QCC_SEND, qcc->conn);
+ qcc->flags |= QC_CF_ERR_CONN;
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_SEND, qcc->conn);
+ return total;
+}
+
+/* Proceed with reception. Loop through all streams from <qcc> and use the
+ * decode_qcs operation.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int qcc_io_recv(struct qcc *qcc)
+{
+ struct eb64_node *node;
+ struct qcs *qcs;
+
+ TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn);
+
+ if (qcc->flags & QC_CF_ERRL) {
+ TRACE_DATA("connection on error", QMUX_EV_QCC_RECV, qcc->conn);
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+ }
+
+ node = eb64_first(&qcc->streams_by_id);
+ while (node) {
+ uint64_t id;
+
+ qcs = eb64_entry(node, struct qcs, by_id);
+ id = qcs->id;
+
+ if (!ncb_data(&qcs->rx.ncbuf, 0) || (qcs->flags & QC_SF_DEM_FULL)) {
+ node = eb64_next(node);
+ continue;
+ }
+
+ if (quic_stream_is_uni(id) && quic_stream_is_local(qcc, id)) {
+ node = eb64_next(node);
+ continue;
+ }
+
+ qcc_decode_qcs(qcc, qcs);
+ node = eb64_next(node);
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn);
+ return 0;
+}
+
+
+/* Release all streams whose transfer operation is completed.
+ *
+ * Returns true if at least one stream is released.
+ */
+static int qcc_purge_streams(struct qcc *qcc)
+{
+ struct eb64_node *node;
+ int release = 0;
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc->conn);
+
+ node = eb64_first(&qcc->streams_by_id);
+ while (node) {
+ struct qcs *qcs = eb64_entry(node, struct qcs, by_id);
+ node = eb64_next(node);
+
+ /* Release closed streams which are not attached. */
+ if (qcs->st == QC_SS_CLO && !qcs_sc(qcs)) {
+ TRACE_STATE("purging closed stream", QMUX_EV_QCC_WAKE, qcs->qcc->conn, qcs);
+ qcs_destroy(qcs);
+ release = 1;
+ continue;
+ }
+
+ /* Release detached streams with empty buffer. */
+ if (qcs->flags & QC_SF_DETACH) {
+ if (qcs_is_close_local(qcs)) {
+ TRACE_STATE("purging detached stream", QMUX_EV_QCC_WAKE, qcs->qcc->conn, qcs);
+ qcs_destroy(qcs);
+ release = 1;
+ continue;
+ }
+ }
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE, qcc->conn);
+ return release;
+}
+
+/* Execute application layer shutdown. If this operation is not defined, a
+ * CONNECTION_CLOSE will be prepared as a fallback. This function is protected
+ * against multiple invocations via the flag QC_CF_APP_SHUT.
+ */
+static void qcc_shutdown(struct qcc *qcc)
+{
+ TRACE_ENTER(QMUX_EV_QCC_END, qcc->conn);
+
+ if (qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL)) {
+ TRACE_DATA("connection on error", QMUX_EV_QCC_END, qcc->conn);
+ goto out;
+ }
+
+ if (qcc->flags & QC_CF_APP_SHUT)
+ goto out;
+
+ TRACE_STATE("perform graceful shutdown", QMUX_EV_QCC_END, qcc->conn);
+ if (qcc->app_ops && qcc->app_ops->shutdown) {
+ qcc->app_ops->shutdown(qcc->ctx);
+ qcc_io_send(qcc);
+ }
+ else {
+ qcc->err = quic_err_app(QC_ERR_NO_ERROR);
+ }
+
+ /* Register "no error" code at transport layer. Do not use
+ * quic_set_connection_close() as retransmission may be performed to
+ * finalize transfers. Do not overwrite the quic-conn existing code if
+ * already set.
+ *
+ * TODO implement a wrapper function for this in quic-conn module
+ */
+ if (!(qcc->conn->handle.qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE))
+ qcc->conn->handle.qc->err = qcc->err;
+
+ out:
+ qcc->flags |= QC_CF_APP_SHUT;
+ TRACE_LEAVE(QMUX_EV_QCC_END, qcc->conn);
+}
+
+/* Loop through all qcs from <qcc>. Report an error on the stream endpoints
+ * if the connection is on error, and wake them up.
+ */
+static int qcc_wake_some_streams(struct qcc *qcc)
+{
+ struct qcs *qcs;
+ struct eb64_node *node;
+
+ TRACE_POINT(QMUX_EV_QCC_WAKE, qcc->conn);
+
+ for (node = eb64_first(&qcc->streams_by_id); node;
+ node = eb64_next(node)) {
+ qcs = eb64_entry(node, struct qcs, by_id);
+
+ if (!qcs_sc(qcs))
+ continue;
+
+ if (qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL)) {
+ TRACE_POINT(QMUX_EV_QCC_WAKE, qcc->conn, qcs);
+ se_fl_set_error(qcs->sd);
+ qcs_alert(qcs);
+ }
+ }
+
+ return 0;
+}
+
+/* Conduct operations which should be made for <qcc> connection after
+ * input/output. Most notably, closed streams are purged, which may leave the
+ * connection ready to be released.
+ *
+ * Returns 1 if <qcc> must be released else 0.
+ */
+static int qcc_io_process(struct qcc *qcc)
+{
+ qcc_purge_streams(qcc);
+
+ /* Check if a soft-stop is in progress.
+ *
+ * TODO this is relevant for frontend connections only.
+ */
+ if (unlikely(qcc->proxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ int close = 1;
+
+ /* If using listener socket, soft-stop is not supported. The
+ * connection must be closed immediately.
+ */
+ if (!qc_test_fd(qcc->conn->handle.qc)) {
+ TRACE_DEVEL("proxy disabled with listener socket, closing connection", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc->conn->flags |= (CO_FL_SOCK_RD_SH|CO_FL_SOCK_WR_SH);
+ qcc_io_send(qcc);
+ goto out;
+ }
+
+ TRACE_DEVEL("proxy disabled, prepare connection soft-stop", QMUX_EV_QCC_WAKE, qcc->conn);
+
+ /* If a close-spread-time option is set, we want to avoid
+ * closing all the active HTTP/3 connections at once so we add a
+ * random factor that will spread the closing.
+ */
+ if (tick_isset(global.close_spread_end)) {
+ int remaining_window = tick_remain(now_ms, global.close_spread_end);
+ if (remaining_window) {
+ /* This should increase the closing rate the
+ * further along the window we are. */
+ close = (remaining_window <= statistical_prng_range(global.close_spread_time));
+ }
+ }
+ else if (global.tune.options & GTUNE_DISABLE_ACTIVE_CLOSE) {
+			close = 0; /* let the client close its connection itself */
+ }
+
+ if (close)
+ qcc_shutdown(qcc);
+ }
+
+ /* Report error if set on stream endpoint layer. */
+ if (qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL))
+ qcc_wake_some_streams(qcc);
+
+ out:
+ if (qcc_is_dead(qcc))
+ return 1;
+
+ return 0;
+}
+
+/* Release function. This one should be called to free all resources allocated
+ * to the mux.
+ */
+static void qcc_release(struct qcc *qcc)
+{
+ struct connection *conn = qcc->conn;
+ struct eb64_node *node;
+
+ TRACE_ENTER(QMUX_EV_QCC_END, conn);
+
+ qcc_shutdown(qcc);
+
+ if (qcc->task) {
+ task_destroy(qcc->task);
+ qcc->task = NULL;
+ }
+
+ tasklet_free(qcc->wait_event.tasklet);
+ if (conn && qcc->wait_event.events) {
+ conn->xprt->unsubscribe(conn, conn->xprt_ctx,
+ qcc->wait_event.events,
+ &qcc->wait_event);
+ }
+
+	/* free remaining qcs instances */
+ node = eb64_first(&qcc->streams_by_id);
+ while (node) {
+ struct qcs *qcs = eb64_entry(node, struct qcs, by_id);
+ node = eb64_next(node);
+ qcs_free(qcs);
+ }
+
+ while (!LIST_ISEMPTY(&qcc->lfctl.frms)) {
+ struct quic_frame *frm = LIST_ELEM(qcc->lfctl.frms.n, struct quic_frame *, list);
+ qc_frm_free(qcc->conn->handle.qc, &frm);
+ }
+
+ if (qcc->app_ops && qcc->app_ops->release)
+ qcc->app_ops->release(qcc->ctx);
+ TRACE_PROTO("application layer released", QMUX_EV_QCC_END, conn);
+
+ pool_free(pool_head_qcc, qcc);
+
+ if (conn) {
+ LIST_DEL_INIT(&conn->stopping_list);
+
+ conn->handle.qc->conn = NULL;
+ conn->mux = NULL;
+ conn->ctx = NULL;
+
+ TRACE_DEVEL("freeing conn", QMUX_EV_QCC_END, conn);
+
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+
+ TRACE_LEAVE(QMUX_EV_QCC_END);
+}
+
+struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int status)
+{
+ struct qcc *qcc = ctx;
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc->conn);
+
+ qcc_io_send(qcc);
+
+ qcc_io_recv(qcc);
+
+ if (qcc_io_process(qcc)) {
+ TRACE_STATE("releasing dead connection", QMUX_EV_QCC_WAKE, qcc->conn);
+ goto release;
+ }
+
+ qcc_refresh_timeout(qcc);
+
+ end:
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE, qcc->conn);
+ return NULL;
+
+ release:
+ qcc_release(qcc);
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE);
+ return NULL;
+}
+
+static struct task *qcc_timeout_task(struct task *t, void *ctx, unsigned int state)
+{
+ struct qcc *qcc = ctx;
+ int expired = tick_is_expired(t->expire, now_ms);
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc ? qcc->conn : NULL);
+
+ if (qcc) {
+ if (!expired) {
+ TRACE_DEVEL("not expired", QMUX_EV_QCC_WAKE, qcc->conn);
+ goto requeue;
+ }
+
+ if (!qcc_may_expire(qcc)) {
+			TRACE_DEVEL("cannot expire", QMUX_EV_QCC_WAKE, qcc->conn);
+ t->expire = TICK_ETERNITY;
+ goto requeue;
+ }
+ }
+
+ task_destroy(t);
+
+ if (!qcc) {
+ TRACE_DEVEL("no more qcc", QMUX_EV_QCC_WAKE);
+ goto out;
+ }
+
+ /* Mark timeout as triggered by setting task to NULL. */
+ qcc->task = NULL;
+
+ /* TODO depending on the timeout condition, different shutdown mode
+ * should be used. For http keep-alive or disabled proxy, a graceful
+	 * shutdown should occur. For all other cases, an immediate close
+ * seems legitimate.
+ */
+ if (qcc_is_dead(qcc)) {
+ TRACE_STATE("releasing dead connection", QMUX_EV_QCC_WAKE, qcc->conn);
+ qcc_release(qcc);
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE);
+ return NULL;
+
+ requeue:
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE);
+ return t;
+}
+
+static int qmux_init(struct connection *conn, struct proxy *prx,
+ struct session *sess, struct buffer *input)
+{
+ struct qcc *qcc;
+ struct quic_transport_params *lparams, *rparams;
+
+ TRACE_ENTER(QMUX_EV_QCC_NEW);
+
+ qcc = pool_alloc(pool_head_qcc);
+ if (!qcc) {
+ TRACE_ERROR("alloc failure", QMUX_EV_QCC_NEW);
+ goto fail_no_qcc;
+ }
+
+ qcc->conn = conn;
+ conn->ctx = qcc;
+ qcc->nb_hreq = qcc->nb_sc = 0;
+ qcc->flags = 0;
+
+ qcc->app_ops = NULL;
+
+ qcc->streams_by_id = EB_ROOT_UNIQUE;
+
+ /* Server parameters, params used for RX flow control. */
+ lparams = &conn->handle.qc->rx.params;
+
+ qcc->tx.sent_offsets = qcc->tx.offsets = 0;
+
+ LIST_INIT(&qcc->lfctl.frms);
+ qcc->lfctl.ms_bidi = qcc->lfctl.ms_bidi_init = lparams->initial_max_streams_bidi;
+ qcc->lfctl.ms_uni = lparams->initial_max_streams_uni;
+ qcc->lfctl.msd_bidi_l = lparams->initial_max_stream_data_bidi_local;
+ qcc->lfctl.msd_bidi_r = lparams->initial_max_stream_data_bidi_remote;
+ qcc->lfctl.msd_uni_r = lparams->initial_max_stream_data_uni;
+ qcc->lfctl.cl_bidi_r = 0;
+
+ qcc->lfctl.md = qcc->lfctl.md_init = lparams->initial_max_data;
+ qcc->lfctl.offsets_recv = qcc->lfctl.offsets_consume = 0;
+
+ rparams = &conn->handle.qc->tx.params;
+ qcc->rfctl.md = rparams->initial_max_data;
+ qcc->rfctl.msd_bidi_l = rparams->initial_max_stream_data_bidi_local;
+ qcc->rfctl.msd_bidi_r = rparams->initial_max_stream_data_bidi_remote;
+ qcc->rfctl.msd_uni_l = rparams->initial_max_stream_data_uni;
+
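+	/* In QUIC (RFC 9000), the two low bits of a stream ID encode its
+	 * type: 0x00 client-initiated bidi, 0x01 server-initiated bidi,
+	 * 0x02 client-initiated uni, 0x03 server-initiated uni. The
+	 * counters below are thus seeded according to our connection role.
+	 */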
+ if (conn_is_back(conn)) {
+ qcc->next_bidi_l = 0x00;
+ qcc->largest_bidi_r = 0x01;
+ qcc->next_uni_l = 0x02;
+ qcc->largest_uni_r = 0x03;
+ }
+ else {
+ qcc->largest_bidi_r = 0x00;
+ qcc->next_bidi_l = 0x01;
+ qcc->largest_uni_r = 0x02;
+ qcc->next_uni_l = 0x03;
+ }
+
+ qcc->wait_event.tasklet = tasklet_new();
+ if (!qcc->wait_event.tasklet) {
+		TRACE_ERROR("tasklet alloc failure", QMUX_EV_QCC_NEW);
+ goto fail_no_tasklet;
+ }
+
+ LIST_INIT(&qcc->send_list);
+
+ qcc->wait_event.tasklet->process = qcc_io_cb;
+ qcc->wait_event.tasklet->context = qcc;
+ qcc->wait_event.events = 0;
+
+ qcc->proxy = prx;
+ /* haproxy timeouts */
+ if (conn_is_back(qcc->conn)) {
+ qcc->timeout = prx->timeout.server;
+ qcc->shut_timeout = tick_isset(prx->timeout.serverfin) ?
+ prx->timeout.serverfin : prx->timeout.server;
+ }
+ else {
+ qcc->timeout = prx->timeout.client;
+ qcc->shut_timeout = tick_isset(prx->timeout.clientfin) ?
+ prx->timeout.clientfin : prx->timeout.client;
+ }
+
+ /* Always allocate task even if timeout is unset. In MUX code, if task
+	 * is NULL, it indicates that a timeout has struck earlier.
+ */
+ qcc->task = task_new_here();
+ if (!qcc->task) {
+ TRACE_ERROR("timeout task alloc failure", QMUX_EV_QCC_NEW);
+ goto fail_no_timeout_task;
+ }
+ qcc->task->process = qcc_timeout_task;
+ qcc->task->context = qcc;
+ qcc->task->expire = tick_add_ifset(now_ms, qcc->timeout);
+
+ qcc_reset_idle_start(qcc);
+ LIST_INIT(&qcc->opening_list);
+
+ HA_ATOMIC_STORE(&conn->handle.qc->qcc, qcc);
+
+ if (qcc_install_app_ops(qcc, conn->handle.qc->app_ops)) {
+ TRACE_PROTO("Cannot install app layer", QMUX_EV_QCC_NEW|QMUX_EV_QCC_ERR, qcc->conn);
+ /* prepare a CONNECTION_CLOSE frame */
+ quic_set_connection_close(conn->handle.qc, quic_err_transport(QC_ERR_APPLICATION_ERROR));
+ goto fail_install_app_ops;
+ }
+
+ if (qcc->app_ops == &h3_ops)
+ proxy_inc_fe_cum_sess_ver_ctr(sess->listener, prx, 3);
+
+ /* Register conn for idle front closing. This is done once everything is allocated. */
+ if (!conn_is_back(conn))
+ LIST_APPEND(&mux_stopping_data[tid].list, &conn->stopping_list);
+
+ /* init read cycle */
+ tasklet_wakeup(qcc->wait_event.tasklet);
+
+ TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn);
+ return 0;
+
+ fail_install_app_ops:
+ if (qcc->app_ops && qcc->app_ops->release)
+ qcc->app_ops->release(qcc->ctx);
+ task_destroy(qcc->task);
+ fail_no_timeout_task:
+ tasklet_free(qcc->wait_event.tasklet);
+ fail_no_tasklet:
+ pool_free(pool_head_qcc, qcc);
+ fail_no_qcc:
+ TRACE_LEAVE(QMUX_EV_QCC_NEW);
+ return -1;
+}
+
+static void qmux_destroy(void *ctx)
+{
+ struct qcc *qcc = ctx;
+
+ TRACE_ENTER(QMUX_EV_QCC_END, qcc->conn);
+ qcc_release(qcc);
+ TRACE_LEAVE(QMUX_EV_QCC_END);
+}
+
+static void qmux_strm_detach(struct sedesc *sd)
+{
+ struct qcs *qcs = sd->se;
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_STRM_END, qcc->conn, qcs);
+
+ /* TODO this BUG_ON_HOT() is not correct as the stconn layer may detach
+ * from the stream even if it is not closed remotely at the QUIC layer.
+ * This happens for example when a stream must be closed due to a
+ * rejected request. To better handle these cases, it will be required
+ * to implement shutr/shutw MUX operations. Once this is done, this
+ * BUG_ON_HOT() statement can be adjusted.
+ */
+ //BUG_ON_HOT(!qcs_is_close_remote(qcs));
+
+ qcc_rm_sc(qcc);
+
+ if (!qcs_is_close_local(qcs) &&
+ !(qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL))) {
+ TRACE_STATE("remaining data, detaching qcs", QMUX_EV_STRM_END, qcc->conn, qcs);
+ qcs->flags |= QC_SF_DETACH;
+ qcc_refresh_timeout(qcc);
+
+ TRACE_LEAVE(QMUX_EV_STRM_END, qcc->conn, qcs);
+ return;
+ }
+
+ qcs_destroy(qcs);
+
+ if (qcc_is_dead(qcc)) {
+ TRACE_STATE("killing dead connection", QMUX_EV_STRM_END, qcc->conn);
+ goto release;
+ }
+ else {
+ TRACE_DEVEL("refreshing connection's timeout", QMUX_EV_STRM_END, qcc->conn);
+ qcc_refresh_timeout(qcc);
+ }
+
+ TRACE_LEAVE(QMUX_EV_STRM_END, qcc->conn);
+ return;
+
+ release:
+ qcc_release(qcc);
+ TRACE_LEAVE(QMUX_EV_STRM_END);
+ return;
+}
+
+/* Called from the upper layer, to receive data */
+static size_t qmux_strm_rcv_buf(struct stconn *sc, struct buffer *buf,
+ size_t count, int flags)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+ struct qcc *qcc = qcs->qcc;
+ size_t ret = 0;
+ char fin = 0;
+
+ TRACE_ENTER(QMUX_EV_STRM_RECV, qcc->conn, qcs);
+
+ ret = qcs_http_rcv_buf(qcs, buf, count, &fin);
+
+ if (b_data(&qcs->rx.app_buf)) {
+ se_fl_set(qcs->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+ }
+ else {
+ se_fl_clr(qcs->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM);
+
+ /* Set end-of-input when full message properly received. */
+ if (fin) {
+ TRACE_STATE("report end-of-input", QMUX_EV_STRM_RECV, qcc->conn, qcs);
+ se_fl_set(qcs->sd, SE_FL_EOI);
+
+ /* If request EOM is reported to the upper layer, it means the
+ * QCS now expects data from the opposite side.
+ */
+ se_expect_data(qcs->sd);
+ }
+
+ /* Set end-of-stream on read closed. */
+ if (qcs->flags & QC_SF_RECV_RESET ||
+ qcc->conn->flags & CO_FL_SOCK_RD_SH) {
+ TRACE_STATE("report end-of-stream", QMUX_EV_STRM_RECV, qcc->conn, qcs);
+ se_fl_set(qcs->sd, SE_FL_EOS);
+
+ /* Set error if EOI not reached. This may happen on
+ * RESET_STREAM reception or connection error.
+ */
+ if (!se_fl_test(qcs->sd, SE_FL_EOI)) {
+ TRACE_STATE("report error on stream aborted", QMUX_EV_STRM_RECV, qcc->conn, qcs);
+ se_fl_set(qcs->sd, SE_FL_ERROR);
+ }
+ }
+
+ if (se_fl_test(qcs->sd, SE_FL_ERR_PENDING)) {
+ TRACE_STATE("report error", QMUX_EV_STRM_RECV, qcc->conn, qcs);
+ se_fl_set(qcs->sd, SE_FL_ERROR);
+ }
+
+ if (b_size(&qcs->rx.app_buf)) {
+ b_free(&qcs->rx.app_buf);
+ offer_buffers(NULL, 1);
+ }
+ }
+
+ /* Restart demux if it was interrupted on full buffer. */
+ if (ret && qcs->flags & QC_SF_DEM_FULL) {
+		/* DEM_FULL is only set when data is available, so we never
+		 * perform an unnecessary wakeup here.
+		 */
+ BUG_ON(!ncb_data(&qcs->rx.ncbuf, 0));
+
+ qcs->flags &= ~QC_SF_DEM_FULL;
+ if (!(qcc->flags & QC_CF_ERRL))
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+
+ TRACE_LEAVE(QMUX_EV_STRM_RECV, qcc->conn, qcs);
+
+ return ret;
+}
+
+static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf,
+ size_t count, int flags)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+ size_t ret = 0;
+ char fin;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+	/* stream layer has been detached, so no transfer may occur afterwards. */
+ BUG_ON_HOT(qcs->flags & QC_SF_DETACH);
+
+ /* Report error if set on stream endpoint layer. */
+ if (qcs->qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL)) {
+ se_fl_set(qcs->sd, SE_FL_ERROR);
+ TRACE_DEVEL("connection in error", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+ goto end;
+ }
+
+ if (qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET)) {
+ ret = qcs_http_reset_buf(qcs, buf, count);
+ goto end;
+ }
+
+ ret = qcs_http_snd_buf(qcs, buf, count, &fin);
+ if (fin) {
+ TRACE_STATE("reached stream fin", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+ qcs->flags |= QC_SF_FIN_STREAM;
+ }
+
+ if (ret || fin) {
+ qcc_send_stream(qcs, 0);
+ if (!(qcs->qcc->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(qcs->qcc->wait_event.tasklet);
+ }
+
+ end:
+ TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ return ret;
+}
+
+
+static size_t qmux_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+ size_t ret = 0;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+	/* stream layer has been detached, so no transfer may occur afterwards. */
+ BUG_ON_HOT(qcs->flags & QC_SF_DETACH);
+
+ if (!qcs->qcc->app_ops->nego_ff || !qcs->qcc->app_ops->done_ff) {
+		/* Fast forwarding is not supported by the QUIC application layer */
+ qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF;
+ goto end;
+ }
+
+ if (qcs->qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL)) {
+		/* Disable fast-forward if the connection is on error. The
+		 * error will eventually be reported to the stream-conn if
+		 * snd_buf is invoked.
+		 */
+ TRACE_DEVEL("connection in error", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+ qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF;
+ goto end;
+ }
+
+	/* Always disable splicing */
+ qcs->sd->iobuf.flags |= IOBUF_FL_NO_SPLICING;
+
+ ret = qcs->qcc->app_ops->nego_ff(qcs, count);
+ if (!ret)
+ goto end;
+
+ /* forward remaining input data */
+ if (b_data(input)) {
+ size_t xfer = ret;
+
+ if (xfer > b_data(input))
+ xfer = b_data(input);
+ b_add(qcs->sd->iobuf.buf, qcs->sd->iobuf.offset);
+ qcs->sd->iobuf.data = b_xfer(qcs->sd->iobuf.buf, input, xfer);
+ b_sub(qcs->sd->iobuf.buf, qcs->sd->iobuf.offset);
+
+ /* Cannot forward more data, wait for room */
+ if (b_data(input)) {
+ ret = 0;
+ goto end;
+ }
+ }
+ ret -= qcs->sd->iobuf.data;
+
+ end:
+ TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+ return ret;
+}
+
+static size_t qmux_done_ff(struct stconn *sc)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+ struct qcc *qcc = qcs->qcc;
+ struct sedesc *sd = qcs->sd;
+ size_t total = 0;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ if (sd->iobuf.flags & IOBUF_FL_EOI)
+ qcs->flags |= QC_SF_FIN_STREAM;
+
+ if (!(qcs->flags & QC_SF_FIN_STREAM) && !sd->iobuf.data)
+ goto end;
+
+ total = qcs->qcc->app_ops->done_ff(qcs);
+
+ qcc_send_stream(qcs, 0);
+ if (!(qcs->qcc->wait_event.events & SUB_RETRY_SEND))
+ tasklet_wakeup(qcc->wait_event.tasklet);
+
+ end:
+ if (!b_data(&qcs->tx.buf))
+ b_free(&qcs->tx.buf);
+
+ TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+ return total;
+}
+
+static int qmux_resume_ff(struct stconn *sc, unsigned int flags)
+{
+ return 0;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int qmux_strm_subscribe(struct stconn *sc, int event_type,
+ struct wait_event *es)
+{
+ return qcs_subscribe(__sc_mux_strm(sc), event_type, es);
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int qmux_strm_unsubscribe(struct stconn *sc, int event_type, struct wait_event *es)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(qcs->subs && qcs->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ qcs->subs = NULL;
+
+ return 0;
+}
+
+static int qmux_wake(struct connection *conn)
+{
+ struct qcc *qcc = conn->ctx;
+
+ TRACE_ENTER(QMUX_EV_QCC_WAKE, conn);
+
+ if (qcc_io_process(qcc)) {
+ TRACE_STATE("releasing dead connection", QMUX_EV_QCC_WAKE, qcc->conn);
+ goto release;
+ }
+
+ qcc_wake_some_streams(qcc);
+
+ qcc_refresh_timeout(qcc);
+
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE, conn);
+ return 0;
+
+ release:
+ qcc_release(qcc);
+ TRACE_LEAVE(QMUX_EV_QCC_WAKE);
+ return 1;
+}
+
+static void qmux_strm_shutw(struct stconn *sc, enum co_shw_mode mode)
+{
+ struct qcs *qcs = __sc_mux_strm(sc);
+ struct qcc *qcc = qcs->qcc;
+
+ TRACE_ENTER(QMUX_EV_STRM_SHUT, qcc->conn, qcs);
+
+ /* Early closure reported if QC_SF_FIN_STREAM not yet set. */
+ if (!qcs_is_close_local(qcs) &&
+ !(qcs->flags & (QC_SF_FIN_STREAM|QC_SF_TO_RESET))) {
+
+ if (qcs->flags & QC_SF_UNKNOWN_PL_LENGTH) {
+ /* Close stream with a FIN STREAM frame. */
+ if (!(qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL))) {
+ TRACE_STATE("set FIN STREAM",
+ QMUX_EV_STRM_SHUT, qcc->conn, qcs);
+ qcs->flags |= QC_SF_FIN_STREAM;
+ qcc_send_stream(qcs, 0);
+ }
+ }
+ else {
+ /* RESET_STREAM necessary. */
+ qcc_reset_stream(qcs, 0);
+ }
+
+ tasklet_wakeup(qcc->wait_event.tasklet);
+ }
+
+ out:
+ TRACE_LEAVE(QMUX_EV_STRM_SHUT, qcc->conn, qcs);
+}
+
+static int qmux_sctl(struct stconn *sc, enum mux_sctl_type mux_sctl, void *output)
+{
+ int ret = 0;
+ struct qcs *qcs = __sc_mux_strm(sc);
+
+ switch (mux_sctl) {
+ case MUX_SCTL_SID:
+ if (output)
+ *((int64_t *)output) = qcs->id;
+ return ret;
+
+ default:
+ return -1;
+ }
+}
+
+/* for debugging with CLI's "show sess" command. May emit multiple lines, each
+ * new one being prefixed with <pfx>, if <pfx> is not NULL, otherwise a single
+ * line is used. Each field starts with a space so it's safe to print it after
+ * existing fields.
+ */
+static int qmux_strm_show_sd(struct buffer *msg, struct sedesc *sd, const char *pfx)
+{
+ struct qcs *qcs = sd->se;
+ struct qcc *qcc;
+ int ret = 0;
+
+ if (!qcs)
+ return ret;
+
+ chunk_appendf(msg, " qcs=%p .flg=%#x .id=%llu .st=%s .ctx=%p, .err=%#llx",
+ qcs, qcs->flags, (ull)qcs->id, qcs_st_to_str(qcs->st), qcs->ctx, (ull)qcs->err);
+
+ if (pfx)
+ chunk_appendf(msg, "\n%s", pfx);
+
+ qcc = qcs->qcc;
+ chunk_appendf(msg, " qcc=%p .flg=%#x .nbsc=%llu .nbhreq=%llu, .task=%p",
+ qcc, qcc->flags, (ull)qcc->nb_sc, (ull)qcc->nb_hreq, qcc->task);
+ return ret;
+}
+
+
+static const struct mux_ops qmux_ops = {
+ .init = qmux_init,
+ .destroy = qmux_destroy,
+ .detach = qmux_strm_detach,
+ .rcv_buf = qmux_strm_rcv_buf,
+ .snd_buf = qmux_strm_snd_buf,
+ .nego_fastfwd = qmux_nego_ff,
+ .done_fastfwd = qmux_done_ff,
+ .resume_fastfwd = qmux_resume_ff,
+ .subscribe = qmux_strm_subscribe,
+ .unsubscribe = qmux_strm_unsubscribe,
+ .wake = qmux_wake,
+ .shutw = qmux_strm_shutw,
+ .sctl = qmux_sctl,
+ .show_sd = qmux_strm_show_sd,
+ .flags = MX_FL_HTX|MX_FL_NO_UPG|MX_FL_FRAMED,
+ .name = "QUIC",
+};
+
+static struct mux_proto_list mux_proto_quic =
+ { .token = IST("quic"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_FE, .mux = &qmux_ops };
+
+INITCALL1(STG_REGISTER, register_mux_proto, &mux_proto_quic);
diff --git a/src/mworker-prog.c b/src/mworker-prog.c
new file mode 100644
index 0000000..2734d95
--- /dev/null
+++ b/src/mworker-prog.c
@@ -0,0 +1,359 @@
+/*
+ * Master Worker - program
+ *
+ * Copyright HAProxy Technologies - William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <errno.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/mworker.h>
+#include <haproxy/task.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+
+static int use_program = 0; /* do we use the program section ? */
+
+/*
+ * Launch all configured programs
+ */
+int mworker_ext_launch_all()
+{
+ int ret;
+ struct mworker_proc *child;
+ struct mworker_proc *tmp;
+ int reexec = 0;
+
+ if (!use_program)
+ return 0;
+
+ reexec = getenv("HAPROXY_MWORKER_REEXEC") ? 1 : 0;
+
+ /* find the right mworker_proc */
+ list_for_each_entry_safe(child, tmp, &proc_list, list) {
+ if (child->reloads == 0 && (child->options & PROC_O_TYPE_PROG)) {
+
+ if (reexec && (!(child->options & PROC_O_START_RELOAD))) {
+ struct mworker_proc *old_child;
+
+ /*
+ * This is a reload and we don't want to fork a
+				 * new program, so we have to remove its entry
+				 * from the list.
+ *
+ * But before that, we need to mark the
+ * previous program as not leaving, if we find one.
+ */
+
+ list_for_each_entry(old_child, &proc_list, list) {
+ if (!(old_child->options & PROC_O_TYPE_PROG) || (!(old_child->options & PROC_O_LEAVING)))
+ continue;
+
+ if (strcmp(old_child->id, child->id) == 0)
+ old_child->options &= ~PROC_O_LEAVING;
+ }
+
+ LIST_DELETE(&child->list);
+ mworker_free_child(child);
+ child = NULL;
+
+ continue;
+ }
+
+ child->timestamp = ns_to_sec(now_ns);
+
+ ret = fork();
+ if (ret < 0) {
+ ha_alert("Cannot fork program '%s'.\n", child->id);
+ exit(EXIT_FAILURE); /* there has been an error */
+ } else if (ret > 0) { /* parent */
+ child->pid = ret;
+ ha_notice("New program '%s' (%d) forked\n", child->id, ret);
+ continue;
+ } else if (ret == 0) {
+ /* In child */
+ mworker_unblock_signals();
+ mworker_cleanlisteners();
+ mworker_cleantasks();
+
+ /* setgid / setuid */
+ if (child->gid != -1) {
+ if (getgroups(0, NULL) > 0 && setgroups(0, NULL) == -1)
+ ha_warning("[%s.main()] Failed to drop supplementary groups. Using 'gid'/'group'"
+ " without 'uid'/'user' is generally useless.\n", child->command[0]);
+
+ if (setgid(child->gid) == -1) {
+ ha_alert("[%s.main()] Cannot set gid %d.\n", child->command[0], child->gid);
+ exit(1);
+ }
+ }
+
+ if (child->uid != -1 && setuid(child->uid) == -1) {
+					ha_alert("[%s.main()] Cannot set uid %d.\n", child->command[0], child->uid);
+ exit(1);
+ }
+
+ /* This one must not be exported, it's internal! */
+ unsetenv("HAPROXY_MWORKER_REEXEC");
+ unsetenv("HAPROXY_STARTUPLOGS_FD");
+ unsetenv("HAPROXY_MWORKER_WAIT_ONLY");
+ unsetenv("HAPROXY_PROCESSES");
+ execvp(child->command[0], child->command);
+
+ ha_alert("Cannot execute %s: %s\n", child->command[0], strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+
+ return 0;
+}
+
+
+/* Configuration */
+
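+/* For illustration, a minimal "program" section accepted by the parser below
+ * could look like this (the section id and command are hypothetical; only the
+ * keywords are taken from this parser):
+ *
+ *   program my-sidecar
+ *       command /usr/local/bin/my-sidecar --verbose
+ *       user nobody
+ *       group nogroup
+ *       no option start-on-reload
+ */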
+int cfg_parse_program(const char *file, int linenum, char **args, int kwm)
+{
+ static struct mworker_proc *ext_child = NULL;
+ struct mworker_proc *child;
+ int err_code = 0;
+
+ if (strcmp(args[0], "program") == 0) {
+ if (alertif_too_many_args(1, file, linenum, args, &err_code)) {
+ err_code |= ERR_ABORT;
+ goto error;
+ }
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an <id> argument\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+
+ ext_child = calloc(1, sizeof(*ext_child));
+ if (!ext_child) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+
+ ext_child->options |= PROC_O_TYPE_PROG; /* external process */
+ ext_child->command = NULL;
+ ext_child->path = NULL;
+ ext_child->id = NULL;
+ ext_child->pid = -1;
+ ext_child->reloads = 0;
+ ext_child->timestamp = -1;
+ ext_child->ipc_fd[0] = -1;
+ ext_child->ipc_fd[1] = -1;
+ ext_child->options |= PROC_O_START_RELOAD; /* restart the programs by default */
+ ext_child->uid = -1;
+ ext_child->gid = -1;
+ LIST_INIT(&ext_child->list);
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads == 0 && (child->options & PROC_O_TYPE_PROG)) {
+ if (strcmp(args[1], child->id) == 0) {
+ ha_alert("parsing [%s:%d]: '%s' program section already exists in the configuration.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+ }
+ }
+
+ ext_child->id = strdup(args[1]);
+ if (!ext_child->id) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+
+ LIST_APPEND(&proc_list, &ext_child->list);
+
+ } else if (strcmp(args[0], "command") == 0) {
+ int arg_nb = 0;
+ int i = 0;
+
+ if (*(args[1]) == 0) {
+			ha_alert("parsing [%s:%d]: '%s' expects a command with optional space-separated arguments.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ while (*args[arg_nb+1])
+ arg_nb++;
+
+ ext_child->command = calloc(arg_nb+1, sizeof(*ext_child->command));
+
+ if (!ext_child->command) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+
+ while (i < arg_nb) {
+ ext_child->command[i] = strdup(args[i+1]);
+ if (!ext_child->command[i]) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto error;
+ }
+ i++;
+ }
+ ext_child->command[i] = NULL;
+
+ } else if (strcmp(args[0], "option") == 0) {
+
+ if (*(args[1]) == '\0') {
+ ha_alert("parsing [%s:%d]: '%s' expects an option name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ if (strcmp(args[1], "start-on-reload") == 0) {
+ if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code))
+ goto error;
+ if (kwm == KWM_STD)
+ ext_child->options |= PROC_O_START_RELOAD;
+ else if (kwm == KWM_NO)
+ ext_child->options &= ~PROC_O_START_RELOAD;
+ goto out;
+
+ } else {
+ ha_alert("parsing [%s:%d] : unknown option '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ } else if (strcmp(args[0], "user") == 0) {
+ struct passwd *ext_child_user;
+ if (*(args[1]) == '\0') {
+ ha_alert("parsing [%s:%d]: '%s' expects a user name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto error;
+
+ if (ext_child->uid != -1) {
+ ha_alert("parsing [%s:%d] : user/uid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ ext_child_user = getpwnam(args[1]);
+ if (ext_child_user != NULL) {
+ ext_child->uid = (int)ext_child_user->pw_uid;
+ } else {
+ ha_alert("parsing [%s:%d] : cannot find user id for '%s' (%d:%s)\n", file, linenum, args[1], errno, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ } else if (strcmp(args[0], "group") == 0) {
+ struct group *ext_child_group;
+ if (*(args[1]) == '\0') {
+ ha_alert("parsing [%s:%d]: '%s' expects a group name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto error;
+
+ if (ext_child->gid != -1) {
+ ha_alert("parsing [%s:%d] : group/gid already specified. Continuing.\n", file, linenum);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ ext_child_group = getgrnam(args[1]);
+ if (ext_child_group != NULL) {
+ ext_child->gid = (int)ext_child_group->gr_gid;
+ } else {
+ ha_alert("parsing [%s:%d] : cannot find group id for '%s' (%d:%s)\n", file, linenum, args[1], errno, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ } else {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], "program");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ use_program = 1;
+
+ return err_code;
+
+error:
+ if (ext_child) {
+ LIST_DELETE(&ext_child->list);
+ if (ext_child->command) {
+ int i;
+
+ for (i = 0; ext_child->command[i]; i++) {
+ ha_free(&ext_child->command[i]);
+ }
+ ha_free(&ext_child->command);
+ }
+ ha_free(&ext_child->id);
+ }
+
+ ha_free(&ext_child);
+
+out:
+ return err_code;
+}
+
+int cfg_program_postparser()
+{
+ int err_code = 0;
+ struct mworker_proc *child;
+
+	/* we only need to check this during configuration parsing, as wait
+	 * mode doesn't have the complete description of a program */
+ if (global.mode & MODE_MWORKER_WAIT)
+ return err_code;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads == 0 && (child->options & PROC_O_TYPE_PROG)) {
+ if (child->command == NULL) {
+ ha_alert("The program section '%s' lacks a command to launch.\n", child->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ }
+
+ if (use_program && !(global.mode & MODE_MWORKER)) {
+ ha_alert("Can't use a 'program' section without master worker mode.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+
+ return err_code;
+}
+
+
+REGISTER_CONFIG_SECTION("program", cfg_parse_program, NULL);
+REGISTER_CONFIG_POSTPARSER("program", cfg_program_postparser);
diff --git a/src/mworker.c b/src/mworker.c
new file mode 100644
index 0000000..c71446a
--- /dev/null
+++ b/src/mworker.c
@@ -0,0 +1,821 @@
+/*
+ * Master Worker
+ *
+ * Copyright HAProxy Technologies 2019 - William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#if defined(USE_SYSTEMD)
+#include <systemd/sd-daemon.h>
+#endif
+
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/listener.h>
+#include <haproxy/mworker.h>
+#include <haproxy/peers.h>
+#include <haproxy/proto_sockpair.h>
+#include <haproxy/proxy.h>
+#include <haproxy/ring.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/signal.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+static int exitcode = -1;
+static int max_reloads = -1; /* maximum number of reloads a worker can go through before being killed */
+struct mworker_proc *proc_self = NULL; /* process structure of current process */
+
+/* ----- children processes handling ----- */
+
+/*
+ * Send signal to every known children.
+ */
+
+static void mworker_kill(int sig)
+{
+ struct mworker_proc *child;
+
+ list_for_each_entry(child, &proc_list, list) {
+		/* careful there, we must be sure that the pid > 0; we don't want to emit a kill -1 */
+ if ((child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG)) && (child->pid > 0))
+ kill(child->pid, sig);
+ }
+}
+
+void mworker_kill_max_reloads(int sig)
+{
+ struct mworker_proc *child;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (max_reloads != -1 && (child->options & PROC_O_TYPE_WORKER) &&
+ (child->pid > 0) && (child->reloads > max_reloads))
+ kill(child->pid, sig);
+ }
+}
+
+/* return 1 if a pid is a current child otherwise 0 */
+int mworker_current_child(int pid)
+{
+ struct mworker_proc *child;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if ((child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG)) && (!(child->options & PROC_O_LEAVING)) && (child->pid == pid))
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Return the number of new and old children (including workers and external
+ * processes)
+ */
+int mworker_child_nb()
+{
+ struct mworker_proc *child;
+ int ret = 0;
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->options & (PROC_O_TYPE_WORKER|PROC_O_TYPE_PROG))
+ ret++;
+ }
+
+ return ret;
+}
+
+
+/*
+ * serialize the proc list and put it in the environment
+ */
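+/* For illustration, with one master and one worker the resulting variable
+ * could look like this (values are made up):
+ *
+ *   HAPROXY_PROCESSES=|type=m;fd=-1;cfd=-1;pid=1200;reloads=1;failedreloads=0;timestamp=1700000000;id=;version=2.9.5|type=w;fd=3;cfd=4;pid=1234;reloads=1;failedreloads=0;timestamp=1700000100;id=;version=2.9.5
+ *
+ * Entries are '|'-separated and fields ';'-separated, matching the
+ * memprintf() format used below and the strtok_r() parsing performed in
+ * mworker_env_to_proc_list().
+ */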
+void mworker_proc_list_to_env()
+{
+ char *msg = NULL;
+ struct mworker_proc *child;
+	int minreloads = INT_MAX; /* minimum number of reloads to choose which processes are "current" ones */
+
+ list_for_each_entry(child, &proc_list, list) {
+ char type = '?';
+
+ if (child->options & PROC_O_TYPE_MASTER)
+ type = 'm';
+ else if (child->options & PROC_O_TYPE_PROG)
+ type = 'e';
+		else if (child->options & PROC_O_TYPE_WORKER)
+ type = 'w';
+
+ if (child->reloads < minreloads)
+ minreloads = child->reloads;
+
+ if (child->pid > -1)
+ memprintf(&msg, "%s|type=%c;fd=%d;cfd=%d;pid=%d;reloads=%d;failedreloads=%d;timestamp=%d;id=%s;version=%s", msg ? msg : "", type, child->ipc_fd[0], child->ipc_fd[1], child->pid, child->reloads, child->failedreloads, child->timestamp, child->id ? child->id : "", child->version);
+ }
+ if (msg)
+ setenv("HAPROXY_PROCESSES", msg, 1);
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads > minreloads && !(child->options & PROC_O_TYPE_MASTER)) {
+ child->options |= PROC_O_LEAVING;
+ }
+ }
+}
+
+struct mworker_proc *mworker_proc_new()
+{
+ struct mworker_proc *child;
+
+ child = calloc(1, sizeof(*child));
+ if (!child)
+ return NULL;
+
+ child->failedreloads = 0;
+ child->reloads = 0;
+ child->pid = -1;
+ child->ipc_fd[0] = -1;
+ child->ipc_fd[1] = -1;
+ child->timestamp = -1;
+
+ return child;
+}
+
+
+/*
+ * unserialize the proc list from the environment
+ * Return < 0 upon error.
+ */
+int mworker_env_to_proc_list()
+{
+ char *env, *msg, *omsg = NULL, *token = NULL, *s1;
+ struct mworker_proc *child;
+	int minreloads = INT_MAX; /* minimum number of reloads to choose which processes are "current" ones */
+ int err = 0;
+
+ env = getenv("HAPROXY_PROCESSES");
+ if (!env)
+ goto no_env;
+
+ omsg = msg = strdup(env);
+ if (!msg) {
+		ha_alert("Out of memory while trying to allocate a worker process structure.\n");
+ err = -1;
+ goto out;
+ }
+
+ while ((token = strtok_r(msg, "|", &s1))) {
+ char *subtoken = NULL;
+ char *s2;
+
+ msg = NULL;
+
+ child = mworker_proc_new();
+ if (!child) {
+			ha_alert("Out of memory while trying to allocate a worker process structure.\n");
+ err = -1;
+ goto out;
+ }
+
+ while ((subtoken = strtok_r(token, ";", &s2))) {
+
+ token = NULL;
+
+ if (strncmp(subtoken, "type=", 5) == 0) {
+ char type;
+
+ type = *(subtoken+5);
+ if (type == 'm') { /* we are in the master, assign it */
+ proc_self = child;
+ child->options |= PROC_O_TYPE_MASTER;
+ } else if (type == 'e') {
+ child->options |= PROC_O_TYPE_PROG;
+ } else if (type == 'w') {
+ child->options |= PROC_O_TYPE_WORKER;
+ }
+
+ } else if (strncmp(subtoken, "fd=", 3) == 0) {
+ child->ipc_fd[0] = atoi(subtoken+3);
+ if (child->ipc_fd[0] > -1)
+ global.maxsock++;
+ } else if (strncmp(subtoken, "cfd=", 4) == 0) {
+ child->ipc_fd[1] = atoi(subtoken+4);
+ if (child->ipc_fd[1] > -1)
+ global.maxsock++;
+ } else if (strncmp(subtoken, "pid=", 4) == 0) {
+ child->pid = atoi(subtoken+4);
+ } else if (strncmp(subtoken, "reloads=", 8) == 0) {
+				/* this only counts the number of requested reloads */
+ child->reloads = atoi(subtoken+8);
+
+ if (child->reloads < minreloads)
+ minreloads = child->reloads;
+ } else if (strncmp(subtoken, "failedreloads=", 14) == 0) {
+ child->failedreloads = atoi(subtoken+14);
+ } else if (strncmp(subtoken, "timestamp=", 10) == 0) {
+ child->timestamp = atoi(subtoken+10);
+ } else if (strncmp(subtoken, "id=", 3) == 0) {
+ child->id = strdup(subtoken+3);
+ } else if (strncmp(subtoken, "version=", 8) == 0) {
+ child->version = strdup(subtoken+8);
+ }
+ }
+ if (child->pid) {
+ LIST_APPEND(&proc_list, &child->list);
+ } else {
+ mworker_free_child(child);
+ }
+ }
+
+ /* set the leaving processes once we know which number of reloads are the current processes */
+
+ list_for_each_entry(child, &proc_list, list) {
+ if (child->reloads > minreloads)
+ child->options |= PROC_O_LEAVING;
+ }
+
+ unsetenv("HAPROXY_PROCESSES");
+
+no_env:
+
+ if (!proc_self) {
+
+ proc_self = mworker_proc_new();
+ if (!proc_self) {
+ ha_alert("Cannot allocate process structures.\n");
+ err = -1;
+ goto out;
+ }
+ proc_self->options |= PROC_O_TYPE_MASTER;
+ proc_self->pid = pid;
+		proc_self->timestamp = 0; /* we don't know the start time anymore */
+
+ LIST_APPEND(&proc_list, &proc_self->list);
+ ha_warning("The master internals are corrupted or it was started with a too old version (< 1.9). Please restart the master process.\n");
+ }
+
+out:
+ free(omsg);
+ return err;
+}
+
+/* Signal blocking and unblocking */
+
+void mworker_block_signals()
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ sigaddset(&set, SIGUSR2);
+ sigaddset(&set, SIGTTIN);
+ sigaddset(&set, SIGTTOU);
+ sigaddset(&set, SIGHUP);
+ sigaddset(&set, SIGCHLD);
+ ha_sigmask(SIG_SETMASK, &set, NULL);
+}
+
+void mworker_unblock_signals()
+{
+ haproxy_unblock_signals();
+}
+
+/* ----- mworker signal handlers ----- */
+
+/* broadcast the configured signal to the workers */
+void mworker_broadcast_signal(struct sig_handler *sh)
+{
+ mworker_kill(sh->arg);
+}
+
+/*
+ * When called, this function re-executes haproxy with -sf followed by the
+ * current children's PIDs, and possibly old children's PIDs if they didn't leave yet.
+ */
+void mworker_catch_sighup(struct sig_handler *sh)
+{
+ mworker_reload(0);
+}
+
+void mworker_catch_sigterm(struct sig_handler *sh)
+{
+ int sig = sh->arg;
+
+#if defined(USE_SYSTEMD)
+ if (global.tune.options & GTUNE_USE_SYSTEMD) {
+ sd_notify(0, "STOPPING=1");
+ }
+#endif
+ ha_warning("Exiting Master process...\n");
+ mworker_kill(sig);
+}
+
+/*
+ * Wait for all children to exit
+ */
+
+void mworker_catch_sigchld(struct sig_handler *sh)
+{
+ int exitpid = -1;
+ int status = 0;
+ int childfound;
+
+restart_wait:
+
+ childfound = 0;
+
+ exitpid = waitpid(-1, &status, WNOHANG);
+ if (exitpid > 0) {
+ struct mworker_proc *child, *it;
+
+ if (WIFEXITED(status))
+ status = WEXITSTATUS(status);
+ else if (WIFSIGNALED(status))
+ status = 128 + WTERMSIG(status);
+ else if (WIFSTOPPED(status))
+ status = 128 + WSTOPSIG(status);
+ else
+ status = 255;
+
+ /* delete the child from the process list */
+ list_for_each_entry_safe(child, it, &proc_list, list) {
+ if (child->pid != exitpid)
+ continue;
+
+ LIST_DELETE(&child->list);
+ close(child->ipc_fd[0]);
+ childfound = 1;
+ break;
+ }
+
+ if (!childfound) {
+			/* We didn't find the PID in the list; that shouldn't happen, but we can emit a warning */
+ ha_warning("Process %d exited with code %d (%s)\n", exitpid, status, (status >= 128) ? strsignal(status - 128) : "Exit");
+ } else {
+ /* check if exited child is a current child */
+ if (!(child->options & PROC_O_LEAVING)) {
+ if (child->options & PROC_O_TYPE_WORKER) {
+ if (status < 128)
+ ha_warning("Current worker (%d) exited with code %d (%s)\n", exitpid, status, "Exit");
+ else
+ ha_alert("Current worker (%d) exited with code %d (%s)\n", exitpid, status, strsignal(status - 128));
+ }
+ else if (child->options & PROC_O_TYPE_PROG)
+ ha_alert("Current program '%s' (%d) exited with code %d (%s)\n", child->id, exitpid, status, (status >= 128) ? strsignal(status - 128) : "Exit");
+
+ if (status != 0 && status != 130 && status != 143) {
+ if (child->options & PROC_O_TYPE_WORKER) {
+						ha_warning("A worker process unexpectedly died and this can only be explained by a bug in haproxy or its dependencies.\nPlease check that you are running an up-to-date and maintained version of haproxy and open a bug report.\n");
+ display_version();
+ }
+ if (!(global.tune.options & GTUNE_NOEXIT_ONFAILURE)) {
+						ha_alert("exit-on-failure: killing all processes with SIGTERM\n");
+ mworker_kill(SIGTERM);
+ }
+ }
+ /* 0 & SIGTERM (143) are normal, but we should report SIGINT (130) and other signals */
+ if (exitcode < 0 && status != 0 && status != 143)
+ exitcode = status;
+ } else {
+ if (child->options & PROC_O_TYPE_WORKER) {
+ ha_warning("Former worker (%d) exited with code %d (%s)\n", exitpid, status, (status >= 128) ? strsignal(status - 128) : "Exit");
+ delete_oldpid(exitpid);
+ } else if (child->options & PROC_O_TYPE_PROG) {
+ ha_warning("Former program '%s' (%d) exited with code %d (%s)\n", child->id, exitpid, status, (status >= 128) ? strsignal(status - 128) : "Exit");
+ }
+ }
+ mworker_free_child(child);
+ child = NULL;
+ }
+
+ /* do it again to check if it was the last worker */
+ goto restart_wait;
+ }
+	/* Better rely on the system than on a list of processes to check if it was the last one */
+ else if (exitpid == -1 && errno == ECHILD) {
+ ha_warning("All workers exited. Exiting... (%d)\n", (exitcode > 0) ? exitcode : EXIT_SUCCESS);
+ atexit_flag = 0;
+ if (exitcode > 0)
+ exit(exitcode); /* parent must leave using the status code that provoked the exit */
+ exit(EXIT_SUCCESS);
+ }
+
+
+/* ----- IPC FD (sockpair) related ----- */
+
+/* This wrapper is called from the workers. It is registered instead of the
+ * normal listener_accept() so the worker can exit() when it detects that the
+ * master closed the IPC FD. If it's not a close, we just call the regular
+ * listener_accept() function.
+ */
+void mworker_accept_wrapper(int fd)
+{
+ char c;
+ int ret;
+
+ while (1) {
+ ret = recv(fd, &c, 1, MSG_PEEK);
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_recv(fd);
+ return;
+ }
+ break;
+ } else if (ret > 0) {
+ struct listener *l = fdtab[fd].owner;
+
+ if (l)
+ listener_accept(l);
+ return;
+ } else if (ret == 0) {
+			/* At this step the master went down before
+			 * this worker performed a 'normal' exit.
+			 * So we want to exit with an error, but
+			 * other threads may still be processing
+			 * some data, so we can't perform a clean
+			 * deinit().
+			 */
+ exit(EXIT_FAILURE);
+ }
+ }
+ return;
+}
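+/* Design note: MSG_PEEK above is what keeps this wrapper non-destructive.
+ * The peeked byte stays in the socket buffer for the real accept path, so
+ * the wrapper only uses it to tell "data pending" (ret > 0) apart from
+ * "master gone" (ret == 0, i.e. orderly shutdown of the socketpair).
+ */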
+
+/*
+ * This function registers the accept wrapper for the sockpair of the master
+ * worker. It's only handled by worker thread #0. Other threads and master do
+ * nothing here. It always returns 1 (success).
+ */
+static int mworker_sockpair_register_per_thread()
+{
+ if (!(global.mode & MODE_MWORKER) || master)
+ return 1;
+
+ if (tid != 0)
+ return 1;
+
+ if (proc_self->ipc_fd[1] < 0) /* proc_self was incomplete and we can't find the socketpair */
+ return 1;
+
+ fd_set_nonblock(proc_self->ipc_fd[1]);
+ /* register the wrapper to handle read 0 when the master exits */
+ fdtab[proc_self->ipc_fd[1]].iocb = mworker_accept_wrapper;
+ fd_want_recv(proc_self->ipc_fd[1]);
+ return 1;
+}
+
+REGISTER_PER_THREAD_INIT(mworker_sockpair_register_per_thread);
+
+/* ----- proxies ----- */
+/*
+ * Upon a reload, the master worker needs to close all listener FDs except the
+ * mworker_pipe fd and the FDs provided by fd@
+ */
+void mworker_cleanlisteners()
+{
+ struct listener *l, *l_next;
+ struct proxy *curproxy;
+ struct peers *curpeers;
+
+ /* peers proxies cleanup */
+ for (curpeers = cfg_peers; curpeers; curpeers = curpeers->next) {
+ if (!curpeers->peers_fe)
+ continue;
+
+ stop_proxy(curpeers->peers_fe);
+ /* disable this peer section so that it kills itself */
+ if (curpeers->sighandler)
+ signal_unregister_handler(curpeers->sighandler);
+ task_destroy(curpeers->sync_task);
+ curpeers->sync_task = NULL;
+ curpeers->peers_fe = NULL;
+ }
+
+ /* main proxies cleanup */
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ int listen_in_master = 0;
+
+ list_for_each_entry_safe(l, l_next, &curproxy->conf.listeners, by_fe) {
+ /* remove the listener, but not those we need in the master... */
+ if (!(l->rx.flags & RX_F_MWORKER)) {
+ unbind_listener(l);
+ delete_listener(l);
+ } else {
+ listen_in_master = 1;
+ }
+ }
+ /* if the proxy shouldn't be in the master, we stop it */
+ if (!listen_in_master)
+ curproxy->flags |= PR_FL_DISABLED;
+ }
+}
+
+/* Upon a configuration loading error, some mworker_proc entries and
+ * FDs/servers were assigned but the worker was never forked; we must close
+ * the FDs and remove the servers
+ */
+void mworker_cleanup_proc()
+{
+ struct mworker_proc *child, *it;
+
+ list_for_each_entry_safe(child, it, &proc_list, list) {
+
+ if (child->pid == -1) {
+ /* Close the socketpairs. */
+ if (child->ipc_fd[0] > -1)
+ close(child->ipc_fd[0]);
+ if (child->ipc_fd[1] > -1)
+ close(child->ipc_fd[1]);
+ if (child->srv) {
+ /* only exists if we created a master CLI listener */
+ srv_drop(child->srv);
+ }
+ LIST_DELETE(&child->list);
+ mworker_free_child(child);
+ }
+ }
+}
+
+
+/* Displays workers and processes */
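+/* For illustration, the dump built below looks roughly like this (PIDs,
+ * uptimes and versions are made up):
+ *
+ *   #<PID>          <type>          <reloads>       <uptime>        <version>
+ *   1200            master          1 [failed: 0]   0d00h02m10s     2.9.5
+ *   # workers
+ *   1234            worker          1               0d00h02m10s     2.9.5
+ *   # programs
+ *   1240            my-prog         1               0d00h02m10s     -
+ */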
+static int cli_io_handler_show_proc(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct mworker_proc *child;
+ int old = 0;
+ int up = date.tv_sec - proc_self->timestamp;
+ char *uptime = NULL;
+ char *reloadtxt = NULL;
+
+	/* FIXME: Don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ chunk_reset(&trash);
+
+ memprintf(&reloadtxt, "%d [failed: %d]", proc_self->reloads, proc_self->failedreloads);
+ chunk_printf(&trash, "#%-14s %-15s %-15s %-15s %-15s\n", "<PID>", "<type>", "<reloads>", "<uptime>", "<version>");
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15s %-15s %-15s\n", (unsigned int)getpid(), "master", reloadtxt, uptime, haproxy_version);
+ ha_free(&reloadtxt);
+ ha_free(&uptime);
+
+ /* displays current processes */
+
+ chunk_appendf(&trash, "# workers\n");
+ list_for_each_entry(child, &proc_list, list) {
+ up = date.tv_sec - child->timestamp;
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+
+ if (child->options & PROC_O_LEAVING) {
+ old++;
+ continue;
+ }
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15d %-15s %-15s\n", child->pid, "worker", child->reloads, uptime, child->version);
+ ha_free(&uptime);
+ }
+
+ /* displays old processes */
+
+ if (old) {
+ char *msg = NULL;
+
+ chunk_appendf(&trash, "# old workers\n");
+ list_for_each_entry(child, &proc_list, list) {
+ up = date.tv_sec - child->timestamp;
+			if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ if (!(child->options & PROC_O_TYPE_WORKER))
+ continue;
+
+ if (child->options & PROC_O_LEAVING) {
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15d %-15s %-15s\n", child->pid, "worker", child->reloads, uptime, child->version);
+ ha_free(&uptime);
+ }
+ }
+ free(msg);
+ }
+
+ /* displays external process */
+ chunk_appendf(&trash, "# programs\n");
+ old = 0;
+ list_for_each_entry(child, &proc_list, list) {
+ up = date.tv_sec - child->timestamp;
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ if (!(child->options & PROC_O_TYPE_PROG))
+ continue;
+
+ if (child->options & PROC_O_LEAVING) {
+ old++;
+ continue;
+ }
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15d %-15s %-15s\n", child->pid, child->id, child->reloads, uptime, "-");
+ ha_free(&uptime);
+ }
+
+ if (old) {
+ chunk_appendf(&trash, "# old programs\n");
+ list_for_each_entry(child, &proc_list, list) {
+ up = date.tv_sec - child->timestamp;
+ if (up < 0) /* must never be negative because of clock drift */
+ up = 0;
+
+ if (!(child->options & PROC_O_TYPE_PROG))
+ continue;
+
+ if (child->options & PROC_O_LEAVING) {
+ memprintf(&uptime, "%dd%02dh%02dm%02ds", up / 86400, (up % 86400) / 3600, (up % 3600) / 60, (up % 60));
+ chunk_appendf(&trash, "%-15u %-15s %-15d %-15s %-15s\n", child->pid, child->id, child->reloads, uptime, "-");
+ ha_free(&uptime);
+ }
+ }
+ }
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ /* dump complete */
+ return 1;
+}
+
+/* reload the master process */
+static int cli_parse_reload(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct stconn *scb = NULL;
+ struct stream *strm = NULL;
+ struct connection *conn = NULL;
+ int fd = -1;
+ int hardreload = 0;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ /* hard reload requested */
+ if (*args[0] == 'h')
+ hardreload = 1;
+
+	/* This asks for a synchronous reload, which means we will keep this FD
+ instead of closing it. */
+
+ scb = appctx_sc(appctx);
+ if (scb)
+ strm = sc_strm(scb);
+ if (strm && strm->scf)
+ conn = sc_conn(strm->scf);
+ if (conn)
+ fd = conn_fd(conn);
+
+ /* Send the FD of the current session to the "cli_reload" FD, which won't be polled */
+ if (fd != -1 && send_fd_uxst(proc_self->ipc_fd[0], fd) == 0) {
+ fd_delete(fd); /* avoid the leak of the FD after sending it via the socketpair */
+ }
+ mworker_reload(hardreload);
+
+ return 1;
+}
+
+/* Displays whether the current reload failed or succeeded.
+ * If the startup logs are available, dump them. */
+static int cli_io_handler_show_loadstatus(struct appctx *appctx)
+{
+ char *env;
+ struct stconn *sc = appctx_sc(appctx);
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+	/* FIXME: Don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ env = getenv("HAPROXY_LOAD_SUCCESS");
+ if (!env)
+ return 1;
+
+ if (strcmp(env, "0") == 0) {
+ chunk_printf(&trash, "Success=0\n");
+ } else if (strcmp(env, "1") == 0) {
+ chunk_printf(&trash, "Success=1\n");
+ }
+#ifdef USE_SHM_OPEN
+ if (startup_logs && b_data(&startup_logs->buf) > 1)
+ chunk_appendf(&trash, "--\n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ if (startup_logs) {
+ appctx->io_handler = NULL;
+ ring_attach_cli(startup_logs, appctx, 0);
+ return 0;
+ }
+#else
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+#endif
+ return 1;
+}
+
+static int mworker_parse_global_max_reloads(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int linenum, char **err)
+{
+
+ int err_code = 0;
+
+ if (alertif_too_many_args(1, file, linenum, args, &err_code))
+ goto out;
+
+ if (*(args[1]) == 0) {
+ memprintf(err, "%sparsing [%s:%d] : '%s' expects an integer argument.\n", *err, file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ max_reloads = atol(args[1]);
+ if (max_reloads < 0) {
+ memprintf(err, "%sparsing [%s:%d] '%s' : invalid value %d, must be >= 0", *err, file, linenum, args[0], max_reloads);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ return err_code;
+}
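+/* Minimal configuration sketch for the keyword parsed above (the value is
+ * arbitrary):
+ *
+ *   global
+ *       mworker-max-reloads 3
+ *
+ * Workers that went through more than 3 reloads then become candidates for
+ * mworker_kill_max_reloads() above.
+ */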
+
+void mworker_free_child(struct mworker_proc *child)
+{
+ int i;
+
+ if (child == NULL)
+ return;
+
+ for (i = 0; child->command && child->command[i]; i++)
+ ha_free(&child->command[i]);
+
+ ha_free(&child->command);
+ ha_free(&child->id);
+ ha_free(&child->version);
+ free(child);
+}
+
+static struct cfg_kw_list mworker_kws = {{ }, {
+ { CFG_GLOBAL, "mworker-max-reloads", mworker_parse_global_max_reloads },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &mworker_kws);
+
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "@<relative pid>", NULL }, "@<relative pid> : send a command to the <relative pid> process", NULL, cli_io_handler_show_proc, NULL, NULL, ACCESS_MASTER_ONLY},
+ { { "@!<pid>", NULL }, "@!<pid> : send a command to the <pid> process", cli_parse_default, NULL, NULL, NULL, ACCESS_MASTER_ONLY},
+ { { "@master", NULL }, "@master : send a command to the master process", cli_parse_default, NULL, NULL, NULL, ACCESS_MASTER_ONLY},
+ { { "show", "proc", NULL }, "show proc : show processes status", cli_parse_default, cli_io_handler_show_proc, NULL, NULL, ACCESS_MASTER_ONLY},
+	{ { "reload", NULL }, "reload : perform a soft-reload (-sf) of haproxy", cli_parse_reload, NULL, NULL, NULL, ACCESS_MASTER_ONLY},
+	{ { "hard-reload", NULL }, "hard-reload : perform a hard-reload (-st) of haproxy", cli_parse_reload, NULL, NULL, NULL, ACCESS_MASTER_ONLY},
+ { { "_loadstatus", NULL }, NULL, cli_parse_default, cli_io_handler_show_loadstatus, NULL, NULL, ACCESS_MASTER_ONLY},
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/namespace.c b/src/namespace.c
new file mode 100644
index 0000000..9cc85a3
--- /dev/null
+++ b/src/namespace.c
@@ -0,0 +1,132 @@
+#define _GNU_SOURCE
+
+#include <sched.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/chunk.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/hash.h>
+#include <haproxy/namespace.h>
+#include <haproxy/signal.h>
+
+/* Opens the namespace <ns_name> and returns the FD or -1 in case of error
+ * (check errno).
+ */
+static int open_named_namespace(const char *ns_name)
+{
+ if (chunk_printf(&trash, "/var/run/netns/%s", ns_name) < 0)
+ return -1;
+ return open(trash.area, O_RDONLY | O_CLOEXEC);
+}
+
+static int default_namespace = -1;
+
+static int init_default_namespace()
+{
+ if (chunk_printf(&trash, "/proc/%d/ns/net", getpid()) < 0)
+ return -1;
+ default_namespace = open(trash.area, O_RDONLY | O_CLOEXEC);
+ return default_namespace;
+}
+
+static struct eb_root namespace_tree_root = EB_ROOT;
+
+static void netns_sig_stop(struct sig_handler *sh)
+{
+ struct ebpt_node *node, *next;
+ struct netns_entry *entry;
+
+ /* close namespace file descriptors and remove registered namespaces from the
+ * tree when stopping */
+ node = ebpt_first(&namespace_tree_root);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ entry = container_of(node, struct netns_entry, node);
+ free(entry->node.key);
+ close(entry->fd);
+ free(entry);
+ node = next;
+ }
+}
+
+int netns_init(void)
+{
+ int err_code = 0;
+
+ /* if no namespaces have been defined in the config then
+ * there is no point in trying to initialize anything:
+ * my_socketat() will never be called with a valid namespace
+ * structure and thus switching back to the default namespace
+ * is not needed either */
+ if (!eb_is_empty(&namespace_tree_root)) {
+ if (init_default_namespace() < 0) {
+ ha_alert("Failed to open the default namespace.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ signal_register_fct(0, netns_sig_stop, 0);
+
+ return err_code;
+}
+
+struct netns_entry* netns_store_insert(const char *ns_name)
+{
+ struct netns_entry *entry = NULL;
+ int fd = open_named_namespace(ns_name);
+ if (fd == -1)
+ goto out;
+
+ entry = calloc(1, sizeof(*entry));
+ if (!entry)
+ goto out;
+ entry->fd = fd;
+ entry->node.key = strdup(ns_name);
+ entry->name_len = strlen(ns_name);
+ ebis_insert(&namespace_tree_root, &entry->node);
+out:
+ return entry;
+}
+
+const struct netns_entry* netns_store_lookup(const char *ns_name, size_t ns_name_len)
+{
+ struct ebpt_node *node;
+
+ node = ebis_lookup_len(&namespace_tree_root, ns_name, ns_name_len);
+ if (node)
+ return ebpt_entry(node, struct netns_entry, node);
+ else
+ return NULL;
+}
+
+/* Opens a socket in the namespace described by <ns> with the parameters <domain>,
+ * <type> and <protocol> and returns the FD or -1 in case of error (check errno).
+ */
+int my_socketat(const struct netns_entry *ns, int domain, int type, int protocol)
+{
+ int sock;
+
+ if (default_namespace >= 0 && ns && setns(ns->fd, CLONE_NEWNET) == -1)
+ return -1;
+
+ sock = socket(domain, type, protocol);
+
+ if (default_namespace >= 0 && ns && setns(default_namespace, CLONE_NEWNET) == -1) {
+ if (sock >= 0)
+ close(sock);
+ return -1;
+ }
+ return sock;
+}
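+/* Minimal usage sketch (the "blue" namespace name is hypothetical):
+ *
+ *   const struct netns_entry *ns = netns_store_lookup("blue", 4);
+ *   int fd = my_socketat(ns, AF_INET, SOCK_STREAM, 0);
+ *
+ * Passing ns = NULL simply creates the socket in the current namespace,
+ * since both setns() calls are then skipped.
+ */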
+
+REGISTER_BUILD_OPTS("Built with network namespace support.");
diff --git a/src/ncbuf.c b/src/ncbuf.c
new file mode 100644
index 0000000..e1452f1
--- /dev/null
+++ b/src/ncbuf.c
@@ -0,0 +1,986 @@
+#include <haproxy/ncbuf.h>
+
+#include <string.h>
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifdef STANDALONE
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <haproxy/list.h>
+#endif /* STANDALONE */
+
+#ifdef DEBUG_STRICT
+# include <haproxy/bug.h>
+#else
+# include <stdio.h>
+# include <stdlib.h>
+
+# undef BUG_ON
+# define BUG_ON(x) if (x) { fprintf(stderr, "CRASH ON %s:%d\n", __func__, __LINE__); abort(); }
+
+# undef BUG_ON_HOT
+# define BUG_ON_HOT(x) if (x) { fprintf(stderr, "CRASH ON %s:%d\n", __func__, __LINE__); abort(); }
+#endif /* DEBUG_STRICT */
+
+#include <haproxy/compiler.h>
+
+/* ******** internal API ******** */
+
+#define NCB_BLK_NULL ((struct ncb_blk){ .st = NULL })
+
+#define NCB_BK_F_GAP 0x01 /* block represents a gap */
+#define NCB_BK_F_FIN 0x02 /* special reduced gap present at the end of the buffer */
+struct ncb_blk {
+ char *st; /* first byte of the block */
+ char *end; /* first byte after this block */
+
+ char *sz_ptr; /* pointer to size element - NULL for reduced gap */
+ ncb_sz_t sz; /* size of the block */
+ ncb_sz_t sz_data; /* size of the data following the block - invalid for reduced GAP */
+ ncb_sz_t off; /* offset of block in buffer */
+
+ char flag;
+};
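+/* A non-null buffer is walked as an alternating sequence of DATA and GAP
+ * blocks, always starting with a (possibly empty) DATA block:
+ *
+ *   [ DATA | GAP | DATA | GAP | ... | last block, possibly a reduced GAP ]
+ *
+ * as enforced by ncb_blk_first() and ncb_blk_next() below.
+ */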
+
+/* Return pointer to <off> relative to <buf> head. Support buffer wrapping. */
+static char *ncb_peek(const struct ncbuf *buf, ncb_sz_t off)
+{
+ char *ptr = ncb_head(buf) + off;
+ if (ptr >= buf->area + buf->size)
+ ptr -= buf->size;
+ return ptr;
+}
+
+/* Returns the reserved space of <buf> which contains the size of the first
+ * data block.
+ */
+static char *ncb_reserved(const struct ncbuf *buf)
+{
+ return ncb_peek(buf, buf->size - NCB_RESERVED_SZ);
+}
+
+/* Encode <off> at <st> position in <buf>. Support wrapping. */
+static forceinline void ncb_write_off(const struct ncbuf *buf, char *st, ncb_sz_t off)
+{
+ int i;
+
+ BUG_ON_HOT(st >= buf->area + buf->size);
+
+ for (i = 0; i < sizeof(ncb_sz_t); ++i) {
+ (*st) = off >> (8 * i) & 0xff;
+
+ if ((++st) == ncb_wrap(buf))
+ st = ncb_orig(buf);
+ }
+}
+
+/* Decode offset stored at <st> position in <buf>. Support wrapping. */
+static forceinline ncb_sz_t ncb_read_off(const struct ncbuf *buf, char *st)
+{
+ int i;
+ ncb_sz_t off = 0;
+
+ BUG_ON_HOT(st >= buf->area + buf->size);
+
+ for (i = 0; i < sizeof(ncb_sz_t); ++i) {
+ off |= (unsigned char )(*st) << (8 * i);
+
+ if ((++st) == ncb_wrap(buf))
+ st = ncb_orig(buf);
+ }
+
+ return off;
+}
+
+/* Add <off> to the offset stored at <st> in <buf>. Support wrapping. */
+static forceinline void ncb_inc_off(const struct ncbuf *buf, char *st, ncb_sz_t off)
+{
+ const ncb_sz_t old = ncb_read_off(buf, st);
+ ncb_write_off(buf, st, old + off);
+}
+
+/* Returns true if a gap cannot be inserted at <off>: a reduced gap must be used. */
+static forceinline int ncb_off_reduced(const struct ncbuf *b, ncb_sz_t off)
+{
+ return off + NCB_GAP_MIN_SZ > ncb_size(b);
+}
+
+/* Returns true if <blk> is the special NULL block. */
+static forceinline int ncb_blk_is_null(const struct ncb_blk *blk)
+{
+ return !blk->st;
+}
+
+/* Returns true if <blk> is the last block of <buf>. */
+static forceinline int ncb_blk_is_last(const struct ncbuf *buf, const struct ncb_blk *blk)
+{
+ BUG_ON_HOT(blk->off + blk->sz > ncb_size(buf));
+ return blk->off + blk->sz == ncb_size(buf);
+}
+
+/* Returns the first block of <buf> which is always a DATA. */
+static struct ncb_blk ncb_blk_first(const struct ncbuf *buf)
+{
+ struct ncb_blk blk;
+
+ if (ncb_is_null(buf))
+ return NCB_BLK_NULL;
+
+ blk.st = ncb_head(buf);
+
+ blk.sz_ptr = ncb_reserved(buf);
+ blk.sz = ncb_read_off(buf, ncb_reserved(buf));
+ blk.sz_data = 0;
+ BUG_ON_HOT(blk.sz > ncb_size(buf));
+
+ blk.end = ncb_peek(buf, blk.sz);
+ blk.off = 0;
+ blk.flag = 0;
+
+ return blk;
+}
+
+/* Returns the block following <prev> in the buffer <buf>. */
+static struct ncb_blk ncb_blk_next(const struct ncbuf *buf,
+ const struct ncb_blk *prev)
+{
+ struct ncb_blk blk;
+
+ BUG_ON_HOT(ncb_blk_is_null(prev));
+
+ if (ncb_blk_is_last(buf, prev))
+ return NCB_BLK_NULL;
+
+ blk.st = prev->end;
+ blk.off = prev->off + prev->sz;
+ blk.flag = ~prev->flag & NCB_BK_F_GAP;
+
+ if (blk.flag & NCB_BK_F_GAP) {
+ if (ncb_off_reduced(buf, blk.off)) {
+ blk.flag |= NCB_BK_F_FIN;
+ blk.sz_ptr = NULL;
+ blk.sz = ncb_size(buf) - blk.off;
+ blk.sz_data = 0;
+
+ /* A reduced gap can only be the last block. */
+ BUG_ON_HOT(!ncb_blk_is_last(buf, &blk));
+ }
+ else {
+ blk.sz_ptr = ncb_peek(buf, blk.off + NCB_GAP_SZ_OFF);
+ blk.sz = ncb_read_off(buf, blk.sz_ptr);
+ blk.sz_data = ncb_read_off(buf, ncb_peek(buf, blk.off + NCB_GAP_SZ_DATA_OFF));
+ BUG_ON_HOT(blk.sz < NCB_GAP_MIN_SZ);
+ }
+ }
+ else {
+ blk.sz_ptr = ncb_peek(buf, prev->off + NCB_GAP_SZ_DATA_OFF);
+ blk.sz = prev->sz_data;
+ blk.sz_data = 0;
+
+ /* only first DATA block can be empty. If this happens, a GAP
+ * merge should have been realized.
+ */
+ BUG_ON_HOT(!blk.sz);
+ }
+
+ BUG_ON_HOT(blk.off + blk.sz > ncb_size(buf));
+ blk.end = ncb_peek(buf, blk.off + blk.sz);
+
+ return blk;
+}
+
+/* Returns the block containing offset <off>. Note that if <off> is at the
+ * frontier between two blocks, this function will return the preceding one.
+ * This is done to easily merge blocks on insertion/deletion.
+ */
+static struct ncb_blk ncb_blk_find(const struct ncbuf *buf, ncb_sz_t off)
+{
+ struct ncb_blk blk;
+
+ if (ncb_is_null(buf))
+ return NCB_BLK_NULL;
+
+ BUG_ON_HOT(off >= ncb_size(buf));
+
+ for (blk = ncb_blk_first(buf); off > blk.off + blk.sz;
+ blk = ncb_blk_next(buf, &blk)) {
+ }
+
+ return blk;
+}
+
+/* Transform absolute offset <off> to a relative one from <blk> start. */
+static forceinline ncb_sz_t ncb_blk_off(const struct ncb_blk *blk, ncb_sz_t off)
+{
+ BUG_ON_HOT(off < blk->off || off > blk->off + blk->sz);
+ BUG_ON_HOT(off - blk->off > blk->sz);
+ return off - blk->off;
+}
+
+/* Simulate insertion in <buf> of <data> of length <len> at offset <off>. This
+ * ensures that minimal block sizes are respected for newly formed gaps. <blk>
+ * must be the block where the insert operation begins. If <mode> is
+ * NCB_ADD_COMPARE, old and new overlapped data are compared to validate the
+ * insertion.
+ *
+ * Returns NCB_RET_OK if insertion can proceed.
+ */
+static enum ncb_ret ncb_check_insert(const struct ncbuf *buf,
+ const struct ncb_blk *blk, ncb_sz_t off,
+ const char *data, ncb_sz_t len,
+ enum ncb_add_mode mode)
+{
+ struct ncb_blk next;
+ ncb_sz_t off_blk = ncb_blk_off(blk, off);
+ ncb_sz_t to_copy;
+ ncb_sz_t left = len;
+
+ /* If insertion starts in a gap, it must leave enough space to keep the
+ * gap header.
+ */
+ if (left && (blk->flag & NCB_BK_F_GAP)) {
+ if (off_blk < NCB_GAP_MIN_SZ)
+ return NCB_RET_GAP_SIZE;
+ }
+
+ next = *blk;
+ while (left) {
+ off_blk = ncb_blk_off(&next, off);
+ to_copy = MIN(left, next.sz - off_blk);
+
+ if (next.flag & NCB_BK_F_GAP && off_blk + to_copy < next.sz) {
+ /* Insertion must leave enough space for a new gap
+ * header if it stops in the middle of a gap.
+ */
+ const ncb_sz_t gap_sz = next.sz - (off_blk + to_copy);
+ if (gap_sz < NCB_GAP_MIN_SZ && !ncb_blk_is_last(buf, &next))
+ return NCB_RET_GAP_SIZE;
+ }
+ else if (!(next.flag & NCB_BK_F_GAP) && mode == NCB_ADD_COMPARE) {
+ /* Compare memory of data block in NCB_ADD_COMPARE mode. */
+ const ncb_sz_t off_blk = ncb_blk_off(&next, off);
+ char *st = ncb_peek(buf, off);
+
+ to_copy = MIN(left, next.sz - off_blk);
+ if (st + to_copy > ncb_wrap(buf)) {
+ const ncb_sz_t sz1 = ncb_wrap(buf) - st;
+ if (memcmp(st, data, sz1))
+ return NCB_RET_DATA_REJ;
+ if (memcmp(ncb_orig(buf), data + sz1, to_copy - sz1))
+ return NCB_RET_DATA_REJ;
+ }
+ else {
+ if (memcmp(st, data, to_copy))
+ return NCB_RET_DATA_REJ;
+ }
+ }
+
+ left -= to_copy;
+ data += to_copy;
+ off += to_copy;
+
+ next = ncb_blk_next(buf, &next);
+ }
+
+ return NCB_RET_OK;
+}
+
+/* Fill new <data> of length <len> inside an already existing data <blk> at
+ * offset <off>. Offset is relative to <blk> so it cannot be greater than the
+ * block size. <mode> specifies if old data are preserved or overwritten.
+ */
+static ncb_sz_t ncb_fill_data_blk(const struct ncbuf *buf,
+ const struct ncb_blk *blk, ncb_sz_t off,
+ const char *data, ncb_sz_t len,
+ enum ncb_add_mode mode)
+{
+ const ncb_sz_t to_copy = MIN(len, blk->sz - off);
+ char *ptr = NULL;
+
+ BUG_ON_HOT(off > blk->sz);
+ /* This can happen due to a previous ncb_blk_find() usage. In this
+ * case the current fill is a noop.
+ */
+ if (off == blk->sz)
+ return 0;
+
+ if (mode == NCB_ADD_OVERWRT) {
+ ptr = ncb_peek(buf, blk->off + off);
+
+ if (ptr + to_copy >= ncb_wrap(buf)) {
+ const ncb_sz_t sz1 = ncb_wrap(buf) - ptr;
+ memcpy(ptr, data, sz1);
+ memcpy(ncb_orig(buf), data + sz1, to_copy - sz1);
+ }
+ else {
+ memcpy(ptr, data, to_copy);
+ }
+ }
+
+ return to_copy;
+}
+
+/* Fill the gap <blk> starting at <off> with new <data> of length <len>. <off>
+ * is relative to <blk> so it cannot be greater than the block size.
+ */
+static ncb_sz_t ncb_fill_gap_blk(const struct ncbuf *buf,
+ const struct ncb_blk *blk, ncb_sz_t off,
+ const char *data, ncb_sz_t len)
+{
+ const ncb_sz_t to_copy = MIN(len, blk->sz - off);
+ char *ptr;
+
+ BUG_ON_HOT(off > blk->sz);
+ /* This can happen due to a previous ncb_blk_find() usage. In this
+ * case the current fill is a noop.
+ */
+ if (off == blk->sz)
+ return 0;
+
+ /* A new gap must be created if insertion stopped before gap end. */
+ if (off + to_copy < blk->sz) {
+ const ncb_sz_t gap_off = blk->off + off + to_copy;
+ const ncb_sz_t gap_sz = blk->sz - off - to_copy;
+
+ BUG_ON_HOT(!ncb_off_reduced(buf, gap_off) &&
+ blk->off + blk->sz - gap_off < NCB_GAP_MIN_SZ);
+
+ /* write the new gap header unless this is a reduced gap. */
+ if (!ncb_off_reduced(buf, gap_off)) {
+ char *gap_ptr = ncb_peek(buf, gap_off + NCB_GAP_SZ_OFF);
+ char *gap_data_ptr = ncb_peek(buf, gap_off + NCB_GAP_SZ_DATA_OFF);
+
+ ncb_write_off(buf, gap_ptr, gap_sz);
+ ncb_write_off(buf, gap_data_ptr, blk->sz_data);
+ }
+ }
+
+ /* fill the gap with new data */
+ ptr = ncb_peek(buf, blk->off + off);
+ if (ptr + to_copy >= ncb_wrap(buf)) {
+ ncb_sz_t sz1 = ncb_wrap(buf) - ptr;
+ memcpy(ptr, data, sz1);
+ memcpy(ncb_orig(buf), data + sz1, to_copy - sz1);
+ }
+ else {
+ memcpy(ptr, data, to_copy);
+ }
+
+ return to_copy;
+}
+
+/* ******** public API ******** */
+
+/* Initialize or reset <buf> by clearing all data. Its size is untouched.
+ * The buffer head is positioned at offset <head>. Use 0 to realign it. <buf> must not
+ * be NCBUF_NULL.
+ */
+void ncb_init(struct ncbuf *buf, ncb_sz_t head)
+{
+ BUG_ON_HOT(ncb_is_null(buf));
+
+ BUG_ON_HOT(head >= buf->size);
+ buf->head = head;
+
+ ncb_write_off(buf, ncb_reserved(buf), 0);
+ ncb_write_off(buf, ncb_head(buf), ncb_size(buf));
+ ncb_write_off(buf, ncb_peek(buf, sizeof(ncb_sz_t)), 0);
+}
+
+/* Construct a ncbuf with all its parameters. */
+struct ncbuf ncb_make(char *area, ncb_sz_t size, ncb_sz_t head)
+{
+ struct ncbuf buf;
+
+ /* Ensure that there is enough space for the reserved space and data.
+ * This is the minimal value to not crash later.
+ */
+ BUG_ON_HOT(size <= NCB_RESERVED_SZ);
+
+ buf.area = area;
+ buf.size = size;
+ buf.head = head;
+
+ return buf;
+}
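+
+/* Minimal setup sketch (illustrative; the 16384 byte area is arbitrary):
+ *
+ *   char *area = malloc(16384);
+ *   struct ncbuf buf = ncb_make(area, 16384, 0);
+ *   ncb_init(&buf, 0); // required before any add/advance operation
+ */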
+
+/* Returns the total number of bytes stored in whole <buf>. */
+ncb_sz_t ncb_total_data(const struct ncbuf *buf)
+{
+ struct ncb_blk blk;
+ int total = 0;
+
+ for (blk = ncb_blk_first(buf); !ncb_blk_is_null(&blk); blk = ncb_blk_next(buf, &blk)) {
+ if (!(blk.flag & NCB_BK_F_GAP))
+ total += blk.sz;
+ }
+
+ return total;
+}
+
+/* Returns true if there is no data anywhere in <buf>. */
+int ncb_is_empty(const struct ncbuf *buf)
+{
+ int first_data, first_gap;
+
+ if (ncb_is_null(buf))
+ return 1;
+
+ first_data = ncb_read_off(buf, ncb_reserved(buf));
+ BUG_ON_HOT(first_data > ncb_size(buf));
+ /* Buffer is not empty if the first data block size is not zero. */
+ if (first_data)
+ return 0;
+
+ /* Head contains the first gap size if first data block is empty. */
+ first_gap = ncb_read_off(buf, ncb_head(buf));
+ BUG_ON_HOT(first_gap > ncb_size(buf));
+ return first_gap == ncb_size(buf);
+}
+
+/* Returns true if no more data can be inserted in <buf>. */
+int ncb_is_full(const struct ncbuf *buf)
+{
+ int first_data;
+
+ if (ncb_is_null(buf))
+ return 0;
+
+ /* First data block must cover whole buffer if full. */
+ first_data = ncb_read_off(buf, ncb_reserved(buf));
+ BUG_ON_HOT(first_data > ncb_size(buf));
+ return first_data == ncb_size(buf);
+}
+
+/* Returns true if <buf> contains data fragmented by gaps. */
+int ncb_is_fragmented(const struct ncbuf *buf)
+{
+ struct ncb_blk data, gap;
+
+ if (ncb_is_null(buf))
+ return 0;
+
+ /* check if buffer is empty or full */
+ if (ncb_is_empty(buf) || ncb_is_full(buf))
+ return 0;
+
+ /* check that following gap is the last block */
+ data = ncb_blk_first(buf);
+ gap = ncb_blk_next(buf, &data);
+ return !ncb_blk_is_last(buf, &gap);
+}
+
+/* Returns the number of bytes of data available in <buf> starting at offset
+ * <off> until the next gap or the buffer end. The counted data may wrap if
+ * the buffer storage is not aligned.
+ */
+ncb_sz_t ncb_data(const struct ncbuf *buf, ncb_sz_t off)
+{
+ struct ncb_blk blk;
+ ncb_sz_t off_blk;
+
+ if (ncb_is_null(buf))
+ return 0;
+
+ blk = ncb_blk_find(buf, off);
+ off_blk = ncb_blk_off(&blk, off);
+
+ /* if <off> is at the frontier between two blocks and <blk> is a gap, retrieve the
+ * next data block.
+ */
+ if (blk.flag & NCB_BK_F_GAP && off_blk == blk.sz &&
+ !ncb_blk_is_last(buf, &blk)) {
+ blk = ncb_blk_next(buf, &blk);
+ off_blk = ncb_blk_off(&blk, off);
+ }
+
+ if (blk.flag & NCB_BK_F_GAP)
+ return 0;
+
+ return blk.sz - off_blk;
+}
+
+/* Insert new data <data> of length <len> into <buf> at offset <off>.
+ *
+ * Returns NCB_RET_OK on success. On error the following codes are returned :
+ * - NCB_RET_GAP_SIZE : cannot add data because the gap formed is too small
+ * - NCB_RET_DATA_REJ : old data would be overwritten by different ones in
+ * NCB_ADD_COMPARE mode.
+ */
+enum ncb_ret ncb_add(struct ncbuf *buf, ncb_sz_t off,
+ const char *data, ncb_sz_t len, enum ncb_add_mode mode)
+{
+ struct ncb_blk blk;
+ ncb_sz_t left = len;
+ enum ncb_ret ret;
+ char *new_sz;
+
+ if (!len)
+ return NCB_RET_OK;
+
+ BUG_ON_HOT(off + len > ncb_size(buf));
+
+ /* Get block where insertion begins. */
+ blk = ncb_blk_find(buf, off);
+
+ /* Check if insertion is possible. */
+ ret = ncb_check_insert(buf, &blk, off, data, len, mode);
+ if (ret != NCB_RET_OK)
+ return ret;
+
+ if (blk.flag & NCB_BK_F_GAP) {
+ /* Reduce gap size if insertion begins in a gap. Gap data size
+ * is reset and will be recalculated during insertion.
+ */
+ const ncb_sz_t gap_sz = off - blk.off;
+ BUG_ON_HOT(gap_sz < NCB_GAP_MIN_SZ);
+
+ /* pointer to data size to increase. */
+ new_sz = ncb_peek(buf, blk.off + NCB_GAP_SZ_DATA_OFF);
+
+ ncb_write_off(buf, blk.sz_ptr, gap_sz);
+ ncb_write_off(buf, new_sz, 0);
+ }
+ else {
+ /* pointer to data size to increase. */
+ new_sz = blk.sz_ptr;
+ }
+
+ /* insert data */
+ while (left) {
+ struct ncb_blk next;
+ const ncb_sz_t off_blk = ncb_blk_off(&blk, off);
+ ncb_sz_t done;
+
+ /* retrieve the next block. This must be done before
+ * overwriting a gap.
+ */
+ next = ncb_blk_next(buf, &blk);
+
+ if (blk.flag & NCB_BK_F_GAP) {
+ done = ncb_fill_gap_blk(buf, &blk, off_blk, data, left);
+
+ /* update the inserted data block size */
+ if (off + done == blk.off + blk.sz) {
+ /* merge next data block if insertion reached gap end */
+ ncb_inc_off(buf, new_sz, done + blk.sz_data);
+ }
+ else {
+ /* insertion stopped before gap end */
+ ncb_inc_off(buf, new_sz, done);
+ }
+ }
+ else {
+ done = ncb_fill_data_blk(buf, &blk, off_blk, data, left, mode);
+ }
+
+ BUG_ON_HOT(done > blk.sz || done > left);
+ left -= done;
+ data += done;
+ off += done;
+
+ blk = next;
+ }
+
+ return NCB_RET_OK;
+}
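+
+/* Usage sketch (illustrative; <data> stands for any caller buffer): insert
+ * out-of-order, then fill the hole so the data becomes contiguous:
+ *
+ *   ncb_add(&buf, 10, data + 10, 5, NCB_ADD_COMPARE); // leaves a gap [0,10)
+ *   ncb_add(&buf, 0, data, 10, NCB_ADD_COMPARE); // fills the gap
+ *   // ncb_data(&buf, 0) now reports 15 contiguous bytes
+ */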
+
+/* Advance the head of <buf> up to offset <adv>. Data at the start of the
+ * buffer will be lost while some space will be freed at the end so that new
+ * data can be inserted.
+ *
+ * Returns NCB_RET_OK on success. It may return NCB_RET_GAP_SIZE if the
+ * operation is rejected due to the formation of a too-small gap at the front.
+ * If the advance stays within a single data block, it is guaranteed to
+ * succeed.
+ */
+enum ncb_ret ncb_advance(struct ncbuf *buf, ncb_sz_t adv)
+{
+ struct ncb_blk start, last;
+ ncb_sz_t off_blk;
+ ncb_sz_t first_data_sz;
+
+ BUG_ON_HOT(adv > ncb_size(buf));
+ if (!adv)
+ return NCB_RET_OK;
+
+ /* Special case if adv is full size. This is equivalent to a reset. */
+ if (adv == ncb_size(buf)) {
+ ncb_init(buf, buf->head);
+ return NCB_RET_OK;
+ }
+
+ start = ncb_blk_find(buf, adv);
+
+ /* Special case when advancing up to the last block, which is a GAP. The
+ * buffer will be left empty and is thus equivalent to a reset.
+ */
+ if (ncb_blk_is_last(buf, &start) && (start.flag & NCB_BK_F_GAP)) {
+ ncb_sz_t new_head = buf->head + adv;
+ if (new_head >= buf->size)
+ new_head -= buf->size;
+
+ ncb_init(buf, new_head);
+ return NCB_RET_OK;
+ }
+
+ last = start;
+ while (!ncb_blk_is_last(buf, &last))
+ last = ncb_blk_next(buf, &last);
+
+ off_blk = ncb_blk_off(&start, adv);
+
+ if (start.flag & NCB_BK_F_GAP) {
+ /* If advance in a GAP, its new size must be big enough. */
+ if (start.sz == off_blk) {
+ /* GAP removed. Buffer will start with following DATA block. */
+ first_data_sz = start.sz_data;
+ }
+ else if (start.sz - off_blk < NCB_GAP_MIN_SZ) {
+ return NCB_RET_GAP_SIZE;
+ }
+ else {
+ /* Buffer will start with this GAP block. */
+ first_data_sz = 0;
+ }
+ }
+ else {
+ /* If off_blk is less than start.sz, the data block becomes the
+ * first block. If equal, the data block is completely removed
+ * and thus the following GAP will be the first block.
+ */
+ first_data_sz = start.sz - off_blk;
+ }
+
+ if (last.flag & NCB_BK_F_GAP) {
+ /* Extend last GAP unless this is a reduced gap. */
+ if (!(last.flag & NCB_BK_F_FIN) || last.sz + adv >= NCB_GAP_MIN_SZ) {
+ /* use .st instead of .sz_ptr which can be NULL if reduced gap */
+ ncb_write_off(buf, last.st, last.sz + adv);
+ ncb_write_off(buf, ncb_peek(buf, last.off + NCB_GAP_SZ_DATA_OFF), 0);
+ }
+ }
+ else {
+ /* Insert a GAP after the last DATA block. */
+ if (adv >= NCB_GAP_MIN_SZ) {
+ ncb_write_off(buf, ncb_peek(buf, last.off + last.sz + NCB_GAP_SZ_OFF), adv);
+ ncb_write_off(buf, ncb_peek(buf, last.off + last.sz + NCB_GAP_SZ_DATA_OFF), 0);
+ }
+ }
+
+ /* Advance head and update reserved header with new first data size. */
+ buf->head += adv;
+ if (buf->head >= buf->size)
+ buf->head -= buf->size;
+ ncb_write_off(buf, ncb_reserved(buf), first_data_sz);
+
+ /* If advance in a GAP, reduce its size. */
+ if (start.flag & NCB_BK_F_GAP && !first_data_sz) {
+ ncb_write_off(buf, ncb_head(buf), start.sz - off_blk);
+ /* Recopy the block sz_data at the new position. */
+ ncb_write_off(buf, ncb_peek(buf, NCB_GAP_SZ_DATA_OFF), start.sz_data);
+ }
+
+ return NCB_RET_OK;
+}
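+
+/* Typical consumption sketch (illustrative; consume() is hypothetical): read
+ * the contiguous bytes at the head, then release them so that new data can be
+ * inserted at the tail. Note that the stored bytes may physically wrap, so a
+ * real consumer must handle wrapping just like ncb_peek() does:
+ *
+ *   ncb_sz_t ready = ncb_data(&buf, 0);
+ *   if (ready) {
+ *           consume(ncb_head(&buf), ready);
+ *           ncb_advance(&buf, ready);
+ *   }
+ */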
+
+/* ******** testing API ******** */
+/* To build it:
+ * gcc -Wall -DSTANDALONE -lasan -I./include -o ncbuf src/ncbuf.c
+ */
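+/* Example run, using the options parsed by main() below:
+ *   ./ncbuf -s 256 -h 15 -p     # print the buffer state after each operation
+ */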
+#ifdef STANDALONE
+
+int ncb_print = 0;
+
+static void ncbuf_printf(char *str, ...)
+{
+ va_list args;
+
+ va_start(args, str);
+ if (ncb_print)
+ vfprintf(stderr, str, args);
+ va_end(args);
+}
+
+struct rand_off {
+ struct list el;
+ ncb_sz_t off;
+ ncb_sz_t len;
+};
+
+static struct rand_off *ncb_generate_rand_off(const struct ncbuf *buf)
+{
+ struct rand_off *roff;
+ roff = calloc(1, sizeof(*roff));
+ BUG_ON(!roff);
+
+ roff->off = rand() % (ncb_size(buf));
+ if (roff->off > 0 && roff->off < NCB_GAP_MIN_SZ)
+ roff->off = 0;
+
+ roff->len = rand() % (ncb_size(buf) - roff->off + 1);
+
+ return roff;
+}
+
+static void ncb_print_blk(const struct ncb_blk *blk)
+{
+ if (ncb_print) {
+ fprintf(stderr, "%s(%s): %2u/%u.\n",
+ blk->flag & NCB_BK_F_GAP ? "GAP " : "DATA",
+ blk->flag & NCB_BK_F_FIN ? "F" : "-", blk->off, blk->sz);
+ }
+}
+
+static int ncb_is_null_blk(const struct ncb_blk *blk)
+{
+ return !blk->st;
+}
+
+static void ncb_loop(const struct ncbuf *buf)
+{
+ struct ncb_blk blk;
+
+ blk = ncb_blk_first(buf);
+ do {
+ ncb_print_blk(&blk);
+ blk = ncb_blk_next(buf, &blk);
+ } while (!ncb_is_null_blk(&blk));
+
+ ncbuf_printf("\n");
+}
+
+static void ncbuf_print_buf(struct ncbuf *b, ncb_sz_t len,
+ unsigned char *area, int line)
+{
+ int i;
+
+ ncbuf_printf("buffer status at line %d\n", line);
+ for (i = 0; i < len; ++i) {
+ ncbuf_printf("%02x.", area[i]);
+ if (i && i % 32 == 31) ncbuf_printf("\n");
+ else if (i && i % 8 == 7) ncbuf_printf(" ");
+ }
+ ncbuf_printf("\n");
+
+ ncb_loop(b);
+
+ if (ncb_print)
+ getchar();
+}
+
+static struct ncbuf b;
+static unsigned char *bufarea = NULL;
+static ncb_sz_t bufsize = 16384;
+static ncb_sz_t bufhead = 15;
+
+#define NCB_INIT(buf) \
+ if ((reset)) { memset(bufarea, 0xaa, bufsize); } \
+ ncb_init(buf, bufhead); \
+ ncbuf_print_buf(&b, bufsize, bufarea, __LINE__);
+
+#define NCB_ADD_EQ(buf, off, data, sz, mode, ret) \
+ BUG_ON(ncb_add((buf), (off), (data), (sz), (mode)) != (ret)); \
+ ncbuf_print_buf(buf, bufsize, bufarea, __LINE__);
+
+#define NCB_ADD_NEQ(buf, off, data, sz, mode, ret) \
+ BUG_ON(ncb_add((buf), (off), (data), (sz), (mode)) == (ret)); \
+ ncbuf_print_buf(buf, bufsize, bufarea, __LINE__);
+
+#define NCB_ADVANCE_EQ(buf, off, ret) \
+ BUG_ON(ncb_advance((buf), (off)) != (ret)); \
+ ncbuf_print_buf(buf, bufsize, bufarea, __LINE__);
+
+#define NCB_TOTAL_DATA_EQ(buf, data) \
+ BUG_ON(ncb_total_data((buf)) != (data));
+
+#define NCB_DATA_EQ(buf, off, data) \
+ BUG_ON(ncb_data((buf), (off)) != (data));
+
+static int ncbuf_test(ncb_sz_t head, int reset, int print_delay)
+{
+ char *data0, data1[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
+ struct list list = LIST_HEAD_INIT(list);
+ struct rand_off *roff, *roff_tmp;
+ enum ncb_ret ret;
+
+ data0 = malloc(bufsize);
+ BUG_ON(!data0);
+ memset(data0, 0xff, bufsize);
+
+ bufarea = malloc(bufsize);
+ BUG_ON(!bufarea);
+
+ fprintf(stderr, "running unit tests\n");
+
+ b = NCBUF_NULL;
+ BUG_ON(!ncb_is_null(&b));
+ NCB_DATA_EQ(&b, 0, 0);
+ NCB_TOTAL_DATA_EQ(&b, 0);
+ BUG_ON(ncb_size(&b) != 0);
+ BUG_ON(!ncb_is_empty(&b));
+ BUG_ON(ncb_is_full(&b));
+ BUG_ON(ncb_is_fragmented(&b));
+
+ b.area = (char *)bufarea;
+ b.size = bufsize;
+ b.head = head;
+ NCB_INIT(&b);
+
+ /* insertion test suite */
+ NCB_INIT(&b);
+ NCB_DATA_EQ(&b, 0, 0); NCB_DATA_EQ(&b, bufsize - NCB_RESERVED_SZ - 1, 0); /* first and last offset */
+ NCB_ADD_EQ(&b, 24, data0, 9, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 24, 9);
+ /* insert new data at the same offset as old */
+ NCB_ADD_EQ(&b, 24, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 24, 16);
+
+ NCB_INIT(&b); NCB_DATA_EQ(&b, 0, 0);
+ NCB_ADD_EQ(&b, 0, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 16);
+ BUG_ON(ncb_is_fragmented(&b));
+ NCB_ADD_EQ(&b, 24, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 16);
+ BUG_ON(!ncb_is_fragmented(&b));
+ /* insert data overlapping two data blocks and a gap */
+ NCB_ADD_EQ(&b, 12, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 40);
+ BUG_ON(ncb_is_fragmented(&b));
+
+ NCB_INIT(&b);
+ NCB_ADD_EQ(&b, 32, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 0); NCB_DATA_EQ(&b, 16, 0); NCB_DATA_EQ(&b, 32, 16);
+ BUG_ON(!ncb_is_fragmented(&b));
+ NCB_ADD_EQ(&b, 0, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 16); NCB_DATA_EQ(&b, 16, 0); NCB_DATA_EQ(&b, 32, 16);
+ BUG_ON(!ncb_is_fragmented(&b));
+ /* insert data to exactly cover a gap between two data blocks */
+ NCB_ADD_EQ(&b, 16, data0, 16, NCB_ADD_PRESERVE, NCB_RET_OK); NCB_DATA_EQ(&b, 0, 48); NCB_DATA_EQ(&b, 16, 32); NCB_DATA_EQ(&b, 32, 16);
+ BUG_ON(ncb_is_fragmented(&b));
+
+ NCB_INIT(&b);
+ NCB_ADD_EQ(&b, 0, data0, 8, NCB_ADD_PRESERVE, NCB_RET_OK);
+ /* this insertion must be rejected because of minimal gap size */
+ NCB_ADD_EQ(&b, 10, data0, 8, NCB_ADD_PRESERVE, NCB_RET_GAP_SIZE);
+
+ /* Test reduced gap support */
+ NCB_INIT(&b);
+ /* this insertion will form a reduced gap */
+ NCB_ADD_EQ(&b, 0, data0, bufsize - (NCB_GAP_MIN_SZ - 1), NCB_ADD_COMPARE, NCB_RET_OK);
+
+ /* Test the various insertion modes */
+ NCB_INIT(&b);
+ NCB_ADD_EQ(&b, 10, data1, 16, NCB_ADD_PRESERVE, NCB_RET_OK);
+ NCB_ADD_EQ(&b, 12, data1, 16, NCB_ADD_COMPARE, NCB_RET_DATA_REJ);
+ NCB_ADD_EQ(&b, 12, data1, 16, NCB_ADD_PRESERVE, NCB_RET_OK); BUG_ON(*ncb_peek(&b, 12) != data1[2]);
+ NCB_ADD_EQ(&b, 12, data1, 16, NCB_ADD_OVERWRT, NCB_RET_OK); BUG_ON(*ncb_peek(&b, 12) == data1[2]);
+
+ /* advance test suite */
+ NCB_INIT(&b);
+ NCB_ADVANCE_EQ(&b, 10, NCB_RET_OK); /* advance in an empty buffer; this ensures we do not leave an empty DATA in the middle of the buffer */
+ NCB_ADVANCE_EQ(&b, ncb_size(&b) - 2, NCB_RET_OK);
+
+ NCB_INIT(&b);
+ /* first fill the buffer */
+ NCB_ADD_EQ(&b, 0, data0, bufsize - NCB_RESERVED_SZ, NCB_ADD_COMPARE, NCB_RET_OK);
+ /* delete 2 bytes : a reduced gap must be created */
+ NCB_ADVANCE_EQ(&b, 2, NCB_RET_OK); NCB_TOTAL_DATA_EQ(&b, ncb_size(&b) - 2);
+ /* delete 1 byte : extend the reduced gap */
+ NCB_ADVANCE_EQ(&b, 1, NCB_RET_OK); NCB_TOTAL_DATA_EQ(&b, ncb_size(&b) - 3);
+ /* delete 5 bytes : a full gap must be present */
+ NCB_ADVANCE_EQ(&b, 5, NCB_RET_OK); NCB_TOTAL_DATA_EQ(&b, ncb_size(&b) - 8);
+ /* completely clear the buffer */
+ NCB_ADVANCE_EQ(&b, bufsize - NCB_RESERVED_SZ, NCB_RET_OK); NCB_TOTAL_DATA_EQ(&b, 0);
+
+
+ NCB_INIT(&b);
+ NCB_ADD_EQ(&b, 10, data0, 10, NCB_ADD_PRESERVE, NCB_RET_OK);
+ NCB_ADVANCE_EQ(&b, 2, NCB_RET_OK); /* reduce a gap in front of the buffer */
+ NCB_ADVANCE_EQ(&b, 1, NCB_RET_GAP_SIZE); /* reject */
+ NCB_ADVANCE_EQ(&b, 8, NCB_RET_OK); /* remove completely the gap */
+ NCB_ADVANCE_EQ(&b, 8, NCB_RET_OK); /* remove inside the data */
+ NCB_ADVANCE_EQ(&b, 10, NCB_RET_OK); /* remove completely the data */
+
+ fprintf(stderr, "first random pass\n");
+ NCB_INIT(&b);
+
+ /* generate random data offsets until the buffer is full */
+ while (!ncb_is_full(&b)) {
+ roff = ncb_generate_rand_off(&b);
+ LIST_INSERT(&list, &roff->el);
+
+ ret = ncb_add(&b, roff->off, data0, roff->len, NCB_ADD_COMPARE);
+ BUG_ON(ret == NCB_RET_DATA_REJ);
+ ncbuf_print_buf(&b, bufsize, bufarea, __LINE__);
+ usleep(print_delay);
+ }
+
+ fprintf(stderr, "buf full, prepare for reverse random\n");
+ ncbuf_print_buf(&b, bufsize, bufarea, __LINE__);
+
+ /* insert the previously generated random offsets in the reverse order.
+ * At the end, the buffer should be full.
+ */
+ NCB_INIT(&b);
+ list_for_each_entry_safe(roff, roff_tmp, &list, el) {
+ int full = ncb_is_full(&b);
+ if (!full) {
+ ret = ncb_add(&b, roff->off, data0, roff->len, NCB_ADD_COMPARE);
+ BUG_ON(ret == NCB_RET_DATA_REJ);
+ ncbuf_print_buf(&b, bufsize, bufarea, __LINE__);
+ usleep(print_delay);
+ }
+
+ LIST_DELETE(&roff->el);
+ free(roff);
+ }
+
+ if (!ncb_is_full(&b))
+ abort();
+
+ fprintf(stderr, "done\n");
+
+ free(bufarea);
+ free(data0);
+
+ return 1;
+}
+
+int main(int argc, char **argv)
+{
+ int reset = 0;
+ int print_delay = 100000;
+ int c; /* getopt() returns int, not char */
+
+ opterr = 0;
+ while ((c = getopt(argc, argv, "h:s:rp::")) != -1) {
+ switch (c) {
+ case 'h':
+ bufhead = atoi(optarg);
+ break;
+ case 's':
+ bufsize = atoi(optarg);
+ if (bufsize < 64) {
+ fprintf(stderr, "bufsize should be at least 64 bytes for unit test suite\n");
+ exit(127);
+ }
+ break;
+ case 'r':
+ reset = 1;
+ break;
+ case 'p':
+ if (optarg)
+ print_delay = atoi(optarg);
+ ncb_print = 1;
+ break;
+ case '?':
+ default:
+ fprintf(stderr, "usage: %s [-r] [-s bufsize] [-h bufhead] [-p <delay_msec>]\n", argv[0]);
+ exit(127);
+ }
+ }
+
+ ncbuf_test(bufhead, reset, print_delay);
+ return EXIT_SUCCESS;
+}
+
+#endif /* STANDALONE */
diff --git a/src/pattern.c b/src/pattern.c
new file mode 100644
index 0000000..52dda5e
--- /dev/null
+++ b/src/pattern.c
@@ -0,0 +1,2683 @@
+/*
+ * Pattern management functions.
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <import/ebistree.h>
+#include <import/ebpttree.h>
+#include <import/ebsttree.h>
+#include <import/lru.h>
+
+#include <haproxy/api.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/pattern.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+
+const char *const pat_match_names[PAT_MATCH_NUM] = {
+ [PAT_MATCH_FOUND] = "found",
+ [PAT_MATCH_BOOL] = "bool",
+ [PAT_MATCH_INT] = "int",
+ [PAT_MATCH_IP] = "ip",
+ [PAT_MATCH_BIN] = "bin",
+ [PAT_MATCH_LEN] = "len",
+ [PAT_MATCH_STR] = "str",
+ [PAT_MATCH_BEG] = "beg",
+ [PAT_MATCH_SUB] = "sub",
+ [PAT_MATCH_DIR] = "dir",
+ [PAT_MATCH_DOM] = "dom",
+ [PAT_MATCH_END] = "end",
+ [PAT_MATCH_REG] = "reg",
+ [PAT_MATCH_REGM] = "regm",
+};
+
+int (*const pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = {
+ [PAT_MATCH_FOUND] = pat_parse_nothing,
+ [PAT_MATCH_BOOL] = pat_parse_nothing,
+ [PAT_MATCH_INT] = pat_parse_int,
+ [PAT_MATCH_IP] = pat_parse_ip,
+ [PAT_MATCH_BIN] = pat_parse_bin,
+ [PAT_MATCH_LEN] = pat_parse_int,
+ [PAT_MATCH_STR] = pat_parse_str,
+ [PAT_MATCH_BEG] = pat_parse_str,
+ [PAT_MATCH_SUB] = pat_parse_str,
+ [PAT_MATCH_DIR] = pat_parse_str,
+ [PAT_MATCH_DOM] = pat_parse_str,
+ [PAT_MATCH_END] = pat_parse_str,
+ [PAT_MATCH_REG] = pat_parse_reg,
+ [PAT_MATCH_REGM] = pat_parse_reg,
+};
+
+int (*const pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = {
+ [PAT_MATCH_FOUND] = pat_idx_list_val,
+ [PAT_MATCH_BOOL] = pat_idx_list_val,
+ [PAT_MATCH_INT] = pat_idx_list_val,
+ [PAT_MATCH_IP] = pat_idx_tree_ip,
+ [PAT_MATCH_BIN] = pat_idx_list_ptr,
+ [PAT_MATCH_LEN] = pat_idx_list_val,
+ [PAT_MATCH_STR] = pat_idx_tree_str,
+ [PAT_MATCH_BEG] = pat_idx_tree_pfx,
+ [PAT_MATCH_SUB] = pat_idx_list_str,
+ [PAT_MATCH_DIR] = pat_idx_list_str,
+ [PAT_MATCH_DOM] = pat_idx_list_str,
+ [PAT_MATCH_END] = pat_idx_list_str,
+ [PAT_MATCH_REG] = pat_idx_list_reg,
+ [PAT_MATCH_REGM] = pat_idx_list_regm,
+};
+
+void (*const pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = {
+ [PAT_MATCH_FOUND] = pat_prune_gen,
+ [PAT_MATCH_BOOL] = pat_prune_gen,
+ [PAT_MATCH_INT] = pat_prune_gen,
+ [PAT_MATCH_IP] = pat_prune_gen,
+ [PAT_MATCH_BIN] = pat_prune_gen,
+ [PAT_MATCH_LEN] = pat_prune_gen,
+ [PAT_MATCH_STR] = pat_prune_gen,
+ [PAT_MATCH_BEG] = pat_prune_gen,
+ [PAT_MATCH_SUB] = pat_prune_gen,
+ [PAT_MATCH_DIR] = pat_prune_gen,
+ [PAT_MATCH_DOM] = pat_prune_gen,
+ [PAT_MATCH_END] = pat_prune_gen,
+ [PAT_MATCH_REG] = pat_prune_gen,
+ [PAT_MATCH_REGM] = pat_prune_gen,
+};
+
+struct pattern *(*const pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = {
+ [PAT_MATCH_FOUND] = NULL,
+ [PAT_MATCH_BOOL] = pat_match_nothing,
+ [PAT_MATCH_INT] = pat_match_int,
+ [PAT_MATCH_IP] = pat_match_ip,
+ [PAT_MATCH_BIN] = pat_match_bin,
+ [PAT_MATCH_LEN] = pat_match_len,
+ [PAT_MATCH_STR] = pat_match_str,
+ [PAT_MATCH_BEG] = pat_match_beg,
+ [PAT_MATCH_SUB] = pat_match_sub,
+ [PAT_MATCH_DIR] = pat_match_dir,
+ [PAT_MATCH_DOM] = pat_match_dom,
+ [PAT_MATCH_END] = pat_match_end,
+ [PAT_MATCH_REG] = pat_match_reg,
+ [PAT_MATCH_REGM] = pat_match_regm,
+};
+
+/* Just used for checking configuration compatibility */
+int const pat_match_types[PAT_MATCH_NUM] = {
+ [PAT_MATCH_FOUND] = SMP_T_SINT,
+ [PAT_MATCH_BOOL] = SMP_T_SINT,
+ [PAT_MATCH_INT] = SMP_T_SINT,
+ [PAT_MATCH_IP] = SMP_T_ADDR,
+ [PAT_MATCH_BIN] = SMP_T_BIN,
+ [PAT_MATCH_LEN] = SMP_T_STR,
+ [PAT_MATCH_STR] = SMP_T_STR,
+ [PAT_MATCH_BEG] = SMP_T_STR,
+ [PAT_MATCH_SUB] = SMP_T_STR,
+ [PAT_MATCH_DIR] = SMP_T_STR,
+ [PAT_MATCH_DOM] = SMP_T_STR,
+ [PAT_MATCH_END] = SMP_T_STR,
+ [PAT_MATCH_REG] = SMP_T_STR,
+ [PAT_MATCH_REGM] = SMP_T_STR,
+};
+
+/* this struct is used to return information */
+static THREAD_LOCAL struct pattern static_pattern;
+static THREAD_LOCAL struct sample_data static_sample_data;
+
+/* This is the root of the list of all available pattern_ref entries. */
+struct list pattern_reference = LIST_HEAD_INIT(pattern_reference);
+
+static THREAD_LOCAL struct lru64_head *pat_lru_tree;
+static unsigned long long pat_lru_seed __read_mostly;
+
+/*
+ *
+ * The following functions are not exported and are used internally by the
+ * pattern matching process.
+ *
+ */
+
+/* Background: Fast way to find a zero byte in a word
+ * http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+ * hasZeroByte = (v - 0x01010101UL) & ~v & 0x80808080UL;
+ *
+ * To look for 4 different byte values, xor the word with those bytes and
+ * then check for zero bytes:
+ *
+ * v = (((unsigned char)c * 0x1010101U) ^ delimiter)
+ * where <delimiter> is the 4 byte values to look for (as an uint)
+ * and <c> is the character that is being tested
+ */
+static inline unsigned int is_delimiter(unsigned char c, unsigned int mask)
+{
+ mask ^= (c * 0x01010101); /* propagate the char to all 4 bytes */
+ return (mask - 0x01010101) & ~mask & 0x80808080U;
+}
+
+static inline unsigned int make_4delim(unsigned char d1, unsigned char d2, unsigned char d3, unsigned char d4)
+{
+ return d1 << 24 | d2 << 16 | d3 << 8 | d4;
+}
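+
+/* Worked example: make_4delim('/', '?', '.', ':') yields 0x2f3f2e3a. Then
+ * is_delimiter('.', 0x2f3f2e3a) xors the mask with 0x2e2e2e2e, giving
+ * 0x01110014: the zero byte marks the matched delimiter, so the bithack
+ * returns a non-zero value. For a non-delimiter such as 'a', no byte of the
+ * xored mask is zero and the result is 0.
+ */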
+
+
+/*
+ *
+ * These functions are exported and may be used by any other component.
+ *
+ * The following functions are used for parsing pattern matching input value.
+ * The <text> contains the string to be parsed. <pattern> must be a preallocated
+ * pattern. The pat_parse_* functions fill this structure with the parsed value.
+ * <err> is filled with an error message built with memprintf() function. It is
+ * allowed to use a trash as a temporary storage for the returned pattern, as
+ * the next call after these functions will be pat_idx_*.
+ *
+ * On success, the pat_parse_* function returns 1. If the function
+ * fails, it returns 0 and <err> is filled.
+ */
+
+/* ignore the current line */
+int pat_parse_nothing(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ return 1;
+}
+
+/* Parse a string. It is allocated and duplicated. */
+int pat_parse_str(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ pattern->type = SMP_T_STR;
+ pattern->ptr.str = (char *)text;
+ pattern->len = strlen(text);
+ return 1;
+}
+
+/* Parse a binary value written in hexadecimal. It is allocated. */
+int pat_parse_bin(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ struct buffer *trash;
+
+ pattern->type = SMP_T_BIN;
+ trash = get_trash_chunk();
+ pattern->len = trash->size;
+ pattern->ptr.str = trash->area;
+ return !!parse_binary(text, &pattern->ptr.str, &pattern->len, err);
+}
+
+/* Parse a regex. It is allocated. */
+int pat_parse_reg(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ pattern->ptr.str = (char *)text;
+ return 1;
+}
+
+/* Parse a range of positive integers delimited by either ':' or '-'. If only
+ * one integer is read, it is set as both min and max. An operator may be
+ * specified as the prefix, among this list of 5:
+ *
+ * 0:eq, 1:gt, 2:ge, 3:lt, 4:le
+ *
+ * The default operator is "eq". It supports range matching. Ranges are
+ * rejected for other operators. The operator may be changed at any time.
+ * The operator is stored in the 'opaque' argument.
+ *
+ * If err is non-NULL, an error message will be returned there on errors and
+ * the caller will have to free it. The function returns zero on error, and
+ * non-zero on success.
+ *
+ */
+int pat_parse_int(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ const char *ptr = text;
+
+ pattern->type = SMP_T_SINT;
+
+ /* Empty string is not valid */
+ if (!*text)
+ goto not_valid_range;
+
+ /* Search ':' or '-' separator. */
+ while (*ptr != '\0' && *ptr != ':' && *ptr != '-')
+ ptr++;
+
+ /* If separator not found. */
+ if (!*ptr) {
+ if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0) {
+ memprintf(err, "'%s' is not a number", text);
+ return 0;
+ }
+ pattern->val.range.max = pattern->val.range.min;
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 1;
+ return 1;
+ }
+
+ /* If the separator is the first character. */
+ if (ptr == text && *(ptr + 1) != '\0') {
+ if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0)
+ goto not_valid_range;
+
+ pattern->val.range.min_set = 0;
+ pattern->val.range.max_set = 1;
+ return 1;
+ }
+
+ /* If separator is the last character. */
+ if (*(ptr + 1) == '\0') {
+ if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0)
+ goto not_valid_range;
+
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 0;
+ return 1;
+ }
+
+ /* Else, parse two numbers. */
+ if (strl2llrc(text, ptr - text, &pattern->val.range.min) != 0)
+ goto not_valid_range;
+
+ if (strl2llrc(ptr + 1, strlen(ptr + 1), &pattern->val.range.max) != 0)
+ goto not_valid_range;
+
+ if (pattern->val.range.min > pattern->val.range.max)
+ goto not_valid_range;
+
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 1;
+ return 1;
+
+ not_valid_range:
+ memprintf(err, "'%s' is not a valid number range", text);
+ return 0;
+}
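+
+/* Examples of accepted inputs (illustrative):
+ *   "1024"      -> min = max = 1024
+ *   "1024:2048" -> min = 1024, max = 2048
+ *   ":2048"     -> only max is set (2048)
+ *   "1024:"     -> only min is set (1024)
+ */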
+
+/* Parse a range of positive 2-component versions delimited by either ':' or
+ * '-'. The version consists in a major and a minor, both of which must be
+ * smaller than 65536, because internally they will be represented as a 32-bit
+ * integer.
+ * If only one version is read, it is set as both min and max. Just like for
+ * pure integers, an operator may be specified as the prefix, among this list
+ * of 5 :
+ *
+ * 0:eq, 1:gt, 2:ge, 3:lt, 4:le
+ *
+ * The default operator is "eq". It supports range matching. Ranges are
+ * rejected for other operators. The operator may be changed at any time.
+ * The operator is stored in the 'opaque' argument. This allows constructs
+ * such as the following one :
+ *
+ * acl obsolete_ssl ssl_req_proto lt 3
+ * acl unsupported_ssl ssl_req_proto gt 3.1
+ * acl valid_ssl ssl_req_proto 3.0-3.1
+ *
+ */
+int pat_parse_dotted_ver(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ const char *ptr = text;
+
+ pattern->type = SMP_T_SINT;
+
+ /* Search ':' or '-' separator. */
+ while (*ptr != '\0' && *ptr != ':' && *ptr != '-')
+ ptr++;
+
+ /* If separator not found. */
+ if (*ptr == '\0' && ptr > text) {
+ if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
+ memprintf(err, "'%s' is not a dotted number", text);
+ return 0;
+ }
+ pattern->val.range.max = pattern->val.range.min;
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 1;
+ return 1;
+ }
+
+ /* If the separator is the first character. */
+ if (ptr == text && *(ptr+1) != '\0') {
+ if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ pattern->val.range.min_set = 0;
+ pattern->val.range.max_set = 1;
+ return 1;
+ }
+
+ /* If separator is the last character. */
+ if (ptr == &text[strlen(text)-1]) {
+ if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 0;
+ return 1;
+ }
+
+ /* Else, parse two numbers. */
+ if (strl2llrc_dotted(text, ptr-text, &pattern->val.range.min) != 0) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ if (strl2llrc_dotted(ptr+1, strlen(ptr+1), &pattern->val.range.max) != 0) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ if (pattern->val.range.min > pattern->val.range.max) {
+ memprintf(err, "'%s' is not a valid dotted number range", text);
+ return 0;
+ }
+ pattern->val.range.min_set = 1;
+ pattern->val.range.max_set = 1;
+ return 1;
+}
+
+/* Parse an IP address and an optional mask in the form addr[/mask].
+ * The addr may either be an IPv4 address or a hostname. The mask
+ * may either be a dotted mask or a number of bits. Returns 1 if OK,
+ * otherwise 0. NOTE: IP address patterns are typed (IPV4/IPV6).
+ */
+int pat_parse_ip(const char *text, struct pattern *pattern, int mflags, char **err)
+{
+ if (str2net(text, !(mflags & PAT_MF_NO_DNS) && (global.mode & MODE_STARTING),
+ &pattern->val.ipv4.addr, &pattern->val.ipv4.mask)) {
+ pattern->type = SMP_T_IPV4;
+ return 1;
+ }
+ else if (str62net(text, &pattern->val.ipv6.addr, &pattern->val.ipv6.mask)) {
+ pattern->type = SMP_T_IPV6;
+ return 1;
+ }
+ else {
+ memprintf(err, "'%s' is not a valid IPv4 or IPv6 address", text);
+ return 0;
+ }
+}
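+
+/* Examples of accepted inputs (illustrative):
+ *   "192.168.0.1" (single host), "10.0.0.0/8" (CIDR mask),
+ *   "10.0.0.0/255.0.0.0" (dotted mask), "2001:db8::/32" (IPv6 CIDR).
+ */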
+
+/*
+ *
+ * These functions are exported and may be used by any other component.
+ *
+ * The following functions take a sample <smp> and check whether it matches
+ * the pattern <pattern>. They return only PAT_MATCH or PAT_NOMATCH.
+ *
+ */
+
+/* only matches if the sample's boolean value is true, otherwise returns NULL */
+struct pattern *pat_match_nothing(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ if (smp->data.u.sint) {
+ if (fill) {
+ static_pattern.data = NULL;
+ static_pattern.ref = NULL;
+ static_pattern.type = 0;
+ static_pattern.ptr.str = NULL;
+ }
+ return &static_pattern;
+ }
+ else
+ return NULL;
+}
+
+
+/* NB: For two strings to be identical, it is required that their lengths match */
+struct pattern *pat_match_str(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ struct ebmb_node *node;
+ struct pattern_tree *elt;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ /* Lookup a string in the expression's pattern tree. */
+ if (!eb_is_empty(&expr->pattern_tree)) {
+ char prev = 0;
+
+ if (smp->data.u.str.data < smp->data.u.str.size) {
+ /* we may have to force a trailing zero on the test pattern and
+ * the buffer is large enough to accommodate it. If the flag
+ * CONST is set, duplicate the string
+ */
+ prev = smp->data.u.str.area[smp->data.u.str.data];
+ if (prev) {
+ if (smp->flags & SMP_F_CONST) {
+ if (!smp_dup(smp))
+ return NULL;
+ } else {
+ smp->data.u.str.area[smp->data.u.str.data] = '\0';
+ }
+ }
+ }
+ else {
+ /* Otherwise, the sample is duplicated. A trailing zero
+ * is automatically added to the string.
+ */
+ if (!smp_dup(smp))
+ return NULL;
+ }
+
+ node = ebst_lookup(&expr->pattern_tree, smp->data.u.str.area);
+ if (prev)
+ smp->data.u.str.area[smp->data.u.str.data] = prev;
+
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_next_dup(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_STR;
+ static_pattern.ptr.str = (char *)elt->node.key;
+ }
+ return &static_pattern;
+ }
+ }
+
+ /* look in the list */
+ if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len != smp->data.u.str.data)
+ continue;
+
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) ||
+ (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0)) {
+ ret = pattern;
+ break;
+ }
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* NB: For two binary buffers to be identical, it is required that their lengths match */
+struct pattern *pat_match_bin(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len != smp->data.u.str.data)
+ continue;
+
+ if (memcmp(pattern->ptr.str, smp->data.u.str.area, smp->data.u.str.data) == 0) {
+ ret = pattern;
+ break;
+ }
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* Executes a regex. It temporarily changes the data to add a trailing zero,
+ * and restores the previous character when leaving. This function fills
+ * a matching array.
+ */
+struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (regex_exec_match2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data,
+ MAX_MATCH, pmatch, 0)) {
+ ret = pattern;
+ smp->ctx.a[0] = pmatch;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* Executes a regex. It temporarily changes the data to add a trailing zero,
+ * and restores the previous character when leaving.
+ */
+struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (regex_exec2(pattern->ptr.reg, smp->data.u.str.area, smp->data.u.str.data)) {
+ ret = pattern;
+ break;
+ }
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* Checks that the pattern matches the beginning of the tested string. */
+struct pattern *pat_match_beg(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ struct ebmb_node *node;
+ struct pattern_tree *elt;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ /* Lookup a string in the expression's pattern tree. */
+ if (!eb_is_empty(&expr->pattern_tree)) {
+ char prev = 0;
+
+ if (smp->data.u.str.data < smp->data.u.str.size) {
+ /* we may have to force a trailing zero on the test pattern and
+ * the buffer is large enough to accommodate it.
+ */
+ prev = smp->data.u.str.area[smp->data.u.str.data];
+ if (prev)
+ smp->data.u.str.area[smp->data.u.str.data] = '\0';
+ }
+ else {
+ /* Otherwise, the sample is duplicated. A trailing zero
+ * is automatically added to the string.
+ */
+ if (!smp_dup(smp))
+ return NULL;
+ }
+
+ node = ebmb_lookup_longest(&expr->pattern_tree,
+ smp->data.u.str.area);
+ if (prev)
+ smp->data.u.str.area[smp->data.u.str.data] = prev;
+
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_lookup_shorter(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_STR;
+ static_pattern.ptr.str = (char *)elt->node.key;
+ }
+ return &static_pattern;
+ }
+ }
+
+ /* look in the list */
+ if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len > smp->data.u.str.data)
+ continue;
+
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0) ||
+ (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area, pattern->len) != 0))
+ continue;
+
+ ret = pattern;
+ break;
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* Checks that the pattern matches the end of the tested string. */
+struct pattern *pat_match_end(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len > smp->data.u.str.data)
+ continue;
+
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if ((icase && strncasecmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0) ||
+ (!icase && strncmp(pattern->ptr.str, smp->data.u.str.area + smp->data.u.str.data - pattern->len, pattern->len) != 0))
+ continue;
+
+ ret = pattern;
+ break;
+ }
+
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* Checks that the pattern is included inside the tested string.
+ * NB: Suboptimal, should be rewritten using a Boyer-Moore method.
+ */
+struct pattern *pat_match_sub(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ int icase;
+ char *end;
+ char *c;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL;
+ struct lru64 *lru = NULL;
+
+ if (pat_lru_tree && !LIST_ISEMPTY(&expr->patterns)) {
+ unsigned long long seed = pat_lru_seed ^ (long)expr;
+
+ lru = lru64_get(XXH3(smp->data.u.str.area, smp->data.u.str.data, seed),
+ pat_lru_tree, expr, expr->ref->revision);
+ if (lru && lru->domain) {
+ ret = lru->data;
+ return ret;
+ }
+ }
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (pattern->len > smp->data.u.str.data)
+ continue;
+
+ end = smp->data.u.str.area + smp->data.u.str.data - pattern->len;
+ icase = expr->mflags & PAT_MF_IGNORE_CASE;
+ if (icase) {
+ for (c = smp->data.u.str.area; c <= end; c++) {
+ if (tolower((unsigned char)*c) != tolower((unsigned char)*pattern->ptr.str))
+ continue;
+ if (strncasecmp(pattern->ptr.str, c, pattern->len) == 0) {
+ ret = pattern;
+ goto leave;
+ }
+ }
+ } else {
+ for (c = smp->data.u.str.area; c <= end; c++) {
+ if (*c != *pattern->ptr.str)
+ continue;
+ if (strncmp(pattern->ptr.str, c, pattern->len) == 0) {
+ ret = pattern;
+ goto leave;
+ }
+ }
+ }
+ }
+ leave:
+ if (lru)
+ lru64_commit(lru, ret, expr, expr->ref->revision, NULL);
+
+ return ret;
+}
+
+/* This helper is used by the actual matching functions. It checks that the pattern is
+ * included inside the tested string, but enclosed between the specified
+ * delimiters or at the beginning or end of the string. The delimiters are
+ * provided as an unsigned int made by make_4delim() and match up to 4 different
+ * delimiters. Delimiters are stripped at the beginning and end of the pattern.
+ */
+static int match_word(struct sample *smp, struct pattern *pattern, int mflags, unsigned int delimiters)
+{
+ int may_match, icase;
+ char *c, *end;
+ char *ps;
+ int pl;
+
+ pl = pattern->len;
+ ps = pattern->ptr.str;
+
+ while (pl > 0 && is_delimiter(*ps, delimiters)) {
+ pl--;
+ ps++;
+ }
+
+ while (pl > 0 && is_delimiter(ps[pl - 1], delimiters))
+ pl--;
+
+ if (pl > smp->data.u.str.data)
+ return PAT_NOMATCH;
+
+ may_match = 1;
+ icase = mflags & PAT_MF_IGNORE_CASE;
+ end = smp->data.u.str.area + smp->data.u.str.data - pl;
+ for (c = smp->data.u.str.area; c <= end; c++) {
+ if (is_delimiter(*c, delimiters)) {
+ may_match = 1;
+ continue;
+ }
+
+ if (!may_match)
+ continue;
+
+ if (icase) {
+ if ((tolower((unsigned char)*c) == tolower((unsigned char)*ps)) &&
+ (strncasecmp(ps, c, pl) == 0) &&
+ (c == end || is_delimiter(c[pl], delimiters)))
+ return PAT_MATCH;
+ } else {
+ if ((*c == *ps) &&
+ (strncmp(ps, c, pl) == 0) &&
+ (c == end || is_delimiter(c[pl], delimiters)))
+ return PAT_MATCH;
+ }
+ may_match = 0;
+ }
+ return PAT_NOMATCH;
+}
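+
+/* Worked example (illustrative): with the delimiters '/' and '?' used by
+ * pat_match_dir() below, the pattern "img" matches "/img/index.html" and
+ * "/img", but not "/imgs/a.png": the pattern must be enclosed between
+ * delimiters or the string boundaries.
+ */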
+
+/* Checks that the pattern is included inside the tested string, but enclosed
+ * between the delimiters '?' or '/' or at the beginning or end of the string.
+ * Delimiters at the beginning or end of the pattern are ignored.
+ */
+struct pattern *pat_match_dir(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '?', '?')))
+ return pattern;
+ }
+ return NULL;
+}
+
+/* Checks that the pattern is included inside the tested string, but enclosed
+ * between the delimiters '/', '?', '.' or ':' or at the beginning or end of
+ * the string. Delimiters at the beginning or end of the pattern are ignored.
+ */
+struct pattern *pat_match_dom(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if (match_word(smp, pattern, expr->mflags, make_4delim('/', '?', '.', ':')))
+ return pattern;
+ }
+ return NULL;
+}
+
+/* Checks that the integer in <smp> is included between min and max */
+struct pattern *pat_match_int(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.sint) &&
+ (!pattern->val.range.max_set || smp->data.u.sint <= pattern->val.range.max))
+ return pattern;
+ }
+ return NULL;
+}
+
+/* Checks that the length of the pattern in <smp> is included between min and max */
+struct pattern *pat_match_len(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ if ((!pattern->val.range.min_set || pattern->val.range.min <= smp->data.u.str.data) &&
+ (!pattern->val.range.max_set || smp->data.u.str.data <= pattern->val.range.max))
+ return pattern;
+ }
+ return NULL;
+}
+
+/* Performs ipv4 key lookup in <expr> ipv4 tree
+ * Returns NULL on failure
+ */
+static struct pattern *_pat_match_tree_ipv4(struct in_addr *key, struct pattern_expr *expr, int fill)
+{
+ struct ebmb_node *node;
+ struct pattern_tree *elt;
+
+ /* Lookup an IPv4 address in the expression's pattern tree using
+ * the longest match method.
+ */
+ node = ebmb_lookup_longest(&expr->pattern_tree, key);
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_lookup_shorter(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_IPV4;
+ static_pattern.val.ipv4.addr.s_addr = read_u32(elt->node.key);
+ if (!cidr2dotted(elt->node.node.pfx, &static_pattern.val.ipv4.mask))
+ return NULL;
+ }
+ return &static_pattern;
+ }
+ return NULL;
+}
+
+/* Performs ipv6 key lookup in <expr> ipv6 tree
+ * Returns NULL on failure
+ */
+static struct pattern *_pat_match_tree_ipv6(struct in6_addr *key, struct pattern_expr *expr, int fill)
+{
+ struct ebmb_node *node;
+ struct pattern_tree *elt;
+
+ /* Lookup an IPv6 address in the expression's pattern tree using
+ * the longest match method.
+ */
+ node = ebmb_lookup_longest(&expr->pattern_tree_2, key);
+ while (node) {
+ elt = ebmb_entry(node, struct pattern_tree, node);
+ if (elt->ref->gen_id != expr->ref->curr_gen) {
+ node = ebmb_lookup_shorter(node);
+ continue;
+ }
+ if (fill) {
+ static_pattern.data = elt->data;
+ static_pattern.ref = elt->ref;
+ static_pattern.sflags = PAT_SF_TREE;
+ static_pattern.type = SMP_T_IPV6;
+ memcpy(&static_pattern.val.ipv6.addr, elt->node.key, 16);
+ static_pattern.val.ipv6.mask = elt->node.node.pfx;
+ }
+ return &static_pattern;
+ }
+ return NULL;
+}
+
+struct pattern *pat_match_ip(struct sample *smp, struct pattern_expr *expr, int fill)
+{
+ struct in_addr v4;
+ struct in6_addr v6;
+ struct pattern_list *lst;
+ struct pattern *pattern;
+
+ /* The input sample is IPv4. Try to match in the trees. */
+ if (smp->data.type == SMP_T_IPV4) {
+ pattern = _pat_match_tree_ipv4(&smp->data.u.ipv4, expr, fill);
+ if (pattern)
+ return pattern;
+ /* The IPv4 sample doesn't match the IPv4 tree. Convert the IPv4
+ * sample address to IPv6 and try to lookup in the IPv6 tree.
+ */
+ v4tov6(&v6, &smp->data.u.ipv4);
+ pattern = _pat_match_tree_ipv6(&v6, expr, fill);
+ if (pattern)
+ return pattern;
+ /* eligible for list lookup using IPv4 address */
+ v4 = smp->data.u.ipv4;
+ goto list_lookup;
+ }
+
+ /* The input sample is IPv6. Try to match in the trees. */
+ if (smp->data.type == SMP_T_IPV6) {
+ pattern = _pat_match_tree_ipv6(&smp->data.u.ipv6, expr, fill);
+ if (pattern)
+ return pattern;
+ /* No match in the IPv6 tree. Try to convert 6 to 4 to lookup in
+ * the IPv4 tree
+ */
+ if (v6tov4(&v4, &smp->data.u.ipv6)) {
+ pattern = _pat_match_tree_ipv4(&v4, expr, fill);
+ if (pattern)
+ return pattern;
+ /* eligible for list lookup using IPv4 address */
+ goto list_lookup;
+ }
+ }
+
+ not_found:
+ return NULL;
+
+ list_lookup:
+ /* No match in the trees, but we still have a valid IPv4 address: look
+ * it up in the IPv4 list (the non-contiguous masks list). This is our
+ * last resort.
+ */
+ list_for_each_entry(lst, &expr->patterns, list) {
+ pattern = &lst->pat;
+
+ if (pattern->ref->gen_id != expr->ref->curr_gen)
+ continue;
+
+ /* Check if the input sample matches the current pattern. */
+ if (((v4.s_addr ^ pattern->val.ipv4.addr.s_addr) & pattern->val.ipv4.mask.s_addr) == 0)
+ return pattern;
+ }
+ goto not_found;
+}
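+
+/* A summary sketch of the lookup order implemented above, assuming the
+ * expression was filled from entries such as "10.0.0.0/8" or "2001:db8::/32":
+ *
+ *   IPv4 sample: v4 tree -> v6 tree (v4-mapped address) -> v4 list
+ *   IPv6 sample: v6 tree -> v4 tree (if convertible) -> v4 list
+ *
+ * The list is only needed for IPv4 patterns whose mask is not contiguous,
+ * since those cannot be represented as a prefix in a tree.
+ */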
+
+/* Finds the pattern holding <list> starting from list head <head> and unlinks
+ * it. This is meant for pattern removal within an expression.
+ */
+static void pat_unlink_from_head(void **head, void **list)
+{
+ while (*head) {
+ if (*head == list) {
+ *head = *list;
+ return;
+ }
+ head = *head;
+ }
+}
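+
+/* The <list_head>/<tree_head> chains walked above are singly-linked lists of
+ * void pointers: each element's from_ref field holds the address of the next
+ * element's from_ref field, or NULL at the end. A hypothetical chain with two
+ * patterns thus looks like this:
+ *
+ *   ref->list_head -> &pat2->from_ref -> &pat1->from_ref -> NULL
+ *
+ * pat_unlink_from_head() follows these links until it finds <list>, then
+ * replaces the pointer to it with its successor.
+ */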
+
+void free_pattern_tree(struct eb_root *root)
+{
+ struct eb_node *node, *next;
+ struct pattern_tree *elt;
+
+ node = eb_first(root);
+ while (node) {
+ next = eb_next(node);
+ eb_delete(node);
+ elt = container_of(node, struct pattern_tree, node);
+ pat_unlink_from_head(&elt->ref->tree_head, &elt->from_ref);
+ free(elt->data);
+ free(elt);
+ node = next;
+ }
+}
+
+void pat_prune_gen(struct pattern_expr *expr)
+{
+ struct pattern_list *pat, *tmp;
+
+ list_for_each_entry_safe(pat, tmp, &expr->patterns, list) {
+ LIST_DELETE(&pat->list);
+ pat_unlink_from_head(&pat->pat.ref->list_head, &pat->from_ref);
+ if (pat->pat.sflags & PAT_SF_REGFREE)
+ regex_free(pat->pat.ptr.ptr);
+ else
+ free(pat->pat.ptr.ptr);
+ free(pat->pat.data);
+ free(pat);
+ }
+
+ free_pattern_tree(&expr->pattern_tree);
+ free_pattern_tree(&expr->pattern_tree_2);
+ LIST_INIT(&expr->patterns);
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt = 0;
+}
+
+/*
+ *
+ * The following functions are used for pattern indexing
+ *
+ */
+
+int pat_idx_list_val(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ struct pattern_list *patl;
+
+ /* allocate pattern */
+ patl = calloc(1, sizeof(*patl));
+ if (!patl) {
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+
+ /* duplicate pattern */
+ memcpy(&patl->pat, pat, sizeof(*pat));
+
+ /* chain pattern in the expression */
+ LIST_APPEND(&expr->patterns, &patl->list);
+ patl->expr = expr;
+ /* and from the reference */
+ patl->from_ref = pat->ref->list_head;
+ pat->ref->list_head = &patl->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ struct pattern_list *patl;
+
+ /* allocate pattern */
+ patl = calloc(1, sizeof(*patl));
+ if (!patl) {
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+
+ /* duplicate pattern */
+ memcpy(&patl->pat, pat, sizeof(*pat));
+ patl->pat.ptr.ptr = malloc(patl->pat.len);
+ if (!patl->pat.ptr.ptr) {
+ free(patl);
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+ memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len);
+
+ /* chain pattern in the expression */
+ LIST_APPEND(&expr->patterns, &patl->list);
+ patl->expr = expr;
+ /* and from the reference */
+ patl->from_ref = pat->ref->list_head;
+ pat->ref->list_head = &patl->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ struct pattern_list *patl;
+
+ /* allocate pattern */
+ patl = calloc(1, sizeof(*patl));
+ if (!patl) {
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+
+ /* duplicate pattern */
+ memcpy(&patl->pat, pat, sizeof(*pat));
+ patl->pat.ptr.str = malloc(patl->pat.len + 1);
+ if (!patl->pat.ptr.str) {
+ free(patl);
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+ memcpy(patl->pat.ptr.ptr, pat->ptr.ptr, pat->len);
+ patl->pat.ptr.str[patl->pat.len] = '\0';
+
+ /* chain pattern in the expression */
+ LIST_APPEND(&expr->patterns, &patl->list);
+ patl->expr = expr;
+ /* and from the reference */
+ patl->from_ref = pat->ref->list_head;
+ pat->ref->list_head = &patl->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap, char **err)
+{
+ struct pattern_list *patl;
+
+ /* allocate pattern */
+ patl = calloc(1, sizeof(*patl));
+ if (!patl) {
+ memprintf(err, "out of memory while indexing pattern");
+ return 0;
+ }
+
+ /* duplicate pattern */
+ memcpy(&patl->pat, pat, sizeof(*pat));
+
+ /* compile regex */
+ patl->pat.sflags |= PAT_SF_REGFREE;
+ if (!(patl->pat.ptr.reg = regex_comp(pat->ptr.str, !(expr->mflags & PAT_MF_IGNORE_CASE),
+ cap, err))) {
+ free(patl);
+ return 0;
+ }
+
+ /* chain pattern in the expression */
+ LIST_APPEND(&expr->patterns, &patl->list);
+ patl->expr = expr;
+ /* and from the reference */
+ patl->from_ref = pat->ref->list_head;
+ pat->ref->list_head = &patl->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ return pat_idx_list_reg_cap(expr, pat, 0, err);
+}
+
+int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ return pat_idx_list_reg_cap(expr, pat, 1, err);
+}
+
+int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ unsigned int mask;
+ struct pattern_tree *node;
+
+ /* IPv4 addresses can be indexed in the tree when the mask is contiguous */
+ if (pat->type == SMP_T_IPV4) {
+ /* in the IPv4 case, check whether the mask is contiguous so that
+ * we can insert the network into the tree. A contiguous mask has
+ * only ones on the left. This means that adding the mask's lowest
+ * set bit to the mask once again yields zero (in 32 bits).
+ */
+ mask = ntohl(pat->val.ipv4.mask.s_addr);
+ if (mask + (mask & -mask) == 0) {
+ mask = mask ? 33 - flsnz(mask & -mask) : 0; /* equals cidr value */
+
+ /* node memory allocation */
+ node = calloc(1, sizeof(*node) + 4);
+ if (!node) {
+ memprintf(err, "out of memory while loading pattern");
+ return 0;
+ }
+
+ /* copy the pointer to sample associated to this node */
+ node->data = pat->data;
+ node->ref = pat->ref;
+
+ /* FIXME: insert <addr>/<mask> into the tree here */
+ memcpy(node->node.key, &pat->val.ipv4.addr, 4); /* network byte order */
+ node->node.node.pfx = mask;
+
+ /* Insert the entry. */
+ ebmb_insert_prefix(&expr->pattern_tree, &node->node, 4);
+
+ node->expr = expr;
+ node->from_ref = pat->ref->tree_head;
+ pat->ref->tree_head = &node->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+ }
+ else {
+ /* If the mask is not contiguous, just add the pattern to the list */
+ return pat_idx_list_val(expr, pat, err);
+ }
+ }
+ else if (pat->type == SMP_T_IPV6) {
+ /* IPv6 can also be indexed */
+ node = calloc(1, sizeof(*node) + 16);
+ if (!node) {
+ memprintf(err, "out of memory while loading pattern");
+ return 0;
+ }
+
+ /* copy the pointer to sample associated to this node */
+ node->data = pat->data;
+ node->ref = pat->ref;
+
+ /* FIXME: insert <addr>/<mask> into the tree here */
+ memcpy(node->node.key, &pat->val.ipv6.addr, 16); /* network byte order */
+ node->node.node.pfx = pat->val.ipv6.mask;
+
+ /* Insert the entry. */
+ ebmb_insert_prefix(&expr->pattern_tree_2, &node->node, 16);
+
+ node->expr = expr;
+ node->from_ref = pat->ref->tree_head;
+ pat->ref->tree_head = &node->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+ }
+
+ return 0;
+}
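+
+/* Worked example for the contiguity test above (using 32-bit unsigned
+ * arithmetic): for mask 255.255.255.0, mask = 0xffffff00 and
+ * (mask & -mask) = 0x00000100, so mask + (mask & -mask) wraps to 0 and the
+ * test passes; the CIDR value is then 33 - flsnz(0x00000100) = 33 - 9 = 24.
+ * For a non-contiguous mask such as 255.0.255.0 (0xff00ff00), the sum is
+ * 0xff010000 != 0, so the pattern falls back to the list via
+ * pat_idx_list_val().
+ */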
+
+int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ int len;
+ struct pattern_tree *node;
+
+ /* Only strings can be indexed */
+ if (pat->type != SMP_T_STR) {
+ memprintf(err, "internal error: string expected, but the type is '%s'",
+ smp_to_type[pat->type]);
+ return 0;
+ }
+
+ /* If the flag PAT_MF_IGNORE_CASE is set, we cannot use trees */
+ if (expr->mflags & PAT_MF_IGNORE_CASE)
+ return pat_idx_list_str(expr, pat, err);
+
+ /* Process the key len */
+ len = strlen(pat->ptr.str) + 1;
+
+ /* node memory allocation */
+ node = calloc(1, sizeof(*node) + len);
+ if (!node) {
+ memprintf(err, "out of memory while loading pattern");
+ return 0;
+ }
+
+ /* copy the pointer to sample associated to this node */
+ node->data = pat->data;
+ node->ref = pat->ref;
+
+ /* copy the string */
+ memcpy(node->node.key, pat->ptr.str, len);
+
+ /* index the new node */
+ ebst_insert(&expr->pattern_tree, &node->node);
+
+ node->expr = expr;
+ node->from_ref = pat->ref->tree_head;
+ pat->ref->tree_head = &node->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
+
+int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err)
+{
+ int len;
+ struct pattern_tree *node;
+
+ /* Only strings can be indexed */
+ if (pat->type != SMP_T_STR) {
+ memprintf(err, "internal error: string expected, but the type is '%s'",
+ smp_to_type[pat->type]);
+ return 0;
+ }
+
+ /* If the flag PAT_MF_IGNORE_CASE is set, we cannot use trees */
+ if (expr->mflags & PAT_MF_IGNORE_CASE)
+ return pat_idx_list_str(expr, pat, err);
+
+ /* Process the key len */
+ len = strlen(pat->ptr.str);
+
+ /* node memory allocation */
+ node = calloc(1, sizeof(*node) + len + 1);
+ if (!node) {
+ memprintf(err, "out of memory while loading pattern");
+ return 0;
+ }
+
+ /* copy the pointer to sample associated to this node */
+ node->data = pat->data;
+ node->ref = pat->ref;
+
+ /* copy the string and the trailing zero */
+ memcpy(node->node.key, pat->ptr.str, len + 1);
+ node->node.node.pfx = len * 8;
+
+ /* index the new node */
+ ebmb_insert_prefix(&expr->pattern_tree, &node->node, len);
+
+ node->expr = expr;
+ node->from_ref = pat->ref->tree_head;
+ pat->ref->tree_head = &node->from_ref;
+ expr->ref->revision = rdtsc();
+ expr->ref->entry_cnt++;
+
+ /* that's ok */
+ return 1;
+}
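+
+/* Illustrative example: indexing the string "abc" above stores it with
+ * pfx = 3 * 8 = 24 significant bits, so a longest-match lookup (as used by
+ * prefix matching) would match any sample beginning with "abc", e.g.
+ * "abcdef", unless a longer indexed prefix also matches.
+ */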
+
+/* Deletes all patterns attached to the reference element <elt>. Note that all
+ * of their expressions must be locked, and the pattern lock must be held as
+ * well.
+ */
+void pat_delete_gen(struct pat_ref *ref, struct pat_ref_elt *elt)
+{
+ struct pattern_tree *tree;
+ struct pattern_list *pat;
+ void **node;
+
+ /* delete all known tree nodes. They are all allocated inline */
+ for (node = elt->tree_head; node;) {
+ tree = container_of(node, struct pattern_tree, from_ref);
+ node = *node;
+ BUG_ON(tree->ref != elt);
+
+ ebmb_delete(&tree->node);
+ free(tree->data);
+ free(tree);
+ }
+
+ /* delete all list nodes and free their pattern entries (str/reg) */
+ for (node = elt->list_head; node;) {
+ pat = container_of(node, struct pattern_list, from_ref);
+ node = *node;
+ BUG_ON(pat->pat.ref != elt);
+
+ /* Delete and free entry. */
+ LIST_DELETE(&pat->list);
+ if (pat->pat.sflags & PAT_SF_REGFREE)
+ regex_free(pat->pat.ptr.reg);
+ else
+ free(pat->pat.ptr.ptr);
+ free(pat->pat.data);
+ free(pat);
+ }
+
+ /* update revision number to refresh the cache */
+ ref->revision = rdtsc();
+ ref->entry_cnt--;
+ elt->tree_head = NULL;
+ elt->list_head = NULL;
+}
+
+void pattern_init_expr(struct pattern_expr *expr)
+{
+ LIST_INIT(&expr->patterns);
+ expr->pattern_tree = EB_ROOT;
+ expr->pattern_tree_2 = EB_ROOT;
+}
+
+void pattern_init_head(struct pattern_head *head)
+{
+ LIST_INIT(&head->head);
+}
+
+/* The following functions are relative to the management of the reference
+ * lists. These lists are used to store the original pattern and associated
+ * value in string form.
+ *
+ * This is used with modifiable ACLs and MAPs.
+ *
+ * Pattern references are stored with two identifiers: the unique_id and
+ * the reference.
+ *
+ * The reference identifies a file. Each file with the same name points to the
+ * same reference. One file may be registered several times. If the file is
+ * modified, all its dependencies are modified as well. The reference can be
+ * used with maps or ACLs.
+ *
+ * The unique_id identifies an inline ACL. The unique id is unique for each
+ * ACL. The same id cannot be forced in the configuration file, because this
+ * reports an error.
+ *
+ * A particular case appears when the filename is a number. In this case, the
+ * unique_id is set to the number represented by the filename and the
+ * reference is also set. This method prevents duplicate unique_ids.
+ */
+
+/* This function looks up a reference by name. If the reference is found, a
+ * pointer to the struct pat_ref is returned, otherwise NULL is returned.
+ */
+struct pat_ref *pat_ref_lookup(const char *reference)
+{
+ struct pat_ref *ref;
+
+ list_for_each_entry(ref, &pattern_reference, list)
+ if (ref->reference && strcmp(reference, ref->reference) == 0)
+ return ref;
+ return NULL;
+}
+
+/* This function looks up a reference by its unique id. If the reference is found, a
+ * pointer to the struct pat_ref is returned, otherwise NULL is returned.
+ */
+struct pat_ref *pat_ref_lookupid(int unique_id)
+{
+ struct pat_ref *ref;
+
+ list_for_each_entry(ref, &pattern_reference, list)
+ if (ref->unique_id == unique_id)
+ return ref;
+ return NULL;
+}
+
+/* This function removes from the pattern reference <ref> all the patterns
+ * attached to the reference element <elt>, and the element itself. The
+ * reference must be locked.
+ */
+void pat_ref_delete_by_ptr(struct pat_ref *ref, struct pat_ref_elt *elt)
+{
+ struct pattern_expr *expr;
+ struct bref *bref, *back;
+
+ /*
+ * we have to unlink all watchers from this reference pattern. We must
+ * not relink them if this elt was the last one in the list.
+ */
+ list_for_each_entry_safe(bref, back, &elt->back_refs, users) {
+ LIST_DELETE(&bref->users);
+ LIST_INIT(&bref->users);
+ if (elt->list.n != &ref->head)
+ LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users);
+ bref->ref = elt->list.n;
+ }
+
+ /* delete all entries from all expressions for this pattern */
+ list_for_each_entry(expr, &ref->pat, list)
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+
+ pat_delete_gen(ref, elt);
+
+ list_for_each_entry(expr, &ref->pat, list)
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+
+ LIST_DELETE(&elt->list);
+ ebmb_delete(&elt->node);
+ free(elt->sample);
+ free(elt);
+}
+
+/* This function removes the pattern matching the pointer <refelt> from
+ * the reference and from each expr member of this reference. This function
+ * returns 1 if the entry was found and deleted, otherwise zero.
+ *
+ * <refelt> is user input: it is provided as an ID and should never be
+ * dereferenced without making sure that it is valid.
+ */
+int pat_ref_delete_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt)
+{
+ struct pat_ref_elt *elt, *safe;
+
+ /* delete pattern from reference */
+ list_for_each_entry_safe(elt, safe, &ref->head, list) {
+ if (elt == refelt) {
+ pat_ref_delete_by_ptr(ref, elt);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* This function removes all patterns matching <key> from the reference
+ * and from each expr member of the reference. It returns 1 if the
+ * deletion was performed, and 0 if the entry was not found.
+ */
+int pat_ref_delete(struct pat_ref *ref, const char *key)
+{
+ struct ebmb_node *node;
+ int found = 0;
+
+ /* delete pattern from reference */
+ node = ebst_lookup(&ref->ebmb_root, key);
+ while (node) {
+ struct pat_ref_elt *elt;
+
+ elt = ebmb_entry(node, struct pat_ref_elt, node);
+ node = ebmb_next_dup(node);
+ pat_ref_delete_by_ptr(ref, elt);
+ found = 1;
+ }
+
+ return found;
+}
+
+/*
+ * Finds and returns the element matching <key> in reference <ref>,
+ * or NULL if not found.
+ */
+struct pat_ref_elt *pat_ref_find_elt(struct pat_ref *ref, const char *key)
+{
+ struct ebmb_node *node;
+
+ node = ebst_lookup(&ref->ebmb_root, key);
+ if (node)
+ return ebmb_entry(node, struct pat_ref_elt, node);
+
+ return NULL;
+}
+
+
+/* This function modifies the sample of pat_ref_elt <elt> in all expressions
+ * found under <ref> to become <value>. It is assumed that the caller has
+ * already verified that <elt> belongs to <ref>.
+ */
+static inline int pat_ref_set_elt(struct pat_ref *ref, struct pat_ref_elt *elt,
+ const char *value, char **err)
+{
+ struct pattern_expr *expr;
+ struct sample_data **data;
+ char *sample;
+ struct sample_data test;
+ struct pattern_tree *tree;
+ struct pattern_list *pat;
+ void **node;
+
+ /* Try all needed converters. */
+ list_for_each_entry(expr, &ref->pat, list) {
+ if (!expr->pat_head->parse_smp)
+ continue;
+
+ if (!expr->pat_head->parse_smp(value, &test)) {
+ memprintf(err, "unable to parse '%s'", value);
+ return 0;
+ }
+ }
+
+ /* Modify pattern from reference. */
+ sample = strdup(value);
+ if (!sample) {
+ memprintf(err, "out of memory error");
+ return 0;
+ }
+ /* Load the new sample into each use place. All the conversions were
+ * tested above, so these calls should normally not fail.
+ */
+ for (node = elt->tree_head; node;) {
+ tree = container_of(node, struct pattern_tree, from_ref);
+ node = *node;
+ BUG_ON(tree->ref != elt);
+ expr = tree->expr;
+ if (!expr->pat_head->parse_smp)
+ continue;
+
+ data = &tree->data;
+ if (data && *data) {
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+ if (!expr->pat_head->parse_smp(sample, *data))
+ *data = NULL;
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+ }
+ }
+
+ for (node = elt->list_head; node;) {
+ pat = container_of(node, struct pattern_list, from_ref);
+ node = *node;
+ BUG_ON(pat->pat.ref != elt);
+ expr = pat->expr;
+ if (!expr->pat_head->parse_smp)
+ continue;
+
+ data = &pat->pat.data;
+ if (data && *data) {
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+ if (!expr->pat_head->parse_smp(sample, *data))
+ *data = NULL;
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+ }
+ }
+
+ /* free old sample only when all exprs are updated */
+ free(elt->sample);
+ elt->sample = sample;
+
+ return 1;
+}
+
+/* This function modifies the sample of pat_ref_elt <refelt> in all expressions
+ * found under <ref> to become <value>, after checking that <refelt> really
+ * belongs to <ref>.
+ *
+ * <refelt> is user input: it is provided as an ID and should never be
+ * dereferenced without making sure that it is valid.
+ */
+int pat_ref_set_by_id(struct pat_ref *ref, struct pat_ref_elt *refelt, const char *value, char **err)
+{
+ struct pat_ref_elt *elt;
+
+ /* Look for pattern in the reference. */
+ list_for_each_entry(elt, &ref->head, list) {
+ if (elt == refelt) {
+ if (!pat_ref_set_elt(ref, elt, value, err))
+ return 0;
+ return 1;
+ }
+ }
+
+ memprintf(err, "key or pattern not found");
+ return 0;
+}
+
+/* This function modifies the sample of all patterns matching <key>
+ * under <ref> to become <value>.
+ */
+int pat_ref_set(struct pat_ref *ref, const char *key, const char *value, char **err, struct pat_ref_elt *elt)
+{
+ int found = 0;
+ char *_merr;
+ char **merr;
+ struct ebmb_node *node;
+
+ if (err) {
+ merr = &_merr;
+ *merr = NULL;
+ }
+ else
+ merr = NULL;
+
+ if (elt) {
+ node = &elt->node;
+ }
+ else {
+ /* Look for pattern in the reference. */
+ node = ebst_lookup(&ref->ebmb_root, key);
+ }
+
+ while (node) {
+ elt = ebmb_entry(node, struct pat_ref_elt, node);
+ node = ebmb_next_dup(node);
+ if (!pat_ref_set_elt(ref, elt, value, merr)) {
+ if (err && merr) {
+ if (!found) {
+ *err = *merr;
+ } else {
+ memprintf(err, "%s, %s", *err, *merr);
+ ha_free(merr);
+ }
+ }
+ }
+ found = 1;
+ }
+
+ if (!found) {
+ memprintf(err, "entry not found");
+ return 0;
+ }
+ return 1;
+}
+
+/* This function creates a new reference. <ref> is the reference name.
+ * <flags> are PAT_REF_*. /!\ The reference is not checked, and must
+ * be unique. The user must check the reference with "pat_ref_lookup()"
+ * before calling this function. If the function fails, it returns NULL,
+ * otherwise it returns the new struct pat_ref.
+ */
+struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned int flags)
+{
+ struct pat_ref *ref;
+
+ ref = calloc(1, sizeof(*ref));
+ if (!ref)
+ return NULL;
+
+ if (display) {
+ ref->display = strdup(display);
+ if (!ref->display) {
+ free(ref);
+ return NULL;
+ }
+ }
+
+ ref->reference = strdup(reference);
+ if (!ref->reference) {
+ free(ref->display);
+ free(ref);
+ return NULL;
+ }
+
+ ref->flags = flags;
+ ref->unique_id = -1;
+ ref->revision = 0;
+ ref->entry_cnt = 0;
+
+ LIST_INIT(&ref->head);
+ ref->ebmb_root = EB_ROOT;
+ LIST_INIT(&ref->pat);
+ HA_RWLOCK_INIT(&ref->lock);
+ LIST_APPEND(&pattern_reference, &ref->list);
+
+ return ref;
+}
+
+/* This function creates a new reference. <unique_id> is the unique id. If
+ * the value of <unique_id> is -1, the unique id is calculated later.
+ * <flags> are PAT_REF_*. /!\ The reference is not checked, and must
+ * be unique. The user must check the reference with "pat_ref_lookup()"
+ * or pat_ref_lookupid before calling this function. If the function
+ * fails, it returns NULL, otherwise it returns the new struct pat_ref.
+ */
+struct pat_ref *pat_ref_newid(int unique_id, const char *display, unsigned int flags)
+{
+ struct pat_ref *ref;
+
+ ref = calloc(1, sizeof(*ref));
+ if (!ref)
+ return NULL;
+
+ if (display) {
+ ref->display = strdup(display);
+ if (!ref->display) {
+ free(ref);
+ return NULL;
+ }
+ }
+
+ ref->reference = NULL;
+ ref->flags = flags;
+ ref->curr_gen = 0;
+ ref->next_gen = 0;
+ ref->unique_id = unique_id;
+ LIST_INIT(&ref->head);
+ ref->ebmb_root = EB_ROOT;
+ LIST_INIT(&ref->pat);
+ HA_RWLOCK_INIT(&ref->lock);
+ LIST_APPEND(&pattern_reference, &ref->list);
+
+ return ref;
+}
+
+/* This function adds an entry to <ref>. It can fail on memory error. It returns
+ * the newly added element on success, or NULL on failure. The PATREF_LOCK on
+ * <ref> must be held. It sets the newly created pattern's generation number
+ * to the same value as the reference's.
+ */
+struct pat_ref_elt *pat_ref_append(struct pat_ref *ref, const char *pattern, const char *sample, int line)
+{
+ struct pat_ref_elt *elt;
+ int len = strlen(pattern);
+
+ elt = calloc(1, sizeof(*elt) + len + 1);
+ if (!elt)
+ goto fail;
+
+ elt->gen_id = ref->curr_gen;
+ elt->line = line;
+
+ memcpy((char*)elt->pattern, pattern, len + 1);
+
+ if (sample) {
+ elt->sample = strdup(sample);
+ if (!elt->sample)
+ goto fail;
+ }
+
+ LIST_INIT(&elt->back_refs);
+ elt->list_head = NULL;
+ elt->tree_head = NULL;
+ LIST_APPEND(&ref->head, &elt->list);
+ /* Even if calloc()'ed, ensure this node is not linked to a tree. */
+ elt->node.node.leaf_p = NULL;
+ ebst_insert(&ref->ebmb_root, &elt->node);
+ return elt;
+ fail:
+ free(elt);
+ return NULL;
+}
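+
+/* Note that pat_ref_append() only records the string form of the entry in the
+ * reference; nothing is parsed nor indexed into the expressions yet. A minimal
+ * usage sketch, assuming the caller holds the PATREF lock (this is essentially
+ * what pat_ref_load() below does):
+ *
+ *   elt = pat_ref_append(ref, "10.0.0.0/8", NULL, -1);
+ *   if (!elt || !pat_ref_commit_elt(ref, elt, &err))
+ *           ... handle the error, <err> may have been set ...
+ */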
+
+/* This function creates the sample found in <elt>, parses the pattern also
+ * found in <elt> and indexes it into <expr>. The function copies <patflags>
+ * into <expr>. If the function fails, it returns 0 and <err> is filled.
+ * On success, the function returns 1.
+ */
+int pat_ref_push(struct pat_ref_elt *elt, struct pattern_expr *expr,
+ int patflags, char **err)
+{
+ struct sample_data *data;
+ struct pattern pattern;
+
+ /* Create sample */
+ if (elt->sample && expr->pat_head->parse_smp) {
+ /* New sample. */
+ data = malloc(sizeof(*data));
+ if (!data)
+ return 0;
+
+ /* Parse value. */
+ if (!expr->pat_head->parse_smp(elt->sample, data)) {
+ memprintf(err, "unable to parse '%s'", elt->sample);
+ free(data);
+ return 0;
+ }
+
+ }
+ else
+ data = NULL;
+
+ /* initialise pattern */
+ memset(&pattern, 0, sizeof(pattern));
+ pattern.data = data;
+ pattern.ref = elt;
+
+ /* parse pattern */
+ if (!expr->pat_head->parse(elt->pattern, &pattern, expr->mflags, err)) {
+ free(data);
+ return 0;
+ }
+
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+ /* index pattern */
+ if (!expr->pat_head->index(expr, &pattern, err)) {
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+ free(data);
+ return 0;
+ }
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+
+ return 1;
+}
+
+/* This function tries to commit entry <elt> into <ref>. The new entry must
+ * have already been inserted using pat_ref_append(), and its generation number
+ * may have been adjusted as it will not be changed. <err> must point to a NULL
+ * pointer. The PATREF lock on <ref> must be held. All the pattern_expr for
+ * this reference will be updated (parsing, indexing). On success, non-zero is
+ * returned. On failure, all the operation is rolled back (the element is
+ * deleted from all expressions and is freed), zero is returned and the error
+ * pointer <err> may have been updated (and the caller must free it). Failure
+ * causes include memory allocation, parsing error or indexing error.
+ */
+int pat_ref_commit_elt(struct pat_ref *ref, struct pat_ref_elt *elt, char **err)
+{
+ struct pattern_expr *expr;
+
+ list_for_each_entry(expr, &ref->pat, list) {
+ if (!pat_ref_push(elt, expr, 0, err)) {
+ pat_ref_delete_by_ptr(ref, elt);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* Loads <pattern>:<sample> into <ref> for generation <gen>. <sample> may be
+ * NULL if none exists (e.g. ACL). If not needed, the generation number should
+ * be set to ref->curr_gen. The error pointer must initially point to NULL. The
+ * new entry will be propagated to all use places, involving allocation, parsing
+ * and indexing. On error (parsing, allocation), the operation will be rolled
+ * back, an error may be reported, and NULL will be returned. On success, the
+ * freshly allocated element will be returned. The PATREF lock on <ref> must be
+ * held during the operation.
+ */
+struct pat_ref_elt *pat_ref_load(struct pat_ref *ref, unsigned int gen,
+ const char *pattern, const char *sample,
+ int line, char **err)
+{
+ struct pat_ref_elt *elt;
+
+ elt = pat_ref_append(ref, pattern, sample, line);
+ if (elt) {
+ elt->gen_id = gen;
+ if (!pat_ref_commit_elt(ref, elt, err))
+ elt = NULL;
+ } else
+ memprintf(err, "out of memory error");
+
+ return elt;
+}
+
+/* This function adds an entry to <ref>. It can fail on memory error. The new
+ * entry is added to all the pattern_expr registered in this reference. The
+ * function stops on the first error encountered, in which case it returns 0,
+ * <err> is filled and the complete add operation is cancelled. If the
+ * insertion succeeds, the function returns 1.
+ */
+int pat_ref_add(struct pat_ref *ref,
+ const char *pattern, const char *sample,
+ char **err)
+{
+ return !!pat_ref_load(ref, ref->curr_gen, pattern, sample, -1, err);
+}
+
+/* This function purges all elements from <ref> whose generation is included in
+ * the range of <from> to <to> (inclusive), taking wrapping into consideration.
+ * It will not purge more than <budget> entries at once, in order to remain
+ * responsive. If budget is negative, no limit is applied.
+ * The caller must already hold the PATREF_LOCK on <ref>. The function will
+ * take the PATEXP_LOCK on all expressions of the pattern as needed. It returns
+ * non-zero on completion, or zero if it had to stop before the end after
+ * <budget> was depleted.
+ */
+int pat_ref_purge_range(struct pat_ref *ref, uint from, uint to, int budget)
+{
+ struct pat_ref_elt *elt, *elt_bck;
+ struct bref *bref, *bref_bck;
+ struct pattern_expr *expr;
+ int done;
+
+ list_for_each_entry(expr, &ref->pat, list)
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &expr->lock);
+
+ /* all expr are locked, we can safely remove all pat_ref */
+
+ /* assume completion for e.g. empty lists */
+ done = 1;
+ list_for_each_entry_safe(elt, elt_bck, &ref->head, list) {
+ if (elt->gen_id - from > to - from)
+ continue;
+
+ if (budget >= 0 && !budget--) {
+ done = 0;
+ break;
+ }
+
+ /*
+ * we have to unlink all watchers from this reference pattern. We must
+ * not relink them if this elt was the last one in the list.
+ */
+ list_for_each_entry_safe(bref, bref_bck, &elt->back_refs, users) {
+ LIST_DELETE(&bref->users);
+ LIST_INIT(&bref->users);
+ if (elt->list.n != &ref->head)
+ LIST_APPEND(&LIST_ELEM(elt->list.n, typeof(elt), list)->back_refs, &bref->users);
+ bref->ref = elt->list.n;
+ }
+
+ /* delete the storage for all representations of this pattern. */
+ pat_delete_gen(ref, elt);
+
+ LIST_DELETE(&elt->list);
+ ebmb_delete(&elt->node);
+ free(elt->sample);
+ free(elt);
+ }
+
+ list_for_each_entry(expr, &ref->pat, list)
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &expr->lock);
+
+ return done;
+}
+
+/* This function prunes all entries of <ref> and all their associated
+ * pattern_expr. It may return before the end of the list is reached,
+ * returning 0 to yield, indicating to the caller that it must call it
+ * again until it returns non-zero. All patterns are purged, both current
+ * ones and future or incomplete ones. This is used by "clear map" or
+ * "clear acl".
+ */
+int pat_ref_prune(struct pat_ref *ref)
+{
+ return pat_ref_purge_range(ref, 0, ~0, 100);
+}
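+
+/* Since pat_ref_purge_range() is called with a budget of 100 above, a caller
+ * typically invokes pat_ref_prune() in a loop until it reports completion,
+ * e.g. (sketch, with the PATREF lock held around each call):
+ *
+ *   while (!pat_ref_prune(ref))
+ *           ; // optionally release and re-take the lock between calls
+ */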
+
+/* This function looks up any existing reference <ref> in pattern_head <head>, and
+ * returns the associated pattern_expr pointer if found, otherwise NULL.
+ */
+struct pattern_expr *pattern_lookup_expr(struct pattern_head *head, struct pat_ref *ref)
+{
+ struct pattern_expr_list *expr;
+
+ list_for_each_entry(expr, &head->head, list)
+ if (expr->expr->ref == ref)
+ return expr->expr;
+ return NULL;
+}
+
+/* This function creates a new pattern_expr associated to the reference <ref>.
+ * <ref> can be NULL. If an error occurs, the function returns NULL and
+ * <err> is filled. Otherwise, the function returns the new pattern_expr
+ * linked with <head> and <ref>.
+ *
+ * The returned value can be an already filled pattern expression; in this
+ * case the flag <reuse> is set.
+ */
+struct pattern_expr *pattern_new_expr(struct pattern_head *head, struct pat_ref *ref,
+ int patflags, char **err, int *reuse)
+{
+ struct pattern_expr *expr;
+ struct pattern_expr_list *list;
+
+ if (reuse)
+ *reuse = 0;
+
+ /* Memory and initialization of the chain element. */
+ list = calloc(1, sizeof(*list));
+ if (!list) {
+ memprintf(err, "out of memory");
+ return NULL;
+ }
+
+ /* Look for an existing similar expr. Note that only the index, parse
+ * and parse_smp functions must be identical for two patterns to be
+ * considered similar. The other functions depend on these first ones.
+ */
+ if (ref) {
+ list_for_each_entry(expr, &ref->pat, list)
+ if (expr->pat_head->index == head->index &&
+ expr->pat_head->parse == head->parse &&
+ expr->pat_head->parse_smp == head->parse_smp &&
+ expr->mflags == patflags)
+ break;
+ if (&expr->list == &ref->pat)
+ expr = NULL;
+ }
+ else
+ expr = NULL;
+
+ /* If no similar expr was found, we create new expr. */
+ if (!expr) {
+ /* Allocate memory for the expr struct. */
+ expr = calloc(1, sizeof(*expr));
+ if (!expr) {
+ free(list);
+ memprintf(err, "out of memory");
+ return NULL;
+ }
+
+ /* Initialize this new expr. */
+ pattern_init_expr(expr);
+
+ /* Copy the pattern matching and indexing flags. */
+ expr->mflags = patflags;
+
+ /* This new pattern expression references its head. */
+ expr->pat_head = head;
+
+ /* Link with ref, or to self to facilitate LIST_DELETE() */
+ if (ref)
+ LIST_APPEND(&ref->pat, &expr->list);
+ else
+ LIST_INIT(&expr->list);
+
+ expr->ref = ref;
+
+ HA_RWLOCK_INIT(&expr->lock);
+
+ /* We must free this pattern if it is no longer used. */
+ list->do_free = 1;
+ }
+ else {
+ /* The pattern already exists: it is already linked
+ * with ref and must not be freed.
+ */
+ list->do_free = 0;
+ if (reuse)
+ *reuse = 1;
+ }
+
+ /* The new list element references the pattern_expr. */
+ list->expr = expr;
+
+ /* Link the list element with the pattern_head. */
+ LIST_APPEND(&head->head, &list->list);
+ return expr;
+}
+
+/* Reads patterns from a file. If <err> is non-NULL, an error message will
+ * be returned there on errors and the caller will have to free it.
+ *
+ * The file contains one key + value per line. Lines which start with '#' are
+ * ignored, just like empty lines. Leading tabs/spaces are stripped. The key is
+ * then the first "word" (series of non-space/tabs characters), and the value is
+ * what follows this series of space/tab till the end of the line excluding
+ * trailing spaces/tabs.
+ *
+ * Example :
+ *
+ * # this is a comment and is ignored
+ * 62.212.114.60 1wt.eu \n
+ * <-><-----------><---><----><---->
+ * | | | | `--- trailing spaces ignored
+ * | | | `-------- value
+ * | | `--------------- middle spaces ignored
+ * | `------------------------ key
+ * `-------------------------------- leading spaces ignored
+ *
+ * Return non-zero in case of success, otherwise 0.
+ */
+int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char **err)
+{
+ FILE *file;
+ char *c;
+ int ret = 0;
+ int line = 0;
+ char *key_beg;
+ char *key_end;
+ char *value_beg;
+ char *value_end;
+
+ file = fopen(filename, "r");
+ if (!file) {
+ memprintf(err, "failed to open pattern file <%s>", filename);
+ return 0;
+ }
+
+ /* now parse all patterns. The file may only contain one pattern
+ * followed by one value per line. Leading spaces, separator spaces
+ * and trailing spaces are stripped. Each line can contain a comment
+ * started by '#'.
+ */
+ while (fgets(trash.area, trash.size, file) != NULL) {
+ line++;
+ c = trash.area;
+
+ /* ignore lines beginning with a hash ('#') */
+ if (*c == '#')
+ continue;
+
+ /* strip leading spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+ /* empty lines are ignored too */
+ if (*c == '\0' || *c == '\r' || *c == '\n')
+ continue;
+
+ /* look for the end of the key */
+ key_beg = c;
+ while (*c && *c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
+ c++;
+
+ key_end = c;
+
+ /* strip middle spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+ /* look for the end of the value, it is the end of the line */
+ value_beg = c;
+ while (*c && *c != '\n' && *c != '\r')
+ c++;
+ value_end = c;
+
+ /* trim possibly trailing spaces and tabs */
+ while (value_end > value_beg && (value_end[-1] == ' ' || value_end[-1] == '\t'))
+ value_end--;
+
+ /* set final \0 and check entries */
+ *key_end = '\0';
+ *value_end = '\0';
+
+ /* insert values */
+ if (!pat_ref_append(ref, key_beg, value_beg, line)) {
+ memprintf(err, "out of memory");
+ goto out_close;
+ }
+ }
+
+ if (ferror(file)) {
+ memprintf(err, "error encountered while reading <%s> : %s",
+ filename, strerror(errno));
+ goto out_close;
+ }
+ /* success */
+ ret = 1;
+
+ out_close:
+ fclose(file);
+ return ret;
+}
+
+/* Reads patterns from a file. If <err> is non-NULL, an error message will
+ * be returned there on errors and the caller will have to free it.
+ */
+int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err)
+{
+ FILE *file;
+ char *c;
+ char *arg;
+ int ret = 0;
+ int line = 0;
+
+ file = fopen(filename, "r");
+ if (!file) {
+ memprintf(err, "failed to open pattern file <%s>", filename);
+ return 0;
+ }
+
+ /* now parse all patterns. The file may contain only one pattern per
+ * line. If the line contains spaces, they will be part of the pattern.
+ * The pattern stops at the first CR, LF or EOF encountered.
+ */
+ while (fgets(trash.area, trash.size, file) != NULL) {
+ line++;
+ c = trash.area;
+
+ /* ignore lines beginning with a hash ('#') */
+ if (*c == '#')
+ continue;
+
+ /* strip leading spaces and tabs */
+ while (*c == ' ' || *c == '\t')
+ c++;
+
+ arg = c;
+ while (*c && *c != '\n' && *c != '\r')
+ c++;
+ *c = 0;
+
+ /* empty lines are ignored too */
+ if (c == arg)
+ continue;
+
+ if (!pat_ref_append(ref, arg, NULL, line)) {
+ memprintf(err, "out of memory when loading patterns from file <%s>", filename);
+ goto out_close;
+ }
+ }
+
+ if (ferror(file)) {
+ memprintf(err, "error encountered while reading <%s> : %s",
+ filename, strerror(errno));
+ goto out_close;
+ }
+ ret = 1; /* success */
+
+ out_close:
+ fclose(file);
+ return ret;
+}
+
+int pattern_read_from_file(struct pattern_head *head, unsigned int refflags,
+ const char *filename, int patflags, int load_smp,
+ char **err, const char *file, int line)
+{
+ struct pat_ref *ref;
+ struct pattern_expr *expr;
+ struct pat_ref_elt *elt;
+ int reuse = 0;
+
+ /* Lookup for the existing reference. */
+ ref = pat_ref_lookup(filename);
+
+ /* If the reference doesn't exist, create it and load the associated file. */
+ if (!ref) {
+ chunk_printf(&trash,
+ "pattern loaded from file '%s' used by %s at file '%s' line %d",
+ filename, refflags & PAT_REF_MAP ? "map" : "acl", file, line);
+
+ ref = pat_ref_new(filename, trash.area, refflags);
+ if (!ref) {
+ memprintf(err, "out of memory");
+ return 0;
+ }
+
+ if (load_smp) {
+ ref->flags |= PAT_REF_SMP;
+ if (!pat_ref_read_from_file_smp(ref, filename, err))
+ return 0;
+ }
+ else {
+ if (!pat_ref_read_from_file(ref, filename, err))
+ return 0;
+ }
+ }
+ else {
+ /* The reference already exists, check the map compatibility. */
+
+ /* If the load requires samples and the flag PAT_REF_SMP is not set,
+ * the reference doesn't contain samples and cannot be used.
+ */
+ if (load_smp) {
+ if (!(ref->flags & PAT_REF_SMP)) {
+ memprintf(err, "The file \"%s\" is already used as one column file "
+ "and cannot be used by as two column file.",
+ filename);
+ return 0;
+ }
+ }
+ else {
+ /* The load doesn't require samples. If the flag PAT_REF_SMP is
+ * set, the reference contains samples and cannot be used.
+ */
+ if (ref->flags & PAT_REF_SMP) {
+ memprintf(err, "The file \"%s\" is already used as two column file "
+ "and cannot be used by as one column file.",
+ filename);
+ return 0;
+ }
+ }
+
+ /* Extend the display string */
+ chunk_printf(&trash, "%s", ref->display);
+ chunk_appendf(&trash, ", by %s at file '%s' line %d",
+ refflags & PAT_REF_MAP ? "map" : "acl", file, line);
+ free(ref->display);
+ ref->display = strdup(trash.area);
+ if (!ref->display) {
+ memprintf(err, "out of memory");
+ return 0;
+ }
+
+ /* Merge flags. */
+ ref->flags |= refflags;
+ }
+
+ /* Now we can load the patterns from the reference. */
+
+ /* Look up the reference in the head. If it doesn't
+ * exist, create it.
+ */
+ expr = pattern_lookup_expr(head, ref);
+ if (!expr || (expr->mflags != patflags)) {
+ expr = pattern_new_expr(head, ref, patflags, err, &reuse);
+ if (!expr)
+ return 0;
+ }
+
+ /* The returned expression may not be empty, because the function
+ * "pattern_new_expr" looks for a similar pattern list and may reuse
+ * an already filled one. In this case, we must not reload the
+ * patterns.
+ */
+ if (reuse)
+ return 1;
+
+ /* Load reference content in the pattern expression.
+ * We need to load elements in the same order they were seen in the
+ * file as list-based matching types may rely on it.
+ */
+ list_for_each_entry(elt, &ref->head, list) {
+ if (!pat_ref_push(elt, expr, patflags, err)) {
+ if (elt->line > 0)
+ memprintf(err, "%s at line %d of file '%s'",
+ *err, elt->line, filename);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/* This function executes a pattern match on a sample. It applies the patterns
+ * attached to <head> to sample <smp>. The function returns NULL if the sample
+ * doesn't match, and non-NULL if it matches. If <fill> is true and the sample
+ * matches, the function returns the matched pattern. In many cases, this
+ * pattern can be a static buffer.
+ */
+struct pattern *pattern_exec_match(struct pattern_head *head, struct sample *smp, int fill)
+{
+ struct pattern_expr_list *list;
+ struct pattern *pat;
+
+ if (!head->match) {
+ if (fill) {
+ static_pattern.data = NULL;
+ static_pattern.ref = NULL;
+ static_pattern.sflags = 0;
+ static_pattern.type = SMP_T_SINT;
+ static_pattern.val.i = 1;
+ }
+ return &static_pattern;
+ }
+
+ /* convert input to string */
+ if (!sample_convert(smp, head->expect_type))
+ return NULL;
+
+ list_for_each_entry(list, &head->head, list) {
+ HA_RWLOCK_RDLOCK(PATEXP_LOCK, &list->expr->lock);
+ pat = head->match(smp, list->expr, fill);
+ if (pat) {
+ /* We duplicate the pattern because it could be modified
+ by another thread */
+ if (pat != &static_pattern) {
+ memcpy(&static_pattern, pat, sizeof(struct pattern));
+ pat = &static_pattern;
+ }
+
+ /* We also duplicate the sample data for
+ the same reason */
+ if (pat->data && (pat->data != &static_sample_data)) {
+ switch(pat->data->type) {
+ case SMP_T_STR:
+ static_sample_data.type = SMP_T_STR;
+ static_sample_data.u.str = *get_trash_chunk();
+ static_sample_data.u.str.data = pat->data->u.str.data;
+ if (static_sample_data.u.str.data >= static_sample_data.u.str.size)
+ static_sample_data.u.str.data = static_sample_data.u.str.size - 1;
+ memcpy(static_sample_data.u.str.area,
+ pat->data->u.str.area, static_sample_data.u.str.data);
+ static_sample_data.u.str.area[static_sample_data.u.str.data] = 0;
+ pat->data = &static_sample_data;
+ break;
+
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ case SMP_T_SINT:
+ memcpy(&static_sample_data, pat->data, sizeof(struct sample_data));
+ pat->data = &static_sample_data;
+ break;
+ default:
+ /* unimplemented pattern type */
+ pat->data = NULL;
+ break;
+ }
+ }
+ HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock);
+ return pat;
+ }
+ HA_RWLOCK_RDUNLOCK(PATEXP_LOCK, &list->expr->lock);
+ }
+ return NULL;
+}
+
+/* This function prunes the pattern expressions starting at pattern_head <head>. */
+void pattern_prune(struct pattern_head *head)
+{
+ struct pattern_expr_list *list, *safe;
+
+ list_for_each_entry_safe(list, safe, &head->head, list) {
+ LIST_DELETE(&list->list);
+ if (list->do_free) {
+ LIST_DELETE(&list->expr->list);
+ HA_RWLOCK_WRLOCK(PATEXP_LOCK, &list->expr->lock);
+ head->prune(list->expr);
+ HA_RWLOCK_WRUNLOCK(PATEXP_LOCK, &list->expr->lock);
+ free(list->expr);
+ }
+ free(list);
+ }
+}
+
+/* This function compares two pat_ref** on their unique_id, and returns -1/0/1
+ * depending on their order (suitable for sorting).
+ */
+static int cmp_pat_ref(const void *_a, const void *_b)
+{
+ struct pat_ref * const *a = _a;
+ struct pat_ref * const *b = _b;
+
+ if ((*a)->unique_id < (*b)->unique_id)
+ return -1;
+ else if ((*a)->unique_id > (*b)->unique_id)
+ return 1;
+ return 0;
+}
+
+/* This function finalizes the configuration parsing. It sets all the
+ * automatic ids.
+ */
+int pattern_finalize_config(void)
+{
+ size_t len = 0;
+ size_t unassigned_pos = 0;
+ int next_unique_id = 0;
+ size_t i, j;
+ struct pat_ref *ref, **arr;
+ struct list pr = LIST_HEAD_INIT(pr);
+
+ pat_lru_seed = ha_random();
+
+ /* Count pat_refs with a user-defined unique_id, and the total count */
+ list_for_each_entry(ref, &pattern_reference, list) {
+ len++;
+ if (ref->unique_id != -1)
+ unassigned_pos++;
+ }
+
+ if (len == 0) {
+ return 0;
+ }
+
+ arr = calloc(len, sizeof(*arr));
+ if (arr == NULL) {
+ ha_alert("Out of memory error.\n");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ i = 0;
+ j = unassigned_pos;
+ list_for_each_entry(ref, &pattern_reference, list) {
+ if (ref->unique_id != -1)
+ arr[i++] = ref;
+ else
+ arr[j++] = ref;
+ }
+
+ /* Sort first segment of array with user-defined unique ids for
+ * fast lookup when generating unique ids
+ */
+ qsort(arr, unassigned_pos, sizeof(*arr), cmp_pat_ref);
+
+ /* Assign unique ids to the rest of the elements */
+ for (i = unassigned_pos; i < len; i++) {
+ do {
+ arr[i]->unique_id = next_unique_id++;
+ } while (bsearch(&arr[i], arr, unassigned_pos, sizeof(*arr), cmp_pat_ref));
+ }
+
+ /* Sort complete array */
+ qsort(arr, len, sizeof(*arr), cmp_pat_ref);
+
+ /* Convert back to linked list */
+ for (i = 0; i < len; i++)
+ LIST_APPEND(&pr, &arr[i]->list);
+
+ /* swap root */
+ LIST_INSERT(&pr, &pattern_reference);
+ LIST_DELETE(&pr);
+
+ free(arr);
+ return 0;
+}
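+
+/* Worked example of the id assignment above: with four references whose
+ * unique_ids are {5, 2, -1, -1}, the array is split into the sorted
+ * user-assigned segment {2, 5} followed by the two unassigned entries. The
+ * generator then tries 0 (absent from {2, 5}: kept) and 1 (kept), so the
+ * final sorted list is {0, 1, 2, 5}. Had a user picked 0, the bsearch()
+ * would have detected the conflict and the next candidate would have been
+ * used instead.
+ */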
+
+static int pattern_per_thread_lru_alloc()
+{
+ if (!global.tune.pattern_cache)
+ return 1;
+ pat_lru_tree = lru64_new(global.tune.pattern_cache);
+ return !!pat_lru_tree;
+}
+
+static void pattern_per_thread_lru_free()
+{
+ lru64_destroy(pat_lru_tree);
+}
+
+REGISTER_PER_THREAD_ALLOC(pattern_per_thread_lru_alloc);
+REGISTER_PER_THREAD_FREE(pattern_per_thread_lru_free);
diff --git a/src/payload.c b/src/payload.c
new file mode 100644
index 0000000..6a536d7
--- /dev/null
+++ b/src/payload.c
@@ -0,0 +1,1448 @@
+/*
+ * General protocol-agnostic payload-based sample fetches and ACLs
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/channel.h>
+#include <haproxy/connection.h>
+#include <haproxy/htx.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/pattern.h>
+#include <haproxy/payload.h>
+#include <haproxy/sample.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+
+/************************************************************************/
+/* All supported sample fetch functions must be declared here */
+/************************************************************************/
+
+/* wait for more data as long as possible, then return TRUE. This should be
+ * used with content inspection.
+ */
+static int
+smp_fetch_wait_end(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!(smp->opt & SMP_OPT_FINAL)) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+}
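+
+/* This fetch is the basis of the "wait_end" keyword and the predefined
+ * WAIT_END ACL (the exact registration is assumed to happen at the end of
+ * this file). An illustrative configuration delaying a decision until the
+ * inspection period ends:
+ *
+ *   tcp-request inspect-delay 30s
+ *   tcp-request content accept if WAIT_END
+ */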
+
+/* return the number of bytes in the request or response buffer, depending on the direction */
+static int
+smp_fetch_len(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (smp->strm) {
+ struct channel *chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+
+ /* Not accurate but kept for backward compatibility purposes */
+ if (IS_HTX_STRM(smp->strm)) {
+ struct htx *htx = htxbuf(&chn->buf);
+ smp->data.u.sint = htx->data - co_data(chn);
+ }
+ else
+ smp->data.u.sint = ci_data(chn);
+ }
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
+ struct check *check = __objt_check(smp->sess->origin);
+
+ /* Not accurate but kept for backward compatibility purposes */
+ smp->data.u.sint = ((check->sc && IS_HTX_SC(check->sc)) ? (htxbuf(&check->bi))->data: b_data(&check->bi));
+ }
+ else
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags = SMP_F_VOLATILE | SMP_F_MAY_CHANGE;
+ return 1;
+}
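+
+/* This fetch is assumed to be exposed as the "req.len" sample fetch keyword
+ * (registered at the end of this file). An illustrative use, accepting only
+ * connections that sent at least one byte during the inspection delay:
+ *
+ *   tcp-request inspect-delay 5s
+ *   tcp-request content accept if { req.len gt 0 }
+ */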
+
+/* Returns 0 if the client didn't send a SessionTicket Extension
+ * Returns 1 if the client sent a SessionTicket Extension
+ * Returns 2 if the client also sent a non-zero length SessionTicket
+ * The result uses the SMP_T_SINT data type
+ */
+static int
+smp_fetch_req_ssl_st_ext(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len, ext_len, bleft;
+ struct channel *chn;
+ unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+
+ bleft = ci_data(chn);
+ data = (unsigned char *)ci_head(chn);
+
+ /* Check for SSL/TLS Handshake */
+ if (!bleft)
+ goto too_short;
+ if (*data != 0x16)
+ goto not_ssl_hello;
+
+ /* Check for SSLv3 or later (SSL version >= 3.0) in the record layer */
+ if (bleft < 3)
+ goto too_short;
+ if (data[1] < 0x03)
+ goto not_ssl_hello;
+
+ if (bleft < 5)
+ goto too_short;
+ hs_len = (data[3] << 8) + data[4];
+ if (hs_len < 1 + 3 + 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ data += 5; /* enter TLS handshake */
+ bleft -= 5;
+
+ /* Check for a complete client hello starting at <data> */
+ if (bleft < 1)
+ goto too_short;
+ if (data[0] != 0x01) /* msg_type = Client Hello */
+ goto not_ssl_hello;
+
+ /* Check the Hello's length */
+ if (bleft < 4)
+ goto too_short;
+ hs_len = (data[1] << 16) + (data[2] << 8) + data[3];
+ if (hs_len < 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ /* We want the full handshake here */
+ if (bleft < hs_len)
+ goto too_short;
+
+ data += 4;
+ /* Start of the ClientHello message */
+ if (data[0] < 0x03 || data[1] < 0x01) /* TLSv1 minimum */
+ goto not_ssl_hello;
+
+ ext_len = data[34]; /* session_id_len */
+ if (ext_len > 32 || ext_len > (hs_len - 35)) /* check for correct session_id len */
+ goto not_ssl_hello;
+
+ /* Jump to cipher suite */
+ hs_len -= 35 + ext_len;
+ data += 35 + ext_len;
+
+ if (hs_len < 4 || /* minimum one cipher */
+ (ext_len = (data[0] << 8) + data[1]) < 2 || /* minimum 2 bytes for a cipher */
+ ext_len > hs_len)
+ goto not_ssl_hello;
+
+ /* Jump to the compression methods */
+ hs_len -= 2 + ext_len;
+ data += 2 + ext_len;
+
+ if (hs_len < 2 || /* minimum one compression method */
+ data[0] < 1 || data[0] > hs_len) /* minimum 1 byte per method */
+ goto not_ssl_hello;
+
+ /* Jump to the extensions */
+ hs_len -= 1 + data[0];
+ data += 1 + data[0];
+
+ if (hs_len < 2 || /* minimum one extension list length */
+ (ext_len = (data[0] << 8) + data[1]) > hs_len - 2) /* list too long */
+ goto not_ssl_hello;
+
+ hs_len = ext_len; /* limit ourselves to the extension length */
+ data += 2;
+
+ while (hs_len >= 4) {
+ int ext_type, ext_len;
+
+ ext_type = (data[0] << 8) + data[1];
+ ext_len = (data[2] << 8) + data[3];
+
+ if (ext_len > hs_len - 4) /* Extension too long */
+ goto not_ssl_hello;
+
+ /* SessionTicket extension */
+ if (ext_type == 35) {
+ smp->data.type = SMP_T_SINT;
+ /* non-empty SessionTicket data also present */
+ if (ext_len > 0)
+ smp->data.u.sint = 2;
+ /* extension present but SessionTicket data empty */
+ else
+ smp->data.u.sint = 1;
+ smp->flags = SMP_F_VOLATILE;
+ return 1;
+ }
+
+ hs_len -= 4 + ext_len;
+ data += 4 + ext_len;
+ }
+ /* SessionTicket Extension not found */
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ smp->flags = SMP_F_VOLATILE;
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+ return 0;
+}
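+
+/* This fetch is assumed to be exposed as the "req.ssl_st_ext" keyword
+ * (registered at the end of this file). An illustrative use, routing clients
+ * that presented a non-empty session ticket (value 2) to a hypothetical
+ * "bk_resume" backend:
+ *
+ *   tcp-request inspect-delay 5s
+ *   tcp-request content accept if { req.ssl_hello_type 1 }
+ *   use_backend bk_resume if { req.ssl_st_ext 2 }
+ */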
+
+/* Returns TRUE if the client sent the Supported Elliptic Curves Extension (0x000a).
+ * Mainly used to detect whether the client supports ECC cipher suites.
+ */
+static int
+smp_fetch_req_ssl_ec_ext(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len, ext_len, bleft;
+ struct channel *chn;
+ unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ bleft = ci_data(chn);
+ data = (unsigned char *)ci_head(chn);
+
+ /* Check for SSL/TLS Handshake */
+ if (!bleft)
+ goto too_short;
+ if (*data != 0x16)
+ goto not_ssl_hello;
+
+ /* Check for SSLv3 or later (SSL version >= 3.0) in the record layer */
+ if (bleft < 3)
+ goto too_short;
+ if (data[1] < 0x03)
+ goto not_ssl_hello;
+
+ if (bleft < 5)
+ goto too_short;
+ hs_len = (data[3] << 8) + data[4];
+ if (hs_len < 1 + 3 + 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ data += 5; /* enter TLS handshake */
+ bleft -= 5;
+
+ /* Check for a complete client hello starting at <data> */
+ if (bleft < 1)
+ goto too_short;
+ if (data[0] != 0x01) /* msg_type = Client Hello */
+ goto not_ssl_hello;
+
+ /* Check the Hello's length */
+ if (bleft < 4)
+ goto too_short;
+ hs_len = (data[1] << 16) + (data[2] << 8) + data[3];
+ if (hs_len < 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ /* We want the full handshake here */
+ if (bleft < hs_len)
+ goto too_short;
+
+ data += 4;
+ /* Start of the ClientHello message */
+ if (data[0] < 0x03 || data[1] < 0x01) /* TLSv1 minimum */
+ goto not_ssl_hello;
+
+ ext_len = data[34]; /* session_id_len */
+ if (ext_len > 32 || ext_len > (hs_len - 35)) /* check for correct session_id len */
+ goto not_ssl_hello;
+
+ /* Jump to cipher suite */
+ hs_len -= 35 + ext_len;
+ data += 35 + ext_len;
+
+ if (hs_len < 4 || /* minimum one cipher */
+ (ext_len = (data[0] << 8) + data[1]) < 2 || /* minimum 2 bytes for a cipher */
+ ext_len > hs_len)
+ goto not_ssl_hello;
+
+ /* Jump to the compression methods */
+ hs_len -= 2 + ext_len;
+ data += 2 + ext_len;
+
+ if (hs_len < 2 || /* minimum one compression method */
+ data[0] < 1 || data[0] > hs_len) /* minimum 1 byte per method */
+ goto not_ssl_hello;
+
+ /* Jump to the extensions */
+ hs_len -= 1 + data[0];
+ data += 1 + data[0];
+
+ if (hs_len < 2 || /* minimum one extension list length */
+ (ext_len = (data[0] << 8) + data[1]) > hs_len - 2) /* list too long */
+ goto not_ssl_hello;
+
+ hs_len = ext_len; /* limit ourselves to the extension length */
+ data += 2;
+
+ while (hs_len >= 4) {
+ int ext_type, ext_len;
+
+ ext_type = (data[0] << 8) + data[1];
+ ext_len = (data[2] << 8) + data[3];
+
+ if (ext_len > hs_len - 4) /* Extension too long */
+ goto not_ssl_hello;
+
+ /* Elliptic curves extension */
+ if (ext_type == 10) {
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ smp->flags = SMP_F_VOLATILE;
+ return 1;
+ }
+
+ hs_len -= 4 + ext_len;
+ data += 4 + ext_len;
+ }
+ /* Elliptic curves extension not found */
+ goto not_ssl_hello;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+
+ return 0;
+}
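+
+/* This fetch is assumed to be exposed as the "req.ssl_ec_ext" keyword
+ * (registered at the end of this file). An illustrative use, directing
+ * clients advertising ECC support to a hypothetical backend serving an
+ * ECDSA certificate:
+ *
+ *   tcp-request inspect-delay 5s
+ *   tcp-request content accept if { req.ssl_hello_type 1 }
+ *   use_backend bk_ecc if { req.ssl_ec_ext 1 }
+ */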
+/* returns the type of SSL hello message (mainly used to detect an SSL hello) */
+static int
+smp_fetch_ssl_hello_type(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len;
+ int hs_type, bleft;
+ struct channel *chn;
+ const unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ bleft = ci_data(chn);
+ data = (const unsigned char *)ci_head(chn);
+
+ if (!bleft)
+ goto too_short;
+
+ if ((*data >= 0x14 && *data <= 0x17) || (*data == 0xFF)) {
+ /* SSLv3 header format */
+ if (bleft < 9)
+ goto too_short;
+
+ /* ssl version 3 */
+ if ((data[1] << 16) + data[2] < 0x00030000)
+ goto not_ssl_hello;
+
+ /* the ssl message length must cover at least the handshake type and length */
+ if ((data[3] << 8) + data[4] < 4)
+ goto not_ssl_hello;
+
+ /* format introduced with SSLv3 */
+
+ hs_type = (int)data[5];
+ hs_len = ( data[6] << 16 ) + ( data[7] << 8 ) + data[8];
+
+ /* not a full handshake */
+ if (bleft < (9 + hs_len))
+ goto too_short;
+
+ }
+ else {
+ goto not_ssl_hello;
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = hs_type;
+ smp->flags = SMP_F_VOLATILE;
+
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+
+ return 0;
+}
+
+/* Return the version of the SSL protocol in the request. It supports both
+ * SSLv3 (TLSv1) header format for any message, and SSLv2 header format for
+ * the hello message. The SSLv3 format is described in RFC 2246 p49, and the
+ * SSLv2 format is described at the URL below, and completed on p67 of RFC 2246 :
+ * http://wp.netscape.com/eng/security/SSL_2.html
+ *
+ * Note: this decoder only works with non-wrapping data.
+ */
+static int
+smp_fetch_req_ssl_ver(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int version, bleft, msg_len;
+ const unsigned char *data;
+ struct channel *req;
+
+ if (!smp->strm)
+ goto not_ssl;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl;
+
+ req = &smp->strm->req;
+ msg_len = 0;
+ bleft = ci_data(req);
+ if (!bleft)
+ goto too_short;
+
+ data = (const unsigned char *)ci_head(req);
+ if ((*data >= 0x14 && *data <= 0x17) || (*data == 0xFF)) {
+ /* SSLv3 header format */
+ if (bleft < 11)
+ goto too_short;
+
+ version = (data[1] << 16) + data[2]; /* record layer version: major, minor */
+ msg_len = (data[3] << 8) + data[4]; /* record length */
+
+ /* format introduced with SSLv3 */
+ if (version < 0x00030000)
+ goto not_ssl;
+
+ /* message length between 6 and 2^14 + 2048 */
+ if (msg_len < 6 || msg_len > ((1<<14) + 2048))
+ goto not_ssl;
+
+ bleft -= 5; data += 5;
+
+ /* return the client hello client version, not the record layer version */
+ version = (data[4] << 16) + data[5]; /* client hello version: major, minor */
+ } else {
+ /* SSLv2 header format, only supported for hello (msg type 1) */
+ int rlen, plen, cilen, silen, chlen;
+
+ if (*data & 0x80) {
+ if (bleft < 3)
+ goto too_short;
+ /* short header format : 15 bits for length */
+ rlen = ((data[0] & 0x7F) << 8) | data[1];
+ plen = 0;
+ bleft -= 2; data += 2;
+ } else {
+ if (bleft < 4)
+ goto too_short;
+ /* long header format : 14 bits for length + pad length */
+ rlen = ((data[0] & 0x3F) << 8) | data[1];
+ plen = data[2];
+ bleft -= 3; data += 3;
+ }
+
+ if (*data != 0x01)
+ goto not_ssl;
+ bleft--; data++;
+
+ if (bleft < 8)
+ goto too_short;
+ version = (data[0] << 16) + data[1]; /* version: major, minor */
+ cilen = (data[2] << 8) + data[3]; /* cipher len, multiple of 3 */
+ silen = (data[4] << 8) + data[5]; /* session_id_len: 0 or 16 */
+ chlen = (data[6] << 8) + data[7]; /* 16<=challenge length<=32 */
+
+ bleft -= 8; data += 8;
+ if (cilen % 3 != 0)
+ goto not_ssl;
+ if (silen && silen != 16)
+ goto not_ssl;
+ if (chlen < 16 || chlen > 32)
+ goto not_ssl;
+ if (rlen != 9 + cilen + silen + chlen)
+ goto not_ssl;
+
+ /* focus on the remaining data length */
+ msg_len = cilen + silen + chlen + plen;
+ }
+ /* We could recursively check that the buffer ends exactly on an SSL
+ * fragment boundary and that a possible next segment is still SSL,
+ * but that's a bit pointless. However, we can still check that the
+ * whole part of the request which fits in a buffer is already
+ * there.
+ */
+ if (msg_len > channel_recv_limit(req) + b_orig(&req->buf) - ci_head(req))
+ msg_len = channel_recv_limit(req) + b_orig(&req->buf) - ci_head(req);
+
+ if (bleft < msg_len)
+ goto too_short;
+
+ /* OK that's enough. We have at least the whole message, and we have
+ * the protocol version.
+ */
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = version;
+ smp->flags = SMP_F_VOLATILE;
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+ not_ssl:
+ return 0;
+}
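+
+/* Usage sketch (informative): the returned (major << 16) + minor value can
+ * be matched in dotted form thanks to pat_parse_dotted_ver registered below,
+ * e.g. to accept anything from SSLv3 to TLSv1.3's legacy version:
+ *
+ *     acl is_tls req.ssl_ver 3.0-3.4
+ */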
+
+/* Try to extract the Server Name Indication that may be presented in a TLS
+ * client hello handshake message. The format of the message is the following
+ * (cf RFC5246 + RFC6066) :
+ * TLS frame :
+ * - uint8 type = 0x16 (Handshake)
+ * - uint16 version >= 0x0301 (TLSv1)
+ * - uint16 length (frame length)
+ * - TLS handshake :
+ * - uint8 msg_type = 0x01 (ClientHello)
+ * - uint24 length (handshake message length)
+ * - ClientHello :
+ * - uint16 client_version >= 0x0301 (TLSv1)
+ * - uint8 Random[32] (the first 4 bytes are a timestamp)
+ * - SessionID :
+ * - uint8 session_id_len (0..32) (SessionID len in bytes)
+ * - uint8 session_id[session_id_len]
+ * - CipherSuite :
+ * - uint16 cipher_len >= 2 (Cipher length in bytes)
+ * - uint16 ciphers[cipher_len/2]
+ * - CompressionMethod :
+ * - uint8 compression_len >= 1 (# of supported methods)
+ * - uint8 compression_methods[compression_len]
+ * - optional client_extension_len (in bytes)
+ * - optional sequence of ClientHelloExtensions (as many bytes as above):
+ * - uint16 extension_type = 0 for server_name
+ * - uint16 extension_len
+ * - opaque extension_data[extension_len]
+ * - uint16 server_name_list_len (# of bytes here)
+ * - opaque server_names[server_name_list_len bytes]
+ * - uint8 name_type = 0 for host_name
+ * - uint16 name_len
+ * - opaque hostname[name_len bytes]
+ */
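+/* Usage sketch (informative, hostname made up): this fetch enables SNI-based
+ * routing on a TCP listener without terminating TLS, assuming content
+ * inspection is enabled:
+ *
+ *     use_backend bk_app if { req.ssl_sni -i app.example.com }
+ */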
+static int
+smp_fetch_ssl_hello_sni(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len, ext_len, bleft;
+ struct channel *chn;
+ unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ bleft = ci_data(chn);
+ data = (unsigned char *)ci_head(chn);
+
+ /* Check for SSL/TLS Handshake */
+ if (!bleft)
+ goto too_short;
+ if (*data != 0x16)
+ goto not_ssl_hello;
+
+ /* Check for SSLv3 or later (SSL version >= 3.0) in the record layer */
+ if (bleft < 3)
+ goto too_short;
+ if (data[1] < 0x03)
+ goto not_ssl_hello;
+
+ if (bleft < 5)
+ goto too_short;
+ hs_len = (data[3] << 8) + data[4];
+ if (hs_len < 1 + 3 + 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ data += 5; /* enter TLS handshake */
+ bleft -= 5;
+
+ /* Check for a complete client hello starting at <data> */
+ if (bleft < 1)
+ goto too_short;
+ if (data[0] != 0x01) /* msg_type = Client Hello */
+ goto not_ssl_hello;
+
+ /* Check the Hello's length */
+ if (bleft < 4)
+ goto too_short;
+ hs_len = (data[1] << 16) + (data[2] << 8) + data[3];
+ if (hs_len < 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ /* We want the full handshake here */
+ if (bleft < hs_len)
+ goto too_short;
+
+ data += 4;
+ /* Start of the ClientHello message */
+ if (data[0] < 0x03 || data[1] < 0x01) /* TLSv1 minimum */
+ goto not_ssl_hello;
+
+ ext_len = data[34]; /* session_id_len */
+ if (ext_len > 32 || ext_len > (hs_len - 35)) /* check for correct session_id len */
+ goto not_ssl_hello;
+
+ /* Jump to cipher suite */
+ hs_len -= 35 + ext_len;
+ data += 35 + ext_len;
+
+ if (hs_len < 4 || /* minimum one cipher */
+ (ext_len = (data[0] << 8) + data[1]) < 2 || /* minimum 2 bytes for a cipher */
+ ext_len > hs_len)
+ goto not_ssl_hello;
+
+ /* Jump to the compression methods */
+ hs_len -= 2 + ext_len;
+ data += 2 + ext_len;
+
+ if (hs_len < 2 || /* minimum one compression method */
+ data[0] < 1 || data[0] > hs_len) /* minimum 1 byte per method */
+ goto not_ssl_hello;
+
+ /* Jump to the extensions */
+ hs_len -= 1 + data[0];
+ data += 1 + data[0];
+
+ if (hs_len < 2 || /* minimum one extension list length */
+ (ext_len = (data[0] << 8) + data[1]) > hs_len - 2) /* list too long */
+ goto not_ssl_hello;
+
+ hs_len = ext_len; /* limit ourselves to the extension length */
+ data += 2;
+
+ while (hs_len >= 4) {
+ int ext_type, name_type, srv_len, name_len;
+
+ ext_type = (data[0] << 8) + data[1];
+ ext_len = (data[2] << 8) + data[3];
+
+ if (ext_len > hs_len - 4) /* Extension too long */
+ goto not_ssl_hello;
+
+ if (ext_type == 0) { /* Server name */
+ if (ext_len < 2) /* need one list length */
+ goto not_ssl_hello;
+
+ srv_len = (data[4] << 8) + data[5];
+ if (srv_len < 4 || srv_len > hs_len - 6)
+ goto not_ssl_hello; /* at least 4 bytes per server name */
+
+ name_type = data[6];
+ name_len = (data[7] << 8) + data[8];
+
+ if (name_type == 0) { /* hostname */
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = (char *)data + 9;
+ smp->data.u.str.data = name_len;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ return 1;
+ }
+ }
+
+ hs_len -= 4 + ext_len;
+ data += 4 + ext_len;
+ }
+ /* server name not found */
+ goto not_ssl_hello;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+
+ return 0;
+}
+
+/* Try to extract the Application-Layer Protocol Negotiation (ALPN) protocol
+ * names that may be presented in a TLS client hello handshake message. As the
+ * message presents a list of protocol names in descending order of preference,
+ * it may return iteratively. The format of the message is the following
+ * (cf RFC5246 + RFC7301) :
+ * TLS frame :
+ * - uint8 type = 0x16 (Handshake)
+ * - uint16 version >= 0x0301 (TLSv1)
+ * - uint16 length (frame length)
+ * - TLS handshake :
+ * - uint8 msg_type = 0x01 (ClientHello)
+ * - uint24 length (handshake message length)
+ * - ClientHello :
+ * - uint16 client_version >= 0x0301 (TLSv1)
+ * - uint8 Random[32] (the first 4 bytes are a timestamp)
+ * - SessionID :
+ * - uint8 session_id_len (0..32) (SessionID len in bytes)
+ * - uint8 session_id[session_id_len]
+ * - CipherSuite :
+ * - uint16 cipher_len >= 2 (Cipher length in bytes)
+ * - uint16 ciphers[cipher_len/2]
+ * - CompressionMethod :
+ * - uint8 compression_len >= 1 (# of supported methods)
+ * - uint8 compression_methods[compression_len]
+ * - optional client_extension_len (in bytes)
+ * - optional sequence of ClientHelloExtensions (as many bytes as above):
+ * - uint16 extension_type = 16 for application_layer_protocol_negotiation
+ * - uint16 extension_len
+ * - opaque extension_data[extension_len]
+ * - uint16 protocol_names_len (# of bytes here)
+ * - opaque protocol_names[protocol_names_len bytes]
+ * - uint8 name_len
+ * - opaque protocol_name[name_len bytes]
+ */
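+/* Usage sketch (informative): route on a protocol advertised in the client's
+ * ALPN list, assuming content inspection is enabled:
+ *
+ *     use_backend bk_h2 if { req.ssl_alpn h2 }
+ */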
+static int
+smp_fetch_ssl_hello_alpn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int hs_len, ext_len, bleft;
+ struct channel *chn;
+ unsigned char *data;
+
+ if (!smp->strm)
+ goto not_ssl_hello;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ goto not_ssl_hello;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ bleft = ci_data(chn);
+ data = (unsigned char *)ci_head(chn);
+
+ /* Check for SSL/TLS Handshake */
+ if (!bleft)
+ goto too_short;
+ if (*data != 0x16)
+ goto not_ssl_hello;
+
+ /* Check for SSLv3 or later (SSL version >= 3.0) in the record layer */
+ if (bleft < 3)
+ goto too_short;
+ if (data[1] < 0x03)
+ goto not_ssl_hello;
+
+ if (bleft < 5)
+ goto too_short;
+ hs_len = (data[3] << 8) + data[4];
+ if (hs_len < 1 + 3 + 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ data += 5; /* enter TLS handshake */
+ bleft -= 5;
+
+ /* Check for a complete client hello starting at <data> */
+ if (bleft < 1)
+ goto too_short;
+ if (data[0] != 0x01) /* msg_type = Client Hello */
+ goto not_ssl_hello;
+
+ /* Check the Hello's length */
+ if (bleft < 4)
+ goto too_short;
+ hs_len = (data[1] << 16) + (data[2] << 8) + data[3];
+ if (hs_len < 2 + 32 + 1 + 2 + 2 + 1 + 1 + 2 + 2)
+ goto not_ssl_hello; /* too short to have an extension */
+
+ /* We want the full handshake here */
+ if (bleft < hs_len)
+ goto too_short;
+
+ data += 4;
+ /* Start of the ClientHello message */
+ if (data[0] < 0x03 || data[1] < 0x01) /* TLSv1 minimum */
+ goto not_ssl_hello;
+
+ ext_len = data[34]; /* session_id_len */
+ if (ext_len > 32 || ext_len > (hs_len - 35)) /* check for correct session_id len */
+ goto not_ssl_hello;
+
+ /* Jump to cipher suite */
+ hs_len -= 35 + ext_len;
+ data += 35 + ext_len;
+
+ if (hs_len < 4 || /* minimum one cipher */
+ (ext_len = (data[0] << 8) + data[1]) < 2 || /* minimum 2 bytes for a cipher */
+ ext_len > hs_len)
+ goto not_ssl_hello;
+
+ /* Jump to the compression methods */
+ hs_len -= 2 + ext_len;
+ data += 2 + ext_len;
+
+ if (hs_len < 2 || /* minimum one compression method */
+ data[0] < 1 || data[0] > hs_len) /* minimum 1 byte per method */
+ goto not_ssl_hello;
+
+ /* Jump to the extensions */
+ hs_len -= 1 + data[0];
+ data += 1 + data[0];
+
+ if (hs_len < 2 || /* minimum one extension list length */
+ (ext_len = (data[0] << 8) + data[1]) > hs_len - 2) /* list too long */
+ goto not_ssl_hello;
+
+ hs_len = ext_len; /* limit ourselves to the extension length */
+ data += 2;
+
+ while (hs_len >= 4) {
+ int ext_type, name_len, name_offset;
+
+ ext_type = (data[0] << 8) + data[1];
+ ext_len = (data[2] << 8) + data[3];
+
+ if (ext_len > hs_len - 4) /* Extension too long */
+ goto not_ssl_hello;
+
+ if (ext_type == 16) { /* ALPN */
+ if (ext_len < 3) /* one list length [uint16] + at least one name length [uint8] */
+ goto not_ssl_hello;
+
+ /* Name cursor in ctx, must begin after protocol_names_len */
+ name_offset = smp->ctx.i < 6 ? 6 : smp->ctx.i;
+ name_len = data[name_offset];
+
+ if (name_len + name_offset - 3 > ext_len)
+ goto not_ssl_hello;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = (char *)data + name_offset + 1; /* +1 to skip name_len */
+ smp->data.u.str.data = name_len;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+
+ /* May have more protocol names remaining */
+ if (name_len + name_offset - 3 < ext_len) {
+ smp->ctx.i = name_offset + name_len + 1;
+ smp->flags |= SMP_F_NOT_LAST;
+ }
+
+ return 1;
+ }
+
+ hs_len -= 4 + ext_len;
+ data += 4 + ext_len;
+ }
+ /* alpn not found */
+ goto not_ssl_hello;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE;
+
+ not_ssl_hello:
+
+ return 0;
+}
+
+/* Fetch the request RDP cookie identified in <cname>:<clen>, or any cookie if
+ * <clen> is zero (cname is then ignored). It returns the data into sample <smp>
+ * of type SMP_T_CSTR. Note: this decoder only works with non-wrapping data.
+ */
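+/* Informative sketch of the expected input: an RDP connection request starts
+ * with a 4-byte TPKT header followed by a 7-byte X.224 CR header (the 11
+ * bytes skipped below), optionally followed by a routing token such as
+ * "Cookie: mstshash=user\r\n", whose value this function extracts.
+ */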
+int
+fetch_rdp_cookie_name(struct stream *s, struct sample *smp, const char *cname, int clen)
+{
+ int bleft;
+ const unsigned char *data;
+
+ smp->flags = SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+
+ bleft = ci_data(&s->req);
+ if (bleft <= 11)
+ goto too_short;
+
+ data = (const unsigned char *)ci_head(&s->req) + 11;
+ bleft -= 11;
+
+ if (bleft <= 7)
+ goto too_short;
+
+ if (strncasecmp((const char *)data, "Cookie:", 7) != 0)
+ goto not_cookie;
+
+ data += 7;
+ bleft -= 7;
+
+ while (bleft > 0 && *data == ' ') {
+ data++;
+ bleft--;
+ }
+
+ if (clen) {
+ if (bleft <= clen)
+ goto too_short;
+
+ if ((data[clen] != '=') ||
+ strncasecmp(cname, (const char *)data, clen) != 0)
+ goto not_cookie;
+
+ data += clen + 1;
+ bleft -= clen + 1;
+ } else {
+ while (bleft > 0 && *data != '=') {
+ if (*data == '\r' || *data == '\n')
+ goto not_cookie;
+ data++;
+ bleft--;
+ }
+
+ if (bleft < 1)
+ goto too_short;
+
+ if (*data != '=')
+ goto not_cookie;
+
+ data++;
+ bleft--;
+ }
+
+ /* data points to cookie value */
+ smp->data.u.str.area = (char *)data;
+ smp->data.u.str.data = 0;
+
+ while (bleft > 0 && *data != '\r') {
+ data++;
+ bleft--;
+ }
+
+ if (bleft < 2)
+ goto too_short;
+
+ if (data[0] != '\r' || data[1] != '\n')
+ goto not_cookie;
+
+ smp->data.u.str.data = (char *)data - smp->data.u.str.area;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ not_cookie:
+ return 0;
+}
+
+/* Fetch the request RDP cookie identified in the args, or any cookie if no arg
+ * is passed. It is usable both for ACL and for samples. Note: this decoder
+ * only works with non-wrapping data. Accepts either 0 or 1 argument. Argument
+ * is a string (cookie name), other types will lead to undefined behaviour. The
+ * returned sample has type SMP_T_CSTR.
+ */
+int
+smp_fetch_rdp_cookie(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+
+ return fetch_rdp_cookie_name(smp->strm, smp,
+ args ? args->data.str.area : NULL,
+ args ? args->data.str.data : 0);
+}
+
+/* returns either 1 or 0 depending on whether an RDP cookie is found or not */
+static int
+smp_fetch_rdp_cookie_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int ret;
+
+ ret = smp_fetch_rdp_cookie(args, smp, kw, private);
+
+ if (smp->flags & SMP_F_MAY_CHANGE)
+ return 0;
+
+ smp->flags = SMP_F_VOLATILE;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ret;
+ return 1;
+}
+
+/* extracts part of a payload with offset and length at a given position */
+static int
+smp_fetch_payload_lv(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int len_offset = arg_p[0].data.sint;
+ unsigned int len_size = arg_p[1].data.sint;
+ unsigned int buf_offset;
+ unsigned int buf_size = 0;
+ struct channel *chn = NULL;
+ char *head = NULL;
+ size_t max, data;
+ int i;
+
+ /* Format is (len offset, len size, buf offset) or (len offset, len size) */
+ /* by default buf offset == len offset + len size */
+ /* buf offset could be absolute or relative to len offset + len size if prefixed by + or - */
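+ /* Example (informative): payload_lv(0,2) reads a 2-byte big-endian length
+ * at offset 0 and returns that many bytes starting right after it; a third
+ * argument such as "+2" would shift the extracted area by 2 extra bytes.
+ */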
+
+ if (smp->strm) {
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ head = ci_head(chn);
+ data = ci_data(chn);
+ }
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
+ struct check *check = __objt_check(smp->sess->origin);
+
+ /* meaningless for HTX buffers */
+ if (check->sc && IS_HTX_SC(check->sc))
+ return 0;
+ head = b_head(&check->bi);
+ data = b_data(&check->bi);
+ }
+ max = global.tune.bufsize;
+ if (!head)
+ goto too_short;
+
+ if (len_offset + len_size > data)
+ goto too_short;
+
+ for (i = 0; i < len_size; i++) {
+ buf_size = (buf_size << 8) + ((unsigned char *)head)[i + len_offset];
+ }
+
+ /* buf offset may be implicit, absolute or relative. If the LSB
+ * is set, then the offset is relative, otherwise it is absolute.
+ */
+ buf_offset = len_offset + len_size;
+ if (arg_p[2].type == ARGT_SINT) {
+ if (arg_p[2].data.sint & 1)
+ buf_offset += arg_p[2].data.sint >> 1;
+ else
+ buf_offset = arg_p[2].data.sint >> 1;
+ }
+
+ if (!buf_size || buf_size > max || buf_offset + buf_size > max) {
+ /* will never match */
+ smp->flags = 0;
+ return 0;
+ }
+
+ if (buf_offset + buf_size > data)
+ goto too_short;
+
+ /* init chunk as read only */
+ smp->data.type = SMP_T_BIN;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ chunk_initlen(&smp->data.u.str, head + buf_offset, 0, buf_size);
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ return 0;
+}
+
+/* extracts some payload at a fixed position and length */
+static int
+smp_fetch_payload(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int buf_offset = arg_p[0].data.sint;
+ unsigned int buf_size = arg_p[1].data.sint;
+ struct channel *chn = NULL;
+ char *head = NULL;
+ size_t max, data;
+
+ if (smp->strm) {
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+ head = ci_head(chn);
+ data = ci_data(chn);
+ }
+ else if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) {
+ struct check *check = __objt_check(smp->sess->origin);
+
+ /* meaningless for HTX buffers */
+ if (check->sc && IS_HTX_SC(check->sc))
+ return 0;
+ head = b_head(&check->bi);
+ data = b_data(&check->bi);
+ }
+ max = global.tune.bufsize;
+ if (!head)
+ goto too_short;
+
+ if (buf_size > max || buf_offset + buf_size > max) {
+ /* will never match */
+ smp->flags = 0;
+ return 0;
+ }
+ if (buf_offset + buf_size > data)
+ goto too_short;
+
+ /* init chunk as read only */
+ smp->data.type = SMP_T_BIN;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ chunk_initlen(&smp->data.u.str, head + buf_offset, 0, buf_size ? buf_size : (data - buf_offset));
+
+ if (!buf_size && chn && channel_may_recv(chn) && !channel_input_closed(chn))
+ smp->flags |= SMP_F_MAY_CHANGE;
+
+ return 1;
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ return 0;
+}
+
+/* This function is used to validate the arguments passed to a "payload_lv" fetch
+ * keyword. This keyword allows two positive integers and an optional signed one,
+ * with the second one being strictly positive and the third one, if negative,
+ * being no smaller than the opposite of the sum of the first two. It is assumed that the types are
+ * already the correct ones. Returns 0 on error, non-zero if OK. If <err_msg> is
+ * not NULL, it will be filled with a pointer to an error message in case of
+ * error, that the caller is responsible for freeing. The initial location must
+ * either be freeable or NULL.
+ *
+ * Note that offset2 is stored with SINT type, but it's not directly usable as-is.
+ * The value is contained in the 63 MSBs and the LSB is used as a flag marking
+ * the "relative" property of the value.
+ */
+int val_payload_lv(struct arg *arg, char **err_msg)
+{
+ int relative = 0;
+ const char *str;
+
+ if (arg[0].data.sint < 0) {
+ memprintf(err_msg, "payload offset1 must be positive");
+ return 0;
+ }
+
+ if (!arg[1].data.sint) {
+ memprintf(err_msg, "payload length must be > 0");
+ return 0;
+ }
+
+ if (arg[2].type == ARGT_STR && arg[2].data.str.data > 0) {
+ long long int i;
+
+ if (arg[2].data.str.area[0] == '+' || arg[2].data.str.area[0] == '-')
+ relative = 1;
+ str = arg[2].data.str.area;
+ i = read_int64(&str, str + arg[2].data.str.data);
+ if (*str != '\0') {
+ memprintf(err_msg, "payload offset2 is not a number");
+ return 0;
+ }
+ chunk_destroy(&arg[2].data.str);
+ arg[2].type = ARGT_SINT;
+ arg[2].data.sint = i;
+
+ if (arg[0].data.sint + arg[1].data.sint + arg[2].data.sint < 0) {
+ memprintf(err_msg, "payload offset2 too negative");
+ return 0;
+ }
+ if (relative)
+ arg[2].data.sint = ( arg[2].data.sint << 1 ) + 1;
+ }
+ return 1;
+}
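+
+/* Worked example of the LSB encoding above (informative): a relative third
+ * argument "+4" is stored as (4 << 1) + 1 = 9, while an absolute "10" is
+ * stored as 10 << 1 = 20; smp_fetch_payload_lv() then tests the LSB to tell
+ * both cases apart and shifts the value back by one bit.
+ */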
+
+/* extracts the parameter value of a distcc token */
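+/* Informative sketch of the distcc framing (an assumption based on the checks
+ * below): the stream is a sequence of tokens made of a 4-char name followed
+ * by 8 hex digits, e.g. "DIST00000001ARGC00000005..."; the hex value is
+ * either a plain parameter (ARGC, DIST, NFIL, STAT, DONE) or the length of
+ * a body that immediately follows the token.
+ */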
+static int
+smp_fetch_distcc_param(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int match_tok = arg_p[0].data.sint;
+ unsigned int match_occ = arg_p[1].data.sint;
+ unsigned int token;
+ unsigned int param;
+ unsigned int body;
+ unsigned int ofs;
+ unsigned int occ;
+ struct channel *chn;
+ int i;
+
+ /* Format is (token[,occ]). occ starts at 1. */
+
+ if (!smp->strm)
+ return 0;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+
+ ofs = 0; occ = 0;
+ while (1) {
+ if (ofs + 12 > ci_data(chn)) {
+ /* not there yet, but could it at least fit? */
+ if (!chn->buf.size)
+ goto too_short;
+
+ if (ofs + 12 <= channel_recv_limit(chn) + b_orig(&chn->buf) - ci_head(chn))
+ goto too_short;
+
+ goto no_match;
+ }
+
+ token = read_n32(ci_head(chn) + ofs);
+ ofs += 4;
+
+ for (i = param = 0; i < 8; i++) {
+ int c = hex2i(ci_head(chn)[ofs + i]);
+
+ if (c < 0)
+ goto no_match;
+ param = (param << 4) + c;
+ }
+ ofs += 8;
+
+ /* these tokens don't have a body */
+ if (token != 0x41524743 /* ARGC */ && token != 0x44495354 /* DIST */ &&
+ token != 0x4E46494C /* NFIL */ && token != 0x53544154 /* STAT */ &&
+ token != 0x444F4E45 /* DONE */)
+ body = param;
+ else
+ body = 0;
+
+ if (token == match_tok) {
+ occ++;
+ if (!match_occ || match_occ == occ) {
+ /* found */
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = param;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+ return 1;
+ }
+ }
+ ofs += body;
+ }
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ return 0;
+ no_match:
+ /* will never match (end of buffer, or bad contents) */
+ smp->flags = 0;
+ return 0;
+
+}
+
+/* extracts the (possibly truncated) body of a distcc token */
+static int
+smp_fetch_distcc_body(const struct arg *arg_p, struct sample *smp, const char *kw, void *private)
+{
+ unsigned int match_tok = arg_p[0].data.sint;
+ unsigned int match_occ = arg_p[1].data.sint;
+ unsigned int token;
+ unsigned int param;
+ unsigned int ofs;
+ unsigned int occ;
+ unsigned int body;
+ struct channel *chn;
+ int i;
+
+ /* Format is (token[,occ]). occ starts at 1. */
+
+ if (!smp->strm)
+ return 0;
+
+ /* meaningless for HTX buffers */
+ if (IS_HTX_STRM(smp->strm))
+ return 0;
+
+ chn = ((smp->opt & SMP_OPT_DIR) == SMP_OPT_DIR_RES) ? &smp->strm->res : &smp->strm->req;
+
+ ofs = 0; occ = 0;
+ while (1) {
+ if (ofs + 12 > ci_data(chn)) {
+ if (!chn->buf.size)
+ goto too_short;
+
+ if (ofs + 12 <= channel_recv_limit(chn) + b_orig(&chn->buf) - ci_head(chn))
+ goto too_short;
+
+ goto no_match;
+ }
+
+ token = read_n32(ci_head(chn) + ofs);
+ ofs += 4;
+
+ for (i = param = 0; i < 8; i++) {
+ int c = hex2i(ci_head(chn)[ofs + i]);
+
+ if (c < 0)
+ goto no_match;
+ param = (param << 4) + c;
+ }
+ ofs += 8;
+
+ /* these tokens don't have a body */
+ if (token != 0x41524743 /* ARGC */ && token != 0x44495354 /* DIST */ &&
+ token != 0x4E46494C /* NFIL */ && token != 0x53544154 /* STAT */ &&
+ token != 0x444F4E45 /* DONE */)
+ body = param;
+ else
+ body = 0;
+
+ if (token == match_tok) {
+ occ++;
+ if (!match_occ || match_occ == occ) {
+ /* found */
+
+ smp->data.type = SMP_T_BIN;
+ smp->flags = SMP_F_VOLATILE | SMP_F_CONST;
+
+ if (ofs + body > ci_head(chn) - b_orig(&chn->buf) + ci_data(chn)) {
+ /* incomplete body */
+
+ if (ofs + body > channel_recv_limit(chn) + b_orig(&chn->buf) - ci_head(chn)) {
+ /* truncate it to whatever will fit */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ body = channel_recv_limit(chn) + b_orig(&chn->buf) - ci_head(chn) - ofs;
+ }
+ }
+
+ chunk_initlen(&smp->data.u.str, ci_head(chn) + ofs, 0, body);
+ return 1;
+ }
+ }
+ ofs += body;
+ }
+
+ too_short:
+ smp->flags = SMP_F_MAY_CHANGE | SMP_F_CONST;
+ return 0;
+ no_match:
+ /* will never match (end of buffer, or bad contents) */
+ smp->flags = 0;
+ return 0;
+
+}
+
+/* This function is used to validate the arguments passed to a "distcc_param" or
+ * "distcc_body" sample fetch keyword. They take a mandatory token name of exactly
+ * 4 characters, followed by an optional occurrence number starting at 1. It is
+ * assumed that the types are already the correct ones. Returns 0 on error, non-
+ * zero if OK. If <err_msg> is not NULL, it will be filled with a pointer to an
+ * error message in case of error, that the caller is responsible for freeing.
+ * The initial location must either be freeable or NULL.
+ */
+int val_distcc(struct arg *arg, char **err_msg)
+{
+ unsigned int token;
+
+ if (arg[0].data.str.data != 4) {
+ memprintf(err_msg, "token name must be exactly 4 characters");
+ return 0;
+ }
+
+ /* convert the token name to an unsigned int (one byte per character,
+ * big endian format).
+ */
+ token = (arg[0].data.str.area[0] << 24) + (arg[0].data.str.area[1] << 16) +
+ (arg[0].data.str.area[2] << 8) + (arg[0].data.str.area[3] << 0);
+
+ chunk_destroy(&arg[0].data.str);
+ arg[0].type = ARGT_SINT;
+ arg[0].data.sint = token;
+
+ if (arg[1].type != ARGT_SINT) {
+ arg[1].type = ARGT_SINT;
+ arg[1].data.sint = 0;
+ }
+ return 1;
+}
+
+/************************************************************************/
+/* All supported sample and ACL keywords must be declared here. */
+/************************************************************************/
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types should be declared using the
+ * appropriate pseudo-type. If not available it must be declared as the lowest
+ * common denominator, the type that can be cast into all other ones.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "distcc_body", smp_fetch_distcc_body, ARG2(1,STR,SINT), val_distcc, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES },
+ { "distcc_param", smp_fetch_distcc_param, ARG2(1,STR,SINT), val_distcc, SMP_T_SINT, SMP_USE_L6REQ|SMP_USE_L6RES },
+ { "payload", smp_fetch_payload, ARG2(2,SINT,SINT), NULL, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES },
+ { "payload_lv", smp_fetch_payload_lv, ARG3(2,SINT,SINT,STR), val_payload_lv, SMP_T_BIN, SMP_USE_L6REQ|SMP_USE_L6RES },
+ { "rdp_cookie", smp_fetch_rdp_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "rdp_cookie_cnt", smp_fetch_rdp_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "rep_ssl_hello_type", smp_fetch_ssl_hello_type, 0, NULL, SMP_T_SINT, SMP_USE_L6RES },
+ { "req_len", smp_fetch_len, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req_ssl_hello_type", smp_fetch_ssl_hello_type, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req_ssl_sni", smp_fetch_ssl_hello_sni, 0, NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "req_ssl_ver", smp_fetch_req_ssl_ver, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+
+ { "req.len", smp_fetch_len, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req.payload", smp_fetch_payload, ARG2(2,SINT,SINT), NULL, SMP_T_BIN, SMP_USE_L6REQ },
+ { "req.payload_lv", smp_fetch_payload_lv, ARG3(2,SINT,SINT,STR), val_payload_lv, SMP_T_BIN, SMP_USE_L6REQ },
+ { "req.rdp_cookie", smp_fetch_rdp_cookie, ARG1(0,STR), NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "req.rdp_cookie_cnt", smp_fetch_rdp_cookie_cnt, ARG1(0,STR), NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req.ssl_ec_ext", smp_fetch_req_ssl_ec_ext, 0, NULL, SMP_T_BOOL, SMP_USE_L6REQ },
+ { "req.ssl_st_ext", smp_fetch_req_ssl_st_ext, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req.ssl_hello_type", smp_fetch_ssl_hello_type, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "req.ssl_sni", smp_fetch_ssl_hello_sni, 0, NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "req.ssl_alpn", smp_fetch_ssl_hello_alpn, 0, NULL, SMP_T_STR, SMP_USE_L6REQ },
+ { "req.ssl_ver", smp_fetch_req_ssl_ver, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+ { "res.len", smp_fetch_len, 0, NULL, SMP_T_SINT, SMP_USE_L6RES },
+ { "res.payload", smp_fetch_payload, ARG2(2,SINT,SINT), NULL, SMP_T_BIN, SMP_USE_L6RES },
+ { "res.payload_lv", smp_fetch_payload_lv, ARG3(2,SINT,SINT,STR), val_payload_lv, SMP_T_BIN, SMP_USE_L6RES },
+ { "res.ssl_hello_type", smp_fetch_ssl_hello_type, 0, NULL, SMP_T_SINT, SMP_USE_L6RES },
+ { "wait_end", smp_fetch_wait_end, 0, NULL, SMP_T_BOOL, SMP_USE_INTRN },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { "payload", "req.payload", PAT_MATCH_BIN },
+ { "payload_lv", "req.payload_lv", PAT_MATCH_BIN },
+ { "req_rdp_cookie", "req.rdp_cookie", PAT_MATCH_STR },
+ { "req_rdp_cookie_cnt", "req.rdp_cookie_cnt", PAT_MATCH_INT },
+ { "req_ssl_sni", "req.ssl_sni", PAT_MATCH_STR },
+ { "req_ssl_ver", "req.ssl_ver", PAT_MATCH_INT, pat_parse_dotted_ver },
+ { "req.ssl_ver", "req.ssl_ver", PAT_MATCH_INT, pat_parse_dotted_ver },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/peers.c b/src/peers.c
new file mode 100644
index 0000000..5eefd18
--- /dev/null
+++ b/src/peers.c
@@ -0,0 +1,4231 @@
+/*
+ * Peer synchronization management.
+ *
+ * Copyright 2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <import/eb32tree.h>
+#include <import/ebmbtree.h>
+#include <import/ebpttree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/dict.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/frontend.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/obj_type-t.h>
+#include <haproxy/peers.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session-t.h>
+#include <haproxy/signal.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+
+
+/*******************************/
+/* Current peer learning state */
+/*******************************/
+
+/******************************/
+/* Current peers section resync state */
+/******************************/
+#define PEERS_F_RESYNC_LOCAL 0x00000001 /* Learn from local finished or no longer needed */
+#define PEERS_F_RESYNC_REMOTE 0x00000002 /* Learn from remote finished or no longer needed */
+#define PEERS_F_RESYNC_ASSIGN 0x00000004 /* A peer was assigned to learn our lesson */
+#define PEERS_F_RESYNC_PROCESS 0x00000008 /* The assigned peer was requested for resync */
+#define PEERS_F_RESYNC_LOCALTIMEOUT 0x00000010 /* Timeout waiting for a full resync from a local node */
+#define PEERS_F_RESYNC_REMOTETIMEOUT 0x00000020 /* Timeout waiting for a full resync from a remote node */
+#define PEERS_F_RESYNC_LOCALABORT 0x00000040 /* Session aborted learning from a local node */
+#define PEERS_F_RESYNC_REMOTEABORT 0x00000080 /* Session aborted learning from a remote node */
+#define PEERS_F_RESYNC_LOCALFINISHED 0x00000100 /* A local node taught us and was fully up to date */
+#define PEERS_F_RESYNC_REMOTEFINISHED 0x00000200 /* A remote node taught us and was fully up to date */
+#define PEERS_F_RESYNC_LOCALPARTIAL 0x00000400 /* A local node taught us but was only partially up to date */
+#define PEERS_F_RESYNC_REMOTEPARTIAL 0x00000800 /* A remote node taught us but was only partially up to date */
+#define PEERS_F_RESYNC_LOCALASSIGN 0x00001000 /* A local node was assigned for a full resync */
+#define PEERS_F_RESYNC_REMOTEASSIGN 0x00002000 /* A remote node was assigned for a full resync */
+#define PEERS_F_RESYNC_REQUESTED 0x00004000 /* A resync was explicitly requested */
+#define PEERS_F_DONOTSTOP 0x00010000 /* Main table sync task blocks the process during soft stop
+ to push data to the new process */
+
+#define PEERS_RESYNC_STATEMASK (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE)
+#define PEERS_RESYNC_FROMLOCAL 0x00000000
+#define PEERS_RESYNC_FROMREMOTE PEERS_F_RESYNC_LOCAL
+#define PEERS_RESYNC_FINISHED (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE)
+
+/***********************************/
+/* Current shared table sync state */
+/***********************************/
+#define SHTABLE_F_TEACH_STAGE1 0x00000001 /* Teach state 1 complete */
+#define SHTABLE_F_TEACH_STAGE2 0x00000002 /* Teach state 2 complete */
+
+/******************************/
+/* Remote peer teaching state */
+/******************************/
+#define PEER_F_TEACH_PROCESS 0x00000001 /* Teach a lesson to current peer */
+#define PEER_F_TEACH_FINISHED 0x00000008 /* Teaching concluded (wait for confirm) */
+#define PEER_F_TEACH_COMPLETE 0x00000010 /* Everything we know has already been taught to the current peer, used only for a local peer */
+#define PEER_F_LEARN_ASSIGN 0x00000100 /* Current peer was assigned for a lesson */
+#define PEER_F_LEARN_NOTUP2DATE 0x00000200 /* Learn from peer finished but peer is not up to date */
+#define PEER_F_ALIVE 0x20000000 /* Used to flag a peer as alive. */
+#define PEER_F_HEARTBEAT 0x40000000 /* Heartbeat message to send. */
+#define PEER_F_DWNGRD 0x80000000 /* When this flag is enabled, we must downgrade the supported version announced during peer sessions. */
+
+#define PEER_TEACH_RESET ~(PEER_F_TEACH_PROCESS|PEER_F_TEACH_FINISHED) /* PEER_F_TEACH_COMPLETE should never be reset */
+#define PEER_LEARN_RESET ~(PEER_F_LEARN_ASSIGN|PEER_F_LEARN_NOTUP2DATE)
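+
+/* These reset masks are applied with a bitwise AND to clear the teaching or
+ * learning flags, e.g. (informative): peer->flags &= PEER_LEARN_RESET;
+ */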
+
+#define PEER_RESYNC_TIMEOUT 5000 /* 5 seconds */
+#define PEER_RECONNECT_TIMEOUT 5000 /* 5 seconds */
+#define PEER_LOCAL_RECONNECT_TIMEOUT 500 /* 500ms */
+#define PEER_HEARTBEAT_TIMEOUT 3000 /* 3 seconds */
+
+/* default maximum of updates sent at once */
+#define PEER_DEF_MAX_UPDATES_AT_ONCE 200
+
+/* flags for "show peers" */
+#define PEERS_SHOW_F_DICT 0x00000001 /* also show the contents of the dictionary */
+
+/*****************************/
+/* Sync message class */
+/*****************************/
+enum {
+ PEER_MSG_CLASS_CONTROL = 0,
+ PEER_MSG_CLASS_ERROR,
+ PEER_MSG_CLASS_STICKTABLE = 10,
+ PEER_MSG_CLASS_RESERVED = 255,
+};
+
+/*****************************/
+/* control message types */
+/*****************************/
+enum {
+ PEER_MSG_CTRL_RESYNCREQ = 0,
+ PEER_MSG_CTRL_RESYNCFINISHED,
+ PEER_MSG_CTRL_RESYNCPARTIAL,
+ PEER_MSG_CTRL_RESYNCCONFIRM,
+ PEER_MSG_CTRL_HEARTBEAT,
+};
+
+/*****************************/
+/* error message types */
+/*****************************/
+enum {
+ PEER_MSG_ERR_PROTOCOL = 0,
+ PEER_MSG_ERR_SIZELIMIT,
+};
+
+/* network key types;
+ * network types were directly and mistakenly
+ * mapped on sample types, to keep backward
+ * compatibility we keep those values but
+ * we now use an internal/network mapping
+ * to avoid further mistakes when adding or
+ * modifying internal types
+ */
+enum {
+ PEER_KT_ANY = 0, /* any type */
+ PEER_KT_RESV1, /* UNUSED */
+ PEER_KT_SINT, /* signed 64bits integer type */
+ PEER_KT_RESV3, /* UNUSED */
+ PEER_KT_IPV4, /* ipv4 type */
+ PEER_KT_IPV6, /* ipv6 type */
+ PEER_KT_STR, /* char string type */
+ PEER_KT_BIN, /* buffer type */
+ PEER_KT_TYPES /* number of types, must always be last */
+};
+
+/* Map used to retrieve network type from internal type
+ * Note: Undeclared mapping maps entry to PEER_KT_ANY == 0
+ */
+static int peer_net_key_type[SMP_TYPES] = {
+ [SMP_T_SINT] = PEER_KT_SINT,
+ [SMP_T_IPV4] = PEER_KT_IPV4,
+ [SMP_T_IPV6] = PEER_KT_IPV6,
+ [SMP_T_STR] = PEER_KT_STR,
+ [SMP_T_BIN] = PEER_KT_BIN,
+};
+
+/* Map used to retrieve internal type from external type
+ * Note: Undeclared mapping maps entry to SMP_T_ANY == 0
+ */
+static int peer_int_key_type[PEER_KT_TYPES] = {
+ [PEER_KT_SINT] = SMP_T_SINT,
+ [PEER_KT_IPV4] = SMP_T_IPV4,
+ [PEER_KT_IPV6] = SMP_T_IPV6,
+ [PEER_KT_STR] = SMP_T_STR,
+ [PEER_KT_BIN] = SMP_T_BIN,
+};
+
+/*
+ * Parameters used by functions to build peer protocol messages.
+ */
+struct peer_prep_params {
+ struct {
+ struct peer *peer;
+ } hello;
+ struct {
+ unsigned int st1;
+ } error_status;
+ struct {
+ struct stksess *stksess;
+ struct shared_table *shared_table;
+ unsigned int updateid;
+ int use_identifier;
+ int use_timed;
+ struct peer *peer;
+ } updt;
+ struct {
+ struct shared_table *shared_table;
+ } swtch;
+ struct {
+ struct shared_table *shared_table;
+ } ack;
+ struct {
+ unsigned char head[2];
+ } control;
+ struct {
+ unsigned char head[2];
+ } error;
+};
+
+/*******************************/
+/* stick table sync msg types  */
+/* Note: messages whose id is  */
+/* >= 128 carry data           */
+/*******************************/
+#define PEER_MSG_STKT_UPDATE 0x80
+#define PEER_MSG_STKT_INCUPDATE 0x81
+#define PEER_MSG_STKT_DEFINE 0x82
+#define PEER_MSG_STKT_SWITCH 0x83
+#define PEER_MSG_STKT_ACK 0x84
+#define PEER_MSG_STKT_UPDATE_TIMED 0x85
+#define PEER_MSG_STKT_INCUPDATE_TIMED 0x86
+/* All the stick-table message identifiers above have bit #7 set */
+#define PEER_MSG_STKT_BIT 7
+#define PEER_MSG_STKT_BIT_MASK (1 << PEER_MSG_STKT_BIT)
+
+/* The maximum length of an encoded data length. */
+#define PEER_MSG_ENC_LENGTH_MAXLEN 5
+
+/* Minimum 64-bits value encoded with 2 bytes */
+#define PEER_ENC_2BYTES_MIN 0xf0 /* 0xf0 (or 240) */
+/* 3 bytes */
+#define PEER_ENC_3BYTES_MIN ((1ULL << 11) | PEER_ENC_2BYTES_MIN) /* 0x8f0 (or 2288) */
+/* 4 bytes */
+#define PEER_ENC_4BYTES_MIN ((1ULL << 18) | PEER_ENC_3BYTES_MIN) /* 0x408f0 (or 264432) */
+/* 5 bytes */
+#define PEER_ENC_5BYTES_MIN ((1ULL << 25) | PEER_ENC_4BYTES_MIN) /* 0x20408f0 (or 33818864) */
+/* 6 bytes */
+#define PEER_ENC_6BYTES_MIN ((1ULL << 32) | PEER_ENC_5BYTES_MIN) /* 0x1020408f0 (or 4328786160) */
+/* 7 bytes */
+#define PEER_ENC_7BYTES_MIN ((1ULL << 39) | PEER_ENC_6BYTES_MIN) /* 0x81020408f0 (or 554084600048) */
+/* 8 bytes */
+#define PEER_ENC_8BYTES_MIN ((1ULL << 46) | PEER_ENC_7BYTES_MIN) /* 0x4081020408f0 (or 70922828777712) */
+/* 9 bytes */
+#define PEER_ENC_9BYTES_MIN ((1ULL << 53) | PEER_ENC_8BYTES_MIN) /* 0x204081020408f0 (or 9078122083518704) */
+/* 10 bytes */
+#define PEER_ENC_10BYTES_MIN ((1ULL << 60) | PEER_ENC_9BYTES_MIN) /* 0x10204081020408f0 (or 1161999626690365680) */
+
+/* Bit #7 is used to detect the last byte to be encoded */
+#define PEER_ENC_STOP_BIT 7
+/* The minimum byte value with bit #7 set */
+#define PEER_ENC_STOP_BYTE (1 << PEER_ENC_STOP_BIT)
+/* The left most number of bits set for PEER_ENC_2BYTES_MIN */
+#define PEER_ENC_2BYTES_MIN_BITS 4
+
+#define PEER_MSG_HEADER_LEN 2
+
+#define PEER_STKT_CACHE_MAX_ENTRIES 128
+
+/**********************************/
+/* Peer Session IO handler states */
+/**********************************/
+
+enum {
+ PEER_SESS_ST_ACCEPT = 0, /* Initial state for a session created by an accept, must be zero! */
+ PEER_SESS_ST_GETVERSION, /* Validate supported protocol version */
+ PEER_SESS_ST_GETHOST, /* Validate that the host ID corresponds to the local host id */
+ PEER_SESS_ST_GETPEER, /* Validate that the peer ID corresponds to a known remote peer id */
+ /* after this point, data may have been exchanged */
+ PEER_SESS_ST_SENDSUCCESS, /* Send ret code 200 (success) and wait for message */
+ PEER_SESS_ST_CONNECT, /* Initial state for a session created on a connect, push presentation into buffer */
+ PEER_SESS_ST_GETSTATUS, /* Wait for the welcome message */
+ PEER_SESS_ST_WAITMSG, /* Wait for data messages */
+ PEER_SESS_ST_EXIT, /* Exit with status code */
+ PEER_SESS_ST_ERRPROTO, /* Send error proto message before exit */
+ PEER_SESS_ST_ERRSIZE, /* Send error size message before exit */
+ PEER_SESS_ST_END, /* Killed session */
+};
+
+/***************************************************/
+/* Peer Session status code - part of the protocol */
+/***************************************************/
+
+#define PEER_SESS_SC_CONNECTCODE 100 /* connect in progress */
+#define PEER_SESS_SC_CONNECTEDCODE 110 /* tcp connect success */
+
+#define PEER_SESS_SC_SUCCESSCODE 200 /* accept or connect successful */
+
+#define PEER_SESS_SC_TRYAGAIN 300 /* try again later */
+
+#define PEER_SESS_SC_ERRPROTO 501 /* error protocol */
+#define PEER_SESS_SC_ERRVERSION 502 /* unknown protocol version */
+#define PEER_SESS_SC_ERRHOST 503 /* bad host name */
+#define PEER_SESS_SC_ERRPEER 504 /* unknown peer */
+
+#define PEER_SESSION_PROTO_NAME "HAProxyS"
+#define PEER_MAJOR_VER 2
+#define PEER_MINOR_VER 1
+#define PEER_DWNGRD_MINOR_VER 0
+
+static size_t proto_len = sizeof(PEER_SESSION_PROTO_NAME) - 1;
+struct peers *cfg_peers = NULL;
+static int peers_max_updates_at_once = PEER_DEF_MAX_UPDATES_AT_ONCE;
+static void peer_session_forceshutdown(struct peer *peer);
+
+static struct ebpt_node *dcache_tx_insert(struct dcache *dc,
+ struct dcache_tx_entry *i);
+static inline void flush_dcache(struct peer *peer);
+
+/* trace source and events */
+static void peers_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+static const struct trace_event peers_trace_events[] = {
+#define PEERS_EV_UPDTMSG (1 << 0)
+ { .mask = PEERS_EV_UPDTMSG, .name = "updtmsg", .desc = "update message received" },
+#define PEERS_EV_ACKMSG (1 << 1)
+ { .mask = PEERS_EV_ACKMSG, .name = "ackmsg", .desc = "ack message received" },
+#define PEERS_EV_SWTCMSG (1 << 2)
+ { .mask = PEERS_EV_SWTCMSG, .name = "swtcmsg", .desc = "switch message received" },
+#define PEERS_EV_DEFMSG (1 << 3)
+ { .mask = PEERS_EV_DEFMSG, .name = "defmsg", .desc = "definition message received" },
+#define PEERS_EV_CTRLMSG (1 << 4)
+ { .mask = PEERS_EV_CTRLMSG, .name = "ctrlmsg", .desc = "control message sent/received" },
+#define PEERS_EV_SESSREL (1 << 5)
+ { .mask = PEERS_EV_SESSREL, .name = "sessrl", .desc = "peer session releasing" },
+#define PEERS_EV_PROTOERR (1 << 6)
+ { .mask = PEERS_EV_PROTOERR, .name = "protoerr", .desc = "protocol error" },
+};
+
+static const struct name_desc peers_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="peers", .desc="Peers protocol" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc peers_trace_decoding[] = {
+#define PEERS_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+ { /* end */ }
+};
+
+
+struct trace_source trace_peers = {
+ .name = IST("peers"),
+ .desc = "Peers protocol",
+ .arg_def = TRC_ARG1_CONN, /* TRACE()'s first argument is always a connection */
+ .default_cb = peers_trace,
+ .known_events = peers_trace_events,
+ .lockon_args = peers_trace_lockon_args,
+ .decoding = peers_trace_decoding,
+ .report_events = ~0, /* report everything by default */
+};
+
+/* Return peer control message types as strings (only for debugging purposes). */
+static inline char *ctrl_msg_type_str(unsigned int type)
+{
+ switch (type) {
+ case PEER_MSG_CTRL_RESYNCREQ:
+ return "RESYNCREQ";
+ case PEER_MSG_CTRL_RESYNCFINISHED:
+ return "RESYNCFINISHED";
+ case PEER_MSG_CTRL_RESYNCPARTIAL:
+ return "RESYNCPARTIAL";
+ case PEER_MSG_CTRL_RESYNCCONFIRM:
+ return "RESYNCCONFIRM";
+ case PEER_MSG_CTRL_HEARTBEAT:
+ return "HEARTBEAT";
+ default:
+ return "???";
+ }
+}
+
+#define TRACE_SOURCE &trace_peers
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+static void peers_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ if (mask & (PEERS_EV_UPDTMSG|PEERS_EV_ACKMSG|PEERS_EV_SWTCMSG)) {
+ if (a2) {
+ const struct peer *peer = a2;
+
+ chunk_appendf(&trace_buf, " peer=%s", peer->id);
+ }
+ if (a3) {
+ const char *p = a3;
+
+ chunk_appendf(&trace_buf, " @%p", p);
+ }
+ if (a4) {
+ const size_t *val = a4;
+
+ chunk_appendf(&trace_buf, " %llu", (unsigned long long)*val);
+ }
+ }
+
+ if (mask & PEERS_EV_DEFMSG) {
+ if (a2) {
+ const struct peer *peer = a2;
+
+ chunk_appendf(&trace_buf, " peer=%s", peer->id);
+ }
+ if (a3) {
+ const char *p = a3;
+
+ chunk_appendf(&trace_buf, " @%p", p);
+ }
+ if (a4) {
+ const int *val = a4;
+
+ chunk_appendf(&trace_buf, " %d", *val);
+ }
+ }
+
+ if (mask & PEERS_EV_CTRLMSG) {
+ if (a2) {
+ const unsigned char *ctrl_msg_type = a2;
+
+ chunk_appendf(&trace_buf, " %s", ctrl_msg_type_str(*ctrl_msg_type));
+
+ }
+ if (a3) {
+ const char *local_peer = a3;
+
+ chunk_appendf(&trace_buf, " %s", local_peer);
+ }
+
+ if (a4) {
+ const char *remote_peer = a4;
+
+ chunk_appendf(&trace_buf, " -> %s", remote_peer);
+ }
+ }
+
+ if (mask & (PEERS_EV_SESSREL|PEERS_EV_PROTOERR)) {
+ if (a2) {
+ const struct peer *peer = a2;
+ struct peers *peers = NULL;
+
+ if (peer->appctx)
+ peers = peer->peers;
+
+ if (peers)
+ chunk_appendf(&trace_buf, " %s", peers->local->id);
+ chunk_appendf(&trace_buf, " -> %s", peer->id);
+ }
+
+ if (a3) {
+ const int *prev_state = a3;
+
+ chunk_appendf(&trace_buf, " prev_state=%d\n", *prev_state);
+ }
+ }
+}
+
+static const char *statuscode_str(int statuscode)
+{
+ switch (statuscode) {
+ case PEER_SESS_SC_CONNECTCODE:
+ return "CONN";
+ case PEER_SESS_SC_CONNECTEDCODE:
+ return "HSHK";
+ case PEER_SESS_SC_SUCCESSCODE:
+ return "ESTA";
+ case PEER_SESS_SC_TRYAGAIN:
+ return "RETR";
+ case PEER_SESS_SC_ERRPROTO:
+ return "PROT";
+ case PEER_SESS_SC_ERRVERSION:
+ return "VERS";
+ case PEER_SESS_SC_ERRHOST:
+ return "NAME";
+ case PEER_SESS_SC_ERRPEER:
+ return "UNKN";
+ default:
+ return "NONE";
+ }
+}
+
+/* This function encodes a uint64 in the 'dynamic' length (varint) format.
+ The encoded value is written at address *str, and the
+ caller must ensure that the room after *str is large enough.
+ On return, *str is set to the next byte after the
+ encoded integer. The function returns the length of the
+ encoded integer in bytes */
+int intencode(uint64_t i, char **str)
+{
+ int idx = 0;
+ unsigned char *msg;
+
+ msg = (unsigned char *)*str;
+ if (i < PEER_ENC_2BYTES_MIN) {
+ msg[0] = (unsigned char)i;
+ *str = (char *)&msg[idx+1];
+ return (idx+1);
+ }
+
+ msg[idx] = (unsigned char)i | PEER_ENC_2BYTES_MIN;
+ i = (i - PEER_ENC_2BYTES_MIN) >> PEER_ENC_2BYTES_MIN_BITS;
+ while (i >= PEER_ENC_STOP_BYTE) {
+ msg[++idx] = (unsigned char)i | PEER_ENC_STOP_BYTE;
+ i = (i - PEER_ENC_STOP_BYTE) >> PEER_ENC_STOP_BIT;
+ }
+ msg[++idx] = (unsigned char)i;
+ *str = (char *)&msg[idx+1];
+ return (idx+1);
+}
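+
+/* Worked example of the encoding above (informative): values below
+ * PEER_ENC_2BYTES_MIN (240) fit in a single byte. The value 1000 is emitted
+ * as the two bytes 0xF8 0x2F: 0xF8 carries the low 4 bits OR'ed with the
+ * 0xF0 prefix, then (1000 - 240) >> 4 = 47 = 0x2F terminates the sequence
+ * since it is below PEER_ENC_STOP_BYTE (0x80).
+ */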
+
+
+/* This function returns a 64-bit unsigned integer
+ * decoded from a varint.
+ *
+ * Calling:
+ * - *str must point to the first byte of the buffer to decode.
+ * - end must point to the next byte after the end of the buffer
+ * we are authorized to parse (buf + buflen)
+ *
+ * On return:
+ *
+ * On success *str points to the byte following
+ * the fully decoded integer in the buffer, and
+ * the decoded value is returned.
+ *
+ * If end is reached before the integer is fully decoded,
+ * *str is set to NULL and the caller has to check this
+ * to detect the decoding error. In this case
+ * the returned integer is also forced to 0.
+ */
+uint64_t intdecode(char **str, char *end)
+{
+ unsigned char *msg;
+ uint64_t i;
+ int shift;
+
+ if (!*str)
+ return 0;
+
+ msg = (unsigned char *)*str;
+ if (msg >= (unsigned char *)end)
+ goto fail;
+
+ i = *(msg++);
+ if (i >= PEER_ENC_2BYTES_MIN) {
+ shift = PEER_ENC_2BYTES_MIN_BITS;
+ do {
+ if (msg >= (unsigned char *)end)
+ goto fail;
+ i += (uint64_t)*msg << shift;
+ shift += PEER_ENC_STOP_BIT;
+ } while (*(msg++) >= PEER_ENC_STOP_BYTE);
+ }
+ *str = (char *)msg;
+ return i;
+
+ fail:
+ *str = NULL;
+ return 0;
+}
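+
+/* Usage sketch (informative, names are hypothetical): decoding a length
+ * prefix from a message buffer, with the mandatory error check on the cursor:
+ *
+ *     char *cur = buf;
+ *     uint64_t len = intdecode(&cur, buf + buflen);
+ *     if (!cur)
+ *             goto malformed;
+ */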
+
+/*
+ * Build a "hello" peer protocol message.
+ * Return the number of bytes written to build this message if it succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_hellomsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ int min_ver, ret;
+ struct peer *peer;
+
+ peer = p->hello.peer;
+ min_ver = (peer->flags & PEER_F_DWNGRD) ? PEER_DWNGRD_MINOR_VER : PEER_MINOR_VER;
+ /* Prepare headers */
+ ret = snprintf(msg, size, PEER_SESSION_PROTO_NAME " %d.%d\n%s\n%s %d %d\n",
+ (int)PEER_MAJOR_VER, min_ver, peer->id, localpeer, (int)getpid(), (int)1);
+ if (ret >= size)
+ return 0;
+
+ return ret;
+}
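+
+/* Informative example of the resulting banner (peer names and pid made up):
+ *
+ *     HAProxyS 2.1
+ *     hap2
+ *     hap1 1234 1
+ */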
+
+/*
+ * Build a "handshake succeeded" status message.
+ * Return the number of bytes written to build this message if it succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_status_successmsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ int ret;
+
+ ret = snprintf(msg, size, "%d\n", (int)PEER_SESS_SC_SUCCESSCODE);
+ if (ret >= size)
+ return 0;
+
+ return ret;
+}
+
+/*
+ * Build an error status message.
+ * Return the number of bytes written to build this message if it succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_status_errormsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ int ret;
+ unsigned int st1;
+
+ st1 = p->error_status.st1;
+ ret = snprintf(msg, size, "%u\n", st1);
+ if (ret >= size)
+ return 0;
+
+ return ret;
+}
+
+/* Set the stick-table UPDATE message type byte at <msg_type> address,
+ * depending on <use_identifier> and <use_timed> boolean parameters.
+ * Always successful.
+ */
+static inline void peer_set_update_msg_type(char *msg_type, int use_identifier, int use_timed)
+{
+ if (use_timed) {
+ if (use_identifier)
+ *msg_type = PEER_MSG_STKT_UPDATE_TIMED;
+ else
+ *msg_type = PEER_MSG_STKT_INCUPDATE_TIMED;
+ }
+ else {
+ if (use_identifier)
+ *msg_type = PEER_MSG_STKT_UPDATE;
+ else
+ *msg_type = PEER_MSG_STKT_INCUPDATE;
+ }
+}
+/*
+ * This prepares the data update message for the stick session <ts>; <st> is the
+ * considered stick table.
+ * <msg> is a buffer of <size> bytes to receive the data message content.
+ * If the function returns 0, the caller should consider we were unable to encode
+ * this message (TODO: check size).
+ */
+static int peer_prepare_updatemsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ uint32_t netinteger;
+ unsigned short datalen;
+ char *cursor, *datamsg;
+ unsigned int data_type;
+ void *data_ptr;
+ struct stksess *ts;
+ struct shared_table *st;
+ unsigned int updateid;
+ int use_identifier;
+ int use_timed;
+ struct peer *peer;
+
+ ts = p->updt.stksess;
+ st = p->updt.shared_table;
+ updateid = p->updt.updateid;
+ use_identifier = p->updt.use_identifier;
+ use_timed = p->updt.use_timed;
+ peer = p->updt.peer;
+
+ cursor = datamsg = msg + PEER_MSG_HEADER_LEN + PEER_MSG_ENC_LENGTH_MAXLEN;
+
+ /* construct message */
+
+ /* check if we need to send the update identifier */
+ if (!st->last_pushed || updateid < st->last_pushed || ((updateid - st->last_pushed) != 1)) {
+ use_identifier = 1;
+ }
+
+ /* encode update identifier if needed */
+ if (use_identifier) {
+ netinteger = htonl(updateid);
+ memcpy(cursor, &netinteger, sizeof(netinteger));
+ cursor += sizeof(netinteger);
+ }
+
+ if (use_timed) {
+ netinteger = htonl(tick_remain(now_ms, ts->expire));
+ memcpy(cursor, &netinteger, sizeof(netinteger));
+ cursor += sizeof(netinteger);
+ }
+
+ /* encode the key */
+ if (st->table->type == SMP_T_STR) {
+ int stlen = strlen((char *)ts->key.key);
+
+ intencode(stlen, &cursor);
+ memcpy(cursor, ts->key.key, stlen);
+ cursor += stlen;
+ }
+ else if (st->table->type == SMP_T_SINT) {
+ netinteger = htonl(read_u32(ts->key.key));
+ memcpy(cursor, &netinteger, sizeof(netinteger));
+ cursor += sizeof(netinteger);
+ }
+ else {
+ memcpy(cursor, ts->key.key, st->table->key_size);
+ cursor += st->table->key_size;
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
+ /* encode values */
+ for (data_type = 0 ; data_type < STKTABLE_DATA_TYPES ; data_type++) {
+
+ data_ptr = stktable_data_ptr(st->table, ts, data_type);
+ if (data_ptr) {
+ /* in case of array all elements use
+ * the same std_type and they are linearly
+ * encoded.
+ */
+ if (stktable_data_types[data_type].is_array) {
+ unsigned int idx = 0;
+
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT: {
+ int data;
+
+ do {
+ data = stktable_data_cast(data_ptr, std_t_sint);
+ intencode(data, &cursor);
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, ++idx);
+ } while(data_ptr);
+ break;
+ }
+ case STD_T_UINT: {
+ unsigned int data;
+
+ do {
+ data = stktable_data_cast(data_ptr, std_t_uint);
+ intencode(data, &cursor);
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, ++idx);
+ } while(data_ptr);
+ break;
+ }
+ case STD_T_ULL: {
+ unsigned long long data;
+
+ do {
+ data = stktable_data_cast(data_ptr, std_t_ull);
+ intencode(data, &cursor);
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, ++idx);
+ } while(data_ptr);
+ break;
+ }
+ case STD_T_FRQP: {
+ struct freq_ctr *frqp;
+
+ do {
+ frqp = &stktable_data_cast(data_ptr, std_t_frqp);
+ intencode((unsigned int)(now_ms - frqp->curr_tick), &cursor);
+ intencode(frqp->curr_ctr, &cursor);
+ intencode(frqp->prev_ctr, &cursor);
+
+ data_ptr = stktable_data_ptr_idx(st->table, ts, data_type, ++idx);
+ } while(data_ptr);
+ break;
+ }
+ }
+
+ /* array elements fully encoded
+ * proceed next data_type.
+ */
+ continue;
+ }
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT: {
+ int data;
+
+ data = stktable_data_cast(data_ptr, std_t_sint);
+ intencode(data, &cursor);
+ break;
+ }
+ case STD_T_UINT: {
+ unsigned int data;
+
+ data = stktable_data_cast(data_ptr, std_t_uint);
+ intencode(data, &cursor);
+ break;
+ }
+ case STD_T_ULL: {
+ unsigned long long data;
+
+ data = stktable_data_cast(data_ptr, std_t_ull);
+ intencode(data, &cursor);
+ break;
+ }
+ case STD_T_FRQP: {
+ struct freq_ctr *frqp;
+
+ frqp = &stktable_data_cast(data_ptr, std_t_frqp);
+ intencode((unsigned int)(now_ms - frqp->curr_tick), &cursor);
+ intencode(frqp->curr_ctr, &cursor);
+ intencode(frqp->prev_ctr, &cursor);
+ break;
+ }
+ case STD_T_DICT: {
+ struct dict_entry *de;
+ struct ebpt_node *cached_de;
+ struct dcache_tx_entry cde = { };
+ char *beg, *end;
+ size_t value_len, data_len;
+ struct dcache *dc;
+
+ de = stktable_data_cast(data_ptr, std_t_dict);
+ if (!de) {
+ /* No entry */
+ intencode(0, &cursor);
+ break;
+ }
+
+ dc = peer->dcache;
+ cde.entry.key = de;
+ cached_de = dcache_tx_insert(dc, &cde);
+ if (cached_de == &cde.entry) {
+ if (cde.id + 1 >= PEER_ENC_2BYTES_MIN)
+ break;
+ /* Encode the length of the remaining data -> 1 */
+ intencode(1, &cursor);
+ /* Encode the cache entry ID */
+ intencode(cde.id + 1, &cursor);
+ }
+ else {
+ /* Leave enough room to encode the remaining data length. */
+ end = beg = cursor + PEER_MSG_ENC_LENGTH_MAXLEN;
+ /* Encode the dictionary entry key */
+ intencode(cde.id + 1, &end);
+ /* Encode the length of the dictionary entry data */
+ value_len = de->len;
+ intencode(value_len, &end);
+ /* Copy the data */
+ memcpy(end, de->value.key, value_len);
+ end += value_len;
+ /* Encode the length of the data */
+ data_len = end - beg;
+ intencode(data_len, &cursor);
+ memmove(cursor, beg, data_len);
+ cursor += data_len;
+ }
+ break;
+ }
+ }
+ }
+ }
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* Compute datalen */
+ datalen = (cursor - datamsg);
+
+ /* prepare message header */
+ msg[0] = PEER_MSG_CLASS_STICKTABLE;
+ peer_set_update_msg_type(&msg[1], use_identifier, use_timed);
+ cursor = &msg[2];
+ intencode(datalen, &cursor);
+
+ /* move data after header */
+ memmove(cursor, datamsg, datalen);
+
+ /* return header size + data_len */
+ return (cursor - msg) + datalen;
+}
+
+/*
+ * This prepares the switch table message for the targeted shared table <st>.
+ * <msg> is a buffer of <size> bytes to receive the data message content.
+ * If the function returns 0, the caller should consider we were unable to encode
+ * this message (TODO: check size).
+ */
+static int peer_prepare_switchmsg(char *msg, size_t size, struct peer_prep_params *params)
+{
+ int len;
+ unsigned short datalen;
+ struct buffer *chunk;
+ char *cursor, *datamsg, *chunkp, *chunkq;
+ uint64_t data = 0;
+ unsigned int data_type;
+ struct shared_table *st;
+
+ st = params->swtch.shared_table;
+ cursor = datamsg = msg + PEER_MSG_HEADER_LEN + PEER_MSG_ENC_LENGTH_MAXLEN;
+
+ /* Encode data */
+
+ /* encode local id */
+ intencode(st->local_id, &cursor);
+
+ /* encode table name */
+ len = strlen(st->table->nid);
+ intencode(len, &cursor);
+ memcpy(cursor, st->table->nid, len);
+ cursor += len;
+
+ /* encode table type */
+
+ intencode(peer_net_key_type[st->table->type], &cursor);
+
+ /* encode table key size */
+ intencode(st->table->key_size, &cursor);
+
+ chunk = get_trash_chunk();
+ chunkp = chunkq = chunk->area;
+ /* encode available known data types in table */
+ for (data_type = 0 ; data_type < STKTABLE_DATA_TYPES ; data_type++) {
+ if (st->table->data_ofs[data_type]) {
+ /* stored data types parameters are all linearly encoded
+ * at the end of the 'table definition' message.
+ *
+ * Currently only array data_types and data_types
+ * using freq_counter base type have parameters:
+ *
+ * - array has always at least one parameter set to the
+ * number of elements.
+ *
+ * - array of base-type freq_counters has an additional
+ * parameter set to the period used to compute those
+ * freq_counters.
+ *
+ * - simple freq counter has a parameter set to the period
+ * used to compute
+ *
+ * A set of parameter for a datatype MUST BE prefixed
+ * by the data-type id itself:
+ * This is useless because the data_types are ordered and
+ * the data_type bitfield already gives the information of
+ * stored types, but it was designed this way when the
+ * push of period parameter was added for freq counters
+ * and we don't want to break the compatibility.
+ *
+ */
+ if (stktable_data_types[data_type].is_array) {
+ /* This is an array type so we first encode
+ * the data_type itself to prefix parameters
+ */
+ intencode(data_type, &chunkq);
+
+ /* We encode the first parameter which is
+ * the number of elements of this array
+ */
+ intencode(st->table->data_nbelem[data_type], &chunkq);
+
+ /* for array of freq counters, there is an additional
+ * period parameter to encode
+ */
+ if (stktable_data_types[data_type].std_type == STD_T_FRQP)
+ intencode(st->table->data_arg[data_type].u, &chunkq);
+ }
+ else if (stktable_data_types[data_type].std_type == STD_T_FRQP) {
+ /* this datatype is a simple freq counter not part
+ * of an array. We encode the data_type itself
+ * to prefix the 'period' parameter
+ */
+ intencode(data_type, &chunkq);
+ intencode(st->table->data_arg[data_type].u, &chunkq);
+ }
+ /* set the bit corresponding to stored data type */
+ data |= 1ULL << data_type;
+ }
+ }
+ intencode(data, &cursor);
+
+ /* Encode stick-table entries duration. */
+ intencode(st->table->expire, &cursor);
+
+ if (chunkq > chunkp) {
+ chunk->data = chunkq - chunkp;
+ memcpy(cursor, chunk->area, chunk->data);
+ cursor += chunk->data;
+ }
+
+ /* Compute datalen */
+ datalen = (cursor - datamsg);
+
+ /* prepare message header */
+ msg[0] = PEER_MSG_CLASS_STICKTABLE;
+ msg[1] = PEER_MSG_STKT_DEFINE;
+ cursor = &msg[2];
+ intencode(datalen, &cursor);
+
+ /* move data after header */
+ memmove(cursor, datamsg, datalen);
+
+ /* return header size + data_len */
+ return (cursor - msg) + datalen;
+}
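+
+/* For illustration only: the payload built above is laid out as
+ *
+ *   varint local_id | varint name_len | name | varint key_type |
+ *   varint key_size | varint data (stored-types bitfield) |
+ *   varint expire | optional per data-type parameters (see comment above)
+ */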
+
+/*
+ * This prepares the acknowledgement message for <st>, the considered shared
+ * stick-table.
+ * <msg> is a buffer of <size> bytes to receive the data message content.
+ * If the function returns 0, the caller should consider we were unable to encode this message (TODO:
+ * check size)
+ */
+static int peer_prepare_ackmsg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ unsigned short datalen;
+ char *cursor, *datamsg;
+ uint32_t netinteger;
+ struct shared_table *st;
+
+ cursor = datamsg = msg + PEER_MSG_HEADER_LEN + PEER_MSG_ENC_LENGTH_MAXLEN;
+
+ st = p->ack.shared_table;
+ intencode(st->remote_id, &cursor);
+ netinteger = htonl(st->last_get);
+ memcpy(cursor, &netinteger, sizeof(netinteger));
+ cursor += sizeof(netinteger);
+
+ /* Compute datalen */
+ datalen = (cursor - datamsg);
+
+ /* prepare message header */
+ msg[0] = PEER_MSG_CLASS_STICKTABLE;
+ msg[1] = PEER_MSG_STKT_ACK;
+ cursor = &msg[2];
+ intencode(datalen, &cursor);
+
+ /* move data after header */
+ memmove(cursor, datamsg, datalen);
+
+ /* return header size + data_len */
+ return (cursor - msg) + datalen;
+}
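+
+/* For illustration only: the ack payload built above is simply
+ *
+ *   varint remote_id | 32-bit last_get in network byte order
+ */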
+
+/*
+ * Function to deinitialize a connected peer
+ */
+void __peer_session_deinit(struct peer *peer)
+{
+ struct peers *peers = peer->peers;
+ int thr;
+
+ if (!peers || !peer->appctx)
+ return;
+
+ thr = peer->appctx->t->tid;
+ HA_ATOMIC_DEC(&peers->applet_count[thr]);
+
+ if (peer->appctx->st0 == PEER_SESS_ST_WAITMSG)
+ HA_ATOMIC_DEC(&connected_peers);
+
+ HA_ATOMIC_DEC(&active_peers);
+
+ flush_dcache(peer);
+
+ /* Re-init current table pointers to force announcement on re-connect */
+ peer->remote_table = peer->last_local_table = peer->stop_local_table = NULL;
+ peer->appctx = NULL;
+ if (peer->flags & PEER_F_LEARN_ASSIGN) {
+ /* unassign current peer for learning */
+ peer->flags &= ~(PEER_F_LEARN_ASSIGN);
+ peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
+
+ if (peer->local)
+ peers->flags |= PEERS_F_RESYNC_LOCALABORT;
+ else
+ peers->flags |= PEERS_F_RESYNC_REMOTEABORT;
+ /* reschedule a resync */
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(5000));
+ }
+ /* reset teaching and learning flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+ peer->flags &= PEER_LEARN_RESET;
+ task_wakeup(peers->sync_task, TASK_WOKEN_MSG);
+}
+
+static int peer_session_init(struct appctx *appctx)
+{
+ struct peer *peer = appctx->svcctx;
+ struct stream *s;
+ struct sockaddr_storage *addr = NULL;
+
+ if (!sockaddr_alloc(&addr, &peer->addr, sizeof(peer->addr)))
+ goto out_error;
+
+ if (appctx_finalize_startup(appctx, peer->peers->peers_fe, &BUF_NULL) == -1)
+ goto out_free_addr;
+
+ s = appctx_strm(appctx);
+ /* applet is waiting for data */
+ applet_need_more_data(appctx);
+ appctx_wakeup(appctx);
+
+ /* initiate an outgoing connection */
+ s->scb->dst = addr;
+ s->scb->flags |= (SC_FL_RCV_ONCE|SC_FL_NOLINGER);
+ s->flags = SF_ASSIGNED;
+ s->target = peer_session_target(peer, s);
+
+ s->do_log = NULL;
+ s->uniq_id = 0;
+
+ _HA_ATOMIC_INC(&active_peers);
+ return 0;
+
+ out_free_addr:
+ sockaddr_free(&addr);
+ out_error:
+ return -1;
+}
+
+/*
+ * Callback to release a session with a peer
+ */
+static void peer_session_release(struct appctx *appctx)
+{
+ struct peer *peer = appctx->svcctx;
+
+ TRACE_PROTO("releasing peer session", PEERS_EV_SESSREL, NULL, peer);
+ /* appctx->svcctx is not a peer session */
+ if (appctx->st0 < PEER_SESS_ST_SENDSUCCESS)
+ return;
+
+ /* peer session identified */
+ if (peer) {
+ HA_SPIN_LOCK(PEER_LOCK, &peer->lock);
+ if (peer->appctx == appctx)
+ __peer_session_deinit(peer);
+ peer->flags &= ~PEER_F_ALIVE;
+ HA_SPIN_UNLOCK(PEER_LOCK, &peer->lock);
+ }
+}
+
+/* Retrieve the major and minor versions of peers protocol
+ * announced by a remote peer. <str> is a null-terminated
+ * string with the following format: "<maj_ver>.<min_ver>".
+ */
+static int peer_get_version(const char *str,
+ unsigned int *maj_ver, unsigned int *min_ver)
+{
+ unsigned int majv, minv;
+ const char *pos, *saved;
+ const char *end;
+
+ saved = pos = str;
+ end = str + strlen(str);
+
+ majv = read_uint(&pos, end);
+ if (saved == pos || *pos++ != '.')
+ return -1;
+
+ saved = pos;
+ minv = read_uint(&pos, end);
+ if (saved == pos || pos != end)
+ return -1;
+
+ *maj_ver = majv;
+ *min_ver = minv;
+
+ return 0;
+}
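+
+/* Example (hypothetical input, for illustration): "2.1" yields maj_ver=2 and
+ * min_ver=1, while "2." and "2x1" both fail with -1 since a digit sequence
+ * must directly precede the dot and the string must end right after the
+ * minor version.
+ */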
+
+/*
+ * Parse a line terminated by an optional '\r' character, followed by a mandatory
+ * '\n' character.
+ * Returns the line length (delimiters included) if succeeded, 0 if a full line
+ * could not be read yet, and -1 if a line could not be read because the
+ * communication channel is closed or the line is not properly terminated.
+ */
+static inline int peer_getline(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ int n;
+
+ n = co_getline(sc_oc(sc), trash.area, trash.size);
+ if (!n)
+ return 0;
+
+ if (n < 0 || trash.area[n - 1] != '\n') {
+ appctx->st0 = PEER_SESS_ST_END;
+ return -1;
+ }
+
+ if (n > 1 && (trash.area[n - 2] == '\r'))
+ trash.area[n - 2] = 0;
+ else
+ trash.area[n - 1] = 0;
+
+ co_skip(sc_oc(sc), n);
+
+ return n;
+}
+
+/*
+ * Send a message after having called <peer_prepare_msg> to build it.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_msg(struct appctx *appctx,
+ int (*peer_prepare_msg)(char *, size_t, struct peer_prep_params *),
+ struct peer_prep_params *params)
+{
+ int ret, msglen;
+
+ msglen = peer_prepare_msg(trash.area, trash.size, params);
+ if (!msglen) {
+ /* internal error: message does not fit in trash */
+ appctx->st0 = PEER_SESS_ST_END;
+ return 0;
+ }
+
+ /* message to buffer */
+ ret = applet_putblk(appctx, trash.area, msglen);
+ if (ret <= 0) {
+ if (ret != -1)
+ appctx->st0 = PEER_SESS_ST_END;
+ }
+
+ return ret;
+}
+
+/*
+ * Send a hello message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_hellomsg(struct appctx *appctx, struct peer *peer)
+{
+ struct peer_prep_params p = {
+ .hello.peer = peer,
+ };
+
+ return peer_send_msg(appctx, peer_prepare_hellomsg, &p);
+}
+
+/*
+ * Send a success peer handshake status message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_status_successmsg(struct appctx *appctx)
+{
+ return peer_send_msg(appctx, peer_prepare_status_successmsg, NULL);
+}
+
+/*
+ * Send a peer handshake status error message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_status_errormsg(struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .error_status.st1 = appctx->st1,
+ };
+
+ return peer_send_msg(appctx, peer_prepare_status_errormsg, &p);
+}
+
+/*
+ * Send a stick-table switch message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_switchmsg(struct shared_table *st, struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .swtch.shared_table = st,
+ };
+
+ return peer_send_msg(appctx, peer_prepare_switchmsg, &p);
+}
+
+/*
+ * Send a stick-table update acknowledgement message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_ackmsg(struct shared_table *st, struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .ack.shared_table = st,
+ };
+
+ return peer_send_msg(appctx, peer_prepare_ackmsg, &p);
+}
+
+/*
+ * Send a stick-table update message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_updatemsg(struct shared_table *st, struct appctx *appctx, struct stksess *ts,
+ unsigned int updateid, int use_identifier, int use_timed)
+{
+ struct peer_prep_params p = {
+ .updt = {
+ .stksess = ts,
+ .shared_table = st,
+ .updateid = updateid,
+ .use_identifier = use_identifier,
+ .use_timed = use_timed,
+ .peer = appctx->svcctx,
+ },
+ };
+
+ return peer_send_msg(appctx, peer_prepare_updatemsg, &p);
+}
+
+/*
+ * Build a peer protocol control class message.
+ * Returns the number of written bytes used to build the message if succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_control_msg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ if (size < sizeof p->control.head)
+ return 0;
+
+ msg[0] = p->control.head[0];
+ msg[1] = p->control.head[1];
+
+ return 2;
+}
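+
+/* A control message is always exactly these two bytes (class and type), e.g.
+ * { PEER_MSG_CLASS_CONTROL, PEER_MSG_CTRL_HEARTBEAT } for a heartbeat, as
+ * built by the senders below; no length nor payload follows.
+ */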
+
+/*
+ * Send a stick-table synchronization request message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_resync_reqmsg(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ struct peer_prep_params p = {
+ .control.head = { PEER_MSG_CLASS_CONTROL, PEER_MSG_CTRL_RESYNCREQ, },
+ };
+
+ TRACE_PROTO("send control message", PEERS_EV_CTRLMSG,
+ NULL, &p.control.head[1], peers->local->id, peer->id);
+
+ return peer_send_msg(appctx, peer_prepare_control_msg, &p);
+}
+
+/*
+ * Send a stick-table synchronization confirmation message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_resync_confirmsg(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ struct peer_prep_params p = {
+ .control.head = { PEER_MSG_CLASS_CONTROL, PEER_MSG_CTRL_RESYNCCONFIRM, },
+ };
+
+ TRACE_PROTO("send control message", PEERS_EV_CTRLMSG,
+ NULL, &p.control.head[1], peers->local->id, peer->id);
+
+ return peer_send_msg(appctx, peer_prepare_control_msg, &p);
+}
+
+/*
+ * Send a stick-table synchronization finished message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_resync_finishedmsg(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ struct peer_prep_params p = {
+ .control.head = { PEER_MSG_CLASS_CONTROL, },
+ };
+
+ p.control.head[1] = (peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED ?
+ PEER_MSG_CTRL_RESYNCFINISHED : PEER_MSG_CTRL_RESYNCPARTIAL;
+
+ TRACE_PROTO("send control message", PEERS_EV_CTRLMSG,
+ NULL, &p.control.head[1], peers->local->id, peer->id);
+
+ return peer_send_msg(appctx, peer_prepare_control_msg, &p);
+}
+
+/*
+ * Send a heartbeat message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_heartbeatmsg(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ struct peer_prep_params p = {
+ .control.head = { PEER_MSG_CLASS_CONTROL, PEER_MSG_CTRL_HEARTBEAT, },
+ };
+
+ TRACE_PROTO("send control message", PEERS_EV_CTRLMSG,
+ NULL, &p.control.head[1], peers->local->id, peer->id);
+
+ return peer_send_msg(appctx, peer_prepare_control_msg, &p);
+}
+
+/*
+ * Build a peer protocol error class message.
+ * Returns the number of written bytes used to build the message if succeeded,
+ * 0 if not.
+ */
+static int peer_prepare_error_msg(char *msg, size_t size, struct peer_prep_params *p)
+{
+ if (size < sizeof p->error.head)
+ return 0;
+
+ msg[0] = p->error.head[0];
+ msg[1] = p->error.head[1];
+
+ return 2;
+}
+
+/*
+ * Send a "size limit reached" error message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_error_size_limitmsg(struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .error.head = { PEER_MSG_CLASS_ERROR, PEER_MSG_ERR_SIZELIMIT, },
+ };
+
+ return peer_send_msg(appctx, peer_prepare_error_msg, &p);
+}
+
+/*
+ * Send a "peer protocol" error message.
+ * Returns 0 if the message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_error_protomsg(struct appctx *appctx)
+{
+ struct peer_prep_params p = {
+ .error.head = { PEER_MSG_CLASS_ERROR, PEER_MSG_ERR_PROTOCOL, },
+ };
+
+ return peer_send_msg(appctx, peer_prepare_error_msg, &p);
+}
+
+/*
+ * Function used to look up recent stick-table updates associated with the
+ * <st> shared stick-table when a lesson must be taught to a peer (PEER_F_LEARN_ASSIGN flag set).
+ */
+static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_table *st)
+{
+ struct eb32_node *eb;
+
+ eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1);
+ if (!eb) {
+ eb = eb32_first(&st->table->updates);
+ if (!eb || (eb->key == st->last_pushed)) {
+ st->table->commitupdate = st->last_pushed = st->table->localupdate;
+ return NULL;
+ }
+ }
+
+	/* if the distance between the last pushed key and the retrieved key
+	 * is greater than the distance between last_pushed and localupdate,
+	 * this means we are beyond localupdate.
+	 */
+ if ((eb->key - st->last_pushed) > (st->table->localupdate - st->last_pushed)) {
+ st->table->commitupdate = st->last_pushed = st->table->localupdate;
+ return NULL;
+ }
+
+ return eb32_entry(eb, struct stksess, upd);
+}
+
+/*
+ * Function used to look up recent stick-table updates associated with the
+ * <st> shared stick-table during the teach state 1 step.
+ */
+static inline struct stksess *peer_teach_stage1_stksess_lookup(struct shared_table *st)
+{
+ struct eb32_node *eb;
+
+ eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1);
+ if (!eb) {
+ st->flags |= SHTABLE_F_TEACH_STAGE1;
+ eb = eb32_first(&st->table->updates);
+ if (eb)
+ st->last_pushed = eb->key - 1;
+ return NULL;
+ }
+
+ return eb32_entry(eb, struct stksess, upd);
+}
+
+/*
+ * Function used to look up recent stick-table updates associated with the
+ * <st> shared stick-table during the teach state 2 step.
+ */
+static inline struct stksess *peer_teach_stage2_stksess_lookup(struct shared_table *st)
+{
+ struct eb32_node *eb;
+
+ eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1);
+ if (!eb || eb->key > st->teaching_origin) {
+ st->flags |= SHTABLE_F_TEACH_STAGE2;
+ return NULL;
+ }
+
+ return eb32_entry(eb, struct stksess, upd);
+}
+
+/*
+ * Generic function to emit update messages for <st> stick-table when a lesson must
+ * be taught to the peer <p>.
+ *
+ * This function temporarily unlocks/relocks <st> when it sends stick-table updates or
+ * when decrementing the stksess refcount in case of any error while sending these updates.
+ * It must be called with the stick-table lock released.
+ *
+ * Returns 0 if any message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ * If it returns 0 or -1, this function leaves <st> locked if it was already locked
+ * when entering this function, and unlocked if it was not.
+ */
+static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p,
+ struct stksess *(*peer_stksess_lookup)(struct shared_table *),
+ struct shared_table *st)
+{
+ int ret, new_pushed, use_timed;
+ int updates_sent = 0;
+
+ ret = 1;
+ use_timed = 0;
+ if (st != p->last_local_table) {
+ ret = peer_send_switchmsg(st, appctx);
+ if (ret <= 0)
+ return ret;
+
+ p->last_local_table = st;
+ }
+
+ if (peer_stksess_lookup != peer_teach_process_stksess_lookup)
+ use_timed = !(p->flags & PEER_F_DWNGRD);
+
+	/* We force new_pushed to 1 to force the identifier in the update message */
+ new_pushed = 1;
+
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &st->table->updt_lock);
+
+ while (1) {
+ struct stksess *ts;
+ unsigned updateid;
+
+ /* push local updates */
+ ts = peer_stksess_lookup(st);
+ if (!ts) {
+ ret = 1; // done
+ break;
+ }
+
+ updateid = ts->upd.key;
+ if (p->srv->shard && ts->shard != p->srv->shard) {
+ /* Skip this entry */
+ st->last_pushed = updateid;
+ new_pushed = 1;
+ continue;
+ }
+
+ HA_ATOMIC_INC(&ts->ref_cnt);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock);
+
+ ret = peer_send_updatemsg(st, appctx, ts, updateid, new_pushed, use_timed);
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &st->table->updt_lock);
+ HA_ATOMIC_DEC(&ts->ref_cnt);
+ if (ret <= 0)
+ break;
+
+ st->last_pushed = updateid;
+
+ if (peer_stksess_lookup == peer_teach_process_stksess_lookup) {
+ uint commitid = _HA_ATOMIC_LOAD(&st->table->commitupdate);
+
+ while ((int)(updateid - commitid) > 0) {
+ if (_HA_ATOMIC_CAS(&st->table->commitupdate, &commitid, updateid))
+ break;
+ __ha_cpu_relax();
+ }
+ }
+
+		/* identifier may not be needed in the next update message */
+ new_pushed = 0;
+
+ updates_sent++;
+ if (updates_sent >= peers_max_updates_at_once) {
+ /* pretend we're full so that we get back ASAP */
+ struct stconn *sc = appctx_sc(appctx);
+
+ sc_need_room(sc, 0);
+ ret = -1;
+ break;
+ }
+ }
+
+ out:
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock);
+ return ret;
+}
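+
+/* Note on the update ID comparisons above (illustration): update IDs are
+ * 32-bit serial numbers which may wrap, so "newer than" is tested with
+ * signed arithmetic on the difference, as in (int)(updateid - commitid) > 0.
+ * E.g. with updateid=5 and commitid=0xFFFFFFFE the difference is 7 > 0, so
+ * updateid is considered more recent despite being numerically smaller.
+ */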
+
+/*
+ * Function to emit update messages for <st> stick-table when a lesson must
+ * be taught to the peer <p> (PEER_F_LEARN_ASSIGN flag set).
+ *
+ * Note that <st> shared stick-table is locked when calling this function, and
+ * the lock is dropped then re-acquired.
+ *
+ * Returns 0 if any message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_teach_process_msgs(struct appctx *appctx, struct peer *p,
+ struct shared_table *st)
+{
+ int ret;
+
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ ret = peer_send_teachmsgs(appctx, p, peer_teach_process_stksess_lookup, st);
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->lock);
+
+ return ret;
+}
+
+/*
+ * Function to emit update messages for <st> stick-table when a lesson must
+ * be taught to the peer <p> during teach state 1 step. It must be called with
+ * the stick-table lock released.
+ *
+ * Returns 0 if any message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_teach_stage1_msgs(struct appctx *appctx, struct peer *p,
+ struct shared_table *st)
+{
+ return peer_send_teachmsgs(appctx, p, peer_teach_stage1_stksess_lookup, st);
+}
+
+/*
+ * Function to emit update messages for <st> stick-table when a lesson must
+ * be taught to the peer <p> during teach state 2 step. It must be called with
+ * the stick-table lock released.
+ *
+ * Returns 0 if any message could not be built, setting the appctx st0 to the
+ * PEER_SESS_ST_END value.
+ * Returns -1 if there was not enough room left to send the message;
+ * any other negative return value must be considered an error, with the appctx
+ * st0 set to PEER_SESS_ST_END.
+ */
+static inline int peer_send_teach_stage2_msgs(struct appctx *appctx, struct peer *p,
+ struct shared_table *st)
+{
+ return peer_send_teachmsgs(appctx, p, peer_teach_stage2_stksess_lookup, st);
+}
+
+
+/*
+ * Function used to parse a stick-table update message after it has been received
+ * by the <p> peer, with <msg_cur> as the address of the pointer to the position in the
+ * receipt buffer and <msg_end> being the position of the end of the stick-table message.
+ * Update <msg_cur> according to the peer protocol specs if no peer protocol error
+ * was encountered.
+ * <exp> must be set if the stick-table entry expires.
+ * <updt> must be set for PEER_MSG_STKT_UPDATE or PEER_MSG_STKT_UPDATE_TIMED stick-table
+ * messages, in this case the stick-table update message is received with a stick-table
+ * update ID.
+ * <totl> is the length of the stick-table update message computed upon receipt.
+ */
+static int peer_treat_updatemsg(struct appctx *appctx, struct peer *p, int updt, int exp,
+ char **msg_cur, char *msg_end, int msg_len, int totl)
+{
+ struct shared_table *st = p->remote_table;
+ struct stktable *table;
+ struct stksess *ts, *newts;
+ struct stksess *wts = NULL; /* write_to stksess */
+ uint32_t update;
+ int expire;
+ unsigned int data_type;
+ size_t keylen;
+ void *data_ptr;
+ char *msg_save;
+
+ TRACE_ENTER(PEERS_EV_UPDTMSG, NULL, p);
+	/* Here we have a data message */
+ if (!st)
+ goto ignore_msg;
+
+ table = st->table;
+
+ expire = MS_TO_TICKS(table->expire);
+
+ if (updt) {
+ if (msg_len < sizeof(update)) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ memcpy(&update, *msg_cur, sizeof(update));
+ *msg_cur += sizeof(update);
+ st->last_get = htonl(update);
+ }
+ else {
+ st->last_get++;
+ }
+
+ if (exp) {
+ size_t expire_sz = sizeof expire;
+
+ if (*msg_cur + expire_sz > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end, &expire_sz);
+ goto malformed_exit;
+ }
+
+ memcpy(&expire, *msg_cur, expire_sz);
+ *msg_cur += expire_sz;
+ expire = ntohl(expire);
+ }
+
+ newts = stksess_new(table, NULL);
+ if (!newts)
+ goto ignore_msg;
+
+ if (table->type == SMP_T_STR) {
+ unsigned int to_read, to_store;
+
+ to_read = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_free_newts;
+ }
+
+ to_store = MIN(to_read, table->key_size - 1);
+ if (*msg_cur + to_store > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end, &to_store);
+ goto malformed_free_newts;
+ }
+
+ keylen = to_store;
+ memcpy(newts->key.key, *msg_cur, keylen);
+ newts->key.key[keylen] = 0;
+ *msg_cur += to_read;
+ }
+ else if (table->type == SMP_T_SINT) {
+ unsigned int netinteger;
+
+ if (*msg_cur + sizeof(netinteger) > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end);
+ goto malformed_free_newts;
+ }
+
+ keylen = sizeof(netinteger);
+ memcpy(&netinteger, *msg_cur, keylen);
+ netinteger = ntohl(netinteger);
+ memcpy(newts->key.key, &netinteger, keylen);
+ *msg_cur += keylen;
+ }
+ else {
+ if (*msg_cur + table->key_size > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end, &table->key_size);
+ goto malformed_free_newts;
+ }
+
+ keylen = table->key_size;
+ memcpy(newts->key.key, *msg_cur, keylen);
+ *msg_cur += keylen;
+ }
+
+ newts->shard = stktable_get_key_shard(table, newts->key.key, keylen);
+
+	/* look up an existing entry */
+ ts = stktable_set_entry(table, newts);
+ if (ts != newts) {
+ stksess_free(table, newts);
+ newts = NULL;
+ }
+
+ msg_save = *msg_cur;
+
+ update_wts:
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ for (data_type = 0 ; data_type < STKTABLE_DATA_TYPES ; data_type++) {
+ uint64_t decoded_int;
+ unsigned int idx;
+ int ignore = 0;
+
+ if (!((1ULL << data_type) & st->remote_data))
+ continue;
+
+		/* We shouldn't learn local-only values. Also, when handling the
+		 * write_to table we must ignore types that may be locally
+		 * processed (those not flagged "as_is") so we don't interfere
+		 * with any potential arithmetic logic performed on them
+		 * (ie: cumulative counters).
+ */
+ if (stktable_data_types[data_type].is_local ||
+ (table != st->table && !stktable_data_types[data_type].as_is))
+ ignore = 1;
+
+ if (stktable_data_types[data_type].is_array) {
+			/* in case of an array, all elements
+			 * use the same std_type and they
+			 * are linearly encoded.
+			 * The number of elements was provided
+			 * by the table definition message
+ */
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT:
+ for (idx = 0; idx < st->remote_data_nbelem[data_type]; idx++) {
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr_idx(table, ts, data_type, idx);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_sint) = decoded_int;
+ }
+ break;
+ case STD_T_UINT:
+ for (idx = 0; idx < st->remote_data_nbelem[data_type]; idx++) {
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr_idx(table, ts, data_type, idx);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_uint) = decoded_int;
+ }
+ break;
+ case STD_T_ULL:
+ for (idx = 0; idx < st->remote_data_nbelem[data_type]; idx++) {
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr_idx(table, ts, data_type, idx);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_ull) = decoded_int;
+ }
+ break;
+ case STD_T_FRQP:
+ for (idx = 0; idx < st->remote_data_nbelem[data_type]; idx++) {
+ struct freq_ctr data;
+
+ /* First bit is reserved for the freq_ctr lock
+ * Note: here we're still protected by the stksess lock
+					 * so we don't need to update the freq_ctr
+ * using its internal lock.
+ */
+
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data.curr_tick = tick_add(now_ms, -decoded_int) & ~0x1;
+ data.curr_ctr = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data.prev_ctr = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr_idx(table, ts, data_type, idx);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_frqp) = data;
+ }
+ break;
+ }
+
+ /* array is fully decoded
+			 * proceed to the next data_type.
+ */
+ continue;
+ }
+ decoded_int = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT:
+ data_ptr = stktable_data_ptr(table, ts, data_type);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_sint) = decoded_int;
+ break;
+
+ case STD_T_UINT:
+ data_ptr = stktable_data_ptr(table, ts, data_type);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_uint) = decoded_int;
+ break;
+
+ case STD_T_ULL:
+ data_ptr = stktable_data_ptr(table, ts, data_type);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_ull) = decoded_int;
+ break;
+
+ case STD_T_FRQP: {
+ struct freq_ctr data;
+
+ /* First bit is reserved for the freq_ctr lock
+ Note: here we're still protected by the stksess lock
+			   so we don't need to update the freq_ctr
+ using its internal lock.
+ */
+
+ data.curr_tick = tick_add(now_ms, -decoded_int) & ~0x1;
+ data.curr_ctr = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data.prev_ctr = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG, NULL, p);
+ goto malformed_unlock;
+ }
+
+ data_ptr = stktable_data_ptr(table, ts, data_type);
+ if (data_ptr && !ignore)
+ stktable_data_cast(data_ptr, std_t_frqp) = data;
+ break;
+ }
+ case STD_T_DICT: {
+ struct buffer *chunk;
+ size_t data_len, value_len;
+ unsigned int id;
+ struct dict_entry *de;
+ struct dcache *dc;
+ char *end;
+
+ if (!decoded_int) {
+ /* No entry. */
+ break;
+ }
+ data_len = decoded_int;
+ if (*msg_cur + data_len > msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, msg_end, &data_len);
+ goto malformed_unlock;
+ }
+
+ /* Compute the end of the current data, <msg_end> being at the end of
+ * the entire message.
+ */
+ end = *msg_cur + data_len;
+ id = intdecode(msg_cur, end);
+ if (!*msg_cur || !id) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur, &id);
+ goto malformed_unlock;
+ }
+
+ dc = p->dcache;
+ if (*msg_cur == end) {
+ /* Dictionary entry key without value. */
+ if (id > dc->max_entries) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, NULL, &id);
+ goto malformed_unlock;
+ }
+ /* IDs sent over the network are numbered from 1. */
+ de = dc->rx[id - 1].de;
+ }
+ else {
+ chunk = get_trash_chunk();
+ value_len = intdecode(msg_cur, end);
+ if (!*msg_cur || *msg_cur + value_len > end ||
+ unlikely(value_len + 1 >= chunk->size)) {
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, *msg_cur, &value_len);
+ TRACE_PROTO("malformed message", PEERS_EV_UPDTMSG,
+ NULL, p, end, &chunk->size);
+ goto malformed_unlock;
+ }
+
+ chunk_memcpy(chunk, *msg_cur, value_len);
+ chunk->area[chunk->data] = '\0';
+ *msg_cur += value_len;
+
+ de = dict_insert(&server_key_dict, chunk->area);
+ dict_entry_unref(&server_key_dict, dc->rx[id - 1].de);
+ dc->rx[id - 1].de = de;
+ }
+ if (de) {
+ data_ptr = stktable_data_ptr(table, ts, data_type);
+ if (data_ptr && !ignore) {
+ HA_ATOMIC_INC(&de->refcount);
+ stktable_data_cast(data_ptr, std_t_dict) = de;
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ if (st->table->write_to.t && table != st->table->write_to.t) {
+ struct stktable_key stkey = { .key = ts->key.key, .key_len = keylen };
+
+ /* While we're still under the main ts lock, try to get related
+ * write_to stksess with main ts key
+ */
+ wts = stktable_get_entry(st->table->write_to.t, &stkey);
+ }
+
+ /* Force new expiration */
+ ts->expire = tick_add(now_ms, expire);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_remote(table, ts, 1);
+
+ if (wts) {
+ /* Start over the message decoding for wts as we got a valid stksess
+ * for write_to table, so we need to refresh the entry with supported
+ * values.
+ *
+ * We prefer to do the decoding a second time even though it might
+ * cost a bit more than copying from main ts to wts, but doing so
+		 * enables us to get rid of the main ts lock: we only need the wts lock
+ * since upstream data is still available in msg_cur
+ */
+ ts = wts;
+ table = st->table->write_to.t;
+ wts = NULL; /* so we don't get back here */
+ *msg_cur = msg_save;
+ goto update_wts;
+ }
+
+ ignore_msg:
+ TRACE_LEAVE(PEERS_EV_UPDTMSG, NULL, p);
+ return 1;
+
+ malformed_unlock:
+ /* malformed message */
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_remote(st->table, ts, 1);
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ TRACE_DEVEL("leaving in error", PEERS_EV_UPDTMSG);
+ return 0;
+
+ malformed_free_newts:
+ /* malformed message */
+ stksess_free(st->table, newts);
+ malformed_exit:
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ TRACE_DEVEL("leaving in error", PEERS_EV_UPDTMSG);
+ return 0;
+}
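+
+/* For illustration only: an update message payload, as parsed above, is
+ * laid out as
+ *
+ *   [32-bit update ID, net order]   for PEER_MSG_STKT_UPDATE{,_TIMED} only
+ *   [32-bit expire, net order]      for the *_TIMED variants only
+ *   key: varint length + bytes (SMP_T_STR), 32-bit net-order integer
+ *        (SMP_T_SINT), or exactly key_size raw bytes otherwise
+ *   then one field per data type advertised in the table definition, in
+ *   data-type order, varint-encoded (array elements encoded linearly)
+ */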
+
+/*
+ * Function used to parse a stick-table update acknowledgement message after it
+ * has been received by the <p> peer, with <msg_cur> as the address of the pointer to the position in the
+ * receipt buffer and <msg_end> being the position of the end of the stick-table message.
+ * Update <msg_cur> according to the peer protocol specs if no peer protocol error
+ * was encountered.
+ * Return 1 if succeeded, 0 if not with the appctx state st0 set to PEER_SESS_ST_ERRPROTO.
+ */
+static inline int peer_treat_ackmsg(struct appctx *appctx, struct peer *p,
+ char **msg_cur, char *msg_end)
+{
+ /* ack message */
+	uint32_t table_id;
+ uint32_t update;
+ struct shared_table *st;
+
+ /* ignore ack during teaching process */
+ if (p->flags & PEER_F_TEACH_PROCESS)
+ return 1;
+
+ table_id = intdecode(msg_cur, msg_end);
+ if (!*msg_cur || (*msg_cur + sizeof(update) > msg_end)) {
+ /* malformed message */
+
+ TRACE_PROTO("malformed message", PEERS_EV_ACKMSG,
+ NULL, p, *msg_cur);
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return 0;
+ }
+
+ memcpy(&update, *msg_cur, sizeof(update));
+ update = ntohl(update);
+
+ for (st = p->tables; st; st = st->next) {
+ if (st->local_id == table_id) {
+ st->update = update;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Function used to parse a stick-table switch message after it has been received
+ * by the <p> peer, with <msg_cur> as the address of the pointer to the position in the
+ * receipt buffer and <msg_end> being the position of the end of the stick-table message.
+ * Update <msg_cur> according to the peer protocol specs if no peer protocol error
+ * was encountered.
+ * Return 1 if succeeded, 0 if not with the appctx state st0 set to PEER_SESS_ST_ERRPROTO.
+ */
+static inline int peer_treat_switchmsg(struct appctx *appctx, struct peer *p,
+ char **msg_cur, char *msg_end)
+{
+ struct shared_table *st;
+ int table_id;
+
+ table_id = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_SWTCMSG, NULL, p);
+ /* malformed message */
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return 0;
+ }
+
+ p->remote_table = NULL;
+ for (st = p->tables; st; st = st->next) {
+ if (st->remote_id == table_id) {
+ p->remote_table = st;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Function used to parse a stick-table definition message after it has been received
+ * by the <p> peer, with <msg_cur> as the address of the pointer to the position in the
+ * receipt buffer and <msg_end> being the position of the end of the stick-table message.
+ * Update <msg_cur> according to the peer protocol specs if no peer protocol error
+ * was encountered.
+ * <totl> is the length of the stick-table update message computed upon receipt.
+ * Return 1 if succeeded, 0 if not with the appctx state st0 set to PEER_SESS_ST_ERRPROTO.
+ */
+static inline int peer_treat_definemsg(struct appctx *appctx, struct peer *p,
+ char **msg_cur, char *msg_end, int totl)
+{
+ int table_id_len;
+ struct shared_table *st;
+ int table_type;
+ int table_keylen;
+ int table_id;
+ uint64_t table_data;
+
+ table_id = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ table_id_len = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p, *msg_cur);
+ goto malformed_exit;
+ }
+
+ p->remote_table = NULL;
+ if (!table_id_len || (*msg_cur + table_id_len) >= msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p, *msg_cur, &table_id_len);
+ goto malformed_exit;
+ }
+
+ for (st = p->tables; st; st = st->next) {
+ /* Reset IDs */
+ if (st->remote_id == table_id)
+ st->remote_id = 0;
+
+ if (!p->remote_table && (table_id_len == strlen(st->table->nid)) &&
+ (memcmp(st->table->nid, *msg_cur, table_id_len) == 0))
+ p->remote_table = st;
+ }
+
+ if (!p->remote_table) {
+ TRACE_PROTO("ignored message", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+ *msg_cur += table_id_len;
+ if (*msg_cur >= msg_end) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ table_type = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ table_keylen = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ table_data = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ TRACE_PROTO("malformed message", PEERS_EV_DEFMSG, NULL, p);
+ goto malformed_exit;
+ }
+
+ if (p->remote_table->table->type != peer_int_key_type[table_type]
+ || p->remote_table->table->key_size != table_keylen) {
+ p->remote_table = NULL;
+ TRACE_PROTO("ignored message", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+	/* Check if there is the additional expire data */
+ intdecode(msg_cur, msg_end);
+ if (*msg_cur) {
+ uint64_t data_type;
+ uint64_t type;
+
+ /* This define contains the expire data so we consider
+		 * it also contains all data_type parameters.
+ */
+ for (data_type = 0; data_type < STKTABLE_DATA_TYPES; data_type++) {
+ if (table_data & (1ULL << data_type)) {
+ if (stktable_data_types[data_type].is_array) {
+ /* This should be an array
+ * so we parse the data_type prefix
+ * because we must have parameters.
+ */
+ type = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing meta data for array", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+					/* check if the data_type matches the current one from the bitfield */
+ if (type != data_type) {
+ p->remote_table = NULL;
+ TRACE_PROTO("meta data mismatch type", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+ /* decode the nbelem of the array */
+ p->remote_table->remote_data_nbelem[type] = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing array size meta data for array", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+ /* if it is an array of frqp, we must also have the period to decode */
+ if (stktable_data_types[data_type].std_type == STD_T_FRQP) {
+ intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing period for frqp", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+ }
+ }
+ else if (stktable_data_types[data_type].std_type == STD_T_FRQP) {
+ /* This should be a std freq counter data_type
+ * so we parse the data_type prefix
+ * because we must have parameters.
+ */
+ type = intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing meta data for frqp", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+					/* check if the data_type matches the current one from the bitfield */
+ if (type != data_type) {
+ p->remote_table = NULL;
+ TRACE_PROTO("meta data mismatch type", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+
+ /* decode the period */
+ intdecode(msg_cur, msg_end);
+ if (!*msg_cur) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing period for frqp", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+ }
+ }
+ }
+ }
+ else {
+ uint64_t data_type;
+
+		/* There is no additional data, but the
+		 * array size parameter is mandatory to parse an array,
+		 * so we consider it an error if an array data_type is defined
+		 * but there is no additional data.
+ */
+ for (data_type = 0; data_type < STKTABLE_DATA_TYPES; data_type++) {
+ if (table_data & (1ULL << data_type)) {
+ if (stktable_data_types[data_type].is_array) {
+ p->remote_table = NULL;
+ TRACE_PROTO("missing array size meta data for array", PEERS_EV_DEFMSG, NULL, p);
+ goto ignore_msg;
+ }
+ }
+ }
+ }
+
+ p->remote_table->remote_data = table_data;
+ p->remote_table->remote_id = table_id;
+
+ ignore_msg:
+ return 1;
+
+ malformed_exit:
+ /* malformed message */
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return 0;
+}
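+
+/* For illustration only: the optional trailing part parsed above mirrors
+ * what peer_prepare_switchmsg() emits, i.e. after the varint-encoded expire
+ * value, for each advertised data type requiring parameters:
+ *
+ *   varint data_type | varint nbelem (arrays only) | varint period
+ *   (freq counters only)
+ */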
+
+/*
+ * Receive a stick-table message or pre-parse any other message.
+ * The message's header will be copied into <msg_head>, which must be at least
+ * <msg_head_sz> bytes long (at least 7 to store 32-bit variable lengths).
+ * The first two bytes are always read, and the rest is only read if the
+ * first bytes indicate a stick-table message. If the message is a stick-table
+ * message, the varint is decoded and the equivalent number of bytes will be
+ * copied into the trash at trash.area. <totl> is incremented by the number of
+ * bytes read EVEN IN CASE OF INCOMPLETE MESSAGES.
+ * Returns 1 if there was no error; otherwise returns 0 if not enough data was
+ * available, or -1 if there was an error, updating the appctx state st0 accordingly.
+ */
+static inline int peer_recv_msg(struct appctx *appctx, char *msg_head, size_t msg_head_sz,
+ uint32_t *msg_len, int *totl)
+{
+ int reql;
+ struct stconn *sc = appctx_sc(appctx);
+ char *cur;
+
+ reql = co_getblk(sc_oc(sc), msg_head, 2 * sizeof(char), *totl);
+ if (reql <= 0) /* closed or EOL not found */
+ goto incomplete;
+
+ *totl += reql;
+
+ if (!(msg_head[1] & PEER_MSG_STKT_BIT_MASK))
+ return 1;
+
+ /* This is a stick-table message, let's go on */
+
+ /* Read and Decode message length */
+ msg_head += *totl;
+ msg_head_sz -= *totl;
+ reql = co_data(sc_oc(sc)) - *totl;
+ if (reql > msg_head_sz)
+ reql = msg_head_sz;
+
+ reql = co_getblk(sc_oc(sc), msg_head, reql, *totl);
+ if (reql <= 0) /* closed */
+ goto incomplete;
+
+ cur = msg_head;
+ *msg_len = intdecode(&cur, cur + reql);
+ if (!cur) {
+ /* the number is truncated, did we read enough ? */
+ if (reql < msg_head_sz)
+ goto incomplete;
+
+ /* malformed message */
+ TRACE_PROTO("malformed message: too large length encoding", PEERS_EV_UPDTMSG);
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return -1;
+ }
+ *totl += cur - msg_head;
+
+ /* Read message content */
+ if (*msg_len) {
+ if (*msg_len > trash.size) {
+			/* Message too large to fit in the trash buffer, abort */
+ appctx->st0 = PEER_SESS_ST_ERRSIZE;
+ return -1;
+ }
+
+ reql = co_getblk(sc_oc(sc), trash.area, *msg_len, *totl);
+ if (reql <= 0) /* closed */
+ goto incomplete;
+ *totl += reql;
+ }
+
+ return 1;
+
+ incomplete:
+ if (reql < 0 || (sc->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))) {
+ /* there was an error or the message was truncated */
+ appctx->st0 = PEER_SESS_ST_END;
+ return -1;
+ }
+
+ return 0;
+}
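+
+/* Reading note (illustration): the payload length above is a variable-length
+ * integer, so up to PEER_MSG_ENC_LENGTH_MAXLEN bytes may be needed after the
+ * two fixed header bytes before it can be decoded, hence the requirement
+ * that <msg_head_sz> be at least 7 bytes for 32-bit lengths.
+ */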
+
+/*
+ * Treat the awaited message with <msg_head> as header.
+ * Return 1 if succeeded, 0 if not.
+ */
+static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *peer, unsigned char *msg_head,
+ char **msg_cur, char *msg_end, int msg_len, int totl)
+{
+ struct peers *peers = peer->peers;
+
+ if (msg_head[0] == PEER_MSG_CLASS_CONTROL) {
+ if (msg_head[1] == PEER_MSG_CTRL_RESYNCREQ) {
+ struct shared_table *st;
+			/* Reset message: the remote needs a resync */
+
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ /* prepare tables for a global push */
+ for (st = peer->tables; st; st = st->next) {
+ st->teaching_origin = st->last_pushed = st->update;
+ st->flags = 0;
+ }
+
+ /* reset teaching flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+
+			/* flag to start teaching a lesson */
+ peer->flags |= PEER_F_TEACH_PROCESS;
+ peers->flags |= PEERS_F_RESYNC_REQUESTED;
+ }
+ else if (msg_head[1] == PEER_MSG_CTRL_RESYNCFINISHED) {
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ if (peer->flags & PEER_F_LEARN_ASSIGN) {
+ int commit_a_finish = 1;
+
+ peer->flags &= ~PEER_F_LEARN_ASSIGN;
+ peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
+ if (peer->srv->shard) {
+ struct peer *ps;
+
+ peers->flags |= PEERS_F_RESYNC_REMOTEPARTIAL;
+ peer->flags |= PEER_F_LEARN_NOTUP2DATE;
+ for (ps = peers->remote; ps; ps = ps->next) {
+ if (ps->srv->shard == peer->srv->shard) {
+						/* flag all peers from the same shard
+						 * notup2date to disable requesting
+						 * a resync from them
+ */
+ ps->flags |= PEER_F_LEARN_NOTUP2DATE;
+ }
+ else if (ps->srv->shard && !(ps->flags & PEER_F_LEARN_NOTUP2DATE)) {
+						/* some other shards have not been requested yet,
+						 * so we don't commit a resync finish, in order
+						 * to request the other shards
+ */
+ commit_a_finish = 0;
+ }
+ }
+
+ if (!commit_a_finish) {
+					/* some shards remain to be requested, so we schedule a new request
+ */
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+ task_wakeup(peers->sync_task, TASK_WOKEN_MSG);
+ }
+ }
+
+ if (commit_a_finish) {
+ peers->flags |= (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE);
+ if (peer->local)
+ peers->flags |= PEERS_F_RESYNC_LOCALFINISHED;
+ else
+ peers->flags |= PEERS_F_RESYNC_REMOTEFINISHED;
+ }
+ }
+ peer->confirm++;
+ }
+ else if (msg_head[1] == PEER_MSG_CTRL_RESYNCPARTIAL) {
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ if (peer->flags & PEER_F_LEARN_ASSIGN) {
+ peer->flags &= ~PEER_F_LEARN_ASSIGN;
+ peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
+
+ if (peer->local)
+ peers->flags |= PEERS_F_RESYNC_LOCALPARTIAL;
+ else
+ peers->flags |= PEERS_F_RESYNC_REMOTEPARTIAL;
+ peer->flags |= PEER_F_LEARN_NOTUP2DATE;
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+ task_wakeup(peers->sync_task, TASK_WOKEN_MSG);
+ }
+ peer->confirm++;
+ }
+ else if (msg_head[1] == PEER_MSG_CTRL_RESYNCCONFIRM) {
+ struct shared_table *st;
+
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ /* If stopping state */
+ if (stopping) {
+				/* Close session, push resync no longer needed */
+ peer->flags |= PEER_F_TEACH_COMPLETE;
+ appctx->st0 = PEER_SESS_ST_END;
+ return 0;
+ }
+ for (st = peer->tables; st; st = st->next) {
+ st->update = st->last_pushed = st->teaching_origin;
+ st->flags = 0;
+ }
+
+ /* reset teaching flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+ }
+ else if (msg_head[1] == PEER_MSG_CTRL_HEARTBEAT) {
+ TRACE_PROTO("received control message", PEERS_EV_CTRLMSG,
+ NULL, &msg_head[1], peers->local->id, peer->id);
+ peer->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT));
+ peer->rx_hbt++;
+ }
+ }
+ else if (msg_head[0] == PEER_MSG_CLASS_STICKTABLE) {
+ if (msg_head[1] == PEER_MSG_STKT_DEFINE) {
+ if (!peer_treat_definemsg(appctx, peer, msg_cur, msg_end, totl))
+ return 0;
+ }
+ else if (msg_head[1] == PEER_MSG_STKT_SWITCH) {
+ if (!peer_treat_switchmsg(appctx, peer, msg_cur, msg_end))
+ return 0;
+ }
+ else if (msg_head[1] == PEER_MSG_STKT_UPDATE ||
+ msg_head[1] == PEER_MSG_STKT_INCUPDATE ||
+ msg_head[1] == PEER_MSG_STKT_UPDATE_TIMED ||
+ msg_head[1] == PEER_MSG_STKT_INCUPDATE_TIMED) {
+ int update, expire;
+
+ update = msg_head[1] == PEER_MSG_STKT_UPDATE || msg_head[1] == PEER_MSG_STKT_UPDATE_TIMED;
+ expire = msg_head[1] == PEER_MSG_STKT_UPDATE_TIMED || msg_head[1] == PEER_MSG_STKT_INCUPDATE_TIMED;
+ if (!peer_treat_updatemsg(appctx, peer, update, expire,
+ msg_cur, msg_end, msg_len, totl))
+ return 0;
+
+ }
+ else if (msg_head[1] == PEER_MSG_STKT_ACK) {
+ if (!peer_treat_ackmsg(appctx, peer, msg_cur, msg_end))
+ return 0;
+ }
+ }
+ else if (msg_head[0] == PEER_MSG_CLASS_RESERVED) {
+ appctx->st0 = PEER_SESS_ST_ERRPROTO;
+ return 0;
+ }
+
+ return 1;
+}
+
+
+/*
+ * Send any message to <peer> peer.
+ * Returns 1 if succeeded, or -1 or 0 if failed.
+ * -1 means an internal error occurred, 0 is for a peer protocol error leading
+ * to a peer state change (from the peer I/O handler point of view).
+ *
+ * - peer->last_local_table is the last table for which we sent update
+ *   messages.
+ *
+ * - peer->stop_local_table is the last evaluated table. It is unset when the
+ *                          teaching process starts. But we use it as a
+ *                          restart point when the loop is interrupted. It is
+ *                          especially useful when the number of tables exceeds
+ *                          the peers_max_updates_at_once value.
+ *
+ * When a teaching loop is started, the peer's last_local_table is saved in a
+ * local variable. This variable is used as a finish point. When the current
+ * table is equal to it, it means all tables were evaluated, all updates were
+ * sent and the teaching process is finished.
+ *
+ * peer->stop_local_table is always NULL when the teaching process begins. It is
+ * only reset at the end. In the meantime, it always points to a table.
+ */
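+
+/* Example (for illustration, assuming three tables A -> B -> C and
+ * peer->last_local_table == B): the loop below starts at
+ * stop_local_table->next, i.e. C on a fresh start, wraps back to the head A
+ * when the list ends, and stops once it gets back to B, at which point
+ * stop_local_table is reset to NULL.
+ */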
+
+static inline int peer_send_msgs(struct appctx *appctx,
+ struct peer *peer, struct peers *peers)
+{
+ int repl;
+
+ /* Need to request a resync */
+ if ((peer->flags & PEER_F_LEARN_ASSIGN) &&
+ (peers->flags & PEERS_F_RESYNC_ASSIGN) &&
+ !(peers->flags & PEERS_F_RESYNC_PROCESS)) {
+
+ repl = peer_send_resync_reqmsg(appctx, peer, peers);
+ if (repl <= 0)
+ return repl;
+
+ peers->flags |= PEERS_F_RESYNC_PROCESS;
+ }
+
+ /* Nothing to read, now we start to write */
+ if (peer->tables) {
+ struct shared_table *st;
+ struct shared_table *last_local_table;
+ int updates = 0;
+
+ last_local_table = peer->last_local_table;
+ if (!last_local_table)
+ last_local_table = peer->tables;
+ if (!peer->stop_local_table)
+ peer->stop_local_table = last_local_table;
+ st = peer->stop_local_table->next;
+
+ while (1) {
+ if (!st)
+ st = peer->tables;
+			/* Some updates remain to be acked */
+ if (st->last_get != st->last_acked) {
+ repl = peer_send_ackmsg(st, appctx);
+ if (repl <= 0)
+ return repl;
+
+ st->last_acked = st->last_get;
+ }
+
+ if (!(peer->flags & PEER_F_TEACH_PROCESS)) {
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->lock);
+ if (!(peer->flags & PEER_F_LEARN_ASSIGN) &&
+ (st->last_pushed != st->table->localupdate)) {
+
+ repl = peer_send_teach_process_msgs(appctx, peer, st);
+ if (repl <= 0) {
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ peer->stop_local_table = peer->last_local_table;
+ return repl;
+ }
+ }
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ }
+ else if (!(peer->flags & PEER_F_TEACH_FINISHED)) {
+ if (!(st->flags & SHTABLE_F_TEACH_STAGE1)) {
+ repl = peer_send_teach_stage1_msgs(appctx, peer, st);
+ if (repl <= 0) {
+ peer->stop_local_table = peer->last_local_table;
+ return repl;
+ }
+ }
+
+ if (!(st->flags & SHTABLE_F_TEACH_STAGE2)) {
+ repl = peer_send_teach_stage2_msgs(appctx, peer, st);
+ if (repl <= 0) {
+ peer->stop_local_table = peer->last_local_table;
+ return repl;
+ }
+ }
+ }
+
+ if (st == last_local_table) {
+ peer->stop_local_table = NULL;
+ break;
+ }
+
+			/* This one is to be sure to restart from <st->next> if we are interrupted
+			 * because of peer_send_teach_stage2_msgs or because the buffer is full
+			 * when sending an ackmsg. In both cases the current <st> was evaluated and
+			 * we must restart from <st->next>
+ */
+ peer->stop_local_table = st;
+
+ updates++;
+ if (updates >= peers_max_updates_at_once) {
+ /* pretend we're full so that we get back ASAP */
+ struct stconn *sc = appctx_sc(appctx);
+
+ sc_need_room(sc, 0);
+ return -1;
+ }
+
+ st = st->next;
+ }
+ }
+
+ if ((peer->flags & PEER_F_TEACH_PROCESS) && !(peer->flags & PEER_F_TEACH_FINISHED)) {
+ repl = peer_send_resync_finishedmsg(appctx, peer, peers);
+ if (repl <= 0)
+ return repl;
+
+ /* flag finished message sent */
+ peer->flags |= PEER_F_TEACH_FINISHED;
+ }
+
+ /* Confirm finished or partial messages */
+ while (peer->confirm) {
+ repl = peer_send_resync_confirmsg(appctx, peer, peers);
+ if (repl <= 0)
+ return repl;
+
+ peer->confirm--;
+ }
+
+ return 1;
+}
+
+/*
+ * Read and parse a first line of a "hello" peer protocol message.
+ * Returns 0 if a line could not be read, -1 if there was a read error or
+ * the line is malformed, 1 if succeeded.
+ */
+static inline int peer_getline_version(struct appctx *appctx,
+ unsigned int *maj_ver, unsigned int *min_ver)
+{
+ int reql;
+
+ reql = peer_getline(appctx);
+ if (!reql)
+ return 0;
+
+ if (reql < 0)
+ return -1;
+
+ /* test protocol */
+ if (strncmp(PEER_SESSION_PROTO_NAME " ", trash.area, proto_len + 1) != 0) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRPROTO;
+ return -1;
+ }
+ if (peer_get_version(trash.area + proto_len + 1, maj_ver, min_ver) == -1 ||
+ *maj_ver != PEER_MAJOR_VER || *min_ver > PEER_MINOR_VER) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRVERSION;
+ return -1;
+ }
+
+ return 1;
+}
+
+/*
+ * Read and parse a second line of a "hello" peer protocol message.
+ * Returns 0 if a line could not be read, -1 if there was a read error or
+ * the line is malformed, 1 if succeeded.
+ */
+static inline int peer_getline_host(struct appctx *appctx)
+{
+ int reql;
+
+ reql = peer_getline(appctx);
+ if (!reql)
+ return 0;
+
+ if (reql < 0)
+ return -1;
+
+ /* test hostname match */
+ if (strcmp(localpeer, trash.area) != 0) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRHOST;
+ return -1;
+ }
+
+ return 1;
+}
+
+/*
+ * Read and parse a last line of a "hello" peer protocol message.
+ * Returns 0 if a line could not be read, -1 if there was a read error or
+ * the line is malformed, 1 if succeeded.
+ * Set <curpeer> accordingly (the remote peer sending the "hello" message).
+ */
+static inline int peer_getline_last(struct appctx *appctx, struct peer **curpeer)
+{
+ char *p;
+ int reql;
+ struct peer *peer;
+ struct stream *s = appctx_strm(appctx);
+ struct peers *peers = strm_fe(s)->parent;
+
+ reql = peer_getline(appctx);
+ if (!reql)
+ return 0;
+
+ if (reql < 0)
+ return -1;
+
+ /* parse line "<peer name> <pid> <relative_pid>" */
+ p = strchr(trash.area, ' ');
+ if (!p) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRPROTO;
+ return -1;
+ }
+ *p = 0;
+
+ /* lookup known peer */
+ for (peer = peers->remote; peer; peer = peer->next) {
+ if (strcmp(peer->id, trash.area) == 0)
+ break;
+ }
+
+ /* if unknown peer */
+ if (!peer) {
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_ERRPEER;
+ return -1;
+ }
+ *curpeer = peer;
+
+ return 1;
+}
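+
+/* For illustration, the three "hello" lines handled by the three helpers
+ * above look like this (informal sketch recovered from the parsing code;
+ * the exact protocol name and version come from PEER_SESSION_PROTO_NAME
+ * and PEER_MAJOR_VER/PEER_MINOR_VER):
+ *
+ *   <PEER_SESSION_PROTO_NAME> <maj_ver>.<min_ver>
+ *   <localpeer name of the receiving host>
+ *   <peer name> <pid> <relative_pid>
+ */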
+
+/*
+ * Init <peer> peer after having accepted it at peer protocol level.
+ */
+static inline void init_accepted_peer(struct peer *peer, struct peers *peers)
+{
+ struct shared_table *st;
+
+ peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT));
+ /* Register status code */
+ peer->statuscode = PEER_SESS_SC_SUCCESSCODE;
+ peer->last_hdshk = now_ms;
+
+ /* Awake main task */
+ task_wakeup(peers->sync_task, TASK_WOKEN_MSG);
+
+ /* Init confirm counter */
+ peer->confirm = 0;
+
+ /* Init cursors */
+ for (st = peer->tables; st ; st = st->next) {
+ uint commitid, updateid;
+
+ st->last_get = st->last_acked = 0;
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->lock);
+ /* If st->update appears to be in the future, it means that the
+ * last acked value is very old and we remained disconnected for
+ * too long a time to use this acknowledgement as a reset. We
+ * should update the protocol to be able to signal the remote
+ * peer that it needs a full resync. For now, a partial fix
+ * consists in setting st->update to the farthest past value.
+ */
+ if ((int)(st->table->localupdate - st->update) < 0)
+ st->update = st->table->localupdate + (2147483648U);
+ st->teaching_origin = st->last_pushed = st->update;
+ st->flags = 0;
+
+ updateid = st->last_pushed;
+ commitid = _HA_ATOMIC_LOAD(&st->table->commitupdate);
+
+ while ((int)(updateid - commitid) > 0) {
+ if (_HA_ATOMIC_CAS(&st->table->commitupdate, &commitid, updateid))
+ break;
+ __ha_cpu_relax();
+ }
+
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ }
+
+ /* reset teaching and learning flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+ peer->flags &= PEER_LEARN_RESET;
+
+ /* if current peer is local */
+ if (peer->local) {
+ /* if the current host needs a resync from local and no process is assigned */
+ if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN)) {
+ /* assign local peer for a lesson, consider lesson already requested */
+ peer->flags |= PEER_F_LEARN_ASSIGN;
+ peers->flags |= (PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS);
+ peers->flags |= PEERS_F_RESYNC_LOCALASSIGN;
+ }
+
+ }
+ else if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN)) {
+ /* assign peer for a lesson */
+ peer->flags |= PEER_F_LEARN_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN;
+ }
+}
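+
+/* Note on the wrap-around check above (informal example, not upstream
+ * documentation): update counters wrap at 2^32, so ordering is decided
+ * with a signed difference. E.g. with localupdate=10 and update=100,
+ * (int)(10 - 100) is negative, so <update> is considered "in the future"
+ * and is reset to localupdate + 2^31, the farthest possible past value.
+ */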
+
+/*
+ * Init <peer> peer after having connected it at peer protocol level.
+ */
+static inline void init_connected_peer(struct peer *peer, struct peers *peers)
+{
+ struct shared_table *st;
+
+ peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT));
+ /* Init cursors */
+ for (st = peer->tables; st ; st = st->next) {
+ uint updateid, commitid;
+
+ st->last_get = st->last_acked = 0;
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->lock);
+ /* If st->update appears to be in the future, it means that the
+ * last acked value is very old and we remained disconnected for
+ * too long a time to use this acknowledgement as a reset. We
+ * should update the protocol to be able to signal the remote
+ * peer that it needs a full resync. For now, a partial fix
+ * consists in setting st->update to the farthest past value.
+ */
+ if ((int)(st->table->localupdate - st->update) < 0)
+ st->update = st->table->localupdate + (2147483648U);
+ st->teaching_origin = st->last_pushed = st->update;
+ st->flags = 0;
+
+ updateid = st->last_pushed;
+ commitid = _HA_ATOMIC_LOAD(&st->table->commitupdate);
+
+ while ((int)(updateid - commitid) > 0) {
+ if (_HA_ATOMIC_CAS(&st->table->commitupdate, &commitid, updateid))
+ break;
+ __ha_cpu_relax();
+ }
+
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->lock);
+ }
+
+ /* Init confirm counter */
+ peer->confirm = 0;
+
+ /* reset teaching and learning flags to 0 */
+ peer->flags &= PEER_TEACH_RESET;
+ peer->flags &= PEER_LEARN_RESET;
+
+ /* If current peer is local */
+ if (peer->local) {
+ /* flag to start to teach lesson */
+ peer->flags |= PEER_F_TEACH_PROCESS;
+ }
+ else if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN)) {
+ /* If the peer is remote, a resync from remote is needed,
+ and no peer is currently assigned */
+
+ /* assign peer for a lesson */
+ peer->flags |= PEER_F_LEARN_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN;
+ }
+}
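+
+/* Informal overview of the session states driven by peer_io_handler()
+ * below, as recovered from its switch cases:
+ *
+ *   accepted side:   ACCEPT -> GETVERSION -> GETHOST -> GETPEER
+ *                           -> SENDSUCCESS -> WAITMSG
+ *   connecting side: CONNECT -> GETSTATUS -> WAITMSG
+ *
+ * Error paths go through EXIT, ERRSIZE or ERRPROTO, which send an error
+ * status message before reaching END.
+ */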
+
+/*
+ * IO Handler to handle message exchange with a peer
+ */
+static void peer_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct peers *curpeers = strm_fe(s)->parent;
+ struct peer *curpeer = NULL;
+ int reql = 0;
+ int repl = 0;
+ unsigned int maj_ver, min_ver;
+ int prev_state;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ goto out;
+ }
+
+ /* Check if the input buffer is available. */
+ if (sc_ib(sc)->size == 0) {
+ sc_need_room(sc, 0);
+ goto out;
+ }
+
+ while (1) {
+ prev_state = appctx->st0;
+switchstate:
+ maj_ver = min_ver = (unsigned int)-1;
+ switch(appctx->st0) {
+ case PEER_SESS_ST_ACCEPT:
+ prev_state = appctx->st0;
+ appctx->svcctx = NULL;
+ appctx->st0 = PEER_SESS_ST_GETVERSION;
+ __fallthrough;
+ case PEER_SESS_ST_GETVERSION:
+ prev_state = appctx->st0;
+ reql = peer_getline_version(appctx, &maj_ver, &min_ver);
+ if (reql <= 0) {
+ if (!reql)
+ goto out;
+ goto switchstate;
+ }
+
+ appctx->st0 = PEER_SESS_ST_GETHOST;
+ __fallthrough;
+ case PEER_SESS_ST_GETHOST:
+ prev_state = appctx->st0;
+ reql = peer_getline_host(appctx);
+ if (reql <= 0) {
+ if (!reql)
+ goto out;
+ goto switchstate;
+ }
+
+ appctx->st0 = PEER_SESS_ST_GETPEER;
+ __fallthrough;
+ case PEER_SESS_ST_GETPEER: {
+ prev_state = appctx->st0;
+ reql = peer_getline_last(appctx, &curpeer);
+ if (reql <= 0) {
+ if (!reql)
+ goto out;
+ goto switchstate;
+ }
+
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx && curpeer->appctx != appctx) {
+ if (curpeer->local) {
+ /* Local connection, reply a retry */
+ appctx->st0 = PEER_SESS_ST_EXIT;
+ appctx->st1 = PEER_SESS_SC_TRYAGAIN;
+ goto switchstate;
+ }
+
+ /* we're killing a connection, we must apply a random delay before
+ * retrying otherwise the other end will do the same and we can loop
+ * for a while.
+ */
+ curpeer->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000));
+ peer_session_forceshutdown(curpeer);
+ curpeer->heartbeat = TICK_ETERNITY;
+ curpeer->coll++;
+ }
+ if (maj_ver != (unsigned int)-1 && min_ver != (unsigned int)-1) {
+ if (min_ver == PEER_DWNGRD_MINOR_VER) {
+ curpeer->flags |= PEER_F_DWNGRD;
+ }
+ else {
+ curpeer->flags &= ~PEER_F_DWNGRD;
+ }
+ }
+ curpeer->appctx = appctx;
+ curpeer->flags |= PEER_F_ALIVE;
+ appctx->svcctx = curpeer;
+ appctx->st0 = PEER_SESS_ST_SENDSUCCESS;
+ _HA_ATOMIC_INC(&active_peers);
+ }
+ __fallthrough;
+ case PEER_SESS_ST_SENDSUCCESS: {
+ prev_state = appctx->st0;
+ if (!curpeer) {
+ curpeer = appctx->svcctx;
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx != appctx) {
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ }
+
+ repl = peer_send_status_successmsg(appctx);
+ if (repl <= 0) {
+ if (repl == -1)
+ goto out;
+ goto switchstate;
+ }
+
+ init_accepted_peer(curpeer, curpeers);
+
+ /* switch to waiting message state */
+ _HA_ATOMIC_INC(&connected_peers);
+ appctx->st0 = PEER_SESS_ST_WAITMSG;
+ goto switchstate;
+ }
+ case PEER_SESS_ST_CONNECT: {
+ prev_state = appctx->st0;
+ if (!curpeer) {
+ curpeer = appctx->svcctx;
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx != appctx) {
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ }
+
+ repl = peer_send_hellomsg(appctx, curpeer);
+ if (repl <= 0) {
+ if (repl == -1)
+ goto out;
+ goto switchstate;
+ }
+
+ /* switch to the waiting statuscode state */
+ appctx->st0 = PEER_SESS_ST_GETSTATUS;
+ }
+ __fallthrough;
+ case PEER_SESS_ST_GETSTATUS: {
+ prev_state = appctx->st0;
+ if (!curpeer) {
+ curpeer = appctx->svcctx;
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx != appctx) {
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ }
+
+ if (sc_ic(sc)->flags & CF_WROTE_DATA)
+ curpeer->statuscode = PEER_SESS_SC_CONNECTEDCODE;
+
+ reql = peer_getline(appctx);
+ if (!reql)
+ goto out;
+
+ if (reql < 0)
+ goto switchstate;
+
+ /* Register status code */
+ curpeer->statuscode = atoi(trash.area);
+ curpeer->last_hdshk = now_ms;
+
+ /* Awake main task */
+ task_wakeup(curpeers->sync_task, TASK_WOKEN_MSG);
+
+ /* If status code is success */
+ if (curpeer->statuscode == PEER_SESS_SC_SUCCESSCODE) {
+ init_connected_peer(curpeer, curpeers);
+ }
+ else {
+ if (curpeer->statuscode == PEER_SESS_SC_ERRVERSION)
+ curpeer->flags |= PEER_F_DWNGRD;
+ /* Status code is not success, abort */
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ _HA_ATOMIC_INC(&connected_peers);
+ appctx->st0 = PEER_SESS_ST_WAITMSG;
+ }
+ __fallthrough;
+ case PEER_SESS_ST_WAITMSG: {
+ uint32_t msg_len = 0;
+ char *msg_cur = trash.area;
+ char *msg_end = trash.area;
+ unsigned char msg_head[7]; // 2 + 5 for varint32
+ int totl = 0;
+
+ prev_state = appctx->st0;
+ if (!curpeer) {
+ curpeer = appctx->svcctx;
+ HA_SPIN_LOCK(PEER_LOCK, &curpeer->lock);
+ if (curpeer->appctx != appctx) {
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ }
+
+ reql = peer_recv_msg(appctx, (char *)msg_head, sizeof msg_head, &msg_len, &totl);
+ if (reql <= 0) {
+ if (reql == -1)
+ goto switchstate;
+ goto send_msgs;
+ }
+
+ msg_end += msg_len;
+ if (!peer_treat_awaited_msg(appctx, curpeer, msg_head, &msg_cur, msg_end, msg_len, totl))
+ goto switchstate;
+
+ curpeer->flags |= PEER_F_ALIVE;
+
+ /* skip consumed message */
+ co_skip(sc_oc(sc), totl);
+ /* loop on that state to peek next message */
+ goto switchstate;
+
+send_msgs:
+ if (curpeer->flags & PEER_F_HEARTBEAT) {
+ curpeer->flags &= ~PEER_F_HEARTBEAT;
+ repl = peer_send_heartbeatmsg(appctx, curpeer, curpeers);
+ if (repl <= 0) {
+ if (repl == -1)
+ goto out;
+ goto switchstate;
+ }
+ curpeer->tx_hbt++;
+ }
+ /* we get here when a peer_recv_msg() returns 0 in reql */
+ repl = peer_send_msgs(appctx, curpeer, curpeers);
+ if (repl <= 0) {
+ if (repl == -1)
+ goto out;
+ goto switchstate;
+ }
+
+ /* nothing more to do */
+ goto out;
+ }
+ case PEER_SESS_ST_EXIT:
+ if (prev_state == PEER_SESS_ST_WAITMSG)
+ _HA_ATOMIC_DEC(&connected_peers);
+ prev_state = appctx->st0;
+ if (peer_send_status_errormsg(appctx) == -1)
+ goto out;
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ case PEER_SESS_ST_ERRSIZE: {
+ if (prev_state == PEER_SESS_ST_WAITMSG)
+ _HA_ATOMIC_DEC(&connected_peers);
+ prev_state = appctx->st0;
+ if (peer_send_error_size_limitmsg(appctx) == -1)
+ goto out;
+ appctx->st0 = PEER_SESS_ST_END;
+ goto switchstate;
+ }
+ case PEER_SESS_ST_ERRPROTO: {
+ TRACE_PROTO("protocol error", PEERS_EV_PROTOERR,
+ NULL, curpeer, &prev_state);
+ if (curpeer)
+ curpeer->proto_err++;
+ if (prev_state == PEER_SESS_ST_WAITMSG)
+ _HA_ATOMIC_DEC(&connected_peers);
+ prev_state = appctx->st0;
+ if (peer_send_error_protomsg(appctx) == -1) {
+ TRACE_PROTO("could not send error message", PEERS_EV_PROTOERR);
+ goto out;
+ }
+ appctx->st0 = PEER_SESS_ST_END;
+ prev_state = appctx->st0;
+ }
+ __fallthrough;
+ case PEER_SESS_ST_END: {
+ if (prev_state == PEER_SESS_ST_WAITMSG)
+ _HA_ATOMIC_DEC(&connected_peers);
+ prev_state = appctx->st0;
+ if (curpeer) {
+ HA_SPIN_UNLOCK(PEER_LOCK, &curpeer->lock);
+ curpeer = NULL;
+ }
+ se_fl_set(appctx->sedesc, SE_FL_EOS|SE_FL_EOI);
+ co_skip(sc_oc(sc), co_data(sc_oc(sc)));
+ goto out;
+ }
+ }
+ }
+out:
+ sc_opposite(sc)->flags |= SC_FL_RCV_ONCE;
+
+ if (curpeer)
+ HA_SPIN_UNLOCK(PEER_LOCK, &curpeer->lock);
+ return;
+}
+
+static struct applet peer_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<PEER>", /* used for logging */
+ .fct = peer_io_handler,
+ .init = peer_session_init,
+ .release = peer_session_release,
+};
+
+
+/*
+ * Use this function to force the close of a peer session
+ */
+static void peer_session_forceshutdown(struct peer *peer)
+{
+ struct appctx *appctx = peer->appctx;
+
+ /* Note that the peer sessions which have just been created
+ * (->st0 == PEER_SESS_ST_CONNECT) must not
+ * be shutdown, if not, the TCP session will never be closed
+ * and stay in CLOSE_WAIT state after having been closed by
+ * the remote side.
+ */
+ if (!appctx || appctx->st0 == PEER_SESS_ST_CONNECT)
+ return;
+
+ if (appctx->applet != &peer_applet)
+ return;
+
+ __peer_session_deinit(peer);
+
+ appctx->st0 = PEER_SESS_ST_END;
+ appctx_wakeup(appctx);
+}
+
+/* Pre-configures a peers frontend to accept incoming connections */
+void peers_setup_frontend(struct proxy *fe)
+{
+ fe->last_change = ns_to_sec(now_ns);
+ fe->cap = PR_CAP_FE | PR_CAP_BE;
+ fe->mode = PR_MODE_PEERS;
+ fe->maxconn = 0;
+ fe->conn_retries = CONN_RETRIES;
+ fe->timeout.connect = MS_TO_TICKS(1000);
+ fe->timeout.client = MS_TO_TICKS(5000);
+ fe->timeout.server = MS_TO_TICKS(5000);
+ fe->accept = frontend_accept;
+ fe->default_target = &peer_applet.obj_type;
+ fe->options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON | PR_O2_SMARTACC;
+}
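+
+/* A "peers" configuration section such as the following (illustrative
+ * sketch; names and addresses are made up) is what ends up instantiating
+ * the frontend pre-configured above:
+ *
+ *   peers mypeers
+ *       peer hap1 192.168.0.1:1024
+ *       peer hap2 192.168.0.2:1024
+ */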
+
+/*
+ * Create a new peer session in assigned state (connect will start automatically)
+ */
+static struct appctx *peer_session_create(struct peers *peers, struct peer *peer)
+{
+ struct appctx *appctx;
+ unsigned int thr = 0;
+ int idx;
+
+ peer->new_conn++;
+ peer->reconnect = tick_add(now_ms, (stopping ? MS_TO_TICKS(PEER_LOCAL_RECONNECT_TIMEOUT) : MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)));
+ peer->heartbeat = TICK_ETERNITY;
+ peer->statuscode = PEER_SESS_SC_CONNECTCODE;
+ peer->last_hdshk = now_ms;
+
+ for (idx = 0; idx < global.nbthread; idx++)
+ thr = peers->applet_count[idx] < peers->applet_count[thr] ? idx : thr;
+ appctx = appctx_new_on(&peer_applet, NULL, thr);
+ if (!appctx)
+ goto out_close;
+ appctx->svcctx = (void *)peer;
+
+ appctx->st0 = PEER_SESS_ST_CONNECT;
+ peer->appctx = appctx;
+
+ HA_ATOMIC_INC(&peers->applet_count[thr]);
+ appctx_wakeup(appctx);
+ return appctx;
+
+ out_close:
+ return NULL;
+}
+
+/*
+ * Task processing function to manage reconnects, peer session wakeups
+ * on local updates and heartbeats. Let's keep it exported so that it
+ * resolves in stack traces and "show tasks".
+ */
+struct task *process_peer_sync(struct task * task, void *context, unsigned int state)
+{
+ struct peers *peers = context;
+ struct peer *ps;
+ struct shared_table *st;
+
+ task->expire = TICK_ETERNITY;
+
+ /* Acquire lock for all peers of the section */
+ for (ps = peers->remote; ps; ps = ps->next)
+ HA_SPIN_LOCK(PEER_LOCK, &ps->lock);
+
+ if (!stopping) {
+ /* Normal case (not soft stop) */
+
+ /* A resync timeout set to TICK_ETERNITY means we just started
+ * a new process and the timer was not initialized yet. We must
+ * arm this timer so that we can switch to requesting a resync
+ * from a remote node if the incoming connection from the old
+ * local process never comes.
+ */
+ if (peers->resync_timeout == TICK_ETERNITY)
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+
+ if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL) &&
+ (!nb_oldpids || tick_is_expired(peers->resync_timeout, now_ms)) &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN)) {
+ /* Resync from the local peer is needed,
+ no peer was assigned for the lesson,
+ and no old local peer was found
+ or the resync timeout expired */
+
+ /* flag no more resync from local, to try resync from remotes */
+ peers->flags |= PEERS_F_RESYNC_LOCAL;
+ peers->flags |= PEERS_F_RESYNC_LOCALTIMEOUT;
+
+ /* reschedule a resync */
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+ }
+
+ /* For each session */
+ for (ps = peers->remote; ps; ps = ps->next) {
+ /* For each remote peers */
+ if (!ps->local) {
+ if (!ps->appctx) {
+ /* no active peer connection */
+ if (ps->statuscode == 0 ||
+ ((ps->statuscode == PEER_SESS_SC_CONNECTCODE ||
+ ps->statuscode == PEER_SESS_SC_SUCCESSCODE ||
+ ps->statuscode == PEER_SESS_SC_CONNECTEDCODE) &&
+ tick_is_expired(ps->reconnect, now_ms))) {
+ /* connection never tried,
+ * or previous peer connection established with success,
+ * or previous peer connection failed while connecting
+ * and the reconnection timer expired */
+
+ /* retry a connect */
+ ps->appctx = peer_session_create(peers, ps);
+ }
+ else if (!tick_is_expired(ps->reconnect, now_ms)) {
+ /* If previous session failed during connection
+ * but reconnection timer is not expired */
+
+ /* reschedule task for reconnect */
+ task->expire = tick_first(task->expire, ps->reconnect);
+ }
+ /* else do nothing */
+ } /* !ps->appctx */
+ else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE) {
+ /* current peer connection is active and established */
+ if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN) &&
+ !(ps->flags & PEER_F_LEARN_NOTUP2DATE)) {
+ /* Resync from a remote is needed
+ * and no peer was assigned for lesson
+ * and current peer may be up2date */
+
+ /* assign peer for the lesson */
+ ps->flags |= PEER_F_LEARN_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_ASSIGN;
+ peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN;
+
+ /* wake up peer handler to handle a request of resync */
+ appctx_wakeup(ps->appctx);
+ }
+ else {
+ int update_to_push = 0;
+
+ /* Awake session if there is data to push */
+ for (st = ps->tables; st ; st = st->next) {
+ if (st->last_pushed != st->table->localupdate) {
+ /* wake up the peer handler to push local updates */
+ update_to_push = 1;
+ /* There is no need to send a heartbeat message
+ * when some updates must be pushed. The remote
+ * peer will consider <ps> peer as alive when it will
+ * receive these updates.
+ */
+ ps->flags &= ~PEER_F_HEARTBEAT;
+ /* Re-schedule another one later. */
+ ps->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT));
+ /* Refresh reconnect if necessary */
+ if (tick_is_expired(ps->reconnect, now_ms))
+ ps->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT));
+ /* We are going to send updates, let's ensure we will
+ * come back to send heartbeat messages or to reconnect.
+ */
+ task->expire = tick_first(ps->reconnect, ps->heartbeat);
+ appctx_wakeup(ps->appctx);
+ break;
+ }
+ }
+ /* When there are updates to send we do not reconnect
+ * and do not send heartbeat message either.
+ */
+ if (!update_to_push) {
+ if (tick_is_expired(ps->reconnect, now_ms)) {
+ if (ps->flags & PEER_F_ALIVE) {
+ /* This peer was alive during a 'reconnect' period.
+ * Flag it as not alive again for the next period.
+ */
+ ps->flags &= ~PEER_F_ALIVE;
+ ps->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT));
+ }
+ else {
+ ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000));
+ ps->heartbeat = TICK_ETERNITY;
+ peer_session_forceshutdown(ps);
+ ps->no_hbt++;
+ }
+ }
+ else if (tick_is_expired(ps->heartbeat, now_ms)) {
+ ps->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT));
+ ps->flags |= PEER_F_HEARTBEAT;
+ appctx_wakeup(ps->appctx);
+ }
+ task->expire = tick_first(ps->reconnect, ps->heartbeat);
+ }
+ }
+ /* else do nothing */
+ } /* SUCCESSCODE */
+ } /* !ps->peer->local */
+ } /* for */
+
+ /* Resync from remotes expired: consider resync is finished */
+ if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) &&
+ !(peers->flags & PEERS_F_RESYNC_ASSIGN) &&
+ tick_is_expired(peers->resync_timeout, now_ms)) {
+ /* Resync from a remote peer is needed,
+ * no peer was assigned for the lesson,
+ * and the resync timeout expired */
+
+ /* flag no more resync from remote, consider resync is finished */
+ peers->flags |= PEERS_F_RESYNC_REMOTE;
+ peers->flags |= PEERS_F_RESYNC_REMOTETIMEOUT;
+ }
+
+ if ((peers->flags & PEERS_RESYNC_STATEMASK) != PEERS_RESYNC_FINISHED) {
+ /* Resync not finished */
+ /* reschedule the task to the resync timeout if not expired,
+ * so the resync can be ended if needed */
+ if (!tick_is_expired(peers->resync_timeout, now_ms))
+ task->expire = tick_first(task->expire, peers->resync_timeout);
+ }
+ } /* !stopping */
+ else {
+ /* soft stop case */
+ if (state & TASK_WOKEN_SIGNAL) {
+ /* We've just received the signal */
+ if (!(peers->flags & PEERS_F_DONOTSTOP)) {
+ /* add DO NOT STOP flag if not present */
+ _HA_ATOMIC_INC(&jobs);
+ peers->flags |= PEERS_F_DONOTSTOP;
+
+ /* Disconnect all connected peers to process a local sync.
+ * This must be done only the first time we switch to the
+ * stopping state.
+ */
+ for (ps = peers->remote; ps; ps = ps->next) {
+ /* we're killing a connection, we must apply a random delay before
+ * retrying otherwise the other end will do the same and we can loop
+ * for a while.
+ */
+ ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000));
+ if (ps->appctx) {
+ peer_session_forceshutdown(ps);
+ }
+ }
+
+ /* Set the resync timeout for the local peer and request an immediate reconnect */
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+ peers->local->reconnect = now_ms;
+ }
+ }
+
+ ps = peers->local;
+ if (ps->flags & PEER_F_TEACH_COMPLETE) {
+ if (peers->flags & PEERS_F_DONOTSTOP) {
+ /* resync of new process was complete, current process can die now */
+ _HA_ATOMIC_DEC(&jobs);
+ peers->flags &= ~PEERS_F_DONOTSTOP;
+ for (st = ps->tables; st ; st = st->next)
+ HA_ATOMIC_DEC(&st->table->refcnt);
+ }
+ }
+ else if (!ps->appctx) {
+ /* Re-arm resync timeout if necessary */
+ if (!tick_isset(peers->resync_timeout))
+ peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT));
+
+ /* If there's no active peer connection */
+ if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED &&
+ !tick_is_expired(peers->resync_timeout, now_ms) &&
+ (ps->statuscode == 0 ||
+ ps->statuscode == PEER_SESS_SC_SUCCESSCODE ||
+ ps->statuscode == PEER_SESS_SC_CONNECTEDCODE ||
+ ps->statuscode == PEER_SESS_SC_TRYAGAIN)) {
+ /* The resync is finished for the local peer and
+ * the resync timeout is not expired, and
+ * the connection was never tried,
+ * or the previous peer connection was successfully established,
+ * or the previous tcp connect succeeded but the init state is incomplete,
+ * or during the previous connect the peer replied with a "try again" status code */
+
+ if (!tick_is_expired(ps->reconnect, now_ms)) {
+ /* reconnection timer is not expired. reschedule task for reconnect */
+ task->expire = tick_first(task->expire, ps->reconnect);
+ }
+ else {
+ /* connect to the local peer if we must push a local sync */
+ if (peers->flags & PEERS_F_DONOTSTOP) {
+ peer_session_create(peers, ps);
+ }
+ }
+ }
+ else {
+ /* Other error cases */
+ if (peers->flags & PEERS_F_DONOTSTOP) {
+ /* unable to resync new process, current process can die now */
+ _HA_ATOMIC_DEC(&jobs);
+ peers->flags &= ~PEERS_F_DONOTSTOP;
+ for (st = ps->tables; st ; st = st->next)
+ HA_ATOMIC_DEC(&st->table->refcnt);
+ }
+ }
+ }
+ else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE ) {
+ /* Reset resync timeout during a resync */
+ peers->resync_timeout = TICK_ETERNITY;
+
+ /* current peer connection is active and established
+ * wake up all peer handlers to push remaining local updates */
+ for (st = ps->tables; st ; st = st->next) {
+ if (st->last_pushed != st->table->localupdate) {
+ appctx_wakeup(ps->appctx);
+ break;
+ }
+ }
+ }
+ } /* stopping */
+
+ /* Release lock for all peers of the section */
+ for (ps = peers->remote; ps; ps = ps->next)
+ HA_SPIN_UNLOCK(PEER_LOCK, &ps->lock);
+
+ /* Wakeup for re-connect */
+ return task;
+}
+
+
+/*
+ * Initialize the sync task of a peers section.
+ * Returns 0 in case of error, non-zero otherwise.
+ */
+int peers_init_sync(struct peers *peers)
+{
+ struct peer * curpeer;
+
+ for (curpeer = peers->remote; curpeer; curpeer = curpeer->next) {
+ peers->peers_fe->maxconn += 3;
+ }
+
+ peers->sync_task = task_new_anywhere();
+ if (!peers->sync_task)
+ return 0;
+
+ memset(peers->applet_count, 0, sizeof(peers->applet_count));
+ peers->sync_task->process = process_peer_sync;
+ peers->sync_task->context = (void *)peers;
+ peers->sighandler = signal_register_task(0, peers->sync_task, 0);
+ task_wakeup(peers->sync_task, TASK_WOKEN_INIT);
+ return 1;
+}
+
+/*
+ * Allocate a cache of dictionary entries used upon transmission.
+ */
+static struct dcache_tx *new_dcache_tx(size_t max_entries)
+{
+ struct dcache_tx *d;
+ struct ebpt_node *entries;
+
+ d = malloc(sizeof *d);
+ entries = calloc(max_entries, sizeof *entries);
+ if (!d || !entries)
+ goto err;
+
+ d->lru_key = 0;
+ d->prev_lookup = NULL;
+ d->cached_entries = EB_ROOT_UNIQUE;
+ d->entries = entries;
+
+ return d;
+
+ err:
+ free(d);
+ free(entries);
+ return NULL;
+}
+
+/*
+ * Allocate a cache of dictionary entries with <max_entries> as the maximum
+ * number of entries.
+ * Return the dictionary cache if it succeeded, NULL if not.
+ * Must be deallocated by calling free_dcache().
+ */
+static struct dcache *new_dcache(size_t max_entries)
+{
+ struct dcache_tx *dc_tx;
+ struct dcache *dc;
+ struct dcache_rx *dc_rx;
+
+ dc = calloc(1, sizeof *dc);
+ dc_tx = new_dcache_tx(max_entries);
+ dc_rx = calloc(max_entries, sizeof *dc_rx);
+ if (!dc || !dc_tx || !dc_rx)
+ goto err;
+
+ dc->tx = dc_tx;
+ dc->rx = dc_rx;
+ dc->max_entries = max_entries;
+
+ return dc;
+
+ err:
+ free(dc);
+ free(dc_tx);
+ free(dc_rx);
+ return NULL;
+}
+
+/*
+ * Look for the dictionary entry with the value of <i> in <d> cache of dictionary
+ * entries used upon transmission.
+ * Return the entry if found, NULL if not.
+ */
+static struct ebpt_node *dcache_tx_lookup_value(struct dcache_tx *d,
+ struct dcache_tx_entry *i)
+{
+ return ebpt_lookup(&d->cached_entries, i->entry.key);
+}
+
+/*
+ * Flush the dictionary cache of <peer>.
+ * Always succeeds.
+ */
+static inline void flush_dcache(struct peer *peer)
+{
+ int i;
+ struct dcache *dc = peer->dcache;
+
+ for (i = 0; i < dc->max_entries; i++) {
+ ebpt_delete(&dc->tx->entries[i]);
+ dc->tx->entries[i].key = NULL;
+ dict_entry_unref(&server_key_dict, dc->rx[i].de);
+ dc->rx[i].de = NULL;
+ }
+ dc->tx->prev_lookup = NULL;
+ dc->tx->lru_key = 0;
+
+ memset(dc->rx, 0, dc->max_entries * sizeof *dc->rx);
+}
+
+/*
+ * Insert a dictionary entry in <dc> cache part used upon transmission (->tx)
+ * with information provided by <i> dictionary cache entry (especially the value
+ * to be inserted if not already). Return <i> if already present in the cache
+ * or something different from <i> if not.
+ */
+static struct ebpt_node *dcache_tx_insert(struct dcache *dc, struct dcache_tx_entry *i)
+{
+ struct dcache_tx *dc_tx;
+ struct ebpt_node *o;
+
+ dc_tx = dc->tx;
+
+ if (dc_tx->prev_lookup && dc_tx->prev_lookup->key == i->entry.key) {
+ o = dc_tx->prev_lookup;
+ } else {
+ o = dcache_tx_lookup_value(dc_tx, i);
+ if (o) {
+ /* Save it */
+ dc_tx->prev_lookup = o;
+ }
+ }
+
+ if (o) {
+ /* Copy the ID. */
+ i->id = o - dc->tx->entries;
+ return &i->entry;
+ }
+
+ /* The new entry to put in cache */
+ dc_tx->prev_lookup = o = &dc_tx->entries[dc_tx->lru_key];
+
+ ebpt_delete(o);
+ o->key = i->entry.key;
+ ebpt_insert(&dc_tx->cached_entries, o);
+ i->id = dc_tx->lru_key;
+
+ /* Update the index for the next entry to put in cache */
+ dc_tx->lru_key = (dc_tx->lru_key + 1) & (dc->max_entries - 1);
+
+ return o;
+}
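+
+/* Note (informal): the "& (dc->max_entries - 1)" wrap above assumes that
+ * <max_entries> is a power of two (PEER_STKT_CACHE_MAX_ENTRIES in
+ * practice). The TX cache thus behaves as a fixed-size ring: e.g. with
+ * max_entries=8, lru_key cycles through 0,1,...,7,0,... and the oldest
+ * slot is silently reused on each new insertion.
+ */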
+
+/*
+ * Allocate a dictionary cache for each peer of <peers> section.
+ * Return 1 if succeeded, 0 if not.
+ */
+int peers_alloc_dcache(struct peers *peers)
+{
+ struct peer *p;
+
+ for (p = peers->remote; p; p = p->next) {
+ p->dcache = new_dcache(PEER_STKT_CACHE_MAX_ENTRIES);
+ if (!p->dcache)
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Function used to register a table for sync on a group of peers.
+ * Returns 0 in case of success, 1 on allocation failure.
+ */
+int peers_register_table(struct peers *peers, struct stktable *table)
+{
+ struct shared_table *st;
+ struct peer * curpeer;
+ int id = 0;
+ int retval = 0;
+
+ for (curpeer = peers->remote; curpeer; curpeer = curpeer->next) {
+ st = calloc(1,sizeof(*st));
+ if (!st) {
+ retval = 1;
+ break;
+ }
+ st->table = table;
+ st->next = curpeer->tables;
+ if (curpeer->tables)
+ id = curpeer->tables->local_id;
+ st->local_id = id + 1;
+
+ /* If peer is local we inc table
+ * refcnt to protect against flush
+ * until this process pushed all
+ * table content to the new one
+ */
+ if (curpeer->local)
+ HA_ATOMIC_INC(&st->table->refcnt);
+ curpeer->tables = st;
+ }
+
+ table->sync_task = peers->sync_task;
+
+ return retval;
+}
+
+/* context used by a "show peers" command */
+struct show_peers_ctx {
+ void *target; /* if non-null, dump only this section and stop */
+ struct peers *peers; /* "peers" section being currently dumped. */
+ struct peer *peer; /* "peer" being currently dumped. */
+ int flags; /* non-zero if "dict" dump requested */
+ enum {
+ STATE_HEAD = 0, /* dump the section's header */
+ STATE_PEER, /* dump the whole peer */
+ STATE_DONE, /* finished */
+ } state; /* parser's state */
+};
+
+/*
+ * Parse the "show peers" command arguments.
+ * Returns 0 if succeeded, 1 if not with the ->msg of the appctx set as
+ * error message.
+ */
+static int cli_parse_show_peers(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_peers_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (strcmp(args[2], "dict") == 0) {
+ /* show the dictionaries (large dump) */
+ ctx->flags |= PEERS_SHOW_F_DICT;
+ args++;
+ } else if (strcmp(args[2], "-") == 0)
+ args++; // allows showing a section called "dict"
+
+ if (*args[2]) {
+ struct peers *p;
+
+ for (p = cfg_peers; p; p = p->next) {
+ if (strcmp(p->id, args[2]) == 0) {
+ ctx->target = p;
+ break;
+ }
+ }
+
+ if (!p)
+ return cli_err(appctx, "No such peers\n");
+ }
+
+ /* where to start from */
+ ctx->peers = ctx->target ? ctx->target : cfg_peers;
+ return 0;
+}
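+
+/* Example invocations accepted by the parser above (illustrative; the
+ * stats socket path is an assumption):
+ *
+ *   $ echo "show peers" | socat stdio /var/run/haproxy.sock
+ *   $ echo "show peers dict mypeers" | socat stdio /var/run/haproxy.sock
+ *   $ echo "show peers - dict" | socat stdio /var/run/haproxy.sock
+ *
+ * The last form uses "-" to dump a peers section actually named "dict".
+ */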
+
+/*
+ * This function dumps the peer state information of the <peers> "peers" section.
+ * Returns 0 if the output buffer is full and needs to be called again, non-zero if not.
+ * Dedicated to be called by cli_io_handler_show_peers() cli I/O handler.
+ */
+static int peers_dump_head(struct buffer *msg, struct appctx *appctx, struct peers *peers)
+{
+ struct tm tm;
+
+ get_localtime(peers->last_change, &tm);
+ chunk_appendf(msg, "%p: [%02d/%s/%04d:%02d:%02d:%02d] id=%s disabled=%d flags=0x%x resync_timeout=%s task_calls=%u\n",
+ peers,
+ tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ tm.tm_hour, tm.tm_min, tm.tm_sec,
+ peers->id, peers->disabled, peers->flags,
+ peers->resync_timeout ?
+ tick_is_expired(peers->resync_timeout, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(peers->resync_timeout - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ peers->sync_task ? peers->sync_task->calls : 0);
+
+ if (applet_putchk(appctx, msg) == -1)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * This function dumps <peer> state information.
+ * Returns 0 if the output buffer is full and needs to be called again, non-zero
+ * if not. Dedicated to be called by cli_io_handler_show_peers() cli I/O handler.
+ */
+static int peers_dump_peer(struct buffer *msg, struct appctx *appctx, struct peer *peer, int flags)
+{
+ struct connection *conn;
+ char pn[INET6_ADDRSTRLEN];
+ struct stconn *peer_cs;
+ struct stream *peer_s;
+ struct shared_table *st;
+
+ addr_to_str(&peer->addr, pn, sizeof pn);
+ chunk_appendf(msg, " %p: id=%s(%s,%s) addr=%s:%d last_status=%s",
+ peer, peer->id,
+ peer->local ? "local" : "remote",
+ peer->appctx ? "active" : "inactive",
+ pn, get_host_port(&peer->addr),
+ statuscode_str(peer->statuscode));
+
+ chunk_appendf(msg, " last_hdshk=%s\n",
+ peer->last_hdshk ? human_time(TICKS_TO_MS(now_ms - peer->last_hdshk),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(msg, " reconnect=%s",
+ peer->reconnect ?
+ tick_is_expired(peer->reconnect, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(peer->reconnect - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(msg, " heartbeat=%s",
+ peer->heartbeat ?
+ tick_is_expired(peer->heartbeat, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(peer->heartbeat - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(msg, " confirm=%u tx_hbt=%u rx_hbt=%u no_hbt=%u new_conn=%u proto_err=%u coll=%u\n",
+ peer->confirm, peer->tx_hbt, peer->rx_hbt,
+ peer->no_hbt, peer->new_conn, peer->proto_err, peer->coll);
+
+ chunk_appendf(&trash, " flags=0x%x", peer->flags);
+
+ if (!peer->appctx)
+ goto table_info;
+
+ chunk_appendf(&trash, " appctx:%p st0=%d st1=%d task_calls=%u",
+ peer->appctx, peer->appctx->st0, peer->appctx->st1,
+ peer->appctx->t ? peer->appctx->t->calls : 0);
+
+ peer_cs = appctx_sc(peer->appctx);
+ if (!peer_cs) {
+ /* the appctx might exist but not yet be initialized due to
+ * deferred initialization used to balance applets across
+ * threads.
+ */
+ goto table_info;
+ }
+
+ peer_s = __sc_strm(peer_cs);
+
+ chunk_appendf(&trash, " state=%s", sc_state_str(sc_opposite(peer_cs)->state));
+
+ conn = objt_conn(strm_orig(peer_s));
+ if (conn)
+ chunk_appendf(&trash, "\n xprt=%s", conn_get_xprt_name(conn));
+
+ switch (conn && conn_get_src(conn) ? addr_to_str(conn->src, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash, " src=%s:%d", pn, get_host_port(conn->src));
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash, " src=unix:%d", strm_li(peer_s)->luid);
+ break;
+ }
+
+ switch (conn && conn_get_dst(conn) ? addr_to_str(conn->dst, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash, " addr=%s:%d", pn, get_host_port(conn->dst));
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash, " addr=unix:%d", strm_li(peer_s)->luid);
+ break;
+ }
+
+ table_info:
+ if (peer->remote_table)
+ chunk_appendf(&trash, "\n remote_table:%p id=%s local_id=%d remote_id=%d",
+ peer->remote_table,
+ peer->remote_table->table->id,
+ peer->remote_table->local_id,
+ peer->remote_table->remote_id);
+
+ if (peer->last_local_table)
+ chunk_appendf(&trash, "\n last_local_table:%p id=%s local_id=%d remote_id=%d",
+ peer->last_local_table,
+ peer->last_local_table->table->id,
+ peer->last_local_table->local_id,
+ peer->last_local_table->remote_id);
+
+ if (peer->tables) {
+ chunk_appendf(&trash, "\n shared tables:");
+ for (st = peer->tables; st; st = st->next) {
+ int i, count;
+ struct stktable *t;
+ struct dcache *dcache;
+
+ t = st->table;
+ dcache = peer->dcache;
+
+ chunk_appendf(&trash, "\n %p local_id=%d remote_id=%d "
+ "flags=0x%x remote_data=0x%llx",
+ st, st->local_id, st->remote_id,
+ st->flags, (unsigned long long)st->remote_data);
+ chunk_appendf(&trash, "\n last_acked=%u last_pushed=%u last_get=%u"
+ " teaching_origin=%u update=%u",
+ st->last_acked, st->last_pushed, st->last_get,
+ st->teaching_origin, st->update);
+ chunk_appendf(&trash, "\n table:%p id=%s update=%u localupdate=%u"
+ " commitupdate=%u refcnt=%u",
+ t, t->id, t->update, t->localupdate, _HA_ATOMIC_LOAD(&t->commitupdate), t->refcnt);
+ if (flags & PEERS_SHOW_F_DICT) {
+ chunk_appendf(&trash, "\n TX dictionary cache:");
+ count = 0;
+ for (i = 0; i < dcache->max_entries; i++) {
+ struct ebpt_node *node;
+ struct dict_entry *de;
+
+ node = &dcache->tx->entries[i];
+ if (!node->key)
+ break;
+
+ if (!count++)
+ chunk_appendf(&trash, "\n ");
+ de = node->key;
+ chunk_appendf(&trash, " %3u -> %s", i, (char *)de->value.key);
+ count &= 0x3;
+ }
+ chunk_appendf(&trash, "\n RX dictionary cache:");
+ count = 0;
+ for (i = 0; i < dcache->max_entries; i++) {
+ if (!count++)
+ chunk_appendf(&trash, "\n ");
+ chunk_appendf(&trash, " %3u -> %s", i,
+ dcache->rx[i].de ?
+ (char *)dcache->rx[i].de->value.key : "-");
+ count &= 0x3;
+ }
+ } else {
+ chunk_appendf(&trash, "\n Dictionary cache not dumped (use \"show peers dict\")");
+ }
+ }
+ }
+
+ end:
+ chunk_appendf(&trash, "\n");
+ if (applet_putchk(appctx, msg) == -1)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * This function dumps all the peers of a "peers" section.
+ * Returns 0 if the output buffer is full and needs to be called
+ * again, non-zero if not. It proceeds under thread isolation, so
+ * there is no thread safety issue here.
+ */
+static int cli_io_handler_show_peers(struct appctx *appctx)
+{
+ struct show_peers_ctx *ctx = appctx->svcctx;
+ int ret = 0, first_peers = 1;
+
+ thread_isolate();
+
+ chunk_reset(&trash);
+
+ while (ctx->state != STATE_DONE) {
+ switch (ctx->state) {
+ case STATE_HEAD:
+ if (!ctx->peers) {
+ /* No more peers list. */
+ ctx->state = STATE_DONE;
+ }
+ else {
+ if (!first_peers)
+ chunk_appendf(&trash, "\n");
+ else
+ first_peers = 0;
+ if (!peers_dump_head(&trash, appctx, ctx->peers))
+ goto out;
+
+ ctx->peer = ctx->peers->remote;
+ ctx->peers = ctx->peers->next;
+ ctx->state = STATE_PEER;
+ }
+ break;
+
+ case STATE_PEER:
+ if (!ctx->peer) {
+ /* End of peer list */
+ if (!ctx->target)
+ ctx->state = STATE_HEAD; // next one
+ else
+ ctx->state = STATE_DONE;
+ }
+ else {
+ if (!peers_dump_peer(&trash, appctx, ctx->peer, ctx->flags))
+ goto out;
+
+ ctx->peer = ctx->peer->next;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ ret = 1;
+ out:
+ thread_release();
+ return ret;
+}
+
+
+struct peers_kw_list peers_keywords = {
+ .list = LIST_HEAD_INIT(peers_keywords.list)
+};
+
+void peers_register_keywords(struct peers_kw_list *pkwl)
+{
+ LIST_APPEND(&peers_keywords.list, &pkwl->list);
+}
+
+/* config parser for global "tune.peers.max-updates-at-once" */
+static int cfg_parse_max_updt_at_once(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int arg = -1;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) != 0)
+ arg = atoi(args[1]);
+
+ if (arg < 1) {
+ memprintf(err, "'%s' expects an integer argument greater than 0.", args[0]);
+ return -1;
+ }
+
+ peers_max_updates_at_once = arg;
+ return 0;
+}
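+
+/* Illustrative global configuration line for the parser above (the value
+ * shown is arbitrary):
+ *
+ *   global
+ *       tune.peers.max-updates-at-once 200
+ */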
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.peers.max-updates-at-once", cfg_parse_max_updt_at_once },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * CLI keywords.
+ */
+static struct cli_kw_list cli_kws = {{ }, {
+ { { "show", "peers", NULL }, "show peers [dict|-] [section] : dump some information about all the peers or this peers section", cli_parse_show_peers, cli_io_handler_show_peers, },
+ {},
+}};
+
+/* Register cli keywords */
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/pipe.c b/src/pipe.c
new file mode 100644
index 0000000..5599fe0
--- /dev/null
+++ b/src/pipe.c
@@ -0,0 +1,136 @@
+/*
+ * Pipe management
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <haproxy/api.h>
+#include <haproxy/global.h>
+#include <haproxy/pipe-t.h>
+#include <haproxy/pool.h>
+#include <haproxy/thread.h>
+
+
+DECLARE_STATIC_POOL(pool_head_pipe, "pipe", sizeof(struct pipe));
+
+struct pipe *pipes_live = NULL; /* pipes which are still ready to use */
+
+__decl_spinlock(pipes_lock); /* lock used to protect pipes list */
+
+static THREAD_LOCAL int local_pipes_free = 0; /* # of pipes in the thread-local cache */
+static THREAD_LOCAL struct pipe *local_pipes = NULL;
+
+int pipes_used = 0; /* # of pipes in use (2 fds each) */
+int pipes_free = 0; /* # of pipes unused */
+
+/* return a pre-allocated empty pipe. Try to allocate one if there isn't any
+ * left. NULL is returned if a pipe could not be allocated.
+ */
+struct pipe *get_pipe()
+{
+ struct pipe *ret = NULL;
+ int pipefd[2];
+
+ ret = local_pipes;
+ if (likely(ret)) {
+ local_pipes = ret->next;
+ local_pipes_free--;
+ HA_ATOMIC_DEC(&pipes_free);
+ HA_ATOMIC_INC(&pipes_used);
+ goto out;
+ }
+
+ if (likely(pipes_live)) {
+ HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
+ ret = pipes_live;
+ if (likely(ret))
+ pipes_live = ret->next;
+ HA_SPIN_UNLOCK(PIPES_LOCK, &pipes_lock);
+ if (ret) {
+ HA_ATOMIC_DEC(&pipes_free);
+ HA_ATOMIC_INC(&pipes_used);
+ goto out;
+ }
+ }
+
+ HA_ATOMIC_INC(&pipes_used);
+ if (pipes_used + pipes_free >= global.maxpipes)
+ goto fail;
+
+ ret = pool_alloc(pool_head_pipe);
+ if (!ret)
+ goto fail;
+
+ if (pipe(pipefd) < 0)
+ goto fail;
+
+#ifdef F_SETPIPE_SZ
+ if (global.tune.pipesize)
+ fcntl(pipefd[0], F_SETPIPE_SZ, global.tune.pipesize);
+#endif
+ ret->data = 0;
+ ret->prod = pipefd[1];
+ ret->cons = pipefd[0];
+ ret->next = NULL;
+ out:
+ return ret;
+ fail:
+ pool_free(pool_head_pipe, ret);
+ HA_ATOMIC_DEC(&pipes_used);
+ return NULL;
+
+}
+
+/* destroy a pipe, possibly because an error was encountered on it. Its FDs
+ * will be closed and it will not be reinjected into the live pool.
+ */
+void kill_pipe(struct pipe *p)
+{
+ close(p->prod);
+ close(p->cons);
+ pool_free(pool_head_pipe, p);
+ HA_ATOMIC_DEC(&pipes_used);
+}
+
+/* put back an unused pipe into the live pool. If it still has data in it, it is
+ * closed and not reinjected into the live pool. The caller is not allowed to
+ * use it once released.
+ */
+void put_pipe(struct pipe *p)
+{
+ if (unlikely(p->data)) {
+ kill_pipe(p);
+ return;
+ }
+
+ if (likely(local_pipes_free * global.nbthread < global.maxpipes - pipes_used)) {
+ p->next = local_pipes;
+ local_pipes = p;
+ local_pipes_free++;
+ goto out;
+ }
+
+ HA_SPIN_LOCK(PIPES_LOCK, &pipes_lock);
+ p->next = pipes_live;
+ pipes_live = p;
+ HA_SPIN_UNLOCK(PIPES_LOCK, &pipes_lock);
+ out:
+ HA_ATOMIC_INC(&pipes_free);
+ HA_ATOMIC_DEC(&pipes_used);
+}
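+
+/* Minimal sketch of the expected pipe lifecycle (illustrative only; the
+ * splice step stands for whatever the caller does with the two FDs):
+ *
+ *   struct pipe *p = get_pipe();
+ *   if (p) {
+ *           // write into p->prod, read from p->cons (e.g. via splice())
+ *           if (error)
+ *                   kill_pipe(p); // close both FDs and drop the pipe
+ *           else
+ *                   put_pipe(p);  // recycle it (killed if p->data != 0)
+ *   }
+ */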
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/pool.c b/src/pool.c
new file mode 100644
index 0000000..376b311
--- /dev/null
+++ b/src/pool.c
@@ -0,0 +1,1539 @@
+/*
+ * Memory management functions.
+ *
+ * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+
+#include <import/plock.h>
+
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/applet-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/pool.h>
+#include <haproxy/pool-os.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+
+/* These ones are initialized per-thread on startup by init_pools() */
+THREAD_LOCAL size_t pool_cache_bytes = 0; /* total cache size */
+THREAD_LOCAL size_t pool_cache_count = 0; /* #cache objects */
+
+static struct list pools __read_mostly = LIST_HEAD_INIT(pools);
+int mem_poison_byte __read_mostly = 'P';
+int pool_trim_in_progress = 0;
+uint pool_debugging __read_mostly = /* set of POOL_DBG_* flags */
+#ifdef DEBUG_FAIL_ALLOC
+ POOL_DBG_FAIL_ALLOC |
+#endif
+#ifdef DEBUG_DONT_SHARE_POOLS
+ POOL_DBG_DONT_MERGE |
+#endif
+#ifdef DEBUG_POOL_INTEGRITY
+ POOL_DBG_COLD_FIRST |
+#endif
+#ifdef DEBUG_POOL_INTEGRITY
+ POOL_DBG_INTEGRITY |
+#endif
+#ifdef CONFIG_HAP_NO_GLOBAL_POOLS
+ POOL_DBG_NO_GLOBAL |
+#endif
+#if defined(DEBUG_NO_POOLS) || defined(DEBUG_UAF)
+ POOL_DBG_NO_CACHE |
+#endif
+#if defined(DEBUG_POOL_TRACING)
+ POOL_DBG_CALLER |
+#endif
+#if defined(DEBUG_MEMORY_POOLS)
+ POOL_DBG_TAG |
+#endif
+#if defined(DEBUG_UAF)
+ POOL_DBG_UAF |
+#endif
+ 0;
+
+static const struct {
+ uint flg;
+ const char *set;
+ const char *clr;
+ const char *hlp;
+} dbg_options[] = {
+ /* flg, set, clr, hlp */
+ { POOL_DBG_FAIL_ALLOC, "fail", "no-fail", "randomly fail allocations" },
+ { POOL_DBG_DONT_MERGE, "no-merge", "merge", "disable merging of similar pools" },
+ { POOL_DBG_COLD_FIRST, "cold-first", "hot-first", "pick cold objects first" },
+ { POOL_DBG_INTEGRITY, "integrity", "no-integrity", "enable cache integrity checks" },
+ { POOL_DBG_NO_GLOBAL, "no-global", "global", "disable global shared cache" },
+ { POOL_DBG_NO_CACHE, "no-cache", "cache", "disable thread-local cache" },
+ { POOL_DBG_CALLER, "caller", "no-caller", "save caller information in cache" },
+ { POOL_DBG_TAG, "tag", "no-tag", "add tag at end of allocated objects" },
+ { POOL_DBG_POISON, "poison", "no-poison", "poison newly allocated objects" },
+ { POOL_DBG_UAF, "uaf", "no-uaf", "enable use-after-free checks (slow)" },
+ { 0 /* end */ }
+};
+
+/* describes a snapshot of a pool line about to be dumped by "show pools" */
+struct pool_dump_info {
+ const struct pool_head *entry;
+ ulong alloc_items;
+ ulong alloc_bytes;
+ ulong used_items;
+ ulong cached_items;
+ ulong need_avg;
+ ulong failed_items;
+};
+
+/* context used by "show pools" */
+struct show_pools_ctx {
+ char *prefix; /* if non-null, match this prefix name for the pool */
+ int by_what; /* 0=no sort, 1=by name, 2=by item size, 3=by total alloc */
+ int maxcnt; /* 0=no limit, other=max number of output entries */
+};
+
+static int mem_fail_rate __read_mostly = 0;
+static int using_default_allocator __read_mostly = 1; // linked-in allocator or LD_PRELOADed one ?
+static int disable_trim __read_mostly = 0;
+static int(*my_mallctl)(const char *, void *, size_t *, void *, size_t) = NULL;
+static int(*_malloc_trim)(size_t) = NULL;
+
+/* returns the pool hash bucket an object should use based on its pointer.
+ * Objects need a consistent bucket assignment so that they may be
+ * allocated on one thread and released on another one. Thus only the
+ * pointer is usable.
+ */
+static forceinline unsigned int pool_pbucket(const void *ptr)
+{
+ return ptr_hash(ptr, CONFIG_HAP_POOL_BUCKETS_BITS);
+}
+
+/* returns the pool hash bucket to use for the current thread. This should only
+ * be used when no pointer is available (e.g. count alloc failures).
+ */
+static forceinline unsigned int pool_tbucket(void)
+{
+ return tid % CONFIG_HAP_POOL_BUCKETS;
+}
+
+/* ask the allocator to trim memory pools.
+ * This must run under thread isolation so that competing threads trying to
+ * allocate or release memory do not prevent the allocator from completing
+ * its job. We just have to be careful as callers might already be isolated
+ * themselves.
+ */
+void trim_all_pools(void)
+{
+ int isolated = thread_isolated();
+
+ if (!isolated)
+ thread_isolate();
+
+ malloc_trim(0);
+
+ if (!isolated)
+ thread_release();
+}
+
+/* check if we're using the same allocator as the one that provides
+ * malloc_trim() and mallinfo(). The principle is that on glibc, both
+ * malloc_trim() and mallinfo() are provided, and using mallinfo() we
+ * can check if malloc() is performed through glibc or any other one
+ * the executable was linked against (e.g. jemalloc). Prior to this we
+ * have to check whether we're running on jemalloc by verifying if the
+ * mallctl() function is provided. Its pointer will be used later.
+ */
+static void detect_allocator(void)
+{
+#if defined(__ELF__)
+ extern int mallctl(const char *, void *, size_t *, void *, size_t) __attribute__((weak));
+
+ my_mallctl = mallctl;
+#endif
+ if (!my_mallctl) {
+ /* trick: we won't enter here if mallctl() is known at link
+ * time. This allows to detect if the symbol was changed since
+ * the program was linked, indicating it's not running on the
+ * expected allocator (due to an LD_PRELOAD) and that we must
+ * be extra cautious and avoid some optimizations that are
+ * known to break such as malloc_trim().
+ */
+ my_mallctl = get_sym_curr_addr("mallctl");
+ using_default_allocator = (my_mallctl == NULL);
+ }
+
+ if (!my_mallctl) {
+#if defined(HA_HAVE_MALLOC_TRIM)
+#ifdef HA_HAVE_MALLINFO2
+ struct mallinfo2 mi1, mi2;
+#else
+ struct mallinfo mi1, mi2;
+#endif
+ void *ptr;
+
+#ifdef HA_HAVE_MALLINFO2
+ mi1 = mallinfo2();
+#else
+ mi1 = mallinfo();
+#endif
+ ptr = DISGUISE(malloc(1));
+#ifdef HA_HAVE_MALLINFO2
+ mi2 = mallinfo2();
+#else
+ mi2 = mallinfo();
+#endif
+ free(DISGUISE(ptr));
+
+ using_default_allocator = !!memcmp(&mi1, &mi2, sizeof(mi1));
+#elif defined(HA_HAVE_MALLOC_ZONE)
+ using_default_allocator = (malloc_default_zone() != NULL);
+#endif
+ }
+
+ /* detect presence of malloc_trim() */
+ _malloc_trim = get_sym_next_addr("malloc_trim");
+}
+
+/* replace the libc's malloc_trim() so that we can also intercept the calls
+ * from child libraries when the allocator is not the default one.
+ */
+int malloc_trim(size_t pad)
+{
+ int ret = 0;
+
+ if (disable_trim)
+ return ret;
+
+ HA_ATOMIC_INC(&pool_trim_in_progress);
+
+ if (my_mallctl) {
+ /* here we're on jemalloc and malloc_trim() is called either
+ * by haproxy or another dependency (the worst case that
+ * normally crashes). Instead of just failing, we can actually
+ * emulate it so let's do it now.
+ */
+ unsigned int i, narenas = 0;
+ size_t len = sizeof(narenas);
+
+ if (my_mallctl("arenas.narenas", &narenas, &len, NULL, 0) == 0) {
+ for (i = 0; i < narenas; i ++) {
+ char mib[32] = {0};
+ snprintf(mib, sizeof(mib), "arena.%u.purge", i);
+ (void)my_mallctl(mib, NULL, NULL, NULL, 0);
+ ret = 1; // success
+ }
+ }
+ }
+ else if (!using_default_allocator) {
+ /* special allocators that can be LD_PRELOADed end here */
+ ret = 0; // did nothing
+ }
+ else if (_malloc_trim) {
+ /* we're typically on glibc and not overridden */
+ ret = _malloc_trim(pad);
+ }
+#if defined(HA_HAVE_MALLOC_ZONE)
+ else {
+ /* we're on MacOS, there's an equivalent mechanism */
+ vm_address_t *zones;
+ unsigned int i, nzones;
+
+ if (malloc_get_all_zones(0, NULL, &zones, &nzones) == KERN_SUCCESS) {
+ for (i = 0; i < nzones; i ++) {
+ malloc_zone_t *zone = (malloc_zone_t *)zones[i];
+
+ /* we cannot purge anonymous zones */
+ if (zone->zone_name) {
+ malloc_zone_pressure_relief(zone, 0);
+ ret = 1; // success
+ }
+ }
+ }
+ }
+#endif
+ HA_ATOMIC_DEC(&pool_trim_in_progress);
+
+ /* here we have ret=0 if nothing was released, or 1 if something was */
+ return ret;
+}
+
+static int mem_should_fail(const struct pool_head *pool)
+{
+ int ret = 0;
+
+ if (mem_fail_rate > 0 && !(global.mode & MODE_STARTING)) {
+ if (mem_fail_rate > statistical_prng_range(100))
+ ret = 1;
+ else
+ ret = 0;
+ }
+ return ret;
+}
+
+/* Try to find an existing shared pool with the same characteristics and
+ * returns it, otherwise creates this one. NULL is returned if no memory
+ * is available for a new creation. Two flags are supported :
+ * - MEM_F_SHARED to indicate that the pool may be shared with other users
+ * - MEM_F_EXACT to indicate that the size must not be rounded up
+ */
+struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags)
+{
+ unsigned int extra_mark, extra_caller, extra;
+ struct pool_head *pool;
+ struct pool_head *entry;
+ struct list *start;
+ unsigned int align;
+ int thr __maybe_unused;
+
+ extra_mark = (pool_debugging & POOL_DBG_TAG) ? POOL_EXTRA_MARK : 0;
+ extra_caller = (pool_debugging & POOL_DBG_CALLER) ? POOL_EXTRA_CALLER : 0;
+ extra = extra_mark + extra_caller;
+
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ /* we'll store two lists there, we need the room for this. Let's
+ * make sure it's always OK even when including the extra word
+ * that is stored after the pci struct.
+ */
+ if (size + extra - extra_caller < sizeof(struct pool_cache_item))
+ size = sizeof(struct pool_cache_item) + extra_caller - extra;
+ }
+
+ /* Now we know our size is set to the strict minimum possible. It may
+ * be OK for elements allocated with an exact size (e.g. buffers), but
+ * we're going to round the size up to 16 bytes to merge almost
+ * identical pools together. However, when the debugging tag is
+ * enabled, we only round up to the size of a word, since the tag must
+ * remain right after the element to detect overflows.
+ */
+ if (!(flags & MEM_F_EXACT)) {
+ align = (pool_debugging & POOL_DBG_TAG) ? sizeof(void *) : 16;
+ size = ((size + align - 1) & -align);
+ }
+
+ /* TODO: thread: we do not lock pool list for now because all pools are
+ * created during HAProxy startup (so before thread creation) */
+ start = &pools;
+ pool = NULL;
+
+ list_for_each_entry(entry, &pools, list) {
+ if (entry->size == size) {
+ /* either we can share this place and we take it, or
+ * we look for a shareable one or for the next position
+ * before which we will insert a new one.
+ */
+ if ((flags & entry->flags & MEM_F_SHARED) &&
+ (!(pool_debugging & POOL_DBG_DONT_MERGE) ||
+ strcmp(name, entry->name) == 0)) {
+ /* we can share this one */
+ pool = entry;
+ DPRINTF(stderr, "Sharing %s with %s\n", name, pool->name);
+ break;
+ }
+ }
+ else if (entry->size > size) {
+ /* insert before this one */
+ start = &entry->list;
+ break;
+ }
+ }
+
+ if (!pool) {
+ void *pool_addr;
+
+ pool_addr = calloc(1, sizeof(*pool) + __alignof__(*pool));
+ if (!pool_addr)
+ return NULL;
+
+ /* always provide an aligned pool */
+ pool = (struct pool_head*)((((size_t)pool_addr) + __alignof__(*pool)) & -(size_t)__alignof__(*pool));
+ pool->base_addr = pool_addr; // keep it, it's the address to free later
+
+ if (name)
+ strlcpy2(pool->name, name, sizeof(pool->name));
+ pool->alloc_sz = size + extra;
+ pool->size = size;
+ pool->flags = flags;
+ LIST_APPEND(start, &pool->list);
+
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ /* update per-thread pool cache if necessary */
+ for (thr = 0; thr < MAX_THREADS; thr++) {
+ LIST_INIT(&pool->cache[thr].list);
+ pool->cache[thr].tid = thr;
+ pool->cache[thr].pool = pool;
+ }
+ }
+ }
+ pool->users++;
+ return pool;
+}
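+
+/* Minimal usage sketch for create_pool(); "foo" is a made-up name and
+ * callers normally go through the DECLARE_POOL()/DECLARE_STATIC_POOL()
+ * helpers instead, as pipe.c does:
+ *
+ *   struct pool_head *pool_head_foo;
+ *   pool_head_foo = create_pool("foo", sizeof(struct foo), MEM_F_SHARED);
+ *   if (!pool_head_foo)
+ *           return 0; // allocation failed
+ */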
+
+/* Tries to allocate an object for the pool <pool> using the system's allocator
+ * and directly returns it. The pool's allocated counter is checked but NOT
+ * updated, this is left to the caller, and no other checks are performed.
+ */
+void *pool_get_from_os_noinc(struct pool_head *pool)
+{
+ if (!pool->limit || pool_allocated(pool) < pool->limit) {
+ void *ptr;
+
+ if (pool_debugging & POOL_DBG_UAF)
+ ptr = pool_alloc_area_uaf(pool->alloc_sz);
+ else
+ ptr = pool_alloc_area(pool->alloc_sz);
+ if (ptr)
+ return ptr;
+ _HA_ATOMIC_INC(&pool->buckets[pool_tbucket()].failed);
+ }
+ activity[tid].pool_fail++;
+ return NULL;
+
+}
+
+/* Releases a pool item back to the operating system but DOES NOT update
+ * the allocation counter, it's left to the caller to do it. It may be
+ * done before or after, it doesn't matter, the function does not use it.
+ */
+void pool_put_to_os_nodec(struct pool_head *pool, void *ptr)
+{
+ if (pool_debugging & POOL_DBG_UAF)
+ pool_free_area_uaf(ptr, pool->alloc_sz);
+ else
+ pool_free_area(ptr, pool->alloc_sz);
+}
+
+/* Tries to allocate an object for the pool <pool> using the system's allocator
+ * and directly returns it. The pool's counters are updated but the object is
+ * never cached, so this is usable with and without local or shared caches.
+ */
+void *pool_alloc_nocache(struct pool_head *pool, const void *caller)
+{
+ void *ptr = NULL;
+ uint bucket;
+
+ ptr = pool_get_from_os_noinc(pool);
+ if (!ptr)
+ return NULL;
+
+ bucket = pool_pbucket(ptr);
+ swrate_add_scaled_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used, POOL_AVG_SAMPLES/4);
+ _HA_ATOMIC_INC(&pool->buckets[bucket].allocated);
+ _HA_ATOMIC_INC(&pool->buckets[bucket].used);
+
+ /* keep track of where the element was allocated from */
+ POOL_DEBUG_SET_MARK(pool, ptr);
+ POOL_DEBUG_TRACE_CALLER(pool, (struct pool_cache_item *)ptr, caller);
+ return ptr;
+}
+
+/* Releases a pool item back to the OS and keeps the pool's counters up to date.
+ * This is always defined even when pools are not enabled (their usage stats
+ * are maintained).
+ */
+void pool_free_nocache(struct pool_head *pool, void *ptr)
+{
+ uint bucket = pool_pbucket(ptr);
+
+ _HA_ATOMIC_DEC(&pool->buckets[bucket].used);
+ _HA_ATOMIC_DEC(&pool->buckets[bucket].allocated);
+ swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used);
+
+ pool_put_to_os_nodec(pool, ptr);
+}
+
+
+/* Updates <pch>'s fill_pattern and fills the free area after <item> with it,
+ * up to <size> bytes. The item part is left untouched.
+ */
+void pool_fill_pattern(struct pool_cache_head *pch, struct pool_cache_item *item, uint size)
+{
+ ulong *ptr = (ulong *)item;
+ uint ofs;
+ ulong u;
+
+ if (size <= sizeof(*item))
+ return;
+
+ /* Upgrade the fill_pattern to change about half of the bits
+ * (to be sure to catch static flag corruption), and apply it.
+ */
+ u = pch->fill_pattern += ~0UL / 3; // 0x55...55
+ ofs = sizeof(*item) / sizeof(*ptr);
+ while (ofs < size / sizeof(*ptr))
+ ptr[ofs++] = u;
+}
+
+/* Checks a pool_cache_item's integrity after extracting it from the cache. It
+ * must have been previously initialized using pool_fill_pattern(). If any
+ * corruption is detected, the function triggers an immediate crash.
+ */
+void pool_check_pattern(struct pool_cache_head *pch, struct pool_head *pool, struct pool_cache_item *item, const void *caller)
+{
+ const ulong *ptr = (const ulong *)item;
+ uint size = pool->size;
+ uint ofs;
+ ulong u;
+
+ if (size <= sizeof(*item))
+ return;
+
+ /* let's check that all words past *item are equal */
+ ofs = sizeof(*item) / sizeof(*ptr);
+ u = ptr[ofs++];
+ while (ofs < size / sizeof(*ptr)) {
+ if (unlikely(ptr[ofs] != u)) {
+ pool_inspect_item("cache corruption detected", pool, item, caller);
+ ABORT_NOW();
+ }
+ ofs++;
+ }
+}
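+
+/* Illustrative sequence (sketch): the two functions above work as a pair
+ * around the local cache when "-dMintegrity" is set:
+ *
+ *     pool_fill_pattern(ph, item, pool->size);     // item enters the cache
+ *     ...                                          // item sits unused
+ *     pool_check_pattern(ph, pool, item, caller);  // item leaves the cache
+ *
+ * Any word past the pool_cache_item header that no longer matches the
+ * recorded pattern reveals a write-after-free and aborts the process.
+ */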
+
+/* Removes up to <count> items from the end of the local pool cache <ph> for
+ * pool <pool>. The shared pool is refilled with these objects within the
+ * limit of the number of acceptable objects, and the rest is released to the
+ * OS. It is not a problem if <count> is larger than the number of objects in
+ * the local cache. The counters are automatically updated. Must not be used
+ * with pools disabled.
+ */
+static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head *ph, uint count)
+{
+ struct pool_cache_item *item;
+ struct pool_item *pi, *head = NULL;
+ void *caller = __builtin_return_address(0);
+ uint released = 0;
+ uint cluster = 0;
+ uint to_free_max;
+ uint bucket;
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ /* Note: this will be zero when global pools are disabled */
+ to_free_max = pool_releasable(pool);
+
+ while (released < count && !LIST_ISEMPTY(&ph->list)) {
+ item = LIST_PREV(&ph->list, typeof(item), by_pool);
+ BUG_ON(&item->by_pool == &ph->list);
+ if (unlikely(pool_debugging & POOL_DBG_INTEGRITY))
+ pool_check_pattern(ph, pool, item, caller);
+ LIST_DELETE(&item->by_pool);
+ LIST_DELETE(&item->by_lru);
+
+ bucket = pool_pbucket(item);
+ _HA_ATOMIC_DEC(&pool->buckets[bucket].used);
+ swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used);
+
+ if (to_free_max > released || cluster) {
+ /* will never match when global pools are disabled */
+ pi = (struct pool_item *)item;
+ pi->next = NULL;
+ pi->down = head;
+ head = pi;
+ cluster++;
+ if (cluster >= CONFIG_HAP_POOL_CLUSTER_SIZE) {
+ /* enough to make a cluster */
+ pool_put_to_shared_cache(pool, head);
+ cluster = 0;
+ head = NULL;
+ }
+ } else {
+ /* does pool_free_nocache() with a known bucket */
+ _HA_ATOMIC_DEC(&pool->buckets[bucket].allocated);
+ pool_put_to_os_nodec(pool, item);
+ }
+
+ released++;
+ }
+
+ /* incomplete cluster left */
+ if (cluster)
+ pool_put_to_shared_cache(pool, head);
+
+ ph->count -= released;
+ pool_cache_count -= released;
+ pool_cache_bytes -= released * pool->size;
+}
+
+/* Evicts some of the oldest objects from one local cache, until its number of
+ * objects is no more than 16 plus 1/8 of the total number of locally cached
+ * objects, or the total size of the local cache is no more than 75% of its
+ * maximum (i.e. we don't want a single cache to monopolize the whole cache).
+ * For this, the list is scanned in reverse. If <full> is non-zero, all objects
+ * are evicted. Must not be used when pools are disabled.
+ */
+void pool_evict_from_local_cache(struct pool_head *pool, int full)
+{
+ struct pool_cache_head *ph = &pool->cache[tid];
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ while ((ph->count && full) ||
+ (ph->count >= CONFIG_HAP_POOL_CLUSTER_SIZE &&
+ ph->count >= 16 + pool_cache_count / 8 &&
+ pool_cache_bytes > global.tune.pool_cache_size * 3 / 4)) {
+ pool_evict_last_items(pool, ph, CONFIG_HAP_POOL_CLUSTER_SIZE);
+ }
+}
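+
+/* Worked example with hypothetical numbers: with tune.memory.hot-size set to
+ * 512kB and 1000 objects currently cached across all pools, a given pool's
+ * local cache starts being trimmed once it holds at least 16 + 1000/8 = 141
+ * objects AND the total cached bytes exceed 3/4 of 512kB = 384kB. Items are
+ * then evicted in clusters of CONFIG_HAP_POOL_CLUSTER_SIZE.
+ */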
+
+/* Evicts some of the oldest objects from the local cache, pushing them to the
+ * global pool. Must not be used when pools are disabled.
+ */
+void pool_evict_from_local_caches()
+{
+ struct pool_cache_item *item;
+ struct pool_cache_head *ph;
+ struct pool_head *pool;
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ do {
+ item = LIST_PREV(&th_ctx->pool_lru_head, struct pool_cache_item *, by_lru);
+ BUG_ON(&item->by_lru == &th_ctx->pool_lru_head);
+ /* note: by definition we remove oldest objects so they also are the
+ * oldest in their own pools, thus their next is the pool's head.
+ */
+ ph = LIST_NEXT(&item->by_pool, struct pool_cache_head *, list);
+ BUG_ON(ph->tid != tid);
+
+ pool = container_of(ph - tid, struct pool_head, cache);
+ BUG_ON(pool != ph->pool);
+
+ pool_evict_last_items(pool, ph, CONFIG_HAP_POOL_CLUSTER_SIZE);
+ } while (pool_cache_bytes > global.tune.pool_cache_size * 7 / 8);
+}
+
+/* Frees an object to the local cache, possibly pushing oldest objects to the
+ * shared cache, which itself may decide to release some of them to the OS.
+ * While it is unspecified what the object becomes past this point, it is
+ * guaranteed to be released from the users' perspective. A caller address may
+ * be passed and stored into the area when DEBUG_POOL_TRACING is set. Must not
+ * be used with pools disabled.
+ */
+void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
+{
+ struct pool_cache_item *item = (struct pool_cache_item *)ptr;
+ struct pool_cache_head *ph = &pool->cache[tid];
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ LIST_INSERT(&ph->list, &item->by_pool);
+ LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru);
+ POOL_DEBUG_TRACE_CALLER(pool, item, caller);
+ ph->count++;
+ if (unlikely(pool_debugging & POOL_DBG_INTEGRITY))
+ pool_fill_pattern(ph, item, pool->size);
+ pool_cache_count++;
+ pool_cache_bytes += pool->size;
+
+ if (unlikely(pool_cache_bytes > global.tune.pool_cache_size * 3 / 4)) {
+ if (ph->count >= 16 + pool_cache_count / 8 + CONFIG_HAP_POOL_CLUSTER_SIZE)
+ pool_evict_from_local_cache(pool, 0);
+ if (pool_cache_bytes > global.tune.pool_cache_size)
+ pool_evict_from_local_caches();
+ }
+}
+
+/* Tries to refill the local cache <pch> from the shared one for pool <pool>.
+ * This is only used when pools are in use and shared pools are enabled. No
+ * malloc() is attempted, and poisoning is never performed. The purpose is to
+ * get the fastest possible refilling so that the caller can easily check if
+ * the cache has enough objects for its use. Must not be used when pools are
+ * disabled.
+ */
+void pool_refill_local_from_shared(struct pool_head *pool, struct pool_cache_head *pch)
+{
+ struct pool_cache_item *item;
+ struct pool_item *ret, *down;
+ uint bucket;
+ uint count;
+
+ BUG_ON(pool_debugging & POOL_DBG_NO_CACHE);
+
+ /* we'll need to reference the first element to figure the next one. We
+ * must temporarily lock it so that nobody allocates then releases it,
+ * or the dereference could fail. In order to limit the locking,
+ * threads start from a bucket that depends on their ID.
+ */
+
+ bucket = pool_tbucket();
+ ret = _HA_ATOMIC_LOAD(&pool->buckets[bucket].free_list);
+ count = 0;
+ do {
+ /* look for an apparently non-busy entry. If we hit a busy pool
+ * we retry with another random bucket. And if we encounter a
+ * NULL, we retry once with another random bucket. This is in
+ * order to prevent object accumulation in other buckets.
+ */
+ while (unlikely(ret == POOL_BUSY || (ret == NULL && count++ < 1))) {
+ bucket = statistical_prng() % CONFIG_HAP_POOL_BUCKETS;
+ ret = _HA_ATOMIC_LOAD(&pool->buckets[bucket].free_list);
+ }
+ if (ret == NULL)
+ return;
+ } while (unlikely((ret = _HA_ATOMIC_XCHG(&pool->buckets[bucket].free_list, POOL_BUSY)) == POOL_BUSY));
+
+ if (unlikely(ret == NULL)) {
+ HA_ATOMIC_STORE(&pool->buckets[bucket].free_list, NULL);
+ return;
+ }
+
+ /* this releases the lock */
+ HA_ATOMIC_STORE(&pool->buckets[bucket].free_list, ret->next);
+
+ /* now store the retrieved object(s) into the local cache. Note that
+ * they don't all have the same hash and that it doesn't necessarily
+ * match the one from the pool.
+ */
+ count = 0;
+ for (; ret; ret = down) {
+ down = ret->down;
+ item = (struct pool_cache_item *)ret;
+ POOL_DEBUG_TRACE_CALLER(pool, item, NULL);
+ LIST_INSERT(&pch->list, &item->by_pool);
+ LIST_INSERT(&th_ctx->pool_lru_head, &item->by_lru);
+ _HA_ATOMIC_INC(&pool->buckets[pool_pbucket(item)].used);
+ count++;
+ if (unlikely(pool_debugging & POOL_DBG_INTEGRITY))
+ pool_fill_pattern(pch, item, pool->size);
+ }
+ pch->count += count;
+ pool_cache_count += count;
+ pool_cache_bytes += count * pool->size;
+}
+
+/* Adds pool item cluster <item> to the shared cache, which contains <count>
+ * elements. The caller is advised to first check using pool_releasable() if
+ * it's wise to add this series of objects there. Both the pool and the item's
+ * head must be valid.
+ */
+void pool_put_to_shared_cache(struct pool_head *pool, struct pool_item *item)
+{
+ struct pool_item *free_list;
+ uint bucket = pool_pbucket(item);
+
+ /* we prefer to put the item into the entry that corresponds to its own
+ * hash so that on return it remains in the right place, but that's not
+ * mandatory.
+ */
+ free_list = _HA_ATOMIC_LOAD(&pool->buckets[bucket].free_list);
+ do {
+ /* look for an apparently non-busy entry */
+ while (unlikely(free_list == POOL_BUSY)) {
+ bucket = (bucket + 1) % CONFIG_HAP_POOL_BUCKETS;
+ free_list = _HA_ATOMIC_LOAD(&pool->buckets[bucket].free_list);
+ }
+ _HA_ATOMIC_STORE(&item->next, free_list);
+ __ha_barrier_atomic_store();
+ } while (!_HA_ATOMIC_CAS(&pool->buckets[bucket].free_list, &free_list, item));
+ __ha_barrier_atomic_store();
+}
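+
+/* Sketch of the lock-free protocol used above (illustrative; <bucket> stands
+ * for one pool buckets entry): POOL_BUSY acts as a one-word lock value stored
+ * in the free_list head itself.
+ *
+ *     // consumer side (see pool_refill_local_from_shared()):
+ *     head = _HA_ATOMIC_XCHG(&bucket->free_list, POOL_BUSY); // grab the list
+ *     HA_ATOMIC_STORE(&bucket->free_list, head->next);       // release it
+ *
+ *     // producer side (the function above): CAS the new head in, and simply
+ *     // move to the next bucket whenever the current one reads POOL_BUSY.
+ */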
+
+/*
+ * This function frees whatever can be freed in pool <pool>.
+ */
+void pool_flush(struct pool_head *pool)
+{
+ struct pool_item *next, *temp, *down;
+ uint bucket;
+
+ if (!pool || (pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL)))
+ return;
+
+ /* The loop below atomically detaches the head of the free list and
+ * replaces it with a NULL. Then the list can be released.
+ */
+ for (bucket = 0; bucket < CONFIG_HAP_POOL_BUCKETS; bucket++) {
+ next = pool->buckets[bucket].free_list;
+ while (1) {
+ while (unlikely(next == POOL_BUSY))
+ next = (void*)pl_wait_new_long((ulong*)&pool->buckets[bucket].free_list, (ulong)next);
+
+ if (next == NULL)
+ break;
+
+ next = _HA_ATOMIC_XCHG(&pool->buckets[bucket].free_list, POOL_BUSY);
+ if (next != POOL_BUSY) {
+ HA_ATOMIC_STORE(&pool->buckets[bucket].free_list, NULL);
+ break;
+ }
+ }
+
+ while (next) {
+ temp = next;
+ next = temp->next;
+ for (; temp; temp = down) {
+ down = temp->down;
+ _HA_ATOMIC_DEC(&pool->buckets[pool_pbucket(temp)].allocated);
+ pool_put_to_os_nodec(pool, temp);
+ }
+ }
+ }
+ /* here, we should have pool->allocated == pool->used */
+}
+
+/*
+ * This function frees whatever can be freed in all pools, but respecting
+ * the minimum thresholds imposed by owners. It makes sure to run alone by
+ * using thread_isolate(). <pool_ctx> is unused.
+ */
+void pool_gc(struct pool_head *pool_ctx)
+{
+ struct pool_head *entry;
+ int isolated = thread_isolated();
+
+ if (!isolated)
+ thread_isolate();
+
+ list_for_each_entry(entry, &pools, list) {
+ struct pool_item *temp, *down;
+ uint allocated = pool_allocated(entry);
+ uint used = pool_used(entry);
+ int bucket = 0;
+
+ while ((int)(allocated - used) > (int)entry->minavail) {
+ /* ok let's find next entry to evict */
+			while (bucket < CONFIG_HAP_POOL_BUCKETS && !entry->buckets[bucket].free_list)
+ bucket++;
+
+ if (bucket >= CONFIG_HAP_POOL_BUCKETS)
+ break;
+
+ temp = entry->buckets[bucket].free_list;
+ entry->buckets[bucket].free_list = temp->next;
+ for (; temp; temp = down) {
+ down = temp->down;
+ allocated--;
+ _HA_ATOMIC_DEC(&entry->buckets[pool_pbucket(temp)].allocated);
+ pool_put_to_os_nodec(entry, temp);
+ }
+ }
+ }
+
+ trim_all_pools();
+
+ if (!isolated)
+ thread_release();
+}
+
+/*
+ * Returns a pointer to an object taken from pool <pool>, or dynamically
+ * allocated when the cache and shared pool are empty. <flags> is a binary-OR
+ * of POOL_F_* flags. Prefer using pool_alloc(), which does the right thing
+ * without flags.
+ */
+void *__pool_alloc(struct pool_head *pool, unsigned int flags)
+{
+ void *p = NULL;
+ void *caller = __builtin_return_address(0);
+
+ if (unlikely(pool_debugging & POOL_DBG_FAIL_ALLOC))
+ if (!(flags & POOL_F_NO_FAIL) && mem_should_fail(pool))
+ return NULL;
+
+ if (likely(!(pool_debugging & POOL_DBG_NO_CACHE)) && !p)
+ p = pool_get_from_cache(pool, caller);
+
+ if (unlikely(!p))
+ p = pool_alloc_nocache(pool, caller);
+
+ if (likely(p)) {
+#ifdef USE_MEMORY_PROFILING
+ if (unlikely(profiling & HA_PROF_MEMORY)) {
+ extern struct memprof_stats memprof_stats[MEMPROF_HASH_BUCKETS + 1];
+ struct memprof_stats *bin;
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_P_ALLOC);
+ _HA_ATOMIC_ADD(&bin->alloc_calls, 1);
+ _HA_ATOMIC_ADD(&bin->alloc_tot, pool->size);
+ _HA_ATOMIC_STORE(&bin->info, pool);
+ /* replace the caller with the allocated bin: this way
+			 * the pool_free() call will be able to update our
+ * entry. We only do it for non-colliding entries though,
+ * since these ones store the true caller location.
+ */
+ if (bin >= &memprof_stats[0] && bin < &memprof_stats[MEMPROF_HASH_BUCKETS])
+ POOL_DEBUG_TRACE_CALLER(pool, (struct pool_cache_item *)p, bin);
+ }
+#endif
+ if (unlikely(flags & POOL_F_MUST_ZERO))
+ memset(p, 0, pool->size);
+ else if (unlikely(!(flags & POOL_F_NO_POISON) && (pool_debugging & POOL_DBG_POISON)))
+ memset(p, mem_poison_byte, pool->size);
+ }
+ return p;
+}
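+
+/* Usage sketch (illustrative): callers normally go through the pool_alloc()
+ * or pool_zalloc() wrappers rather than calling this directly, e.g.:
+ *
+ *     struct foo *f = __pool_alloc(pool_head_foo, POOL_F_MUST_ZERO);
+ *
+ * where pool_head_foo is a hypothetical pool; POOL_F_MUST_ZERO returns
+ * zeroed memory regardless of the poisoning settings.
+ */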
+
+/*
+ * Puts a memory area back to the corresponding pool. <ptr> must be valid. Using
+ * pool_free() is preferred.
+ */
+void __pool_free(struct pool_head *pool, void *ptr)
+{
+ const void *caller = __builtin_return_address(0);
+
+ /* we'll get late corruption if we refill to the wrong pool or double-free */
+ POOL_DEBUG_CHECK_MARK(pool, ptr, caller);
+ POOL_DEBUG_RESET_MARK(pool, ptr);
+
+#ifdef USE_MEMORY_PROFILING
+ if (unlikely(profiling & HA_PROF_MEMORY) && ptr) {
+ extern struct memprof_stats memprof_stats[MEMPROF_HASH_BUCKETS + 1];
+ struct memprof_stats *bin;
+
+ bin = memprof_get_bin(__builtin_return_address(0), MEMPROF_METH_P_FREE);
+ _HA_ATOMIC_ADD(&bin->free_calls, 1);
+ _HA_ATOMIC_ADD(&bin->free_tot, pool->size);
+ _HA_ATOMIC_STORE(&bin->info, pool);
+
+ /* check if the caller is an allocator, and if so, let's update
+ * its free() count.
+ */
+ bin = *(struct memprof_stats**)(((char *)ptr) + pool->alloc_sz - sizeof(void*));
+ if (bin >= &memprof_stats[0] && bin < &memprof_stats[MEMPROF_HASH_BUCKETS]) {
+ _HA_ATOMIC_ADD(&bin->free_calls, 1);
+ _HA_ATOMIC_ADD(&bin->free_tot, pool->size);
+ }
+ }
+#endif
+
+ if (unlikely((pool_debugging & POOL_DBG_NO_CACHE) ||
+ global.tune.pool_cache_size < pool->size)) {
+ pool_free_nocache(pool, ptr);
+ return;
+ }
+
+ pool_put_to_cache(pool, ptr, caller);
+}
+
+/*
+ * This function destroys a pool by freeing it completely, unless it's still
+ * in use. This should be called only under extreme circumstances. It always
+ * returns NULL if the resulting pool is empty, easing the clearing of the old
+ * pointer, otherwise it returns the pool.
+ */
+void *pool_destroy(struct pool_head *pool)
+{
+ if (pool) {
+ if (!(pool_debugging & POOL_DBG_NO_CACHE))
+ pool_evict_from_local_cache(pool, 1);
+
+ pool_flush(pool);
+ if (pool_used(pool))
+ return pool;
+ pool->users--;
+ if (!pool->users) {
+ LIST_DELETE(&pool->list);
+ /* note that if used == 0, the cache is empty */
+ free(pool->base_addr);
+ }
+ }
+ return NULL;
+}
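+
+/* Usage sketch: the return value eases clearing the caller's pointer:
+ *
+ *     pool_head_foo = pool_destroy(pool_head_foo);
+ *
+ * leaves the (hypothetical) pointer NULL when the pool was fully released,
+ * and unchanged when objects are still in use.
+ */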
+
+/* This destroys all pools on exit. It is *not* thread safe. */
+void pool_destroy_all()
+{
+ struct pool_head *entry, *back;
+
+ list_for_each_entry_safe(entry, back, &pools, list) {
+ /* there's only one occurrence of each pool in the list,
+		 * and we're exiting instead of looping on the whole
+ * list just to decrement users, force it to 1 here.
+ */
+ entry->users = 1;
+ pool_destroy(entry);
+ }
+}
+
+/* carefully inspects an item upon fatal error and emits diagnostics */
+void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item, const void *caller)
+{
+ const struct pool_head *the_pool = NULL;
+
+ chunk_printf(&trash,
+ "FATAL: pool inconsistency detected in thread %d: %s.\n"
+ " caller: %p (",
+ tid + 1, msg, caller);
+
+ resolve_sym_name(&trash, NULL, caller);
+
+ chunk_appendf(&trash,
+ ")\n"
+ " item: %p\n"
+ " pool: %p ('%s', size %u, real %u, users %u)\n",
+ item, pool, pool->name, pool->size, pool->alloc_sz, pool->users);
+
+ if (pool_debugging & POOL_DBG_TAG) {
+ const void **pool_mark;
+ struct pool_head *ph;
+ const void *tag;
+
+ pool_mark = (const void **)(((char *)item) + pool->size);
+ tag = may_access(pool_mark) ? *pool_mark : NULL;
+ if (tag == pool) {
+ chunk_appendf(&trash, " tag: @%p = %p (%s)\n", pool_mark, tag, pool->name);
+ the_pool = pool;
+ }
+ else {
+ if (!may_access(pool_mark))
+ chunk_appendf(&trash, "Tag not accessible. ");
+ else
+ chunk_appendf(&trash, "Tag does not match (%p). ", tag);
+
+ list_for_each_entry(ph, &pools, list) {
+ pool_mark = (const void **)(((char *)item) + ph->size);
+ if (!may_access(pool_mark))
+ continue;
+ tag = *pool_mark;
+
+ if (tag == ph) {
+ if (!the_pool)
+ chunk_appendf(&trash, "Possible origin pool(s):\n");
+
+ chunk_appendf(&trash, " tag: @%p = %p (%s, size %u, real %u, users %u)\n",
+ pool_mark, tag, ph->name, ph->size, ph->alloc_sz, ph->users);
+ if (!the_pool || the_pool->size < ph->size)
+ the_pool = ph;
+ }
+ }
+
+ if (!the_pool) {
+ const char *start, *end, *p;
+
+ pool_mark = (const void **)(((char *)item) + pool->size);
+ chunk_appendf(&trash,
+ "Tag does not match any other pool.\n"
+ "Contents around address %p+%lu=%p:\n",
+ item, (ulong)((const void*)pool_mark - (const void*)item),
+ pool_mark);
+
+ /* dump in word-sized blocks */
+ start = (const void *)(((uintptr_t)pool_mark - 32) & -sizeof(void*));
+ end = (const void *)(((uintptr_t)pool_mark + 32 + sizeof(void*) - 1) & -sizeof(void*));
+
+ while (start < end) {
+ dump_addr_and_bytes(&trash, " ", start, sizeof(void*));
+ chunk_strcat(&trash, " [");
+ for (p = start; p < start + sizeof(void*); p++) {
+ if (!may_access(p))
+ chunk_strcat(&trash, "*");
+ else if (isprint((unsigned char)*p))
+ chunk_appendf(&trash, "%c", *p);
+ else
+ chunk_strcat(&trash, ".");
+ }
+
+ if (may_access(start))
+ tag = *(const void **)start;
+ else
+ tag = NULL;
+
+ if (tag == pool) {
+ /* the pool can often be there so let's detect it */
+ chunk_appendf(&trash, "] [pool:%s", pool->name);
+ }
+ else if (tag) {
+ /* print pointers that resolve to a symbol */
+ size_t back_data = trash.data;
+ chunk_strcat(&trash, "] [");
+ if (!resolve_sym_name(&trash, NULL, tag))
+ trash.data = back_data;
+ }
+
+ chunk_strcat(&trash, "]\n");
+ start = p;
+ }
+ }
+ }
+ }
+
+ if (pool_debugging & POOL_DBG_CALLER) {
+ struct buffer *trash2 = get_trash_chunk();
+ const struct pool_head *ph;
+ const void **pool_mark;
+ const void *tag, *rec_tag;
+
+ ph = the_pool ? the_pool : pool;
+ pool_mark = (const void **)(((char *)item) + ph->alloc_sz - sizeof(void*));
+ rec_tag = may_access(pool_mark) ? *pool_mark : NULL;
+
+ if (rec_tag && resolve_sym_name(trash2, NULL, rec_tag))
+ chunk_appendf(&trash,
+				      "Recorded caller of pool '%s':\n  @%p (+%04u) = %p (%s)\n",
+ ph->name, pool_mark, (uint)(ph->alloc_sz - sizeof(void*)),
+ rec_tag, trash2->area);
+
+ if (!the_pool) {
+ /* the pool couldn't be formally verified */
+ chunk_appendf(&trash, "Other possible callers:\n");
+ list_for_each_entry(ph, &pools, list) {
+ if (ph == pool)
+ continue;
+ pool_mark = (const void **)(((char *)item) + ph->alloc_sz - sizeof(void*));
+ if (!may_access(pool_mark))
+ continue;
+ tag = *pool_mark;
+ if (tag == rec_tag)
+ continue;
+
+ /* see if we can resolve something */
+ chunk_printf(trash2, "@%p (+%04u) = %p (", pool_mark, (uint)(ph->alloc_sz - sizeof(void*)), tag);
+ if (resolve_sym_name(trash2, NULL, tag)) {
+ chunk_appendf(trash2, ")");
+ chunk_appendf(&trash,
+ " %s [as pool %s, size %u, real %u, users %u]\n",
+ trash2->area, ph->name, ph->size, ph->alloc_sz, ph->users);
+ }
+ }
+ }
+ }
+
+ chunk_appendf(&trash, "\n");
+ DISGUISE(write(2, trash.area, trash.data));
+}
+
+/* used by qsort in "show pools" to sort by name */
+static int cmp_dump_pools_name(const void *a, const void *b)
+{
+ const struct pool_dump_info *l = (const struct pool_dump_info *)a;
+ const struct pool_dump_info *r = (const struct pool_dump_info *)b;
+
+ return strcmp(l->entry->name, r->entry->name);
+}
+
+/* used by qsort in "show pools" to sort by item size */
+static int cmp_dump_pools_size(const void *a, const void *b)
+{
+ const struct pool_dump_info *l = (const struct pool_dump_info *)a;
+ const struct pool_dump_info *r = (const struct pool_dump_info *)b;
+
+ if (l->entry->size > r->entry->size)
+ return -1;
+ else if (l->entry->size < r->entry->size)
+ return 1;
+ else
+ return 0;
+}
+
+/* used by qsort in "show pools" to sort by usage */
+static int cmp_dump_pools_usage(const void *a, const void *b)
+{
+ const struct pool_dump_info *l = (const struct pool_dump_info *)a;
+ const struct pool_dump_info *r = (const struct pool_dump_info *)b;
+
+ if (l->alloc_bytes > r->alloc_bytes)
+ return -1;
+ else if (l->alloc_bytes < r->alloc_bytes)
+ return 1;
+ else
+ return 0;
+}
+
+/* will not dump more than this number of entries. Anything beyond this will
+ * likely not fit into a regular output buffer anyway.
+ */
+#define POOLS_MAX_DUMPED_ENTRIES 1024
+
+/* This function dumps memory usage information into the trash buffer.
+ * It may sort by a criterion if <by_what> is non-zero, and limit the
+ * number of output lines if <max> is non-zero. It may limit only to
+ * pools whose names start with <pfx> if <pfx> is non-null.
+ */
+void dump_pools_to_trash(int by_what, int max, const char *pfx)
+{
+ struct pool_dump_info pool_info[POOLS_MAX_DUMPED_ENTRIES];
+ struct pool_head *entry;
+ unsigned long long allocated, used;
+ int nbpools, i;
+ unsigned long long cached_bytes = 0;
+ uint cached = 0;
+ uint alloc_items;
+
+ allocated = used = nbpools = 0;
+
+ list_for_each_entry(entry, &pools, list) {
+ if (nbpools >= POOLS_MAX_DUMPED_ENTRIES)
+ break;
+
+ alloc_items = pool_allocated(entry);
+ /* do not dump unused entries when sorting by usage */
+ if (by_what == 3 && !alloc_items)
+ continue;
+
+ /* verify the pool name if a prefix is requested */
+ if (pfx && strncmp(entry->name, pfx, strlen(pfx)) != 0)
+ continue;
+
+ if (!(pool_debugging & POOL_DBG_NO_CACHE)) {
+ for (cached = i = 0; i < global.nbthread; i++)
+ cached += entry->cache[i].count;
+ }
+ pool_info[nbpools].entry = entry;
+ pool_info[nbpools].alloc_items = alloc_items;
+ pool_info[nbpools].alloc_bytes = (ulong)entry->size * alloc_items;
+ pool_info[nbpools].used_items = pool_used(entry);
+ pool_info[nbpools].cached_items = cached;
+ pool_info[nbpools].need_avg = swrate_avg(pool_needed_avg(entry), POOL_AVG_SAMPLES);
+ pool_info[nbpools].failed_items = pool_failed(entry);
+ nbpools++;
+ }
+
+ if (by_what == 1) /* sort by name */
+ qsort(pool_info, nbpools, sizeof(pool_info[0]), cmp_dump_pools_name);
+ else if (by_what == 2) /* sort by item size */
+ qsort(pool_info, nbpools, sizeof(pool_info[0]), cmp_dump_pools_size);
+ else if (by_what == 3) /* sort by total usage */
+ qsort(pool_info, nbpools, sizeof(pool_info[0]), cmp_dump_pools_usage);
+
+ chunk_printf(&trash, "Dumping pools usage");
+ if (!max || max >= POOLS_MAX_DUMPED_ENTRIES)
+ max = POOLS_MAX_DUMPED_ENTRIES;
+ if (nbpools >= max)
+ chunk_appendf(&trash, " (limited to the first %u entries)", max);
+ chunk_appendf(&trash, ". Use SIGQUIT to flush them.\n");
+
+ for (i = 0; i < nbpools && i < max; i++) {
+ chunk_appendf(&trash, " - Pool %s (%lu bytes) : %lu allocated (%lu bytes), %lu used"
+ " (~%lu by thread caches)"
+ ", needed_avg %lu, %lu failures, %u users, @%p%s\n",
+ pool_info[i].entry->name, (ulong)pool_info[i].entry->size,
+ pool_info[i].alloc_items, pool_info[i].alloc_bytes,
+ pool_info[i].used_items, pool_info[i].cached_items,
+ pool_info[i].need_avg, pool_info[i].failed_items,
+ pool_info[i].entry->users, pool_info[i].entry,
+ (pool_info[i].entry->flags & MEM_F_SHARED) ? " [SHARED]" : "");
+
+ cached_bytes += pool_info[i].cached_items * (ulong)pool_info[i].entry->size;
+ allocated += pool_info[i].alloc_items * (ulong)pool_info[i].entry->size;
+ used += pool_info[i].used_items * (ulong)pool_info[i].entry->size;
+ }
+
+ chunk_appendf(&trash, "Total: %d pools, %llu bytes allocated, %llu used"
+ " (~%llu by thread caches)"
+ ".\n",
+ nbpools, allocated, used, cached_bytes
+ );
+}
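+
+/* Example of the output produced above, with made-up numbers (each pool is
+ * printed on a single line, wrapped here for readability):
+ *
+ *   - Pool buffer (16416 bytes) : 512 allocated (8404992 bytes), 480 used
+ *     (~64 by thread caches), needed_avg 500, 0 failures, 1 users, @0x... [SHARED]
+ *   Total: 1 pools, 8404992 bytes allocated, 7879680 used (~1050624 by thread caches).
+ */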
+
+/* Dump statistics on pools usage. */
+void dump_pools(void)
+{
+ dump_pools_to_trash(0, 0, NULL);
+ qfprintf(stderr, "%s", trash.area);
+}
+
+/* This function returns the total number of failed pool allocations */
+int pool_total_failures()
+{
+ struct pool_head *entry;
+ int failed = 0;
+
+ list_for_each_entry(entry, &pools, list)
+ failed += pool_failed(entry);
+ return failed;
+}
+
+/* This function returns the total amount of memory allocated in pools (in bytes) */
+unsigned long long pool_total_allocated()
+{
+ struct pool_head *entry;
+ unsigned long long allocated = 0;
+
+ list_for_each_entry(entry, &pools, list)
+ allocated += pool_allocated(entry) * (ullong)entry->size;
+ return allocated;
+}
+
+/* This function returns the total amount of memory used in pools (in bytes) */
+unsigned long long pool_total_used()
+{
+ struct pool_head *entry;
+ unsigned long long used = 0;
+
+ list_for_each_entry(entry, &pools, list)
+ used += pool_used(entry) * (ullong)entry->size;
+ return used;
+}
+
+/* This function parses a string made of a set of debugging features as
+ * specified after -dM on the command line, and will set pool_debugging
+ * accordingly. On success it returns a strictly positive value. It may return
+ * zero with the first warning in <err>, -1 with a help message in <err>, or
+ * -2 with the first error in <err>. <err> is undefined
+ * on success, and will be non-null and locally allocated on help/error/warning.
+ * The caller must free it. Warnings are used to report features that were not
+ * enabled at build time, and errors are used to report unknown features.
+ */
+int pool_parse_debugging(const char *str, char **err)
+{
+ struct ist args;
+ char *end;
+ uint new_dbg;
+ int v;
+
+
+	/* if it's empty or starts with a number, it's the mem poisoning byte */
+ v = strtol(str, &end, 0);
+ if (!*end || *end == ',') {
+ mem_poison_byte = *str ? v : 'P';
+ if (mem_poison_byte >= 0)
+ pool_debugging |= POOL_DBG_POISON;
+ else
+ pool_debugging &= ~POOL_DBG_POISON;
+ str = end;
+ }
+
+ new_dbg = pool_debugging;
+
+ for (args = ist(str); istlen(args); args = istadv(istfind(args, ','), 1)) {
+ struct ist feat = iststop(args, ',');
+
+ if (!istlen(feat))
+ continue;
+
+ if (isteq(feat, ist("help"))) {
+ ha_free(err);
+ memprintf(err,
+			          "-dM alone enables memory poisoning with byte 0x50 on allocation. A numeric\n"
+ "value may be appended immediately after -dM to use another value (0 supported).\n"
+ "Then an optional list of comma-delimited keywords may be appended to set or\n"
+ "clear some debugging options ('*' marks the current setting):\n\n"
+ " set clear description\n"
+ " -----------------+-----------------+-----------------------------------------\n");
+
+ for (v = 0; dbg_options[v].flg; v++) {
+ memprintf(err, "%s %c %-15s|%c %-15s| %s\n",
+ *err,
+ (pool_debugging & dbg_options[v].flg) ? '*' : ' ',
+ dbg_options[v].set,
+ (pool_debugging & dbg_options[v].flg) ? ' ' : '*',
+ dbg_options[v].clr,
+ dbg_options[v].hlp);
+ }
+
+ memprintf(err,
+ "%s -----------------+-----------------+-----------------------------------------\n"
+ "Examples:\n"
+			          "  Disable merging and enable poisoning with byte 'P': -dM0x50,no-merge\n"
+ " Randomly fail allocations: -dMfail\n"
+ " Detect out-of-bound corruptions: -dMno-merge,tag\n"
+ " Detect post-free cache corruptions: -dMno-merge,cold-first,integrity,caller\n"
+ " Detect all cache corruptions: -dMno-merge,cold-first,integrity,tag,caller\n"
+ " Detect UAF (disables cache, very slow): -dMuaf\n"
+			          "  Detect post-cache UAF: -dMuaf,cache,no-merge,cold-first,integrity,tag,caller\n",
+ *err);
+ return -1;
+ }
+
+ for (v = 0; dbg_options[v].flg; v++) {
+ if (isteq(feat, ist(dbg_options[v].set))) {
+ new_dbg |= dbg_options[v].flg;
+ /* UAF implicitly disables caching, but it's
+ * still possible to forcefully re-enable it.
+ */
+ if (dbg_options[v].flg == POOL_DBG_UAF)
+ new_dbg |= POOL_DBG_NO_CACHE;
+ /* fail should preset the tune.fail-alloc ratio to 1% */
+ if (dbg_options[v].flg == POOL_DBG_FAIL_ALLOC)
+ mem_fail_rate = 1;
+ break;
+ }
+ else if (isteq(feat, ist(dbg_options[v].clr))) {
+ new_dbg &= ~dbg_options[v].flg;
+ /* no-fail should reset the tune.fail-alloc ratio */
+ if (dbg_options[v].flg == POOL_DBG_FAIL_ALLOC)
+ mem_fail_rate = 0;
+ break;
+ }
+ }
+
+ if (!dbg_options[v].flg) {
+ memprintf(err, "unknown pool debugging feature <%.*s>", (int)istlen(feat), istptr(feat));
+ return -2;
+ }
+ }
+
+ pool_debugging = new_dbg;
+ return 1;
+}
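+
+/* Parsing example (illustrative): "-dM0x2a,no-merge,tag" sets the poison byte
+ * to 0x2a (enabling POOL_DBG_POISON), then toggles the flags attached to the
+ * "no-merge" and "tag" keywords. "-dMuaf" also implies no-cache, unless
+ * "cache" is passed later in the list to forcefully re-enable caching.
+ */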
+
+/* parse a "show pools" command. It returns 1 on failure, 0 if it starts to dump. */
+static int cli_parse_show_pools(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_pools_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg;
+
+ for (arg = 2; *args[arg]; arg++) {
+ if (strcmp(args[arg], "byname") == 0) {
+ ctx->by_what = 1; // sort output by name
+ }
+ else if (strcmp(args[arg], "bysize") == 0) {
+ ctx->by_what = 2; // sort output by item size
+ }
+ else if (strcmp(args[arg], "byusage") == 0) {
+ ctx->by_what = 3; // sort output by total allocated size
+ }
+ else if (strcmp(args[arg], "match") == 0 && *args[arg+1]) {
+ ctx->prefix = strdup(args[arg+1]); // only pools starting with this
+ arg++;
+ }
+ else if (isdigit((unsigned char)*args[arg])) {
+ ctx->maxcnt = atoi(args[arg]); // number of entries to dump
+ }
+ else
+ return cli_err(appctx, "Expects either 'byname', 'bysize', 'byusage', 'match <pfx>', or a max number of output lines.\n");
+ }
+ return 0;
+}
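+
+/* Illustrative CLI invocations accepted by the parser above; the arguments
+ * may be combined and given in any order:
+ *
+ *     show pools byusage 20       # top 20 pools by total allocated bytes
+ *     show pools match quic       # only pools whose name starts with "quic"
+ *     show pools byname
+ */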
+
+/* release the "show pools" context */
+static void cli_release_show_pools(struct appctx *appctx)
+{
+ struct show_pools_ctx *ctx = appctx->svcctx;
+
+ ha_free(&ctx->prefix);
+}
+
+/* This function dumps memory usage information onto the stream connector's
+ * read buffer. It returns 0 as long as it does not complete, non-zero upon
+ * completion. No state is used.
+ */
+static int cli_io_handler_dump_pools(struct appctx *appctx)
+{
+ struct show_pools_ctx *ctx = appctx->svcctx;
+
+ dump_pools_to_trash(ctx->by_what, ctx->maxcnt, ctx->prefix);
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+ return 1;
+}
+
+/* callback used to create early pool <name> of size <size> and store the
+ * resulting pointer into <ptr>. If the allocation fails, it quits after
+ * emitting an error message.
+ */
+void create_pool_callback(struct pool_head **ptr, char *name, unsigned int size)
+{
+ *ptr = create_pool(name, size, MEM_F_SHARED);
+ if (!*ptr) {
+ ha_alert("Failed to allocate pool '%s' of size %u : %s. Aborting.\n",
+ name, size, strerror(errno));
+ exit(1);
+ }
+}
+
+/* Initializes all per-thread arrays on startup */
+static void init_pools()
+{
+ int thr;
+
+ for (thr = 0; thr < MAX_THREADS; thr++) {
+ LIST_INIT(&ha_thread_ctx[thr].pool_lru_head);
+ }
+
+ detect_allocator();
+}
+
+INITCALL0(STG_PREPARE, init_pools);
+
+/* Report in build options if trim is supported */
+static void pools_register_build_options(void)
+{
+ if (!using_default_allocator) {
+ char *ptr = NULL;
+ memprintf(&ptr, "Running with a replaced memory allocator (e.g. via LD_PRELOAD).");
+ hap_register_build_opts(ptr, 1);
+ mark_tainted(TAINTED_REPLACED_MEM_ALLOCATOR);
+ }
+}
+INITCALL0(STG_REGISTER, pools_register_build_options);
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "show", "pools", NULL }, "show pools [by*] [match <pfx>] [nb] : report information about the memory pools usage", cli_parse_show_pools, cli_io_handler_dump_pools, cli_release_show_pools },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+
+/* config parser for global "tune.fail-alloc" */
+static int mem_parse_global_fail_alloc(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+ mem_fail_rate = atoi(args[1]);
+ if (mem_fail_rate < 0 || mem_fail_rate > 100) {
+ memprintf(err, "'%s' expects a numeric value between 0 and 100.", args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+/* config parser for global "tune.memory.hot-size" */
+static int mem_parse_global_hot_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ long size;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ size = atol(args[1]);
+ if (size <= 0) {
+ memprintf(err, "'%s' expects a strictly positive value.", args[0]);
+ return -1;
+ }
+
+ global.tune.pool_cache_size = size;
+ return 0;
+}
+
+/* config parser for global "no-memory-trimming" */
+static int mem_parse_global_no_mem_trim(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(0, args, err, NULL))
+ return -1;
+ disable_trim = 1;
+ return 0;
+}
+
+/* register global config keywords */
+static struct cfg_kw_list mem_cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.fail-alloc", mem_parse_global_fail_alloc },
+ { CFG_GLOBAL, "tune.memory.hot-size", mem_parse_global_hot_size },
+ { CFG_GLOBAL, "no-memory-trimming", mem_parse_global_no_mem_trim },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &mem_cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_quic.c b/src/proto_quic.c
new file mode 100644
index 0000000..899cffe
--- /dev/null
+++ b/src/proto_quic.c
@@ -0,0 +1,799 @@
+/*
+ * AF_INET/AF_INET6 QUIC protocol layer.
+ *
+ * Copyright 2020 Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/udp.h>
+#include <netinet/in.h>
+
+#include <import/ebtree-t.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/cbuf.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/port_range.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_quic.h>
+#include <haproxy/proto_udp.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+/* per-thread quic datagram handlers */
+struct quic_dghdlr *quic_dghdlrs;
+struct eb_root *quic_cid_tree;
+
+/* global CID trees */
+#define QUIC_CID_TREES_CNT 256
+struct quic_cid_tree *quic_cid_trees;
+
+/* Size of the internal buffer of QUIC RX buffer at the fd level */
+#define QUIC_RX_BUFSZ (1UL << 18)
+
+DECLARE_STATIC_POOL(pool_head_quic_rxbuf, "quic_rxbuf", QUIC_RX_BUFSZ);
+
+static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static int quic_connect_server(struct connection *conn, int flags);
+static void quic_enable_listener(struct listener *listener);
+static void quic_disable_listener(struct listener *listener);
+static int quic_set_affinity(struct connection *conn, int new_tid);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_quic4 = {
+ .name = "quic4",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = quic_bind_listener,
+ .enable = quic_enable_listener,
+ .disable = quic_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = quic_sock_accept_conn,
+ .get_src = quic_sock_get_src,
+ .get_dst = quic_sock_get_dst,
+ .connect = quic_connect_server,
+ .set_affinity = quic_set_affinity,
+
+ /* binding layer */
+ .rx_suspend = udp_suspend_receiver,
+ .rx_resume = udp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet4,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = IPPROTO_UDP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = quic_sock_accepting_conn,
+ .default_iocb = quic_lstnr_sock_fd_iocb,
+ .receivers = LIST_HEAD_INIT(proto_quic4.receivers),
+ .nb_receivers = 0,
+#ifdef SO_REUSEPORT
+ .flags = PROTO_F_REUSEPORT_SUPPORTED,
+#endif
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_quic4);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_quic6 = {
+ .name = "quic6",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = quic_bind_listener,
+ .enable = quic_enable_listener,
+ .disable = quic_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = quic_sock_accept_conn,
+ .get_src = quic_sock_get_src,
+ .get_dst = quic_sock_get_dst,
+ .connect = quic_connect_server,
+ .set_affinity = quic_set_affinity,
+
+ /* binding layer */
+ .rx_suspend = udp_suspend_receiver,
+ .rx_resume = udp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet6,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = IPPROTO_UDP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = quic_sock_accepting_conn,
+ .default_iocb = quic_lstnr_sock_fd_iocb,
+ .receivers = LIST_HEAD_INIT(proto_quic6.receivers),
+ .nb_receivers = 0,
+#ifdef SO_REUSEPORT
+ .flags = PROTO_F_REUSEPORT_SUPPORTED,
+#endif
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_quic6);
+
+/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
+ * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
+ * - 0 : ignore remote address (may even be a NULL pointer)
+ * - 1 : use provided address
+ * - 2 : use provided port
+ * - 3 : use both
+ *
+ * The function supports a single foreign binding method:
+ *  - linux_tproxy: we directly bind to the foreign address
+ * This function returns 0 when everything's OK, 1 if it could not bind to the
+ * local address, 2 if it could not bind to the foreign address.
+ */
+int quic_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
+{
+ struct sockaddr_storage bind_addr;
+ int foreign_ok = 0;
+ int ret;
+ static THREAD_LOCAL int ip_transp_working = 1;
+ static THREAD_LOCAL int ip6_transp_working = 1;
+
+ switch (local->ss_family) {
+ case AF_INET:
+ if (flags && ip_transp_working) {
+ /* This deserves some explanation. Some platforms will support
+ * multiple combinations of certain methods, so we try the
+ * supported ones until one succeeds.
+ */
+ if (sock_inet4_make_foreign(fd))
+ foreign_ok = 1;
+ else
+ ip_transp_working = 0;
+ }
+ break;
+ case AF_INET6:
+ if (flags && ip6_transp_working) {
+ if (sock_inet6_make_foreign(fd))
+ foreign_ok = 1;
+ else
+ ip6_transp_working = 0;
+ }
+ break;
+ }
+
+ if (flags) {
+ memset(&bind_addr, 0, sizeof(bind_addr));
+ bind_addr.ss_family = remote->ss_family;
+ switch (remote->ss_family) {
+ case AF_INET:
+ if (flags & 1)
+ ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
+ if (flags & 2)
+ ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
+ break;
+ case AF_INET6:
+ if (flags & 1)
+ ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
+ if (flags & 2)
+ ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
+ break;
+ default:
+ /* we don't want to try to bind to an unknown address family */
+ foreign_ok = 0;
+ }
+ }
+
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ if (foreign_ok) {
+ if (is_inet_addr(&bind_addr)) {
+ ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
+ if (ret < 0)
+ return 2;
+ }
+ }
+ else {
+ if (is_inet_addr(local)) {
+ ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
+ if (ret < 0)
+ return 1;
+ }
+ }
+
+ if (!flags)
+ return 0;
+
+ if (!foreign_ok)
+ /* we could not bind to a foreign address */
+ return 2;
+
+ return 0;
+}
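+
+/* Illustrative call: for transparent proxying with CO_SRC_TPROXY_ADDR, the
+ * caller binds the outgoing socket to the client's source address and port:
+ *
+ *     ret = quic_bind_socket(fd, 3, &src->source_addr, conn->src);
+ *
+ * where flags=3 means "use both the foreign address and port" as described
+ * above; see quic_connect_server() below for the actual call sites.
+ */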
+
+/*
+ * This function initiates a QUIC connection establishment to the target assigned
+ * to connection <conn> using (si->{target,dst}). A source address may be
+ * pointed to by conn->src in case of transparent proxying. Normal source
+ * bind addresses are still determined locally (due to the possible need of a
+ * source port). conn->target may point either to a valid server or to a backend,
+ * depending on conn->target. Only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are
+ * supported. The <data> parameter is a boolean indicating whether there are data
+ * waiting for being sent or not, in order to adjust data write polling and on
+ * some platforms, the ability to avoid an empty initial ACK. The <flags> argument
+ * is not used.
+ *
+ * Note that a pending send_proxy message accounts for data.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
+ * it's invalid and the caller has nothing to do.
+ */
+
+int quic_connect_server(struct connection *conn, int flags)
+{
+ int fd;
+ struct server *srv;
+ struct proxy *be;
+ struct conn_src *src;
+ struct sockaddr_storage *addr;
+
+ BUG_ON(!conn->dst);
+
+ conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
+
+ switch (obj_type(conn->target)) {
+ case OBJ_TYPE_PROXY:
+ be = __objt_proxy(conn->target);
+ srv = NULL;
+ break;
+ case OBJ_TYPE_SERVER:
+ srv = __objt_server(conn->target);
+ be = srv->proxy;
+ break;
+ default:
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ fd = conn->handle.fd = sock_create_server_socket(conn);
+
+ if (fd == -1) {
+ qfprintf(stderr, "Cannot get a server socket.\n");
+
+ if (errno == ENFILE) {
+ conn->err_code = CO_ER_SYS_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EMFILE) {
+ conn->err_code = CO_ER_PROC_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == ENOBUFS || errno == ENOMEM) {
+ conn->err_code = CO_ER_SYS_MEMLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
+ conn->err_code = CO_ER_NOPROTO;
+ }
+ else
+ conn->err_code = CO_ER_SOCK_ERR;
+
+ /* this is a resource error */
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+
+ if (fd >= global.maxsock) {
+ /* do not log anything there, it's a normal condition when this option
+ * is used to serialize connections to a server !
+ */
+ ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
+ close(fd);
+ conn->err_code = CO_ER_CONF_FDLIM;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_PRXCOND; /* it is a configuration limit */
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (master == 1 && fd_set_cloexec(fd) == -1) {
+ ha_alert("Cannot set CLOEXEC on client socket.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ /* allow specific binding :
+ * - server-specific at first
+ * - proxy-specific next
+ */
+ if (srv && srv->conn_src.opts & CO_SRC_BIND)
+ src = &srv->conn_src;
+ else if (be->conn_src.opts & CO_SRC_BIND)
+ src = &be->conn_src;
+ else
+ src = NULL;
+
+ if (src) {
+ int ret, flags = 0;
+
+ if (conn->src && is_inet_addr(conn->src)) {
+ switch (src->opts & CO_SRC_TPROXY_MASK) {
+ case CO_SRC_TPROXY_CLI:
+ conn_set_private(conn);
+ __fallthrough;
+ case CO_SRC_TPROXY_ADDR:
+ flags = 3;
+ break;
+ case CO_SRC_TPROXY_CIP:
+ case CO_SRC_TPROXY_DYN:
+ conn_set_private(conn);
+ flags = 1;
+ break;
+ }
+ }
+
+#ifdef SO_BINDTODEVICE
+ /* Note: this might fail if not CAP_NET_RAW */
+ if (src->iface_name)
+ setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
+#endif
+
+ if (src->sport_range) {
+ int attempts = 10; /* should be more than enough to find a spare port */
+ struct sockaddr_storage sa;
+
+ ret = 1;
+ memcpy(&sa, &src->source_addr, sizeof(sa));
+
+ do {
+ /* note: in case of retry, we may have to release a previously
+ * allocated port, hence this loop's construct.
+ */
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+
+ if (!attempts)
+ break;
+ attempts--;
+
+ fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
+ if (!fdinfo[fd].local_port) {
+ conn->err_code = CO_ER_PORT_RANGE;
+ break;
+ }
+
+ fdinfo[fd].port_range = src->sport_range;
+ set_host_port(&sa, fdinfo[fd].local_port);
+
+ ret = quic_bind_socket(fd, flags, &sa, conn->src);
+ if (ret != 0)
+ conn->err_code = CO_ER_CANT_BIND;
+ } while (ret != 0); /* binding NOK */
+ }
+ else {
+#ifdef IP_BIND_ADDRESS_NO_PORT
+ static THREAD_LOCAL int bind_address_no_port = 1;
+ setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
+#endif
+ ret = quic_bind_socket(fd, flags, &src->source_addr, conn->src);
+ if (ret != 0)
+ conn->err_code = CO_ER_CANT_BIND;
+ }
+
+ if (unlikely(ret != 0)) {
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+
+ if (ret == 1) {
+ ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
+ be->id);
+ send_log(be, LOG_EMERG,
+ "Cannot bind to source address before connect() for backend %s.\n",
+ be->id);
+ } else {
+ ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
+ be->id);
+ send_log(be, LOG_EMERG,
+ "Cannot bind to tproxy source address before connect() for backend %s.\n",
+ be->id);
+ }
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+ }
+
+ if (global.tune.server_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
+
+ if (global.tune.server_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
+
+ addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
+ if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
+ if (errno == EINPROGRESS || errno == EALREADY) {
+ /* common case, let's wait for connect status */
+ conn->flags |= CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EISCONN) {
+ /* should normally not happen but if so, indicates that it's OK */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
+ char *msg;
+ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRNOTAVAIL) {
+ msg = "no free ports";
+ conn->err_code = CO_ER_FREE_PORTS;
+ }
+ else {
+ msg = "local address already in use";
+ conn->err_code = CO_ER_ADDR_INUSE;
+ }
+
+ qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ } else if (errno == ETIMEDOUT) {
+ //qfprintf(stderr,"Connect(): ETIMEDOUT");
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVTO;
+ } else {
+ // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
+ //qfprintf(stderr,"Connect(): %d", errno);
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVCL;
+ }
+ }
+ else {
+ /* connect() == 0, this is great! */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ conn_ctrl_init(conn); /* registers the FD */
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_LINGER_RISK); /* close hard if needed */
+
+ if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ fd_want_send(fd);
+ fd_cant_send(fd);
+ fd_cant_recv(fd);
+ }
+
+ return SF_ERR_NONE; /* connection is OK */
+}
+
+/* Allocate the RX buffers for <l> listener.
+ * Returns 1 on success, 0 on failure.
+ */
+static int quic_alloc_rxbufs_listener(struct listener *l)
+{
+ int i;
+ struct quic_receiver_buf *tmp;
+
+ MT_LIST_INIT(&l->rx.rxbuf_list);
+ for (i = 0; i < my_popcountl(l->rx.bind_thread); i++) {
+ struct quic_receiver_buf *rxbuf;
+ char *buf;
+
+ rxbuf = calloc(1, sizeof(*rxbuf));
+ if (!rxbuf)
+ goto err;
+
+ buf = pool_alloc(pool_head_quic_rxbuf);
+ if (!buf) {
+ free(rxbuf);
+ goto err;
+ }
+
+ rxbuf->buf = b_make(buf, QUIC_RX_BUFSZ, 0, 0);
+ LIST_INIT(&rxbuf->dgram_list);
+ MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
+ }
+
+ return 1;
+
+ err:
+ while ((tmp = MT_LIST_POP(&l->rx.rxbuf_list, typeof(tmp), rxbuf_el))) {
+ pool_free(pool_head_quic_rxbuf, tmp->buf.area);
+ free(tmp);
+ }
+ return 0;
+}
+
+/* Check if platform supports the required feature set for quic-conn owned
+ * socket. <l> listener must already be bound; a dummy socket will be opened
+ * on the same address as one of the support tests.
+ *
+ * Returns true if platform is deemed compatible else false.
+ */
+static int quic_test_sock_per_conn_support(struct listener *l)
+{
+ const struct receiver *rx = &l->rx;
+ int ret = 1, fdtest;
+
+ /* Check if IP destination address can be retrieved on recvfrom()
+ * operation.
+ */
+#if !defined(IP_PKTINFO) && !defined(IP_RECVDSTADDR)
+ ha_alert("Your platform does not seem to support UDP source address retrieval through IP_PKTINFO or an alternative flag. "
+ "QUIC connections will use listener socket.\n");
+ ret = 0;
+#endif
+
+ /* Check if platform support multiple UDP sockets bind on the same
+ * local address. Create a dummy socket and bind it on the same address
+ * as <l> listener. If bind system call fails, deactivate socket per
+ * connection. All other errors are not taken into account.
+ */
+ if (ret) {
+ fdtest = socket(rx->proto->fam->sock_domain,
+ rx->proto->sock_type, rx->proto->sock_prot);
+ if (fdtest >= 0) {
+ if (setsockopt(fdtest, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) &&
+ bind(fdtest, (struct sockaddr *)&rx->addr, rx->proto->fam->sock_addrlen) < 0) {
+				ha_alert("Your platform does not seem to support multiple UDP sockets bound on the same address. "
+ "QUIC connections will use listener socket.\n");
+ ret = 0;
+ }
+
+ close(fdtest);
+ }
+ }
+
+ return ret;
+}
+
+/* This function tries to bind a QUIC4/6 listener. It may return a warning or
+ * an error message in <errmsg> if the message is at most <errlen> bytes long
+ * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
+ * The return value is composed from ERR_ABORT, ERR_WARN,
+ * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
+ * was alright and that no message was returned. ERR_RETRYABLE means that an
+ * error occurred but that it may vanish after a retry (eg: port in use), and
+ * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
+ * the meaning of the error, but just indicate that a message is present which
+ * should be displayed with the respective level. Last, ERR_ABORT indicates
+ * that it's pointless to try to start other listeners. No error message is
+ * returned if <errlen> is zero.
+ */
+static int quic_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ const struct sockaddr_storage addr = listener->rx.addr;
+ int fd, err = ERR_NONE;
+ char *msg = NULL;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+ goto udp_return;
+ }
+
+ /* Duplicate quic_mode setting from bind_conf. Useful to overwrite it
+ * at runtime per receiver instance.
+ */
+ listener->rx.quic_mode = listener->bind_conf->quic_mode;
+
+ /* Set IP_PKTINFO to retrieve destination address on recv. */
+ fd = listener->rx.fd;
+ switch (addr.ss_family) {
+ case AF_INET:
+#if defined(IP_PKTINFO)
+ setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));
+#elif defined(IP_RECVDSTADDR)
+ setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &one, sizeof(one));
+#endif /* IP_PKTINFO || IP_RECVDSTADDR */
+ break;
+ case AF_INET6:
+#ifdef IPV6_RECVPKTINFO
+ setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
+#endif
+ break;
+ default:
+ break;
+ }
+
+ if (!quic_alloc_rxbufs_listener(listener)) {
+		msg = "could not allocate the QUIC RX buffers";
+ err |= ERR_WARN;
+ goto udp_return;
+ }
+
+ if (global.tune.options & GTUNE_QUIC_SOCK_PER_CONN) {
+ if (!quic_test_sock_per_conn_support(listener))
+ global.tune.options &= ~GTUNE_QUIC_SOCK_PER_CONN;
+ }
+
+ if (global.tune.frontend_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.frontend_rcvbuf, sizeof(global.tune.frontend_rcvbuf));
+
+ if (global.tune.frontend_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.frontend_sndbuf, sizeof(global.tune.frontend_sndbuf));
+
+ listener_set_state(listener, LI_LISTEN);
+
+ udp_return:
+ if (msg && errlen) {
+ char pn[INET6_ADDRSTRLEN];
+
+ addr_to_str(&listener->rx.addr, pn, sizeof(pn));
+ snprintf(errmsg, errlen, "%s for [%s:%d]", msg, pn, get_host_port(&listener->rx.addr));
+ }
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid. Does nothing in early boot (needs fd_updt).
+ */
+static void quic_enable_listener(struct listener *l)
+{
+ /* FIXME: The following statements are incorrect. This
+	 * is the responsibility of the QUIC xprt to start accepting new
+ * connections.
+ */
+ if (fd_updt)
+ fd_want_recv(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid. Does nothing in early boot (needs fd_updt).
+ */
+static void quic_disable_listener(struct listener *l)
+{
+ /* FIXME: The following statements are incorrect. This
+	 * is the responsibility of the QUIC xprt to stop accepting new
+	 * connections.
+ */
+ if (fd_updt)
+ fd_stop_recv(l->rx.fd);
+}
+
+/* change the connection's thread to <new_tid>. For frontend connections, the
+ * target is a listener, and the caller is responsible for guaranteeing that
+ * the listener assigned to the connection is bound to the requested thread.
+ */
+static int quic_set_affinity(struct connection *conn, int new_tid)
+{
+ struct quic_conn *qc = conn->handle.qc;
+ return qc_set_tid_affinity(qc, new_tid, objt_listener(conn->target));
+}
+
+static int quic_alloc_dghdlrs(void)
+{
+ int i;
+
+ quic_dghdlrs = calloc(global.nbthread, sizeof(*quic_dghdlrs));
+ if (!quic_dghdlrs) {
+ ha_alert("Failed to allocate the quic datagram handlers.\n");
+ return 0;
+ }
+
+ for (i = 0; i < global.nbthread; i++) {
+ struct quic_dghdlr *dghdlr = &quic_dghdlrs[i];
+
+ dghdlr->task = tasklet_new();
+ if (!dghdlr->task) {
+ ha_alert("Failed to allocate the quic datagram handler on thread %d.\n", i);
+ return 0;
+ }
+
+ tasklet_set_tid(dghdlr->task, i);
+ dghdlr->task->context = dghdlr;
+ dghdlr->task->process = quic_lstnr_dghdlr;
+
+ MT_LIST_INIT(&dghdlr->dgrams);
+ }
+
+ quic_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(*quic_cid_trees));
+ if (!quic_cid_trees) {
+ ha_alert("Failed to allocate global CIDs trees.\n");
+ return 0;
+ }
+
+ for (i = 0; i < QUIC_CID_TREES_CNT; ++i) {
+ HA_RWLOCK_INIT(&quic_cid_trees[i].lock);
+ quic_cid_trees[i].root = EB_ROOT_UNIQUE;
+ }
+
+ return 1;
+}
+REGISTER_POST_CHECK(quic_alloc_dghdlrs);
+
+static int quic_deallocate_dghdlrs(void)
+{
+ int i;
+
+ if (quic_dghdlrs) {
+ for (i = 0; i < global.nbthread; ++i)
+ tasklet_free(quic_dghdlrs[i].task);
+ free(quic_dghdlrs);
+ }
+
+ ha_free(&quic_cid_trees);
+
+ return 1;
+}
+REGISTER_POST_DEINIT(quic_deallocate_dghdlrs);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_rhttp.c b/src/proto_rhttp.c
new file mode 100644
index 0000000..452ee32
--- /dev/null
+++ b/src/proto_rhttp.c
@@ -0,0 +1,464 @@
+#include <stdio.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/intops.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/sock.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/task.h>
+
+#include <haproxy/proto_rhttp.h>
+
+struct proto_fam proto_fam_rhttp = {
+ .name = "rhttp",
+ .sock_domain = AF_CUST_RHTTP_SRV,
+ .sock_family = AF_INET,
+ .bind = rhttp_bind_receiver,
+};
+
+struct protocol proto_rhttp = {
+ .name = "rev",
+
+ /* connection layer (no outgoing connection) */
+ .listen = rhttp_bind_listener,
+ .enable = rhttp_enable_listener,
+ .disable = rhttp_disable_listener,
+ .add = default_add_listener,
+ .unbind = rhttp_unbind_receiver,
+ .resume = default_resume_listener,
+ .accept_conn = rhttp_accept_conn,
+ .set_affinity = rhttp_set_affinity,
+
+ /* address family */
+ .fam = &proto_fam_rhttp,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = IPPROTO_TCP,
+ .rx_listening = rhttp_accepting_conn,
+ .receivers = LIST_HEAD_INIT(proto_rhttp.receivers),
+};
+
+static struct connection *new_reverse_conn(struct listener *l, struct server *srv)
+{
+ struct connection *conn = conn_new(srv);
+ struct sockaddr_storage *bind_addr = NULL;
+ if (!conn)
+ goto err;
+
+ HA_ATOMIC_INC(&th_ctx->nb_rhttp_conns);
+
+ conn_set_reverse(conn, &l->obj_type);
+
+ if (alloc_bind_address(&bind_addr, srv, srv->proxy, NULL) != SRV_STATUS_OK)
+ goto err;
+ conn->src = bind_addr;
+
+ sockaddr_alloc(&conn->dst, 0, 0);
+ if (!conn->dst)
+ goto err;
+ *conn->dst = srv->addr;
+ set_host_port(conn->dst, srv->svc_port);
+
+ if (conn_prepare(conn, protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, 0), srv->xprt))
+ goto err;
+
+ if (conn->ctrl->connect(conn, 0) != SF_ERR_NONE)
+ goto err;
+
+#ifdef USE_OPENSSL
+ if (srv->ssl_ctx.sni) {
+ struct sample *sni_smp = NULL;
+		/* TODO: remove the NULL session, which can cause a crash depending on the SNI sample expr used. */
+ sni_smp = sample_fetch_as_type(srv->proxy, NULL, NULL,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
+ srv->ssl_ctx.sni, SMP_T_STR);
+ if (smp_make_safe(sni_smp))
+ ssl_sock_set_servername(conn, sni_smp->data.u.str.area);
+ }
+#endif /* USE_OPENSSL */
+
+ if (conn_xprt_start(conn) < 0)
+ goto err;
+
+ if (!srv->use_ssl ||
+ (!srv->ssl_ctx.alpn_str && !srv->ssl_ctx.npn_str) ||
+ srv->mux_proto) {
+ if (conn_install_mux_be(conn, NULL, NULL, NULL) < 0)
+ goto err;
+ }
+
+ /* Not expected here. */
+ BUG_ON((conn->flags & CO_FL_HANDSHAKE));
+ return conn;
+
+ err:
+ if (conn) {
+ conn_stop_tracking(conn);
+ conn_xprt_shutw(conn);
+ conn_xprt_close(conn);
+ conn_sock_shutw(conn, 0);
+ conn_ctrl_close(conn);
+
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+
+		/* Mark the connection as non-reversible. This prevents
+		 * conn_free() from rescheduling the rhttp task when freeing a
+		 * preconnect connection.
+		 */
+ conn->reverse.target = NULL;
+ conn_free(conn);
+ }
+
+ return NULL;
+}
+
+/* Report that a connection used for preconnect on listener <l> is freed before
+ * reversal is completed. This is used to clean up any reference to the
+ * connection and rearm a new preconnect attempt.
+ */
+void rhttp_notify_preconn_err(struct listener *l)
+{
+ /* Receiver must reference a reverse connection as pending. */
+ BUG_ON(!l->rx.rhttp.pend_conn);
+
+ /* Remove reference to the freed connection. */
+ l->rx.rhttp.pend_conn = NULL;
+
+ if (l->rx.rhttp.state != LI_PRECONN_ST_ERR) {
+ send_log(l->bind_conf->frontend, LOG_ERR,
+ "preconnect %s::%s: Error encountered.\n",
+ l->bind_conf->frontend->id, l->bind_conf->rhttp_srvname);
+ l->rx.rhttp.state = LI_PRECONN_ST_ERR;
+ }
+
+ /* Rearm a new preconnect attempt. */
+ l->rx.rhttp.task->expire = MS_TO_TICKS(now_ms + 1000);
+ task_queue(l->rx.rhttp.task);
+}
+
+/* Look up the current count of active reverse HTTP connections across the
+ * threads of listener <l>. Returns the least loaded thread ID.
+ */
+static unsigned int select_thread(struct listener *l)
+{
+ unsigned long mask = l->rx.bind_thread & _HA_ATOMIC_LOAD(&tg->threads_enabled);
+ unsigned int load_min = HA_ATOMIC_LOAD(&th_ctx->nb_rhttp_conns);
+ unsigned int load_thr;
+ unsigned int ret = tid;
+ int i;
+
+ /* Returns current tid if listener runs on one thread only. */
+ if (!atleast2(mask))
+ goto end;
+
+	/* Loop over all threads and return the least loaded one. This only
+	 * needs to be an approximation, so it does not matter if the selected
+	 * thread's load has changed since it was measured.
+	 */
+
+ for (i = tg->base; mask; mask >>= 1, i++) {
+ if (!(mask & 0x1))
+ continue;
+
+ load_thr = HA_ATOMIC_LOAD(&ha_thread_ctx[i].nb_rhttp_conns);
+ if (load_min > load_thr) {
+ ret = i;
+ load_min = load_thr;
+ }
+ }
+
+ end:
+ return ret;
+}
+
+/* Detach <task> from its thread and assign it to <new_tid> thread. The task is
+ * queued to be woken up on the new thread.
+ */
+static void task_migrate(struct task *task, uint new_tid)
+{
+ task_unlink_wq(task);
+ task->expire = TICK_ETERNITY;
+ task_set_thread(task, new_tid);
+ task_wakeup(task, TASK_WOKEN_MSG);
+}
+
+struct task *rhttp_process(struct task *task, void *ctx, unsigned int state)
+{
+ struct listener *l = ctx;
+ struct connection *conn = l->rx.rhttp.pend_conn;
+
+ if (conn) {
+		/* Either the connection is on error or the connect timeout fired. */
+ if (conn->flags & CO_FL_ERROR || tick_is_expired(task->expire, now_ms)) {
+ /* If mux already instantiated, let it release the
+ * connection along with its context. Else do cleanup
+ * directly.
+ */
+ if (conn->mux && conn->mux->destroy) {
+ conn->mux->destroy(conn->ctx);
+ }
+ else {
+ conn_stop_tracking(conn);
+ conn_xprt_shutw(conn);
+ conn_xprt_close(conn);
+ conn_sock_shutw(conn, 0);
+ conn_ctrl_close(conn);
+
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+
+ /* conn_free() must report preconnect failure using rhttp_notify_preconn_err(). */
+ BUG_ON(l->rx.rhttp.pend_conn);
+
+ l->rx.rhttp.task->expire = TICKS_TO_MS(now_ms);
+ }
+ else {
+			/* Catch a spurious wakeup: at this point pend_conn must be ready for reversal. */
+ BUG_ON(!(conn->flags & CO_FL_ACT_REVERSING));
+
+ /* A connection is ready to be accepted. */
+ listener_accept(l);
+ l->rx.rhttp.task->expire = TICK_ETERNITY;
+ }
+ }
+ else {
+ struct server *srv = l->rx.rhttp.srv;
+
+ if ((state & TASK_WOKEN_ANY) != TASK_WOKEN_MSG) {
+ unsigned int new_tid = select_thread(l);
+ if (new_tid != tid) {
+ task_migrate(l->rx.rhttp.task, new_tid);
+ return task;
+ }
+ }
+
+		/* No pending reverse connection: prepare a new one and store it
+		 * in the listener. The connection will be accepted later, once
+		 * its reversal has completed.
+		 */
+ conn = new_reverse_conn(l, srv);
+ l->rx.rhttp.pend_conn = conn;
+
+ /* On success task will be woken up by H2 mux after reversal. */
+ l->rx.rhttp.task->expire = conn ?
+ tick_add_ifset(now_ms, srv->proxy->timeout.connect) :
+ MS_TO_TICKS(now_ms + 1000);
+ }
+
+ return task;
+}
+
+int rhttp_bind_receiver(struct receiver *rx, char **errmsg)
+{
+ rx->flags |= RX_F_BOUND;
+ return ERR_NONE;
+}
+
+int rhttp_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ struct task *task;
+ struct proxy *be;
+ struct server *srv;
+ struct ist be_name, sv_name;
+ char *name = NULL;
+
+ unsigned long mask;
+ uint task_tid;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ /* Retrieve the first thread usable for this listener. */
+ mask = listener->rx.bind_thread & _HA_ATOMIC_LOAD(&tg->threads_enabled);
+ task_tid = my_ffsl(mask) + ha_tgroup_info[listener->rx.bind_tgroup].base;
+ if (!(task = task_new_on(task_tid))) {
+ snprintf(errmsg, errlen, "Out of memory.");
+ goto err;
+ }
+ task->process = rhttp_process;
+ task->context = listener;
+ listener->rx.rhttp.task = task;
+ listener->rx.rhttp.state = LI_PRECONN_ST_STOP;
+
+	/* Set maxconn, which is defined via the special keyword nbconn for
+	 * reverse connect. Use a default value of 1 if not set. This guarantees
+	 * that the listener is automatically re-enabled each time its connection
+	 * count falls back below maxconn after a connection error.
+	 */
+ listener->bind_conf->maxconn = listener->bind_conf->rhttp_nbconn;
+ if (!listener->bind_conf->maxconn)
+ listener->bind_conf->maxconn = 1;
+
+ name = strdup(listener->bind_conf->rhttp_srvname);
+ if (!name) {
+ snprintf(errmsg, errlen, "Out of memory.");
+ goto err;
+ }
+
+ sv_name = ist(name);
+ be_name = istsplit(&sv_name, '/');
+ if (!istlen(sv_name)) {
+ snprintf(errmsg, errlen, "Invalid server name: '%s'.", name);
+ goto err;
+ }
+
+ if (!(be = proxy_be_by_name(ist0(be_name)))) {
+ snprintf(errmsg, errlen, "No such backend: '%s'.", name);
+ goto err;
+ }
+ if (!(srv = server_find_by_name(be, ist0(sv_name)))) {
+ snprintf(errmsg, errlen, "No such server: '%s/%s'.", ist0(be_name), ist0(sv_name));
+ goto err;
+ }
+
+ if (srv->flags & SRV_F_RHTTP) {
+ snprintf(errmsg, errlen, "Cannot use reverse HTTP server '%s/%s' as target to a reverse bind.", ist0(be_name), ist0(sv_name));
+ goto err;
+ }
+
+ if (srv_is_transparent(srv)) {
+ snprintf(errmsg, errlen, "Cannot use transparent server '%s/%s' as target to a reverse bind.", ist0(be_name), ist0(sv_name));
+ goto err;
+ }
+
+ /* Check that server uses HTTP/2 either with proto or ALPN. */
+ if ((!srv->mux_proto || !isteqi(srv->mux_proto->token, ist("h2"))) &&
+ (!srv->use_ssl || !isteqi(ist(srv->ssl_ctx.alpn_str), ist("\x02h2")))) {
+ snprintf(errmsg, errlen, "Cannot reverse connect with server '%s/%s' unless HTTP/2 is activated on it with either proto or alpn keyword.", name, ist0(sv_name));
+ goto err;
+ }
+
+ /* Prevent dynamic source address settings. */
+ if (((srv->conn_src.opts & CO_SRC_TPROXY_MASK) &&
+ (srv->conn_src.opts & CO_SRC_TPROXY_MASK) != CO_SRC_TPROXY_ADDR) ||
+ ((srv->proxy->conn_src.opts & CO_SRC_TPROXY_MASK) &&
+ (srv->proxy->conn_src.opts & CO_SRC_TPROXY_MASK) != CO_SRC_TPROXY_ADDR)) {
+ snprintf(errmsg, errlen, "Cannot reverse connect with server '%s/%s' which uses dynamic source address setting.", name, ist0(sv_name));
+ goto err;
+ }
+
+ ha_free(&name);
+
+ listener->rx.rhttp.srv = srv;
+ listener_set_state(listener, LI_LISTEN);
+
+ return ERR_NONE;
+
+ err:
+ ha_free(&name);
+ return ERR_ALERT | ERR_FATAL;
+}
+
+void rhttp_enable_listener(struct listener *l)
+{
+ if (l->rx.rhttp.state < LI_PRECONN_ST_INIT) {
+ send_log(l->bind_conf->frontend, LOG_INFO,
+ "preconnect %s::%s: Initiating.\n",
+ l->bind_conf->frontend->id, l->bind_conf->rhttp_srvname);
+ l->rx.rhttp.state = LI_PRECONN_ST_INIT;
+ }
+
+ task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY);
+}
+
+void rhttp_disable_listener(struct listener *l)
+{
+ if (l->rx.rhttp.state < LI_PRECONN_ST_FULL) {
+ send_log(l->bind_conf->frontend, LOG_INFO,
+ "preconnect %s::%s: Running with nbconn %d reached.\n",
+ l->bind_conf->frontend->id, l->bind_conf->rhttp_srvname,
+ l->bind_conf->maxconn);
+ l->rx.rhttp.state = LI_PRECONN_ST_FULL;
+ }
+}
+
+struct connection *rhttp_accept_conn(struct listener *l, int *status)
+{
+ struct connection *conn = l->rx.rhttp.pend_conn;
+
+ if (!conn) {
+ /* Reverse connect listener must have an explicit maxconn set
+ * to ensure it is re-enabled on connection error.
+ */
+ BUG_ON(!l->bind_conf->maxconn);
+
+ /* Instantiate a new conn if maxconn not yet exceeded. */
+ if (l->nbconn <= l->bind_conf->maxconn) {
+ /* Try first if a new thread should be used for the new connection. */
+ unsigned int new_tid = select_thread(l);
+ if (new_tid != tid) {
+ task_migrate(l->rx.rhttp.task, new_tid);
+ *status = CO_AC_DONE;
+ return NULL;
+ }
+
+ /* No need to use a new thread, use the opportunity to alloc the connection right now. */
+ l->rx.rhttp.pend_conn = new_reverse_conn(l, l->rx.rhttp.srv);
+ if (!l->rx.rhttp.pend_conn) {
+ *status = CO_AC_PAUSE;
+ return NULL;
+ }
+ }
+
+ *status = CO_AC_DONE;
+ return NULL;
+ }
+
+	/* listener_accept() must not be called while the pending connection is not yet reversed. */
+ BUG_ON(!(conn->flags & CO_FL_ACT_REVERSING));
+ conn->flags &= ~CO_FL_ACT_REVERSING;
+ conn->flags |= CO_FL_REVERSED;
+ conn->mux->ctl(conn, MUX_CTL_REVERSE_CONN, NULL);
+
+ l->rx.rhttp.pend_conn = NULL;
+ *status = CO_AC_NONE;
+
+ return conn;
+}
+
+void rhttp_unbind_receiver(struct listener *l)
+{
+ l->rx.flags &= ~RX_F_BOUND;
+}
+
+int rhttp_set_affinity(struct connection *conn, int new_tid)
+{
+	/* Explicitly disable connection thread migration on accept. Indeed,
+	 * it's unsafe to move a connection with its FD to another thread. Note
+	 * that active reverse task thread migration should be sufficient to
+	 * ensure repartition of reversed connections across listener threads.
+	 */
+ return -1;
+}
+
+int rhttp_accepting_conn(const struct receiver *rx)
+{
+ return 1;
+}
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_rhttp);
+
+/* perform minimal initializations */
+static void init_rhttp()
+{
+ int i;
+
+ for (i = 0; i < MAX_THREADS; i++)
+ ha_thread_ctx[i].nb_rhttp_conns = 0;
+}
+
+INITCALL0(STG_PREPARE, init_rhttp);
diff --git a/src/proto_sockpair.c b/src/proto_sockpair.c
new file mode 100644
index 0000000..a719063
--- /dev/null
+++ b/src/proto_sockpair.c
@@ -0,0 +1,589 @@
+/*
+ * Socket Pair protocol layer (sockpair)
+ *
+ * Copyright HAProxy Technologies - William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <pwd.h>
+#include <grp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <time.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_sockpair.h>
+#include <haproxy/sock.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+static int sockpair_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static void sockpair_enable_listener(struct listener *listener);
+static void sockpair_disable_listener(struct listener *listener);
+static int sockpair_connect_server(struct connection *conn, int flags);
+static int sockpair_accepting_conn(const struct receiver *rx);
+struct connection *sockpair_accept_conn(struct listener *l, int *status);
+
+struct proto_fam proto_fam_sockpair = {
+ .name = "sockpair",
+ .sock_domain = AF_CUST_SOCKPAIR,
+ .sock_family = AF_UNIX,
+ .sock_addrlen = sizeof(struct sockaddr_un),
+ .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
+ .addrcmp = NULL,
+ .bind = sockpair_bind_receiver,
+ .get_src = NULL,
+ .get_dst = NULL,
+};
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_sockpair = {
+ .name = "sockpair",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = sockpair_bind_listener,
+ .enable = sockpair_enable_listener,
+ .disable = sockpair_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .accept_conn = sockpair_accept_conn,
+ .ctrl_init = sock_conn_ctrl_init,
+ .ctrl_close = sock_conn_ctrl_close,
+ .connect = sockpair_connect_server,
+ .drain = sock_drain,
+ .check_events = sock_check_events,
+ .ignore_events = sock_ignore_events,
+
+ /* binding layer */
+ /* Note: suspend/resume not supported */
+
+ /* address family */
+ .fam = &proto_fam_sockpair,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = 0,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = sockpair_accepting_conn,
+ .default_iocb = sock_accept_iocb,
+ .receivers = LIST_HEAD_INIT(proto_sockpair.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_sockpair);
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void sockpair_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void sockpair_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback
+ * and context, respectively, with ->bind_thread as the thread mask. Returns an
+ * error code made of ERR_* bits on failure or ERR_NONE on success. On failure,
+ * an error message may be passed into <errmsg>. Note that the binding address
+ * is only an FD to receive the incoming FDs on. Thus by definition there is no
+ * real "bind" operation, this only completes the receiver. Such FDs are not
+ * inherited upon reload.
+ */
+int sockpair_bind_receiver(struct receiver *rx, char **errmsg)
+{
+ int err;
+
+ /* ensure we never return garbage */
+ if (errmsg)
+ *errmsg = 0;
+
+ err = ERR_NONE;
+
+ if (rx->flags & RX_F_BOUND)
+ return ERR_NONE;
+
+ if (rx->flags & RX_F_MUST_DUP) {
+ /* this is a secondary receiver that is an exact copy of a
+ * reference which must already be bound (or has failed).
+ * We'll try to dup() the other one's FD and take it. We
+ * try hard not to reconfigure the socket since it's shared.
+ */
+ BUG_ON(!rx->shard_info);
+ if (!(rx->shard_info->ref->flags & RX_F_BOUND)) {
+ /* it's assumed that the first one has already reported
+ * the error, let's not spam with another one, and do
+ * not set ERR_ALERT.
+ */
+ err |= ERR_RETRYABLE;
+ goto bind_ret_err;
+ }
+ /* taking the other one's FD will result in it being marked
+ * extern and being dup()ed. Let's mark the receiver as
+ * inherited so that it properly bypasses all second-stage
+ * setup and avoids being passed to new processes.
+ */
+ rx->flags |= RX_F_INHERITED;
+ rx->fd = rx->shard_info->ref->fd;
+ }
+
+ if (rx->fd == -1) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "sockpair may be only used with inherited FDs");
+ goto bind_return;
+ }
+
+ if (rx->fd >= global.maxsock) {
+ err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
+ memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
+ goto bind_close_return;
+ }
+
+ if (fd_set_nonblock(rx->fd) == -1) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot make socket non-blocking");
+ goto bind_close_return;
+ }
+
+ rx->flags |= RX_F_BOUND;
+
+ fd_insert(rx->fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread);
+ return err;
+
+ bind_return:
+ if (errmsg && *errmsg)
+ memprintf(errmsg, "%s for [fd %d]", *errmsg, rx->fd);
+
+ bind_ret_err:
+ return err;
+
+ bind_close_return:
+ close(rx->fd);
+ goto bind_return;
+}
+
+/* This function changes the state from ASSIGNED to LISTEN. The socket is NOT
+ * enabled for polling. The return value is composed from ERR_NONE,
+ * ERR_RETRYABLE and ERR_FATAL. It may return a warning or an error message in
+ * <errmsg> if the message is at most <errlen> bytes long (including '\0').
+ * Note that <errmsg> may be NULL if <errlen> is also zero.
+ */
+static int sockpair_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int err;
+ char *msg = NULL;
+
+ err = ERR_NONE;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+ goto err_return;
+ }
+
+ listener_set_state(listener, LI_LISTEN);
+ return err;
+
+ err_return:
+ if (msg && errlen)
+ snprintf(errmsg, errlen, "%s [fd %d]", msg, listener->rx.fd);
+ return err;
+}
+
+/*
+ * Send FD over a unix socket
+ *
+ * <send_fd> is the FD to send
+ * <fd> is the fd of the unix socket to use for the transfer
+ *
+ * The iobuf variable could be used in the future to enhance the protocol.
+ */
+int send_fd_uxst(int fd, int send_fd)
+{
+ char iobuf[2];
+ struct iovec iov;
+ struct msghdr msghdr;
+
+ char cmsgbuf[CMSG_SPACE(sizeof(int))];
+ char buf[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg = (void *)buf;
+
+ int *fdptr;
+
+ iov.iov_base = iobuf;
+ iov.iov_len = sizeof(iobuf);
+
+ memset(&msghdr, 0, sizeof(msghdr));
+ msghdr.msg_iov = &iov;
+ msghdr.msg_iovlen = 1;
+
+ /* Now send the fds */
+ msghdr.msg_control = cmsgbuf;
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int));
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+
+ fdptr = (int *)CMSG_DATA(cmsg);
+ memcpy(fdptr, &send_fd, sizeof(send_fd));
+
+ if (sendmsg(fd, &msghdr, 0) != sizeof(iobuf)) {
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ *
+ * This function works like uxst_connect_server but instead of creating a
+ * socket and establishing a connection, it creates a pair of connected
+ * sockets and sends one of them through the destination FD. The destination
+ * FD is stored in conn->dst->sin_addr.s_addr during configuration parsing.
+ *
+ * conn->target may point either to a valid server or to a backend; only
+ * OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are supported. The <data> parameter is
+ * a boolean indicating whether there are data waiting to be sent, in order
+ * to adjust data write polling. The <delack> argument is ignored.
+ *
+ * Note that a pending send_proxy message accounts for data.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
+ * it's invalid and the caller has nothing to do.
+ */
+static int sockpair_connect_server(struct connection *conn, int flags)
+{
+ int sv[2], fd, dst_fd = -1;
+
+ BUG_ON(!conn->dst);
+
+ /* the FD is stored in the sockaddr struct */
+ dst_fd = ((struct sockaddr_in *)conn->dst)->sin_addr.s_addr;
+
+ if (obj_type(conn->target) != OBJ_TYPE_PROXY &&
+ obj_type(conn->target) != OBJ_TYPE_SERVER) {
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+ ha_alert("socketpair(): Cannot create socketpair. Giving up.\n");
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+
+ fd = conn->handle.fd = sv[1];
+
+ if (fd >= global.maxsock) {
+ /* do not log anything there, it's a normal condition when this option
+ * is used to serialize connections to a server !
+ */
+ ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
+ close(sv[0]);
+ close(sv[1]);
+ conn->err_code = CO_ER_CONF_FDLIM;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_PRXCOND; /* it is a configuration limit */
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
+ close(sv[0]);
+ close(sv[1]);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (master == 1 && fd_set_cloexec(fd) == -1) {
+ ha_alert("Cannot set CLOEXEC on client socket.\n");
+ close(sv[0]);
+ close(sv[1]);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (global.tune.server_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
+
+ if (global.tune.server_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
+
+ /* The new socket is sent on the other side, it should be retrieved and
+ * considered as an 'accept' socket on the server side */
+ if (send_fd_uxst(dst_fd, sv[0]) == -1) {
+ ha_alert("socketpair: Cannot transfer the fd %d over sockpair@%d. Giving up.\n", sv[0], dst_fd);
+ close(sv[0]);
+ close(sv[1]);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ close(sv[0]); /* we don't need this side anymore */
+
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ /* Prepare to send a few handshakes related to the on-wire protocol. */
+ if (conn->send_proxy_ofs)
+ conn->flags |= CO_FL_SEND_PROXY;
+
+ conn_ctrl_init(conn); /* registers the FD */
+ HA_ATOMIC_AND(&fdtab[fd].state, ~FD_LINGER_RISK); /* no need to disable lingering */
+
+ return SF_ERR_NONE; /* connection is OK */
+}
+
+
+/*
+ * Receives a file descriptor transferred from a unix socket.
+ *
+ * Returns -1 on failure, or the received socket fd.
+ *
+ * The iobuf variable could be used in the future to enhance the protocol.
+ */
+int recv_fd_uxst(int sock)
+{
+ struct msghdr msghdr;
+ struct iovec iov;
+ char iobuf[2];
+
+ char cmsgbuf[CMSG_SPACE(sizeof(int))];
+ char buf[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg = (void *)buf;
+
+ int recv_fd = -1;
+ int ret = -1;
+
+ memset(&msghdr, 0, sizeof(msghdr));
+
+ iov.iov_base = iobuf;
+ iov.iov_len = sizeof(iobuf);
+
+ msghdr.msg_iov = &iov;
+ msghdr.msg_iovlen = 1;
+
+ msghdr.msg_control = cmsgbuf;
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int));
+
+ while (1) {
+ ret = recvmsg(sock, &msghdr, 0);
+ if (ret == -1 && errno == EINTR)
+ continue;
+ else
+ break;
+ }
+
+ if (ret == -1)
+ return ret;
+
+ cmsg = CMSG_FIRSTHDR(&msghdr);
+ if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_RIGHTS) {
+ size_t totlen = cmsg->cmsg_len -
+ CMSG_LEN(0);
+ memcpy(&recv_fd, CMSG_DATA(cmsg), totlen);
+ }
+ return recv_fd;
+}
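+
+/* A minimal usage sketch, not part of HAProxy itself: pairing send_fd_uxst()
+ * with recv_fd_uxst() over a socketpair. The received descriptor refers to
+ * the same open file description as the one sent, much like after a fork().
+ * The function name is hypothetical and error handling is kept minimal.
+ */
+static int example_fd_passing(void)
+{
+	int ch[2], dup_fd;
+
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, ch) == -1)
+		return -1;
+
+	/* ship stdout's descriptor through one end of the pair... */
+	if (send_fd_uxst(ch[0], 1) == -1) {
+		close(ch[0]);
+		close(ch[1]);
+		return -1;
+	}
+
+	/* ...and pick up a duplicate of it on the other end */
+	dup_fd = recv_fd_uxst(ch[1]);
+
+	close(ch[0]);
+	close(ch[1]);
+	return dup_fd; /* -1 on error, else a new fd aliasing stdout */
+}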
+
+/* Tests if the receiver supports accepting connections. Returns positive on
+ * success, 0 if not possible, negative if the socket is non-recoverable. In
+ * practice zero is never returned since we don't support suspending sockets.
+ * The real test consists in verifying we have a connected SOCK_STREAM of
+ * family AF_UNIX.
+ */
+static int sockpair_accepting_conn(const struct receiver *rx)
+{
+ struct sockaddr sa;
+ socklen_t len;
+ int val;
+
+ len = sizeof(val);
+ if (getsockopt(rx->fd, SOL_SOCKET, SO_TYPE, &val, &len) == -1)
+ return -1;
+
+ if (val != SOCK_STREAM)
+ return -1;
+
+ len = sizeof(sa);
+ if (getsockname(rx->fd, &sa, &len) != 0)
+ return -1;
+
+ if (sa.sa_family != AF_UNIX)
+ return -1;
+
+ len = sizeof(val);
+ if (getsockopt(rx->fd, SOL_SOCKET, SO_ACCEPTCONN, &val, &len) == -1)
+ return -1;
+
+ /* Note: cannot be a listening socket, must be established */
+ if (val)
+ return -1;
+
+ return 1;
+}
+
+/* Accept an incoming connection from listener <l>, and return it, as well as
+ * a CO_AC_* status code into <status> if not null. Null is returned on error.
+ * <l> must be a valid listener with a valid frontend.
+ */
+struct connection *sockpair_accept_conn(struct listener *l, int *status)
+{
+ struct proxy *p = l->bind_conf->frontend;
+ struct connection *conn = NULL;
+ int ret;
+ int cfd;
+
+ if ((cfd = recv_fd_uxst(l->rx.fd)) != -1)
+ fd_set_nonblock(cfd);
+
+ if (likely(cfd != -1)) {
+ /* Perfect, the connection was accepted */
+ conn = conn_new(&l->obj_type);
+ if (!conn)
+ goto fail_conn;
+
+ if (!sockaddr_alloc(&conn->src, NULL, 0))
+ goto fail_addr;
+
+ /* just like with UNIX sockets, only the family is filled */
+ conn->src->ss_family = AF_UNIX;
+ conn->handle.fd = cfd;
+ ret = CO_AC_DONE;
+ goto done;
+ }
+
+ switch (errno) {
+#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
+ case EWOULDBLOCK:
+#endif
+ case EAGAIN:
+ ret = CO_AC_DONE; /* nothing more to accept */
+ if (fdtab[l->rx.fd].state & (FD_POLL_HUP|FD_POLL_ERR)) {
+ /* the listening socket might have been disabled in a shared
+ * process and we're a collateral victim. We'll just pause for
+			 * a while in case it comes back. In the meantime, we need to
+ * clear this sticky flag.
+ */
+ _HA_ATOMIC_AND(&fdtab[l->rx.fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
+ ret = CO_AC_PAUSE;
+ }
+ fd_cant_recv(l->rx.fd);
+ break;
+
+ case EINVAL:
+ /* might be trying to accept on a shut fd (eg: soft stop) */
+ ret = CO_AC_PAUSE;
+ break;
+
+ case EINTR:
+ case ECONNABORTED:
+ ret = CO_AC_RETRY;
+ break;
+
+ case ENFILE:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ case EMFILE:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ case ENOBUFS:
+ case ENOMEM:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ default:
+ /* unexpected result, let's give up and let other tasks run */
+ ret = CO_AC_YIELD;
+ }
+ done:
+ if (status)
+ *status = ret;
+ return conn;
+
+ fail_addr:
+ conn_free(conn);
+ conn = NULL;
+ fail_conn:
+ ret = CO_AC_PAUSE;
+ goto done;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_tcp.c b/src/proto_tcp.c
new file mode 100644
index 0000000..45ce27f
--- /dev/null
+++ b/src/proto_tcp.c
@@ -0,0 +1,834 @@
+/*
+ * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/port_range.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/tools.h>
+
+
+static int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static int tcp_suspend_receiver(struct receiver *rx);
+static int tcp_resume_receiver(struct receiver *rx);
+static void tcp_enable_listener(struct listener *listener);
+static void tcp_disable_listener(struct listener *listener);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_tcpv4 = {
+ .name = "tcpv4",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = tcp_bind_listener,
+ .enable = tcp_enable_listener,
+ .disable = tcp_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = sock_accept_conn,
+ .ctrl_init = sock_conn_ctrl_init,
+ .ctrl_close = sock_conn_ctrl_close,
+ .connect = tcp_connect_server,
+ .drain = sock_drain,
+ .check_events = sock_check_events,
+ .ignore_events = sock_ignore_events,
+
+ /* binding layer */
+ .rx_suspend = tcp_suspend_receiver,
+ .rx_resume = tcp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet4,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = IPPROTO_TCP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = sock_accepting_conn,
+ .default_iocb = sock_accept_iocb,
+ .receivers = LIST_HEAD_INIT(proto_tcpv4.receivers),
+ .nb_receivers = 0,
+#ifdef SO_REUSEPORT
+ .flags = PROTO_F_REUSEPORT_SUPPORTED,
+#endif
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv4);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_tcpv6 = {
+ .name = "tcpv6",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = tcp_bind_listener,
+ .enable = tcp_enable_listener,
+ .disable = tcp_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = sock_accept_conn,
+ .ctrl_init = sock_conn_ctrl_init,
+ .ctrl_close = sock_conn_ctrl_close,
+ .connect = tcp_connect_server,
+ .drain = sock_drain,
+ .check_events = sock_check_events,
+ .ignore_events = sock_ignore_events,
+
+ /* binding layer */
+ .rx_suspend = tcp_suspend_receiver,
+ .rx_resume = tcp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet6,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = IPPROTO_TCP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = sock_accepting_conn,
+ .default_iocb = sock_accept_iocb,
+ .receivers = LIST_HEAD_INIT(proto_tcpv6.receivers),
+ .nb_receivers = 0,
+#ifdef SO_REUSEPORT
+ .flags = PROTO_F_REUSEPORT_SUPPORTED,
+#endif
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_tcpv6);
+
+/* Binds ipv4/ipv6 address <local> to socket <fd>, unless <flags> is set, in which
+ * case we try to bind <remote>. <flags> is a 2-bit field consisting of :
+ * - 0 : ignore remote address (may even be a NULL pointer)
+ * - 1 : use provided address
+ * - 2 : use provided port
+ * - 3 : use both
+ *
+ * The function supports the following foreign binding method :
+ *   - linux_tproxy: we directly bind to the foreign address
+ * This function returns 0 when everything's OK, 1 if it could not bind to the
+ * local address, 2 if it could not bind to the foreign address.
+ */
+int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct sockaddr_storage *remote)
+{
+ struct sockaddr_storage bind_addr;
+ int foreign_ok = 0;
+ int ret;
+ static THREAD_LOCAL int ip_transp_working = 1;
+ static THREAD_LOCAL int ip6_transp_working = 1;
+
+ switch (local->ss_family) {
+ case AF_INET:
+ if (flags && ip_transp_working) {
+ /* This deserves some explanation. Some platforms will support
+ * multiple combinations of certain methods, so we try the
+ * supported ones until one succeeds.
+ */
+ if (sock_inet4_make_foreign(fd))
+ foreign_ok = 1;
+ else
+ ip_transp_working = 0;
+ }
+ break;
+ case AF_INET6:
+ if (flags && ip6_transp_working) {
+ if (sock_inet6_make_foreign(fd))
+ foreign_ok = 1;
+ else
+ ip6_transp_working = 0;
+ }
+ break;
+ }
+
+ if (flags) {
+ memset(&bind_addr, 0, sizeof(bind_addr));
+ bind_addr.ss_family = remote->ss_family;
+ switch (remote->ss_family) {
+ case AF_INET:
+ if (flags & 1)
+ ((struct sockaddr_in *)&bind_addr)->sin_addr = ((struct sockaddr_in *)remote)->sin_addr;
+ if (flags & 2)
+ ((struct sockaddr_in *)&bind_addr)->sin_port = ((struct sockaddr_in *)remote)->sin_port;
+ break;
+ case AF_INET6:
+ if (flags & 1)
+ ((struct sockaddr_in6 *)&bind_addr)->sin6_addr = ((struct sockaddr_in6 *)remote)->sin6_addr;
+ if (flags & 2)
+ ((struct sockaddr_in6 *)&bind_addr)->sin6_port = ((struct sockaddr_in6 *)remote)->sin6_port;
+ break;
+ default:
+ /* we don't want to try to bind to an unknown address family */
+ foreign_ok = 0;
+ }
+ }
+
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ if (foreign_ok) {
+ if (is_inet_addr(&bind_addr)) {
+ ret = bind(fd, (struct sockaddr *)&bind_addr, get_addr_len(&bind_addr));
+ if (ret < 0)
+ return 2;
+ }
+ }
+ else {
+ if (is_inet_addr(local)) {
+ ret = bind(fd, (struct sockaddr *)local, get_addr_len(local));
+ if (ret < 0)
+ return 1;
+ }
+ }
+
+ if (!flags)
+ return 0;
+
+ if (!foreign_ok)
+ /* we could not bind to a foreign address */
+ return 2;
+
+ return 0;
+}
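+
+/* A minimal illustrative sketch, not part of HAProxy itself: calling
+ * tcp_bind_socket() with flags == 3 to bind an outgoing socket to the
+ * client's source address and port for transparent proxying. It assumes the
+ * process holds the privileges needed for foreign binding (e.g. CAP_NET_ADMIN
+ * on Linux); the function name is hypothetical.
+ */
+static int example_transparent_bind(int fd, struct sockaddr_storage *client)
+{
+	/* an all-zero local address: the fallback bind to it is simply
+	 * skipped when foreign binding is unavailable.
+	 */
+	struct sockaddr_storage local = { .ss_family = client->ss_family };
+
+	switch (tcp_bind_socket(fd, 3, &local, client)) {
+	case 0:  return  0; /* bound, ready to connect() */
+	case 1:  return -1; /* could not bind to the local address */
+	default: return -2; /* could not bind to the foreign address */
+	}
+}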
+
+/*
+ * This function initiates a TCP connection establishment to the target assigned
+ * to connection <conn> using conn->{target,dst}. A source address may be
+ * pointed to by conn->src in case of transparent proxying. Normal source
+ * bind addresses are still determined locally (due to the possible need of a
+ * source port). conn->target may point either to a valid server or to a
+ * backend; only OBJ_TYPE_PROXY and OBJ_TYPE_SERVER are supported. The <data>
+ * parameter is a boolean indicating whether there are data waiting to be
+ * sent, in order to adjust data write polling and, on some platforms, the
+ * ability to avoid an empty initial ACK. The <flags> argument
+ * allows the caller to force using a delayed ACK when establishing the connection
+ * - 0 = no delayed ACK unless data are advertised and backend has tcp-smart-connect
+ * - CONNECT_DELACK_SMART_CONNECT = delayed ACK if backend has tcp-smart-connect, regardless of data
+ * - CONNECT_DELACK_ALWAYS = delayed ACK regardless of backend options
+ *
+ * Note that a pending send_proxy message accounts for data.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
+ * it's invalid and the caller has nothing to do.
+ */
+
+int tcp_connect_server(struct connection *conn, int flags)
+{
+ int fd;
+ struct server *srv;
+ struct proxy *be;
+ struct conn_src *src;
+ int use_fastopen = 0;
+ struct sockaddr_storage *addr;
+
+ BUG_ON(!conn->dst);
+
+ conn->flags |= CO_FL_WAIT_L4_CONN; /* connection in progress */
+
+ switch (obj_type(conn->target)) {
+ case OBJ_TYPE_PROXY:
+ be = __objt_proxy(conn->target);
+ srv = NULL;
+ break;
+ case OBJ_TYPE_SERVER:
+ srv = __objt_server(conn->target);
+ be = srv->proxy;
+ /* Make sure we check that we have data before activating
+ * TFO, or we could trigger a kernel issue whereby after
+ * a successful connect() == 0, any subsequent connect()
+ * will return EINPROGRESS instead of EISCONN.
+ */
+ use_fastopen = (srv->flags & SRV_F_FASTOPEN) &&
+ ((flags & (CONNECT_CAN_USE_TFO | CONNECT_HAS_DATA)) ==
+ (CONNECT_CAN_USE_TFO | CONNECT_HAS_DATA));
+ break;
+ default:
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ fd = conn->handle.fd = sock_create_server_socket(conn);
+
+ if (fd == -1) {
+ qfprintf(stderr, "Cannot get a server socket.\n");
+
+ if (errno == ENFILE) {
+ conn->err_code = CO_ER_SYS_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EMFILE) {
+ conn->err_code = CO_ER_PROC_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == ENOBUFS || errno == ENOMEM) {
+ conn->err_code = CO_ER_SYS_MEMLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
+ conn->err_code = CO_ER_NOPROTO;
+ }
+ else
+ conn->err_code = CO_ER_SOCK_ERR;
+
+ /* this is a resource error */
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+
+ if (fd >= global.maxsock) {
+ /* do not log anything there, it's a normal condition when this option
+ * is used to serialize connections to a server !
+ */
+ ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
+ close(fd);
+ conn->err_code = CO_ER_CONF_FDLIM;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_PRXCOND; /* it is a configuration limit */
+ }
+
+ if (fd_set_nonblock(fd) == -1 ||
+ (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) {
+ qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (master == 1 && fd_set_cloexec(fd) == -1) {
+ ha_alert("Cannot set CLOEXEC on client socket.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (be->options & PR_O_TCP_SRV_KA) {
+ setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
+
+#ifdef TCP_KEEPCNT
+ if (be->srvtcpka_cnt)
+ setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &be->srvtcpka_cnt, sizeof(be->srvtcpka_cnt));
+#endif
+
+#ifdef TCP_KEEPIDLE
+ if (be->srvtcpka_idle)
+ setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &be->srvtcpka_idle, sizeof(be->srvtcpka_idle));
+#endif
+
+#ifdef TCP_KEEPINTVL
+ if (be->srvtcpka_intvl)
+ setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &be->srvtcpka_intvl, sizeof(be->srvtcpka_intvl));
+#endif
+ }
+
+ /* allow specific binding :
+ * - server-specific at first
+ * - proxy-specific next
+ */
+ if (srv && srv->conn_src.opts & CO_SRC_BIND)
+ src = &srv->conn_src;
+ else if (be->conn_src.opts & CO_SRC_BIND)
+ src = &be->conn_src;
+ else
+ src = NULL;
+
+ if (src) {
+ int ret, flags = 0;
+
+ if (conn->src && is_inet_addr(conn->src)) {
+ switch (src->opts & CO_SRC_TPROXY_MASK) {
+ case CO_SRC_TPROXY_CLI:
+ case CO_SRC_TPROXY_ADDR:
+ flags = 3;
+ break;
+ case CO_SRC_TPROXY_CIP:
+ case CO_SRC_TPROXY_DYN:
+ flags = 1;
+ break;
+ }
+ }
+
+#ifdef SO_BINDTODEVICE
+ /* Note: this might fail if not CAP_NET_RAW */
+ if (src->iface_name)
+ setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, src->iface_name, src->iface_len + 1);
+#endif
+
+ if (src->sport_range) {
+ int attempts = 10; /* should be more than enough to find a spare port */
+ struct sockaddr_storage sa;
+
+ ret = 1;
+ memcpy(&sa, &src->source_addr, sizeof(sa));
+
+ do {
+ /* note: in case of retry, we may have to release a previously
+ * allocated port, hence this loop's construct.
+ */
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+
+ if (!attempts)
+ break;
+ attempts--;
+
+ fdinfo[fd].local_port = port_range_alloc_port(src->sport_range);
+ if (!fdinfo[fd].local_port) {
+ conn->err_code = CO_ER_PORT_RANGE;
+ break;
+ }
+
+ fdinfo[fd].port_range = src->sport_range;
+ set_host_port(&sa, fdinfo[fd].local_port);
+
+ ret = tcp_bind_socket(fd, flags, &sa, conn->src);
+ if (ret != 0)
+ conn->err_code = CO_ER_CANT_BIND;
+ } while (ret != 0); /* binding NOK */
+ }
+ else {
+#ifdef IP_BIND_ADDRESS_NO_PORT
+ static THREAD_LOCAL int bind_address_no_port = 1;
+ setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, (const void *) &bind_address_no_port, sizeof(int));
+#endif
+ ret = tcp_bind_socket(fd, flags, &src->source_addr, conn->src);
+ if (ret != 0)
+ conn->err_code = CO_ER_CANT_BIND;
+ }
+
+ if (unlikely(ret != 0)) {
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+
+ if (ret == 1) {
+ ha_alert("Cannot bind to source address before connect() for backend %s. Aborting.\n",
+ be->id);
+ send_log(be, LOG_EMERG,
+ "Cannot bind to source address before connect() for backend %s.\n",
+ be->id);
+ } else {
+ ha_alert("Cannot bind to tproxy source address before connect() for backend %s. Aborting.\n",
+ be->id);
+ send_log(be, LOG_EMERG,
+ "Cannot bind to tproxy source address before connect() for backend %s.\n",
+ be->id);
+ }
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+ }
+
+#if defined(TCP_QUICKACK)
+ /* disabling tcp quick ack now allows the first request to leave the
+ * machine with the first ACK. We only do this if there are pending
+ * data in the buffer.
+ */
+ if (flags & (CONNECT_DELACK_ALWAYS) ||
+ ((flags & CONNECT_DELACK_SMART_CONNECT ||
+ (flags & CONNECT_HAS_DATA) || conn->send_proxy_ofs) &&
+ (be->options2 & PR_O2_SMARTCON)))
+ setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
+#endif
+
+#ifdef TCP_USER_TIMEOUT
+ /* there is not much more we can do here when it fails, it's still minor */
+ if (srv && srv->tcp_ut)
+ setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &srv->tcp_ut, sizeof(srv->tcp_ut));
+#endif
+
+ if (use_fastopen) {
+#if defined(TCP_FASTOPEN_CONNECT)
+ setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &one, sizeof(one));
+#endif
+ }
+ if (global.tune.server_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
+
+ if (global.tune.server_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
+
+ addr = (conn->flags & CO_FL_SOCKS4) ? &srv->socks4_addr : conn->dst;
+ if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
+ if (errno == EINPROGRESS || errno == EALREADY) {
+ /* common case, let's wait for connect status */
+ conn->flags |= CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EISCONN) {
+ /* should normally not happen but if so, indicates that it's OK */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
+ char *msg;
+ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRNOTAVAIL) {
+ msg = "no free ports";
+ conn->err_code = CO_ER_FREE_PORTS;
+ }
+ else {
+ msg = "local address already in use";
+ conn->err_code = CO_ER_ADDR_INUSE;
+ }
+
+ qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ } else if (errno == ETIMEDOUT) {
+ //qfprintf(stderr,"Connect(): ETIMEDOUT");
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVTO;
+ } else {
+ // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
+ //qfprintf(stderr,"Connect(): %d", errno);
+ port_range_release_port(fdinfo[fd].port_range, fdinfo[fd].local_port);
+ fdinfo[fd].port_range = NULL;
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVCL;
+ }
+ }
+ else {
+ /* connect() == 0, this is great! */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ conn_ctrl_init(conn); /* registers the FD */
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_LINGER_RISK); /* close hard if needed */
+
+ if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ fd_want_send(fd);
+ fd_cant_send(fd);
+ fd_cant_recv(fd);
+ }
+
+ return SF_ERR_NONE; /* connection is OK */
+}
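+
+/* A minimal illustrative sketch, not part of HAProxy itself: the
+ * IP_BIND_ADDRESS_NO_PORT option used above defers source port allocation
+ * until connect(), so binding many outgoing sockets to the same source IP
+ * does not exhaust the ephemeral port range at bind() time. Linux-specific;
+ * the function name is hypothetical.
+ */
+static int example_bind_no_port(int fd, const struct sockaddr_in *src)
+{
+#ifdef IP_BIND_ADDRESS_NO_PORT
+	int enable = 1;
+
+	setsockopt(fd, IPPROTO_IP, IP_BIND_ADDRESS_NO_PORT, &enable, sizeof(enable));
+#endif
+	/* <src>->sin_port is expected to be 0 here: the kernel then picks the
+	 * port when connect() is called rather than at bind() time.
+	 */
+	return bind(fd, (const struct sockaddr *)src, sizeof(*src));
+}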
+
+/* This function tries to bind a TCPv4/v6 listener. It may return a warning or
+ * an error message in <errmsg> if the message is at most <errlen> bytes long
+ * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
+ * The return value is composed from ERR_ABORT, ERR_WARN,
+ * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
+ * was alright and that no message was returned. ERR_RETRYABLE means that an
+ * error occurred but that it may vanish after a retry (eg: port in use), and
+ * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
+ * the meaning of the error, but just indicate that a message is present which
+ * should be displayed with the respective level. Last, ERR_ABORT indicates
+ * that it's pointless to try to start other listeners. No error message is
+ * returned if <errlen> is zero.
+ */
+int tcp_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int fd, err;
+ int ready;
+ struct buffer *msg = alloc_trash_chunk();
+
+ err = ERR_NONE;
+
+ if (!msg) {
+ if (errlen)
+ snprintf(errmsg, errlen, "out of memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ chunk_appendf(msg, "%sreceiving socket not bound", msg->data ? ", " : "");
+ goto tcp_return;
+ }
+
+ if (listener->rx.flags & RX_F_MUST_DUP)
+ goto done;
+
+ fd = listener->rx.fd;
+
+ if (listener->bind_conf->options & BC_O_NOLINGER)
+ setsockopt(fd, SOL_SOCKET, SO_LINGER, &nolinger, sizeof(struct linger));
+ else {
+ struct linger tmplinger;
+ socklen_t len = sizeof(tmplinger);
+ if (getsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger, &len) == 0 &&
+ (tmplinger.l_onoff == 1 || tmplinger.l_linger == 0)) {
+ tmplinger.l_onoff = 0;
+ tmplinger.l_linger = 0;
+ setsockopt(fd, SOL_SOCKET, SO_LINGER, &tmplinger,
+ sizeof(tmplinger));
+ }
+ }
+
+#if defined(TCP_MAXSEG)
+ if (listener->bind_conf->maxseg > 0) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
+ &listener->bind_conf->maxseg, sizeof(listener->bind_conf->maxseg)) == -1) {
+ chunk_appendf(msg, "%scannot set MSS to %d", msg->data ? ", " : "", listener->bind_conf->maxseg);
+ err |= ERR_WARN;
+ }
+ } else {
+ /* we may want to try to restore the default MSS if the socket was inherited */
+ int tmpmaxseg = -1;
+ int defaultmss;
+ socklen_t len = sizeof(tmpmaxseg);
+
+ if (listener->rx.addr.ss_family == AF_INET)
+ defaultmss = sock_inet_tcp_maxseg_default;
+ else
+ defaultmss = sock_inet6_tcp_maxseg_default;
+
+ getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &tmpmaxseg, &len);
+ if (defaultmss > 0 &&
+ tmpmaxseg != defaultmss &&
+ setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &defaultmss, sizeof(defaultmss)) == -1) {
+ chunk_appendf(msg, "%scannot set MSS to %d", msg->data ? ", " : "", defaultmss);
+ err |= ERR_WARN;
+ }
+ }
+#endif
+#if defined(TCP_USER_TIMEOUT)
+ if (listener->bind_conf->tcp_ut) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
+ &listener->bind_conf->tcp_ut, sizeof(listener->bind_conf->tcp_ut)) == -1) {
+ chunk_appendf(msg, "%scannot set TCP User Timeout", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ } else
+ setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &zero,
+ sizeof(zero));
+#endif
+#if defined(TCP_DEFER_ACCEPT)
+ if (listener->bind_conf->options & BC_O_DEF_ACCEPT) {
+ /* defer accept by up to one second */
+ int accept_delay = 1;
+ if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &accept_delay, sizeof(accept_delay)) == -1) {
+ chunk_appendf(msg, "%scannot enable DEFER_ACCEPT", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ } else
+ setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &zero,
+ sizeof(zero));
+#endif
+#if defined(TCP_FASTOPEN)
+ if (listener->bind_conf->options & BC_O_TCP_FO) {
+ /* TFO needs a queue length, let's use the configured backlog */
+ int qlen = listener_backlog(listener);
+ if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) {
+ chunk_appendf(msg, "%scannot enable TCP_FASTOPEN", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ } else {
+ socklen_t len;
+ int qlen;
+ len = sizeof(qlen);
+		/* Only disable fast open if it was enabled; we don't want the
+		 * kernel to create a fast open queue if there isn't one already.
+		 */
+ if (getsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, &len) == 0 &&
+ qlen != 0) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &zero,
+ sizeof(zero)) == -1) {
+ chunk_appendf(msg, "%scannot disable TCP_FASTOPEN", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ }
+ }
+#endif
+
+ ready = sock_accepting_conn(&listener->rx) > 0;
+
+	if (!ready && /* only listen if not already done by an external process */
+ listen(fd, listener_backlog(listener)) == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ chunk_appendf(msg, "%scannot listen to socket", msg->data ? ", " : "");
+ goto tcp_close_return;
+ }
+
+#if !defined(TCP_DEFER_ACCEPT) && defined(SO_ACCEPTFILTER)
+ /* the socket needs to listen first */
+ if (listener->bind_conf->options & BC_O_DEF_ACCEPT) {
+ struct accept_filter_arg accept;
+ memset(&accept, 0, sizeof(accept));
+ strlcpy2(accept.af_name, "dataready", sizeof(accept.af_name));
+ if (setsockopt(fd, SOL_SOCKET, SO_ACCEPTFILTER, &accept, sizeof(accept)) == -1) {
+ chunk_appendf(msg, "%scannot enable ACCEPT_FILTER", msg->data ? ", " : "");
+ err |= ERR_WARN;
+ }
+ }
+#endif
+#if defined(TCP_QUICKACK)
+ if (listener->bind_conf->options & BC_O_NOQUICKACK)
+ setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &zero, sizeof(zero));
+ else
+ setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
+#endif
+
+ done:
+ /* the socket is ready */
+ listener_set_state(listener, LI_LISTEN);
+ goto tcp_return;
+
+ tcp_close_return:
+ free_trash_chunk(msg);
+ msg = NULL;
+ close(fd);
+ tcp_return:
+ if (msg && errlen && msg->data) {
+ char pn[INET6_ADDRSTRLEN];
+
+ addr_to_str(&listener->rx.addr, pn, sizeof(pn));
+ snprintf(errmsg, errlen, "%s for [%s:%d]", msg->area, pn, get_host_port(&listener->rx.addr));
+ }
+ free_trash_chunk(msg);
+ msg = NULL;
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void tcp_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void tcp_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly suspended. Note that inherited FDs
+ * are neither suspended nor resumed; we only enable/disable polling on them.
+ */
+static int tcp_suspend_receiver(struct receiver *rx)
+{
+ const struct sockaddr sa = { .sa_family = AF_UNSPEC };
+ int ret;
+
+ /* We never disconnect a shared FD otherwise we'd break it in the
+ * parent process and any possible subsequent worker inheriting it.
+ * Thus we just stop receiving from it.
+ */
+ if (rx->flags & RX_F_INHERITED)
+ goto done;
+
+ if (connect(rx->fd, &sa, sizeof(sa)) < 0)
+ goto check_already_done;
+ done:
+ fd_stop_recv(rx->fd);
+ return 1;
+
+ check_already_done:
+	/* in case the disconnecting connect() above fails, it might be because
+	 * we're dealing with a socket that is shared with other processes doing
+	 * the same. Let's check if it's still accepting connections.
+	 */
+ ret = sock_accepting_conn(rx);
+ if (ret <= 0) {
+ /* unrecoverable or paused by another process */
+ fd_stop_recv(rx->fd);
+ return ret == 0;
+ }
+
+ /* still listening, that's not good */
+ return -1;
+}
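+
+/* A minimal illustrative sketch, not part of HAProxy itself: the AF_UNSPEC
+ * connect() trick used by tcp_suspend_receiver() above. On Linux, connecting
+ * a listening TCP socket to an AF_UNSPEC address detaches it so that it
+ * stops accepting, which SO_ACCEPTCONN then reflects. The exact behaviour is
+ * platform-dependent; the function name is hypothetical.
+ */
+static int example_unlisten(int listen_fd)
+{
+	const struct sockaddr sa = { .sa_family = AF_UNSPEC };
+	int val = 0;
+	socklen_t len = sizeof(val);
+
+	if (connect(listen_fd, &sa, sizeof(sa)) < 0)
+		return -1;
+
+	/* SO_ACCEPTCONN should now report that the socket no longer listens */
+	if (getsockopt(listen_fd, SOL_SOCKET, SO_ACCEPTCONN, &val, &len) == 0 && !val)
+		return 0;
+
+	return -1;
+}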
+
+/* Resume a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly resumed. Note that inherited FDs
+ * are neither suspended nor resumed; we only enable/disable polling on them.
+ */
+static int tcp_resume_receiver(struct receiver *rx)
+{
+ struct listener *l = LIST_ELEM(rx, struct listener *, rx);
+
+ if (rx->fd < 0)
+ return 0;
+
+ if ((rx->flags & RX_F_INHERITED) || listen(rx->fd, listener_backlog(l)) == 0) {
+ fd_want_recv(l->rx.fd);
+ return 1;
+ }
+ return -1;
+}
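+
+/* Aside: LIST_ELEM() above follows the classic container_of() pattern, i.e.
+ * it recovers the enclosing struct listener from a pointer to its embedded
+ * <rx> member (note that LIST_ELEM takes the pointer type itself as its
+ * second argument). A simplified equivalent, assuming <stddef.h> for
+ * offsetof():
+ */
+#if 0 /* example only, not the real macro */
+#define EXAMPLE_CONTAINER_OF(ptr, type, member) \
+	((type *)((char *)(ptr) - offsetof(type, member)))
+/* usage: struct listener *l = EXAMPLE_CONTAINER_OF(rx, struct listener, rx); */
+#endif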
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_udp.c b/src/proto_udp.c
new file mode 100644
index 0000000..9855974
--- /dev/null
+++ b/src/proto_udp.c
@@ -0,0 +1,247 @@
+/*
+ * UDP protocol layer on top of AF_INET/AF_INET6
+ *
+ * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * Partial merge by Emeric Brun <ebrun@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/udp.h>
+#include <netinet/in.h>
+
+#include <haproxy/fd.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/port_range.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_udp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/server.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+static int udp_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static void udp_enable_listener(struct listener *listener);
+static void udp_disable_listener(struct listener *listener);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_udp4 = {
+ .name = "udp4",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_DGRAM,
+ .listen = udp_bind_listener,
+ .enable = udp_enable_listener,
+ .disable = udp_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+
+ /* binding layer */
+ .rx_suspend = udp_suspend_receiver,
+ .rx_resume = udp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet4,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = IPPROTO_UDP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .receivers = LIST_HEAD_INIT(proto_udp4.receivers),
+ .nb_receivers = 0,
+#ifdef SO_REUSEPORT
+ .flags = PROTO_F_REUSEPORT_SUPPORTED,
+#endif
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_udp4);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_udp6 = {
+ .name = "udp6",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_DGRAM,
+ .listen = udp_bind_listener,
+ .enable = udp_enable_listener,
+ .disable = udp_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+
+ /* binding layer */
+ .rx_suspend = udp_suspend_receiver,
+ .rx_resume = udp_resume_receiver,
+
+ /* address family */
+ .fam = &proto_fam_inet6,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = IPPROTO_UDP,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .receivers = LIST_HEAD_INIT(proto_udp6.receivers),
+ .nb_receivers = 0,
+#ifdef SO_REUSEPORT
+ .flags = PROTO_F_REUSEPORT_SUPPORTED,
+#endif
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_udp6);
+
+/* This function tries to bind a UDPv4/v6 listener. It may return a warning or
+ * an error message in <errmsg> if the message is at most <errlen> bytes long
+ * (including '\0'). Note that <errmsg> may be NULL if <errlen> is also zero.
+ * The return value is composed from ERR_ABORT, ERR_WARN,
+ * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
+ * was alright and that no message was returned. ERR_RETRYABLE means that an
+ * error occurred but that it may vanish after a retry (eg: port in use), and
+ * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
+ * the meaning of the error, but just indicate that a message is present which
+ * should be displayed with the respective level. Last, ERR_ABORT indicates
+ * that it's pointless to try to start other listeners. No error message is
+ * returned if errlen is NULL.
+ */
+int udp_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int err = ERR_NONE;
+ char *msg = NULL;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+		err |= ERR_FATAL | ERR_ALERT;
+		goto udp_return;
+ }
+
+	/* we may want to adjust the receive and send buffers
+	 * (tune.rcvbuf.frontend and tune.sndbuf.frontend)
+	 */
+ if (global.tune.frontend_rcvbuf)
+ setsockopt(listener->rx.fd, SOL_SOCKET, SO_RCVBUF, &global.tune.frontend_rcvbuf, sizeof(global.tune.frontend_rcvbuf));
+
+ if (global.tune.frontend_sndbuf)
+ setsockopt(listener->rx.fd, SOL_SOCKET, SO_SNDBUF, &global.tune.frontend_sndbuf, sizeof(global.tune.frontend_sndbuf));
+
+ listener_set_state(listener, LI_LISTEN);
+
+ udp_return:
+ if (msg && errlen) {
+ char pn[INET6_ADDRSTRLEN];
+
+ addr_to_str(&listener->rx.addr, pn, sizeof(pn));
+ snprintf(errmsg, errlen, "%s for [%s:%d]", msg, pn, get_host_port(&listener->rx.addr));
+ }
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void udp_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void udp_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly suspended.
+ * The principle is a bit ugly but works well, at least on Linux: in order to
+ * suspend the receiver, we want it to stop receiving traffic, which means that
+ * the socket must be unhashed from the kernel's socket table. The simple way
+ * to do this is to connect to any address that is reachable and will not be
+ * used by regular traffic, and a great one is reconnecting to self. Note that
+ * inherited FDs are neither suspended nor resumed, we only enable/disable
+ * polling on them.
+ */
+int udp_suspend_receiver(struct receiver *rx)
+{
+ struct sockaddr_storage ss;
+ socklen_t len = sizeof(ss);
+
+ if (rx->fd < 0)
+ return 0;
+
+ /* we never do that with a shared FD otherwise we'd break it in the
+ * parent process and any possible subsequent worker inheriting it.
+ */
+ if (rx->flags & RX_F_INHERITED)
+ goto done;
+
+ if (getsockname(rx->fd, (struct sockaddr *)&ss, &len) < 0)
+ return -1;
+
+ if (connect(rx->fd, (struct sockaddr *)&ss, len) < 0)
+ return -1;
+ done:
+ /* not necessary but may make debugging clearer */
+ fd_stop_recv(rx->fd);
+ return 1;
+}
+
+/* Resume a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly resumed.
+ * The principle is to reverse the change above, we'll break the connection by
+ * connecting to AF_UNSPEC. The association breaks and the socket starts to
+ * receive from everywhere again. Note that inherited FDs are neither suspended
+ * nor resumed, we only enable/disable polling on them.
+ */
+int udp_resume_receiver(struct receiver *rx)
+{
+ const struct sockaddr sa = { .sa_family = AF_UNSPEC };
+
+ if (rx->fd < 0)
+ return 0;
+
+ if (!(rx->flags & RX_F_INHERITED) && connect(rx->fd, &sa, sizeof(sa)) < 0)
+ return -1;
+
+ fd_want_recv(rx->fd);
+ return 1;
+}
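+
+/* Self-contained sketch of the suspend/resume trick documented above, kept
+ * as a standalone program: connecting a bound UDP socket to itself makes the
+ * kernel drop all foreign datagrams, and a later connect() to AF_UNSPEC
+ * dissolves the association. As noted above this relies on Linux semantics;
+ * other platforms may behave differently.
+ */
+#if 0 /* example only */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+int main(void)
+{
+	struct sockaddr_in sin;
+	struct sockaddr_storage self;
+	socklen_t len = sizeof(self);
+	const struct sockaddr unspec = { .sa_family = AF_UNSPEC };
+	int fd = socket(AF_INET, SOCK_DGRAM, 0);
+
+	memset(&sin, 0, sizeof(sin));
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	sin.sin_port = 0; /* let the kernel pick a port */
+	if (fd < 0 || bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
+		return 1;
+
+	/* "suspend": connect to self so foreign traffic no longer matches */
+	if (getsockname(fd, (struct sockaddr *)&self, &len) < 0 ||
+	    connect(fd, (struct sockaddr *)&self, len) < 0)
+		return 1;
+	puts("suspended (connected to self)");
+
+	/* "resume": break the association, receive from everywhere again */
+	if (connect(fd, &unspec, sizeof(unspec)) < 0)
+		return 1;
+	puts("resumed (unconnected)");
+	close(fd);
+	return 0;
+}
+#endif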
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_uxdg.c b/src/proto_uxdg.c
new file mode 100644
index 0000000..43cbe5a
--- /dev/null
+++ b/src/proto_uxdg.c
@@ -0,0 +1,159 @@
+/*
+ * DGRAM protocol layer on top of AF_UNIX
+ *
+ * Copyright 2020 HAProxy Technologies, Emeric Brun <ebrun@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <haproxy/fd.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/protocol.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_unix.h>
+#include <haproxy/tools.h>
+
+static int uxdg_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static void uxdg_enable_listener(struct listener *listener);
+static void uxdg_disable_listener(struct listener *listener);
+static int uxdg_suspend_receiver(struct receiver *rx);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_uxdg = {
+ .name = "uxdg",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_DGRAM,
+ .listen = uxdg_bind_listener,
+ .enable = uxdg_enable_listener,
+ .disable = uxdg_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+
+ /* binding layer */
+ .rx_suspend = uxdg_suspend_receiver,
+
+ /* address family */
+ .fam = &proto_fam_unix,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_DGRAM,
+ .sock_type = SOCK_DGRAM,
+ .sock_prot = 0,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .receivers = LIST_HEAD_INIT(proto_uxdg.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_uxdg);
+
+/* This function tries to bind a dgram unix socket listener. It may return a
+ * warning or an error message in <errmsg> if the message is at most <errlen>
+ * bytes long (including '\0'). Note that <errmsg> may be NULL if <errlen> is
+ * also zero.
+ * The return value is composed from ERR_ABORT, ERR_WARN,
+ * ERR_ALERT, ERR_RETRYABLE and ERR_FATAL. ERR_NONE indicates that everything
+ * was alright and that no message was returned. ERR_RETRYABLE means that an
+ * error occurred but that it may vanish after a retry (eg: port in use), and
+ * ERR_FATAL indicates a non-fixable error. ERR_WARN and ERR_ALERT do not alter
+ * the meaning of the error, but just indicate that a message is present which
+ * should be displayed with the respective level. Last, ERR_ABORT indicates
+ * that it's pointless to try to start other listeners. No error message is
+ * returned if <errlen> is zero.
+ */
+int uxdg_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int err = ERR_NONE;
+ char *msg = NULL;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+ err |= ERR_FATAL | ERR_ALERT;
+ goto uxdg_return;
+ }
+
+ listener_set_state(listener, LI_LISTEN);
+
+ uxdg_return:
+ if (msg && errlen) {
+ char *path_str;
+
+ path_str = sa2str((struct sockaddr_storage *)&listener->rx.addr, 0, 0);
+ snprintf(errmsg, errlen, "%s for [%s]", msg, ((path_str) ? path_str : ""));
+ ha_free(&path_str);
+ }
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void uxdg_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void uxdg_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly suspended. For plain unix sockets
+ * we only disable the listener to prevent data from being handled but nothing
+ * more is done since currently it's the new process which handles the renaming.
+ * Abstract sockets are completely unbound and closed so there's no need to stop
+ * the poller.
+ */
+static int uxdg_suspend_receiver(struct receiver *rx)
+{
+ struct listener *l = LIST_ELEM(rx, struct listener *, rx);
+
+ if (((struct sockaddr_un *)&rx->addr)->sun_path[0]) {
+ uxdg_disable_listener(l);
+ return 1;
+ }
+
+ /* Listener's lock already held. Call lockless version of
+ * unbind_listener. */
+ do_unbind_listener(l);
+ return 0;
+}
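+
+/* Aside: the sun_path[0] test above distinguishes the two UNIX address
+ * flavours. Linux abstract sockets begin with a NUL byte and have no
+ * filesystem presence (hence the full unbind), while pathname sockets carry
+ * a regular path. A simplified restatement:
+ */
+#if 0 /* example only */
+static int example_is_abstract(const struct sockaddr_un *un)
+{
+	return un->sun_path[0] == '\0'; /* abstract namespace (Linux) */
+}
+#endif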
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proto_uxst.c b/src/proto_uxst.c
new file mode 100644
index 0000000..7988e00
--- /dev/null
+++ b/src/proto_uxst.c
@@ -0,0 +1,372 @@
+/*
+ * UNIX SOCK_STREAM protocol layer (uxst)
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <time.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_uxst.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_unix.h>
+#include <haproxy/tools.h>
+#include <haproxy/version.h>
+
+
+static int uxst_bind_listener(struct listener *listener, char *errmsg, int errlen);
+static int uxst_connect_server(struct connection *conn, int flags);
+static void uxst_enable_listener(struct listener *listener);
+static void uxst_disable_listener(struct listener *listener);
+static int uxst_suspend_receiver(struct receiver *rx);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+struct protocol proto_uxst = {
+ .name = "unix_stream",
+
+ /* connection layer */
+ .xprt_type = PROTO_TYPE_STREAM,
+ .listen = uxst_bind_listener,
+ .enable = uxst_enable_listener,
+ .disable = uxst_disable_listener,
+ .add = default_add_listener,
+ .unbind = default_unbind_listener,
+ .suspend = default_suspend_listener,
+ .resume = default_resume_listener,
+ .accept_conn = sock_accept_conn,
+ .ctrl_init = sock_conn_ctrl_init,
+ .ctrl_close = sock_conn_ctrl_close,
+ .connect = uxst_connect_server,
+ .drain = sock_drain,
+ .check_events = sock_check_events,
+ .ignore_events = sock_ignore_events,
+
+ /* binding layer */
+ .rx_suspend = uxst_suspend_receiver,
+
+ /* address family */
+ .fam = &proto_fam_unix,
+
+ /* socket layer */
+ .proto_type = PROTO_TYPE_STREAM,
+ .sock_type = SOCK_STREAM,
+ .sock_prot = 0,
+ .rx_enable = sock_enable,
+ .rx_disable = sock_disable,
+ .rx_unbind = sock_unbind,
+ .rx_listening = sock_accepting_conn,
+ .default_iocb = sock_accept_iocb,
+ .receivers = LIST_HEAD_INIT(proto_uxst.receivers),
+ .nb_receivers = 0,
+};
+
+INITCALL1(STG_REGISTER, protocol_register, &proto_uxst);
+
+/********************************
+ * 1) low-level socket functions
+ ********************************/
+
+
+/********************************
+ * 2) listener-oriented functions
+ ********************************/
+
+/* This function creates a UNIX socket associated to the listener. It changes
+ * the state from ASSIGNED to LISTEN. The socket is NOT enabled for polling.
+ * The return value is composed from ERR_NONE, ERR_RETRYABLE and ERR_FATAL. It
+ * may return a warning or an error message in <errmsg> if the message is at
+ * most <errlen> bytes long (including '\0'). Note that <errmsg> may be NULL if
+ * <errlen> is also zero.
+ */
+static int uxst_bind_listener(struct listener *listener, char *errmsg, int errlen)
+{
+ int fd, err;
+ int ready;
+ char *msg = NULL;
+
+ err = ERR_NONE;
+
+ /* ensure we never return garbage */
+ if (errlen)
+ *errmsg = 0;
+
+ if (listener->state != LI_ASSIGNED)
+ return ERR_NONE; /* already bound */
+
+ if (!(listener->rx.flags & RX_F_BOUND)) {
+ msg = "receiving socket not bound";
+ err |= ERR_FATAL | ERR_ALERT;
+ goto uxst_return;
+ }
+
+ if (listener->rx.flags & RX_F_MUST_DUP)
+ goto done;
+
+ fd = listener->rx.fd;
+ ready = sock_accepting_conn(&listener->rx) > 0;
+
+ if (!ready && /* only listen if not already done by external process */
+ listen(fd, listener_backlog(listener)) < 0) {
+ err |= ERR_FATAL | ERR_ALERT;
+ msg = "cannot listen to UNIX socket";
+ goto uxst_close_return;
+ }
+
+ done:
+ /* the socket is now listening */
+ listener_set_state(listener, LI_LISTEN);
+ return err;
+
+ uxst_close_return:
+ close(fd);
+ uxst_return:
+ if (msg && errlen) {
+ char *path_str;
+
+ path_str = sa2str((struct sockaddr_storage *)&listener->rx.addr, 0, 0);
+ snprintf(errmsg, errlen, "%s for [%s]", msg, ((path_str) ? path_str : ""));
+ ha_free(&path_str);
+ }
+ return err;
+}
+
+/* Enable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void uxst_enable_listener(struct listener *l)
+{
+ fd_want_recv_safe(l->rx.fd);
+}
+
+/* Disable receipt of incoming connections for listener <l>. The receiver must
+ * still be valid.
+ */
+static void uxst_disable_listener(struct listener *l)
+{
+ fd_stop_recv(l->rx.fd);
+}
+
+/* Suspend a receiver. Returns < 0 in case of failure, 0 if the receiver
+ * was totally stopped, or > 0 if correctly suspended. For plain unix sockets
+ * we only disable the listener to prevent data from being handled but nothing
+ * more is done since currently it's the new process which handles the renaming.
+ * Abstract sockets are completely unbound and closed so there's no need to stop
+ * the poller.
+ */
+static int uxst_suspend_receiver(struct receiver *rx)
+{
+ struct listener *l = LIST_ELEM(rx, struct listener *, rx);
+
+ if (((struct sockaddr_un *)&rx->addr)->sun_path[0]) {
+ uxst_disable_listener(l);
+ return 1;
+ }
+
+ /* Listener's lock already held. Call lockless version of
+ * unbind_listener. */
+ do_unbind_listener(l);
+ return 0;
+}
+
+
+/*
+ * This function initiates a UNIX connection establishment to the target assigned
+ * to connection <conn> using (si->{target,dst}). The source address is ignored
+ * and will be selected by the system. conn->target may point either to a valid
+ * server or to a backend, depending on conn->target. Only OBJ_TYPE_PROXY and
+ * OBJ_TYPE_SERVER are supported. The <data> parameter is a boolean indicating
+ * whether there are data waiting for being sent or not, in order to adjust data
+ * write polling and on some platforms. The <delack> argument is ignored.
+ *
+ * Note that a pending send_proxy message accounts for data.
+ *
+ * It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ *
+ * The connection's fd is inserted only when SF_ERR_NONE is returned, otherwise
+ * it's invalid and the caller has nothing to do.
+ */
+static int uxst_connect_server(struct connection *conn, int flags)
+{
+ int fd;
+ struct server *srv;
+ struct proxy *be;
+
+ BUG_ON(!conn->dst);
+
+ switch (obj_type(conn->target)) {
+ case OBJ_TYPE_PROXY:
+ be = __objt_proxy(conn->target);
+ srv = NULL;
+ break;
+ case OBJ_TYPE_SERVER:
+ srv = __objt_server(conn->target);
+ be = srv->proxy;
+ break;
+ default:
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if ((fd = conn->handle.fd = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) {
+ qfprintf(stderr, "Cannot get a server socket.\n");
+
+ if (errno == ENFILE) {
+ conn->err_code = CO_ER_SYS_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EMFILE) {
+ conn->err_code = CO_ER_PROC_FDLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == ENOBUFS || errno == ENOMEM) {
+ conn->err_code = CO_ER_SYS_MEMLIM;
+ send_log(be, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ be->id, global.maxsock);
+ }
+ else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
+ conn->err_code = CO_ER_NOPROTO;
+ }
+ else
+ conn->err_code = CO_ER_SOCK_ERR;
+
+ /* this is a resource error */
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+
+ if (fd >= global.maxsock) {
+		/* the configured global.maxsock limit was reached, there's no
+		 * point going further with this connection.
+		 */
+ ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n");
+ close(fd);
+ conn->err_code = CO_ER_CONF_FDLIM;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_PRXCOND; /* it is a configuration limit */
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ qfprintf(stderr,"Cannot set client socket to non blocking mode.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (master == 1 && fd_set_cloexec(fd) == -1) {
+ ha_alert("Cannot set CLOEXEC on client socket.\n");
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_INTERNAL;
+ }
+
+ if (global.tune.server_sndbuf)
+ setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf));
+
+ if (global.tune.server_rcvbuf)
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &global.tune.server_rcvbuf, sizeof(global.tune.server_rcvbuf));
+
+ if (connect(fd, (struct sockaddr *)conn->dst, get_addr_len(conn->dst)) == -1) {
+ if (errno == EINPROGRESS || errno == EALREADY) {
+ conn->flags |= CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EISCONN) {
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+ else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRINUSE || errno == EADDRNOTAVAIL) {
+ char *msg;
+ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EADDRNOTAVAIL) {
+ msg = "can't connect to destination unix socket, check backlog size on the server";
+ conn->err_code = CO_ER_FREE_PORTS;
+ }
+ else {
+ msg = "local address already in use";
+ conn->err_code = CO_ER_ADDR_INUSE;
+ }
+
+ qfprintf(stderr,"Connect() failed for backend %s: %s.\n", be->id, msg);
+ close(fd);
+ send_log(be, LOG_ERR, "Connect() failed for backend %s: %s.\n", be->id, msg);
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_RESOURCE;
+ }
+ else if (errno == ETIMEDOUT) {
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVTO;
+ }
+ else { // (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
+ close(fd);
+ conn->err_code = CO_ER_SOCK_ERR;
+ conn->flags |= CO_FL_ERROR;
+ return SF_ERR_SRVCL;
+ }
+ }
+ else {
+ /* connect() already succeeded, which is quite usual for unix
+ * sockets. Let's avoid a second connect() probe to complete it.
+ */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ /* Prepare to send a few handshakes related to the on-wire protocol. */
+ if (conn->send_proxy_ofs)
+ conn->flags |= CO_FL_SEND_PROXY;
+
+ conn_ctrl_init(conn); /* registers the FD */
+ HA_ATOMIC_AND(&fdtab[fd].state, ~FD_LINGER_RISK); /* no need to disable lingering */
+
+ if (conn->flags & CO_FL_WAIT_L4_CONN) {
+ fd_want_send(fd);
+ fd_cant_send(fd);
+ fd_cant_recv(fd);
+ }
+
+ return SF_ERR_NONE; /* connection is OK */
+}
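+
+/* Simplified sketch of how a non-blocking connect() such as the one above is
+ * typically completed once EINPROGRESS was returned: wait for writability,
+ * then read SO_ERROR for the final status. HAProxy does this through its
+ * fd/polling layer; a blocking poll() is used here only for illustration.
+ */
+#if 0 /* example only */
+#include <poll.h>
+
+static int example_wait_connect(int fd, int timeout_ms)
+{
+	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
+	int err = 0;
+	socklen_t len = sizeof(err);
+
+	if (poll(&pfd, 1, timeout_ms) <= 0)
+		return -1; /* timeout or poll failure */
+	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) < 0 || err)
+		return -1; /* connection failed asynchronously */
+	return 0;          /* connected */
+}
+#endif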
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/protocol.c b/src/protocol.c
new file mode 100644
index 0000000..25ed6b7
--- /dev/null
+++ b/src/protocol.c
@@ -0,0 +1,309 @@
+/*
+ * Protocol registration functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <haproxy/api.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/proto_quic.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sock.h>
+#include <haproxy/tools.h>
+
+
+/* List head of all registered protocols */
+static struct list protocols = LIST_HEAD_INIT(protocols);
+struct protocol *__protocol_by_family[AF_CUST_MAX][PROTO_NUM_TYPES][2] __read_mostly = { };
+
+/* This is the global spinlock we may need to register/unregister listeners or
+ * protocols. Its main purpose is in fact to serialize the rare stop/deinit()
+ * phases.
+ */
+__decl_spinlock(proto_lock);
+
+/* Registers the protocol <proto> */
+void protocol_register(struct protocol *proto)
+{
+ int sock_domain = proto->fam->sock_domain;
+
+ BUG_ON(sock_domain < 0 || sock_domain >= AF_CUST_MAX);
+ BUG_ON(proto->proto_type >= PROTO_NUM_TYPES);
+
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ LIST_APPEND(&protocols, &proto->list);
+ __protocol_by_family[sock_domain]
+ [proto->proto_type]
+ [proto->xprt_type == PROTO_TYPE_DGRAM] = proto;
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
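+
+/* Simplified sketch of the O(1) reverse lookup this table enables (the real
+ * accessor is protocol_lookup() in the protocol header; the name and checks
+ * below are illustrative only):
+ */
+#if 0 /* example only */
+static struct protocol *example_protocol_lookup(int sock_domain,
+                                                enum proto_type ptype,
+                                                int dgram_xprt)
+{
+	if (sock_domain < 0 || sock_domain >= AF_CUST_MAX || ptype >= PROTO_NUM_TYPES)
+		return NULL;
+	return __protocol_by_family[sock_domain][ptype][!!dgram_xprt];
+}
+#endif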
+
+/* Unregisters the protocol <proto>. Note that all listeners must have
+ * previously been unbound.
+ */
+void protocol_unregister(struct protocol *proto)
+{
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ LIST_DELETE(&proto->list);
+ LIST_INIT(&proto->list);
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* clears flag <flag> on all protocols. */
+void protocol_clrf_all(uint flag)
+{
+ struct protocol *proto;
+
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list)
+ proto->flags &= ~flag;
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* sets flag <flag> on all protocols. */
+void protocol_setf_all(uint flag)
+{
+ struct protocol *proto;
+
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list)
+ proto->flags |= flag;
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* Checks if protocol <proto> supports PROTO_F flag <flag>. Returns zero if not,
+ * non-zero if supported. It may return a cached value from a previous test,
+ * and may run live tests then update the proto's flags to cache a result. It's
+ * better to call it only if needed so that it doesn't result in modules being
+ * loaded in case of a live test. It is only supposed to be used during boot.
+ */
+int protocol_supports_flag(struct protocol *proto, uint flag)
+{
+ if (flag == PROTO_F_REUSEPORT_SUPPORTED) {
+ int ret = 0;
+
+ /* check if the protocol supports SO_REUSEPORT */
+ if (!(_HA_ATOMIC_LOAD(&proto->flags) & PROTO_F_REUSEPORT_SUPPORTED))
+ return 0;
+
+ /* at least nobody said it was not supported */
+ if (_HA_ATOMIC_LOAD(&proto->flags) & PROTO_F_REUSEPORT_TESTED)
+ return 1;
+
+ /* run a live check */
+ ret = _sock_supports_reuseport(proto->fam, proto->sock_type, proto->sock_prot);
+ if (!ret)
+ _HA_ATOMIC_AND(&proto->flags, ~PROTO_F_REUSEPORT_SUPPORTED);
+
+ _HA_ATOMIC_OR(&proto->flags, PROTO_F_REUSEPORT_TESTED);
+ return ret;
+ }
+ return 0;
+}
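+
+/* Simplified sketch of what a live SO_REUSEPORT probe can look like (the
+ * real check is _sock_supports_reuseport(), not shown in this hunk): create
+ * a scratch socket and see whether the kernel accepts the option.
+ */
+#if 0 /* example only */
+static int example_reuseport_supported(int family, int type, int prot)
+{
+	int ret = 0;
+#ifdef SO_REUSEPORT
+	int one = 1;
+	int fd = socket(family, type, prot);
+
+	if (fd >= 0) {
+		ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) == 0;
+		close(fd);
+	}
+#endif
+	return ret; /* non-zero if SO_REUSEPORT was accepted */
+}
+#endif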
+
+#ifdef USE_QUIC
+/* Return 1 if QUIC protocol may be bound, 0 if no, depending on the tuning
+ * parameters.
+ */
+static inline int protocol_may_bind_quic(struct listener *l)
+{
+ if (global.tune.options & GTUNE_NO_QUIC)
+ return 0;
+ return 1;
+}
+#endif
+
+/* binds all listeners of all registered protocols. Returns a composition
+ * of ERR_NONE, ERR_RETRYABLE, ERR_FATAL.
+ */
+int protocol_bind_all(int verbose)
+{
+ struct protocol *proto;
+ struct listener *listener;
+ struct receiver *receiver;
+ char msg[1000];
+ char *errmsg;
+ int err, lerr;
+
+ err = 0;
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(receiver, &proto->receivers, proto_list) {
+ listener = LIST_ELEM(receiver, struct listener *, rx);
+#ifdef USE_QUIC
+ if ((proto == &proto_quic4 || proto == &proto_quic6) &&
+ !protocol_may_bind_quic(listener))
+ continue;
+#endif
+
+ lerr = proto->fam->bind(receiver, &errmsg);
+ err |= lerr;
+
+ /* errors are reported if <verbose> is set or if they are fatal */
+ if (verbose || (lerr & (ERR_FATAL | ERR_ABORT))) {
+ struct proxy *px = listener->bind_conf->frontend;
+
+ if (lerr & ERR_ALERT)
+ ha_alert("Binding [%s:%d] for %s %s: %s\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(px), px->id, errmsg);
+ else if (lerr & ERR_WARN)
+ ha_warning("Binding [%s:%d] for %s %s: %s\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(px), px->id, errmsg);
+ }
+ if (lerr != ERR_NONE)
+ ha_free(&errmsg);
+
+ if (lerr & ERR_ABORT)
+ break;
+
+ if (lerr & ~ERR_WARN)
+ continue;
+
+ /* for now there's still always a listening function */
+ BUG_ON(!proto->listen);
+ lerr = proto->listen(listener, msg, sizeof(msg));
+ err |= lerr;
+
+ if (verbose || (lerr & (ERR_FATAL | ERR_ABORT))) {
+ struct proxy *px = listener->bind_conf->frontend;
+
+ if (lerr & ERR_ALERT)
+ ha_alert("Starting [%s:%d] for %s %s: %s\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(px), px->id, msg);
+ else if (lerr & ERR_WARN)
+ ha_warning("Starting [%s:%d] for %s %s: %s\n",
+ listener->bind_conf->file, listener->bind_conf->line,
+ proxy_type_str(px), px->id, msg);
+ }
+ if (lerr & ERR_ABORT)
+ break;
+ }
+ if (err & ERR_ABORT)
+ break;
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return err;
+}
+
+/* unbinds all listeners of all registered protocols. They are also closed.
+ * This must be performed before calling exit() in order to get a chance to
+ * remove file-system based sockets and pipes.
+ * Returns a composition of ERR_NONE, ERR_RETRYABLE, ERR_FATAL, ERR_ABORT.
+ */
+int protocol_unbind_all(void)
+{
+ struct protocol *proto;
+ struct listener *listener;
+ int err;
+
+ err = 0;
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(listener, &proto->receivers, rx.proto_list)
+ unbind_listener(listener);
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return err;
+}
+
+/* stops all listeners of all registered protocols. This will normally catch
+ * every single listener, all protocols included. This is to be used during
+ * soft_stop() only. It does not return any error.
+ */
+void protocol_stop_now(void)
+{
+ struct protocol *proto;
+ struct listener *listener, *lback;
+
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry_safe(listener, lback, &proto->receivers, rx.proto_list)
+ stop_listener(listener, 0, 1, 0);
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+}
+
+/* suspends all listeners of all registered protocols. This is typically
+ * used on SIG_TTOU to release all listening sockets for the time needed to
+ * try to bind a new process. The listeners enter LI_PAUSED or LI_ASSIGNED.
+ * It returns ERR_NONE on full success, or ERR_FATAL if at least one listener
+ * failed to suspend.
+ */
+int protocol_pause_all(void)
+{
+ struct protocol *proto;
+ struct listener *listener;
+ int err;
+
+ err = 0;
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(listener, &proto->receivers, rx.proto_list)
+ if (!suspend_listener(listener, 0, 0))
+ err |= ERR_FATAL;
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return err;
+}
+
+/* resumes all listeners of all registered protocols. This is typically used on
+ * SIG_TTIN to re-enable listening sockets after a new process failed to bind.
+ * The listeners switch to LI_READY/LI_FULL. It returns ERR_NONE on full
+ * success, or ERR_FATAL if at least one listener failed to resume.
+ */
+int protocol_resume_all(void)
+{
+ struct protocol *proto;
+ struct listener *listener;
+ int err;
+
+ err = 0;
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(listener, &proto->receivers, rx.proto_list)
+ if (!resume_listener(listener, 0, 0))
+ err |= ERR_FATAL;
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return err;
+}
+
+/* enables all listeners of all registered protocols. This is intended to be
+ * used after a fork() to enable reading on all file descriptors. Always
+ * returns ERR_NONE.
+ */
+int protocol_enable_all(void)
+{
+ struct protocol *proto;
+ struct listener *listener;
+
+ HA_SPIN_LOCK(PROTO_LOCK, &proto_lock);
+ list_for_each_entry(proto, &protocols, list) {
+ list_for_each_entry(listener, &proto->receivers, rx.proto_list)
+ enable_listener(listener);
+ }
+ HA_SPIN_UNLOCK(PROTO_LOCK, &proto_lock);
+ return ERR_NONE;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/proxy.c b/src/proxy.c
new file mode 100644
index 0000000..ef95340
--- /dev/null
+++ b/src/proxy.c
@@ -0,0 +1,3451 @@
+/*
+ * Proxy variables and functions.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <import/eb32tree.h>
+#include <import/ebistree.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/filters.h>
+#include <haproxy/global.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/http_ext.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/obj_type-t.h>
+#include <haproxy/peers.h>
+#include <haproxy/pool.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/server-t.h>
+#include <haproxy/signal.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+
+int listeners; /* # of proxy listeners, set by cfgparse */
+struct proxy *proxies_list = NULL; /* list of all existing proxies */
+struct eb_root used_proxy_id = EB_ROOT; /* list of proxy IDs in use */
+struct eb_root proxy_by_name = EB_ROOT; /* tree of proxies sorted by name */
+struct eb_root defproxy_by_name = EB_ROOT; /* tree of default proxies sorted by name (dups possible) */
+unsigned int error_snapshot_id = 0; /* global ID assigned to each error then incremented */
+
+/* CLI context used during "show servers {state|conn}" */
+struct show_srv_ctx {
+ struct proxy *px; /* current proxy to dump or NULL */
+ struct server *sv; /* current server to dump or NULL */
+ uint only_pxid; /* dump only this proxy ID when explicit */
+ int show_conn; /* non-zero = "conn" otherwise "state" */
+ enum {
+ SHOW_SRV_HEAD = 0,
+ SHOW_SRV_LIST,
+ } state;
+};
+
+/* proxy->options */
+const struct cfg_opt cfg_opts[] =
+{
+ { "abortonclose", PR_O_ABRT_CLOSE, PR_CAP_BE, 0, 0 },
+ { "allbackups", PR_O_USE_ALL_BK, PR_CAP_BE, 0, 0 },
+ { "checkcache", PR_O_CHK_CACHE, PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "clitcpka", PR_O_TCP_CLI_KA, PR_CAP_FE, 0, 0 },
+ { "contstats", PR_O_CONTSTATS, PR_CAP_FE, 0, 0 },
+ { "dontlognull", PR_O_NULLNOLOG, PR_CAP_FE, 0, 0 },
+ { "http-buffer-request", PR_O_WREQ_BODY, PR_CAP_FE | PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "http-ignore-probes", PR_O_IGNORE_PRB, PR_CAP_FE, 0, PR_MODE_HTTP },
+ { "idle-close-on-response", PR_O_IDLE_CLOSE_RESP, PR_CAP_FE, 0, PR_MODE_HTTP },
+ { "prefer-last-server", PR_O_PREF_LAST, PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "logasap", PR_O_LOGASAP, PR_CAP_FE, 0, 0 },
+ { "nolinger", PR_O_TCP_NOLING, PR_CAP_FE | PR_CAP_BE, 0, 0 },
+ { "persist", PR_O_PERSIST, PR_CAP_BE, 0, 0 },
+ { "srvtcpka", PR_O_TCP_SRV_KA, PR_CAP_BE, 0, 0 },
+#ifdef USE_TPROXY
+ { "transparent", PR_O_TRANSP, PR_CAP_BE, 0, 0 },
+#else
+ { "transparent", 0, 0, 0, 0 },
+#endif
+
+ { NULL, 0, 0, 0, 0 }
+};
+
+/* proxy->options2 */
+const struct cfg_opt cfg_opts2[] =
+{
+#ifdef USE_LINUX_SPLICE
+ { "splice-request", PR_O2_SPLIC_REQ, PR_CAP_FE|PR_CAP_BE, 0, 0 },
+ { "splice-response", PR_O2_SPLIC_RTR, PR_CAP_FE|PR_CAP_BE, 0, 0 },
+ { "splice-auto", PR_O2_SPLIC_AUT, PR_CAP_FE|PR_CAP_BE, 0, 0 },
+#else
+ { "splice-request", 0, 0, 0, 0 },
+ { "splice-response", 0, 0, 0, 0 },
+ { "splice-auto", 0, 0, 0, 0 },
+#endif
+ { "accept-invalid-http-request", PR_O2_REQBUG_OK, PR_CAP_FE, 0, PR_MODE_HTTP },
+ { "accept-invalid-http-response", PR_O2_RSPBUG_OK, PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "dontlog-normal", PR_O2_NOLOGNORM, PR_CAP_FE, 0, 0 },
+ { "log-separate-errors", PR_O2_LOGERRORS, PR_CAP_FE, 0, 0 },
+ { "log-health-checks", PR_O2_LOGHCHKS, PR_CAP_BE, 0, 0 },
+ { "socket-stats", PR_O2_SOCKSTAT, PR_CAP_FE, 0, 0 },
+ { "tcp-smart-accept", PR_O2_SMARTACC, PR_CAP_FE, 0, 0 },
+ { "tcp-smart-connect", PR_O2_SMARTCON, PR_CAP_BE, 0, 0 },
+ { "independent-streams", PR_O2_INDEPSTR, PR_CAP_FE|PR_CAP_BE, 0, 0 },
+ { "http-use-proxy-header", PR_O2_USE_PXHDR, PR_CAP_FE, 0, PR_MODE_HTTP },
+ { "http-pretend-keepalive", PR_O2_FAKE_KA, PR_CAP_BE, 0, PR_MODE_HTTP },
+ { "http-no-delay", PR_O2_NODELAY, PR_CAP_FE|PR_CAP_BE, 0, PR_MODE_HTTP },
+
+ {"h1-case-adjust-bogus-client", PR_O2_H1_ADJ_BUGCLI, PR_CAP_FE, 0, 0 },
+ {"h1-case-adjust-bogus-server", PR_O2_H1_ADJ_BUGSRV, PR_CAP_BE, 0, 0 },
+ {"disable-h2-upgrade", PR_O2_NO_H2_UPGRADE, PR_CAP_FE, 0, PR_MODE_HTTP },
+	{ NULL, 0, 0, 0, 0 }
+};
+
+/* Helper function to resolve a single sticking rule after config parsing.
+ * Returns 1 for success and 0 for failure
+ */
+int resolve_stick_rule(struct proxy *curproxy, struct sticking_rule *mrule)
+{
+ struct stktable *target;
+
+ if (mrule->table.name)
+ target = stktable_find_by_name(mrule->table.name);
+ else
+ target = curproxy->table;
+
+ if (!target) {
+ ha_alert("Proxy '%s': unable to find stick-table '%s'.\n",
+ curproxy->id, mrule->table.name ? mrule->table.name : curproxy->id);
+ return 0;
+ }
+ else if (!stktable_compatible_sample(mrule->expr, target->type)) {
+ ha_alert("Proxy '%s': type of fetch not usable with type of stick-table '%s'.\n",
+ curproxy->id, mrule->table.name ? mrule->table.name : curproxy->id);
+ return 0;
+ }
+
+ /* success */
+ ha_free(&mrule->table.name);
+ mrule->table.t = target;
+ stktable_alloc_data_type(target, STKTABLE_DT_SERVER_ID, NULL, NULL);
+ stktable_alloc_data_type(target, STKTABLE_DT_SERVER_KEY, NULL, NULL);
+ if (!in_proxies_list(target->proxies_list, curproxy)) {
+ curproxy->next_stkt_ref = target->proxies_list;
+ target->proxies_list = curproxy;
+ }
+ return 1;
+}
+
+void free_stick_rules(struct list *rules)
+{
+ struct sticking_rule *rule, *ruleb;
+
+ list_for_each_entry_safe(rule, ruleb, rules, list) {
+ LIST_DELETE(&rule->list);
+ free_acl_cond(rule->cond);
+ release_sample_expr(rule->expr);
+ free(rule);
+ }
+}
+
+static void free_logformat_list(struct list *lfs)
+{
+ struct logformat_node *lf, *lfb;
+
+ list_for_each_entry_safe(lf, lfb, lfs, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+}
+
+void free_server_rules(struct list *srules)
+{
+ struct server_rule *srule, *sruleb;
+
+ list_for_each_entry_safe(srule, sruleb, srules, list) {
+ LIST_DELETE(&srule->list);
+ free_acl_cond(srule->cond);
+ free_logformat_list(&srule->expr);
+ free(srule->file);
+ free(srule);
+ }
+}
+
+void free_proxy(struct proxy *p)
+{
+ struct server *s;
+ struct cap_hdr *h,*h_next;
+ struct listener *l,*l_next;
+ struct bind_conf *bind_conf, *bind_back;
+ struct acl_cond *cond, *condb;
+ struct acl *acl, *aclb;
+ struct switching_rule *rule, *ruleb;
+ struct redirect_rule *rdr, *rdrb;
+ struct logger *log, *logb;
+ struct proxy_deinit_fct *pxdf;
+ struct server_deinit_fct *srvdf;
+
+ if (!p)
+ return;
+
+ free(p->conf.file);
+ free(p->id);
+ free(p->cookie_name);
+ free(p->cookie_domain);
+ free(p->cookie_attrs);
+ free(p->lbprm.arg_str);
+ release_sample_expr(p->lbprm.expr);
+ free(p->server_state_file_name);
+ free(p->capture_name);
+ istfree(&p->monitor_uri);
+ free(p->rdp_cookie_name);
+ free(p->invalid_rep);
+ free(p->invalid_req);
+#if defined(CONFIG_HAP_TRANSPARENT)
+ free(p->conn_src.bind_hdr_name);
+#endif
+ if (p->conf.logformat_string != default_http_log_format &&
+ p->conf.logformat_string != default_tcp_log_format &&
+ p->conf.logformat_string != clf_http_log_format &&
+ p->conf.logformat_string != default_https_log_format &&
+ p->conf.logformat_string != httpclient_log_format)
+ free(p->conf.logformat_string);
+
+ free(p->conf.lfs_file);
+ free(p->conf.uniqueid_format_string);
+ istfree(&p->header_unique_id);
+ free(p->conf.uif_file);
+ if ((p->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_MAP)
+ free(p->lbprm.map.srv);
+ if (p->mode == PR_MODE_SYSLOG)
+ free(p->lbprm.log.srv);
+
+ if (p->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ free(p->conf.logformat_sd_string);
+ free(p->conf.lfsd_file);
+
+ free(p->conf.error_logformat_string);
+ free(p->conf.elfs_file);
+
+ list_for_each_entry_safe(cond, condb, &p->mon_fail_cond, list) {
+ LIST_DELETE(&cond->list);
+ free_acl_cond(cond);
+ }
+
+ EXTRA_COUNTERS_FREE(p->extra_counters_fe);
+ EXTRA_COUNTERS_FREE(p->extra_counters_be);
+
+ list_for_each_entry_safe(acl, aclb, &p->acl, list) {
+ LIST_DELETE(&acl->list);
+ prune_acl(acl);
+ free(acl);
+ }
+
+ free_server_rules(&p->server_rules);
+
+ list_for_each_entry_safe(rule, ruleb, &p->switching_rules, list) {
+ LIST_DELETE(&rule->list);
+ free_acl_cond(rule->cond);
+ free(rule->file);
+ free(rule);
+ }
+
+ list_for_each_entry_safe(rdr, rdrb, &p->redirect_rules, list) {
+ LIST_DELETE(&rdr->list);
+ http_free_redirect_rule(rdr);
+ }
+
+ list_for_each_entry_safe(log, logb, &p->loggers, list) {
+ LIST_DEL_INIT(&log->list);
+ free_logger(log);
+ }
+
+ free_logformat_list(&p->logformat);
+ free_logformat_list(&p->logformat_sd);
+ free_logformat_list(&p->format_unique_id);
+ free_logformat_list(&p->logformat_error);
+
+ free_act_rules(&p->tcp_req.inspect_rules);
+ free_act_rules(&p->tcp_rep.inspect_rules);
+ free_act_rules(&p->tcp_req.l4_rules);
+ free_act_rules(&p->tcp_req.l5_rules);
+ free_act_rules(&p->http_req_rules);
+ free_act_rules(&p->http_res_rules);
+ free_act_rules(&p->http_after_res_rules);
+
+ free_stick_rules(&p->storersp_rules);
+ free_stick_rules(&p->sticking_rules);
+
+ h = p->req_cap;
+ while (h) {
+ if (p->defpx && h == p->defpx->req_cap)
+ break;
+ h_next = h->next;
+ free(h->name);
+ pool_destroy(h->pool);
+ free(h);
+ h = h_next;
+ }/* end while(h) */
+
+ h = p->rsp_cap;
+ while (h) {
+ if (p->defpx && h == p->defpx->rsp_cap)
+ break;
+ h_next = h->next;
+ free(h->name);
+ pool_destroy(h->pool);
+ free(h);
+ h = h_next;
+ }/* end while(h) */
+
+ s = p->srv;
+ while (s) {
+ list_for_each_entry(srvdf, &server_deinit_list, list)
+ srvdf->fct(s);
+ s = srv_drop(s);
+ }/* end while(s) */
+
+ /* also free default-server parameters since some of them might have
+ * been dynamically allocated (e.g.: config hints, cookies, ssl..)
+ */
+ srv_free_params(&p->defsrv);
+
+ list_for_each_entry_safe(l, l_next, &p->conf.listeners, by_fe) {
+ LIST_DELETE(&l->by_fe);
+ LIST_DELETE(&l->by_bind);
+ free(l->name);
+ free(l->per_thr);
+ free(l->counters);
+ task_destroy(l->rx.rhttp.task);
+
+ EXTRA_COUNTERS_FREE(l->extra_counters);
+ free(l);
+ }
+
+ /* Release unused SSL configs. */
+ list_for_each_entry_safe(bind_conf, bind_back, &p->conf.bind, by_fe) {
+ if (bind_conf->xprt->destroy_bind_conf)
+ bind_conf->xprt->destroy_bind_conf(bind_conf);
+ free(bind_conf->file);
+ free(bind_conf->arg);
+ free(bind_conf->settings.interface);
+ LIST_DELETE(&bind_conf->by_fe);
+ free(bind_conf->rhttp_srvname);
+ free(bind_conf);
+ }
+
+ flt_deinit(p);
+
+ list_for_each_entry(pxdf, &proxy_deinit_list, list)
+ pxdf->fct(p);
+
+ free(p->desc);
+
+ http_ext_clean(p);
+
+ task_destroy(p->task);
+
+ pool_destroy(p->req_cap_pool);
+ pool_destroy(p->rsp_cap_pool);
+
+ stktable_deinit(p->table);
+ ha_free(&p->table);
+
+ HA_RWLOCK_DESTROY(&p->lbprm.lock);
+ HA_RWLOCK_DESTROY(&p->lock);
+
+ proxy_unref_defaults(p);
+ ha_free(&p);
+}
+
+/*
+ * This function returns a string containing a name describing capabilities to
+ * report comprehensible error messages. Specifically, it will return the words
+ * "frontend", "backend" when appropriate, "defaults" if it corresponds to a
+ * defaults section, or "proxy" for all other cases including the proxies
+ * declared in "listen" mode.
+ */
+const char *proxy_cap_str(int cap)
+{
+ if (cap & PR_CAP_DEF)
+ return "defaults";
+
+ if ((cap & PR_CAP_LISTEN) != PR_CAP_LISTEN) {
+ if (cap & PR_CAP_FE)
+ return "frontend";
+ else if (cap & PR_CAP_BE)
+ return "backend";
+ }
+ return "proxy";
+}
+
+/*
+ * This function returns a string containing the mode of the proxy in a format
+ * suitable for error messages.
+ */
+const char *proxy_mode_str(int mode) {
+
+ if (mode == PR_MODE_TCP)
+ return "tcp";
+ else if (mode == PR_MODE_HTTP)
+ return "http";
+ else if (mode == PR_MODE_CLI)
+ return "cli";
+ else if (mode == PR_MODE_SYSLOG)
+ return "syslog";
+ else if (mode == PR_MODE_PEERS)
+ return "peers";
+ else
+ return "unknown";
+}
+
+/* try to find among known options the one that looks closest to <word> by
+ * counting transitions between letters, digits and other characters. Will
+ * return the best matching word if found, otherwise NULL. An optional array
+ * of extra words to compare may be passed in <extra>, but it must then be
+ * terminated by a NULL entry. If unused it may be NULL.
+ */
+const char *proxy_find_best_option(const char *word, const char **extra)
+{
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+ int index;
+
+ make_word_fingerprint(word_sig, word);
+
+ for (index = 0; cfg_opts[index].name; index++) {
+ make_word_fingerprint(list_sig, cfg_opts[index].name);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = cfg_opts[index].name;
+ }
+ }
+
+ for (index = 0; cfg_opts2[index].name; index++) {
+ make_word_fingerprint(list_sig, cfg_opts2[index].name);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = cfg_opts2[index].name;
+ }
+ }
+
+ while (extra && *extra) {
+ make_word_fingerprint(list_sig, *extra);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = *extra;
+ }
+ extra++;
+ }
+
+ if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
+ best_ptr = NULL;
+ return best_ptr;
+}
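+
+/* Usage sketch (illustrative): with the fingerprint helpers from tools.c, a
+ * misspelled keyword should resolve to its closest known neighbour, e.g.:
+ *
+ *	const char *best = proxy_find_best_option("dontlognul", NULL);
+ *	// best is expected to point to "dontlognull", or NULL if nothing is close
+ */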
+
+/* This function parses a "timeout" statement in a proxy section. It returns
+ * -1 if there is any error, 1 for a warning, otherwise zero. If it does not
+ * return zero, it will write an error or warning message into a preallocated
+ * buffer returned at <err>. No trailing newline is written. The function must
+ * be called with <args> pointing to the first command line word, with <proxy>
+ * pointing to the proxy being parsed, and <defpx> to the default proxy or NULL.
+ * As a special case for compatibility with older configs, it also accepts
+ * "{cli|srv|con}timeout" in args[0].
+ */
+static int proxy_parse_timeout(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ unsigned timeout;
+ int retval, cap;
+ const char *res, *name;
+ int *tv = NULL;
+ const int *td = NULL;
+
+ retval = 0;
+
+ /* simply skip "timeout" but remain compatible with old form */
+ if (strcmp(args[0], "timeout") == 0)
+ args++;
+
+ name = args[0];
+ if (strcmp(args[0], "client") == 0) {
+ name = "client";
+ tv = &proxy->timeout.client;
+ td = &defpx->timeout.client;
+ cap = PR_CAP_FE;
+ } else if (strcmp(args[0], "tarpit") == 0) {
+ tv = &proxy->timeout.tarpit;
+ td = &defpx->timeout.tarpit;
+ cap = PR_CAP_FE | PR_CAP_BE;
+ } else if (strcmp(args[0], "client-hs") == 0) {
+ tv = &proxy->timeout.client_hs;
+ td = &defpx->timeout.client_hs;
+ cap = PR_CAP_FE;
+ } else if (strcmp(args[0], "http-keep-alive") == 0) {
+ tv = &proxy->timeout.httpka;
+ td = &defpx->timeout.httpka;
+ cap = PR_CAP_FE | PR_CAP_BE;
+ } else if (strcmp(args[0], "http-request") == 0) {
+ tv = &proxy->timeout.httpreq;
+ td = &defpx->timeout.httpreq;
+ cap = PR_CAP_FE | PR_CAP_BE;
+ } else if (strcmp(args[0], "server") == 0) {
+ name = "server";
+ tv = &proxy->timeout.server;
+ td = &defpx->timeout.server;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "connect") == 0) {
+ name = "connect";
+ tv = &proxy->timeout.connect;
+ td = &defpx->timeout.connect;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "check") == 0) {
+ tv = &proxy->timeout.check;
+ td = &defpx->timeout.check;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "queue") == 0) {
+ tv = &proxy->timeout.queue;
+ td = &defpx->timeout.queue;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "tunnel") == 0) {
+ tv = &proxy->timeout.tunnel;
+ td = &defpx->timeout.tunnel;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "client-fin") == 0) {
+ tv = &proxy->timeout.clientfin;
+ td = &defpx->timeout.clientfin;
+ cap = PR_CAP_FE;
+ } else if (strcmp(args[0], "server-fin") == 0) {
+ tv = &proxy->timeout.serverfin;
+ td = &defpx->timeout.serverfin;
+ cap = PR_CAP_BE;
+ } else if (strcmp(args[0], "clitimeout") == 0) {
+ memprintf(err, "the '%s' directive is not supported anymore since HAProxy 2.1. Use 'timeout client'.", args[0]);
+ return -1;
+ } else if (strcmp(args[0], "srvtimeout") == 0) {
+ memprintf(err, "the '%s' directive is not supported anymore since HAProxy 2.1. Use 'timeout server'.", args[0]);
+ return -1;
+ } else if (strcmp(args[0], "contimeout") == 0) {
+ memprintf(err, "the '%s' directive is not supported anymore since HAProxy 2.1. Use 'timeout connect'.", args[0]);
+ return -1;
+ } else {
+ memprintf(err,
+ "'timeout' supports 'client', 'server', 'connect', 'check', "
+ "'queue', 'handshake', 'http-keep-alive', 'http-request', 'tunnel', 'tarpit', "
+ "'client-fin' and 'server-fin' (got '%s')",
+ args[0]);
+ return -1;
+ }
+
+ if (*args[1] == 0) {
+ memprintf(err, "'timeout %s' expects an integer value (in milliseconds)", name);
+ return -1;
+ }
+
+ res = parse_time_err(args[1], &timeout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to 'timeout %s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], name);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to 'timeout %s' (minimum non-null value is 1 ms)",
+ args[1], name);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in 'timeout %s'", *res, name);
+ return -1;
+ }
+
+ if (!(proxy->cap & cap)) {
+ memprintf(err, "'timeout %s' will be ignored because %s '%s' has no %s capability",
+ name, proxy_type_str(proxy), proxy->id,
+ (cap & PR_CAP_BE) ? "backend" : "frontend");
+ retval = 1;
+ }
+ else if (defpx && *tv != *td) {
+ memprintf(err, "overwriting 'timeout %s' which was already specified", name);
+ retval = 1;
+ }
+
+ if (*args[2] != 0) {
+ memprintf(err, "'timeout %s' : unexpected extra argument '%s' after value '%s'.", name, args[2], args[1]);
+ retval = -1;
+ }
+
+ *tv = MS_TO_TICKS(timeout);
+ return retval;
+}
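+
+/* Configuration sketch for the directive parsed above (values illustrative):
+ *
+ *	defaults
+ *		timeout client       30s   # needs the frontend capability
+ *		timeout connect      5s    # needs the backend capability
+ *		timeout http-request 10s
+ *
+ * parse_time_err() accepts the usual unit suffixes (us, ms, s, m, h, d) and
+ * defaults to milliseconds here (TIME_UNIT_MS).
+ */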
+
+/* This function parses a "rate-limit" statement in a proxy section. It returns
+ * -1 if there is any error, 1 for a warning, otherwise zero. If it does not
+ * return zero, it will write an error or warning message into a preallocated
+ * buffer returned at <err>. The function must be called with <args> pointing
+ * to the first command line word, with <proxy> pointing to the proxy being
+ * parsed, and <defpx> to the default proxy or NULL.
+ */
+static int proxy_parse_rate_limit(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ char *res;
+ unsigned int *tv = NULL;
+ const unsigned int *td = NULL;
+ unsigned int val;
+
+ retval = 0;
+
+ if (strcmp(args[1], "sessions") == 0) {
+ tv = &proxy->fe_sps_lim;
+ td = &defpx->fe_sps_lim;
+ }
+ else {
+ memprintf(err, "'%s' only supports 'sessions' (got '%s')", args[0], args[1]);
+ return -1;
+ }
+
+ if (*args[2] == 0) {
+ memprintf(err, "'%s %s' expects expects an integer value (in sessions/second)", args[0], args[1]);
+ return -1;
+ }
+
+ val = strtoul(args[2], &res, 0);
+ if (*res) {
+ memprintf(err, "'%s %s' : unexpected character '%c' in integer value '%s'", args[0], args[1], *res, args[2]);
+ return -1;
+ }
+
+ if (!(proxy->cap & PR_CAP_FE)) {
+ memprintf(err, "%s %s will be ignored because %s '%s' has no frontend capability",
+ args[0], args[1], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ else if (defpx && *tv != *td) {
+ memprintf(err, "overwriting %s %s which was already specified", args[0], args[1]);
+ retval = 1;
+ }
+
+ *tv = val;
+ return retval;
+}
+
+/* This function parses a "max-keep-alive-queue" statement in a proxy section.
+ * It returns -1 if there is any error, 1 for a warning, otherwise zero. If it
+ * does not return zero, it will write an error or warning message into a
+ * preallocated buffer returned at <err>. The function must be called with
+ * <args> pointing to the first command line word, with <proxy> pointing to
+ * the proxy being parsed, and <defpx> to the default proxy or NULL.
+ */
+static int proxy_parse_max_ka_queue(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ char *res;
+ unsigned int val;
+
+ retval = 0;
+
+ if (*args[1] == 0) {
+ memprintf(err, "'%s' expects expects an integer value (or -1 to disable)", args[0]);
+ return -1;
+ }
+
+ val = strtol(args[1], &res, 0);
+ if (*res) {
+ memprintf(err, "'%s' : unexpected character '%c' in integer value '%s'", args[0], *res, args[1]);
+ return -1;
+ }
+
+ if (!(proxy->cap & PR_CAP_BE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no backend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+
+ /* we store <val+1> so that a user-facing value of -1 is stored as zero (default) */
+ proxy->max_ka_queue = val + 1;
+ return retval;
+}
+
+/* This function parses a "declare" statement in a proxy section. It returns -1
+ * if there is any error, 1 for warning, otherwise 0. If it does not return zero,
+ * it will write an error or warning message into a preallocated buffer returned
+ * at <err>. The function must be called with <args> pointing to the first command
+ * line word, with <proxy> pointing to the proxy being parsed, and <defpx> to the
+ * default proxy or NULL.
+ */
+static int proxy_parse_declare(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+	/* The capture keyword cannot be declared in a default proxy. */
+ if (curpx == defpx) {
+ memprintf(err, "'%s' not available in default section", args[0]);
+ return -1;
+ }
+
+ /* Capture keyword is only available in frontend. */
+ if (!(curpx->cap & PR_CAP_FE)) {
+ memprintf(err, "'%s' only available in frontend or listen section", args[0]);
+ return -1;
+ }
+
+ /* Check mandatory second keyword. */
+ if (!args[1] || !*args[1]) {
+ memprintf(err, "'%s' needs a second keyword that specify the type of declaration ('capture')", args[0]);
+ return -1;
+ }
+
+	/* For now, "declare" can only declare capture slots, but it may later
+	 * be extended to declare maps or variables as well, so this section
+	 * checks and dispatches on the second keyword.
+	 */
+ if (strcmp(args[1], "capture") == 0) {
+ char *error = NULL;
+ long len;
+ struct cap_hdr *hdr;
+
+ /* Check the next keyword. */
+ if (!args[2] || !*args[2] ||
+ (strcmp(args[2], "response") != 0 &&
+ strcmp(args[2], "request") != 0)) {
+ memprintf(err, "'%s %s' requires a direction ('request' or 'response')", args[0], args[1]);
+ return -1;
+ }
+
+ /* Check the 'len' keyword. */
+ if (!args[3] || !*args[3] || strcmp(args[3], "len") != 0) {
+ memprintf(err, "'%s %s' requires a capture length ('len')", args[0], args[1]);
+ return -1;
+ }
+
+ /* Check the length value. */
+ if (!args[4] || !*args[4]) {
+ memprintf(err, "'%s %s': 'len' requires a numeric value that represents the "
+ "capture length",
+ args[0], args[1]);
+ return -1;
+ }
+
+ /* convert the length value. */
+ len = strtol(args[4], &error, 10);
+ if (*error != '\0') {
+ memprintf(err, "'%s %s': cannot parse the length '%s'.",
+ args[0], args[1], args[3]);
+ return -1;
+ }
+
+ /* check length. */
+ if (len <= 0) {
+ memprintf(err, "length must be > 0");
+ return -1;
+ }
+
+ /* register the capture. */
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(err, "proxy '%s': out of memory while registering a capture", curpx->id);
+ return -1;
+ }
+ hdr->name = NULL; /* not a header capture */
+ hdr->namelen = 0;
+ hdr->len = len;
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+
+ if (strcmp(args[2], "request") == 0) {
+ hdr->next = curpx->req_cap;
+ hdr->index = curpx->nb_req_cap++;
+ curpx->req_cap = hdr;
+ }
+ if (strcmp(args[2], "response") == 0) {
+ hdr->next = curpx->rsp_cap;
+ hdr->index = curpx->nb_rsp_cap++;
+ curpx->rsp_cap = hdr;
+ }
+ return 0;
+ }
+ else {
+ memprintf(err, "unknown declaration type '%s' (supports 'capture')", args[1]);
+ return -1;
+ }
+}
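+
+/* Example (illustrative only): in a frontend or listen section, the parser
+ * above accepts a declaration such as:
+ *
+ *     declare capture request len 32
+ *
+ * which registers request capture slot 0 with a 32-byte buffer, later usable
+ * by actions like "http-request capture ... id 0".
+ */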
+
+/* This function parses a "retry-on" statement */
+static int
+proxy_parse_retry_on(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int i;
+
+ if (!(*args[1])) {
+ memprintf(err, "'%s' needs at least one keyword to specify when to retry", args[0]);
+ return -1;
+ }
+ if (!(curpx->cap & PR_CAP_BE)) {
+ memprintf(err, "'%s' only available in backend or listen section", args[0]);
+ return -1;
+ }
+ curpx->retry_type = 0;
+ for (i = 1; *(args[i]); i++) {
+ if (strcmp(args[i], "conn-failure") == 0)
+ curpx->retry_type |= PR_RE_CONN_FAILED;
+ else if (strcmp(args[i], "empty-response") == 0)
+ curpx->retry_type |= PR_RE_DISCONNECTED;
+ else if (strcmp(args[i], "response-timeout") == 0)
+ curpx->retry_type |= PR_RE_TIMEOUT;
+ else if (strcmp(args[i], "401") == 0)
+ curpx->retry_type |= PR_RE_401;
+ else if (strcmp(args[i], "403") == 0)
+ curpx->retry_type |= PR_RE_403;
+ else if (strcmp(args[i], "404") == 0)
+ curpx->retry_type |= PR_RE_404;
+ else if (strcmp(args[i], "408") == 0)
+ curpx->retry_type |= PR_RE_408;
+ else if (strcmp(args[i], "425") == 0)
+ curpx->retry_type |= PR_RE_425;
+ else if (strcmp(args[i], "500") == 0)
+ curpx->retry_type |= PR_RE_500;
+ else if (strcmp(args[i], "501") == 0)
+ curpx->retry_type |= PR_RE_501;
+ else if (strcmp(args[i], "502") == 0)
+ curpx->retry_type |= PR_RE_502;
+ else if (strcmp(args[i], "503") == 0)
+ curpx->retry_type |= PR_RE_503;
+ else if (strcmp(args[i], "504") == 0)
+ curpx->retry_type |= PR_RE_504;
+ else if (strcmp(args[i], "0rtt-rejected") == 0)
+ curpx->retry_type |= PR_RE_EARLY_ERROR;
+ else if (strcmp(args[i], "junk-response") == 0)
+ curpx->retry_type |= PR_RE_JUNK_REQUEST;
+ else if (!(strcmp(args[i], "all-retryable-errors")))
+ curpx->retry_type |= PR_RE_CONN_FAILED | PR_RE_DISCONNECTED |
+ PR_RE_TIMEOUT | PR_RE_500 | PR_RE_502 |
+ PR_RE_503 | PR_RE_504 | PR_RE_EARLY_ERROR |
+ PR_RE_JUNK_REQUEST;
+ else if (strcmp(args[i], "none") == 0) {
+ if (i != 1 || *args[i + 1]) {
+ memprintf(err, "'%s' 'none' keyworld only usable alone", args[0]);
+ return -1;
+ }
+ } else {
+ memprintf(err, "'%s': unknown keyword '%s'", args[0], args[i]);
+ return -1;
+ }
+ }
+
+ return 0;
+}
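+
+/* Example (illustrative): a backend may combine several of the keywords
+ * parsed above, e.g.:
+ *
+ *     retry-on conn-failure response-timeout 503
+ *
+ * which sets curpx->retry_type to PR_RE_CONN_FAILED | PR_RE_TIMEOUT |
+ * PR_RE_503.
+ */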
+
+#ifdef TCP_KEEPCNT
+/* This function parses "{cli|srv}tcpka-cnt" statements */
+static int proxy_parse_tcpka_cnt(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ char *res;
+ unsigned int tcpka_cnt;
+
+ retval = 0;
+
+ if (*args[1] == 0) {
+ memprintf(err, "'%s' expects an integer value", args[0]);
+ return -1;
+ }
+
+ tcpka_cnt = strtol(args[1], &res, 0);
+ if (*res) {
+ memprintf(err, "'%s' : unexpected character '%c' in integer value '%s'", args[0], *res, args[1]);
+ return -1;
+ }
+
+ if (strcmp(args[0], "clitcpka-cnt") == 0) {
+ if (!(proxy->cap & PR_CAP_FE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no frontend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->clitcpka_cnt = tcpka_cnt;
+ } else if (strcmp(args[0], "srvtcpka-cnt") == 0) {
+ if (!(proxy->cap & PR_CAP_BE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no backend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->srvtcpka_cnt = tcpka_cnt;
+ } else {
+ /* unreachable */
+ memprintf(err, "'%s': unknown keyword", args[0]);
+ return -1;
+ }
+
+ return retval;
+}
+#endif
+
+#ifdef TCP_KEEPIDLE
+/* This function parses "{cli|srv}tcpka-idle" statements */
+static int proxy_parse_tcpka_idle(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ const char *res;
+ unsigned int tcpka_idle;
+
+ retval = 0;
+
+ if (*args[1] == 0) {
+ memprintf(err, "'%s' expects an integer value", args[0]);
+ return -1;
+ }
+ res = parse_time_err(args[1], &tcpka_idle, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+
+ if (strcmp(args[0], "clitcpka-idle") == 0) {
+ if (!(proxy->cap & PR_CAP_FE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no frontend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->clitcpka_idle = tcpka_idle;
+ } else if (strcmp(args[0], "srvtcpka-idle") == 0) {
+ if (!(proxy->cap & PR_CAP_BE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no backend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->srvtcpka_idle = tcpka_idle;
+ } else {
+ /* unreachable */
+ memprintf(err, "'%s': unknown keyword", args[0]);
+ return -1;
+ }
+
+ return retval;
+}
+#endif
+
+#ifdef TCP_KEEPINTVL
+/* This function parses "{cli|srv}tcpka-intvl" statements */
+static int proxy_parse_tcpka_intvl(char **args, int section, struct proxy *proxy,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int retval;
+ const char *res;
+ unsigned int tcpka_intvl;
+
+ retval = 0;
+
+ if (*args[1] == 0) {
+ memprintf(err, "'%s' expects an integer value", args[0]);
+ return -1;
+ }
+ res = parse_time_err(args[1], &tcpka_intvl, TIME_UNIT_S);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+
+ if (strcmp(args[0], "clitcpka-intvl") == 0) {
+ if (!(proxy->cap & PR_CAP_FE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no frontend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->clitcpka_intvl = tcpka_intvl;
+ } else if (strcmp(args[0], "srvtcpka-intvl") == 0) {
+ if (!(proxy->cap & PR_CAP_BE)) {
+ memprintf(err, "%s will be ignored because %s '%s' has no backend capability",
+ args[0], proxy_type_str(proxy), proxy->id);
+ retval = 1;
+ }
+ proxy->srvtcpka_intvl = tcpka_intvl;
+ } else {
+ /* unreachable */
+ memprintf(err, "'%s': unknown keyword", args[0]);
+ return -1;
+ }
+
+ return retval;
+}
+#endif
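+
+/* Example (illustrative, only when TCP_KEEPCNT/TCP_KEEPIDLE/TCP_KEEPINTVL
+ * are supported): the keyword families above tune per-side TCP keep-alive:
+ *
+ *     defaults
+ *         clitcpka-cnt   3
+ *         clitcpka-idle  60s
+ *         clitcpka-intvl 10s
+ *
+ * The srvtcpka-* variants apply the same settings on the server side.
+ */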
+
+/* This function inserts proxy <px> into the tree of known proxies (regular
+ * ones or defaults depending on px->cap & PR_CAP_DEF). The proxy's name is
+ * used as the storing key so it must already have been initialized.
+ */
+void proxy_store_name(struct proxy *px)
+{
+ struct eb_root *root = (px->cap & PR_CAP_DEF) ? &defproxy_by_name : &proxy_by_name;
+
+ px->conf.by_name.key = px->id;
+ ebis_insert(root, &px->conf.by_name);
+}
+
+/* Returns a pointer to the first proxy matching capabilities <cap> and id
+ * <id>. NULL is returned if no match is found. If <table> is non-zero, it
+ * only considers proxies having a table.
+ */
+struct proxy *proxy_find_by_id(int id, int cap, int table)
+{
+ struct eb32_node *n;
+
+ for (n = eb32_lookup(&used_proxy_id, id); n; n = eb32_next(n)) {
+ struct proxy *px = container_of(n, struct proxy, conf.id);
+
+ if (px->uuid != id)
+ break;
+
+ if ((px->cap & cap) != cap)
+ continue;
+
+ if (table && (!px->table || !px->table->size))
+ continue;
+
+ return px;
+ }
+ return NULL;
+}
+
+/* Returns a pointer to the first proxy matching either name <name>, or id
+ * <name> if <name> begins with a '#'. NULL is returned if no match is found.
+ * If <table> is non-zero, it only considers proxies having a table. The search
+ * is made into the regular proxies, unless <cap> has PR_CAP_DEF set in which
+ * case it's searched into the defproxy tree.
+ */
+struct proxy *proxy_find_by_name(const char *name, int cap, int table)
+{
+ struct proxy *curproxy;
+
+ if (*name == '#' && !(cap & PR_CAP_DEF)) {
+ curproxy = proxy_find_by_id(atoi(name + 1), cap, table);
+ if (curproxy)
+ return curproxy;
+ }
+ else {
+ struct eb_root *root;
+ struct ebpt_node *node;
+
+ root = (cap & PR_CAP_DEF) ? &defproxy_by_name : &proxy_by_name;
+ for (node = ebis_lookup(root, name); node; node = ebpt_next(node)) {
+ curproxy = container_of(node, struct proxy, conf.by_name);
+
+ if (strcmp(curproxy->id, name) != 0)
+ break;
+
+ if ((curproxy->cap & cap) != cap)
+ continue;
+
+ if (table && (!curproxy->table || !curproxy->table->size))
+ continue;
+
+ return curproxy;
+ }
+ }
+ return NULL;
+}
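+
+/* Usage sketch (editor's example): both of the following would resolve the
+ * same frontend, assuming its uuid is 10:
+ *
+ *     px = proxy_find_by_name("www", PR_CAP_FE, 0);
+ *     px = proxy_find_by_name("#10", PR_CAP_FE, 0);
+ */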
+
+/* Finds the best match for a proxy with capabilities <cap>, name <name> and id
+ * <id>. At most one of <id> or <name> may be different provided that <cap> is
+ * valid. Either <id> or <name> may be left unspecified (0). The purpose is to
+ * find a proxy based on some information from a previous configuration, across
+ * reloads or during information exchange between peers.
+ *
+ * Names are looked up first if present, then IDs are compared if present. In
+ * case of an inexact match whatever is forced in the configuration has
+ * precedence in the following order :
+ * - 1) forced ID (proves a renaming / change of proxy type)
+ * - 2) proxy name+type (may indicate a move if ID differs)
+ * - 3) automatic ID+type (may indicate a renaming)
+ *
+ * Depending on what is found, we can end up in the following situations :
+ *
+ * name id cap | possible causes
+ * -------------+-----------------
+ * -- -- -- | nothing found
+ * -- -- ok | nothing found
+ * -- ok -- | proxy deleted, ID points to next one
+ * -- ok ok | proxy renamed, or deleted with ID pointing to next one
+ * ok -- -- | proxy deleted, but other half with same name still here (before)
+ * ok -- ok | proxy's ID changed (proxy moved in the config file)
+ * ok ok -- | proxy deleted, but other half with same name still here (after)
+ * ok ok ok | perfect match
+ *
+ * Upon return if <diff> is not NULL, it is zeroed then filled with up to 3 bits :
+ * - PR_FBM_MISMATCH_ID : proxy was found but ID differs
+ * (and ID was not zero)
+ * - PR_FBM_MISMATCH_NAME : proxy was found by ID but name differs
+ * (and name was not NULL)
+ * - PR_FBM_MISMATCH_PROXYTYPE : a proxy of different type was found with
+ * the same name and/or id
+ *
+ * Only a valid proxy is returned. If capabilities do not match, NULL is
+ * returned. The caller can check <diff> to report detailed warnings / errors,
+ * and decide whether or not to use what was found.
+ */
+struct proxy *proxy_find_best_match(int cap, const char *name, int id, int *diff)
+{
+ struct proxy *byname;
+ struct proxy *byid;
+
+ if (!name && !id)
+ return NULL;
+
+ if (diff)
+ *diff = 0;
+
+ byname = byid = NULL;
+
+ if (name) {
+ byname = proxy_find_by_name(name, cap, 0);
+ if (byname && (!id || byname->uuid == id))
+ return byname;
+ }
+
+ /* remaining possibilities :
+ * - name not set
+ * - name set but not found
+ * - name found, but ID doesn't match.
+ */
+ if (id) {
+ byid = proxy_find_by_id(id, cap, 0);
+ if (byid) {
+ if (byname) {
+ /* id+type found, name+type found, but not all 3.
+ * ID wins only if forced, otherwise name wins.
+ */
+ if (byid->options & PR_O_FORCED_ID) {
+ if (diff)
+ *diff |= PR_FBM_MISMATCH_NAME;
+ return byid;
+ }
+ else {
+ if (diff)
+ *diff |= PR_FBM_MISMATCH_ID;
+ return byname;
+ }
+ }
+
+ /* remaining possibilities :
+ * - name not set
+ * - name set but not found
+ */
+ if (name && diff)
+ *diff |= PR_FBM_MISMATCH_NAME;
+ return byid;
+ }
+
+ /* ID not found */
+ if (byname) {
+ if (diff)
+ *diff |= PR_FBM_MISMATCH_ID;
+ return byname;
+ }
+ }
+
+ /* All remaining possibilities will lead to NULL. If we can report more
+ * detailed information to the caller about changed types and/or name,
+ * we'll do it. For example, we could detect that "listen foo" was
+ * split into "frontend foo_ft" and "backend foo_bk" if IDs are forced.
+ * - name not set, ID not found
+ * - name not found, ID not set
+ * - name not found, ID not found
+ */
+ if (!diff)
+ return NULL;
+
+ if (name) {
+ byname = proxy_find_by_name(name, 0, 0);
+ if (byname && (!id || byname->uuid == id))
+ *diff |= PR_FBM_MISMATCH_PROXYTYPE;
+ }
+
+ if (id) {
+ byid = proxy_find_by_id(id, 0, 0);
+ if (byid) {
+ if (!name)
+ *diff |= PR_FBM_MISMATCH_PROXYTYPE; /* only type changed */
+ else if (byid->options & PR_O_FORCED_ID)
+ *diff |= PR_FBM_MISMATCH_NAME | PR_FBM_MISMATCH_PROXYTYPE; /* name and type changed */
+ /* otherwise it's a different proxy that was returned */
+ }
+ }
+ return NULL;
+}
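+
+/* Usage sketch (editor's example): a caller restoring state across a reload
+ * might do:
+ *
+ *     int diff;
+ *     struct proxy *px = proxy_find_best_match(PR_CAP_BE, "app", 7, &diff);
+ *
+ *     if (px && (diff & PR_FBM_MISMATCH_ID))
+ *         ... same name found but its ID changed (section moved) ...
+ */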
+
+/*
+ * This function finds the server with the matching name within the selected
+ * proxy. It also checks whether several servers match the requested name,
+ * as duplicates often lead to unexpected situations.
+ */
+
+struct server *findserver(const struct proxy *px, const char *name) {
+
+ struct server *cursrv, *target = NULL;
+
+ if (!px)
+ return NULL;
+
+ for (cursrv = px->srv; cursrv; cursrv = cursrv->next) {
+ if (strcmp(cursrv->id, name) != 0)
+ continue;
+
+ if (!target) {
+ target = cursrv;
+ continue;
+ }
+
+ ha_alert("Refusing to use duplicated server '%s' found in proxy: %s!\n",
+ name, px->id);
+
+ return NULL;
+ }
+
+ return target;
+}
+
+/*
+ * This function finds a server with matching "<puid> x <rid>" within
+ * selected proxy <px>.
+ * Using the combination of proxy-uid + revision id ensures that the function
+ * will either return the server we're expecting or NULL if it has been removed
+ * from the proxy.
+ */
+struct server *findserver_unique_id(const struct proxy *px, int puid, uint32_t rid) {
+
+ struct server *cursrv;
+
+ if (!px)
+ return NULL;
+
+ for (cursrv = px->srv; cursrv; cursrv = cursrv->next) {
+ if (cursrv->puid == puid && cursrv->rid == rid)
+ return cursrv;
+ }
+
+ return NULL;
+}
+
+/*
+ * This function finds a server with matching "<name> x <rid>" within
+ * selected proxy <px>.
+ * Using the combination of name + revision id ensures that the function will
+ * either return the server we're expecting or NULL if it has been removed
+ * from the proxy.
+ */
+struct server *findserver_unique_name(const struct proxy *px, const char *name, uint32_t rid) {
+
+ struct server *cursrv;
+
+ if (!px)
+ return NULL;
+
+ for (cursrv = px->srv; cursrv; cursrv = cursrv->next) {
+ if (!strcmp(cursrv->id, name) && cursrv->rid == rid)
+ return cursrv;
+ }
+
+ return NULL;
+}
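+
+/* Note (editor's sketch): the <rid> revision id makes these lookups robust
+ * against dynamic server removal followed by re-addition under the same name
+ * or puid: the re-added instance carries a new rid, so a stale reference
+ * such as findserver_unique_name(px, "srv1", old_rid) returns NULL instead
+ * of silently matching the new server.
+ */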
+
+/* This function checks that the designated proxy has no http directives
+ * enabled. It will output a warning if there are any, and will fix some of
+ * them. It returns the number of fatal errors encountered. This should be
+ * called at the end of the configuration parsing if the proxy is not in
+ * http mode.
+ */
+int proxy_cfg_ensure_no_http(struct proxy *curproxy)
+{
+ if (curproxy->cookie_name != NULL) {
+ ha_warning("cookie will be ignored for %s '%s' (needs 'mode http').\n",
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ if (isttest(curproxy->monitor_uri)) {
+ ha_warning("monitor-uri will be ignored for %s '%s' (needs 'mode http').\n",
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ if (curproxy->lbprm.algo & BE_LB_NEED_HTTP) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ ha_warning("Layer 7 hash not possible for %s '%s' (needs 'mode http'). Falling back to round robin.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ if (curproxy->to_log & (LW_REQ | LW_RESP)) {
+ curproxy->to_log &= ~(LW_REQ | LW_RESP);
+ ha_warning("parsing [%s:%d] : HTTP log/header format not usable with %s '%s' (needs 'mode http').\n",
+ curproxy->conf.lfs_file, curproxy->conf.lfs_line,
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ if (curproxy->conf.logformat_string == default_http_log_format ||
+ curproxy->conf.logformat_string == clf_http_log_format) {
+ /* Note: we don't change the directive's file:line number */
+ curproxy->conf.logformat_string = default_tcp_log_format;
+ ha_warning("parsing [%s:%d] : 'option httplog' not usable with %s '%s' (needs 'mode http'). Falling back to 'option tcplog'.\n",
+ curproxy->conf.lfs_file, curproxy->conf.lfs_line,
+ proxy_type_str(curproxy), curproxy->id);
+ }
+ else if (curproxy->conf.logformat_string == default_https_log_format) {
+ /* Note: we don't change the directive's file:line number */
+ curproxy->conf.logformat_string = default_tcp_log_format;
+ ha_warning("parsing [%s:%d] : 'option httpslog' not usable with %s '%s' (needs 'mode http'). Falling back to 'option tcplog'.\n",
+ curproxy->conf.lfs_file, curproxy->conf.lfs_line,
+ proxy_type_str(curproxy), curproxy->id);
+ }
+
+ return 0;
+}
+
+/* This function checks that the designated proxy has no log directives
+ * enabled. It will output a warning if there are any, and will fix some of
+ * them. It returns the number of fatal errors encountered. This should be
+ * called at the end of the configuration parsing if the proxy is not in
+ * log mode.
+ */
+int proxy_cfg_ensure_no_log(struct proxy *curproxy)
+{
+ if (curproxy->lbprm.algo & BE_LB_NEED_LOG) {
+ curproxy->lbprm.algo &= ~BE_LB_ALGO;
+ curproxy->lbprm.algo |= BE_LB_ALGO_RR;
+ ha_warning("Unusable balance algorithm for %s '%s' (needs 'mode log'). Falling back to round robin.\n",
+ proxy_type_str(curproxy), curproxy->id);
+ }
+
+ return 0;
+}
+
+/* Perform the most basic initialization of a proxy :
+ * memset(), list_init(*), reset_timeouts(*).
+ * Any new proxy or peer should be initialized via this function.
+ */
+void init_new_proxy(struct proxy *p)
+{
+ memset(p, 0, sizeof(struct proxy));
+ p->obj_type = OBJ_TYPE_PROXY;
+ queue_init(&p->queue, p, NULL);
+ LIST_INIT(&p->acl);
+ LIST_INIT(&p->http_req_rules);
+ LIST_INIT(&p->http_res_rules);
+ LIST_INIT(&p->http_after_res_rules);
+ LIST_INIT(&p->redirect_rules);
+ LIST_INIT(&p->mon_fail_cond);
+ LIST_INIT(&p->switching_rules);
+ LIST_INIT(&p->server_rules);
+ LIST_INIT(&p->persist_rules);
+ LIST_INIT(&p->sticking_rules);
+ LIST_INIT(&p->storersp_rules);
+ LIST_INIT(&p->tcp_req.inspect_rules);
+ LIST_INIT(&p->tcp_rep.inspect_rules);
+ LIST_INIT(&p->tcp_req.l4_rules);
+ LIST_INIT(&p->tcp_req.l5_rules);
+ MT_LIST_INIT(&p->listener_queue);
+ LIST_INIT(&p->loggers);
+ LIST_INIT(&p->logformat);
+ LIST_INIT(&p->logformat_sd);
+ LIST_INIT(&p->format_unique_id);
+ LIST_INIT(&p->logformat_error);
+ LIST_INIT(&p->conf.bind);
+ LIST_INIT(&p->conf.listeners);
+ LIST_INIT(&p->conf.errors);
+ LIST_INIT(&p->conf.args.list);
+ LIST_INIT(&p->filter_configs);
+ LIST_INIT(&p->tcpcheck_rules.preset_vars);
+
+ p->defsrv.id = "default-server";
+ p->conf.used_listener_id = EB_ROOT;
+ p->conf.used_server_id = EB_ROOT;
+ p->used_server_addr = EB_ROOT_UNIQUE;
+
+ /* Timeouts are defined as -1 */
+ proxy_reset_timeouts(p);
+ p->tcp_rep.inspect_delay = TICK_ETERNITY;
+
+ /* initial uuid is unassigned (-1) */
+ p->uuid = -1;
+
+ /* Default to only allow L4 retries */
+ p->retry_type = PR_RE_CONN_FAILED;
+
+ p->extra_counters_fe = NULL;
+ p->extra_counters_be = NULL;
+
+ HA_RWLOCK_INIT(&p->lock);
+
+ /* initialize the default settings */
+ proxy_preset_defaults(p);
+}
+
+/* Preset default settings onto proxy <defproxy>. */
+void proxy_preset_defaults(struct proxy *defproxy)
+{
+ defproxy->mode = PR_MODE_TCP;
+ defproxy->flags = 0;
+ if (!(defproxy->cap & PR_CAP_INT)) {
+ defproxy->maxconn = cfg_maxpconn;
+ defproxy->conn_retries = CONN_RETRIES;
+ }
+ defproxy->redispatch_after = 0;
+ defproxy->options = PR_O_REUSE_SAFE;
+ if (defproxy->cap & PR_CAP_INT)
+ defproxy->options2 |= PR_O2_INDEPSTR;
+ defproxy->max_out_conns = MAX_SRV_LIST;
+
+ defproxy->defsrv.check.inter = DEF_CHKINTR;
+ defproxy->defsrv.check.fastinter = 0;
+ defproxy->defsrv.check.downinter = 0;
+ defproxy->defsrv.agent.inter = DEF_CHKINTR;
+ defproxy->defsrv.agent.fastinter = 0;
+ defproxy->defsrv.agent.downinter = 0;
+ defproxy->defsrv.check.rise = DEF_RISETIME;
+ defproxy->defsrv.check.fall = DEF_FALLTIME;
+ defproxy->defsrv.agent.rise = DEF_AGENT_RISETIME;
+ defproxy->defsrv.agent.fall = DEF_AGENT_FALLTIME;
+ defproxy->defsrv.check.port = 0;
+ defproxy->defsrv.agent.port = 0;
+ defproxy->defsrv.maxqueue = 0;
+ defproxy->defsrv.minconn = 0;
+ defproxy->defsrv.maxconn = 0;
+ defproxy->defsrv.max_reuse = -1;
+ defproxy->defsrv.max_idle_conns = -1;
+ defproxy->defsrv.pool_purge_delay = 5000;
+ defproxy->defsrv.slowstart = 0;
+ defproxy->defsrv.onerror = DEF_HANA_ONERR;
+ defproxy->defsrv.consecutive_errors_limit = DEF_HANA_ERRLIMIT;
+ defproxy->defsrv.uweight = defproxy->defsrv.iweight = 1;
+ LIST_INIT(&defproxy->defsrv.pp_tlvs);
+
+ defproxy->email_alert.level = LOG_ALERT;
+ defproxy->load_server_state_from_file = PR_SRV_STATE_FILE_UNSPEC;
+
+ if (defproxy->cap & PR_CAP_INT)
+ defproxy->timeout.connect = 5000;
+}
+
+/* Frees all dynamic settings allocated on a default proxy that's about to be
+ * destroyed. This is a subset of the complete proxy deinit code, but these
+ * should probably be merged ultimately. Note that most of the fields are not
+ * even reset, so extreme care is required here, and calling
+ * proxy_preset_defaults() afterwards would be safer.
+ */
+void proxy_free_defaults(struct proxy *defproxy)
+{
+ struct acl *acl, *aclb;
+ struct logger *log, *logb;
+ struct cap_hdr *h,*h_next;
+
+ ha_free(&defproxy->id);
+ ha_free(&defproxy->conf.file);
+ ha_free((char **)&defproxy->defsrv.conf.file);
+ ha_free(&defproxy->check_command);
+ ha_free(&defproxy->check_path);
+ ha_free(&defproxy->cookie_name);
+ ha_free(&defproxy->rdp_cookie_name);
+ ha_free(&defproxy->dyncookie_key);
+ ha_free(&defproxy->cookie_domain);
+ ha_free(&defproxy->cookie_attrs);
+ ha_free(&defproxy->lbprm.arg_str);
+ ha_free(&defproxy->capture_name);
+ istfree(&defproxy->monitor_uri);
+ ha_free(&defproxy->defbe.name);
+ ha_free(&defproxy->conn_src.iface_name);
+ istfree(&defproxy->server_id_hdr_name);
+
+ http_ext_clean(defproxy);
+
+ list_for_each_entry_safe(acl, aclb, &defproxy->acl, list) {
+ LIST_DELETE(&acl->list);
+ prune_acl(acl);
+ free(acl);
+ }
+
+ free_act_rules(&defproxy->tcp_req.inspect_rules);
+ free_act_rules(&defproxy->tcp_rep.inspect_rules);
+ free_act_rules(&defproxy->tcp_req.l4_rules);
+ free_act_rules(&defproxy->tcp_req.l5_rules);
+ free_act_rules(&defproxy->http_req_rules);
+ free_act_rules(&defproxy->http_res_rules);
+ free_act_rules(&defproxy->http_after_res_rules);
+
+ h = defproxy->req_cap;
+ while (h) {
+ h_next = h->next;
+ free(h->name);
+ pool_destroy(h->pool);
+ free(h);
+ h = h_next;
+ }
+
+ h = defproxy->rsp_cap;
+ while (h) {
+ h_next = h->next;
+ free(h->name);
+ pool_destroy(h->pool);
+ free(h);
+ h = h_next;
+ }
+
+ if (defproxy->conf.logformat_string != default_http_log_format &&
+ defproxy->conf.logformat_string != default_tcp_log_format &&
+ defproxy->conf.logformat_string != clf_http_log_format &&
+ defproxy->conf.logformat_string != default_https_log_format) {
+ ha_free(&defproxy->conf.logformat_string);
+ }
+
+ if (defproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ ha_free(&defproxy->conf.logformat_sd_string);
+
+ list_for_each_entry_safe(log, logb, &defproxy->loggers, list) {
+ LIST_DEL_INIT(&log->list);
+ free_logger(log);
+ }
+
+ ha_free(&defproxy->conf.uniqueid_format_string);
+ ha_free(&defproxy->conf.error_logformat_string);
+ ha_free(&defproxy->conf.lfs_file);
+ ha_free(&defproxy->conf.lfsd_file);
+ ha_free(&defproxy->conf.uif_file);
+ ha_free(&defproxy->conf.elfs_file);
+ chunk_destroy(&defproxy->log_tag);
+
+ free_email_alert(defproxy);
+ proxy_release_conf_errors(defproxy);
+ deinit_proxy_tcpcheck(defproxy);
+
+ /* FIXME: we cannot free uri_auth because it might already be used by
+ * another proxy (legacy code for stats URI ...). Refcount anyone ?
+ */
+}
+
+/* delete a defproxy from the tree if still in it, frees its content and its
+ * storage. Nothing is done if <px> is NULL or if it doesn't have PR_CAP_DEF
+ * set, allowing to pass it the direct result of a lookup function.
+ */
+void proxy_destroy_defaults(struct proxy *px)
+{
+ if (!px)
+ return;
+ if (!(px->cap & PR_CAP_DEF))
+ return;
+ BUG_ON(px->conf.refcount != 0);
+ ebpt_delete(&px->conf.by_name);
+ proxy_free_defaults(px);
+ free(px);
+}
+
+/* delete all unreferenced default proxies. A default proxy is unreferenced if
+ * its refcount is equal to zero.
+ */
+void proxy_destroy_all_unref_defaults()
+{
+ struct ebpt_node *n;
+
+ n = ebpt_first(&defproxy_by_name);
+ while (n) {
+ struct proxy *px = container_of(n, struct proxy, conf.by_name);
+ BUG_ON(!(px->cap & PR_CAP_DEF));
+ n = ebpt_next(n);
+ if (!px->conf.refcount)
+ proxy_destroy_defaults(px);
+ }
+}
+
+/* Add a reference on the default proxy <defpx> for the proxy <px>. Nothing is
+ * done if <px> already references <defpx>. Otherwise, the default proxy
+ * refcount is incremented by one. For now, this operation is not thread safe
+ * and is performed during the init stage only.
+ */
+void proxy_ref_defaults(struct proxy *px, struct proxy *defpx)
+{
+ if (px->defpx == defpx)
+ return;
+ BUG_ON(px->defpx != NULL);
+ px->defpx = defpx;
+ defpx->conf.refcount++;
+}
+
+/* proxy <px> removes its reference on its default proxy. The default proxy
+ * refcount is decremented by one. If it was the last reference, the
+ * corresponding default proxy is destroyed. For now this operation is not
+ * thread safe and is performed during the deinit stage only.
+ */
+void proxy_unref_defaults(struct proxy *px)
+{
+ if (px->defpx == NULL)
+ return;
+ if (!--px->defpx->conf.refcount)
+ proxy_destroy_defaults(px->defpx);
+ px->defpx = NULL;
+}
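+
+/* Lifecycle sketch (illustrative): a named defaults section stays alive as
+ * long as at least one proxy references it:
+ *
+ *     proxy_ref_defaults(px1, defpx);     // refcount: 1
+ *     proxy_ref_defaults(px2, defpx);     // refcount: 2
+ *     proxy_unref_defaults(px1);          // refcount: 1
+ *     proxy_unref_defaults(px2);          // refcount: 0 -> destroyed
+ */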
+
+/* Allocates a new proxy <name> of type <cap>.
+ * Returns the proxy instance on success. On error, NULL is returned.
+ */
+struct proxy *alloc_new_proxy(const char *name, unsigned int cap, char **errmsg)
+{
+ struct proxy *curproxy;
+
+ if ((curproxy = calloc(1, sizeof(*curproxy))) == NULL) {
+ memprintf(errmsg, "proxy '%s': out of memory", name);
+ goto fail;
+ }
+
+ init_new_proxy(curproxy);
+ curproxy->last_change = ns_to_sec(now_ns);
+ curproxy->id = strdup(name);
+ curproxy->cap = cap;
+
+ if (!(cap & PR_CAP_INT))
+ proxy_store_name(curproxy);
+
+ done:
+ return curproxy;
+
+ fail:
+ /* Note: in case of fatal error here, we WILL make valgrind unhappy,
+ * but it's not worth trying to unroll everything here just before
+ * quitting.
+ */
+ free(curproxy);
+ return NULL;
+}
+
+/* Copy the proxy settings from <defproxy> to <curproxy>.
+ * Returns 0 on success.
+ * Returns 1 on error. <errmsg> will be allocated with an error description.
+ */
+static int proxy_defproxy_cpy(struct proxy *curproxy, const struct proxy *defproxy,
+ char **errmsg)
+{
+ struct logger *tmplogger;
+ char *tmpmsg = NULL;
+
+ /* set default values from the specified default proxy */
+ srv_settings_cpy(&curproxy->defsrv, &defproxy->defsrv, 0);
+
+ curproxy->flags = (defproxy->flags & PR_FL_DISABLED); /* Only inherit from disabled flag */
+ curproxy->options = defproxy->options;
+ curproxy->options2 = defproxy->options2;
+ curproxy->no_options = defproxy->no_options;
+ curproxy->no_options2 = defproxy->no_options2;
+ curproxy->retry_type = defproxy->retry_type;
+ curproxy->tcp_req.inspect_delay = defproxy->tcp_req.inspect_delay;
+ curproxy->tcp_rep.inspect_delay = defproxy->tcp_rep.inspect_delay;
+
+ http_ext_clean(curproxy);
+ http_ext_dup(defproxy, curproxy);
+
+ if (isttest(defproxy->server_id_hdr_name))
+ curproxy->server_id_hdr_name = istdup(defproxy->server_id_hdr_name);
+
+ /* initialize error relocations */
+ if (!proxy_dup_default_conf_errors(curproxy, defproxy, &tmpmsg)) {
+ memprintf(errmsg, "proxy '%s' : %s", curproxy->id, tmpmsg);
+ free(tmpmsg);
+ return 1;
+ }
+
+ if (curproxy->cap & PR_CAP_FE) {
+ curproxy->maxconn = defproxy->maxconn;
+ curproxy->backlog = defproxy->backlog;
+ curproxy->fe_sps_lim = defproxy->fe_sps_lim;
+
+ curproxy->to_log = defproxy->to_log & ~LW_COOKIE & ~LW_REQHDR & ~ LW_RSPHDR;
+ curproxy->max_out_conns = defproxy->max_out_conns;
+
+ curproxy->clitcpka_cnt = defproxy->clitcpka_cnt;
+ curproxy->clitcpka_idle = defproxy->clitcpka_idle;
+ curproxy->clitcpka_intvl = defproxy->clitcpka_intvl;
+ }
+
+ if (curproxy->cap & PR_CAP_BE) {
+ curproxy->lbprm.algo = defproxy->lbprm.algo;
+ curproxy->lbprm.hash_balance_factor = defproxy->lbprm.hash_balance_factor;
+ curproxy->fullconn = defproxy->fullconn;
+ curproxy->conn_retries = defproxy->conn_retries;
+ curproxy->redispatch_after = defproxy->redispatch_after;
+ curproxy->max_ka_queue = defproxy->max_ka_queue;
+
+ curproxy->tcpcheck_rules.flags = (defproxy->tcpcheck_rules.flags & ~TCPCHK_RULES_UNUSED_RS);
+ curproxy->tcpcheck_rules.list = defproxy->tcpcheck_rules.list;
+ if (!LIST_ISEMPTY(&defproxy->tcpcheck_rules.preset_vars)) {
+ if (!dup_tcpcheck_vars(&curproxy->tcpcheck_rules.preset_vars,
+ &defproxy->tcpcheck_rules.preset_vars)) {
+ memprintf(errmsg, "proxy '%s': failed to duplicate tcpcheck preset-vars", curproxy->id);
+ return 1;
+ }
+ }
+
+ curproxy->ck_opts = defproxy->ck_opts;
+
+ if (defproxy->cookie_name)
+ curproxy->cookie_name = strdup(defproxy->cookie_name);
+ curproxy->cookie_len = defproxy->cookie_len;
+
+ if (defproxy->dyncookie_key)
+ curproxy->dyncookie_key = strdup(defproxy->dyncookie_key);
+ if (defproxy->cookie_domain)
+ curproxy->cookie_domain = strdup(defproxy->cookie_domain);
+
+ if (defproxy->cookie_maxidle)
+ curproxy->cookie_maxidle = defproxy->cookie_maxidle;
+
+ if (defproxy->cookie_maxlife)
+ curproxy->cookie_maxlife = defproxy->cookie_maxlife;
+
+ if (defproxy->rdp_cookie_name)
+ curproxy->rdp_cookie_name = strdup(defproxy->rdp_cookie_name);
+ curproxy->rdp_cookie_len = defproxy->rdp_cookie_len;
+
+ if (defproxy->cookie_attrs)
+ curproxy->cookie_attrs = strdup(defproxy->cookie_attrs);
+
+ if (defproxy->lbprm.arg_str)
+ curproxy->lbprm.arg_str = strdup(defproxy->lbprm.arg_str);
+ curproxy->lbprm.arg_len = defproxy->lbprm.arg_len;
+ curproxy->lbprm.arg_opt1 = defproxy->lbprm.arg_opt1;
+ curproxy->lbprm.arg_opt2 = defproxy->lbprm.arg_opt2;
+ curproxy->lbprm.arg_opt3 = defproxy->lbprm.arg_opt3;
+
+ if (defproxy->conn_src.iface_name)
+ curproxy->conn_src.iface_name = strdup(defproxy->conn_src.iface_name);
+ curproxy->conn_src.iface_len = defproxy->conn_src.iface_len;
+ curproxy->conn_src.opts = defproxy->conn_src.opts;
+#if defined(CONFIG_HAP_TRANSPARENT)
+ curproxy->conn_src.tproxy_addr = defproxy->conn_src.tproxy_addr;
+#endif
+ curproxy->load_server_state_from_file = defproxy->load_server_state_from_file;
+
+ curproxy->srvtcpka_cnt = defproxy->srvtcpka_cnt;
+ curproxy->srvtcpka_idle = defproxy->srvtcpka_idle;
+ curproxy->srvtcpka_intvl = defproxy->srvtcpka_intvl;
+ }
+
+ if (curproxy->cap & PR_CAP_FE) {
+ if (defproxy->capture_name)
+ curproxy->capture_name = strdup(defproxy->capture_name);
+ curproxy->capture_namelen = defproxy->capture_namelen;
+ curproxy->capture_len = defproxy->capture_len;
+
+ curproxy->nb_req_cap = defproxy->nb_req_cap;
+ curproxy->req_cap = defproxy->req_cap;
+
+ curproxy->nb_rsp_cap = defproxy->nb_rsp_cap;
+ curproxy->rsp_cap = defproxy->rsp_cap;
+ }
+
+ if (curproxy->cap & PR_CAP_FE) {
+ curproxy->timeout.client = defproxy->timeout.client;
+ curproxy->timeout.client_hs = defproxy->timeout.client_hs;
+ curproxy->timeout.clientfin = defproxy->timeout.clientfin;
+ curproxy->timeout.tarpit = defproxy->timeout.tarpit;
+ curproxy->timeout.httpreq = defproxy->timeout.httpreq;
+ curproxy->timeout.httpka = defproxy->timeout.httpka;
+ if (isttest(defproxy->monitor_uri))
+ curproxy->monitor_uri = istdup(defproxy->monitor_uri);
+ if (defproxy->defbe.name)
+ curproxy->defbe.name = strdup(defproxy->defbe.name);
+
+ /* get either a pointer to the logformat string or a copy of it */
+ curproxy->conf.logformat_string = defproxy->conf.logformat_string;
+ if (curproxy->conf.logformat_string &&
+ curproxy->conf.logformat_string != default_http_log_format &&
+ curproxy->conf.logformat_string != default_tcp_log_format &&
+ curproxy->conf.logformat_string != clf_http_log_format &&
+ curproxy->conf.logformat_string != default_https_log_format)
+ curproxy->conf.logformat_string = strdup(curproxy->conf.logformat_string);
+
+ if (defproxy->conf.lfs_file) {
+ curproxy->conf.lfs_file = strdup(defproxy->conf.lfs_file);
+ curproxy->conf.lfs_line = defproxy->conf.lfs_line;
+ }
+
+ /* get either a pointer to the logformat string for RFC5424 structured-data or a copy of it */
+ curproxy->conf.logformat_sd_string = defproxy->conf.logformat_sd_string;
+ if (curproxy->conf.logformat_sd_string &&
+ curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format)
+ curproxy->conf.logformat_sd_string = strdup(curproxy->conf.logformat_sd_string);
+
+ if (defproxy->conf.lfsd_file) {
+ curproxy->conf.lfsd_file = strdup(defproxy->conf.lfsd_file);
+ curproxy->conf.lfsd_line = defproxy->conf.lfsd_line;
+ }
+
+ curproxy->conf.error_logformat_string = defproxy->conf.error_logformat_string;
+ if (curproxy->conf.error_logformat_string)
+ curproxy->conf.error_logformat_string = strdup(curproxy->conf.error_logformat_string);
+
+ if (defproxy->conf.elfs_file) {
+ curproxy->conf.elfs_file = strdup(defproxy->conf.elfs_file);
+ curproxy->conf.elfs_line = defproxy->conf.elfs_line;
+ }
+ }
+
+ if (curproxy->cap & PR_CAP_BE) {
+ curproxy->timeout.connect = defproxy->timeout.connect;
+ curproxy->timeout.server = defproxy->timeout.server;
+ curproxy->timeout.serverfin = defproxy->timeout.serverfin;
+ curproxy->timeout.check = defproxy->timeout.check;
+ curproxy->timeout.queue = defproxy->timeout.queue;
+ curproxy->timeout.tarpit = defproxy->timeout.tarpit;
+ curproxy->timeout.httpreq = defproxy->timeout.httpreq;
+ curproxy->timeout.httpka = defproxy->timeout.httpka;
+ curproxy->timeout.tunnel = defproxy->timeout.tunnel;
+ curproxy->conn_src.source_addr = defproxy->conn_src.source_addr;
+ }
+
+ curproxy->mode = defproxy->mode;
+ curproxy->uri_auth = defproxy->uri_auth; /* for stats */
+
+ /* copy default loggers to curproxy */
+ list_for_each_entry(tmplogger, &defproxy->loggers, list) {
+ struct logger *node = dup_logger(tmplogger);
+
+ if (!node) {
+ memprintf(errmsg, "proxy '%s': out of memory", curproxy->id);
+ return 1;
+ }
+ LIST_APPEND(&curproxy->loggers, &node->list);
+ }
+
+ curproxy->conf.uniqueid_format_string = defproxy->conf.uniqueid_format_string;
+ if (curproxy->conf.uniqueid_format_string)
+ curproxy->conf.uniqueid_format_string = strdup(curproxy->conf.uniqueid_format_string);
+
+ chunk_dup(&curproxy->log_tag, &defproxy->log_tag);
+
+ if (defproxy->conf.uif_file) {
+ curproxy->conf.uif_file = strdup(defproxy->conf.uif_file);
+ curproxy->conf.uif_line = defproxy->conf.uif_line;
+ }
+
+ /* copy default header unique id */
+ if (isttest(defproxy->header_unique_id)) {
+ const struct ist copy = istdup(defproxy->header_unique_id);
+
+ if (!isttest(copy)) {
+ memprintf(errmsg, "proxy '%s': out of memory for unique-id-header", curproxy->id);
+ return 1;
+ }
+ curproxy->header_unique_id = copy;
+ }
+
+ /* default compression options */
+ if (defproxy->comp != NULL) {
+ curproxy->comp = calloc(1, sizeof(*curproxy->comp));
+ if (!curproxy->comp) {
+ memprintf(errmsg, "proxy '%s': out of memory for default compression options", curproxy->id);
+ return 1;
+ }
+ curproxy->comp->algos_res = defproxy->comp->algos_res;
+ curproxy->comp->algo_req = defproxy->comp->algo_req;
+ curproxy->comp->types_res = defproxy->comp->types_res;
+ curproxy->comp->types_req = defproxy->comp->types_req;
+ curproxy->comp->flags = defproxy->comp->flags;
+ }
+
+ if (defproxy->check_path)
+ curproxy->check_path = strdup(defproxy->check_path);
+ if (defproxy->check_command)
+ curproxy->check_command = strdup(defproxy->check_command);
+
+ if (defproxy->email_alert.mailers.name)
+ curproxy->email_alert.mailers.name = strdup(defproxy->email_alert.mailers.name);
+ if (defproxy->email_alert.from)
+ curproxy->email_alert.from = strdup(defproxy->email_alert.from);
+ if (defproxy->email_alert.to)
+ curproxy->email_alert.to = strdup(defproxy->email_alert.to);
+ if (defproxy->email_alert.myhostname)
+ curproxy->email_alert.myhostname = strdup(defproxy->email_alert.myhostname);
+ curproxy->email_alert.level = defproxy->email_alert.level;
+ curproxy->email_alert.set = defproxy->email_alert.set;
+
+ return 0;
+}
+
+/* Allocates a new proxy <name> of type <cap> found at position <file:linenum>,
+ * preset it from the defaults of <defproxy> and returns it. In case of error,
+ * an alert is printed and NULL is returned.
+ */
+struct proxy *parse_new_proxy(const char *name, unsigned int cap,
+ const char *file, int linenum,
+ const struct proxy *defproxy)
+{
+ struct proxy *curproxy = NULL;
+ char *errmsg = NULL;
+
+ if (!(curproxy = alloc_new_proxy(name, cap, &errmsg))) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ free(errmsg);
+ return NULL;
+ }
+
+ if (defproxy) {
+ if (proxy_defproxy_cpy(curproxy, defproxy, &errmsg)) {
+ ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg);
+ free(errmsg);
+
+ ha_free(&curproxy);
+ return NULL;
+ }
+ }
+
+ curproxy->conf.args.file = curproxy->conf.file = strdup(file);
+ curproxy->conf.args.line = curproxy->conf.line = linenum;
+
+ return curproxy;
+}
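+
+/* Usage sketch (editor's example; "curr_defproxy" stands for whatever
+ * defaults section applies): the section parser typically creates a new
+ * proxy this way:
+ *
+ *     curpx = parse_new_proxy(args[1], PR_CAP_FE, file, linenum, curr_defproxy);
+ *     if (!curpx)
+ *         return -1;   // an alert was already emitted
+ */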
+
+/* to be called under the proxy lock after pausing some listeners. This will
+ * automatically update the p->flags flag
+ */
+void proxy_cond_pause(struct proxy *p)
+{
+ if (p->li_ready)
+ return;
+ p->flags |= PR_FL_PAUSED;
+}
+
+/* to be called under the proxy lock after resuming some listeners. This will
+ * automatically update the p->flags flag
+ */
+void proxy_cond_resume(struct proxy *p)
+{
+ if (!p->li_ready)
+ return;
+ p->flags &= ~PR_FL_PAUSED;
+}
+
+/* to be called under the proxy lock after stopping some listeners. This will
+ * automatically update the p->flags flag after stopping the last one, and
+ * will emit a log indicating the proxy's condition. The function is idempotent
+ * so that it will not emit multiple logs; a proxy will be disabled only once.
+ */
+void proxy_cond_disable(struct proxy *p)
+{
+ if (p->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ return;
+
+ if (p->li_ready + p->li_paused > 0)
+ return;
+
+ p->flags |= PR_FL_STOPPED;
+
+ /* Note: syslog proxies use their own loggers so while it's somewhat OK
+ * to report them being stopped as a warning, we must not spam their log
+ * servers which are in fact production servers. For other types (CLI,
+ * peers, etc) we must not report them at all as they're not really on
+ * the data plane but on the control plane.
+ */
+ if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP || p->mode == PR_MODE_SYSLOG) && !(p->cap & PR_CAP_INT))
+ ha_warning("Proxy %s stopped (cumulated conns: FE: %lld, BE: %lld).\n",
+ p->id, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+
+ if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP) && !(p->cap & PR_CAP_INT))
+ send_log(p, LOG_WARNING, "Proxy %s stopped (cumulated conns: FE: %lld, BE: %lld).\n",
+ p->id, p->fe_counters.cum_conn, p->be_counters.cum_conn);
+
+ if (p->table && p->table->size && p->table->sync_task)
+ task_wakeup(p->table->sync_task, TASK_WOKEN_MSG);
+
+ if (p->task)
+ task_wakeup(p->task, TASK_WOKEN_MSG);
+}
+
+/*
+ * This is the proxy management task. It enables proxies when there are enough
+ * free streams, or stops them when the table is full. It is designed to be
+ * called as a task which is woken up upon stopping or when rate limiting must
+ * be enforced.
+ */
+struct task *manage_proxy(struct task *t, void *context, unsigned int state)
+{
+ struct proxy *p = context;
+ int next = TICK_ETERNITY;
+ unsigned int wait;
+
+ /* We should periodically try to enable listeners waiting for a
+ * global resource here.
+ */
+
+ /* If the proxy holds a stick table, we need to purge all unused
+ * entries. These are all the ones in the table with ref_cnt == 0
+ * and all the ones in the pool used to allocate new entries. Any
+ * entry attached to an existing stream waiting for a store will
+ * be in neither list. Any entry being dumped will have ref_cnt > 0.
+ * However we protect tables that are being synced to peers.
+ */
+ if (unlikely(stopping && (p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && p->table && p->table->current)) {
+
+ if (!p->table->refcnt) {
+ /* !table->refcnt means there
+ * is no more pending full resync
+ * to push to a new process and
+ * we are free to flush the table.
+ */
+ int budget;
+ int cleaned_up;
+
+ /* We purposely enforce a budget limitation since we don't want
+ * to spend too much time purging old entries
+ *
+ * This is known to cause the watchdog to occasionally trigger if
+ * the table is huge and all entries become available for purge
+ * at the same time
+ *
+ * Moreover, we must also anticipate the pool_gc() call which
+ * will also be much slower if there is too much work at once
+ */
+ budget = MIN(p->table->current, (1 << 15)); /* max: 32K */
+ cleaned_up = stktable_trash_oldest(p->table, budget);
+ if (cleaned_up) {
+ /* immediately release freed memory since we are stopping */
+ pool_gc(NULL);
+ if (cleaned_up > (budget / 2)) {
+ /* most of the budget was used to purge entries,
+ * it is very likely that there are still trashable
+ * entries in the table, reschedule a new cleanup
+ * attempt ASAP
+ */
+ t->expire = TICK_ETERNITY;
+ task_wakeup(t, TASK_WOKEN_RES);
+ return t;
+ }
+ }
+ }
+ if (p->table->current) {
+ /* some entries still remain but are not yet available
+ * for cleanup, let's recheck in one second
+ */
+ next = tick_first(next, tick_add(now_ms, 1000));
+ }
+ }
+
+ /* the rest below is just for frontends */
+ if (!(p->cap & PR_CAP_FE))
+ goto out;
+
+ /* check the various reasons we may find to block the frontend */
+ if (unlikely(p->feconn >= p->maxconn))
+ goto out;
+
+ if (p->fe_sps_lim &&
+ (wait = next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0))) {
+ /* we're blocking because a limit was reached on the number of
+ * requests/s on the frontend. We want to re-check ASAP, which
+ * means in 1 ms before estimated expiration date, because the
+ * timer will have settled down.
+ */
+ next = tick_first(next, tick_add(now_ms, wait));
+ goto out;
+ }
+
+ /* The proxy is not limited so we can re-enable any waiting listener */
+ dequeue_proxy_listeners(p);
+ out:
+ t->expire = next;
+ task_queue(t);
+ return t;
+}
+
+
+static int proxy_parse_grace(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+
+ if (!*args[1]) {
+ memprintf(err, "'%s' expects <time> as argument.\n", args[0]);
+ return -1;
+ }
+ res = parse_time_err(args[1], &global.grace_delay, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static int proxy_parse_hard_stop_after(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+
+ if (!*args[1]) {
+ memprintf(err, "'%s' expects <time> as argument.\n", args[0]);
+ return -1;
+ }
+ res = parse_time_err(args[1], &global.hard_stop_after, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+ return 0;
+}
+
+static int proxy_parse_close_spread_time(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *res;
+
+ if (!*args[1]) {
+ memprintf(err, "'%s' expects <time> as argument.\n", args[0]);
+ return -1;
+ }
+
+ /* If close-spread-time is set to "infinite", disable the active connection
+ * closing during soft-stop.
+ */
+ if (strcmp(args[1], "infinite") == 0) {
+ global.tune.options |= GTUNE_DISABLE_ACTIVE_CLOSE;
+ global.close_spread_time = TICK_ETERNITY;
+ return 0;
+ }
+
+ res = parse_time_err(args[1], &global.close_spread_time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[1], args[0]);
+ return -1;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n", *res, args[0]);
+ return -1;
+ }
+ global.tune.options &= ~GTUNE_DISABLE_ACTIVE_CLOSE;
+
+ return 0;
+}
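+
+/* Example (illustrative): assuming the three parsers above are registered as
+ * global keywords, reload behavior can be shaped with:
+ *
+ *     global
+ *         grace             10s
+ *         close-spread-time 30s
+ *         hard-stop-after   1m
+ *
+ * i.e. delay the soft-stop by 10s, spread connection closing over 30s, and
+ * force a hard stop if the old process is still there after one minute.
+ */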
+
+struct task *hard_stop(struct task *t, void *context, unsigned int state)
+{
+ struct proxy *p;
+ struct stream *s;
+ int thr;
+
+ if (killed) {
+ ha_warning("Some tasks resisted to hard-stop, exiting now.\n");
+ send_log(NULL, LOG_WARNING, "Some tasks resisted to hard-stop, exiting now.\n");
+ killed = 2;
+ for (thr = 0; thr < global.nbthread; thr++)
+ if (_HA_ATOMIC_LOAD(&ha_thread_info[thr].tg->threads_enabled) & ha_thread_info[thr].ltid_bit)
+ wake_thread(thr);
+ t->expire = TICK_ETERNITY;
+ return t;
+ }
+
+ ha_warning("soft-stop running for too long, performing a hard-stop.\n");
+ send_log(NULL, LOG_WARNING, "soft-stop running for too long, performing a hard-stop.\n");
+ p = proxies_list;
+ while (p) {
+ if ((p->cap & PR_CAP_FE) && (p->feconn > 0)) {
+ ha_warning("Proxy %s hard-stopped (%d remaining conns will be closed).\n",
+ p->id, p->feconn);
+ send_log(p, LOG_WARNING, "Proxy %s hard-stopped (%d remaining conns will be closed).\n",
+ p->id, p->feconn);
+ }
+ p = p->next;
+ }
+
+ thread_isolate();
+
+ for (thr = 0; thr < global.nbthread; thr++) {
+ list_for_each_entry(s, &ha_thread_ctx[thr].streams, list) {
+ stream_shutdown(s, SF_ERR_KILLED);
+ }
+ }
+
+ thread_release();
+
+ killed = 1;
+ t->expire = tick_add(now_ms, MS_TO_TICKS(1000));
+ return t;
+}
+
+/* perform the soft-stop right now (i.e. unbind listeners) */
+static void do_soft_stop_now()
+{
+ struct proxy *p;
+ struct task *task;
+
+ /* disable busy polling to avoid cpu eating for the new process */
+ global.tune.options &= ~GTUNE_BUSY_POLLING;
+
+ if (tick_isset(global.close_spread_time)) {
+ global.close_spread_end = tick_add(now_ms, global.close_spread_time);
+ }
+
+ /* schedule a hard-stop after a delay if needed */
+ if (tick_isset(global.hard_stop_after)) {
+ task = task_new_anywhere();
+ if (task) {
+ task->process = hard_stop;
+ task_schedule(task, tick_add(now_ms, global.hard_stop_after));
+ }
+ else {
+ ha_alert("out of memory trying to allocate the hard-stop task.\n");
+ }
+ }
+
+ /* we isolate so that we have a chance of stopping listeners in other groups */
+ thread_isolate();
+
+ /* stop all stoppable listeners */
+ protocol_stop_now();
+
+ thread_release();
+
+ /* Loop on proxies to stop backends */
+ p = proxies_list;
+ while (p) {
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+ proxy_cond_disable(p);
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ p = p->next;
+ }
+
+ /* signal zero is used to broadcast the "stopping" event */
+ signal_handler(0);
+}
+
+/* triggered by a soft-stop delayed with `grace` */
+static struct task *grace_expired(struct task *t, void *context, unsigned int state)
+{
+ ha_notice("Grace period expired, proceeding with soft-stop now.\n");
+ send_log(NULL, LOG_NOTICE, "Grace period expired, proceeding with soft-stop now.\n");
+ do_soft_stop_now();
+ task_destroy(t);
+ return NULL;
+}
+
+/*
+ * this function disables health-check servers so that the process will quickly be ignored
+ * by load balancers.
+ */
+void soft_stop(void)
+{
+ struct task *task;
+
+ stopping = 1;
+
+ if (tick_isset(global.grace_delay)) {
+ task = task_new_anywhere();
+ if (task) {
+ ha_notice("Scheduling a soft-stop in %u ms.\n", global.grace_delay);
+ send_log(NULL, LOG_WARNING, "Scheduling a soft-stop in %u ms.\n", global.grace_delay);
+ task->process = grace_expired;
+ task_schedule(task, tick_add(now_ms, global.grace_delay));
+ return;
+ }
+ else {
+ ha_alert("out of memory trying to allocate the stop-stop task, stopping now.\n");
+ }
+ }
+
+ /* no grace (or failure to enforce it): stop now */
+ do_soft_stop_now();
+}
+
+
+/* Temporarily disables listening on all of the proxy's listeners. Upon
+ * success, the proxy enters the PR_FL_PAUSED state. The function returns 0
+ * if it fails, or non-zero on success.
+ * The function takes the proxy's lock so it's safe to
+ * call from multiple places.
+ */
+int pause_proxy(struct proxy *p)
+{
+ struct listener *l;
+
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+
+ if (!(p->cap & PR_CAP_FE) || (p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) || !p->li_ready)
+ goto end;
+
+ list_for_each_entry(l, &p->conf.listeners, by_fe)
+ suspend_listener(l, 1, 0);
+
+ if (p->li_ready) {
+ ha_warning("%s %s failed to enter pause mode.\n", proxy_cap_str(p->cap), p->id);
+ send_log(p, LOG_WARNING, "%s %s failed to enter pause mode.\n", proxy_cap_str(p->cap), p->id);
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ return 0;
+ }
+end:
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ return 1;
+}
+
+/*
+ * This function completely stops a proxy and releases its listeners. It has
+ * to be called when going down in order to release the ports so that another
+ * process may bind to them. It must also be called on disabled proxies at the
+ * end of start-up. If all listeners are closed, the proxy is set to the
+ * PR_FL_STOPPED state.
+ * The function takes the proxy's lock so it's safe to
+ * call from multiple places.
+ */
+void stop_proxy(struct proxy *p)
+{
+ struct listener *l;
+
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+
+ list_for_each_entry(l, &p->conf.listeners, by_fe)
+ stop_listener(l, 1, 0, 0);
+
+ if (!(p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) && !p->li_ready) {
+ /* might be just a backend */
+ p->flags |= PR_FL_STOPPED;
+ }
+
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+}
+
+/* This function resumes listening on the specified proxy. It scans all of its
+ * listeners and tries to enable them all. If any of them fails, the proxy is
+ * put back to the paused state. It returns 1 upon success, or zero if an error
+ * is encountered.
+ * The function takes the proxy's lock so it's safe to
+ * call from multiple places.
+ */
+int resume_proxy(struct proxy *p)
+{
+ struct listener *l;
+ int fail;
+
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+
+ if ((p->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) || !p->li_paused)
+ goto end;
+
+ fail = 0;
+ list_for_each_entry(l, &p->conf.listeners, by_fe) {
+ if (!resume_listener(l, 1, 0)) {
+ int port;
+
+ port = get_host_port(&l->rx.addr);
+ if (port) {
+ ha_warning("Port %d busy while trying to enable %s %s.\n",
+ port, proxy_cap_str(p->cap), p->id);
+ send_log(p, LOG_WARNING, "Port %d busy while trying to enable %s %s.\n",
+ port, proxy_cap_str(p->cap), p->id);
+ }
+ else {
+ ha_warning("Bind on socket %d busy while trying to enable %s %s.\n",
+ l->luid, proxy_cap_str(p->cap), p->id);
+ send_log(p, LOG_WARNING, "Bind on socket %d busy while trying to enable %s %s.\n",
+ l->luid, proxy_cap_str(p->cap), p->id);
+ }
+
+ /* Another port might have been enabled. Let's stop everything. */
+ fail = 1;
+ break;
+ }
+ }
+
+ if (fail) {
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ /* pause_proxy will take PROXY_LOCK */
+ pause_proxy(p);
+ return 0;
+ }
+end:
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ return 1;
+}
+
+/* Set current stream's backend to <be>. Nothing is done if the
+ * stream already had a backend assigned, which is indicated by
+ * s->flags & SF_BE_ASSIGNED.
+ * All flags, stats and counters which need be updated are updated.
+ * Returns 1 if done, 0 in case of internal error, eg: lack of resource.
+ */
+int stream_set_backend(struct stream *s, struct proxy *be)
+{
+ unsigned int req_ana;
+
+ if (s->flags & SF_BE_ASSIGNED)
+ return 1;
+
+ if (flt_set_stream_backend(s, be) < 0)
+ return 0;
+
+ s->be = be;
+ HA_ATOMIC_UPDATE_MAX(&be->be_counters.conn_max,
+ HA_ATOMIC_ADD_FETCH(&be->beconn, 1));
+ proxy_inc_be_ctr(be);
+
+ /* assign new parameters to the stream from the new backend */
+ s->scb->flags &= ~SC_FL_INDEP_STR;
+ if (be->options2 & PR_O2_INDEPSTR)
+ s->scb->flags |= SC_FL_INDEP_STR;
+
+ /* We want to enable the backend-specific analysers except those which
+ * were already run as part of the frontend/listener. Note that it would
+ * be more reliable to store the list of analysers that have been run,
+ * but what we do here is OK for now.
+ */
+ req_ana = be->be_req_ana;
+ if (!(strm_fe(s)->options & PR_O_WREQ_BODY) && be->options & PR_O_WREQ_BODY) {
+ /* The backend requests to parse a request body while this was not
+ * done on the frontend, so add the corresponding analyser
+ */
+ req_ana |= AN_REQ_HTTP_BODY;
+ }
+ if (IS_HTX_STRM(s) && strm_fe(s)->mode != PR_MODE_HTTP) {
+ /* The stream was already upgraded to HTTP, so remove analysers
+ * set during the upgrade
+ */
+ req_ana &= ~(AN_REQ_WAIT_HTTP|AN_REQ_HTTP_PROCESS_FE);
+ }
+ s->req.analysers |= req_ana & ~(strm_li(s) ? strm_li(s)->bind_conf->analysers : 0);
+
+ if (!IS_HTX_STRM(s) && be->mode == PR_MODE_HTTP) {
+ /* If we chain a TCP frontend to an HTX backend, we must upgrade
+ * the client mux */
+ if (!stream_set_http_mode(s, NULL))
+ return 0;
+ }
+ else if (IS_HTX_STRM(s) && be->mode != PR_MODE_HTTP) {
+ /* If a TCP backend is assigned to an HTX stream, return an
+ * error. It may happen for a new stream on a previously
+ * upgraded connection. */
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ return 0;
+ }
+ else {
+ /* If the target backend requires HTTP processing, we have to allocate
+ * the HTTP transaction if we did not have one.
+ */
+ if (unlikely(!s->txn && be->http_needed && !http_create_txn(s)))
+ return 0;
+ }
+
+ s->flags |= SF_BE_ASSIGNED;
+ if (be->options2 & PR_O2_NODELAY) {
+ s->scf->flags |= SC_FL_SND_NEVERWAIT;
+ s->scb->flags |= SC_FL_SND_NEVERWAIT;
+ }
+
+ return 1;
+}
+
+/* Capture a bad request or response and archive it in the proxy's structure.
+ * It is relatively protocol-agnostic so it requires that a number of elements
+ * are passed :
+ * - <proxy> is the proxy where the error was detected and where the snapshot
+ * needs to be stored
+ * - <is_back> indicates that the error happened when receiving the response
+ * - <other_end> is a pointer to the proxy on the other side when known
+ * - <target> is the target of the connection, usually a server or a proxy
+ * - <sess> is the session which experienced the error
+ * - <ctx> may be NULL or should contain any info relevant to the protocol
+ * - <buf> is the buffer containing the offending data
+ * - <buf_ofs> is the position of this buffer's input data in the input
+ * stream, starting at zero. It may be passed as zero if unknown.
+ * - <buf_out> is the portion of <buf->data> which was already forwarded and
+ * which precedes the buffer's input. The buffer's input starts at
+ * buf->head + buf_out.
+ * - <err_pos> is the pointer to the faulty byte in the buffer's input.
+ * - <show> is the callback to use to display <ctx>. It may be NULL.
+ */
+void proxy_capture_error(struct proxy *proxy, int is_back,
+ struct proxy *other_end, enum obj_type *target,
+ const struct session *sess,
+ const struct buffer *buf, long buf_ofs,
+ unsigned int buf_out, unsigned int err_pos,
+ const union error_snapshot_ctx *ctx,
+ void (*show)(struct buffer *, const struct error_snapshot *))
+{
+ struct error_snapshot *es;
+ unsigned int buf_len;
+ int len1, len2;
+ unsigned int ev_id;
+
+ ev_id = HA_ATOMIC_FETCH_ADD(&error_snapshot_id, 1);
+
+ buf_len = b_data(buf) - buf_out;
+
+ es = malloc(sizeof(*es) + buf_len);
+ if (!es)
+ return;
+
+ es->buf_len = buf_len;
+ es->ev_id = ev_id;
+
+ len1 = b_size(buf) - b_peek_ofs(buf, buf_out);
+ if (len1 > buf_len)
+ len1 = buf_len;
+
+ if (len1) {
+ memcpy(es->buf, b_peek(buf, buf_out), len1);
+ len2 = buf_len - len1;
+ if (len2)
+ memcpy(es->buf + len1, b_orig(buf), len2);
+ }
+
+ es->buf_err = err_pos;
+ es->when = date; // user-visible date
+ es->srv = objt_server(target);
+ es->oe = other_end;
+ if (sess && objt_conn(sess->origin) && conn_get_src(__objt_conn(sess->origin)))
+ es->src = *__objt_conn(sess->origin)->src;
+ else
+ memset(&es->src, 0, sizeof(es->src));
+
+ es->buf_wrap = b_wrap(buf) - b_peek(buf, buf_out);
+ es->buf_out = buf_out;
+ es->buf_ofs = buf_ofs;
+
+ /* be sure to indicate the offset of the first IN byte */
+ if (es->buf_ofs >= es->buf_len)
+ es->buf_ofs -= es->buf_len;
+ else
+ es->buf_ofs = 0;
+
+ /* protocol-specific part now */
+ if (ctx)
+ es->ctx = *ctx;
+ else
+ memset(&es->ctx, 0, sizeof(es->ctx));
+ es->show = show;
+
+ /* note: we still lock since we have to be certain that nobody is
+ * dumping the output while we free.
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &proxy->lock);
+ if (is_back) {
+ es = HA_ATOMIC_XCHG(&proxy->invalid_rep, es);
+ } else {
+ es = HA_ATOMIC_XCHG(&proxy->invalid_req, es);
+ }
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &proxy->lock);
+ ha_free(&es);
+}
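+
+/* Editor's note (illustrative, not part of the upstream sources): the pair of
+ * memcpy() calls above implements a classic circular-buffer copy. For a ring
+ * of <size> bytes starting at <orig> whose readable data begins at offset
+ * <head>, the same idea reads:
+ *
+ *   len1 = MIN(buf_len, size - head);   // up to the wrapping point
+ *   memcpy(out, orig + head, len1);
+ *   if (buf_len > len1)                 // wrapped remainder, if any
+ *           memcpy(out + len1, orig, buf_len - len1);
+ */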
+
+/* Configure all proxies which lack a maxconn setting to use the global one by
+ * default. This avoids the common mistake of setting maxconn only
+ * in the global section and discovering the hard way that it doesn't propagate
+ * through the frontends. These values are also propagated through the various
+ * targeted backends, whose fullconn is finally calculated if not yet set.
+ */
+void proxy_adjust_all_maxconn()
+{
+ struct proxy *curproxy;
+ struct switching_rule *swrule1, *swrule2;
+
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ if (curproxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ if (!(curproxy->cap & PR_CAP_FE))
+ continue;
+
+ if (!curproxy->maxconn)
+ curproxy->maxconn = global.maxconn;
+
+ /* update the target backend's tot_fe_maxconn count: default_backend first */
+ if (curproxy->defbe.be)
+ curproxy->defbe.be->tot_fe_maxconn += curproxy->maxconn;
+ else if ((curproxy->cap & PR_CAP_LISTEN) == PR_CAP_LISTEN)
+ curproxy->tot_fe_maxconn += curproxy->maxconn;
+
+ list_for_each_entry(swrule1, &curproxy->switching_rules, list) {
+ /* For each target of switching rules, we update their
+ * tot_fe_maxconn, except if a previous rule points to
+ * the same backend or to the default backend.
+ */
+ if (swrule1->be.backend != curproxy->defbe.be) {
+ /* note: swrule1->be.backend isn't a backend if the rule
+ * is dynamic, it's an expression instead, so it must not
+ * be dereferenced as a backend before being certain it is.
+ */
+ list_for_each_entry(swrule2, &curproxy->switching_rules, list) {
+ if (swrule2 == swrule1) {
+ if (!swrule1->dynamic)
+ swrule1->be.backend->tot_fe_maxconn += curproxy->maxconn;
+ break;
+ }
+ else if (!swrule2->dynamic && swrule2->be.backend == swrule1->be.backend) {
+ /* there are multiple refs of this backend */
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* automatically compute fullconn if not set. We must not do it in the
+ * loop above because cross-references are not yet fully resolved.
+ */
+ for (curproxy = proxies_list; curproxy; curproxy = curproxy->next) {
+ if (curproxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ continue;
+
+ /* If <fullconn> is not set, let's set it to 10% of the sum of
+ * the possible incoming frontend's maxconns.
+ */
+ if (!curproxy->fullconn && (curproxy->cap & PR_CAP_BE)) {
+ /* we have the sum of the maxconns in tot_fe_maxconn. We only
+ * keep 10% of that sum to set the default fullconn, with
+ * a hard minimum of 1 (to avoid a divide by zero).
+ */
+ curproxy->fullconn = (curproxy->tot_fe_maxconn + 9) / 10;
+ if (!curproxy->fullconn)
+ curproxy->fullconn = 1;
+ }
+ }
+}
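+
+/* Editor's note (illustrative, values made up): with two enabled frontends of
+ * maxconn 100 and 150 both routing to the same backend, that backend ends up
+ * with tot_fe_maxconn = 250, and if its fullconn was left unset it becomes
+ * (250 + 9) / 10 = 25.
+ */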
+
+/* Config keywords below */
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "grace", proxy_parse_grace },
+ { CFG_GLOBAL, "hard-stop-after", proxy_parse_hard_stop_after },
+ { CFG_GLOBAL, "close-spread-time", proxy_parse_close_spread_time },
+ { CFG_LISTEN, "timeout", proxy_parse_timeout },
+ { CFG_LISTEN, "clitimeout", proxy_parse_timeout }, /* This keyword actually fails to parse, this line remains for better error messages. */
+ { CFG_LISTEN, "contimeout", proxy_parse_timeout }, /* This keyword actually fails to parse, this line remains for better error messages. */
+ { CFG_LISTEN, "srvtimeout", proxy_parse_timeout }, /* This keyword actually fails to parse, this line remains for better error messages. */
+ { CFG_LISTEN, "rate-limit", proxy_parse_rate_limit },
+ { CFG_LISTEN, "max-keep-alive-queue", proxy_parse_max_ka_queue },
+ { CFG_LISTEN, "declare", proxy_parse_declare },
+ { CFG_LISTEN, "retry-on", proxy_parse_retry_on },
+#ifdef TCP_KEEPCNT
+ { CFG_LISTEN, "clitcpka-cnt", proxy_parse_tcpka_cnt },
+ { CFG_LISTEN, "srvtcpka-cnt", proxy_parse_tcpka_cnt },
+#endif
+#ifdef TCP_KEEPIDLE
+ { CFG_LISTEN, "clitcpka-idle", proxy_parse_tcpka_idle },
+ { CFG_LISTEN, "srvtcpka-idle", proxy_parse_tcpka_idle },
+#endif
+#ifdef TCP_KEEPINTVL
+ { CFG_LISTEN, "clitcpka-intvl", proxy_parse_tcpka_intvl },
+ { CFG_LISTEN, "srvtcpka-intvl", proxy_parse_tcpka_intvl },
+#endif
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/* Expects to find a frontend named <arg> and returns it, otherwise displays an
+ * adequate error message and returns NULL. This function is designed to be
+ * used by functions requiring a frontend on the CLI.
+ */
+struct proxy *cli_find_frontend(struct appctx *appctx, const char *arg)
+{
+ struct proxy *px;
+
+ if (!*arg) {
+ cli_err(appctx, "A frontend name is expected.\n");
+ return NULL;
+ }
+
+ px = proxy_fe_by_name(arg);
+ if (!px) {
+ cli_err(appctx, "No such frontend.\n");
+ return NULL;
+ }
+ return px;
+}
+
+/* Expects to find a backend named <arg> and returns it, otherwise displays an
+ * adequate error message and returns NULL. This function is designed to be
+ * used by functions requiring a backend on the CLI.
+ */
+struct proxy *cli_find_backend(struct appctx *appctx, const char *arg)
+{
+ struct proxy *px;
+
+ if (!*arg) {
+ cli_err(appctx, "A backend name is expected.\n");
+ return NULL;
+ }
+
+ px = proxy_be_by_name(arg);
+ if (!px) {
+ cli_err(appctx, "No such backend.\n");
+ return NULL;
+ }
+ return px;
+}
+
+
+/* parse a "show servers [state|conn]" CLI line, returns 0 if it wants to start
+ * the dump or 1 if it stops immediately. If an argument is specified, it will
+ * reserve a show_srv_ctx context and set the proxy pointer into ->px, its ID
+ * into ->only_pxid, and ->show_conn to 0 for "state", or 1 for "conn".
+ */
+static int cli_parse_show_servers(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_srv_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct proxy *px;
+
+ ctx->show_conn = *args[2] == 'c'; // "conn" vs "state"
+
+ /* check if a backend name has been provided */
+ if (*args[3]) {
+ /* restrict the dump to this backend only */
+ px = proxy_be_by_name(args[3]);
+
+ if (!px)
+ return cli_err(appctx, "Can't find backend.\n");
+
+ ctx->px = px;
+ ctx->only_pxid = px->uuid;
+ }
+ return 0;
+}
+
+/* helper to dump server addr */
+static void dump_server_addr(const struct sockaddr_storage *addr, char *addr_str)
+{
+ addr_str[0] = '\0';
+ switch (addr->ss_family) {
+ case AF_INET:
+ case AF_INET6:
+ addr_to_str(addr, addr_str, INET6_ADDRSTRLEN + 1);
+ break;
+ default:
+ memcpy(addr_str, "-\0", 2);
+ break;
+ }
+}
+
+/* dumps server state information for all the servers found in the backend
+ * pointed to by the show_srv_ctx. This information covers all the parameters
+ * which may change during HAProxy runtime. By default, we only export to the
+ * last known server state file format. This information can be used at next
+ * startup to recover the same level of server state. It takes its context
+ * from show_srv_ctx, with the proxy pointer from ->px, the proxy's id from
+ * ->only_pxid, the server's pointer from ->sv, and the choice of what to
+ * dump from ->show_conn.
+ */
+static int dump_servers_state(struct stconn *sc)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_srv_ctx *ctx = appctx->svcctx;
+ struct proxy *px = ctx->px;
+ struct server *srv;
+ char srv_addr[INET6_ADDRSTRLEN + 1];
+ char srv_agent_addr[INET6_ADDRSTRLEN + 1];
+ char srv_check_addr[INET6_ADDRSTRLEN + 1];
+ time_t srv_time_since_last_change;
+ int bk_f_forced_id, srv_f_forced_id;
+ char *srvrecord;
+
+ if (!ctx->sv)
+ ctx->sv = px->srv;
+
+ for (; ctx->sv != NULL; ctx->sv = srv->next) {
+ srv = ctx->sv;
+
+ dump_server_addr(&srv->addr, srv_addr);
+ dump_server_addr(&srv->check.addr, srv_check_addr);
+ dump_server_addr(&srv->agent.addr, srv_agent_addr);
+
+ srv_time_since_last_change = ns_to_sec(now_ns) - srv->last_change;
+ bk_f_forced_id = px->options & PR_O_FORCED_ID ? 1 : 0;
+ srv_f_forced_id = srv->flags & SRV_F_FORCED_ID ? 1 : 0;
+
+ srvrecord = NULL;
+ if (srv->srvrq && srv->srvrq->name)
+ srvrecord = srv->srvrq->name;
+
+ if (ctx->show_conn == 0) {
+ /* show servers state */
+ chunk_printf(&trash,
+ "%d %s "
+ "%d %s %s "
+ "%d %d %d %d %ld "
+ "%d %d %d %d %d "
+ "%d %d %s %u "
+ "%s %d %d "
+ "%s %s %d"
+ "\n",
+ px->uuid, HA_ANON_CLI(px->id),
+ srv->puid, HA_ANON_CLI(srv->id),
+ hash_ipanon(appctx->cli_anon_key, srv_addr, 0),
+ srv->cur_state, srv->cur_admin, srv->uweight, srv->iweight,
+ (long int)srv_time_since_last_change,
+ srv->check.status, srv->check.result, srv->check.health,
+ srv->check.state & 0x0F, srv->agent.state & 0x1F,
+ bk_f_forced_id, srv_f_forced_id,
+ srv->hostname ? HA_ANON_CLI(srv->hostname) : "-", srv->svc_port,
+ srvrecord ? srvrecord : "-", srv->use_ssl, srv->check.port,
+ srv_check_addr, srv_agent_addr, srv->agent.port);
+ } else {
+ /* show servers conn */
+ int thr;
+
+ chunk_printf(&trash,
+ "%s/%s %d/%d %s %u - %u %u %u %u %u %u %d %u",
+ HA_ANON_CLI(px->id), HA_ANON_CLI(srv->id),
+ px->uuid, srv->puid, hash_ipanon(appctx->cli_anon_key, srv_addr, 0),
+ srv->svc_port, srv->pool_purge_delay,
+ srv->curr_used_conns, srv->max_used_conns, srv->est_need_conns,
+ srv->curr_idle_nb, srv->curr_safe_nb, (int)srv->max_idle_conns, srv->curr_idle_conns);
+
+ for (thr = 0; thr < global.nbthread && srv->curr_idle_thr; thr++)
+ chunk_appendf(&trash, " %u", srv->curr_idle_thr[thr]);
+
+ chunk_appendf(&trash, "\n");
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ return 0;
+ }
+ }
+ return 1;
+}
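+
+/* Editor's note (illustrative, all field values made up): following the
+ * "show servers state" format string above, one dumped line could look like:
+ *
+ *   3 be_app 1 srv1 192.168.1.10 2 0 1 1 3600 15 3 4 6 0 0 0 - 8080 - 0 0 - - 0
+ */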
+
+/* Parses the backend list or simply uses the backend name provided by the
+ * user to return server states. It takes its context from show_srv_ctx,
+ * dumps proxy ->px, and stops there if ->only_pxid is non-null.
+ */
+static int cli_io_handler_servers_state(struct appctx *appctx)
+{
+ struct show_srv_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct proxy *curproxy;
+
+ if (ctx->state == SHOW_SRV_HEAD) {
+ if (ctx->show_conn == 0)
+ chunk_printf(&trash, "%d\n# %s\n", SRV_STATE_FILE_VERSION, SRV_STATE_FILE_FIELD_NAMES);
+ else
+ chunk_printf(&trash,
+ "# bkname/svname bkid/svid addr port - purge_delay used_cur used_max need_est unsafe_nb safe_nb idle_lim idle_cur idle_per_thr[%d]\n",
+ global.nbthread);
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ ctx->state = SHOW_SRV_LIST;
+
+ if (!ctx->px)
+ ctx->px = proxies_list;
+ }
+
+ for (; ctx->px != NULL; ctx->px = curproxy->next) {
+ curproxy = ctx->px;
+ /* servers are only in backends */
+ if ((curproxy->cap & PR_CAP_BE) && !(curproxy->cap & PR_CAP_INT)) {
+ if (!dump_servers_state(sc))
+ return 0;
+ }
+ /* only the selected proxy is dumped */
+ if (ctx->only_pxid)
+ break;
+ }
+
+ return 1;
+}
+
+/* Parses the backend list and simply reports backend names. It keeps the proxy
+ * pointer in svcctx since there's nothing else to store there.
+ */
+static int cli_io_handler_show_backend(struct appctx *appctx)
+{
+ struct proxy *curproxy;
+
+ chunk_reset(&trash);
+
+ if (!appctx->svcctx) {
+ chunk_printf(&trash, "# name\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ appctx->svcctx = proxies_list;
+ }
+
+ for (; appctx->svcctx != NULL; appctx->svcctx = curproxy->next) {
+ curproxy = appctx->svcctx;
+
+ /* looking for non-internal backends only */
+ if ((curproxy->cap & (PR_CAP_BE|PR_CAP_INT)) != PR_CAP_BE)
+ continue;
+
+ chunk_appendf(&trash, "%s\n", curproxy->id);
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Parses the "enable dynamic-cookies backend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock and each server's lock.
+ */
+static int cli_parse_enable_dyncookie_backend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *s;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_backend(appctx, args[3]);
+ if (!px)
+ return 1;
+
+ if (px->mode != PR_MODE_TCP && px->mode != PR_MODE_HTTP)
+ return cli_err(appctx, "Not available.\n");
+
+ /* Note: this lock is to make sure this doesn't change while another
+ * thread is in srv_set_dyncookie().
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+ px->ck_opts |= PR_CK_DYNAMIC;
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ for (s = px->srv; s != NULL; s = s->next) {
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ srv_set_dyncookie(s);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ }
+
+ return 1;
+}
+
+/* Parses the "disable dynamic-cookies backend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock and each server's lock.
+ */
+static int cli_parse_disable_dyncookie_backend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *s;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_backend(appctx, args[3]);
+ if (!px)
+ return 1;
+
+ if (px->mode != PR_MODE_TCP && px->mode != PR_MODE_HTTP)
+ return cli_err(appctx, "Not available.\n");
+
+ /* Note: this lock is to make sure this doesn't change while another
+ * thread is in srv_set_dyncookie().
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+ px->ck_opts &= ~PR_CK_DYNAMIC;
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ for (s = px->srv; s != NULL; s = s->next) {
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ if (!(s->flags & SRV_F_COOKIESET))
+ ha_free(&s->cookie);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ }
+
+ return 1;
+}
+
+/* Parses the "set dynamic-cookie-key backend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock and each server's lock.
+ */
+static int cli_parse_set_dyncookie_key_backend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *s;
+ char *newkey;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_backend(appctx, args[3]);
+ if (!px)
+ return 1;
+
+ if (px->mode != PR_MODE_TCP && px->mode != PR_MODE_HTTP)
+ return cli_err(appctx, "Not available.\n");
+
+ if (!*args[4])
+ return cli_err(appctx, "String value expected.\n");
+
+ newkey = strdup(args[4]);
+ if (!newkey)
+ return cli_err(appctx, "Failed to allocate memory.\n");
+
+ /* Note: this lock is to make sure this doesn't change while another
+ * thread is in srv_set_dyncookie().
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+ free(px->dyncookie_key);
+ px->dyncookie_key = newkey;
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ for (s = px->srv; s != NULL; s = s->next) {
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ srv_set_dyncookie(s);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ }
+
+ return 1;
+}
+
+/* Parses the "set maxconn frontend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock.
+ */
+static int cli_parse_set_maxconn_frontend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct listener *l;
+ int v;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_frontend(appctx, args[3]);
+ if (!px)
+ return 1;
+
+ if (!*args[4])
+ return cli_err(appctx, "Integer value expected.\n");
+
+ v = atoi(args[4]);
+ if (v < 0)
+ return cli_err(appctx, "Value out of range.\n");
+
+ /* OK, the value is fine, so we assign it to the proxy and to all of
+ * its listeners. The blocked ones will be dequeued.
+ */
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &px->lock);
+
+ px->maxconn = v;
+ list_for_each_entry(l, &px->conf.listeners, by_fe) {
+ if (l->state == LI_FULL)
+ relax_listener(l, 1, 0);
+ }
+
+ if (px->maxconn > px->feconn)
+ dequeue_proxy_listeners(px);
+
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &px->lock);
+
+ return 1;
+}
+
+/* Parses the "shutdown frontend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock.
+ */
+static int cli_parse_shutdown_frontend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_frontend(appctx, args[2]);
+ if (!px)
+ return 1;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ return cli_msg(appctx, LOG_NOTICE, "Frontend was already shut down.\n");
+
+ stop_proxy(px);
+ return 1;
+}
+
+/* Parses the "disable frontend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock.
+ */
+static int cli_parse_disable_frontend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ int ret;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_frontend(appctx, args[2]);
+ if (!px)
+ return 1;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ return cli_msg(appctx, LOG_NOTICE, "Frontend was previously shut down, cannot disable.\n");
+
+ if (!px->li_ready)
+ return cli_msg(appctx, LOG_NOTICE, "All sockets are already disabled.\n");
+
+ /* pause_proxy will take PROXY_LOCK */
+ ret = pause_proxy(px);
+
+ if (!ret)
+ return cli_err(appctx, "Failed to pause frontend, check logs for precise cause.\n");
+
+ return 1;
+}
+
+/* Parses the "enable frontend" directive, it always returns 1.
+ *
+ * Grabs the proxy lock.
+ */
+static int cli_parse_enable_frontend(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ int ret;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ px = cli_find_frontend(appctx, args[2]);
+ if (!px)
+ return 1;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ return cli_err(appctx, "Frontend was previously shut down, cannot enable.\n");
+
+ if (px->li_ready == px->li_all)
+ return cli_msg(appctx, LOG_NOTICE, "All sockets are already enabled.\n");
+
+ /* resume_proxy will take PROXY_LOCK */
+ ret = resume_proxy(px);
+
+ if (!ret)
+ return cli_err(appctx, "Failed to resume frontend, check logs for precise cause (port conflict?).\n");
+ return 1;
+}
+
+/* appctx context used during "show errors" */
+struct show_errors_ctx {
+ struct proxy *px; /* current proxy being dumped, NULL = not started yet. */
+ unsigned int flag; /* bit0: buffer being dumped, 0 = req, 1 = resp; bit1: skip req; bit2: skip resp. */
+ unsigned int ev_id; /* event ID of error being dumped */
+ int iid; /* if >= 0, ID of the proxy to filter on */
+ int ptr; /* <0: headers, >=0 : text pointer to restart from */
+ int bol; /* pointer to beginning of current line */
+};
+
+/* "show errors" handler for the CLI. Returns 0 if wants to continue, 1 to stop
+ * now.
+ */
+static int cli_parse_show_errors(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_errors_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (*args[2]) {
+ struct proxy *px;
+
+ px = proxy_find_by_name(args[2], 0, 0);
+ if (px)
+ ctx->iid = px->uuid;
+ else
+ ctx->iid = atoi(args[2]);
+
+ if (!ctx->iid)
+ return cli_err(appctx, "No such proxy.\n");
+ }
+ else
+ ctx->iid = -1; // dump all proxies
+
+ ctx->flag = 0;
+ if (strcmp(args[3], "request") == 0)
+ ctx->flag |= 4; // ignore response
+ else if (strcmp(args[3], "response") == 0)
+ ctx->flag |= 2; // ignore request
+ ctx->px = NULL;
+ return 0;
+}
+
+/* This function dumps all captured errors onto the stream connector's
+ * read buffer. It returns 0 if the output buffer is full and it needs
+ * to be called again, otherwise non-zero.
+ */
+static int cli_io_handler_show_errors(struct appctx *appctx)
+{
+ struct show_errors_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ extern const char *monthname[12];
+
+ /* FIXME: don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ chunk_reset(&trash);
+
+ if (!ctx->px) {
+ /* the function had not been called yet, let's prepare the
+ * buffer for a response.
+ */
+ struct tm tm;
+
+ get_localtime(date.tv_sec, &tm);
+ chunk_appendf(&trash, "Total events captured on [%02d/%s/%04d:%02d:%02d:%02d.%03d] : %u\n",
+ tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, (int)(date.tv_usec/1000),
+ error_snapshot_id);
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto cant_send;
+
+ ctx->px = proxies_list;
+ ctx->bol = 0;
+ ctx->ptr = -1;
+ }
+
+ /* we have two inner loops here, one for the proxy, the other one for
+ * the buffer.
+ */
+ while (ctx->px) {
+ struct error_snapshot *es;
+
+ HA_RWLOCK_RDLOCK(PROXY_LOCK, &ctx->px->lock);
+
+ if ((ctx->flag & 1) == 0) {
+ es = ctx->px->invalid_req;
+ if (ctx->flag & 2) // skip req
+ goto next;
+ }
+ else {
+ es = ctx->px->invalid_rep;
+ if (ctx->flag & 4) // skip resp
+ goto next;
+ }
+
+ if (!es)
+ goto next;
+
+ if (ctx->iid >= 0 &&
+ ctx->px->uuid != ctx->iid &&
+ (!es->oe || es->oe->uuid != ctx->iid))
+ goto next;
+
+ if (ctx->ptr < 0) {
+ /* just print headers now */
+
+ char pn[INET6_ADDRSTRLEN];
+ struct tm tm;
+ int port;
+
+ get_localtime(es->when.tv_sec, &tm);
+ chunk_appendf(&trash, " \n[%02d/%s/%04d:%02d:%02d:%02d.%03d]",
+ tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, (int)(es->when.tv_usec/1000));
+
+ switch (addr_to_str(&es->src, pn, sizeof(pn))) {
+ case AF_INET:
+ case AF_INET6:
+ port = get_host_port(&es->src);
+ break;
+ default:
+ port = 0;
+ }
+
+ switch (ctx->flag & 1) {
+ case 0:
+ chunk_appendf(&trash,
+ " frontend %s (#%d): invalid request\n"
+ " backend %s (#%d)",
+ ctx->px->id, ctx->px->uuid,
+ (es->oe && es->oe->cap & PR_CAP_BE) ? es->oe->id : "<NONE>",
+ (es->oe && es->oe->cap & PR_CAP_BE) ? es->oe->uuid : -1);
+ break;
+ case 1:
+ chunk_appendf(&trash,
+ " backend %s (#%d): invalid response\n"
+ " frontend %s (#%d)",
+ ctx->px->id, ctx->px->uuid,
+ es->oe ? es->oe->id : "<NONE>" , es->oe ? es->oe->uuid : -1);
+ break;
+ }
+
+ chunk_appendf(&trash,
+ ", server %s (#%d), event #%u, src %s:%d\n"
+ " buffer starts at %llu (including %u out), %u free,\n"
+ " len %u, wraps at %u, error at position %u\n",
+ es->srv ? es->srv->id : "<NONE>",
+ es->srv ? es->srv->puid : -1,
+ es->ev_id, pn, port,
+ es->buf_ofs, es->buf_out,
+ global.tune.bufsize - es->buf_out - es->buf_len,
+ es->buf_len, es->buf_wrap, es->buf_err);
+
+ if (es->show)
+ es->show(&trash, es);
+
+ chunk_appendf(&trash, " \n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto cant_send_unlock;
+
+ ctx->ptr = 0;
+ ctx->ev_id = es->ev_id;
+ }
+
+ if (ctx->ev_id != es->ev_id) {
+ /* the snapshot changed while we were dumping it */
+ chunk_appendf(&trash,
+ " WARNING! update detected on this snapshot, dump interrupted. Please re-check!\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ goto cant_send_unlock;
+
+ goto next;
+ }
+
+ /* OK, ptr >= 0, so we have to dump the current line */
+ while (ctx->ptr < es->buf_len && ctx->ptr < global.tune.bufsize) {
+ int newptr;
+ int newline;
+
+ newline = ctx->bol;
+ newptr = dump_text_line(&trash, es->buf, global.tune.bufsize, es->buf_len, &newline, ctx->ptr);
+ if (newptr == ctx->ptr) {
+ sc_need_room(sc, 0);
+ goto cant_send_unlock;
+ }
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto cant_send_unlock;
+
+ ctx->ptr = newptr;
+ ctx->bol = newline;
+ };
+ next:
+ HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &ctx->px->lock);
+ ctx->bol = 0;
+ ctx->ptr = -1;
+ ctx->flag ^= 1;
+ if (!(ctx->flag & 1))
+ ctx->px = ctx->px->next;
+ }
+
+ /* dump complete */
+ return 1;
+
+ cant_send_unlock:
+ HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &ctx->px->lock);
+ cant_send:
+ return 0;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "disable", "frontend", NULL }, "disable frontend <frontend> : temporarily disable specific frontend", cli_parse_disable_frontend, NULL, NULL },
+ { { "enable", "frontend", NULL }, "enable frontend <frontend> : re-enable specific frontend", cli_parse_enable_frontend, NULL, NULL },
+ { { "set", "maxconn", "frontend", NULL }, "set maxconn frontend <frontend> <value> : change a frontend's maxconn setting", cli_parse_set_maxconn_frontend, NULL },
+ { { "show","servers", "conn", NULL }, "show servers conn [<backend>] : dump server connections status (all or for a single backend)", cli_parse_show_servers, cli_io_handler_servers_state },
+ { { "show","servers", "state", NULL }, "show servers state [<backend>] : dump volatile server information (all or for a single backend)", cli_parse_show_servers, cli_io_handler_servers_state },
+ { { "show", "backend", NULL }, "show backend : list backends in the current running config", NULL, cli_io_handler_show_backend },
+ { { "shutdown", "frontend", NULL }, "shutdown frontend <frontend> : stop a specific frontend", cli_parse_shutdown_frontend, NULL, NULL },
+ { { "set", "dynamic-cookie-key", "backend", NULL }, "set dynamic-cookie-key backend <bk> <k> : change a backend secret key for dynamic cookies", cli_parse_set_dyncookie_key_backend, NULL },
+ { { "enable", "dynamic-cookie", "backend", NULL }, "enable dynamic-cookie backend <bk> : enable dynamic cookies on a specific backend", cli_parse_enable_dyncookie_backend, NULL },
+ { { "disable", "dynamic-cookie", "backend", NULL }, "disable dynamic-cookie backend <bk> : disable dynamic cookies on a specific backend", cli_parse_disable_dyncookie_backend, NULL },
+ { { "show", "errors", NULL }, "show errors [<px>] [request|response] : report last request and/or response errors for each proxy", cli_parse_show_errors, cli_io_handler_show_errors, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
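+
+/* Editor's note (illustrative): the keywords registered above are issued on
+ * the stats socket, e.g. assuming it is bound to /var/run/haproxy.sock and
+ * that frontend/backend names "fe_main" and "be_app" exist:
+ *
+ *   $ echo "show servers state be_app" | socat stdio /var/run/haproxy.sock
+ *   $ echo "set maxconn frontend fe_main 2000" | socat stdio /var/run/haproxy.sock
+ *   $ echo "disable frontend fe_main" | socat stdio /var/run/haproxy.sock
+ */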
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/qmux_http.c b/src/qmux_http.c
new file mode 100644
index 0000000..edf26b1
--- /dev/null
+++ b/src/qmux_http.c
@@ -0,0 +1,108 @@
+#include <haproxy/qmux_http.h>
+
+#include <haproxy/api-t.h>
+#include <haproxy/htx.h>
+#include <haproxy/qmux_trace.h>
+
+/* QUIC MUX rcv_buf operation using HTX data. Received data from stream <qcs>
+ * will be transferred as HTX in <buf>. Output buffer is expected to be of
+ * length <count>. <fin> will be set to signal the last data to receive on this
+ * stream.
+ *
+ * Return the size in bytes of transferred data.
+ */
+size_t qcs_http_rcv_buf(struct qcs *qcs, struct buffer *buf, size_t count,
+ char *fin)
+{
+ struct htx *qcs_htx = NULL;
+ struct htx *cs_htx = NULL;
+ size_t ret = 0;
+
+ TRACE_ENTER(QMUX_EV_STRM_RECV, qcs->qcc->conn, qcs);
+
+ *fin = 0;
+ qcs_htx = htx_from_buf(&qcs->rx.app_buf);
+ if (htx_is_empty(qcs_htx)) {
+ /* Set buffer data to 0 as HTX is empty. */
+ htx_to_buf(qcs_htx, &qcs->rx.app_buf);
+ goto end;
+ }
+
+ ret = qcs_htx->data;
+
+ cs_htx = htx_from_buf(buf);
+ if (htx_is_empty(cs_htx) && htx_used_space(qcs_htx) <= count) {
+ /* EOM will be copied to cs_htx via b_xfer(). */
+ if (qcs_htx->flags & HTX_FL_EOM)
+ *fin = 1;
+
+ htx_to_buf(cs_htx, buf);
+ htx_to_buf(qcs_htx, &qcs->rx.app_buf);
+ b_xfer(buf, &qcs->rx.app_buf, b_data(&qcs->rx.app_buf));
+ goto end;
+ }
+
+ htx_xfer_blks(cs_htx, qcs_htx, count, HTX_BLK_UNUSED);
+ BUG_ON(qcs_htx->flags & HTX_FL_PARSING_ERROR);
+
+ /* Copy EOM from src to dst buffer if all data copied. */
+ if (htx_is_empty(qcs_htx) && (qcs_htx->flags & HTX_FL_EOM)) {
+ cs_htx->flags |= HTX_FL_EOM;
+ *fin = 1;
+ }
+
+ cs_htx->extra = qcs_htx->extra ? (qcs_htx->data + qcs_htx->extra) : 0;
+ htx_to_buf(cs_htx, buf);
+ htx_to_buf(qcs_htx, &qcs->rx.app_buf);
+ ret -= qcs_htx->data;
+
+ end:
+ TRACE_LEAVE(QMUX_EV_STRM_RECV, qcs->qcc->conn, qcs);
+
+ return ret;
+}
+
+/* QUIC MUX snd_buf operation using HTX data. HTX data will be transferred from
+ * <buf> to <qcs> stream buffer. Input buffer is expected to be of length
+ * <count>. <fin> will be set to signal the last data to send for this stream.
+ *
+ * Return the size in bytes of transferred data.
+ */
+size_t qcs_http_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count,
+ char *fin)
+{
+ struct htx *htx;
+ size_t ret;
+ int eom = 0;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ htx = htxbuf(buf);
+ eom = (htx->flags & HTX_FL_EOM);
+ ret = qcs->qcc->app_ops->snd_buf(qcs, buf, count);
+ *fin = (eom && !b_data(buf));
+
+ TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ return ret;
+}
+
+/* QUIC MUX snd_buf reset. HTX data stored in <buf> of length <count> will be
+ * cleared. This can be used when data should not be transmitted any longer.
+ *
+ * Return the size in bytes of cleared data.
+ */
+size_t qcs_http_reset_buf(struct qcs *qcs, struct buffer *buf, size_t count)
+{
+ struct htx *htx;
+
+ TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ htx = htx_from_buf(buf);
+ htx_reset(htx);
+ htx_to_buf(htx, buf);
+
+ TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs);
+
+ return count;
+}
diff --git a/src/qmux_trace.c b/src/qmux_trace.c
new file mode 100644
index 0000000..b213ed4
--- /dev/null
+++ b/src/qmux_trace.c
@@ -0,0 +1,114 @@
+#include <haproxy/qmux_trace.h>
+
+#include <import/ist.h>
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/chunk.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/quic_frame-t.h>
+
+/* trace source and events */
+static void qmux_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+static const struct name_desc qmux_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="qcs", .desc="QUIC stream" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc qmux_trace_decoding[] = {
+#define QMUX_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define QMUX_VERB_MINIMAL 2
+ { .name="minimal", .desc="report only qcc/qcs state and flags, no real decoding" },
+ { /* end */ }
+};
+
+struct trace_source trace_qmux = {
+ .name = IST("qmux"),
+ .desc = "QUIC multiplexer",
+ .arg_def = TRC_ARG1_CONN, /* TRACE()'s first argument is always a connection */
+ .default_cb = qmux_trace,
+ .known_events = qmux_trace_events,
+ .lockon_args = qmux_trace_lockon_args,
+ .decoding = qmux_trace_decoding,
+ .report_events = ~0, /* report everything by default */
+};
+
+
+static void qmux_trace_frm(const struct quic_frame *frm)
+{
+ switch (frm->type) {
+ case QUIC_FT_MAX_STREAMS_BIDI:
+ chunk_appendf(&trace_buf, " max_streams=%llu",
+ (ullong)frm->max_streams_bidi.max_streams);
+ break;
+
+ case QUIC_FT_MAX_STREAMS_UNI:
+ chunk_appendf(&trace_buf, " max_streams=%llu",
+ (ullong)frm->max_streams_uni.max_streams);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/* quic-mux trace handler */
+static void qmux_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct connection *conn = a1;
+ const struct qcc *qcc = conn ? conn->ctx : NULL;
+ const struct qcs *qcs = a2;
+
+ if (!qcc)
+ return;
+
+ if (src->verbosity > QMUX_VERB_CLEAN) {
+ chunk_appendf(&trace_buf, " : qcc=%p(F)", qcc);
+ if (qcc->conn->handle.qc)
+ chunk_appendf(&trace_buf, " qc=%p", qcc->conn->handle.qc);
+
+ chunk_appendf(&trace_buf, " md=%llu/%llu/%llu",
+ (ullong)qcc->rfctl.md, (ullong)qcc->tx.offsets, (ullong)qcc->tx.sent_offsets);
+
+ if (qcs) {
+ chunk_appendf(&trace_buf, " qcs=%p .id=%llu .st=%s",
+ qcs, (ullong)qcs->id,
+ qcs_st_to_str(qcs->st));
+ chunk_appendf(&trace_buf, " msd=%llu/%llu/%llu",
+ (ullong)qcs->tx.msd, (ullong)qcs->tx.offset, (ullong)qcs->tx.sent_offset);
+ }
+
+ if (mask & QMUX_EV_QCC_NQCS) {
+ const uint64_t *id = a3;
+ chunk_appendf(&trace_buf, " id=%llu", (ullong)*id);
+ }
+
+ if (mask & QMUX_EV_SEND_FRM)
+ qmux_trace_frm(a3);
+
+ if (mask & QMUX_EV_QCS_XFER_DATA) {
+ const struct qcs_xfer_data_trace_arg *arg = a3;
+ chunk_appendf(&trace_buf, " prep=%lu xfer=%d",
+ (ulong)arg->prep, arg->xfer);
+ }
+
+ if (mask & QMUX_EV_QCS_BUILD_STRM) {
+ const struct qcs_build_stream_trace_arg *arg = a3;
+ chunk_appendf(&trace_buf, " len=%lu fin=%d offset=%llu",
+ (ulong)arg->len, arg->fin, (ullong)arg->offset);
+ }
+ }
+}
+
+
+/* register qmux traces */
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
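+
+/* Editor's note (illustrative): once registered, this trace source is driven
+ * at runtime from the CLI, e.g.:
+ *
+ *   trace qmux sink stderr
+ *   trace qmux level developer
+ *   trace qmux verbosity minimal
+ *   trace qmux start now
+ */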
diff --git a/src/qpack-dec.c b/src/qpack-dec.c
new file mode 100644
index 0000000..97392bb
--- /dev/null
+++ b/src/qpack-dec.c
@@ -0,0 +1,563 @@
+/*
+ * QPACK decompressor
+ *
+ * Copyright 2021 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <import/ist.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/h3.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/qpack-t.h>
+#include <haproxy/qpack-dec.h>
+#include <haproxy/qpack-tbl.h>
+#include <haproxy/hpack-huff.h>
+#include <haproxy/hpack-tbl.h>
+#include <haproxy/http-hdr.h>
+#include <haproxy/tools.h>
+
+#if defined(DEBUG_QPACK)
+#define qpack_debug_printf fprintf
+#define qpack_debug_hexdump debug_hexdump
+#else
+#define qpack_debug_printf(...) do { } while (0)
+#define qpack_debug_hexdump(...) do { } while (0)
+#endif
+
+/* Encoded field line bitmask */
+#define QPACK_EFL_BITMASK 0xf0
+#define QPACK_LFL_WPBNM 0x00 // Literal field line with post-base name reference
+#define QPACK_IFL_WPBI 0x10 // Indexed field line with post-base index
+#define QPACK_LFL_WLN_BIT 0x20 // Literal field line with literal name
+#define QPACK_LFL_WNR_BIT 0x40 // Literal field line with name reference
+#define QPACK_IFL_BIT 0x80 // Indexed field line
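+
+/* Editor's note (illustrative): the field line type is taken from the upper
+ * bits of the first byte. For instance a first byte of 0xd9 gives
+ * efl_type = 0xd9 & 0xf0 = 0xd0: QPACK_IFL_BIT (0x80) is set, so this is an
+ * indexed field line, and bit 0x40 selects the static table.
+ */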
+
+/* Reads a varint from the lowest <b> bits of the first byte pointed to by
+ * <buf>, consuming at most <len_in> bytes (first byte included). Returns the
+ * 64-bit value on success after updating <buf> and <len_in>. Forces <len_in>
+ * to (uint64_t)-1 on truncated input.
+ * Note that this function is similar to the one used for HPACK (except that
+ * it supports up to 62-bit integers).
+ */
+static uint64_t qpack_get_varint(const unsigned char **buf, uint64_t *len_in, int b)
+{
+ uint64_t ret = 0;
+ int len = *len_in;
+ const uint8_t *raw = *buf;
+ uint8_t shift = 0;
+
+ len--;
+ ret = *raw++ & ((1ULL << b) - 1);
+ if (ret != (uint64_t)((1ULL << b) - 1))
+ goto end;
+
+ while (len && (*raw & 128)) {
+ ret += ((uint64_t)*raw++ & 127) << shift;
+ shift += 7;
+ len--;
+ }
+
+ /* last 7 bits */
+ if (!len)
+ goto too_short;
+
+ len--;
+ ret += ((uint64_t)*raw++ & 127) << shift;
+
+ end:
+ *buf = raw;
+ *len_in = len;
+ return ret;
+
+ too_short:
+ *len_in = (uint64_t)-1;
+ return 0;
+}
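+
+/* Editor's sketch (not part of the upstream sources): decoding the classic
+ * RFC 7541 integer test vector with a 5-bit prefix. The byte sequence
+ * 0x1f 0x9a 0x0a decodes to 1337.
+ */
+static uint64_t __maybe_unused qpack_get_varint_example(void)
+{
+ const unsigned char in[] = { 0x1f, 0x9a, 0x0a };
+ const unsigned char *p = in;
+ uint64_t len = sizeof(in);
+ uint64_t v = qpack_get_varint(&p, &len, 5);
+
+ /* v == 1337: 31 from the saturated prefix, then 26 + (10 << 7);
+ * <p> advanced by 3 bytes and <len> reached 0.
+ */
+ return v;
+}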
+
+/* Decode an encoder stream.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qpack_decode_enc(struct buffer *buf, int fin, void *ctx)
+{
+ struct qcs *qcs = ctx;
+ size_t len;
+ unsigned char inst;
+
+ /* RFC 9204 4.2. Encoder and Decoder Streams
+ *
+ * The sender MUST NOT close either of these streams, and the receiver
+ * MUST NOT request that the sender close either of these streams.
+ * Closure of either unidirectional stream type MUST be treated as a
+ * connection error of type H3_CLOSED_CRITICAL_STREAM.
+ */
+ if (fin) {
+ qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1);
+ return -1;
+ }
+
+ len = b_data(buf);
+ qpack_debug_hexdump(stderr, "[QPACK-DEC-ENC] ", b_head(buf), 0, len);
+
+ if (!len) {
+ qpack_debug_printf(stderr, "[QPACK-DEC-ENC] empty stream\n");
+ return 0;
+ }
+
+ inst = (unsigned char)*b_head(buf) & QPACK_ENC_INST_BITMASK;
+ if (inst == QPACK_ENC_INST_DUP) {
+ /* Duplicate */
+ }
+ else if (inst & QPACK_ENC_INST_IWNR_BIT) {
+ /* Insert With Name Reference */
+ }
+ else if (inst & QPACK_ENC_INST_IWLN_BIT) {
+ /* Insert with literal name */
+ }
+ else if (inst & QPACK_ENC_INST_SDTC_BIT) {
+ /* Set dynamic table capacity */
+ }
+
+ return 0;
+}
+
+/* Decode a decoder stream.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qpack_decode_dec(struct buffer *buf, int fin, void *ctx)
+{
+ struct qcs *qcs = ctx;
+ size_t len;
+ unsigned char inst;
+
+ /* RFC 9204 4.2. Encoder and Decoder Streams
+ *
+ * The sender MUST NOT close either of these streams, and the receiver
+ * MUST NOT request that the sender close either of these streams.
+ * Closure of either unidirectional stream type MUST be treated as a
+ * connection error of type H3_CLOSED_CRITICAL_STREAM.
+ */
+ if (fin) {
+ qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1);
+ return -1;
+ }
+
+ len = b_data(buf);
+ qpack_debug_hexdump(stderr, "[QPACK-DEC-DEC] ", b_head(buf), 0, len);
+
+ if (!len) {
+ qpack_debug_printf(stderr, "[QPACK-DEC-DEC] empty stream\n");
+ return 0;
+ }
+
+ inst = (unsigned char)*b_head(buf) & QPACK_DEC_INST_BITMASK;
+ if (inst == QPACK_DEC_INST_ICINC) {
+ /* Insert count increment */
+ }
+ else if (inst & QPACK_DEC_INST_SACK) {
+ /* Section Acknowledgment */
+ }
+ else if (inst & QPACK_DEC_INST_SCCL) {
+ /* Stream cancellation */
+ }
+
+ return 0;
+}
+
+/* Decode a field section prefix made of two varints, <enc_ric> and <db>.
+ * Also sets the 'S' sign bit for <db>.
+ * Returns a negative error code on failure, 0 on success.
+ */
+static int qpack_decode_fs_pfx(uint64_t *enc_ric, uint64_t *db, int *sign_bit,
+ const unsigned char **raw, uint64_t *len)
+{
+ *enc_ric = qpack_get_varint(raw, len, 8);
+ if (*len == (uint64_t)-1)
+ return -QPACK_ERR_RIC;
+
+ *sign_bit = **raw & 0x8;
+ *db = qpack_get_varint(raw, len, 7);
+ if (*len == (uint64_t)-1)
+ return -QPACK_ERR_DB;
+
+ return 0;
+}
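+
+/* Editor's note (illustrative): the two-byte prefix 0x00 0x00 emitted by
+ * qpack_encode_field_section_line() in qpack-enc.c decodes here to
+ * enc_ric = 0, db = 0 and sign_bit = 0, i.e. a field section which only
+ * references the static table.
+ */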
+
+/* Decode a field section from the <raw> buffer of <len> bytes. Each parsed
+ * header is inserted into <list>, which holds at most <list_size> entries;
+ * <tmp> is used as storage for decoded strings which the list elements point
+ * into. An end marker is inserted at the end of the list with empty strings
+ * as name/value.
+ *
+ * Returns the number of headers inserted into list excluding the end marker.
+ * In case of error, a negative code QPACK_ERR_* is returned.
+ */
+int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp,
+ struct http_hdr *list, int list_size)
+{
+ struct ist name, value;
+ uint64_t enc_ric, db;
+ int s;
+ unsigned int efl_type;
+ int ret;
+ int hdr_idx = 0;
+
+ qpack_debug_hexdump(stderr, "[QPACK-DEC-FS] ", (const char *)raw, 0, len);
+
+ /* parse field section prefix */
+ ret = qpack_decode_fs_pfx(&enc_ric, &db, &s, &raw, &len);
+ if (ret < 0) {
+ qpack_debug_printf(stderr, "##ERR@%d(%d)\n", __LINE__, ret);
+ goto out;
+ }
+
+ chunk_reset(tmp);
+ qpack_debug_printf(stderr, "enc_ric: %llu db: %llu s=%d\n",
+ (unsigned long long)enc_ric, (unsigned long long)db, !!s);
+ /* Decode field lines */
+ while (len) {
+ if (hdr_idx >= list_size) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TOO_LARGE;
+ goto out;
+ }
+
+ /* parse field line representation */
+ efl_type = *raw & QPACK_EFL_BITMASK;
+ qpack_debug_printf(stderr, "efl_type=0x%02x\n", efl_type);
+
+ if (efl_type == QPACK_LFL_WPBNM) {
+ /* Literal field line with post-base name reference
+ * TODO adjust this when dynamic table support is implemented.
+ */
+#if 0
+ uint64_t index __maybe_unused, length;
+ unsigned int n __maybe_unused, h __maybe_unused;
+
+ qpack_debug_printf(stderr, "literal field line with post-base name reference:");
+ n = *raw & 0x08;
+ index = qpack_get_varint(&raw, &len, 3);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " n=%d index=%llu", !!n, (unsigned long long)index);
+ h = *raw & 0x80;
+ length = qpack_get_varint(&raw, &len, 7);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " h=%d length=%llu", !!h, (unsigned long long)length);
+
+ if (len < length) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ raw += length;
+ len -= length;
+#endif
+
+ /* RFC9204 2.2.3 Invalid References
+ *
+ * If the decoder encounters a reference in a field line representation
+ * to a dynamic table entry that has already been evicted or that has an
+ * absolute index greater than or equal to the declared Required Insert
+ * Count (Section 4.5.1), it MUST treat this as a connection error of
+ * type QPACK_DECOMPRESSION_FAILED.
+ */
+ return -QPACK_DECOMPRESSION_FAILED;
+ }
+ else if (efl_type == QPACK_IFL_WPBI) {
+ /* Indexed field line with post-base index
+ * TODO adjust this when dynamic table support is implemented.
+ */
+#if 0
+ uint64_t index __maybe_unused;
+
+ qpack_debug_printf(stderr, "indexed field line with post-base index:");
+ index = qpack_get_varint(&raw, &len, 4);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " index=%llu", (unsigned long long)index);
+#endif
+
+ /* RFC9204 2.2.3 Invalid References
+ *
+ * If the decoder encounters a reference in a field line representation
+ * to a dynamic table entry that has already been evicted or that has an
+ * absolute index greater than or equal to the declared Required Insert
+ * Count (Section 4.5.1), it MUST treat this as a connection error of
+ * type QPACK_DECOMPRESSION_FAILED.
+ */
+ return -QPACK_DECOMPRESSION_FAILED;
+ }
+ else if (efl_type & QPACK_IFL_BIT) {
+ /* Indexed field line */
+ uint64_t index;
+ unsigned int static_tbl;
+
+ qpack_debug_printf(stderr, "indexed field line:");
+ static_tbl = efl_type & 0x40;
+ index = qpack_get_varint(&raw, &len, 6);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ if (static_tbl && index < QPACK_SHT_SIZE) {
+ name = qpack_sht[index].n;
+ value = qpack_sht[index].v;
+ }
+ else {
+ /* RFC9204 2.2.3 Invalid References
+ *
+ * If the decoder encounters a reference in a field line representation
+ * to a dynamic table entry that has already been evicted or that has an
+ * absolute index greater than or equal to the declared Required Insert
+ * Count (Section 4.5.1), it MUST treat this as a connection error of
+ * type QPACK_DECOMPRESSION_FAILED.
+ *
+ * TODO adjust this when dynamic table support is implemented.
+ */
+ return -QPACK_DECOMPRESSION_FAILED;
+ }
+
+ qpack_debug_printf(stderr, " t=%d index=%llu", !!static_tbl, (unsigned long long)index);
+ }
+ else if (efl_type & QPACK_LFL_WNR_BIT) {
+ /* Literal field line with name reference */
+ uint64_t index, length;
+ unsigned int static_tbl, n __maybe_unused, h;
+
+ qpack_debug_printf(stderr, "Literal field line with name reference:");
+ n = efl_type & 0x20;
+ static_tbl = efl_type & 0x10;
+ index = qpack_get_varint(&raw, &len, 4);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ if (static_tbl && index < QPACK_SHT_SIZE) {
+ name = qpack_sht[index].n;
+ }
+ else {
+ /* RFC9204 2.2.3 Invalid References
+ *
+ * If the decoder encounters a reference in a field line representation
+ * to a dynamic table entry that has already been evicted or that has an
+ * absolute index greater than or equal to the declared Required Insert
+ * Count (Section 4.5.1), it MUST treat this as a connection error of
+ * type QPACK_DECOMPRESSION_FAILED.
+ *
+ * TODO adjust this when dynamic table support is implemented.
+ */
+ return -QPACK_DECOMPRESSION_FAILED;
+ }
+
+ qpack_debug_printf(stderr, " n=%d t=%d index=%llu", !!n, !!static_tbl, (unsigned long long)index);
+ h = *raw & 0x80;
+ length = qpack_get_varint(&raw, &len, 7);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " h=%d length=%llu", !!h, (unsigned long long)length);
+ if (h) {
+ char *trash;
+ int nlen;
+
+ trash = chunk_newstr(tmp);
+ if (!trash) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_DECOMPRESSION_FAILED;
+ goto out;
+ }
+ nlen = huff_dec(raw, length, trash, tmp->size - tmp->data);
+ if (nlen == (uint32_t)-1) {
+ qpack_debug_printf(stderr, " can't decode huffman.\n");
+ ret = -QPACK_ERR_HUFFMAN;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " [name huff %d->%d '%s']", (int)length, (int)nlen, trash);
+ /* makes an ist from tmp storage */
+ b_add(tmp, nlen);
+ value = ist2(trash, nlen);
+ }
+ else {
+ value = ist2(raw, length);
+ }
+
+ if (len < length) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ raw += length;
+ len -= length;
+ }
+ else if (efl_type & QPACK_LFL_WLN_BIT) {
+ /* Literal field line with literal name */
+ unsigned int n __maybe_unused, hname, hvalue;
+ uint64_t name_len, value_len;
+
+ qpack_debug_printf(stderr, "Literal field line with literal name:");
+ n = *raw & 0x10;
+ hname = *raw & 0x08;
+ name_len = qpack_get_varint(&raw, &len, 3);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " n=%d hname=%d name_len=%llu", !!n, !!hname, (unsigned long long)name_len);
+ /* Name string */
+
+ if (len < name_len) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ if (hname) {
+ char *trash;
+ int nlen;
+
+ trash = chunk_newstr(tmp);
+ if (!trash) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_DECOMPRESSION_FAILED;
+ goto out;
+ }
+ nlen = huff_dec(raw, name_len, trash, tmp->size - tmp->data);
+ if (nlen == (uint32_t)-1) {
+ qpack_debug_printf(stderr, " can't decode huffman.\n");
+ ret = -QPACK_ERR_HUFFMAN;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " [name huff %d->%d '%s']", (int)name_len, (int)nlen, trash);
+ /* makes an ist from tmp storage */
+ b_add(tmp, nlen);
+ name = ist2(trash, nlen);
+ }
+ else {
+ name = ist2(raw, name_len);
+ }
+
+ raw += name_len;
+ len -= name_len;
+
+ hvalue = *raw & 0x80;
+ value_len = qpack_get_varint(&raw, &len, 7);
+ if (len == (uint64_t)-1) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " hvalue=%d value_len=%llu", !!hvalue, (unsigned long long)value_len);
+
+ if (len < value_len) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TRUNCATED;
+ goto out;
+ }
+
+ if (hvalue) {
+ char *trash;
+ int nlen;
+
+ trash = chunk_newstr(tmp);
+ if (!trash) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_DECOMPRESSION_FAILED;
+ goto out;
+ }
+ nlen = huff_dec(raw, value_len, trash, tmp->size - tmp->data);
+ if (nlen == (uint32_t)-1) {
+ qpack_debug_printf(stderr, " can't decode huffman.\n");
+ ret = -QPACK_ERR_HUFFMAN;
+ goto out;
+ }
+
+ qpack_debug_printf(stderr, " [name huff %d->%d '%s']", (int)value_len, (int)nlen, trash);
+ /* makes an ist from tmp storage */
+ b_add(tmp, nlen);
+ value = ist2(trash, nlen);
+ }
+ else {
+ value = ist2(raw, value_len);
+ }
+
+ raw += value_len;
+ len -= value_len;
+ }
+
+ /* We must not accept empty header names (forbidden by the spec and used
+ * as a list termination).
+ */
+ if (!name.len) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_DECOMPRESSION_FAILED;
+ goto out;
+ }
+
+ list[hdr_idx].n = name;
+ list[hdr_idx].v = value;
+ ++hdr_idx;
+
+ qpack_debug_printf(stderr, "\n");
+ }
+
+ if (hdr_idx >= list_size) {
+ qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__);
+ ret = -QPACK_ERR_TOO_LARGE;
+ goto out;
+ }
+
+ /* put an end marker */
+ list[hdr_idx].n = list[hdr_idx].v = IST_NULL;
+ ret = hdr_idx;
+
+ out:
+ qpack_debug_printf(stderr, "-- done: ret=%d\n", ret);
+ return ret;
+}
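+
+/* Editor's sketch (not part of the upstream sources): a minimal caller of
+ * qpack_decode_fs(). The real caller lives in h3.c; the list size here is
+ * arbitrary.
+ */
+static int __maybe_unused qpack_decode_fs_example(const unsigned char *raw, uint64_t len)
+{
+ struct http_hdr list[64];
+ struct buffer *tmp = get_trash_chunk();
+ int hdrs, i;
+
+ hdrs = qpack_decode_fs(raw, len, tmp, list, sizeof(list) / sizeof(list[0]));
+ if (hdrs < 0)
+ return hdrs; /* -QPACK_ERR_* or -QPACK_DECOMPRESSION_FAILED */
+
+ for (i = 0; i < hdrs; i++)
+ qpack_debug_printf(stderr, "%.*s: %.*s\n",
+ (int)list[i].n.len, list[i].n.ptr,
+ (int)list[i].v.len, list[i].v.ptr);
+ return hdrs;
+}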
diff --git a/src/qpack-enc.c b/src/qpack-enc.c
new file mode 100644
index 0000000..006f1f1
--- /dev/null
+++ b/src/qpack-enc.c
@@ -0,0 +1,185 @@
+#include <haproxy/qpack-enc.h>
+
+#include <haproxy/buf.h>
+#include <haproxy/intops.h>
+
+/* Returns the byte size required to encode <i> as a <prefix_size>-bit prefix
+ * integer.
+ */
+static size_t qpack_get_prefix_int_size(int i, int prefix_size)
+{
+ int n = (1 << prefix_size) - 1;
+ if (i < n) {
+ return 1;
+ }
+ else {
+ size_t result = 0;
+ while (i) {
+ ++result;
+ i >>= 7;
+ }
+ return 1 + result;
+ }
+}
+
+/* Encode the integer <i> in the buffer <out> as a <prefix_size>-bit prefix
+ * integer. The caller must ensure there is enough room in the buffer. The
+ * prefix byte is OR-ed with <before_prefix>.
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
+static int qpack_encode_prefix_integer(struct buffer *out, int i,
+ int prefix_size,
+ unsigned char before_prefix)
+{
+ const int mod = (1 << prefix_size) - 1;
+ BUG_ON_HOT(!prefix_size);
+
+ if (i < mod) {
+ if (b_room(out) < 1)
+ return 1;
+
+ b_putchr(out, before_prefix | i);
+ }
+ else {
+ int to_encode = i - mod;
+ const size_t sz = to_encode / mod;
+
+ if (b_room(out) < sz)
+ return 1;
+
+ b_putchr(out, before_prefix | mod);
+ while (1) {
+ if (to_encode > 0x7f) {
+ b_putchr(out, 0x80 | (to_encode & 0x7f));
+ to_encode >>= 7;
+ }
+ else {
+ b_putchr(out, to_encode & 0x7f);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
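+
+/* Editor's note (illustrative): encoding 1337 with a 5-bit prefix and a null
+ * <before_prefix> yields 0x1f 0x9a 0x0a: the prefix saturates at 31, then
+ * the remainder 1306 is emitted as the 7-bit groups 26 (continuation bit
+ * set, hence 0x9a) and 10. This is the exact inverse of qpack_get_varint()
+ * in qpack-dec.c.
+ */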
+
+/* Returns 0 on success else non-zero. */
+int qpack_encode_int_status(struct buffer *out, unsigned int status)
+{
+ int status_size, idx = 0;
+
+ if (status < 100 || status > 999)
+ return 1;
+
+ switch (status) {
+ case 103: idx = 24; break;
+ case 200: idx = 25; break;
+ case 304: idx = 26; break;
+ case 404: idx = 27; break;
+ case 503: idx = 28; break;
+ case 100: idx = 63; break;
+ case 204: idx = 64; break;
+ case 206: idx = 65; break;
+ case 302: idx = 66; break;
+ case 400: idx = 67; break;
+ case 403: idx = 68; break;
+ case 421: idx = 69; break;
+ case 425: idx = 70; break;
+ case 500: idx = 71; break;
+
+ /* status code not in the QPACK static table, idx remains 0. */
+ default: break;
+ }
+
+ if (idx) {
+ /* status code present in QPACK static table
+ * -> indexed field line
+ */
+ status_size = qpack_get_prefix_int_size(idx, 6);
+ if (b_room(out) < status_size)
+ return 1;
+
+ qpack_encode_prefix_integer(out, idx, 6, 0xc0);
+ }
+ else {
+ /* status code not present in QPACK static table
+ * -> literal field line with name reference
+ */
+ char a, b, c;
+ a = '0' + status / 100;
+ status -= (status / 100 * 100);
+ b = '0' + status / 10;
+ status -= (status / 10 * 10);
+ c = '0' + status;
+
+ /* field name */
+ if (qpack_encode_prefix_integer(out, 24, 4, 0x50))
+ return 1;
+
+ /* field value length */
+ if (qpack_encode_prefix_integer(out, 3, 7, 0x00))
+ return 1;
+
+ if (b_room(out) < 3)
+ return 1;
+
+ b_putchr(out, a);
+ b_putchr(out, b);
+ b_putchr(out, c);
+ }
+
+ return 0;
+}
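+
+/* Editor's note (illustrative): status 200 is found in the QPACK static
+ * table at index 25 and is emitted as the single indexed-field-line byte
+ * 0xc0 | 25 = 0xd9. Status 305 is absent from the table, so it is emitted
+ * as a literal field line referencing the ":status" name at index 24
+ * (bytes 0x5f 0x09), followed by the 3-byte value: 0x03 '3' '0' '5'.
+ */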
+
+/* Returns 0 on success else non-zero. */
+int qpack_encode_field_section_line(struct buffer *out)
+{
+ char qpack_field_section[] = {
+ '\x00', /* required insert count */
+ '\x00', /* S + delta base */
+ };
+
+ if (b_room(out) < 2)
+ return 1;
+
+ b_putblk(out, qpack_field_section, 2);
+
+ return 0;
+}
+
+#define QPACK_LFL_WLN_BIT 0x20 // Literal field line with literal name
+
+/* Encode a header as a literal field line with a literal name.
+ * Returns 0 on success, non-zero otherwise.
+ */
+int qpack_encode_header(struct buffer *out, const struct ist n, const struct ist v)
+{
+ int i;
+ size_t sz = qpack_get_prefix_int_size(n.len, 3) + n.len +
+ qpack_get_prefix_int_size(v.len, 7) + v.len;
+
+ if (sz > b_room(out))
+ return 1;
+
+ /* literal field line with literal name
+ * | 0 | 0 | 1 | N | H | . | . | . |
+ * N: (allows an intermediary to add the header to a dynamic table)
+ * H: Huffman encoded
+ * name len
+ */
+ qpack_encode_prefix_integer(out, n.len, 3, QPACK_LFL_WLN_BIT);
+ /* name */
+ for (i = 0; i < n.len; ++i)
+ b_putchr(out, n.ptr[i]);
+
+ /* | 0 | . | . | . | . | . | . | . |
+ * value len
+ */
+ qpack_encode_prefix_integer(out, v.len, 7, 0x00);
+ /* value */
+ for (i = 0; i < v.len; ++i)
+ b_putchr(out, v.ptr[i]);
+
+ return 0;
+}
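+
+/* Editor's sketch (not part of the upstream sources): encoding one header
+ * into a stack buffer, assuming b_make() and ist() from the included
+ * buffer/ist helpers. For "x-demo: 1" this emits
+ * 0x26 'x' '-' 'd' 'e' 'm' 'o' 0x01 '1' (name length 6 OR-ed with
+ * QPACK_LFL_WLN_BIT, then the raw name, then value length 1 and the raw
+ * value; no Huffman coding).
+ */
+static int __maybe_unused qpack_encode_header_example(void)
+{
+ char area[32];
+ struct buffer out = b_make(area, sizeof(area), 0, 0);
+
+ return qpack_encode_header(&out, ist("x-demo"), ist("1"));
+}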
diff --git a/src/qpack-tbl.c b/src/qpack-tbl.c
new file mode 100644
index 0000000..7c59fd2
--- /dev/null
+++ b/src/qpack-tbl.c
@@ -0,0 +1,415 @@
+/*
+ * QPACK header table management (draft-ietf-quic-qpack-20)
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <import/ist.h>
+#include <haproxy/http-hdr-t.h>
+#include <haproxy/qpack-tbl.h>
+
+/* static header table as in draft-ietf-quic-qpack-20 Appendix A. */
+const struct http_hdr qpack_sht[QPACK_SHT_SIZE] = {
+ [ 0] = { .n = IST(":authority"), .v = IST("") },
+ [ 1] = { .n = IST(":path"), .v = IST("/") },
+ [ 2] = { .n = IST("age"), .v = IST("0") },
+ [ 3] = { .n = IST("content-disposition"), .v = IST("") },
+ [ 4] = { .n = IST("content-length"), .v = IST("0") },
+ [ 5] = { .n = IST("cookie"), .v = IST("") },
+ [ 6] = { .n = IST("date"), .v = IST("") },
+ [ 7] = { .n = IST("etag"), .v = IST("") },
+ [ 8] = { .n = IST("if-modified-since"), .v = IST("") },
+ [ 9] = { .n = IST("if-none-match"), .v = IST("") },
+ [10] = { .n = IST("last-modified"), .v = IST("") },
+ [11] = { .n = IST("link"), .v = IST("") },
+ [12] = { .n = IST("location"), .v = IST("") },
+ [13] = { .n = IST("referer"), .v = IST("") },
+ [14] = { .n = IST("set-cookie"), .v = IST("") },
+ [15] = { .n = IST(":method"), .v = IST("CONNECT") },
+ [16] = { .n = IST(":method"), .v = IST("DELETE") },
+ [17] = { .n = IST(":method"), .v = IST("GET") },
+ [18] = { .n = IST(":method"), .v = IST("HEAD") },
+ [19] = { .n = IST(":method"), .v = IST("OPTIONS") },
+ [20] = { .n = IST(":method"), .v = IST("POST") },
+ [21] = { .n = IST(":method"), .v = IST("PUT") },
+ [22] = { .n = IST(":scheme"), .v = IST("http") },
+ [23] = { .n = IST(":scheme"), .v = IST("https") },
+ [24] = { .n = IST(":status"), .v = IST("103") },
+ [25] = { .n = IST(":status"), .v = IST("200") },
+ [26] = { .n = IST(":status"), .v = IST("304") },
+ [27] = { .n = IST(":status"), .v = IST("404") },
+ [28] = { .n = IST(":status"), .v = IST("503") },
+ [29] = { .n = IST("accept"), .v = IST("*/*") },
+ [30] = { .n = IST("accept"), .v = IST("application/dns-message") },
+ [31] = { .n = IST("accept-encoding"), .v = IST("gzip, deflate, br") },
+ [32] = { .n = IST("accept-ranges"), .v = IST("bytes") },
+ [33] = { .n = IST("access-control-allow-headers"), .v = IST("cache-control") },
+ [34] = { .n = IST("access-control-allow-headers"), .v = IST("content-type") },
+ [35] = { .n = IST("access-control-allow-origin"), .v = IST("*") },
+ [36] = { .n = IST("cache-control"), .v = IST("max-age=0") },
+ [37] = { .n = IST("cache-control"), .v = IST("max-age=2592000") },
+ [38] = { .n = IST("cache-control"), .v = IST("max-age=604800") },
+ [39] = { .n = IST("cache-control"), .v = IST("no-cache") },
+ [40] = { .n = IST("cache-control"), .v = IST("no-store") },
+ [41] = { .n = IST("cache-control"), .v = IST("public, max-age=31536000") },
+ [42] = { .n = IST("content-encoding"), .v = IST("br") },
+ [43] = { .n = IST("content-encoding"), .v = IST("gzip") },
+ [44] = { .n = IST("content-type"), .v = IST("application/dns-message") },
+ [45] = { .n = IST("content-type"), .v = IST("application/javascript") },
+ [46] = { .n = IST("content-type"), .v = IST("application/json") },
+ [47] = { .n = IST("content-type"), .v = IST("application/"
+ "x-www-form-urlencoded") },
+ [48] = { .n = IST("content-type"), .v = IST("image/gif") },
+ [49] = { .n = IST("content-type"), .v = IST("image/jpeg") },
+ [50] = { .n = IST("content-type"), .v = IST("image/png") },
+ [51] = { .n = IST("content-type"), .v = IST("text/css") },
+ [52] = { .n = IST("content-type"), .v = IST("text/html;"
+ " charset=utf-8") },
+ [53] = { .n = IST("content-type"), .v = IST("text/plain") },
+ [54] = { .n = IST("content-type"), .v = IST("text/plain;"
+ "charset=utf-8") },
+ [55] = { .n = IST("range"), .v = IST("bytes=0-") },
+ [56] = { .n = IST("strict-transport-security"), .v = IST("max-age=31536000") },
+ [57] = { .n = IST("strict-transport-security"), .v = IST("max-age=31536000;"
+ " includesubdomains") },
+ [58] = { .n = IST("strict-transport-security"), .v = IST("max-age=31536000;"
+ " includesubdomains;"
+ " preload") },
+ [59] = { .n = IST("vary"), .v = IST("accept-encoding") },
+ [60] = { .n = IST("vary"), .v = IST("origin") },
+ [61] = { .n = IST("x-content-type-options"), .v = IST("nosniff") },
+ [62] = { .n = IST("x-xss-protection"), .v = IST("1; mode=block") },
+ [63] = { .n = IST(":status"), .v = IST("100") },
+ [64] = { .n = IST(":status"), .v = IST("204") },
+ [65] = { .n = IST(":status"), .v = IST("206") },
+ [66] = { .n = IST(":status"), .v = IST("302") },
+ [67] = { .n = IST(":status"), .v = IST("400") },
+ [68] = { .n = IST(":status"), .v = IST("403") },
+ [69] = { .n = IST(":status"), .v = IST("421") },
+ [70] = { .n = IST(":status"), .v = IST("425") },
+ [71] = { .n = IST(":status"), .v = IST("500") },
+ [72] = { .n = IST("accept-language"), .v = IST("") },
+ [73] = { .n = IST("access-control-allow-credentials"), .v = IST("FALSE") },
+ [74] = { .n = IST("access-control-allow-credentials"), .v = IST("TRUE") },
+ [75] = { .n = IST("access-control-allow-headers"), .v = IST("*") },
+ [76] = { .n = IST("access-control-allow-methods"), .v = IST("get") },
+ [77] = { .n = IST("access-control-allow-methods"), .v = IST("get, post, options") },
+ [78] = { .n = IST("access-control-allow-methods"), .v = IST("options") },
+ [79] = { .n = IST("access-control-expose-headers"), .v = IST("content-length") },
+ [80] = { .n = IST("access-control-request-headers"), .v = IST("content-type") },
+ [81] = { .n = IST("access-control-request-method"), .v = IST("get") },
+ [82] = { .n = IST("access-control-request-method"), .v = IST("post") },
+ [83] = { .n = IST("alt-svc"), .v = IST("clear") },
+ [84] = { .n = IST("authorization"), .v = IST("") },
+ [85] = { .n = IST("content-security-policy"), .v = IST("script-src 'none';"
+ " object-src 'none';"
+ " base-uri 'none'") },
+ [86] = { .n = IST("early-data"), .v = IST("1") },
+ [87] = { .n = IST("expect-ct"), .v = IST("") },
+ [88] = { .n = IST("forwarded"), .v = IST("") },
+ [89] = { .n = IST("if-range"), .v = IST("") },
+ [90] = { .n = IST("origin"), .v = IST("") },
+ [91] = { .n = IST("purpose"), .v = IST("prefetch") },
+ [92] = { .n = IST("server"), .v = IST("") },
+ [93] = { .n = IST("timing-allow-origin"), .v = IST("*") },
+ [94] = { .n = IST("upgrade-insecure-requests"), .v = IST("1") },
+ [95] = { .n = IST("user-agent"), .v = IST("") },
+ [96] = { .n = IST("x-forwarded-for"), .v = IST("") },
+ [97] = { .n = IST("x-frame-options"), .v = IST("deny") },
+ [98] = { .n = IST("x-frame-options"), .v = IST("sameorigin") },
+};
+
+struct pool_head *pool_head_qpack_tbl = NULL;
+
+#ifdef DEBUG_QPACK
+/* dump the whole dynamic header table */
+void qpack_dht_dump(FILE *out, const struct qpack_dht *dht)
+{
+ unsigned int i;
+ unsigned int slot;
+ char name[4096], value[4096];
+
+ for (i = QPACK_SHT_SIZE; i < QPACK_SHT_SIZE + dht->used; i++) {
+ slot = (qpack_get_dte(dht, i - QPACK_SHT_SIZE + 1) - dht->dte);
+ fprintf(out, "idx=%u slot=%u name=<%s> value=<%s> addr=%u-%u\n",
+ i, slot,
+ istpad(name, qpack_idx_to_name(dht, i)).ptr,
+ istpad(value, qpack_idx_to_value(dht, i)).ptr,
+ dht->dte[slot].addr, dht->dte[slot].addr+dht->dte[slot].nlen+dht->dte[slot].vlen-1);
+ }
+}
+
+/* check for the whole dynamic header table consistency, abort on failures */
+void qpack_dht_check_consistency(const struct qpack_dht *dht)
+{
+ unsigned slot = qpack_dht_get_tail(dht);
+ unsigned used2 = dht->used;
+ unsigned total = 0;
+
+ if (!dht->used)
+ return;
+
+ if (dht->front >= dht->wrap)
+ abort();
+
+ if (dht->used > dht->wrap)
+ abort();
+
+ if (dht->head >= dht->wrap)
+ abort();
+
+ while (used2--) {
+ total += dht->dte[slot].nlen + dht->dte[slot].vlen;
+ slot++;
+ if (slot >= dht->wrap)
+ slot = 0;
+ }
+
+ if (total != dht->total) {
+ fprintf(stderr, "%d: total=%u dht=%u\n", __LINE__, total, dht->total);
+ abort();
+ }
+}
+#endif // DEBUG_QPACK
+
+/* rebuild a new dynamic header table from <dht> with an unwrapped index and
+ * contents at the end. The new table is returned, the caller must not use the
+ * previous one anymore. NULL may be returned if no table could be allocated.
+ */
+static struct qpack_dht *qpack_dht_defrag(struct qpack_dht *dht)
+{
+ struct qpack_dht *alt_dht;
+ uint16_t old, new;
+ uint32_t addr;
+
+ /* Note: for small tables we could use alloca() instead but
+ * portability especially for large tables can be problematic.
+ */
+ alt_dht = qpack_dht_alloc();
+ if (!alt_dht)
+ return NULL;
+
+ alt_dht->total = dht->total;
+ alt_dht->used = dht->used;
+ alt_dht->wrap = dht->used;
+
+ new = 0;
+ addr = alt_dht->size;
+
+ if (dht->used) {
+ /* start from the tail */
+ old = qpack_dht_get_tail(dht);
+ do {
+ alt_dht->dte[new].nlen = dht->dte[old].nlen;
+ alt_dht->dte[new].vlen = dht->dte[old].vlen;
+ addr -= dht->dte[old].nlen + dht->dte[old].vlen;
+ alt_dht->dte[new].addr = addr;
+
+ memcpy((void *)alt_dht + alt_dht->dte[new].addr,
+ (void *)dht + dht->dte[old].addr,
+ dht->dte[old].nlen + dht->dte[old].vlen);
+
+ old++;
+ if (old >= dht->wrap)
+ old = 0;
+ new++;
+ } while (new < dht->used);
+ }
+
+ alt_dht->front = alt_dht->head = new - 1;
+
+ memcpy(dht, alt_dht, dht->size);
+ qpack_dht_free(alt_dht);
+
+ return dht;
+}
+
+/* Purges table <dht> until a header field of <needed> bytes fits according to
+ * the protocol (adding 32 bytes of overhead). Returns non-zero on success,
+ * zero on failure (ie: table empty but still not sufficient). It must only be
+ * called when the table is not large enough to suit the new entry and there
+ * are some entries left. In case of doubt, use qpack_dht_make_room() instead.
+ */
+int __qpack_dht_make_room(struct qpack_dht *dht, unsigned int needed)
+{
+ unsigned int used = dht->used;
+ unsigned int wrap = dht->wrap;
+ unsigned int tail;
+
+ do {
+ tail = ((dht->head + 1U < used) ? wrap : 0) + dht->head + 1U - used;
+ dht->total -= dht->dte[tail].nlen + dht->dte[tail].vlen;
+ if (tail == dht->front)
+ dht->front = dht->head;
+ used--;
+ } while (used && used * 32 + dht->total + needed + 32 > dht->size);
+
+ dht->used = used;
+
+ /* realign if empty */
+ if (!used)
+ dht->front = dht->head = 0;
+
+ /* pack the table if it doesn't wrap anymore */
+ if (dht->head + 1U >= used)
+ dht->wrap = dht->head + 1;
+
+ /* no need to check for 'used' here as if it doesn't fit, used==0 */
+ return needed + 32 <= dht->size;
+}
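+
+/* Sizing example for the test above: the QPACK/HPACK table size accounts for
+ * 32 bytes of overhead per entry in addition to the name and value lengths.
+ * With a 4096-byte table holding used=10 entries totalling total=3900 bytes,
+ * inserting a 100-byte field requires evicting from the tail until
+ * used * 32 + total + 100 + 32 <= 4096 holds again.
+ */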
+
+/* tries to insert a new header <name>:<value> in front of the current head. A
+ * negative value is returned on error.
+ */
+int qpack_dht_insert(struct qpack_dht *dht, struct ist name, struct ist value)
+{
+ unsigned int used;
+ unsigned int head;
+ unsigned int prev;
+ unsigned int wrap;
+ unsigned int tail;
+ uint32_t headroom, tailroom;
+
+ if (!qpack_dht_make_room(dht, name.len + value.len))
+ return 0;
+
+ /* Now there is enough room in the table, that's guaranteed by the
+ * protocol, but not necessarily where we need it.
+ */
+
+ used = dht->used;
+ if (!used) {
+ /* easy, the table was empty */
+ dht->front = dht->head = 0;
+ dht->wrap = dht->used = 1;
+ dht->total = 0;
+ head = 0;
+ dht->dte[head].addr = dht->size - (name.len + value.len);
+ goto copy;
+ }
+
+ /* compute the new head, used and wrap position */
+ prev = head = dht->head;
+ wrap = dht->wrap;
+ tail = qpack_dht_get_tail(dht);
+
+ used++;
+ head++;
+
+ if (head >= wrap) {
+ /* head is leading the entries, we either need to push the
+ * table further or to loop back to released entries. We could
+ * force to loop back when at least half of the allocatable
+ * entries are free but in practice it never happens.
+ */
+ if ((sizeof(*dht) + (wrap + 1) * sizeof(dht->dte[0]) <= dht->dte[dht->front].addr))
+ wrap++;
+ else if (head >= used) /* there's a hole at the beginning */
+ head = 0;
+ else {
+ /* no more room, head hits tail and the index cannot be
+ * extended, we have to realign the whole table.
+ */
+ if (!qpack_dht_defrag(dht))
+ return -1;
+
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ prev = head - 1;
+ tail = 0;
+ }
+ }
+ else if (used >= wrap) {
+ /* we've hit the tail, we need to reorganize the index so that
+ * the head is at the end (but not necessarily move the data).
+ */
+ if (!qpack_dht_defrag(dht))
+ return -1;
+
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ prev = head - 1;
+ tail = 0;
+ }
+
+ /* Now we have updated head, used and wrap, we know that there is some
+ * available room at least from the protocol's perspective. This space
+ * is split in two areas :
+ *
+ * 1: if the previous head was the front cell, the space between the
+ * end of the index table and the front cell's address.
+ * 2: if the previous head was the front cell, the space between the
+ * end of the tail and the end of the table ; or if the previous
+ * head was not the front cell, the space between the end of the
+ * tail and the head's address.
+ */
+ if (prev == dht->front) {
+ /* the area was contiguous */
+ headroom = dht->dte[dht->front].addr - (sizeof(*dht) + wrap * sizeof(dht->dte[0]));
+ tailroom = dht->size - dht->dte[tail].addr - dht->dte[tail].nlen - dht->dte[tail].vlen;
+ }
+ else {
+ /* it's already wrapped so we can't store anything in the headroom */
+ headroom = 0;
+ tailroom = dht->dte[prev].addr - dht->dte[tail].addr - dht->dte[tail].nlen - dht->dte[tail].vlen;
+ }
+
+ /* We can decide to stop filling the headroom as soon as there's enough
+ * room left in the tail to suit the protocol, but tests show that in
+ * practice it almost never happens in other situations so the extra
+ * test is useless and we simply fill the headroom as long as it's
+ * available and we don't wrap.
+ */
+ if (prev == dht->front && headroom >= name.len + value.len) {
+ /* install upfront and update ->front */
+ dht->dte[head].addr = dht->dte[dht->front].addr - (name.len + value.len);
+ dht->front = head;
+ }
+ else if (tailroom >= name.len + value.len) {
+ dht->dte[head].addr = dht->dte[tail].addr + dht->dte[tail].nlen + dht->dte[tail].vlen + tailroom - (name.len + value.len);
+ }
+ else {
+ /* need to defragment the table before inserting upfront */
+ dht = qpack_dht_defrag(dht);
+ wrap = dht->wrap + 1;
+ head = dht->head + 1;
+ dht->dte[head].addr = dht->dte[dht->front].addr - (name.len + value.len);
+ dht->front = head;
+ }
+
+ dht->wrap = wrap;
+ dht->head = head;
+ dht->used = used;
+
+ copy:
+ dht->total += name.len + value.len;
+ dht->dte[head].nlen = name.len;
+ dht->dte[head].vlen = value.len;
+
+ memcpy((void *)dht + dht->dte[head].addr, name.ptr, name.len);
+ memcpy((void *)dht + dht->dte[head].addr + name.len, value.ptr, value.len);
+ return 0;
+}
diff --git a/src/queue.c b/src/queue.c
new file mode 100644
index 0000000..f20285b
--- /dev/null
+++ b/src/queue.c
@@ -0,0 +1,761 @@
+/*
+ * Queue management functions.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* Short explanation on the locking, which is far from being trivial : a
+ * pendconn is a list element which necessarily is associated with an existing
+ * stream. It has pendconn->strm always valid. A pendconn may only be in one of
+ * these three states :
+ * - unlinked : in this case it is an empty list head ;
+ * - linked into the server's queue ;
+ * - linked into the proxy's queue.
+ *
+ * A stream does not necessarily have such a pendconn. Thus the pendconn is
+ * designated by the stream->pend_pos pointer. This results in some properties :
+ * - pendconn->strm->pend_pos is never NULL for any valid pendconn
+ * - if p->node.node.leaf_p is NULL, the element is unlinked,
+ * otherwise it necessarily belongs to one of the other lists ; this may
+ * not be atomically checked under threads though ;
+ * - pendconn->px is never NULL while the pendconn is queued
+ * - pendconn->srv is never NULL if the pendconn is in the server's queue,
+ *   and is always NULL if it is in the backend's queue or unlinked.
+ * - pendconn->target is NULL while the element is queued, and points to the
+ * assigned server when the pendconn is picked.
+ *
+ * Threads complicate the design a little bit but rules remain simple :
+ * - the server's queue lock must be held at least when manipulating the
+ * server's queue, which is when adding a pendconn to the queue and when
+ * removing a pendconn from the queue. It protects the queue's integrity.
+ *
+ * - the proxy's queue lock must be held at least when manipulating the
+ * proxy's queue, which is when adding a pendconn to the queue and when
+ * removing a pendconn from the queue. It protects the queue's integrity.
+ *
+ * - both locks are compatible and may be held at the same time.
+ *
+ * - a pendconn_add() is only performed by the stream which will own the
+ * pendconn ; the pendconn is allocated at this moment and returned ; it is
+ * added to either the server or the proxy's queue while holding this
+ *   queue's lock.
+ *
+ * - the pendconn is then met by a thread walking over the proxy or server's
+ * queue with the respective lock held. This lock is exclusive and the
+ * pendconn can only appear in one queue so by definition a single thread
+ * may find this pendconn at a time.
+ *
+ * - the pendconn is unlinked either by its own stream upon success/abort/
+ * free, or by another one offering it its server slot. This is achieved by
+ * pendconn_process_next_strm() under either the server or proxy's lock,
+ * pendconn_redistribute() under the server's lock, pendconn_grab_from_px()
+ * under the proxy's lock, or pendconn_unlink() under either the proxy's or
+ * the server's lock depending on the queue the pendconn is attached to.
+ *
+ * - no single operation except the pendconn initialisation prior to the
+ *   insertion is performed without either a queue lock held or the element
+ *   being unlinked and visible exclusively to its stream.
+ *
+ * - pendconn_grab_from_px() and pendconn_process_next_strm() assign ->target
+ * so that the stream knows what server to work with (via
+ * pendconn_dequeue() which sets it on strm->target).
+ *
+ * - a pendconn doesn't switch between queues, it stays where it is.
+ */
+
+#include <import/eb32tree.h>
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/pool.h>
+#include <haproxy/queue.h>
+#include <haproxy/sample.h>
+#include <haproxy/server-t.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/thread.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+
+#define NOW_OFFSET_BOUNDARY() ((now_ms - (TIMER_LOOK_BACK >> 12)) & 0xfffff)
+#define KEY_CLASS(key) ((u32)key & 0xfff00000)
+#define KEY_OFFSET(key) ((u32)key & 0x000fffff)
+#define KEY_CLASS_OFFSET_BOUNDARY(key) (KEY_CLASS(key) | NOW_OFFSET_BOUNDARY())
+#define MAKE_KEY(class, offset) (((u32)(class + 0x7ff) << 20) | ((u32)(now_ms + offset) & 0xfffff))
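+
+/* Layout of the key built by MAKE_KEY() (illustrative): the upper 12 bits
+ * hold the priority class offset by 0x7ff so that negative classes sort
+ * before positive ones, and the lower 20 bits hold now_ms plus the priority
+ * offset truncated to 20 bits (2^20 ms ~= 17m28s575ms, hence the wrapping
+ * period documented in pendconn_add()). For instance, class 0 with a null
+ * offset at now_ms = 0x12345678 yields (0x7ff << 20) | 0x45678 = 0x7ff45678.
+ */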
+
+DECLARE_POOL(pool_head_pendconn, "pendconn", sizeof(struct pendconn));
+
+/* returns the effective dynamic maxconn for a server, considering the minconn
+ * and the proxy's usage relative to its dynamic connections limit. It is
+ * expected that 0 < s->minconn <= s->maxconn when this is called. If the
+ * server is currently warming up, the slowstart is also applied to the
+ * resulting value, which can be lower than minconn in this case, but never
+ * less than 1.
+ */
+unsigned int srv_dynamic_maxconn(const struct server *s)
+{
+ unsigned int max;
+
+ if (s->proxy->beconn >= s->proxy->fullconn)
+ /* no fullconn or proxy is full */
+ max = s->maxconn;
+ else if (s->minconn == s->maxconn)
+ /* static limit */
+ max = s->maxconn;
+ else max = MAX(s->minconn,
+ s->proxy->beconn * s->maxconn / s->proxy->fullconn);
+
+ if ((s->cur_state == SRV_ST_STARTING) &&
+ ns_to_sec(now_ns) < s->last_change + s->slowstart &&
+ ns_to_sec(now_ns) >= s->last_change) {
+ unsigned int ratio;
+ ratio = 100 * (ns_to_sec(now_ns) - s->last_change) / s->slowstart;
+ max = MAX(1, max * ratio / 100);
+ }
+ return max;
+}
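+
+/* Worked example for srv_dynamic_maxconn(), with arbitrary values: with
+ * minconn=10, maxconn=100, fullconn=1000 and beconn=250, the dynamic limit
+ * is MAX(10, 250 * 100 / 1000) = 25. If the server is also in slowstart
+ * with 30% of the period elapsed, this becomes MAX(1, 25 * 30 / 100) = 7.
+ */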
+
+/* Remove the pendconn from the server's queue. At this stage, the connection
+ * is not really dequeued. It will be done during the process_stream. It is
+ * up to the caller to atomically decrement the pending counts.
+ *
+ * The caller must own the lock on the server queue. The pendconn must still be
+ * queued (p->node.leaf_p != NULL) and must be in a server (p->srv != NULL).
+ */
+static void __pendconn_unlink_srv(struct pendconn *p)
+{
+ p->strm->logs.srv_queue_pos += _HA_ATOMIC_LOAD(&p->queue->idx) - p->queue_idx;
+ eb32_delete(&p->node);
+}
+
+/* Remove the pendconn from the proxy's queue. At this stage, the connection
+ * is not really dequeued. It will be done during the process_stream. It is
+ * up to the caller to atomically decrement the pending counts.
+ *
+ * The caller must own the lock on the proxy queue. The pendconn must still be
+ * queued (p->node.leaf_p != NULL) and must be in the proxy (p->srv == NULL).
+ */
+static void __pendconn_unlink_prx(struct pendconn *p)
+{
+ p->strm->logs.prx_queue_pos += _HA_ATOMIC_LOAD(&p->queue->idx) - p->queue_idx;
+ eb32_delete(&p->node);
+}
+
+/* Locks the queue the pendconn element belongs to. This relies on both p->px
+ * and p->srv to be properly initialized (which is always the case once the
+ * element has been added).
+ */
+static inline void pendconn_queue_lock(struct pendconn *p)
+{
+ HA_SPIN_LOCK(QUEUE_LOCK, &p->queue->lock);
+}
+
+/* Unlocks the queue the pendconn element belongs to. This relies on both p->px
+ * and p->srv to be properly initialized (which is always the case once the
+ * element has been added).
+ */
+static inline void pendconn_queue_unlock(struct pendconn *p)
+{
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &p->queue->lock);
+}
+
+/* Removes the pendconn from the server/proxy queue. At this stage, the
+ * connection is not really dequeued. It will be done during process_stream().
+ * This function takes all the required locks for the operation. The pendconn
+ * must be valid, though it doesn't matter if it was already unlinked. Prefer
+ * pendconn_cond_unlink() to first check <p>. It also forces a serialization
+ * on p->del_lock to make sure another thread currently waking it up finishes
+ * first.
+ */
+void pendconn_unlink(struct pendconn *p)
+{
+ struct queue *q = p->queue;
+ struct proxy *px = q->px;
+ struct server *sv = q->sv;
+ uint oldidx;
+ int done = 0;
+
+ oldidx = _HA_ATOMIC_LOAD(&p->queue->idx);
+ HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
+ HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);
+
+ if (p->node.node.leaf_p) {
+ eb32_delete(&p->node);
+ done = 1;
+ }
+
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
+
+ if (done) {
+ oldidx -= p->queue_idx;
+ if (sv)
+ p->strm->logs.srv_queue_pos += oldidx;
+ else
+ p->strm->logs.prx_queue_pos += oldidx;
+
+ _HA_ATOMIC_DEC(&q->length);
+ _HA_ATOMIC_DEC(&px->totpend);
+ }
+}
+
+/* Retrieve the first pendconn from tree <pendconns>. Classes are always
+ * considered first, then the time offset. The time does wrap, so the
+ * lookup is performed twice, once to retrieve the first class and a second
+ * time to retrieve the earliest time in this class.
+ */
+static struct pendconn *pendconn_first(struct eb_root *pendconns)
+{
+ struct eb32_node *node, *node2 = NULL;
+ u32 key;
+
+ node = eb32_first(pendconns);
+ if (!node)
+ return NULL;
+
+ key = KEY_CLASS_OFFSET_BOUNDARY(node->key);
+ node2 = eb32_lookup_ge(pendconns, key);
+
+ if (!node2 ||
+ KEY_CLASS(node2->key) != KEY_CLASS(node->key)) {
+ /* no other key in the tree, or in this class */
+ return eb32_entry(node, struct pendconn, node);
+ }
+
+ /* found a better key */
+ return eb32_entry(node2, struct pendconn, node);
+}
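+
+/* Note on the double lookup above: the 20-bit time offsets are interpreted
+ * circularly around a boundary placed half the range behind now_ms, so the
+ * oldest entry of a class is the one with the smallest offset greater than
+ * or equal to NOW_OFFSET_BOUNDARY(), not necessarily the tree's first node.
+ */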
+
+/* Process the next pending connection from either a server or a proxy, and
+ * returns a strictly positive value on success (see below). If no pending
+ * connection is found, 0 is returned. Note that neither <srv> nor <px> may be
+ * NULL. Priority is given to the oldest request in the queue if both <srv> and
+ * <px> have pending requests. This ensures that no request will be left
+ * unserved. The <px> queue is not considered if the server (or a tracked
+ * server) is not RUNNING, is disabled, or has a null weight (server going
+ * down). The <srv> queue is still considered in this case, because if some
+ * connections remain there, it means that some requests have been forced there
+ * after it was seen down (eg: due to option persist). The stream is
+ * immediately marked as "assigned", and both its <srv> and <srv_conn> are set
+ * to <srv>.
+ *
+ * The proxy's queue will be consulted only if px_ok is non-zero.
+ *
+ * This function must only be called if the server queue is locked _AND_ the
+ * proxy queue is not. Today it is only called by process_srv_queue.
+ * It returns 1 if a pendconn was dequeued, otherwise 0.
+ */
+static int pendconn_process_next_strm(struct server *srv, struct proxy *px, int px_ok)
+{
+ struct pendconn *p = NULL;
+ struct pendconn *pp = NULL;
+ u32 pkey, ppkey;
+
+ p = NULL;
+ if (srv->queue.length)
+ p = pendconn_first(&srv->queue.head);
+
+ pp = NULL;
+ if (px_ok && px->queue.length) {
+ /* the lock only remains held as long as the pp is
+ * in the proxy's queue.
+ */
+ HA_SPIN_LOCK(QUEUE_LOCK, &px->queue.lock);
+ pp = pendconn_first(&px->queue.head);
+ if (!pp)
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+ }
+
+ if (!p && !pp)
+ return 0;
+ else if (!pp)
+ goto use_p; /* p != NULL */
+ else if (!p)
+ goto use_pp; /* pp != NULL */
+
+ /* p != NULL && pp != NULL*/
+
+ if (KEY_CLASS(p->node.key) < KEY_CLASS(pp->node.key))
+ goto use_p;
+
+ if (KEY_CLASS(pp->node.key) < KEY_CLASS(p->node.key))
+ goto use_pp;
+
+ pkey = KEY_OFFSET(p->node.key);
+ ppkey = KEY_OFFSET(pp->node.key);
+
+ if (pkey < NOW_OFFSET_BOUNDARY())
+ pkey += 0x100000; // key in the future
+
+ if (ppkey < NOW_OFFSET_BOUNDARY())
+ ppkey += 0x100000; // key in the future
+
+ if (pkey <= ppkey)
+ goto use_p;
+
+ use_pp:
+ /* we'd like to release the proxy lock ASAP to let other threads
+ * work with other servers. But for this we must first hold the
+ * pendconn alive to prevent a removal from its owning stream.
+ */
+ HA_SPIN_LOCK(QUEUE_LOCK, &pp->del_lock);
+
+ /* now the element won't go, we can release the proxy */
+ __pendconn_unlink_prx(pp);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+
+ pp->strm_flags |= SF_ASSIGNED;
+ pp->target = srv;
+ stream_add_srv_conn(pp->strm, srv);
+
+ /* we must wake the task up before releasing the lock as it's the only
+ * way to make sure the task still exists. The pendconn cannot vanish
+ * under us since the task will need to take the lock anyway and to wait
+ * if it wakes up on a different thread.
+ */
+ task_wakeup(pp->strm->task, TASK_WOKEN_RES);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &pp->del_lock);
+
+ _HA_ATOMIC_DEC(&px->queue.length);
+ _HA_ATOMIC_INC(&px->queue.idx);
+ return 1;
+
+ use_p:
+ /* we don't need the px queue lock anymore, we have the server's lock */
+ if (pp)
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
+
+ p->strm_flags |= SF_ASSIGNED;
+ p->target = srv;
+ stream_add_srv_conn(p->strm, srv);
+
+ /* we must wake the task up before releasing the lock as it's the only
+ * way to make sure the task still exists. The pendconn cannot vanish
+ * under us since the task will need to take the lock anyway and to wait
+ * if it wakes up on a different thread.
+ */
+ task_wakeup(p->strm->task, TASK_WOKEN_RES);
+ __pendconn_unlink_srv(p);
+
+ _HA_ATOMIC_DEC(&srv->queue.length);
+ _HA_ATOMIC_INC(&srv->queue.idx);
+ return 1;
+}
+
+/* Manages a server's connection queue. This function will try to dequeue as
+ * many pending streams as possible, and wake them up.
+ */
+void process_srv_queue(struct server *s)
+{
+ struct server *ref = s->track ? s->track : s;
+ struct proxy *p = s->proxy;
+ int maxconn;
+ int stop = 0;
+ int done = 0;
+ int px_ok;
+
+	/* px_ok is set when this server may dequeue requests from the proxy's
+	 * queue: the server (or the one it tracks) must be usable and, if it
+	 * is a backup, no active server may be up and it must either be the
+	 * elected backup or all backups must be allowed (PR_O_USE_ALL_BK).
+	 */
+ px_ok = srv_currently_usable(ref) &&
+ (!(s->flags & SRV_F_BACKUP) ||
+ (!p->srv_act &&
+ (s == p->lbprm.fbck || (p->options & PR_O_USE_ALL_BK))));
+
+ /* let's repeat that under the lock on each round. Threads competing
+ * for the same server will give up, knowing that at least one of
+ * them will check the conditions again before quitting. In order
+ * to avoid the deadly situation where one thread spends its time
+ * dequeueing for others, we limit the number of rounds it does.
+ * However we still re-enter the loop for one pass if there's no
+ * more served, otherwise we could end up with no other thread
+ * trying to dequeue them.
+ */
+ while (!stop && (done < global.tune.maxpollevents || !s->served) &&
+ s->served < (maxconn = srv_dynamic_maxconn(s))) {
+ if (HA_SPIN_TRYLOCK(QUEUE_LOCK, &s->queue.lock) != 0)
+ break;
+
+ while (s->served < maxconn) {
+ stop = !pendconn_process_next_strm(s, p, px_ok);
+ if (stop)
+ break;
+ _HA_ATOMIC_INC(&s->served);
+ done++;
+ if (done >= global.tune.maxpollevents)
+ break;
+ }
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &s->queue.lock);
+ }
+
+ if (done) {
+ _HA_ATOMIC_SUB(&p->totpend, done);
+ _HA_ATOMIC_ADD(&p->served, done);
+ __ha_barrier_atomic_store();
+ if (p->lbprm.server_take_conn)
+ p->lbprm.server_take_conn(s);
+ }
+}
+
+/* Adds the stream <strm> to the pending connection queue of server <strm>->srv
+ * or to the one of <strm>->proxy if srv is NULL. All counters and back pointers
+ * are updated accordingly. Returns NULL if no memory is available, otherwise the
+ * pendconn itself. If the stream was already marked as served, its flag is
+ * cleared. It is illegal to call this function with a non-NULL strm->srv_conn.
+ * The stream's queue position is counted with an offset of -1 because we want
+ * to make sure that being at the first position in the queue reports 1.
+ *
+ * The queue is sorted by a key composed of the priority_class and of the
+ * current timestamp offset by strm->priority_offset. The timestamp is in
+ * milliseconds and truncated to 20 bits, so it will wrap every 17m28s575ms.
+ * The offset can be positive or negative, and an offset of 0 puts it in the
+ * middle of this range (~ 8 min). Note that this also means if the adjusted
+ * timestamp wraps around, the request will be misinterpreted as being of
+ * the highest priority for that priority class.
+ *
+ * This function must be called by the stream itself, so in the context of
+ * process_stream.
+ */
+struct pendconn *pendconn_add(struct stream *strm)
+{
+ struct pendconn *p;
+ struct proxy *px;
+ struct server *srv;
+ struct queue *q;
+ unsigned int *max_ptr;
+ unsigned int old_max, new_max;
+
+ p = pool_alloc(pool_head_pendconn);
+ if (!p)
+ return NULL;
+
+ p->target = NULL;
+ p->node.key = MAKE_KEY(strm->priority_class, strm->priority_offset);
+ p->strm = strm;
+ p->strm_flags = strm->flags;
+ HA_SPIN_INIT(&p->del_lock);
+ strm->pend_pos = p;
+
+ px = strm->be;
+ if (strm->flags & SF_ASSIGNED)
+ srv = objt_server(strm->target);
+ else
+ srv = NULL;
+
+ if (srv) {
+ q = &srv->queue;
+ max_ptr = &srv->counters.nbpend_max;
+ }
+ else {
+ q = &px->queue;
+ max_ptr = &px->be_counters.nbpend_max;
+ }
+
+ p->queue = q;
+ p->queue_idx = _HA_ATOMIC_LOAD(&q->idx) - 1; // for logging only
+ new_max = _HA_ATOMIC_ADD_FETCH(&q->length, 1);
+ old_max = _HA_ATOMIC_LOAD(max_ptr);
+ while (new_max > old_max) {
+ if (likely(_HA_ATOMIC_CAS(max_ptr, &old_max, new_max)))
+ break;
+ }
+ __ha_barrier_atomic_store();
+
+ HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
+ eb32_insert(&q->head, &p->node);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
+
+ _HA_ATOMIC_INC(&px->totpend);
+ return p;
+}
+
+/* Redistribute pending connections when a server goes down. The number of
+ * connections redistributed is returned. It will take the server queue lock
+ * and does not use nor depend on other locks.
+ */
+int pendconn_redistribute(struct server *s)
+{
+ struct pendconn *p;
+ struct eb32_node *node, *nodeb;
+ int xferred = 0;
+
+ /* The REDISP option was specified. We will ignore cookie and force to
+ * balance or use the dispatcher. */
+ if ((s->proxy->options & (PR_O_REDISP|PR_O_PERSIST)) != PR_O_REDISP)
+ return 0;
+
+ HA_SPIN_LOCK(QUEUE_LOCK, &s->queue.lock);
+ for (node = eb32_first(&s->queue.head); node; node = nodeb) {
+ nodeb = eb32_next(node);
+
+ p = eb32_entry(node, struct pendconn, node);
+ if (p->strm_flags & SF_FORCE_PRST)
+ continue;
+
+ /* it's left to the dispatcher to choose a server */
+ __pendconn_unlink_srv(p);
+ p->strm_flags &= ~(SF_DIRECT | SF_ASSIGNED);
+
+ task_wakeup(p->strm->task, TASK_WOKEN_RES);
+ xferred++;
+ }
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &s->queue.lock);
+
+ if (xferred) {
+ _HA_ATOMIC_SUB(&s->queue.length, xferred);
+ _HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
+ }
+ return xferred;
+}
+
+/* Check for pending connections at the backend, and assign some of them to
+ * the server coming up. The server's weight is checked before being assigned
+ * connections it may not be able to handle. The total number of transferred
+ * connections is returned. It will take the proxy's queue lock and will not
+ * use nor depend on other locks.
+ */
+int pendconn_grab_from_px(struct server *s)
+{
+ struct pendconn *p;
+ int maxconn, xferred = 0;
+
+ if (!srv_currently_usable(s))
+ return 0;
+
+ /* if this is a backup server and there are active servers or at
+ * least another backup server was elected, then this one must
+ * not dequeue requests from the proxy.
+ */
+ if ((s->flags & SRV_F_BACKUP) &&
+ (s->proxy->srv_act ||
+ ((s != s->proxy->lbprm.fbck) && !(s->proxy->options & PR_O_USE_ALL_BK))))
+ return 0;
+
+ HA_SPIN_LOCK(QUEUE_LOCK, &s->proxy->queue.lock);
+ maxconn = srv_dynamic_maxconn(s);
+ while ((p = pendconn_first(&s->proxy->queue.head))) {
+ if (s->maxconn && s->served + xferred >= maxconn)
+ break;
+
+ __pendconn_unlink_prx(p);
+ p->target = s;
+
+ task_wakeup(p->strm->task, TASK_WOKEN_RES);
+ xferred++;
+ }
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &s->proxy->queue.lock);
+ if (xferred) {
+ _HA_ATOMIC_SUB(&s->proxy->queue.length, xferred);
+ _HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
+ }
+ return xferred;
+}
+
+/* Try to dequeue the pending connection attached to the stream <strm>. It must
+ * always exist here. If the pendconn is still linked to the server or the
+ * proxy queue, nothing is done and the function returns 1. Otherwise,
+ * <strm>->flags and <strm>->target are updated, the pendconn is released and 0
+ * is returned.
+ *
+ * This function must be called by the stream itself, so in the context of
+ * process_stream.
+ */
+int pendconn_dequeue(struct stream *strm)
+{
+ struct pendconn *p;
+ int is_unlinked;
+
+ /* unexpected case because it is called by the stream itself and
+ * only the stream can release a pendconn. So it is only
+ * possible if a pendconn is released by someone else or if the
+ * stream is supposed to be queued but without its associated
+ * pendconn. In both cases it is a bug! */
+ BUG_ON(!strm->pend_pos);
+
+ p = strm->pend_pos;
+
+ /* note below : we need to grab the queue's lock to check for emptiness
+ * because we don't want a partial _grab_from_px() or _redistribute()
+ * to be called in parallel and show an empty list without having the
+ * time to finish. With this we know that if we see the element
+ * unlinked, these functions were completely done.
+ */
+ pendconn_queue_lock(p);
+ is_unlinked = !p->node.node.leaf_p;
+ pendconn_queue_unlock(p);
+
+ /* serialize to make sure the element was finished processing */
+ HA_SPIN_LOCK(QUEUE_LOCK, &p->del_lock);
+ HA_SPIN_UNLOCK(QUEUE_LOCK, &p->del_lock);
+
+ if (!is_unlinked)
+ return 1;
+
+ /* the pendconn is not queued anymore and will not be so we're safe
+ * to proceed.
+ */
+ strm->flags &= ~(SF_DIRECT | SF_ASSIGNED);
+ strm->flags |= p->strm_flags & (SF_DIRECT | SF_ASSIGNED);
+
+ /* the entry might have been redistributed to another server */
+ if (!(strm->flags & SF_ASSIGNED))
+ sockaddr_free(&strm->scb->dst);
+
+ if (p->target) {
+ /* a server picked this pendconn, it must skip LB */
+ strm->target = &p->target->obj_type;
+ strm->flags |= SF_ASSIGNED;
+ }
+
+ strm->pend_pos = NULL;
+ pool_free(pool_head_pendconn, p);
+ return 0;
+}
+
+static enum act_return action_set_priority_class(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *smp;
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
+ if (!smp)
+ return ACT_RET_CONT;
+
+ s->priority_class = queue_limit_class(smp->data.u.sint);
+ return ACT_RET_CONT;
+}
+
+static enum act_return action_set_priority_offset(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *smp;
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
+ if (!smp)
+ return ACT_RET_CONT;
+
+ s->priority_offset = queue_limit_offset(smp->data.u.sint);
+
+ return ACT_RET_CONT;
+}
+
+static enum act_parse_ret parse_set_priority_class(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ unsigned int where = 0;
+
+ rule->arg.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.expr)
+ return ACT_RET_PRS_ERR;
+
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+
+ if (!(rule->arg.expr->fetch->val & where)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[0], sample_src_names(rule->arg.expr->fetch->use));
+ free(rule->arg.expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_set_priority_class;
+ return ACT_RET_PRS_OK;
+}
+
+static enum act_parse_ret parse_set_priority_offset(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ unsigned int where = 0;
+
+ rule->arg.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.expr)
+ return ACT_RET_PRS_ERR;
+
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+
+ if (!(rule->arg.expr->fetch->val & where)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[0], sample_src_names(rule->arg.expr->fetch->use));
+ free(rule->arg.expr);
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_set_priority_offset;
+ return ACT_RET_PRS_OK;
+}
+
+static struct action_kw_list tcp_cont_kws = {ILH, {
+ { "set-priority-class", parse_set_priority_class },
+ { "set-priority-offset", parse_set_priority_offset },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_cont_kws);
+
+static struct action_kw_list http_req_kws = {ILH, {
+ { "set-priority-class", parse_set_priority_class },
+ { "set-priority-offset", parse_set_priority_offset },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
+
+static int
+smp_fetch_priority_class(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->priority_class;
+
+ return 1;
+}
+
+static int
+smp_fetch_priority_offset(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->priority_offset;
+
+ return 1;
+}
+
+
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "prio_class", smp_fetch_priority_class, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "prio_offset", smp_fetch_priority_offset, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { /* END */},
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/quic_ack.c b/src/quic_ack.c
new file mode 100644
index 0000000..d28a698
--- /dev/null
+++ b/src/quic_ack.c
@@ -0,0 +1,258 @@
+#include <inttypes.h>
+
+#include <import/eb64tree.h>
+
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/trace.h>
+
+DECLARE_STATIC_POOL(pool_head_quic_arng, "quic_arng", sizeof(struct quic_arng_node));
+
+/* Deallocate <arngs> tree of ACK ranges. */
+void quic_free_arngs(struct quic_conn *qc, struct quic_arngs *arngs)
+{
+ struct eb64_node *n;
+ struct quic_arng_node *ar;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ n = eb64_first(&arngs->root);
+ while (n) {
+ struct eb64_node *next;
+
+ ar = eb64_entry(n, struct quic_arng_node, first);
+ next = eb64_next(n);
+ eb64_delete(n);
+ pool_free(pool_head_quic_arng, ar);
+ n = next;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Return the gap value between <p> and <q> ACK ranges where <q> follows <p> in
+ * descending order.
+ */
+static inline size_t sack_gap(struct quic_arng_node *p,
+ struct quic_arng_node *q)
+{
+ return p->first.key - q->last - 2;
+}
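+
+/* Example: with <p> = [6..9] and <q> = [1..3] taken in descending order, the
+ * gap is p->first - q->last - 2 = 6 - 3 - 2 = 1, i.e. the number of
+ * unacknowledged packets between the two ranges minus one, as expected by
+ * the ACK frame encoding (RFC 9000 section 19.3.1).
+ */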
+
+/* Set the encoded size of <arngs> QUIC ack ranges. */
+static void quic_arngs_set_enc_sz(struct quic_conn *qc, struct quic_arngs *arngs)
+{
+ struct eb64_node *node, *next;
+ struct quic_arng_node *ar, *ar_next;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ node = eb64_last(&arngs->root);
+ if (!node)
+ goto leave;
+
+ ar = eb64_entry(node, struct quic_arng_node, first);
+ arngs->enc_sz = quic_int_getsize(ar->last) +
+ quic_int_getsize(ar->last - ar->first.key) + quic_int_getsize(arngs->sz - 1);
+
+ while ((next = eb64_prev(node))) {
+ ar_next = eb64_entry(next, struct quic_arng_node, first);
+ arngs->enc_sz += quic_int_getsize(sack_gap(ar, ar_next)) +
+ quic_int_getsize(ar_next->last - ar_next->first.key);
+ node = next;
+ ar = eb64_entry(node, struct quic_arng_node, first);
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
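+
+/* Encoding size example: for the two ranges {[1..3], [6..9]}, the fields to
+ * encode are largest=9, diff2 = 9 - 6 = 3, the range count minus one = 1,
+ * gap12 = 6 - 3 - 2 = 1 and diff1 = 3 - 1 = 2. All fit in one-byte varints,
+ * so enc_sz = 5.
+ */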
+
+/* Insert <ar> ack range into <arngs> tree of ack ranges.
+ * Returns the ack range node which has been inserted on success, NULL on failure.
+ */
+static inline
+struct quic_arng_node *quic_insert_new_range(struct quic_conn *qc,
+ struct quic_arngs *arngs,
+ struct quic_arng *ar)
+{
+ struct quic_arng_node *new_ar;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ if (arngs->sz >= QUIC_MAX_ACK_RANGES) {
+ struct eb64_node *first;
+
+ first = eb64_first(&arngs->root);
+ BUG_ON(first == NULL);
+ eb64_delete(first);
+ pool_free(pool_head_quic_arng, first);
+ arngs->sz--;
+ }
+
+ new_ar = pool_alloc(pool_head_quic_arng);
+ if (!new_ar) {
+ TRACE_ERROR("ack range allocation failed", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+
+ new_ar->first.key = ar->first;
+ new_ar->last = ar->last;
+ eb64_insert(&arngs->root, &new_ar->first);
+ arngs->sz++;
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return new_ar;
+}
+
+/* Update <arngs> tree of ACK ranges with <ar> as new ACK range value.
+ * Note that this function computes the number of bytes required to encode
+ * this tree of ACK ranges in descending order.
+ *
+ * Descending order
+ * ------------->
+ * range1 range2
+ * ..........|--------|..............|--------|
+ * ^ ^ ^ ^
+ * | | | |
+ * last1 first1 last2 first2
+ * ..........+--------+--------------+--------+......
+ * diff1 gap12 diff2
+ *
+ * To encode the previous list of ranges we must encode integers as follows in
+ * descending order:
+ * enc(last2),enc(diff2),enc(gap12),enc(diff1)
+ * with diff1 = last1 - first1
+ * diff2 = last2 - first2
+ * gap12 = first1 - last2 - 2 (>= 0)
+ *
+ * Returns 0 on error, non-zero otherwise.
+ */
+int quic_update_ack_ranges_list(struct quic_conn *qc,
+ struct quic_arngs *arngs,
+ struct quic_arng *ar)
+{
+ int ret = 0;
+ struct eb64_node *le;
+ struct quic_arng_node *new_node;
+ struct eb64_node *new;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ new = NULL;
+ if (eb_is_empty(&arngs->root)) {
+ new_node = quic_insert_new_range(qc, arngs, ar);
+ if (new_node)
+ ret = 1;
+
+ goto leave;
+ }
+
+ le = eb64_lookup_le(&arngs->root, ar->first);
+ if (!le) {
+ new_node = quic_insert_new_range(qc, arngs, ar);
+ if (!new_node)
+ goto leave;
+
+ new = &new_node->first;
+ }
+ else {
+ struct quic_arng_node *le_ar =
+ eb64_entry(le, struct quic_arng_node, first);
+
+ /* Already existing range */
+ if (le_ar->last >= ar->last) {
+ ret = 1;
+ }
+ else if (le_ar->last + 1 >= ar->first) {
+ le_ar->last = ar->last;
+ new = le;
+ new_node = le_ar;
+ }
+ else {
+ new_node = quic_insert_new_range(qc, arngs, ar);
+ if (!new_node)
+ goto leave;
+
+ new = &new_node->first;
+ }
+ }
+
+ /* Verify that the new inserted node does not overlap the nodes
+ * which follow it.
+ */
+ if (new) {
+ struct eb64_node *next;
+ struct quic_arng_node *next_node;
+
+ while ((next = eb64_next(new))) {
+ next_node =
+ eb64_entry(next, struct quic_arng_node, first);
+ if (new_node->last + 1 < next_node->first.key)
+ break;
+
+ if (next_node->last > new_node->last)
+ new_node->last = next_node->last;
+ eb64_delete(next);
+ pool_free(pool_head_quic_arng, next_node);
+ /* Decrement the size of these ranges. */
+ arngs->sz--;
+ }
+ }
+
+ ret = 1;
+ leave:
+ quic_arngs_set_enc_sz(qc, arngs);
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return ret;
+}
+
+/* Remove the already sent ranges of acknowledged packet numbers from
+ * <arngs> tree below <largest_acked_pn>, possibly updating the range
+ * which contains <largest_acked_pn>.
+ * Never fails.
+ */
+void qc_treat_ack_of_ack(struct quic_conn *qc, struct quic_arngs *arngs,
+ int64_t largest_acked_pn)
+{
+ struct eb64_node *ar, *next_ar;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ ar = eb64_first(&arngs->root);
+ while (ar) {
+ struct quic_arng_node *ar_node;
+
+ next_ar = eb64_next(ar);
+ ar_node = eb64_entry(ar, struct quic_arng_node, first);
+
+ if ((int64_t)ar_node->first.key > largest_acked_pn) {
+ TRACE_DEVEL("first.key > largest", QUIC_EV_CONN_PRSAFRM, qc);
+ break;
+ }
+
+ if (largest_acked_pn < ar_node->last) {
+ eb64_delete(ar);
+ ar_node->first.key = largest_acked_pn + 1;
+ eb64_insert(&arngs->root, ar);
+ break;
+ }
+
+ /* Do not empty the tree: the first ACK range contains the
+ * largest acknowledged packet number.
+ */
+ if (arngs->sz == 1)
+ break;
+
+ eb64_delete(ar);
+ pool_free(pool_head_quic_arng, ar_node);
+ arngs->sz--;
+ ar = next_ar;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
diff --git a/src/quic_cc.c b/src/quic_cc.c
new file mode 100644
index 0000000..8fd99d3
--- /dev/null
+++ b/src/quic_cc.c
@@ -0,0 +1,49 @@
+/*
+ * Congestion controller handling.
+ *
+ * This file contains definitions for QUIC congestion control.
+ *
+ * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <haproxy/quic_cc.h>
+
+struct quic_cc_algo *default_quic_cc_algo = &quic_cc_algo_cubic;
+
+/*
+ * Initialize <cc> congestion control with <algo> as algorithm, for the <qc>
+ * QUIC connection it is attached to.
+ */
+void quic_cc_init(struct quic_cc *cc,
+ struct quic_cc_algo *algo, struct quic_conn *qc)
+{
+ cc->qc = qc;
+ cc->algo = algo;
+ if (cc->algo->init)
+ (cc->algo->init(cc));
+}
+
+/* Send <ev> event to <cc> congestion controller. */
+void quic_cc_event(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ cc->algo->event(cc, ev);
+}
+
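+/* Dump the current state of <cc> congestion controller into <buf>. */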
+void quic_cc_state_trace(struct buffer *buf, const struct quic_cc *cc)
+{
+ cc->algo->state_trace(buf, cc);
+}
diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c
new file mode 100644
index 0000000..76a62ac
--- /dev/null
+++ b/src/quic_cc_cubic.c
@@ -0,0 +1,542 @@
+#include <haproxy/quic_cc.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/ticks.h>
+#include <haproxy/trace.h>
+
+/* IMPORTANT NOTE about the units defined by the RFC 9438
+ * (CUBIC for Fast and Long-Distance Networks):
+ *
+ * RFC 9438 4.1. Definitions:
+ * The unit of all window sizes in this document is segments of the SMSS, and
+ * the unit of all times is seconds. Implementations can use bytes to express
+ * window sizes, which would require factoring in the SMSS wherever necessary
+ * and replacing segments_acked (Figure 4) with the number of acknowledged
+ * bytes.
+ */
+
+/* This is why, in this implementation, every quantity expressed as a number
+ * of segments (typically a congestion window value) is multiplied by the
+ * MTU value.
+ */
+
+/* This source file is highly inspired by the Linux kernel implementation
+ * of TCP Cubic: avoiding floating point operations is the only way to
+ * remain fast. (See net/ipv4/tcp_cubic.c)
+ */
+
+/* Constants definitions:
+ * CUBIC_BETA_SCALED refers to the scaled value of RFC 9438 beta_cubic variable.
+ * CUBIC_C_SCALED refers to the scaled value of RFC 9438 C variable.
+ */
+
+/* The right shifting value to apply to scaled values to get its real value. */
+#define CUBIC_SCALE_FACTOR_SHIFT 10
+
+/* CUBIC multiplicative decrease factor as described in RFC 9438 section 4.6 */
+#define CUBIC_BETA_SCALED 717 /* beta_cubic = 0.7 (constant) */
+
+/* CUBIC C constant that determines the aggressiveness of CUBIC in competing
+ * with other congestion control algorithms in high-BDP networks.
+ */
+#define CUBIC_C_SCALED 410 /* RFC 9438 C = 0.4 segment/seconds^3,
+                            * i.e. 0.4 scaled by 2^CUBIC_SCALE_FACTOR_SHIFT
+                            * in this implementation.
+                            */
+
+/* The scaled value of 1 */
+#define CUBIC_ONE_SCALED (1 << CUBIC_SCALE_FACTOR_SHIFT)
+
+/* The maximum time value which may be cubed and multiplied by CUBIC_C_SCALED */
+#define CUBIC_TIME_LIMIT 355535ULL /* ms */
+
+/* Per-connection CUBIC algorithm state. Note that the current congestion window
+ * value is not stored in this structure.
+ */
+struct cubic {
+ /* QUIC_CC_ST_* state values. */
+ uint32_t state;
+ /* Slow start threshold (in bytes) */
+ uint32_t ssthresh;
+ /* Remaining number of acknowledged bytes between two ACK for CUBIC congestion
+ * control window (in bytes).
+ */
+ uint32_t remaining_inc;
+	/* Time at which the current congestion avoidance stage started (in ms). */
+ uint32_t t_epoch;
+ /* The window to reach for each recovery period during a concave region (in bytes). */
+ uint32_t W_target;
+ /* The time period to reach W_target during a concave region (in ms). */
+ uint32_t K;
+ /* The last window maximum reached (in bytes). */
+ uint32_t last_w_max;
+ /* Estimated value of the Reno congestion window in the TCP-friendly region (in bytes). */
+ uint32_t W_est;
+ /* Remaining number of acknowledged bytes between two ACKs for estimated
+ * TCP-Reno congestion control window (in bytes).
+ */
+ uint32_t remaining_W_est_inc;
+ /* Start time of recovery period (used to avoid re-entering this state, if already
+ * in recovery period) (in ms).
+ */
+ uint32_t recovery_start_time;
+};
+
+static void quic_cc_cubic_reset(struct quic_cc *cc)
+{
+ struct cubic *c = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ c->state = QUIC_CC_ST_SS;
+ c->ssthresh = QUIC_CC_INFINITE_SSTHESH;
+ c->remaining_inc = 0;
+ c->remaining_W_est_inc = 0;
+ c->t_epoch = 0;
+ c->W_target = 0;
+ c->K = 0;
+ c->last_w_max = 0;
+ c->W_est = 0;
+ c->recovery_start_time = 0;
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+static int quic_cc_cubic_init(struct quic_cc *cc)
+{
+ quic_cc_cubic_reset(cc);
+ return 1;
+}
+
+/* Cubic root.
+ * Highly inspired by the Linux kernel sources.
+ * See net/ipv4/tcp_cubic.c
+ */
+static uint32_t cubic_root(uint64_t val)
+{
+ uint32_t x, b, shift;
+
+ static const uint8_t v[] = {
+ 0, 54, 54, 54, 118, 118, 118, 118,
+ 123, 129, 134, 138, 143, 147, 151, 156,
+ 157, 161, 164, 168, 170, 173, 176, 179,
+ 181, 185, 187, 190, 192, 194, 197, 199,
+ 200, 202, 204, 206, 209, 211, 213, 215,
+ 217, 219, 221, 222, 224, 225, 227, 229,
+ 231, 232, 234, 236, 237, 239, 240, 242,
+ 244, 245, 246, 248, 250, 251, 252, 254,
+ };
+
+ if (!val || (b = my_flsl(val)) < 7) {
+ /* val in [0..63] */
+ return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
+ }
+
+ b = ((b * 84) >> 8) - 1;
+ shift = (val >> (b * 3));
+
+ x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;
+
+ x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
+ x = ((x * 341) >> 10);
+
+ return x;
+}
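+
+/* Accuracy note (illustrative): cubic_root(8) returns 2 and cubic_root(1000)
+ * returns 10; the table lookup plus the single Newton-Raphson iteration
+ * above keeps the error small enough (the Linux original documents an
+ * average error around 0.2%) for the K computation below.
+ */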
+
+/*
+ * RFC 9438 3.1. Principle 1 for the CUBIC Increase Function
+ *
+ * For better network utilization and stability, CUBIC [HRX08] uses a cubic
+ * window increase function in terms of the elapsed time from the last
+ * congestion event. While most congestion control algorithms that provide
+ * alternatives to Reno increase the congestion window using convex functions,
+ * CUBIC uses both the concave and convex profiles of a cubic function for
+ * window growth.
+ *
+ * After a window reduction in response to a congestion event detected by
+ * duplicate acknowledgments (ACKs), Explicit Congestion Notification-Echo
+ * (ECN-Echo (ECE)) ACKs [RFC3168], RACK-TLP for TCP [RFC8985], or QUIC loss
+ * detection [RFC9002], CUBIC remembers the congestion window size at which it
+ * received the congestion event and performs a multiplicative decrease of the
+ * congestion window. When CUBIC enters into congestion avoidance, it starts to
+ * increase the congestion window using the concave profile of the cubic
+ * function. The cubic function is set to have its plateau at the remembered
+ * congestion window size, so that the concave window increase continues until
+ * then. After that, the cubic function turns into a convex profile and the
+ * convex window increase begins.
+ *
+ * W_cubic(time) (bytes)
+ * ^ convex region
+ * | <------------------------->
+ * | . +
+ * | . +
+ * | . +
+ * | . +
+ * | . + ^
+ * | . + | W_cubic_t
+ * | . + |
+ * | . + |
+ * W_target |-----------+--------------------------+------------------------+
+ * (W_max) | +. + . t
+ * | + . + .
+ * | + . + .
+ * | + . + .
+ * | + . + .
+ * | .+ .
+ * | + .
+ * | + .
+ * | + .
+ * | . .
+ * | . .
+ * | . .
+ * +-----------+--------------------------+-+------------------------> time (s)
+ * 0 t_epoch (t_epoch + K)
+ * <-------------------------->
+ * . concave region
+ * .
+ * congestion
+ * event
+ *
+ * RFC 9438 4.2. Window Increase Function:
+ *
+ * W_cubic(t) = C*(t-K)^3 + W_max (Figure 1)
+ * K = cubic_root((W_max - cwnd_epoch)/C) (Figure 2)
+ *
+ * +--------------------------------------------------------------------+
+ * | RFC 9438 definitions | Code variables |
+ * +--------------------------------------------------------------------+
+ * | C (segments/s^3) | CUBIC_C_SCALED (mB/s^3) |
+ * +--------------------------------------------------------------------+
+ * | W_max (segments) | c->last_w_max - path->cwnd (bytes) |
+ * +--------------------------------------------------------------------+
+ * | K (s) | c->K (ms) |
+ * +--------------------------------------------------------------------+
+ * | beta_cubic (constant) | CUBIC_BETA_SCALED (constant) |
+ * +--------------------------------------------------------------------+
+ */
+static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
+{
+ struct cubic *c = quic_cc_priv(cc);
+ struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+ /* The elapsed time since the start of the congestion event. */
+ uint32_t elapsed_time;
+ /* Target value of the congestion window. */
+ uint32_t target;
+ /* The time at which the congestion window will be computed based
+ * on the cubic increase function.
+ */
+ uint64_t t;
+ /* The computed value of the congestion window at time t based on the cubic
+ * increase function.
+ */
+ uint64_t W_cubic_t;
+ uint32_t inc, inc_diff;
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ if (!c->t_epoch) {
+ c->t_epoch = now_ms;
+ if (c->last_w_max <= path->cwnd) {
+ c->K = 0;
+ c->W_target = path->cwnd;
+ }
+ else {
+ /* K value computing (in seconds):
+ * K = cubic_root((W_max - cwnd_epoch)/C) (Figure 2)
+ * Note that K is stored in milliseconds.
+ */
+ c->K = cubic_root(((c->last_w_max - path->cwnd) << CUBIC_SCALE_FACTOR_SHIFT) / (CUBIC_C_SCALED * path->mtu));
+			/* Convert to milliseconds. */
+ c->K *= 1000;
+ c->W_target = c->last_w_max;
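+
+			/* Order of magnitude (arbitrary values): for a
+			 * deficit last_w_max - cwnd of 30 segments with
+			 * C = 0.4 segment/s^3, this gives
+			 * cubic_root(30 / 0.4) = cubic_root(75) ~= 4,
+			 * hence K ~= 4000 ms.
+			 */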
+ }
+
+ c->W_est = path->cwnd;
+ c->remaining_inc = 0;
+ c->remaining_W_est_inc = 0;
+ }
+
+ elapsed_time = now_ms + path->loss.rtt_min - c->t_epoch;
+ if (elapsed_time < c->K) {
+ t = c->K - elapsed_time;
+ }
+ else {
+ t = elapsed_time - c->K;
+ }
+
+ if (t > CUBIC_TIME_LIMIT) {
+ /* TODO : should not happen if we handle the case
+ * of very late acks receipt. This must be handled as a congestion
+ * control event: a very late ack should trigger a congestion
+ * control algorithm reset.
+ */
+ quic_cc_cubic_reset(cc);
+ goto leave;
+ }
+
+ /* Compute W_cubic_t at t time. */
+ W_cubic_t = CUBIC_C_SCALED * path->mtu;
+ W_cubic_t = (W_cubic_t * t) / 1000;
+ W_cubic_t = (W_cubic_t * t) / 1000;
+ W_cubic_t = (W_cubic_t * t) / 1000;
+ W_cubic_t >>= CUBIC_SCALE_FACTOR_SHIFT;
+ if (elapsed_time < c->K)
+ target = c->W_target - W_cubic_t;
+ else
+ target = c->W_target + W_cubic_t;
+
+ if (target > path->cwnd) {
+ /* Concave region */
+
+ /* RFC 9438 4.4. Concave Region
+ *
+ * When receiving a new ACK in congestion avoidance, if CUBIC is not in
+ * the Reno-friendly region and cwnd is less than Wmax, then CUBIC is
+ * in the concave region. In this region, cwnd MUST be incremented by
+ * (target - cwnd) / cwnd.
+ */
+ inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd);
+ c->remaining_inc = inc_diff % path->cwnd;
+ inc = inc_diff / path->cwnd;
+ }
+ else {
+ /* Convex region: very small increment */
+
+ /* RFC 9438 4.5. Convex Region
+ *
+ * When receiving a new ACK in congestion avoidance, if CUBIC is not in
+ * the Reno-friendly region and cwnd is larger than or equal to Wmax,
+ * then CUBIC is in the convex region. The convex region indicates that
+ * the network conditions might have changed since the last congestion
+ * event, possibly implying more available bandwidth after some flow
+ * departures. Since the Internet is highly asynchronous, some amount
+ * of perturbation is always possible without causing a major change in
+ * available bandwidth. Unless the cwnd is overridden by the AIMD window
+ * increase, CUBIC will behave cautiously when operating in this region.
+ * The convex profile aims to increase the window very slowly at the
+ * beginning when cwnd is around Wmax and then gradually increases its
+ * rate of increase. This region is also called the "maximum probing
+ * phase", since CUBIC is searching for a new Wmax. In this region,
+ * cwnd MUST be incremented by (target - cwnd) / cwnd for each received
+ * new ACK, where target is calculated as described in Section 4.2.
+ */
+ inc_diff = c->remaining_inc + path->mtu;
+ c->remaining_inc = inc_diff % (100 * path->cwnd);
+ inc = inc_diff / (100 * path->cwnd);
+ }
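+ /* Hedged illustration of the two branches above: the concave branch
+ * accumulates mtu * (target - cwnd) bytes per ACK and releases one
+ * byte of cwnd per cwnd bytes accumulated, i.e. the RFC's
+ * (target - cwnd)/cwnd increment scaled to bytes; the convex branch
+ * accumulates only one MTU per ACK against a 100x larger divisor, so
+ * growth restarts roughly two orders of magnitude slower while
+ * probing above W_max.
+ */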
+
+ inc_diff = c->remaining_W_est_inc + path->mtu * acked;
+ c->W_est += inc_diff / path->cwnd;
+ c->remaining_W_est_inc = inc_diff % path->cwnd;
+
+ /* TCP friendliness :
+ * RFC 9438 4.3. Reno-Friendly Region
+ *
+ * Reno performs well in certain types of networks -- for example, under
+ * short RTTs and small bandwidths (or small BDPs). In these networks,
+ * CUBIC remains in the Reno-friendly region to achieve at least the same
+ * throughput as Reno.
+ *
+ * When receiving a new ACK in congestion avoidance (where cwnd could be
+ * greater than or less than Wmax), CUBIC checks whether Wcubic(t) is less
+ * than West. If so, CUBIC is in the Reno-friendly region and cwnd SHOULD
+ * be set to West at each reception of a new ACK.
+ *
+ * West is set equal to cwnd_epoch at the start of the congestion avoidance
+ * stage. After that, on every new ACK, West is updated using Figure 4.
+ * Note that this equation uses segments_acked and cwnd is measured in
+ * segments. An implementation that measures cwnd in bytes should adjust the
+ * equation accordingly using the number of acknowledged bytes and the SMSS.
+ * Also note that this equation works for connections with enabled or
+ * disabled delayed ACKs [RFC5681], as segments_acked will be different based
+ * on the segments actually acknowledged by a new ACK.
+ *
+ * Figure 4 : West = West + alpha_cubic * (segments_acked / cwnd)
+ *
+ * Once West has grown to reach the cwnd at the time of most recently
+ * setting ssthresh -- that is, West >= cwndprior -- the sender SHOULD set
+ * alpha_cubic to 1 to ensure that it can achieve the same congestion window
+ * increment rate as Reno, which uses AIMD(1, 0.5).
+ */
+ if (c->W_est > path->cwnd) {
+ uint32_t W_est_inc = path->mtu * (c->W_est - path->cwnd) / path->cwnd;
+ if (W_est_inc > inc)
+ inc = W_est_inc;
+ }
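+ /* Hedged numeric check of the Reno-friendly logic above: with
+ * cwnd = 10 * mtu, acknowledging one full window of bytes over an RTT
+ * adds mtu * cwnd / cwnd = one MTU to W_est, i.e. Reno's one-segment
+ * per-RTT growth; whenever this estimator outruns the cubic target,
+ * its increment wins.
+ */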
+
+ path->cwnd += inc;
+ path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+static void quic_cc_cubic_slow_start(struct quic_cc *cc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ quic_cc_cubic_reset(cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+static void quic_enter_recovery(struct quic_cc *cc)
+{
+ struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+ struct cubic *c = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ c->t_epoch = 0;
+ c->recovery_start_time = now_ms;
+
+ /* RFC 9438 4.7. Fast Convergence
+ *
+ * To improve convergence speed, CUBIC uses a heuristic. When a new flow
+ * joins the network, existing flows need to give up some of their bandwidth
+ * to allow the new flow some room for growth if the existing flows have
+ * been using all the network bandwidth. To speed up this bandwidth release
+ * by existing flows, the following fast convergence mechanism SHOULD be
+ * implemented. With fast convergence, when a congestion event occurs, Wmax
+ * is updated as follows, before the window reduction described in Section
+ * 4.6.
+ *
+ * if cwnd < Wmax and fast convergence enabled, further reduce Wmax:
+ * Wmax = cwnd * (1 + beta_cubic) / 2
+ * otherwise, remember cwnd before reduction:
+ * Wmax = cwnd
+ */
+ if (path->cwnd < c->last_w_max) {
+ /* (1 + beta_cubic) * path->cwnd / 2 */
+ c->last_w_max = (path->cwnd * (CUBIC_ONE_SCALED + CUBIC_BETA_SCALED) / 2) >> CUBIC_SCALE_FACTOR_SHIFT;
+ }
+ else {
+ c->last_w_max = path->cwnd;
+ }
+
+ c->ssthresh = (CUBIC_BETA_SCALED * path->cwnd) >> CUBIC_SCALE_FACTOR_SHIFT;
+ path->cwnd = QUIC_MAX(c->ssthresh, (uint32_t)path->min_cwnd);
+ c->state = QUIC_CC_ST_RP;
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+}
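+/* Hedged numeric sketch of the above (RFC 9438 defaults, beta_cubic = 0.7):
+ * a flow hit while cwnd = 80 and last_w_max = 100 is below the old maximum,
+ * so fast convergence applies: last_w_max = 80 * 1.7 / 2 = 68, and
+ * ssthresh = 0.7 * 80 = 56. Had cwnd been 120 (>= last_w_max), last_w_max
+ * would simply have been set to 120 before the multiplicative decrease.
+ */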
+
+/* Congestion slow-start callback. */
+static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc);
+ struct cubic *c = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) {
+ path->cwnd += ev->ack.acked;
+ path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ }
+ /* Exit to congestion avoidance if slow start threshold is reached. */
+ if (path->cwnd >= c->ssthresh)
+ c->state = QUIC_CC_ST_CA;
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ break;
+
+ case QUIC_CC_EVT_LOSS:
+ quic_enter_recovery(cc);
+ break;
+
+ case QUIC_CC_EVT_ECN_CE:
+ /* TODO */
+ break;
+ }
+
+ out:
+ TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+/* Congestion avoidance callback. */
+static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ quic_cubic_update(cc, ev->ack.acked);
+ break;
+ case QUIC_CC_EVT_LOSS:
+ quic_enter_recovery(cc);
+ break;
+ case QUIC_CC_EVT_ECN_CE:
+ /* TODO */
+ break;
+ }
+
+ out:
+ TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+/* Recovery period callback */
+static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct cubic *c = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
+ TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev, cc);
+
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ /* RFC 9002 7.3.2. Recovery
+ * A recovery period ends and the sender enters congestion avoidance when a
+ * packet sent during the recovery period is acknowledged.
+ */
+ if (tick_is_le(ev->ack.time_sent, c->recovery_start_time)) {
+ TRACE_PROTO("CC cubic (still in recov. period)", QUIC_EV_CONN_CC, cc->qc);
+ goto leave;
+ }
+
+ c->state = QUIC_CC_ST_CA;
+ c->recovery_start_time = TICK_ETERNITY;
+ break;
+ case QUIC_CC_EVT_LOSS:
+ break;
+ case QUIC_CC_EVT_ECN_CE:
+ /* TODO */
+ break;
+ }
+
+ leave:
+ TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+}
+
+static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
+ struct quic_cc_event *ev) = {
+ [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
+ [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
+ [QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb,
+};
+
+static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct cubic *c = quic_cc_priv(cc);
+
+ return quic_cc_cubic_state_cbs[c->state](cc, ev);
+}
+
+static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
+{
+ struct quic_cc_path *path;
+ struct cubic *c = quic_cc_priv(cc);
+
+ path = container_of(cc, struct quic_cc_path, cc);
+ chunk_appendf(buf, " state=%s cwnd=%llu mcwnd=%llu ssthresh=%d rpst=%dms",
+ quic_cc_state_str(c->state),
+ (unsigned long long)path->cwnd,
+ (unsigned long long)path->mcwnd,
+ (int)c->ssthresh,
+ !tick_isset(c->recovery_start_time) ? -1 :
+ TICKS_TO_MS(tick_remain(c->recovery_start_time, now_ms)));
+}
+
+struct quic_cc_algo quic_cc_algo_cubic = {
+ .type = QUIC_CC_ALGO_TP_CUBIC,
+ .init = quic_cc_cubic_init,
+ .event = quic_cc_cubic_event,
+ .slow_start = quic_cc_cubic_slow_start,
+ .state_trace = quic_cc_cubic_state_trace,
+};
diff --git a/src/quic_cc_newreno.c b/src/quic_cc_newreno.c
new file mode 100644
index 0000000..405b0ba
--- /dev/null
+++ b/src/quic_cc_newreno.c
@@ -0,0 +1,220 @@
+/*
+ * NewReno congestion control algorithm.
+ *
+ * This file contains definitions for QUIC congestion control.
+ *
+ * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <haproxy/api-t.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/quic_cc.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/trace.h>
+
+/* Newreno state */
+struct nr {
+ uint32_t state;
+ uint32_t ssthresh;
+ uint32_t recovery_start_time;
+ uint32_t remain_acked;
+};
+
+static int quic_cc_nr_init(struct quic_cc *cc)
+{
+ struct nr *nr = quic_cc_priv(cc);
+
+ nr->state = QUIC_CC_ST_SS;
+ nr->ssthresh = QUIC_CC_INFINITE_SSTHESH;
+ nr->recovery_start_time = 0;
+ nr->remain_acked = 0;
+
+ return 1;
+}
+
+/* Re-enter slow start state. */
+static void quic_cc_nr_slow_start(struct quic_cc *cc)
+{
+ struct quic_cc_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ path = container_of(cc, struct quic_cc_path, cc);
+ path->cwnd = path->min_cwnd;
+ /* Re-entering slow start state. */
+ nr->state = QUIC_CC_ST_SS;
+ /* Recovery start time reset */
+ nr->recovery_start_time = 0;
+}
+
+/* Enter a recovery period. */
+static void quic_cc_nr_enter_recovery(struct quic_cc *cc)
+{
+ struct quic_cc_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ path = container_of(cc, struct quic_cc_path, cc);
+ nr->recovery_start_time = now_ms;
+ nr->ssthresh = path->cwnd >> 1;
+ path->cwnd = QUIC_MAX(nr->ssthresh, (uint32_t)path->min_cwnd);
+ nr->state = QUIC_CC_ST_RP;
+}
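+/* Hedged numeric example: NewReno halves the window on loss, so a loss at
+ * cwnd = 64kB gives ssthresh = 32kB (floored at path->min_cwnd); the
+ * connection then stays in the recovery state until a packet sent after
+ * <recovery_start_time> is acknowledged.
+ */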
+
+/* Slow start callback. */
+static void quic_cc_nr_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct quic_cc_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC reno", QUIC_EV_CONN_CC, cc->qc, ev);
+ path = container_of(cc, struct quic_cc_path, cc);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ path->cwnd += ev->ack.acked;
+ path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ /* Exit to congestion avoidance if slow start threshold is reached. */
+ if (path->cwnd > nr->ssthresh)
+ nr->state = QUIC_CC_ST_CA;
+ break;
+
+ case QUIC_CC_EVT_LOSS:
+ quic_cc_nr_enter_recovery(cc);
+ break;
+
+ case QUIC_CC_EVT_ECN_CE:
+ /* XXX TO DO XXX */
+ break;
+ }
+ TRACE_PROTO("CC reno", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+/* Congestion avoidance callback. */
+static void quic_cc_nr_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct quic_cc_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC reno", QUIC_EV_CONN_CC, cc->qc, ev);
+ path = container_of(cc, struct quic_cc_path, cc);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ {
+ uint64_t acked;
+
+ /* Increase the congestion window by (acked / cwnd), i.e. roughly one
+ * MTU per fully acknowledged window, carrying the sub-MTU remainder
+ * in <remain_acked> so no credit is lost between ACKs.
+ */
+ acked = ev->ack.acked * path->mtu + nr->remain_acked;
+ nr->remain_acked = acked % path->cwnd;
+ path->cwnd += acked / path->cwnd;
+ path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd);
+ path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
+ break;
+ }
+
+ case QUIC_CC_EVT_LOSS:
+ quic_cc_nr_enter_recovery(cc);
+ break;
+
+ case QUIC_CC_EVT_ECN_CE:
+ /* XXX TO DO XXX */
+ break;
+ }
+
+ out:
+ TRACE_PROTO("CC reno", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+/* Recovery period callback. */
+static void quic_cc_nr_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct quic_cc_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC reno", QUIC_EV_CONN_CC, cc->qc, ev);
+ path = container_of(cc, struct quic_cc_path, cc);
+ switch (ev->type) {
+ case QUIC_CC_EVT_ACK:
+ /* RFC 9002 7.3.2. Recovery
+ * A recovery period ends and the sender enters congestion avoidance when a
+ * packet sent during the recovery period is acknowledged.
+ */
+ if (tick_is_le(ev->ack.time_sent, nr->recovery_start_time)) {
+ TRACE_PROTO("CC reno (still in recovery period)", QUIC_EV_CONN_CC, cc->qc, ev);
+ goto leave;
+ }
+
+ nr->state = QUIC_CC_ST_CA;
+ nr->recovery_start_time = TICK_ETERNITY;
+ path->cwnd = nr->ssthresh;
+ break;
+ case QUIC_CC_EVT_LOSS:
+ /* Do nothing */
+ break;
+ case QUIC_CC_EVT_ECN_CE:
+ /* XXX TO DO XXX */
+ break;
+ }
+
+ leave:
+ TRACE_PROTO("CC reno", QUIC_EV_CONN_CC, cc->qc, ev);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, ev);
+}
+
+static void quic_cc_nr_state_trace(struct buffer *buf, const struct quic_cc *cc)
+{
+ struct quic_cc_path *path;
+ struct nr *nr = quic_cc_priv(cc);
+
+ path = container_of(cc, struct quic_cc_path, cc);
+ chunk_appendf(buf, " state=%s cwnd=%llu mcwnd=%llu ssthresh=%ld rpst=%dms pktloss=%llu",
+ quic_cc_state_str(nr->state),
+ (unsigned long long)path->cwnd,
+ (unsigned long long)path->mcwnd,
+ (long)nr->ssthresh,
+ !tick_isset(nr->recovery_start_time) ? -1 :
+ TICKS_TO_MS(tick_remain(nr->recovery_start_time, now_ms)),
+ (unsigned long long)path->loss.nb_lost_pkt);
+}
+
+static void (*quic_cc_nr_state_cbs[])(struct quic_cc *cc,
+ struct quic_cc_event *ev) = {
+ [QUIC_CC_ST_SS] = quic_cc_nr_ss_cb,
+ [QUIC_CC_ST_CA] = quic_cc_nr_ca_cb,
+ [QUIC_CC_ST_RP] = quic_cc_nr_rp_cb,
+};
+
+static void quic_cc_nr_event(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ struct nr *nr = quic_cc_priv(cc);
+
+ return quic_cc_nr_state_cbs[nr->state](cc, ev);
+}
+
+struct quic_cc_algo quic_cc_algo_nr = {
+ .type = QUIC_CC_ALGO_TP_NEWRENO,
+ .init = quic_cc_nr_init,
+ .event = quic_cc_nr_event,
+ .slow_start = quic_cc_nr_slow_start,
+ .state_trace = quic_cc_nr_state_trace,
+};
+
diff --git a/src/quic_cc_nocc.c b/src/quic_cc_nocc.c
new file mode 100644
index 0000000..6e5cff9
--- /dev/null
+++ b/src/quic_cc_nocc.c
@@ -0,0 +1,76 @@
+/*
+ * Fake congestion control algorithm which does nothing except initializing
+ * the congestion control window to a fixed value.
+ *
+ */
+
+#include <haproxy/api-t.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/trace.h>
+
+static int quic_cc_nocc_init(struct quic_cc *cc)
+{
+ struct quic_cc_path *path;
+
+ path = container_of(cc, struct quic_cc_path, cc);
+ path->cwnd = path->max_cwnd;
+ return 1;
+}
+
+static void quic_cc_nocc_slow_start(struct quic_cc *cc)
+{
+}
+
+/* Slow start callback. */
+static void quic_cc_nocc_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC nocc", QUIC_EV_CONN_CC, cc->qc, ev, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+/* Congestion avoidance callback. */
+static void quic_cc_nocc_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC nocc", QUIC_EV_CONN_CC, cc->qc, ev, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+/* Recovery period callback. */
+static void quic_cc_nocc_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
+ TRACE_PROTO("CC nocc", QUIC_EV_CONN_CC, cc->qc, ev, cc);
+ TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
+}
+
+static void quic_cc_nocc_state_trace(struct buffer *buf, const struct quic_cc *cc)
+{
+ struct quic_cc_path *path;
+
+ path = container_of(cc, struct quic_cc_path, cc);
+ chunk_appendf(buf, " cwnd=%llu", (unsigned long long)path->cwnd);
+}
+
+static void (*quic_cc_nocc_state_cbs[])(struct quic_cc *cc,
+ struct quic_cc_event *ev) = {
+ [QUIC_CC_ST_SS] = quic_cc_nocc_ss_cb,
+ [QUIC_CC_ST_CA] = quic_cc_nocc_ca_cb,
+ [QUIC_CC_ST_RP] = quic_cc_nocc_rp_cb,
+};
+
+static void quic_cc_nocc_event(struct quic_cc *cc, struct quic_cc_event *ev)
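+/* All three state callbacks above are no-ops by design, so the event
+ * dispatcher below may use any fixed index: QUIC_CC_ST_SS is used
+ * unconditionally.
+ */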
+{
+ return quic_cc_nocc_state_cbs[QUIC_CC_ST_SS](cc, ev);
+}
+
+struct quic_cc_algo quic_cc_algo_nocc = {
+ .type = QUIC_CC_ALGO_TP_NOCC,
+ .init = quic_cc_nocc_init,
+ .event = quic_cc_nocc_event,
+ .slow_start = quic_cc_nocc_slow_start,
+ .state_trace = quic_cc_nocc_state_trace,
+};
+
diff --git a/src/quic_cid.c b/src/quic_cid.c
new file mode 100644
index 0000000..19c1f07
--- /dev/null
+++ b/src/quic_cid.c
@@ -0,0 +1,286 @@
+#include <import/eb64tree.h>
+#include <import/ebmbtree.h>
+
+#include <haproxy/pool.h>
+#include <haproxy/quic_cid.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_rx-t.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/trace.h>
+#include <haproxy/xxhash.h>
+
+/* Initialize the stateless reset token attached to <conn_id> connection ID.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_stateless_reset_token_init(struct quic_connection_id *conn_id)
+{
+ /* Output secret */
+ unsigned char *token = conn_id->stateless_reset_token;
+ size_t tokenlen = sizeof conn_id->stateless_reset_token;
+ /* Salt */
+ const unsigned char *cid = conn_id->cid.data;
+ size_t cidlen = conn_id->cid.len;
+
+ return quic_stateless_reset_token_cpy(token, tokenlen, cid, cidlen);
+}
+
+/* Generate a CID directly derived from <orig> CID and <addr> address.
+ *
+ * Returns the derived CID.
+ */
+struct quic_cid quic_derive_cid(const struct quic_cid *orig,
+ const struct sockaddr_storage *addr)
+{
+ struct quic_cid cid;
+ const struct sockaddr_in *in;
+ const struct sockaddr_in6 *in6;
+ char *pos = trash.area;
+ size_t idx = 0;
+ uint64_t hash;
+ int i;
+
+ /* Prepare buffer for hash using original CID first. */
+ memcpy(pos, orig->data, orig->len);
+ idx += orig->len;
+
+ /* Concatenate client address. */
+ switch (addr->ss_family) {
+ case AF_INET:
+ in = (struct sockaddr_in *)addr;
+
+ memcpy(&pos[idx], &in->sin_addr, sizeof(in->sin_addr));
+ idx += sizeof(in->sin_addr);
+ memcpy(&pos[idx], &in->sin_port, sizeof(in->sin_port));
+ idx += sizeof(in->sin_port);
+ break;
+
+ case AF_INET6:
+ in6 = (struct sockaddr_in6 *)addr;
+
+ memcpy(&pos[idx], &in6->sin6_addr, sizeof(in6->sin6_addr));
+ idx += sizeof(in6->sin6_addr);
+ memcpy(&pos[idx], &in6->sin6_port, sizeof(in6->sin6_port));
+ idx += sizeof(in6->sin6_port);
+ break;
+
+ default:
+ /* TODO to implement */
+ ABORT_NOW();
+ }
+
+ /* Avoid similar values between multiple haproxy processes. */
+ memcpy(&pos[idx], boot_seed, sizeof(boot_seed));
+ idx += sizeof(boot_seed);
+
+ /* Hash the final buffer content. */
+ hash = XXH64(pos, idx, 0);
+
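+ /* Serialize the 64-bit hash in big-endian order: byte <i> receives
+ * bits [56 - 8*i .. 63 - 8*i] of <hash>, making the derived CID
+ * independent of the host endianness.
+ */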
+ for (i = 0; i < sizeof(hash); ++i)
+ cid.data[i] = hash >> ((sizeof(hash) * 7) - (8 * i));
+ cid.len = sizeof(hash);
+
+ return cid;
+}
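+/* Hedged sketch of the hashed input layout built above (IPv4 case):
+ *
+ * trash.area: [ odcid (orig->len) ][ sin_addr (4) ][ sin_port (2) ][ boot_seed ]
+ *
+ * XXH64() over this buffer yields the 8 bytes of the derived CID.
+ */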
+
+/* Allocate a new CID and attach it to <root> ebtree.
+ *
+ * If <orig> and <addr> params are non null, the new CID value is directly
+ * derived from them. Else a random value is generated. The CID is then marked
+ * with the current thread ID.
+ *
+ * Returns the new CID if succeeded, NULL if not.
+ */
+struct quic_connection_id *new_quic_cid(struct eb_root *root,
+ struct quic_conn *qc,
+ const struct quic_cid *orig,
+ const struct sockaddr_storage *addr)
+{
+ struct quic_connection_id *conn_id;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ /* Caller must set either none or both values. */
+ BUG_ON(!!orig != !!addr);
+
+ conn_id = pool_alloc(pool_head_quic_connection_id);
+ if (!conn_id) {
+ TRACE_ERROR("cid allocation failed", QUIC_EV_CONN_TXPKT, qc);
+ goto err;
+ }
+
+ conn_id->cid.len = QUIC_HAP_CID_LEN;
+
+ if (!orig) {
+ if (quic_newcid_from_hash64)
+ quic_newcid_from_hash64(conn_id->cid.data, conn_id->cid.len, qc->hash64,
+ global.cluster_secret, sizeof(global.cluster_secret));
+ else if (RAND_bytes(conn_id->cid.data, conn_id->cid.len) != 1) {
+ /* TODO: RAND_bytes() should be replaced */
+ TRACE_ERROR("RAND_bytes() failed", QUIC_EV_CONN_TXPKT, qc);
+ goto err;
+ }
+ }
+ else {
+ /* Derive the new CID value from original CID. */
+ conn_id->cid = quic_derive_cid(orig, addr);
+ }
+
+ if (quic_stateless_reset_token_init(conn_id) != 1) {
+ TRACE_ERROR("quic_stateless_reset_token_init() failed", QUIC_EV_CONN_TXPKT, qc);
+ goto err;
+ }
+
+ conn_id->qc = qc;
+ HA_ATOMIC_STORE(&conn_id->tid, tid);
+
+ conn_id->seq_num.key = qc ? qc->next_cid_seq_num++ : 0;
+ conn_id->retire_prior_to = 0;
+ /* insert the allocated CID in the quic_conn tree */
+ if (root)
+ eb64_insert(root, &conn_id->seq_num);
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return conn_id;
+
+ err:
+ pool_free(pool_head_quic_connection_id, conn_id);
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return NULL;
+}
+
+/* Retrieve the thread ID associated to QUIC connection ID <cid> of length
+ * <cid_len>. The CID may not be found in the CID tree because it is an ODCID.
+ * In this case, it will be derived using the client address <cli_addr> as
+ * hash parameter. However, this is done only if <pos> points to an INITIAL or
+ * 0-RTT packet of length <len>.
+ *
+ * Returns the thread ID or a negative error code.
+ */
+int quic_get_cid_tid(const unsigned char *cid, size_t cid_len,
+ const struct sockaddr_storage *cli_addr,
+ unsigned char *pos, size_t len)
+{
+ struct quic_cid_tree *tree;
+ struct quic_connection_id *conn_id;
+ struct ebmb_node *node;
+
+ tree = &quic_cid_trees[_quic_cid_tree_idx(cid)];
+ HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_lookup(&tree->root, cid, cid_len);
+ HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+
+ if (!node) {
+ struct quic_cid orig, derive_cid;
+ struct quic_rx_packet pkt;
+
+ if (!qc_parse_hd_form(&pkt, &pos, pos + len))
+ goto not_found;
+
+ if (pkt.type != QUIC_PACKET_TYPE_INITIAL &&
+ pkt.type != QUIC_PACKET_TYPE_0RTT) {
+ goto not_found;
+ }
+
+ memcpy(orig.data, cid, cid_len);
+ orig.len = cid_len;
+ derive_cid = quic_derive_cid(&orig, cli_addr);
+
+ tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)];
+ HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
+ HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+ }
+
+ if (!node)
+ goto not_found;
+
+ conn_id = ebmb_entry(node, struct quic_connection_id, node);
+ return HA_ATOMIC_LOAD(&conn_id->tid);
+
+ not_found:
+ return -1;
+}
+
+/* Retrieve a quic_conn instance from the <pkt> DCID field. If the packet is an
+ * INITIAL or 0RTT type, we may have to use client address <saddr> if an ODCID
+ * is used.
+ *
+ * Returns the instance or NULL if not found.
+ */
+struct quic_conn *retrieve_qc_conn_from_cid(struct quic_rx_packet *pkt,
+ struct sockaddr_storage *saddr,
+ int *new_tid)
+{
+ struct quic_conn *qc = NULL;
+ struct ebmb_node *node;
+ struct quic_connection_id *conn_id;
+ struct quic_cid_tree *tree;
+ uint conn_id_tid;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT);
+ *new_tid = -1;
+
+ /* First look into DCID tree. */
+ tree = &quic_cid_trees[_quic_cid_tree_idx(pkt->dcid.data)];
+ HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_lookup(&tree->root, pkt->dcid.data, pkt->dcid.len);
+
+ /* If not found and the packet is an INITIAL or 0-RTT one, it could be
+ * because the client is reusing an ODCID. Compute the derived CID
+ * value to retrieve the connection from the DCID tree.
+ */
+ if (!node && (pkt->type == QUIC_PACKET_TYPE_INITIAL ||
+ pkt->type == QUIC_PACKET_TYPE_0RTT)) {
+ const struct quic_cid derive_cid = quic_derive_cid(&pkt->dcid, saddr);
+
+ HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+
+ tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)];
+ HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len);
+ }
+
+ if (!node)
+ goto end;
+
+ conn_id = ebmb_entry(node, struct quic_connection_id, node);
+ conn_id_tid = HA_ATOMIC_LOAD(&conn_id->tid);
+ if (conn_id_tid != tid) {
+ *new_tid = conn_id_tid;
+ goto end;
+ }
+ qc = conn_id->qc;
+
+ end:
+ HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return qc;
+}
+
+/* Build a NEW_CONNECTION_ID frame for <conn_id> CID of <qc> connection.
+ *
+ * Returns 1 on success else 0.
+ */
+int qc_build_new_connection_id_frm(struct quic_conn *qc,
+ struct quic_connection_id *conn_id)
+{
+ int ret = 0;
+ struct quic_frame *frm;
+ struct quic_enc_level *qel;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
+
+ qel = qc->ael;
+ frm = qc_frm_alloc(QUIC_FT_NEW_CONNECTION_ID);
+ if (!frm) {
+ TRACE_ERROR("frame allocation error", QUIC_EV_CONN_IO_CB, qc);
+ goto leave;
+ }
+
+ quic_connection_id_to_frm_cpy(frm, conn_id);
+ LIST_APPEND(&qel->pktns->tx.frms, &frm->list);
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
+ return ret;
+}
diff --git a/src/quic_cli.c b/src/quic_cli.c
new file mode 100644
index 0000000..56301fa
--- /dev/null
+++ b/src/quic_cli.c
@@ -0,0 +1,413 @@
+#include <import/eb64tree.h>
+
+#include <haproxy/applet-t.h>
+#include <haproxy/cli.h>
+#include <haproxy/list.h>
+#include <haproxy/tools.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_tp.h>
+
+/* incremented by each "show quic". */
+unsigned int qc_epoch = 0;
+
+enum quic_dump_format {
+ QUIC_DUMP_FMT_ONELINE,
+ QUIC_DUMP_FMT_FULL,
+};
+
+/* appctx context used by "show quic" command */
+struct show_quic_ctx {
+ unsigned int epoch;
+ struct bref bref; /* back-reference to the quic-conn being dumped */
+ unsigned int thr;
+ int flags;
+ enum quic_dump_format format;
+};
+
+#define QC_CLI_FL_SHOW_ALL 0x1 /* show closing/draining connections */
+
+static int cli_parse_show_quic(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_quic_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int argc = 2;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ ctx->epoch = _HA_ATOMIC_FETCH_ADD(&qc_epoch, 1);
+ ctx->thr = 0;
+ ctx->flags = 0;
+ ctx->format = QUIC_DUMP_FMT_ONELINE;
+
+ if (strcmp(args[argc], "oneline") == 0) {
+ /* format already used as default value */
+ ++argc;
+ }
+ else if (strcmp(args[argc], "full") == 0) {
+ ctx->format = QUIC_DUMP_FMT_FULL;
+ ++argc;
+ }
+
+ while (*args[argc]) {
+ if (strcmp(args[argc], "all") == 0)
+ ctx->flags |= QC_CLI_FL_SHOW_ALL;
+
+ ++argc;
+ }
+
+ LIST_INIT(&ctx->bref.users);
+
+ return 0;
+}
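+/* Hedged usage example (the stats socket path is an assumption, adjust to
+ * your setup):
+ *
+ * echo "show quic full all" | socat stdio /var/run/haproxy.sock
+ *
+ * dumps every connection, including closing/draining ones, in the
+ * multi-line "full" format handled by dump_quic_full() below.
+ */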
+
+/* Dump for "show quic" with "oneline" format. */
+static void dump_quic_oneline(struct show_quic_ctx *ctx, struct quic_conn *qc)
+{
+ char bufaddr[INET6_ADDRSTRLEN], bufport[6];
+ int ret;
+ unsigned char cid_len;
+
+ ret = chunk_appendf(&trash, "%p[%02u]/%-.12s ", qc, ctx->thr,
+ qc->li->bind_conf->frontend->id);
+ chunk_appendf(&trash, "%*s", 36 - ret, " "); /* align output */
+
+ /* State */
+ if (qc->flags & QUIC_FL_CONN_CLOSING)
+ chunk_appendf(&trash, "CLOSE ");
+ else if (qc->flags & QUIC_FL_CONN_DRAINING)
+ chunk_appendf(&trash, "DRAIN ");
+ else if (qc->state < QUIC_HS_ST_COMPLETE)
+ chunk_appendf(&trash, "HDSHK ");
+ else
+ chunk_appendf(&trash, "ESTAB ");
+
+ /* Bytes in flight / Lost packets */
+ chunk_appendf(&trash, "%9llu %6llu %6llu ",
+ (ullong)qc->path->in_flight,
+ (ullong)qc->path->ifae_pkts,
+ (ullong)qc->path->loss.nb_lost_pkt);
+
+ /* Socket */
+ if (qc->local_addr.ss_family == AF_INET ||
+ qc->local_addr.ss_family == AF_INET6) {
+ addr_to_str(&qc->local_addr, bufaddr, sizeof(bufaddr));
+ port_to_str(&qc->local_addr, bufport, sizeof(bufport));
+ chunk_appendf(&trash, "%15s:%-5s ", bufaddr, bufport);
+
+ addr_to_str(&qc->peer_addr, bufaddr, sizeof(bufaddr));
+ port_to_str(&qc->peer_addr, bufport, sizeof(bufport));
+ chunk_appendf(&trash, "%15s:%-5s ", bufaddr, bufport);
+
+ }
+
+ /* CIDs */
+ for (cid_len = 0; cid_len < qc->scid.len; ++cid_len)
+ chunk_appendf(&trash, "%02x", qc->scid.data[cid_len]);
+
+ chunk_appendf(&trash, " ");
+ for (cid_len = 0; cid_len < qc->dcid.len; ++cid_len)
+ chunk_appendf(&trash, "%02x", qc->dcid.data[cid_len]);
+
+ chunk_appendf(&trash, "\n");
+}
+
+/* Dump for "show quic" with "full" format. */
+static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc)
+{
+ struct quic_pktns *pktns;
+ struct eb64_node *node;
+ struct qc_stream_desc *stream;
+ char bufaddr[INET6_ADDRSTRLEN], bufport[6];
+ int expire, i, addnl;
+ unsigned char cid_len;
+
+ addnl = 0;
+ /* CIDs */
+ chunk_appendf(&trash, "* %p[%02u]: scid=", qc, ctx->thr);
+ for (cid_len = 0; cid_len < qc->scid.len; ++cid_len)
+ chunk_appendf(&trash, "%02x", qc->scid.data[cid_len]);
+ while (cid_len++ < 20)
+ chunk_appendf(&trash, "..");
+
+ chunk_appendf(&trash, " dcid=");
+ for (cid_len = 0; cid_len < qc->dcid.len; ++cid_len)
+ chunk_appendf(&trash, "%02x", qc->dcid.data[cid_len]);
+ while (cid_len++ < 20)
+ chunk_appendf(&trash, "..");
+
+ chunk_appendf(&trash, "\n");
+
+ chunk_appendf(&trash, " loc. TPs:");
+ quic_transport_params_dump(&trash, qc, &qc->rx.params);
+ chunk_appendf(&trash, "\n");
+ chunk_appendf(&trash, " rem. TPs:");
+ quic_transport_params_dump(&trash, qc, &qc->tx.params);
+ chunk_appendf(&trash, "\n");
+
+ /* Connection state */
+ if (qc->flags & QUIC_FL_CONN_CLOSING)
+ chunk_appendf(&trash, " st=closing ");
+ else if (qc->flags & QUIC_FL_CONN_DRAINING)
+ chunk_appendf(&trash, " st=draining ");
+ else if (qc->state < QUIC_HS_ST_CONFIRMED)
+ chunk_appendf(&trash, " st=handshake ");
+ else
+ chunk_appendf(&trash, " st=opened ");
+
+ if (qc->mux_state == QC_MUX_NULL)
+ chunk_appendf(&trash, "mux=null ");
+ else if (qc->mux_state == QC_MUX_READY)
+ chunk_appendf(&trash, "mux=ready ");
+ else
+ chunk_appendf(&trash, "mux=released ");
+
+ if (qc->idle_timer_task) {
+ expire = qc->idle_timer_task->expire;
+ chunk_appendf(&trash, "expire=%02ds ",
+ TICKS_TO_MS(tick_remain(now_ms, expire)) / 1000);
+ }
+
+ chunk_appendf(&trash, "\n");
+
+ /* Socket */
+ chunk_appendf(&trash, " fd=%d", qc->fd);
+ if (qc->local_addr.ss_family == AF_INET ||
+ qc->local_addr.ss_family == AF_INET6) {
+ addr_to_str(&qc->local_addr, bufaddr, sizeof(bufaddr));
+ port_to_str(&qc->local_addr, bufport, sizeof(bufport));
+ chunk_appendf(&trash, " local_addr=%s:%s", bufaddr, bufport);
+
+ addr_to_str(&qc->peer_addr, bufaddr, sizeof(bufaddr));
+ port_to_str(&qc->peer_addr, bufport, sizeof(bufport));
+ chunk_appendf(&trash, " foreign_addr=%s:%s", bufaddr, bufport);
+ }
+
+ chunk_appendf(&trash, "\n");
+
+ /* Packet number spaces information */
+ pktns = qc->ipktns;
+ if (pktns) {
+ chunk_appendf(&trash, " [initl] rx.ackrng=%-6zu tx.inflight=%-6zu",
+ pktns->rx.arngs.sz, pktns->tx.in_flight);
+ }
+
+ pktns = qc->hpktns;
+ if (pktns) {
+ chunk_appendf(&trash, " [hndshk] rx.ackrng=%-6zu tx.inflight=%-6zu\n",
+ pktns->rx.arngs.sz, pktns->tx.in_flight);
+ }
+
+ pktns = qc->apktns;
+ if (pktns) {
+ chunk_appendf(&trash, " [01rtt] rx.ackrng=%-6zu tx.inflight=%-6zu\n",
+ pktns->rx.arngs.sz, pktns->tx.in_flight);
+ }
+
+ chunk_appendf(&trash, " srtt=%-4u rttvar=%-4u rttmin=%-4u ptoc=%-4u cwnd=%-6llu"
+ " mcwnd=%-6llu sentpkts=%-6llu lostpkts=%-6llu\n reorderedpkts=%-6llu",
+ qc->path->loss.srtt, qc->path->loss.rtt_var,
+ qc->path->loss.rtt_min, qc->path->loss.pto_count, (ullong)qc->path->cwnd,
+ (ullong)qc->path->mcwnd, (ullong)qc->cntrs.sent_pkt, (ullong)qc->path->loss.nb_lost_pkt, (ullong)qc->path->loss.nb_reordered_pkt);
+
+ if (qc->cntrs.dropped_pkt) {
+ chunk_appendf(&trash, " droppkts=%-6llu", qc->cntrs.dropped_pkt);
+ addnl = 1;
+ }
+ if (qc->cntrs.dropped_pkt_bufoverrun) {
+ chunk_appendf(&trash, " dropbuff=%-6llu", qc->cntrs.dropped_pkt_bufoverrun);
+ addnl = 1;
+ }
+ if (qc->cntrs.dropped_parsing) {
+ chunk_appendf(&trash, " droppars=%-6llu", qc->cntrs.dropped_parsing);
+ addnl = 1;
+ }
+ if (qc->cntrs.socket_full) {
+ chunk_appendf(&trash, " sockfull=%-6llu", qc->cntrs.socket_full);
+ addnl = 1;
+ }
+ if (qc->cntrs.sendto_err) {
+ chunk_appendf(&trash, " sendtoerr=%-6llu", qc->cntrs.sendto_err);
+ addnl = 1;
+ }
+ if (qc->cntrs.sendto_err_unknown) {
+ chunk_appendf(&trash, " sendtounknerr=%-6llu", qc->cntrs.sendto_err_unknown);
+ addnl = 1;
+ }
+ if (qc->cntrs.conn_migration_done) {
+ chunk_appendf(&trash, " migrdone=%-6llu", qc->cntrs.conn_migration_done);
+ addnl = 1;
+ }
+ if (qc->cntrs.data_blocked) {
+ chunk_appendf(&trash, " datablocked=%-6llu", qc->cntrs.data_blocked);
+ addnl = 1;
+ }
+ if (qc->cntrs.stream_data_blocked) {
+ chunk_appendf(&trash, " sdatablocked=%-6llu", qc->cntrs.stream_data_blocked);
+ addnl = 1;
+ }
+ if (qc->cntrs.streams_blocked_bidi) {
+ chunk_appendf(&trash, " sblockebidi=%-6llu", qc->cntrs.streams_blocked_bidi);
+ addnl = 1;
+ }
+ if (qc->cntrs.streams_blocked_uni) {
+ chunk_appendf(&trash, " sblockeduni=%-6llu", qc->cntrs.streams_blocked_uni);
+ addnl = 1;
+ }
+ if (addnl)
+ chunk_appendf(&trash, "\n");
+
+ /* Streams */
+ node = eb64_first(&qc->streams_by_id);
+ i = 0;
+ while (node) {
+ stream = eb64_entry(node, struct qc_stream_desc, by_id);
+ node = eb64_next(node);
+
+ chunk_appendf(&trash, " | stream=%-8llu", (unsigned long long)stream->by_id.key);
+ chunk_appendf(&trash, " off=%-8llu ack=%-8llu",
+ (unsigned long long)stream->buf_offset,
+ (unsigned long long)stream->ack_offset);
+
+ if (!(++i % 3)) {
+ chunk_appendf(&trash, "\n");
+ i = 0;
+ }
+ }
+
+ chunk_appendf(&trash, "\n");
+}
+
+static int cli_io_handler_dump_quic(struct appctx *appctx)
+{
+ struct show_quic_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct quic_conn *qc;
+
+ thread_isolate();
+
+ if (ctx->thr >= global.nbthread)
+ goto done;
+
+ /* FIXME: Don't watch the other side !*/
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) {
+ /* If we're forced to shut down, we might have to remove our
+ * reference to the last stream being dumped.
+ */
+ if (!LIST_ISEMPTY(&ctx->bref.users))
+ LIST_DEL_INIT(&ctx->bref.users);
+ goto done;
+ }
+
+ chunk_reset(&trash);
+
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ /* Remove show_quic_ctx from previous quic_conn instance. */
+ LIST_DEL_INIT(&ctx->bref.users);
+ }
+ else if (!ctx->bref.ref) {
+ /* First invocation. */
+ ctx->bref.ref = ha_thread_ctx[ctx->thr].quic_conns.n;
+
+ /* Print legend for oneline format. */
+ if (ctx->format == QUIC_DUMP_FMT_ONELINE) {
+ chunk_appendf(&trash, "# conn/frontend state "
+ "in_flight infl_p lost_p "
+ "Local Address Foreign Address "
+ "local & remote CIDs\n");
+ applet_putchk(appctx, &trash);
+ }
+ }
+
+ while (1) {
+ int done = 0;
+
+ if (ctx->bref.ref == &ha_thread_ctx[ctx->thr].quic_conns) {
+ /* If closing connections requested through "all", move
+ * to quic_conns_clo list after browsing quic_conns.
+ * Else move directly to the next quic_conns thread.
+ */
+ if (ctx->flags & QC_CLI_FL_SHOW_ALL) {
+ ctx->bref.ref = ha_thread_ctx[ctx->thr].quic_conns_clo.n;
+ continue;
+ }
+
+ done = 1;
+ }
+ else if (ctx->bref.ref == &ha_thread_ctx[ctx->thr].quic_conns_clo) {
+ /* Closing list entirely browsed, go to next quic_conns
+ * thread.
+ */
+ done = 1;
+ }
+ else {
+ /* Retrieve next element of the current list. */
+ qc = LIST_ELEM(ctx->bref.ref, struct quic_conn *, el_th_ctx);
+ if ((int)(qc->qc_epoch - ctx->epoch) > 0)
+ done = 1;
+ }
+
+ if (done) {
+ ++ctx->thr;
+ if (ctx->thr >= global.nbthread)
+ break;
+ /* Switch to next thread quic_conns list. */
+ ctx->bref.ref = ha_thread_ctx[ctx->thr].quic_conns.n;
+ continue;
+ }
+
+ switch (ctx->format) {
+ case QUIC_DUMP_FMT_FULL:
+ dump_quic_full(ctx, qc);
+ break;
+ case QUIC_DUMP_FMT_ONELINE:
+ dump_quic_oneline(ctx, qc);
+ break;
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* Register show_quic_ctx to quic_conn instance. */
+ LIST_APPEND(&qc->back_refs, &ctx->bref.users);
+ goto full;
+ }
+
+ ctx->bref.ref = qc->el_th_ctx.n;
+ }
+
+ done:
+ thread_release();
+ return 1;
+
+ full:
+ thread_release();
+ return 0;
+}
+
+static void cli_release_show_quic(struct appctx *appctx)
+{
+ struct show_quic_ctx *ctx = appctx->svcctx;
+
+ if (ctx->thr < global.nbthread) {
+ thread_isolate();
+ if (!LIST_ISEMPTY(&ctx->bref.users))
+ LIST_DEL_INIT(&ctx->bref.users);
+ thread_release();
+ }
+}
+
+static struct cli_kw_list cli_kws = {{ }, {
+ { { "show", "quic", NULL }, "show quic [oneline|full] [all] : display quic connections status", cli_parse_show_quic, cli_io_handler_dump_quic, cli_release_show_quic },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+static void cli_quic_init()
+{
+ int thr;
+
+ for (thr = 0; thr < MAX_THREADS; ++thr) {
+ LIST_INIT(&ha_thread_ctx[thr].quic_conns);
+ LIST_INIT(&ha_thread_ctx[thr].quic_conns_clo);
+ }
+}
+INITCALL0(STG_INIT, cli_quic_init);
diff --git a/src/quic_conn.c b/src/quic_conn.c
new file mode 100644
index 0000000..5233496
--- /dev/null
+++ b/src/quic_conn.c
@@ -0,0 +1,1893 @@
+/*
+ * QUIC protocol implementation. Lower layer with internal features implemented
+ * here such as QUIC encryption, idle timeout, acknowledgement and
+ * retransmission.
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/quic_conn.h>
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/buf-t.h>
+#include <haproxy/compat.h>
+#include <haproxy/api.h>
+#include <haproxy/debug.h>
+#include <haproxy/tools.h>
+#include <haproxy/ticks.h>
+
+#include <haproxy/connection.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/h3.h>
+#include <haproxy/hq_interop.h>
+#include <haproxy/log.h>
+#include <haproxy/mux_quic.h>
+#include <haproxy/ncbuf.h>
+#include <haproxy/pipe.h>
+#include <haproxy/proxy.h>
+#include <haproxy/quic_ack.h>
+#include <haproxy/quic_cc.h>
+#include <haproxy/quic_cli-t.h>
+#include <haproxy/quic_frame.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/quic_loss.h>
+#include <haproxy/quic_rx.h>
+#include <haproxy/quic_ssl.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/quic_stats.h>
+#include <haproxy/quic_stream.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/quic_tx.h>
+#include <haproxy/cbuf.h>
+#include <haproxy/proto_quic.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/task.h>
+#include <haproxy/thread.h>
+#include <haproxy/trace.h>
+
+/* list of supported QUIC versions by this implementation */
+const struct quic_version quic_versions[] = {
+ {
+ .num = QUIC_PROTOCOL_VERSION_DRAFT_29,
+ .initial_salt = initial_salt_draft_29,
+ .initial_salt_len = sizeof initial_salt_draft_29,
+ .key_label = (const unsigned char *)QUIC_HKDF_KEY_LABEL_V1,
+ .key_label_len = sizeof(QUIC_HKDF_KEY_LABEL_V1) - 1,
+ .iv_label = (const unsigned char *)QUIC_HKDF_IV_LABEL_V1,
+ .iv_label_len = sizeof(QUIC_HKDF_IV_LABEL_V1) - 1,
+ .hp_label = (const unsigned char *)QUIC_HKDF_HP_LABEL_V1,
+ .hp_label_len = sizeof(QUIC_HKDF_HP_LABEL_V1) - 1,
+ .ku_label = (const unsigned char *)QUIC_HKDF_KU_LABEL_V1,
+ .ku_label_len = sizeof(QUIC_HKDF_KU_LABEL_V1) - 1,
+ .retry_tag_key = (const unsigned char *)QUIC_TLS_RETRY_KEY_DRAFT,
+ .retry_tag_nonce = (const unsigned char *)QUIC_TLS_RETRY_NONCE_DRAFT,
+ },
+ {
+ .num = QUIC_PROTOCOL_VERSION_1,
+ .initial_salt = initial_salt_v1,
+ .initial_salt_len = sizeof initial_salt_v1,
+ .key_label = (const unsigned char *)QUIC_HKDF_KEY_LABEL_V1,
+ .key_label_len = sizeof(QUIC_HKDF_KEY_LABEL_V1) - 1,
+ .iv_label = (const unsigned char *)QUIC_HKDF_IV_LABEL_V1,
+ .iv_label_len = sizeof(QUIC_HKDF_IV_LABEL_V1) - 1,
+ .hp_label = (const unsigned char *)QUIC_HKDF_HP_LABEL_V1,
+ .hp_label_len = sizeof(QUIC_HKDF_HP_LABEL_V1) - 1,
+ .ku_label = (const unsigned char *)QUIC_HKDF_KU_LABEL_V1,
+ .ku_label_len = sizeof(QUIC_HKDF_KU_LABEL_V1) - 1,
+ .retry_tag_key = (const unsigned char *)QUIC_TLS_RETRY_KEY_V1,
+ .retry_tag_nonce = (const unsigned char *)QUIC_TLS_RETRY_NONCE_V1,
+ },
+ {
+ .num = QUIC_PROTOCOL_VERSION_2,
+ .initial_salt = initial_salt_v2,
+ .initial_salt_len = sizeof initial_salt_v2,
+ .key_label = (const unsigned char *)QUIC_HKDF_KEY_LABEL_V2,
+ .key_label_len = sizeof(QUIC_HKDF_KEY_LABEL_V2) - 1,
+ .iv_label = (const unsigned char *)QUIC_HKDF_IV_LABEL_V2,
+ .iv_label_len = sizeof(QUIC_HKDF_IV_LABEL_V2) - 1,
+ .hp_label = (const unsigned char *)QUIC_HKDF_HP_LABEL_V2,
+ .hp_label_len = sizeof(QUIC_HKDF_HP_LABEL_V2) - 1,
+ .ku_label = (const unsigned char *)QUIC_HKDF_KU_LABEL_V2,
+ .ku_label_len = sizeof(QUIC_HKDF_KU_LABEL_V2) - 1,
+ .retry_tag_key = (const unsigned char *)QUIC_TLS_RETRY_KEY_V2,
+ .retry_tag_nonce = (const unsigned char *)QUIC_TLS_RETRY_NONCE_V2,
+ },
+};
+
+/* Function pointers, can be used to compute a hash from first generated CID and to derive new CIDs */
+uint64_t (*quic_hash64_from_cid)(const unsigned char *cid, int size, const unsigned char *secret, size_t secretlen) = NULL;
+void (*quic_newcid_from_hash64)(unsigned char *cid, int size, uint64_t hash, const unsigned char *secret, size_t secretlen) = NULL;
+
+/* The total number of supported versions */
+const size_t quic_versions_nb = sizeof quic_versions / sizeof *quic_versions;
+/* Listener only preferred version */
+const struct quic_version *preferred_version;
+/* RFC 8999 5.4. Version
+ * A Version field with a
+ * value of 0x00000000 is reserved for version negotiation
+ */
+const struct quic_version quic_version_VN_reserved = { .num = 0, };
+
+DECLARE_STATIC_POOL(pool_head_quic_conn, "quic_conn", sizeof(struct quic_conn));
+DECLARE_STATIC_POOL(pool_head_quic_conn_closed, "quic_conn_closed", sizeof(struct quic_conn_closed));
+DECLARE_STATIC_POOL(pool_head_quic_cids, "quic_cids", sizeof(struct eb_root));
+DECLARE_POOL(pool_head_quic_connection_id,
+ "quic_connection_id", sizeof(struct quic_connection_id));
+
+struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int state);
+static int quic_conn_init_timer(struct quic_conn *qc);
+static int quic_conn_init_idle_timer_task(struct quic_conn *qc, struct proxy *px);
+
+/* Returns 1 if the peer has validated <qc> QUIC connection address, 0 if not. */
+int quic_peer_validated_addr(struct quic_conn *qc)
+{
+ if (!qc_is_listener(qc))
+ return 1;
+
+ if (qc->flags & QUIC_FL_CONN_PEER_VALIDATED_ADDR)
+ return 1;
+
+ BUG_ON(qc->bytes.prep > 3 * qc->bytes.rx);
+
+ return 0;
+}
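+/* Hedged illustration of the anti-amplification check above (RFC 9000
+ * Section 8.1): before its address is validated, a peer that sent 1200
+ * bytes entitles the listener to prepare at most 3 * 1200 = 3600 bytes in
+ * response; the BUG_ON() asserts that <bytes.prep> never exceeds this 3x
+ * budget.
+ */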
+
+/* To be called to kill a connection as soon as possible (without sending any packet). */
+void qc_kill_conn(struct quic_conn *qc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_KILL, qc);
+ TRACE_PROTO("killing the connection", QUIC_EV_CONN_KILL, qc);
+ qc->flags |= QUIC_FL_CONN_TO_KILL;
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_NEEDED;
+ task_wakeup(qc->idle_timer_task, TASK_WOKEN_OTHER);
+
+ qc_notify_err(qc);
+
+ TRACE_LEAVE(QUIC_EV_CONN_KILL, qc);
+}
+
+/* Set the timer attached to <qc> QUIC connection, used for both loss
+ * detection and PTO, and schedule the task associated with this timer if
+ * needed.
+ */
+void qc_set_timer(struct quic_conn *qc)
+{
+ struct quic_pktns *pktns;
+ unsigned int pto;
+ int handshake_confirmed;
+
+ TRACE_ENTER(QUIC_EV_CONN_STIMER, qc);
+ TRACE_PROTO("set timer", QUIC_EV_CONN_STIMER, qc, NULL, NULL, &qc->path->ifae_pkts);
+
+ pktns = NULL;
+ if (!qc->timer_task) {
+ TRACE_PROTO("already released timer task", QUIC_EV_CONN_STIMER, qc);
+ goto leave;
+ }
+
+ pktns = quic_loss_pktns(qc);
+ if (tick_isset(pktns->tx.loss_time)) {
+ qc->timer = pktns->tx.loss_time;
+ goto out;
+ }
+
+ /* anti-amplification: the timer must be
+ * cancelled for a server which reached the anti-amplification limit.
+ */
+ if (!quic_peer_validated_addr(qc) &&
+ (qc->flags & QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED)) {
+ TRACE_PROTO("anti-amplification reached", QUIC_EV_CONN_STIMER, qc);
+ qc->timer = TICK_ETERNITY;
+ goto out;
+ }
+
+ if (!qc->path->ifae_pkts && quic_peer_validated_addr(qc)) {
+ TRACE_PROTO("timer cancellation", QUIC_EV_CONN_STIMER, qc);
+ /* Timer cancellation. */
+ qc->timer = TICK_ETERNITY;
+ goto out;
+ }
+
+ handshake_confirmed = qc->state >= QUIC_HS_ST_CONFIRMED;
+ pktns = quic_pto_pktns(qc, handshake_confirmed, &pto);
+ if (tick_isset(pto))
+ qc->timer = pto;
+ out:
+ if (qc->timer == TICK_ETERNITY) {
+ qc->timer_task->expire = TICK_ETERNITY;
+ }
+ else if (tick_is_expired(qc->timer, now_ms)) {
+ TRACE_DEVEL("wakeup asap timer task", QUIC_EV_CONN_STIMER, qc);
+ task_wakeup(qc->timer_task, TASK_WOKEN_MSG);
+ }
+ else {
+ TRACE_DEVEL("timer task scheduling", QUIC_EV_CONN_STIMER, qc);
+ task_schedule(qc->timer_task, qc->timer);
+ }
+ leave:
+ TRACE_PROTO("set timer", QUIC_EV_CONN_STIMER, qc, pktns);
+ TRACE_LEAVE(QUIC_EV_CONN_STIMER, qc);
+}
+
+/* Prepare the emission of CONNECTION_CLOSE with error <err>. All send/receive
+ * activity for <qc> will be interrupted.
+ */
+void quic_set_connection_close(struct quic_conn *qc, const struct quic_err err)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE)
+ goto leave;
+
+ TRACE_STATE("setting immediate close", QUIC_EV_CONN_CLOSE, qc);
+ qc->flags |= QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ qc->err.code = err.code;
+ qc->err.app = err.app;
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Set <alert> TLS alert as QUIC CRYPTO_ERROR error */
+void quic_set_tls_alert(struct quic_conn *qc, int alert)
+{
+ TRACE_ENTER(QUIC_EV_CONN_SSLALERT, qc);
+
+ quic_set_connection_close(qc, quic_err_tls(alert));
+ qc->flags |= QUIC_FL_CONN_TLS_ALERT;
+ TRACE_STATE("Alert set", QUIC_EV_CONN_SSLALERT, qc);
+
+ TRACE_LEAVE(QUIC_EV_CONN_SSLALERT, qc);
+}
+
+/* Set the application for <qc> QUIC connection.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_set_app_ops(struct quic_conn *qc, const unsigned char *alpn, size_t alpn_len)
+{
+ if (alpn_len >= 2 && memcmp(alpn, "h3", 2) == 0)
+ qc->app_ops = &h3_ops;
+ else if (alpn_len >= 10 && memcmp(alpn, "hq-interop", 10) == 0)
+ qc->app_ops = &hq_interop_ops;
+ else
+ return 0;
+
+ return 1;
+}
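+/* Example of the mapping above: a client offering ALPN "h3" is routed to
+ * h3_ops and "hq-interop" (the QUIC interop testing protocol) to
+ * hq_interop_ops; for any other token 0 is returned and the caller is
+ * expected to reject the connection.
+ */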
+
+/* Schedule a CONNECTION_CLOSE emission on <qc> if the MUX has been released
+ * and all STREAM data are acknowledged. The MUX is responsible to have set
+ * <qc.err> before as it is reused for the CONNECTION_CLOSE frame.
+ *
+ * TODO this should also be called on lost packet detection
+ */
+void qc_check_close_on_released_mux(struct quic_conn *qc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ if (qc->mux_state == QC_MUX_RELEASED && eb_is_empty(&qc->streams_by_id)) {
+ /* Reuse errcode which should have been previously set by the MUX on release. */
+ quic_set_connection_close(qc, qc->err);
+ tasklet_wakeup(qc->wait_event.tasklet);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Finalize <qc> QUIC connection:
+ *
+ * MUST be called after having received the remote transport parameters, which
+ * are parsed when the TLS callback for the ClientHello message is called upon
+ * SSL_do_handshake() calls, not necessarily the first time as this TLS
+ * message may be split between packets.
+ * Return 1 if succeeded, 0 if not.
+ */
+int qc_conn_finalize(struct quic_conn *qc, int server)
+{
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ if (qc->flags & QUIC_FL_CONN_FINALIZED)
+ goto finalized;
+
+ if (!quic_tls_finalize(qc, server))
+ goto out;
+
+ /* This connection is functional (ready to send/receive) */
+ qc->flags |= QUIC_FL_CONN_FINALIZED;
+
+ finalized:
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
+
+void quic_conn_closed_err_count_inc(struct quic_conn *qc, struct quic_frame *frm)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ if (frm->type == QUIC_FT_CONNECTION_CLOSE)
+ quic_stats_transp_err_count_inc(qc->prx_counters, frm->connection_close.error_code);
+ else if (frm->type == QUIC_FT_CONNECTION_CLOSE_APP) {
+ if (qc->mux_state != QC_MUX_READY || !qc->qcc->app_ops->inc_err_cnt)
+ goto out;
+
+ qc->qcc->app_ops->inc_err_cnt(qc->qcc->ctx, frm->connection_close_app.error_code);
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Cancel a request on connection <qc> for stream id <id>. This is useful when
+ * the client opens a new stream but the MUX has already been released.
+ * STOP_SENDING and RESET_STREAM frames are prepared for emission.
+ *
+ * TODO this function is closely related to H3. Its place should be in H3 layer
+ * instead of quic-conn but this requires an architecture adjustment.
+ *
+ * Returns 1 on success else 0.
+ */
+int qc_h3_request_reject(struct quic_conn *qc, uint64_t id)
+{
+ int ret = 0;
+ struct quic_frame *ss, *rs;
+ struct quic_enc_level *qel = qc->ael;
+ const uint64_t app_error_code = H3_REQUEST_REJECTED;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
+
+ /* Do not emit rejection for unknown unidirectional stream as it is
+ * forbidden to close some of them (H3 control stream and QPACK
+ * encoder/decoder streams).
+ */
+ if (quic_stream_is_uni(id)) {
+ ret = 1;
+ goto out;
+ }
+
+ ss = qc_frm_alloc(QUIC_FT_STOP_SENDING);
+ if (!ss) {
+ TRACE_ERROR("failed to allocate quic_frame", QUIC_EV_CONN_PRSHPKT, qc);
+ goto out;
+ }
+
+ ss->stop_sending.id = id;
+ ss->stop_sending.app_error_code = app_error_code;
+
+ rs = qc_frm_alloc(QUIC_FT_RESET_STREAM);
+ if (!rs) {
+ TRACE_ERROR("failed to allocate quic_frame", QUIC_EV_CONN_PRSHPKT, qc);
+ qc_frm_free(qc, &ss);
+ goto out;
+ }
+
+ rs->reset_stream.id = id;
+ rs->reset_stream.app_error_code = app_error_code;
+ rs->reset_stream.final_size = 0;
+
+ LIST_APPEND(&qel->pktns->tx.frms, &ss->list);
+ LIST_APPEND(&qel->pktns->tx.frms, &rs->list);
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
+ return ret;
+}
+
+/* Remove a <qc> quic-conn from its ha_thread_ctx list. If <closing> is true,
+ * it will immediately be reinserted in the ha_thread_ctx quic_conns_clo list.
+ */
+void qc_detach_th_ctx_list(struct quic_conn *qc, int closing)
+{
+ struct bref *bref, *back;
+
+ /* Detach CLI context watchers currently dumping this connection.
+ * Reattach them to the next quic_conn instance.
+ */
+ list_for_each_entry_safe(bref, back, &qc->back_refs, users) {
+ /* Remove watcher from this quic_conn instance. */
+ LIST_DEL_INIT(&bref->users);
+
+ /* Attach it to next instance unless it was the last list element. */
+ if (qc->el_th_ctx.n != &th_ctx->quic_conns &&
+ qc->el_th_ctx.n != &th_ctx->quic_conns_clo) {
+ struct quic_conn *next = LIST_NEXT(&qc->el_th_ctx,
+ struct quic_conn *,
+ el_th_ctx);
+ LIST_APPEND(&next->back_refs, &bref->users);
+ }
+ bref->ref = qc->el_th_ctx.n;
+ __ha_barrier_store();
+ }
+
+ /* Remove quic_conn from global ha_thread_ctx list. */
+ LIST_DEL_INIT(&qc->el_th_ctx);
+
+ if (closing)
+ LIST_APPEND(&th_ctx->quic_conns_clo, &qc->el_th_ctx);
+}
+
+
+/* Copy at <pos> position a stateless reset token depending on the
+ * <salt> salt input. The cluster secret is used as the HKDF input
+ * secret to derive this token.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_stateless_reset_token_cpy(unsigned char *pos, size_t len,
+ const unsigned char *salt, size_t saltlen)
+{
+ /* Input secret */
+ const unsigned char *key = global.cluster_secret;
+ size_t keylen = sizeof global.cluster_secret;
+ /* Info */
+ const unsigned char label[] = "stateless token";
+ size_t labellen = sizeof label - 1;
+ int ret;
+
+ ret = quic_hkdf_extract_and_expand(EVP_sha256(), pos, len,
+ key, keylen, salt, saltlen, label, labellen);
+ return ret;
+}
+
+/* Build all the frames which must be sent just after the handshake has succeeded.
+ * This is essentially NEW_CONNECTION_ID frames. A QUIC server must also send
+ * a HANDSHAKE_DONE frame.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_build_post_handshake_frames(struct quic_conn *qc)
+{
+ int ret = 0, max;
+ struct quic_enc_level *qel;
+ struct quic_frame *frm, *frmbak;
+ struct list frm_list = LIST_HEAD_INIT(frm_list);
+ struct eb64_node *node;
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc);
+
+ qel = qc->ael;
+ /* Only servers must send a HANDSHAKE_DONE frame. */
+ if (qc_is_listener(qc)) {
+ frm = qc_frm_alloc(QUIC_FT_HANDSHAKE_DONE);
+ if (!frm) {
+ TRACE_ERROR("frame allocation error", QUIC_EV_CONN_IO_CB, qc);
+ goto leave;
+ }
+
+ LIST_APPEND(&frm_list, &frm->list);
+ }
+
+ /* Initialize <max> connection IDs minus one: there is already one
+ * connection ID used for the current connection. Also limit the number
+ * of connection IDs sent to the peer to 4 (3 from this function plus 1
+ * for the current connection).
+ * Note that active_connection_id_limit >= 2: this has already been
+ * checked when receiving this parameter.
+ */
+ max = QUIC_MIN(qc->tx.params.active_connection_id_limit - 1, (uint64_t)3);
+ while (max--) {
+ struct quic_connection_id *conn_id;
+
+ frm = qc_frm_alloc(QUIC_FT_NEW_CONNECTION_ID);
+ if (!frm) {
+ TRACE_ERROR("frame allocation error", QUIC_EV_CONN_IO_CB, qc);
+ goto err;
+ }
+
+ conn_id = new_quic_cid(qc->cids, qc, NULL, NULL);
+ if (!conn_id) {
+ qc_frm_free(qc, &frm);
+ TRACE_ERROR("CID allocation error", QUIC_EV_CONN_IO_CB, qc);
+ goto err;
+ }
+
+ /* TODO To prevent CID tree locking, all CIDs created here
+ * could be allocated at the same time as the first one.
+ */
+ quic_cid_insert(conn_id);
+
+ quic_connection_id_to_frm_cpy(frm, conn_id);
+ LIST_APPEND(&frm_list, &frm->list);
+ }
+
+ LIST_SPLICE(&qel->pktns->tx.frms, &frm_list);
+ qc->flags &= ~QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_IO_CB, qc);
+ return ret;
+
+ err:
+ /* free the frames */
+ list_for_each_entry_safe(frm, frmbak, &frm_list, list)
+ qc_frm_free(qc, &frm);
+
+ /* The first CID sequence number value used to allocate CIDs by this function is 1,
+ * 0 being the sequence number of the CID for this connection.
+ */
+ node = eb64_lookup_ge(qc->cids, 1);
+ while (node) {
+ struct quic_connection_id *conn_id;
+
+ conn_id = eb64_entry(node, struct quic_connection_id, seq_num);
+ if (conn_id->seq_num.key >= max)
+ break;
+
+ node = eb64_next(node);
+ quic_cid_delete(conn_id);
+
+ eb64_delete(&conn_id->seq_num);
+ pool_free(pool_head_quic_connection_id, conn_id);
+ }
+ goto leave;
+}
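+/* Hedged illustration of the function above: for a peer advertising an
+ * active_connection_id_limit of 8, max = MIN(8 - 1, 3) = 3, so a listener
+ * typically queues one HANDSHAKE_DONE frame plus three NEW_CONNECTION_ID
+ * frames (sequence numbers 1 to 3) right after handshake completion.
+ */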
+
+
+/* QUIC connection packet handler task (post handshake) */
+struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int state)
+{
+ struct quic_conn *qc = context;
+ struct quic_enc_level *qel;
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc);
+
+ qel = qc->ael;
+ TRACE_STATE("connection handshake state", QUIC_EV_CONN_IO_CB, qc, &qc->state);
+
+ if (qc_test_fd(qc))
+ qc_rcv_buf(qc);
+
+ /* Prepare post-handshake frames
+ * - after connection is instantiated (accept is done)
+ * - handshake state is completed (may not be the case here in 0-RTT)
+ */
+ if ((qc->flags & QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS) && qc->conn &&
+ qc->state >= QUIC_HS_ST_COMPLETE) {
+ quic_build_post_handshake_frames(qc);
+ }
+
+ /* Retransmissions */
+ if (qc->flags & QUIC_FL_CONN_RETRANS_NEEDED) {
+ TRACE_STATE("retransmission needed", QUIC_EV_CONN_IO_CB, qc);
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_NEEDED;
+ if (!qc_dgrams_retransmit(qc))
+ goto out;
+ }
+
+ if (!qc_treat_rx_pkts(qc)) {
+ TRACE_DEVEL("qc_treat_rx_pkts() failed", QUIC_EV_CONN_IO_CB, qc);
+ goto out;
+ }
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_DEVEL("connection to be killed", QUIC_EV_CONN_IO_CB, qc);
+ goto out;
+ }
+
+ if ((qc->flags & QUIC_FL_CONN_DRAINING) &&
+ !(qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE)) {
+ TRACE_STATE("draining connection (must not send packets)", QUIC_EV_CONN_IO_CB, qc);
+ goto out;
+ }
+
+ /* XXX TODO: how to limit the list of frames to send */
+ if (!qc_send_app_pkts(qc, &qel->pktns->tx.frms)) {
+ TRACE_DEVEL("qc_send_app_pkts() failed", QUIC_EV_CONN_IO_CB, qc);
+ goto out;
+ }
+
+ out:
+ if ((qc->flags & QUIC_FL_CONN_CLOSING) && qc->mux_state != QC_MUX_READY) {
+ quic_conn_release(qc);
+ qc = NULL;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_IO_CB, qc);
+ return t;
+}
+
+static void quic_release_cc_conn(struct quic_conn_closed *cc_qc)
+{
+ struct quic_conn *qc = (struct quic_conn *)cc_qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, cc_qc);
+
+ task_destroy(cc_qc->idle_timer_task);
+ cc_qc->idle_timer_task = NULL;
+ tasklet_free(qc->wait_event.tasklet);
+ free_quic_conn_cids(qc);
+ pool_free(pool_head_quic_cids, cc_qc->cids);
+ cc_qc->cids = NULL;
+ pool_free(pool_head_quic_cc_buf, cc_qc->cc_buf_area);
+ cc_qc->cc_buf_area = NULL;
+ /* free the quic_conn_closed descriptor itself */
+ pool_free(pool_head_quic_conn_closed, cc_qc);
+
+ TRACE_LEAVE(QUIC_EV_CONN_IO_CB);
+}
+
+/* QUIC connection packet handler task used when in "closing connection" state. */
+static struct task *quic_conn_closed_io_cb(struct task *t, void *context, unsigned int state)
+{
+ struct quic_conn_closed *cc_qc = context;
+ struct quic_conn *qc = (struct quic_conn *)cc_qc;
+ struct buffer buf;
+ uint16_t dglen;
+ struct quic_tx_packet *first_pkt;
+ size_t headlen = sizeof dglen + sizeof first_pkt;
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc);
+
+ if (qc_test_fd(qc))
+ qc_rcv_buf(qc);
+
+ /* Do not send too much data if the peer address was not validated. */
+ if ((qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) &&
+ !(qc->flags & QUIC_FL_CONN_PEER_VALIDATED_ADDR) &&
+ quic_may_send_bytes(qc) < cc_qc->cc_dgram_len)
+ goto leave;
+
+ buf = b_make(cc_qc->cc_buf_area + headlen,
+ QUIC_MAX_CC_BUFSIZE - headlen, 0, cc_qc->cc_dgram_len);
+ if (qc_snd_buf(qc, &buf, buf.data, 0) < 0) {
+ TRACE_ERROR("sendto fatal error", QUIC_EV_CONN_IO_CB, qc);
+ quic_release_cc_conn(cc_qc);
+ cc_qc = NULL;
+ qc = NULL;
+ t = NULL;
+ goto leave;
+ }
+
+ qc->flags &= ~QUIC_FL_CONN_IMMEDIATE_CLOSE;
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_IO_CB, qc);
+
+ return t;
+}
+
+/* The task handling the idle timeout of a connection in "connection close" state */
+static struct task *quic_conn_closed_idle_timer_task(struct task *t, void *ctx, unsigned int state)
+{
+ struct quic_conn_closed *cc_qc = ctx;
+
+ quic_release_cc_conn(cc_qc);
+
+ return NULL;
+}
+
+/* Allocate a new connection in "connection close" state and return it
+ * if it succeeded, NULL if not. This function is also responsible for
+ * copying the minimum necessary information from the <qc> original
+ * connection to the newly allocated connection, so as to keep it
+ * functional until its idle timer expires.
+ */
+static struct quic_conn_closed *qc_new_cc_conn(struct quic_conn *qc)
+{
+ struct quic_conn_closed *cc_qc;
+
+ cc_qc = pool_alloc(pool_head_quic_conn_closed);
+ if (!cc_qc)
+ return NULL;
+
+ quic_conn_mv_cids_to_cc_conn(cc_qc, qc);
+
+ qc_init_fd((struct quic_conn *)cc_qc);
+
+ cc_qc->flags = qc->flags;
+ cc_qc->err = qc->err;
+
+ cc_qc->nb_pkt_for_cc = qc->nb_pkt_for_cc;
+ cc_qc->nb_pkt_since_cc = qc->nb_pkt_since_cc;
+
+ cc_qc->local_addr = qc->local_addr;
+ cc_qc->peer_addr = qc->peer_addr;
+
+ cc_qc->wait_event.tasklet = qc->wait_event.tasklet;
+ cc_qc->wait_event.tasklet->process = quic_conn_closed_io_cb;
+ cc_qc->wait_event.tasklet->context = cc_qc;
+ cc_qc->wait_event.events = 0;
+ cc_qc->subs = NULL;
+
+ cc_qc->bytes.prep = qc->bytes.prep;
+ cc_qc->bytes.tx = qc->bytes.tx;
+ cc_qc->bytes.rx = qc->bytes.rx;
+
+ cc_qc->odcid = qc->odcid;
+ cc_qc->dcid = qc->dcid;
+ cc_qc->scid = qc->scid;
+
+ cc_qc->li = qc->li;
+ cc_qc->cids = qc->cids;
+
+ cc_qc->idle_timer_task = qc->idle_timer_task;
+ cc_qc->idle_timer_task->process = quic_conn_closed_idle_timer_task;
+ cc_qc->idle_timer_task->context = cc_qc;
+ cc_qc->idle_expire = qc->idle_expire;
+
+ cc_qc->conn = qc->conn;
+ qc->conn = NULL;
+
+ cc_qc->cc_buf_area = qc->tx.cc_buf_area;
+ cc_qc->cc_dgram_len = qc->tx.cc_dgram_len;
+ TRACE_PRINTF(TRACE_LEVEL_PROTO, QUIC_EV_CONN_IO_CB, qc, 0, 0, 0,
+ "switch qc@%p to cc_qc@%p", qc, cc_qc);
+
+ return cc_qc;
+}
+
+/* QUIC connection packet handler task. */
+struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state)
+{
+ int ret;
+ struct quic_conn *qc = context;
+ struct buffer *buf = NULL;
+ int st;
+ struct tasklet *tl = (struct tasklet *)t;
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc);
+
+ st = qc->state;
+ TRACE_PROTO("connection state", QUIC_EV_CONN_IO_CB, qc, &st);
+
+ if (HA_ATOMIC_LOAD(&tl->state) & TASK_HEAVY) {
+ HA_ATOMIC_AND(&tl->state, ~TASK_HEAVY);
+ qc_ssl_provide_all_quic_data(qc, qc->xprt_ctx);
+ }
+
+ /* Retransmissions */
+ if (qc->flags & QUIC_FL_CONN_RETRANS_NEEDED) {
+ TRACE_DEVEL("retransmission needed", QUIC_EV_CONN_PHPKTS, qc);
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_NEEDED;
+ if (!qc_dgrams_retransmit(qc))
+ goto out;
+ }
+
+ if (qc_test_fd(qc))
+ qc_rcv_buf(qc);
+
+ if (!qc_treat_rx_pkts(qc))
+ goto out;
+
+ if (HA_ATOMIC_LOAD(&tl->state) & TASK_HEAVY) {
+ tasklet_wakeup(tl);
+ goto out;
+ }
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_DEVEL("connection to be killed", QUIC_EV_CONN_PHPKTS, qc);
+ goto out;
+ }
+
+ if ((qc->flags & QUIC_FL_CONN_DRAINING) &&
+ !(qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE))
+ goto out;
+
+ st = qc->state;
+ if (st >= QUIC_HS_ST_COMPLETE) {
+ if (!(qc->flags & QUIC_FL_CONN_HPKTNS_DCD)) {
+ /* Discard the Handshake packet number space. */
+ TRACE_PROTO("discarding Handshake pktns", QUIC_EV_CONN_PHPKTS, qc);
+ quic_pktns_discard(qc->hel->pktns, qc);
+ qc_set_timer(qc);
+ qc_el_rx_pkts_del(qc->hel);
+ qc_release_pktns_frms(qc, qc->hel->pktns);
+ }
+ }
+
+ buf = qc_get_txb(qc);
+ if (!buf)
+ goto out;
+
+ if (b_data(buf) && !qc_purge_txbuf(qc, buf))
+ goto out;
+
+ /* Currently buf cannot be non-empty at this stage. Even if a previous
+ * sendto() has failed, it is emptied to simulate packet emission and
+ * rely on QUIC loss detection to emit it again.
+ */
+ BUG_ON_HOT(b_data(buf));
+ b_reset(buf);
+
+ ret = qc_prep_hpkts(qc, buf, NULL);
+ if (ret == -1) {
+ qc_txb_release(qc);
+ goto out;
+ }
+
+ if (ret && !qc_send_ppkts(buf, qc->xprt_ctx)) {
+ if (qc->flags & QUIC_FL_CONN_TO_KILL)
+ qc_txb_release(qc);
+ goto out;
+ }
+
+ qc_txb_release(qc);
+
+ out:
+ /* Release the Handshake encryption level and packet number space if
+ * the Handshake is confirmed and if there is no need to send
+ * anymore Handshake packets.
+ */
+ if (quic_tls_pktns_is_dcd(qc, qc->hpktns) &&
+ !qc_need_sending(qc, qc->hel)) {
+ /* Ensure Initial packet encryption level and packet number space have
+ * been released.
+ */
+ qc_enc_level_free(qc, &qc->iel);
+ quic_pktns_release(qc, &qc->ipktns);
+ qc_enc_level_free(qc, &qc->hel);
+ quic_pktns_release(qc, &qc->hpktns);
+ /* Also release the negotiated Initial TLS context. */
+ quic_nictx_free(qc);
+ }
+
+ if ((qc->flags & QUIC_FL_CONN_CLOSING) && qc->mux_state != QC_MUX_READY) {
+ quic_conn_release(qc);
+ qc = NULL;
+ }
+
+ TRACE_PROTO("ssl error", QUIC_EV_CONN_IO_CB, qc, &st);
+ TRACE_LEAVE(QUIC_EV_CONN_IO_CB, qc);
+ return t;
+}
+
+/* Callback called upon loss detection and PTO timer expirations. */
+struct task *qc_process_timer(struct task *task, void *ctx, unsigned int state)
+{
+ struct quic_conn *qc = ctx;
+ struct quic_pktns *pktns;
+
+ TRACE_ENTER(QUIC_EV_CONN_PTIMER, qc);
+ TRACE_PROTO("process timer", QUIC_EV_CONN_PTIMER, qc,
+ NULL, NULL, &qc->path->ifae_pkts);
+
+ task->expire = TICK_ETERNITY;
+ pktns = quic_loss_pktns(qc);
+
+ if (qc->flags & (QUIC_FL_CONN_DRAINING|QUIC_FL_CONN_TO_KILL)) {
+ TRACE_PROTO("cancelled action (draining state)", QUIC_EV_CONN_PTIMER, qc);
+ goto out;
+ }
+
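+ /* Note: the loss-detection timer has priority over the PTO: when a loss
+ * time is set for the packet number space returned by quic_loss_pktns(),
+ * the corresponding packets are declared lost below instead of being
+ * probed (RFC 9002).
+ */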
+ if (tick_isset(pktns->tx.loss_time)) {
+ struct list lost_pkts = LIST_HEAD_INIT(lost_pkts);
+
+ qc_packet_loss_lookup(pktns, qc, &lost_pkts);
+ if (!LIST_ISEMPTY(&lost_pkts))
+ tasklet_wakeup(qc->wait_event.tasklet);
+ if (qc_release_lost_pkts(qc, pktns, &lost_pkts, now_ms))
+ qc_set_timer(qc);
+ goto out;
+ }
+
+ if (qc->path->in_flight) {
+ pktns = quic_pto_pktns(qc, qc->state >= QUIC_HS_ST_CONFIRMED, NULL);
+ if (!pktns->tx.in_flight) {
+ TRACE_PROTO("No in flight packets to probe with", QUIC_EV_CONN_TXPKT, qc);
+ goto out;
+ }
+
+ if (pktns == qc->ipktns) {
+ if (qc_may_probe_ipktns(qc)) {
+ qc->flags |= QUIC_FL_CONN_RETRANS_NEEDED;
+ pktns->flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ TRACE_STATE("needs to probe Initial packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ else {
+ TRACE_STATE("Cannot probe Initial packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ if (qc->hpktns && qc->hpktns->tx.in_flight) {
+ qc->flags |= QUIC_FL_CONN_RETRANS_NEEDED;
+ qc->hpktns->flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ TRACE_STATE("needs to probe Handshake packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ }
+ else if (pktns == qc->hpktns) {
+ TRACE_STATE("needs to probe Handshake packet number space", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_RETRANS_NEEDED;
+ pktns->flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ if (qc->ipktns && qc->ipktns->tx.in_flight) {
+ if (qc_may_probe_ipktns(qc)) {
+ qc->ipktns->flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ TRACE_STATE("needs to probe Initial packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ else {
+ TRACE_STATE("Cannot probe Initial packet number space", QUIC_EV_CONN_TXPKT, qc);
+ }
+ }
+ }
+ else if (pktns == qc->apktns) {
+ pktns->tx.pto_probe = QUIC_MAX_NB_PTO_DGRAMS;
+ /* Wake up upper layer if waiting to send new data. */
+ if (!qc_notify_send(qc)) {
+ TRACE_STATE("needs to probe 01RTT packet number space", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_RETRANS_NEEDED;
+ pktns->flags |= QUIC_FL_PKTNS_PROBE_NEEDED;
+ }
+ }
+ }
+ else if (!qc_is_listener(qc) && qc->state <= QUIC_HS_ST_COMPLETE) {
+ if (quic_tls_has_tx_sec(qc->hel))
+ qc->hel->pktns->tx.pto_probe = 1;
+ if (quic_tls_has_tx_sec(qc->iel))
+ qc->iel->pktns->tx.pto_probe = 1;
+ }
+
+ tasklet_wakeup(qc->wait_event.tasklet);
+ qc->path->loss.pto_count++;
+
+ out:
+ TRACE_PROTO("process timer", QUIC_EV_CONN_PTIMER, qc, pktns);
+ TRACE_LEAVE(QUIC_EV_CONN_PTIMER, qc);
+
+ return task;
+}
+
+/* Allocate a new QUIC connection with <qv> as QUIC version. <ipv4>
+ * boolean is set to 1 for IPv4 connections, 0 for IPv6. <server> is set to 1
+ * for QUIC servers (or haproxy listeners).
+ * <dcid> is the destination connection ID, <scid> is the source connection ID.
+ * This latter <scid> CID has the same value on the wire as the one for <conn_id>,
+ * which is the first CID of this connection, but a different internal
+ * representation used to build NEW_CONNECTION_ID frames. It is the
+ * responsibility of the caller to insert <conn_id> in the CIDs tree for this
+ * connection (qc->cids).
+ * <token> is set to 1 if a token was received (and validated) for this connection,
+ * <token_odcid> being the original DCID extracted from this token.
+ * Endpoint addresses are specified via <local_addr> and <peer_addr>.
+ * Returns the connection if it succeeded, NULL if not.
+ */
+struct quic_conn *qc_new_conn(const struct quic_version *qv, int ipv4,
+ struct quic_cid *dcid, struct quic_cid *scid,
+ const struct quic_cid *token_odcid,
+ struct quic_connection_id *conn_id,
+ struct sockaddr_storage *local_addr,
+ struct sockaddr_storage *peer_addr,
+ int server, int token, void *owner)
+{
+ int i;
+ struct quic_conn *qc = NULL;
+ struct listener *l = server ? owner : NULL;
+ struct proxy *prx = l ? l->bind_conf->frontend : NULL;
+ struct quic_cc_algo *cc_algo = NULL;
+ unsigned int next_actconn = 0, next_sslconn = 0, next_handshake = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_INIT);
+
+ next_actconn = increment_actconn();
+ if (!next_actconn) {
+ _HA_ATOMIC_INC(&maxconn_reached);
+ TRACE_STATE("maxconn reached", QUIC_EV_CONN_INIT);
+ goto err;
+ }
+
+ next_sslconn = increment_sslconn();
+ if (!next_sslconn) {
+ TRACE_STATE("sslconn reached", QUIC_EV_CONN_INIT);
+ goto err;
+ }
+
+ if (server) {
+ next_handshake = quic_increment_curr_handshake(l);
+ if (!next_handshake) {
+ TRACE_STATE("max handshake reached", QUIC_EV_CONN_INIT);
+ goto err;
+ }
+ }
+
+ qc = pool_alloc(pool_head_quic_conn);
+ if (!qc) {
+ TRACE_ERROR("Could not allocate a new connection", QUIC_EV_CONN_INIT);
+ goto err;
+ }
+
+ /* Now that quic_conn instance is allocated, quic_conn_release() will
+ * ensure global accounting is decremented.
+ */
+ next_handshake = next_sslconn = next_actconn = 0;
+
+ /* Initialize in priority qc members required for a safe dealloc. */
+ qc->nictx = NULL;
+ /* Prevent these CIDs from being dumped by TRACE() calls */
+ qc->scid.len = qc->odcid.len = qc->dcid.len = 0;
+ /* required to use MTLIST_IN_LIST */
+ MT_LIST_INIT(&qc->accept_list);
+
+ LIST_INIT(&qc->rx.pkt_list);
+
+ qc->streams_by_id = EB_ROOT_UNIQUE;
+
+ /* Required to call free_quic_conn_cids() from quic_conn_release() */
+ qc->cids = NULL;
+ qc->tx.cc_buf_area = NULL;
+ qc_init_fd(qc);
+
+ LIST_INIT(&qc->back_refs);
+ LIST_INIT(&qc->el_th_ctx);
+
+ qc->wait_event.tasklet = NULL;
+
+ /* Required to destroy <qc> tasks from quic_conn_release() */
+ qc->timer_task = NULL;
+ qc->idle_timer_task = NULL;
+
+ qc->xprt_ctx = NULL;
+ qc->conn = NULL;
+ qc->qcc = NULL;
+ qc->app_ops = NULL;
+ qc->path = NULL;
+
+ /* Keyupdate: required to safely call quic_tls_ku_free() from
+ * quic_conn_release().
+ */
+ quic_tls_ku_reset(&qc->ku.prv_rx);
+ quic_tls_ku_reset(&qc->ku.nxt_rx);
+ quic_tls_ku_reset(&qc->ku.nxt_tx);
+
+ /* Encryption levels */
+ qc->iel = qc->eel = qc->hel = qc->ael = NULL;
+ LIST_INIT(&qc->qel_list);
+ /* Packet number spaces */
+ qc->ipktns = qc->hpktns = qc->apktns = NULL;
+ LIST_INIT(&qc->pktns_list);
+
+ /* Required to safely call quic_conn_prx_cntrs_update() from quic_conn_release(). */
+ qc->prx_counters = NULL;
+
+ /* QUIC Server (or listener). */
+ if (server) {
+ cc_algo = l->bind_conf->quic_cc_algo;
+
+ qc->prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe,
+ &quic_stats_module);
+ qc->flags = QUIC_FL_CONN_LISTENER;
+ qc->state = QUIC_HS_ST_SERVER_INITIAL;
+ /* Copy the client original DCID. */
+ qc->odcid = *dcid;
+ /* Copy the packet SCID to reuse it as DCID for sending */
+ qc->dcid = *scid;
+ qc->tx.buf = BUF_NULL;
+ qc->li = l;
+ }
+ /* QUIC Client (outgoing connection to servers) */
+ else {
+ qc->state = QUIC_HS_ST_CLIENT_INITIAL;
+ if (dcid->len)
+ memcpy(qc->dcid.data, dcid->data, dcid->len);
+ qc->dcid.len = dcid->len;
+ qc->li = NULL;
+ }
+ qc->mux_state = QC_MUX_NULL;
+ qc->err = quic_err_transport(QC_ERR_NO_ERROR);
+
+ /* If connection is instantiated due to an INITIAL packet with an
+ * already checked token, consider the peer address as validated.
+ */
+ if (token_odcid->len) {
+ TRACE_STATE("validate peer address due to initial token",
+ QUIC_EV_CONN_INIT, qc);
+ qc->flags |= QUIC_FL_CONN_PEER_VALIDATED_ADDR;
+ }
+ else {
+ HA_ATOMIC_INC(&qc->prx_counters->half_open_conn);
+ }
+
+ /* Now proceeds to allocation of qc members. */
+ qc->rx.buf.area = pool_alloc(pool_head_quic_conn_rxbuf);
+ if (!qc->rx.buf.area) {
+ TRACE_ERROR("Could not allocate a new RX buffer", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+
+ qc->cids = pool_alloc(pool_head_quic_cids);
+ if (!qc->cids) {
+ TRACE_ERROR("Could not allocate a new CID tree", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+ *qc->cids = EB_ROOT;
+
+ conn_id->qc = qc;
+
+ if (HA_ATOMIC_LOAD(&l->rx.quic_mode) == QUIC_SOCK_MODE_CONN &&
+ (global.tune.options & GTUNE_QUIC_SOCK_PER_CONN) &&
+ is_addr(local_addr)) {
+ TRACE_USER("Allocate a socket for QUIC connection", QUIC_EV_CONN_INIT, qc);
+ qc_alloc_fd(qc, local_addr, peer_addr);
+
+ /* haproxy soft-stop is supported only for QUIC connections
+ * with their owned socket.
+ */
+ if (qc_test_fd(qc))
+ _HA_ATOMIC_INC(&jobs);
+ }
+
+ /* Select our SCID which is the first CID with 0 as sequence number. */
+ qc->scid = conn_id->cid;
+
+ if (!qc_enc_level_alloc(qc, &qc->ipktns, &qc->iel, ssl_encryption_initial)) {
+ TRACE_ERROR("Could not initialize an encryption level", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+
+ qc->original_version = qv;
+ qc->negotiated_version = NULL;
+ qc->tps_tls_ext = (qc->original_version->num & 0xff000000) == 0xff000000 ?
+ TLS_EXTENSION_QUIC_TRANSPORT_PARAMETERS_DRAFT:
+ TLS_EXTENSION_QUIC_TRANSPORT_PARAMETERS;
+ /* TX part. */
+ qc->bytes.tx = qc->bytes.prep = 0;
+ memset(&qc->tx.params, 0, sizeof(qc->tx.params));
+ qc->tx.buf = BUF_NULL;
+ qc->tx.cc_buf = BUF_NULL;
+ qc->tx.cc_buf_area = NULL;
+ qc->tx.cc_dgram_len = 0;
+ /* RX part. */
+ qc->bytes.rx = 0;
+ memset(&qc->rx.params, 0, sizeof(qc->rx.params));
+ qc->rx.buf = b_make(qc->rx.buf.area, QUIC_CONN_RX_BUFSZ, 0, 0);
+ for (i = 0; i < QCS_MAX_TYPES; i++)
+ qc->rx.strms[i].nb_streams = 0;
+
+ qc->nb_pkt_for_cc = 1;
+ qc->nb_pkt_since_cc = 0;
+
+ if (!quic_tls_ku_init(qc)) {
+ TRACE_ERROR("Key update initialization failed", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+
+ qc->max_ack_delay = 0;
+ /* Only one path at this time (multipath not supported) */
+ qc->path = &qc->paths[0];
+ quic_cc_path_init(qc->path, ipv4, server ? l->bind_conf->max_cwnd : 0,
+ cc_algo ? cc_algo : default_quic_cc_algo, qc);
+
+ qc->stream_buf_count = 0;
+ memcpy(&qc->local_addr, local_addr, sizeof(qc->local_addr));
+ memcpy(&qc->peer_addr, peer_addr, sizeof qc->peer_addr);
+
+ if (server && !qc_lstnr_params_init(qc, &l->bind_conf->quic_params,
+ conn_id->stateless_reset_token,
+ dcid->data, dcid->len,
+ qc->scid.data, qc->scid.len, token_odcid))
+ goto err;
+
+ /* Initialize the idle timeout of the connection to the "max_idle_timeout"
+ * value from the local transport parameters.
+ */
+ qc->max_idle_timeout = qc->rx.params.max_idle_timeout;
+ qc->wait_event.tasklet = tasklet_new();
+ if (!qc->wait_event.tasklet) {
+ TRACE_ERROR("tasklet_new() failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+ qc->wait_event.tasklet->process = quic_conn_io_cb;
+ qc->wait_event.tasklet->context = qc;
+ qc->wait_event.events = 0;
+ qc->subs = NULL;
+
+ if (qc_alloc_ssl_sock_ctx(qc) ||
+ !quic_conn_init_timer(qc) ||
+ !quic_conn_init_idle_timer_task(qc, prx))
+ goto err;
+
+ if (!qc_new_isecs(qc, &qc->iel->tls_ctx, qc->original_version, dcid->data, dcid->len, 1))
+ goto err;
+
+ /* Counters initialization */
+ memset(&qc->cntrs, 0, sizeof qc->cntrs);
+
+ LIST_APPEND(&th_ctx->quic_conns, &qc->el_th_ctx);
+ qc->qc_epoch = HA_ATOMIC_LOAD(&qc_epoch);
+
+ TRACE_LEAVE(QUIC_EV_CONN_INIT, qc);
+
+ return qc;
+
+ err:
+ quic_conn_release(qc);
+
+ /* Decrement global counters. Done only for errors happening before or
+ * during pool_head_quic_conn allocation. All other cases are covered by
+ * quic_conn_release().
+ */
+ if (next_actconn)
+ _HA_ATOMIC_DEC(&actconn);
+ if (next_sslconn)
+ _HA_ATOMIC_DEC(&global.sslconns);
+ if (next_handshake)
+ _HA_ATOMIC_DEC(&l->rx.quic_curr_handshake);
+
+ TRACE_LEAVE(QUIC_EV_CONN_INIT);
+ return NULL;
+}
+
+/* React to a connection migration initiated on <qc> by a client with the new
+ * path addresses <peer_addr>/<local_addr>.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qc_handle_conn_migration(struct quic_conn *qc,
+ const struct sockaddr_storage *peer_addr,
+ const struct sockaddr_storage *local_addr)
+{
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+
+ /* RFC 9000. Connection Migration
+ *
+ * If the peer sent the disable_active_migration transport parameter,
+ * an endpoint also MUST NOT send packets (including probing packets;
+ * see Section 9.1) from a different local address to the address the peer
+ * used during the handshake, unless the endpoint has acted on a
+ * preferred_address transport parameter from the peer.
+ */
+ if (qc->li->bind_conf->quic_params.disable_active_migration) {
+ TRACE_ERROR("Active migration was disabled, datagram dropped", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ /* RFC 9000 9. Connection Migration
+ *
+ * The design of QUIC relies on endpoints retaining a stable address for
+ * the duration of the handshake. An endpoint MUST NOT initiate
+ * connection migration before the handshake is confirmed, as defined in
+ * Section 4.1.2 of [QUIC-TLS].
+ */
+ if (qc->state < QUIC_HS_ST_COMPLETE) {
+ TRACE_STATE("Connection migration during handshake rejected", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ /* RFC 9000 9. Connection Migration
+ *
+ * TODO
+ * An endpoint MUST
+ * perform path validation (Section 8.2) if it detects any change to a
+ * peer's address, unless it has previously validated that address.
+ */
+
+ /* Update the quic-conn owned socket if in use.
+ * TODO try to reuse it instead of closing it and opening a new one.
+ */
+ if (qc_test_fd(qc)) {
+ TRACE_STATE("Connection migration detected, allocate a new connection socket", QUIC_EV_CONN_LPKT, qc);
+ qc_release_fd(qc, 1);
+ /* TODO need to adjust <jobs> on socket allocation failure. */
+ qc_alloc_fd(qc, local_addr, peer_addr);
+ }
+
+ qc->local_addr = *local_addr;
+ qc->peer_addr = *peer_addr;
+ qc->cntrs.conn_migration_done++;
+
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return 0;
+
+ err:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return 1;
+}
+
+
+/* Update the proxy counters of <qc> QUIC connection from its counters */
+static inline void quic_conn_prx_cntrs_update(struct quic_conn *qc)
+{
+ if (!qc->prx_counters)
+ return;
+
+ HA_ATOMIC_ADD(&qc->prx_counters->dropped_pkt, qc->cntrs.dropped_pkt);
+ HA_ATOMIC_ADD(&qc->prx_counters->dropped_pkt_bufoverrun, qc->cntrs.dropped_pkt_bufoverrun);
+ HA_ATOMIC_ADD(&qc->prx_counters->dropped_parsing, qc->cntrs.dropped_parsing);
+ HA_ATOMIC_ADD(&qc->prx_counters->socket_full, qc->cntrs.socket_full);
+ HA_ATOMIC_ADD(&qc->prx_counters->sendto_err, qc->cntrs.sendto_err);
+ HA_ATOMIC_ADD(&qc->prx_counters->sendto_err_unknown, qc->cntrs.sendto_err_unknown);
+ HA_ATOMIC_ADD(&qc->prx_counters->sent_pkt, qc->cntrs.sent_pkt);
+ /* It is possible that ->path was not initialized. For instance if a
+ * QUIC connection allocation has failed.
+ */
+ if (qc->path)
+ HA_ATOMIC_ADD(&qc->prx_counters->lost_pkt, qc->path->loss.nb_lost_pkt);
+ HA_ATOMIC_ADD(&qc->prx_counters->conn_migration_done, qc->cntrs.conn_migration_done);
+ /* Stream related counters */
+ HA_ATOMIC_ADD(&qc->prx_counters->data_blocked, qc->cntrs.data_blocked);
+ HA_ATOMIC_ADD(&qc->prx_counters->stream_data_blocked, qc->cntrs.stream_data_blocked);
+ HA_ATOMIC_ADD(&qc->prx_counters->streams_blocked_bidi, qc->cntrs.streams_blocked_bidi);
+ HA_ATOMIC_ADD(&qc->prx_counters->streams_blocked_uni, qc->cntrs.streams_blocked_uni);
+}
+
+/* Release the quic_conn <qc>. The connection is removed from the CIDs tree.
+ * The connection tasklet is killed.
+ *
+ * This function must only be called by the thread responsible for the
+ * quic_conn tasklet.
+ */
+void quic_conn_release(struct quic_conn *qc)
+{
+ struct eb64_node *node;
+ struct quic_rx_packet *pkt, *pktback;
+ struct quic_conn_closed *cc_qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ if (!qc)
+ goto leave;
+
+ /* We must not free the quic-conn if the MUX is still allocated. */
+ BUG_ON(qc->mux_state == QC_MUX_READY);
+
+ cc_qc = NULL;
+ if ((qc->flags & QUIC_FL_CONN_CLOSING) && !(qc->flags & QUIC_FL_CONN_EXP_TIMER) &&
+ qc->tx.cc_buf_area)
+ cc_qc = qc_new_cc_conn(qc);
+
+ if (!cc_qc) {
+ task_destroy(qc->idle_timer_task);
+ qc->idle_timer_task = NULL;
+ tasklet_free(qc->wait_event.tasklet);
+ /* remove the connection from receiver cids trees */
+ free_quic_conn_cids(qc);
+ pool_free(pool_head_quic_cids, qc->cids);
+ qc->cids = NULL;
+ pool_free(pool_head_quic_cc_buf, qc->tx.cc_buf_area);
+ qc->tx.cc_buf_area = NULL;
+ }
+
+ if (qc_test_fd(qc))
+ _HA_ATOMIC_DEC(&jobs);
+
+ /* Close quic-conn socket fd. */
+ qc_release_fd(qc, 0);
+
+ /* in the unlikely (but possible) case the connection was just added to
+ * the accept_list we must delete it from there.
+ */
+ if (MT_LIST_INLIST(&qc->accept_list)) {
+ MT_LIST_DELETE(&qc->accept_list);
+ BUG_ON(qc->li->rx.quic_curr_accept == 0);
+ HA_ATOMIC_DEC(&qc->li->rx.quic_curr_accept);
+ }
+
+ /* free remaining stream descriptors */
+ node = eb64_first(&qc->streams_by_id);
+ while (node) {
+ struct qc_stream_desc *stream;
+
+ stream = eb64_entry(node, struct qc_stream_desc, by_id);
+ node = eb64_next(node);
+
+ /* all streams attached to the quic-conn are released, so
+ * qc_stream_desc_free will release the stream instance.
+ */
+ BUG_ON(!stream->release);
+ qc_stream_desc_free(stream, 1);
+ }
+
+ /* free the SSL sock context */
+ qc_free_ssl_sock_ctx(&qc->xprt_ctx);
+ /* Purge Rx packet list. */
+ list_for_each_entry_safe(pkt, pktback, &qc->rx.pkt_list, qc_rx_pkt_list) {
+ LIST_DELETE(&pkt->qc_rx_pkt_list);
+ pool_free(pool_head_quic_rx_packet, pkt);
+ }
+
+ task_destroy(qc->timer_task);
+ qc->timer_task = NULL;
+
+ quic_tls_ku_free(qc);
+ if (qc->ael) {
+ struct quic_tls_ctx *actx = &qc->ael->tls_ctx;
+
+ /* Secrets used by keyupdate */
+ pool_free(pool_head_quic_tls_secret, actx->rx.secret);
+ pool_free(pool_head_quic_tls_secret, actx->tx.secret);
+ }
+
+ qc_enc_level_free(qc, &qc->iel);
+ qc_enc_level_free(qc, &qc->eel);
+ qc_enc_level_free(qc, &qc->hel);
+ qc_enc_level_free(qc, &qc->ael);
+
+ quic_tls_ctx_free(&qc->nictx);
+
+ quic_pktns_release(qc, &qc->ipktns);
+ quic_pktns_release(qc, &qc->hpktns);
+ quic_pktns_release(qc, &qc->apktns);
+
+ qc_detach_th_ctx_list(qc, 0);
+
+ quic_conn_prx_cntrs_update(qc);
+ pool_free(pool_head_quic_conn_rxbuf, qc->rx.buf.area);
+ qc->rx.buf.area = NULL;
+
+ /* Connection released before peer address validated. */
+ if (unlikely(!(qc->flags & QUIC_FL_CONN_PEER_VALIDATED_ADDR))) {
+ BUG_ON(!qc->prx_counters->half_open_conn);
+ HA_ATOMIC_DEC(&qc->prx_counters->half_open_conn);
+ }
+
+ /* Connection released before handshake completion. */
+ if (unlikely(qc->state < QUIC_HS_ST_COMPLETE)) {
+ if (qc_is_listener(qc)) {
+ BUG_ON(qc->li->rx.quic_curr_handshake == 0);
+ HA_ATOMIC_DEC(&qc->li->rx.quic_curr_handshake);
+ }
+ }
+
+ pool_free(pool_head_quic_conn, qc);
+ qc = NULL;
+
+ /* Decrement global counters when quic_conn is deallocated.
+ * quic_conn_closed instances are not accounted as they run for a short
+ * time with limited resources.
+ */
+ _HA_ATOMIC_DEC(&actconn);
+ _HA_ATOMIC_DEC(&global.sslconns);
+
+ TRACE_PROTO("QUIC conn. freed", QUIC_EV_CONN_FREED, qc);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Initialize the timer task of <qc> QUIC connection.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_conn_init_timer(struct quic_conn *qc)
+{
+ int ret = 0;
+ /* Attach this task to the same thread ID used for the connection */
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ qc->timer_task = task_new_here();
+ if (!qc->timer_task) {
+ TRACE_ERROR("timer task allocation failed", QUIC_EV_CONN_NEW, qc);
+ goto leave;
+ }
+
+ qc->timer = TICK_ETERNITY;
+ qc->timer_task->process = qc_process_timer;
+ qc->timer_task->context = qc;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
+
+/* Rearm the idle timer or the ack timer (if not already armed) for <qc> QUIC
+ * connection. */
+void qc_idle_timer_do_rearm(struct quic_conn *qc, int arm_ack)
+{
+ unsigned int expire;
+
+ /* It is possible the idle timer task has been already released. */
+ if (!qc->idle_timer_task)
+ return;
+
+ if (qc->flags & (QUIC_FL_CONN_CLOSING|QUIC_FL_CONN_DRAINING)) {
+ /* RFC 9000 10.2. Immediate Close
+ *
+ * The closing and draining connection states exist to ensure that
+ * connections close cleanly and that delayed or reordered packets are
+ * properly discarded. These states SHOULD persist for at least three
+ * times the current PTO interval as defined in [QUIC-RECOVERY].
+ */
+
+ /* The delay is limited to 1s, which should cover most network
+ * conditions. The process should not be impacted by a
+ * connection with a high RTT.
+ */
+ expire = MIN(3 * quic_pto(qc), 1000);
+ }
+ else {
+ /* RFC 9000 10.1. Idle Timeout
+ *
+ * To avoid excessively small idle timeout periods, endpoints MUST
+ * increase the idle timeout period to be at least three times the
+ * current Probe Timeout (PTO). This allows for multiple PTOs to expire,
+ * and therefore multiple probes to be sent and lost, prior to idle
+ * timeout.
+ */
+ expire = QUIC_MAX(3 * quic_pto(qc), qc->max_idle_timeout);
+ }
+
+ qc->idle_expire = tick_add(now_ms, MS_TO_TICKS(expire));
+ /* Note that the ACK timer is not armed during the handshake. So,
+ * the handshake expiration date is taken into account only
+ * when <arm_ack> is false.
+ */
+ if (arm_ack) {
+ /* Arm the ack timer only if not already armed. */
+ if (!tick_isset(qc->ack_expire)) {
+ qc->ack_expire = tick_add(now_ms, MS_TO_TICKS(QUIC_ACK_DELAY));
+ qc->idle_timer_task->expire = qc->ack_expire;
+ task_queue(qc->idle_timer_task);
+ TRACE_PROTO("ack timer armed", QUIC_EV_CONN_IDLE_TIMER, qc);
+ }
+ }
+ else {
+ qc->idle_timer_task->expire = tick_first(qc->ack_expire, qc->idle_expire);
+ if (qc->state < QUIC_HS_ST_COMPLETE)
+ qc->idle_timer_task->expire = tick_first(qc->hs_expire, qc->idle_expire);
+ task_queue(qc->idle_timer_task);
+ TRACE_PROTO("idle timer armed", QUIC_EV_CONN_IDLE_TIMER, qc);
+ }
+}
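+
+/* Illustrative arithmetic for the two cases above, assuming quic_pto()
+ * returns 200ms and a max_idle_timeout of 30000ms: in the normal case the
+ * timer is armed at MAX(3 * 200, 30000) = 30000ms; in the closing/draining
+ * case it is armed at MIN(3 * 200, 1000) = 600ms, the 1s cap protecting the
+ * process from connections with a very high RTT.
+ */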
+
+/* Rearm the idle timer or ack timer for <qc> QUIC connection depending on <read>
+ * and <arm_ack> booleans. The former is set to 1 when receiving a packet,
+ * and 0 when sending a packet. <arm_ack> is set to 1 if this is the ack timer
+ * which must be rearmed.
+ */
+void qc_idle_timer_rearm(struct quic_conn *qc, int read, int arm_ack)
+{
+ TRACE_ENTER(QUIC_EV_CONN_IDLE_TIMER, qc);
+
+ if (read) {
+ qc->flags |= QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ;
+ }
+ else {
+ qc->flags &= ~QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ;
+ }
+ qc_idle_timer_do_rearm(qc, arm_ack);
+
+ TRACE_LEAVE(QUIC_EV_CONN_IDLE_TIMER, qc);
+}
+
+/* The task handling the idle timeout */
+struct task *qc_idle_timer_task(struct task *t, void *ctx, unsigned int state)
+{
+ struct quic_conn *qc = ctx;
+
+ TRACE_ENTER(QUIC_EV_CONN_IDLE_TIMER, qc);
+
+ if ((state & TASK_WOKEN_ANY) == TASK_WOKEN_TIMER && !tick_is_expired(t->expire, now_ms))
+ goto requeue;
+
+ if (tick_is_expired(qc->ack_expire, now_ms)) {
+ TRACE_PROTO("ack timer expired", QUIC_EV_CONN_IDLE_TIMER, qc);
+ qc->ack_expire = TICK_ETERNITY;
+ /* Note that ->idle_expire is always set. */
+ t->expire = qc->idle_expire;
+ /* Do not wakeup the I/O handler in DRAINING state or if the
+ * connection must be killed as soon as possible.
+ */
+ if (!(qc->flags & (QUIC_FL_CONN_DRAINING|QUIC_FL_CONN_TO_KILL))) {
+ qc->flags |= QUIC_FL_CONN_ACK_TIMER_FIRED;
+ tasklet_wakeup(qc->wait_event.tasklet);
+ }
+
+ goto requeue;
+ }
+
+ TRACE_PROTO("idle timer task running", QUIC_EV_CONN_IDLE_TIMER, qc);
+ /* Notify the MUX before setting QUIC_FL_CONN_EXP_TIMER or the MUX
+ * might free the quic-conn too early via quic_close().
+ */
+ qc_notify_err(qc);
+
+ /* If the MUX is still alive, keep the quic-conn. The MUX is
+ * responsible to call quic_close to release it.
+ */
+ qc->flags |= QUIC_FL_CONN_EXP_TIMER;
+ if (qc->mux_state != QC_MUX_READY) {
+ quic_conn_release(qc);
+ qc = NULL;
+ }
+ else {
+ task_destroy(t);
+ qc->idle_timer_task = NULL;
+ }
+
+ t = NULL;
+
+ /* TODO if the quic-conn cannot be freed because of the MUX, we may at
+ * least clean some parts of it such as the tasklet.
+ */
+
+ requeue:
+ TRACE_LEAVE(QUIC_EV_CONN_IDLE_TIMER, qc);
+ return t;
+}
+
+/* Initialize the idle timeout task for <qc>.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_conn_init_idle_timer_task(struct quic_conn *qc,
+ struct proxy *px)
+{
+ int ret = 0;
+ int timeout;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ timeout = px->timeout.client_hs ? px->timeout.client_hs : px->timeout.client;
+ qc->idle_timer_task = task_new_here();
+ if (!qc->idle_timer_task) {
+ TRACE_ERROR("Idle timer task allocation failed", QUIC_EV_CONN_NEW, qc);
+ goto leave;
+ }
+
+ qc->idle_timer_task->process = qc_idle_timer_task;
+ qc->idle_timer_task->context = qc;
+ qc->ack_expire = TICK_ETERNITY;
+ qc->hs_expire = tick_add_ifset(now_ms, MS_TO_TICKS(timeout));
+ qc_idle_timer_rearm(qc, 1, 0);
+ task_queue(qc->idle_timer_task);
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
+
+/* Return the QUIC version (quic_version struct) with <version> as version number
+ * if supported or NULL if not.
+ */
+const struct quic_version *qc_supported_version(uint32_t version)
+{
+ int i;
+
+ if (unlikely(!version))
+ return &quic_version_VN_reserved;
+
+ for (i = 0; i < quic_versions_nb; i++)
+ if (quic_versions[i].num == version)
+ return &quic_versions[i];
+
+ return NULL;
+}
+
+/* Check if connection ID <dcid> of length <dcid_len> belongs to <qc> local
+ * CIDs. This can be used to determine if a datagram is addressed to the right
+ * connection instance.
+ *
+ * Returns a boolean value.
+ */
+int qc_check_dcid(struct quic_conn *qc, unsigned char *dcid, size_t dcid_len)
+{
+ const uchar idx = _quic_cid_tree_idx(dcid);
+ struct quic_connection_id *conn_id;
+ struct ebmb_node *node = NULL;
+ struct quic_cid_tree *tree = &quic_cid_trees[idx];
+
+ /* Test against our default CID or client ODCID. */
+ if ((qc->scid.len == dcid_len &&
+ memcmp(qc->scid.data, dcid, dcid_len) == 0) ||
+ (qc->odcid.len == dcid_len &&
+ memcmp(qc->odcid.data, dcid, dcid_len) == 0)) {
+ return 1;
+ }
+
+ /* Test against our other CIDs. This can happen if the client has
+ * decided to switch to a new one.
+ *
+ * TODO to avoid locking, loop through qc.cids as an alternative.
+ *
+ * TODO set it to our default CID to avoid this operation next time.
+ */
+ HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_lookup(&tree->root, dcid, dcid_len);
+ HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock);
+
+ if (node) {
+ conn_id = ebmb_entry(node, struct quic_connection_id, node);
+ if (qc == conn_id->qc)
+ return 1;
+ }
+
+ return 0;
+}
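+
+/* Purely illustrative (hypothetical) use of qc_check_dcid() from a datagram
+ * dispatch path; the <dgram_dcid>/<dgram_dcid_len> names are made up:
+ *
+ *    if (!qc_check_dcid(qc, dgram_dcid, dgram_dcid_len)) {
+ *        // Datagram addressed to another connection: do not
+ *        // process it with <qc>.
+ *        return 0;
+ *    }
+ */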
+
+/* Wake up the upper layer for sending if all conditions are met:
+ * - room in the congestion window or a probe packet to send
+ * - socket FD ready to send, or listener socket used
+ *
+ * Returns 1 if upper layer has been woken up else 0.
+ */
+int qc_notify_send(struct quic_conn *qc)
+{
+ const struct quic_pktns *pktns = qc->apktns;
+
+ if (qc->subs && qc->subs->events & SUB_RETRY_SEND) {
+ /* RFC 9002 7.5. Probe Timeout
+ *
+ * Probe packets MUST NOT be blocked by the congestion controller.
+ */
+ if ((quic_cc_path_prep_data(qc->path) || pktns->tx.pto_probe) &&
+ (!qc_test_fd(qc) || !fd_send_active(qc->fd))) {
+ tasklet_wakeup(qc->subs->tasklet);
+ qc->subs->events &= ~SUB_RETRY_SEND;
+ if (!qc->subs->events)
+ qc->subs = NULL;
+
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Notify upper layer of a fatal error which forces to close the connection. */
+void qc_notify_err(struct quic_conn *qc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ if (qc->mux_state == QC_MUX_READY) {
+ TRACE_STATE("error notified to mux", QUIC_EV_CONN_CLOSE, qc);
+
+ /* Mark socket as closed. */
+ qc->conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+
+ /* TODO quic-conn layer must stay active until MUX is released.
+ * Thus, we have to wake up directly to ensure upper stream
+ * layer will be notified of the error. If a proper separation
+ * is made between MUX and quic-conn layer, wake up could be
+ * conducted only with qc.subs.
+ */
+ tasklet_wakeup(qc->qcc->wait_event.tasklet);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Move a <qc> QUIC connection and its resources from the current thread to the
+ * new one <new_tid> optionally in association with <new_li> (since it may need
+ * to change when migrating to a thread from a different group, otherwise leave
+ * it NULL). After this call, the connection cannot be dereferenced anymore on
+ * the current thread.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qc_set_tid_affinity(struct quic_conn *qc, uint new_tid, struct listener *new_li)
+{
+ struct task *t1 = NULL, *t2 = NULL;
+ struct tasklet *t3 = NULL;
+
+ struct quic_connection_id *conn_id;
+ struct eb64_node *node;
+
+ TRACE_ENTER(QUIC_EV_CONN_SET_AFFINITY, qc);
+
+ /* Pre-allocate all required resources. This ensures we do not leave a
+ * connection with only some of its fields rebound.
+ */
+ if (((t1 = task_new_on(new_tid)) == NULL) ||
+ (qc->timer_task && (t2 = task_new_on(new_tid)) == NULL) ||
+ (t3 = tasklet_new()) == NULL) {
+ goto err;
+ }
+
+ /* Reinit idle timer task. */
+ task_kill(qc->idle_timer_task);
+ t1->expire = qc->idle_timer_task->expire;
+ qc->idle_timer_task = t1;
+ qc->idle_timer_task->process = qc_idle_timer_task;
+ qc->idle_timer_task->context = qc;
+
+ /* Reinit timer task if allocated. */
+ if (qc->timer_task) {
+ task_kill(qc->timer_task);
+ qc->timer_task = t2;
+ qc->timer_task->process = qc_process_timer;
+ qc->timer_task->context = qc;
+ }
+
+ /* Reinit IO tasklet. */
+ if (qc->wait_event.tasklet->state & TASK_IN_LIST)
+ qc->flags |= QUIC_FL_CONN_IO_TO_REQUEUE;
+ tasklet_kill(qc->wait_event.tasklet);
+ /* In most cases quic_conn_app_io_cb is used, but for 0-RTT quic_conn_io_cb may still be activated. */
+ t3->process = qc->wait_event.tasklet->process;
+ qc->wait_event.tasklet = t3;
+ qc->wait_event.tasklet->tid = new_tid;
+ qc->wait_event.tasklet->context = qc;
+ qc->wait_event.events = 0;
+
+ /* Rebind the connection FD. */
+ if (qc_test_fd(qc)) {
+ /* Reading is reactivated by the new thread. */
+ fd_migrate_on(qc->fd, new_tid);
+ }
+
+ /* Remove conn from per-thread list instance. It will be hidden from
+ * "show quic" until rebinding is completed.
+ */
+ qc_detach_th_ctx_list(qc, 0);
+
+ node = eb64_first(qc->cids);
+ BUG_ON(!node || eb64_next(node)); /* One and only one CID must be present before affinity rebind. */
+ conn_id = eb64_entry(node, struct quic_connection_id, seq_num);
+
+ /* At this point no connection was accounted for yet on this
+ * listener so it's OK to just swap the pointer.
+ */
+ if (new_li && new_li != qc->li)
+ qc->li = new_li;
+
+ /* Rebinding is considered done when CID points to the new thread. No
+ * access should be done to quic-conn instance after it.
+ */
+ qc->flags |= QUIC_FL_CONN_AFFINITY_CHANGED;
+ HA_ATOMIC_STORE(&conn_id->tid, new_tid);
+ qc = NULL;
+
+ TRACE_LEAVE(QUIC_EV_CONN_SET_AFFINITY, NULL);
+ return 0;
+
+ err:
+ task_destroy(t1);
+ task_destroy(t2);
+ tasklet_free(t3);
+
+ TRACE_DEVEL("leaving on error", QUIC_EV_CONN_SET_AFFINITY, qc);
+ return 1;
+}
+
+/* Must be called after qc_set_tid_affinity() on the new thread. */
+void qc_finalize_affinity_rebind(struct quic_conn *qc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_SET_AFFINITY, qc);
+
+ /* This function must not be called twice after an affinity rebind. */
+ BUG_ON(!(qc->flags & QUIC_FL_CONN_AFFINITY_CHANGED));
+ qc->flags &= ~QUIC_FL_CONN_AFFINITY_CHANGED;
+
+ /* If quic_conn is closing it is unnecessary to migrate it as it will
+ * be soon released. Besides, special care must be taken for CLOSING
+ * connections (using quic_conn_closed and th_ctx.quic_conns_clo list for
+ * instance). This should never occur as CLOSING connections are
+ * skipped by quic_sock_accept_conn().
+ */
+ BUG_ON(qc->flags & (QUIC_FL_CONN_CLOSING|QUIC_FL_CONN_DRAINING));
+
+ /* Reinsert connection in ha_thread_ctx global list. */
+ LIST_APPEND(&th_ctx->quic_conns, &qc->el_th_ctx);
+ qc->qc_epoch = HA_ATOMIC_LOAD(&qc_epoch);
+
+ /* Reactivate FD polling if connection socket is active. */
+ qc_want_recv(qc);
+
+ /* Reactivate timer task if needed. */
+ qc_set_timer(qc);
+
+ /* Idle timer task is always active. */
+ task_queue(qc->idle_timer_task);
+
+ /* Reactivate IO tasklet if needed. */
+ if (qc->flags & QUIC_FL_CONN_IO_TO_REQUEUE) {
+ tasklet_wakeup(qc->wait_event.tasklet);
+ qc->flags &= ~QUIC_FL_CONN_IO_TO_REQUEUE;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_SET_AFFINITY, qc);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/quic_frame.c b/src/quic_frame.c
new file mode 100644
index 0000000..61d2c93
--- /dev/null
+++ b/src/quic_frame.c
@@ -0,0 +1,1273 @@
+/*
+ * Copyright 2019 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <string.h>
+
+#include <import/eb64tree.h>
+#include <haproxy/buf-t.h>
+#include <haproxy/chunk.h>
+#include <haproxy/pool.h>
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/quic_frame.h>
+#include <haproxy/quic_rx-t.h>
+#include <haproxy/quic_tp-t.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/quic_tx.h>
+#include <haproxy/trace.h>
+
+DECLARE_POOL(pool_head_quic_frame, "quic_frame", sizeof(struct quic_frame));
+DECLARE_POOL(pool_head_qf_crypto, "qf_crypto", sizeof(struct qf_crypto));
+
+const char *quic_frame_type_string(enum quic_frame_type ft)
+{
+ switch (ft) {
+ case QUIC_FT_PADDING:
+ return "PADDING";
+ case QUIC_FT_PING:
+ return "PING";
+ case QUIC_FT_ACK:
+ return "ACK";
+ case QUIC_FT_ACK_ECN:
+ return "ACK_ECN";
+ case QUIC_FT_RESET_STREAM:
+ return "RESET_STREAM";
+ case QUIC_FT_STOP_SENDING:
+ return "STOP_SENDING";
+ case QUIC_FT_CRYPTO:
+ return "CRYPTO";
+ case QUIC_FT_NEW_TOKEN:
+ return "NEW_TOKEN";
+
+ case QUIC_FT_STREAM_8:
+ return "STREAM_8";
+ case QUIC_FT_STREAM_9:
+ return "STREAM_9";
+ case QUIC_FT_STREAM_A:
+ return "STREAM_A";
+ case QUIC_FT_STREAM_B:
+ return "STREAM_B";
+ case QUIC_FT_STREAM_C:
+ return "STREAM_C";
+ case QUIC_FT_STREAM_D:
+ return "STREAM_D";
+ case QUIC_FT_STREAM_E:
+ return "STREAM_E";
+ case QUIC_FT_STREAM_F:
+ return "STREAM_F";
+
+ case QUIC_FT_MAX_DATA:
+ return "MAX_DATA";
+ case QUIC_FT_MAX_STREAM_DATA:
+ return "MAX_STREAM_DATA";
+ case QUIC_FT_MAX_STREAMS_BIDI:
+ return "MAX_STREAMS_BIDI";
+ case QUIC_FT_MAX_STREAMS_UNI:
+ return "MAX_STREAMS_UNI";
+ case QUIC_FT_DATA_BLOCKED:
+ return "DATA_BLOCKED";
+ case QUIC_FT_STREAM_DATA_BLOCKED:
+ return "STREAM_DATA_BLOCKED";
+ case QUIC_FT_STREAMS_BLOCKED_BIDI:
+ return "STREAMS_BLOCKED_BIDI";
+ case QUIC_FT_STREAMS_BLOCKED_UNI:
+ return "STREAMS_BLOCKED_UNI";
+ case QUIC_FT_NEW_CONNECTION_ID:
+ return "NEW_CONNECTION_ID";
+ case QUIC_FT_RETIRE_CONNECTION_ID:
+ return "RETIRE_CONNECTION_ID";
+ case QUIC_FT_PATH_CHALLENGE:
+ return "PATH_CHALLENGE";
+ case QUIC_FT_PATH_RESPONSE:
+ return "PATH_RESPONSE";
+ case QUIC_FT_CONNECTION_CLOSE:
+ return "CONNECTION_CLOSE";
+ case QUIC_FT_CONNECTION_CLOSE_APP:
+ return "CONNECTION_CLOSE_APP";
+ case QUIC_FT_HANDSHAKE_DONE:
+ return "HANDSHAKE_DONE";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static void chunk_cc_phrase_appendf(struct buffer *buf,
+ const unsigned char *phr, size_t phrlen)
+{
+ chunk_appendf(buf, " reason_phrase: '");
+ while (phrlen--)
+ chunk_appendf(buf, "%c", *phr++);
+ chunk_appendf(buf, "'");
+}
+
+/* Add traces to <buf> depending on <frm> frame type. */
+void chunk_frm_appendf(struct buffer *buf, const struct quic_frame *frm)
+{
+ chunk_appendf(buf, " %s", quic_frame_type_string(frm->type));
+ switch (frm->type) {
+ case QUIC_FT_CRYPTO:
+ {
+ const struct qf_crypto *crypto_frm = &frm->crypto;
+ chunk_appendf(buf, " cfoff=%llu cflen=%llu",
+ (ull)crypto_frm->offset, (ull)crypto_frm->len);
+ break;
+ }
+ case QUIC_FT_RESET_STREAM:
+ {
+ const struct qf_reset_stream *rs_frm = &frm->reset_stream;
+ chunk_appendf(buf, " id=%llu app_error_code=%llu final_size=%llu",
+ (ull)rs_frm->id, (ull)rs_frm->app_error_code, (ull)rs_frm->final_size);
+ break;
+ }
+ case QUIC_FT_STOP_SENDING:
+ {
+ const struct qf_stop_sending *ss_frm = &frm->stop_sending;
+ chunk_appendf(&trace_buf, " id=%llu app_error_code=%llu",
+ (ull)ss_frm->id, (ull)ss_frm->app_error_code);
+ break;
+ }
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ const struct qf_stream *strm_frm = &frm->stream;
+ chunk_appendf(&trace_buf, " uni=%d fin=%d id=%llu off=%llu len=%llu",
+ !!(strm_frm->id & QUIC_STREAM_FRAME_ID_DIR_BIT),
+ !!(frm->type & QUIC_STREAM_FRAME_TYPE_FIN_BIT),
+ (ull)strm_frm->id, (ull)strm_frm->offset.key, (ull)strm_frm->len);
+ break;
+ }
+ case QUIC_FT_MAX_DATA:
+ {
+ const struct qf_max_data *md_frm = &frm->max_data;
+ chunk_appendf(&trace_buf, " max_data=%llu", (ull)md_frm->max_data);
+ break;
+ }
+ case QUIC_FT_MAX_STREAM_DATA:
+ {
+ const struct qf_max_stream_data *msd_frm = &frm->max_stream_data;
+ chunk_appendf(&trace_buf, " id=%llu max_stream_data=%llu",
+ (ull)msd_frm->id, (ull)msd_frm->max_stream_data);
+ break;
+ }
+ case QUIC_FT_MAX_STREAMS_BIDI:
+ {
+ const struct qf_max_streams *ms_frm = &frm->max_streams_bidi;
+ chunk_appendf(&trace_buf, " max_streams=%llu", (ull)ms_frm->max_streams);
+ break;
+ }
+ case QUIC_FT_MAX_STREAMS_UNI:
+ {
+ const struct qf_max_streams *ms_frm = &frm->max_streams_uni;
+ chunk_appendf(&trace_buf, " max_streams=%llu", (ull)ms_frm->max_streams);
+ break;
+ }
+ case QUIC_FT_DATA_BLOCKED:
+ {
+ const struct qf_data_blocked *db_frm = &frm->data_blocked;
+ chunk_appendf(&trace_buf, " limit=%llu", (ull)db_frm->limit);
+ break;
+ }
+ case QUIC_FT_STREAM_DATA_BLOCKED:
+ {
+ const struct qf_stream_data_blocked *sdb_frm = &frm->stream_data_blocked;
+ chunk_appendf(&trace_buf, " id=%llu limit=%llu",
+ (ull)sdb_frm->id, (ull)sdb_frm->limit);
+ break;
+ }
+ case QUIC_FT_STREAMS_BLOCKED_BIDI:
+ {
+ const struct qf_streams_blocked *sb_frm = &frm->streams_blocked_bidi;
+ chunk_appendf(&trace_buf, " limit=%llu", (ull)sb_frm->limit);
+ break;
+ }
+ case QUIC_FT_STREAMS_BLOCKED_UNI:
+ {
+ const struct qf_streams_blocked *sb_frm = &frm->streams_blocked_uni;
+ chunk_appendf(&trace_buf, " limit=%llu", (ull)sb_frm->limit);
+ break;
+ }
+ case QUIC_FT_RETIRE_CONNECTION_ID:
+ {
+ const struct qf_retire_connection_id *rcid_frm = &frm->retire_connection_id;
+ chunk_appendf(&trace_buf, " seq_num=%llu", (ull)rcid_frm->seq_num);
+ break;
+ }
+ case QUIC_FT_CONNECTION_CLOSE:
+ {
+ const struct qf_connection_close *cc_frm = &frm->connection_close;
+ size_t plen = QUIC_MIN((size_t)cc_frm->reason_phrase_len, sizeof cc_frm->reason_phrase);
+ chunk_appendf(buf,
+ " error_code=%llu frame_type=%llu reason_phrase_len=%llu",
+ (ull)cc_frm->error_code, (ull)cc_frm->frame_type,
+ (ull)cc_frm->reason_phrase_len);
+ if (plen)
+ chunk_cc_phrase_appendf(buf, cc_frm->reason_phrase, plen);
+ break;
+ }
+ case QUIC_FT_CONNECTION_CLOSE_APP:
+ {
+ const struct qf_connection_close_app *cc_frm = &frm->connection_close_app;
+ size_t plen = QUIC_MIN((size_t)cc_frm->reason_phrase_len, sizeof cc_frm->reason_phrase);
+ chunk_appendf(buf,
+ " error_code=%llu reason_phrase_len=%llu",
+ (ull)cc_frm->error_code, (ull)cc_frm->reason_phrase_len);
+ if (plen)
+ chunk_cc_phrase_appendf(buf, cc_frm->reason_phrase, plen);
+ break;
+ }
+ }
+}
+
+/* Encode <frm> PADDING frame at <pos> buffer position, <end> being one byte past the end
+ * of this buffer.
+ * Returns 1 if succeeded (enough room in the buffer to encode the frame), 0 if not.
+ */
+static int quic_build_padding_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_padding *padding_frm = &frm->padding;
+
+ if (end - *pos < padding_frm->len - 1)
+ return 0;
+
+ memset(*pos, 0, padding_frm->len - 1);
+ *pos += padding_frm->len - 1;
+
+ return 1;
+}
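+
+/* Note on the <len> - 1 arithmetic above: <len> accounts for the frame type
+ * byte, already emitted by the generic frame encoder before this builder is
+ * invoked, so only <len> - 1 zero bytes remain to be written here. This
+ * mirrors quic_parse_padding_frame() below, which starts from len = 1 (the
+ * type byte) before adding the number of consumed zero bytes.
+ */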
+
+/* Parse a PADDING frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_padding_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ const unsigned char *beg;
+ struct qf_padding *padding_frm = &frm->padding;
+
+ beg = *pos;
+ padding_frm->len = 1;
+ while (*pos < end && !**pos)
+ (*pos)++;
+ padding_frm->len += *pos - beg;
+
+ return 1;
+}
+
+/* Encode a PING frame at <pos> buffer position.
+ * Always succeeds.
+ */
+static int quic_build_ping_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ /* No field */
+ return 1;
+}
+
+/* Parse a PING frame from <pos> buffer position with <end> as end into <frm> frame.
+ * Always succeeds.
+ */
+static int quic_parse_ping_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ /* No field */
+ return 1;
+}
+
+/* Encode an ACK frame.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_ack_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *qc)
+{
+ struct qf_tx_ack *ack_frm = &frm->tx_ack;
+ struct eb64_node *ar, *prev_ar;
+ struct quic_arng_node *ar_node, *prev_ar_node;
+
+ ar = eb64_last(&ack_frm->arngs->root);
+ ar_node = eb64_entry(ar, struct quic_arng_node, first);
+ TRACE_PROTO("TX ack range", QUIC_EV_CONN_PRSAFRM,
+ qc,, &ar_node->last, &ar_node->first.key);
+ if (!quic_enc_int(pos, end, ar_node->last) ||
+ !quic_enc_int(pos, end, ack_frm->ack_delay) ||
+ !quic_enc_int(pos, end, ack_frm->arngs->sz - 1) ||
+ !quic_enc_int(pos, end, ar_node->last - ar_node->first.key))
+ return 0;
+
+ while ((prev_ar = eb64_prev(ar))) {
+ prev_ar_node = eb64_entry(prev_ar, struct quic_arng_node, first);
+ TRACE_PROTO("TX ack range", QUIC_EV_CONN_PRSAFRM, qc,,
+ &prev_ar_node->last, &prev_ar_node->first.key);
+ if (!quic_enc_int(pos, end, ar_node->first.key - prev_ar_node->last - 2) ||
+ !quic_enc_int(pos, end, prev_ar_node->last - prev_ar_node->first.key))
+ return 0;
+
+ ar = prev_ar;
+ ar_node = eb64_entry(ar, struct quic_arng_node, first);
+ }
+
+ return 1;
+}
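+
+/* Worked example of the ACK range encoding above: with acknowledged packet
+ * numbers {1, 2, 5, 6, 7}, the tree holds the ranges [1..2] and [5..7].
+ * The frame is encoded as: Largest Acknowledged = 7, ACK Delay, ACK Range
+ * Count = 1, First ACK Range = 7 - 5 = 2, then one (Gap, Length) pair with
+ * Gap = 5 - 2 - 2 = 1 (packets 3 and 4 are missing) and Length = 2 - 1 = 1,
+ * matching the RFC 9000 19.3.1 encoding.
+ */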
+
+/* Parse an ACK frame header at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_ack_frame_header(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ int ret;
+ struct qf_ack *ack_frm = &frm->ack;
+
+ ret = quic_dec_int(&ack_frm->largest_ack, pos, end);
+ if (!ret)
+ return 0;
+
+ ret = quic_dec_int(&ack_frm->ack_delay, pos, end);
+ if (!ret)
+ return 0;
+
+ ret = quic_dec_int(&ack_frm->ack_range_num, pos, end);
+ if (!ret)
+ return 0;
+
+ ret = quic_dec_int(&ack_frm->first_ack_range, pos, end);
+ if (!ret)
+ return 0;
+
+ return 1;
+}
+
+/* Encode an ACK_ECN frame.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_ack_ecn_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_ack *ack_frm = &frm->ack;
+
+ return quic_enc_int(pos, end, ack_frm->largest_ack) &&
+ quic_enc_int(pos, end, ack_frm->ack_delay) &&
+ quic_enc_int(pos, end, ack_frm->first_ack_range) &&
+ quic_enc_int(pos, end, ack_frm->ack_range_num);
+}
+
+/* Parse an ACK_ECN frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_ack_ecn_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_ack *ack_frm = &frm->ack;
+
+ return quic_dec_int(&ack_frm->largest_ack, pos, end) &&
+ quic_dec_int(&ack_frm->ack_delay, pos, end) &&
+ quic_dec_int(&ack_frm->first_ack_range, pos, end) &&
+ quic_dec_int(&ack_frm->ack_range_num, pos, end);
+}
+
+/* Encode a RESET_STREAM frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_reset_stream_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_reset_stream *rs_frm = &frm->reset_stream;
+
+ return quic_enc_int(pos, end, rs_frm->id) &&
+ quic_enc_int(pos, end, rs_frm->app_error_code) &&
+ quic_enc_int(pos, end, rs_frm->final_size);
+}
+
+/* Parse a RESET_STREAM frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_reset_stream_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_reset_stream *rs_frm = &frm->reset_stream;
+
+ return quic_dec_int(&rs_frm->id, pos, end) &&
+ quic_dec_int(&rs_frm->app_error_code, pos, end) &&
+ quic_dec_int(&rs_frm->final_size, pos, end);
+}
+
+/* Encode a STOP_SENDING frame.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_stop_sending_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_stop_sending *ss_frm = &frm->stop_sending;
+
+ return quic_enc_int(pos, end, ss_frm->id) &&
+ quic_enc_int(pos, end, ss_frm->app_error_code);
+}
+
+/* Parse a STOP_SENDING frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_stop_sending_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_stop_sending *ss_frm = &frm->stop_sending;
+
+ return quic_dec_int(&ss_frm->id, pos, end) &&
+ quic_dec_int(&ss_frm->app_error_code, pos, end);
+}
+
+/* Encode a CRYPTO frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_crypto_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_crypto *crypto_frm = &frm->crypto;
+ const struct quic_enc_level *qel = crypto_frm->qel;
+ size_t offset, len;
+
+ if (!quic_enc_int(pos, end, crypto_frm->offset) ||
+ !quic_enc_int(pos, end, crypto_frm->len) || end - *pos < crypto_frm->len)
+ return 0;
+
+ len = crypto_frm->len;
+ offset = crypto_frm->offset;
+ while (len) {
+ int idx;
+ size_t to_copy;
+ const unsigned char *data;
+
+ idx = offset >> QUIC_CRYPTO_BUF_SHIFT;
+ to_copy = qel->tx.crypto.bufs[idx]->sz - (offset & QUIC_CRYPTO_BUF_MASK);
+ if (to_copy > len)
+ to_copy = len;
+ data = qel->tx.crypto.bufs[idx]->data + (offset & QUIC_CRYPTO_BUF_MASK);
+ memcpy(*pos, data, to_copy);
+ *pos += to_copy;
+ offset += to_copy;
+ len -= to_copy;
+ }
+
+ return 1;
+}
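+
+/* Illustrative walk through the chunked copy above, assuming purely for the
+ * sake of the example 4096-byte crypto buffers (QUIC_CRYPTO_BUF_SHIFT == 12)
+ * with bufs[0] full: for offset = 4090 and len = 100, the first iteration
+ * copies the 6 bytes remaining at the end of bufs[0] (4090 >> 12 == 0), then
+ * the second iteration copies the 94 remaining bytes from the start of
+ * bufs[1].
+ */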
+
+/* Parse a CRYPTO frame from <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_crypto_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_crypto *crypto_frm = &frm->crypto;
+
+ if (!quic_dec_int(&crypto_frm->offset, pos, end) ||
+ !quic_dec_int(&crypto_frm->len, pos, end) || end - *pos < crypto_frm->len)
+ return 0;
+
+ crypto_frm->data = *pos;
+ *pos += crypto_frm->len;
+
+ return 1;
+}
+
+/* Encode a NEW_TOKEN frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_new_token_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_new_token *new_token_frm = &frm->new_token;
+
+ if (!quic_enc_int(pos, end, new_token_frm->len) || end - *pos < new_token_frm->len)
+ return 0;
+
+ memcpy(*pos, new_token_frm->data, new_token_frm->len);
+ *pos += new_token_frm->len;
+
+ return 1;
+}
+
+/* Parse a NEW_TOKEN frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_new_token_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_new_token *new_token_frm = &frm->new_token;
+
+ if (!quic_dec_int(&new_token_frm->len, pos, end) || end - *pos < new_token_frm->len)
+ return 0;
+
+ new_token_frm->data = *pos;
+ *pos += new_token_frm->len;
+
+ return 1;
+}
+
+/* Encode a STREAM frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_stream_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_stream *strm_frm = &frm->stream;
+ const unsigned char *wrap;
+
+ /* Caller must set OFF bit if and only if a non-null offset is used. */
+ BUG_ON(!!(frm->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT) !=
+ !!strm_frm->offset.key);
+
+ if (!quic_enc_int(pos, end, strm_frm->id) ||
+ ((frm->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT) && !quic_enc_int(pos, end, strm_frm->offset.key)) ||
+ ((frm->type & QUIC_STREAM_FRAME_TYPE_LEN_BIT) &&
+ (!quic_enc_int(pos, end, strm_frm->len) || end - *pos < strm_frm->len)))
+ return 0;
+
+ /* No need for data memcpy if no payload. */
+ if (!strm_frm->len)
+ return 1;
+
+ wrap = (const unsigned char *)b_wrap(strm_frm->buf);
+ if (strm_frm->data + strm_frm->len > wrap) {
+ size_t to_copy = wrap - strm_frm->data;
+ memcpy(*pos, strm_frm->data, to_copy);
+ *pos += to_copy;
+
+ to_copy = strm_frm->len - to_copy;
+ memcpy(*pos, b_orig(strm_frm->buf), to_copy);
+ *pos += to_copy;
+ }
+ else {
+ memcpy(*pos, strm_frm->data, strm_frm->len);
+ *pos += strm_frm->len;
+ }
+
+ return 1;
+}
+
+/* Parse a STREAM frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_stream_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_stream *strm_frm = &frm->stream;
+
+ if (!quic_dec_int(&strm_frm->id, pos, end))
+ return 0;
+
+ /* Offset parsing */
+ if (!(frm->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT)) {
+ strm_frm->offset.key = 0;
+ }
+ else if (!quic_dec_int((uint64_t *)&strm_frm->offset.key, pos, end))
+ return 0;
+
+ /* Length parsing */
+ if (!(frm->type & QUIC_STREAM_FRAME_TYPE_LEN_BIT)) {
+ strm_frm->len = end - *pos;
+ }
+ else if (!quic_dec_int(&strm_frm->len, pos, end) || end - *pos < strm_frm->len)
+ return 0;
+
+ strm_frm->data = *pos;
+ *pos += strm_frm->len;
+
+ return 1;
+}
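+
+/* Reminder on STREAM frame types (RFC 9000, section 19.8): the eight types
+ * 0x08 to 0x0f share the same layout, the three low bits of the type acting
+ * as flags: 0x04 (OFF) means an explicit Offset field is present, 0x02 (LEN)
+ * means an explicit Length field is present (otherwise the data extends to
+ * the end of the packet, as handled above) and 0x01 (FIN) marks the end of
+ * the stream. E.g. type 0x0e carries both an offset and a length, without FIN.
+ */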
+
+/* Encode a MAX_DATA frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_max_data_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_max_data *md_frm = &frm->max_data;
+
+ return quic_enc_int(pos, end, md_frm->max_data);
+}
+
+/* Parse a MAX_DATA frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_max_data_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_max_data *md_frm = &frm->max_data;
+
+ return quic_dec_int(&md_frm->max_data, pos, end);
+}
+
+/* Encode a MAX_STREAM_DATA frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_max_stream_data_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_max_stream_data *msd_frm = &frm->max_stream_data;
+
+ return quic_enc_int(pos, end, msd_frm->id) &&
+ quic_enc_int(pos, end, msd_frm->max_stream_data);
+}
+
+/* Parse a MAX_STREAM_DATA frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_max_stream_data_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_max_stream_data *msd_frm = &frm->max_stream_data;
+
+ return quic_dec_int(&msd_frm->id, pos, end) &&
+ quic_dec_int(&msd_frm->max_stream_data, pos, end);
+}
+
+/* Encode a MAX_STREAMS frame for bidirectional streams at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_max_streams_bidi_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_max_streams *ms_frm = &frm->max_streams_bidi;
+
+ return quic_enc_int(pos, end, ms_frm->max_streams);
+}
+
+/* Parse a MAX_STREAMS frame for bidirectional streams at <pos> buffer position with <end>
+ * as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_max_streams_bidi_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_max_streams *ms_frm = &frm->max_streams_bidi;
+
+ return quic_dec_int(&ms_frm->max_streams, pos, end);
+}
+
+/* Encode a MAX_STREAMS frame for unidirectional streams at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_max_streams_uni_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_max_streams *ms_frm = &frm->max_streams_uni;
+
+ return quic_enc_int(pos, end, ms_frm->max_streams);
+}
+
+/* Parse a MAX_STREAMS frame for unidirectional streams at <pos> buffer position with <end>
+ * as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_max_streams_uni_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_max_streams *ms_frm = &frm->max_streams_uni;
+
+ return quic_dec_int(&ms_frm->max_streams, pos, end);
+}
+
+/* Encode a DATA_BLOCKED frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_data_blocked_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_data_blocked *db_frm = &frm->data_blocked;
+
+ return quic_enc_int(pos, end, db_frm->limit);
+}
+
+/* Parse a DATA_BLOCKED frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_data_blocked_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_data_blocked *db_frm = &frm->data_blocked;
+
+ return quic_dec_int(&db_frm->limit, pos, end);
+}
+
+/* Encode a STREAM_DATA_BLOCKED frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_stream_data_blocked_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_stream_data_blocked *sdb_frm = &frm->stream_data_blocked;
+
+ return quic_enc_int(pos, end, sdb_frm->id) &&
+ quic_enc_int(pos, end, sdb_frm->limit);
+}
+
+/* Parse a STREAM_DATA_BLOCKED frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_stream_data_blocked_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_stream_data_blocked *sdb_frm = &frm->stream_data_blocked;
+
+ return quic_dec_int(&sdb_frm->id, pos, end) &&
+ quic_dec_int(&sdb_frm->limit, pos, end);
+}
+
+/* Encode a STREAMS_BLOCKED frame for bidirectional streams at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_streams_blocked_bidi_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_streams_blocked *sb_frm = &frm->streams_blocked_bidi;
+
+ return quic_enc_int(pos, end, sb_frm->limit);
+}
+
+/* Parse a STREAMS_BLOCKED frame for bidirectional streams at <pos> buffer position with <end>
+ * as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_streams_blocked_bidi_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_streams_blocked *sb_frm = &frm->streams_blocked_bidi;
+
+ return quic_dec_int(&sb_frm->limit, pos, end);
+}
+
+/* Encode a STREAMS_BLOCKED frame for unidirectional streams at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_streams_blocked_uni_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_streams_blocked *sb_frm = &frm->streams_blocked_uni;
+
+ return quic_enc_int(pos, end, sb_frm->limit);
+}
+
+/* Parse a STREAMS_BLOCKED frame for unidirectional streams at <pos> buffer position with <end>
+ * as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_streams_blocked_uni_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_streams_blocked *sb_frm = &frm->streams_blocked_uni;
+
+ return quic_dec_int(&sb_frm->limit, pos, end);
+}
+
+/* Encode a NEW_CONNECTION_ID frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_new_connection_id_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_new_connection_id *ncid_frm = &frm->new_connection_id;
+
+ if (!quic_enc_int(pos, end, ncid_frm->seq_num) ||
+ !quic_enc_int(pos, end, ncid_frm->retire_prior_to) ||
+ end - *pos < sizeof ncid_frm->cid.len + ncid_frm->cid.len + QUIC_STATELESS_RESET_TOKEN_LEN)
+ return 0;
+
+ *(*pos)++ = ncid_frm->cid.len;
+
+ if (ncid_frm->cid.len) {
+ memcpy(*pos, ncid_frm->cid.data, ncid_frm->cid.len);
+ *pos += ncid_frm->cid.len;
+ }
+ memcpy(*pos, ncid_frm->stateless_reset_token, QUIC_STATELESS_RESET_TOKEN_LEN);
+ *pos += QUIC_STATELESS_RESET_TOKEN_LEN;
+
+ return 1;
+}
+
+/* Parse a NEW_CONNECTION_ID frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_new_connection_id_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_new_connection_id *ncid_frm = &frm->new_connection_id;
+
+ if (!quic_dec_int(&ncid_frm->seq_num, pos, end) ||
+ !quic_dec_int(&ncid_frm->retire_prior_to, pos, end) || end <= *pos)
+ return 0;
+
+ ncid_frm->cid.len = *(*pos)++;
+ if (end - *pos < ncid_frm->cid.len + QUIC_STATELESS_RESET_TOKEN_LEN)
+ return 0;
+
+ if (ncid_frm->cid.len) {
+ ncid_frm->cid.data = *pos;
+ *pos += ncid_frm->cid.len;
+ }
+ ncid_frm->stateless_reset_token = *pos;
+ *pos += QUIC_STATELESS_RESET_TOKEN_LEN;
+
+ return 1;
+}
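+
+/* For reference, the NEW_CONNECTION_ID wire layout parsed above is, per
+ * RFC 9000 section 19.15: a Sequence Number (varint), a Retire Prior To
+ * field (varint), a 1-byte connection ID length, the connection ID itself
+ * (1 to 20 bytes per the RFC) and a 16-byte stateless reset token
+ * (QUIC_STATELESS_RESET_TOKEN_LEN).
+ */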
+
+/* Encode a RETIRE_CONNECTION_ID frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_retire_connection_id_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_retire_connection_id *rcid_frm = &frm->retire_connection_id;
+
+ return quic_enc_int(pos, end, rcid_frm->seq_num);
+}
+
+/* Parse a RETIRE_CONNECTION_ID frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room to parse this frame), 0 if not.
+ */
+static int quic_parse_retire_connection_id_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_retire_connection_id *rcid_frm = &frm->retire_connection_id;
+
+ return quic_dec_int(&rcid_frm->seq_num, pos, end);
+}
+
+/* Encode a PATH_CHALLENGE frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_path_challenge_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_path_challenge *pc_frm = &frm->path_challenge;
+
+ if (end - *pos < sizeof pc_frm->data)
+ return 0;
+
+ memcpy(*pos, pc_frm->data, sizeof pc_frm->data);
+ *pos += sizeof pc_frm->data;
+
+ return 1;
+}
+
+/* Parse a PATH_CHALLENGE frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_path_challenge_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_path_challenge *pc_frm = &frm->path_challenge;
+
+ if (end - *pos < sizeof pc_frm->data)
+ return 0;
+
+ memcpy(pc_frm->data, *pos, sizeof pc_frm->data);
+ *pos += sizeof pc_frm->data;
+
+ return 1;
+}
+
+
+/* Encode a PATH_RESPONSE frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_path_response_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_path_challenge_response *pcr_frm = &frm->path_challenge_response;
+
+ if (end - *pos < sizeof pcr_frm->data)
+ return 0;
+
+ memcpy(*pos, pcr_frm->data, sizeof pcr_frm->data);
+ *pos += sizeof pcr_frm->data;
+
+ return 1;
+}
+
+/* Parse a PATH_RESPONSE frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_path_response_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_path_challenge_response *pcr_frm = &frm->path_challenge_response;
+
+ if (end - *pos < sizeof pcr_frm->data)
+ return 0;
+
+ memcpy(pcr_frm->data, *pos, sizeof pcr_frm->data);
+ *pos += sizeof pcr_frm->data;
+
+ return 1;
+}
+
+/* Encode a CONNECTION_CLOSE frame at QUIC layer at <pos> buffer position.
+ * Note there exist two types of CONNECTION_CLOSE frame, one for the application layer
+ * and another at QUIC layer.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_connection_close_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_connection_close *cc_frm = &frm->connection_close;
+
+ if (!quic_enc_int(pos, end, cc_frm->error_code) ||
+ !quic_enc_int(pos, end, cc_frm->frame_type) ||
+ !quic_enc_int(pos, end, cc_frm->reason_phrase_len) ||
+ end - *pos < cc_frm->reason_phrase_len)
+ return 0;
+
+ memcpy(*pos, cc_frm->reason_phrase, cc_frm->reason_phrase_len);
+ *pos += cc_frm->reason_phrase_len;
+
+ return 1;
+}
+
+/* Parse a CONNECTION_CLOSE frame at QUIC layer at <pos> buffer position with <end> as end into <frm> frame.
+ * Note there exist two types of CONNECTION_CLOSE frame, one for the application layer
+ * and another at QUIC layer.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_connection_close_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ size_t plen;
+ struct qf_connection_close *cc_frm = &frm->connection_close;
+
+ if (!quic_dec_int(&cc_frm->error_code, pos, end) ||
+ !quic_dec_int(&cc_frm->frame_type, pos, end) ||
+ !quic_dec_int(&cc_frm->reason_phrase_len, pos, end) ||
+ end - *pos < cc_frm->reason_phrase_len)
+ return 0;
+
+ plen = QUIC_MIN((size_t)cc_frm->reason_phrase_len, sizeof cc_frm->reason_phrase);
+ memcpy(cc_frm->reason_phrase, *pos, plen);
+ *pos += cc_frm->reason_phrase_len;
+
+ return 1;
+}
+
+/* Encode a CONNECTION_CLOSE frame at application layer at <pos> buffer position.
+ * Note there exist two types of CONNECTION_CLOSE frame, one for application layer
+ * and another at QUIC layer.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ */
+static int quic_build_connection_close_app_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ struct qf_connection_close_app *cc_frm = &frm->connection_close_app;
+
+ if (!quic_enc_int(pos, end, cc_frm->error_code) ||
+ !quic_enc_int(pos, end, cc_frm->reason_phrase_len) ||
+ end - *pos < cc_frm->reason_phrase_len)
+ return 0;
+
+ memcpy(*pos, cc_frm->reason_phrase, cc_frm->reason_phrase_len);
+ *pos += cc_frm->reason_phrase_len;
+
+ return 1;
+}
+
+/* Parse a CONNECTION_CLOSE frame at application layer at <pos> buffer position with <end> as end into <frm> frame.
+ * Note there exist two types of CONNECTION_CLOSE frame, one for the application layer
+ * and another at QUIC layer.
+ * Return 1 if succeeded (enough room at <pos> buffer position to parse this frame), 0 if not.
+ */
+static int quic_parse_connection_close_app_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ size_t plen;
+ struct qf_connection_close_app *cc_frm = &frm->connection_close_app;
+
+ if (!quic_dec_int(&cc_frm->error_code, pos, end) ||
+ !quic_dec_int(&cc_frm->reason_phrase_len, pos, end) ||
+ end - *pos < cc_frm->reason_phrase_len)
+ return 0;
+
+ plen = QUIC_MIN((size_t)cc_frm->reason_phrase_len, sizeof cc_frm->reason_phrase);
+ memcpy(cc_frm->reason_phrase, *pos, plen);
+ *pos += cc_frm->reason_phrase_len;
+
+ return 1;
+}
+
+/* Encode a HANDSHAKE_DONE frame at <pos> buffer position.
+ * Always succeeds.
+ */
+static int quic_build_handshake_done_frame(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn)
+{
+ /* No field */
+ return 1;
+}
+
+/* Parse a HANDSHAKE_DONE frame at <pos> buffer position with <end> as end into <frm> frame.
+ * Always succeeds.
+ */
+static int quic_parse_handshake_done_frame(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end)
+{
+ /* No field */
+ return 1;
+}
+
+struct quic_frame_builder {
+ int (*func)(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_conn *conn);
+ uint32_t mask;
+ unsigned char flags;
+};
+
+const struct quic_frame_builder quic_frame_builders[] = {
+ [QUIC_FT_PADDING] = { .func = quic_build_padding_frame, .flags = QUIC_FL_TX_PACKET_PADDING, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_PING] = { .func = quic_build_ping_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_ACK] = { .func = quic_build_ack_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_ACK_ECN] = { .func = quic_build_ack_ecn_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_RESET_STREAM] = { .func = quic_build_reset_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STOP_SENDING] = { .func = quic_build_stop_sending_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_CRYPTO] = { .func = quic_build_crypto_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_NEW_TOKEN] = { .func = quic_build_new_token_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE____1_BITMASK, },
+ [QUIC_FT_STREAM_8] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_9] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_A] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_B] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_C] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_D] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_E] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_F] = { .func = quic_build_stream_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_DATA] = { .func = quic_build_max_data_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAM_DATA] = { .func = quic_build_max_stream_data_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAMS_BIDI] = { .func = quic_build_max_streams_bidi_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAMS_UNI] = { .func = quic_build_max_streams_uni_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_DATA_BLOCKED] = { .func = quic_build_data_blocked_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_DATA_BLOCKED] = { .func = quic_build_stream_data_blocked_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAMS_BLOCKED_BIDI] = { .func = quic_build_streams_blocked_bidi_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAMS_BLOCKED_UNI] = { .func = quic_build_streams_blocked_uni_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_NEW_CONNECTION_ID] = { .func = quic_build_new_connection_id_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_RETIRE_CONNECTION_ID] = { .func = quic_build_retire_connection_id_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_PATH_CHALLENGE] = { .func = quic_build_path_challenge_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_PATH_RESPONSE] = { .func = quic_build_path_response_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_CONNECTION_CLOSE] = { .func = quic_build_connection_close_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_CONNECTION_CLOSE_APP] = { .func = quic_build_connection_close_app_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_HANDSHAKE_DONE] = { .func = quic_build_handshake_done_frame, .flags = QUIC_FL_TX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE____1_BITMASK, },
+};
+
+struct quic_frame_parser {
+ int (*func)(struct quic_frame *frm, struct quic_conn *qc,
+ const unsigned char **pos, const unsigned char *end);
+ uint32_t mask;
+ unsigned char flags;
+};
+
+const struct quic_frame_parser quic_frame_parsers[] = {
+ [QUIC_FT_PADDING] = { .func = quic_parse_padding_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_PING] = { .func = quic_parse_ping_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_ACK] = { .func = quic_parse_ack_frame_header, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_ACK_ECN] = { .func = quic_parse_ack_ecn_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_RESET_STREAM] = { .func = quic_parse_reset_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STOP_SENDING] = { .func = quic_parse_stop_sending_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_CRYPTO] = { .func = quic_parse_crypto_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE_IH_1_BITMASK, },
+ [QUIC_FT_NEW_TOKEN] = { .func = quic_parse_new_token_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE____1_BITMASK, },
+ [QUIC_FT_STREAM_8] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_9] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_A] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_B] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_C] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_D] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_E] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_F] = { .func = quic_parse_stream_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_DATA] = { .func = quic_parse_max_data_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAM_DATA] = { .func = quic_parse_max_stream_data_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAMS_BIDI] = { .func = quic_parse_max_streams_bidi_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_MAX_STREAMS_UNI] = { .func = quic_parse_max_streams_uni_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_DATA_BLOCKED] = { .func = quic_parse_data_blocked_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAM_DATA_BLOCKED] = { .func = quic_parse_stream_data_blocked_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAMS_BLOCKED_BIDI] = { .func = quic_parse_streams_blocked_bidi_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_STREAMS_BLOCKED_UNI] = { .func = quic_parse_streams_blocked_uni_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_NEW_CONNECTION_ID] = { .func = quic_parse_new_connection_id_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_RETIRE_CONNECTION_ID] = { .func = quic_parse_retire_connection_id_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_PATH_CHALLENGE] = { .func = quic_parse_path_challenge_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_PATH_RESPONSE] = { .func = quic_parse_path_response_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_CONNECTION_CLOSE] = { .func = quic_parse_connection_close_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE_IH01_BITMASK, },
+ [QUIC_FT_CONNECTION_CLOSE_APP] = { .func = quic_parse_connection_close_app_frame, .flags = 0, .mask = QUIC_FT_PKT_TYPE___01_BITMASK, },
+ [QUIC_FT_HANDSHAKE_DONE] = { .func = quic_parse_handshake_done_frame, .flags = QUIC_FL_RX_PACKET_ACK_ELICITING, .mask = QUIC_FT_PKT_TYPE____1_BITMASK, },
+};
+
+/* Decode a QUIC frame at <pos> buffer position into <frm> frame.
+ * Returns 1 if succeeded (enough data at <pos> buffer position to parse the frame), 0 if not.
+ */
+int qc_parse_frm(struct quic_frame *frm, struct quic_rx_packet *pkt,
+ const unsigned char **pos, const unsigned char *end,
+ struct quic_conn *qc)
+{
+ int ret = 0;
+ const struct quic_frame_parser *parser;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSFRM, qc);
+ if (end <= *pos) {
+ TRACE_DEVEL("wrong frame", QUIC_EV_CONN_PRSFRM, qc);
+ goto leave;
+ }
+
+ frm->type = *(*pos)++;
+ if (frm->type >= QUIC_FT_MAX) {
+ TRACE_DEVEL("wrong frame type", QUIC_EV_CONN_PRSFRM, qc, frm);
+ goto leave;
+ }
+
+ parser = &quic_frame_parsers[frm->type];
+ if (!(parser->mask & (1U << pkt->type))) {
+ TRACE_DEVEL("unauthorized frame", QUIC_EV_CONN_PRSFRM, qc, frm);
+ goto leave;
+ }
+
+ if (!parser->func(frm, qc, pos, end)) {
+ TRACE_DEVEL("parsing error", QUIC_EV_CONN_PRSFRM, qc, frm);
+ goto leave;
+ }
+
+ TRACE_PROTO("RX frm", QUIC_EV_CONN_PSTRM, qc, frm);
+
+ pkt->flags |= parser->flags;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSFRM, qc);
+ return ret;
+}
+
+/* Encode <frm> QUIC frame at <pos> buffer position.
+ * Returns 1 if succeeded (enough room at <pos> buffer position to encode the frame), 0 if not.
+ * Upon success, <pos> is updated to point one byte past the end of the
+ * built frame.
+ */
+int qc_build_frm(unsigned char **pos, const unsigned char *end,
+ struct quic_frame *frm, struct quic_tx_packet *pkt,
+ struct quic_conn *qc)
+{
+ int ret = 0;
+ const struct quic_frame_builder *builder;
+ unsigned char *p = *pos;
+
+ TRACE_ENTER(QUIC_EV_CONN_BFRM, qc);
+ builder = &quic_frame_builders[frm->type];
+ if (!(builder->mask & (1U << pkt->type))) {
+ /* XXX It is a bug to send an unauthorized frame with such a packet type XXX */
+ TRACE_ERROR("unauthorized frame", QUIC_EV_CONN_BFRM, qc, frm);
+ BUG_ON(!(builder->mask & (1U << pkt->type)));
+ }
+
+ if (end <= p) {
+ TRACE_DEVEL("not enough room", QUIC_EV_CONN_BFRM, qc, frm);
+ goto leave;
+ }
+
+ TRACE_PROTO("TX frm", QUIC_EV_CONN_BFRM, qc, frm);
+ *p++ = frm->type;
+ if (!quic_frame_builders[frm->type].func(&p, end, frm, qc)) {
+ TRACE_ERROR("frame building error", QUIC_EV_CONN_BFRM, qc, frm);
+ goto leave;
+ }
+
+ pkt->flags |= builder->flags;
+ *pos = p;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_BFRM, qc);
+ return ret;
+}
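+
+/* Minimal usage sketch for qc_build_frm(), for illustration only (the
+ * qc_frm_alloc() helper is assumed here, it is not defined in this file):
+ *
+ *     struct quic_frame *frm = qc_frm_alloc(QUIC_FT_PING);
+ *     if (frm && !qc_build_frm(&pos, end, frm, pkt, qc)) {
+ *         // not enough room: <pos> is left untouched
+ *     }
+ *
+ * On success, <pos> has been advanced past the encoded frame and the
+ * builder flags have been merged into <pkt>.
+ */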
+
+/* Detach all duplicated frames from <frm> reflist. */
+void qc_frm_unref(struct quic_frame *frm, struct quic_conn *qc)
+{
+ struct quic_frame *f, *tmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc, frm);
+
+ list_for_each_entry_safe(f, tmp, &frm->reflist, ref) {
+ f->origin = NULL;
+ LIST_DEL_INIT(&f->ref);
+ if (f->pkt) {
+ TRACE_DEVEL("remove frame reference",
+ QUIC_EV_CONN_PRSAFRM, qc, f, &f->pkt->pn_node.key);
+ }
+ else {
+ TRACE_DEVEL("remove frame reference for unsent frame",
+ QUIC_EV_CONN_PRSAFRM, qc, f);
+ }
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Free a <frm> quic_frame. Remove it from parent element if still attached. */
+void qc_frm_free(struct quic_conn *qc, struct quic_frame **frm)
+{
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc, *frm);
+ /* Caller must ensure that no other frame points to <frm>. Use
+ * qc_frm_unref() to handle this properly.
+ */
+ BUG_ON(!LIST_ISEMPTY(&((*frm)->reflist)));
+ BUG_ON(LIST_INLIST(&((*frm)->ref)));
+
+ /* TODO simplify frame deallocation. In some code paths, we must
+ * manually call this LIST_DEL_INIT before using
+ * quic_tx_packet_refdec() and freeing the frame.
+ */
+ LIST_DEL_INIT(&((*frm)->list));
+
+ pool_free(pool_head_quic_frame, *frm);
+ *frm = NULL;
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Release <frm> frame and mark its copies as acknowledged */
+void qc_release_frm(struct quic_conn *qc, struct quic_frame *frm)
+{
+ uint64_t pn;
+ struct quic_frame *origin, *f, *tmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc, frm);
+
+ /* Identify the original frame: <frm> may be the original or one of its copies */
+ origin = frm->origin ? frm->origin : frm;
+ /* Ensure the source of the copies is flagged as acked, <frm> being
+ * possibly a copy of <origin>
+ */
+ origin->flags |= QUIC_FL_TX_FRAME_ACKED;
+ /* Mark all the copies of <origin> as acknowledged. We must
+ * not release the packets (hence their frames) at this time as
+ * they may still be acknowledged alongside the current one.
+ */
+ list_for_each_entry_safe(f, tmp, &origin->reflist, ref) {
+ if (f->pkt) {
+ f->flags |= QUIC_FL_TX_FRAME_ACKED;
+ f->origin = NULL;
+ LIST_DEL_INIT(&f->ref);
+ pn = f->pkt->pn_node.key;
+ TRACE_DEVEL("mark frame as acked from packet",
+ QUIC_EV_CONN_PRSAFRM, qc, f, &pn);
+ }
+ else {
+ TRACE_DEVEL("freeing unsent frame",
+ QUIC_EV_CONN_PRSAFRM, qc, f);
+ LIST_DEL_INIT(&f->ref);
+ qc_frm_free(qc, &f);
+ }
+ }
+ LIST_DEL_INIT(&frm->list);
+ pn = frm->pkt->pn_node.key;
+ quic_tx_packet_refdec(frm->pkt);
+ TRACE_DEVEL("freeing frame from packet",
+ QUIC_EV_CONN_PRSAFRM, qc, frm, &pn);
+ qc_frm_free(qc, &frm);
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
diff --git a/src/quic_loss.c b/src/quic_loss.c
new file mode 100644
index 0000000..fd9568a
--- /dev/null
+++ b/src/quic_loss.c
@@ -0,0 +1,312 @@
+#include <import/eb64tree.h>
+
+#include <haproxy/quic_conn-t.h>
+#include <haproxy/quic_loss.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/quic_trace.h>
+
+#include <haproxy/atomic.h>
+#include <haproxy/list.h>
+#include <haproxy/ticks.h>
+#include <haproxy/trace.h>
+
+/* Update <ql> QUIC loss information with a new <rtt> measurement and the
+ * <ack_delay> received with an ACK frame, which MUST be
+ * min(ack->ack_delay, max_ack_delay) before the handshake is confirmed.
+ */
+void quic_loss_srtt_update(struct quic_loss *ql,
+ unsigned int rtt, unsigned int ack_delay,
+ struct quic_conn *qc)
+{
+ TRACE_ENTER(QUIC_EV_CONN_RTTUPDT, qc);
+ TRACE_PROTO("TX loss srtt update", QUIC_EV_CONN_RTTUPDT, qc, &rtt, &ack_delay, ql);
+
+ ql->latest_rtt = rtt;
+ if (!ql->rtt_min) {
+ /* No previous measurement. */
+ ql->srtt = rtt;
+ ql->rtt_var = rtt / 2;
+ ql->rtt_min = rtt;
+ }
+ else {
+ int diff;
+
+ ql->rtt_min = QUIC_MIN(rtt, ql->rtt_min);
+ /* Specific to QUIC (RTT adjustment). */
+ if (ack_delay && rtt >= ql->rtt_min + ack_delay)
+ rtt -= ack_delay;
+ diff = ql->srtt - rtt;
+ if (diff < 0)
+ diff = -diff;
+ ql->rtt_var = (3 * ql->rtt_var + diff) / 4;
+ ql->srtt = (7 * ql->srtt + rtt) / 8;
+ }
+
+ TRACE_PROTO("TX loss srtt update", QUIC_EV_CONN_RTTUPDT, qc,,, ql);
+ TRACE_LEAVE(QUIC_EV_CONN_RTTUPDT, qc);
+}
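+
+/* Numeric illustration of the EWMA above (RFC 9002, section 5.3): with
+ * srtt=100ms, rtt_var=20ms and a new rtt sample of 60ms (null ack_delay),
+ * diff = |100 - 60| = 40, so rtt_var becomes (3*20 + 40)/4 = 25ms and
+ * srtt becomes (7*100 + 60)/8 = 95ms.
+ */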
+
+/* Return, for <qc> QUIC connection, the packet number space with the
+ * earliest loss time, if any, or a packet number space with TICK_ETERNITY
+ * as loss time if none experienced packet loss.
+ */
+struct quic_pktns *quic_loss_pktns(struct quic_conn *qc)
+{
+ struct quic_pktns *pktns, *p;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPTO, qc);
+
+ BUG_ON(LIST_ISEMPTY(&qc->pktns_list));
+ pktns = p = LIST_NEXT(&qc->pktns_list, struct quic_pktns *, list);
+
+ do {
+ TRACE_PROTO("TX loss pktns", QUIC_EV_CONN_SPTO, qc, p);
+ if (!tick_isset(pktns->tx.loss_time) ||
+ tick_is_lt(p->tx.loss_time, pktns->tx.loss_time)) {
+ pktns = p;
+ }
+ p = LIST_NEXT(&p->list, struct quic_pktns *, list);
+ } while (&p->list != &qc->pktns_list);
+
+ TRACE_LEAVE(QUIC_EV_CONN_SPTO, qc);
+
+ return pktns;
+}
+
+/* Return, for <qc> QUIC connection, the packet number space to arm the PTO
+ * timer for, if any, or a packet number space with TICK_ETERNITY as PTO
+ * value if there is none.
+ */
+struct quic_pktns *quic_pto_pktns(struct quic_conn *qc,
+ int handshake_confirmed,
+ unsigned int *pto)
+{
+ unsigned int duration, lpto;
+ struct quic_loss *ql = &qc->path->loss;
+ struct quic_pktns *pktns, *p;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPTO, qc);
+
+ BUG_ON(LIST_ISEMPTY(&qc->pktns_list));
+ duration =
+ ql->srtt +
+ (QUIC_MAX(4 * ql->rtt_var, QUIC_TIMER_GRANULARITY) << ql->pto_count);
+
+ /* RFC 9002 6.2.2.1. Before Address Validation
+ *
+ * the client MUST set the PTO timer if the client has not received an
+ * acknowledgment for any of its Handshake packets and the handshake is
+ * not confirmed (see Section 4.1.2 of [QUIC-TLS]), even if there are no
+ * packets in flight.
+ *
+ * TODO implement the above paragraph for QUIC on the backend side. Note
+ * that if now_ms is used, this function is no longer reentrant and cannot
+ * be used at any time without side effects (for example after a QUIC
+ * connection migration).
+ */
+
+ lpto = TICK_ETERNITY;
+ pktns = p = LIST_NEXT(&qc->pktns_list, struct quic_pktns *, list);
+
+ do {
+ unsigned int tmp_pto;
+
+ if (p->tx.in_flight) {
+ if (p == qc->apktns) {
+ if (!handshake_confirmed) {
+ TRACE_STATE("TX PTO handshake not already confirmed", QUIC_EV_CONN_SPTO, qc);
+ goto out;
+ }
+
+ duration += qc->max_ack_delay << ql->pto_count;
+ }
+
+ tmp_pto = tick_add(p->tx.time_of_last_eliciting, duration);
+ if (!tick_isset(lpto) || tick_is_lt(tmp_pto, lpto)) {
+ lpto = tmp_pto;
+ pktns = p;
+ }
+
+ TRACE_PROTO("TX PTO", QUIC_EV_CONN_SPTO, qc, p);
+ }
+
+ p = LIST_NEXT(&p->list, struct quic_pktns *, list);
+ } while (&p->list != &qc->pktns_list);
+
+ out:
+ if (pto)
+ *pto = lpto;
+ TRACE_PROTO("TX PTO", QUIC_EV_CONN_SPTO, qc, pktns, &duration);
+ TRACE_LEAVE(QUIC_EV_CONN_SPTO, qc);
+
+ return pktns;
+}
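+
+/* Illustration of the PTO duration computed above: with srtt=50ms,
+ * rtt_var=10ms, pto_count=1 and assuming a 1ms QUIC_TIMER_GRANULARITY,
+ * duration = 50 + (max(4*10, 1) << 1) = 130ms; for the application packet
+ * number space, max_ack_delay << pto_count is further added (e.g. +50ms
+ * with a 25ms max_ack_delay).
+ */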
+
+/* Look for packet loss among the packets already sent for <pktns> packet
+ * number space of <qc> connection. Packets deemed lost are removed from
+ * their tree and appended to the <lost_pkts> list, and the packet number
+ * space loss time is updated from the packets not yet deemed lost.
+ * Should be called after having received an ACK frame with newly acknowledged
+ * packets or when the loss detection timer has expired.
+ * Always succeeds.
+ */
+void qc_packet_loss_lookup(struct quic_pktns *pktns, struct quic_conn *qc,
+ struct list *lost_pkts)
+{
+ struct eb_root *pkts;
+ struct eb64_node *node;
+ struct quic_loss *ql;
+ unsigned int loss_delay;
+ uint64_t pktthresh;
+
+ TRACE_ENTER(QUIC_EV_CONN_PKTLOSS, qc);
+ TRACE_PROTO("TX loss", QUIC_EV_CONN_PKTLOSS, qc, pktns);
+ pkts = &pktns->tx.pkts;
+ pktns->tx.loss_time = TICK_ETERNITY;
+ if (eb_is_empty(pkts))
+ goto out;
+
+ ql = &qc->path->loss;
+ loss_delay = QUIC_MAX(ql->latest_rtt, ql->srtt);
+ loss_delay = QUIC_MAX(loss_delay, MS_TO_TICKS(QUIC_TIMER_GRANULARITY)) *
+ QUIC_LOSS_TIME_THRESHOLD_MULTIPLICAND / QUIC_LOSS_TIME_THRESHOLD_DIVISOR;
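+
+ /* Note: assuming QUIC_LOSS_TIME_THRESHOLD_MULTIPLICAND/DIVISOR implement
+ * the RFC 9002 (section 6.1.2) recommended 9/8 time threshold, e.g. with
+ * latest_rtt=80ms and srtt=72ms this gives loss_delay = 80 * 9 / 8 = 90ms.
+ */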
+
+ node = eb64_first(pkts);
+
+ /* RFC 9002 6.1.1. Packet Threshold
+ * The RECOMMENDED initial value for the packet reordering threshold
+ * (kPacketThreshold) is 3, based on best practices for TCP loss detection
+ * [RFC5681] [RFC6675]. In order to remain similar to TCP, implementations
+ * SHOULD NOT use a packet threshold less than 3; see [RFC5681].
+ *
+ * Some networks may exhibit higher degrees of packet reordering, causing a
+ * sender to detect spurious losses. Additionally, packet reordering could be
+ * more common with QUIC than TCP because network elements that could observe
+ * and reorder TCP packets cannot do that for QUIC and also because QUIC
+ * packet numbers are encrypted.
+ */
+
+ /* Dynamic packet reordering threshold calculation depending on the distance
+ * (in packets) between the last transmitted packet and the oldest still in
+ * flight before loss detection.
+ */
+ pktthresh = pktns->tx.next_pn - 1 - eb64_entry(node, struct quic_tx_packet, pn_node)->pn_node.key;
+ /* Apply a ratio to this threshold and add it to QUIC_LOSS_PACKET_THRESHOLD. */
+ pktthresh = pktthresh * global.tune.quic_reorder_ratio / 100 + QUIC_LOSS_PACKET_THRESHOLD;
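+ /* Example: with next_pn=1000 and the oldest in-flight packet being pn=900,
+ * the distance is 99 packets; assuming quic_reorder_ratio=50 and the
+ * RFC-recommended QUIC_LOSS_PACKET_THRESHOLD of 3, this yields
+ * pktthresh = 99 * 50 / 100 + 3 = 52.
+ */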
+ while (node) {
+ struct quic_tx_packet *pkt;
+ int64_t largest_acked_pn;
+ unsigned int loss_time_limit, time_sent;
+ int reordered;
+
+ pkt = eb64_entry(&node->node, struct quic_tx_packet, pn_node);
+ largest_acked_pn = pktns->rx.largest_acked_pn;
+ node = eb64_next(node);
+ if ((int64_t)pkt->pn_node.key > largest_acked_pn)
+ break;
+
+ time_sent = pkt->time_sent;
+ loss_time_limit = tick_add(time_sent, loss_delay);
+
+ reordered = (int64_t)largest_acked_pn >= pkt->pn_node.key + pktthresh;
+ if (reordered)
+ ql->nb_reordered_pkt++;
+
+ if (tick_is_le(loss_time_limit, now_ms) || reordered) {
+ eb64_delete(&pkt->pn_node);
+ LIST_APPEND(lost_pkts, &pkt->list);
+ ql->nb_lost_pkt++;
+ }
+ else {
+ if (tick_isset(pktns->tx.loss_time))
+ pktns->tx.loss_time = tick_first(pktns->tx.loss_time, loss_time_limit);
+ else
+ pktns->tx.loss_time = loss_time_limit;
+ break;
+ }
+ }
+
+ out:
+ TRACE_PROTO("TX loss", QUIC_EV_CONN_PKTLOSS, qc, pktns, lost_pkts);
+ TRACE_LEAVE(QUIC_EV_CONN_PKTLOSS, qc);
+}
+
+/* Handle the <pkts> list of lost packets detected at <now_us>, processing
+ * their TX frames. Send a packet loss event to the congestion controller if
+ * in-flight packets have been lost. Also free the packets in <pkts> list.
+ *
+ * Returns 1 on success, or 0 if the loss limit has been exceeded, in which
+ * case a CONNECTION_CLOSE frame was prepared to close the connection ASAP.
+ */
+int qc_release_lost_pkts(struct quic_conn *qc, struct quic_pktns *pktns,
+ struct list *pkts, uint64_t now_us)
+{
+ struct quic_tx_packet *pkt, *tmp, *oldest_lost, *newest_lost;
+ int close = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ if (LIST_ISEMPTY(pkts))
+ goto leave;
+
+ oldest_lost = newest_lost = NULL;
+ list_for_each_entry_safe(pkt, tmp, pkts, list) {
+ struct list tmp = LIST_HEAD_INIT(tmp);
+
+ pkt->pktns->tx.in_flight -= pkt->in_flight_len;
+ qc->path->prep_in_flight -= pkt->in_flight_len;
+ qc->path->in_flight -= pkt->in_flight_len;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
+ qc->path->ifae_pkts--;
+ /* Treat the frames of this lost packet. */
+ if (!qc_handle_frms_of_lost_pkt(qc, pkt, &pktns->tx.frms))
+ close = 1;
+ LIST_DELETE(&pkt->list);
+ if (!oldest_lost) {
+ oldest_lost = newest_lost = pkt;
+ }
+ else {
+ if (newest_lost != oldest_lost)
+ quic_tx_packet_refdec(newest_lost);
+ newest_lost = pkt;
+ }
+ }
+
+ if (!close) {
+ if (newest_lost) {
+ /* Send a congestion event to the controller */
+ struct quic_cc_event ev = { };
+
+ ev.type = QUIC_CC_EVT_LOSS;
+ ev.loss.time_sent = newest_lost->time_sent;
+
+ quic_cc_event(&qc->path->cc, &ev);
+ }
+
+ /* If an RTT has already been sampled, <rtt_min> has been set.
+ * We must check whether we are experiencing persistent congestion.
+ * If this is the case, the congestion controller must re-enter the
+ * slow start state.
+ */
+ if (qc->path->loss.rtt_min && newest_lost != oldest_lost) {
+ unsigned int period = newest_lost->time_sent - oldest_lost->time_sent;
+
+ if (quic_loss_persistent_congestion(&qc->path->loss, period,
+ now_ms, qc->max_ack_delay))
+ qc->path->cc.algo->slow_start(&qc->path->cc);
+ }
+ }
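+
+ /* For context on the persistent congestion test above (RFC 9002,
+ * section 7.6): the period between the oldest and newest lost packets
+ * is expected to be compared by quic_loss_persistent_congestion(),
+ * defined elsewhere, against a duration of the form
+ * 3 * (smoothed_rtt + max(4*rttvar, granularity) + max_ack_delay).
+ */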
+
+ /* <oldest_lost> cannot be NULL at this stage because we have ensured
+ * that <pkts> list is not empty. Without this, GCC 12.2.0 reports a
+ * possible overflow on a 0 byte region with O2 optimization.
+ */
+ ALREADY_CHECKED(oldest_lost);
+ quic_tx_packet_refdec(oldest_lost);
+ if (newest_lost != oldest_lost)
+ quic_tx_packet_refdec(newest_lost);
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+ return !close;
+}
diff --git a/src/quic_openssl_compat.c b/src/quic_openssl_compat.c
new file mode 100644
index 0000000..d914ac4
--- /dev/null
+++ b/src/quic_openssl_compat.c
@@ -0,0 +1,531 @@
+#ifndef USE_QUIC
+#error "Must define USE_QUIC"
+#endif
+
+#ifndef USE_OPENSSL
+#error "Must define USE_OPENSSL"
+#endif
+
+#include <haproxy/openssl-compat.h>
+/* Heavily inspired by the nginx QUIC TLS compatibility code */
+#include <openssl/kdf.h>
+
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/trace.h>
+
+#ifndef HAVE_SSL_KEYLOG
+#error "HAVE_SSL_KEYLOG is not defined"
+#endif
+
+#define QUIC_OPENSSL_COMPAT_RECORD_SIZE 1024
+
+#define QUIC_TLS_KEY_LABEL "key"
+#define QUIC_TLS_IV_LABEL "iv"
+
+struct quic_tls_compat_record {
+ unsigned char type;
+ const unsigned char *payload;
+ size_t payload_len;
+ uint64_t number;
+ struct quic_tls_compat_keys *keys;
+};
+
+/* Callback used to set the local transport parameters into the TLS stack.
+ * The parameters must have been set at the QUIC connection level beforehand.
+ */
+static int qc_ssl_compat_add_tps_cb(SSL *ssl, unsigned int ext_type, unsigned int context,
+ const unsigned char **out, size_t *outlen,
+ X509 *x, size_t chainidx, int *al, void *add_arg)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+
+ *out = qc->enc_params;
+ *outlen = qc->enc_params_len;
+
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return 1;
+}
+
+/* Set the keylog callback used to derive TLS secrets and the callback
+ * used to pass local transport parameters to the TLS stack.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_compat_init(struct bind_conf *bind_conf, SSL_CTX *ctx)
+{
+ /* Ignore non-QUIC connections */
+ if (bind_conf->xprt != xprt_get(XPRT_QUIC))
+ return 1;
+
+ /* This callback is already registered if the TLS keylog is activated for
+ * traffic decryption analysis.
+ */
+ if (!global_ssl.keylog)
+ SSL_CTX_set_keylog_callback(ctx, quic_tls_compat_keylog_callback);
+
+ if (SSL_CTX_has_client_custom_ext(ctx, QUIC_OPENSSL_COMPAT_SSL_TP_EXT))
+ return 1;
+
+ if (!SSL_CTX_add_custom_ext(ctx, QUIC_OPENSSL_COMPAT_SSL_TP_EXT,
+ SSL_EXT_CLIENT_HELLO | SSL_EXT_TLS1_3_ENCRYPTED_EXTENSIONS,
+ qc_ssl_compat_add_tps_cb, NULL, NULL,
+ NULL, NULL))
+ return 0;
+
+ return 1;
+}
+
+static int quic_tls_compat_set_encryption_secret(struct quic_conn *qc,
+ struct quic_tls_compat_keys *keys,
+ enum ssl_encryption_level_t level,
+ const SSL_CIPHER *cipher,
+ const uint8_t *secret, size_t secret_len)
+{
+ int ret = 0, key_len;
+ struct quic_tls_secret *peer_secret;
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+
+ peer_secret = &keys->secret;
+ if (sizeof(peer_secret->secret.data) < secret_len)
+ goto leave;
+
+ keys->cipher = tls_aead(cipher);
+ if (!keys->cipher)
+ goto leave;
+
+ key_len = EVP_CIPHER_key_length(keys->cipher);
+
+ peer_secret->secret.len = secret_len;
+ memcpy(peer_secret->secret.data, secret, secret_len);
+
+ peer_secret->key.len = key_len;
+ peer_secret->iv.len = QUIC_OPENSSL_COMPAT_TLS_IV_LEN;
+ if (!quic_hkdf_expand_label(tls_md(cipher),
+ peer_secret->key.data, peer_secret->key.len,
+ secret, secret_len,
+ (const unsigned char *)QUIC_TLS_KEY_LABEL,
+ sizeof(QUIC_TLS_KEY_LABEL) - 1) ||
+ !quic_hkdf_expand_label(tls_md(cipher),
+ peer_secret->iv.data, peer_secret->iv.len,
+ secret, secret_len,
+ (const unsigned char *)QUIC_TLS_IV_LABEL,
+ sizeof(QUIC_TLS_IV_LABEL) - 1))
+ goto leave;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return ret;
+}
+
+/* Callback used to get the Handshake and Application level secrets from
+ * the TLS stack.
+ */
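+/* Keylog lines follow the NSS key log format, that is:
+ * <LABEL> <client_random in hex> <secret in hex>
+ * e.g. "SERVER_HANDSHAKE_TRAFFIC_SECRET 1a2b... 3c4d...", which the code
+ * below tokenizes before decoding the hexadecimal secret.
+ */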
+void quic_tls_compat_keylog_callback(const SSL *ssl, const char *line)
+{
+ unsigned char ch, value;
+ const char *start, *p;
+ size_t n;
+ unsigned int write;
+ struct quic_openssl_compat *compat;
+ enum ssl_encryption_level_t level;
+ unsigned char secret[EVP_MAX_MD_SIZE];
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ /* Ignore non-QUIC connections */
+ if (!qc)
+ return;
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+
+ p = line;
+ for (start = p; *p && *p != ' '; p++);
+ n = p - start;
+
+ if (sizeof(QUIC_OPENSSL_COMPAT_CLIENT_HANDSHAKE) - 1 == n &&
+ !strncmp(start, QUIC_OPENSSL_COMPAT_CLIENT_HANDSHAKE, n)) {
+ level = ssl_encryption_handshake;
+ write = 0;
+ }
+ else if (sizeof(QUIC_OPENSSL_COMPAT_SERVER_HANDSHAKE) - 1 == n &&
+ !strncmp(start, QUIC_OPENSSL_COMPAT_SERVER_HANDSHAKE, n)) {
+ level = ssl_encryption_handshake;
+ write = 1;
+ }
+ else if (sizeof(QUIC_OPENSSL_COMPAT_CLIENT_APPLICATION) - 1 == n &&
+ !strncmp(start, QUIC_OPENSSL_COMPAT_CLIENT_APPLICATION, n)) {
+ level = ssl_encryption_application;
+ write = 0;
+ }
+ else if (sizeof(QUIC_OPENSSL_COMPAT_SERVER_APPLICATION) - 1 == n &&
+ !strncmp(start, QUIC_OPENSSL_COMPAT_SERVER_APPLICATION, n)) {
+ level = ssl_encryption_application;
+ write = 1;
+ }
+ else
+ goto leave;
+
+ if (*p++ == '\0')
+ goto leave;
+
+ while (*p && *p != ' ')
+ p++;
+
+ if (*p++ == '\0')
+ goto leave;
+
+ for (n = 0, start = p; *p; p++) {
+ ch = *p;
+ if (ch >= '0' && ch <= '9') {
+ value = ch - '0';
+ goto next;
+ }
+
+ ch = (unsigned char) (ch | 0x20);
+ if (ch >= 'a' && ch <= 'f') {
+ value = ch - 'a' + 10;
+ goto next;
+ }
+
+ goto leave;
+
+next:
+ if ((p - start) % 2) {
+ secret[n++] += value;
+ }
+ else {
+ if (n >= EVP_MAX_MD_SIZE)
+ goto leave;
+
+ secret[n] = (value << 4);
+ }
+ }
+
+ /* Secret successfully parsed */
+ compat = &qc->openssl_compat;
+ if (write) {
+ compat->method->set_encryption_secrets((SSL *) ssl, level, NULL, secret, n);
+ compat->write_level = level;
+
+ } else {
+ const SSL_CIPHER *cipher;
+
+ cipher = SSL_get_current_cipher(ssl);
+ /* AES_128_CCM_SHA256 is not supported at this time. This algorithm
+ * is silently disabled by the TLS stack, but it can be re-enabled
+ * with the "ssl-default-bind-ciphersuites" setting.
+ */
+ if (SSL_CIPHER_get_id(cipher) == TLS1_3_CK_AES_128_CCM_SHA256) {
+ quic_set_tls_alert(qc, SSL_AD_HANDSHAKE_FAILURE);
+ goto leave;
+ }
+
+ compat->method->set_encryption_secrets((SSL *) ssl, level, secret, NULL, n);
+ compat->read_level = level;
+ compat->read_record = 0;
+ quic_tls_compat_set_encryption_secret(qc, &compat->keys, level,
+ cipher, secret, n);
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+}
+
+static size_t quic_tls_compat_create_header(struct quic_conn *qc,
+ struct quic_tls_compat_record *rec,
+ unsigned char *out, int plain)
+{
+ unsigned char type;
+ size_t len;
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+
+ len = rec->payload_len;
+ if (plain) {
+ type = rec->type;
+ }
+ else {
+ type = SSL3_RT_APPLICATION_DATA;
+ len += EVP_GCM_TLS_TAG_LEN;
+ }
+
+ out[0] = type;
+ out[1] = 0x03;
+ out[2] = 0x03;
+ out[3] = (len >> 8);
+ out[4] = len;
+
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return 5;
+}
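+
+/* The 5 bytes written above form a TLS record header: content type, legacy
+ * version 0x0303 (TLS 1.2), then a 16-bit big-endian length. For a 100-byte
+ * encrypted handshake record this gives 17 03 03 00 74 (0x17 being
+ * application_data and 116 = 100 + the 16-byte GCM tag); in plaintext form
+ * it would be 16 03 03 00 64 (0x16 being handshake).
+ */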
+
+static void quic_tls_compute_nonce(unsigned char *nonce, size_t len, uint64_t pn)
+{
+ nonce[len - 8] ^= (pn >> 56) & 0x3f;
+ nonce[len - 7] ^= (pn >> 48) & 0xff;
+ nonce[len - 6] ^= (pn >> 40) & 0xff;
+ nonce[len - 5] ^= (pn >> 32) & 0xff;
+ nonce[len - 4] ^= (pn >> 24) & 0xff;
+ nonce[len - 3] ^= (pn >> 16) & 0xff;
+ nonce[len - 2] ^= (pn >> 8) & 0xff;
+ nonce[len - 1] ^= pn & 0xff;
+}
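+
+/* The record sequence number is XORed into the last 8 bytes of the IV,
+ * mirroring the QUIC AEAD nonce construction (RFC 9001, section 5.3): e.g.
+ * with pn=0x0102, only nonce[len - 2] and nonce[len - 1] change (XORed with
+ * 0x01 and 0x02). The 0x3f mask on the most significant byte is inherited
+ * from the nginx-inspired compatibility code.
+ */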
+
+/* Encrypt <in> buffer data into <out> with <cipher> as AEAD cipher and <s> as secret.
+ * <ad> points to the additional authenticated data.
+ */
+static int quic_tls_tls_seal(struct quic_conn *qc,
+ const EVP_CIPHER *cipher, struct quic_tls_secret *s,
+ unsigned char *out, size_t *outlen, unsigned char *nonce,
+ const unsigned char *in, size_t inlen,
+ const unsigned char *ad, size_t adlen)
+{
+ int ret = 0, wlen;
+ EVP_CIPHER_CTX *ctx;
+ int aead_nid = EVP_CIPHER_nid(cipher);
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+ ctx = EVP_CIPHER_CTX_new();
+ if (ctx == NULL)
+ goto leave;
+
+ /* Note that the following encryption code works with NID_aes_128_ccm, but leads
+ * to a handshake failure with a "bad record mac" (20) TLS alert received from
+ * the peer.
+ */
+ if (!EVP_EncryptInit_ex(ctx, cipher, NULL, NULL, NULL) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_IVLEN, s->iv.len, NULL) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_SET_TAG, EVP_GCM_TLS_TAG_LEN, NULL)) ||
+ !EVP_EncryptInit_ex(ctx, NULL, NULL, s->key.data, nonce) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_EncryptUpdate(ctx, NULL, &wlen, NULL, inlen)) ||
+ !EVP_EncryptUpdate(ctx, NULL, &wlen, ad, adlen) ||
+ !EVP_EncryptUpdate(ctx, out, &wlen, in, inlen) ||
+ !EVP_EncryptFinal_ex(ctx, out + wlen, &wlen) ||
+ (aead_nid != NID_aes_128_ccm &&
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_GET_TAG, EVP_GCM_TLS_TAG_LEN, out + inlen))) {
+ goto leave;
+ }
+
+ *outlen = inlen + adlen + EVP_GCM_TLS_TAG_LEN;
+ ret = 1;
+ leave:
+ /* Safe to call EVP_CIPHER_CTX_free() with null ctx */
+ EVP_CIPHER_CTX_free(ctx);
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return ret;
+}
+
+static int quic_tls_compat_create_record(struct quic_conn *qc,
+ enum ssl_encryption_level_t level,
+ struct quic_tls_compat_record *rec,
+ unsigned char *res)
+{
+ int ret = 0;
+ unsigned char *ad;
+ size_t adlen;
+ unsigned char *out;
+ size_t outlen;
+ struct quic_tls_secret *secret;
+ unsigned char nonce[QUIC_OPENSSL_COMPAT_TLS_IV_LEN];
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+
+ ad = res;
+ adlen = quic_tls_compat_create_header(qc, rec, ad, 0);
+
+ out = res + adlen;
+ outlen = rec->payload_len + EVP_GCM_TLS_TAG_LEN;
+
+ secret = &rec->keys->secret;
+
+ memcpy(nonce, secret->iv.data, secret->iv.len);
+ quic_tls_compute_nonce(nonce, sizeof(nonce), rec->number);
+
+ if (!quic_tls_tls_seal(qc, rec->keys->cipher, secret, out, &outlen,
+ nonce, rec->payload, rec->payload_len, ad, adlen))
+ goto leave;
+
+ ret = outlen;
+leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return ret;
+}
+
+/* Callback used to parse TLS messages for the <ssl> TLS session. */
+void quic_tls_compat_msg_callback(struct connection *conn,
+ int write_p, int version, int content_type,
+ const void *buf, size_t len, SSL *ssl)
+{
+ unsigned int alert;
+ enum ssl_encryption_level_t level;
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ struct quic_openssl_compat *com;
+
+ if (!write_p || !qc)
+ goto leave;
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+
+ com = &qc->openssl_compat;
+ level = com->write_level;
+ switch (content_type) {
+ case SSL3_RT_HANDSHAKE:
+ com->method->add_handshake_data(ssl, level, buf, len);
+ break;
+ case SSL3_RT_ALERT:
+ if (len >= 2) {
+ alert = ((unsigned char *) buf)[1];
+ com->method->send_alert(ssl, level, alert);
+ }
+ break;
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+}
+
+int SSL_set_quic_method(SSL *ssl, const SSL_QUIC_METHOD *quic_method)
+{
+ int ret = 0;
+ BIO *rbio, *wbio = NULL;
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+
+ rbio = BIO_new(BIO_s_mem());
+ if (!rbio)
+ goto err;
+
+ wbio = BIO_new(BIO_s_null());
+ if (!wbio)
+ goto err;
+
+ SSL_set_bio(ssl, rbio, wbio);
+ /* No early data support */
+ SSL_set_max_early_data(ssl, 0);
+
+ qc->openssl_compat.rbio = rbio;
+ qc->openssl_compat.wbio = wbio;
+ qc->openssl_compat.method = quic_method;
+ qc->openssl_compat.read_level = ssl_encryption_initial;
+ qc->openssl_compat.write_level = ssl_encryption_initial;
+ ret = 1;
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return ret;
+ err:
+ BIO_free(rbio);
+ BIO_free(wbio);
+ goto leave;
+}
+
+enum ssl_encryption_level_t SSL_quic_read_level(const SSL *ssl)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return qc->openssl_compat.read_level;
+}
+
+
+enum ssl_encryption_level_t SSL_quic_write_level(const SSL *ssl)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return qc->openssl_compat.write_level;
+}
+
+int SSL_provide_quic_data(SSL *ssl, enum ssl_encryption_level_t level,
+ const uint8_t *data, size_t len)
+{
+ int ret = 0;
+ BIO *rbio;
+ struct quic_tls_compat_record rec;
+ unsigned char in[QUIC_OPENSSL_COMPAT_RECORD_SIZE + 1];
+ unsigned char out[QUIC_OPENSSL_COMPAT_RECORD_SIZE + 1 +
+ SSL3_RT_HEADER_LENGTH + EVP_GCM_TLS_TAG_LEN];
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ size_t n;
+
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+
+ rbio = SSL_get_rbio(ssl);
+
+ while (len) {
+ memset(&rec, 0, sizeof rec);
+ rec.type = SSL3_RT_HANDSHAKE;
+ rec.number = qc->openssl_compat.read_record++;
+ rec.keys = &qc->openssl_compat.keys;
+ if (level == ssl_encryption_initial) {
+ n = QUIC_MIN(len, (size_t)65535);
+ rec.payload = (unsigned char *)data;
+ rec.payload_len = n;
+ quic_tls_compat_create_header(qc, &rec, out, 1);
+ BIO_write(rbio, out, SSL3_RT_HEADER_LENGTH);
+ BIO_write(rbio, data, n);
+ }
+ else {
+ size_t outlen;
+ unsigned char *p = in;
+
+ n = QUIC_MIN(len, (size_t)QUIC_OPENSSL_COMPAT_RECORD_SIZE);
+ memcpy(in, data, n);
+ p += n;
+ *p++ = SSL3_RT_HANDSHAKE;
+
+ rec.payload = in;
+ rec.payload_len = p - in;
+
+ if (!rec.keys->cipher)
+ goto leave;
+
+ outlen = quic_tls_compat_create_record(qc, level, &rec, out);
+ if (!outlen)
+ goto leave;
+
+ BIO_write(rbio, out, outlen);
+ }
+
+ data += n;
+ len -= n;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return ret;
+}
+
+int SSL_process_quic_post_handshake(SSL *ssl)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ /* Do nothing: rely on the TLS message callback to parse alert messages. */
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return 1;
+}
+
+int SSL_set_quic_transport_params(SSL *ssl, const uint8_t *params, size_t params_len)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ /* The local transport parameters are stored into the quic_conn object.
+ * There is no need for an intermediary storage of pointers to these
+ * transport parameters.
+ */
+ TRACE_ENTER(QUIC_EV_CONN_SSL_COMPAT, qc);
+ TRACE_LEAVE(QUIC_EV_CONN_SSL_COMPAT, qc);
+ return 1;
+}
+
diff --git a/src/quic_retransmit.c b/src/quic_retransmit.c
new file mode 100644
index 0000000..d06293f
--- /dev/null
+++ b/src/quic_retransmit.c
@@ -0,0 +1,252 @@
+#include <import/eb64tree.h>
+
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_frame.h>
+#include <haproxy/quic_retransmit.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/quic_tx.h>
+#include <haproxy/trace.h>
+
+#define TRACE_SOURCE &trace_quic
+
+/* Duplicate all frames from <pkt_frm_list> list into <out_frm_list> list
+ * for <qc> QUIC connection.
+ * This is a best-effort function which never fails, even if no memory could be
+ * allocated to duplicate these frames.
+ */
+static void qc_dup_pkt_frms(struct quic_conn *qc,
+ struct list *pkt_frm_list, struct list *out_frm_list)
+{
+ struct quic_frame *frm, *frmbak;
+ struct list tmp = LIST_HEAD_INIT(tmp);
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ list_for_each_entry_safe(frm, frmbak, pkt_frm_list, list) {
+ struct quic_frame *dup_frm, *origin;
+
+ if (frm->flags & QUIC_FL_TX_FRAME_ACKED) {
+ TRACE_DEVEL("already acknowledged frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ continue;
+ }
+
+ switch (frm->type) {
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ struct qf_stream *strm_frm = &frm->stream;
+ struct eb64_node *node = NULL;
+ struct qc_stream_desc *stream_desc;
+
+ node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
+ if (!node) {
+ TRACE_DEVEL("ignored frame for a released stream", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ continue;
+ }
+
+ stream_desc = eb64_entry(node, struct qc_stream_desc, by_id);
+ /* Do not resend this frame if in the "already acked range" */
+ if (strm_frm->offset.key + strm_frm->len <= stream_desc->ack_offset) {
+ TRACE_DEVEL("ignored frame in already acked range",
+ QUIC_EV_CONN_PRSAFRM, qc, frm);
+ continue;
+ }
+ else if (strm_frm->offset.key < stream_desc->ack_offset) {
+ uint64_t diff = stream_desc->ack_offset - strm_frm->offset.key;
+
+ qc_stream_frm_mv_fwd(frm, diff);
+ TRACE_DEVEL("updated partially acked frame",
+ QUIC_EV_CONN_PRSAFRM, qc, frm);
+ }
+
+ strm_frm->dup = 1;
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ /* If <frm> is already a copy of another frame, we must take
+ * its original frame as source for the copy.
+ */
+ origin = frm->origin ? frm->origin : frm;
+ dup_frm = qc_frm_dup(origin);
+ if (!dup_frm) {
+ TRACE_ERROR("could not duplicate frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ break;
+ }
+
+ TRACE_DEVEL("built probing frame", QUIC_EV_CONN_PRSAFRM, qc, origin);
+ if (origin->pkt) {
+ TRACE_DEVEL("duplicated from packet", QUIC_EV_CONN_PRSAFRM,
+ qc, dup_frm, &origin->pkt->pn_node.key);
+ }
+ else {
+ /* <origin> is a frame which was sent from a packet detected as lost. */
+ TRACE_DEVEL("duplicated from lost packet", QUIC_EV_CONN_PRSAFRM, qc);
+ }
+
+ LIST_APPEND(&tmp, &dup_frm->list);
+ }
+
+ LIST_SPLICE(out_frm_list, &tmp);
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Boolean function which returns 1 if <pkt> TX packet is made only of
+ * already acknowledged frames.
+ */
+static inline int qc_pkt_with_only_acked_frms(struct quic_tx_packet *pkt)
+{
+ struct quic_frame *frm;
+
+ list_for_each_entry(frm, &pkt->frms, list)
+ if (!(frm->flags & QUIC_FL_TX_FRAME_ACKED))
+ return 0;
+
+ return 1;
+}
+
+/* Prepare a fast retransmission from <pktns> packet number space. */
+void qc_prep_fast_retrans(struct quic_conn *qc,
+ struct quic_pktns *pktns,
+ struct list *frms1, struct list *frms2)
+{
+ struct eb_root *pkts = &pktns->tx.pkts;
+ struct list *frms = frms1;
+ struct eb64_node *node;
+ struct quic_tx_packet *pkt;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPPKTS, qc);
+
+ BUG_ON(frms1 == frms2);
+
+ pkt = NULL;
+ node = eb64_first(pkts);
+ start:
+ while (node) {
+ struct quic_tx_packet *p;
+
+ p = eb64_entry(node, struct quic_tx_packet, pn_node);
+ node = eb64_next(node);
+ /* Skip the empty and coalesced packets */
+ TRACE_PRINTF(TRACE_LEVEL_PROTO, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0,
+ "--> pn=%llu (%d %d %d)", (ull)p->pn_node.key,
+ LIST_ISEMPTY(&p->frms), !!(p->flags & QUIC_FL_TX_PACKET_COALESCED),
+ qc_pkt_with_only_acked_frms(p));
+ if (!LIST_ISEMPTY(&p->frms) && !qc_pkt_with_only_acked_frms(p)) {
+ pkt = p;
+ break;
+ }
+ }
+
+ if (!pkt)
+ goto leave;
+
+ /* When building a packet from another one, the field which may increase the
+ * packet size is the packet number. And the maximum increase is 4 bytes.
+ */
+ if (!quic_peer_validated_addr(qc) && qc_is_listener(qc) &&
+ pkt->len + 4 > quic_may_send_bytes(qc)) {
+ qc->flags |= QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
+ TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt);
+ goto leave;
+ }
+
+ TRACE_PROTO("duplicating packet", QUIC_EV_CONN_SPPKTS, qc, pkt);
+ qc_dup_pkt_frms(qc, &pkt->frms, frms);
+ if (frms == frms1 && frms2) {
+ frms = frms2;
+ goto start;
+ }
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SPPKTS, qc);
+}
+
+/* Prepare a fast retransmission during a handshake after a client
+ * has resent Initial packets. According to the RFC, a server may retransmit
+ * Initial packets, coalescing them with other packets (Handshake here).
+ * (Listener only function).
+ */
+void qc_prep_hdshk_fast_retrans(struct quic_conn *qc,
+ struct list *ifrms, struct list *hfrms)
+{
+ struct list itmp = LIST_HEAD_INIT(itmp);
+ struct list htmp = LIST_HEAD_INIT(htmp);
+
+ struct quic_enc_level *iqel = qc->iel;
+ struct quic_enc_level *hqel = qc->hel;
+ struct quic_enc_level *qel = iqel;
+ struct eb_root *pkts;
+ struct eb64_node *node;
+ struct quic_tx_packet *pkt;
+ struct list *tmp = &itmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_SPPKTS, qc);
+ start:
+ pkt = NULL;
+ pkts = &qel->pktns->tx.pkts;
+ node = eb64_first(pkts);
+	/* Skip the empty packets (they have already been retransmitted) */
+ while (node) {
+ struct quic_tx_packet *p;
+
+ p = eb64_entry(node, struct quic_tx_packet, pn_node);
+ TRACE_PRINTF(TRACE_LEVEL_PROTO, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0,
+ "--> pn=%llu (%d %d)", (ull)p->pn_node.key,
+ LIST_ISEMPTY(&p->frms), !!(p->flags & QUIC_FL_TX_PACKET_COALESCED));
+ if (!LIST_ISEMPTY(&p->frms) && !(p->flags & QUIC_FL_TX_PACKET_COALESCED) &&
+ !qc_pkt_with_only_acked_frms(p)) {
+ pkt = p;
+ break;
+ }
+
+ node = eb64_next(node);
+ }
+
+ if (!pkt)
+ goto end;
+
+ /* When building a packet from another one, the field which may increase the
+ * packet size is the packet number. And the maximum increase is 4 bytes.
+ */
+ if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) {
+ size_t dglen = pkt->len + 4;
+ size_t may_send;
+
+ may_send = quic_may_send_bytes(qc);
+ dglen += pkt->next ? pkt->next->len + 4 : 0;
+ if (dglen > may_send) {
+ qc->flags |= QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
+ TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt);
+ if (pkt->next)
+ TRACE_PROTO("anti-amplification limit would be reached", QUIC_EV_CONN_SPPKTS, qc, pkt->next);
+ if (qel == iqel && may_send >= QUIC_INITIAL_PACKET_MINLEN)
+ TRACE_PROTO("will probe Initial packet number space", QUIC_EV_CONN_SPPKTS, qc);
+ goto end;
+ }
+ }
+
+ qel->pktns->tx.pto_probe += 1;
+
+	/* No risk of looping here: the number of packets per datagram is bounded */
+ requeue:
+ TRACE_PROTO("duplicating packet", QUIC_EV_CONN_PRSAFRM, qc, NULL, &pkt->pn_node.key);
+ qc_dup_pkt_frms(qc, &pkt->frms, tmp);
+ if (qel == iqel) {
+ if (pkt->next && pkt->next->type == QUIC_PACKET_TYPE_HANDSHAKE) {
+ pkt = pkt->next;
+ tmp = &htmp;
+ hqel->pktns->tx.pto_probe += 1;
+ TRACE_DEVEL("looping for next packet", QUIC_EV_CONN_SPPKTS, qc);
+ goto requeue;
+ }
+ }
+
+ end:
+ LIST_SPLICE(ifrms, &itmp);
+ LIST_SPLICE(hfrms, &htmp);
+
+ TRACE_LEAVE(QUIC_EV_CONN_SPPKTS, qc);
+}
diff --git a/src/quic_retry.c b/src/quic_retry.c
new file mode 100644
index 0000000..1c58e5e
--- /dev/null
+++ b/src/quic_retry.c
@@ -0,0 +1,320 @@
+#include <string.h>
+
+#include <haproxy/clock.h>
+#include <haproxy/global.h>
+#include <haproxy/quic_retry.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/quic_trace-t.h>
+#include <haproxy/trace.h>
+
+#define TRACE_SOURCE &trace_quic
+
+/* Salt length used to derive retry token secret */
+#define QUIC_RETRY_TOKEN_SALTLEN 16 /* bytes */
+
+/* Copy <saddr> socket address data into <buf> buffer.
+ * It is the caller's responsibility to ensure the output buffer is big
+ * enough to contain this socket address data.
+ * Return the number of bytes copied.
+ */
+static inline size_t quic_saddr_cpy(unsigned char *buf,
+ const struct sockaddr_storage *saddr)
+{
+ void *port, *addr;
+ unsigned char *p;
+ size_t port_len, addr_len;
+
+ p = buf;
+ if (saddr->ss_family == AF_INET6) {
+ port = &((struct sockaddr_in6 *)saddr)->sin6_port;
+ addr = &((struct sockaddr_in6 *)saddr)->sin6_addr;
+ port_len = sizeof ((struct sockaddr_in6 *)saddr)->sin6_port;
+ addr_len = sizeof ((struct sockaddr_in6 *)saddr)->sin6_addr;
+ }
+ else {
+ port = &((struct sockaddr_in *)saddr)->sin_port;
+ addr = &((struct sockaddr_in *)saddr)->sin_addr;
+ port_len = sizeof ((struct sockaddr_in *)saddr)->sin_port;
+ addr_len = sizeof ((struct sockaddr_in *)saddr)->sin_addr;
+ }
+ memcpy(p, port, port_len);
+ p += port_len;
+ memcpy(p, addr, addr_len);
+ p += addr_len;
+
+ return p - buf;
+}
+
+
+/* QUIC server only function.
+ * Add AAD to <aad> buffer from <cid> connection ID and <addr> socket address.
+ * It is the caller's responsibility to ensure the <aad> buffer is big enough
+ * to contain this data.
+ * Return the number of bytes copied to <aad>.
+ */
+static int quic_generate_retry_token_aad(unsigned char *aad,
+ uint32_t version,
+ const struct quic_cid *cid,
+ const struct sockaddr_storage *addr)
+{
+ unsigned char *p;
+
+ p = aad;
+ *(uint32_t *)p = htonl(version);
+ p += sizeof version;
+ p += quic_saddr_cpy(p, addr);
+ memcpy(p, cid->data, cid->len);
+ p += cid->len;
+
+ return p - aad;
+}
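+
+/* Informative note: with the two helpers above, the AAD layout is:
+ *
+ *   version (4 bytes, network order) | port | IP address | DCID
+ *
+ * where the port and address sizes depend on the socket family (2+4 bytes
+ * for IPv4, 2+16 bytes for IPv6). Binding the token to the client address
+ * and connection ID this way makes it unusable from another 4-tuple.
+ */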
+
+/* QUIC server only function.
+ * Generate the token to be used in Retry packets. The token is written to
+ * <token> with <len> as length. <odcid> is the original destination connection
+ * ID and <dcid> is our side destination connection ID (or client source
+ * connection ID).
+ * Returns the length of the encoded token or 0 on error.
+ */
+int quic_generate_retry_token(unsigned char *token, size_t len,
+ const uint32_t version,
+ const struct quic_cid *odcid,
+ const struct quic_cid *dcid,
+ struct sockaddr_storage *addr)
+{
+ int ret = 0;
+ unsigned char *p;
+ unsigned char aad[sizeof(uint32_t) + sizeof(in_port_t) +
+ sizeof(struct in6_addr) + QUIC_CID_MAXLEN];
+ size_t aadlen;
+ unsigned char salt[QUIC_RETRY_TOKEN_SALTLEN];
+ unsigned char key[QUIC_TLS_KEY_LEN];
+ unsigned char iv[QUIC_TLS_IV_LEN];
+ const unsigned char *sec = global.cluster_secret;
+ size_t seclen = sizeof global.cluster_secret;
+ EVP_CIPHER_CTX *ctx = NULL;
+ const EVP_CIPHER *aead = EVP_aes_128_gcm();
+ uint32_t timestamp = (uint32_t)date.tv_sec;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT);
+
+ /* The token is made of the token format byte, the ODCID prefixed by its one byte
+ * length, the creation timestamp, an AEAD TAG, and finally
+ * the random bytes used to derive the secret to encrypt the token.
+ */
+ if (1 + odcid->len + 1 + sizeof(timestamp) + QUIC_TLS_TAG_LEN + QUIC_RETRY_TOKEN_SALTLEN > len)
+ goto err;
+
+ aadlen = quic_generate_retry_token_aad(aad, version, dcid, addr);
+ /* TODO: RAND_bytes() should be replaced */
+ if (RAND_bytes(salt, sizeof salt) != 1) {
+ TRACE_ERROR("RAND_bytes()", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ if (!quic_tls_derive_retry_token_secret(EVP_sha256(), key, sizeof key, iv, sizeof iv,
+ salt, sizeof salt, sec, seclen)) {
+ TRACE_ERROR("quic_tls_derive_retry_token_secret() failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ if (!quic_tls_tx_ctx_init(&ctx, aead, key)) {
+ TRACE_ERROR("quic_tls_tx_ctx_init() failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ /* Token build */
+ p = token;
+	*p++ = QUIC_TOKEN_FMT_RETRY;
+ *p++ = odcid->len;
+ memcpy(p, odcid->data, odcid->len);
+ p += odcid->len;
+ write_u32(p, htonl(timestamp));
+ p += sizeof timestamp;
+
+ /* Do not encrypt the QUIC_TOKEN_FMT_RETRY byte */
+ if (!quic_tls_encrypt(token + 1, p - token - 1, aad, aadlen, ctx, aead, iv)) {
+ TRACE_ERROR("quic_tls_encrypt() failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ p += QUIC_TLS_TAG_LEN;
+ memcpy(p, salt, sizeof salt);
+ p += sizeof salt;
+ EVP_CIPHER_CTX_free(ctx);
+
+ ret = p - token;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
+ return ret;
+
+ err:
+ if (ctx)
+ EVP_CIPHER_CTX_free(ctx);
+ goto leave;
+}
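+
+/* Informative note: the resulting token layout on the wire is:
+ *
+ *   format (1 byte, clear) | ODCID length (1 byte) | ODCID |
+ *   timestamp (4 bytes) | AEAD tag (QUIC_TLS_TAG_LEN) |
+ *   salt (QUIC_RETRY_TOKEN_SALTLEN)
+ *
+ * Everything between the format byte and the tag is encrypted with a
+ * key/IV derived from the cluster secret and the trailing random salt.
+ */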
+
+/* Parse the Retry token from buffer <token> with <end> a pointer to
+ * one byte past the end of this buffer. This will extract the ODCID
+ * which will be stored into <odcid>.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int parse_retry_token(struct quic_conn *qc,
+ const unsigned char *token, const unsigned char *end,
+ struct quic_cid *odcid)
+{
+ int ret = 0;
+ uint64_t odcid_len;
+ uint32_t timestamp;
+ uint32_t now_sec = (uint32_t)date.tv_sec;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+
+ if (!quic_dec_int(&odcid_len, &token, end)) {
+ TRACE_ERROR("quic_dec_int() error", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ /* RFC 9000 7.2. Negotiating Connection IDs:
+ * When an Initial packet is sent by a client that has not previously
+ * received an Initial or Retry packet from the server, the client
+ * populates the Destination Connection ID field with an unpredictable
+ * value. This Destination Connection ID MUST be at least 8 bytes in length.
+ */
+ if (odcid_len < QUIC_ODCID_MINLEN || odcid_len > QUIC_CID_MAXLEN) {
+ TRACE_ERROR("wrong ODCID length", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ if (end - token < odcid_len + sizeof timestamp) {
+ TRACE_ERROR("too long ODCID length", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ timestamp = ntohl(read_u32(token + odcid_len));
+	/* Check that the elapsed time is within +/- QUIC_RETRY_DURATION_SEC
+	 * to tolerate a token generator that is not perfectly time-synced.
+ */
+ if ((uint32_t)(now_sec - timestamp) > QUIC_RETRY_DURATION_SEC &&
+ (uint32_t)(timestamp - now_sec) > QUIC_RETRY_DURATION_SEC) {
+ TRACE_ERROR("token has expired", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ ret = 1;
+ memcpy(odcid->data, token, odcid_len);
+ odcid->len = odcid_len;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return !ret;
+}
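+
+/* Informative example for the expiration check above, taking
+ * QUIC_RETRY_DURATION_SEC = 10 for illustration: with now_sec = 100 and a
+ * timestamp of 105 issued by a generator running slightly ahead,
+ * (uint32_t)(now_sec - timestamp) wraps to a huge value but
+ * (uint32_t)(timestamp - now_sec) = 5 <= 10, so the token is still accepted.
+ * Both unsigned subtractions must exceed the window for the token to expire.
+ */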
+
+/* QUIC server only function.
+ *
+ * Check the validity of the Retry token from Initial packet <pkt>. <dgram> is
+ * the UDP datagram containing <pkt> and <l> is the listener instance on which
+ * it was received. If the token is valid, the ODCID of <qc> QUIC connection
+ * will be put into <odcid>. <qc> is used to retrieve the QUIC version needed
+ * to validate the token but it can be NULL: in this case the version will be
+ * retrieved from the packet.
+ *
+ * Return 1 if succeeded, 0 if not.
+ */
+
+int quic_retry_token_check(struct quic_rx_packet *pkt,
+ struct quic_dgram *dgram,
+ struct listener *l,
+ struct quic_conn *qc,
+ struct quic_cid *odcid)
+{
+ struct proxy *prx;
+ struct quic_counters *prx_counters;
+ int ret = 0;
+ unsigned char *token = pkt->token;
+ const uint64_t tokenlen = pkt->token_len;
+ unsigned char buf[128];
+ unsigned char aad[sizeof(uint32_t) + QUIC_CID_MAXLEN +
+ sizeof(in_port_t) + sizeof(struct in6_addr)];
+ size_t aadlen;
+ const unsigned char *salt;
+ unsigned char key[QUIC_TLS_KEY_LEN];
+ unsigned char iv[QUIC_TLS_IV_LEN];
+ const unsigned char *sec = global.cluster_secret;
+ size_t seclen = sizeof global.cluster_secret;
+ EVP_CIPHER_CTX *ctx = NULL;
+ const EVP_CIPHER *aead = EVP_aes_128_gcm();
+ const struct quic_version *qv = qc ? qc->original_version :
+ pkt->version;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+
+ /* The caller must ensure this. */
+ BUG_ON(!pkt->token_len);
+
+ prx = l->bind_conf->frontend;
+ prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module);
+
+ if (*pkt->token != QUIC_TOKEN_FMT_RETRY) {
+ /* TODO: New token check */
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT, qc, NULL, NULL, pkt->version);
+ goto leave;
+ }
+
+ if (sizeof buf < tokenlen) {
+ TRACE_ERROR("too short buffer", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ /* The token is made of the token format byte, the ODCID prefixed by its one byte
+ * length, the creation timestamp, an AEAD TAG, and finally
+ * the random bytes used to derive the secret to encrypt the token.
+ */
+ if (tokenlen < 2 + QUIC_ODCID_MINLEN + sizeof(uint32_t) + QUIC_TLS_TAG_LEN + QUIC_RETRY_TOKEN_SALTLEN ||
+ tokenlen > 2 + QUIC_CID_MAXLEN + sizeof(uint32_t) + QUIC_TLS_TAG_LEN + QUIC_RETRY_TOKEN_SALTLEN) {
+ TRACE_ERROR("invalid token length", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ aadlen = quic_generate_retry_token_aad(aad, qv->num, &pkt->scid, &dgram->saddr);
+ salt = token + tokenlen - QUIC_RETRY_TOKEN_SALTLEN;
+ if (!quic_tls_derive_retry_token_secret(EVP_sha256(), key, sizeof key, iv, sizeof iv,
+ salt, QUIC_RETRY_TOKEN_SALTLEN, sec, seclen)) {
+ TRACE_ERROR("Could not derive retry secret", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ if (!quic_tls_rx_ctx_init(&ctx, aead, key)) {
+ TRACE_ERROR("quic_tls_rx_ctx_init() failed", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+	/* The token is prefixed by a one-byte format identifier which is not ciphered. */
+ if (!quic_tls_decrypt2(buf, token + 1, tokenlen - QUIC_RETRY_TOKEN_SALTLEN - 1, aad, aadlen,
+ ctx, aead, key, iv)) {
+ TRACE_ERROR("Could not decrypt retry token", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ if (parse_retry_token(qc, buf, buf + tokenlen - QUIC_RETRY_TOKEN_SALTLEN - 1, odcid)) {
+ TRACE_ERROR("Error during Initial token parsing", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ EVP_CIPHER_CTX_free(ctx);
+
+ ret = 1;
+ HA_ATOMIC_INC(&prx_counters->retry_validated);
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return ret;
+
+ err:
+ HA_ATOMIC_INC(&prx_counters->retry_error);
+ if (ctx)
+ EVP_CIPHER_CTX_free(ctx);
+ goto leave;
+}
+
+
diff --git a/src/quic_rx.c b/src/quic_rx.c
new file mode 100644
index 0000000..9e55aa3
--- /dev/null
+++ b/src/quic_rx.c
@@ -0,0 +1,2290 @@
+/*
+ * QUIC protocol implementation. Lower layer with internal features implemented
+ * here such as QUIC encryption, idle timeout, acknowledgement and
+ * retransmission.
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/quic_rx.h>
+
+#include <haproxy/h3.h>
+#include <haproxy/list.h>
+#include <haproxy/ncbuf.h>
+#include <haproxy/proto_quic.h>
+#include <haproxy/quic_ack.h>
+#include <haproxy/quic_cid.h>
+#include <haproxy/quic_retransmit.h>
+#include <haproxy/quic_retry.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/quic_stream.h>
+#include <haproxy/quic_ssl.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/quic_tx.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/trace.h>
+
+DECLARE_POOL(pool_head_quic_conn_rxbuf, "quic_conn_rxbuf", QUIC_CONN_RX_BUFSZ);
+DECLARE_POOL(pool_head_quic_dgram, "quic_dgram", sizeof(struct quic_dgram));
+DECLARE_POOL(pool_head_quic_rx_packet, "quic_rx_packet", sizeof(struct quic_rx_packet));
+
+/* Decode an expected packet number from <truncated_pn>, its truncated value,
+ * depending on <largest_pn> the largest received packet number, and <pn_nbits>
+ * the number of bits used to encode this packet number (its length in bytes * 8).
+ * See https://quicwg.org/base-drafts/draft-ietf-quic-transport.html#packet-encoding
+ */
+static uint64_t decode_packet_number(uint64_t largest_pn,
+ uint32_t truncated_pn, unsigned int pn_nbits)
+{
+ uint64_t expected_pn = largest_pn + 1;
+ uint64_t pn_win = (uint64_t)1 << pn_nbits;
+ uint64_t pn_hwin = pn_win / 2;
+ uint64_t pn_mask = pn_win - 1;
+ uint64_t candidate_pn;
+
+ candidate_pn = (expected_pn & ~pn_mask) | truncated_pn;
+ /* Note that <pn_win> > <pn_hwin>. */
+ if (candidate_pn < QUIC_MAX_PACKET_NUM - pn_win &&
+ candidate_pn + pn_hwin <= expected_pn)
+ return candidate_pn + pn_win;
+
+ if (candidate_pn > expected_pn + pn_hwin && candidate_pn >= pn_win)
+ return candidate_pn - pn_win;
+
+ return candidate_pn;
+}
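+
+/* Worked example (RFC 9000, Appendix A.3): with largest_pn = 0xa82f30ea and
+ * a 16-bit truncated_pn = 0x9b32, expected_pn = 0xa82f30eb, pn_win = 0x10000
+ * and pn_hwin = 0x8000. Then candidate_pn = (expected_pn & ~0xffff) | 0x9b32
+ * = 0xa82f9b32, which already lies within pn_hwin of expected_pn, so it is
+ * returned unchanged.
+ */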
+
+/* Remove the header protection of <pkt> QUIC packet using <tls_ctx> as QUIC TLS
+ * cryptographic context.
+ * <largest_pn> is the largest received packet number and <pn> the address of
+ * the packet number field for this packet with <byte0> address of its first byte.
+ * <end> points to one byte past the end of this packet.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int qc_do_rm_hp(struct quic_conn *qc,
+ struct quic_rx_packet *pkt, struct quic_tls_ctx *tls_ctx,
+ int64_t largest_pn, unsigned char *pn, unsigned char *byte0)
+{
+ int ret, i, pnlen;
+ uint64_t packet_number;
+ uint32_t truncated_pn = 0;
+ unsigned char mask[5] = {0};
+ unsigned char *sample;
+
+ TRACE_ENTER(QUIC_EV_CONN_RMHP, qc);
+
+ ret = 0;
+
+ /* Check there is enough data in this packet. */
+ if (pkt->len - (pn - byte0) < QUIC_PACKET_PN_MAXLEN + sizeof mask) {
+ TRACE_PROTO("too short packet", QUIC_EV_CONN_RMHP, qc, pkt);
+ goto leave;
+ }
+
+ sample = pn + QUIC_PACKET_PN_MAXLEN;
+
+ if (!quic_tls_aes_decrypt(mask, sample, sizeof mask, tls_ctx->rx.hp_ctx)) {
+ TRACE_ERROR("HP removing failed", QUIC_EV_CONN_RMHP, qc, pkt);
+ goto leave;
+ }
+
+ *byte0 ^= mask[0] & (*byte0 & QUIC_PACKET_LONG_HEADER_BIT ? 0xf : 0x1f);
+ pnlen = (*byte0 & QUIC_PACKET_PNL_BITMASK) + 1;
+ for (i = 0; i < pnlen; i++) {
+ pn[i] ^= mask[i + 1];
+ truncated_pn = (truncated_pn << 8) | pn[i];
+ }
+
+ packet_number = decode_packet_number(largest_pn, truncated_pn, pnlen * 8);
+ /* Store remaining information for this unprotected header */
+ pkt->pn = packet_number;
+ pkt->pnl = pnlen;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RMHP, qc);
+ return ret;
+}
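+
+/* Note on the sampling above: as per RFC 9001 5.4.2, the ciphertext sample
+ * starts 4 bytes (the maximum packet number length) past the start of the
+ * Packet Number field. The mask then unprotects the 4 (long header) or
+ * 5 (short header) low bits of the first byte, followed by the packet
+ * number bytes themselves.
+ */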
+
+/* Decrypt <pkt> packet using encryption level <qel> for <qc> connection.
+ * Decryption is done in place in packet buffer.
+ *
+ * Returns 1 on success else 0.
+ */
+static int qc_pkt_decrypt(struct quic_conn *qc, struct quic_enc_level *qel,
+ struct quic_rx_packet *pkt)
+{
+ int ret, kp_changed;
+ unsigned char iv[QUIC_TLS_IV_LEN];
+ struct quic_tls_ctx *tls_ctx =
+ qc_select_tls_ctx(qc, qel, pkt->type, pkt->version);
+ EVP_CIPHER_CTX *rx_ctx = tls_ctx->rx.ctx;
+ unsigned char *rx_iv = tls_ctx->rx.iv;
+ size_t rx_iv_sz = tls_ctx->rx.ivlen;
+ unsigned char *rx_key = tls_ctx->rx.key;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ ret = 0;
+ kp_changed = 0;
+
+ if (pkt->type == QUIC_PACKET_TYPE_SHORT) {
+		/* The two tested bits are not at the same position,
+		 * which is why they are both logically negated first.
+ */
+ if (!(*pkt->data & QUIC_PACKET_KEY_PHASE_BIT) ^ !(tls_ctx->flags & QUIC_FL_TLS_KP_BIT_SET)) {
+ if (pkt->pn < tls_ctx->rx.pn) {
+ /* The lowest packet number of a previous key phase
+ * cannot be null if it really stores previous key phase
+ * secrets.
+ */
+ // TODO: check if BUG_ON() more suitable
+ if (!qc->ku.prv_rx.pn) {
+ TRACE_ERROR("null previous packet number", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+
+ rx_ctx = qc->ku.prv_rx.ctx;
+ rx_iv = qc->ku.prv_rx.iv;
+ rx_key = qc->ku.prv_rx.key;
+ }
+ else if (pkt->pn > qel->pktns->rx.largest_pn) {
+ /* Next key phase */
+ TRACE_PROTO("Key phase changed", QUIC_EV_CONN_RXPKT, qc);
+ kp_changed = 1;
+ rx_ctx = qc->ku.nxt_rx.ctx;
+ rx_iv = qc->ku.nxt_rx.iv;
+ rx_key = qc->ku.nxt_rx.key;
+ }
+ }
+ }
+
+ quic_aead_iv_build(iv, sizeof iv, rx_iv, rx_iv_sz, pkt->pn);
+
+ ret = quic_tls_decrypt(pkt->data + pkt->aad_len, pkt->len - pkt->aad_len,
+ pkt->data, pkt->aad_len,
+ rx_ctx, tls_ctx->rx.aead, rx_key, iv);
+ if (!ret) {
+ TRACE_ERROR("quic_tls_decrypt() failed", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+
+ /* Update the keys only if the packet decryption succeeded. */
+ if (kp_changed) {
+ quic_tls_rotate_keys(qc);
+ /* Toggle the Key Phase bit */
+ tls_ctx->flags ^= QUIC_FL_TLS_KP_BIT_SET;
+ /* Store the lowest packet number received for the current key phase */
+ tls_ctx->rx.pn = pkt->pn;
+ /* Prepare the next key update */
+ if (!quic_tls_key_update(qc)) {
+ TRACE_ERROR("quic_tls_key_update() failed", QUIC_EV_CONN_RXPKT, qc);
+ goto leave;
+ }
+ }
+
+ /* Update the packet length (required to parse the frames). */
+ pkt->len -= QUIC_TLS_TAG_LEN;
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return ret;
+}
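+
+/* Informative note on the key phase handling above: when the Key Phase bit
+ * of a short header packet does not match the current phase, a packet older
+ * than the first one of the current phase (pkt->pn < tls_ctx->rx.pn) is
+ * decrypted with the previous keys, while a packet newer than the largest
+ * received one triggers a key update and is decrypted with the next keys.
+ * The new keys are installed only once the decryption has succeeded.
+ */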
+
+/* Remove from <stream> the acknowledged frames.
+ *
+ * Returns 1 if at least one frame was removed else 0.
+ */
+static int quic_stream_try_to_consume(struct quic_conn *qc,
+ struct qc_stream_desc *stream)
+{
+ int ret;
+ struct eb64_node *frm_node;
+
+ TRACE_ENTER(QUIC_EV_CONN_ACKSTRM, qc);
+
+ ret = 0;
+ frm_node = eb64_first(&stream->acked_frms);
+ while (frm_node) {
+ struct qf_stream *strm_frm;
+ struct quic_frame *frm;
+ size_t offset, len;
+
+ strm_frm = eb64_entry(frm_node, struct qf_stream, offset);
+ offset = strm_frm->offset.key;
+ len = strm_frm->len;
+
+ if (offset > stream->ack_offset)
+ break;
+
+ if (qc_stream_desc_ack(&stream, offset, len)) {
+ /* cf. next comment : frame may be freed at this stage. */
+ TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
+ qc, stream ? strm_frm : NULL, stream);
+ ret = 1;
+ }
+
+		/* If stream is NULL after qc_stream_desc_ack(), it means the frame
+		 * has been freed along with the stream frames tree. Nothing more
+		 * to do here.
+ */
+ if (!stream) {
+ qc_check_close_on_released_mux(qc);
+ ret = 1;
+ goto leave;
+ }
+
+ frm_node = eb64_next(frm_node);
+ eb64_delete(&strm_frm->offset);
+
+ frm = container_of(strm_frm, struct quic_frame, stream);
+ qc_release_frm(qc, frm);
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_ACKSTRM, qc);
+ return ret;
+}
+
+/* Handle <frm> frame, the packet of which has just been acknowledged. The
+ * memory allocated for this frame is always released, in every case.
+ * Never fails.
+ */
+static void qc_handle_newly_acked_frm(struct quic_conn *qc, struct quic_frame *frm)
+{
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+ TRACE_PROTO("RX ack TX frm", QUIC_EV_CONN_PRSAFRM, qc, frm);
+
+ switch (frm->type) {
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ struct qf_stream *strm_frm = &frm->stream;
+ struct eb64_node *node = NULL;
+ struct qc_stream_desc *stream = NULL;
+ const size_t offset = strm_frm->offset.key;
+ const size_t len = strm_frm->len;
+
+ /* do not use strm_frm->stream as the qc_stream_desc instance
+ * might be freed at this stage. Use the id to do a proper
+ * lookup.
+ *
+ * TODO if lookup operation impact on the perf is noticeable,
+ * implement a refcount on qc_stream_desc instances.
+ */
+ node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
+ if (!node) {
+ TRACE_DEVEL("acked stream for released stream", QUIC_EV_CONN_ACKSTRM, qc, strm_frm);
+ qc_release_frm(qc, frm);
+ /* early return */
+ goto leave;
+ }
+ stream = eb64_entry(node, struct qc_stream_desc, by_id);
+
+ TRACE_DEVEL("acked stream", QUIC_EV_CONN_ACKSTRM, qc, strm_frm, stream);
+ if (offset <= stream->ack_offset) {
+ if (qc_stream_desc_ack(&stream, offset, len)) {
+ TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
+ qc, strm_frm, stream);
+ }
+
+ if (!stream) {
+ /* no need to continue if stream freed. */
+ TRACE_DEVEL("stream released and freed", QUIC_EV_CONN_ACKSTRM, qc);
+ qc_release_frm(qc, frm);
+ qc_check_close_on_released_mux(qc);
+ break;
+ }
+
+ TRACE_DEVEL("stream consumed", QUIC_EV_CONN_ACKSTRM,
+ qc, strm_frm, stream);
+ qc_release_frm(qc, frm);
+ }
+ else {
+ eb64_insert(&stream->acked_frms, &strm_frm->offset);
+ }
+
+ quic_stream_try_to_consume(qc, stream);
+ }
+ break;
+ default:
+ qc_release_frm(qc, frm);
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Collect newly acknowledged TX packets from <pkts> ebtree into <newly_acked_pkts>
+ * list, depending on <largest> and <smallest> packet numbers of a range of acknowledged
+ * packets announced in an ACK frame. <largest_node> may be provided to start
+ * looking from this packet node.
+ */
+static void qc_newly_acked_pkts(struct quic_conn *qc, struct eb_root *pkts,
+ struct list *newly_acked_pkts,
+ struct eb64_node *largest_node,
+ uint64_t largest, uint64_t smallest)
+{
+ struct eb64_node *node;
+ struct quic_tx_packet *pkt;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ node = eb64_lookup_ge(pkts, smallest);
+ if (!node)
+ goto leave;
+
+ largest_node = largest_node ? largest_node : eb64_lookup_le(pkts, largest);
+ if (!largest_node)
+ goto leave;
+
+ while (node && node->key <= largest_node->key) {
+ pkt = eb64_entry(node, struct quic_tx_packet, pn_node);
+ LIST_APPEND(newly_acked_pkts, &pkt->list);
+ node = eb64_next(node);
+ eb64_delete(&pkt->pn_node);
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Handle <newly_acked_pkts> list of newly acknowledged TX packets */
+static void qc_handle_newly_acked_pkts(struct quic_conn *qc,
+ unsigned int *pkt_flags, struct list *newly_acked_pkts)
+{
+ struct quic_tx_packet *pkt, *tmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
+ struct quic_frame *frm, *frmbak;
+
+ *pkt_flags |= pkt->flags;
+ TRACE_DEVEL("Removing packet #", QUIC_EV_CONN_PRSAFRM, qc, NULL, &pkt->pn_node.key);
+ list_for_each_entry_safe(frm, frmbak, &pkt->frms, list)
+ qc_handle_newly_acked_frm(qc, frm);
+		/* If there are other packets in the same datagram <pkt> is attached to,
+ * detach the previous one and the next one from <pkt>.
+ */
+ quic_tx_packet_dgram_detach(pkt);
+ eb64_delete(&pkt->pn_node);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Handle all frames sent from <pkt> packet and reinsert them in the same order
+ * they have been sent into <pktns_frm_list>. The loss counter of each frame is
+ * incremented and checked if it does not exceed retransmission limit.
+ *
+ * Returns 1 on success, 0 if a frame loss limit is exceeded. A
+ * CONNECTION_CLOSE is scheduled in this case.
+ */
+int qc_handle_frms_of_lost_pkt(struct quic_conn *qc,
+ struct quic_tx_packet *pkt,
+ struct list *pktns_frm_list)
+{
+ struct quic_frame *frm, *frmbak;
+ struct list *pkt_frm_list = &pkt->frms;
+ uint64_t pn = pkt->pn_node.key;
+ int close = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ list_for_each_entry_safe(frm, frmbak, pkt_frm_list, list) {
+ /* First remove this frame from the packet it was attached to */
+ LIST_DEL_INIT(&frm->list);
+ quic_tx_packet_refdec(pkt);
+ /* At this time, this frame is not freed but removed from its packet */
+ frm->pkt = NULL;
+ /* Remove any reference to this frame */
+ qc_frm_unref(frm, qc);
+ switch (frm->type) {
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ struct qf_stream *strm_frm = &frm->stream;
+ struct eb64_node *node = NULL;
+ struct qc_stream_desc *stream_desc;
+
+ node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
+ if (!node) {
+ TRACE_DEVEL("released stream", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ TRACE_DEVEL("freeing frame from packet", QUIC_EV_CONN_PRSAFRM,
+ qc, frm, &pn);
+ qc_frm_free(qc, &frm);
+ continue;
+ }
+
+ stream_desc = eb64_entry(node, struct qc_stream_desc, by_id);
+ /* Do not resend this frame if in the "already acked range" */
+ if (strm_frm->offset.key + strm_frm->len <= stream_desc->ack_offset) {
+ TRACE_DEVEL("ignored frame in already acked range",
+ QUIC_EV_CONN_PRSAFRM, qc, frm);
+ qc_frm_free(qc, &frm);
+ continue;
+ }
+ else if (strm_frm->offset.key < stream_desc->ack_offset) {
+ uint64_t diff = stream_desc->ack_offset - strm_frm->offset.key;
+
+ qc_stream_frm_mv_fwd(frm, diff);
+ TRACE_DEVEL("updated partially acked frame",
+ QUIC_EV_CONN_PRSAFRM, qc, frm);
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ /* Do not resend probing packet with old data */
+ if (pkt->flags & QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA) {
+ TRACE_DEVEL("ignored frame with old data from packet", QUIC_EV_CONN_PRSAFRM,
+ qc, frm, &pn);
+ if (frm->origin)
+ LIST_DEL_INIT(&frm->ref);
+ qc_frm_free(qc, &frm);
+ continue;
+ }
+
+ if (frm->flags & QUIC_FL_TX_FRAME_ACKED) {
+ TRACE_DEVEL("already acked frame", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ TRACE_DEVEL("freeing frame from packet", QUIC_EV_CONN_PRSAFRM,
+ qc, frm, &pn);
+ qc_frm_free(qc, &frm);
+ }
+ else {
+ if (++frm->loss_count >= global.tune.quic_max_frame_loss) {
+ TRACE_ERROR("retransmission limit reached, closing the connection", QUIC_EV_CONN_PRSAFRM, qc);
+ quic_set_connection_close(qc, quic_err_transport(QC_ERR_INTERNAL_ERROR));
+ qc_notify_err(qc);
+ close = 1;
+ }
+
+ LIST_APPEND(pktns_frm_list, &frm->list);
+ TRACE_DEVEL("frame requeued", QUIC_EV_CONN_PRSAFRM, qc, frm);
+ }
+ }
+
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+ return !close;
+}
+
+/* Send a packet ack event notification for each newly acked packet of
+ * <newly_acked_pkts> list and free them.
+ * Always succeeds.
+ */
+static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
+ struct list *newly_acked_pkts)
+{
+ struct quic_tx_packet *pkt, *tmp;
+ struct quic_cc_event ev = { .type = QUIC_CC_EVT_ACK, };
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
+ pkt->pktns->tx.in_flight -= pkt->in_flight_len;
+ qc->path->prep_in_flight -= pkt->in_flight_len;
+ qc->path->in_flight -= pkt->in_flight_len;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)
+ qc->path->ifae_pkts--;
+ /* If this packet contained an ACK frame, proceed to the
+ * acknowledging of range of acks from the largest acknowledged
+ * packet number which was sent in an ACK frame by this packet.
+ */
+ if (pkt->largest_acked_pn != -1)
+ qc_treat_ack_of_ack(qc, &pkt->pktns->rx.arngs, pkt->largest_acked_pn);
+ ev.ack.acked = pkt->in_flight_len;
+ ev.ack.time_sent = pkt->time_sent;
+ quic_cc_event(&qc->path->cc, &ev);
+ LIST_DEL_INIT(&pkt->list);
+ quic_tx_packet_refdec(pkt);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+}
+
+/* Parse ACK frame into <frm> from a buffer at <pos> address with <end> being
+ * one byte past the end of this buffer. Also update <rtt_sample> if needed, i.e.
+ * if the largest acked packet was newly acked and if there was at least one newly
+ * acked ack-eliciting packet.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int qc_parse_ack_frm(struct quic_conn *qc,
+ struct quic_frame *frm,
+ struct quic_enc_level *qel,
+ unsigned int *rtt_sample,
+ const unsigned char **pos, const unsigned char *end)
+{
+ struct qf_ack *ack_frm = &frm->ack;
+ uint64_t smallest, largest;
+ struct eb_root *pkts;
+ struct eb64_node *largest_node;
+ unsigned int time_sent, pkt_flags;
+ struct list newly_acked_pkts = LIST_HEAD_INIT(newly_acked_pkts);
+ struct list lost_pkts = LIST_HEAD_INIT(lost_pkts);
+ int ret = 0, new_largest_acked_pn = 0;
+ struct quic_tx_packet *pkt, *tmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);
+
+ pkts = &qel->pktns->tx.pkts;
+ if (ack_frm->largest_ack > qel->pktns->tx.next_pn) {
+ TRACE_DEVEL("ACK for not sent packet", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &ack_frm->largest_ack);
+ goto err;
+ }
+
+ if (ack_frm->first_ack_range > ack_frm->largest_ack) {
+ TRACE_DEVEL("too big first ACK range", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &ack_frm->first_ack_range);
+ goto err;
+ }
+
+ largest = ack_frm->largest_ack;
+ smallest = largest - ack_frm->first_ack_range;
+ pkt_flags = 0;
+ largest_node = NULL;
+ time_sent = 0;
+
+ if ((int64_t)ack_frm->largest_ack > qel->pktns->rx.largest_acked_pn) {
+ largest_node = eb64_lookup(pkts, largest);
+ if (!largest_node) {
+ TRACE_DEVEL("Largest acked packet not found",
+ QUIC_EV_CONN_PRSAFRM, qc);
+ }
+ else {
+ time_sent = eb64_entry(largest_node,
+ struct quic_tx_packet, pn_node)->time_sent;
+ new_largest_acked_pn = 1;
+ }
+ }
+
+ TRACE_PROTO("RX ack range", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &largest, &smallest);
+ do {
+ uint64_t gap, ack_range;
+
+ qc_newly_acked_pkts(qc, pkts, &newly_acked_pkts,
+ largest_node, largest, smallest);
+ if (!ack_frm->ack_range_num--)
+ break;
+
+ if (!quic_dec_int(&gap, pos, end)) {
+ TRACE_ERROR("quic_dec_int(gap) failed", QUIC_EV_CONN_PRSAFRM, qc);
+ goto err;
+ }
+
+ if (smallest < gap + 2) {
+ TRACE_DEVEL("wrong gap value", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &gap, &smallest);
+ goto err;
+ }
+
+ largest = smallest - gap - 2;
+ if (!quic_dec_int(&ack_range, pos, end)) {
+ TRACE_ERROR("quic_dec_int(ack_range) failed", QUIC_EV_CONN_PRSAFRM, qc);
+ goto err;
+ }
+
+ if (largest < ack_range) {
+ TRACE_DEVEL("wrong ack range value", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &largest, &ack_range);
+ goto err;
+ }
+
+ /* Do not use this node anymore. */
+ largest_node = NULL;
+ /* Next range */
+ smallest = largest - ack_range;
+
+ TRACE_PROTO("RX next ack range", QUIC_EV_CONN_PRSAFRM,
+ qc, NULL, &largest, &smallest);
+ } while (1);
+
+ if (!LIST_ISEMPTY(&newly_acked_pkts)) {
+ qc_handle_newly_acked_pkts(qc, &pkt_flags, &newly_acked_pkts);
+ if (new_largest_acked_pn && (pkt_flags & QUIC_FL_TX_PACKET_ACK_ELICITING)) {
+ *rtt_sample = tick_remain(time_sent, now_ms);
+ qel->pktns->rx.largest_acked_pn = ack_frm->largest_ack;
+ }
+
+ if (!eb_is_empty(&qel->pktns->tx.pkts)) {
+ qc_packet_loss_lookup(qel->pktns, qc, &lost_pkts);
+ if (!qc_release_lost_pkts(qc, qel->pktns, &lost_pkts, now_ms))
+ goto leave;
+ }
+ qc_notify_cc_of_newly_acked_pkts(qc, &newly_acked_pkts);
+ if (quic_peer_validated_addr(qc))
+ qc->path->loss.pto_count = 0;
+ qc_set_timer(qc);
+ qc_notify_send(qc);
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSAFRM, qc);
+ return ret;
+
+ err:
+ /* Move back these packets into their tree. */
+ list_for_each_entry_safe(pkt, tmp, &newly_acked_pkts, list) {
+ LIST_DEL_INIT(&pkt->list);
+ eb64_insert(pkts, &pkt->pn_node);
+ }
+ goto leave;
+}
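+
+/* Worked example for the ACK range walk above (RFC 9000, 19.3.1): with
+ * largest_ack = 10 and first_ack_range = 3, packets 7..10 are newly
+ * acknowledged. A following (gap = 2, ack_range = 1) pair then yields
+ * largest = 7 - 2 - 2 = 3 and smallest = 3 - 1 = 2, i.e. packets 2..3.
+ */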
+
+/* Parse a STREAM frame <strm_frm> received in <pkt> packet for <qc>
+ * connection. <fin> is true if FIN bit is set on frame type.
+ *
+ * Return 1 on success. On error, 0 is returned. In this case, the packet
+ * containing the frame must not be acknowledged.
+ */
+static int qc_handle_strm_frm(struct quic_rx_packet *pkt,
+ struct qf_stream *strm_frm,
+ struct quic_conn *qc, char fin)
+{
+ int ret;
+
+ /* RFC9000 13.1. Packet Processing
+ *
+ * A packet MUST NOT be acknowledged until packet protection has been
+ * successfully removed and all frames contained in the packet have
+ * been processed. For STREAM frames, this means the data has been
+ * enqueued in preparation to be received by the application protocol,
+ * but it does not require that data be delivered and consumed.
+ */
+ TRACE_ENTER(QUIC_EV_CONN_PRSFRM, qc);
+
+ ret = qcc_recv(qc->qcc, strm_frm->id, strm_frm->len,
+ strm_frm->offset.key, fin, (char *)strm_frm->data);
+
+ /* frame rejected - packet must not be acknowledeged */
+ TRACE_LEAVE(QUIC_EV_CONN_PRSFRM, qc);
+ return !ret;
+}
+
+/* Parse <crypto_frm> CRYPTO frame coming with <pkt> packet at <qel> encryption
+ * level for <qc> connection. Returns 1 if succeeded, 0 if not. Also set
+ * <*fast_retrans> to 1 if a handshake completion speedup may be launched after
+ * having received duplicated CRYPTO data.
+ */
+static int qc_handle_crypto_frm(struct quic_conn *qc,
+ struct qf_crypto *crypto_frm, struct quic_rx_packet *pkt,
+ struct quic_enc_level *qel, int *fast_retrans)
+{
+ int ret = 0;
+ enum ncb_ret ncb_ret;
+ /* XXX TO DO: <cfdebug> is used only for the traces. */
+ struct quic_rx_crypto_frm cfdebug = {
+ .offset_node.key = crypto_frm->offset,
+ .len = crypto_frm->len,
+ };
+ struct quic_cstream *cstream = qel->cstream;
+ struct ncbuf *ncbuf = &qel->cstream->rx.ncbuf;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
+
+ if (unlikely(crypto_frm->offset < cstream->rx.offset)) {
+ size_t diff;
+
+ if (crypto_frm->offset + crypto_frm->len <= cstream->rx.offset) {
+ /* Nothing to do */
+ TRACE_PROTO("Already received CRYPTO data",
+ QUIC_EV_CONN_RXPKT, qc, pkt, &cfdebug);
+ if (qc_is_listener(qc) && qel == qc->iel &&
+ !(qc->flags & QUIC_FL_CONN_HANDSHAKE_SPEED_UP))
+ *fast_retrans = 1;
+ goto done;
+ }
+
+ TRACE_PROTO("Partially already received CRYPTO data",
+ QUIC_EV_CONN_RXPKT, qc, pkt, &cfdebug);
+
+ diff = cstream->rx.offset - crypto_frm->offset;
+ crypto_frm->len -= diff;
+ crypto_frm->data += diff;
+ crypto_frm->offset = cstream->rx.offset;
+ }
+
+ if (crypto_frm->offset == cstream->rx.offset && ncb_is_empty(ncbuf)) {
+ struct qf_crypto *qf_crypto;
+
+ qf_crypto = pool_alloc(pool_head_qf_crypto);
+ if (!qf_crypto) {
+ TRACE_ERROR("CRYPTO frame allocation failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+
+ qf_crypto->offset = crypto_frm->offset;
+ qf_crypto->len = crypto_frm->len;
+ qf_crypto->data = crypto_frm->data;
+ qf_crypto->qel = qel;
+ LIST_APPEND(&qel->rx.crypto_frms, &qf_crypto->list);
+
+ cstream->rx.offset += crypto_frm->len;
+ HA_ATOMIC_OR(&qc->wait_event.tasklet->state, TASK_HEAVY);
+ TRACE_DEVEL("increment crypto level offset", QUIC_EV_CONN_PHPKTS, qc, qel);
+ goto done;
+ }
+
+ if (!quic_get_ncbuf(ncbuf) ||
+ ncb_is_null(ncbuf)) {
+ TRACE_ERROR("CRYPTO ncbuf allocation failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+
+	/* crypto_frm->offset > cstream->rx.offset */
+ ncb_ret = ncb_add(ncbuf, crypto_frm->offset - cstream->rx.offset,
+ (const char *)crypto_frm->data, crypto_frm->len, NCB_ADD_COMPARE);
+ if (ncb_ret != NCB_RET_OK) {
+ if (ncb_ret == NCB_RET_DATA_REJ) {
+ TRACE_ERROR("overlapping data rejected", QUIC_EV_CONN_PRSHPKT, qc);
+ quic_set_connection_close(qc, quic_err_transport(QC_ERR_PROTOCOL_VIOLATION));
+ qc_notify_err(qc);
+ }
+ else if (ncb_ret == NCB_RET_GAP_SIZE) {
+ TRACE_ERROR("cannot bufferize frame due to gap size limit",
+ QUIC_EV_CONN_PRSHPKT, qc);
+ }
+ goto leave;
+ }
+
+ if (ncb_data(ncbuf, 0))
+ HA_ATOMIC_OR(&qc->wait_event.tasklet->state, TASK_HEAVY);
+
+ done:
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
+ return ret;
+}
+
+/* Handle a RETIRE_CONNECTION_ID frame carried by <frm>.
+ * Return 1 if succeeded, 0 if not. If succeeded, also set <to_retire>
+ * to the CID to be retired if not already retired.
+ */
+static int qc_handle_retire_connection_id_frm(struct quic_conn *qc,
+ struct quic_frame *frm,
+ struct quic_cid *dcid,
+ struct quic_connection_id **to_retire)
+{
+ int ret = 0;
+ struct qf_retire_connection_id *rcid_frm = &frm->retire_connection_id;
+ struct eb64_node *node;
+ struct quic_connection_id *conn_id;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
+
+ /* RFC 9000 19.16. RETIRE_CONNECTION_ID Frames:
+ * Receipt of a RETIRE_CONNECTION_ID frame containing a sequence number greater
+ * than any previously sent to the peer MUST be treated as a connection error
+ * of type PROTOCOL_VIOLATION.
+ */
+ if (rcid_frm->seq_num >= qc->next_cid_seq_num) {
+ TRACE_PROTO("CID seq. number too big", QUIC_EV_CONN_PSTRM, qc, frm);
+ goto protocol_violation;
+ }
+
+ /* RFC 9000 19.16. RETIRE_CONNECTION_ID Frames:
+ * The sequence number specified in a RETIRE_CONNECTION_ID frame MUST NOT refer to
+ * the Destination Connection ID field of the packet in which the frame is contained.
+ * The peer MAY treat this as a connection error of type PROTOCOL_VIOLATION.
+ */
+ node = eb64_lookup(qc->cids, rcid_frm->seq_num);
+ if (!node) {
+ TRACE_PROTO("CID already retired", QUIC_EV_CONN_PSTRM, qc, frm);
+ goto out;
+ }
+
+ conn_id = eb64_entry(node, struct quic_connection_id, seq_num);
+ /* Note that the length of <dcid> has already been checked. It must match the
+ * length of the CIDs which have been provided to the peer.
+ */
+ if (!memcmp(dcid->data, conn_id->cid.data, QUIC_HAP_CID_LEN)) {
+ TRACE_PROTO("cannot retire the current CID", QUIC_EV_CONN_PSTRM, qc, frm);
+ goto protocol_violation;
+ }
+
+ *to_retire = conn_id;
+ out:
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
+ return ret;
+ protocol_violation:
+ quic_set_connection_close(qc, quic_err_transport(QC_ERR_PROTOCOL_VIOLATION));
+ qc_notify_err(qc);
+ goto leave;
+}
+
+/* Returns the <ack_delay> field value in milliseconds from <ack_frm> ACK frame for
+ * <conn> QUIC connection. Note that the <ack_delay> value carried by an ACK
+ * frame is expressed in microseconds once scaled by the ACK delay exponent.
+ */
+static inline unsigned int quic_ack_delay_ms(struct qf_ack *ack_frm,
+ struct quic_conn *conn)
+{
+ return (ack_frm->ack_delay << conn->tx.params.ack_delay_exponent) / 1000;
+}
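+
+/* Informative example: the raw ACK Delay field is scaled by
+ * 2^ack_delay_exponent to obtain microseconds. With the default exponent
+ * of 3, a raw value of 1000 thus means 8000 microseconds, i.e. 8 ms
+ * returned by the function above.
+ */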
+
+/* Parse all the frames of <pkt> QUIC packet for QUIC connection <qc> and <qel>
+ * as encryption level.
+ * Returns 1 if succeeded, 0 if failed.
+ */
+static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt,
+ struct quic_enc_level *qel)
+{
+ struct quic_frame frm;
+ const unsigned char *pos, *end;
+ int fast_retrans = 0, ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc);
+ /* Skip the AAD */
+ pos = pkt->data + pkt->aad_len;
+ end = pkt->data + pkt->len;
+
+ /* Packet with no frame. */
+ if (pos == end) {
+ /* RFC9000 12.4. Frames and Frame Types
+ *
+ * The payload of a packet that contains frames MUST contain at least
+ * one frame, and MAY contain multiple frames and multiple frame types.
+ * An endpoint MUST treat receipt of a packet containing no frames as a
+ * connection error of type PROTOCOL_VIOLATION. Frames always fit within
+ * a single QUIC packet and cannot span multiple packets.
+ */
+ quic_set_connection_close(qc, quic_err_transport(QC_ERR_PROTOCOL_VIOLATION));
+ goto leave;
+ }
+
+ while (pos < end) {
+ if (!qc_parse_frm(&frm, pkt, &pos, end, qc)) {
+ // trace already emitted by function above
+ goto leave;
+ }
+
+ switch (frm.type) {
+ case QUIC_FT_PADDING:
+ break;
+ case QUIC_FT_PING:
+ break;
+ case QUIC_FT_ACK:
+ {
+ unsigned int rtt_sample;
+ rtt_sample = UINT_MAX;
+
+ if (!qc_parse_ack_frm(qc, &frm, qel, &rtt_sample, &pos, end)) {
+ // trace already emitted by function above
+ goto leave;
+ }
+
+ if (rtt_sample != UINT_MAX) {
+ unsigned int ack_delay;
+
+ ack_delay = !quic_application_pktns(qel->pktns, qc) ? 0 :
+ qc->state >= QUIC_HS_ST_CONFIRMED ?
+ MS_TO_TICKS(QUIC_MIN(quic_ack_delay_ms(&frm.ack, qc), qc->max_ack_delay)) :
+ MS_TO_TICKS(quic_ack_delay_ms(&frm.ack, qc));
+ quic_loss_srtt_update(&qc->path->loss, rtt_sample, ack_delay, qc);
+ }
+ break;
+ }
+ case QUIC_FT_RESET_STREAM:
+ if (qc->mux_state == QC_MUX_READY) {
+ struct qf_reset_stream *rs_frm = &frm.reset_stream;
+ qcc_recv_reset_stream(qc->qcc, rs_frm->id, rs_frm->app_error_code, rs_frm->final_size);
+ }
+ break;
+ case QUIC_FT_STOP_SENDING:
+ {
+ struct qf_stop_sending *ss_frm = &frm.stop_sending;
+ if (qc->mux_state == QC_MUX_READY) {
+ if (qcc_recv_stop_sending(qc->qcc, ss_frm->id,
+ ss_frm->app_error_code)) {
+ TRACE_ERROR("qcc_recv_stop_sending() failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+ }
+ break;
+ }
+ case QUIC_FT_CRYPTO:
+ if (!qc_handle_crypto_frm(qc, &frm.crypto, pkt, qel, &fast_retrans))
+ goto leave;
+ break;
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ {
+ struct qf_stream *strm_frm = &frm.stream;
+ unsigned nb_streams = qc->rx.strms[qcs_id_type(strm_frm->id)].nb_streams;
+ const char fin = frm.type & QUIC_STREAM_FRAME_TYPE_FIN_BIT;
+
+ /* The upper layer may not be allocated. */
+ if (qc->mux_state != QC_MUX_READY) {
+ if ((strm_frm->id >> QCS_ID_TYPE_SHIFT) < nb_streams) {
+ TRACE_DATA("Already closed stream", QUIC_EV_CONN_PRSHPKT, qc);
+ }
+ else {
+ TRACE_DEVEL("No mux for new stream", QUIC_EV_CONN_PRSHPKT, qc);
+ if (qc->app_ops == &h3_ops) {
+ if (!qc_h3_request_reject(qc, strm_frm->id)) {
+ TRACE_ERROR("error on request rejection", QUIC_EV_CONN_PRSHPKT, qc);
+ /* This packet will not be acknowledged */
+ goto leave;
+ }
+ }
+ else {
+ /* This packet will not be acknowledged */
+ goto leave;
+ }
+ }
+
+ break;
+ }
+
+ if (!qc_handle_strm_frm(pkt, strm_frm, qc, fin)) {
+ TRACE_ERROR("qc_handle_strm_frm() failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+
+ break;
+ }
+ case QUIC_FT_MAX_DATA:
+ if (qc->mux_state == QC_MUX_READY) {
+ struct qf_max_data *md_frm = &frm.max_data;
+ qcc_recv_max_data(qc->qcc, md_frm->max_data);
+ }
+ break;
+ case QUIC_FT_MAX_STREAM_DATA:
+ if (qc->mux_state == QC_MUX_READY) {
+ struct qf_max_stream_data *msd_frm = &frm.max_stream_data;
+ if (qcc_recv_max_stream_data(qc->qcc, msd_frm->id,
+ msd_frm->max_stream_data)) {
+ TRACE_ERROR("qcc_recv_max_stream_data() failed", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+ }
+ break;
+ case QUIC_FT_MAX_STREAMS_BIDI:
+ case QUIC_FT_MAX_STREAMS_UNI:
+ break;
+ case QUIC_FT_DATA_BLOCKED:
+ qc->cntrs.data_blocked++;
+ break;
+ case QUIC_FT_STREAM_DATA_BLOCKED:
+ qc->cntrs.stream_data_blocked++;
+ break;
+ case QUIC_FT_STREAMS_BLOCKED_BIDI:
+ qc->cntrs.streams_blocked_bidi++;
+ break;
+ case QUIC_FT_STREAMS_BLOCKED_UNI:
+ qc->cntrs.streams_blocked_uni++;
+ break;
+ case QUIC_FT_NEW_CONNECTION_ID:
+ /* XXX TO DO XXX */
+ break;
+ case QUIC_FT_RETIRE_CONNECTION_ID:
+ {
+ struct quic_cid_tree *tree;
+ struct quic_connection_id *conn_id = NULL;
+
+ if (!qc_handle_retire_connection_id_frm(qc, &frm, &pkt->dcid, &conn_id))
+ goto leave;
+
+ if (!conn_id)
+ break;
+
+ tree = &quic_cid_trees[quic_cid_tree_idx(&conn_id->cid)];
+ HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock);
+ ebmb_delete(&conn_id->node);
+ HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock);
+ eb64_delete(&conn_id->seq_num);
+ pool_free(pool_head_quic_connection_id, conn_id);
+ TRACE_PROTO("CID retired", QUIC_EV_CONN_PSTRM, qc);
+
+ conn_id = new_quic_cid(qc->cids, qc, NULL, NULL);
+ if (!conn_id) {
+ TRACE_ERROR("CID allocation error", QUIC_EV_CONN_IO_CB, qc);
+ }
+ else {
+ quic_cid_insert(conn_id);
+ qc_build_new_connection_id_frm(qc, conn_id);
+ }
+ break;
+ }
+ case QUIC_FT_CONNECTION_CLOSE:
+ case QUIC_FT_CONNECTION_CLOSE_APP:
+ /* Increment the error counters */
+ quic_conn_closed_err_count_inc(qc, &frm);
+ if (!(qc->flags & QUIC_FL_CONN_DRAINING)) {
+ TRACE_STATE("Entering draining state", QUIC_EV_CONN_PRSHPKT, qc);
+ /* RFC 9000 10.2. Immediate Close:
+ * The closing and draining connection states exist to ensure
+ * that connections close cleanly and that delayed or reordered
+ * packets are properly discarded. These states SHOULD persist
+ * for at least three times the current PTO interval...
+ *
+ * Rearm the idle timeout only one time when entering draining
+ * state.
+ */
+ qc->flags |= QUIC_FL_CONN_DRAINING|QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ qc_detach_th_ctx_list(qc, 1);
+ qc_idle_timer_do_rearm(qc, 0);
+ qc_notify_err(qc);
+ }
+ break;
+ case QUIC_FT_HANDSHAKE_DONE:
+ if (qc_is_listener(qc)) {
+ TRACE_ERROR("non accepted QUIC_FT_HANDSHAKE_DONE frame",
+ QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+
+ qc->state = QUIC_HS_ST_CONFIRMED;
+ break;
+ default:
+			TRACE_ERROR("unknown frame type", QUIC_EV_CONN_PRSHPKT, qc);
+ goto leave;
+ }
+ }
+
+ if (fast_retrans && qc->iel && qc->hel) {
+ struct quic_enc_level *iqel = qc->iel;
+ struct quic_enc_level *hqel = qc->hel;
+
+ TRACE_PROTO("speeding up handshake completion", QUIC_EV_CONN_PRSHPKT, qc);
+ qc_prep_hdshk_fast_retrans(qc, &iqel->pktns->tx.frms, &hqel->pktns->tx.frms);
+ qc->flags |= QUIC_FL_CONN_HANDSHAKE_SPEED_UP;
+ }
+
+ /* The server must switch from INITIAL to HANDSHAKE handshake state when it
+	 * has successfully parsed a Handshake packet. The Initial encryption level
+	 * must also be discarded.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_HANDSHAKE && qc_is_listener(qc)) {
+ if (qc->state >= QUIC_HS_ST_SERVER_INITIAL) {
+ if (qc->ipktns && !quic_tls_pktns_is_dcd(qc, qc->ipktns)) {
+				/* Discard the Initial packet number space. */
+ TRACE_PROTO("discarding Initial pktns", QUIC_EV_CONN_PRSHPKT, qc);
+ quic_pktns_discard(qc->ipktns, qc);
+ qc_set_timer(qc);
+ qc_el_rx_pkts_del(qc->iel);
+ qc_release_pktns_frms(qc, qc->ipktns);
+ }
+ if (qc->state < QUIC_HS_ST_SERVER_HANDSHAKE)
+ qc->state = QUIC_HS_ST_SERVER_HANDSHAKE;
+ }
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PRSHPKT, qc);
+ return ret;
+}
+
+/* Detect the value of the spin bit to be used. */
+static inline void qc_handle_spin_bit(struct quic_conn *qc, struct quic_rx_packet *pkt,
+ struct quic_enc_level *qel)
+{
+ uint64_t largest_pn = qel->pktns->rx.largest_pn;
+
+ if (qel != qc->ael || largest_pn == -1 ||
+ pkt->pn <= largest_pn)
+ return;
+
+ if (qc_is_listener(qc)) {
+ if (pkt->flags & QUIC_FL_RX_PACKET_SPIN_BIT)
+ qc->flags |= QUIC_FL_CONN_SPIN_BIT;
+ else
+ qc->flags &= ~QUIC_FL_CONN_SPIN_BIT;
+ }
+ else {
+ if (pkt->flags & QUIC_FL_RX_PACKET_SPIN_BIT)
+ qc->flags &= ~QUIC_FL_CONN_SPIN_BIT;
+ else
+ qc->flags |= QUIC_FL_CONN_SPIN_BIT;
+ }
+}
+
+/* Remove the header protection of packets at <el> encryption level.
+ * Always succeeds.
+ */
+static void qc_rm_hp_pkts(struct quic_conn *qc, struct quic_enc_level *el)
+{
+ struct quic_rx_packet *pqpkt, *pkttmp;
+
+ TRACE_ENTER(QUIC_EV_CONN_ELRMHP, qc);
+ /* A server must not process incoming 1-RTT packets before the handshake is complete. */
+ if (el == qc->ael && qc_is_listener(qc) && qc->state < QUIC_HS_ST_COMPLETE) {
+ TRACE_PROTO("RX hp not removed (handshake not completed)",
+ QUIC_EV_CONN_ELRMHP, qc);
+ goto out;
+ }
+
+ list_for_each_entry_safe(pqpkt, pkttmp, &el->rx.pqpkts, list) {
+ struct quic_tls_ctx *tls_ctx;
+
+ tls_ctx = qc_select_tls_ctx(qc, el, pqpkt->type, pqpkt->version);
+ if (!qc_do_rm_hp(qc, pqpkt, tls_ctx, el->pktns->rx.largest_pn,
+ pqpkt->data + pqpkt->pn_offset, pqpkt->data)) {
+ TRACE_ERROR("RX hp removing error", QUIC_EV_CONN_ELRMHP, qc);
+ }
+ else {
+ qc_handle_spin_bit(qc, pqpkt, el);
+ /* The AAD includes the packet number field */
+ pqpkt->aad_len = pqpkt->pn_offset + pqpkt->pnl;
+ /* Store the packet into the tree of packets to decrypt. */
+ pqpkt->pn_node.key = pqpkt->pn;
+ eb64_insert(&el->rx.pkts, &pqpkt->pn_node);
+ quic_rx_packet_refinc(pqpkt);
+ TRACE_PROTO("RX hp removed", QUIC_EV_CONN_ELRMHP, qc, pqpkt);
+ }
+ LIST_DELETE(&pqpkt->list);
+ quic_rx_packet_refdec(pqpkt);
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_ELRMHP, qc);
+}
+
+/* Process all the CRYPTO frames at <el> encryption level. It is the
+ * responsibility of the caller to ensure there exists a CRYPTO data
+ * stream for this level.
+ * Return 1 if succeeded, 0 if not.
+ */
+int qc_treat_rx_crypto_frms(struct quic_conn *qc, struct quic_enc_level *el,
+ struct ssl_sock_ctx *ctx)
+{
+ int ret = 0;
+ struct ncbuf *ncbuf;
+ struct quic_cstream *cstream = el->cstream;
+ ncb_sz_t data;
+
+ TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+
+ BUG_ON(!cstream);
+ ncbuf = &cstream->rx.ncbuf;
+ if (ncb_is_null(ncbuf))
+ goto done;
+
+ /* TODO not working if buffer is wrapping */
+ while ((data = ncb_data(ncbuf, 0))) {
+ const unsigned char *cdata = (const unsigned char *)ncb_head(ncbuf);
+
+ if (!qc_ssl_provide_quic_data(&el->cstream->rx.ncbuf, el->level,
+ ctx, cdata, data))
+ goto leave;
+
+ cstream->rx.offset += data;
+ TRACE_DEVEL("buffered crypto data were provided to TLS stack",
+ QUIC_EV_CONN_PHPKTS, qc, el);
+ }
+
+ done:
+ ret = 1;
+ leave:
+ if (!ncb_is_null(ncbuf) && ncb_is_empty(ncbuf)) {
+ TRACE_DEVEL("freeing crypto buf", QUIC_EV_CONN_PHPKTS, qc, el);
+ quic_free_ncbuf(ncbuf);
+ }
+ TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+ return ret;
+}
+
+/* Check if it's possible to remove header protection for packets related to
+ * encryption level <qel>. If <qel> is NULL, assume it's false.
+ *
+ * Return true if the operation is possible else false.
+ */
+static int qc_qel_may_rm_hp(struct quic_conn *qc, struct quic_enc_level *qel)
+{
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_TRMHP, qc);
+
+ if (!qel)
+ goto cant_rm_hp;
+
+ if (!quic_tls_has_rx_sec(qel)) {
+ TRACE_PROTO("non available secrets", QUIC_EV_CONN_TRMHP, qc);
+ goto cant_rm_hp;
+ }
+
+ if (qel == qc->ael && qc->state < QUIC_HS_ST_COMPLETE) {
+ TRACE_PROTO("handshake not complete", QUIC_EV_CONN_TRMHP, qc);
+ goto cant_rm_hp;
+ }
+
+ /* check if the connection layer is ready before using app level */
+ if ((qel == qc->ael || qel == qc->eel) &&
+ qc->mux_state == QC_MUX_NULL) {
+ TRACE_PROTO("connection layer not ready", QUIC_EV_CONN_TRMHP, qc);
+ goto cant_rm_hp;
+ }
+
+ ret = 1;
+ cant_rm_hp:
+ TRACE_LEAVE(QUIC_EV_CONN_TRMHP, qc);
+ return ret;
+}
+
+/* Process all the packets for all the encryption levels listed in <qc> QUIC connection.
+ * Return 1 if succeeded, 0 if not.
+ */
+int qc_treat_rx_pkts(struct quic_conn *qc)
+{
+ int ret = 0;
+ struct eb64_node *node;
+ int64_t largest_pn = -1;
+ unsigned int largest_pn_time_received = 0;
+ struct quic_enc_level *qel, *qelbak;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ list_for_each_entry_safe(qel, qelbak, &qc->qel_list, list) {
+ /* Treat packets waiting for header packet protection decryption */
+ if (!LIST_ISEMPTY(&qel->rx.pqpkts) && qc_qel_may_rm_hp(qc, qel))
+ qc_rm_hp_pkts(qc, qel);
+
+ node = eb64_first(&qel->rx.pkts);
+ while (node) {
+ struct quic_rx_packet *pkt;
+
+ pkt = eb64_entry(node, struct quic_rx_packet, pn_node);
+ TRACE_DATA("new packet", QUIC_EV_CONN_RXPKT,
+ qc, pkt, NULL, qc->xprt_ctx->ssl);
+ if (!qc_pkt_decrypt(qc, qel, pkt)) {
+ /* Drop the packet */
+ TRACE_ERROR("packet decryption failed -> dropped",
+ QUIC_EV_CONN_RXPKT, qc, pkt);
+ }
+ else {
+ if (!qc_parse_pkt_frms(qc, pkt, qel)) {
+ /* Drop the packet */
+ TRACE_ERROR("packet parsing failed -> dropped",
+ QUIC_EV_CONN_RXPKT, qc, pkt);
+ qc->cntrs.dropped_parsing++;
+ }
+ else {
+ struct quic_arng ar = { .first = pkt->pn, .last = pkt->pn };
+
+ /* RFC 9000 8.1. Address Validation during Connection Establishment
+ *
+ * Connection establishment implicitly provides address validation for
+ * both endpoints. In particular, receipt of a packet protected with
+ * Handshake keys confirms that the peer successfully processed an
+ * Initial packet.
+ */
+ if (qel == qc->hel &&
+ !(qc->flags & QUIC_FL_CONN_PEER_VALIDATED_ADDR)) {
+ TRACE_STATE("validate peer address on handshake packet",
+ QUIC_EV_CONN_RXPKT, qc, pkt);
+ qc->flags |= QUIC_FL_CONN_PEER_VALIDATED_ADDR;
+ BUG_ON(!qc->prx_counters->half_open_conn);
+ HA_ATOMIC_DEC(&qc->prx_counters->half_open_conn);
+ }
+
+ /* Update the list of ranges to acknowledge. */
+ if (quic_update_ack_ranges_list(qc, &qel->pktns->rx.arngs, &ar)) {
+ if (pkt->flags & QUIC_FL_RX_PACKET_ACK_ELICITING) {
+ int arm_ack_timer =
+ qc->state >= QUIC_HS_ST_COMPLETE &&
+ qel->pktns == qc->apktns;
+
+ qel->pktns->flags |= QUIC_FL_PKTNS_ACK_REQUIRED;
+ qel->pktns->rx.nb_aepkts_since_last_ack++;
+ qc_idle_timer_rearm(qc, 1, arm_ack_timer);
+ }
+
+ if (pkt->pn > largest_pn) {
+ largest_pn = pkt->pn;
+ largest_pn_time_received = pkt->time_received;
+ }
+ }
+ else {
+ TRACE_ERROR("Could not update ack range list",
+ QUIC_EV_CONN_RXPKT, qc);
+ }
+ }
+ }
+ node = eb64_next(node);
+ eb64_delete(&pkt->pn_node);
+ quic_rx_packet_refdec(pkt);
+ }
+
+ if (largest_pn != -1 && largest_pn > qel->pktns->rx.largest_pn) {
+ /* Update the largest packet number. */
+ qel->pktns->rx.largest_pn = largest_pn;
+ /* Update the reception timestamp of the largest packet number */
+ qel->pktns->rx.largest_time_received = largest_pn_time_received;
+ qel->pktns->flags |= QUIC_FL_PKTNS_NEW_LARGEST_PN;
+ }
+
+ if (qel->cstream) {
+ struct ncbuf *ncbuf = &qel->cstream->rx.ncbuf;
+
+ if (!ncb_is_null(ncbuf) && ncb_data(ncbuf, 0)) {
+ /* Some in-order CRYPTO data were buffered. */
+ HA_ATOMIC_OR(&qc->wait_event.tasklet->state, TASK_HEAVY);
+ }
+ }
+
+ /* Release the Initial encryption level and packet number space. */
+ if ((qc->flags & QUIC_FL_CONN_IPKTNS_DCD) && qel == qc->iel) {
+ qc_enc_level_free(qc, &qc->iel);
+ quic_pktns_release(qc, &qc->ipktns);
+ }
+
+ largest_pn = -1;
+ }
+
+ out:
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+ return ret;
+}
+
+/* Parse into <pkt> a long header located at <*pos> position, <end> being a
+ * pointer to one byte past the end of this buffer.
+ */
+static inline int quic_packet_read_long_header(unsigned char **pos, const unsigned char *end,
+ struct quic_rx_packet *pkt)
+{
+ int ret = 0;
+ unsigned char dcid_len, scid_len;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT);
+
+ if (end == *pos) {
+ TRACE_ERROR("buffer data consumed", QUIC_EV_CONN_RXPKT);
+ goto leave;
+ }
+
+ /* Destination Connection ID Length */
+ dcid_len = *(*pos)++;
+ /* We want to be sure we can read <dcid_len> bytes and one more for <scid_len> value */
+ if (dcid_len > QUIC_CID_MAXLEN || end - *pos < dcid_len + 1) {
+ TRACE_ERROR("too long DCID", QUIC_EV_CONN_RXPKT);
+ goto leave;
+ }
+
+ if (dcid_len) {
+ /* Check that the length of this received DCID matches the CID lengths
+ * of our implementation, for non-Initial packets only.
+ */
+ if (pkt->version && pkt->version->num &&
+ pkt->type != QUIC_PACKET_TYPE_INITIAL &&
+ pkt->type != QUIC_PACKET_TYPE_0RTT &&
+ dcid_len != QUIC_HAP_CID_LEN) {
+ TRACE_ERROR("wrong DCID length", QUIC_EV_CONN_RXPKT);
+ goto leave;
+ }
+
+ memcpy(pkt->dcid.data, *pos, dcid_len);
+ }
+
+ pkt->dcid.len = dcid_len;
+ *pos += dcid_len;
+
+ /* Source Connection ID Length */
+ scid_len = *(*pos)++;
+ if (scid_len > QUIC_CID_MAXLEN || end - *pos < scid_len) {
+ TRACE_ERROR("too long SCID", QUIC_EV_CONN_RXPKT);
+ goto leave;
+ }
+
+ if (scid_len)
+ memcpy(pkt->scid.data, *pos, scid_len);
+ pkt->scid.len = scid_len;
+ *pos += scid_len;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT);
+ return ret;
+}
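+
+/* Illustration: the portion of the long header parsed by the function above,
+ * as laid out by RFC 9000 section 17.2 (the first byte and the 32-bit Version
+ * field were already consumed by qc_parse_hd_form()):
+ *
+ *   DCID Len (1 byte) | DCID (0..20 bytes) | SCID Len (1 byte) | SCID (0..20 bytes)
+ *
+ * QUIC_CID_MAXLEN is assumed to match the 20-byte RFC upper bound, hence the
+ * length checks above.
+ */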
+
+/* Try to remove the header protection of <pkt> QUIC packet with <beg> the
+ * address of the packet first byte, using the keys from encryption level <el>.
+ *
+ * If header protection has been successfully removed, packet data are copied
+ * into <qc> Rx buffer. If <el> secrets are not yet available, the copy is
+ * still performed, and the packet is inserted into <qc> protected packets tree. In
+ * both cases, packet can now be considered handled by the <qc> connection.
+ *
+ * If header protection cannot be removed due to <el> secrets already
+ * discarded, no operation is conducted.
+ *
+ * Returns 1 on success : packet data is now handled by the connection. On
+ * error 0 is returned : packet should be dropped by the caller.
+ */
+static int qc_try_rm_hp(struct quic_conn *qc, struct quic_rx_packet *pkt,
+ unsigned char *beg, struct quic_enc_level **el)
+{
+ int ret = 0;
+ unsigned char *pn = NULL; /* Packet number field */
+ enum quic_tls_enc_level tel;
+ struct quic_enc_level *qel;
+
+ TRACE_ENTER(QUIC_EV_CONN_TRMHP, qc);
+ BUG_ON(!pkt->pn_offset);
+
+ /* The packet number is here. The sample used to add/remove the header
+ * protection starts QUIC_PACKET_PN_MAXLEN bytes past this position.
+ */
+ pn = beg + pkt->pn_offset;
+
+ tel = quic_packet_type_enc_level(pkt->type);
+ qel = qc_quic_enc_level(qc, tel);
+ if (!qel) {
+ struct quic_enc_level **qc_qel = qel_to_qel_addr(qc, tel);
+ struct quic_pktns **qc_pktns = qel_to_quic_pktns(qc, tel);
+
+ if (!qc_enc_level_alloc(qc, qc_pktns, qc_qel, quic_to_ssl_enc_level(tel))) {
+ TRACE_PROTO("Could not allocated an encryption level", QUIC_EV_CONN_ADDDATA, qc);
+ goto out;
+ }
+
+ qel = *qc_qel;
+ }
+
+ if (qc_qel_may_rm_hp(qc, qel)) {
+ struct quic_tls_ctx *tls_ctx =
+ qc_select_tls_ctx(qc, qel, pkt->type, pkt->version);
+
+ /* Note that the following function enables us to unprotect the packet
+ * number and its length, subsequently used to decrypt the entire
+ * packet.
+ */
+ if (!qc_do_rm_hp(qc, pkt, tls_ctx,
+ qel->pktns->rx.largest_pn, pn, beg)) {
+ TRACE_PROTO("hp error", QUIC_EV_CONN_TRMHP, qc);
+ goto out;
+ }
+
+ qc_handle_spin_bit(qc, pkt, qel);
+ /* The AAD includes the packet number field. */
+ pkt->aad_len = pkt->pn_offset + pkt->pnl;
+ if (pkt->len - pkt->aad_len < QUIC_TLS_TAG_LEN) {
+ TRACE_PROTO("Too short packet", QUIC_EV_CONN_TRMHP, qc);
+ goto out;
+ }
+
+ TRACE_PROTO("RX hp removed", QUIC_EV_CONN_TRMHP, qc, pkt);
+ }
+ else {
+ TRACE_PROTO("RX hp not removed", QUIC_EV_CONN_TRMHP, qc, pkt);
+ LIST_APPEND(&qel->rx.pqpkts, &pkt->list);
+ quic_rx_packet_refinc(pkt);
+ }
+
+ *el = qel;
+ /* No reference counter increment here!!! */
+ LIST_APPEND(&qc->rx.pkt_list, &pkt->qc_rx_pkt_list);
+ memcpy(b_tail(&qc->rx.buf), beg, pkt->len);
+ pkt->data = (unsigned char *)b_tail(&qc->rx.buf);
+ b_add(&qc->rx.buf, pkt->len);
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TRMHP, qc);
+ return ret;
+}
+
+/* Read a 32-bit integer into <val> from a QUIC packet with <buf> as address.
+ * Makes <buf> point to the data after this 32-bit value on success.
+ * Note that these 32-bit integers are in network byte order.
+ * Returns 0 on failure (not enough data in the buffer), 1 on success.
+ */
+static inline int quic_read_uint32(uint32_t *val,
+ const unsigned char **buf,
+ const unsigned char *end)
+{
+ if (end - *buf < sizeof *val)
+ return 0;
+
+ *val = ntohl(*(uint32_t *)*buf);
+ *buf += sizeof *val;
+
+ return 1;
+}
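+
+/* Usage sketch for quic_read_uint32(), with a hypothetical caller reading a
+ * Version field (<p>, <data> and <len> are illustrative names). On failure
+ * <p> is left unchanged, so the caller may still report where parsing stopped:
+ *
+ *   const unsigned char *p = data;
+ *   uint32_t version;
+ *
+ *   if (!quic_read_uint32(&version, &p, data + len))
+ *       return 0; // not enough bytes left
+ */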
+
+/* Parse a QUIC packet header starting at <pos> position without exceeding <end>.
+ * Version and type are stored in <pkt> packet instance. Type is set to unknown
+ * in two cases: for an unsupported version, in which case the version field is
+ * set to NULL; and for a Version Negotiation packet, whose version number is 0.
+ *
+ * Returns 1 on success else 0.
+ */
+int qc_parse_hd_form(struct quic_rx_packet *pkt,
+ unsigned char **pos, const unsigned char *end)
+{
+ uint32_t version;
+ int ret = 0;
+ const unsigned char byte0 = **pos;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT);
+ pkt->version = NULL;
+ pkt->type = QUIC_PACKET_TYPE_UNKNOWN;
+
+ (*pos)++;
+ if (byte0 & QUIC_PACKET_LONG_HEADER_BIT) {
+ unsigned char type =
+ (byte0 >> QUIC_PACKET_TYPE_SHIFT) & QUIC_PACKET_TYPE_BITMASK;
+
+ /* Version */
+ if (!quic_read_uint32(&version, (const unsigned char **)pos, end)) {
+ TRACE_ERROR("could not read the packet version", QUIC_EV_CONN_RXPKT);
+ goto out;
+ }
+
+ pkt->version = qc_supported_version(version);
+ if (version && pkt->version) {
+ if (version != QUIC_PROTOCOL_VERSION_2) {
+ pkt->type = type;
+ }
+ else {
+ switch (type) {
+ case 0:
+ pkt->type = QUIC_PACKET_TYPE_RETRY;
+ break;
+ case 1:
+ pkt->type = QUIC_PACKET_TYPE_INITIAL;
+ break;
+ case 2:
+ pkt->type = QUIC_PACKET_TYPE_0RTT;
+ break;
+ case 3:
+ pkt->type = QUIC_PACKET_TYPE_HANDSHAKE;
+ break;
+ }
+ }
+ }
+ }
+ else {
+ if (byte0 & QUIC_PACKET_SPIN_BIT)
+ pkt->flags |= QUIC_FL_RX_PACKET_SPIN_BIT;
+ pkt->type = QUIC_PACKET_TYPE_SHORT;
+ }
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT);
+ return ret;
+}
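+
+/* Illustration: first byte layouts dispatched above, per RFC 9000 section 17
+ * (bit values assumed from the QUIC_PACKET_* masks):
+ *
+ *   long header:  1 1 T T X X X X   (0x80 = long form, 0x40 = fixed bit,
+ *                                    packet type in bits 5-4)
+ *   short header: 0 1 S R R K P P   (0x20 = spin bit)
+ *
+ * The switch on <type> remaps QUIC v2 long header types because RFC 9369
+ * deliberately shuffles them: 0 = Retry, 1 = Initial, 2 = 0-RTT,
+ * 3 = Handshake.
+ */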
+
+/* Check that all the bytes between <pos> included and <end> address
+ * excluded are null. It is the responsibility of the caller to
+ * check that there is at least one byte between <pos> and <end>.
+ * Return 1 if all the bytes are null, 0 if not.
+ */
+static inline int quic_padding_check(const unsigned char *pos,
+ const unsigned char *end)
+{
+ while (pos < end && !*pos)
+ pos++;
+
+ return pos == end;
+}
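+
+/* Illustration: a datagram ending with <pkt bytes> 00 00 00 passes
+ * quic_padding_check() on its three trailing bytes, so this padding is
+ * silently consumed by quic_rx_pkt_parse() below instead of invalidating the
+ * whole datagram.
+ */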
+
+/* Find the associated connection to the packet <pkt> or create a new one if
+ * this is an Initial packet. <dgram> is the datagram containing the packet and
+ * <l> is the listener instance on which it was received.
+ *
+ * By default, <new_tid> is set to -1. However, if thread affinity has been
+ * changed, it will be set to its new thread ID.
+ *
+ * Returns the quic-conn instance or NULL if not found or thread affinity
+ * changed.
+ */
+static struct quic_conn *quic_rx_pkt_retrieve_conn(struct quic_rx_packet *pkt,
+ struct quic_dgram *dgram,
+ struct listener *l,
+ int *new_tid)
+{
+ struct quic_cid token_odcid = { .len = 0 };
+ struct quic_conn *qc = NULL;
+ struct proxy *prx;
+ struct quic_counters *prx_counters;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT);
+
+ *new_tid = -1;
+
+ prx = l->bind_conf->frontend;
+ prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module);
+
+ qc = retrieve_qc_conn_from_cid(pkt, &dgram->saddr, new_tid);
+
+ /* Connection already created or rebound on another thread. */
+ if (!qc && *new_tid != -1 && tid != *new_tid)
+ goto out;
+
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL) {
+ BUG_ON(!pkt->version); /* This must not happen. */
+
+ if (!qc) {
+ struct quic_cid_tree *tree;
+ struct ebmb_node *node;
+ struct quic_connection_id *conn_id;
+ int ipv4;
+
+ /* Reject INITIAL early if listener limits reached. */
+ if (unlikely(HA_ATOMIC_LOAD(&l->rx.quic_curr_handshake) >=
+ quic_listener_max_handshake(l))) {
+ TRACE_DATA("Drop INITIAL on max handshake",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto out;
+ }
+
+ if (unlikely(HA_ATOMIC_LOAD(&l->rx.quic_curr_accept) >=
+ quic_listener_max_accept(l))) {
+ TRACE_DATA("Drop INITIAL on max accept",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto out;
+ }
+
+ if (pkt->token_len) {
+ /* Validate the token only when connection is unknown. */
+ if (!quic_retry_token_check(pkt, dgram, l, qc, &token_odcid))
+ goto err;
+ }
+ else if (!(l->bind_conf->options & BC_O_QUIC_FORCE_RETRY) &&
+ HA_ATOMIC_LOAD(&prx_counters->half_open_conn) >= global.tune.quic_retry_threshold) {
+ TRACE_PROTO("Initial without token, sending retry",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ if (send_retry(l->rx.fd, &dgram->saddr, pkt, pkt->version)) {
+ TRACE_ERROR("Error during Retry generation",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto out;
+ }
+
+ HA_ATOMIC_INC(&prx_counters->retry_sent);
+ goto out;
+ }
+
+ /* RFC 9000 7.2. Negotiating Connection IDs:
+ * When an Initial packet is sent by a client that has not previously
+ * received an Initial or Retry packet from the server, the client
+ * populates the Destination Connection ID field with an unpredictable
+ * value. This Destination Connection ID MUST be at least 8 bytes in length.
+ */
+ if (pkt->dcid.len < QUIC_ODCID_MINLEN) {
+ TRACE_PROTO("dropped packet",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto err;
+ }
+
+ pkt->saddr = dgram->saddr;
+ ipv4 = dgram->saddr.ss_family == AF_INET;
+
+ /* Generate the first connection CID. This is derived from the client
+ * ODCID and address. This makes it possible to retrieve the connection from
+ * the ODCID without storing it in the CID tree. This is an interesting
+ * optimization as the client is expected to stop using its ODCID in
+ * favor of our generated value.
+ */
+ conn_id = new_quic_cid(NULL, NULL, &pkt->dcid, &pkt->saddr);
+ if (!conn_id)
+ goto err;
+
+ qc = qc_new_conn(pkt->version, ipv4, &pkt->dcid, &pkt->scid, &token_odcid,
+ conn_id, &dgram->daddr, &pkt->saddr, 1,
+ !!pkt->token_len, l);
+ if (qc == NULL) {
+ pool_free(pool_head_quic_connection_id, conn_id);
+ goto err;
+ }
+
+ /* Compute and store into the quic_conn the hash used to compute extra CIDs */
+ if (quic_hash64_from_cid)
+ qc->hash64 = quic_hash64_from_cid(conn_id->cid.data, conn_id->cid.len,
+ global.cluster_secret, sizeof(global.cluster_secret));
+
+ tree = &quic_cid_trees[quic_cid_tree_idx(&conn_id->cid)];
+ HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock);
+ node = ebmb_insert(&tree->root, &conn_id->node, conn_id->cid.len);
+ if (node != &conn_id->node) {
+ pool_free(pool_head_quic_connection_id, conn_id);
+
+ conn_id = ebmb_entry(node, struct quic_connection_id, node);
+ *new_tid = HA_ATOMIC_LOAD(&conn_id->tid);
+ quic_conn_release(qc);
+ qc = NULL;
+ }
+ else {
+ /* From here, <qc> is the correct connection for this <pkt> Initial
+ * packet. <conn_id> must be inserted in the CIDs tree for this
+ * connection.
+ */
+ eb64_insert(qc->cids, &conn_id->seq_num);
+ /* Initialize the next CID sequence number to be used for this connection. */
+ qc->next_cid_seq_num = 1;
+ }
+ HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock);
+
+ if (*new_tid != -1)
+ goto out;
+ }
+ }
+ else if (!qc) {
+ TRACE_PROTO("RX non Initial pkt without connection", QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ if (!send_stateless_reset(l, &dgram->saddr, pkt))
+ TRACE_ERROR("stateless reset not sent", QUIC_EV_CONN_LPKT, qc);
+ goto err;
+ }
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return qc;
+
+ err:
+ HA_ATOMIC_INC(&prx_counters->dropped_pkt);
+
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return NULL;
+}
+
+/* Parse a QUIC packet starting at <pos>. Data won't be read after <end> even
+ * if the packet is incomplete. This function will populate fields of <pkt>
+ * instance, most notably its length. <dgram> is the UDP datagram which
+ * contains the parsed packet. <l> is the listener instance on which it was
+ * received.
+ *
+ * Returns 0 on success else non-zero. Packet length is guaranteed to be set to
+ * the real packet value or to cover all data between <pos> and <end> : this is
+ * useful to reject a whole datagram.
+ */
+static int quic_rx_pkt_parse(struct quic_rx_packet *pkt,
+ unsigned char *pos, const unsigned char *end,
+ struct quic_dgram *dgram, struct listener *l)
+{
+ const unsigned char *beg = pos;
+ struct proxy *prx;
+ struct quic_counters *prx_counters;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT);
+
+ prx = l->bind_conf->frontend;
+ prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module);
+
+ if (end <= pos) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* Fixed bit */
+ if (!(*pos & QUIC_PACKET_FIXED_BIT)) {
+ if (!(pkt->flags & QUIC_FL_RX_PACKET_DGRAM_FIRST) &&
+ quic_padding_check(pos, end)) {
+ /* Some browsers may pad the remaining datagram space with null bytes.
+ * This is what we call padding added outside of QUIC packets. Such
+ * datagrams must be considered valid, but we can only consume
+ * the remaining space.
+ */
+ pkt->len = end - pos;
+ goto drop_silent;
+ }
+
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* Header form */
+ if (!qc_parse_hd_form(pkt, &pos, end)) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ if (pkt->type != QUIC_PACKET_TYPE_SHORT) {
+ uint64_t len;
+ TRACE_PROTO("long header packet received", QUIC_EV_CONN_LPKT);
+
+ if (!quic_packet_read_long_header(&pos, end, pkt)) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* When multiple QUIC packets are coalesced on the same UDP datagram,
+ * they must have the same DCID.
+ */
+ if (!(pkt->flags & QUIC_FL_RX_PACKET_DGRAM_FIRST) &&
+ (pkt->dcid.len != dgram->dcid_len ||
+ memcmp(dgram->dcid, pkt->dcid.data, pkt->dcid.len))) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* Retry and Version Negotiation packets are only sent by servers */
+ if (pkt->type == QUIC_PACKET_TYPE_RETRY ||
+ (pkt->version && !pkt->version->num)) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ /* RFC9000 6. Version Negotiation */
+ if (!pkt->version) {
+ /* unsupported version, send Negotiation packet */
+ if (send_version_negotiation(l->rx.fd, &dgram->saddr, pkt)) {
+ TRACE_ERROR("VN packet not sent", QUIC_EV_CONN_LPKT);
+ goto drop_silent;
+ }
+
+ TRACE_PROTO("VN packet sent", QUIC_EV_CONN_LPKT);
+ goto drop_silent;
+ }
+
+ /* For Initial packets, and for servers (QUIC client connections),
+ * there is no Initial connection ID storage.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL) {
+ uint64_t token_len;
+
+ if (!quic_dec_int(&token_len, (const unsigned char **)&pos, end) ||
+ end - pos < token_len) {
+ TRACE_PROTO("Packet dropped",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto drop;
+ }
+
+ /* TODO Retry should be automatically activated if
+ * suspect network usage is detected.
+ */
+ if (!token_len) {
+ if (l->bind_conf->options & BC_O_QUIC_FORCE_RETRY) {
+ TRACE_PROTO("Initial without token, sending retry",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ if (send_retry(l->rx.fd, &dgram->saddr, pkt, pkt->version)) {
+ TRACE_PROTO("Error during Retry generation",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto drop_silent;
+ }
+
+ HA_ATOMIC_INC(&prx_counters->retry_sent);
+ goto drop_silent;
+ }
+ }
+
+ pkt->token = pos;
+ pkt->token_len = token_len;
+ pos += pkt->token_len;
+ }
+ else if (pkt->type != QUIC_PACKET_TYPE_0RTT) {
+ if (pkt->dcid.len != QUIC_HAP_CID_LEN) {
+ TRACE_PROTO("Packet dropped",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto drop;
+ }
+ }
+
+ if (!quic_dec_int(&len, (const unsigned char **)&pos, end) ||
+ end - pos < len) {
+ TRACE_PROTO("Packet dropped",
+ QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version);
+ goto drop;
+ }
+
+ /* The Packet Number is stored here. The packet length covers the
+ * rest of the content.
+ */
+ pkt->pn_offset = pos - beg;
+ pkt->len = pkt->pn_offset + len;
+
+ /* RFC 9000. Initial Datagram Size
+ *
+ * A server MUST discard an Initial packet that is carried in a UDP datagram
+ * with a payload that is smaller than the smallest allowed maximum datagram
+ * size of 1200 bytes.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL &&
+ dgram->len < QUIC_INITIAL_PACKET_MINLEN) {
+ TRACE_PROTO("RX too short datagram with an Initial packet", QUIC_EV_CONN_LPKT);
+ HA_ATOMIC_INC(&prx_counters->too_short_initial_dgram);
+ goto drop;
+ }
+
+ /* Interrupt parsing after packet length retrieval : this
+ * ensures that only the packet is dropped but not the whole
+ * datagram.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_0RTT && !l->bind_conf->ssl_conf.early_data) {
+ TRACE_PROTO("RX 0-RTT packet not supported", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+ }
+ else {
+ TRACE_PROTO("RX short header packet", QUIC_EV_CONN_LPKT);
+ if (end - pos < QUIC_HAP_CID_LEN) {
+ TRACE_PROTO("RX pkt dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ memcpy(pkt->dcid.data, pos, QUIC_HAP_CID_LEN);
+ pkt->dcid.len = QUIC_HAP_CID_LEN;
+
+ /* When multiple QUIC packets are coalesced on the same UDP datagram,
+ * they must have the same DCID.
+ */
+ if (!(pkt->flags & QUIC_FL_RX_PACKET_DGRAM_FIRST) &&
+ (pkt->dcid.len != dgram->dcid_len ||
+ memcmp(dgram->dcid, pkt->dcid.data, pkt->dcid.len))) {
+ TRACE_PROTO("RX pkt dropped", QUIC_EV_CONN_LPKT);
+ goto drop;
+ }
+
+ pos += QUIC_HAP_CID_LEN;
+
+ pkt->pn_offset = pos - beg;
+ /* A short packet is the last one of a UDP datagram. */
+ pkt->len = end - beg;
+ }
+
+ TRACE_PROTO("RX pkt parsed", QUIC_EV_CONN_LPKT, NULL, pkt, NULL, pkt->version);
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return 0;
+
+ drop:
+ HA_ATOMIC_INC(&prx_counters->dropped_pkt);
+ drop_silent:
+ if (!pkt->len)
+ pkt->len = end - beg;
+ TRACE_PROTO("RX pkt parsing failed", QUIC_EV_CONN_LPKT, NULL, pkt, NULL, pkt->version);
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return -1;
+}
+
+/* Check if received packet <pkt> should be dropped because <qc> is already in
+ * closing state. This can be true if a CONNECTION_CLOSE has already been
+ * emitted for this connection.
+ *
+ * Returns false if the connection is not in closing state, else true. In the
+ * latter case the caller should drop the whole datagram so as not to mess up
+ * the <qc> CONNECTION_CLOSE rate limit counter.
+ */
+static int qc_rx_check_closing(struct quic_conn *qc,
+ struct quic_rx_packet *pkt)
+{
+ if (!(qc->flags & QUIC_FL_CONN_CLOSING))
+ return 0;
+
+ TRACE_STATE("Closing state connection", QUIC_EV_CONN_LPKT, qc, NULL, NULL, pkt->version);
+
+ /* Check if the CONNECTION_CLOSE re-emission threshold is reached. */
+ if (++qc->nb_pkt_since_cc >= qc->nb_pkt_for_cc) {
+ qc->flags |= QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ qc->nb_pkt_for_cc++;
+ qc->nb_pkt_since_cc = 0;
+ }
+
+ return 1;
+}
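+
+/* Illustration: assuming <nb_pkt_for_cc> starts at 1, the threshold above
+ * grows linearly, so a CONNECTION_CLOSE is re-emitted after the 1st, 3rd,
+ * 6th, 10th, ... packet received while closing, progressively rate-limiting
+ * the responses sent to a retransmitting peer.
+ */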
+
+/* Release the memory for the RX packets which are no longer referenced
+ * and consume their payloads which have been copied to the RX buffer
+ * for the connection.
+ * Always succeeds.
+ */
+static void quic_rx_pkts_del(struct quic_conn *qc)
+{
+ struct quic_rx_packet *pkt, *pktback;
+
+ list_for_each_entry_safe(pkt, pktback, &qc->rx.pkt_list, qc_rx_pkt_list) {
+ TRACE_PRINTF(TRACE_LEVEL_DEVELOPER, QUIC_EV_CONN_LPKT, qc, 0, 0, 0,
+ "pkt #%lld(type=%d,len=%llu,rawlen=%llu,refcnt=%u) (diff: %zd)",
+ (long long)pkt->pn_node.key,
+ pkt->type, (ull)pkt->len, (ull)pkt->raw_len, pkt->refcnt,
+ (unsigned char *)b_head(&qc->rx.buf) - pkt->data);
+ if (pkt->data != (unsigned char *)b_head(&qc->rx.buf)) {
+ size_t cdata;
+
+ cdata = b_contig_data(&qc->rx.buf, 0);
+ TRACE_PRINTF(TRACE_LEVEL_DEVELOPER, QUIC_EV_CONN_LPKT, qc, 0, 0, 0,
+ "cdata=%llu *b_head()=0x%x", (ull)cdata, *b_head(&qc->rx.buf));
+ if (cdata && !*b_head(&qc->rx.buf)) {
+ /* Consume the remaining data */
+ b_del(&qc->rx.buf, cdata);
+ }
+ break;
+ }
+
+ if (pkt->refcnt)
+ break;
+
+ b_del(&qc->rx.buf, pkt->raw_len);
+ LIST_DELETE(&pkt->qc_rx_pkt_list);
+ pool_free(pool_head_quic_rx_packet, pkt);
+ }
+
+ /* Quite frequently the buffer will be empty at this stage. */
+ b_realign_if_empty(&qc->rx.buf);
+}
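+
+/* Illustration: the RX buffer behaves as a FIFO of contiguous packet
+ * payloads:
+ *
+ *   [pkt0][pkt1][pkt2]...          b_head() == pkt0->data
+ *
+ * Only unreferenced packets at the head can be released, which is why the
+ * loop above stops on the first referenced packet or on the first payload
+ * which is not located at the buffer head.
+ */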
+
+/* Handle a parsed packet <pkt> by the connection <qc>. Data will be copied
+ * into <qc> receive buffer after header protection removal procedure.
+ *
+ * <dgram> must be set to the datagram which contains the QUIC packet. <beg>
+ * must point to packet buffer first byte.
+ *
+ * <tasklist_head> may be non-NULL when the caller treats several datagrams for
+ * different quic-conn. In this case, each quic-conn tasklet will be appended
+ * to it in order to be woken up after the current task.
+ *
+ * The caller can safely remove the packet data. If the packet refcount was not
+ * incremented by this function, it means that the connection did not handle
+ * it and it should be freed by the caller.
+ */
+static void qc_rx_pkt_handle(struct quic_conn *qc, struct quic_rx_packet *pkt,
+ struct quic_dgram *dgram, unsigned char *beg,
+ struct list **tasklist_head)
+{
+ const struct quic_version *qv = pkt->version;
+ struct quic_enc_level *qel = NULL;
+ size_t b_cspace;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+ TRACE_PROTO("RX pkt", QUIC_EV_CONN_LPKT, qc, pkt, NULL, qv);
+
+ if (pkt->flags & QUIC_FL_RX_PACKET_DGRAM_FIRST &&
+ qc->flags & QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED) {
+ TRACE_PROTO("PTO timer must be armed after anti-amplication was reached",
+ QUIC_EV_CONN_LPKT, qc, NULL, NULL, qv);
+ TRACE_DEVEL("needs to wakeup the timer task after the amplification limit was reached",
+ QUIC_EV_CONN_LPKT, qc);
+ /* Reset the anti-amplification bit. It will be set again
+ * when sending the next packet if reached again.
+ */
+ qc->flags &= ~QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
+ qc_set_timer(qc);
+ if (qc->timer_task && tick_isset(qc->timer) && tick_is_lt(qc->timer, now_ms))
+ task_wakeup(qc->timer_task, TASK_WOKEN_MSG);
+ }
+
+ /* Drop ASAP packets whose packet number space has been discarded. */
+ if (quic_tls_pkt_type_pktns_dcd(qc, pkt->type)) {
+ TRACE_PROTO("Discarded packet number space", QUIC_EV_CONN_TRMHP, qc);
+ goto drop_silent;
+ }
+
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) {
+ TRACE_PROTO("Connection error",
+ QUIC_EV_CONN_LPKT, qc, NULL, NULL, qv);
+ goto out;
+ }
+
+ pkt->raw_len = pkt->len;
+ quic_rx_pkts_del(qc);
+ b_cspace = b_contig_space(&qc->rx.buf);
+ if (b_cspace < pkt->len) {
+ TRACE_PRINTF(TRACE_LEVEL_DEVELOPER, QUIC_EV_CONN_LPKT, qc, 0, 0, 0,
+ "bspace=%llu pkt->len=%llu", (ull)b_cspace, (ull)pkt->len);
+ /* Do not consume buf if space not at the end. */
+ if (b_tail(&qc->rx.buf) + b_cspace < b_wrap(&qc->rx.buf)) {
+ TRACE_PROTO("Packet dropped",
+ QUIC_EV_CONN_LPKT, qc, NULL, NULL, qv);
+ qc->cntrs.dropped_pkt_bufoverrun++;
+ goto drop_silent;
+ }
+
+ /* Let us consume the remaining contiguous space. */
+ if (b_cspace) {
+ b_putchr(&qc->rx.buf, 0x00);
+ b_cspace--;
+ }
+ b_add(&qc->rx.buf, b_cspace);
+ if (b_contig_space(&qc->rx.buf) < pkt->len) {
+ TRACE_PROTO("Too big packet",
+ QUIC_EV_CONN_LPKT, qc, pkt, &pkt->len, qv);
+ qc->cntrs.dropped_pkt_bufoverrun++;
+ goto drop_silent;
+ }
+ }
+
+ if (!qc_try_rm_hp(qc, pkt, beg, &qel)) {
+ TRACE_PROTO("Packet dropped", QUIC_EV_CONN_LPKT, qc, NULL, NULL, qv);
+ goto drop;
+ }
+
+ TRACE_DATA("New packet", QUIC_EV_CONN_LPKT, qc, pkt, NULL, qv);
+ if (pkt->aad_len) {
+ /* Insert this RX packet in its encryption level tree */
+ pkt->pn_node.key = pkt->pn;
+ quic_rx_packet_refinc(pkt);
+ eb64_insert(&qel->rx.pkts, &pkt->pn_node);
+ }
+ out:
+ *tasklist_head = tasklet_wakeup_after(*tasklist_head,
+ qc->wait_event.tasklet);
+
+ drop_silent:
+ TRACE_PROTO("RX pkt", QUIC_EV_CONN_LPKT, qc ? qc : NULL, pkt, NULL, qv);
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc ? qc : NULL);
+ return;
+
+ drop:
+ qc->cntrs.dropped_pkt++;
+ TRACE_PROTO("packet drop", QUIC_EV_CONN_LPKT, qc, pkt, NULL, qv);
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+}
+
+/* Handle a new <dgram> received. Parse each QUIC packet and copy its
+ * content to a quic-conn instance. The datagram content can be released after
+ * this function.
+ *
+ * If datagram has been received on a quic-conn owned FD, <from_qc> must be set
+ * to the connection instance. <li> is the attached listener. The caller is
+ * responsible for ensuring that the first packet is destined to this connection
+ * by comparing CIDs.
+ *
+ * If datagram has been received on a receiver FD, <from_qc> will be NULL. This
+ * function will thus retrieve the connection from the CID tree or allocate a
+ * new one if possible. <li> is the listener attached to the receiver.
+ *
+ * Returns 0 on success else non-zero. If an error happens, some packets from
+ * the datagram may not have been parsed.
+ */
+int quic_dgram_parse(struct quic_dgram *dgram, struct quic_conn *from_qc,
+ struct listener *li)
+{
+ struct quic_rx_packet *pkt;
+ struct quic_conn *qc = NULL;
+ unsigned char *pos, *end;
+ struct list *tasklist_head = NULL;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT);
+
+ pos = dgram->buf;
+ end = pos + dgram->len;
+ do {
+ pkt = pool_alloc(pool_head_quic_rx_packet);
+ if (!pkt) {
+ TRACE_ERROR("RX packet allocation failed", QUIC_EV_CONN_LPKT);
+ goto err;
+ }
+
+ LIST_INIT(&pkt->qc_rx_pkt_list);
+ pkt->version = NULL;
+ pkt->type = QUIC_PACKET_TYPE_UNKNOWN;
+ pkt->pn_offset = 0;
+ pkt->len = 0;
+ pkt->raw_len = 0;
+ pkt->token = NULL;
+ pkt->token_len = 0;
+ pkt->aad_len = 0;
+ pkt->data = NULL;
+ pkt->pn_node.key = (uint64_t)-1;
+ pkt->refcnt = 0;
+ pkt->flags = 0;
+ pkt->time_received = now_ms;
+
+ /* Set flag if pkt is the first one in dgram. */
+ if (pos == dgram->buf)
+ pkt->flags |= QUIC_FL_RX_PACKET_DGRAM_FIRST;
+
+ quic_rx_packet_refinc(pkt);
+ if (quic_rx_pkt_parse(pkt, pos, end, dgram, li))
+ goto next;
+
+ /* Search the quic-conn instance for the first packet of the datagram.
+ * quic_rx_pkt_parse() is responsible for discarding packets
+ * whose DCID differs from the first one in the same datagram.
+ */
+ if (!qc) {
+ int new_tid = -1;
+
+ qc = from_qc ? from_qc : quic_rx_pkt_retrieve_conn(pkt, dgram, li, &new_tid);
+ /* qc is NULL if receiving a non Initial packet for an
+ * unknown connection or on connection affinity rebind.
+ */
+ if (!qc) {
+ if (new_tid >= 0) {
+ MT_LIST_APPEND(&quic_dghdlrs[new_tid].dgrams,
+ &dgram->handler_list);
+ tasklet_wakeup(quic_dghdlrs[new_tid].task);
+ pool_free(pool_head_quic_rx_packet, pkt);
+ goto out;
+ }
+
+ /* Skip the entire datagram. */
+ pkt->len = end - pos;
+ goto next;
+ }
+
+ dgram->qc = qc;
+ }
+
+ /* Ensure thread connection migration is finalized ASAP. */
+ if (qc->flags & QUIC_FL_CONN_AFFINITY_CHANGED)
+ qc_finalize_affinity_rebind(qc);
+
+ if (qc_rx_check_closing(qc, pkt)) {
+ /* Skip the entire datagram. */
+ pkt->len = end - pos;
+ goto next;
+ }
+
+ /* Detect QUIC connection migration. */
+ if (ipcmp(&qc->peer_addr, &dgram->saddr, 1)) {
+ if (qc_handle_conn_migration(qc, &dgram->saddr, &dgram->daddr)) {
+ /* Skip the entire datagram. */
+ TRACE_ERROR("error during connection migration, datagram dropped", QUIC_EV_CONN_LPKT, qc);
+ pkt->len = end - pos;
+ goto next;
+ }
+ }
+
+ qc_rx_pkt_handle(qc, pkt, dgram, pos, &tasklist_head);
+
+ next:
+ pos += pkt->len;
+ quic_rx_packet_refdec(pkt);
+
+ /* Free rejected packets */
+ if (!pkt->refcnt) {
+ BUG_ON(LIST_INLIST(&pkt->qc_rx_pkt_list));
+ pool_free(pool_head_quic_rx_packet, pkt);
+ }
+ } while (pos < end);
+
+ /* Increase the received bytes counter by the UDP datagram length
+ * if this datagram could be associated with a connection.
+ */
+ if (dgram->qc)
+ dgram->qc->bytes.rx += dgram->len;
+
+ /* This must never happen. */
+ BUG_ON(pos > end);
+ BUG_ON(pos < end || pos > dgram->buf + dgram->len);
+ /* Mark this datagram as consumed */
+ HA_ATOMIC_STORE(&dgram->buf, NULL);
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return 0;
+
+ err:
+ /* Mark this datagram as consumed since at least some packets may have been parsed. */
+ HA_ATOMIC_STORE(&dgram->buf, NULL);
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return -1;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/quic_sock.c b/src/quic_sock.c
new file mode 100644
index 0000000..c479249
--- /dev/null
+++ b/src/quic_sock.c
@@ -0,0 +1,1080 @@
+/*
+ * QUIC socket management.
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE /* required for struct in6_pktinfo */
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/fd.h>
+#include <haproxy/global-t.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/pool.h>
+#include <haproxy/proto_quic.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/quic_cid.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_rx.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/quic_tp-t.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/session.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/task.h>
+#include <haproxy/trace.h>
+#include <haproxy/tools.h>
+
+/* Log only first EACCES bind() error runtime occurrence. */
+static volatile char quic_bind_eacces_warn = 0;
+
+/* Retrieve a connection's source address. Returns -1 on failure. */
+int quic_sock_get_src(struct connection *conn, struct sockaddr *addr, socklen_t len)
+{
+ struct quic_conn *qc;
+
+ if (!conn || !conn->handle.qc)
+ return -1;
+
+ qc = conn->handle.qc;
+ if (conn_is_back(conn)) {
+ /* no source address defined for outgoing connections for now */
+ return -1;
+ } else {
+ /* front connection, return the peer's address */
+ if (len > sizeof(qc->peer_addr))
+ len = sizeof(qc->peer_addr);
+ memcpy(addr, &qc->peer_addr, len);
+ return 0;
+ }
+}
+
+/* Retrieve a connection's destination address. Returns -1 on failure. */
+int quic_sock_get_dst(struct connection *conn, struct sockaddr *addr, socklen_t len)
+{
+ struct quic_conn *qc;
+
+ if (!conn || !conn->handle.qc)
+ return -1;
+
+ qc = conn->handle.qc;
+ if (conn_is_back(conn)) {
+ /* back connection, return the peer's address */
+ if (len > sizeof(qc->peer_addr))
+ len = sizeof(qc->peer_addr);
+ memcpy(addr, &qc->peer_addr, len);
+ } else {
+ struct sockaddr_storage *from;
+
+ /* Return listener address if IP_PKTINFO or friends are not
+ * supported by the socket.
+ */
+ BUG_ON(!qc->li);
+ from = is_addr(&qc->local_addr) ? &qc->local_addr :
+ &qc->li->rx.addr;
+ if (len > sizeof(*from))
+ len = sizeof(*from);
+ memcpy(addr, from, len);
+ }
+ return 0;
+}
+
+/*
+ * Inspired by session_accept_fd().
+ * Instantiate a new connection (connection struct) to be attached to <qc>
+ * QUIC connection of <l> listener.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int new_quic_cli_conn(struct quic_conn *qc, struct listener *l,
+ struct sockaddr_storage *saddr)
+{
+ struct connection *cli_conn;
+
+ if (unlikely((cli_conn = conn_new(&l->obj_type)) == NULL))
+ goto out;
+
+ if (!sockaddr_alloc(&cli_conn->src, saddr, sizeof *saddr))
+ goto out_free_conn;
+
+ cli_conn->flags |= CO_FL_FDLESS;
+ qc->conn = cli_conn;
+ cli_conn->handle.qc = qc;
+
+ cli_conn->target = &l->obj_type;
+
+ return 1;
+
+ out_free_conn:
+ qc->conn = NULL;
+ conn_stop_tracking(cli_conn);
+ conn_xprt_close(cli_conn);
+ conn_free(cli_conn);
+ out:
+
+ return 0;
+}
+
+/* Tests if the receiver supports accepting connections. Returns positive on
+ * success, 0 if not possible.
+ */
+int quic_sock_accepting_conn(const struct receiver *rx)
+{
+ return 1;
+}
+
+/* Accept an incoming connection from listener <l>, and return it, as well as
+ * a CO_AC_* status code into <status> if not null. Null is returned on error.
+ * <l> must be a valid listener with a valid frontend.
+ */
+struct connection *quic_sock_accept_conn(struct listener *l, int *status)
+{
+ struct quic_conn *qc;
+ struct li_per_thread *lthr = &l->per_thr[ti->ltid];
+
+ qc = MT_LIST_POP(&lthr->quic_accept.conns, struct quic_conn *, accept_list);
+ if (!qc || qc->flags & (QUIC_FL_CONN_CLOSING|QUIC_FL_CONN_DRAINING))
+ goto done;
+
+ if (!new_quic_cli_conn(qc, l, &qc->peer_addr))
+ goto err;
+
+ done:
+ *status = CO_AC_DONE;
+
+ if (qc) {
+ BUG_ON(l->rx.quic_curr_accept <= 0);
+ HA_ATOMIC_DEC(&l->rx.quic_curr_accept);
+ return qc->conn;
+ }
+ else {
+ return NULL;
+ }
+
+ err:
+ /* in case of error reinsert the element to process it later. */
+ MT_LIST_INSERT(&lthr->quic_accept.conns, &qc->accept_list);
+
+ *status = CO_AC_PAUSE;
+ return NULL;
+}
+
+/* QUIC datagrams handler task. */
+struct task *quic_lstnr_dghdlr(struct task *t, void *ctx, unsigned int state)
+{
+ struct quic_dghdlr *dghdlr = ctx;
+ struct quic_dgram *dgram;
+ int max_dgrams = global.tune.maxpollevents;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT);
+
+ while ((dgram = MT_LIST_POP(&dghdlr->dgrams, typeof(dgram), handler_list))) {
+ if (quic_dgram_parse(dgram, NULL, dgram->owner)) {
+ /* TODO should we requeue the datagram ? */
+ break;
+ }
+
+ if (--max_dgrams <= 0)
+ goto stop_here;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return t;
+
+ stop_here:
+ /* too much work done at once, come back here later */
+ if (!MT_LIST_ISEMPTY(&dghdlr->dgrams))
+ tasklet_wakeup((struct tasklet *)t);
+
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT);
+ return t;
+}
+
+/* Retrieve the DCID from a QUIC datagram or packet at <pos> position,
+ * <end> being one byte past the end of this datagram.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_get_dgram_dcid(unsigned char *pos, const unsigned char *end,
+ unsigned char **dcid, size_t *dcid_len)
+{
+ int ret = 0, long_header;
+ size_t minlen, skip;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT);
+
+ if (!(*pos & QUIC_PACKET_FIXED_BIT)) {
+ TRACE_PROTO("fixed bit not set", QUIC_EV_CONN_RXPKT);
+ goto err;
+ }
+
+ long_header = *pos & QUIC_PACKET_LONG_HEADER_BIT;
+ minlen = long_header ? QUIC_LONG_PACKET_MINLEN :
+ QUIC_SHORT_PACKET_MINLEN + QUIC_HAP_CID_LEN + QUIC_TLS_TAG_LEN;
+ skip = long_header ? QUIC_LONG_PACKET_DCID_OFF : QUIC_SHORT_PACKET_DCID_OFF;
+ if (end - pos < minlen)
+ goto err;
+
+ pos += skip;
+ *dcid_len = long_header ? *pos++ : QUIC_HAP_CID_LEN;
+ if (*dcid_len > QUIC_CID_MAXLEN || end - pos <= *dcid_len)
+ goto err;
+
+ *dcid = pos;
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT);
+ return ret;
+
+ err:
+ TRACE_PROTO("wrong datagram", QUIC_EV_CONN_RXPKT);
+ goto leave;
+}
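+
+/* Illustration: the *_DCID_OFF offsets used above are assumed to skip the
+ * fixed-size fields preceding the DCID: the flags byte plus the 4-byte
+ * Version field for long headers, and the flags byte only for short headers.
+ * For short headers the DCID length is implicitly QUIC_HAP_CID_LEN since
+ * haproxy generated this CID itself.
+ */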
+
+
+/* Retrieve the DCID from the datagram found at <pos> position and deliver it to the
+ * correct datagram handler.
+ * Return 1 if a correct datagram could be found, 0 if not.
+ */
+static int quic_lstnr_dgram_dispatch(unsigned char *pos, size_t len, void *owner,
+ struct sockaddr_storage *saddr,
+ struct sockaddr_storage *daddr,
+ struct quic_dgram *new_dgram, struct list *dgrams)
+{
+ struct quic_dgram *dgram;
+ unsigned char *dcid;
+ size_t dcid_len;
+ int cid_tid;
+
+ if (!len || !quic_get_dgram_dcid(pos, pos + len, &dcid, &dcid_len))
+ goto err;
+
+ dgram = new_dgram ? new_dgram : pool_alloc(pool_head_quic_dgram);
+ if (!dgram)
+ goto err;
+
+ if ((cid_tid = quic_get_cid_tid(dcid, dcid_len, saddr, pos, len)) < 0) {
+ /* Use the current thread if the CID is not found. If a client opens
+ * a connection with multiple packets, it is possible that
+ * several threads will deal with datagrams sharing the same
+ * CID. For this reason, the CID tree insertion will be
+ * conducted as an atomic operation and the datagram ultimately
+ * redispatched by the late thread.
+ */
+ cid_tid = tid;
+ }
+
+ /* All the members must be initialized! */
+ dgram->owner = owner;
+ dgram->buf = pos;
+ dgram->len = len;
+ dgram->dcid = dcid;
+ dgram->dcid_len = dcid_len;
+ dgram->saddr = *saddr;
+ dgram->daddr = *daddr;
+ dgram->qc = NULL;
+
+ /* Attach the datagram to its quic_receiver_buf and quic_dghdlrs. */
+ LIST_APPEND(dgrams, &dgram->recv_list);
+ MT_LIST_APPEND(&quic_dghdlrs[cid_tid].dgrams, &dgram->handler_list);
+
+ /* typically quic_lstnr_dghdlr() */
+ tasklet_wakeup(quic_dghdlrs[cid_tid].task);
+
+ return 1;
+
+ err:
+ pool_free(pool_head_quic_dgram, new_dgram);
+ return 0;
+}
+
+/* This function is responsible for removing unused datagrams attached in front
+ * of <buf>. Each instance is freed until a not yet consumed datagram is
+ * found or the end of the list is hit. The last unused datagram found is not
+ * freed and is instead returned so that the caller can reuse it if needed.
+ *
+ * Returns the last unused datagram or NULL if none was found.
+ */
+static struct quic_dgram *quic_rxbuf_purge_dgrams(struct quic_receiver_buf *rbuf)
+{
+ struct quic_dgram *cur, *prev = NULL;
+
+ while (!LIST_ISEMPTY(&rbuf->dgram_list)) {
+ cur = LIST_ELEM(rbuf->dgram_list.n, struct quic_dgram *, recv_list);
+
+ /* Loop until a not yet consumed datagram is found. */
+ if (HA_ATOMIC_LOAD(&cur->buf))
+ break;
+
+ /* Clear buffer of current unused datagram. */
+ LIST_DELETE(&cur->recv_list);
+ b_del(&rbuf->buf, cur->len);
+
+ /* Free last found unused datagram. */
+ pool_free(pool_head_quic_dgram, prev);
+ prev = cur;
+ }
+
+ /* Return last unused datagram found. */
+ return prev;
+}
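+
+/* Illustration: with a list D0, D1, D2 where D0 and D1 were consumed
+ * (buf == NULL) and D2 is still pending, the loop above unlinks D0 and D1,
+ * releases their bytes from <rbuf>, frees D0 and returns D1 for reuse,
+ * leaving D2 at the head of the list.
+ */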
+
+/* Receive data from datagram socket <fd>. Data are placed in <out> buffer of
+ * length <len>.
+ *
+ * Datagram addresses will be returned via the next arguments. <from> will be
+ * the peer address and <to> the reception one. Note that <to> can only be
+ * retrieved if the socket supports IP_PKTINFO or affiliated options. If not,
+ * <to> will be set as AF_UNSPEC. The caller must specify <to_port> to ensure
+ * that <to> address is completely filled.
+ *
+ * Returns value from recvmsg syscall.
+ */
+static ssize_t quic_recv(int fd, void *out, size_t len,
+ struct sockaddr *from, socklen_t from_len,
+ struct sockaddr *to, socklen_t to_len,
+ uint16_t dst_port)
+{
+ union pktinfo {
+#ifdef IP_PKTINFO
+ struct in_pktinfo in;
+#else /* !IP_PKTINFO */
+ struct in_addr addr;
+#endif
+#ifdef IPV6_RECVPKTINFO
+ struct in6_pktinfo in6;
+#endif
+ };
+ char cdata[CMSG_SPACE(sizeof(union pktinfo))];
+ struct msghdr msg;
+ struct iovec vec;
+ struct cmsghdr *cmsg;
+ ssize_t ret;
+
+ vec.iov_base = out;
+ vec.iov_len = len;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_name = from;
+ msg.msg_namelen = from_len;
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &cdata;
+ msg.msg_controllen = sizeof(cdata);
+
+ clear_addr((struct sockaddr_storage *)to);
+
+ do {
+ ret = recvmsg(fd, &msg, 0);
+ } while (ret < 0 && errno == EINTR);
+
+ /* TODO handle errno. On EAGAIN/EWOULDBLOCK use fd_cant_recv() if
+ * using dedicated connection socket.
+ */
+
+ if (ret < 0)
+ goto end;
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ switch (cmsg->cmsg_level) {
+ case IPPROTO_IP:
+#if defined(IP_PKTINFO)
+ if (cmsg->cmsg_type == IP_PKTINFO) {
+ struct sockaddr_in *in = (struct sockaddr_in *)to;
+ struct in_pktinfo *info = (struct in_pktinfo *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in)) {
+ in->sin_family = AF_INET;
+ in->sin_addr = info->ipi_addr;
+ in->sin_port = dst_port;
+ }
+ }
+#elif defined(IP_RECVDSTADDR)
+ if (cmsg->cmsg_type == IP_RECVDSTADDR) {
+ struct sockaddr_in *in = (struct sockaddr_in *)to;
+ struct in_addr *info = (struct in_addr *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in)) {
+ in->sin_family = AF_INET;
+ in->sin_addr.s_addr = info->s_addr;
+ in->sin_port = dst_port;
+ }
+ }
+#endif /* IP_PKTINFO || IP_RECVDSTADDR */
+ break;
+
+ case IPPROTO_IPV6:
+#ifdef IPV6_RECVPKTINFO
+ if (cmsg->cmsg_type == IPV6_PKTINFO) {
+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)to;
+ struct in6_pktinfo *info6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+
+ if (to_len >= sizeof(struct sockaddr_in6)) {
+ in6->sin6_family = AF_INET6;
+ memcpy(&in6->sin6_addr, &info6->ipi6_addr, sizeof(in6->sin6_addr));
+ in6->sin6_port = dst_port;
+ }
+ }
+#endif
+ break;
+ }
+ }
+
+ end:
+ return ret;
+}
+
+/* Function called on a read event from a listening socket. It tries
+ * to handle as many connections as possible.
+ */
+void quic_lstnr_sock_fd_iocb(int fd)
+{
+ ssize_t ret;
+ struct quic_receiver_buf *rxbuf;
+ struct buffer *buf;
+ struct listener *l = objt_listener(fdtab[fd].owner);
+ struct quic_transport_params *params;
+ /* Source address */
+ struct sockaddr_storage saddr = {0}, daddr = {0};
+ size_t max_sz, cspace;
+ struct quic_dgram *new_dgram;
+ unsigned char *dgram_buf;
+ int max_dgrams;
+
+ BUG_ON(!l);
+
+ new_dgram = NULL;
+ if (!l)
+ return;
+
+ if (!(fdtab[fd].state & FD_POLL_IN) || !fd_recv_ready(fd))
+ return;
+
+ rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), rxbuf_el);
+ if (!rxbuf)
+ goto out;
+
+ buf = &rxbuf->buf;
+
+ max_dgrams = global.tune.maxpollevents;
+ start:
+ /* Try to reuse an existing dgram. Note that there is always at
+ * least one datagram to pick, except the first time we enter
+ * this function for this <rxbuf> buffer.
+ */
+ new_dgram = quic_rxbuf_purge_dgrams(rxbuf);
+
+ params = &l->bind_conf->quic_params;
+ max_sz = params->max_udp_payload_size;
+ cspace = b_contig_space(buf);
+ if (cspace < max_sz) {
+ struct proxy *px = l->bind_conf->frontend;
+ struct quic_counters *prx_counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, &quic_stats_module);
+ struct quic_dgram *dgram;
+
+ /* Do not mark <buf> as full, and do not try to consume it
+ * if the contiguous remaining space is not at the end.
+ */
+ if (b_tail(buf) + cspace < b_wrap(buf)) {
+ HA_ATOMIC_INC(&prx_counters->rxbuf_full);
+ goto out;
+ }
+
+ /* Allocate a fake datagram without data, used to locate
+ * the end of the RX buffer (required during purging).
+ */
+ dgram = pool_alloc(pool_head_quic_dgram);
+ if (!dgram)
+ goto out;
+
+ /* Initialize only the useful members of this fake datagram. */
+ dgram->buf = NULL;
+ dgram->len = cspace;
+ /* Append this datagram only to the RX buffer list. It will
+ * not be treated by any datagram handler.
+ */
+ LIST_APPEND(&rxbuf->dgram_list, &dgram->recv_list);
+
+ /* Consume the remaining space */
+ b_add(buf, cspace);
+ if (b_contig_space(buf) < max_sz) {
+ HA_ATOMIC_INC(&prx_counters->rxbuf_full);
+ goto out;
+ }
+ }
+
+ dgram_buf = (unsigned char *)b_tail(buf);
+ ret = quic_recv(fd, dgram_buf, max_sz,
+ (struct sockaddr *)&saddr, sizeof(saddr),
+ (struct sockaddr *)&daddr, sizeof(daddr),
+ get_net_port(&l->rx.addr));
+ if (ret <= 0)
+ goto out;
+
+ b_add(buf, ret);
+ if (!quic_lstnr_dgram_dispatch(dgram_buf, ret, l, &saddr, &daddr,
+ new_dgram, &rxbuf->dgram_list)) {
+ /* If dispatch failed, remove this datagram from the buffer */
+ b_sub(buf, ret);
+ }
+ new_dgram = NULL;
+ if (--max_dgrams > 0)
+ goto start;
+ out:
+ pool_free(pool_head_quic_dgram, new_dgram);
+ MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
+}
+
+/* FD-owned quic-conn socket callback. */
+void quic_conn_sock_fd_iocb(int fd)
+{
+ struct quic_conn *qc = fdtab[fd].owner;
+
+ TRACE_ENTER(QUIC_EV_CONN_RCV, qc);
+
+ if (fd_send_active(fd) && fd_send_ready(fd)) {
+ TRACE_DEVEL("send ready", QUIC_EV_CONN_RCV, qc);
+ fd_stop_send(fd);
+ tasklet_wakeup_after(NULL, qc->wait_event.tasklet);
+ qc_notify_send(qc);
+ }
+
+ if (fd_recv_ready(fd)) {
+ TRACE_DEVEL("recv ready", QUIC_EV_CONN_RCV, qc);
+ tasklet_wakeup_after(NULL, qc->wait_event.tasklet);
+ fd_stop_recv(fd);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_RCV, qc);
+}
+
+/* Send a datagram stored into <buf> buffer with <sz> as size.
+ * The caller must ensure there are at least <sz> bytes in this buffer.
+ *
+ * Returns the total bytes sent over the socket. 0 is returned if a transient
+ * error is encountered, which allows the send to be retried later. A negative
+ * value is used for a fatal error, which guarantees that all future send
+ * operations for this connection will fail.
+ *
+ * TODO standardize this function for a generic UDP sendto wrapper. This can be
+ * done by removing the <qc> arg and replacing it with address/port.
+ */
+int qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t sz,
+ int flags)
+{
+ ssize_t ret;
+
+ do {
+ if (qc_test_fd(qc)) {
+ if (!fd_send_ready(qc->fd))
+ return 0;
+
+ ret = send(qc->fd, b_peek(buf, b_head_ofs(buf)), sz,
+ MSG_DONTWAIT | MSG_NOSIGNAL);
+ }
+#if defined(IP_PKTINFO) || defined(IP_RECVDSTADDR) || defined(IPV6_RECVPKTINFO)
+ else if (is_addr(&qc->local_addr)) {
+ struct msghdr msg = { 0 };
+ struct iovec vec;
+ struct cmsghdr *cmsg;
+#ifdef IP_PKTINFO
+ struct in_pktinfo in;
+#endif /* IP_PKTINFO */
+#ifdef IPV6_RECVPKTINFO
+ struct in6_pktinfo in6;
+#endif /* IPV6_RECVPKTINFO */
+ union {
+#ifdef IP_PKTINFO
+ char buf[CMSG_SPACE(sizeof(in))];
+#endif /* IP_PKTINFO */
+#ifdef IPV6_RECVPKTINFO
+ char buf6[CMSG_SPACE(sizeof(in6))];
+#endif /* IPV6_RECVPKTINFO */
+ char bufaddr[CMSG_SPACE(sizeof(struct in_addr))];
+ struct cmsghdr align;
+ } u;
+
+ vec.iov_base = b_peek(buf, b_head_ofs(buf));
+ vec.iov_len = sz;
+ msg.msg_name = &qc->peer_addr;
+ msg.msg_namelen = get_addr_len(&qc->peer_addr);
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+
+ switch (qc->local_addr.ss_family) {
+ case AF_INET:
+#if defined(IP_PKTINFO)
+ memset(&in, 0, sizeof(in));
+ memcpy(&in.ipi_spec_dst,
+ &((struct sockaddr_in *)&qc->local_addr)->sin_addr,
+ sizeof(struct in_addr));
+
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = IPPROTO_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+ memcpy(CMSG_DATA(cmsg), &in, sizeof(in));
+#elif defined(IP_RECVDSTADDR)
+ msg.msg_control = u.bufaddr;
+ msg.msg_controllen = sizeof(u.bufaddr);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = IPPROTO_IP;
+ cmsg->cmsg_type = IP_SENDSRCADDR;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
+ memcpy(CMSG_DATA(cmsg),
+ &((struct sockaddr_in *)&qc->local_addr)->sin_addr,
+ sizeof(struct in_addr));
+#endif /* IP_PKTINFO || IP_RECVDSTADDR */
+ break;
+
+ case AF_INET6:
+#ifdef IPV6_RECVPKTINFO
+ memset(&in6, 0, sizeof(in6));
+ memcpy(&in6.ipi6_addr,
+ &((struct sockaddr_in6 *)&qc->local_addr)->sin6_addr,
+ sizeof(struct in6_addr));
+
+ msg.msg_control = u.buf6;
+ msg.msg_controllen = sizeof(u.buf6);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = IPPROTO_IPV6;
+ cmsg->cmsg_type = IPV6_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+ memcpy(CMSG_DATA(cmsg), &in6, sizeof(in6));
+#endif /* IPV6_RECVPKTINFO */
+ break;
+
+ default:
+ break;
+ }
+
+ ret = sendmsg(qc->li->rx.fd, &msg,
+ MSG_DONTWAIT|MSG_NOSIGNAL);
+ }
+#endif /* IP_PKTINFO || IP_RECVDSTADDR || IPV6_RECVPKTINFO */
+ else {
+ ret = sendto(qc->li->rx.fd, b_peek(buf, b_head_ofs(buf)), sz,
+ MSG_DONTWAIT|MSG_NOSIGNAL,
+ (struct sockaddr *)&qc->peer_addr,
+ get_addr_len(&qc->peer_addr));
+ }
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK ||
+ errno == ENOTCONN || errno == EINPROGRESS) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ qc->cntrs.socket_full++;
+ else
+ qc->cntrs.sendto_err++;
+
+ /* transient error */
+ fd_want_send(qc->fd);
+ fd_cant_send(qc->fd);
+ TRACE_PRINTF(TRACE_LEVEL_USER, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0,
+ "UDP send failure errno=%d (%s)", errno, strerror(errno));
+ return 0;
+ }
+ else {
+ /* unrecoverable error */
+ qc->cntrs.sendto_err_unknown++;
+ TRACE_PRINTF(TRACE_LEVEL_USER, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0,
+ "UDP send failure errno=%d (%s)", errno, strerror(errno));
+ return -1;
+ }
+ }
+
+ if (ret != sz)
+ return 0;
+
+ return ret;
+}
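+
+/* Usage sketch for qc_snd_buf() with a hypothetical caller distinguishing
+ * the three outcomes documented above:
+ *
+ *   ret = qc_snd_buf(qc, buf, sz, 0);
+ *   if (ret < 0)
+ *       ...; // fatal: no future send on this connection may succeed
+ *   else if (!ret)
+ *       ...; // transient error or short write: retry the datagram later
+ *   else
+ *       ...; // <sz> bytes were sent
+ */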
+
+/* Receive datagram on <qc> FD-owned socket.
+ *
+ * Returns the total number of bytes read or a negative value on error.
+ */
+int qc_rcv_buf(struct quic_conn *qc)
+{
+ struct sockaddr_storage saddr = {0}, daddr = {0};
+ struct quic_transport_params *params;
+ struct quic_dgram *new_dgram = NULL;
+ struct buffer buf = BUF_NULL;
+ size_t max_sz;
+ unsigned char *dgram_buf;
+ struct listener *l;
+ ssize_t ret = 0;
+
+ /* Do not call this if quic-conn FD is uninitialized. */
+ BUG_ON(qc->fd < 0);
+
+ TRACE_ENTER(QUIC_EV_CONN_RCV, qc);
+ l = qc->li;
+
+ params = &l->bind_conf->quic_params;
+ max_sz = params->max_udp_payload_size;
+
+ do {
+ if (!b_alloc(&buf))
+ break; /* TODO subscribe for memory again available. */
+
+ b_reset(&buf);
+ BUG_ON(b_contig_space(&buf) < max_sz);
+
+ /* Allocate datagram on first loop or after requeuing. */
+ if (!new_dgram && !(new_dgram = pool_alloc(pool_head_quic_dgram)))
+ break; /* TODO subscribe for memory again available. */
+
+ dgram_buf = (unsigned char *)b_tail(&buf);
+ ret = quic_recv(qc->fd, dgram_buf, max_sz,
+ (struct sockaddr *)&saddr, sizeof(saddr),
+ (struct sockaddr *)&daddr, sizeof(daddr),
+ get_net_port(&qc->local_addr));
+ if (ret <= 0) {
+ /* Subscribe FD for future reception. */
+ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == ENOTCONN)
+ fd_want_recv(qc->fd);
+ /* TODO handle other error codes as fatal on the connection. */
+ break;
+ }
+
+ b_add(&buf, ret);
+
+ new_dgram->buf = dgram_buf;
+ new_dgram->len = ret;
+ new_dgram->dcid_len = 0;
+ new_dgram->dcid = NULL;
+ new_dgram->saddr = saddr;
+ new_dgram->daddr = daddr;
+ new_dgram->qc = NULL; /* set later via quic_dgram_parse() */
+
+ TRACE_DEVEL("read datagram", QUIC_EV_CONN_RCV, qc, new_dgram);
+
+ if (!quic_get_dgram_dcid(new_dgram->buf,
+ new_dgram->buf + new_dgram->len,
+ &new_dgram->dcid, &new_dgram->dcid_len)) {
+ continue;
+ }
+
+ if (!qc_check_dcid(qc, new_dgram->dcid, new_dgram->dcid_len)) {
+ /* Datagram received by mistake on the connection FD, dispatch it
+ * to its associated quic-conn.
+ *
+ * TODO count redispatch datagrams.
+ */
+ struct quic_receiver_buf *rxbuf;
+ struct quic_dgram *tmp_dgram;
+ unsigned char *rxbuf_tail;
+ size_t cspace;
+
+ TRACE_STATE("datagram for other connection on quic-conn socket, requeue it", QUIC_EV_CONN_RCV, qc);
+
+ rxbuf = MT_LIST_POP(&l->rx.rxbuf_list, typeof(rxbuf), rxbuf_el);
+ ALREADY_CHECKED(rxbuf);
+ cspace = b_contig_space(&rxbuf->buf);
+
+ tmp_dgram = quic_rxbuf_purge_dgrams(rxbuf);
+ pool_free(pool_head_quic_dgram, tmp_dgram);
+
+ /* Insert a fake datagram to consume the remaining space if it wraps. */
+ if (cspace < new_dgram->len && b_space_wraps(&rxbuf->buf)) {
+ struct quic_dgram *fake_dgram = pool_alloc(pool_head_quic_dgram);
+ if (!fake_dgram) {
+ /* TODO count lost datagrams */
+ MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
+ continue;
+ }
+
+ fake_dgram->buf = NULL;
+ fake_dgram->len = cspace;
+ LIST_APPEND(&rxbuf->dgram_list, &fake_dgram->recv_list);
+ b_add(&rxbuf->buf, cspace);
+ }
+
+ /* Recheck contig space after fake datagram insert. */
+ if (b_contig_space(&rxbuf->buf) < new_dgram->len) {
+ /* TODO count lost datagrams */
+ MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
+ continue;
+ }
+
+ rxbuf_tail = (unsigned char *)b_tail(&rxbuf->buf);
+ __b_putblk(&rxbuf->buf, (char *)dgram_buf, new_dgram->len);
+ if (!quic_lstnr_dgram_dispatch(rxbuf_tail, ret, l, &saddr, &daddr,
+ new_dgram, &rxbuf->dgram_list)) {
+ /* TODO count lost datagrams. */
+ b_sub(&buf, ret);
+ }
+ else {
+ /* datagram must not be freed as it was requeued. */
+ new_dgram = NULL;
+ }
+
+ MT_LIST_APPEND(&l->rx.rxbuf_list, &rxbuf->rxbuf_el);
+ continue;
+ }
+
+ quic_dgram_parse(new_dgram, qc, qc->li);
+ /* A datagram must always be consumed after quic_dgram_parse(). */
+ BUG_ON(new_dgram->buf);
+ } while (ret > 0);
+
+ pool_free(pool_head_quic_dgram, new_dgram);
+
+ if (b_size(&buf)) {
+ b_free(&buf);
+ offer_buffers(NULL, 1);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_RCV, qc);
+ return ret;
+}
+
+/* Allocate a socket file-descriptor specific for QUIC connection <qc>.
+ * Endpoint addresses are specified by the two following arguments : <src> is
+ * the local address and <dst> is the remote one.
+ *
+ * On success the allocated FD is stored into <qc>. On error, the connection
+ * socket is left marked as uninitialized.
+ */
+void qc_alloc_fd(struct quic_conn *qc, const struct sockaddr_storage *src,
+ const struct sockaddr_storage *dst)
+{
+ struct bind_conf *bc = qc->li->bind_conf;
+ struct proxy *p = bc->frontend;
+ int fd = -1;
+ int ret;
+
+ /* Must not happen. */
+ BUG_ON(src->ss_family != dst->ss_family);
+
+ qc_init_fd(qc);
+
+ fd = socket(src->ss_family, SOCK_DGRAM, 0);
+ if (fd < 0)
+ goto err;
+
+ if (fd >= global.maxsock) {
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached the configured maximum connection limit. Please check the global 'maxconn' value.\n",
+ p->id);
+ goto err;
+ }
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ if (ret < 0)
+ goto err;
+
+ switch (src->ss_family) {
+ case AF_INET:
+#if defined(IP_PKTINFO)
+ ret = setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));
+#elif defined(IP_RECVDSTADDR)
+ ret = setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &one, sizeof(one));
+#endif /* IP_PKTINFO || IP_RECVDSTADDR */
+ break;
+ case AF_INET6:
+#ifdef IPV6_RECVPKTINFO
+ ret = setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one));
+#endif
+ break;
+ }
+ if (ret < 0)
+ goto err;
+
+ ret = bind(fd, (struct sockaddr *)src, get_addr_len(src));
+ if (ret < 0) {
+ if (errno == EACCES) {
+ if (!quic_bind_eacces_warn) {
+ send_log(p, LOG_WARNING,
+ "Permission error on QUIC socket binding for proxy %s. Consider using setcap cap_net_bind_service (Linux only) or running as root.\n",
+ p->id);
+ quic_bind_eacces_warn = 1;
+ }
+
+ /* Fallback to listener socket for this receiver instance. */
+ HA_ATOMIC_STORE(&qc->li->rx.quic_mode, QUIC_SOCK_MODE_LSTNR);
+ }
+ goto err;
+ }
+
+ ret = connect(fd, (struct sockaddr *)dst, get_addr_len(dst));
+ if (ret < 0)
+ goto err;
+
+ qc->fd = fd;
+ fd_set_nonblock(fd);
+ fd_insert(fd, qc, quic_conn_sock_fd_iocb, tgid, ti->ltid_bit);
+ fd_want_recv(fd);
+
+ return;
+
+ err:
+ if (fd >= 0)
+ close(fd);
+}
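+
+/* Usage sketch: since qc_alloc_fd() reports failure only by leaving the
+ * socket uninitialized, a hypothetical caller probes the result with
+ * qc_test_fd() before relying on the dedicated FD:
+ *
+ *   qc_alloc_fd(qc, &local_addr, &peer_addr);
+ *   if (!qc_test_fd(qc))
+ *       ...; // keep receiving via the shared listener FD
+ */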
+
+/* Release socket file-descriptor specific for QUIC connection <qc>. Set
+ * <reinit> if socket should be reinitialized after address migration.
+ */
+void qc_release_fd(struct quic_conn *qc, int reinit)
+{
+ if (qc_test_fd(qc)) {
+ fd_delete(qc->fd);
+ qc->fd = DEAD_FD_MAGIC;
+
+ if (reinit)
+ qc_init_fd(qc);
+ }
+}
+
+/* Wrapper for fd_want_recv(). Safe even if the connection does not use its own
+ * socket.
+ */
+void qc_want_recv(struct quic_conn *qc)
+{
+ if (qc_test_fd(qc))
+ fd_want_recv(qc->fd);
+}
+
+/*********************** QUIC accept queue management ***********************/
+/* per-thread accept queues */
+struct quic_accept_queue *quic_accept_queues;
+
+/* Install <qc> on the queue of connections ready to be accepted. The queue
+ * tasklet is then woken up. If the accept of <qc> is already scheduled or
+ * done, nothing is done.
+ */
+void quic_accept_push_qc(struct quic_conn *qc)
+{
+ struct quic_accept_queue *queue = &quic_accept_queues[tid];
+ struct li_per_thread *lthr = &qc->li->per_thr[ti->ltid];
+
+ /* early return if accept is already in progress/done for this
+ * connection
+ */
+ if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED)
+ return;
+
+ BUG_ON(MT_LIST_INLIST(&qc->accept_list));
+ HA_ATOMIC_INC(&qc->li->rx.quic_curr_accept);
+
+ qc->flags |= QUIC_FL_CONN_ACCEPT_REGISTERED;
+ /* 1. insert the listener in the accept queue
+ *
+ * Use TRY_APPEND as there is a possible race even with INLIST if
+ * multiple threads try to add the same listener instance from several
+ * quic_conn.
+ */
+ if (!MT_LIST_INLIST(&(lthr->quic_accept.list)))
+ MT_LIST_TRY_APPEND(&queue->listeners, &(lthr->quic_accept.list));
+
+ /* 2. insert the quic_conn in the listener per-thread queue. */
+ MT_LIST_APPEND(&lthr->quic_accept.conns, &qc->accept_list);
+
+ /* 3. wake up the queue tasklet */
+ tasklet_wakeup(quic_accept_queues[tid].tasklet);
+}
+
+/* Tasklet handler to accept QUIC connections. Call listener_accept() on every
+ * listener instance registered in the accept queue.
+ */
+struct task *quic_accept_run(struct task *t, void *ctx, unsigned int i)
+{
+ struct li_per_thread *lthr;
+ struct mt_list *elt1, elt2;
+ struct quic_accept_queue *queue = &quic_accept_queues[tid];
+
+ mt_list_for_each_entry_safe(lthr, &queue->listeners, quic_accept.list, elt1, elt2) {
+ listener_accept(lthr->li);
+ if (!MT_LIST_ISEMPTY(&lthr->quic_accept.conns))
+ tasklet_wakeup((struct tasklet*)t);
+ else
+ MT_LIST_DELETE_SAFE(elt1);
+ }
+
+ return NULL;
+}
+
+/* Returns the maximum number of QUIC connections waiting for handshake to
+ * complete in parallel on listener <l> instance. This is directly based on
+ * listener backlog value.
+ */
+int quic_listener_max_handshake(const struct listener *l)
+{
+ return listener_backlog(l) / 2;
+}
+
+/* Returns the maximum number of QUIC connections waiting to be accepted on
+ * listener <l> instance. This is directly based on the listener backlog
+ * value.
+ */
+int quic_listener_max_accept(const struct listener *l)
+{
+ return listener_backlog(l) / 2;
+}
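+
+/* For example, with a listener backlog of 1024, at most 512 connections may
+ * be waiting for their handshake to complete while 512 more may be waiting
+ * to be accepted.
+ */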
+
+static int quic_alloc_accept_queues(void)
+{
+ int i;
+
+ quic_accept_queues = calloc(global.nbthread,
+ sizeof(*quic_accept_queues));
+ if (!quic_accept_queues) {
+ ha_alert("Failed to allocate the quic accept queues.\n");
+ return 0;
+ }
+
+ for (i = 0; i < global.nbthread; ++i) {
+ struct tasklet *task;
+ if (!(task = tasklet_new())) {
+ ha_alert("Failed to allocate the quic accept queue on thread %d.\n", i);
+ return 0;
+ }
+
+ tasklet_set_tid(task, i);
+ task->process = quic_accept_run;
+ quic_accept_queues[i].tasklet = task;
+
+ MT_LIST_INIT(&quic_accept_queues[i].listeners);
+ }
+
+ return 1;
+}
+REGISTER_POST_CHECK(quic_alloc_accept_queues);
+
+static int quic_deallocate_accept_queues(void)
+{
+ int i;
+
+ if (quic_accept_queues) {
+ for (i = 0; i < global.nbthread; ++i)
+ tasklet_free(quic_accept_queues[i].tasklet);
+ free(quic_accept_queues);
+ }
+
+ return 1;
+}
+REGISTER_POST_DEINIT(quic_deallocate_accept_queues);
diff --git a/src/quic_ssl.c b/src/quic_ssl.c
new file mode 100644
index 0000000..314f587
--- /dev/null
+++ b/src/quic_ssl.c
@@ -0,0 +1,790 @@
+#include <haproxy/errors.h>
+#include <haproxy/ncbuf.h>
+#include <haproxy/proxy.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_rx.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/quic_ssl.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/trace.h>
+
+static BIO_METHOD *ha_quic_meth;
+
+DECLARE_POOL(pool_head_quic_ssl_sock_ctx, "quic_ssl_sock_ctx", sizeof(struct ssl_sock_ctx));
+
+/* Encode the transport parameters and set them into the TLS stack,
+ * depending on <ver> QUIC version and on the <server> boolean which must
+ * be set to 1 for a QUIC server, 0 for a client.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int qc_ssl_set_quic_transport_params(struct quic_conn *qc,
+ const struct quic_version *ver, int server)
+{
+ int ret = 0;
+#ifdef USE_QUIC_OPENSSL_COMPAT
+ unsigned char *in = qc->enc_params;
+ size_t insz = sizeof qc->enc_params;
+ size_t *enclen = &qc->enc_params_len;
+#else
+ unsigned char tps[QUIC_TP_MAX_ENCLEN];
+ size_t tpslen;
+ unsigned char *in = tps;
+ size_t insz = sizeof tps;
+ size_t *enclen = &tpslen;
+#endif
+
+ TRACE_ENTER(QUIC_EV_CONN_RWSEC, qc);
+ *enclen = quic_transport_params_encode(in, in + insz, &qc->rx.params, ver, server);
+ if (!*enclen) {
+ TRACE_ERROR("quic_transport_params_encode() failed", QUIC_EV_CONN_RWSEC);
+ goto leave;
+ }
+
+ if (!SSL_set_quic_transport_params(qc->xprt_ctx->ssl, in, *enclen)) {
+ TRACE_ERROR("SSL_set_quic_transport_params() failed", QUIC_EV_CONN_RWSEC);
+ goto leave;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_RWSEC, qc);
+ return ret;
+}
+
+/* This function copies the CRYPTO data provided by the TLS stack, found at
+ * <data> with <len> as size, into CRYPTO buffers dedicated to storing the
+ * outgoing CRYPTO frames, so as to be able to retransmit the CRYPTO data
+ * streams.
+ * It fails (returns 0) only if it could not manage to allocate enough CRYPTO
+ * buffers to store all the data.
+ * Note that CRYPTO data may exist at any encryption level except at 0-RTT.
+ */
+static int qc_ssl_crypto_data_cpy(struct quic_conn *qc, struct quic_enc_level *qel,
+ const unsigned char *data, size_t len)
+{
+ struct quic_crypto_buf **qcb;
+ /* The remaining bytes to store in CRYPTO buffers. */
+ size_t cf_offset, cf_len, *nb_buf;
+ unsigned char *pos;
+ int ret = 0;
+
+ nb_buf = &qel->tx.crypto.nb_buf;
+ qcb = &qel->tx.crypto.bufs[*nb_buf - 1];
+ cf_offset = (*nb_buf - 1) * QUIC_CRYPTO_BUF_SZ + (*qcb)->sz;
+ cf_len = len;
+
+ TRACE_ENTER(QUIC_EV_CONN_ADDDATA, qc);
+
+ while (len) {
+ size_t to_copy, room;
+
+ pos = (*qcb)->data + (*qcb)->sz;
+ room = QUIC_CRYPTO_BUF_SZ - (*qcb)->sz;
+ to_copy = len > room ? room : len;
+ if (to_copy) {
+ memcpy(pos, data, to_copy);
+ /* Increment the total size of these CRYPTO buffers by <to_copy>. */
+ qel->tx.crypto.sz += to_copy;
+ (*qcb)->sz += to_copy;
+ len -= to_copy;
+ data += to_copy;
+ }
+ else {
+ struct quic_crypto_buf **tmp;
+
+ // FIXME: realloc!
+ tmp = realloc(qel->tx.crypto.bufs,
+ (*nb_buf + 1) * sizeof *qel->tx.crypto.bufs);
+ if (tmp) {
+ qel->tx.crypto.bufs = tmp;
+ qcb = &qel->tx.crypto.bufs[*nb_buf];
+ *qcb = pool_alloc(pool_head_quic_crypto_buf);
+ if (!*qcb) {
+ TRACE_ERROR("Could not allocate crypto buf", QUIC_EV_CONN_ADDDATA, qc);
+ goto leave;
+ }
+
+ (*qcb)->sz = 0;
+ ++*nb_buf;
+ }
+ else {
+ break;
+ }
+ }
+ }
+
+ /* Allocate a TX CRYPTO frame only if all the CRYPTO data
+ * have been buffered.
+ */
+ if (!len) {
+ struct quic_frame *frm;
+ struct quic_frame *found = NULL;
+
+ /* There is at most one CRYPTO frame in this packet number
+ * space. Let's look for it.
+ */
+ list_for_each_entry(frm, &qel->pktns->tx.frms, list) {
+ if (frm->type != QUIC_FT_CRYPTO)
+ continue;
+
+ /* Found */
+ found = frm;
+ break;
+ }
+
+ if (found) {
+ found->crypto.len += cf_len;
+ }
+ else {
+ frm = qc_frm_alloc(QUIC_FT_CRYPTO);
+ if (!frm) {
+ TRACE_ERROR("Could not allocate quic frame", QUIC_EV_CONN_ADDDATA, qc);
+ goto leave;
+ }
+
+ frm->crypto.offset = cf_offset;
+ frm->crypto.len = cf_len;
+ frm->crypto.qel = qel;
+ LIST_APPEND(&qel->pktns->tx.frms, &frm->list);
+ }
+ }
+ ret = len == 0;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_ADDDATA, qc);
+ return ret;
+}
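+
+/* As an illustration of the offset computation above, assuming
+ * QUIC_CRYPTO_BUF_SZ is 4096 (illustrative value): with two full buffers plus
+ * 100 bytes stored in the third one, the next CRYPTO frame starts at
+ *
+ *   cf_offset = (3 - 1) * 4096 + 100 = 8292
+ */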
+
+/* returns 0 on error, 1 on success */
+static int ha_quic_set_encryption_secrets(SSL *ssl, enum ssl_encryption_level_t level,
+ const uint8_t *read_secret,
+ const uint8_t *write_secret, size_t secret_len)
+{
+ int ret = 0;
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ struct quic_enc_level **qel = ssl_to_qel_addr(qc, level);
+ struct quic_pktns **pktns = ssl_to_quic_pktns(qc, level);
+ struct quic_tls_ctx *tls_ctx;
+ const SSL_CIPHER *cipher = SSL_get_current_cipher(ssl);
+ struct quic_tls_secrets *rx = NULL, *tx = NULL;
+ const struct quic_version *ver =
+ qc->negotiated_version ? qc->negotiated_version : qc->original_version;
+
+ TRACE_ENTER(QUIC_EV_CONN_RWSEC, qc);
+ BUG_ON(secret_len > QUIC_TLS_SECRET_LEN);
+
+ if (!*qel && !qc_enc_level_alloc(qc, pktns, qel, level)) {
+ TRACE_PROTO("Could not allocate an encryption level", QUIC_EV_CONN_ADDDATA, qc);
+ goto leave;
+ }
+
+ tls_ctx = &(*qel)->tls_ctx;
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_PROTO("connection to be killed", QUIC_EV_CONN_ADDDATA, qc);
+ goto out;
+ }
+
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) {
+ TRACE_PROTO("CC required", QUIC_EV_CONN_RWSEC, qc);
+ goto out;
+ }
+
+ if (!read_secret)
+ goto write;
+
+ rx = &tls_ctx->rx;
+ rx->aead = tls_aead(cipher);
+ rx->md = tls_md(cipher);
+ rx->hp = tls_hp(cipher);
+ if (!rx->aead || !rx->md || !rx->hp)
+ goto leave;
+
+ if (!quic_tls_secrets_keys_alloc(rx)) {
+ TRACE_ERROR("RX keys allocation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_derive_keys(rx->aead, rx->hp, rx->md, ver, rx->key, rx->keylen,
+ rx->iv, rx->ivlen, rx->hp_key, sizeof rx->hp_key,
+ read_secret, secret_len)) {
+ TRACE_ERROR("TX key derivation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_rx_ctx_init(&rx->ctx, rx->aead, rx->key)) {
+ TRACE_ERROR("could not initial RX TLS cipher context", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_dec_aes_ctx_init(&rx->hp_ctx, rx->hp, rx->hp_key)) {
+ TRACE_ERROR("could not initial RX TLS cipher context for HP", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ /* Enqueue this connection asap if we could derive 0-RTT secrets as
+ * a listener. Note that a listener derives only RX secrets for this
+ * level.
+ */
+ if (qc_is_listener(qc) && level == ssl_encryption_early_data) {
+ TRACE_DEVEL("pushing connection into accept queue", QUIC_EV_CONN_RWSEC, qc);
+ quic_accept_push_qc(qc);
+ }
+
+write:
+
+ if (!write_secret)
+ goto keyupdate_init;
+
+ tx = &tls_ctx->tx;
+ tx->aead = tls_aead(cipher);
+ tx->md = tls_md(cipher);
+ tx->hp = tls_hp(cipher);
+ if (!tx->aead || !tx->md || !tx->hp)
+ goto leave;
+
+ if (!quic_tls_secrets_keys_alloc(tx)) {
+ TRACE_ERROR("TX keys allocation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_derive_keys(tx->aead, tx->hp, tx->md, ver, tx->key, tx->keylen,
+ tx->iv, tx->ivlen, tx->hp_key, sizeof tx->hp_key,
+ write_secret, secret_len)) {
+ TRACE_ERROR("TX key derivation failed", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_tx_ctx_init(&tx->ctx, tx->aead, tx->key)) {
+ TRACE_ERROR("could not initial RX TLS cipher context", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_enc_aes_ctx_init(&tx->hp_ctx, tx->hp, tx->hp_key)) {
+ TRACE_ERROR("could not initial TX TLS cipher context for HP", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ /* Set the transport parameters in the TLS stack. */
+ if (level == ssl_encryption_handshake && qc_is_listener(qc) &&
+ !qc_ssl_set_quic_transport_params(qc, ver, 1))
+ goto leave;
+
+ keyupdate_init:
+ /* Store the secret provided by the TLS stack, required for key update. */
+ if (level == ssl_encryption_application) {
+ struct quic_tls_kp *prv_rx = &qc->ku.prv_rx;
+ struct quic_tls_kp *nxt_rx = &qc->ku.nxt_rx;
+ struct quic_tls_kp *nxt_tx = &qc->ku.nxt_tx;
+
+ if (rx) {
+ if (!(rx->secret = pool_alloc(pool_head_quic_tls_secret))) {
+ TRACE_ERROR("Could not allocate RX Application secrete keys", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ memcpy(rx->secret, read_secret, secret_len);
+ rx->secretlen = secret_len;
+ }
+
+ if (tx) {
+ if (!(tx->secret = pool_alloc(pool_head_quic_tls_secret))) {
+ TRACE_ERROR("Could not allocate TX Application secrete keys", QUIC_EV_CONN_RWSEC, qc);
+ goto leave;
+ }
+
+ memcpy(tx->secret, write_secret, secret_len);
+ tx->secretlen = secret_len;
+ }
+
+ /* Initialize all the secret keys lengths */
+ prv_rx->secretlen = nxt_rx->secretlen = nxt_tx->secretlen = secret_len;
+ }
+
+ out:
+ ret = 1;
+ leave:
+ if (!ret) {
+ /* Release the CRYPTO frames which have been provided by the TLS stack
+ * to prevent the transmission of ack-eliciting packets.
+ */
+ qc_release_pktns_frms(qc, qc->ipktns);
+ qc_release_pktns_frms(qc, qc->hpktns);
+ qc_release_pktns_frms(qc, qc->apktns);
+ quic_set_tls_alert(qc, SSL_AD_HANDSHAKE_FAILURE);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_RWSEC, qc, &level);
+ return ret;
+}
+
+#if defined(OPENSSL_IS_AWSLC)
+/* compatibility function for split read/write encryption secrets to be used
+ * with the API which uses 2 callbacks. */
+static inline int ha_quic_set_read_secret(SSL *ssl, enum ssl_encryption_level_t level,
+ const SSL_CIPHER *cipher, const uint8_t *secret,
+ size_t secret_len)
+{
+ return ha_quic_set_encryption_secrets(ssl, level, secret, NULL, secret_len);
+
+}
+
+static inline int ha_quic_set_write_secret(SSL *ssl, enum ssl_encryption_level_t level,
+ const SSL_CIPHER *cipher, const uint8_t *secret,
+ size_t secret_len)
+{
+
+ return ha_quic_set_encryption_secrets(ssl, level, NULL, secret, secret_len);
+
+}
+#endif
+
+/* ->add_handshake_data QUIC TLS callback used by the TLS stack when it
+ * wants to provide the QUIC layer with CRYPTO data.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int ha_quic_add_handshake_data(SSL *ssl, enum ssl_encryption_level_t level,
+ const uint8_t *data, size_t len)
+{
+ int ret = 0;
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ struct quic_enc_level **qel = ssl_to_qel_addr(qc, level);
+ struct quic_pktns **pktns = ssl_to_quic_pktns(qc, level);
+
+ TRACE_ENTER(QUIC_EV_CONN_ADDDATA, qc);
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_PROTO("connection to be killed", QUIC_EV_CONN_ADDDATA, qc);
+ goto out;
+ }
+
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) {
+ TRACE_PROTO("CC required", QUIC_EV_CONN_ADDDATA, qc);
+ goto out;
+ }
+
+ if (!*qel && !qc_enc_level_alloc(qc, pktns, qel, level))
+ goto leave;
+
+ if (!qc_ssl_crypto_data_cpy(qc, *qel, data, len)) {
+ TRACE_ERROR("Could not bufferize", QUIC_EV_CONN_ADDDATA, qc);
+ goto leave;
+ }
+
+ TRACE_DEVEL("CRYPTO data buffered", QUIC_EV_CONN_ADDDATA,
+ qc, &level, &len);
+ out:
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_ADDDATA, qc);
+ return ret;
+}
+
+static int ha_quic_flush_flight(SSL *ssl)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ TRACE_ENTER(QUIC_EV_CONN_FFLIGHT, qc);
+ TRACE_LEAVE(QUIC_EV_CONN_FFLIGHT, qc);
+
+ return 1;
+}
+
+static int ha_quic_send_alert(SSL *ssl, enum ssl_encryption_level_t level, uint8_t alert)
+{
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+
+ TRACE_ENTER(QUIC_EV_CONN_SSLALERT, qc);
+
+ TRACE_PROTO("Received TLS alert", QUIC_EV_CONN_SSLALERT, qc, &alert, &level);
+
+ quic_set_tls_alert(qc, alert);
+ TRACE_LEAVE(QUIC_EV_CONN_SSLALERT, qc);
+ return 1;
+}
+
+/* QUIC TLS methods */
+#if defined(OPENSSL_IS_AWSLC)
+/* write/read set secret split */
+static SSL_QUIC_METHOD ha_quic_method = {
+ .set_read_secret = ha_quic_set_read_secret,
+ .set_write_secret = ha_quic_set_write_secret,
+ .add_handshake_data = ha_quic_add_handshake_data,
+ .flush_flight = ha_quic_flush_flight,
+ .send_alert = ha_quic_send_alert,
+};
+
+#else
+
+static SSL_QUIC_METHOD ha_quic_method = {
+ .set_encryption_secrets = ha_quic_set_encryption_secrets,
+ .add_handshake_data = ha_quic_add_handshake_data,
+ .flush_flight = ha_quic_flush_flight,
+ .send_alert = ha_quic_send_alert,
+};
+#endif
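+
+/* These callbacks are wired per-session with SSL_set_quic_method(), as done
+ * in qc_ssl_sess_init() below. A minimal registration sketch:
+ *
+ *   SSL *ssl = SSL_new(ssl_ctx);
+ *   SSL_set_ex_data(ssl, ssl_qc_app_data_index, qc);
+ *   SSL_set_quic_method(ssl, &ha_quic_method);
+ */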
+
+/* Initialize the TLS context of a listener with <bind_conf> as configuration.
+ * Returns an error count.
+ */
+int ssl_quic_initial_ctx(struct bind_conf *bind_conf)
+{
+ struct ssl_bind_conf __maybe_unused *ssl_conf_cur;
+ int cfgerr = 0;
+
+ long options =
+ (SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) |
+ SSL_OP_SINGLE_ECDH_USE |
+ SSL_OP_CIPHER_SERVER_PREFERENCE;
+ SSL_CTX *ctx;
+
+ ctx = SSL_CTX_new(TLS_server_method());
+ bind_conf->initial_ctx = ctx;
+
+ SSL_CTX_set_options(ctx, options);
+ SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS);
+ SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
+ SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
+
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+# if defined(HAVE_SSL_CLIENT_HELLO_CB)
+# if defined(SSL_OP_NO_ANTI_REPLAY)
+ if (bind_conf->ssl_conf.early_data) {
+ SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY);
+# if defined(USE_QUIC_OPENSSL_COMPAT) || defined(OPENSSL_IS_AWSLC)
+ ha_warning("Binding [%s:%d] for %s %s: 0-RTT is not supported in limited QUIC compatibility mode, ignored.\n",
+ bind_conf->file, bind_conf->line, proxy_type_str(bind_conf->frontend), bind_conf->frontend->id);
+# else
+ SSL_CTX_set_max_early_data(ctx, 0xffffffff);
+# endif /* ! USE_QUIC_OPENSSL_COMPAT */
+ }
+# endif /* !SSL_OP_NO_ANTI_REPLAY */
+ SSL_CTX_set_client_hello_cb(ctx, ssl_sock_switchctx_cbk, NULL);
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk);
+# else /* ! HAVE_SSL_CLIENT_HELLO_CB */
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_cbk);
+# endif
+ SSL_CTX_set_tlsext_servername_arg(ctx, bind_conf);
+#endif
+#ifdef USE_QUIC_OPENSSL_COMPAT
+ if (!quic_tls_compat_init(bind_conf, ctx))
+ cfgerr++;
+#endif
+
+ return cfgerr;
+}
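+
+/* A configuration sketch exercising this context (hypothetical listener):
+ *
+ *   frontend fe_quic
+ *       mode http
+ *       bind quic4@0.0.0.0:443 ssl crt /etc/haproxy/cert.pem alpn h3
+ *
+ * Note that TLS 1.3 is forced above, as required by RFC 9001 for QUIC.
+ */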
+
+/* This function gives the details of an SSL error. It is used only
+ * if the debug mode and the verbose mode are activated. It dumps all
+ * the SSL errors until the stack is empty.
+ */
+static forceinline void qc_ssl_dump_errors(struct connection *conn)
+{
+ if (unlikely(global.mode & MODE_DEBUG)) {
+ while (1) {
+ const char *func = NULL;
+ unsigned long ret;
+
+ ERR_peek_error_func(&func);
+ ret = ERR_get_error();
+ if (!ret)
+ return;
+
+ fprintf(stderr, "conn. @%p OpenSSL error[0x%lx] %s: %s\n", conn, ret,
+ func, ERR_reason_error_string(ret));
+ }
+ }
+}
+
+/* Provide the CRYPTO data found at <data> with <len> as length to the TLS
+ * stack, at <level> encryption level, with <ctx> as QUIC connection context.
+ * Remaining parameters are there for debugging purposes.
+ * Return 1 if succeeded, 0 if not.
+ */
+int qc_ssl_provide_quic_data(struct ncbuf *ncbuf,
+ enum ssl_encryption_level_t level,
+ struct ssl_sock_ctx *ctx,
+ const unsigned char *data, size_t len)
+{
+#ifdef DEBUG_STRICT
+ enum ncb_ret ncb_ret;
+#endif
+ int ssl_err, state;
+ struct quic_conn *qc;
+ int ret = 0;
+
+ ssl_err = SSL_ERROR_NONE;
+ qc = ctx->qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_SSLDATA, qc);
+
+ if (SSL_provide_quic_data(ctx->ssl, level, data, len) != 1) {
+ TRACE_ERROR("SSL_provide_quic_data() error",
+ QUIC_EV_CONN_SSLDATA, qc, NULL, NULL, ctx->ssl);
+ goto leave;
+ }
+
+ state = qc->state;
+ if (state < QUIC_HS_ST_COMPLETE) {
+ ssl_err = SSL_do_handshake(ctx->ssl);
+
+ if (qc->flags & QUIC_FL_CONN_TO_KILL) {
+ TRACE_DEVEL("connection to be killed", QUIC_EV_CONN_IO_CB, qc);
+ goto leave;
+ }
+
+ /* Finalize the connection as soon as possible if the peer transport parameters
+ * have been received. This may be useful to send packets even if this
+ * handshake fails.
+ */
+ if ((qc->flags & QUIC_FL_CONN_TX_TP_RECEIVED) && !qc_conn_finalize(qc, 1)) {
+ TRACE_ERROR("connection finalization failed", QUIC_EV_CONN_IO_CB, qc, &state);
+ goto leave;
+ }
+
+ if (ssl_err != 1) {
+ ssl_err = SSL_get_error(ctx->ssl, ssl_err);
+ if (ssl_err == SSL_ERROR_WANT_READ || ssl_err == SSL_ERROR_WANT_WRITE) {
+ TRACE_PROTO("SSL handshake in progress",
+ QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err);
+ goto out;
+ }
+
+ TRACE_ERROR("SSL handshake error", QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err);
+ HA_ATOMIC_INC(&qc->prx_counters->hdshk_fail);
+ qc_ssl_dump_errors(ctx->conn);
+ ERR_clear_error();
+ goto leave;
+ }
+
+ TRACE_PROTO("SSL handshake OK", QUIC_EV_CONN_IO_CB, qc, &state);
+
+ /* Check that the ALPN could be negotiated */
+ if (!qc->app_ops) {
+ TRACE_ERROR("No negotiated ALPN", QUIC_EV_CONN_IO_CB, qc, &state);
+ quic_set_tls_alert(qc, SSL_AD_NO_APPLICATION_PROTOCOL);
+ goto leave;
+ }
+
+ /* I/O callback switch */
+ qc->wait_event.tasklet->process = quic_conn_app_io_cb;
+ if (qc_is_listener(ctx->qc)) {
+ qc->flags |= QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS;
+ qc->state = QUIC_HS_ST_CONFIRMED;
+ /* The connection is ready to be accepted. */
+ quic_accept_push_qc(qc);
+
+ BUG_ON(qc->li->rx.quic_curr_handshake == 0);
+ HA_ATOMIC_DEC(&qc->li->rx.quic_curr_handshake);
+ }
+ else {
+ qc->state = QUIC_HS_ST_COMPLETE;
+ }
+
+ /* Prepare the next key update */
+ if (!quic_tls_key_update(qc)) {
+ TRACE_ERROR("quic_tls_key_update() failed", QUIC_EV_CONN_IO_CB, qc);
+ goto leave;
+ }
+ } else {
+ ssl_err = SSL_process_quic_post_handshake(ctx->ssl);
+ if (ssl_err != 1) {
+ ssl_err = SSL_get_error(ctx->ssl, ssl_err);
+ if (ssl_err == SSL_ERROR_WANT_READ || ssl_err == SSL_ERROR_WANT_WRITE) {
+ TRACE_PROTO("SSL post handshake in progress",
+ QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err);
+ goto out;
+ }
+
+ TRACE_ERROR("SSL post handshake error",
+ QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err);
+ goto leave;
+ }
+
+ TRACE_STATE("SSL post handshake succeeded", QUIC_EV_CONN_IO_CB, qc, &state);
+ }
+
+ out:
+ ret = 1;
+ leave:
+ /* The CRYPTO data are consumed even in case of an error to release
+ * the memory asap.
+ */
+ if (!ncb_is_null(ncbuf)) {
+#ifdef DEBUG_STRICT
+ ncb_ret = ncb_advance(ncbuf, len);
+ /* ncb_advance() must always succeed. This is guaranteed as
+ * this is only done inside a data block. If false, this will
+ * lead to handshake failure with quic_enc_level offset shifted
+ * from buffer data.
+ */
+ BUG_ON(ncb_ret != NCB_RET_OK);
+#else
+ ncb_advance(ncbuf, len);
+#endif
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_SSLDATA, qc);
+ return ret;
+}
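+
+/* The function above follows the usual QUIC TLS API sequence: feed CRYPTO
+ * data with SSL_provide_quic_data(), then drive the handshake. A condensed
+ * sketch of that contract:
+ *
+ *   if (SSL_provide_quic_data(ssl, level, data, len) == 1 &&
+ *       SSL_do_handshake(ssl) == 1) {
+ *       // handshake complete; later CRYPTO data go through
+ *       // SSL_process_quic_post_handshake()
+ *   }
+ */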
+
+/* Provide the TLS stack with all the in-order CRYPTO data received from the peer.
+ * Return 1 if succeeded, 0 if not.
+ */
+int qc_ssl_provide_all_quic_data(struct quic_conn *qc, struct ssl_sock_ctx *ctx)
+{
+ int ret = 0;
+ struct quic_enc_level *qel;
+ struct ncbuf ncbuf = NCBUF_NULL;
+
+ TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+ list_for_each_entry(qel, &qc->qel_list, list) {
+ struct qf_crypto *qf_crypto, *qf_back;
+
+ list_for_each_entry_safe(qf_crypto, qf_back, &qel->rx.crypto_frms, list) {
+ const unsigned char *crypto_data = qf_crypto->data;
+ size_t crypto_len = qf_crypto->len;
+
+ /* Free this frame asap */
+ LIST_DELETE(&qf_crypto->list);
+ pool_free(pool_head_qf_crypto, qf_crypto);
+
+ if (!qc_ssl_provide_quic_data(&ncbuf, qel->level, ctx,
+ crypto_data, crypto_len))
+ goto leave;
+
+ TRACE_DEVEL("buffered crypto data were provided to TLS stack",
+ QUIC_EV_CONN_PHPKTS, qc, qel);
+ }
+
+ if (!qel->cstream)
+ continue;
+
+ if (!qc_treat_rx_crypto_frms(qc, qel, ctx))
+ goto leave;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+ return ret;
+}
+
+/* Try to allocate the <*ssl> SSL session object for <qc> QUIC connection
+ * with <ssl_ctx> as SSL context inherited settings.
+ * It is the caller's responsibility to check the validity of all the
+ * pointers passed as parameters to this function.
+ * Return 0 if succeeded, -1 if not.
+ */
+static int qc_ssl_sess_init(struct quic_conn *qc, SSL_CTX *ssl_ctx, SSL **ssl)
+{
+ int retry, ret = -1;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ retry = 1;
+ retry:
+ *ssl = SSL_new(ssl_ctx);
+ if (!*ssl) {
+ if (!retry--)
+ goto leave;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ if (!SSL_set_ex_data(*ssl, ssl_qc_app_data_index, qc) ||
+ !SSL_set_quic_method(*ssl, &ha_quic_method)) {
+ SSL_free(*ssl);
+ *ssl = NULL;
+ if (!retry--)
+ goto leave;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ ret = 0;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
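+
+/* The single retry after pool_gc() above gives the allocator one chance to
+ * reclaim memory from the pools before the allocation is reported as failed.
+ */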
+
+/* Allocate the ssl_sock_ctx for connection <qc>. The allocated context is
+ * stored in <qc->xprt_ctx>.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int qc_alloc_ssl_sock_ctx(struct quic_conn *qc)
+{
+ int ret = 0;
+ struct bind_conf *bc = qc->li->bind_conf;
+ struct ssl_sock_ctx *ctx = NULL;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ ctx = pool_alloc(pool_head_quic_ssl_sock_ctx);
+ if (!ctx) {
+ TRACE_ERROR("SSL context allocation failed", QUIC_EV_CONN_TXPKT);
+ goto err;
+ }
+
+ ctx->conn = NULL;
+ ctx->bio = NULL;
+ ctx->xprt = NULL;
+ ctx->xprt_ctx = NULL;
+ memset(&ctx->wait_event, 0, sizeof(ctx->wait_event));
+ ctx->subs = NULL;
+ ctx->xprt_st = 0;
+ ctx->error_code = 0;
+ ctx->early_buf = BUF_NULL;
+ ctx->sent_early_data = 0;
+ ctx->qc = qc;
+
+ if (qc_is_listener(qc)) {
+ if (qc_ssl_sess_init(qc, bc->initial_ctx, &ctx->ssl) == -1)
+ goto err;
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) && !defined(OPENSSL_IS_AWSLC)
+#ifndef USE_QUIC_OPENSSL_COMPAT
+ /* Enabling 0-RTT */
+ if (bc->ssl_conf.early_data)
+ SSL_set_quic_early_data_enabled(ctx->ssl, 1);
+#endif
+#endif
+
+ SSL_set_accept_state(ctx->ssl);
+ }
+
+ ctx->xprt = xprt_get(XPRT_QUIC);
+
+ /* Store the allocated context in <qc>. */
+ qc->xprt_ctx = ctx;
+
+ /* global.sslconns is already incremented on INITIAL packet parsing. */
+ _HA_ATOMIC_INC(&global.totalsslconns);
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return !ret;
+
+ err:
+ pool_free(pool_head_quic_ssl_sock_ctx, ctx);
+ goto leave;
+}
+
+static void __quic_conn_init(void)
+{
+ ha_quic_meth = BIO_meth_new(0x666, "ha QUIC methods");
+}
+INITCALL0(STG_REGISTER, __quic_conn_init);
+
+static void __quic_conn_deinit(void)
+{
+ BIO_meth_free(ha_quic_meth);
+}
+REGISTER_POST_DEINIT(__quic_conn_deinit);
diff --git a/src/quic_stats.c b/src/quic_stats.c
new file mode 100644
index 0000000..3657f30
--- /dev/null
+++ b/src/quic_stats.c
@@ -0,0 +1,215 @@
+#include <haproxy/quic_frame-t.h>
+#include <haproxy/quic_stats-t.h>
+#include <haproxy/stats.h>
+
+static struct name_desc quic_stats[] = {
+ [QUIC_ST_RXBUF_FULL] = { .name = "quic_rxbuf_full",
+ .desc = "Total number of cancelled reception due to full receiver buffer" },
+ [QUIC_ST_DROPPED_PACKET] = { .name = "quic_dropped_pkt",
+ .desc = "Total number of dropped packets" },
+ [QUIC_ST_DROPPED_PACKET_BUFOVERRUN] = { .name = "quic_dropped_pkt_bufoverrun",
+ .desc = "Total number of dropped packets because of buffer overrun" },
+ [QUIC_ST_DROPPED_PARSING] = { .name = "quic_dropped_parsing_pkt",
+ .desc = "Total number of dropped packets upon parsing error" },
+ [QUIC_ST_SOCKET_FULL] = { .name = "quic_socket_full",
+ .desc = "Total number of EAGAIN error on sendto() calls" },
+ [QUIC_ST_SENDTO_ERR] = { .name = "quic_sendto_err",
+ .desc = "Total number of error on sendto() calls, EAGAIN excepted" },
+ [QUIC_ST_SENDTO_ERR_UNKNWN] = { .name = "quic_sendto_err_unknwn",
+ .desc = "Total number of error on sendto() calls not explicitly listed" },
+ [QUIC_ST_SENT_PACKET] = { .name = "quic_sent_pkt",
+ .desc = "Total number of sent packets" },
+ [QUIC_ST_LOST_PACKET] = { .name = "quic_lost_pkt",
+ .desc = "Total number of lost sent packets" },
+ [QUIC_ST_TOO_SHORT_INITIAL_DGRAM] = { .name = "quic_too_short_dgram",
+ .desc = "Total number of too short dgrams with Initial packets" },
+ [QUIC_ST_RETRY_SENT] = { .name = "quic_retry_sent",
+ .desc = "Total number of Retry sent" },
+ [QUIC_ST_RETRY_VALIDATED] = { .name = "quic_retry_validated",
+ .desc = "Total number of validated Retry tokens" },
+ [QUIC_ST_RETRY_ERRORS] = { .name = "quic_retry_error",
+ .desc = "Total number of Retry tokens errors" },
+ [QUIC_ST_HALF_OPEN_CONN] = { .name = "quic_half_open_conn",
+ .desc = "Total number of half open connections" },
+ [QUIC_ST_HDSHK_FAIL] = { .name = "quic_hdshk_fail",
+ .desc = "Total number of handshake failures" },
+ [QUIC_ST_STATELESS_RESET_SENT] = { .name = "quic_stless_rst_sent",
+ .desc = "Total number of stateless reset packet sent" },
+ /* Special events of interest */
+ [QUIC_ST_CONN_MIGRATION_DONE] = { .name = "quic_conn_migration_done",
+ .desc = "Total number of connection migration proceeded" },
+ /* Transport errors */
+ [QUIC_ST_TRANSP_ERR_NO_ERROR] = { .name = "quic_transp_err_no_error",
+ .desc = "Total number of NO_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_INTERNAL_ERROR] = { .name = "quic_transp_err_internal_error",
+ .desc = "Total number of INTERNAL_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_CONNECTION_REFUSED] = { .name = "quic_transp_err_connection_refused",
+ .desc = "Total number of CONNECTION_REFUSED errors received" },
+ [QUIC_ST_TRANSP_ERR_FLOW_CONTROL_ERROR] = { .name = "quic_transp_err_flow_control_error",
+ .desc = "Total number of FLOW_CONTROL_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_STREAM_LIMIT_ERROR] = { .name = "quic_transp_err_stream_limit_error",
+ .desc = "Total number of STREAM_LIMIT_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_STREAM_STATE_ERROR] = { .name = "quic_transp_err_stream_state_error",
+ .desc = "Total number of STREAM_STATE_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_FINAL_SIZE_ERROR] = { .name = "quic_transp_err_final_size_error",
+ .desc = "Total number of FINAL_SIZE_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_FRAME_ENCODING_ERROR] = { .name = "quic_transp_err_frame_encoding_error",
+ .desc = "Total number of FRAME_ENCODING_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_TRANSPORT_PARAMETER_ERROR] = { .name = "quic_transp_err_transport_parameter_error",
+ .desc = "Total number of TRANSPORT_PARAMETER_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_CONNECTION_ID_LIMIT_ERROR] = { .name = "quic_transp_err_connection_id_limit",
+ .desc = "Total number of CONNECTION_ID_LIMIT_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_PROTOCOL_VIOLATION] = { .name = "quic_transp_err_protocol_violation_error",
+ .desc = "Total number of PROTOCOL_VIOLATION errors received" },
+ [QUIC_ST_TRANSP_ERR_INVALID_TOKEN] = { .name = "quic_transp_err_invalid_token",
+ .desc = "Total number of INVALID_TOKEN errors received" },
+ [QUIC_ST_TRANSP_ERR_APPLICATION_ERROR] = { .name = "quic_transp_err_application_error",
+ .desc = "Total number of APPLICATION_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_CRYPTO_BUFFER_EXCEEDED] = { .name = "quic_transp_err_crypto_buffer_exceeded",
+ .desc = "Total number of CRYPTO_BUFFER_EXCEEDED errors received" },
+ [QUIC_ST_TRANSP_ERR_KEY_UPDATE_ERROR] = { .name = "quic_transp_err_key_update_error",
+ .desc = "Total number of KEY_UPDATE_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_AEAD_LIMIT_REACHED] = { .name = "quic_transp_err_aead_limit_reached",
+ .desc = "Total number of AEAD_LIMIT_REACHED errors received" },
+ [QUIC_ST_TRANSP_ERR_NO_VIABLE_PATH] = { .name = "quic_transp_err_no_viable_path",
+ .desc = "Total number of NO_VIABLE_PATH errors received" },
+ [QUIC_ST_TRANSP_ERR_CRYPTO_ERROR] = { .name = "quic_transp_err_crypto_error",
+ .desc = "Total number of CRYPTO_ERROR errors received" },
+ [QUIC_ST_TRANSP_ERR_UNKNOWN_ERROR] = { .name = "quic_transp_err_unknown_error",
+ .desc = "Total number of UNKNOWN_ERROR errors received" },
+ /* Streams related counters */
+ [QUIC_ST_DATA_BLOCKED] = { .name = "quic_data_blocked",
+ .desc = "Total number of received DATA_BLOCKED frames" },
+ [QUIC_ST_STREAM_DATA_BLOCKED] = { .name = "quic_stream_data_blocked",
+ .desc = "Total number of received STREAM_DATA_BLOCKED frames" },
+ [QUIC_ST_STREAMS_BLOCKED_BIDI] = { .name = "quic_streams_blocked_bidi",
+ .desc = "Total number of received STREAMS_BLOCKED_BIDI frames" },
+ [QUIC_ST_STREAMS_BLOCKED_UNI] = { .name = "quic_streams_blocked_uni",
+ .desc = "Total number of received STREAMS_BLOCKED_UNI frames" },
+};
+
+struct quic_counters quic_counters;
+
+static void quic_fill_stats(void *data, struct field *stats)
+{
+ struct quic_counters *counters = data;
+
+ stats[QUIC_ST_RXBUF_FULL] = mkf_u64(FN_COUNTER, counters->rxbuf_full);
+ stats[QUIC_ST_DROPPED_PACKET] = mkf_u64(FN_COUNTER, counters->dropped_pkt);
+ stats[QUIC_ST_DROPPED_PACKET_BUFOVERRUN] = mkf_u64(FN_COUNTER, counters->dropped_pkt_bufoverrun);
+ stats[QUIC_ST_DROPPED_PARSING] = mkf_u64(FN_COUNTER, counters->dropped_parsing);
+ stats[QUIC_ST_SOCKET_FULL] = mkf_u64(FN_COUNTER, counters->socket_full);
+ stats[QUIC_ST_SENDTO_ERR] = mkf_u64(FN_COUNTER, counters->sendto_err);
+ stats[QUIC_ST_SENDTO_ERR_UNKNWN] = mkf_u64(FN_COUNTER, counters->sendto_err_unknown);
+ stats[QUIC_ST_SENT_PACKET] = mkf_u64(FN_COUNTER, counters->sent_pkt);
+ stats[QUIC_ST_LOST_PACKET] = mkf_u64(FN_COUNTER, counters->lost_pkt);
+ stats[QUIC_ST_TOO_SHORT_INITIAL_DGRAM] = mkf_u64(FN_COUNTER, counters->too_short_initial_dgram);
+ stats[QUIC_ST_RETRY_SENT] = mkf_u64(FN_COUNTER, counters->retry_sent);
+ stats[QUIC_ST_RETRY_VALIDATED] = mkf_u64(FN_COUNTER, counters->retry_validated);
+ stats[QUIC_ST_RETRY_ERRORS] = mkf_u64(FN_COUNTER, counters->retry_error);
+ stats[QUIC_ST_HALF_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->half_open_conn);
+ stats[QUIC_ST_HDSHK_FAIL] = mkf_u64(FN_COUNTER, counters->hdshk_fail);
+ stats[QUIC_ST_STATELESS_RESET_SENT] = mkf_u64(FN_COUNTER, counters->stateless_reset_sent);
+ /* Special events of interest */
+ stats[QUIC_ST_CONN_MIGRATION_DONE] = mkf_u64(FN_COUNTER, counters->conn_migration_done);
+ /* Transport errors */
+ stats[QUIC_ST_TRANSP_ERR_NO_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_error);
+ stats[QUIC_ST_TRANSP_ERR_INTERNAL_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_internal_error);
+ stats[QUIC_ST_TRANSP_ERR_CONNECTION_REFUSED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_refused);
+ stats[QUIC_ST_TRANSP_ERR_FLOW_CONTROL_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_flow_control_error);
+ stats[QUIC_ST_TRANSP_ERR_STREAM_LIMIT_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_limit_error);
+ stats[QUIC_ST_TRANSP_ERR_STREAM_STATE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_state_error);
+ stats[QUIC_ST_TRANSP_ERR_FINAL_SIZE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_final_size_error);
+ stats[QUIC_ST_TRANSP_ERR_FRAME_ENCODING_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_frame_encoding_error);
+ stats[QUIC_ST_TRANSP_ERR_TRANSPORT_PARAMETER_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_transport_parameter_error);
+ stats[QUIC_ST_TRANSP_ERR_CONNECTION_ID_LIMIT_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_id_limit);
+ stats[QUIC_ST_TRANSP_ERR_PROTOCOL_VIOLATION] = mkf_u64(FN_COUNTER, counters->quic_transp_err_protocol_violation);
+ stats[QUIC_ST_TRANSP_ERR_INVALID_TOKEN] = mkf_u64(FN_COUNTER, counters->quic_transp_err_invalid_token);
+ stats[QUIC_ST_TRANSP_ERR_APPLICATION_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_application_error);
+ stats[QUIC_ST_TRANSP_ERR_CRYPTO_BUFFER_EXCEEDED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_buffer_exceeded);
+ stats[QUIC_ST_TRANSP_ERR_KEY_UPDATE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_key_update_error);
+ stats[QUIC_ST_TRANSP_ERR_AEAD_LIMIT_REACHED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_aead_limit_reached);
+ stats[QUIC_ST_TRANSP_ERR_NO_VIABLE_PATH] = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_viable_path);
+ stats[QUIC_ST_TRANSP_ERR_CRYPTO_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_error);
+ stats[QUIC_ST_TRANSP_ERR_UNKNOWN_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_unknown_error);
+ /* Streams related counters */
+ stats[QUIC_ST_DATA_BLOCKED] = mkf_u64(FN_COUNTER, counters->data_blocked);
+ stats[QUIC_ST_STREAM_DATA_BLOCKED] = mkf_u64(FN_COUNTER, counters->stream_data_blocked);
+ stats[QUIC_ST_STREAMS_BLOCKED_BIDI] = mkf_u64(FN_COUNTER, counters->streams_blocked_bidi);
+ stats[QUIC_ST_STREAMS_BLOCKED_UNI] = mkf_u64(FN_COUNTER, counters->streams_blocked_uni);
+}
+
+struct stats_module quic_stats_module = {
+ .name = "quic",
+ .fill_stats = quic_fill_stats,
+ .stats = quic_stats,
+ .stats_count = QUIC_STATS_COUNT,
+ .counters = &quic_counters,
+ .counters_size = sizeof(quic_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &quic_stats_module);
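+
+/* These counters are bumped atomically from the datapath, for instance (as
+ * done in quic_ssl.c above):
+ *
+ *   HA_ATOMIC_INC(&qc->prx_counters->hdshk_fail);
+ */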
+
+void quic_stats_transp_err_count_inc(struct quic_counters *ctrs, int error_code)
+{
+ switch (error_code) {
+ case QC_ERR_NO_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_no_error);
+ break;
+ case QC_ERR_INTERNAL_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_internal_error);
+ break;
+ case QC_ERR_CONNECTION_REFUSED:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_connection_refused);
+ break;
+ case QC_ERR_FLOW_CONTROL_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_flow_control_error);
+ break;
+ case QC_ERR_STREAM_LIMIT_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_stream_limit_error);
+ break;
+ case QC_ERR_STREAM_STATE_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_stream_state_error);
+ break;
+ case QC_ERR_FINAL_SIZE_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_final_size_error);
+ break;
+ case QC_ERR_FRAME_ENCODING_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_frame_encoding_error);
+ break;
+ case QC_ERR_TRANSPORT_PARAMETER_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_transport_parameter_error);
+ break;
+ case QC_ERR_CONNECTION_ID_LIMIT_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_connection_id_limit);
+ break;
+ case QC_ERR_PROTOCOL_VIOLATION:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_protocol_violation);
+ break;
+ case QC_ERR_INVALID_TOKEN:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_invalid_token);
+ break;
+ case QC_ERR_APPLICATION_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_application_error);
+ break;
+ case QC_ERR_CRYPTO_BUFFER_EXCEEDED:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_crypto_buffer_exceeded);
+ break;
+ case QC_ERR_KEY_UPDATE_ERROR:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_key_update_error);
+ break;
+ case QC_ERR_AEAD_LIMIT_REACHED:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_aead_limit_reached);
+ break;
+ case QC_ERR_NO_VIABLE_PATH:
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_no_viable_path);
+ break;
+ default:
+ if (error_code >= 0x100 && error_code <= 0x1ff)
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_crypto_error);
+ else
+ HA_ATOMIC_INC(&ctrs->quic_transp_err_unknown_error);
+ }
+}
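+
+/* The 0x100-0x1ff range maps the CRYPTO_ERROR codes, i.e. 0x100 plus the TLS
+ * alert value (RFC 9000, section 20.1). For example a handshake_failure
+ * alert (40) would be accounted as:
+ *
+ *   quic_stats_transp_err_count_inc(ctrs, 0x100 + 40);
+ */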
diff --git a/src/quic_stream.c b/src/quic_stream.c
new file mode 100644
index 0000000..a4b984d
--- /dev/null
+++ b/src/quic_stream.c
@@ -0,0 +1,294 @@
+#include <haproxy/quic_stream.h>
+
+#include <import/eb64tree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/mux_quic-t.h>
+#include <haproxy/pool.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/task.h>
+
+DECLARE_STATIC_POOL(pool_head_quic_stream_desc, "qc_stream_desc",
+ sizeof(struct qc_stream_desc));
+DECLARE_STATIC_POOL(pool_head_quic_stream_buf, "qc_stream_buf",
+ sizeof(struct qc_stream_buf));
+
+
+static void qc_stream_buf_free(struct qc_stream_desc *stream,
+ struct qc_stream_buf **stream_buf)
+{
+ struct quic_conn *qc = stream->qc;
+ struct buffer *buf = &(*stream_buf)->buf;
+
+ LIST_DEL_INIT(&(*stream_buf)->list);
+
+ /* Reset current buf ptr if deleted instance is the same one. */
+ if (*stream_buf == stream->buf)
+ stream->buf = NULL;
+
+ b_free(buf);
+ offer_buffers(NULL, 1);
+ pool_free(pool_head_quic_stream_buf, *stream_buf);
+ *stream_buf = NULL;
+
+ /* notify MUX about available buffers. */
+ --qc->stream_buf_count;
+ if (qc->mux_state == QC_MUX_READY) {
+ if (qc->qcc->flags & QC_CF_CONN_FULL) {
+ qc->qcc->flags &= ~QC_CF_CONN_FULL;
+ tasklet_wakeup(qc->qcc->wait_event.tasklet);
+ }
+ }
+}
+
+/* Allocate a new stream descriptor with id <id>. The caller is responsible
+ * for storing the stream in the appropriate tree. The special value
+ * (uint64_t)-1 must be used for a CRYPTO data stream, in which case the type
+ * is ignored.
+ *
+ * Returns the newly allocated instance on success or else NULL.
+ */
+struct qc_stream_desc *qc_stream_desc_new(uint64_t id, enum qcs_type type, void *ctx,
+ struct quic_conn *qc)
+{
+ struct qc_stream_desc *stream;
+
+ stream = pool_alloc(pool_head_quic_stream_desc);
+ if (!stream)
+ return NULL;
+
+ if (id == (uint64_t)-1) {
+ stream->by_id.key = (uint64_t)-1;
+ }
+ else {
+ stream->by_id.key = id;
+ eb64_insert(&qc->streams_by_id, &stream->by_id);
+ qc->rx.strms[type].nb_streams++;
+ }
+ stream->qc = qc;
+
+ stream->buf = NULL;
+ LIST_INIT(&stream->buf_list);
+ stream->buf_offset = 0;
+
+ stream->acked_frms = EB_ROOT;
+ stream->ack_offset = 0;
+ stream->release = 0;
+ stream->ctx = ctx;
+
+ return stream;
+}
+
+/* Mark the stream descriptor <stream> as released. It will be freed as soon
+ * as all its buffered data are acknowledged. Does nothing if <stream> is
+ * already NULL.
+ *
+ * <final_size> corresponds to the last offset sent for this stream. If unsent
+ * data is present, it is removed first to guarantee that the buffer is freed
+ * once all acknowledgments have been received.
+ */
+void qc_stream_desc_release(struct qc_stream_desc *stream,
+ uint64_t final_size)
+{
+ if (!stream)
+ return;
+
+ /* A stream can be released only one time. */
+ BUG_ON(stream->release);
+
+ stream->release = 1;
+ stream->ctx = NULL;
+
+ if (stream->buf) {
+ struct qc_stream_buf *stream_buf = stream->buf;
+ struct buffer *buf = &stream_buf->buf;
+ const uint64_t tail_offset =
+ MAX(stream->buf_offset, stream->ack_offset) + b_data(buf);
+
+ /* final_size cannot be greater than all currently stored data. */
+ BUG_ON(final_size > tail_offset);
+
+ /* Remove unsent data from current buffer. */
+ if (final_size < tail_offset) {
+ b_sub(buf, tail_offset - final_size);
+ /* Remove the buffer if all ACKs were already received. */
+ if (!b_data(buf))
+ qc_stream_buf_free(stream, &stream_buf);
+ }
+
+ /* A released stream does not use <stream.buf>. */
+ stream->buf = NULL;
+ }
+
+ if (LIST_ISEMPTY(&stream->buf_list)) {
+ /* if no buffer left we can free the stream. */
+ qc_stream_desc_free(stream, 0);
+ }
+}
+
+/* Acknowledge data at <offset> of length <len> for <stream>. It is handled
+ * only if it covers a range corresponding to stream.ack_offset. After data
+ * removal, if the stream does not contain data anymore and is already
+ * released, the stream instance is freed. <stream> is set to NULL to indicate
+ * this.
+ *
+ * Returns the count of bytes removed from the stream. Do not forget to check
+ * if <stream> is NULL after invocation.
+ */
+int qc_stream_desc_ack(struct qc_stream_desc **stream, size_t offset, size_t len)
+{
+ struct qc_stream_desc *s = *stream;
+ struct qc_stream_buf *stream_buf;
+ struct buffer *buf;
+ size_t diff;
+
+ if (offset + len <= s->ack_offset || offset > s->ack_offset)
+ return 0;
+
+ /* There must be at least one buffer or we must not report an ACK. */
+ BUG_ON(LIST_ISEMPTY(&s->buf_list));
+
+ /* get oldest buffer from buf_list */
+ stream_buf = LIST_NEXT(&s->buf_list, struct qc_stream_buf *, list);
+ buf = &stream_buf->buf;
+
+ diff = offset + len - s->ack_offset;
+ s->ack_offset += diff;
+ b_del(buf, diff);
+
+ /* Free oldest buffer if all data acknowledged. */
+ if (!b_data(buf)) {
+ qc_stream_buf_free(s, &stream_buf);
+
+ /* Free stream instance if already released and no buffers left. */
+ if (s->release && LIST_ISEMPTY(&s->buf_list)) {
+ qc_stream_desc_free(s, 0);
+ *stream = NULL;
+ }
+ }
+
+ return diff;
+}
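+
+/* A worked example of the logic above: with ack_offset == 100, an ACK for
+ * range [80, 150) (offset 80, len 70) removes diff = 80 + 70 - 100 = 50
+ * bytes and moves ack_offset to 150, while an ACK entirely below 100 or
+ * starting strictly above it is ignored.
+ */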
+
+/* Free the content of the stream descriptor <stream>. This function should be
+ * used when all its data have been acknowledged, or on full connection closing
+ * if the <closing> boolean is set to 1. It must only be called after the
+ * stream is released.
+ */
+void qc_stream_desc_free(struct qc_stream_desc *stream, int closing)
+{
+ struct qc_stream_buf *buf, *buf_back;
+ struct quic_conn *qc = stream->qc;
+ struct eb64_node *frm_node;
+ unsigned int free_count = 0;
+
+ /* This function only deals with released streams. */
+ BUG_ON(!stream->release);
+
+ /* free remaining stream buffers */
+ list_for_each_entry_safe(buf, buf_back, &stream->buf_list, list) {
+ if (!(b_data(&buf->buf)) || closing) {
+ b_free(&buf->buf);
+ LIST_DELETE(&buf->list);
+ pool_free(pool_head_quic_stream_buf, buf);
+
+ ++free_count;
+ }
+ }
+
+ if (free_count) {
+ offer_buffers(NULL, free_count);
+
+ qc->stream_buf_count -= free_count;
+ if (qc->mux_state == QC_MUX_READY) {
+ /* notify MUX about available buffers. */
+ if (qc->qcc->flags & QC_CF_CONN_FULL) {
+ qc->qcc->flags &= ~QC_CF_CONN_FULL;
+ tasklet_wakeup(qc->qcc->wait_event.tasklet);
+ }
+ }
+ }
+
+ /* qc_stream_desc might be freed before having received all its ACKs.
+ * This is the case if some frames were retransmitted.
+ */
+ frm_node = eb64_first(&stream->acked_frms);
+ while (frm_node) {
+ struct qf_stream *strm_frm;
+ struct quic_frame *frm;
+
+ strm_frm = eb64_entry(frm_node, struct qf_stream, offset);
+
+ frm_node = eb64_next(frm_node);
+ eb64_delete(&strm_frm->offset);
+
+ frm = container_of(strm_frm, struct quic_frame, stream);
+ qc_release_frm(qc, frm);
+ }
+
+ if (stream->by_id.key != (uint64_t)-1)
+ eb64_delete(&stream->by_id);
+ pool_free(pool_head_quic_stream_desc, stream);
+}
+
+/* Return the current buffer of <stream>. May be NULL if not allocated. */
+struct buffer *qc_stream_buf_get(struct qc_stream_desc *stream)
+{
+ if (!stream->buf)
+ return NULL;
+
+ return &stream->buf->buf;
+}
+
+/* Returns the count of available buffers left for <qc>. */
+static int qc_stream_buf_avail(struct quic_conn *qc)
+{
+ BUG_ON(qc->stream_buf_count > global.tune.quic_streams_buf);
+ return global.tune.quic_streams_buf - qc->stream_buf_count;
+}
+
+/* Allocate a new current buffer for <stream>. The buffer limit count for the
+ * connection is checked first. This function must not be called if the
+ * current buffer is not NULL prior to this call. The new buffer represents
+ * the stream payload at offset <offset>.
+ *
+ * Returns the buffer or NULL on error. The caller may check <avail> to
+ * determine whether the connection buffer limit was reached or a fatal error
+ * was encountered.
+ */
+struct buffer *qc_stream_buf_alloc(struct qc_stream_desc *stream,
+ uint64_t offset, int *avail)
+{
+ struct quic_conn *qc = stream->qc;
+
+ /* The current buffer must be released first before allocating a new one. */
+ BUG_ON(stream->buf);
+
+ *avail = qc_stream_buf_avail(qc);
+ if (!*avail)
+ return NULL;
+
+ stream->buf_offset = offset;
+ stream->buf = pool_alloc(pool_head_quic_stream_buf);
+ if (!stream->buf)
+ return NULL;
+
+ ++qc->stream_buf_count;
+
+ stream->buf->buf = BUF_NULL;
+ LIST_APPEND(&stream->buf_list, &stream->buf->list);
+
+ return &stream->buf->buf;
+}
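+
+/* A usage sketch on the emitting path (illustrative; <tx_offset> stands for
+ * the caller's current TX offset):
+ *
+ *   int avail;
+ *   struct buffer *buf = qc_stream_buf_alloc(stream, tx_offset, &avail);
+ *   if (!buf) {
+ *       if (!avail)
+ *           ; // connection buffer limit reached, wait for ACKs
+ *       else
+ *           ; // fatal allocation error
+ *   }
+ */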
+
+/* Release the current buffer of <stream>. It will be kept internally by
+ * the <stream>. The current buffer cannot be NULL.
+ */
+void qc_stream_buf_release(struct qc_stream_desc *stream)
+{
+ /* current buffer already released */
+ BUG_ON(!stream->buf);
+
+ stream->buf = NULL;
+ stream->buf_offset = 0;
+}
diff --git a/src/quic_tls.c b/src/quic_tls.c
new file mode 100644
index 0000000..581d615
--- /dev/null
+++ b/src/quic_tls.c
@@ -0,0 +1,1095 @@
+#include <haproxy/quic_tls.h>
+
+#include <string.h>
+
+#include <openssl/evp.h>
+#include <openssl/kdf.h>
+#include <openssl/ssl.h>
+
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/pool.h>
+#include <haproxy/quic_ack.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_rx.h>
+#include <haproxy/quic_stream.h>
+
+
+DECLARE_POOL(pool_head_quic_enc_level, "quic_enc_level", sizeof(struct quic_enc_level));
+DECLARE_POOL(pool_head_quic_pktns, "quic_pktns", sizeof(struct quic_pktns));
+DECLARE_POOL(pool_head_quic_tls_ctx, "quic_tls_ctx", sizeof(struct quic_tls_ctx));
+DECLARE_POOL(pool_head_quic_tls_secret, "quic_tls_secret", QUIC_TLS_SECRET_LEN);
+DECLARE_POOL(pool_head_quic_tls_iv, "quic_tls_iv", QUIC_TLS_IV_LEN);
+DECLARE_POOL(pool_head_quic_tls_key, "quic_tls_key", QUIC_TLS_KEY_LEN);
+
+DECLARE_POOL(pool_head_quic_crypto_buf, "quic_crypto_buf", sizeof(struct quic_crypto_buf));
+DECLARE_STATIC_POOL(pool_head_quic_cstream, "quic_cstream", sizeof(struct quic_cstream));
+
+/* Initial salt depending on QUIC version to derive client/server initial secrets.
+ * This one is for draft-29 QUIC version.
+ */
+const unsigned char initial_salt_draft_29[20] = {
+ 0xaf, 0xbf, 0xec, 0x28, 0x99, 0x93, 0xd2, 0x4c,
+ 0x9e, 0x97, 0x86, 0xf1, 0x9c, 0x61, 0x11, 0xe0,
+ 0x43, 0x90, 0xa8, 0x99
+};
+
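+/* This one is for QUIC v1 (RFC 9001). */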
+const unsigned char initial_salt_v1[20] = {
+ 0x38, 0x76, 0x2c, 0xf7, 0xf5, 0x59, 0x34, 0xb3,
+ 0x4d, 0x17, 0x9a, 0xe6, 0xa4, 0xc8, 0x0c, 0xad,
+ 0xcc, 0xbb, 0x7f, 0x0a
+};
+
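+/* This one is for QUIC v2 (RFC 9369). */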
+const unsigned char initial_salt_v2[20] = {
+ 0x0d, 0xed, 0xe3, 0xde, 0xf7, 0x00, 0xa6, 0xdb,
+ 0x81, 0x93, 0x81, 0xbe, 0x6e, 0x26, 0x9d, 0xcb,
+ 0xf9, 0xbd, 0x2e, 0xd9
+};
+
+/* Dump the RX/TX secrets of <secs> QUIC TLS secrets. */
+void quic_tls_keys_hexdump(struct buffer *buf,
+ const struct quic_tls_secrets *secs)
+{
+ int i;
+ size_t aead_keylen;
+ size_t aead_ivlen;
+ size_t hp_len;
+
+ if (!secs->aead || !secs->hp)
+ return;
+
+ aead_keylen = (size_t)EVP_CIPHER_key_length(secs->aead);
+ aead_ivlen = (size_t)EVP_CIPHER_iv_length(secs->aead);
+ hp_len = (size_t)EVP_CIPHER_key_length(secs->hp);
+
+ chunk_appendf(buf, "\n key=");
+ for (i = 0; i < aead_keylen; i++)
+ chunk_appendf(buf, "%02x", secs->key[i]);
+ chunk_appendf(buf, "\n iv=");
+ for (i = 0; i < aead_ivlen; i++)
+ chunk_appendf(buf, "%02x", secs->iv[i]);
+ chunk_appendf(buf, "\n hp=");
+ for (i = 0; i < hp_len; i++)
+ chunk_appendf(buf, "%02x", secs->hp_key[i]);
+}
+
+/* Dump the RX/TX secrets of <kp> QUIC TLS key phase */
+void quic_tls_kp_keys_hexdump(struct buffer *buf,
+ const struct quic_tls_kp *kp)
+{
+ int i;
+
+ chunk_appendf(buf, "\n secret=");
+ for (i = 0; i < kp->secretlen; i++)
+ chunk_appendf(buf, "%02x", kp->secret[i]);
+ chunk_appendf(buf, "\n key=");
+ for (i = 0; i < kp->keylen; i++)
+ chunk_appendf(buf, "%02x", kp->key[i]);
+ chunk_appendf(buf, "\n iv=");
+ for (i = 0; i < kp->ivlen; i++)
+ chunk_appendf(buf, "%02x", kp->iv[i]);
+}
+
+/* Release the memory of <pktns> packet number space attached to <qc> QUIC connection. */
+void quic_pktns_release(struct quic_conn *qc, struct quic_pktns **pktns)
+{
+ if (!*pktns)
+ return;
+
+ quic_pktns_tx_pkts_release(*pktns, qc);
+ qc_release_pktns_frms(qc, *pktns);
+ quic_free_arngs(qc, &(*pktns)->rx.arngs);
+ LIST_DEL_INIT(&(*pktns)->list);
+ pool_free(pool_head_quic_pktns, *pktns);
+ *pktns = NULL;
+}
+
+/* Dump <secret> TLS secret. */
+void quic_tls_secret_hexdump(struct buffer *buf,
+ const unsigned char *secret, size_t secret_len)
+{
+ int i;
+
+ chunk_appendf(buf, " secret=");
+ for (i = 0; i < secret_len; i++)
+ chunk_appendf(buf, "%02x", secret[i]);
+}
+
+/* Release the memory allocated for <cs> CRYPTO stream */
+void quic_cstream_free(struct quic_cstream *cs)
+{
+ if (!cs) {
+ /* This is the case for the 0-RTT encryption level */
+ return;
+ }
+
+ quic_free_ncbuf(&cs->rx.ncbuf);
+
+ qc_stream_desc_release(cs->desc, 0);
+ pool_free(pool_head_quic_cstream, cs);
+}
+
+/* Allocate a new CRYPTO stream for <qc>.
+ * Return it if succeeded, NULL if not.
+ */
+struct quic_cstream *quic_cstream_new(struct quic_conn *qc)
+{
+ struct quic_cstream *cs, *ret_cs = NULL;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+ cs = pool_alloc(pool_head_quic_cstream);
+ if (!cs) {
+ TRACE_ERROR("crypto stream allocation failed", QUIC_EV_CONN_INIT, qc);
+ goto leave;
+ }
+
+ cs->rx.offset = 0;
+ cs->rx.ncbuf = NCBUF_NULL;
+
+ cs->tx.offset = 0;
+ cs->tx.sent_offset = 0;
+ cs->tx.buf = BUF_NULL;
+ cs->desc = qc_stream_desc_new((uint64_t)-1, -1, cs, qc);
+ if (!cs->desc) {
+ TRACE_ERROR("crypto stream allocation failed", QUIC_EV_CONN_INIT, qc);
+ goto err;
+ }
+
+ ret_cs = cs;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return ret_cs;
+
+ err:
+ pool_free(pool_head_quic_cstream, cs);
+ goto leave;
+}
+
+/* Uninitialize <qel> QUIC encryption level. Never fails. */
+void quic_conn_enc_level_uninit(struct quic_conn *qc, struct quic_enc_level *qel)
+{
+ int i;
+ struct qf_crypto *qf_crypto, *qfback;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ for (i = 0; i < qel->tx.crypto.nb_buf; i++) {
+ if (qel->tx.crypto.bufs[i]) {
+ pool_free(pool_head_quic_crypto_buf, qel->tx.crypto.bufs[i]);
+ qel->tx.crypto.bufs[i] = NULL;
+ }
+ }
+
+ list_for_each_entry_safe(qf_crypto, qfback, &qel->rx.crypto_frms, list) {
+ LIST_DELETE(&qf_crypto->list);
+ pool_free(pool_head_qf_crypto, qf_crypto);
+ }
+
+ ha_free(&qel->tx.crypto.bufs);
+ quic_cstream_free(qel->cstream);
+
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Initialize the QUIC TLS encryption level with <level> as level for <qc>
+ * QUIC connection, allocating everything needed.
+ *
+ * Returns 1 if succeeded, 0 if not. On error the caller is responsible for
+ * using quic_conn_enc_level_uninit() to clean up partially allocated content.
+ */
+static int quic_conn_enc_level_init(struct quic_conn *qc,
+ struct quic_enc_level **el,
+ struct quic_pktns *pktns,
+ enum ssl_encryption_level_t level)
+{
+ int ret = 0;
+ struct quic_enc_level *qel;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ qel = pool_alloc(pool_head_quic_enc_level);
+ if (!qel)
+ goto leave;
+
+ LIST_INIT(&qel->retrans);
+ qel->retrans_frms = NULL;
+ qel->tx.crypto.bufs = NULL;
+ qel->tx.crypto.nb_buf = 0;
+ qel->cstream = NULL;
+ qel->pktns = pktns;
+ qel->level = level;
+ quic_tls_ctx_reset(&qel->tls_ctx);
+
+ qel->rx.pkts = EB_ROOT;
+ LIST_INIT(&qel->rx.pqpkts);
+ LIST_INIT(&qel->rx.crypto_frms);
+
+ /* Allocate only one buffer. */
+ /* TODO: use a pool */
+ qel->tx.crypto.bufs = malloc(sizeof *qel->tx.crypto.bufs);
+ if (!qel->tx.crypto.bufs)
+ goto err;
+
+ qel->tx.crypto.bufs[0] = pool_alloc(pool_head_quic_crypto_buf);
+ if (!qel->tx.crypto.bufs[0])
+ goto err;
+
+
+ qel->tx.crypto.bufs[0]->sz = 0;
+ qel->tx.crypto.nb_buf = 1;
+
+ qel->tx.crypto.sz = 0;
+ qel->tx.crypto.offset = 0;
+ /* No CRYPTO data for early data TLS encryption level */
+ if (level == ssl_encryption_early_data)
+ qel->cstream = NULL;
+ else {
+ qel->cstream = quic_cstream_new(qc);
+ if (!qel->cstream)
+ goto err;
+ }
+
+ LIST_APPEND(&qc->qel_list, &qel->list);
+ *el = qel;
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+ return ret;
+
+ err:
+ quic_conn_enc_level_uninit(qc, qel);
+ pool_free(pool_head_quic_enc_level, qel);
+ goto leave;
+}
+
+/* Allocate a QUIC TLS encryption level with <level> as TLS stack encryption
+ * level, to be attached to <qc> QUIC connection. Also allocate the associated
+ * packet number space object with <pktns> as address, to be attached to <qc>
+ * if not already allocated.
+ * Return 1 if succeeded, 0 if not.
+ */
+int qc_enc_level_alloc(struct quic_conn *qc, struct quic_pktns **pktns,
+ struct quic_enc_level **qel, enum ssl_encryption_level_t level)
+{
+ int ret = 0;
+
+ BUG_ON(!qel || !pktns);
+ BUG_ON(*qel && !*pktns);
+
+ if (!*pktns && !quic_pktns_init(qc, pktns))
+ goto leave;
+
+ if (!*qel && !quic_conn_enc_level_init(qc, qel, *pktns, level))
+ goto leave;
+
+ ret = 1;
+ leave:
+ return ret;
+}
+
+/* Free the memory allocated to the encryption level attached to <qc> connection
+ * with <qel> as pointer address. Also remove it from the list of the encryption
+ * levels attached to this connection and reset its value to NULL.
+ * Never fails.
+ */
+void qc_enc_level_free(struct quic_conn *qc, struct quic_enc_level **qel)
+{
+ if (!*qel)
+ return;
+
+ quic_tls_ctx_secs_free(&(*qel)->tls_ctx);
+ quic_conn_enc_level_uninit(qc, *qel);
+ LIST_DEL_INIT(&(*qel)->list);
+ pool_free(pool_head_quic_enc_level, *qel);
+ *qel = NULL;
+}
+
+int quic_hkdf_extract(const EVP_MD *md,
+ unsigned char *buf, size_t buflen,
+ const unsigned char *key, size_t keylen,
+ const unsigned char *salt, size_t saltlen)
+{
+ EVP_PKEY_CTX *ctx;
+
+ ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
+ if (!ctx)
+ return 0;
+
+ if (EVP_PKEY_derive_init(ctx) <= 0 ||
+ EVP_PKEY_CTX_hkdf_mode(ctx, EVP_PKEY_HKDEF_MODE_EXTRACT_ONLY) <= 0 ||
+ EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_salt(ctx, salt, saltlen) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_key(ctx, key, keylen) <= 0 ||
+ EVP_PKEY_derive(ctx, buf, &buflen) <= 0)
+ goto err;
+
+ EVP_PKEY_CTX_free(ctx);
+ return 1;
+
+ err:
+ EVP_PKEY_CTX_free(ctx);
+ return 0;
+}
+
+int quic_hkdf_expand(const EVP_MD *md,
+ unsigned char *buf, size_t buflen,
+ const unsigned char *key, size_t keylen,
+ const unsigned char *label, size_t labellen)
+{
+ EVP_PKEY_CTX *ctx;
+
+ ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
+ if (!ctx)
+ return 0;
+
+ if (EVP_PKEY_derive_init(ctx) <= 0 ||
+ EVP_PKEY_CTX_hkdf_mode(ctx, EVP_PKEY_HKDEF_MODE_EXPAND_ONLY) <= 0 ||
+ EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_key(ctx, key, keylen) <= 0 ||
+ EVP_PKEY_CTX_add1_hkdf_info(ctx, label, labellen) <= 0 ||
+ EVP_PKEY_derive(ctx, buf, &buflen) <= 0)
+ goto err;
+
+ EVP_PKEY_CTX_free(ctx);
+ return 1;
+
+ err:
+ EVP_PKEY_CTX_free(ctx);
+ return 0;
+}
+
+/* Extracts a pseudo-random secret key from <key>, which is possibly not
+ * pseudo-random, and expands it to a new pseudo-random key into
+ * <buf> with <buflen> as key length, according to the HKDF specification
+ * (https://datatracker.ietf.org/doc/html/rfc5869).
+ * According to this specification it is highly recommended to use
+ * a salt, even though it is optional (NULL value).
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_hkdf_extract_and_expand(const EVP_MD *md,
+ unsigned char *buf, size_t buflen,
+ const unsigned char *key, size_t keylen,
+ const unsigned char *salt, size_t saltlen,
+ const unsigned char *label, size_t labellen)
+{
+ EVP_PKEY_CTX *ctx;
+
+ ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
+ if (!ctx)
+ return 0;
+
+ if (EVP_PKEY_derive_init(ctx) <= 0 ||
+ EVP_PKEY_CTX_hkdf_mode(ctx, EVP_PKEY_HKDEF_MODE_EXTRACT_AND_EXPAND) <= 0 ||
+ EVP_PKEY_CTX_set_hkdf_md(ctx, md) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_salt(ctx, salt, saltlen) <= 0 ||
+ EVP_PKEY_CTX_set1_hkdf_key(ctx, key, keylen) <= 0 ||
+ EVP_PKEY_CTX_add1_hkdf_info(ctx, label, labellen) <= 0 ||
+ EVP_PKEY_derive(ctx, buf, &buflen) <= 0)
+ goto err;
+
+ EVP_PKEY_CTX_free(ctx);
+ return 1;
+
+ err:
+ EVP_PKEY_CTX_free(ctx);
+ return 0;
+}
+
+/* https://quicwg.org/base-drafts/draft-ietf-quic-tls.html#protection-keys
+ * refers to:
+ *
+ * https://tools.ietf.org/html/rfc8446#section-7.1:
+ * 7.1. Key Schedule
+ *
+ * The key derivation process makes use of the HKDF-Extract and
+ * HKDF-Expand functions as defined for HKDF [RFC5869], as well as the
+ * functions defined below:
+ *
+ * HKDF-Expand-Label(Secret, Label, Context, Length) =
+ * HKDF-Expand(Secret, HkdfLabel, Length)
+ *
+ * Where HkdfLabel is specified as:
+ *
+ * struct {
+ * uint16 length = Length;
+ * opaque label<7..255> = "tls13 " + Label;
+ * opaque context<0..255> = Context;
+ * } HkdfLabel;
+ *
+ * Derive-Secret(Secret, Label, Messages) =
+ * HKDF-Expand-Label(Secret, Label,
+ * Transcript-Hash(Messages), Hash.length)
+ *
+ */
+int quic_hkdf_expand_label(const EVP_MD *md,
+ unsigned char *buf, size_t buflen,
+ const unsigned char *key, size_t keylen,
+ const unsigned char *label, size_t labellen)
+{
+ unsigned char hdkf_label[256], *pos;
+ const unsigned char hdkf_label_label[] = "tls13 ";
+ size_t hdkf_label_label_sz = sizeof hdkf_label_label - 1;
+
+ pos = hdkf_label;
+ *pos++ = buflen >> 8;
+ *pos++ = buflen & 0xff;
+ *pos++ = hdkf_label_label_sz + labellen;
+ memcpy(pos, hdkf_label_label, hdkf_label_label_sz);
+ pos += hdkf_label_label_sz;
+ memcpy(pos, label, labellen);
+ pos += labellen;
+ *pos++ = '\0';
+
+ return quic_hkdf_expand(md, buf, buflen,
+ key, keylen, hdkf_label, pos - hdkf_label);
+}
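+
+/* Worked example (illustrative): for a 16-byte AES-128-GCM packet protection
+ * key and the QUIC v1 label "quic key", the HkdfLabel built above is, byte
+ * per byte:
+ *
+ * 00 10 output length (16, two bytes in network order)
+ * 0e label length ("tls13 " + "quic key" = 14 bytes)
+ * 74 6c 73 31 33 20 71 75 69 63 20 6b 65 79 "tls13 quic key"
+ * 00 zero-length context
+ */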
+
+/*
+ * This function derives three secrets from <secret> for a TLS cryptographic context:
+ * <key> is the TLS key derived to encrypt/decrypt data at TLS level,
+ * <iv> is the initialization vector to be used with <key>,
+ * <hp_key> is the key derived for header protection.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_derive_keys(const EVP_CIPHER *aead, const EVP_CIPHER *hp,
+ const EVP_MD *md, const struct quic_version *qv,
+ unsigned char *key, size_t keylen,
+ unsigned char *iv, size_t ivlen,
+ unsigned char *hp_key, size_t hp_keylen,
+ const unsigned char *secret, size_t secretlen)
+{
+ size_t aead_keylen = (size_t)EVP_CIPHER_key_length(aead);
+ size_t aead_ivlen = (size_t)EVP_CIPHER_iv_length(aead);
+ size_t hp_len = hp ? (size_t)EVP_CIPHER_key_length(hp) : 0;
+
+ if (aead_keylen > keylen || aead_ivlen > ivlen || hp_len > hp_keylen)
+ return 0;
+
+ if (!quic_hkdf_expand_label(md, key, aead_keylen, secret, secretlen,
+ qv->key_label, qv->key_label_len) ||
+ !quic_hkdf_expand_label(md, iv, aead_ivlen, secret, secretlen,
+ qv->iv_label, qv->iv_label_len) ||
+ (hp_key && !quic_hkdf_expand_label(md, hp_key, hp_len, secret, secretlen,
+ qv->hp_label, qv->hp_label_len)))
+ return 0;
+
+ return 1;
+}
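+
+/* For QUIC v1, the labels carried by <qv> above are "quic key", "quic iv"
+ * and "quic hp" (RFC 9001 5.1), so that the packet protection key, IV and
+ * header protection key are all expanded from the same traffic secret.
+ */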
+
+/*
+ * Derive the initial secret from <secret> and a QUIC version dependent salt.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_derive_initial_secret(const EVP_MD *md,
+ const unsigned char *initial_salt, size_t initial_salt_sz,
+ unsigned char *initial_secret, size_t initial_secret_sz,
+ const unsigned char *secret, size_t secret_sz)
+{
+ if (!quic_hkdf_extract(md, initial_secret, initial_secret_sz, secret, secret_sz,
+ initial_salt, initial_salt_sz))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Derive the client and server initial secrets from the initial secret.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_derive_initial_secrets(const EVP_MD *md,
+ unsigned char *rx, size_t rx_sz,
+ unsigned char *tx, size_t tx_sz,
+ const unsigned char *secret, size_t secret_sz,
+ int server)
+{
+ const unsigned char client_label[] = "client in";
+ const unsigned char server_label[] = "server in";
+ const unsigned char *tx_label, *rx_label;
+ size_t rx_label_sz, tx_label_sz;
+
+ if (server) {
+ rx_label = client_label;
+ rx_label_sz = sizeof client_label;
+ tx_label = server_label;
+ tx_label_sz = sizeof server_label;
+ }
+ else {
+ rx_label = server_label;
+ rx_label_sz = sizeof server_label;
+ tx_label = client_label;
+ tx_label_sz = sizeof client_label;
+ }
+
+ if (!quic_hkdf_expand_label(md, rx, rx_sz, secret, secret_sz,
+ rx_label, rx_label_sz - 1) ||
+ !quic_hkdf_expand_label(md, tx, tx_sz, secret, secret_sz,
+ tx_label, tx_label_sz - 1))
+ return 0;
+
+ return 1;
+}
+
+/* Update <sec> secret key into <new_sec> according to RFC 9001 6.1.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_sec_update(const EVP_MD *md, const struct quic_version *qv,
+ unsigned char *new_sec, size_t new_seclen,
+ const unsigned char *sec, size_t seclen)
+{
+ return quic_hkdf_expand_label(md, new_sec, new_seclen, sec, seclen,
+ qv->ku_label, qv->ku_label_len);
+}
+
+/*
+ * Build an IV into <iv> buffer with <ivlen> as size from <aead_iv> with
+ * <aead_ivlen> as size, depending on <pn> packet number.
+ * This is the function to be called to build the AEAD nonce used by the AEAD
+ * algorithm to encrypt/decrypt QUIC packet payloads for packet number <pn>.
+ */
+void quic_aead_iv_build(unsigned char *iv, size_t ivlen,
+ unsigned char *aead_iv, size_t aead_ivlen, uint64_t pn)
+{
+ int i;
+ unsigned int shift;
+ unsigned char *pos = iv;
+
+ /* Input buffers must have the same size. */
+ BUG_ON(ivlen != aead_ivlen);
+
+ for (i = 0; i < ivlen - sizeof pn; i++)
+ *pos++ = *aead_iv++;
+
+ /* Only the remaining (sizeof pn) bytes are XOR'ed. */
+ shift = 56;
+ for (i = aead_ivlen - sizeof pn; i < aead_ivlen ; i++, shift -= 8)
+ *pos++ = *aead_iv++ ^ (pn >> shift);
+}
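+
+/* Worked example (RFC 9001 A.5 test vector): with
+ * aead_iv = e0459b3474bdd0e44a41c144 and pn = 654360564, the first four IV
+ * bytes are copied verbatim and the last eight are XOR'ed with the packet
+ * number in network byte order, which gives:
+ *
+ * nonce = e0459b3474bdd0e46d417eb0
+ */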
+
+/* Initialize at <*rx_ctx> the cipher context for the RX part of a QUIC TLS context.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_rx_ctx_init(EVP_CIPHER_CTX **rx_ctx,
+ const EVP_CIPHER *aead, unsigned char *key)
+{
+ EVP_CIPHER_CTX *ctx;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ if (!EVP_DecryptInit_ex(ctx, aead, NULL, NULL, NULL) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN, QUIC_TLS_IV_LEN, NULL) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, QUIC_TLS_TAG_LEN, NULL)) ||
+ !EVP_DecryptInit_ex(ctx, NULL, NULL, key, NULL))
+ goto err;
+
+ *rx_ctx = ctx;
+
+ return 1;
+
+ err:
+ EVP_CIPHER_CTX_free(ctx);
+ return 0;
+}
+
+/* Initialize <*aes_ctx> AES cipher context with <key> as key for encryption */
+int quic_tls_enc_aes_ctx_init(EVP_CIPHER_CTX **aes_ctx,
+ const EVP_CIPHER *aes, unsigned char *key)
+{
+ EVP_CIPHER_CTX *ctx;
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ if (!EVP_EncryptInit_ex(ctx, aes, NULL, key, NULL))
+ goto err;
+
+ *aes_ctx = ctx;
+ return 1;
+
+ err:
+ EVP_CIPHER_CTX_free(ctx);
+ return 0;
+}
+
+/* Encrypt in place <inlen> bytes of <out> buffer with <ctx> as AES
+ * cipher context, <in> being used as the initialization vector. It is the
+ * responsibility of the caller to check there are at least <inlen> bytes in <out> buffer.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_aes_encrypt(unsigned char *out,
+ const unsigned char *in, size_t inlen,
+ EVP_CIPHER_CTX *ctx)
+{
+ int ret = 0;
+
+ if (!EVP_EncryptInit_ex(ctx, NULL, NULL, NULL, in) ||
+ !EVP_EncryptUpdate(ctx, out, &ret, out, inlen) ||
+ !EVP_EncryptFinal_ex(ctx, out, &ret))
+ return 0;
+
+ return 1;
+}
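+
+/* Note (illustrative): as used for QUIC header protection with an AES-CTR
+ * context, the caller passes the 16-byte ciphertext sample as <in> and a
+ * zeroed 5-byte mask buffer as <out>; the resulting mask then equals the
+ * first bytes of AES-ECB(hp_key, sample), as specified by RFC 9001 5.4.3.
+ */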
+
+/* Initialize <*aes_ctx> AES cipher context with <key> as key for decryption */
+int quic_tls_dec_aes_ctx_init(EVP_CIPHER_CTX **aes_ctx,
+ const EVP_CIPHER *aes, unsigned char *key)
+{
+ EVP_CIPHER_CTX *ctx;
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ if (!EVP_DecryptInit_ex(ctx, aes, NULL, key, NULL))
+ goto err;
+
+ *aes_ctx = ctx;
+ return 1;
+
+ err:
+ EVP_CIPHER_CTX_free(ctx);
+ return 0;
+}
+
+/* Decrypt in place <inlen> bytes of <out> buffer with <ctx> as AES cipher
+ * context, <in> being used as the initialization vector. It is the
+ * responsibility of the caller to check there are at least <inlen> bytes in <out> buffer.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_aes_decrypt(unsigned char *out,
+ const unsigned char *in, size_t inlen,
+ EVP_CIPHER_CTX *ctx)
+{
+ int ret = 0;
+
+ if (!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, in) ||
+ !EVP_DecryptUpdate(ctx, out, &ret, out, inlen) ||
+ !EVP_DecryptFinal_ex(ctx, out, &ret))
+ return 0;
+
+ return 1;
+}
+
+/* Initialize at <*tx_ctx> the cipher context for the TX part of a QUIC TLS context.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_tx_ctx_init(EVP_CIPHER_CTX **tx_ctx,
+ const EVP_CIPHER *aead, unsigned char *key)
+{
+ EVP_CIPHER_CTX *ctx;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ if (!EVP_EncryptInit_ex(ctx, aead, NULL, NULL, NULL) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN, QUIC_TLS_IV_LEN, NULL) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, QUIC_TLS_TAG_LEN, NULL)) ||
+ !EVP_EncryptInit_ex(ctx, NULL, NULL, key, NULL))
+ goto err;
+
+ *tx_ctx = ctx;
+
+ return 1;
+
+ err:
+ EVP_CIPHER_CTX_free(ctx);
+ return 0;
+}
+
+/*
+ * https://quicwg.org/base-drafts/draft-ietf-quic-tls.html#aead
+ *
+ * 5.3. AEAD Usage
+ *
+ * Packets are protected prior to applying header protection (Section 5.4).
+ * The unprotected packet header is part of the associated data (A). When removing
+ * packet protection, an endpoint first removes the header protection.
+ * (...)
+ * These ciphersuites have a 16-byte authentication tag and produce an output 16
+ * bytes larger than their input.
+ * The key and IV for the packet are computed as described in Section 5.1. The nonce,
+ * N, is formed by combining the packet protection IV with the packet number. The 62
+ * bits of the reconstructed QUIC packet number in network byte order are left-padded
+ * with zeros to the size of the IV. The exclusive OR of the padded packet number and
+ * the IV forms the AEAD nonce.
+ *
+ * The associated data, A, for the AEAD is the contents of the QUIC header, starting
+ * from the flags byte in either the short or long header, up to and including the
+ * unprotected packet number.
+ *
+ * The input plaintext, P, for the AEAD is the payload of the QUIC packet, as described
+ * in [QUIC-TRANSPORT].
+ *
+ * The output ciphertext, C, of the AEAD is transmitted in place of P.
+ *
+ * Some AEAD functions have limits for how many packets can be encrypted under the same
+ * key and IV (see for example [AEBounds]). This might be lower than the packet number limit.
+ * An endpoint MUST initiate a key update (Section 6) prior to exceeding any limit set for
+ * the AEAD that is in use.
+ */
+
+/* Encrypt in place the <len> bytes of plaintext in <buf>, which must have
+ * QUIC_TLS_TAG_LEN extra trailing bytes available to receive the tag.
+ * Note that for CCM mode, we must set the plaintext length when AAD data
+ * are provided from <aad> buffer with <aad_len> as length. This is always the
+ * case here, so the caller of this function must provide <aad>.
+ * Return 1 if succeeded, 0 if not.
+ *
+ * https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption
+ */
+int quic_tls_encrypt(unsigned char *buf, size_t len,
+ const unsigned char *aad, size_t aad_len,
+ EVP_CIPHER_CTX *ctx, const EVP_CIPHER *aead,
+ const unsigned char *iv)
+{
+ int outlen;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ if (!EVP_EncryptInit_ex(ctx, NULL, NULL, NULL, iv) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, NULL, len)) ||
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, aad, aad_len) ||
+ !EVP_EncryptUpdate(ctx, buf, &outlen, buf, len) ||
+ !EVP_EncryptFinal_ex(ctx, buf + outlen, &outlen) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, QUIC_TLS_TAG_LEN, buf + len))
+ return 0;
+
+ return 1;
+}
+
+/* Decrypt in place the <len> bytes of ciphertext in <buf>, the last
+ * QUIC_TLS_TAG_LEN bytes of which are the tag.
+ * Note that for CCM mode, we must set the ciphertext length when AAD data
+ * are provided from <aad> buffer with <aad_len> as length. This is always the
+ * case here, so the caller of this function must provide <aad>. Also note that
+ * there is no need to call EVP_DecryptFinal_ex for CCM mode.
+ * Return 1 if succeeded, 0 if not.
+ *
+ * https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption
+ */
+int quic_tls_decrypt(unsigned char *buf, size_t len,
+ unsigned char *aad, size_t aad_len,
+ EVP_CIPHER_CTX *ctx, const EVP_CIPHER *aead,
+ const unsigned char *key, const unsigned char *iv)
+{
+ int outlen;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ if (!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, iv) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, QUIC_TLS_TAG_LEN,
+ buf + len - QUIC_TLS_TAG_LEN) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_DecryptUpdate(ctx, NULL, &outlen, NULL, len - QUIC_TLS_TAG_LEN)) ||
+ !EVP_DecryptUpdate(ctx, NULL, &outlen, aad, aad_len) ||
+ !EVP_DecryptUpdate(ctx, buf, &outlen, buf, len - QUIC_TLS_TAG_LEN) ||
+ (aead_nid != NID_aes_128_ccm &&
+ !EVP_DecryptFinal_ex(ctx, buf + outlen, &outlen)))
+ return 0;
+
+ return 1;
+}
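+
+/* Note on the CCM special cases above: with the EVP interface, AES-128-CCM
+ * requires the expected tag to be set and the total ciphertext length to be
+ * announced (the EVP_DecryptUpdate() call with a NULL output buffer) before
+ * the AAD is provided, and it verifies the tag during the last data update,
+ * which is why EVP_DecryptFinal_ex() is skipped for CCM.
+ */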
+
+/* Similar to quic_tls_decrypt(), except that this function does not decrypt
+ * in place: the <len> bytes of ciphertext in <in> input buffer are decrypted
+ * into <out> output buffer when both buffers differ. It is the responsibility
+ * of the caller to check that the output buffer is at least as large as the
+ * input buffer.
+ * Note that for CCM mode, we must set the ciphertext length when AAD data
+ * are provided from <aad> buffer with <aad_len> as length. This is always the
+ * case here, so the caller of this function must provide <aad>. Also note that
+ * there is no need to call EVP_DecryptFinal_ex for CCM mode.
+ *
+ * https://wiki.openssl.org/index.php/EVP_Authenticated_Encryption_and_Decryption
+ *
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_decrypt2(unsigned char *out,
+ unsigned char *in, size_t len,
+ unsigned char *aad, size_t aad_len,
+ EVP_CIPHER_CTX *ctx, const EVP_CIPHER *aead,
+ const unsigned char *key, const unsigned char *iv)
+{
+ int outlen;
+ int aead_nid = EVP_CIPHER_nid(aead);
+
+ len -= QUIC_TLS_TAG_LEN;
+ if (!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, iv) ||
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, QUIC_TLS_TAG_LEN, in + len) ||
+ (aead_nid == NID_aes_128_ccm &&
+ !EVP_DecryptUpdate(ctx, NULL, &outlen, NULL, len)) ||
+ !EVP_DecryptUpdate(ctx, NULL, &outlen, aad, aad_len) ||
+ !EVP_DecryptUpdate(ctx, out, &outlen, in, len) ||
+ (aead_nid != NID_aes_128_ccm &&
+ !EVP_DecryptFinal_ex(ctx, out + outlen, &outlen)))
+ return 0;
+
+ return 1;
+}
+
+/* Derive <key> and <iv> key and IV to be used to encrypt a retry token
+ * with <secret> which is not pseudo-random.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_derive_retry_token_secret(const EVP_MD *md,
+ unsigned char *key, size_t keylen,
+ unsigned char *iv, size_t ivlen,
+ const unsigned char *salt, size_t saltlen,
+ const unsigned char *secret, size_t secretlen)
+{
+ unsigned char tmpkey[QUIC_TLS_KEY_LEN];
+ const unsigned char key_label[] = "retry token key";
+ const unsigned char iv_label[] = "retry token iv";
+
+ if (!quic_hkdf_extract(md, tmpkey, sizeof tmpkey,
+ secret, secretlen, salt, saltlen) ||
+ !quic_hkdf_expand(md, key, keylen, tmpkey, sizeof tmpkey,
+ key_label, sizeof key_label - 1) ||
+ !quic_hkdf_expand(md, iv, ivlen, tmpkey, sizeof tmpkey,
+ iv_label, sizeof iv_label - 1))
+ return 0;
+
+ return 1;
+}
+
+/* Generate the AEAD tag for the Retry packet <pkt> of <pkt_len> bytes and
+ * write it just after the <pkt> area, which must have at least 16 spare
+ * trailing bytes to receive it. <odcid> is the CID of the Initial packet
+ * received which triggered the Retry.
+ *
+ * Returns non-zero on success else zero.
+ */
+int quic_tls_generate_retry_integrity_tag(unsigned char *odcid, unsigned char odcid_len,
+ unsigned char *pkt, size_t pkt_len,
+ const struct quic_version *qv)
+{
+ const EVP_CIPHER *evp = EVP_aes_128_gcm();
+ EVP_CIPHER_CTX *ctx;
+
+ /* encryption buffer - not used as only AEAD tag generation is performed */
+ unsigned char *out = NULL;
+ /* address to store the AEAD tag */
+ unsigned char *tag = pkt + pkt_len;
+ int outlen, ret = 0;
+
+ ctx = EVP_CIPHER_CTX_new();
+ if (!ctx)
+ return 0;
+
+ /* rfc9001 5.8. Retry Packet Integrity
+ *
+ * AEAD is performed over a pseudo-Retry packet used as AAD. It contains
+ * the ODCID len + data and the Retry packet itself.
+ */
+ if (!EVP_EncryptInit_ex(ctx, evp, NULL, qv->retry_tag_key, qv->retry_tag_nonce) ||
+ /* specify pseudo-Retry as AAD */
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, &odcid_len, sizeof(odcid_len)) ||
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, odcid, odcid_len) ||
+ !EVP_EncryptUpdate(ctx, NULL, &outlen, pkt, pkt_len) ||
+ /* finalize */
+ !EVP_EncryptFinal_ex(ctx, out, &outlen) ||
+ /* store the tag */
+ !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, QUIC_TLS_TAG_LEN, tag)) {
+ goto out;
+ }
+ ret = 1;
+
+ out:
+ EVP_CIPHER_CTX_free(ctx);
+ return ret;
+}
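+
+/* For QUIC v1, <qv>->retry_tag_key and <qv>->retry_tag_nonce above are
+ * expected to hold the RFC 9001 5.8 constants:
+ *
+ * key = be0c690b9f66575a1d766b54e368c84e
+ * nonce = 461599d35d632bf2239825bb
+ */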
+
+/* Derive new keys and ivs required for Key Update feature for <qc> QUIC
+ * connection.
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_key_update(struct quic_conn *qc)
+{
+ struct quic_tls_ctx *tls_ctx = &qc->ael->tls_ctx;
+ struct quic_tls_secrets *rx = &tls_ctx->rx;
+ struct quic_tls_secrets *tx = &tls_ctx->tx;
+ /* Used only for the traces */
+ struct quic_kp_trace kp_trace = {
+ .rx_sec = rx->secret,
+ .rx_seclen = rx->secretlen,
+ .tx_sec = tx->secret,
+ .tx_seclen = tx->secretlen,
+ };
+ /* The next key phase secrets to be derived */
+ struct quic_tls_kp *nxt_rx = &qc->ku.nxt_rx;
+ struct quic_tls_kp *nxt_tx = &qc->ku.nxt_tx;
+ const struct quic_version *ver =
+ qc->negotiated_version ? qc->negotiated_version : qc->original_version;
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_KP, qc);
+
+ TRACE_PRINTF(TRACE_LEVEL_DEVELOPER, QUIC_EV_CONN_SPPKTS, qc, 0, 0, 0,
+ "nxt_rx->secretlen=%llu rx->secretlen=%llu",
+ (ull)nxt_rx->secretlen, (ull)rx->secretlen);
+ /* Prepare new RX secrets */
+ if (!quic_tls_sec_update(rx->md, ver, nxt_rx->secret, nxt_rx->secretlen,
+ rx->secret, rx->secretlen)) {
+ TRACE_ERROR("New RX secret update failed", QUIC_EV_CONN_KP, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_derive_keys(rx->aead, NULL, rx->md, ver,
+ nxt_rx->key, nxt_rx->keylen,
+ nxt_rx->iv, nxt_rx->ivlen, NULL, 0,
+ nxt_rx->secret, nxt_rx->secretlen)) {
+ TRACE_ERROR("New RX key derivation failed", QUIC_EV_CONN_KP, qc);
+ goto leave;
+ }
+
+ kp_trace.rx = nxt_rx;
+ /* Prepare new TX secrets */
+ if (!quic_tls_sec_update(tx->md, ver, nxt_tx->secret, nxt_tx->secretlen,
+ tx->secret, tx->secretlen)) {
+ TRACE_ERROR("New TX secret update failed", QUIC_EV_CONN_KP, qc);
+ goto leave;
+ }
+
+ if (!quic_tls_derive_keys(tx->aead, NULL, tx->md, ver,
+ nxt_tx->key, nxt_tx->keylen,
+ nxt_tx->iv, nxt_tx->ivlen, NULL, 0,
+ nxt_tx->secret, nxt_tx->secretlen)) {
+ TRACE_ERROR("New TX key derivation failed", QUIC_EV_CONN_KP, qc);
+ goto leave;
+ }
+
+ kp_trace.tx = nxt_tx;
+ if (nxt_rx->ctx) {
+ EVP_CIPHER_CTX_free(nxt_rx->ctx);
+ nxt_rx->ctx = NULL;
+ }
+
+ if (!quic_tls_rx_ctx_init(&nxt_rx->ctx, tls_ctx->rx.aead, nxt_rx->key)) {
+ TRACE_ERROR("could not initialize RX TLS cipher context", QUIC_EV_CONN_KP, qc);
+ goto leave;
+ }
+
+ if (nxt_tx->ctx) {
+ EVP_CIPHER_CTX_free(nxt_tx->ctx);
+ nxt_tx->ctx = NULL;
+ }
+
+ if (!quic_tls_tx_ctx_init(&nxt_tx->ctx, tls_ctx->tx.aead, nxt_tx->key)) {
+ TRACE_ERROR("could not initialize TX TLS cipher context", QUIC_EV_CONN_KP, qc);
+ goto leave;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_PROTO("key update", QUIC_EV_CONN_KP, qc, &kp_trace);
+ TRACE_LEAVE(QUIC_EV_CONN_KP, qc);
+ return ret;
+}
+
+/* Rotate the Key Update information for <qc> QUIC connection.
+ * Must be used after having updated them.
+ * Always succeeds.
+ */
+void quic_tls_rotate_keys(struct quic_conn *qc)
+{
+ struct quic_tls_ctx *tls_ctx = &qc->ael->tls_ctx;
+ unsigned char *curr_secret, *curr_iv, *curr_key;
+ EVP_CIPHER_CTX *curr_ctx;
+
+ TRACE_ENTER(QUIC_EV_CONN_RXPKT, qc);
+
+ /* Rotate the RX secrets */
+ curr_ctx = tls_ctx->rx.ctx;
+ curr_secret = tls_ctx->rx.secret;
+ curr_iv = tls_ctx->rx.iv;
+ curr_key = tls_ctx->rx.key;
+
+ tls_ctx->rx.ctx = qc->ku.nxt_rx.ctx;
+ tls_ctx->rx.secret = qc->ku.nxt_rx.secret;
+ tls_ctx->rx.iv = qc->ku.nxt_rx.iv;
+ tls_ctx->rx.key = qc->ku.nxt_rx.key;
+
+ qc->ku.nxt_rx.ctx = qc->ku.prv_rx.ctx;
+ qc->ku.nxt_rx.secret = qc->ku.prv_rx.secret;
+ qc->ku.nxt_rx.iv = qc->ku.prv_rx.iv;
+ qc->ku.nxt_rx.key = qc->ku.prv_rx.key;
+
+ qc->ku.prv_rx.ctx = curr_ctx;
+ qc->ku.prv_rx.secret = curr_secret;
+ qc->ku.prv_rx.iv = curr_iv;
+ qc->ku.prv_rx.key = curr_key;
+ qc->ku.prv_rx.pn = tls_ctx->rx.pn;
+
+ /* Update the TX secrets */
+ curr_ctx = tls_ctx->tx.ctx;
+ curr_secret = tls_ctx->tx.secret;
+ curr_iv = tls_ctx->tx.iv;
+ curr_key = tls_ctx->tx.key;
+
+ tls_ctx->tx.ctx = qc->ku.nxt_tx.ctx;
+ tls_ctx->tx.secret = qc->ku.nxt_tx.secret;
+ tls_ctx->tx.iv = qc->ku.nxt_tx.iv;
+ tls_ctx->tx.key = qc->ku.nxt_tx.key;
+
+ qc->ku.nxt_tx.ctx = curr_ctx;
+ qc->ku.nxt_tx.secret = curr_secret;
+ qc->ku.nxt_tx.iv = curr_iv;
+ qc->ku.nxt_tx.key = curr_key;
+
+ TRACE_LEAVE(QUIC_EV_CONN_RXPKT, qc);
+}
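+
+/* The rotation above follows RFC 9001 6: for RX, the key sets circulate
+ * between three slots (next becomes current, current becomes previous, and
+ * the previous buffers are recycled to receive the next phase), so packets
+ * still protected with the previous key phase remain decipherable. For TX,
+ * only the current and next sets are swapped.
+ */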
+
+/* Release the memory allocated for the QUIC TLS context with <ctx> as address. */
+void quic_tls_ctx_free(struct quic_tls_ctx **ctx)
+{
+ if (!*ctx)
+ return;
+
+ quic_tls_ctx_secs_free(*ctx);
+ pool_free(pool_head_quic_tls_ctx, *ctx);
+ *ctx = NULL;
+}
+
+/* Finalize <qc> QUIC connection:
+ * - allocate and initialize the Initial QUIC TLS context for the negotiated
+ * version if needed,
+ * - derive the secrets for this context,
+ * - set them into the TLS stack,
+ *
+ * Return 1 if succeeded, 0 if not.
+ */
+int quic_tls_finalize(struct quic_conn *qc, int server)
+{
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ if (!qc->negotiated_version)
+ goto done;
+
+ qc->nictx = pool_alloc(pool_head_quic_tls_ctx);
+ if (!qc->nictx)
+ goto err;
+
+ quic_tls_ctx_reset(qc->nictx);
+ if (!qc_new_isecs(qc, qc->nictx, qc->negotiated_version,
+ qc->odcid.data, qc->odcid.len, server))
+ goto err;
+
+ done:
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+
+ err:
+ quic_tls_ctx_free(&qc->nictx);
+ goto out;
+}
diff --git a/src/quic_tp.c b/src/quic_tp.c
new file mode 100644
index 0000000..caf48ce
--- /dev/null
+++ b/src/quic_tp.c
@@ -0,0 +1,714 @@
+#include <arpa/inet.h>
+#include <string.h>
+
+#include <haproxy/global.h>
+#include <haproxy/ncbuf-t.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_enc.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/trace.h>
+
+#define QUIC_MAX_UDP_PAYLOAD_SIZE 2048
+
+/* These are the values of some QUIC transport parameters when absent.
+ * They should be used to initialize any transport parameters (local or remote)
+ * before updating them with customized values.
+ */
+struct quic_transport_params quic_dflt_transport_params = {
+ .max_udp_payload_size = QUIC_TP_DFLT_MAX_UDP_PAYLOAD_SIZE,
+ .ack_delay_exponent = QUIC_TP_DFLT_ACK_DELAY_COMPONENT,
+ .max_ack_delay = QUIC_TP_DFLT_MAX_ACK_DELAY,
+ .active_connection_id_limit = QUIC_TP_DFLT_ACTIVE_CONNECTION_ID_LIMIT,
+};
+
+/* Initialize <dst> transport parameters with default values (when absent)
+ * from <quic_dflt_transport_params>.
+ * Never fails.
+ */
+static void quic_dflt_transport_params_cpy(struct quic_transport_params *dst)
+{
+ dst->max_udp_payload_size = quic_dflt_transport_params.max_udp_payload_size;
+ dst->ack_delay_exponent = quic_dflt_transport_params.ack_delay_exponent;
+ dst->max_ack_delay = quic_dflt_transport_params.max_ack_delay;
+ dst->active_connection_id_limit = quic_dflt_transport_params.active_connection_id_limit;
+}
+
+/* Initialize <p> transport parameters. <server> is a boolean, set if TPs are
+ * used by a server (haproxy frontend) else this is for a client (haproxy
+ * backend).
+ *
+ * This must only be used for haproxy local parameters. To initialize peer
+ * parameters, see quic_dflt_transport_params_cpy().
+ *
+ * Never fails.
+ */
+void quic_transport_params_init(struct quic_transport_params *p, int server)
+{
+ const uint64_t ncb_size = global.tune.bufsize - NCB_RESERVED_SZ;
+ const int max_streams_bidi = global.tune.quic_frontend_max_streams_bidi;
+ const int max_streams_uni = 3;
+
+ /* Set RFC default values for unspecified parameters. */
+ quic_dflt_transport_params_cpy(p);
+
+ /* Set the max_udp_payload_size value. If not set here, it would default
+ * to QUIC_TP_DFLT_MAX_UDP_PAYLOAD_SIZE.
+ */
+ p->max_udp_payload_size = QUIC_MAX_UDP_PAYLOAD_SIZE;
+ if (server)
+ p->max_idle_timeout = global.tune.quic_frontend_max_idle_timeout;
+ else
+ p->max_idle_timeout = global.tune.quic_backend_max_idle_timeout;
+
+ p->initial_max_streams_bidi = max_streams_bidi;
+ p->initial_max_streams_uni = max_streams_uni;
+ p->initial_max_stream_data_bidi_local = ncb_size;
+ p->initial_max_stream_data_bidi_remote = ncb_size;
+ p->initial_max_stream_data_uni = ncb_size;
+ p->initial_max_data = (max_streams_bidi + max_streams_uni) * ncb_size;
+
+ if (server) {
+ p->with_stateless_reset_token = 1;
+ p->disable_active_migration = 1;
+ }
+
+ p->active_connection_id_limit = 8;
+
+ p->retry_source_connection_id.len = 0;
+}
+
+/* Encode <addr> preferred address transport parameter in <buf> without its
+ * "type+len" prefix.
+ * It is the responsibility of the caller to check there is enough room in <buf> to encode
+ * this address.
+ * Never fails.
+ */
+static void quic_transport_param_enc_pref_addr_val(unsigned char **buf,
+ const unsigned char *end,
+ struct tp_preferred_address *addr)
+{
+ /* RFC 9000 18.2: each address is encoded before its port, which is
+ * also the order expected by quic_transport_param_dec_pref_addr().
+ */
+ memcpy(*buf, (uint8_t *)&addr->ipv4_addr.s_addr, sizeof(addr->ipv4_addr.s_addr));
+ *buf += sizeof(addr->ipv4_addr.s_addr);
+
+ write_n16(*buf, addr->ipv4_port);
+ *buf += sizeof addr->ipv4_port;
+
+ memcpy(*buf, addr->ipv6_addr.s6_addr, sizeof(addr->ipv6_addr.s6_addr));
+ *buf += sizeof(addr->ipv6_addr.s6_addr);
+
+ write_n16(*buf, addr->ipv6_port);
+ *buf += sizeof addr->ipv6_port;
+
+ *(*buf)++ = addr->cid.len;
+ if (addr->cid.len) {
+ memcpy(*buf, addr->cid.data, addr->cid.len);
+ *buf += addr->cid.len;
+ }
+
+ memcpy(*buf, addr->stateless_reset_token, sizeof addr->stateless_reset_token);
+ *buf += sizeof addr->stateless_reset_token;
+}
+
+/* Decode into <addr> preferred address transport parameter found in <*buf> buffer.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_dec_pref_addr(struct tp_preferred_address *addr,
+ const unsigned char **buf,
+ const unsigned char *end)
+{
+ ssize_t addr_len;
+
+ addr_len = sizeof(addr->ipv4_port) + sizeof(addr->ipv4_addr.s_addr);
+ addr_len += sizeof(addr->ipv6_port) + sizeof(addr->ipv6_addr.s6_addr);
+ addr_len += sizeof(addr->cid.len);
+
+ if (end - *buf < addr_len)
+ return 0;
+
+ memcpy((uint8_t *)&addr->ipv4_addr.s_addr, *buf, sizeof(addr->ipv4_addr.s_addr));
+ *buf += sizeof(addr->ipv4_addr.s_addr);
+
+ addr->ipv4_port = read_n16(*buf);
+ *buf += sizeof addr->ipv4_port;
+
+ memcpy(addr->ipv6_addr.s6_addr, *buf, sizeof(addr->ipv6_addr.s6_addr));
+ *buf += sizeof(addr->ipv6_addr.s6_addr);
+
+ addr->ipv6_port = read_n16(*buf);
+ *buf += sizeof addr->ipv6_port;
+
+ addr->cid.len = *(*buf)++;
+ if (addr->cid.len) {
+ if (end - sizeof(addr->stateless_reset_token) - *buf > addr->cid.len ||
+ addr->cid.len > sizeof(addr->cid.data)) {
+ return 0;
+ }
+
+ memcpy(addr->cid.data, *buf, addr->cid.len);
+ *buf += addr->cid.len;
+ }
+
+ if (end - *buf != sizeof(addr->stateless_reset_token))
+ return 0;
+
+ memcpy(addr->stateless_reset_token, *buf, end - *buf);
+ *buf += sizeof addr->stateless_reset_token;
+
+ return *buf == end;
+}
+
+/* Decode into <tp> the version_information transport parameter received from
+ * <*buf> buffer. <server> must be set to 1 for QUIC clients which receive server
+ * transport parameters, and 0 for QUIC servers which receive client transport
+ * parameters.
+ * Also set the QUIC negotiated version into <tp>.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_dec_version_info(struct tp_version_information *tp,
+ const unsigned char **buf,
+ const unsigned char *end, int server)
+{
+ size_t tp_len = end - *buf;
+ const uint32_t *ver, *others;
+
+ /* <tp_len> must be a multiple of sizeof(uint32_t) */
+ if (tp_len < sizeof tp->chosen || (tp_len & 0x3))
+ return 0;
+
+ tp->chosen = ntohl(*(uint32_t *)*buf);
+ /* Must not be null */
+ if (!tp->chosen)
+ return 0;
+
+ *buf += sizeof tp->chosen;
+ others = (const uint32_t *)*buf;
+
+ /* Other versions must not be null */
+ for (ver = others; ver < (const uint32_t *)end; ver++) {
+ if (!*ver)
+ return 0;
+ }
+
+ if (server)
+ /* TODO: not supported */
+ return 0;
+
+ for (ver = others; ver < (const uint32_t *)end; ver++) {
+ if (!tp->negotiated_version) {
+ int i;
+
+ for (i = 0; i < quic_versions_nb; i++) {
+ if (ntohl(*ver) == quic_versions[i].num) {
+ tp->negotiated_version = &quic_versions[i];
+ break;
+ }
+ }
+ }
+
+ if (preferred_version && ntohl(*ver) == preferred_version->num) {
+ tp->negotiated_version = preferred_version;
+ goto out;
+ }
+ }
+
+ out:
+ *buf = end;
+
+ return 1;
+}
+
+/* Decode into <p> struct a transport parameter found in <*buf> buffer with
+ * <type> as type and <len> as length, depending on <server> boolean value which
+ * must be set to 1 for a server (haproxy listener) or 0 for a client (connection
+ * to a haproxy server).
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_decode(struct quic_transport_params *p,
+ int server, uint64_t type,
+ const unsigned char **buf, size_t len)
+{
+ const unsigned char *end = *buf + len;
+
+ switch (type) {
+ case QUIC_TP_ORIGINAL_DESTINATION_CONNECTION_ID:
+ if (!server || len > sizeof p->original_destination_connection_id.data)
+ return 0;
+
+ if (len)
+ memcpy(p->original_destination_connection_id.data, *buf, len);
+ p->original_destination_connection_id.len = len;
+ *buf += len;
+ p->original_destination_connection_id_present = 1;
+ break;
+ case QUIC_TP_INITIAL_SOURCE_CONNECTION_ID:
+ if (len > sizeof p->initial_source_connection_id.data)
+ return 0;
+
+ if (len)
+ memcpy(p->initial_source_connection_id.data, *buf, len);
+ p->initial_source_connection_id.len = len;
+ *buf += len;
+ p->initial_source_connection_id_present = 1;
+ break;
+ case QUIC_TP_STATELESS_RESET_TOKEN:
+ if (!server || len != sizeof p->stateless_reset_token)
+ return 0;
+ memcpy(p->stateless_reset_token, *buf, len);
+ *buf += len;
+ p->with_stateless_reset_token = 1;
+ break;
+ case QUIC_TP_PREFERRED_ADDRESS:
+ if (!server)
+ return 0;
+ if (!quic_transport_param_dec_pref_addr(&p->preferred_address, buf, *buf + len))
+ return 0;
+ p->with_preferred_address = 1;
+ break;
+ case QUIC_TP_MAX_IDLE_TIMEOUT:
+ if (!quic_dec_int(&p->max_idle_timeout, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_MAX_UDP_PAYLOAD_SIZE:
+ if (!quic_dec_int(&p->max_udp_payload_size, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_DATA:
+ if (!quic_dec_int(&p->initial_max_data, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL:
+ if (!quic_dec_int(&p->initial_max_stream_data_bidi_local, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE:
+ if (!quic_dec_int(&p->initial_max_stream_data_bidi_remote, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAM_DATA_UNI:
+ if (!quic_dec_int(&p->initial_max_stream_data_uni, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAMS_BIDI:
+ if (!quic_dec_int(&p->initial_max_streams_bidi, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_INITIAL_MAX_STREAMS_UNI:
+ if (!quic_dec_int(&p->initial_max_streams_uni, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_ACK_DELAY_EXPONENT:
+ if (!quic_dec_int(&p->ack_delay_exponent, buf, end) ||
+ p->ack_delay_exponent > QUIC_TP_ACK_DELAY_EXPONENT_LIMIT)
+ return 0;
+ break;
+ case QUIC_TP_MAX_ACK_DELAY:
+ if (!quic_dec_int(&p->max_ack_delay, buf, end) ||
+ p->max_ack_delay > QUIC_TP_MAX_ACK_DELAY_LIMIT)
+ return 0;
+ break;
+ case QUIC_TP_DISABLE_ACTIVE_MIGRATION:
+ /* Zero-length parameter type. */
+ if (len != 0)
+ return 0;
+ p->disable_active_migration = 1;
+ break;
+ case QUIC_TP_ACTIVE_CONNECTION_ID_LIMIT:
+ if (!quic_dec_int(&p->active_connection_id_limit, buf, end))
+ return 0;
+ break;
+ case QUIC_TP_VERSION_INFORMATION:
+ if (!quic_transport_param_dec_version_info(&p->version_information,
+ buf, *buf + len, server))
+ return 0;
+ break;
+ default:
+ *buf += len;
+ }
+
+ return *buf == end;
+}
+
+/* Encode <type> and <len> variable length values in <buf>.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_encode_type_len(unsigned char **buf,
+ const unsigned char *end,
+ uint64_t type, uint64_t len)
+{
+ return quic_enc_int(buf, end, type) && quic_enc_int(buf, end, len);
+}
+
+/* Decode variable length type and length values of a QUIC transport parameter
+ * into <type> and <len> found in <*buf> buffer.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_decode_type_len(uint64_t *type, uint64_t *len,
+ const unsigned char **buf,
+ const unsigned char *end)
+{
+ return quic_dec_int(type, buf, end) && quic_dec_int(len, buf, end);
+}
+
+/* Encode <param> byte stream with <type> as type and <length> as length into <buf>.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_enc_mem(unsigned char **buf, const unsigned char *end,
+ uint64_t type, void *param, uint64_t length)
+{
+ if (!quic_transport_param_encode_type_len(buf, end, type, length))
+ return 0;
+
+ if (end - *buf < length)
+ return 0;
+
+ if (length)
+ memcpy(*buf, param, length);
+ *buf += length;
+
+ return 1;
+}
+
+/* Encode <val> 64-bit value as a variable-length integer into <buf>.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_enc_int(unsigned char **buf,
+ const unsigned char *end,
+ uint64_t type, uint64_t val)
+{
+ size_t len;
+
+ len = quic_int_getsize(val);
+
+ return len && quic_transport_param_encode_type_len(buf, end, type, len) &&
+ quic_enc_int(buf, end, val);
+}
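+
+/* The type and length fields use QUIC variable-length integers (RFC 9000
+ * section 16): the two most significant bits of the first byte encode the
+ * total length (1, 2, 4 or 8 bytes). Worked examples from RFC 9000 A.1:
+ *
+ * 37 -> 25
+ * 15293 -> 7bbd
+ * 494878333 -> 9d7f3e7d
+ * 151288809941952652 -> c2197c5eff14e88c
+ */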
+
+/* Returns the required length in bytes to encode <cid> QUIC connection ID. */
+static inline size_t sizeof_quic_cid(const struct tp_cid *cid)
+{
+ return sizeof cid->len + cid->len;
+}
+
+/* Encode <addr> preferred address into <buf>.
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_enc_pref_addr(unsigned char **buf,
+ const unsigned char *end,
+ struct tp_preferred_address *addr)
+{
+ uint64_t addr_len = 0;
+
+ addr_len += sizeof(addr->ipv4_port) + sizeof(addr->ipv4_addr.s_addr);
+ addr_len += sizeof(addr->ipv6_port) + sizeof(addr->ipv6_addr.s6_addr);
+ addr_len += sizeof_quic_cid(&addr->cid);
+ addr_len += sizeof(addr->stateless_reset_token);
+
+ if (!quic_transport_param_encode_type_len(buf, end, QUIC_TP_PREFERRED_ADDRESS, addr_len))
+ return 0;
+
+ if (end - *buf < addr_len)
+ return 0;
+
+ quic_transport_param_enc_pref_addr_val(buf, end, addr);
+
+ return 1;
+}
+
+/* Encode version information transport parameters with <chosen_version> as chosen
+ * version.
+ * Return 1 if succeeded, 0 if not.
+ */
+static int quic_transport_param_enc_version_info(unsigned char **buf,
+ const unsigned char *end,
+ const struct quic_version *chosen_version,
+ int server)
+{
+ int i;
+ uint64_t tp_len;
+ uint32_t ver;
+
+ tp_len = sizeof chosen_version->num + quic_versions_nb * sizeof(uint32_t);
+ if (!quic_transport_param_encode_type_len(buf, end,
+ QUIC_TP_VERSION_INFORMATION,
+ tp_len))
+ return 0;
+
+ if (end - *buf < tp_len)
+ return 0;
+
+ /* First: chosen version */
+ ver = htonl(chosen_version->num);
+ memcpy(*buf, &ver, sizeof ver);
+ *buf += sizeof ver;
+ /* For servers: all supported versions, chosen one included */
+ for (i = 0; i < quic_versions_nb; i++) {
+ ver = htonl(quic_versions[i].num);
+ memcpy(*buf, &ver, sizeof ver);
+ *buf += sizeof ver;
+ }
+
+ return 1;
+}
+
+/* Encode <p> transport parameters into <buf> depending on <server> value which
+ * must be set to 1 for a server (haproxy listener) or 0 for a client
+ * (connection to a haproxy server).
+ * Return the number of bytes consumed if succeeded, 0 if not.
+ */
+int quic_transport_params_encode(unsigned char *buf,
+ const unsigned char *end,
+ struct quic_transport_params *p,
+ const struct quic_version *chosen_version,
+ int server)
+{
+ unsigned char *head;
+ unsigned char *pos;
+
+ head = pos = buf;
+ if (server) {
+ if (!quic_transport_param_enc_mem(&pos, end,
+ QUIC_TP_ORIGINAL_DESTINATION_CONNECTION_ID,
+ p->original_destination_connection_id.data,
+ p->original_destination_connection_id.len))
+ return 0;
+
+ if (p->retry_source_connection_id.len) {
+ if (!quic_transport_param_enc_mem(&pos, end,
+ QUIC_TP_RETRY_SOURCE_CONNECTION_ID,
+ p->retry_source_connection_id.data,
+ p->retry_source_connection_id.len))
+ return 0;
+ }
+
+ if (p->with_stateless_reset_token &&
+ !quic_transport_param_enc_mem(&pos, end, QUIC_TP_STATELESS_RESET_TOKEN,
+ p->stateless_reset_token,
+ sizeof p->stateless_reset_token))
+ return 0;
+ if (p->with_preferred_address &&
+ !quic_transport_param_enc_pref_addr(&pos, end, &p->preferred_address))
+ return 0;
+ }
+
+ if (!quic_transport_param_enc_mem(&pos, end,
+ QUIC_TP_INITIAL_SOURCE_CONNECTION_ID,
+ p->initial_source_connection_id.data,
+ p->initial_source_connection_id.len))
+ return 0;
+
+ if (p->max_idle_timeout &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_MAX_IDLE_TIMEOUT, p->max_idle_timeout))
+ return 0;
+
+ /*
+ * The "max_udp_payload_size" transport parameter must be transmitted only
+ * if different from the default value.
+ */
+ if (p->max_udp_payload_size != QUIC_TP_DFLT_MAX_UDP_PAYLOAD_SIZE &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_MAX_UDP_PAYLOAD_SIZE, p->max_udp_payload_size))
+ return 0;
+
+ if (p->initial_max_data &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_DATA, p->initial_max_data))
+ return 0;
+
+ if (p->initial_max_stream_data_bidi_local &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL,
+ p->initial_max_stream_data_bidi_local))
+ return 0;
+
+ if (p->initial_max_stream_data_bidi_remote &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE,
+ p->initial_max_stream_data_bidi_remote))
+ return 0;
+
+ if (p->initial_max_stream_data_uni &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAM_DATA_UNI,
+ p->initial_max_stream_data_uni))
+ return 0;
+
+ if (p->initial_max_streams_bidi &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAMS_BIDI,
+ p->initial_max_streams_bidi))
+ return 0;
+
+ if (p->initial_max_streams_uni &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_INITIAL_MAX_STREAMS_UNI,
+ p->initial_max_streams_uni))
+ return 0;
+
+ /*
+ * The "ack_delay_exponent" transport parameter must be transmitted only
+ * if different from the default value.
+ */
+ if (p->ack_delay_exponent != QUIC_TP_DFLT_ACK_DELAY_COMPONENT &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_ACK_DELAY_EXPONENT, p->ack_delay_exponent))
+ return 0;
+
+ /*
+ * The "max_ack_delay" transport parameter must be transmitted only
+ * if different from the default value.
+ */
+ if (p->max_ack_delay != QUIC_TP_DFLT_MAX_ACK_DELAY &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_MAX_ACK_DELAY, p->max_ack_delay))
+ return 0;
+
+ /* 0-length value */
+ if (p->disable_active_migration &&
+ !quic_transport_param_encode_type_len(&pos, end, QUIC_TP_DISABLE_ACTIVE_MIGRATION, 0))
+ return 0;
+
+ if (p->active_connection_id_limit &&
+ p->active_connection_id_limit != QUIC_TP_DFLT_ACTIVE_CONNECTION_ID_LIMIT &&
+ !quic_transport_param_enc_int(&pos, end, QUIC_TP_ACTIVE_CONNECTION_ID_LIMIT,
+ p->active_connection_id_limit))
+ return 0;
+
+ if (!quic_transport_param_enc_version_info(&pos, end, chosen_version, server))
+ return 0;
+
+ return pos - head;
+}
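+
+/* Usage sketch (illustrative only, not compiled in): encoding the local
+ * transport parameters before handing them to the TLS stack. The buffer size
+ * QUIC_TP_MAX_ENCLEN and the <qc> fields are assumed to be the ones used
+ * elsewhere in this codebase.
+ */
+#if 0
+static int example_encode_local_tps(struct quic_conn *qc)
+{
+ unsigned char tps[QUIC_TP_MAX_ENCLEN];
+ int tps_len;
+
+ /* 1 as last argument: encode as a server (haproxy listener) */
+ tps_len = quic_transport_params_encode(tps, tps + sizeof tps,
+ &qc->rx.params, qc->original_version, 1);
+ return tps_len; /* 0 on failure, number of encoded bytes otherwise */
+}
+#endif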
+
+/* Decode transport parameters found in <buf> buffer into <p>, depending on
+ * <server> boolean value which must be set to 1 for a server (haproxy listener)
+ * or 0 for a client (connection to a haproxy server).
+ * Returns 1 if succeeded, 0 if not.
+ */
+static int quic_transport_params_decode(struct quic_transport_params *p, int server,
+ const unsigned char *buf,
+ const unsigned char *end)
+{
+ const unsigned char *pos;
+ uint64_t type, len = 0;
+
+ pos = buf;
+
+ while (pos != end) {
+ if (!quic_transport_param_decode_type_len(&type, &len, &pos, end))
+ return 0;
+
+ if (end - pos < len)
+ return 0;
+
+ if (!quic_transport_param_decode(p, server, type, &pos, len))
+ return 0;
+ }
+
+ /*
+ * A server MUST send the original_destination_connection_id transport parameter.
+ * The initial_source_connection_id must be present for both server and client.
+ */
+ if ((server && !p->original_destination_connection_id_present) ||
+ !p->initial_source_connection_id_present)
+ return 0;
+
+ /* Note that if not received by the peer, active_connection_id_limit will
+ * have QUIC_TP_DFLT_ACTIVE_CONNECTION_ID_LIMIT as default value. This
+ * is also the minimum value for this transport parameter.
+ */
+ if (p->active_connection_id_limit < QUIC_TP_DFLT_ACTIVE_CONNECTION_ID_LIMIT)
+ return 0;
+
+ return 1;
+}
+
+/* Store transport parameters found in <buf> buffer into <qc> QUIC connection
+ * depending on <server> value which must be 1 for a server (haproxy listener)
+ * or 0 for a client (connection to a haproxy server).
+ * Note that peer transport parameters are stored in the TX part of the connection:
+ * they are used to send packets to the peer with its transport parameters as
+ * limitations.
+ * Returns 1 if succeeded, 0 if not.
+ */
+int quic_transport_params_store(struct quic_conn *qc, int server,
+ const unsigned char *buf,
+ const unsigned char *end)
+{
+ struct quic_transport_params *tx_params = &qc->tx.params;
+ struct quic_transport_params *rx_params = &qc->rx.params;
+ /* Initial source connection ID */
+ struct tp_cid *iscid;
+
+ /* initialize peer TPs to RFC default value */
+ quic_dflt_transport_params_cpy(tx_params);
+
+ if (!quic_transport_params_decode(tx_params, server, buf, end))
+ return 0;
+
+ /* Update the connection from transport parameters received */
+ if (tx_params->version_information.negotiated_version &&
+ tx_params->version_information.negotiated_version != qc->original_version)
+ qc->negotiated_version =
+ qc->tx.params.version_information.negotiated_version;
+
+ if (tx_params->max_ack_delay)
+ qc->max_ack_delay = tx_params->max_ack_delay;
+
+ if (tx_params->max_idle_timeout && rx_params->max_idle_timeout)
+ qc->max_idle_timeout =
+ QUIC_MIN(tx_params->max_idle_timeout, rx_params->max_idle_timeout);
+ else
+ qc->max_idle_timeout =
+ QUIC_MAX(tx_params->max_idle_timeout, rx_params->max_idle_timeout);
+ TRACE_PROTO("\nTX(remote) transp. params.", QUIC_EV_TRANSP_PARAMS, qc, tx_params);
+
+ /* Check that the "initial_source_connection_id" transport parameter matches
+ * the SCID received which is also the DCID of the connection.
+ */
+ iscid = &tx_params->initial_source_connection_id;
+ if (qc->dcid.len != iscid->len ||
+ (qc->dcid.len && memcmp(qc->dcid.data, iscid->data, qc->dcid.len))) {
+ TRACE_PROTO("initial_source_connection_id transport parameter mismatch",
+ QUIC_EV_TRANSP_PARAMS, qc);
+ /* Kill the connection as soon as possible */
+ qc_kill_conn(qc);
+ }
+
+ return 1;
+}
+
+/* QUIC server (or haproxy listener) only function.
+ * Initialize the local transport parameters <rx_params> from <listener_params>
+ * coming from the configuration, and from the Initial packet information
+ * (destination connection ID, source connection ID, and the original
+ * destination connection ID carried by the client token).
+ * Returns 1 if succeeded, 0 if not.
+ */
+int qc_lstnr_params_init(struct quic_conn *qc,
+ const struct quic_transport_params *listener_params,
+ const unsigned char *stateless_reset_token,
+ const unsigned char *dcid, size_t dcidlen,
+ const unsigned char *scid, size_t scidlen,
+ const struct quic_cid *token_odcid)
+{
+ struct quic_transport_params *rx_params = &qc->rx.params;
+ struct tp_cid *odcid_param = &rx_params->original_destination_connection_id;
+
+ /* Copy the transport parameters. */
+ *rx_params = *listener_params;
+ /* Copy the stateless reset token */
+ memcpy(rx_params->stateless_reset_token, stateless_reset_token,
+ sizeof rx_params->stateless_reset_token);
+ /* Copy original_destination_connection_id transport parameter. */
+ if (token_odcid->len) {
+ memcpy(odcid_param->data, token_odcid->data, token_odcid->len);
+ odcid_param->len = token_odcid->len;
+ /* Copy retry_source_connection_id transport parameter. */
+ memcpy(rx_params->retry_source_connection_id.data, dcid, dcidlen);
+ rx_params->retry_source_connection_id.len = dcidlen;
+ }
+ else {
+ memcpy(odcid_param->data, dcid, dcidlen);
+ odcid_param->len = dcidlen;
+ }
+
+ /* Copy the initial source connection ID. */
+ memcpy(rx_params->initial_source_connection_id.data, scid, scidlen);
+ rx_params->initial_source_connection_id.len = scidlen;
+ TRACE_PROTO("\nRX(local) transp. params.", QUIC_EV_TRANSP_PARAMS, qc, rx_params);
+
+ return 1;
+}
+
diff --git a/src/quic_trace.c b/src/quic_trace.c
new file mode 100644
index 0000000..9ab9626
--- /dev/null
+++ b/src/quic_trace.c
@@ -0,0 +1,633 @@
+/*
+ * QUIC traces
+ *
+ * Copyright 2000-2020
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <inttypes.h>
+
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/trace.h>
+
+static void quic_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+static const struct trace_event quic_trace_events[] = {
+ { .mask = QUIC_EV_CONN_NEW, .name = "new_conn", .desc = "new QUIC connection" },
+ { .mask = QUIC_EV_CONN_INIT, .name = "new_conn_init", .desc = "new QUIC connection initialization" },
+ { .mask = QUIC_EV_CONN_ISEC, .name = "init_secs", .desc = "initial secrets derivation" },
+ { .mask = QUIC_EV_CONN_RSEC, .name = "read_secs", .desc = "read secrets derivation" },
+ { .mask = QUIC_EV_CONN_WSEC, .name = "write_secs", .desc = "write secrets derivation" },
+ { .mask = QUIC_EV_CONN_LPKT, .name = "lstnr_packet", .desc = "new listener received packet" },
+ { .mask = QUIC_EV_CONN_SPKT, .name = "srv_packet", .desc = "new server received packet" },
+ { .mask = QUIC_EV_CONN_ENCPKT, .name = "enc_hdshk_pkt", .desc = "handshake packet encryption" },
+ { .mask = QUIC_EV_CONN_TXPKT, .name = "tx_pkt", .desc = "TX packet" },
+ { .mask = QUIC_EV_CONN_PAPKT, .name = "phdshk_apkt", .desc = "post handshake application packet preparation" },
+ { .mask = QUIC_EV_CONN_PAPKTS, .name = "phdshk_apkts", .desc = "post handshake application packets preparation" },
+ { .mask = QUIC_EV_CONN_IO_CB, .name = "qc_io_cb", .desc = "QUIC conn. I/O processing" },
+ { .mask = QUIC_EV_CONN_RMHP, .name = "rm_hp", .desc = "Remove header protection" },
+ { .mask = QUIC_EV_CONN_PRSHPKT, .name = "parse_hpkt", .desc = "parse handshake packet" },
+ { .mask = QUIC_EV_CONN_PRSAPKT, .name = "parse_apkt", .desc = "parse application packet" },
+ { .mask = QUIC_EV_CONN_PRSFRM, .name = "parse_frm", .desc = "parse frame" },
+ { .mask = QUIC_EV_CONN_PRSAFRM, .name = "parse_ack_frm", .desc = "parse ACK frame" },
+ { .mask = QUIC_EV_CONN_BFRM, .name = "build_frm", .desc = "build frame" },
+ { .mask = QUIC_EV_CONN_PHPKTS, .name = "phdshk_pkts", .desc = "handshake packets preparation" },
+ { .mask = QUIC_EV_CONN_TRMHP, .name = "rm_hp_try", .desc = "header protection removing try" },
+ { .mask = QUIC_EV_CONN_ELRMHP, .name = "el_rm_hp", .desc = "handshake enc. level header protection removing" },
+ { .mask = QUIC_EV_CONN_RXPKT, .name = "rx_pkt", .desc = "RX packet" },
+ { .mask = QUIC_EV_CONN_SSLDATA, .name = "ssl_provide_data", .desc = "CRYPTO data provision to TLS stack" },
+ { .mask = QUIC_EV_CONN_RXCDATA, .name = "el_treat_rx_cfrms",.desc = "enc. level RX CRYPTO frames processing"},
+ { .mask = QUIC_EV_CONN_ADDDATA, .name = "add_hdshk_data", .desc = "TLS stack ->add_handshake_data() call"},
+ { .mask = QUIC_EV_CONN_FFLIGHT, .name = "flush_flight", .desc = "TLS stack ->flush_flight() call"},
+ { .mask = QUIC_EV_CONN_SSLALERT, .name = "send_alert", .desc = "TLS stack ->send_alert() call"},
+ { .mask = QUIC_EV_CONN_RTTUPDT, .name = "rtt_updt", .desc = "RTT sampling" },
+ { .mask = QUIC_EV_CONN_SPPKTS, .name = "sppkts", .desc = "send prepared packets" },
+ { .mask = QUIC_EV_CONN_PKTLOSS, .name = "pktloss", .desc = "detect packet loss" },
+ { .mask = QUIC_EV_CONN_STIMER, .name = "stimer", .desc = "set timer" },
+ { .mask = QUIC_EV_CONN_PTIMER, .name = "ptimer", .desc = "process timer" },
+ { .mask = QUIC_EV_CONN_SPTO, .name = "spto", .desc = "set PTO" },
+ { .mask = QUIC_EV_CONN_BCFRMS, .name = "bcfrms", .desc = "build CRYPTO data frames" },
+ { .mask = QUIC_EV_CONN_XPRTSEND, .name = "xprt_send", .desc = "sending xprt subscription" },
+ { .mask = QUIC_EV_CONN_XPRTRECV, .name = "xprt_recv", .desc = "receiving xprt subscription" },
+ { .mask = QUIC_EV_CONN_FREED, .name = "conn_freed", .desc = "releasing conn. memory" },
+ { .mask = QUIC_EV_CONN_CLOSE, .name = "conn_close", .desc = "closing conn." },
+ { .mask = QUIC_EV_CONN_ACKSTRM, .name = "ack_strm", .desc = "STREAM ack."},
+ { .mask = QUIC_EV_CONN_FRMLIST, .name = "frm_list", .desc = "frame list"},
+ { .mask = QUIC_EV_STATELESS_RST, .name = "stateless_reset", .desc = "stateless reset sent"},
+ { .mask = QUIC_EV_TRANSP_PARAMS, .name = "transport_params", .desc = "transport parameters"},
+ { .mask = QUIC_EV_CONN_IDLE_TIMER, .name = "idle_timer", .desc = "idle timer task"},
+ { .mask = QUIC_EV_CONN_SUB, .name = "xprt_sub", .desc = "RX/TX subscription or unsubscription to QUIC xprt"},
+ { .mask = QUIC_EV_CONN_RCV, .name = "conn_recv", .desc = "RX on connection" },
+ { .mask = QUIC_EV_CONN_SET_AFFINITY, .name = "conn_set_affinity", .desc = "set connection thread affinity" },
+ { /* end */ }
+};
+
+static const struct name_desc quic_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the connection */ },
+ /* arg2 */ { .name="quic", .desc="QUIC transport" },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc quic_trace_decoding[] = {
+#define QUIC_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+ { /* end */ }
+};
+
+
+struct trace_source trace_quic = {
+ .name = IST("quic"),
+ .desc = "QUIC xprt",
+ .arg_def = TRC_ARG1_QCON, /* TRACE()'s first argument is always a quic_conn */
+ .default_cb = quic_trace,
+ .known_events = quic_trace_events,
+ .lockon_args = quic_trace_lockon_args,
+ .decoding = quic_trace_decoding,
+ .report_events = ~0, /* report everything by default */
+};
+
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
+
+/* Trace callback for QUIC.
+ * These traces always expect that arg1, if non-null, is of type quic_conn.
+ */
+static void quic_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct quic_conn *qc = a1;
+
+ if (qc) {
+ const struct quic_tls_ctx *tls_ctx;
+
+ chunk_appendf(&trace_buf, " : qc@%p idle_timer_task@%p flags=0x%x",
+ qc, qc->idle_timer_task, qc->flags);
+ if (mask & QUIC_EV_CONN_INIT) {
+ chunk_appendf(&trace_buf, "\n odcid");
+ quic_cid_dump(&trace_buf, &qc->odcid);
+ chunk_appendf(&trace_buf, "\n dcid");
+ quic_cid_dump(&trace_buf, &qc->dcid);
+ chunk_appendf(&trace_buf, "\n scid");
+ quic_cid_dump(&trace_buf, &qc->scid);
+ }
+
+ if (mask & QUIC_EV_TRANSP_PARAMS) {
+ const struct quic_transport_params *p = a2;
+
+ if (p)
+ quic_transport_params_dump(&trace_buf, qc, p);
+ }
+
+ if (mask & QUIC_EV_CONN_ADDDATA) {
+ const enum ssl_encryption_level_t *level = a2;
+ const size_t *len = a3;
+
+ if (level) {
+ enum quic_tls_enc_level lvl = ssl_to_quic_enc_level(*level);
+
+ chunk_appendf(&trace_buf, " el=%c(%d)", quic_enc_level_char(lvl), lvl);
+ }
+ if (len)
+ chunk_appendf(&trace_buf, " len=%llu", (unsigned long long)*len);
+ }
+ if ((mask & QUIC_EV_CONN_ISEC) && qc) {
+ /* Initial read & write secrets. */
+ const unsigned char *rx_sec = a2;
+ const unsigned char *tx_sec = a3;
+
+ tls_ctx = &qc->iel->tls_ctx;
+ chunk_appendf(&trace_buf, "\n RX el=I");
+ if (rx_sec)
+ quic_tls_secret_hexdump(&trace_buf, rx_sec, 32);
+ quic_tls_keys_hexdump(&trace_buf, &tls_ctx->rx);
+ chunk_appendf(&trace_buf, "\n TX el=I");
+ if (tx_sec)
+ quic_tls_secret_hexdump(&trace_buf, tx_sec, 32);
+ quic_tls_keys_hexdump(&trace_buf, &tls_ctx->tx);
+ }
+
+ if ((mask & QUIC_EV_CONN_KP) && qc) {
+ /* Key update RX/TX secrets. */
+ const struct quic_kp_trace *kp = a2;
+
+ if (kp) {
+ if (kp->rx) {
+ chunk_appendf(&trace_buf, "\n RX kp");
+ if (kp->rx_sec)
+ quic_tls_secret_hexdump(&trace_buf, kp->rx_sec, kp->rx_seclen);
+ quic_tls_kp_keys_hexdump(&trace_buf, kp->rx);
+ }
+ if (kp->tx) {
+ chunk_appendf(&trace_buf, "\n TX kp");
+ if (kp->tx_sec)
+ quic_tls_secret_hexdump(&trace_buf, kp->tx_sec, kp->tx_seclen);
+ quic_tls_kp_keys_hexdump(&trace_buf, kp->tx);
+ }
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_RSEC|QUIC_EV_CONN_RWSEC)) {
+ const enum ssl_encryption_level_t *level = a2;
+
+ if (level) {
+ enum quic_tls_enc_level lvl = ssl_to_quic_enc_level(*level);
+ struct quic_enc_level *qel = qc_quic_enc_level(qc, lvl);
+
+ chunk_appendf(&trace_buf, "\n RX el=%c", quic_enc_level_char(lvl));
+ if (quic_tls_has_rx_sec(qel))
+ quic_tls_keys_hexdump(&trace_buf, &qel->tls_ctx.rx);
+ else
+ chunk_appendf(&trace_buf, " (none)");
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_WSEC|QUIC_EV_CONN_RWSEC)) {
+ const enum ssl_encryption_level_t *level = a2;
+
+ if (level) {
+ enum quic_tls_enc_level lvl = ssl_to_quic_enc_level(*level);
+ struct quic_enc_level *qel = qc_quic_enc_level(qc, lvl);
+
+ chunk_appendf(&trace_buf, "\n TX el=%c", quic_enc_level_char(lvl));
+ if (quic_tls_has_tx_sec(qel)) {
+ quic_tls_keys_hexdump(&trace_buf, &qel->tls_ctx.tx);
+ }
+ else
+ chunk_appendf(&trace_buf, " (none)");
+ }
+
+ }
+
+ if (mask & QUIC_EV_CONN_FRMLIST) {
+ const struct list *l = a2;
+
+ if (l) {
+ const struct quic_frame *frm;
+ list_for_each_entry(frm, l, list) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_TXPKT|QUIC_EV_CONN_PAPKT)) {
+ const struct quic_tx_packet *pkt = a2;
+ const struct quic_enc_level *qel = a3;
+ const ssize_t *room = a4;
+
+ if (qel) {
+ const struct quic_pktns *pktns = qel->pktns;
+ chunk_appendf(&trace_buf, " qel=%c flags=0x%x pto_count=%d cwnd=%llu ppif=%lld pif=%llu "
+ "if=%llu pp=%u",
+ quic_enc_level_char_from_qel(qel, qc),
+ qel->pktns->flags,
+ qc->path->loss.pto_count,
+ (unsigned long long)qc->path->cwnd,
+ (unsigned long long)qc->path->prep_in_flight,
+ (unsigned long long)qc->path->in_flight,
+ (unsigned long long)pktns->tx.in_flight,
+ pktns->tx.pto_probe);
+ }
+ if (pkt) {
+ const struct quic_frame *frm;
+ if (pkt->pn_node.key != (uint64_t)-1)
+ chunk_appendf(&trace_buf, " pn=%llu",(ull)pkt->pn_node.key);
+ list_for_each_entry(frm, &pkt->frms, list) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ }
+
+ if (room) {
+ chunk_appendf(&trace_buf, " room=%lld", (long long)*room);
+ chunk_appendf(&trace_buf, " dcid.len=%llu scid.len=%llu",
+ (unsigned long long)qc->dcid.len, (unsigned long long)qc->scid.len);
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_IO_CB) {
+ const enum quic_handshake_state *state = a2;
+
+ if (state)
+ chunk_appendf(&trace_buf, " state=%s", quic_hdshk_state_str(*state));
+ }
+
+ if (mask & (QUIC_EV_CONN_TRMHP|QUIC_EV_CONN_ELRMHP|QUIC_EV_CONN_SPKT)) {
+ const struct quic_rx_packet *pkt = a2;
+ const unsigned long *pktlen = a3;
+ const SSL *ssl = a4;
+
+ if (pkt) {
+ chunk_appendf(&trace_buf, " pkt@%p", pkt);
+ if (pkt->type == QUIC_PACKET_TYPE_SHORT && pkt->data)
+ chunk_appendf(&trace_buf, " kp=%d",
+ !!(*pkt->data & QUIC_PACKET_KEY_PHASE_BIT));
+ chunk_appendf(&trace_buf, " el=%c",
+ quic_packet_type_enc_level_char(pkt->type));
+ if (pkt->pnl)
+ chunk_appendf(&trace_buf, " pnl=%u pn=%llu", pkt->pnl,
+ (unsigned long long)pkt->pn);
+ if (pkt->token_len)
+ chunk_appendf(&trace_buf, " toklen=%llu",
+ (unsigned long long)pkt->token_len);
+ if (pkt->aad_len)
+ chunk_appendf(&trace_buf, " aadlen=%llu",
+ (unsigned long long)pkt->aad_len);
+ chunk_appendf(&trace_buf, " flags=0x%x len=%llu",
+ pkt->flags, (unsigned long long)pkt->len);
+ }
+ if (pktlen)
+ chunk_appendf(&trace_buf, " (%ld)", *pktlen);
+ if (ssl) {
+ enum ssl_encryption_level_t level = SSL_quic_read_level(ssl);
+ chunk_appendf(&trace_buf, " el=%c",
+ quic_enc_level_char(ssl_to_quic_enc_level(level)));
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_RXPKT|QUIC_EV_CONN_PRSHPKT|QUIC_EV_CONN_SSLDATA)) {
+ const struct quic_rx_packet *pkt = a2;
+ const struct quic_rx_crypto_frm *cf = a3;
+ const SSL *ssl = a4;
+
+ if (pkt)
+ chunk_appendf(&trace_buf, " pkt@%p el=%c pn=%llu", pkt,
+ quic_packet_type_enc_level_char(pkt->type),
+ (unsigned long long)pkt->pn);
+ if (cf)
+ chunk_appendf(&trace_buf, " cfoff=%llu cflen=%llu",
+ (unsigned long long)cf->offset_node.key,
+ (unsigned long long)cf->len);
+ if (ssl) {
+ enum ssl_encryption_level_t level = SSL_quic_read_level(ssl);
+ chunk_appendf(&trace_buf, " rel=%c",
+ quic_enc_level_char(ssl_to_quic_enc_level(level)));
+ }
+
+ if (qc->err.code)
+ chunk_appendf(&trace_buf, " err_code=0x%llx", (ull)qc->err.code);
+ }
+
+ if (mask & (QUIC_EV_CONN_PRSFRM|QUIC_EV_CONN_BFRM)) {
+ const struct quic_frame *frm = a2;
+
+ if (frm)
+ chunk_appendf(&trace_buf, " %s", quic_frame_type_string(frm->type));
+ }
+
+ if (mask & QUIC_EV_CONN_PHPKTS) {
+ const struct quic_enc_level *qel = a2;
+ const struct list *l = a3;
+
+ if (qel) {
+ const struct quic_pktns *pktns = qel->pktns;
+ chunk_appendf(&trace_buf,
+ " qel=%c flags=0x%x state=%s ack?%d pto_count=%d cwnd=%llu "
+ "ppif=%lld pif=%llu if=%llu pp=%u off=%llu",
+ quic_enc_level_char_from_qel(qel, qc),
+ qel->pktns->flags,
+ quic_hdshk_state_str(qc->state),
+ !!(qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED),
+ qc->path->loss.pto_count,
+ (unsigned long long)qc->path->cwnd,
+ (unsigned long long)qc->path->prep_in_flight,
+ (unsigned long long)qc->path->in_flight,
+ (unsigned long long)pktns->tx.in_flight,
+ pktns->tx.pto_probe,
+ qel->cstream ? (unsigned long long)qel->cstream->rx.offset : 0);
+ }
+
+ if (l) {
+ const struct quic_frame *frm;
+ list_for_each_entry(frm, l, list) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_ENCPKT) {
+ const struct enc_debug_info *edi = a2;
+
+ if (edi)
+ chunk_appendf(&trace_buf,
+ " payload=@%p payload_len=%llu"
+ " aad=@%p aad_len=%llu pn=%llu",
+ edi->payload, (unsigned long long)edi->payload_len,
+ edi->aad, (unsigned long long)edi->aad_len,
+ (unsigned long long)edi->pn);
+ }
+
+ if (mask & QUIC_EV_CONN_RMHP) {
+ const struct quic_rx_packet *pkt = a2;
+
+ if (pkt) {
+ const int *ret = a3;
+
+ chunk_appendf(&trace_buf, " pkt@%p", pkt);
+ if (ret && *ret)
+ chunk_appendf(&trace_buf, " pnl=%u pn=%llu",
+ pkt->pnl, (unsigned long long)pkt->pn);
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_PRSAFRM) {
+ const struct quic_frame *frm = a2;
+ const unsigned long *val1 = a3;
+ const unsigned long *val2 = a4;
+
+ if (frm) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+ if (val1)
+ chunk_appendf(&trace_buf, " %lu", *val1);
+ if (val2)
+ chunk_appendf(&trace_buf, "..%lu", *val2);
+ }
+
+ if (mask & QUIC_EV_CONN_ACKSTRM) {
+ const struct qf_stream *strm_frm = a2;
+ const struct qc_stream_desc *stream = a3;
+
+ if (strm_frm)
+ chunk_appendf(&trace_buf, " off=%llu len=%llu", (ull)strm_frm->offset.key, (ull)strm_frm->len);
+ if (stream)
+ chunk_appendf(&trace_buf, " ack_offset=%llu", (ull)stream->ack_offset);
+ }
+
+ if (mask & QUIC_EV_CONN_RTTUPDT) {
+ const unsigned int *rtt_sample = a2;
+ const unsigned int *ack_delay = a3;
+ const struct quic_loss *ql = a4;
+
+ if (rtt_sample)
+ chunk_appendf(&trace_buf, " rtt_sample=%ums", *rtt_sample);
+ if (ack_delay)
+ chunk_appendf(&trace_buf, " ack_delay=%ums", *ack_delay);
+ if (ql)
+ chunk_appendf(&trace_buf,
+ " srtt=%ums rttvar=%ums min_rtt=%ums",
+ ql->srtt, ql->rtt_var, ql->rtt_min);
+ }
+ if (mask & QUIC_EV_CONN_CC) {
+ const struct quic_cc_event *ev = a2;
+ const struct quic_cc *cc = a3;
+
+ if (ev)
+ quic_cc_event_trace(&trace_buf, ev);
+ if (cc)
+ quic_cc_state_trace(&trace_buf, cc);
+ }
+
+ if (mask & QUIC_EV_CONN_PKTLOSS) {
+ const struct quic_pktns *pktns = a2;
+ const struct list *lost_pkts = a3;
+
+ if (pktns) {
+ chunk_appendf(&trace_buf, " pktns=%c", quic_pktns_char(qc, pktns));
+ if (pktns->tx.loss_time)
+ chunk_appendf(&trace_buf, " loss_time=%dms",
+ TICKS_TO_MS(tick_remain(now_ms, pktns->tx.loss_time)));
+ }
+ if (lost_pkts && !LIST_ISEMPTY(lost_pkts)) {
+ struct quic_tx_packet *pkt;
+
+ chunk_appendf(&trace_buf, " lost_pkts:");
+ list_for_each_entry(pkt, lost_pkts, list)
+ chunk_appendf(&trace_buf, " %lu", (unsigned long)pkt->pn_node.key);
+ }
+ }
+
+ if (mask & (QUIC_EV_CONN_STIMER|QUIC_EV_CONN_PTIMER|QUIC_EV_CONN_SPTO)) {
+ const struct quic_pktns *pktns = a2;
+ const int *duration = a3;
+ const uint64_t *ifae_pkts = a4;
+
+ if (ifae_pkts)
+ chunk_appendf(&trace_buf, " ifae_pkts=%llu",
+ (unsigned long long)*ifae_pkts);
+ if (pktns) {
+ chunk_appendf(&trace_buf, " pktns=%c pp=%d",
+ quic_pktns_char(qc, pktns),
+ pktns->tx.pto_probe);
+ if (mask & (QUIC_EV_CONN_STIMER|QUIC_EV_CONN_SPTO)) {
+ if (pktns->tx.in_flight)
+ chunk_appendf(&trace_buf, " if=%llu", (ull)pktns->tx.in_flight);
+ if (pktns->tx.loss_time)
+ chunk_appendf(&trace_buf, " loss_time=%dms",
+ TICKS_TO_MS(pktns->tx.loss_time - now_ms));
+ }
+ if (mask & QUIC_EV_CONN_SPTO) {
+ if (pktns->tx.time_of_last_eliciting)
+ chunk_appendf(&trace_buf, " tole=%dms",
+ TICKS_TO_MS(pktns->tx.time_of_last_eliciting - now_ms));
+ if (duration)
+ chunk_appendf(&trace_buf, " dur=%dms", TICKS_TO_MS(*duration));
+ }
+ }
+
+ if (!(mask & (QUIC_EV_CONN_SPTO|QUIC_EV_CONN_PTIMER)) && qc->timer_task) {
+ chunk_appendf(&trace_buf,
+ " expire=%dms", TICKS_TO_MS(qc->timer - now_ms));
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_SPPKTS) {
+ const struct quic_tx_packet *pkt = a2;
+
+ chunk_appendf(&trace_buf, " pto_count=%d cwnd=%llu ppif=%llu pif=%llu",
+ qc->path->loss.pto_count,
+ (unsigned long long)qc->path->cwnd,
+ (unsigned long long)qc->path->prep_in_flight,
+ (unsigned long long)qc->path->in_flight);
+ if (pkt) {
+ const struct quic_frame *frm;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK)
+ chunk_appendf(&trace_buf, " ack");
+ chunk_appendf(&trace_buf, " pn=%lu(%c) iflen=%llu",
+ (unsigned long)pkt->pn_node.key,
+ quic_pktns_char(qc, pkt->pktns),
+ (unsigned long long)pkt->in_flight_len);
+ chunk_appendf(&trace_buf, " bytes.rx=%llu bytes.tx=%llu",
+ (unsigned long long)qc->bytes.rx,
+ (unsigned long long)qc->bytes.tx);
+ list_for_each_entry(frm, &pkt->frms, list) {
+ chunk_appendf(&trace_buf, " frm@%p", frm);
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL) {
+ chunk_appendf(&trace_buf, " with scid");
+ quic_cid_dump(&trace_buf, &qc->scid);
+ }
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_SSLALERT) {
+ const uint8_t *alert = a2;
+ const enum ssl_encryption_level_t *level = a3;
+
+ if (alert)
+ chunk_appendf(&trace_buf, " alert=0x%02x", *alert);
+ if (level)
+ chunk_appendf(&trace_buf, " el=%c",
+ quic_enc_level_char(ssl_to_quic_enc_level(*level)));
+ }
+
+ if (mask & QUIC_EV_CONN_BCFRMS) {
+ const size_t *sz1 = a2;
+ const size_t *sz2 = a3;
+ const size_t *sz3 = a4;
+
+ if (sz1)
+ chunk_appendf(&trace_buf, " %llu", (unsigned long long)*sz1);
+ if (sz2)
+ chunk_appendf(&trace_buf, " %llu", (unsigned long long)*sz2);
+ if (sz3)
+ chunk_appendf(&trace_buf, " %llu", (unsigned long long)*sz3);
+ }
+
+ if (mask & QUIC_EV_CONN_PSTRM) {
+ const struct quic_frame *frm = a2;
+
+ if (frm)
+ chunk_frm_appendf(&trace_buf, frm);
+ }
+
+ if (mask & QUIC_EV_CONN_ELEVELSEL) {
+ const enum quic_handshake_state *state = a2;
+ const enum quic_tls_enc_level *level = a3;
+ const enum quic_tls_enc_level *next_level = a4;
+
+ if (state)
+ chunk_appendf(&trace_buf, " state=%s", quic_hdshk_state_str(qc->state));
+ if (level)
+ chunk_appendf(&trace_buf, " level=%c", quic_enc_level_char(*level));
+ if (next_level)
+ chunk_appendf(&trace_buf, " next_level=%c", quic_enc_level_char(*next_level));
+
+ }
+
+ if (mask & QUIC_EV_CONN_IDLE_TIMER) {
+ if (tick_isset(qc->ack_expire))
+ chunk_appendf(&trace_buf, " ack_expire=%ums",
+ TICKS_TO_MS(tick_remain(now_ms, qc->ack_expire)));
+ if (tick_isset(qc->idle_expire))
+ chunk_appendf(&trace_buf, " idle_expire=%ums",
+ TICKS_TO_MS(tick_remain(now_ms, qc->idle_expire)));
+ if (qc->idle_timer_task && tick_isset(qc->idle_timer_task->expire))
+ chunk_appendf(&trace_buf, " expire=%ums",
+ TICKS_TO_MS(tick_remain(now_ms, qc->idle_timer_task->expire)));
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_RCV) {
+ int i;
+ const struct quic_dgram *dgram = a2;
+ char bufaddr[INET6_ADDRSTRLEN], bufport[6];
+
+ if (qc) {
+ addr_to_str(&qc->peer_addr, bufaddr, sizeof(bufaddr));
+ port_to_str(&qc->peer_addr, bufport, sizeof(bufport));
+ chunk_appendf(&trace_buf, " peer_addr=%s:%s ", bufaddr, bufport);
+ }
+
+ if (dgram) {
+ chunk_appendf(&trace_buf, " dgram.len=%zu", dgram->len);
+ /* Socket */
+ if (dgram->saddr.ss_family == AF_INET ||
+ dgram->saddr.ss_family == AF_INET6) {
+ addr_to_str(&dgram->saddr, bufaddr, sizeof(bufaddr));
+ port_to_str(&dgram->saddr, bufport, sizeof(bufport));
+ chunk_appendf(&trace_buf, "saddr=%s:%s ", bufaddr, bufport);
+
+ addr_to_str(&dgram->daddr, bufaddr, sizeof(bufaddr));
+ port_to_str(&dgram->daddr, bufport, sizeof(bufport));
+ chunk_appendf(&trace_buf, "daddr=%s:%s ", bufaddr, bufport);
+ }
+ /* DCID */
+ for (i = 0; i < dgram->dcid_len; ++i)
+ chunk_appendf(&trace_buf, "%02x", dgram->dcid[i]);
+
+ }
+ }
+
+ if (mask & QUIC_EV_CONN_LPKT) {
+ const struct quic_rx_packet *pkt = a2;
+ const uint64_t *len = a3;
+ const struct quic_version *ver = a4;
+
+ if (pkt) {
+ chunk_appendf(&trace_buf, " pkt@%p type=0x%02x %s",
+ pkt, pkt->type, qc_pkt_long(pkt) ? "long" : "short");
+ if (pkt->pn_node.key != (uint64_t)-1)
+ chunk_appendf(&trace_buf, " pn=%llu", pkt->pn_node.key);
+ }
+
+ if (len)
+ chunk_appendf(&trace_buf, " len=%llu", (ull)*len);
+
+ if (ver)
+ chunk_appendf(&trace_buf, " ver=0x%08x", ver->num);
+ }
+
+ if (mask & QUIC_EV_STATELESS_RST) {
+ const struct quic_cid *cid = a2;
+
+ if (cid)
+ quic_cid_dump(&trace_buf, cid);
+ }
+
+}
diff --git a/src/quic_tx.c b/src/quic_tx.c
new file mode 100644
index 0000000..306b4c2
--- /dev/null
+++ b/src/quic_tx.c
@@ -0,0 +1,2348 @@
+/*
+ * QUIC protocol implementation. Lower layer with internal features implemented
+ * here such as QUIC encryption, idle timeout, acknowledgement and
+ * retransmission.
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/quic_tx.h>
+
+#include <haproxy/pool.h>
+#include <haproxy/trace.h>
+#include <haproxy/quic_cid.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_retransmit.h>
+#include <haproxy/quic_retry.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/quic_tls.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/ssl_sock-t.h>
+
+DECLARE_POOL(pool_head_quic_tx_packet, "quic_tx_packet", sizeof(struct quic_tx_packet));
+DECLARE_POOL(pool_head_quic_cc_buf, "quic_cc_buf", QUIC_MAX_CC_BUFSIZE);
+
+static struct quic_tx_packet *qc_build_pkt(unsigned char **pos, const unsigned char *buf_end,
+ struct quic_enc_level *qel, struct quic_tls_ctx *ctx,
+ struct list *frms, struct quic_conn *qc,
+ const struct quic_version *ver, size_t dglen, int pkt_type,
+ int must_ack, int padding, int probe, int cc, int *err);
+
+static void quic_packet_encrypt(unsigned char *payload, size_t payload_len,
+ unsigned char *aad, size_t aad_len, uint64_t pn,
+ struct quic_tls_ctx *tls_ctx, struct quic_conn *qc,
+ int *fail)
+{
+ unsigned char iv[QUIC_TLS_IV_LEN];
+ unsigned char *tx_iv = tls_ctx->tx.iv;
+ size_t tx_iv_sz = tls_ctx->tx.ivlen;
+ struct enc_debug_info edi;
+
+ TRACE_ENTER(QUIC_EV_CONN_ENCPKT, qc);
+ *fail = 0;
+
+ quic_aead_iv_build(iv, sizeof iv, tx_iv, tx_iv_sz, pn);
+
+ if (!quic_tls_encrypt(payload, payload_len, aad, aad_len,
+ tls_ctx->tx.ctx, tls_ctx->tx.aead, iv)) {
+ TRACE_ERROR("QUIC packet encryption failed", QUIC_EV_CONN_ENCPKT, qc);
+ *fail = 1;
+ enc_debug_info_init(&edi, payload, payload_len, aad, aad_len, pn);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_ENCPKT, qc);
+}
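The AEAD nonce built by quic_aead_iv_build() above follows RFC 9001 section 5.3: the packet number is left-padded to the IV length and XORed with the static IV, so every packet gets a unique nonce. A minimal standalone sketch of that construction (build_aead_nonce() is a hypothetical name standing in for quic_aead_iv_build(), and it assumes the 12-byte IVs used by the TLS 1.3 AEADs):

    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of RFC 9001 5.3 nonce derivation:
     * nonce = static IV XOR left-padded packet number. */
    static void build_aead_nonce(uint8_t *nonce, const uint8_t *iv,
                                 size_t ivlen, uint64_t pn)
    {
        size_t i;

        for (i = 0; i < ivlen; i++)
            nonce[i] = iv[i];
        /* XOR the packet number into the low-order bytes, big endian. */
        for (i = 0; i < 8 && i < ivlen; i++)
            nonce[ivlen - 1 - i] ^= (uint8_t)(pn >> (8 * i));
    }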
+
+/* Free <pkt> TX packet and its attached frames.
+ * It is the caller's responsibility to remove this packet from any
+ * data structure it was possibly attached to.
+ */
+static inline void free_quic_tx_packet(struct quic_conn *qc,
+ struct quic_tx_packet *pkt)
+{
+ struct quic_frame *frm, *frmbak;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (!pkt)
+ goto leave;
+
+ list_for_each_entry_safe(frm, frmbak, &pkt->frms, list)
+ qc_frm_free(qc, &frm);
+ pool_free(pool_head_quic_tx_packet, pkt);
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
+
+/* Allocate Tx buffer from <qc> quic-conn if needed.
+ *
+ * Returns allocated buffer or NULL on error.
+ */
+struct buffer *qc_txb_alloc(struct quic_conn *qc)
+{
+ struct buffer *buf = &qc->tx.buf;
+ if (!b_alloc(buf))
+ return NULL;
+
+ return buf;
+}
+
+/* Free Tx buffer from <qc> if it is empty. */
+void qc_txb_release(struct quic_conn *qc)
+{
+ struct buffer *buf = &qc->tx.buf;
+
+ /* For the moment the sending function is responsible for purging the
+ * buffer entirely. It may change in the future but this requires to be able
+ * to reuse old data.
+ * For the moment we do not care to leave data in the buffer for
+ * a connection which is supposed to be killed asap.
+ */
+ BUG_ON_HOT(buf && b_data(buf));
+
+ if (!b_data(buf)) {
+ b_free(buf);
+ offer_buffers(NULL, 1);
+ }
+}
+
+/* Return the TX buffer dedicated to the "connection close" datagram to be
+ * built, after having allocated it, if an immediate close is required, or
+ * directly allocate a regular TX buffer if an immediate close is not required.
+ */
+struct buffer *qc_get_txb(struct quic_conn *qc)
+{
+ struct buffer *buf;
+
+ if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) {
+ TRACE_PROTO("Immediate close required", QUIC_EV_CONN_PHPKTS, qc);
+ buf = &qc->tx.cc_buf;
+ if (b_is_null(buf)) {
+ qc->tx.cc_buf_area = pool_alloc(pool_head_quic_cc_buf);
+ if (!qc->tx.cc_buf_area)
+ goto err;
+ }
+
+ /* In every case, initialize ->tx.cc_buf */
+ qc->tx.cc_buf = b_make(qc->tx.cc_buf_area, QUIC_MAX_CC_BUFSIZE, 0, 0);
+ }
+ else {
+ buf = qc_txb_alloc(qc);
+ if (!buf)
+ goto err;
+ }
+
+ return buf;
+ err:
+ return NULL;
+}
+
+/* Commit a datagram payload written into <buf> of length <length>. <first_pkt>
+ * must contain the address of the first packet stored in the payload.
+ *
+ * The caller is responsible for ensuring there is enough space in the buffer.
+ */
+static void qc_txb_store(struct buffer *buf, uint16_t length,
+ struct quic_tx_packet *first_pkt)
+{
+ const size_t hdlen = sizeof(uint16_t) + sizeof(void *);
+ BUG_ON_HOT(b_contig_space(buf) < hdlen); /* this must not happen */
+
+ write_u16(b_tail(buf), length);
+ write_ptr(b_tail(buf) + sizeof(length), first_pkt);
+ b_add(buf, hdlen + length);
+}
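qc_txb_store() thus lays each datagram out in the TX buffer as a small record: a native-endian 16-bit length, a raw pointer to the first coalesced packet, then the payload; qc_send_ppkts() and qc_purge_tx_buf() below parse the same framing. A self-contained sketch of that layout, with memcpy standing in for HAProxy's write_u16()/write_ptr()/read_u16()/read_ptr() helpers:

    #include <stdint.h>
    #include <string.h>

    struct pkt; /* opaque: the first quic_tx_packet of the datagram */

    /* Store a [u16 length][first-packet pointer][payload] record header
     * at <tail> and return the total number of buffer bytes consumed. */
    static size_t dgram_store(uint8_t *tail, uint16_t len, struct pkt *first)
    {
        memcpy(tail, &len, sizeof(len));
        memcpy(tail + sizeof(len), &first, sizeof(first));
        return sizeof(len) + sizeof(first) + len;
    }

    /* Parse the record header back, as the sending side does. */
    static struct pkt *dgram_load(const uint8_t *head, uint16_t *len)
    {
        struct pkt *first;

        memcpy(len, head, sizeof(*len));
        memcpy(&first, head + sizeof(*len), sizeof(first));
        return first;
    }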
+
+/* Returns 1 if a packet may be built for <qc> from <qel> encryption level
+ * with <frms> as ack-eliciting frame list to send, 0 if not.
+ * <cc> must be 1 if an immediate close was asked, 0 if not.
+ * <probe> must be 1 if a probing packet is required, 0 if not.
+ * Also set <*must_ack> to inform the caller if an acknowledgement should be sent.
+ */
+static int qc_may_build_pkt(struct quic_conn *qc, struct list *frms,
+ struct quic_enc_level *qel, int cc, int probe,
+ int *must_ack)
+{
+ int force_ack = qel == qc->iel || qel == qc->hel;
+ int nb_aepkts_since_last_ack = qel->pktns->rx.nb_aepkts_since_last_ack;
+
+ /* An acknowledgement must be sent if this has been forced by the caller,
+ * typically during the handshake when the packets must be acknowledged as
+ * soon as possible. This is also the case when the ack delay timer has been
+ * triggered, or at least every QUIC_MAX_RX_AEPKTS_SINCE_LAST_ACK packets.
+ */
+ *must_ack = (qc->flags & QUIC_FL_CONN_ACK_TIMER_FIRED) ||
+ ((qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) &&
+ (force_ack || nb_aepkts_since_last_ack >= QUIC_MAX_RX_AEPKTS_SINCE_LAST_ACK));
+
+ TRACE_PRINTF(TRACE_LEVEL_DEVELOPER, QUIC_EV_CONN_PHPKTS, qc, 0, 0, 0,
+ "has_sec=%d cc=%d probe=%d must_ack=%d frms=%d prep_in_fligh=%llu cwnd=%llu",
+ quic_tls_has_tx_sec(qel), cc, probe, *must_ack, LIST_ISEMPTY(frms),
+ (ullong)qc->path->prep_in_flight, (ullong)qc->path->cwnd);
+
+ /* Do not build any more packets if the TX secrets are not available, or
+ * if there is nothing to send, i.e. if no CONNECTION_CLOSE or ACK is
+ * required, there are no more packets to send upon PTO expiration, and
+ * there are no more ack-eliciting frames to send or the in-flight
+ * congestion control limit is reached for prepared data.
+ */
+ if (!quic_tls_has_tx_sec(qel) ||
+ (!cc && !probe && !*must_ack &&
+ (LIST_ISEMPTY(frms) || qc->path->prep_in_flight >= qc->path->cwnd))) {
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Prepare as many QUIC packets as possible for sending from the prebuilt frames
+ * <frms>. Each packet is stored in a distinct datagram written to <buf>.
+ *
+ * Each datagram is prepended by a two-field header: the datagram length and
+ * the address of the packet contained in the datagram.
+ *
+ * Returns the number of bytes prepared in packets if succeeded (may be 0), or
+ * -1 if something wrong happened.
+ */
+static int qc_prep_app_pkts(struct quic_conn *qc, struct buffer *buf,
+ struct list *frms)
+{
+ int ret = -1, cc;
+ struct quic_enc_level *qel;
+ unsigned char *end, *pos;
+ struct quic_tx_packet *pkt;
+ size_t total;
+
+ TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc);
+
+ qel = qc->ael;
+ total = 0;
+ pos = (unsigned char *)b_tail(buf);
+ cc = qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ /* Each datagram is prepended with its length followed by the address
+ * of the first packet in the datagram (QUIC_DGRAM_HEADLEN).
+ */
+ while ((!cc && b_contig_space(buf) >= (int)qc->path->mtu + QUIC_DGRAM_HEADLEN) ||
+ (cc && b_contig_space(buf) >= QUIC_MIN_CC_PKTSIZE + QUIC_DGRAM_HEADLEN)) {
+ int err, probe, must_ack;
+
+ TRACE_PROTO("TX prep app pkts", QUIC_EV_CONN_PHPKTS, qc, qel, frms);
+ probe = 0;
+ /* We do not probe if an immediate close was asked */
+ if (!cc)
+ probe = qel->pktns->tx.pto_probe;
+
+ if (!qc_may_build_pkt(qc, frms, qel, cc, probe, &must_ack))
+ break;
+
+ /* Leave room for the datagram header */
+ pos += QUIC_DGRAM_HEADLEN;
+ if (cc) {
+ end = pos + QUIC_MIN_CC_PKTSIZE;
+ }
+ else if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) {
+ end = pos + QUIC_MIN(qc->path->mtu, quic_may_send_bytes(qc));
+ }
+ else {
+ end = pos + qc->path->mtu;
+ }
+
+ pkt = qc_build_pkt(&pos, end, qel, &qel->tls_ctx, frms, qc, NULL, 0,
+ QUIC_PACKET_TYPE_SHORT, must_ack, 0, probe, cc, &err);
+ switch (err) {
+ case -3:
+ qc_purge_txbuf(qc, buf);
+ goto leave;
+ case -2:
+ // trace already emitted by function above
+ goto leave;
+ case -1:
+ /* As we provide qc_build_pkt() with a buffer large enough to fill an
+ * MTU, we are here because of the congestion control window. There is
+ * no need to try to reuse this buffer.
+ */
+ TRACE_PROTO("could not prepare anymore packet", QUIC_EV_CONN_PHPKTS, qc, qel);
+ goto out;
+ default:
+ break;
+ }
+
+ /* This is to please GCC. We cannot have (err >= 0 && !pkt). */
+ BUG_ON(!pkt);
+
+ if (qc->flags & QUIC_FL_CONN_RETRANS_OLD_DATA)
+ pkt->flags |= QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA;
+
+ total += pkt->len;
+
+ /* Write datagram header. */
+ qc_txb_store(buf, pkt->len, pkt);
+ /* Build only one datagram when an immediate close is required. */
+ if (cc)
+ break;
+ }
+
+ out:
+ if (total && cc) {
+ BUG_ON(buf != &qc->tx.cc_buf);
+ qc->tx.cc_dgram_len = total;
+ }
+ ret = total;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+ return ret;
+}
+
+/* Free all frames in <l> list. In addition also remove all these frames
+ * from the original ones if they are the results of duplications.
+ */
+static inline void qc_free_frm_list(struct quic_conn *qc, struct list *l)
+{
+ struct quic_frame *frm, *frmbak;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ list_for_each_entry_safe(frm, frmbak, l, list) {
+ LIST_DEL_INIT(&frm->ref);
+ qc_frm_free(qc, &frm);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
+
+/* Free <pkt> TX packet and all the packets coalesced to it. */
+static inline void qc_free_tx_coalesced_pkts(struct quic_conn *qc,
+ struct quic_tx_packet *p)
+{
+ struct quic_tx_packet *pkt, *nxt_pkt;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ for (pkt = p; pkt; pkt = nxt_pkt) {
+ qc_free_frm_list(qc, &pkt->frms);
+ nxt_pkt = pkt->next;
+ pool_free(pool_head_quic_tx_packet, pkt);
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
+
+/* Purge <buf> TX buffer of its prepared packets. */
+static void qc_purge_tx_buf(struct quic_conn *qc, struct buffer *buf)
+{
+ while (b_contig_data(buf, 0)) {
+ uint16_t dglen;
+ struct quic_tx_packet *pkt;
+ size_t headlen = sizeof dglen + sizeof pkt;
+
+ dglen = read_u16(b_head(buf));
+ pkt = read_ptr(b_head(buf) + sizeof dglen);
+ qc_free_tx_coalesced_pkts(qc, pkt);
+ b_del(buf, dglen + headlen);
+ }
+
+ BUG_ON(b_data(buf));
+}
+
+/* Send datagrams stored in <buf>.
+ *
+ * This function returns 1 for success. On error, there are several behaviors
+ * depending on the underlying sendto() error:
+ * - for an unrecoverable error, 0 is returned and the connection is killed.
+ * - a transient error is handled differently if the connection has its own
+ * socket. If this is the case, 0 is returned and the socket is subscribed to
+ * the poller. The other case is assimilated to a success case with 1 returned.
+ * Remaining data are purged from the buffer and will eventually be detected
+ * as lost, which gives the opportunity to retry sending.
+ */
+int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
+{
+ int ret = 0;
+ struct quic_conn *qc;
+ char skip_sendto = 0;
+
+ qc = ctx->qc;
+ TRACE_ENTER(QUIC_EV_CONN_SPPKTS, qc);
+ while (b_contig_data(buf, 0)) {
+ unsigned char *pos;
+ struct buffer tmpbuf = { };
+ struct quic_tx_packet *first_pkt, *pkt, *next_pkt;
+ uint16_t dglen;
+ size_t headlen = sizeof dglen + sizeof first_pkt;
+ unsigned int time_sent;
+
+ pos = (unsigned char *)b_head(buf);
+ dglen = read_u16(pos);
+ BUG_ON_HOT(!dglen); /* this should not happen */
+
+ pos += sizeof dglen;
+ first_pkt = read_ptr(pos);
+ pos += sizeof first_pkt;
+ tmpbuf.area = (char *)pos;
+ tmpbuf.size = tmpbuf.data = dglen;
+
+ TRACE_PROTO("TX dgram", QUIC_EV_CONN_SPPKTS, qc);
+ /* If sendto is on error just skip the call to it for the rest
+ * of the loop but continue to purge the buffer. Data will be
+ * transmitted when QUIC packets are detected as lost on our
+ * side.
+ *
+ * TODO use fd-monitoring to detect when send operation can be
+ * retry. This should improve the bandwidth without relying on
+ * retransmission timer. However, it requires a major rework on
+ * quic-conn fd management.
+ */
+ if (!skip_sendto) {
+ int ret = qc_snd_buf(qc, &tmpbuf, tmpbuf.data, 0);
+ if (ret < 0) {
+ TRACE_ERROR("sendto fatal error", QUIC_EV_CONN_SPPKTS, qc, first_pkt);
+ qc_kill_conn(qc);
+ qc_free_tx_coalesced_pkts(qc, first_pkt);
+ b_del(buf, dglen + headlen);
+ qc_purge_tx_buf(qc, buf);
+ goto leave;
+ }
+ else if (!ret) {
+ /* Connection owned socket : poller will wake us up when transient error is cleared. */
+ if (qc_test_fd(qc)) {
+ TRACE_ERROR("sendto error, subscribe to poller", QUIC_EV_CONN_SPPKTS, qc);
+ goto leave;
+ }
+
+ /* No connection owned-socket : rely on retransmission to retry sending. */
+ skip_sendto = 1;
+ TRACE_ERROR("sendto error, simulate sending for the rest of data", QUIC_EV_CONN_SPPKTS, qc);
+ }
+ }
+
+ b_del(buf, dglen + headlen);
+ qc->bytes.tx += tmpbuf.data;
+ time_sent = now_ms;
+
+ for (pkt = first_pkt; pkt; pkt = next_pkt) {
+ /* RFC 9000 14.1 Initial datagram size
+ * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting
+ * Initial packets to at least the smallest allowed maximum datagram size of
+ * 1200 bytes.
+ */
+ qc->cntrs.sent_pkt++;
+ BUG_ON_HOT(pkt->type == QUIC_PACKET_TYPE_INITIAL &&
+ (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) &&
+ dglen < QUIC_INITIAL_PACKET_MINLEN);
+
+ pkt->time_sent = time_sent;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) {
+ pkt->pktns->tx.time_of_last_eliciting = time_sent;
+ qc->path->ifae_pkts++;
+ if (qc->flags & QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ)
+ qc_idle_timer_rearm(qc, 0, 0);
+ }
+ if (!(qc->flags & QUIC_FL_CONN_CLOSING) &&
+ (pkt->flags & QUIC_FL_TX_PACKET_CC)) {
+ qc->flags |= QUIC_FL_CONN_CLOSING;
+ qc_detach_th_ctx_list(qc, 1);
+
+ /* RFC 9000 10.2. Immediate Close:
+ * The closing and draining connection states exist to ensure
+ * that connections close cleanly and that delayed or reordered
+ * packets are properly discarded. These states SHOULD persist
+ * for at least three times the current PTO interval...
+ *
+ * Rearm the idle timeout only one time when entering closing
+ * state.
+ */
+ qc_idle_timer_do_rearm(qc, 0);
+ if (qc->timer_task) {
+ task_destroy(qc->timer_task);
+ qc->timer_task = NULL;
+ }
+ }
+ qc->path->in_flight += pkt->in_flight_len;
+ pkt->pktns->tx.in_flight += pkt->in_flight_len;
+ if (pkt->in_flight_len)
+ qc_set_timer(qc);
+ TRACE_PROTO("TX pkt", QUIC_EV_CONN_SPPKTS, qc, pkt);
+ next_pkt = pkt->next;
+ quic_tx_packet_refinc(pkt);
+ eb64_insert(&pkt->pktns->tx.pkts, &pkt->pn_node);
+ }
+ }
+
+ ret = 1;
+leave:
+ TRACE_LEAVE(QUIC_EV_CONN_SPPKTS, qc);
+
+ return ret;
+}
+
+/* Flush txbuf for <qc> connection. This must be called prior to a packet
+ * preparation when txbuf contains older data. A send will be conducted for
+ * these data.
+ *
+ * Returns 1 on success: buffer is empty and can be used for packet
+ * preparation. On error 0 is returned.
+ */
+int qc_purge_txbuf(struct quic_conn *qc, struct buffer *buf)
+{
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ /* This operation can only be conducted if txbuf is not empty. This
+ * case only happens for connections with their own socket due to an
+ * older transient sendto() error.
+ */
+ BUG_ON(!qc_test_fd(qc));
+
+ if (b_data(buf) && !qc_send_ppkts(buf, qc->xprt_ctx)) {
+ if (qc->flags & QUIC_FL_CONN_TO_KILL)
+ qc_txb_release(qc);
+ TRACE_DEVEL("leaving in error", QUIC_EV_CONN_TXPKT, qc);
+ return 0;
+ }
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return 1;
+}
+
+/* Try to send application frames from list <frms> on connection <qc>.
+ *
+ * Use qc_send_app_probing wrapper when probing with old data.
+ *
+ * Returns 1 on success. Some data might not have been sent due to congestion;
+ * in this case it is left in the <frms> input list. The caller may subscribe on
+ * quic-conn to retry later.
+ *
+ * Returns 0 on critical error.
+ * TODO review and classify more distinctly transient from definitive errors to
+ * allow callers to properly handle it.
+ */
+int qc_send_app_pkts(struct quic_conn *qc, struct list *frms)
+{
+ int status = 0, ret;
+ struct buffer *buf;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ buf = qc_get_txb(qc);
+ if (!buf) {
+ TRACE_ERROR("could not get a buffer", QUIC_EV_CONN_TXPKT, qc);
+ goto err;
+ }
+
+ if (b_data(buf) && !qc_purge_txbuf(qc, buf))
+ goto err;
+
+ /* Prepare and send packets until no more packets can be prepared. */
+ do {
+ /* Currently buf cannot be non-empty at this stage. Even if a
+ * previous sendto() has failed it is emptied to simulate
+ * packet emission and rely on QUIC lost detection to try to
+ * emit it.
+ */
+ BUG_ON_HOT(b_data(buf));
+ b_reset(buf);
+
+ ret = qc_prep_app_pkts(qc, buf, frms);
+
+ if (b_data(buf) && !qc_send_ppkts(buf, qc->xprt_ctx)) {
+ if (qc->flags & QUIC_FL_CONN_TO_KILL)
+ qc_txb_release(qc);
+ goto err;
+ }
+ } while (ret > 0);
+
+ qc_txb_release(qc);
+ if (ret < 0)
+ goto err;
+
+ status = 1;
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return status;
+
+ err:
+ TRACE_DEVEL("leaving in error", QUIC_EV_CONN_TXPKT, qc);
+ return 0;
+}
+
+/* Try to send application frames from list <frms> on connection <qc>. Use this
+ * function when probing is required.
+ *
+ * Returns the result from qc_send_app_pkts function.
+ */
+static forceinline int qc_send_app_probing(struct quic_conn *qc,
+ struct list *frms)
+{
+ int ret;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ TRACE_PROTO("preparing old data (probing)", QUIC_EV_CONN_FRMLIST, qc, frms);
+ qc->flags |= QUIC_FL_CONN_RETRANS_OLD_DATA;
+ ret = qc_send_app_pkts(qc, frms);
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA;
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Try to send application frames from list <frms> on connection <qc>. This
+ * function is provided for MUX upper layer usage only.
+ *
+ * Returns the result from qc_send_app_pkts function.
+ */
+int qc_send_mux(struct quic_conn *qc, struct list *frms)
+{
+ int ret;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+ BUG_ON(qc->mux_state != QC_MUX_READY); /* Only the MUX may use this function, so it must be ready. */
+
+ if (qc->conn->flags & CO_FL_SOCK_WR_SH) {
+ qc->conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH;
+ TRACE_DEVEL("connection on error", QUIC_EV_CONN_TXPKT, qc);
+ return 0;
+ }
+
+ /* Try to send post handshake frames first unless on 0-RTT. */
+ if ((qc->flags & QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS) &&
+ qc->state >= QUIC_HS_ST_COMPLETE) {
+ quic_build_post_handshake_frames(qc);
+ qc_send_app_pkts(qc, &qc->ael->pktns->tx.frms);
+ }
+
+ TRACE_STATE("preparing data (from MUX)", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_TX_MUX_CONTEXT;
+ ret = qc_send_app_pkts(qc, frms);
+ qc->flags &= ~QUIC_FL_CONN_TX_MUX_CONTEXT;
+
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Return the encryption level following the one which contains <el> list head
+ * depending on <retrans> TX mode (retransmission or not).
+ */
+static inline struct quic_enc_level *qc_list_next_qel(struct list *el, int retrans)
+{
+ return !retrans ? LIST_NEXT(el, struct quic_enc_level *, list) :
+ LIST_NEXT(el, struct quic_enc_level *, retrans);
+}
+
+/* Return the encryption level following <qel> depending on <retrans> TX mode
+ * (retransmission or not).
+ */
+static inline struct quic_enc_level *qc_next_qel(struct quic_enc_level *qel, int retrans)
+{
+ struct list *el = !retrans ? &qel->list : &qel->retrans;
+
+ return qc_list_next_qel(el, retrans);
+}
+
+/* Return 1 if <qel> is at the head of its list, 0 if not. */
+static inline int qc_qel_is_head(struct quic_enc_level *qel, struct list *l,
+ int retrans)
+{
+ return !retrans ? &qel->list == l : &qel->retrans == l;
+}
+
+/* Select <*tls_ctx>, <*frms> and <*ver> for the encryption level <qel> of <qc> QUIC
+ * connection, depending on its state, especially the negotiated version and if
+ * retransmissions are required. If this is the case, <qels> is the list of
+ * encryption levels to use, or NULL if no retransmissions are required.
+ * Never fails.
+ */
+static inline void qc_select_tls_frms_ver(struct quic_conn *qc,
+ struct quic_enc_level *qel,
+ struct quic_tls_ctx **tls_ctx,
+ struct list **frms,
+ const struct quic_version **ver,
+ struct list *qels)
+{
+ if (qc->negotiated_version) {
+ *ver = qc->negotiated_version;
+ if (qel == qc->iel)
+ *tls_ctx = qc->nictx;
+ else
+ *tls_ctx = &qel->tls_ctx;
+ }
+ else {
+ *ver = qc->original_version;
+ *tls_ctx = &qel->tls_ctx;
+ }
+
+ if (!qels)
+ *frms = &qel->pktns->tx.frms;
+ else
+ *frms = qel->retrans_frms;
+}
+
+/* Prepare as many QUIC datagrams/packets as possible for sending from the <qels>
+ * list of encryption levels. Several packets can be coalesced into a single
+ * datagram. The result is written into <buf>. Note that if <qels> is NULL,
+ * the encryption levels which will be used are those currently allocated
+ * and attached to the connection.
+ *
+ * Each datagram is prepended by a two-field header: the datagram length and
+ * the address of the first packet in the datagram.
+ *
+ * Returns the number of bytes prepared in datagrams/packets if succeeded
+ * (may be 0), or -1 if something wrong happened.
+ */
+int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels)
+{
+ int ret, cc, retrans, padding;
+ struct quic_tx_packet *first_pkt, *prv_pkt;
+ unsigned char *end, *pos;
+ uint16_t dglen;
+ size_t total;
+ struct list *qel_list;
+ struct quic_enc_level *qel;
+
+ TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc);
+ /* Currently qc_prep_pkts() does not handle buffer wrapping so the
+ * caller must ensure that buf is reset.
+ */
+ BUG_ON_HOT(buf->head || buf->data);
+
+ ret = -1;
+ cc = qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ retrans = !!qels;
+ padding = 0;
+ first_pkt = prv_pkt = NULL;
+ end = pos = (unsigned char *)b_head(buf);
+ dglen = 0;
+ total = 0;
+
+ qel_list = qels ? qels : &qc->qel_list;
+ qel = qc_list_next_qel(qel_list, retrans);
+ while (!qc_qel_is_head(qel, qel_list, retrans)) {
+ struct quic_tls_ctx *tls_ctx;
+ const struct quic_version *ver;
+ struct list *frms, *next_frms;
+ struct quic_enc_level *next_qel;
+
+ if (qel == qc->eel) {
+ /* Next encryption level */
+ qel = qc_next_qel(qel, retrans);
+ continue;
+ }
+
+ qc_select_tls_frms_ver(qc, qel, &tls_ctx, &frms, &ver, qels);
+
+ next_qel = qc_next_qel(qel, retrans);
+ next_frms = qc_qel_is_head(next_qel, qel_list, retrans) ? NULL :
+ !qels ? &next_qel->pktns->tx.frms : next_qel->retrans_frms;
+
+ /* Build as many datagrams as possible at <qel> encryption level.
+ * Each datagram is prepended with its length followed by the address
+ * of the first packet in the datagram (QUIC_DGRAM_HEADLEN).
+ */
+ while ((!cc && b_contig_space(buf) >= (int)qc->path->mtu + QUIC_DGRAM_HEADLEN) ||
+ (cc && b_contig_space(buf) >= QUIC_MIN_CC_PKTSIZE + QUIC_DGRAM_HEADLEN) || prv_pkt) {
+ int err, probe, must_ack;
+ enum quic_pkt_type pkt_type;
+ struct quic_tx_packet *cur_pkt;
+
+ TRACE_PROTO("TX prep pkts", QUIC_EV_CONN_PHPKTS, qc, qel);
+ probe = 0;
+ /* We do not probe if an immediate close was asked */
+ if (!cc)
+ probe = qel->pktns->tx.pto_probe;
+
+ if (!qc_may_build_pkt(qc, frms, qel, cc, probe, &must_ack)) {
+ if (prv_pkt && qc_qel_is_head(next_qel, qel_list, retrans)) {
+ qc_txb_store(buf, dglen, first_pkt);
+ /* Build only one datagram when an immediate close is required. */
+ if (cc)
+ goto out;
+ }
+
+ TRACE_DEVEL("next encryption level", QUIC_EV_CONN_PHPKTS, qc);
+ break;
+ }
+
+ if (!prv_pkt) {
+ /* Leave room for the datagram header */
+ pos += QUIC_DGRAM_HEADLEN;
+ if (cc) {
+ end = pos + QUIC_MIN_CC_PKTSIZE;
+ }
+ else if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) {
+ end = pos + QUIC_MIN(qc->path->mtu, quic_may_send_bytes(qc));
+ }
+ else {
+ end = pos + qc->path->mtu;
+ }
+ }
+
+ /* RFC 9000 14.1 Initial datagram size
+ * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting
+ * Initial packets to at least the smallest allowed maximum datagram size of
+ * 1200 bytes.
+ *
+ * Ensure that no ack-eliciting packets are sent into too small datagrams
+ */
+ if (qel == qc->iel && !LIST_ISEMPTY(frms)) {
+ if (end - pos < QUIC_INITIAL_PACKET_MINLEN) {
+ TRACE_PROTO("No more enough room to build an Initial packet",
+ QUIC_EV_CONN_PHPKTS, qc);
+ break;
+ }
+
+ /* Pad this Initial packet if there are no ack-eliciting frames to send from
+ * the next packet number space.
+ */
+ if (!next_frms || LIST_ISEMPTY(next_frms))
+ padding = 1;
+ }
+
+ pkt_type = quic_enc_level_pkt_type(qc, qel);
+ cur_pkt = qc_build_pkt(&pos, end, qel, tls_ctx, frms,
+ qc, ver, dglen, pkt_type,
+ must_ack, padding, probe, cc, &err);
+ switch (err) {
+ case -3:
+ if (first_pkt)
+ qc_txb_store(buf, dglen, first_pkt);
+ qc_purge_tx_buf(qc, buf);
+ goto leave;
+ case -2:
+ // trace already emitted by function above
+ goto leave;
+ case -1:
+ /* If there was already a correct packet present, set the
+ * current datagram as prepared into <buf>.
+ */
+ if (prv_pkt)
+ qc_txb_store(buf, dglen, first_pkt);
+ TRACE_PROTO("could not prepare anymore packet", QUIC_EV_CONN_PHPKTS, qc, qel);
+ goto out;
+ default:
+ break;
+ }
+
+ /* This is to please GCC. We cannot have (err >= 0 && !cur_pkt). */
+ BUG_ON(!cur_pkt);
+
+ total += cur_pkt->len;
+ dglen += cur_pkt->len;
+
+ if (qc->flags & QUIC_FL_CONN_RETRANS_OLD_DATA)
+ cur_pkt->flags |= QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA;
+
+ /* keep track of the first packet in the datagram */
+ if (!first_pkt)
+ first_pkt = cur_pkt;
+
+ /* Attach the current one to the previous one and vice versa */
+ if (prv_pkt) {
+ prv_pkt->next = cur_pkt;
+ cur_pkt->prev = prv_pkt;
+ cur_pkt->flags |= QUIC_FL_TX_PACKET_COALESCED;
+ }
+
+ /* If there is no more packet to build for this encryption level,
+ * select the next one <next_qel>, if any, to coalesce a packet in
+ * the same datagram, except if <qel> is the Application data
+ * encryption level which cannot be selected to do that.
+ */
+ if (LIST_ISEMPTY(frms) && qel != qc->ael &&
+ !qc_qel_is_head(next_qel, qel_list, retrans)) {
+ if (qel == qc->iel &&
+ (!qc_is_listener(qc) ||
+ cur_pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING))
+ padding = 1;
+
+ prv_pkt = cur_pkt;
+ break;
+ }
+ else {
+ qc_txb_store(buf, dglen, first_pkt);
+ /* Build only one datagram when an immediate close is required. */
+ if (cc)
+ goto out;
+ first_pkt = NULL;
+ dglen = 0;
+ padding = 0;
+ prv_pkt = NULL;
+ }
+ }
+
+ /* Next encryption level */
+ qel = next_qel;
+ }
+
+ out:
+ if (cc && total) {
+ BUG_ON(buf != &qc->tx.cc_buf);
+ BUG_ON(dglen != total);
+ qc->tx.cc_dgram_len = dglen;
+ }
+
+ ret = total;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc);
+ return ret;
+}
+
+/* Send handshake packets from up to two encryption levels <qel1> and <qel2>,
+ * using their attached retransmission frame lists, for <qc> QUIC connection.
+ * <old_data> is used as a boolean to send data already sent but not already
+ * acknowledged (in flight).
+ * Returns 1 if succeeded, 0 if not.
+ */
+int qc_send_hdshk_pkts(struct quic_conn *qc, int old_data,
+ struct quic_enc_level *qel1, struct quic_enc_level *qel2)
+{
+ int ret, status = 0;
+ struct buffer *buf = qc_get_txb(qc);
+ struct list qels = LIST_HEAD_INIT(qels);
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (!buf) {
+ TRACE_ERROR("buffer allocation failed", QUIC_EV_CONN_TXPKT, qc);
+ goto leave;
+ }
+
+ if (b_data(buf) && !qc_purge_txbuf(qc, buf)) {
+ TRACE_ERROR("Could not purge TX buffer", QUIC_EV_CONN_TXPKT, qc);
+ goto out;
+ }
+
+ /* Currently buf cannot be non-empty at this stage. Even if a previous
+ * sendto() has failed it is emptied to simulate packet emission and
+ * rely on QUIC lost detection to try to emit it.
+ */
+ BUG_ON_HOT(b_data(buf));
+ b_reset(buf);
+
+ if (old_data) {
+ TRACE_STATE("old data for probing asked", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags |= QUIC_FL_CONN_RETRANS_OLD_DATA;
+ }
+
+ if (qel1) {
+ BUG_ON(LIST_INLIST(&qel1->retrans));
+ LIST_APPEND(&qels, &qel1->retrans);
+ }
+
+ if (qel2) {
+ BUG_ON(LIST_INLIST(&qel2->retrans));
+ LIST_APPEND(&qels, &qel2->retrans);
+ }
+
+ ret = qc_prep_hpkts(qc, buf, &qels);
+ if (ret == -1) {
+ qc_txb_release(qc);
+ TRACE_ERROR("Could not build some packets", QUIC_EV_CONN_TXPKT, qc);
+ goto out;
+ }
+
+ if (ret && !qc_send_ppkts(buf, qc->xprt_ctx)) {
+ if (qc->flags & QUIC_FL_CONN_TO_KILL)
+ qc_txb_release(qc);
+ TRACE_ERROR("Could not send some packets", QUIC_EV_CONN_TXPKT, qc);
+ goto out;
+ }
+
+ qc_txb_release(qc);
+ status = 1;
+
+ out:
+ if (qel1) {
+ LIST_DEL_INIT(&qel1->retrans);
+ qel1->retrans_frms = NULL;
+ }
+
+ if (qel2) {
+ LIST_DEL_INIT(&qel2->retrans);
+ qel2->retrans_frms = NULL;
+ }
+
+ TRACE_STATE("no more need old data for probing", QUIC_EV_CONN_TXPKT, qc);
+ qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return status;
+}
+
+/* Retransmit up to two datagrams depending on packet number space.
+ * Returns 1 if succeeded, 0 if not.
+ */
+int qc_dgrams_retransmit(struct quic_conn *qc)
+{
+ int ret = 0;
+ int sret;
+ struct quic_pktns *ipktns = qc->ipktns;
+ struct quic_pktns *hpktns = qc->hpktns;
+ struct quic_pktns *apktns = qc->apktns;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ /* Note that if the Initial packet number space is not discarded,
+ * this is also the case for the Handshake packet number space.
+ */
+ if (ipktns && (ipktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED)) {
+ int i;
+
+ for (i = 0; i < QUIC_MAX_NB_PTO_DGRAMS; i++) {
+ struct list ifrms = LIST_HEAD_INIT(ifrms);
+ struct list hfrms = LIST_HEAD_INIT(hfrms);
+ struct list qels = LIST_HEAD_INIT(qels);
+
+ qc_prep_hdshk_fast_retrans(qc, &ifrms, &hfrms);
+ TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &ifrms);
+ TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &hfrms);
+ if (!LIST_ISEMPTY(&ifrms)) {
+ ipktns->tx.pto_probe = 1;
+ if (!LIST_ISEMPTY(&hfrms))
+ hpktns->tx.pto_probe = 1;
+ qc->iel->retrans_frms = &ifrms;
+ if (qc->hel)
+ qc->hel->retrans_frms = &hfrms;
+ sret = qc_send_hdshk_pkts(qc, 1, qc->iel, qc->hel);
+ qc_free_frm_list(qc, &ifrms);
+ qc_free_frm_list(qc, &hfrms);
+ if (!sret)
+ goto leave;
+ }
+ else {
+ /* We are in the case where the anti-amplification limit will be
+ * reached after having sent this datagram or some handshake frames
+ * could not be allocated. There is no need to send more than one
+ * datagram.
+ */
+ ipktns->tx.pto_probe = 1;
+ qc->iel->retrans_frms = &ifrms;
+ sret = qc_send_hdshk_pkts(qc, 0, qc->iel, NULL);
+ qc_free_frm_list(qc, &ifrms);
+ qc_free_frm_list(qc, &hfrms);
+ if (!sret)
+ goto leave;
+
+ break;
+ }
+ }
+ TRACE_STATE("no more need to probe Initial packet number space",
+ QUIC_EV_CONN_TXPKT, qc);
+ ipktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED;
+ if (hpktns)
+ hpktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED;
+ }
+ else {
+ int i;
+
+ if (hpktns && (hpktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED)) {
+ hpktns->tx.pto_probe = 0;
+ for (i = 0; i < QUIC_MAX_NB_PTO_DGRAMS; i++) {
+ struct list frms1 = LIST_HEAD_INIT(frms1);
+
+ qc_prep_fast_retrans(qc, hpktns, &frms1, NULL);
+ TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &frms1);
+ if (!LIST_ISEMPTY(&frms1)) {
+ hpktns->tx.pto_probe = 1;
+ qc->hel->retrans_frms = &frms1;
+ sret = qc_send_hdshk_pkts(qc, 1, qc->hel, NULL);
+ qc_free_frm_list(qc, &frms1);
+ if (!sret)
+ goto leave;
+ }
+ }
+ TRACE_STATE("no more need to probe Handshake packet number space",
+ QUIC_EV_CONN_TXPKT, qc);
+ hpktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED;
+ }
+ else if (apktns && (apktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED)) {
+ struct list frms2 = LIST_HEAD_INIT(frms2);
+ struct list frms1 = LIST_HEAD_INIT(frms1);
+
+ apktns->tx.pto_probe = 0;
+ qc_prep_fast_retrans(qc, apktns, &frms1, &frms2);
+ TRACE_PROTO("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &frms1);
+ TRACE_PROTO("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &frms2);
+
+ if (!LIST_ISEMPTY(&frms1)) {
+ apktns->tx.pto_probe = 1;
+ sret = qc_send_app_probing(qc, &frms1);
+ qc_free_frm_list(qc, &frms1);
+ if (!sret) {
+ qc_free_frm_list(qc, &frms2);
+ goto leave;
+ }
+ }
+
+ if (!LIST_ISEMPTY(&frms2)) {
+ apktns->tx.pto_probe = 1;
+ sret = qc_send_app_probing(qc, &frms2);
+ qc_free_frm_list(qc, &frms2);
+ if (!sret)
+ goto leave;
+ }
+ TRACE_STATE("no more need to probe 01RTT packet number space",
+ QUIC_EV_CONN_TXPKT, qc);
+ apktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED;
+ }
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/*
+ * Send a Version Negotiation packet in response to <pkt> on socket <fd> to
+ * address <addr>.
+ * Implementation of RFC9000 6. Version Negotiation
+ *
+ * TODO implement a rate-limiting sending of Version Negotiation packets
+ *
+ * Returns 0 on success else non-zero
+ */
+int send_version_negotiation(int fd, struct sockaddr_storage *addr,
+ struct quic_rx_packet *pkt)
+{
+ char buf[256];
+ int ret = 0, i = 0, j;
+ uint32_t version;
+ const socklen_t addrlen = get_addr_len(addr);
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT);
+ /*
+ * header form
+ * long header, fixed bit to 0 for Version Negotiation
+ */
+ /* TODO: RAND_bytes() should be replaced? */
+ if (RAND_bytes((unsigned char *)buf, 1) != 1) {
+ TRACE_ERROR("RAND_bytes() error", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ buf[i++] |= '\x80';
+ /* null version for Version Negotiation */
+ buf[i++] = '\x00';
+ buf[i++] = '\x00';
+ buf[i++] = '\x00';
+ buf[i++] = '\x00';
+
+ /* source connection id */
+ buf[i++] = pkt->scid.len;
+ memcpy(&buf[i], pkt->scid.data, pkt->scid.len);
+ i += pkt->scid.len;
+
+ /* destination connection id */
+ buf[i++] = pkt->dcid.len;
+ memcpy(&buf[i], pkt->dcid.data, pkt->dcid.len);
+ i += pkt->dcid.len;
+
+ /* supported version */
+ for (j = 0; j < quic_versions_nb; j++) {
+ version = htonl(quic_versions[j].num);
+ memcpy(&buf[i], &version, sizeof(version));
+ i += sizeof(version);
+ }
+
+ if (sendto(fd, buf, i, 0, (struct sockaddr *)addr, addrlen) < 0)
+ goto out;
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
+ return !ret;
+}
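Note the field ordering above: per RFC 8999 section 6, a Version Negotiation packet echoes the client's Source Connection ID as its own Destination Connection ID and vice versa, which is why the received <pkt>'s SCID is written first. A layout summary of the datagram built by this function (a reference sketch, not part of the patch):

    /* Version Negotiation datagram as built by send_version_negotiation():
     *
     * byte 0     : 0b1xxxxxxx   long header form, remaining bits random
     * bytes 1-4  : 0x00000000   version 0 identifies Version Negotiation
     * next       : DCID length + DCID   (echo of the client's SCID)
     * next       : SCID length + SCID   (echo of the client's DCID)
     * remainder  : one 32-bit network-order word per supported version
     */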
+
+/* Send a stateless reset packet depending on <rxpkt> RX packet information
+ * from <l> listener's UDP socket to <dstaddr>.
+ * Return 1 if succeeded, 0 if not.
+ */
+int send_stateless_reset(struct listener *l, struct sockaddr_storage *dstaddr,
+ struct quic_rx_packet *rxpkt)
+{
+ int ret = 0, pktlen, rndlen;
+ unsigned char pkt[64];
+ const socklen_t addrlen = get_addr_len(dstaddr);
+ struct proxy *prx;
+ struct quic_counters *prx_counters;
+
+ TRACE_ENTER(QUIC_EV_STATELESS_RST);
+
+ prx = l->bind_conf->frontend;
+ prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module);
+ /* 10.3 Stateless Reset (https://www.rfc-editor.org/rfc/rfc9000.html#section-10.3)
+ * The resulting minimum size of 21 bytes does not guarantee that a Stateless
+ * Reset is difficult to distinguish from other packets if the recipient requires
+ * the use of a connection ID. To achieve that end, the endpoint SHOULD ensure
+ * that all packets it sends are at least 22 bytes longer than the minimum
+ * connection ID length that it requests the peer to include in its packets,
+ * adding PADDING frames as necessary. This ensures that any Stateless Reset
+ * sent by the peer is indistinguishable from a valid packet sent to the endpoint.
+ * An endpoint that sends a Stateless Reset in response to a packet that is
+ * 43 bytes or shorter SHOULD send a Stateless Reset that is one byte shorter
+ * than the packet it responds to.
+ */
+
+ /* Note that we build at most a 42-byte QUIC packet to mimic a short packet */
+ pktlen = rxpkt->len <= 43 ? rxpkt->len - 1 : 0;
+ pktlen = QUIC_MAX(QUIC_STATELESS_RESET_PACKET_MINLEN, pktlen);
+ rndlen = pktlen - QUIC_STATELESS_RESET_TOKEN_LEN;
+
+ /* Put a header of random bytes */
+ /* TODO: RAND_bytes() should be replaced */
+ if (RAND_bytes(pkt, rndlen) != 1) {
+ TRACE_ERROR("RAND_bytes() failed", QUIC_EV_STATELESS_RST);
+ goto leave;
+ }
+
+ /* Clear the most significant bit, and set the second one */
+ *pkt = (*pkt & ~0x80) | 0x40;
+ if (!quic_stateless_reset_token_cpy(pkt + rndlen, QUIC_STATELESS_RESET_TOKEN_LEN,
+ rxpkt->dcid.data, rxpkt->dcid.len))
+ goto leave;
+
+ if (sendto(l->rx.fd, pkt, pktlen, 0, (struct sockaddr *)dstaddr, addrlen) < 0)
+ goto leave;
+
+ ret = 1;
+ HA_ATOMIC_INC(&prx_counters->stateless_reset_sent);
+ TRACE_PROTO("stateless reset sent", QUIC_EV_STATELESS_RST, NULL, &rxpkt->dcid);
+ leave:
+ TRACE_LEAVE(QUIC_EV_STATELESS_RST);
+ return ret;
+}
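The sizing arithmetic above implements RFC 9000 section 10.3: mimic a short-header packet, never go below the minimum packet length, and answer a datagram of 43 bytes or less with one byte fewer than was received, so that chains of resets between two endpoints shrink and terminate. A worked example, assuming the constants keep their RFC values (QUIC_STATELESS_RESET_PACKET_MINLEN == 21 and QUIC_STATELESS_RESET_TOKEN_LEN == 16 are assumptions here, to be checked against the headers):

    #include <assert.h>

    #define MINLEN   21  /* assumed QUIC_STATELESS_RESET_PACKET_MINLEN */
    #define TOKENLEN 16  /* assumed QUIC_STATELESS_RESET_TOKEN_LEN */

    /* Same pktlen rule as send_stateless_reset() above. */
    static int reset_len(int rxlen)
    {
        int pktlen = rxlen <= 43 ? rxlen - 1 : 0; /* one byte shorter */
        return pktlen > MINLEN ? pktlen : MINLEN; /* RFC minimum */
    }

    int main(void)
    {
        assert(reset_len(30) == 29);   /* 29 - 16 = 13 random header bytes */
        assert(reset_len(1200) == 21); /* 21 - 16 = 5 random header bytes */
        return 0;
    }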
+
+/* Return the long packet type matching with <qv> version and <type> */
+static inline int quic_pkt_type(int type, uint32_t version)
+{
+ if (version != QUIC_PROTOCOL_VERSION_2)
+ return type;
+
+ switch (type) {
+ case QUIC_PACKET_TYPE_INITIAL:
+ return 1;
+ case QUIC_PACKET_TYPE_0RTT:
+ return 2;
+ case QUIC_PACKET_TYPE_HANDSHAKE:
+ return 3;
+ case QUIC_PACKET_TYPE_RETRY:
+ return 0;
+ }
+
+ return -1;
+}
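This is the codepoint shuffle defined by RFC 9369 (QUIC version 2) relative to RFC 9000. Assuming HAProxy's QUIC_PACKET_TYPE_* constants carry the RFC 9000 (v1) values, the mapping encoded above is:

    /* Long header packet type codepoints, v1 (RFC 9000) vs v2 (RFC 9369),
     * as returned by quic_pkt_type(). The v1 column assumes the
     * QUIC_PACKET_TYPE_* constants match the RFC 9000 values. */
    static const struct { const char *name; int v1; int v2; } long_pkt_types[] = {
        { "Initial",   0, 1 },
        { "0-RTT",     1, 2 },
        { "Handshake", 2, 3 },
        { "Retry",     3, 0 },
    };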
+
+
+/* Generate a Retry packet and send it on <fd> socket to <addr> in response to
+ * the Initial <pkt> packet.
+ *
+ * Returns 0 on success else non-zero.
+ */
+int send_retry(int fd, struct sockaddr_storage *addr,
+ struct quic_rx_packet *pkt, const struct quic_version *qv)
+{
+ int ret = 0;
+ unsigned char buf[128];
+ int i = 0, token_len;
+ const socklen_t addrlen = get_addr_len(addr);
+ struct quic_cid scid;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT);
+
+ /* long header(1) | fixed bit(1) | packet type QUIC_PACKET_TYPE_RETRY(2) | unused random bits(4)*/
+ buf[i++] = (QUIC_PACKET_LONG_HEADER_BIT | QUIC_PACKET_FIXED_BIT) |
+ (quic_pkt_type(QUIC_PACKET_TYPE_RETRY, qv->num) << QUIC_PACKET_TYPE_SHIFT) |
+ statistical_prng_range(16);
+ /* version */
+ write_n32(&buf[i], qv->num);
+ i += sizeof(uint32_t);
+
+ /* Use the SCID from <pkt> for Retry DCID. */
+ buf[i++] = pkt->scid.len;
+ memcpy(&buf[i], pkt->scid.data, pkt->scid.len);
+ i += pkt->scid.len;
+
+ /* Generate a new CID to be used as SCID for the Retry packet. */
+ scid.len = QUIC_HAP_CID_LEN;
+ /* TODO: RAND_bytes() should be replaced */
+ if (RAND_bytes(scid.data, scid.len) != 1) {
+ TRACE_ERROR("RAND_bytes() failed", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ buf[i++] = scid.len;
+ memcpy(&buf[i], scid.data, scid.len);
+ i += scid.len;
+
+ /* token */
+ if (!(token_len = quic_generate_retry_token(&buf[i], sizeof(buf) - i, qv->num,
+ &pkt->dcid, &pkt->scid, addr))) {
+ TRACE_ERROR("quic_generate_retry_token() failed", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ i += token_len;
+
+ /* token integrity tag */
+ if ((sizeof(buf) - i < QUIC_TLS_TAG_LEN) ||
+ !quic_tls_generate_retry_integrity_tag(pkt->dcid.data,
+ pkt->dcid.len, buf, i, qv)) {
+ TRACE_ERROR("quic_tls_generate_retry_integrity_tag() failed", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ i += QUIC_TLS_TAG_LEN;
+
+ if (sendto(fd, buf, i, 0, (struct sockaddr *)addr, addrlen) < 0) {
+ TRACE_ERROR("quic_tls_generate_retry_integrity_tag() failed", QUIC_EV_CONN_TXPKT);
+ goto out;
+ }
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT);
+ return !ret;
+}
+
+/* Write a 32-bit integer to a buffer with <buf> as address.
+ * Make <buf> point to the data after this 32-bit value if succeeded.
+ * Note that these 32-bit integers are network byte ordered.
+ * Returns 0 if failed (not enough room in the buffer), 1 if succeeded.
+ */
+static inline int quic_write_uint32(unsigned char **buf,
+ const unsigned char *end, uint32_t val)
+{
+ if (end - *buf < sizeof val)
+ return 0;
+
+ *(uint32_t *)*buf = htonl(val);
+ *buf += sizeof val;
+
+ return 1;
+}
+
+/* Return the maximum number of bytes which may be used to completely fill a
+ * buffer of <sz> bytes with a data field prefixed by its QUIC variable-length
+ * encoding (may be 0).
+ * Also put in <*len_sz> the size of this QUIC variable-length field.
+ * So after returning from this function we have: <*len_sz> + <ret> <= <sz>,
+ * with <ret> the largest data length satisfying this inequality.
+ */
+static inline size_t max_available_room(size_t sz, size_t *len_sz)
+{
+ size_t sz_sz, ret;
+ size_t diff;
+
+ sz_sz = quic_int_getsize(sz);
+ if (sz <= sz_sz)
+ return 0;
+
+ ret = sz - sz_sz;
+ *len_sz = quic_int_getsize(ret);
+ /* Difference between the two sizes. Note that <sz_sz> >= <*len_sz>. */
+ diff = sz_sz - *len_sz;
+ if (unlikely(diff > 0)) {
+ /* Let's try to take into account the remaining bytes.
+ *
+ * <----------------> <sz_sz>
+ * <--------------><--------> +----> <max_int>
+ * <ret> <len_sz> |
+ * +---------------------------+-----------....
+ * <--------------------------------> <sz>
+ */
+ size_t max_int = quic_max_int(*len_sz);
+
+ if (max_int + *len_sz <= sz)
+ ret = max_int;
+ else
+ ret = sz - diff;
+ }
+
+ return ret;
+}
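quic_int_getsize() is assumed to return the QUIC variable-length integer size of RFC 9000 section 16: 1, 2, 4 or 8 bytes for values fitting in 6, 14, 30 or 62 bits. Under that encoding, max_available_room(100, &len_sz) yields 98 with a 2-byte length field (2 + 98 == 100 exactly), while max_available_room(65, &len_sz) yields 63 with a 1-byte field, leaving one byte unused. A sketch of the sizing rule this function relies on:

    #include <stddef.h>
    #include <stdint.h>

    /* QUIC varint size (RFC 9000 16); mirrors what quic_int_getsize()
     * is assumed to compute. */
    static size_t varint_size(uint64_t v)
    {
        if (v < (1ULL << 6))  return 1;
        if (v < (1ULL << 14)) return 2;
        if (v < (1ULL << 30)) return 4;
        if (v < (1ULL << 62)) return 8;
        return 0; /* not encodable as a QUIC varint */
    }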
+
+/* This function computes the maximum data we can put into a buffer with <sz> as
+ * size prefixed with a variable-length field "Length" whose value is the
+ * remaining data length, already filled of <ilen> bytes which must be taken
+ * into an account by "Length" field, and finally followed by the data we want
+ * to put in this buffer prefixed again by a variable-length field.
+ * <sz> is the size of the buffer to fill.
+ * <ilen> the number of bytes already put after the "Length" field.
+ * <dlen> the number of bytes we want to at most put in the buffer.
+ * Also set <*dlen_sz> to the size of the data variable-length we want to put in
+ * the buffer. This is typically this function which must be used to fill as
+ * much as possible a QUIC packet made of only one CRYPTO or STREAM frames.
+ * Returns this computed size if there is enough room in the buffer, 0 if not.
+ */
+static inline size_t max_stream_data_size(size_t sz, size_t ilen, size_t dlen)
+{
+ size_t ret, len_sz, dlen_sz;
+
+ /*
+ * The lengths of variable-length QUIC integers are powers of two.
+ * Look for the first "Length" field value <len_sz> which matches our need.
+ * As we must put <ilen> bytes in our buffer, the minimum value for
+ * <len_sz> is the number of bytes required to encode <ilen>.
+ */
+ for (len_sz = quic_int_getsize(ilen);
+ len_sz <= QUIC_VARINT_MAX_SIZE;
+ len_sz <<= 1) {
+ if (sz < len_sz + ilen)
+ return 0;
+
+ ret = max_available_room(sz - len_sz - ilen, &dlen_sz);
+ if (!ret)
+ return 0;
+
+ /* Check that <len_sz> matches <ret> value */
+ if (len_sz + ilen + dlen_sz + ret <= quic_max_int(len_sz))
+ return ret < dlen ? ret : dlen;
+ }
+
+ return 0;
+}
+
+/* Return the length in bytes of <pn> packet number depending on
+ * <largest_acked_pn> the largest acknowledged packet number.
+ */
+static inline size_t quic_packet_number_length(int64_t pn,
+ int64_t largest_acked_pn)
+{
+ int64_t max_nack_pkts;
+
+ /* About packet number encoding, the RFC says:
+ * The sender MUST use a packet number size able to represent more than
+ * twice as large a range than the difference between the largest
+ * acknowledged packet and packet number being sent.
+ */
+ max_nack_pkts = 2 * (pn - largest_acked_pn) + 1;
+ if (max_nack_pkts > 0xffffff)
+ return 4;
+ if (max_nack_pkts > 0xffff)
+ return 3;
+ if (max_nack_pkts > 0xff)
+ return 2;
+
+ return 1;
+}
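A worked example using the RFC 9000 Appendix A.2 sample: sending pn 0xac5c02 when 0xabe8b3 was the largest acknowledged gives max_nack_pkts = 2 * 0x734f + 1 = 0xe69f, which exceeds 0xff but not 0xffff, so 2 bytes (16 bits) are used. A standalone check of the same rule:

    #include <assert.h>
    #include <stdint.h>

    /* Same computation as quic_packet_number_length(), for illustration. */
    static unsigned pn_len(int64_t pn, int64_t largest_acked)
    {
        int64_t range = 2 * (pn - largest_acked) + 1;

        return range > 0xffffff ? 4 : range > 0xffff ? 3 :
               range > 0xff ? 2 : 1;
    }

    int main(void)
    {
        assert(pn_len(0xac5c02, 0xabe8b3) == 2); /* RFC 9000 A.2 */
        return 0;
    }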
+
+/* Encode <pn> packet number with <pn_len> as length in bytes into a buffer with
+ * <buf> as current copy address and <end> as pointer to one past the end of
+ * this buffer.
+ * Returns 1 on success, 0 if there is not enough room in the buffer.
+ */
+static inline int quic_packet_number_encode(unsigned char **buf,
+ const unsigned char *end,
+ uint64_t pn, size_t pn_len)
+{
+ if (end - *buf < pn_len)
+ return 0;
+
+ /* Encode the packet number. */
+ switch (pn_len) {
+ case 1:
+ **buf = pn;
+ break;
+ case 2:
+ write_n16(*buf, pn);
+ break;
+ case 3:
+ (*buf)[0] = pn >> 16;
+ (*buf)[1] = pn >> 8;
+ (*buf)[2] = pn;
+ break;
+ case 4:
+ write_n32(*buf, pn);
+ break;
+ }
+ *buf += pn_len;
+
+ return 1;
+}
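The receiver reverses this truncation with the packet number decoding algorithm of RFC 9000 Appendix A.3, which lives on the RX path rather than in this file; a sketch for reference (an unsigned-overflow-safe rearrangement of the RFC pseudocode):

    #include <stdint.h>

    /* RFC 9000 A.3: recover the full packet number from its truncated
     * encoding, given the largest packet number received so far. */
    static uint64_t decode_packet_number(uint64_t largest_pn,
                                         uint64_t truncated_pn,
                                         unsigned pn_nbits)
    {
        uint64_t expected  = largest_pn + 1;
        uint64_t win       = 1ULL << pn_nbits;
        uint64_t hwin      = win / 2;
        uint64_t mask      = win - 1;
        uint64_t candidate = (expected & ~mask) | truncated_pn;

        if (candidate + hwin <= expected && candidate + win < (1ULL << 62))
            return candidate + win;
        if (candidate > expected + hwin && candidate >= win)
            return candidate - win;
        return candidate;
    }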
+
+/* This function builds into a buffer at <pos> position a QUIC long packet header,
+ * <end> being one byte past the end of this buffer.
+ * Return 1 if enough room to build this header, 0 if not.
+ */
+static int quic_build_packet_long_header(unsigned char **pos, const unsigned char *end,
+ int type, size_t pn_len,
+ struct quic_conn *qc, const struct quic_version *ver)
+{
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_LPKT, qc);
+
+ if (end - *pos < sizeof ver->num + qc->dcid.len + qc->scid.len + 3) {
+ TRACE_DEVEL("not enough room", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ type = quic_pkt_type(type, ver->num);
+ /* #0 byte flags */
+ *(*pos)++ = QUIC_PACKET_FIXED_BIT | QUIC_PACKET_LONG_HEADER_BIT |
+ (type << QUIC_PACKET_TYPE_SHIFT) | (pn_len - 1);
+ /* Version */
+ quic_write_uint32(pos, end, ver->num);
+ *(*pos)++ = qc->dcid.len;
+ /* Destination connection ID */
+ if (qc->dcid.len) {
+ memcpy(*pos, qc->dcid.data, qc->dcid.len);
+ *pos += qc->dcid.len;
+ }
+ /* Source connection ID */
+ *(*pos)++ = qc->scid.len;
+ if (qc->scid.len) {
+ memcpy(*pos, qc->scid.data, qc->scid.len);
+ *pos += qc->scid.len;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_LPKT, qc);
+ return ret;
+}
+
+/* This function builds into a buffer at <pos> position a QUIC short packet header,
+ * <end> being one byte past the end of this buffer.
+ * Return 1 if enough room to build this header, 0 if not.
+ */
+static int quic_build_packet_short_header(unsigned char **pos, const unsigned char *end,
+ size_t pn_len, struct quic_conn *qc,
+ unsigned char tls_flags)
+{
+ int ret = 0;
+ unsigned char spin_bit =
+ (qc->flags & QUIC_FL_CONN_SPIN_BIT) ? QUIC_PACKET_SPIN_BIT : 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ if (end - *pos < 1 + qc->dcid.len) {
+ TRACE_DEVEL("not enough room", QUIC_EV_CONN_LPKT, qc);
+ goto leave;
+ }
+
+ /* #0 byte flags */
+ *(*pos)++ = QUIC_PACKET_FIXED_BIT | spin_bit |
+ ((tls_flags & QUIC_FL_TLS_KP_BIT_SET) ? QUIC_PACKET_KEY_PHASE_BIT : 0) | (pn_len - 1);
+ /* Destination connection ID */
+ if (qc->dcid.len) {
+ memcpy(*pos, qc->dcid.data, qc->dcid.len);
+ *pos += qc->dcid.len;
+ }
+
+ ret = 1;
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+}
+
+/* Apply QUIC header protection to the packet with <pos> as first byte address,
+ * <pn> as address of the Packet number field, <pnlen> being this field length
+ * with <aead> as AEAD cipher and <key> as secret key.
+ *
+ * TODO: no error is expected since encryption is done in place, but the
+ * encryption API documentation is unclear. <fail> will be set to true if an
+ * error is detected.
+ */
+void quic_apply_header_protection(struct quic_conn *qc, unsigned char *pos,
+ unsigned char *pn, size_t pnlen,
+ struct quic_tls_ctx *tls_ctx, int *fail)
+
+{
+ int i;
+	/* We need an IV of at least 5 bytes: one byte for byte #0
+ * and at most 4 bytes for the packet number
+ */
+ unsigned char mask[5] = {0};
+ EVP_CIPHER_CTX *aes_ctx = tls_ctx->tx.hp_ctx;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ *fail = 0;
+
+ if (!quic_tls_aes_encrypt(mask, pn + QUIC_PACKET_PN_MAXLEN, sizeof mask, aes_ctx)) {
+ TRACE_ERROR("could not apply header protection", QUIC_EV_CONN_TXPKT, qc);
+ *fail = 1;
+ goto out;
+ }
+
+ *pos ^= mask[0] & (*pos & QUIC_PACKET_LONG_HEADER_BIT ? 0xf : 0x1f);
+ for (i = 0; i < pnlen; i++)
+ pn[i] ^= mask[i + 1];
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+}
+
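+/* Sketch of the masking performed above, following RFC 9001 section 5.4
+ * (values illustrative):
+ *
+ *   sample  = packet ciphertext starting at <pn> + QUIC_PACKET_PN_MAXLEN
+ *   mask    = encrypt(hp_key, sample)        // only mask[0..4] are used
+ *   pos[0] ^= mask[0] & (long header ? 0x0f : 0x1f)
+ *   pn[i]  ^= mask[i + 1]                    // for i in [0, pnlen)
+ */
+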
+/* Prepare into <outlist> as many ack-eliciting frames as possible from the
+ * <inlist> prebuilt frames for <qel> encryption level, to be encoded in a buffer
+ * with <room> as available room, and <*len> the packet Length field initialized
+ * with the number of bytes already present in this buffer, which must be taken
+ * into account for the Length packet field value. <headlen> is the number of
+ * bytes already present in this packet before building frames.
+ *
+ * Update <*len> consequently to reflect the size of the frames built
+ * by this function. Also attach these frames to the <outlist> frame list.
+ * Return 1 if at least one ack-eliciting frame could be built, 0 if not.
+ */
+static int qc_build_frms(struct list *outlist, struct list *inlist,
+ size_t room, size_t *len, size_t headlen,
+ struct quic_enc_level *qel,
+ struct quic_conn *qc)
+{
+ int ret;
+ struct quic_frame *cf, *cfbak;
+
+ TRACE_ENTER(QUIC_EV_CONN_BCFRMS, qc);
+
+ ret = 0;
+ if (*len > room)
+ goto leave;
+
+	/* If we are not probing we must take into account the congestion
+ * control window.
+ */
+ if (!qel->pktns->tx.pto_probe) {
+ size_t remain = quic_cc_path_prep_data(qc->path);
+
+ if (headlen > remain)
+ goto leave;
+
+ room = QUIC_MIN(room, remain - headlen);
+ }
+
+ TRACE_PROTO("TX frms build (headlen)",
+ QUIC_EV_CONN_BCFRMS, qc, &headlen);
+
+ /* NOTE: switch/case block inside a loop, a successful status must be
+ * returned by this function only if at least one frame could be built
+ * in the switch/case block.
+ */
+ list_for_each_entry_safe(cf, cfbak, inlist, list) {
+ /* header length, data length, frame length. */
+ size_t hlen, dlen, dlen_sz, avail_room, flen;
+
+ if (!room)
+ break;
+
+ switch (cf->type) {
+ case QUIC_FT_CRYPTO:
+ TRACE_DEVEL(" New CRYPTO frame build (room, len)",
+ QUIC_EV_CONN_BCFRMS, qc, &room, len);
+ /* Compute the length of this CRYPTO frame header */
+ hlen = 1 + quic_int_getsize(cf->crypto.offset);
+			/* Compute the data length of this CRYPTO frame. */
+ dlen = max_stream_data_size(room, *len + hlen, cf->crypto.len);
+ TRACE_DEVEL(" CRYPTO data length (hlen, crypto.len, dlen)",
+ QUIC_EV_CONN_BCFRMS, qc, &hlen, &cf->crypto.len, &dlen);
+ if (!dlen)
+ continue;
+
+ /* CRYPTO frame length. */
+ flen = hlen + quic_int_getsize(dlen) + dlen;
+ TRACE_DEVEL(" CRYPTO frame length (flen)",
+ QUIC_EV_CONN_BCFRMS, qc, &flen);
+ /* Add the CRYPTO data length and its encoded length to the packet
+ * length and the length of this length.
+ */
+ *len += flen;
+ room -= flen;
+ if (dlen == cf->crypto.len) {
+ /* <cf> CRYPTO data have been consumed. */
+ LIST_DEL_INIT(&cf->list);
+ LIST_APPEND(outlist, &cf->list);
+ }
+ else {
+ struct quic_frame *new_cf;
+
+ new_cf = qc_frm_alloc(QUIC_FT_CRYPTO);
+ if (!new_cf) {
+ TRACE_ERROR("No memory for new crypto frame", QUIC_EV_CONN_BCFRMS, qc);
+ continue;
+ }
+
+ new_cf->crypto.len = dlen;
+ new_cf->crypto.offset = cf->crypto.offset;
+ new_cf->crypto.qel = qel;
+ TRACE_DEVEL("split frame", QUIC_EV_CONN_PRSAFRM, qc, new_cf);
+ if (cf->origin) {
+ TRACE_DEVEL("duplicated frame", QUIC_EV_CONN_PRSAFRM, qc);
+ /* This <cf> frame was duplicated */
+ LIST_APPEND(&cf->origin->reflist, &new_cf->ref);
+ new_cf->origin = cf->origin;
+ /* Detach the remaining CRYPTO frame from its original frame */
+ LIST_DEL_INIT(&cf->ref);
+ cf->origin = NULL;
+ }
+ LIST_APPEND(outlist, &new_cf->list);
+ /* Consume <dlen> bytes of the current frame. */
+ cf->crypto.len -= dlen;
+ cf->crypto.offset += dlen;
+ }
+ break;
+
+ case QUIC_FT_STREAM_8 ... QUIC_FT_STREAM_F:
+ if (cf->stream.dup) {
+ struct eb64_node *node = NULL;
+ struct qc_stream_desc *stream_desc = NULL;
+ struct qf_stream *strm_frm = &cf->stream;
+
+				/* As this frame has already been lost, ensure before resending
+				 * it that the stream is still available and that its range has
+				 * not already been acknowledged.
+ */
+ node = eb64_lookup(&qc->streams_by_id, strm_frm->id);
+ if (!node) {
+ TRACE_DEVEL("released stream", QUIC_EV_CONN_PRSAFRM, qc, cf);
+ qc_frm_free(qc, &cf);
+ continue;
+ }
+
+ stream_desc = eb64_entry(node, struct qc_stream_desc, by_id);
+ if (strm_frm->offset.key + strm_frm->len <= stream_desc->ack_offset) {
+ TRACE_DEVEL("ignored frame frame in already acked range",
+ QUIC_EV_CONN_PRSAFRM, qc, cf);
+ qc_frm_free(qc, &cf);
+ continue;
+ }
+ else if (strm_frm->offset.key < stream_desc->ack_offset) {
+ uint64_t diff = stream_desc->ack_offset - strm_frm->offset.key;
+
+ qc_stream_frm_mv_fwd(cf, diff);
+ TRACE_DEVEL("updated partially acked frame",
+ QUIC_EV_CONN_PRSAFRM, qc, cf);
+ }
+ }
+			/* Note that these frames are accepted in short packets only, without
+			 * a "Length" packet field. Here, <*len> is used only to compute the
+			 * sum of the lengths of the already built frames for this packet.
+			 *
+			 * Compute the length of this STREAM frame "header" made of all the
+			 * fields except the variable ones. Note that +1 is for the type of
+			 * this frame.
+ */
+ hlen = 1 + quic_int_getsize(cf->stream.id) +
+ ((cf->type & QUIC_STREAM_FRAME_TYPE_OFF_BIT) ? quic_int_getsize(cf->stream.offset.key) : 0);
+ /* Compute the data length of this STREAM frame. */
+ avail_room = room - hlen - *len;
+ if ((ssize_t)avail_room <= 0)
+ continue;
+
+ TRACE_DEVEL(" New STREAM frame build (room, len)",
+ QUIC_EV_CONN_BCFRMS, qc, &room, len);
+
+ /* hlen contains STREAM id and offset. Ensure there is
+ * enough room for length field.
+ */
+ if (cf->type & QUIC_STREAM_FRAME_TYPE_LEN_BIT) {
+ dlen = QUIC_MIN((uint64_t)max_available_room(avail_room, &dlen_sz),
+ cf->stream.len);
+ dlen_sz = quic_int_getsize(dlen);
+ flen = hlen + dlen_sz + dlen;
+ }
+ else {
+ dlen = QUIC_MIN((uint64_t)avail_room, cf->stream.len);
+ flen = hlen + dlen;
+ }
+
+ if (cf->stream.len && !dlen) {
+				/* Only a small gap is left in the buffer, not
+ * enough to encode the STREAM data length.
+ */
+ continue;
+ }
+
+ TRACE_DEVEL(" STREAM data length (hlen, stream.len, dlen)",
+ QUIC_EV_CONN_BCFRMS, qc, &hlen, &cf->stream.len, &dlen);
+ TRACE_DEVEL(" STREAM frame length (flen)",
+ QUIC_EV_CONN_BCFRMS, qc, &flen);
+ /* Add the STREAM data length and its encoded length to the packet
+ * length and the length of this length.
+ */
+ *len += flen;
+ room -= flen;
+ if (dlen == cf->stream.len) {
+ /* <cf> STREAM data have been consumed. */
+ LIST_DEL_INIT(&cf->list);
+ LIST_APPEND(outlist, &cf->list);
+
+ /* Do not notify MUX on retransmission. */
+ if (qc->flags & QUIC_FL_CONN_TX_MUX_CONTEXT) {
+ qcc_streams_sent_done(cf->stream.stream->ctx,
+ cf->stream.len,
+ cf->stream.offset.key);
+ }
+ }
+ else {
+ struct quic_frame *new_cf;
+ struct buffer cf_buf;
+
+ new_cf = qc_frm_alloc(cf->type);
+ if (!new_cf) {
+ TRACE_ERROR("No memory for new STREAM frame", QUIC_EV_CONN_BCFRMS, qc);
+ continue;
+ }
+
+ new_cf->stream.stream = cf->stream.stream;
+ new_cf->stream.buf = cf->stream.buf;
+ new_cf->stream.id = cf->stream.id;
+ new_cf->stream.offset = cf->stream.offset;
+ new_cf->stream.len = dlen;
+ new_cf->type |= QUIC_STREAM_FRAME_TYPE_LEN_BIT;
+ /* FIN bit reset */
+ new_cf->type &= ~QUIC_STREAM_FRAME_TYPE_FIN_BIT;
+ new_cf->stream.data = cf->stream.data;
+ new_cf->stream.dup = cf->stream.dup;
+ TRACE_DEVEL("split frame", QUIC_EV_CONN_PRSAFRM, qc, new_cf);
+ if (cf->origin) {
+ TRACE_DEVEL("duplicated frame", QUIC_EV_CONN_PRSAFRM, qc);
+ /* This <cf> frame was duplicated */
+ LIST_APPEND(&cf->origin->reflist, &new_cf->ref);
+ new_cf->origin = cf->origin;
+ /* Detach this STREAM frame from its origin */
+ LIST_DEL_INIT(&cf->ref);
+ cf->origin = NULL;
+ }
+ LIST_APPEND(outlist, &new_cf->list);
+ cf->type |= QUIC_STREAM_FRAME_TYPE_OFF_BIT;
+ /* Consume <dlen> bytes of the current frame. */
+ cf_buf = b_make(b_orig(cf->stream.buf),
+ b_size(cf->stream.buf),
+ (char *)cf->stream.data - b_orig(cf->stream.buf), 0);
+ cf->stream.len -= dlen;
+ cf->stream.offset.key += dlen;
+ cf->stream.data = (unsigned char *)b_peek(&cf_buf, dlen);
+
+ /* Do not notify MUX on retransmission. */
+ if (qc->flags & QUIC_FL_CONN_TX_MUX_CONTEXT) {
+ qcc_streams_sent_done(new_cf->stream.stream->ctx,
+ new_cf->stream.len,
+ new_cf->stream.offset.key);
+ }
+ }
+
+ /* TODO the MUX is notified about the frame sending via
+ * previous qcc_streams_sent_done call. However, the
+ * sending can fail later, for example if the sendto
+ * system call returns an error. As the MUX has been
+			 * notified, the transport layer is responsible for
+			 * buffering and resending the announced data later.
+ */
+
+ break;
+
+ default:
+ flen = qc_frm_len(cf);
+ BUG_ON(!flen);
+ if (flen > room)
+ continue;
+
+ *len += flen;
+ room -= flen;
+ LIST_DEL_INIT(&cf->list);
+ LIST_APPEND(outlist, &cf->list);
+ break;
+ }
+
+ /* Successful status as soon as a frame could be built */
+ ret = 1;
+ }
+
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_BCFRMS, qc);
+ return ret;
+}
+
+/* Generate a CONNECTION_CLOSE frame for <qc> on <qel> encryption level. <out>
+ * is used as return parameter and should be zero'ed by the caller.
+ */
+static void qc_build_cc_frm(struct quic_conn *qc, struct quic_enc_level *qel,
+ struct quic_frame *out)
+{
+ /* TODO improve CONNECTION_CLOSE on Initial/Handshake encryption levels
+ *
+ * A CONNECTION_CLOSE frame should be sent in several packets with
+ * different encryption levels depending on the client context. This is
+ * to ensure that the client can decrypt it. See RFC 9000 10.2.3 for
+ * more details on how to implement it.
+ */
+ TRACE_ENTER(QUIC_EV_CONN_BFRM, qc);
+
+
+ if (qc->err.app) {
+ if (unlikely(qel == qc->iel || qel == qc->hel)) {
+ /* RFC 9000 10.2.3. Immediate Close during the Handshake
+ *
+ * Sending a CONNECTION_CLOSE of type 0x1d in an Initial or Handshake
+ * packet could expose application state or be used to alter application
+ * state. A CONNECTION_CLOSE of type 0x1d MUST be replaced by a
+ * CONNECTION_CLOSE of type 0x1c when sending the frame in Initial or
+ * Handshake packets. Otherwise, information about the application
+ * state might be revealed. Endpoints MUST clear the value of the
+ * Reason Phrase field and SHOULD use the APPLICATION_ERROR code when
+ * converting to a CONNECTION_CLOSE of type 0x1c.
+ */
+ out->type = QUIC_FT_CONNECTION_CLOSE;
+ out->connection_close.error_code = QC_ERR_APPLICATION_ERROR;
+ out->connection_close.reason_phrase_len = 0;
+ }
+ else {
+ out->type = QUIC_FT_CONNECTION_CLOSE_APP;
+ out->connection_close_app.error_code = qc->err.code;
+ out->connection_close_app.reason_phrase_len = 0;
+ }
+ }
+ else {
+ out->type = QUIC_FT_CONNECTION_CLOSE;
+ out->connection_close.error_code = qc->err.code;
+ out->connection_close.reason_phrase_len = 0;
+ }
+ TRACE_LEAVE(QUIC_EV_CONN_BFRM, qc);
+
+}
+
+/* Returns the <ack_delay> field value in microseconds to be set in an ACK frame
+ * depending on the time at which the packet with a new largest packet number was received.
+ */
+static inline uint64_t quic_compute_ack_delay_us(unsigned int time_received,
+ struct quic_conn *conn)
+{
+ return ((now_ms - time_received) * 1000) >> conn->tx.params.ack_delay_exponent;
+}
+
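+/* Example (illustrative values): with an ack_delay_exponent of 3, the
+ * default per RFC 9000, and a packet received 10ms ago, the encoded field
+ * value is (10 * 1000) >> 3 = 1250, which the peer decodes back to
+ * 1250 * 2^3 = 10000 microseconds.
+ */
+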
+/* This function builds a clear packet from <pkt> information (its type) into
+ * a buffer with <pos> as position pointer, for <qc> QUIC connection, with
+ * <qel> as QUIC TLS encryption level, filling the buffer with as many frames
+ * as possible from <frms> list of prebuilt frames.
+ * The trailing QUIC_TLS_TAG_LEN bytes of this packet are not built, but they
+ * are reserved to ensure there is enough room to build the AEAD tag after
+ * having returned from this function.
+ * This function also updates the value of <buf_pn> pointer to point to the packet
+ * number field in this packet. <pn_len> will also have the packet number
+ * length as value.
+ *
+ * Return 1 if succeeded (enough room to build this packet), 0 if not.
+ */
+static int qc_do_build_pkt(unsigned char *pos, const unsigned char *end,
+ size_t dglen, struct quic_tx_packet *pkt,
+ int64_t pn, size_t *pn_len, unsigned char **buf_pn,
+ int must_ack, int padding, int cc, int probe,
+ struct quic_enc_level *qel, struct quic_conn *qc,
+ const struct quic_version *ver, struct list *frms)
+{
+ unsigned char *beg, *payload;
+ size_t len, len_sz, len_frms, padding_len;
+ struct quic_frame frm;
+ struct quic_frame ack_frm;
+ struct quic_frame cc_frm;
+ size_t ack_frm_len, head_len;
+ int64_t rx_largest_acked_pn;
+ int add_ping_frm;
+ struct list frm_list = LIST_HEAD_INIT(frm_list);
+ struct quic_frame *cf;
+ int ret = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+
+ /* Length field value with CRYPTO frames if present. */
+ len_frms = 0;
+ beg = pos;
+ /* When not probing, and no immediate close is required, reduce the size of this
+ * buffer to respect the congestion controller window.
+ * This size will be limited if we have ack-eliciting frames to send from <frms>.
+ */
+ if (!probe && !LIST_ISEMPTY(frms) && !cc) {
+ size_t path_room;
+
+ path_room = quic_cc_path_prep_data(qc->path);
+ if (end - beg > path_room)
+ end = beg + path_room;
+ }
+
+ /* Ensure there is enough room for the TLS encryption tag and a zero token
+ * length field if any.
+ */
+ if (end - pos < QUIC_TLS_TAG_LEN +
+ (pkt->type == QUIC_PACKET_TYPE_INITIAL ? 1 : 0))
+ goto no_room;
+
+ end -= QUIC_TLS_TAG_LEN;
+ rx_largest_acked_pn = qel->pktns->rx.largest_acked_pn;
+ /* packet number length */
+ *pn_len = quic_packet_number_length(pn, rx_largest_acked_pn);
+ /* Build the header */
+ if ((pkt->type == QUIC_PACKET_TYPE_SHORT &&
+ !quic_build_packet_short_header(&pos, end, *pn_len, qc, qel->tls_ctx.flags)) ||
+ (pkt->type != QUIC_PACKET_TYPE_SHORT &&
+ !quic_build_packet_long_header(&pos, end, pkt->type, *pn_len, qc, ver)))
+ goto no_room;
+
+ /* Encode the token length (0) for an Initial packet. */
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL) {
+ if (end <= pos)
+ goto no_room;
+
+ *pos++ = 0;
+ }
+
+ head_len = pos - beg;
+ /* Build an ACK frame if required. */
+ ack_frm_len = 0;
+ /* Do not ack and probe at the same time. */
+ if ((must_ack || (qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED)) && !qel->pktns->tx.pto_probe) {
+ struct quic_arngs *arngs = &qel->pktns->rx.arngs;
+ BUG_ON(eb_is_empty(&qel->pktns->rx.arngs.root));
+ ack_frm.type = QUIC_FT_ACK;
+ ack_frm.tx_ack.arngs = arngs;
+ if (qel->pktns->flags & QUIC_FL_PKTNS_NEW_LARGEST_PN) {
+ qel->pktns->tx.ack_delay =
+ quic_compute_ack_delay_us(qel->pktns->rx.largest_time_received, qc);
+ qel->pktns->flags &= ~QUIC_FL_PKTNS_NEW_LARGEST_PN;
+ }
+ ack_frm.tx_ack.ack_delay = qel->pktns->tx.ack_delay;
+ /* XXX BE CAREFUL XXX : here we reserved at least one byte for the
+ * smallest frame (PING) and <*pn_len> more for the packet number. Note
+ * that from here, we do not know if we will have to send a PING frame.
+ * This will be decided after having computed the ack-eliciting frames
+ * to be added to this packet.
+ */
+ if (end - pos <= 1 + *pn_len)
+ goto no_room;
+
+ ack_frm_len = qc_frm_len(&ack_frm);
+ if (ack_frm_len > end - 1 - *pn_len - pos)
+ goto no_room;
+ }
+
+ /* Length field value without the ack-eliciting frames. */
+ len = ack_frm_len + *pn_len;
+ len_frms = 0;
+ if (!cc && !LIST_ISEMPTY(frms)) {
+ ssize_t room = end - pos;
+
+ TRACE_PROTO("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, frms);
+ /* Initialize the length of the frames built below to <len>.
+ * If any frame could be successfully built by qc_build_frms(),
+ * we will have len_frms > len.
+ */
+ len_frms = len;
+ if (!qc_build_frms(&frm_list, frms,
+ end - pos, &len_frms, pos - beg, qel, qc)) {
+ TRACE_PROTO("Not enough room", QUIC_EV_CONN_TXPKT,
+ qc, NULL, NULL, &room);
+ if (padding) {
+ len_frms = 0;
+ goto comp_pkt_len;
+ }
+
+ if (!ack_frm_len && !qel->pktns->tx.pto_probe)
+ goto no_room;
+ }
+ }
+
+ comp_pkt_len:
+	/* Length (of the remaining data). Must not fail because the buffer size
+	 * has been checked above. Note that we have reserved QUIC_TLS_TAG_LEN bytes
+	 * for the encryption tag. It must be taken into account for the length
+	 * of this packet.
+ */
+ if (len_frms)
+ len = len_frms + QUIC_TLS_TAG_LEN;
+ else
+ len += QUIC_TLS_TAG_LEN;
+ /* CONNECTION_CLOSE frame */
+ if (cc) {
+ qc_build_cc_frm(qc, qel, &cc_frm);
+ len += qc_frm_len(&cc_frm);
+ }
+ add_ping_frm = 0;
+ padding_len = 0;
+ len_sz = quic_int_getsize(len);
+ /* Add this packet size to <dglen> */
+ dglen += head_len + len_sz + len;
+	/* Note that <padding> is true only when building a Handshake packet
+ * coalesced to an Initial packet.
+ */
+ if (padding && dglen < QUIC_INITIAL_PACKET_MINLEN) {
+ /* This is a maximum padding size */
+ padding_len = QUIC_INITIAL_PACKET_MINLEN - dglen;
+		/* The Length field value of this packet is <len> + <padding_len>,
+		 * whose encoded size may be greater than the initially computed size
+		 * <len_sz>. So, let's deduct the difference between these two packet
+		 * sizes from <padding_len>.
+ */
+ padding_len -= quic_int_getsize(len + padding_len) - len_sz;
+ len += padding_len;
+ }
+ else if (len_frms && len_frms < QUIC_PACKET_PN_MAXLEN) {
+ len += padding_len = QUIC_PACKET_PN_MAXLEN - len_frms;
+ }
+ else if (LIST_ISEMPTY(&frm_list)) {
+ if (qel->pktns->tx.pto_probe) {
+ /* If we cannot send a frame, we send a PING frame. */
+ add_ping_frm = 1;
+ len += 1;
+ dglen += 1;
+			/* Note that we are only in the case where this Initial packet
+			 * is not coalesced with a Handshake packet. We must directly
+			 * pad the datagram.
+ */
+ if (pkt->type == QUIC_PACKET_TYPE_INITIAL) {
+ if (dglen < QUIC_INITIAL_PACKET_MINLEN) {
+ padding_len = QUIC_INITIAL_PACKET_MINLEN - dglen;
+ padding_len -= quic_int_getsize(len + padding_len) - len_sz;
+ len += padding_len;
+ }
+ }
+ else {
+ /* Note that +1 is for the PING frame */
+ if (*pn_len + 1 < QUIC_PACKET_PN_MAXLEN)
+ len += padding_len = QUIC_PACKET_PN_MAXLEN - *pn_len - 1;
+ }
+ }
+ else {
+ /* If there is no frame at all to follow, add at least a PADDING frame. */
+ if (!ack_frm_len && !cc)
+ len += padding_len = QUIC_PACKET_PN_MAXLEN - *pn_len;
+ }
+ }
+
+ if (pkt->type != QUIC_PACKET_TYPE_SHORT && !quic_enc_int(&pos, end, len))
+ goto no_room;
+
+ /* Packet number field address. */
+ *buf_pn = pos;
+
+ /* Packet number encoding. */
+ if (!quic_packet_number_encode(&pos, end, pn, *pn_len))
+ goto no_room;
+
+ /* payload building (ack-eliciting or not frames) */
+ payload = pos;
+ if (ack_frm_len) {
+ if (!qc_build_frm(&pos, end, &ack_frm, pkt, qc))
+ goto no_room;
+
+ pkt->largest_acked_pn = quic_pktns_get_largest_acked_pn(qel->pktns);
+ pkt->flags |= QUIC_FL_TX_PACKET_ACK;
+ }
+
+ /* Ack-eliciting frames */
+ if (!LIST_ISEMPTY(&frm_list)) {
+ struct quic_frame *tmp_cf;
+ list_for_each_entry_safe(cf, tmp_cf, &frm_list, list) {
+ if (!qc_build_frm(&pos, end, cf, pkt, qc)) {
+ ssize_t room = end - pos;
+ TRACE_PROTO("Not enough room", QUIC_EV_CONN_TXPKT,
+ qc, NULL, NULL, &room);
+ /* Note that <cf> was added from <frms> to <frm_list> list by
+ * qc_build_frms().
+ */
+ LIST_DEL_INIT(&cf->list);
+ LIST_INSERT(frms, &cf->list);
+ continue;
+ }
+
+ quic_tx_packet_refinc(pkt);
+ cf->pkt = pkt;
+ }
+ }
+
+ /* Build a PING frame if needed. */
+ if (add_ping_frm) {
+ frm.type = QUIC_FT_PING;
+ if (!qc_build_frm(&pos, end, &frm, pkt, qc))
+ goto no_room;
+ }
+
+ /* Build a CONNECTION_CLOSE frame if needed. */
+ if (cc) {
+ if (!qc_build_frm(&pos, end, &cc_frm, pkt, qc))
+ goto no_room;
+
+ pkt->flags |= QUIC_FL_TX_PACKET_CC;
+ }
+
+ /* Build a PADDING frame if needed. */
+ if (padding_len) {
+ frm.type = QUIC_FT_PADDING;
+ frm.padding.len = padding_len;
+ if (!qc_build_frm(&pos, end, &frm, pkt, qc))
+ goto no_room;
+ }
+
+ if (pos == payload) {
+ /* No payload was built because of congestion control */
+ TRACE_PROTO("limited by congestion control", QUIC_EV_CONN_TXPKT, qc);
+ goto no_room;
+ }
+
+ /* If this packet is ack-eliciting and we are probing let's
+ * decrement the PTO probe counter.
+ */
+ if ((pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) &&
+ qel->pktns->tx.pto_probe)
+ qel->pktns->tx.pto_probe--;
+
+ pkt->len = pos - beg;
+ LIST_SPLICE(&pkt->frms, &frm_list);
+
+ ret = 1;
+ TRACE_PROTO("Packet ack-eliciting frames", QUIC_EV_CONN_TXPKT, qc, pkt);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret;
+
+ no_room:
+	/* Replace the pre-built frames which could not be added to this packet */
+ LIST_SPLICE(frms, &frm_list);
+ TRACE_PROTO("Remaining ack-eliciting frames", QUIC_EV_CONN_FRMLIST, qc, frms);
+ goto leave;
+}
+
+static inline void quic_tx_packet_init(struct quic_tx_packet *pkt, int type)
+{
+ pkt->type = type;
+ pkt->len = 0;
+ pkt->in_flight_len = 0;
+ pkt->pn_node.key = (uint64_t)-1;
+ LIST_INIT(&pkt->frms);
+ pkt->time_sent = TICK_ETERNITY;
+ pkt->next = NULL;
+ pkt->prev = NULL;
+ pkt->largest_acked_pn = -1;
+ pkt->flags = 0;
+ pkt->refcnt = 0;
+}
+
+/* Build a packet into a buffer at <pos> position, <end> pointing to one byte past
+ * the end of this buffer, with <pkt_type> as packet type for <qc> QUIC connection
+ * at <qel> encryption level with <frms> list of prebuilt frames.
+ *
+ * Return -3 if the packet could not be allocated, -2 if it could not be
+ * encrypted for any reason, -1 if there was not enough room to build a packet.
+ * XXX NOTE XXX
+ * If you provide qc_build_pkt() with a big enough buffer to build a packet as big as
+ * possible (to fill an MTU), the only reason why this function may fail is the congestion
+ * control window limitation.
+ */
+static struct quic_tx_packet *qc_build_pkt(unsigned char **pos,
+ const unsigned char *end,
+ struct quic_enc_level *qel,
+ struct quic_tls_ctx *tls_ctx, struct list *frms,
+ struct quic_conn *qc, const struct quic_version *ver,
+ size_t dglen, int pkt_type, int must_ack,
+ int padding, int probe, int cc, int *err)
+{
+ struct quic_tx_packet *ret_pkt = NULL;
+ /* The pointer to the packet number field. */
+ unsigned char *buf_pn;
+ unsigned char *first_byte, *last_byte, *payload;
+ int64_t pn;
+ size_t pn_len, payload_len, aad_len;
+ struct quic_tx_packet *pkt;
+ int encrypt_failure = 0;
+
+ TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc);
+ TRACE_PROTO("TX pkt build", QUIC_EV_CONN_TXPKT, qc, NULL, qel);
+ *err = 0;
+ pkt = pool_alloc(pool_head_quic_tx_packet);
+ if (!pkt) {
+ TRACE_DEVEL("Not enough memory for a new packet", QUIC_EV_CONN_TXPKT, qc);
+ *err = -3;
+ goto err;
+ }
+
+ quic_tx_packet_init(pkt, pkt_type);
+ first_byte = *pos;
+ pn_len = 0;
+ buf_pn = NULL;
+
+ pn = qel->pktns->tx.next_pn + 1;
+ if (!qc_do_build_pkt(*pos, end, dglen, pkt, pn, &pn_len, &buf_pn,
+ must_ack, padding, cc, probe, qel, qc, ver, frms)) {
+ // trace already emitted by function above
+ *err = -1;
+ goto err;
+ }
+
+ last_byte = first_byte + pkt->len;
+ payload = buf_pn + pn_len;
+ payload_len = last_byte - payload;
+ aad_len = payload - first_byte;
+
+ quic_packet_encrypt(payload, payload_len, first_byte, aad_len, pn, tls_ctx, qc, &encrypt_failure);
+ if (encrypt_failure) {
+ /* TODO Unrecoverable failure, unencrypted data should be returned to the caller. */
+ WARN_ON("quic_packet_encrypt failure");
+ *err = -2;
+ goto err;
+ }
+
+ last_byte += QUIC_TLS_TAG_LEN;
+ pkt->len += QUIC_TLS_TAG_LEN;
+ quic_apply_header_protection(qc, first_byte, buf_pn, pn_len, tls_ctx, &encrypt_failure);
+ if (encrypt_failure) {
+ /* TODO Unrecoverable failure, unencrypted data should be returned to the caller. */
+ WARN_ON("quic_apply_header_protection failure");
+ *err = -2;
+ goto err;
+ }
+
+ /* Consume a packet number */
+ qel->pktns->tx.next_pn++;
+ qc->bytes.prep += pkt->len;
+ if (qc->bytes.prep >= 3 * qc->bytes.rx && !quic_peer_validated_addr(qc)) {
+ qc->flags |= QUIC_FL_CONN_ANTI_AMPLIFICATION_REACHED;
+ TRACE_PROTO("anti-amplification limit reached", QUIC_EV_CONN_TXPKT, qc);
+ }
+
+ /* Now that a correct packet is built, let us consume <*pos> buffer. */
+ *pos = last_byte;
+ /* Attach the built packet to its tree. */
+ pkt->pn_node.key = pn;
+	/* Set the packet in flight length for in flight packets only. */
+ if (pkt->flags & QUIC_FL_TX_PACKET_IN_FLIGHT) {
+ pkt->in_flight_len = pkt->len;
+ qc->path->prep_in_flight += pkt->len;
+ }
+ /* Always reset this flag */
+ qc->flags &= ~QUIC_FL_CONN_IMMEDIATE_CLOSE;
+ if (pkt->flags & QUIC_FL_TX_PACKET_ACK) {
+ qel->pktns->flags &= ~QUIC_FL_PKTNS_ACK_REQUIRED;
+ qel->pktns->rx.nb_aepkts_since_last_ack = 0;
+ qc->flags &= ~QUIC_FL_CONN_ACK_TIMER_FIRED;
+ if (tick_isset(qc->ack_expire)) {
+ qc->ack_expire = TICK_ETERNITY;
+ qc->idle_timer_task->expire = qc->idle_expire;
+ task_queue(qc->idle_timer_task);
+ TRACE_PROTO("ack timer cancelled", QUIC_EV_CONN_IDLE_TIMER, qc);
+ }
+ }
+
+ pkt->pktns = qel->pktns;
+
+ ret_pkt = pkt;
+ leave:
+ TRACE_PROTO("TX pkt built", QUIC_EV_CONN_TXPKT, qc, ret_pkt);
+ TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc);
+ return ret_pkt;
+
+ err:
+ /* TODO: what about the frames which have been built
+ * for this packet.
+ */
+ free_quic_tx_packet(qc, pkt);
+ goto leave;
+}
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/raw_sock.c b/src/raw_sock.c
new file mode 100644
index 0000000..1287dc5
--- /dev/null
+++ b/src/raw_sock.c
@@ -0,0 +1,489 @@
+/*
+ * RAW transport layer over SOCK_STREAM sockets.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <netinet/tcp.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/pipe.h>
+#include <haproxy/proxy.h>
+#include <haproxy/tools.h>
+
+
+#if defined(USE_LINUX_SPLICE)
+
+/* A pipe contains 16 segments max, and it's common to see segments of 1448 bytes
+ * because of timestamps. Use this as a hint for not looping on splice().
+ */
+#define SPLICE_FULL_HINT 16*1448
+
+/* how much data we attempt to splice at once when the buffer is configured for
+ * infinite forwarding */
+#define MAX_SPLICE_AT_ONCE (1<<30)
+
+/* Returns :
+ * -1 if splice() is not supported
+ * >= 0 to report the amount of spliced bytes.
+ * connection flags are updated (error, read0, wait_room, wait_data).
+ * The caller must have previously allocated the pipe.
+ */
+int raw_sock_to_pipe(struct connection *conn, void *xprt_ctx, struct pipe *pipe, unsigned int count)
+{
+ int ret;
+ int retval = 0;
+
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ return 0;
+
+ conn->flags &= ~CO_FL_WAIT_ROOM;
+ errno = 0;
+
+ /* Under Linux, if FD_POLL_HUP is set, we have reached the end.
+ * Since older splice() implementations were buggy and returned
+ * EAGAIN on end of read, let's bypass the call to splice() now.
+ */
+ if (unlikely(!(fdtab[conn->handle.fd].state & FD_POLL_IN))) {
+ /* stop here if we reached the end of data */
+ if ((fdtab[conn->handle.fd].state & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP)
+ goto out_read0;
+
+ /* report error on POLL_ERR before connection establishment */
+ if ((fdtab[conn->handle.fd].state & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) {
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ errno = 0; /* let the caller do a getsockopt() if it wants it */
+ goto leave;
+ }
+ }
+
+ while (count) {
+ if (count > MAX_SPLICE_AT_ONCE)
+ count = MAX_SPLICE_AT_ONCE;
+
+ ret = splice(conn->handle.fd, NULL, pipe->prod, NULL, count,
+ SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
+
+ if (ret <= 0) {
+ if (ret == 0)
+ goto out_read0;
+
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ /* there are two reasons for EAGAIN :
+ * - nothing in the socket buffer (standard)
+ * - pipe is full
+ * The difference between these two situations
+ * is problematic. Since we don't know if the
+ * pipe is full, we'll stop if the pipe is not
+ * empty. Anyway, we will almost always fill or
+ * empty the pipe.
+ */
+ if (pipe->data) {
+ /* always stop reading until the pipe is flushed */
+ conn->flags |= CO_FL_WAIT_ROOM;
+ break;
+ }
+ /* socket buffer exhausted */
+ fd_cant_recv(conn->handle.fd);
+ break;
+ }
+ else if (errno == ENOSYS || errno == EINVAL || errno == EBADF) {
+ /* splice not supported on this end, disable it.
+ * We can safely return -1 since there is no
+ * chance that any data has been piped yet.
+ */
+ retval = -1;
+ goto leave;
+ }
+ else if (errno == EINTR) {
+ /* try again */
+ continue;
+ }
+ /* here we have another error */
+ conn->flags |= CO_FL_ERROR;
+ break;
+ } /* ret <= 0 */
+
+ retval += ret;
+ pipe->data += ret;
+ count -= ret;
+
+ if (pipe->data >= SPLICE_FULL_HINT || ret >= global.tune.recv_enough) {
+ /* We've read enough of it for this time, let's stop before
+ * being asked to poll.
+ */
+ conn->flags |= CO_FL_WAIT_ROOM;
+ break;
+ }
+ } /* while */
+
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) && retval)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ leave:
+ if (retval > 0)
+ increment_send_rate(retval, 1);
+
+ return retval;
+
+ out_read0:
+ conn_sock_read0(conn);
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ goto leave;
+}
+
+/* Send as many bytes as possible from the pipe to the connection's socket.
+ */
+int raw_sock_from_pipe(struct connection *conn, void *xprt_ctx, struct pipe *pipe, unsigned int count)
+{
+ int ret, done;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_send_ready(conn->handle.fd))
+ return 0;
+
+ if (conn->flags & CO_FL_SOCK_WR_SH) {
+ /* it's already closed */
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH;
+ errno = EPIPE;
+ return 0;
+ }
+
+ if (unlikely(count > pipe->data))
+ count = pipe->data;
+
+ done = 0;
+ while (count) {
+ ret = splice(pipe->cons, NULL, conn->handle.fd, NULL, count,
+ SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
+
+ if (ret <= 0) {
+ if (ret == 0 || errno == EAGAIN || errno == EWOULDBLOCK) {
+ fd_cant_send(conn->handle.fd);
+ break;
+ }
+ else if (errno == EINTR)
+ continue;
+
+ /* here we have another error */
+ conn->flags |= CO_FL_ERROR;
+ break;
+ }
+
+ done += ret;
+ count -= ret;
+ pipe->data -= ret;
+ }
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) && done) {
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ return done;
+}
+
+#endif /* USE_LINUX_SPLICE */
+
+
+/* Receive up to <count> bytes from connection <conn>'s socket and store them
+ * into buffer <buf>. Only one call to recv() is performed, unless the
+ * buffer wraps, in which case a second call may be performed. The connection's
+ * flags are updated with whatever special event is detected (error, read0,
+ * empty). The caller is responsible for taking care of those events and
+ * avoiding the call if inappropriate. The function does not call the
+ * connection's polling update function, so the caller is responsible for this.
+ * errno is cleared before starting so that the caller knows that if it spots an
+ * error without errno, it's pending and can be retrieved via getsockopt(SO_ERROR).
+ */
+static size_t raw_sock_to_buf(struct connection *conn, void *xprt_ctx, struct buffer *buf, size_t count, int flags)
+{
+ ssize_t ret;
+ size_t try, done = 0;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_recv_ready(conn->handle.fd))
+ return 0;
+
+ conn->flags &= ~CO_FL_WAIT_ROOM;
+ errno = 0;
+
+ if (unlikely(!(fdtab[conn->handle.fd].state & FD_POLL_IN))) {
+ /* stop here if we reached the end of data */
+ if ((fdtab[conn->handle.fd].state & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP)
+ goto read0;
+
+ /* report error on POLL_ERR before connection establishment */
+ if ((fdtab[conn->handle.fd].state & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) {
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ goto leave;
+ }
+ }
+
+ /* read the largest possible block. For this, we perform only one call
+ * to recv() unless the buffer wraps and we exactly fill the first hunk,
+ * in which case we accept to do it once again. A new attempt is made on
+ * EINTR too.
+ */
+ while (count > 0) {
+ try = b_contig_space(buf);
+ if (!try)
+ break;
+
+ if (try > count)
+ try = count;
+
+ ret = recv(conn->handle.fd, b_tail(buf), try, 0);
+
+ if (ret > 0) {
+ b_add(buf, ret);
+ done += ret;
+ if (ret < try) {
+ /* socket buffer exhausted */
+ fd_cant_recv(conn->handle.fd);
+
+ /* unfortunately, on level-triggered events, POLL_HUP
+ * is generally delivered AFTER the system buffer is
+ * empty, unless the poller supports POLL_RDHUP. If
+ * we know this is the case, we don't try to read more
+ * as we know there's no more available. Similarly, if
+ * there's no problem with lingering we don't even try
+ * to read an unlikely close from the client since we'll
+ * close first anyway.
+ */
+ if (fdtab[conn->handle.fd].state & FD_POLL_HUP)
+ goto read0;
+
+ if (!(fdtab[conn->handle.fd].state & FD_LINGER_RISK) ||
+ (cur_poller.flags & HAP_POLL_F_RDHUP)) {
+ break;
+ }
+ }
+ count -= ret;
+
+ if (flags & CO_RFL_READ_ONCE)
+ break;
+ }
+ else if (ret == 0) {
+ goto read0;
+ }
+ else if (errno == EAGAIN || errno == EWOULDBLOCK || errno == ENOTCONN) {
+ /* socket buffer exhausted */
+ fd_cant_recv(conn->handle.fd);
+ break;
+ }
+ else if (errno != EINTR) {
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ break;
+ }
+ }
+
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) && done)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ leave:
+ return done;
+
+ read0:
+ conn_sock_read0(conn);
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+
+ /* Now a final check for a possible asynchronous low-level error
+ * report. This can happen when a connection receives a reset
+ * after a shutdown, both POLL_HUP and POLL_ERR are queued, and
+ * we might have come from there by just checking POLL_HUP instead
+ * of recv()'s return value 0, so we have no way to tell there was
+ * an error without checking.
+ */
+ if (unlikely(!done && fdtab[conn->handle.fd].state & FD_POLL_ERR))
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ goto leave;
+}
+
+
+/* Send up to <count> pending bytes from buffer <buf> to connection <conn>'s
+ * socket. <flags> may contain some CO_SFL_* flags to hint the system about
+ * other pending data for example, but this flag is ignored at the moment.
+ * Only one call to send() is performed, unless the buffer wraps, in which case
+ * a second call may be performed. The connection's flags are updated with
+ * whatever special event is detected (error, empty). The caller is responsible
+ * for taking care of those events and avoiding the call if inappropriate. The
+ * function does not call the connection's polling update function, so the caller
+ * is responsible for this. It's up to the caller to update the buffer's contents
+ * based on the return value.
+ */
+static size_t raw_sock_from_buf(struct connection *conn, void *xprt_ctx, const struct buffer *buf, size_t count, int flags)
+{
+ ssize_t ret;
+ size_t try, done;
+ int send_flag;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_send_ready(conn->handle.fd))
+ return 0;
+
+ if (unlikely(fdtab[conn->handle.fd].state & FD_POLL_ERR)) {
+ /* an error was reported on the FD, we can't send anymore */
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_SOCK_RD_SH;
+ errno = EPIPE;
+ return 0;
+ }
+
+ if (conn->flags & CO_FL_SOCK_WR_SH) {
+ /* it's already closed */
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH;
+ errno = EPIPE;
+ return 0;
+ }
+
+ done = 0;
+ /* send the largest possible block. For this we perform only one call
+ * to send() unless the buffer wraps and we exactly fill the first hunk,
+ * in which case we accept to do it once again.
+ */
+ while (count) {
+ try = b_contig_data(buf, done);
+ if (try > count)
+ try = count;
+
+ send_flag = MSG_DONTWAIT | MSG_NOSIGNAL;
+ if (try < count || flags & CO_SFL_MSG_MORE)
+ send_flag |= MSG_MORE;
+
+ ret = send(conn->handle.fd, b_peek(buf, done), try, send_flag);
+
+ if (ret > 0) {
+ count -= ret;
+ done += ret;
+
+ /* if the system buffer is full, don't insist */
+ if (ret < try) {
+ fd_cant_send(conn->handle.fd);
+ break;
+ }
+ if (!count)
+ fd_stop_send(conn->handle.fd);
+ }
+ else if (ret == 0 || errno == EAGAIN || errno == EWOULDBLOCK || errno == ENOTCONN || errno == EINPROGRESS) {
+ /* nothing written, we need to poll for write first */
+ fd_cant_send(conn->handle.fd);
+ break;
+ }
+ else if (errno != EINTR) {
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ break;
+ }
+ }
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) && done) {
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ }
+
+ if (done > 0)
+ increment_send_rate(done, 0);
+
+ return done;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int raw_sock_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ return conn_subscribe(conn, xprt_ctx, event_type, es);
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int raw_sock_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ return conn_unsubscribe(conn, xprt_ctx, event_type, es);
+}
+
+static void raw_sock_close(struct connection *conn, void *xprt_ctx)
+{
+ if (conn->subs != NULL) {
+ conn_unsubscribe(conn, NULL, conn->subs->events, conn->subs);
+ }
+}
+
+/* We can't have an underlying XPRT, so just return -1 to signify failure */
+static int raw_sock_remove_xprt(struct connection *conn, void *xprt_ctx, void *toremove_ctx, const struct xprt_ops *newops, void *newctx)
+{
+ /* This is the lowest xprt we can have, so if we get there we didn't
+ * find the xprt we wanted to remove, that's a bug
+ */
+ BUG_ON(1);
+ return -1;
+}
+
+/* transport-layer operations for RAW sockets */
+static struct xprt_ops raw_sock = {
+ .snd_buf = raw_sock_from_buf,
+ .rcv_buf = raw_sock_to_buf,
+ .subscribe = raw_sock_subscribe,
+ .unsubscribe = raw_sock_unsubscribe,
+ .remove_xprt = raw_sock_remove_xprt,
+#if defined(USE_LINUX_SPLICE)
+ .rcv_pipe = raw_sock_to_pipe,
+ .snd_pipe = raw_sock_from_pipe,
+#endif
+ .shutr = NULL,
+ .shutw = NULL,
+ .close = raw_sock_close,
+ .name = "RAW",
+};
+
+
+static void __raw_sock_init(void)
+{
+ xprt_register(XPRT_RAW, &raw_sock);
+}
+
+INITCALL0(STG_REGISTER, __raw_sock_init);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/regex.c b/src/regex.c
new file mode 100644
index 0000000..19c7eda
--- /dev/null
+++ b/src/regex.c
@@ -0,0 +1,459 @@
+/*
+ * Regex and string management functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/regex.h>
+#include <haproxy/tools.h>
+
+/* regex trash buffer used by various regex tests */
+THREAD_LOCAL regmatch_t pmatch[MAX_MATCH]; /* rm_so, rm_eo for regular expressions */
+
+int exp_replace(char *dst, unsigned int dst_size, char *src, const char *str, const regmatch_t *matches)
+{
+ char *old_dst = dst;
+ char* dst_end = dst + dst_size;
+
+ while (*str) {
+ if (*str == '\\') {
+ str++;
+ if (!*str)
+ return -1;
+
+ if (isdigit((unsigned char)*str)) {
+ int len, num;
+
+ num = *str - '0';
+ str++;
+
+ if (matches[num].rm_eo > -1 && matches[num].rm_so > -1) {
+ len = matches[num].rm_eo - matches[num].rm_so;
+
+ if (dst + len >= dst_end)
+ return -1;
+
+ memcpy(dst, src + matches[num].rm_so, len);
+ dst += len;
+ }
+
+ } else if (*str == 'x') {
+ unsigned char hex1, hex2;
+ str++;
+
+ if (!*str)
+ return -1;
+
+ hex1 = toupper((unsigned char)*str++) - '0';
+
+ if (!*str)
+ return -1;
+
+ hex2 = toupper((unsigned char)*str++) - '0';
+
+ if (hex1 > 9) hex1 -= 'A' - '9' - 1;
+ if (hex2 > 9) hex2 -= 'A' - '9' - 1;
+
+ if (dst >= dst_end)
+ return -1;
+
+ *dst++ = (hex1<<4) + hex2;
+ } else {
+ if (dst >= dst_end)
+ return -1;
+
+ *dst++ = *str++;
+ }
+ } else {
+ if (dst >= dst_end)
+ return -1;
+
+ *dst++ = *str++;
+ }
+ }
+ if (dst >= dst_end)
+ return -1;
+
+ *dst = '\0';
+ return dst - old_dst;
+}
+
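+/* Usage sketch (illustrative values): with <matches> filled by a prior
+ * regex_exec_match() where group 1 spans "abc" in src = "abc123"
+ * (rm_so = 0, rm_eo = 3), the replacement string "x\1y" expands to:
+ *
+ *   char dst[16];
+ *   int len = exp_replace(dst, sizeof(dst), src, "x\\1y", matches);
+ *   // len == 5, dst == "xabcy"; -1 would indicate that <dst> is too small
+ */
+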
+/* returns NULL if the replacement string <str> is valid, or the pointer to the first error */
+const char *check_replace_string(const char *str)
+{
+ const char *err = NULL;
+ while (*str) {
+ if (*str == '\\') {
+ err = str; /* in case of a backslash, we return the pointer to it */
+ str++;
+ if (!*str)
+ return err;
+ else if (isdigit((unsigned char)*str))
+ err = NULL;
+ else if (*str == 'x') {
+ str++;
+ if (!ishex(*str))
+ return err;
+ str++;
+ if (!ishex(*str))
+ return err;
+ err = NULL;
+ }
+ else {
+ ha_warning("'\\%c' : deprecated use of a backslash before something not '\\','x' or a digit.\n", *str);
+ err = NULL;
+ }
+ }
+ str++;
+ }
+ return err;
+}
+
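+/* A few examples of what check_replace_string() accepts (illustrative):
+ *   "foo\1bar" -> NULL (valid back-reference)
+ *   "foo\x41"  -> NULL (valid hex escape)
+ *   "foo\"     -> pointer to the trailing backslash (invalid)
+ *   "foo\xZZ"  -> pointer to the backslash (invalid hex escape)
+ *   "foo\q"    -> NULL, but emits a deprecation warning
+ */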
+
+/* This function applies a regex. It takes a const null-terminated string as
+ * input. If the function doesn't match, it returns false, else it returns
+ * true. When compiled with JIT, this function executes strlen() on the
+ * subject. Currently the only supported flag is REG_NOTBOL.
+ */
+int regex_exec_match(const struct my_regex *preg, const char *subject,
+ size_t nmatch, regmatch_t pmatch[], int flags) {
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ int ret;
+#ifdef USE_PCRE2
+ PCRE2_SIZE *matches;
+ pcre2_match_data *pm;
+#else
+ int matches[MAX_MATCH * 3];
+#endif
+ int enmatch;
+ int i;
+ int options;
+
+	/* Silently limit the number of allowed matches: MAX_MATCH is the
+	 * maximum number of matches we can report, any extra matches are
+	 * simply ignored.
+	 */
+
+ enmatch = nmatch;
+ if (enmatch > MAX_MATCH)
+ enmatch = MAX_MATCH;
+
+ options = 0;
+ if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+ options |= PCRE2_NOTBOL;
+#else
+ options |= PCRE_NOTBOL;
+#endif
+
+ /* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
+ * pair that has been set. For example, if two substrings have been captured,
+ * the returned value is 3. If there are no capturing subpatterns, the return
+ * value from a successful match is 1, indicating that just the first pair of
+ * offsets has been set.
+ *
+ * It seems that this function returns 0 if it detects more matches than available
+ * space in the matches array.
+ */
+#ifdef USE_PCRE2
+ pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+ ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
+
+ if (ret < 0) {
+ pcre2_match_data_free(pm);
+ return 0;
+ }
+
+ matches = pcre2_get_ovector_pointer(pm);
+#else
+ ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
+
+ if (ret < 0)
+ return 0;
+#endif
+
+ if (ret == 0)
+ ret = enmatch;
+
+ for (i=0; i<nmatch; i++) {
+ /* Copy offset. */
+ if (i < ret) {
+ pmatch[i].rm_so = matches[(i*2)];
+ pmatch[i].rm_eo = matches[(i*2)+1];
+ continue;
+ }
+		/* Set the unmatched flag (-1). */
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+#ifdef USE_PCRE2
+ pcre2_match_data_free(pm);
+#endif
+ return 1;
+#else
+ int match;
+
+ flags &= REG_NOTBOL;
+ match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
+ if (match == REG_NOMATCH)
+ return 0;
+ return 1;
+#endif
+}
+
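+/* Usage sketch (illustrative; error handling omitted):
+ *
+ *   char *err = NULL;
+ *   regmatch_t m[MAX_MATCH];
+ *   struct my_regex *re = regex_comp("^foo([0-9]+)$", 1, 1, &err);
+ *
+ *   if (re && regex_exec_match(re, "foo42", MAX_MATCH, m, 0)) {
+ *       // m[0] spans "foo42", m[1] spans "42" (rm_so = 3, rm_eo = 5)
+ *   }
+ */
+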
+/* This function applies a regex. It takes a "char *" and a length as input.
+ * The <subject> can be modified during the processing. If the function
+ * doesn't match, it returns false, else it returns true.
+ * When compiled with standard POSIX regex or PCRE, this function adds a
+ * temporary null character at the end of <subject>. The <subject> must
+ * have a real length of <length> + 1. Currently the only supported flag is
+ * REG_NOTBOL.
+ */
+int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
+ size_t nmatch, regmatch_t pmatch[], int flags) {
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ int ret;
+#ifdef USE_PCRE2
+ PCRE2_SIZE *matches;
+ pcre2_match_data *pm;
+#else
+ int matches[MAX_MATCH * 3];
+#endif
+ int enmatch;
+ int i;
+ int options;
+
+	/* Silently limit the number of allowed matches: MAX_MATCH is the
+	 * maximum number of matches we can report, any extra matches are
+	 * simply ignored.
+	 */
+ enmatch = nmatch;
+ if (enmatch > MAX_MATCH)
+ enmatch = MAX_MATCH;
+
+ options = 0;
+ if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+ options |= PCRE2_NOTBOL;
+#else
+ options |= PCRE_NOTBOL;
+#endif
+
+ /* The value returned by pcre_exec()/pcre2_(jit)_match() is one more than the highest numbered
+ * pair that has been set. For example, if two substrings have been captured,
+ * the returned value is 3. If there are no capturing subpatterns, the return
+ * value from a successful match is 1, indicating that just the first pair of
+ * offsets has been set.
+ *
+ * It seems that this function returns 0 if it detects more matches than available
+ * space in the matches array.
+ */
+#ifdef USE_PCRE2
+ pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+ ret = preg->mfn(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
+
+ if (ret < 0) {
+ pcre2_match_data_free(pm);
+ return 0;
+ }
+
+ matches = pcre2_get_ovector_pointer(pm);
+#else
+ ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
+ if (ret < 0)
+ return 0;
+#endif
+
+ if (ret == 0)
+ ret = enmatch;
+
+ for (i=0; i<nmatch; i++) {
+ /* Copy offset. */
+ if (i < ret) {
+ pmatch[i].rm_so = matches[(i*2)];
+ pmatch[i].rm_eo = matches[(i*2)+1];
+ continue;
+ }
+		/* Set the unmatched flag (-1). */
+ pmatch[i].rm_so = -1;
+ pmatch[i].rm_eo = -1;
+ }
+#ifdef USE_PCRE2
+ pcre2_match_data_free(pm);
+#endif
+ return 1;
+#else
+ char old_char = subject[length];
+ int match;
+
+ flags &= REG_NOTBOL;
+ subject[length] = 0;
+ match = regexec(&preg->regex, subject, nmatch, pmatch, flags);
+ subject[length] = old_char;
+ if (match == REG_NOMATCH)
+ return 0;
+ return 1;
+#endif
+}
+
+struct my_regex *regex_comp(const char *str, int cs, int cap, char **err)
+{
+ struct my_regex *regex = NULL;
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+ int flags = 0;
+ const char *error;
+ int erroffset;
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ int flags = 0;
+ int errn;
+#if defined(USE_PCRE2_JIT)
+ int jit;
+#endif
+ PCRE2_UCHAR error[256];
+ PCRE2_SIZE erroffset;
+#else
+ int flags = REG_EXTENDED;
+#endif
+
+ regex = calloc(1, sizeof(*regex));
+ if (!regex) {
+ memprintf(err, "not enough memory to build regex");
+ goto out_fail_alloc;
+ }
+
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+ if (!cs)
+ flags |= PCRE_CASELESS;
+ if (!cap)
+ flags |= PCRE_NO_AUTO_CAPTURE;
+
+ regex->reg = pcre_compile(str, flags, &error, &erroffset, NULL);
+ if (!regex->reg) {
+ memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%d)", str, error, erroffset);
+ goto out_fail_alloc;
+ }
+
+ regex->extra = pcre_study(regex->reg, PCRE_STUDY_JIT_COMPILE, &error);
+ if (!regex->extra && error != NULL) {
+ pcre_free(regex->reg);
+ memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
+ goto out_fail_alloc;
+ }
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+ if (!cs)
+ flags |= PCRE2_CASELESS;
+ if (!cap)
+ flags |= PCRE2_NO_AUTO_CAPTURE;
+
+ regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
+ if (!regex->reg) {
+ pcre2_get_error_message(errn, error, sizeof(error));
+ memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
+ goto out_fail_alloc;
+ }
+
+ regex->mfn = &pcre2_match;
+#if defined(USE_PCRE2_JIT)
+ jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
+	/*
+	 * We only fail on errors that are not related to a lack of JIT
+	 * support: when JIT support is missing, pcre2_jit_compile() is a
+	 * no-op and we fall back to pcre2_match().
+	 */
+ if (!jit)
+ regex->mfn = &pcre2_jit_match;
+ else {
+ if (jit != PCRE2_ERROR_JIT_BADOPTION) {
+ pcre2_code_free(regex->reg);
+ memprintf(err, "regex '%s' jit compilation failed", str);
+ goto out_fail_alloc;
+ }
+ else
+ regex->mfn = &pcre2_match;
+ }
+#endif
+
+#else
+ if (!cs)
+ flags |= REG_ICASE;
+ if (!cap)
+ flags |= REG_NOSUB;
+
+ if (regcomp(&regex->regex, str, flags) != 0) {
+ memprintf(err, "regex '%s' is invalid", str);
+ goto out_fail_alloc;
+ }
+#endif
+ return regex;
+
+ out_fail_alloc:
+ free(regex);
+ return NULL;
+}
+
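+/* Usage sketch for the error path (illustrative): on failure, regex_comp()
+ * returns NULL and <err> points to a message allocated by memprintf(),
+ * which the caller must free:
+ *
+ *   char *err = NULL;
+ *   struct my_regex *re = regex_comp("([invalid", 0, 0, &err);
+ *   if (!re) {
+ *       fprintf(stderr, "%s\n", err);
+ *       free(err);
+ *   }
+ */
+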
+static void regex_register_build_options(void)
+{
+ char *ptr = NULL;
+
+#ifdef USE_PCRE
+ memprintf(&ptr, "Built with PCRE version : %s", (HAP_XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
+ HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
+ HAP_XSTRING(PCRE_MAJOR.PCRE_MINOR) HAP_XSTRING(PCRE_PRERELEASE PCRE_DATE));
+ memprintf(&ptr, "%s\nRunning on PCRE version : %s", ptr, pcre_version());
+
+ memprintf(&ptr, "%s\nPCRE library supports JIT : %s", ptr,
+#ifdef USE_PCRE_JIT
+ ({
+ int r;
+ pcre_config(PCRE_CONFIG_JIT, &r);
+ r ? "yes" : "no (libpcre build without JIT?)";
+ })
+#else
+ "no (USE_PCRE_JIT not set)"
+#endif
+ );
+#endif /* USE_PCRE */
+
+#ifdef USE_PCRE2
+ memprintf(&ptr, "Built with PCRE2 version : %s", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0) ?
+ HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
+ HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
+ memprintf(&ptr, "%s\nPCRE2 library supports JIT : %s", ptr,
+#ifdef USE_PCRE2_JIT
+ ({
+ int r;
+ pcre2_config(PCRE2_CONFIG_JIT, &r);
+ r ? "yes" : "no (libpcre2 build without JIT?)";
+ })
+#else
+ "no (USE_PCRE2_JIT not set)"
+#endif
+ );
+#endif /* USE_PCRE2 */
+
+#if !defined(USE_PCRE) && !defined(USE_PCRE2)
+ memprintf(&ptr, "Built without PCRE or PCRE2 support (using libc's regex instead)");
+#endif
+ hap_register_build_opts(ptr, 1);
+}
+
+INITCALL0(STG_REGISTER, regex_register_build_options);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/resolvers.c b/src/resolvers.c
new file mode 100644
index 0000000..3275cd2
--- /dev/null
+++ b/src/resolvers.c
@@ -0,0 +1,3813 @@
+/*
+ * Name server resolution
+ *
+ * Copyright 2014 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+
+#include <import/ebistree.h>
+
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/dns.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/ring.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+
+
+struct list sec_resolvers = LIST_HEAD_INIT(sec_resolvers);
+struct list resolv_srvrq_list = LIST_HEAD_INIT(resolv_srvrq_list);
+
+static THREAD_LOCAL struct list death_row; /* list of deferred resolutions to kill, local validity only */
+static THREAD_LOCAL unsigned int recurse = 0; /* counter to track calls to public functions */
+static THREAD_LOCAL uint64_t resolv_query_id_seed = 0; /* random seed */
+struct resolvers *curr_resolvers = NULL;
+
+DECLARE_STATIC_POOL(resolv_answer_item_pool, "resolv_answer_item", sizeof(struct resolv_answer_item));
+DECLARE_STATIC_POOL(resolv_resolution_pool, "resolv_resolution", sizeof(struct resolv_resolution));
+DECLARE_POOL(resolv_requester_pool, "resolv_requester", sizeof(struct resolv_requester));
+
+static unsigned int resolution_uuid = 1;
+unsigned int resolv_failed_resolutions = 0;
+struct task *process_resolvers(struct task *t, void *context, unsigned int state);
+static void resolv_free_resolution(struct resolv_resolution *resolution);
+static void _resolv_unlink_resolution(struct resolv_requester *requester);
+static void enter_resolver_code();
+static void leave_resolver_code();
+
+enum {
+ RSLV_STAT_ID,
+ RSLV_STAT_PID,
+ RSLV_STAT_SENT,
+ RSLV_STAT_SND_ERROR,
+ RSLV_STAT_VALID,
+ RSLV_STAT_UPDATE,
+ RSLV_STAT_CNAME,
+ RSLV_STAT_CNAME_ERROR,
+ RSLV_STAT_ANY_ERR,
+ RSLV_STAT_NX,
+ RSLV_STAT_TIMEOUT,
+ RSLV_STAT_REFUSED,
+ RSLV_STAT_OTHER,
+ RSLV_STAT_INVALID,
+ RSLV_STAT_TOO_BIG,
+ RSLV_STAT_TRUNCATED,
+ RSLV_STAT_OUTDATED,
+ RSLV_STAT_END,
+};
+
+static struct name_desc resolv_stats[] = {
+ [RSLV_STAT_ID] = { .name = "id", .desc = "ID" },
+ [RSLV_STAT_PID] = { .name = "pid", .desc = "Parent ID" },
+ [RSLV_STAT_SENT] = { .name = "sent", .desc = "Sent" },
+ [RSLV_STAT_SND_ERROR] = { .name = "send_error", .desc = "Send error" },
+ [RSLV_STAT_VALID] = { .name = "valid", .desc = "Valid" },
+ [RSLV_STAT_UPDATE] = { .name = "update", .desc = "Update" },
+ [RSLV_STAT_CNAME] = { .name = "cname", .desc = "CNAME" },
+ [RSLV_STAT_CNAME_ERROR] = { .name = "cname_error", .desc = "CNAME error" },
+ [RSLV_STAT_ANY_ERR] = { .name = "any_err", .desc = "Any errors" },
+ [RSLV_STAT_NX] = { .name = "nx", .desc = "NX" },
+ [RSLV_STAT_TIMEOUT] = { .name = "timeout", .desc = "Timeout" },
+ [RSLV_STAT_REFUSED] = { .name = "refused", .desc = "Refused" },
+ [RSLV_STAT_OTHER] = { .name = "other", .desc = "Other" },
+ [RSLV_STAT_INVALID] = { .name = "invalid", .desc = "Invalid" },
+ [RSLV_STAT_TOO_BIG] = { .name = "too_big", .desc = "Too big" },
+ [RSLV_STAT_TRUNCATED] = { .name = "truncated", .desc = "Truncated" },
+ [RSLV_STAT_OUTDATED] = { .name = "outdated", .desc = "Outdated" },
+};
+
+static struct dns_counters dns_counters;
+
+static void resolv_fill_stats(void *d, struct field *stats)
+{
+ struct dns_counters *counters = d;
+ stats[RSLV_STAT_ID] = mkf_str(FO_CONFIG, counters->id);
+ stats[RSLV_STAT_PID] = mkf_str(FO_CONFIG, counters->pid);
+ stats[RSLV_STAT_SENT] = mkf_u64(FN_GAUGE, counters->sent);
+ stats[RSLV_STAT_SND_ERROR] = mkf_u64(FN_GAUGE, counters->snd_error);
+ stats[RSLV_STAT_VALID] = mkf_u64(FN_GAUGE, counters->app.resolver.valid);
+ stats[RSLV_STAT_UPDATE] = mkf_u64(FN_GAUGE, counters->app.resolver.update);
+ stats[RSLV_STAT_CNAME] = mkf_u64(FN_GAUGE, counters->app.resolver.cname);
+ stats[RSLV_STAT_CNAME_ERROR] = mkf_u64(FN_GAUGE, counters->app.resolver.cname_error);
+ stats[RSLV_STAT_ANY_ERR] = mkf_u64(FN_GAUGE, counters->app.resolver.any_err);
+ stats[RSLV_STAT_NX] = mkf_u64(FN_GAUGE, counters->app.resolver.nx);
+ stats[RSLV_STAT_TIMEOUT] = mkf_u64(FN_GAUGE, counters->app.resolver.timeout);
+ stats[RSLV_STAT_REFUSED] = mkf_u64(FN_GAUGE, counters->app.resolver.refused);
+ stats[RSLV_STAT_OTHER] = mkf_u64(FN_GAUGE, counters->app.resolver.other);
+ stats[RSLV_STAT_INVALID] = mkf_u64(FN_GAUGE, counters->app.resolver.invalid);
+ stats[RSLV_STAT_TOO_BIG] = mkf_u64(FN_GAUGE, counters->app.resolver.too_big);
+ stats[RSLV_STAT_TRUNCATED] = mkf_u64(FN_GAUGE, counters->app.resolver.truncated);
+ stats[RSLV_STAT_OUTDATED] = mkf_u64(FN_GAUGE, counters->app.resolver.outdated);
+}
+
+static struct stats_module rslv_stats_module = {
+ .name = "resolvers",
+ .domain_flags = STATS_DOMAIN_RESOLVERS << STATS_DOMAIN,
+ .fill_stats = resolv_fill_stats,
+ .stats = resolv_stats,
+ .stats_count = RSLV_STAT_END,
+ .counters = &dns_counters,
+ .counters_size = sizeof(dns_counters),
+ .clearable = 0,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &rslv_stats_module);
+
+/* CLI context used during "show resolvers" */
+struct show_resolvers_ctx {
+ struct resolvers *forced_section;
+ struct resolvers *resolvers;
+ struct dns_nameserver *ns;
+};
+
+/* Returns a pointer to the resolvers matching the id <id>. NULL is returned if
+ * no match is found.
+ */
+struct resolvers *find_resolvers_by_id(const char *id)
+{
+ struct resolvers *res;
+
+ list_for_each_entry(res, &sec_resolvers, list) {
+ if (strcmp(res->id, id) == 0)
+ return res;
+ }
+ return NULL;
+}
+
+/* Returns a pointer to the SRV request matching the name <name> for the proxy
+ * <px>. NULL is returned if no match is found.
+ */
+struct resolv_srvrq *find_srvrq_by_name(const char *name, struct proxy *px)
+{
+ struct resolv_srvrq *srvrq;
+
+ list_for_each_entry(srvrq, &resolv_srvrq_list, list) {
+ if (srvrq->proxy == px && strcmp(srvrq->name, name) == 0)
+ return srvrq;
+ }
+ return NULL;
+}
+
+/* Allocates a new SRVRQ for the given server with the name <fqdn>. It returns
+ * NULL if an error occurred. */
+struct resolv_srvrq *new_resolv_srvrq(struct server *srv, char *fqdn)
+{
+ struct proxy *px = srv->proxy;
+ struct resolv_srvrq *srvrq = NULL;
+ int fqdn_len, hostname_dn_len;
+
+ fqdn_len = strlen(fqdn);
+ hostname_dn_len = resolv_str_to_dn_label(fqdn, fqdn_len, trash.area,
+ trash.size);
+ if (hostname_dn_len == -1) {
+ ha_alert("%s '%s', server '%s': failed to parse FQDN '%s'\n",
+ proxy_type_str(px), px->id, srv->id, fqdn);
+ goto err;
+ }
+
+ if ((srvrq = calloc(1, sizeof(*srvrq))) == NULL) {
+ ha_alert("%s '%s', server '%s': out of memory\n",
+ proxy_type_str(px), px->id, srv->id);
+ goto err;
+ }
+ srvrq->obj_type = OBJ_TYPE_SRVRQ;
+ srvrq->proxy = px;
+ srvrq->name = strdup(fqdn);
+ srvrq->hostname_dn = strdup(trash.area);
+ srvrq->hostname_dn_len = hostname_dn_len;
+ if (!srvrq->name || !srvrq->hostname_dn) {
+ ha_alert("%s '%s', server '%s': out of memory\n",
+ proxy_type_str(px), px->id, srv->id);
+ goto err;
+ }
+ LIST_INIT(&srvrq->attached_servers);
+ srvrq->named_servers = EB_ROOT;
+ LIST_APPEND(&resolv_srvrq_list, &srvrq->list);
+ return srvrq;
+
+ err:
+ if (srvrq) {
+ free(srvrq->name);
+ free(srvrq->hostname_dn);
+ free(srvrq);
+ }
+ return NULL;
+}
+
+
+/* Finds and returns the SRV answer item associated to a requester (whose type
+ * is 'server').
+ *
+ * Returns NULL in case of error or if no item is found.
+ */
+struct resolv_answer_item *find_srvrq_answer_record(const struct resolv_requester *requester)
+{
+ struct resolv_resolution *res;
+ struct eb32_node *eb32;
+ struct server *srv;
+
+ if (!requester)
+ return NULL;
+
+ if ((srv = objt_server(requester->owner)) == NULL)
+ return NULL;
+ /* check if the server is managed by a SRV record */
+ if (srv->srvrq == NULL)
+ return NULL;
+
+ res = srv->srvrq->requester->resolution;
+
+	/* search for an ANSWER record whose target points to the server's hostname
+	 * and whose port is the same as the server's svc_port */
+ for (eb32 = eb32_first(&res->response.answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ struct resolv_answer_item *item = eb32_entry(eb32, typeof(*item), link);
+
+ if (memcmp(srv->hostname_dn, item->data.target, srv->hostname_dn_len) == 0 &&
+ (srv->svc_port == item->port))
+ return item;
+ }
+
+ return NULL;
+}
+
+/* 16-bit random generator used to generate DNS query IDs */
+static inline uint16_t resolv_rnd16(void)
+{
+ if (!resolv_query_id_seed)
+ resolv_query_id_seed = now_ms;
+ resolv_query_id_seed ^= resolv_query_id_seed << 13;
+ resolv_query_id_seed ^= resolv_query_id_seed >> 7;
+ resolv_query_id_seed ^= resolv_query_id_seed << 17;
+ return resolv_query_id_seed;
+}
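+
+/* Illustration: one full step of the xorshift64 generator above, assuming a
+ * seed value of 1:
+ *
+ *   seed ^= seed << 13;   // 0x0000000000002001
+ *   seed ^= seed >> 7;    // 0x0000000000002041
+ *   seed ^= seed << 17;   // 0x0000000040822041
+ *
+ * Truncated to 16 bits, the returned query id would be 0x2041. In practice
+ * the seed is initialized from now_ms, so the sequence differs at each start.
+ */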
+
+
+static inline int resolv_resolution_timeout(struct resolv_resolution *res)
+{
+ return res->resolvers->timeout.resolve;
+}
+
+/* Updates a resolvers' task timeout for the next wake up and queues it */
+static void resolv_update_resolvers_timeout(struct resolvers *resolvers)
+{
+ struct resolv_resolution *res;
+ int next = TICK_ETERNITY;
+
+ if (!LIST_ISEMPTY(&resolvers->resolutions.curr)) {
+ res = LIST_NEXT(&resolvers->resolutions.curr, struct resolv_resolution *, list);
+ next = tick_add(now_ms, resolvers->timeout.resolve);
+ next = tick_first(next, tick_add(res->last_query, resolvers->timeout.retry));
+ }
+
+ list_for_each_entry(res, &resolvers->resolutions.wait, list)
+ next = tick_first(next, tick_add(res->last_resolution, resolv_resolution_timeout(res)));
+
+ resolvers->t->expire = next;
+ task_queue(resolvers->t);
+}
+
+/* Forges a DNS query. It needs the following information from the caller:
+ * - <query_id> : the DNS query id corresponding to this query
+ * - <query_type> : DNS_RTYPE_* request DNS record type (A, AAAA, ANY...)
+ * - <hostname_dn> : hostname in domain name format
+ * - <hostname_dn_len> : length of <hostname_dn>
+ *
+ * To store the query, the caller must pass a buffer <buf> and its size
+ * <bufsize>. It returns the number of written bytes on success, or -1 if <buf>
+ * is too short.
+ */
+static int resolv_build_query(int query_id, int query_type, unsigned int accepted_payload_size,
+ char *hostname_dn, int hostname_dn_len, char *buf, int bufsize)
+{
+ struct dns_header dns_hdr;
+ struct dns_question qinfo;
+ struct dns_additional_record edns;
+ char *p = buf;
+
+ if (sizeof(dns_hdr) + sizeof(qinfo) + sizeof(edns) + hostname_dn_len >= bufsize)
+ return -1;
+
+ memset(buf, 0, bufsize);
+
+ /* Set dns query headers */
+ dns_hdr.id = (unsigned short) htons(query_id);
+ dns_hdr.flags = htons(0x0100); /* qr=0, opcode=0, aa=0, tc=0, rd=1, ra=0, z=0, rcode=0 */
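+	/* Bit layout of the flags field (RFC 1035):
+	 *   |QR| opcode(4) |AA|TC|RD|  |RA| Z(3) | rcode(4) |
+	 *   | 0 | 0 0 0 0  | 0| 0| 1|  | 0| 0 0 0| 0 0 0 0  |  => 0x0100
+	 * i.e. only the "recursion desired" bit is set.
+	 */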
+ dns_hdr.qdcount = htons(1); /* 1 question */
+ dns_hdr.ancount = 0;
+ dns_hdr.nscount = 0;
+ dns_hdr.arcount = htons(1);
+ memcpy(p, &dns_hdr, sizeof(dns_hdr));
+ p += sizeof(dns_hdr);
+
+ /* Set up query hostname */
+ memcpy(p, hostname_dn, hostname_dn_len);
+ p += hostname_dn_len;
+ *p++ = 0;
+
+ /* Set up query info (type and class) */
+ qinfo.qtype = htons(query_type);
+ qinfo.qclass = htons(DNS_RCLASS_IN);
+ memcpy(p, &qinfo, sizeof(qinfo));
+ p += sizeof(qinfo);
+
+ /* Set the DNS extension */
+ edns.name = 0;
+ edns.type = htons(DNS_RTYPE_OPT);
+ edns.udp_payload_size = htons(accepted_payload_size);
+ edns.extension = 0;
+ edns.data_length = 0;
+ memcpy(p, &edns, sizeof(edns));
+ p += sizeof(edns);
+
+ return (p - buf);
+}
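+
+/* Illustration: assuming a query on "www" (hostname_dn = "\x03www",
+ * hostname_dn_len = 4) with query_type = DNS_RTYPE_A, the function above
+ * emits the following layout and returns 32 (sizes assume the packed
+ * wire-format structures):
+ *
+ *   [12 bytes] header: id, flags=0x0100, qdcount=1, ancount=0, nscount=0, arcount=1
+ *   [ 5 bytes] qname:  03 'w' 'w' 'w' 00
+ *   [ 4 bytes] qtype = A (1), qclass = IN (1)
+ *   [11 bytes] OPT pseudo-RR: name=0, type=41, accepted payload size, 0, 0
+ */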
+
+/* Sends a DNS query to resolvers associated to a resolution. It returns 0 on
+ * success or -1 if the trash buffer is not large enough to build a valid query.
+ */
+static int resolv_send_query(struct resolv_resolution *resolution)
+{
+ struct resolvers *resolvers = resolution->resolvers;
+ struct dns_nameserver *ns;
+ int len;
+
+ /* Update resolution */
+ resolution->nb_queries = 0;
+ resolution->nb_responses = 0;
+ resolution->last_query = now_ms;
+
+ len = resolv_build_query(resolution->query_id, resolution->query_type,
+ resolvers->accepted_payload_size,
+ resolution->hostname_dn, resolution->hostname_dn_len,
+ trash.area, trash.size);
+ if (len < 0) {
+ send_log(NULL, LOG_NOTICE,
+ "can not build the query message for %s, in resolvers %s.\n",
+ resolution->hostname_dn, resolvers->id);
+ return -1;
+ }
+
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ if (dns_send_nameserver(ns, trash.area, len) >= 0)
+ resolution->nb_queries++;
+ }
+
+ /* Push the resolution at the end of the active list */
+ LIST_DEL_INIT(&resolution->list);
+ LIST_APPEND(&resolvers->resolutions.curr, &resolution->list);
+ return 0;
+}
+
+/* Prepares and sends a DNS resolution. It returns 1 if the query was sent, 0 if
+ * skipped and -1 if an error occurred.
+ */
+static int
+resolv_run_resolution(struct resolv_resolution *resolution)
+{
+ struct resolvers *resolvers = resolution->resolvers;
+ int query_id, i;
+
+	/* Avoid sending requests for resolutions that don't yet have a
+	 * hostname, i.e. resolutions linked to servers that do not yet have an
+	 * FQDN */
+ if (!resolution->hostname_dn)
+ return 0;
+
+	/* If a resolution has already been started for this server, return
+	 * directly to avoid a resolution pile-up. */
+ if (resolution->step != RSLV_STEP_NONE)
+ return 0;
+
+	/* Generate a new query id. We try at most 100 times to find a free
+	 * query id */
+ for (i = 0; i < 100; ++i) {
+ query_id = resolv_rnd16();
+ if (!eb32_lookup(&resolvers->query_ids, query_id))
+ break;
+ query_id = -1;
+ }
+ if (query_id == -1) {
+ send_log(NULL, LOG_NOTICE,
+ "could not generate a query id for %s, in resolvers %s.\n",
+ resolution->hostname_dn, resolvers->id);
+ return -1;
+ }
+
+ /* Update resolution parameters */
+ resolution->query_id = query_id;
+ resolution->qid.key = query_id;
+ resolution->step = RSLV_STEP_RUNNING;
+ resolution->query_type = resolution->prefered_query_type;
+ resolution->try = resolvers->resolve_retries;
+ eb32_insert(&resolvers->query_ids, &resolution->qid);
+
+ /* Send the DNS query */
+ resolution->try -= 1;
+ resolv_send_query(resolution);
+ return 1;
+}
+
+/* Performs a name resolution for the requester <req> */
+void resolv_trigger_resolution(struct resolv_requester *req)
+{
+ struct resolvers *resolvers;
+ struct resolv_resolution *res;
+ int exp;
+
+ if (!req || !req->resolution)
+ return;
+ res = req->resolution;
+ resolvers = res->resolvers;
+
+ enter_resolver_code();
+
+ /* The resolution must not be triggered yet. Use the cached response, if
+ * valid */
+ exp = tick_add(res->last_resolution, resolvers->hold.valid);
+ if (resolvers->t && (!tick_isset(resolvers->t->expire) || res->status != RSLV_STATUS_VALID ||
+ !tick_isset(res->last_resolution) || tick_is_expired(exp, now_ms))) {
+ /* If the resolution is not running and the requester is a
+ * server, reset the resolution timer to force a quick
+ * resolution.
+ */
+ if (res->step == RSLV_STEP_NONE &&
+ (obj_type(req->owner) == OBJ_TYPE_SERVER ||
+ obj_type(req->owner) == OBJ_TYPE_SRVRQ))
+ res->last_resolution = TICK_ETERNITY;
+ task_wakeup(resolvers->t, TASK_WOKEN_OTHER);
+ }
+
+ leave_resolver_code();
+}
+
+
+/* Resets some resolution parameters to initial values and also deletes the query
+ * ID from the resolver's tree.
+ */
+static void resolv_reset_resolution(struct resolv_resolution *resolution)
+{
+ /* update resolution status */
+ resolution->step = RSLV_STEP_NONE;
+ resolution->try = 0;
+ resolution->last_resolution = now_ms;
+ resolution->nb_queries = 0;
+ resolution->nb_responses = 0;
+ resolution->query_type = resolution->prefered_query_type;
+
+ /* clean up query id */
+ eb32_delete(&resolution->qid);
+ resolution->query_id = 0;
+ resolution->qid.key = 0;
+}
+
+/* Returns the query id contained in a DNS response */
+static inline unsigned short resolv_response_get_query_id(unsigned char *resp)
+{
+ return resp[0] * 256 + resp[1];
+}
+
+
+/* Analyses, re-builds and copies the name <name> from the DNS response packet
+ * <buffer>. <name> must point to the 'data_len' information or pointer 'c0'
+ * for compressed data. The result is copied into <dest>, ensuring we don't
+ * overflow, using <dest_len>. Returns the number of bytes written into
+ * <dest>, or 0 if an error occurred while parsing the name. <offset> is set
+ * to the number of bytes the caller should move forward in the packet.
+ */
+int resolv_read_name(unsigned char *buffer, unsigned char *bufend,
+ unsigned char *name, char *destination, int dest_len,
+ int *offset, unsigned int depth)
+{
+ int nb_bytes = 0, n = 0;
+ int label_len;
+ unsigned char *reader = name;
+ char *dest = destination;
+
+ while (1) {
+ if (reader >= bufend)
+ goto err;
+
+ /* Name compression is in use */
+ if ((*reader & 0xc0) == 0xc0) {
+ if (reader + 1 >= bufend)
+ goto err;
+
+ /* Must point BEFORE current position */
+ if ((buffer + reader[1]) > reader)
+ goto err;
+
+ if (depth++ > 100)
+ goto err;
+
+ n = resolv_read_name(buffer, bufend, buffer + (*reader & 0x3f)*256 + reader[1],
+ dest, dest_len - nb_bytes, offset, depth);
+ if (n == 0)
+ goto err;
+
+ dest += n;
+ nb_bytes += n;
+ goto out;
+ }
+
+ label_len = *reader;
+ if (label_len == 0)
+ goto out;
+
+ /* Check if:
+ * - we won't read outside the buffer
+		 * - there is enough room in the destination
+ */
+ if ((reader + label_len >= bufend) || (nb_bytes + label_len >= dest_len))
+ goto err;
+
+ /* +1 to take label len + label string */
+ label_len++;
+
+ memcpy(dest, reader, label_len);
+
+ dest += label_len;
+ nb_bytes += label_len;
+ reader += label_len;
+ }
+
+ out:
+ /* offset computation:
+ * parse from <name> until finding either NULL or a pointer "c0xx"
+ */
+ reader = name;
+ *offset = 0;
+ while (reader < bufend) {
+ if ((reader[0] & 0xc0) == 0xc0) {
+ *offset += 2;
+ break;
+ }
+ else if (*reader == 0) {
+ *offset += 1;
+ break;
+ }
+ *offset += 1;
+ ++reader;
+ }
+ return nb_bytes;
+
+ err:
+ return 0;
+}
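+
+/* Illustration: in a response where the queried name "\x03www\x07haproxy\x03org\x00"
+ * starts at offset 12 (right after the header), an answer record may encode
+ * its owner name as the two bytes C0 0C: the 0xc0 prefix flags a compression
+ * pointer and the remaining 14 bits (0x0c = 12) give the offset of the full
+ * name, so the function above recurses from buffer+12 and reports
+ * <offset> = 2 to the caller.
+ */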
+
+/* Reinitialize the list of aborted resolutions before calling certain
+ * functions relying on it. The list must be processed by calling
+ * leave_resolver_code() after operations.
+ */
+static void enter_resolver_code(void)
+{
+ if (!recurse)
+ LIST_INIT(&death_row);
+ recurse++;
+}
+
+/* Add a resolution to the death_row. */
+static void abort_resolution(struct resolv_resolution *res)
+{
+ /* Remove the resolution from query_ids tree and from any resolvers list */
+ eb32_delete(&res->qid);
+ res->query_id = 0;
+ res->qid.key = 0;
+
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&death_row, &res->list);
+}
+
+/* This releases any aborted resolution found in the death row. It is mandatory
+ * to call enter_resolver_code() first before the function (or loop) that
+ * needs to defer deletions. Note that some of them are related via internal
+ * objects and might cause the deletion of other ones from the same list, so we
+ * must absolutely not use a list_for_each_entry_safe() nor any such thing here,
+ * and solely rely on each call to remove the first remaining list element.
+ */
+static void leave_resolver_code(void)
+{
+ struct resolv_resolution *res;
+
+ recurse--;
+ if (recurse)
+ return;
+
+ while (!LIST_ISEMPTY(&death_row)) {
+ res = LIST_NEXT(&death_row, struct resolv_resolution *, list);
+ resolv_free_resolution(res);
+ }
+
+ /* make sure nobody tries to add anything without having initialized it */
+ death_row = (struct list){ };
+}
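+
+/* Typical usage of the pair above (sketch): any sequence that may directly or
+ * indirectly call abort_resolution() must be bracketed like this, so that the
+ * actual freeing is deferred to the outermost caller:
+ *
+ *   enter_resolver_code();
+ *   ... work that may append resolutions to the death row ...
+ *   leave_resolver_code();   // frees them once <recurse> drops back to 0
+ */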
+
+/* Cleans up the fqdn/port and address of a server attached to an SRV
+ * resolution. This happens when an SRV item is purged or when the server
+ * status is considered obsolete.
+ *
+ * Must be called with the DNS lock held, and with the death_row already
+ * initialized via enter_resolver_code().
+ */
+static void resolv_srvrq_cleanup_srv(struct server *srv)
+{
+ _resolv_unlink_resolution(srv->resolv_requester);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srvrq_update_srv_status(srv, 1);
+ ha_free(&srv->hostname);
+ ha_free(&srv->hostname_dn);
+ srv->hostname_dn_len = 0;
+ memset(&srv->addr, 0, sizeof(srv->addr));
+ srv->svc_port = 0;
+ srv->flags |= SRV_F_NO_RESOLUTION;
+
+ ebpt_delete(&srv->host_dn);
+ ha_free(&srv->host_dn.key);
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ LIST_DEL_INIT(&srv->srv_rec_item);
+ LIST_APPEND(&srv->srvrq->attached_servers, &srv->srv_rec_item);
+
+ srv->srvrq_check->expire = TICK_ETERNITY;
+}
+
+/* Takes care of cleaning up a server resolution when it is outdated. This only
+ * happens for a server relying on an SRV record.
+ */
+static struct task *resolv_srvrq_expire_task(struct task *t, void *context, unsigned int state)
+{
+ struct server *srv = context;
+
+ if (!tick_is_expired(t->expire, now_ms))
+ goto end;
+
+ enter_resolver_code();
+ HA_SPIN_LOCK(DNS_LOCK, &srv->srvrq->resolvers->lock);
+ resolv_srvrq_cleanup_srv(srv);
+ HA_SPIN_UNLOCK(DNS_LOCK, &srv->srvrq->resolvers->lock);
+ leave_resolver_code();
+
+ end:
+ return t;
+}
+
+/* Checks for any obsolete record, also identifies any SRV request, and tries
+ * to find a corresponding server.
+ */
+static void resolv_check_response(struct resolv_resolution *res)
+{
+ struct resolvers *resolvers = res->resolvers;
+ struct resolv_requester *req;
+ struct eb32_node *eb32, *eb32_back;
+ struct server *srv, *srvback;
+ struct resolv_srvrq *srvrq;
+
+ for (eb32 = eb32_first(&res->response.answer_tree); eb32 && (eb32_back = eb32_next(eb32), 1); eb32 = eb32_back) {
+ struct resolv_answer_item *item = eb32_entry(eb32, typeof(*item), link);
+ struct resolv_answer_item *ar_item = item->ar_item;
+
+ /* clean up obsolete Additional record */
+ if (ar_item && tick_is_lt(tick_add(ar_item->last_seen, resolvers->hold.obsolete), now_ms)) {
+ /* Cleaning up the AR item will trigger an extra DNS resolution, except if the SRV
+ * item is also obsolete.
+ */
+ pool_free(resolv_answer_item_pool, ar_item);
+ item->ar_item = NULL;
+ }
+
+ /* Remove obsolete items */
+ if (tick_is_lt(tick_add(item->last_seen, resolvers->hold.obsolete), now_ms)) {
+ if (item->type == DNS_RTYPE_A || item->type == DNS_RTYPE_AAAA) {
+ /* Remove any associated server */
+ list_for_each_entry_safe(srv, srvback, &item->attached_servers, ip_rec_item) {
+ LIST_DEL_INIT(&srv->ip_rec_item);
+ }
+ }
+ else if (item->type == DNS_RTYPE_SRV) {
+ /* Remove any associated server */
+ list_for_each_entry_safe(srv, srvback, &item->attached_servers, srv_rec_item)
+ resolv_srvrq_cleanup_srv(srv);
+ }
+
+ eb32_delete(&item->link);
+ if (item->ar_item) {
+ pool_free(resolv_answer_item_pool, item->ar_item);
+ item->ar_item = NULL;
+ }
+ pool_free(resolv_answer_item_pool, item);
+ continue;
+ }
+
+ if (item->type != DNS_RTYPE_SRV)
+ continue;
+
+ /* Now process SRV records */
+ list_for_each_entry(req, &res->requesters, list) {
+ struct ebpt_node *node;
+ char target[DNS_MAX_NAME_SIZE+1];
+
+ int i;
+ if ((srvrq = objt_resolv_srvrq(req->owner)) == NULL)
+ continue;
+
+ /* Check if a server already uses that record */
+ srv = NULL;
+ list_for_each_entry(srv, &item->attached_servers, srv_rec_item) {
+ if (srv->srvrq == srvrq) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ goto srv_found;
+ }
+ }
+
+
+			/* If the tree is not empty, try to match a server
+			 * from the server-state file tree with the same hostname
+ */
+ if (!eb_is_empty(&srvrq->named_servers)) {
+ srv = NULL;
+
+ /* convert the key to lookup in lower case */
+ for (i = 0 ; item->data.target[i] ; i++)
+ target[i] = tolower(item->data.target[i]);
+ target[i] = 0;
+
+ node = ebis_lookup(&srvrq->named_servers, target);
+ if (node) {
+ srv = ebpt_entry(node, struct server, host_dn);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+
+					/* an entry was found with the same hostname;
+					 * check whether the port matches on this node
+					 * and try the next node as long as the
+					 * hostname remains the same
+					 */
+ while (1) {
+ if (srv->svc_port == item->port) {
+ /* server found, we remove it from tree */
+ ebpt_delete(node);
+ ha_free(&srv->host_dn.key);
+ goto srv_found;
+ }
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+
+ node = ebpt_next(node);
+ if (!node)
+ break;
+
+ srv = ebpt_entry(node, struct server, host_dn);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+
+ if ((item->data_len != srv->hostname_dn_len)
+ || memcmp(srv->hostname_dn, item->data.target, item->data_len) != 0) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ break;
+ }
+ }
+ }
+ }
+
+			/* Pick the first server listed in srvrq (those
+			 * don't have a hostname yet and are free to use)
+			 */
+ srv = NULL;
+ list_for_each_entry(srv, &srvrq->attached_servers, srv_rec_item) {
+ LIST_DEL_INIT(&srv->srv_rec_item);
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ goto srv_found;
+ }
+ srv = NULL;
+
+srv_found:
+ /* And update this server, if found (srv is locked here) */
+ if (srv) {
+ /* re-enable DNS resolution for this server by default */
+ srv->flags &= ~SRV_F_NO_RESOLUTION;
+ srv->srvrq_check->expire = TICK_ETERNITY;
+
+ srv->svc_port = item->port;
+ srv->flags &= ~SRV_F_MAPPORTS;
+
+ /* Check if an Additional Record is associated to this SRV record.
+ * Perform some sanity checks too to ensure the record can be used.
+				 * If all is fine, we simply pick up the IP address found and
+				 * associate it with the server, and DNS resolution is disabled
+				 * for this server.
+ */
+ if ((item->ar_item != NULL) &&
+ (item->ar_item->type == DNS_RTYPE_A || item->ar_item->type == DNS_RTYPE_AAAA))
+ {
+
+ switch (item->ar_item->type) {
+ case DNS_RTYPE_A:
+ srv_update_addr(srv, &item->ar_item->data.in4.sin_addr, AF_INET, "DNS additional record");
+ break;
+ case DNS_RTYPE_AAAA:
+ srv_update_addr(srv, &item->ar_item->data.in6.sin6_addr, AF_INET6, "DNS additional record");
+ break;
+ }
+
+ srv->flags |= SRV_F_NO_RESOLUTION;
+
+ /* Unlink A/AAAA resolution for this server if there is an AR item.
+					 * It is useless to perform an extra resolution
+ */
+ _resolv_unlink_resolution(srv->resolv_requester);
+ }
+
+ if (!srv->hostname_dn) {
+ const char *msg = NULL;
+ char hostname[DNS_MAX_NAME_SIZE+1];
+
+ if (resolv_dn_label_to_str(item->data.target, item->data_len,
+ hostname, sizeof(hostname)) == -1) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ continue;
+ }
+ msg = srv_update_fqdn(srv, hostname, "SRV record", 1);
+ if (msg)
+ send_log(srv->proxy, LOG_NOTICE, "%s", msg);
+ }
+
+ if (!LIST_INLIST(&srv->srv_rec_item))
+ LIST_APPEND(&item->attached_servers, &srv->srv_rec_item);
+
+ if (!(srv->flags & SRV_F_NO_RESOLUTION)) {
+					/* If there is no AR item responsible for the FQDN resolution,
+ * trigger a dedicated DNS resolution
+ */
+ if (!srv->resolv_requester || !srv->resolv_requester->resolution)
+ resolv_link_resolution(srv, OBJ_TYPE_SERVER, 1);
+ }
+
+ /* Update the server status */
+ srvrq_update_srv_status(srv, (srv->addr.ss_family != AF_INET && srv->addr.ss_family != AF_INET6));
+
+ if (!srv->resolv_opts.ignore_weight) {
+ char weight[9];
+ int ha_weight;
+
+					/* The DNS weight range is from 0 to 65535 while
+					 * the HAProxy weight is from 0 to 256. The rule
+					 * below ensures that weight 0 is well respected
+					 * while allowing a "mapping" from the DNS weight
+					 * into HAProxy's one.
+					 */
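+					/* e.g. DNS weight 0 maps to 0, 1..256 map to 1,
+					 * 511 maps to 2 and 65535 maps to 256 (the
+					 * maximum HAProxy weight). */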
+ ha_weight = (item->weight + 255) / 256;
+
+ snprintf(weight, sizeof(weight), "%d", ha_weight);
+ server_parse_weight_change_request(srv, weight);
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+ }
+ }
+}
+
+/* Validates that the DNS response provided in <resp> and ending before
+ * <bufend> is valid from a DNS protocol point of view.
+ *
+ * The result is stored in <resolution>' response, buf_response,
+ * response_query_records and response_answer_records members.
+ *
+ * This function returns one of the RSLV_RESP_* code to indicate the type of
+ * error found.
+ */
+static int resolv_validate_dns_response(unsigned char *resp, unsigned char *bufend,
+ struct resolv_resolution *resolution, int max_answer_records)
+{
+ unsigned char *reader;
+ char *previous_dname, tmpname[DNS_MAX_NAME_SIZE];
+ int len, flags, offset;
+ int nb_saved_records;
+ struct resolv_query_item *query;
+ struct resolv_answer_item *answer_record, *tmp_record;
+ struct resolv_response *r_res;
+ struct eb32_node *eb32;
+ uint32_t key = 0;
+ int i, found = 0;
+ int cause = RSLV_RESP_ERROR;
+
+ reader = resp;
+ len = 0;
+ previous_dname = NULL;
+ query = NULL;
+ answer_record = NULL;
+
+ /* Initialization of response buffer and structure */
+ r_res = &resolution->response;
+
+ /* query id */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+
+ r_res->header.id = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* Flags and rcode are stored over 2 bytes
+ * First byte contains:
+ * - response flag (1 bit)
+ * - opcode (4 bits)
+ * - authoritative (1 bit)
+ * - truncated (1 bit)
+ * - recursion desired (1 bit)
+ */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+
+ flags = reader[0] * 256 + reader[1];
+
+ if ((flags & DNS_FLAG_REPLYCODE) != DNS_RCODE_NO_ERROR) {
+ if ((flags & DNS_FLAG_REPLYCODE) == DNS_RCODE_NX_DOMAIN) {
+ cause = RSLV_RESP_NX_DOMAIN;
+ goto return_error;
+ }
+ else if ((flags & DNS_FLAG_REPLYCODE) == DNS_RCODE_REFUSED) {
+ cause = RSLV_RESP_REFUSED;
+ goto return_error;
+ }
+ else {
+ cause = RSLV_RESP_ERROR;
+ goto return_error;
+ }
+ }
+
+ /* Move forward 2 bytes for flags */
+ reader += 2;
+
+ /* 2 bytes for question count */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ r_res->header.qdcount = reader[0] * 256 + reader[1];
+ /* (for now) we send one query only, so we expect only one in the
+ * response too */
+ if (r_res->header.qdcount != 1) {
+ cause = RSLV_RESP_QUERY_COUNT_ERROR;
+ goto return_error;
+ }
+
+ if (r_res->header.qdcount > DNS_MAX_QUERY_RECORDS)
+ goto invalid_resp;
+ reader += 2;
+
+ /* 2 bytes for answer count */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ r_res->header.ancount = reader[0] * 256 + reader[1];
+ if (r_res->header.ancount == 0) {
+ cause = RSLV_RESP_ANCOUNT_ZERO;
+ goto return_error;
+ }
+
+ /* Check if too many records are announced */
+ if (r_res->header.ancount > max_answer_records)
+ goto invalid_resp;
+ reader += 2;
+
+ /* 2 bytes authority count */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ r_res->header.nscount = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 2 bytes additional count */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ r_res->header.arcount = reader[0] * 256 + reader[1];
+ reader += 2;
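+
+	/* The full 12-byte DNS header has now been consumed:
+	 *   id(2) flags(2) qdcount(2) ancount(2) nscount(2) arcount(2)
+	 * <reader> points to the question section.
+	 */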
+
+ /* Parsing dns queries. For now there is only one query and it exists
+ * because (qdcount == 1).
+ */
+ query = &resolution->response_query_records[0];
+
+ /* Name is a NULL terminated string in our case, since we have
+ * one query per response and the first one can't be compressed
+	 * (using the 0xc0 pointer format) */
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, query->name, DNS_MAX_NAME_SIZE, &offset, 0);
+
+ if (len == 0)
+ goto invalid_resp;
+
+ /* Now let's check the query's dname corresponds to the one we sent. */
+ if (len != resolution->hostname_dn_len ||
+ memcmp(query->name, resolution->hostname_dn, resolution->hostname_dn_len) != 0) {
+ cause = RSLV_RESP_WRONG_NAME;
+ goto return_error;
+ }
+
+ reader += offset;
+ previous_dname = query->name;
+
+ /* move forward 2 bytes for question type */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ query->type = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* move forward 2 bytes for question class */
+ if (reader + 2 >= bufend)
+ goto invalid_resp;
+ query->class = reader[0] * 256 + reader[1];
+ reader += 2;
+
+	/* The TRUNCATED flag must only be checked once the query type could be
+	 * read, because a TRUNCATED response to an SRV query can still be exploited
+ */
+ if (query->type != DNS_RTYPE_SRV && flags & DNS_FLAG_TRUNCATED) {
+ cause = RSLV_RESP_TRUNCATED;
+ goto return_error;
+ }
+
+ /* now parsing response records */
+ nb_saved_records = 0;
+ for (i = 0; i < r_res->header.ancount; i++) {
+ if (reader >= bufend)
+ goto invalid_resp;
+
+ answer_record = pool_alloc(resolv_answer_item_pool);
+ if (answer_record == NULL)
+ goto invalid_resp;
+
+ /* initialization */
+ answer_record->ar_item = NULL;
+ answer_record->last_seen = TICK_ETERNITY;
+ LIST_INIT(&answer_record->attached_servers);
+ answer_record->link.node.leaf_p = NULL;
+
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE, &offset, 0);
+
+ if (len == 0)
+ goto invalid_resp;
+
+ /* Check if the current record dname is valid. previous_dname
+ * points either to queried dname or last CNAME target */
+ if (query->type != DNS_RTYPE_SRV && memcmp(previous_dname, tmpname, len) != 0) {
+ if (i == 0) {
+ /* First record, means a mismatch issue between
+ * queried dname and dname found in the first
+ * record */
+ goto invalid_resp;
+ }
+ else {
+ /* If not the first record, this means we have a
+ * CNAME resolution error.
+ */
+ cause = RSLV_RESP_CNAME_ERROR;
+ goto return_error;
+ }
+
+ }
+
+ memcpy(answer_record->name, tmpname, len);
+ answer_record->name[len] = 0;
+
+ reader += offset;
+ if (reader >= bufend)
+ goto invalid_resp;
+
+ /* 2 bytes for record type (A, AAAA, CNAME, etc...) */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->type = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 2 bytes for class (2) */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->class = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 4 bytes for ttl (4) */
+ if (reader + 4 > bufend)
+ goto invalid_resp;
+
+ answer_record->ttl = reader[0] * 16777216 + reader[1] * 65536
+ + reader[2] * 256 + reader[3];
+ reader += 4;
+
+ /* Now reading data len */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->data_len = reader[0] * 256 + reader[1];
+
+ /* Move forward 2 bytes for data len */
+ reader += 2;
+
+ if (reader + answer_record->data_len > bufend)
+ goto invalid_resp;
+
+ /* Analyzing record content */
+ switch (answer_record->type) {
+ case DNS_RTYPE_A:
+ /* ipv4 is stored on 4 bytes */
+ if (answer_record->data_len != 4)
+ goto invalid_resp;
+
+ answer_record->data.in4.sin_family = AF_INET;
+ memcpy(&answer_record->data.in4.sin_addr, reader, answer_record->data_len);
+ key = XXH32(reader, answer_record->data_len, answer_record->type);
+ break;
+
+ case DNS_RTYPE_CNAME:
+			/* Check if this is the last record and update the caller about the status:
+			 * no IP could be found and the last record was a CNAME. Could be triggered
+			 * by a wrong query type.
+			 *
+			 * + 1 because <i> starts at 0 while the number
+			 * of answers starts at 1.
+			 */
+ if (i + 1 == r_res->header.ancount) {
+ cause = RSLV_RESP_CNAME_ERROR;
+ goto return_error;
+ }
+
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE, &offset, 0);
+ if (len == 0)
+ goto invalid_resp;
+
+ memcpy(answer_record->data.target, tmpname, len);
+ answer_record->data.target[len] = 0;
+ key = XXH32(tmpname, len, answer_record->type);
+ previous_dname = answer_record->data.target;
+ break;
+
+
+ case DNS_RTYPE_SRV:
+ /* Answer must contain :
+ * - 2 bytes for the priority
+ * - 2 bytes for the weight
+ * - 2 bytes for the port
+ * - the target hostname
+ */
+ if (answer_record->data_len <= 6)
+ goto invalid_resp;
+
+ answer_record->priority = read_n16(reader);
+ reader += sizeof(uint16_t);
+ answer_record->weight = read_n16(reader);
+ reader += sizeof(uint16_t);
+ answer_record->port = read_n16(reader);
+ reader += sizeof(uint16_t);
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE, &offset, 0);
+ if (len == 0)
+ goto invalid_resp;
+
+ answer_record->data_len = len;
+ memcpy(answer_record->data.target, tmpname, len);
+ answer_record->data.target[len] = 0;
+ key = XXH32(tmpname, len, answer_record->type);
+ if (answer_record->ar_item != NULL) {
+ pool_free(resolv_answer_item_pool, answer_record->ar_item);
+ answer_record->ar_item = NULL;
+ }
+ break;
+
+ case DNS_RTYPE_AAAA:
+ /* ipv6 is stored on 16 bytes */
+ if (answer_record->data_len != 16)
+ goto invalid_resp;
+
+ answer_record->data.in6.sin6_family = AF_INET6;
+ memcpy(&answer_record->data.in6.sin6_addr, reader, answer_record->data_len);
+ key = XXH32(reader, answer_record->data_len, answer_record->type);
+ break;
+
+ } /* switch (record type) */
+
+ /* Increment the counter for number of records saved into our
+ * local response */
+ nb_saved_records++;
+
+ /* Move forward answer_record->data_len for analyzing next
+ * record in the response */
+ reader += ((answer_record->type == DNS_RTYPE_SRV)
+ ? offset
+ : answer_record->data_len);
+
+ /* Lookup to see if we already had this entry */
+ found = 0;
+
+ for (eb32 = eb32_lookup(&r_res->answer_tree, key); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ tmp_record = eb32_entry(eb32, typeof(*tmp_record), link);
+ if (tmp_record->type != answer_record->type)
+ continue;
+
+ switch(tmp_record->type) {
+ case DNS_RTYPE_A:
+ if (!memcmp(&answer_record->data.in4.sin_addr,
+ &tmp_record->data.in4.sin_addr,
+ sizeof(answer_record->data.in4.sin_addr)))
+ found = 1;
+ break;
+
+ case DNS_RTYPE_AAAA:
+ if (!memcmp(&answer_record->data.in6.sin6_addr,
+ &tmp_record->data.in6.sin6_addr,
+ sizeof(answer_record->data.in6.sin6_addr)))
+ found = 1;
+ break;
+
+ case DNS_RTYPE_SRV:
+ if (answer_record->data_len == tmp_record->data_len &&
+ memcmp(answer_record->data.target, tmp_record->data.target, answer_record->data_len) == 0 &&
+ answer_record->port == tmp_record->port) {
+ tmp_record->weight = answer_record->weight;
+ found = 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (found == 1)
+ break;
+ }
+
+ if (found == 1) {
+ tmp_record->last_seen = now_ms;
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ }
+ else {
+ answer_record->last_seen = now_ms;
+ answer_record->ar_item = NULL;
+ answer_record->link.key = key;
+ eb32_insert(&r_res->answer_tree, &answer_record->link);
+ answer_record = NULL;
+ }
+ } /* for i 0 to ancount */
+
+ /* Save the number of records we really own */
+ r_res->header.ancount = nb_saved_records;
+
+ /* now parsing additional records for SRV queries only */
+ if (query->type != DNS_RTYPE_SRV)
+ goto skip_parsing_additional_records;
+
+ /* if we find Authority records, just skip them */
+ for (i = 0; i < r_res->header.nscount; i++) {
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE,
+ &offset, 0);
+ if (len == 0)
+ continue;
+
+ if (reader + offset + 10 >= bufend)
+ goto invalid_resp;
+
+ reader += offset;
+ /* skip 2 bytes for class */
+ reader += 2;
+ /* skip 2 bytes for type */
+ reader += 2;
+ /* skip 4 bytes for ttl */
+ reader += 4;
+ /* read data len */
+ len = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ if (reader + len >= bufend)
+ goto invalid_resp;
+
+ reader += len;
+ }
+
+ nb_saved_records = 0;
+ for (i = 0; i < r_res->header.arcount; i++) {
+ if (reader >= bufend)
+ goto invalid_resp;
+
+ answer_record = pool_alloc(resolv_answer_item_pool);
+ if (answer_record == NULL)
+ goto invalid_resp;
+ answer_record->last_seen = TICK_ETERNITY;
+ LIST_INIT(&answer_record->attached_servers);
+
+ offset = 0;
+ len = resolv_read_name(resp, bufend, reader, tmpname, DNS_MAX_NAME_SIZE, &offset, 0);
+
+ if (len == 0) {
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ continue;
+ }
+
+ memcpy(answer_record->name, tmpname, len);
+ answer_record->name[len] = 0;
+
+ reader += offset;
+ if (reader >= bufend)
+ goto invalid_resp;
+
+ /* 2 bytes for record type (A, AAAA, CNAME, etc...) */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->type = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 2 bytes for class (2) */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->class = reader[0] * 256 + reader[1];
+ reader += 2;
+
+ /* 4 bytes for ttl (4) */
+ if (reader + 4 > bufend)
+ goto invalid_resp;
+
+ answer_record->ttl = reader[0] * 16777216 + reader[1] * 65536
+ + reader[2] * 256 + reader[3];
+ reader += 4;
+
+ /* Now reading data len */
+ if (reader + 2 > bufend)
+ goto invalid_resp;
+
+ answer_record->data_len = reader[0] * 256 + reader[1];
+
+ /* Move forward 2 bytes for data len */
+ reader += 2;
+
+ if (reader + answer_record->data_len > bufend)
+ goto invalid_resp;
+
+ /* Analyzing record content */
+ switch (answer_record->type) {
+ case DNS_RTYPE_A:
+ /* ipv4 is stored on 4 bytes */
+ if (answer_record->data_len != 4)
+ goto invalid_resp;
+
+ answer_record->data.in4.sin_family = AF_INET;
+ memcpy(&answer_record->data.in4.sin_addr, reader, answer_record->data_len);
+ break;
+
+ case DNS_RTYPE_AAAA:
+ /* ipv6 is stored on 16 bytes */
+ if (answer_record->data_len != 16)
+ goto invalid_resp;
+
+ answer_record->data.in6.sin6_family = AF_INET6;
+ memcpy(&answer_record->data.in6.sin6_addr, reader, answer_record->data_len);
+ break;
+
+ default:
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ continue;
+
+ } /* switch (record type) */
+
+ /* Increment the counter for number of records saved into our
+ * local response */
+ nb_saved_records++;
+
+ /* Move forward answer_record->data_len for analyzing next
+ * record in the response */
+ reader += answer_record->data_len;
+
+ /* Lookup to see if we already had this entry */
+ found = 0;
+
+ for (eb32 = eb32_first(&r_res->answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ struct resolv_answer_item *ar_item;
+
+ tmp_record = eb32_entry(eb32, typeof(*tmp_record), link);
+ if (tmp_record->type != DNS_RTYPE_SRV || !tmp_record->ar_item)
+ continue;
+
+ ar_item = tmp_record->ar_item;
+ if (ar_item->type != answer_record->type || ar_item->last_seen == now_ms ||
+ len != tmp_record->data_len ||
+ memcmp(answer_record->name, tmp_record->data.target, tmp_record->data_len) != 0)
+ continue;
+
+ switch(ar_item->type) {
+ case DNS_RTYPE_A:
+ if (!memcmp(&answer_record->data.in4.sin_addr,
+ &ar_item->data.in4.sin_addr,
+ sizeof(answer_record->data.in4.sin_addr)))
+ found = 1;
+ break;
+
+ case DNS_RTYPE_AAAA:
+ if (!memcmp(&answer_record->data.in6.sin6_addr,
+ &ar_item->data.in6.sin6_addr,
+ sizeof(answer_record->data.in6.sin6_addr)))
+ found = 1;
+ break;
+
+ default:
+ break;
+ }
+
+ if (found == 1)
+ break;
+ }
+
+ if (found == 1) {
+ tmp_record->ar_item->last_seen = now_ms;
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ }
+ else {
+ answer_record->last_seen = now_ms;
+ answer_record->ar_item = NULL;
+
+				/* look for the SRV record in the response list linked to this additional record */
+ for (eb32 = eb32_first(&r_res->answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ tmp_record = eb32_entry(eb32, typeof(*tmp_record), link);
+
+ if (tmp_record->type == DNS_RTYPE_SRV &&
+ tmp_record->ar_item == NULL &&
+ memcmp(tmp_record->data.target, answer_record->name, tmp_record->data_len) == 0) {
+ /* Always use the received additional record to refresh info */
+ pool_free(resolv_answer_item_pool, tmp_record->ar_item);
+ tmp_record->ar_item = answer_record;
+ answer_record = NULL;
+ break;
+ }
+ }
+ if (answer_record) {
+ pool_free(resolv_answer_item_pool, answer_record);
+ answer_record = NULL;
+ }
+ }
+ } /* for i 0 to arcount */
+
+ skip_parsing_additional_records:
+
+ /* Save the number of records we really own */
+ r_res->header.arcount = nb_saved_records;
+ resolv_check_response(resolution);
+ return RSLV_RESP_VALID;
+
+ invalid_resp:
+ cause = RSLV_RESP_INVALID;
+
+ return_error:
+ pool_free(resolv_answer_item_pool, answer_record);
+ return cause;
+}
+
+/* Searches for the IP of the resolved name in the response <r_res>. If the
+ * currently used IP is not found, the first IP matching <family_priority> is
+ * returned, otherwise the first IP found is used.
+ *
+ * The caller is responsible for ensuring that <r_res> contains an error-free
+ * DNS response: resolv_validate_dns_response() must have been called on it
+ * beforehand.
+ *
+ * Returns one of the RSLV_UPD_* codes.
+ */
+int resolv_get_ip_from_response(struct resolv_response *r_res,
+ struct resolv_options *resolv_opts, void *currentip,
+ short currentip_sin_family,
+ void **newip, short *newip_sin_family,
+ struct server *owner)
+{
+ struct resolv_answer_item *record, *found_record = NULL;
+ struct eb32_node *eb32;
+ int family_priority;
+ int currentip_found;
+ unsigned char *newip4, *newip6;
+ int currentip_sel;
+ int j;
+ int score, max_score;
+ int allowed_duplicated_ip;
+
+ /* srv is linked to an alive ip record */
+ if (owner && LIST_INLIST(&owner->ip_rec_item))
+ return RSLV_UPD_NO;
+
+ family_priority = resolv_opts->family_prio;
+ allowed_duplicated_ip = resolv_opts->accept_duplicate_ip;
+ *newip = newip4 = newip6 = NULL;
+ currentip_found = 0;
+ *newip_sin_family = AF_UNSPEC;
+ max_score = -1;
+
+	/* Select an IP regarding configuration preference.
+	 * Top priority is the preferred network ip version,
+	 * second priority is the preferred network,
+	 * the last priority is the currently used IP.
+	 *
+	 * For these three priorities, a score is calculated. The
+	 * weights are:
+	 *   8 - preferred ip version.
+	 *   4 - preferred network.
+	 *   2 - if the ip in the record is not assigned to any other server in the same backend (duplication)
+	 *   1 - current ip.
+	 * The result with the biggest score is returned.
+	 */
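+
+	/* For instance, assuming family_prio = AF_INET, an A record whose
+	 * address lies in a preferred network, is not used by another server of
+	 * the same backend and equals the currently used address scores
+	 * 8 + 4 + 2 + 1 = 15, the maximum, which ends the walk early.
+	 */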
+
+ for (eb32 = eb32_first(&r_res->answer_tree); eb32 != NULL; eb32 = eb32_next(eb32)) {
+ void *ip;
+ unsigned char ip_type;
+
+ record = eb32_entry(eb32, typeof(*record), link);
+ if (record->type == DNS_RTYPE_A) {
+ ip_type = AF_INET;
+ ip = &record->data.in4.sin_addr;
+ }
+ else if (record->type == DNS_RTYPE_AAAA) {
+ ip_type = AF_INET6;
+ ip = &record->data.in6.sin6_addr;
+ }
+ else
+ continue;
+ score = 0;
+
+ /* Check for preferred ip protocol. */
+ if (ip_type == family_priority)
+ score += 8;
+
+ /* Check for preferred network. */
+ for (j = 0; j < resolv_opts->pref_net_nb; j++) {
+
+ /* Compare only the same addresses class. */
+ if (resolv_opts->pref_net[j].family != ip_type)
+ continue;
+
+ if ((ip_type == AF_INET &&
+ in_net_ipv4(ip,
+ &resolv_opts->pref_net[j].mask.in4,
+ &resolv_opts->pref_net[j].addr.in4)) ||
+ (ip_type == AF_INET6 &&
+ in_net_ipv6(ip,
+ &resolv_opts->pref_net[j].mask.in6,
+ &resolv_opts->pref_net[j].addr.in6))) {
+ score += 4;
+ break;
+ }
+ }
+
+		/* Check if the IP found in the record is already assigned to a
+		 * server of the same backend. If not, the score is incremented
+		 * by 2. */
+ if (owner) {
+ struct server *srv;
+ int already_used = 0;
+
+ list_for_each_entry(srv, &record->attached_servers, ip_rec_item) {
+ if (srv == owner)
+ continue;
+ if (srv->proxy == owner->proxy) {
+ already_used = 1;
+ break;
+ }
+ }
+ if (already_used) {
+ if (!allowed_duplicated_ip) {
+ continue;
+ }
+ }
+ else {
+ score += 2;
+ }
+ } else {
+ score += 2;
+ }
+
+ /* Check for current ip matching. */
+ if (ip_type == currentip_sin_family &&
+ ((currentip_sin_family == AF_INET &&
+ !memcmp(ip, currentip, 4)) ||
+ (currentip_sin_family == AF_INET6 &&
+ !memcmp(ip, currentip, 16)))) {
+ score++;
+ currentip_sel = 1;
+ }
+ else
+ currentip_sel = 0;
+
+		/* Keep the address if the score is better than the previous
+		 * one. The maximum score is 15; if this value is reached, we
+		 * stop the parsing. Implicitly, when this score is reached, the
+		 * selected ip is the current ip. */
+ if (score > max_score) {
+ if (ip_type == AF_INET)
+ newip4 = ip;
+ else
+ newip6 = ip;
+ found_record = record;
+ currentip_found = currentip_sel;
+ if (score == 15) {
+				/* this was not registered on the current record but it matches;
+				 * let's fix it (it may come from the state file) */
+ if (owner)
+ LIST_APPEND(&found_record->attached_servers, &owner->ip_rec_item);
+ return RSLV_UPD_NO;
+ }
+ max_score = score;
+ }
+ } /* list for each record entries */
+
+ /* No IP found in the response */
+ if (!newip4 && !newip6)
+ return RSLV_UPD_NO_IP_FOUND;
+
+ /* Case when the caller looks first for an IPv4 address */
+ if (family_priority == AF_INET) {
+ if (newip4) {
+ *newip = newip4;
+ *newip_sin_family = AF_INET;
+ }
+ else if (newip6) {
+ *newip = newip6;
+ *newip_sin_family = AF_INET6;
+ }
+ }
+ /* Case when the caller looks first for an IPv6 address */
+ else if (family_priority == AF_INET6) {
+ if (newip6) {
+ *newip = newip6;
+ *newip_sin_family = AF_INET6;
+ }
+ else if (newip4) {
+ *newip = newip4;
+ *newip_sin_family = AF_INET;
+ }
+ }
+	/* Case when the caller has no preference (we prefer IPv6) */
+ else if (family_priority == AF_UNSPEC) {
+ if (newip6) {
+ *newip = newip6;
+ *newip_sin_family = AF_INET6;
+ }
+ else if (newip4) {
+ *newip = newip4;
+ *newip_sin_family = AF_INET;
+ }
+ }
+
+ /* the ip of this record was chosen for the server */
+ if (owner && found_record) {
+ LIST_DEL_INIT(&owner->ip_rec_item);
+ LIST_APPEND(&found_record->attached_servers, &owner->ip_rec_item);
+ }
+
+ eb32 = eb32_first(&r_res->answer_tree);
+ if (eb32) {
+ /* Move the first record to the end of the list, for internal
+ * round robin.
+ */
+ eb32_delete(eb32);
+ eb32_insert(&r_res->answer_tree, eb32);
+ }
+
+ return (currentip_found ? RSLV_UPD_NO : RSLV_UPD_SRVIP_NOT_FOUND);
+}
+
+/* Turns a domain name label into a string: 3www7haproxy3org into www.haproxy.org
+ *
+ * <dn> contains the input label of <dn_len> bytes long and does not need to be
+ * null-terminated. <str> must be allocated large enough to contain a full host
+ * name plus the trailing zero, and the allocated size must be passed in
+ * <str_len>.
+ *
+ * In case of error, -1 is returned, otherwise, the number of bytes copied in
+ * <str> (including the terminating null byte).
+ */
+int resolv_dn_label_to_str(const char *dn, int dn_len, char *str, int str_len)
+{
+ char *ptr;
+ int i, sz;
+
+ if (str_len < dn_len)
+ return -1;
+
+ ptr = str;
+ for (i = 0; i < dn_len; ++i) {
+ sz = dn[i];
+ if (i)
+ *ptr++ = '.';
+ /* copy the string at i+1 to lower case */
+ for (; sz > 0; sz--)
+ *(ptr++) = tolower(dn[++i]);
+ }
+ *ptr++ = '\0';
+ return (ptr - str);
+}
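+
+/* Illustration: resolv_dn_label_to_str("\x03www\x07haproxy\x03org", 16, str, 16)
+ * writes "www.haproxy.org" plus the terminating zero into <str> and returns 16.
+ */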
+
+/* Turns a string into domain name label: www.haproxy.org into 3www7haproxy3org
+ *
+ * <str> contains the input string that is <str_len> bytes long (trailing zero
+ * not needed). <dn> buffer must be allocated large enough to contain the
+ * encoded string and a trailing zero, so it must be at least str_len+2, and
+ * this allocated buffer size must be passed in <dn_len>.
+ *
+ * In case of error, -1 is returned, otherwise, the number of bytes copied in
+ * <dn> (excluding the terminating null byte).
+ */
+int resolv_str_to_dn_label(const char *str, int str_len, char *dn, int dn_len)
+{
+ int i, offset;
+
+ if (dn_len < str_len + 2)
+ return -1;
+
+ /* First byte of dn will be used to store the length of the first
+ * label */
+ offset = 0;
+ for (i = 0; i < str_len; ++i) {
+ if (str[i] == '.') {
+ /* 2 or more consecutive dots is invalid */
+ if (i == offset)
+ return -1;
+
+ /* ignore trailing dot */
+ if (i + 1 == str_len)
+ break;
+
+ dn[offset] = (i - offset);
+ offset = i+1;
+ continue;
+ }
+ dn[i+1] = tolower(str[i]);
+ }
+ dn[offset] = i - offset;
+ dn[i+1] = '\0';
+ return i+1;
+}
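+
+/* Illustration: resolv_str_to_dn_label("www.haproxy.org", 15, dn, 17) encodes
+ * "\x03www\x07haproxy\x03org" into <dn> (16 bytes plus a trailing zero at
+ * dn[16]) and returns 16.
+ */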
+
+/* Validates host name:
+ * - total size
+ * - each label size individually
+ * returns:
+ * 0 in case of error. If <err> is not NULL, an error message is stored there.
+ * 1 when no error. <err> is left unaffected.
+ */
+int resolv_hostname_validation(const char *string, char **err)
+{
+ int i;
+
+ if (strlen(string) > DNS_MAX_NAME_SIZE) {
+ if (err)
+ *err = DNS_TOO_LONG_FQDN;
+ return 0;
+ }
+
+ while (*string) {
+ i = 0;
+ while (*string && *string != '.' && i < DNS_MAX_LABEL_SIZE) {
+ if (!(*string == '-' || *string == '_' ||
+ (*string >= 'a' && *string <= 'z') ||
+ (*string >= 'A' && *string <= 'Z') ||
+ (*string >= '0' && *string <= '9'))) {
+ if (err)
+ *err = DNS_INVALID_CHARACTER;
+ return 0;
+ }
+ i++;
+ string++;
+ }
+
+ if (!(*string))
+ break;
+
+ if (*string != '.' && i >= DNS_MAX_LABEL_SIZE) {
+ if (err)
+ *err = DNS_LABEL_TOO_LONG;
+ return 0;
+ }
+
+ string++;
+ }
+ return 1;
+}
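+
+/* Illustration: "srv-1.example.org" and "_http._tcp.example.org" pass the
+ * checks above, while "web$1.example.org" fails with DNS_INVALID_CHARACTER
+ * and a name with a label longer than DNS_MAX_LABEL_SIZE fails with
+ * DNS_LABEL_TOO_LONG.
+ */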
+
+/* Picks up an available resolution from the different resolution lists
+ * associated with a resolvers section, in this order:
+ * 1. check in resolutions.curr for the same hostname and query_type
+ * 2. check in resolutions.wait for the same hostname and query_type
+ * 3. Get a new resolution from resolution pool
+ *
+ * Returns an available resolution, NULL if none found.
+ */
+static struct resolv_resolution *resolv_pick_resolution(struct resolvers *resolvers,
+ char **hostname_dn, int hostname_dn_len,
+ int query_type)
+{
+ struct resolv_resolution *res;
+
+ if (!*hostname_dn)
+ goto from_pool;
+
+ /* Search for same hostname and query type in resolutions.curr */
+ list_for_each_entry(res, &resolvers->resolutions.curr, list) {
+ if (!res->hostname_dn)
+ continue;
+ if ((query_type == res->prefered_query_type) &&
+ hostname_dn_len == res->hostname_dn_len &&
+ memcmp(*hostname_dn, res->hostname_dn, hostname_dn_len) == 0)
+ return res;
+ }
+
+ /* Search for same hostname and query type in resolutions.wait */
+ list_for_each_entry(res, &resolvers->resolutions.wait, list) {
+ if (!res->hostname_dn)
+ continue;
+ if ((query_type == res->prefered_query_type) &&
+ hostname_dn_len == res->hostname_dn_len &&
+ memcmp(*hostname_dn, res->hostname_dn, hostname_dn_len) == 0)
+ return res;
+ }
+
+ from_pool:
+ /* No resolution could be found, so let's allocate a new one */
+ res = pool_zalloc(resolv_resolution_pool);
+ if (res) {
+ res->resolvers = resolvers;
+ res->uuid = resolution_uuid;
+ res->status = RSLV_STATUS_NONE;
+ res->step = RSLV_STEP_NONE;
+ res->last_valid = now_ms;
+
+ LIST_INIT(&res->requesters);
+ res->response.answer_tree = EB_ROOT;
+
+ res->prefered_query_type = query_type;
+ res->query_type = query_type;
+ res->hostname_dn = *hostname_dn;
+ res->hostname_dn_len = hostname_dn_len;
+
+ ++resolution_uuid;
+
+ /* Move the resolution to the resolvers wait queue */
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ }
+ return res;
+}
+
+/* Deletes and frees all answer items from the resolution's answer tree */
+static void resolv_purge_resolution_answer_records(struct resolv_resolution *resolution)
+{
+ struct eb32_node *eb32, *eb32_back;
+ struct resolv_answer_item *item;
+
+ for (eb32 = eb32_first(&resolution->response.answer_tree);
+ eb32 && (eb32_back = eb32_next(eb32), 1);
+ eb32 = eb32_back) {
+ item = eb32_entry(eb32, typeof(*item), link);
+ eb32_delete(&item->link);
+ pool_free(resolv_answer_item_pool, item->ar_item);
+ pool_free(resolv_answer_item_pool, item);
+ }
+}
+
+/* Releases a resolution from its requester(s) and move it back to the pool */
+static void resolv_free_resolution(struct resolv_resolution *resolution)
+{
+ struct resolv_requester *req, *reqback;
+
+ /* clean up configuration */
+ resolv_reset_resolution(resolution);
+ resolution->hostname_dn = NULL;
+ resolution->hostname_dn_len = 0;
+
+ list_for_each_entry_safe(req, reqback, &resolution->requesters, list) {
+ LIST_DEL_INIT(&req->list);
+ req->resolution = NULL;
+ }
+ resolv_purge_resolution_answer_records(resolution);
+
+ LIST_DEL_INIT(&resolution->list);
+ pool_free(resolv_resolution_pool, resolution);
+}
+
+/* If *<req> is not NULL, returns it, otherwise tries to allocate a requester
+ * and makes it owned by this obj_type, with the proposed callback and error
+ * callback. On success, *req is assigned the allocated requester. Returns
+ * NULL on allocation failure.
+ */
+static struct resolv_requester *
+resolv_get_requester(struct resolv_requester **req, enum obj_type *owner,
+ int (*cb)(struct resolv_requester *, struct dns_counters *),
+ int (*err_cb)(struct resolv_requester *, int))
+{
+ struct resolv_requester *tmp;
+
+ if (*req)
+ return *req;
+
+ tmp = pool_alloc(resolv_requester_pool);
+ if (!tmp)
+ goto end;
+
+ LIST_INIT(&tmp->list);
+ tmp->owner = owner;
+ tmp->resolution = NULL;
+ tmp->requester_cb = cb;
+ tmp->requester_error_cb = err_cb;
+ *req = tmp;
+ end:
+ return tmp;
+}
+
+/* Links a requester (a server or a resolv_srvrq) with a resolution. It returns 0
+ * on success, -1 otherwise.
+ */
+int resolv_link_resolution(void *requester, int requester_type, int requester_locked)
+{
+ struct resolv_resolution *res = NULL;
+ struct resolv_requester *req;
+ struct resolvers *resolvers;
+ struct server *srv = NULL;
+ struct resolv_srvrq *srvrq = NULL;
+ struct stream *stream = NULL;
+ char **hostname_dn;
+ int hostname_dn_len, query_type;
+
+ enter_resolver_code();
+ switch (requester_type) {
+ case OBJ_TYPE_SERVER:
+ srv = (struct server *)requester;
+
+ if (!requester_locked)
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+
+ req = resolv_get_requester(&srv->resolv_requester,
+ &srv->obj_type,
+ snr_resolution_cb,
+ snr_resolution_error_cb);
+
+ if (!requester_locked)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+
+ if (!req)
+ goto err;
+
+ hostname_dn = &srv->hostname_dn;
+ hostname_dn_len = srv->hostname_dn_len;
+ resolvers = srv->resolvers;
+ query_type = ((srv->resolv_opts.family_prio == AF_INET)
+ ? DNS_RTYPE_A
+ : DNS_RTYPE_AAAA);
+ break;
+
+ case OBJ_TYPE_SRVRQ:
+ srvrq = (struct resolv_srvrq *)requester;
+
+ req = resolv_get_requester(&srvrq->requester,
+ &srvrq->obj_type,
+ snr_resolution_cb,
+ srvrq_resolution_error_cb);
+ if (!req)
+ goto err;
+
+ hostname_dn = &srvrq->hostname_dn;
+ hostname_dn_len = srvrq->hostname_dn_len;
+ resolvers = srvrq->resolvers;
+ query_type = DNS_RTYPE_SRV;
+ break;
+
+ case OBJ_TYPE_STREAM:
+ stream = (struct stream *)requester;
+
+ req = resolv_get_requester(&stream->resolv_ctx.requester,
+ &stream->obj_type,
+ act_resolution_cb,
+ act_resolution_error_cb);
+ if (!req)
+ goto err;
+
+ hostname_dn = &stream->resolv_ctx.hostname_dn;
+ hostname_dn_len = stream->resolv_ctx.hostname_dn_len;
+ resolvers = stream->resolv_ctx.parent->arg.resolv.resolvers;
+ query_type = ((stream->resolv_ctx.parent->arg.resolv.opts->family_prio == AF_INET)
+ ? DNS_RTYPE_A
+ : DNS_RTYPE_AAAA);
+ break;
+ default:
+ goto err;
+ }
+
+ /* Get a resolution from the resolvers' wait queue or pool */
+ if ((res = resolv_pick_resolution(resolvers, hostname_dn, hostname_dn_len, query_type)) == NULL)
+ goto err;
+
+ req->resolution = res;
+
+ LIST_APPEND(&res->requesters, &req->list);
+ leave_resolver_code();
+ return 0;
+
+ err:
+ if (res && LIST_ISEMPTY(&res->requesters))
+ resolv_free_resolution(res);
+ leave_resolver_code();
+ return -1;
+}
+
+/* This function removes all server/srvrq references on answer items. */
+void resolv_detach_from_resolution_answer_items(struct resolv_resolution *res, struct resolv_requester *req)
+{
+ struct eb32_node *eb32, *eb32_back;
+ struct resolv_answer_item *item;
+ struct server *srv, *srvback;
+ struct resolv_srvrq *srvrq;
+
+ enter_resolver_code();
+ if ((srv = objt_server(req->owner)) != NULL) {
+ LIST_DEL_INIT(&srv->ip_rec_item);
+ }
+ else if ((srvrq = objt_resolv_srvrq(req->owner)) != NULL) {
+ for (eb32 = eb32_first(&res->response.answer_tree);
+ eb32 && (eb32_back = eb32_next(eb32), 1);
+ eb32 = eb32_back) {
+ item = eb32_entry(eb32, typeof(*item), link);
+ if (item->type == DNS_RTYPE_SRV) {
+ list_for_each_entry_safe(srv, srvback, &item->attached_servers, srv_rec_item) {
+ if (srv->srvrq == srvrq)
+ resolv_srvrq_cleanup_srv(srv);
+ }
+ }
+ }
+ }
+ leave_resolver_code();
+}
+
+/* Removes a requester from a DNS resolution. It takes care of all the
+ * consequences. It also cleans up some parameters from the requester.
+ */
+static void _resolv_unlink_resolution(struct resolv_requester *requester)
+{
+ struct resolv_resolution *res;
+ struct resolv_requester *req;
+
+ /* Nothing to do */
+ if (!requester || !requester->resolution)
+ return;
+ res = requester->resolution;
+
+ /* Clean up the requester */
+ LIST_DEL_INIT(&requester->list);
+ requester->resolution = NULL;
+
+ /* remove ref from the resolution answer item list to the requester */
+ resolv_detach_from_resolution_answer_items(res, requester);
+
+ /* We need to find another requester linked on this resolution */
+ if (!LIST_ISEMPTY(&res->requesters))
+ req = LIST_NEXT(&res->requesters, struct resolv_requester *, list);
+ else {
+ abort_resolution(res);
+ return;
+ }
+
+ /* Move hostname_dn related pointers to the next requester */
+ switch (obj_type(req->owner)) {
+ case OBJ_TYPE_SERVER:
+ res->hostname_dn = __objt_server(req->owner)->hostname_dn;
+ res->hostname_dn_len = __objt_server(req->owner)->hostname_dn_len;
+ break;
+ case OBJ_TYPE_SRVRQ:
+ res->hostname_dn = __objt_resolv_srvrq(req->owner)->hostname_dn;
+ res->hostname_dn_len = __objt_resolv_srvrq(req->owner)->hostname_dn_len;
+ break;
+ case OBJ_TYPE_STREAM:
+ res->hostname_dn = __objt_stream(req->owner)->resolv_ctx.hostname_dn;
+ res->hostname_dn_len = __objt_stream(req->owner)->resolv_ctx.hostname_dn_len;
+ break;
+ default:
+ res->hostname_dn = NULL;
+ res->hostname_dn_len = 0;
+ break;
+ }
+}
+
+/* The public version of the function above that deals with the death row. */
+void resolv_unlink_resolution(struct resolv_requester *requester)
+{
+ enter_resolver_code();
+ _resolv_unlink_resolution(requester);
+ leave_resolver_code();
+}
+
+/* Called when a network IO event occurs on a name server socket for an
+ * incoming packet. It performs the following actions:
+ * - check if the packet requires processing (not outdated resolution)
+ * - ensure the DNS packet received is valid and call requester's callback
+ * - call requester's error callback if invalid response
+ * - check the dn_name in the packet against the one sent
+ */
+static int resolv_process_responses(struct dns_nameserver *ns)
+{
+ struct dns_counters *tmpcounters;
+ struct resolvers *resolvers;
+ struct resolv_resolution *res;
+ unsigned char buf[DNS_MAX_UDP_MESSAGE + 1];
+ unsigned char *bufend;
+ int buflen, dns_resp;
+ int max_answer_records;
+ unsigned short query_id;
+ struct eb32_node *eb;
+ struct resolv_requester *req;
+ int keep_answer_items;
+
+ resolvers = ns->parent;
+ enter_resolver_code();
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+
+ /* process all pending input messages */
+ while (1) {
+ /* read message received */
+ memset(buf, '\0', resolvers->accepted_payload_size + 1);
+ if ((buflen = dns_recv_nameserver(ns, (void *)buf, sizeof(buf))) <= 0) {
+ break;
+ }
+
+ /* message too big */
+ if (buflen > resolvers->accepted_payload_size) {
+ ns->counters->app.resolver.too_big++;
+ continue;
+ }
+
+ /* initializing variables */
+ bufend = buf + buflen; /* pointer to mark the end of the buffer */
+
+ /* read the query id from the packet (16 bits) */
+ if (buf + 2 > bufend) {
+ ns->counters->app.resolver.invalid++;
+ continue;
+ }
+ query_id = resolv_response_get_query_id(buf);
+
+ /* search the query_id in the pending resolution tree */
+ eb = eb32_lookup(&resolvers->query_ids, query_id);
+ if (eb == NULL) {
+ /* unknown query id means an outdated response and can be safely ignored */
+ ns->counters->app.resolver.outdated++;
+ continue;
+ }
+
+ /* known query id means a resolution in progress */
+ res = eb32_entry(eb, struct resolv_resolution, qid);
+ /* number of responses received */
+ res->nb_responses++;
+
+ max_answer_records = (resolvers->accepted_payload_size - DNS_HEADER_SIZE) / DNS_MIN_RECORD_SIZE;
+ dns_resp = resolv_validate_dns_response(buf, bufend, res, max_answer_records);
+
+ switch (dns_resp) {
+ case RSLV_RESP_VALID:
+ break;
+
+ case RSLV_RESP_INVALID:
+ case RSLV_RESP_QUERY_COUNT_ERROR:
+ case RSLV_RESP_WRONG_NAME:
+ res->status = RSLV_STATUS_INVALID;
+ ns->counters->app.resolver.invalid++;
+ break;
+
+ case RSLV_RESP_NX_DOMAIN:
+ res->status = RSLV_STATUS_NX;
+ ns->counters->app.resolver.nx++;
+ break;
+
+ case RSLV_RESP_REFUSED:
+ res->status = RSLV_STATUS_REFUSED;
+ ns->counters->app.resolver.refused++;
+ break;
+
+ case RSLV_RESP_ANCOUNT_ZERO:
+ res->status = RSLV_STATUS_OTHER;
+ ns->counters->app.resolver.any_err++;
+ break;
+
+ case RSLV_RESP_CNAME_ERROR:
+ res->status = RSLV_STATUS_OTHER;
+ ns->counters->app.resolver.cname_error++;
+ break;
+
+ case RSLV_RESP_TRUNCATED:
+ res->status = RSLV_STATUS_OTHER;
+ ns->counters->app.resolver.truncated++;
+ break;
+
+ case RSLV_RESP_NO_EXPECTED_RECORD:
+ case RSLV_RESP_ERROR:
+ case RSLV_RESP_INTERNAL:
+ res->status = RSLV_STATUS_OTHER;
+ ns->counters->app.resolver.other++;
+ break;
+ }
+
+		/* Wait for all nameservers' responses before handling errors */
+ if (dns_resp != RSLV_RESP_VALID && res->nb_responses < res->nb_queries)
+ continue;
+
+ /* Process error codes */
+ if (dns_resp != RSLV_RESP_VALID) {
+ if (res->prefered_query_type != res->query_type) {
+ /* The fallback on the query type was already performed,
+ * so check the try counter. If it falls to 0, we can
+				 * report an error. Otherwise, wait for the next attempt. */
+ if (!res->try)
+ goto report_res_error;
+ }
+ else {
+ /* Fallback from A to AAAA or the opposite and re-send
+ * the resolution immediately. try counter is not
+ * decremented. */
+ if (res->prefered_query_type == DNS_RTYPE_A) {
+ res->query_type = DNS_RTYPE_AAAA;
+ resolv_send_query(res);
+ }
+ else if (res->prefered_query_type == DNS_RTYPE_AAAA) {
+ res->query_type = DNS_RTYPE_A;
+ resolv_send_query(res);
+ }
+ }
+ continue;
+ }
+
+ /* So the resolution succeeded */
+ res->status = RSLV_STATUS_VALID;
+ res->last_valid = now_ms;
+ ns->counters->app.resolver.valid++;
+ goto report_res_success;
+
+ report_res_error:
+ keep_answer_items = 0;
+ list_for_each_entry(req, &res->requesters, list)
+ keep_answer_items |= req->requester_error_cb(req, dns_resp);
+ if (!keep_answer_items)
+ resolv_purge_resolution_answer_records(res);
+ resolv_reset_resolution(res);
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ continue;
+
+ report_res_success:
+		/* Only the first requester is managed by the server, the
+		 * others are served from the cache */
+ tmpcounters = ns->counters;
+ list_for_each_entry(req, &res->requesters, list) {
+ struct server *s = objt_server(req->owner);
+
+ if (s)
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ req->requester_cb(req, tmpcounters);
+ if (s)
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ tmpcounters = NULL;
+ }
+
+ resolv_reset_resolution(res);
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ continue;
+ }
+ resolv_update_resolvers_timeout(resolvers);
+ HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+ leave_resolver_code();
+ return buflen;
+}
+
+/* Processes DNS resolutions. First, it checks the active list to detect
+ * expired resolutions and retries them if possible; otherwise a timeout is
+ * reported. Then, it checks the wait list to trigger new resolutions.
+ */
+struct task *process_resolvers(struct task *t, void *context, unsigned int state)
+{
+ struct resolvers *resolvers = context;
+ struct resolv_resolution *res, *resback;
+ int exp;
+
+ enter_resolver_code();
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+
+ /* Handle all expired resolutions from the active list. Elements that
+ * need to be removed will in fact be moved to the death_row. Other
+ * ones will be handled normally.
+ */
+
+ res = LIST_NEXT(&resolvers->resolutions.curr, struct resolv_resolution *, list);
+ while (&res->list != &resolvers->resolutions.curr) {
+ resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
+
+ if (LIST_ISEMPTY(&res->requesters)) {
+ abort_resolution(res);
+ res = resback;
+ continue;
+ }
+
+		/* When we find the first resolution whose retry time is still
+		 * in the future, we can stop here */
+ exp = tick_add(res->last_query, resolvers->timeout.retry);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+		/* If the current resolution has been tried too many times and
+		 * ends in a timeout, we update its status and remove it from
+		 * the list */
+ if (!res->try) {
+ struct resolv_requester *req;
+ int keep_answer_items = 0;
+
+ /* Notify the result to the requesters */
+ if (!res->nb_responses)
+ res->status = RSLV_STATUS_TIMEOUT;
+ list_for_each_entry(req, &res->requesters, list)
+ keep_answer_items |= req->requester_error_cb(req, res->status);
+ if (!keep_answer_items)
+ resolv_purge_resolution_answer_records(res);
+
+ /* Clean up resolution info and remove it from the
+ * current list */
+ resolv_reset_resolution(res);
+
+ /* subsequent entries might have been deleted here */
+ resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ res = resback;
+ }
+ else {
+ /* Otherwise resend the DNS query and requeue the resolution */
+ if (!res->nb_responses || res->prefered_query_type != res->query_type) {
+ /* No response received (a real timeout) or fallback already done */
+ res->query_type = res->prefered_query_type;
+ res->try--;
+ }
+ else {
+ /* Fallback from A to AAAA or the opposite and re-send
+ * the resolution immediately. try counter is not
+ * decremented. */
+ if (res->prefered_query_type == DNS_RTYPE_A)
+ res->query_type = DNS_RTYPE_AAAA;
+ else if (res->prefered_query_type == DNS_RTYPE_AAAA)
+ res->query_type = DNS_RTYPE_A;
+ else
+ res->try--;
+ }
+ resolv_send_query(res);
+ resback = LIST_NEXT(&res->list, struct resolv_resolution *, list);
+ res = resback;
+ }
+ }
+
+ /* Handle all resolutions in the wait list */
+ list_for_each_entry_safe(res, resback, &resolvers->resolutions.wait, list) {
+
+ if (unlikely(stopping)) {
+			/* If haproxy is stopping, check the resolution to know whether it must be run or not.
+			 * If at least one requester is a stream (because of a do-resolve action) or if there
+ * is a requester attached to a running proxy, the resolution is performed.
+ * Otherwise, it is skipped for now.
+ */
+ struct resolv_requester *req;
+ int must_run = 0;
+
+ list_for_each_entry(req, &res->requesters, list) {
+ struct proxy *px = NULL;
+
+ switch (obj_type(req->owner)) {
+ case OBJ_TYPE_SERVER:
+ px = __objt_server(req->owner)->proxy;
+ break;
+ case OBJ_TYPE_SRVRQ:
+ px = __objt_resolv_srvrq(req->owner)->proxy;
+ break;
+ case OBJ_TYPE_STREAM:
+ /* Always perform the resolution */
+ must_run = 1;
+ break;
+ default:
+ break;
+ }
+ /* Perform the resolution if the proxy is not stopped or disabled */
+ if (px && !(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))
+ must_run = 1;
+
+ if (must_run)
+ break;
+ }
+
+ if (!must_run) {
+				/* Skip the resolution: reset it and wait for the next wakeup */
+ resolv_reset_resolution(res);
+ continue;
+ }
+ }
+
+ if (LIST_ISEMPTY(&res->requesters)) {
+ abort_resolution(res);
+ continue;
+ }
+
+ exp = tick_add(res->last_resolution, resolv_resolution_timeout(res));
+ if (tick_isset(res->last_resolution) && !tick_is_expired(exp, now_ms))
+ continue;
+
+ if (resolv_run_resolution(res) != 1) {
+ res->last_resolution = now_ms;
+ LIST_DEL_INIT(&res->list);
+ LIST_APPEND(&resolvers->resolutions.wait, &res->list);
+ }
+ }
+
+ resolv_update_resolvers_timeout(resolvers);
+ HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+
+ if (unlikely(stopping)) {
+ struct dns_nameserver *ns;
+
+ if (LIST_ISEMPTY(&resolvers->resolutions.curr))
+ t->expire = TICK_ETERNITY;
+
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ if (ns->stream)
+ task_wakeup(ns->stream->task_idle, TASK_WOKEN_MSG);
+ }
+ }
+
+ /* now we can purge all queued deletions */
+ leave_resolver_code();
+ return t;
+}
+
+
+/* destroys a resolvers section */
+static void resolvers_destroy(struct resolvers *resolvers)
+{
+ struct dns_nameserver *ns, *nsback;
+ struct resolv_resolution *res, *resback;
+ struct resolv_requester *req, *reqback;
+
+ list_for_each_entry_safe(ns, nsback, &resolvers->nameservers, list) {
+ free(ns->id);
+ free((char *)ns->conf.file);
+ if (ns->dgram) {
+ if (ns->dgram->conn.t.sock.fd != -1) {
+ fd_delete(ns->dgram->conn.t.sock.fd);
+ close(ns->dgram->conn.t.sock.fd);
+ }
+ ring_free(ns->dgram->ring_req);
+ free(ns->dgram);
+ }
+ if (ns->stream) {
+ ring_free(ns->stream->ring_req);
+ task_destroy(ns->stream->task_req);
+ task_destroy(ns->stream->task_rsp);
+ free(ns->stream);
+ }
+ LIST_DEL_INIT(&ns->list);
+ EXTRA_COUNTERS_FREE(ns->extra_counters);
+ free(ns);
+ }
+
+ list_for_each_entry_safe(res, resback, &resolvers->resolutions.curr, list) {
+ list_for_each_entry_safe(req, reqback, &res->requesters, list) {
+ LIST_DEL_INIT(&req->list);
+ pool_free(resolv_requester_pool, req);
+ }
+ resolv_free_resolution(res);
+ }
+
+ list_for_each_entry_safe(res, resback, &resolvers->resolutions.wait, list) {
+ list_for_each_entry_safe(req, reqback, &res->requesters, list) {
+ LIST_DEL_INIT(&req->list);
+ pool_free(resolv_requester_pool, req);
+ }
+ resolv_free_resolution(res);
+ }
+
+ free_proxy(resolvers->px);
+ free(resolvers->id);
+ free((char *)resolvers->conf.file);
+ task_destroy(resolvers->t);
+ LIST_DEL_INIT(&resolvers->list);
+ free(resolvers);
+}
+
+/* Releases all memory allocated by the resolvers */
+static void resolvers_deinit(void)
+{
+ struct resolvers *resolvers, *resolversback;
+ struct resolv_srvrq *srvrq, *srvrqback;
+
+ list_for_each_entry_safe(resolvers, resolversback, &sec_resolvers, list) {
+ resolvers_destroy(resolvers);
+ }
+
+ list_for_each_entry_safe(srvrq, srvrqback, &resolv_srvrq_list, list) {
+ free(srvrq->name);
+ free(srvrq->hostname_dn);
+ LIST_DEL_INIT(&srvrq->list);
+ free(srvrq);
+ }
+}
+
+/* Finalizes the DNS configuration by allocating required resources and checking
+ * live parameters.
+ * Returns 0 on success, 1 on error.
+ */
+static int resolvers_finalize_config(void)
+{
+ struct resolvers *resolvers;
+ struct proxy *px;
+ int err_code = 0;
+
+ enter_resolver_code();
+
+	/* for each resolvers section, check the nameservers' sockets and create the section's task */
+ list_for_each_entry(resolvers, &sec_resolvers, list) {
+ struct dns_nameserver *ns;
+ struct task *t;
+
+ /* Check if we can create the socket with nameservers info */
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ int fd;
+
+ if (ns->dgram) {
+ /* Check nameserver info */
+ if ((fd = socket(ns->dgram->conn.addr.to.ss_family, SOCK_DGRAM, IPPROTO_UDP)) == -1) {
+					if (!resolvers->conf.implicit) { /* only complain if it was configured manually */
+ ha_alert("resolvers '%s': can't create socket for nameserver '%s'.\n",
+ resolvers->id, ns->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ }
+ continue;
+ }
+ if (connect(fd, (struct sockaddr*)&ns->dgram->conn.addr.to, get_addr_len(&ns->dgram->conn.addr.to)) == -1) {
+ if (!resolvers->conf.implicit) { /* emit a warning only if it was configured manually */
+ ha_warning("resolvers '%s': can't connect socket for nameserver '%s'.\n",
+ resolvers->id, ns->id);
+ }
+ close(fd);
+ err_code |= ERR_WARN;
+ continue;
+ }
+ close(fd);
+ }
+ }
+
+ /* Create the task associated to the resolvers section */
+ if ((t = task_new_anywhere()) == NULL) {
+ ha_alert("resolvers '%s' : out of memory.\n", resolvers->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ goto err;
+ }
+
+ /* Update task's parameters */
+ t->process = process_resolvers;
+ t->context = resolvers;
+ resolvers->t = t;
+ task_wakeup(t, TASK_WOKEN_INIT);
+ }
+
+ for (px = proxies_list; px; px = px->next) {
+ struct server *srv;
+
+ if (px->flags & PR_FL_DISABLED) {
+ /* must not run and will not work anyway since
+ * nothing in the proxy is initialized.
+ */
+ continue;
+ }
+
+ for (srv = px->srv; srv; srv = srv->next) {
+ struct resolvers *resolvers;
+
+ if (!srv->resolvers_id)
+ continue;
+
+ if ((resolvers = find_resolvers_by_id(srv->resolvers_id)) == NULL) {
+ ha_alert("%s '%s', server '%s': unable to find required resolvers '%s'\n",
+ proxy_type_str(px), px->id, srv->id, srv->resolvers_id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ continue;
+ }
+ srv->resolvers = resolvers;
+ srv->srvrq_check = NULL;
+ if (srv->srvrq) {
+ if (!srv->srvrq->resolvers) {
+ srv->srvrq->resolvers = srv->resolvers;
+ if (resolv_link_resolution(srv->srvrq, OBJ_TYPE_SRVRQ, 0) == -1) {
+ ha_alert("%s '%s' : unable to set DNS resolution for server '%s'.\n",
+ proxy_type_str(px), px->id, srv->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ continue;
+ }
+ }
+
+ srv->srvrq_check = task_new_anywhere();
+ if (!srv->srvrq_check) {
+ ha_alert("%s '%s' : unable to create SRVRQ task for server '%s'.\n",
+ proxy_type_str(px), px->id, srv->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ goto err;
+ }
+ srv->srvrq_check->process = resolv_srvrq_expire_task;
+ srv->srvrq_check->context = srv;
+ srv->srvrq_check->expire = TICK_ETERNITY;
+ }
+ else if (resolv_link_resolution(srv, OBJ_TYPE_SERVER, 0) == -1) {
+ ha_alert("%s '%s', unable to set DNS resolution for server '%s'.\n",
+ proxy_type_str(px), px->id, srv->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ continue;
+ }
+
+ srv->flags |= SRV_F_NON_PURGEABLE;
+ }
+ }
+
+ if (err_code & (ERR_ALERT|ERR_ABORT))
+ goto err;
+
+ leave_resolver_code();
+ return 0;
+ err:
+ leave_resolver_code();
+ resolvers_deinit();
+ return 1;
+
+}
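+
+/* Illustrative only (not part of the build): the binding performed above
+ * corresponds to configurations such as the following, where all names and
+ * addresses are examples:
+ *
+ *   backend be_app
+ *     server app1 app.example.com:443 resolvers mydns check
+ *
+ * SRV-record based server templates take the srvrq path instead:
+ *
+ *   backend be_sd
+ *     server-template app 5 _http._tcp.example.com resolvers mydns check
+ */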
+
+static int stats_dump_resolv_to_buffer(struct stconn *sc,
+ struct dns_nameserver *ns,
+ struct field *stats, size_t stats_count,
+ struct list *stat_modules)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct stats_module *mod;
+ size_t idx = 0;
+
+ memset(stats, 0, sizeof(struct field) * stats_count);
+
+ list_for_each_entry(mod, stat_modules, list) {
+ struct counters_node *counters = EXTRA_COUNTERS_GET(ns->extra_counters, mod);
+
+ mod->fill_stats(counters, stats + idx);
+ idx += mod->stats_count;
+ }
+
+ if (!stats_dump_one_line(stats, idx, appctx))
+ return 0;
+
+ if (!stats_putchk(appctx, NULL))
+ goto full;
+
+ return 1;
+
+ full:
+ return 0;
+}
+
+/* Uses the show_stat_ctx's <obj1> as a pointer to the current resolvers
+ * section and <obj2> as a pointer to the current nameserver.
+ */
+int stats_dump_resolvers(struct stconn *sc,
+ struct field *stats, size_t stats_count,
+ struct list *stat_modules)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct channel *rep = sc_ic(sc);
+ struct resolvers *resolver = ctx->obj1;
+ struct dns_nameserver *ns = ctx->obj2;
+
+ if (!resolver)
+ resolver = LIST_NEXT(&sec_resolvers, struct resolvers *, list);
+
+ /* dump resolvers */
+ list_for_each_entry_from(resolver, &sec_resolvers, list) {
+ ctx->obj1 = resolver;
+
+ ns = ctx->obj2 ?
+ ctx->obj2 :
+ LIST_NEXT(&resolver->nameservers, struct dns_nameserver *, list);
+
+ list_for_each_entry_from(ns, &resolver->nameservers, list) {
+ ctx->obj2 = ns;
+
+ if (buffer_almost_full(&rep->buf)) {
+ sc_need_room(sc, b_size(&rep->buf) / 2);
+ goto full;
+ }
+
+ if (!stats_dump_resolv_to_buffer(sc, ns,
+ stats, stats_count,
+ stat_modules)) {
+ return 0;
+ }
+ }
+
+ ctx->obj2 = NULL;
+ }
+
+ return 1;
+
+ full:
+ return 0;
+}
+
+void resolv_stats_clear_counters(int clrall, struct list *stat_modules)
+{
+ struct resolvers *resolvers;
+ struct dns_nameserver *ns;
+ struct stats_module *mod;
+ void *counters;
+
+ list_for_each_entry(mod, stat_modules, list) {
+ if (!mod->clearable && !clrall)
+ continue;
+
+ list_for_each_entry(resolvers, &sec_resolvers, list) {
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ counters = EXTRA_COUNTERS_GET(ns->extra_counters, mod);
+ memcpy(counters, mod->counters, mod->counters_size);
+ }
+ }
+ }
+
+}
+
+int resolv_allocate_counters(struct list *stat_modules)
+{
+ struct stats_module *mod;
+ struct resolvers *resolvers;
+ struct dns_nameserver *ns;
+
+ list_for_each_entry(resolvers, &sec_resolvers, list) {
+ list_for_each_entry(ns, &resolvers->nameservers, list) {
+ EXTRA_COUNTERS_REGISTER(&ns->extra_counters, COUNTERS_RSLV,
+ alloc_failed);
+
+ list_for_each_entry(mod, stat_modules, list) {
+ EXTRA_COUNTERS_ADD(mod,
+ ns->extra_counters,
+ mod->counters,
+ mod->counters_size);
+ }
+
+ EXTRA_COUNTERS_ALLOC(ns->extra_counters, alloc_failed);
+
+ list_for_each_entry(mod, stat_modules, list) {
+ memcpy(ns->extra_counters->data + mod->counters_off[ns->extra_counters->type],
+ mod->counters, mod->counters_size);
+
+ /* Store the ns counters pointer */
+ if (strcmp(mod->name, "resolvers") == 0) {
+ ns->counters = (struct dns_counters *)ns->extra_counters->data + mod->counters_off[COUNTERS_RSLV];
+ ns->counters->id = ns->id;
+ ns->counters->pid = resolvers->id;
+ }
+ }
+ }
+ }
+
+ return 1;
+
+alloc_failed:
+ return 0;
+}
+
+/* If an argument is found, it sets the resolvers section pointer in the
+ * show_resolvers_ctx struct pointed to by svcctx; the pointer is left NULL
+ * when dumping all sections.
+ */
+static int cli_parse_stat_resolvers(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_resolvers_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct resolvers *presolvers;
+
+ if (*args[2]) {
+ list_for_each_entry(presolvers, &sec_resolvers, list) {
+ if (strcmp(presolvers->id, args[2]) == 0) {
+ ctx->forced_section = presolvers;
+ break;
+ }
+ }
+ if (ctx->forced_section == NULL)
+ return cli_err(appctx, "Can't find that resolvers section\n");
+ }
+ return 0;
+}
+
+/* Dumps counters from all resolvers sections and associated name servers. It
+ * returns 0 if the output buffer is full and it needs to be called again,
+ * otherwise non-zero. It may limit itself to the resolver pointed to by the
+ * <resolvers> field of struct show_resolvers_ctx pointed to by <svcctx> if
+ * it's not null.
+ */
+static int cli_io_handler_dump_resolvers_to_buffer(struct appctx *appctx)
+{
+ struct show_resolvers_ctx *ctx = appctx->svcctx;
+ struct resolvers *resolvers = ctx->resolvers;
+ struct dns_nameserver *ns;
+
+ chunk_reset(&trash);
+
+ if (LIST_ISEMPTY(&sec_resolvers)) {
+ if (applet_putstr(appctx, "No resolvers found\n") == -1)
+ goto full;
+ }
+ else {
+ if (!resolvers)
+ resolvers = LIST_ELEM(sec_resolvers.n, typeof(resolvers), list);
+
+ list_for_each_entry_from(resolvers, &sec_resolvers, list) {
+ if (ctx->forced_section != NULL && ctx->forced_section != resolvers)
+ continue;
+
+ ctx->resolvers = resolvers;
+ ns = ctx->ns;
+
+ if (!ns) {
+ chunk_printf(&trash, "Resolvers section %s\n", resolvers->id);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+
+ ns = LIST_ELEM(resolvers->nameservers.n, typeof(ns), list);
+ ctx->ns = ns;
+ }
+
+ list_for_each_entry_from(ns, &resolvers->nameservers, list) {
+ chunk_reset(&trash);
+ chunk_appendf(&trash, " nameserver %s:\n", ns->id);
+ chunk_appendf(&trash, " sent: %lld\n", ns->counters->sent);
+ chunk_appendf(&trash, " snd_error: %lld\n", ns->counters->snd_error);
+ chunk_appendf(&trash, " valid: %lld\n", ns->counters->app.resolver.valid);
+ chunk_appendf(&trash, " update: %lld\n", ns->counters->app.resolver.update);
+ chunk_appendf(&trash, " cname: %lld\n", ns->counters->app.resolver.cname);
+ chunk_appendf(&trash, " cname_error: %lld\n", ns->counters->app.resolver.cname_error);
+ chunk_appendf(&trash, " any_err: %lld\n", ns->counters->app.resolver.any_err);
+ chunk_appendf(&trash, " nx: %lld\n", ns->counters->app.resolver.nx);
+ chunk_appendf(&trash, " timeout: %lld\n", ns->counters->app.resolver.timeout);
+ chunk_appendf(&trash, " refused: %lld\n", ns->counters->app.resolver.refused);
+ chunk_appendf(&trash, " other: %lld\n", ns->counters->app.resolver.other);
+ chunk_appendf(&trash, " invalid: %lld\n", ns->counters->app.resolver.invalid);
+ chunk_appendf(&trash, " too_big: %lld\n", ns->counters->app.resolver.too_big);
+ chunk_appendf(&trash, " truncated: %lld\n", ns->counters->app.resolver.truncated);
+ chunk_appendf(&trash, " outdated: %lld\n", ns->counters->app.resolver.outdated);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+ ctx->ns = ns;
+ }
+
+ ctx->ns = NULL;
+
+ /* was this the only section to dump ? */
+ if (ctx->forced_section)
+ break;
+ }
+ }
+
+ /* done! */
+ return 1;
+ full:
+ /* the output buffer is full, retry later */
+ return 0;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ }, {
+ { { "show", "resolvers", NULL }, "show resolvers [id] : dumps counters from all resolvers section and associated name servers",
+ cli_parse_stat_resolvers, cli_io_handler_dump_resolvers_to_buffer },
+ {{},}
+ }
+};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
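+
+/* Usage sketch (illustrative only): the keyword registered above is invoked
+ * on the stats socket; the socket path below is an example:
+ *
+ *   $ echo "show resolvers" | socat stdio /var/run/haproxy.sock
+ *   Resolvers section mydns
+ *    nameserver dns1:
+ *     sent:        42
+ *     valid:       40
+ *     ...
+ *
+ * An optional section name restricts the dump, e.g. "show resolvers mydns".
+ */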
+
+/*
+ * Prepare <stream> for hostname resolution.
+ * Returns -1 in case of any allocation failure, 0 otherwise.
+ * On error, a global failure counter is also incremented.
+ */
+static int action_prepare_for_resolution(struct stream *stream, const char *hostname, int hostname_len)
+{
+ char *hostname_dn;
+ int hostname_dn_len;
+ struct buffer *tmp = get_trash_chunk();
+
+ if (!hostname)
+ return 0;
+
+ hostname_dn = tmp->area;
+ hostname_dn_len = resolv_str_to_dn_label(hostname, hostname_len,
+ hostname_dn, tmp->size);
+ if (hostname_dn_len == -1)
+ goto err;
+
+
+ stream->resolv_ctx.hostname_dn = strdup(hostname_dn);
+ stream->resolv_ctx.hostname_dn_len = hostname_dn_len;
+ if (!stream->resolv_ctx.hostname_dn)
+ goto err;
+
+ return 0;
+
+ err:
+ ha_free(&stream->resolv_ctx.hostname_dn);
+ resolv_failed_resolutions += 1;
+ return -1;
+}
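+
+/* For reference, resolv_str_to_dn_label() used above converts a dotted
+ * hostname into DNS wire format, where each label is prefixed by its length
+ * and the name is terminated by a zero byte, e.g.:
+ *
+ *   "www.example.com" -> "\x03www\x07example\x03com\x00"
+ */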
+
+
+/*
+ * Execute the "do-resolution" action. May be called from {tcp,http}request.
+ */
+enum act_return resolv_action_do_resolve(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct resolv_resolution *resolution;
+ struct sample *smp;
+ struct resolv_requester *req;
+ struct resolvers *resolvers;
+ struct resolv_resolution *res;
+ int exp, locked = 0;
+ enum act_return ret = ACT_RET_CONT;
+
+ resolvers = rule->arg.resolv.resolvers;
+
+ enter_resolver_code();
+
+ /* we have a response to our DNS resolution */
+ use_cache:
+ if (s->resolv_ctx.requester && s->resolv_ctx.requester->resolution != NULL) {
+ resolution = s->resolv_ctx.requester->resolution;
+ if (!locked) {
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+ locked = 1;
+ }
+
+ if (resolution->step == RSLV_STEP_RUNNING)
+ goto yield;
+ if (resolution->step == RSLV_STEP_NONE) {
+ /* We update the variable only if we have a valid
+ * response. If the response was not received yet, we
+ * must yield.
+ */
+ if (resolution->status == RSLV_STATUS_NONE)
+ goto yield;
+ if (resolution->status == RSLV_STATUS_VALID) {
+ struct sample smp;
+ short ip_sin_family = 0;
+ void *ip = NULL;
+
+ resolv_get_ip_from_response(&resolution->response, rule->arg.resolv.opts, NULL,
+ 0, &ip, &ip_sin_family, NULL);
+
+ switch (ip_sin_family) {
+ case AF_INET:
+ smp.data.type = SMP_T_IPV4;
+ memcpy(&smp.data.u.ipv4, ip, 4);
+ break;
+ case AF_INET6:
+ smp.data.type = SMP_T_IPV6;
+ memcpy(&smp.data.u.ipv6, ip, 16);
+ break;
+ default:
+ ip = NULL;
+ }
+
+ if (ip) {
+ smp.px = px;
+ smp.sess = sess;
+ smp.strm = s;
+
+ vars_set_by_name(rule->arg.resolv.varname, strlen(rule->arg.resolv.varname), &smp);
+ }
+ }
+ }
+
+ goto release_requester;
+ }
+
+ /* need to configure and start a new DNS resolution */
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.resolv.expr, SMP_T_STR);
+ if (smp == NULL)
+ goto end;
+
+ if (action_prepare_for_resolution(s, smp->data.u.str.area, smp->data.u.str.data) == -1)
+ goto end; /* on error, ignore the action */
+
+ s->resolv_ctx.parent = rule;
+
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+ locked = 1;
+
+ resolv_link_resolution(s, OBJ_TYPE_STREAM, 0);
+
+ /* Check if there is a fresh enough response in the cache of our associated resolution */
+ req = s->resolv_ctx.requester;
+ if (!req || !req->resolution)
+ goto release_requester; /* on error, ignore the action */
+ res = req->resolution;
+
+ exp = tick_add(res->last_resolution, resolvers->hold.valid);
+ if (resolvers->t && res->status == RSLV_STATUS_VALID && tick_isset(res->last_resolution)
+ && !tick_is_expired(exp, now_ms)) {
+ goto use_cache;
+ }
+
+ resolv_trigger_resolution(s->resolv_ctx.requester);
+
+ yield:
+ if (flags & ACT_OPT_FINAL)
+ goto release_requester;
+ ret = ACT_RET_YIELD;
+
+ end:
+ leave_resolver_code();
+ if (locked)
+ HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+ return ret;
+
+ release_requester:
+ ha_free(&s->resolv_ctx.hostname_dn);
+ s->resolv_ctx.hostname_dn_len = 0;
+ if (s->resolv_ctx.requester) {
+ _resolv_unlink_resolution(s->resolv_ctx.requester);
+ pool_free(resolv_requester_pool, s->resolv_ctx.requester);
+ s->resolv_ctx.requester = NULL;
+ }
+ goto end;
+}
+
+static void release_resolv_action(struct act_rule *rule)
+{
+ release_sample_expr(rule->arg.resolv.expr);
+ free(rule->arg.resolv.varname);
+ free(rule->arg.resolv.resolvers_id);
+ free(rule->arg.resolv.opts);
+}
+
+
+/* parse "do-resolve" action
+ * This action takes the following arguments:
+ * do-resolve(<varName>,<resolversSectionName>,<resolvePrefer>) <expr>
+ *
+ * - <varName> is the variable name where the result of the DNS resolution will be stored
+ * (mandatory)
+ * - <resolversSectionName> is the name of the resolvers section to use to perform the resolution
+ * (mandatory)
+ * - <resolvePrefer> can be either 'ipv4' or 'ipv6' and is the IP family we would like to resolve first
+ * (optional), defaults to ipv6
+ * - <expr> is an HAProxy expression used to fetch the name to be resolved
+ */
+enum act_parse_ret resolv_parse_do_resolve(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
+{
+ int cur_arg;
+ struct sample_expr *expr;
+ unsigned int where;
+ const char *beg, *end;
+
+ /* orig_arg points to the first argument, but we need to analyse the command itself first */
+ cur_arg = *orig_arg - 1;
+
+ /* locate varName, which is mandatory */
+ beg = strchr(args[cur_arg], '(');
+ if (beg == NULL)
+ goto do_resolve_parse_error;
+	beg = beg + 1; /* beg now points to the first character after the opening parenthesis '(' */
+ end = strchr(beg, ',');
+ if (end == NULL)
+ goto do_resolve_parse_error;
+ rule->arg.resolv.varname = my_strndup(beg, end - beg);
+ if (rule->arg.resolv.varname == NULL)
+ goto do_resolve_parse_error;
+
+
+ /* locate resolversSectionName, which is mandatory.
+ * Since next parameters are optional, the delimiter may be comma ','
+ * or closing parenthesis ')'
+ */
+ beg = end + 1;
+ end = strchr(beg, ',');
+ if (end == NULL)
+ end = strchr(beg, ')');
+ if (end == NULL)
+ goto do_resolve_parse_error;
+ rule->arg.resolv.resolvers_id = my_strndup(beg, end - beg);
+ if (rule->arg.resolv.resolvers_id == NULL)
+ goto do_resolve_parse_error;
+
+
+ rule->arg.resolv.opts = calloc(1, sizeof(*rule->arg.resolv.opts));
+ if (rule->arg.resolv.opts == NULL)
+ goto do_resolve_parse_error;
+
+ /* Default priority is ipv6 */
+ rule->arg.resolv.opts->family_prio = AF_INET6;
+
+ /* optional arguments accepted for now:
+ * ipv4 or ipv6
+ */
+ while (*end != ')') {
+ beg = end + 1;
+ end = strchr(beg, ',');
+ if (end == NULL)
+ end = strchr(beg, ')');
+ if (end == NULL)
+ goto do_resolve_parse_error;
+
+ if (strncmp(beg, "ipv4", end - beg) == 0) {
+ rule->arg.resolv.opts->family_prio = AF_INET;
+ }
+ else if (strncmp(beg, "ipv6", end - beg) == 0) {
+ rule->arg.resolv.opts->family_prio = AF_INET6;
+ }
+ else {
+ goto do_resolve_parse_error;
+ }
+ }
+
+ cur_arg = cur_arg + 1;
+
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
+ if (!expr)
+ goto do_resolve_parse_error;
+
+
+ where = 0;
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ free(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->arg.resolv.expr = expr;
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = resolv_action_do_resolve;
+ *orig_arg = cur_arg;
+
+ rule->check_ptr = check_action_do_resolve;
+ rule->release_ptr = release_resolv_action;
+
+ return ACT_RET_PRS_OK;
+
+ do_resolve_parse_error:
+ ha_free(&rule->arg.resolv.varname);
+ ha_free(&rule->arg.resolv.resolvers_id);
+ memprintf(err, "Can't parse '%s'. Expects 'do-resolve(<varname>,<resolvers>[,<options>]) <expr>'. Available options are 'ipv4' and 'ipv6'",
+ args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+}
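+
+/* Illustrative configuration sketch (not part of the build): assuming a
+ * resolvers section named "mydns" exists, the parser above accepts rules
+ * such as:
+ *
+ *   http-request do-resolve(txn.myip,mydns,ipv4) hdr(Host),lower
+ *
+ * "txn.myip" is a hypothetical variable name; the resolved address stored in
+ * it can then be reused, e.g. with "http-request set-dst var(txn.myip)".
+ * The same action is also registered below for "tcp-request content" rules.
+ */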
+
+static struct action_kw_list http_req_kws = { { }, {
+ { "do-resolve", resolv_parse_do_resolve, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
+
+static struct action_kw_list tcp_req_cont_actions = {ILH, {
+ { "do-resolve", resolv_parse_do_resolve, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_actions);
+
+/* Check an "http-request do-resolve" action.
+ *
+ * The function returns 1 in success case, otherwise, it returns 0 and err is
+ * filled.
+ */
+int check_action_do_resolve(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct resolvers *resolvers = NULL;
+
+ if (rule->arg.resolv.resolvers_id == NULL) {
+ memprintf(err,"Proxy '%s': %s", px->id, "do-resolve action without resolvers");
+ return 0;
+ }
+
+ resolvers = find_resolvers_by_id(rule->arg.resolv.resolvers_id);
+ if (resolvers == NULL) {
+ memprintf(err,"Can't find resolvers section '%s' for do-resolve action", rule->arg.resolv.resolvers_id);
+ return 0;
+ }
+ rule->arg.resolv.resolvers = resolvers;
+
+ return 1;
+}
+
+void resolvers_setup_proxy(struct proxy *px)
+{
+ px->last_change = ns_to_sec(now_ns);
+ px->cap = PR_CAP_FE | PR_CAP_BE;
+ px->maxconn = 0;
+ px->conn_retries = 1;
+ px->timeout.server = TICK_ETERNITY;
+ px->timeout.client = TICK_ETERNITY;
+	px->timeout.connect = 1000; // by default the same as timeout.resolve
+ px->accept = NULL;
+ px->options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON;
+}
+
+static int parse_resolve_conf(char **errmsg, char **warnmsg)
+{
+ struct dns_nameserver *newnameserver = NULL;
+ const char *whitespace = "\r\n\t ";
+ char *resolv_line = NULL;
+ int resolv_linenum = 0;
+ FILE *f = NULL;
+ char *address = NULL;
+ struct sockaddr_storage *sk = NULL;
+ struct protocol *proto;
+ int duplicate_name = 0;
+ int err_code = 0;
+
+ if ((resolv_line = malloc(sizeof(*resolv_line) * LINESIZE)) == NULL) {
+ memprintf(errmsg, "out of memory.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto resolv_out;
+ }
+
+ if ((f = fopen("/etc/resolv.conf", "r")) == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "failed to open /etc/resolv.conf.");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto resolv_out;
+ }
+
+ sk = calloc(1, sizeof(*sk));
+ if (sk == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto resolv_out;
+ }
+
+ while (fgets(resolv_line, LINESIZE, f) != NULL) {
+ resolv_linenum++;
+ if (strncmp(resolv_line, "nameserver", 10) != 0)
+ continue;
+
+ address = strtok(resolv_line + 10, whitespace);
+ if (address == resolv_line + 10)
+ continue;
+
+ if (address == NULL) {
+ if (warnmsg)
+ memprintf(warnmsg, "%sparsing [/etc/resolv.conf:%d] : nameserver line is missing address.\n",
+ *warnmsg ? *warnmsg : "", resolv_linenum);
+ err_code |= ERR_WARN;
+ continue;
+ }
+
+ duplicate_name = 0;
+ list_for_each_entry(newnameserver, &curr_resolvers->nameservers, list) {
+ if (strcmp(newnameserver->id, address) == 0) {
+ if (warnmsg)
+ memprintf(warnmsg, "%sParsing [/etc/resolv.conf:%d] : generated name for /etc/resolv.conf nameserver '%s' conflicts with another nameserver (declared at %s:%d), it appears to be a duplicate and will be excluded.\n",
+ *warnmsg ? *warnmsg : "", resolv_linenum, address, newnameserver->conf.file, newnameserver->conf.line);
+ err_code |= ERR_WARN;
+ duplicate_name = 1;
+ }
+ }
+
+ if (duplicate_name)
+ continue;
+
+ memset(sk, 0, sizeof(*sk));
+ if (!str2ip2(address, sk, 1)) {
+ if (warnmsg)
+ memprintf(warnmsg, "%sparsing [/etc/resolv.conf:%d] : address '%s' could not be recognized, nameserver will be excluded.\n",
+ *warnmsg ? *warnmsg : "", resolv_linenum, address);
+ err_code |= ERR_WARN;
+ continue;
+ }
+
+ set_host_port(sk, 53);
+
+ proto = protocol_lookup(sk->ss_family, PROTO_TYPE_STREAM, 0);
+ if (!proto || !proto->connect) {
+ if (warnmsg)
+ memprintf(warnmsg, "%sparsing [/etc/resolv.conf:%d] : '%s' : connect() not supported for this address family.\n",
+ *warnmsg ? *warnmsg : "", resolv_linenum, address);
+ err_code |= ERR_WARN;
+ continue;
+ }
+
+ if ((newnameserver = calloc(1, sizeof(*newnameserver))) == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto resolv_out;
+ }
+
+ if (dns_dgram_init(newnameserver, sk) < 0) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(newnameserver);
+ goto resolv_out;
+ }
+
+ newnameserver->conf.file = strdup("/etc/resolv.conf");
+ if (newnameserver->conf.file == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free(newnameserver);
+ goto resolv_out;
+ }
+
+ newnameserver->id = strdup(address);
+ if (newnameserver->id == NULL) {
+ if (errmsg)
+ memprintf(errmsg, "parsing [/etc/resolv.conf:%d] : out of memory.", resolv_linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ free((char *)newnameserver->conf.file);
+ free(newnameserver);
+ goto resolv_out;
+ }
+
+ newnameserver->parent = curr_resolvers;
+ newnameserver->process_responses = resolv_process_responses;
+ newnameserver->conf.line = resolv_linenum;
+ LIST_APPEND(&curr_resolvers->nameservers, &newnameserver->list);
+ }
+
+resolv_out:
+ free(sk);
+ free(resolv_line);
+ if (f != NULL)
+ fclose(f);
+
+ return err_code;
+}
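+
+/* For reference, the parser above only honours "nameserver" lines from a
+ * classic /etc/resolv.conf file, e.g.:
+ *
+ *   nameserver 127.0.0.53
+ *   nameserver 2001:4860:4860::8888
+ *
+ * Each address becomes a UDP nameserver on port 53; duplicates and
+ * unparsable addresses are skipped with a warning.
+ */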
+
+static int resolvers_new(struct resolvers **resolvers, const char *id, const char *file, int linenum)
+{
+ struct resolvers *r = NULL;
+ struct proxy *p = NULL;
+ int err_code = 0;
+
+ if ((r = calloc(1, sizeof(*r))) == NULL) {
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+	/* allocate a new proxy for the TCP servers */
+ p = calloc(1, sizeof *p);
+ if (!p) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ init_new_proxy(p);
+ resolvers_setup_proxy(p);
+ p->parent = r;
+ p->id = strdup(id);
+ p->conf.args.file = p->conf.file = strdup(file);
+ p->conf.args.line = p->conf.line = linenum;
+ r->px = p;
+
+ /* default values */
+ LIST_APPEND(&sec_resolvers, &r->list);
+ r->conf.file = strdup(file);
+ r->conf.line = linenum;
+ r->id = strdup(id);
+ r->query_ids = EB_ROOT;
+ /* default maximum response size */
+ r->accepted_payload_size = 512;
+ /* default hold period for nx, other, refuse and timeout is 30s */
+ r->hold.nx = 30000;
+ r->hold.other = 30000;
+ r->hold.refused = 30000;
+ r->hold.timeout = 30000;
+ r->hold.obsolete = 0;
+ /* default hold period for valid is 10s */
+ r->hold.valid = 10000;
+ r->timeout.resolve = 1000;
+ r->timeout.retry = 1000;
+ r->resolve_retries = 3;
+ LIST_INIT(&r->nameservers);
+ LIST_INIT(&r->resolutions.curr);
+ LIST_INIT(&r->resolutions.wait);
+ HA_SPIN_INIT(&r->lock);
+
+ *resolvers = r;
+
+out:
+ if (err_code & (ERR_FATAL|ERR_ABORT)) {
+ ha_free(&r);
+ ha_free(&p);
+ }
+
+ return err_code;
+}
+
+
+/*
+ * Parse a <resolvers> section.
+ * Returns the error code, 0 if OK, or any combination of :
+ * - ERR_ABORT: must abort ASAP
+ * - ERR_FATAL: we can continue parsing but not start the service
+ * - ERR_WARN: a warning has been emitted
+ * - ERR_ALERT: an alert has been emitted
+ * Only the first two can stop processing, the other two are just
+ * indicators.
+ */
+int cfg_parse_resolvers(const char *file, int linenum, char **args, int kwm)
+{
+ const char *err;
+ int err_code = 0;
+ char *errmsg = NULL;
+ char *warnmsg = NULL;
+
+ if (strcmp(args[0], "resolvers") == 0) { /* new resolvers section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing name for resolvers section.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in '%s' name '%s'.\n",
+ file, linenum, *err, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ list_for_each_entry(curr_resolvers, &sec_resolvers, list) {
+		/* Error if two resolvers own the same name */
+ if (strcmp(curr_resolvers->id, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: resolvers '%s' has same name as another resolvers (declared at %s:%d).\n",
+ file, linenum, args[1], curr_resolvers->conf.file, curr_resolvers->conf.line);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ }
+ }
+
+ err_code |= resolvers_new(&curr_resolvers, args[1], file, linenum);
+ if (err_code & ERR_ALERT) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ goto out;
+ }
+
+ }
+ else if (strcmp(args[0], "nameserver") == 0) { /* nameserver definition */
+ struct dns_nameserver *newnameserver = NULL;
+ struct sockaddr_storage *sk;
+ int port1, port2;
+ struct protocol *proto;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <name> and <addr>[:<port>] as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ if (err) {
+ ha_alert("parsing [%s:%d] : character '%c' is not permitted in server name '%s'.\n",
+ file, linenum, *err, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ list_for_each_entry(newnameserver, &curr_resolvers->nameservers, list) {
+			/* Error if two nameservers own the same name */
+ if (strcmp(newnameserver->id, args[1]) == 0) {
+ ha_alert("Parsing [%s:%d]: nameserver '%s' has same name as another nameserver (declared at %s:%d).\n",
+ file, linenum, args[1], newnameserver->conf.file, newnameserver->conf.line);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ sk = str2sa_range(args[2], NULL, &port1, &port2, NULL, &proto, NULL,
+ &errmsg, NULL, NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_DGRAM | PA_O_STREAM | PA_O_DEFAULT_DGRAM);
+ if (!sk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s\n", file, linenum, args[0], args[1], errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((newnameserver = calloc(1, sizeof(*newnameserver))) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if (proto && proto->xprt_type == PROTO_TYPE_STREAM) {
+ err_code |= parse_server(file, linenum, args, curr_resolvers->px, NULL,
+ SRV_PARSE_PARSE_ADDR|SRV_PARSE_INITIAL_RESOLVE);
+ if (err_code & (ERR_FATAL|ERR_ABORT)) {
+ err_code |= ERR_ABORT;
+ goto out;
+ }
+
+ if (dns_stream_init(newnameserver, curr_resolvers->px->srv) < 0) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT|ERR_ABORT;
+ goto out;
+ }
+ }
+ else if (dns_dgram_init(newnameserver, sk) < 0) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if ((newnameserver->conf.file = strdup(file)) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ if ((newnameserver->id = strdup(args[1])) == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ newnameserver->parent = curr_resolvers;
+ newnameserver->process_responses = resolv_process_responses;
+ newnameserver->conf.line = linenum;
+		/* append this nameserver to the section's list */
+ LIST_APPEND(&curr_resolvers->nameservers, &newnameserver->list);
+ }
+ else if (strcmp(args[0], "parse-resolv-conf") == 0) {
+ err_code |= parse_resolve_conf(&errmsg, &warnmsg);
+ if (err_code & ERR_WARN) {
+ indent_msg(&warnmsg, 8);
+ ha_warning("parsing [%s:%d]: %s\n", file, linenum, warnmsg);
+ ha_free(&warnmsg);
+ }
+ if (err_code & ERR_ALERT) {
+ indent_msg(&errmsg, 8);
+ ha_alert("parsing [%s:%d]: %s\n", file, linenum, errmsg);
+ ha_free(&errmsg);
+ goto out;
+ }
+ }
+ else if (strcmp(args[0], "hold") == 0) { /* hold periods */
+ const char *res;
+ unsigned int time;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s' expects an <event> and a <time> as arguments.\n",
+ file, linenum, args[0]);
+ ha_alert("<event> can be either 'valid', 'nx', 'refused', 'timeout', or 'other'\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[1], args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s>.\n",
+ file, linenum, *res, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (strcmp(args[1], "nx") == 0)
+ curr_resolvers->hold.nx = time;
+ else if (strcmp(args[1], "other") == 0)
+ curr_resolvers->hold.other = time;
+ else if (strcmp(args[1], "refused") == 0)
+ curr_resolvers->hold.refused = time;
+ else if (strcmp(args[1], "timeout") == 0)
+ curr_resolvers->hold.timeout = time;
+ else if (strcmp(args[1], "valid") == 0)
+ curr_resolvers->hold.valid = time;
+ else if (strcmp(args[1], "obsolete") == 0)
+ curr_resolvers->hold.obsolete = time;
+ else {
+ ha_alert("parsing [%s:%d] : '%s' unknown <event>: '%s', expects either 'nx', 'timeout', 'valid', 'obsolete' or 'other'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ }
+ else if (strcmp(args[0], "accepted_payload_size") == 0) {
+ int i = 0;
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <nb> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ i = atoi(args[1]);
+ if (i < DNS_HEADER_SIZE || i > DNS_MAX_UDP_MESSAGE) {
+ ha_alert("parsing [%s:%d] : '%s' must be between %d and %d inclusive (was %s).\n",
+ file, linenum, args[0], DNS_HEADER_SIZE, DNS_MAX_UDP_MESSAGE, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ curr_resolvers->accepted_payload_size = i;
+ }
+ else if (strcmp(args[0], "resolution_pool_size") == 0) {
+ ha_alert("parsing [%s:%d] : '%s' directive is not supported anymore (it never appeared in a stable release).\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[0], "resolve_retries") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects <nb> as argument.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curr_resolvers->resolve_retries = atoi(args[1]);
+ }
+ else if (strcmp(args[0], "timeout") == 0) {
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : '%s' expects 'retry' or 'resolve' and <time> as arguments.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (strcmp(args[1], "retry") == 0 ||
+ strcmp(args[1], "resolve") == 0) {
+ const char *res;
+ unsigned int tout;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects <time> as argument.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ res = parse_time_err(args[2], &tout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s %s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s %s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s %s>.\n",
+ file, linenum, *res, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (args[1][2] == 't')
+ curr_resolvers->timeout.retry = tout;
+ else {
+ curr_resolvers->timeout.resolve = tout;
+ curr_resolvers->px->timeout.connect = tout;
+ }
+
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s' expects 'retry' or 'resolve' and <time> as arguments got '%s'.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (*args[0] != 0) {
+ ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+out:
+ free(errmsg);
+ free(warnmsg);
+ return err_code;
+}
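+
+/* Illustrative only (not part of the build): a minimal resolvers section
+ * accepted by the parser above; the section name and address are examples:
+ *
+ *   resolvers mydns
+ *     nameserver dns1 192.0.2.53:53
+ *     parse-resolv-conf
+ *     accepted_payload_size 8192
+ *     resolve_retries       3
+ *     timeout resolve       1s
+ *     timeout retry         1s
+ *     hold valid            10s
+ *     hold nx               30s
+ */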
+
+/* try to create a "default" resolvers section which uses "/etc/resolv.conf"
+ *
+ * This function is opportunistic and does not try to display errors or warnings.
+ */
+int resolvers_create_default()
+{
+ int err_code = 0;
+
+ if (global.mode & MODE_MWORKER_WAIT) /* does not create the section if in wait mode */
+ return 0;
+
+ /* if the section already exists, do nothing */
+ if (find_resolvers_by_id("default"))
+ return 0;
+
+ curr_resolvers = NULL;
+ err_code |= resolvers_new(&curr_resolvers, "default", "<internal>", 0);
+ if (err_code & ERR_CODE)
+ goto err;
+
+ curr_resolvers->conf.implicit = 1;
+
+ err_code |= parse_resolve_conf(NULL, NULL);
+ if (err_code & ERR_CODE)
+ goto err;
+	/* check if there was any nameserver in the resolv.conf file */
+ if (LIST_ISEMPTY(&curr_resolvers->nameservers)) {
+ err_code |= ERR_FATAL;
+ goto err;
+ }
+
+err:
+ if (err_code & ERR_CODE) {
+ resolvers_destroy(curr_resolvers);
+ curr_resolvers = NULL;
+ }
+
+	/* we never return an error here, we only try to create this
+	 * section when that's possible */
+ return 0;
+}
+
+int cfg_post_parse_resolvers()
+{
+ int err_code = 0;
+ struct server *srv;
+
+ if (curr_resolvers) {
+
+ /* prepare forward server descriptors */
+ if (curr_resolvers->px) {
+ srv = curr_resolvers->px->srv;
+ while (srv) {
+ /* init ssl if needed */
+ if (srv->use_ssl == 1 && xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv) {
+ if (xprt_get(XPRT_SSL)->prepare_srv(srv)) {
+ ha_alert("unable to prepare SSL for server '%s' in resolvers section '%s'.\n", srv->id, curr_resolvers->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ break;
+ }
+ }
+ srv = srv->next;
+ }
+ }
+ }
+ curr_resolvers = NULL;
+ return err_code;
+}
+
+REGISTER_CONFIG_SECTION("resolvers", cfg_parse_resolvers, cfg_post_parse_resolvers);
+REGISTER_POST_DEINIT(resolvers_deinit);
+REGISTER_CONFIG_POSTPARSER("dns runtime resolver", resolvers_finalize_config);
+REGISTER_PRE_CHECK(resolvers_create_default);
diff --git a/src/ring.c b/src/ring.c
new file mode 100644
index 0000000..849221e
--- /dev/null
+++ b/src/ring.c
@@ -0,0 +1,482 @@
+/*
+ * Ring buffer management
+ *
+ * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/buf.h>
+#include <haproxy/cli.h>
+#include <haproxy/ring.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/thread.h>
+
+/* context used to dump the contents of a ring via "show events" or "show errors" */
+struct show_ring_ctx {
+ struct ring *ring; /* ring to be dumped */
+ size_t ofs; /* storage offset to restart from; ~0=oldest */
+ uint flags; /* set of RING_WF_* */
+};
+
+/* Initialize a pre-allocated ring <ring> with the buffer area <area>
+ * of size <size> */
+void ring_init(struct ring *ring, void *area, size_t size)
+{
+ HA_RWLOCK_INIT(&ring->lock);
+ LIST_INIT(&ring->waiters);
+ ring->readers_count = 0;
+ ring->buf = b_make(area, size, 0, 0);
+ /* write the initial RC byte */
+ b_putchr(&ring->buf, 0);
+}
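+
+/* A sketch of the resulting storage layout, as derived from ring_init() and
+ * ring_write() below: the area always starts with a read-counter (RC) byte,
+ * and each message is appended as a varint-encoded length followed by the
+ * payload and a fresh RC byte on which parked readers wait:
+ *
+ *   [RC][len][payload][RC][len][payload]...[RC]
+ */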
+
+/* Creates and returns a ring buffer of size <size> bytes. Returns NULL on
+ * allocation failure.
+ */
+struct ring *ring_new(size_t size)
+{
+ struct ring *ring = NULL;
+ void *area = NULL;
+
+ if (size < 2)
+ goto fail;
+
+ ring = malloc(sizeof(*ring));
+ if (!ring)
+ goto fail;
+
+ area = malloc(size);
+ if (!area)
+ goto fail;
+
+ ring_init(ring, area, size);
+ return ring;
+ fail:
+ free(area);
+ free(ring);
+ return NULL;
+}
+
+/* Creates a unified ring + storage area at address <area> for <size> bytes.
+ * If <area> is null, then it's allocated of the requested size. The ring
+ * struct is part of the area so the usable area is slightly reduced. However
+ * the ring storage is immediately adjacent to the struct. ring_free() will
+ * ignore such rings, so the caller is responsible for releasing them.
+ */
+struct ring *ring_make_from_area(void *area, size_t size)
+{
+ struct ring *ring = NULL;
+
+ if (size < sizeof(*ring))
+ return NULL;
+
+ if (!area)
+ area = malloc(size);
+ if (!area)
+ return NULL;
+
+ ring = area;
+ area += sizeof(*ring);
+ ring_init(ring, area, size - sizeof(*ring));
+ return ring;
+}
+
+/* Cast a unified ring + storage area to a ring from <area>, without
+ * reinitializing the data buffer.
+ *
+ * Reinitialize the waiters and the lock.
+ */
+struct ring *ring_cast_from_area(void *area)
+{
+ struct ring *ring = NULL;
+
+ ring = area;
+ ring->buf.area = area + sizeof(*ring);
+
+ HA_RWLOCK_INIT(&ring->lock);
+ LIST_INIT(&ring->waiters);
+ ring->readers_count = 0;
+
+ return ring;
+}
+
+/* Resizes existing ring <ring> to <size>, without losing
+ * its contents. The new size must be at least as large as the previous one or
+ * no change will be performed. The pointer to the ring is returned on success,
+ * or NULL on allocation failure. This will lock the ring for writes.
+ */
+struct ring *ring_resize(struct ring *ring, size_t size)
+{
+ void *area;
+
+ if (b_size(&ring->buf) >= size)
+ return ring;
+
+ area = malloc(size);
+ if (!area)
+ return NULL;
+
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+
+ /* recheck the buffer's size, it may have changed during the malloc */
+ if (b_size(&ring->buf) < size) {
+ /* copy old contents */
+ b_getblk(&ring->buf, area, ring->buf.data, 0);
+ area = HA_ATOMIC_XCHG(&ring->buf.area, area);
+ ring->buf.size = size;
+ }
+
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+
+ free(area);
+ return ring;
+}
+
+/* destroys and frees ring <ring> */
+void ring_free(struct ring *ring)
+{
+ if (!ring)
+ return;
+
+ /* make sure it was not allocated by ring_make_from_area */
+ if (ring->buf.area == (void *)ring + sizeof(*ring))
+ return;
+
+ free(ring->buf.area);
+ free(ring);
+}
+
+/* Tries to send <npfx> parts from <prefix> followed by <nmsg> parts from <msg>
+ * to ring <ring>. The message is sent atomically. It may be truncated to
+ * <maxlen> bytes if <maxlen> is non-null. There is no distinction between the
+ * two lists, it's just a convenience to help the caller prepend some prefixes
+ * when necessary. It takes the ring's write lock to make sure no other thread
+ * will touch the buffer during the update. Returns the number of bytes sent,
+ * or <=0 on failure.
+ */
+ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg)
+{
+ struct buffer *buf = &ring->buf;
+ struct appctx *appctx;
+ size_t totlen = 0;
+ size_t lenlen;
+ uint64_t dellen;
+ int dellenlen;
+ ssize_t sent = 0;
+ int i;
+
+ /* we have to find some room to add our message (the buffer is
+ * never empty and at least contains the previous counter) and
+ * to update both the buffer contents and heads at the same
+ * time (it's doable using atomic ops but not worth the
+ * trouble, let's just lock). For this we first need to know
+ * the total message's length. We cannot measure it while
+ * copying due to the varint encoding of the length.
+ */
+ for (i = 0; i < npfx; i++)
+ totlen += pfx[i].len;
+ for (i = 0; i < nmsg; i++)
+ totlen += msg[i].len;
+
+ if (totlen > maxlen)
+ totlen = maxlen;
+
+ lenlen = varint_bytes(totlen);
+
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+ if (lenlen + totlen + 1 + 1 > b_size(buf))
+ goto done_buf;
+
+ while (b_room(buf) < lenlen + totlen + 1) {
+ /* we need to delete the oldest message (from the end),
+ * and we have to stop if there's a reader stuck there.
+ * Unless there's corruption in the buffer it's guaranteed
+ * that we have enough data to find 1 counter byte, a
+ * varint-encoded length (1 byte min) and the message
+ * payload (0 bytes min).
+ */
+ if (*b_head(buf))
+ goto done_buf;
+ dellenlen = b_peek_varint(buf, 1, &dellen);
+ if (!dellenlen)
+ goto done_buf;
+ BUG_ON(b_data(buf) < 1 + dellenlen + dellen);
+
+ b_del(buf, 1 + dellenlen + dellen);
+ }
+
+ /* OK now we do have room */
+ __b_put_varint(buf, totlen);
+
+ totlen = 0;
+ for (i = 0; i < npfx; i++) {
+ size_t len = pfx[i].len;
+
+ if (len + totlen > maxlen)
+ len = maxlen - totlen;
+ if (len)
+ __b_putblk(buf, pfx[i].ptr, len);
+ totlen += len;
+ }
+
+ for (i = 0; i < nmsg; i++) {
+ size_t len = msg[i].len;
+
+ if (len + totlen > maxlen)
+ len = maxlen - totlen;
+ if (len)
+ __b_putblk(buf, msg[i].ptr, len);
+ totlen += len;
+ }
+
+ *b_tail(buf) = 0; buf->data++; // new read counter
+ sent = lenlen + totlen + 1;
+
+ /* notify potential readers */
+ list_for_each_entry(appctx, &ring->waiters, wait_entry)
+ appctx_wakeup(appctx);
+
+ done_buf:
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+ return sent;
+}
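+
+/* Usage sketch (illustrative only, assuming <haproxy/ist.h> is included):
+ * build a ring and atomically append one prefixed message to it:
+ *
+ *   struct ist pfx[1] = { ist("hdr: ") };
+ *   struct ist msg[1] = { ist("hello") };
+ *   struct ring *r = ring_new(16384);
+ *
+ *   if (r) {
+ *           ring_write(r, ~0, pfx, 1, msg, 1); // appends "hdr: hello"
+ *           ring_free(r);
+ *   }
+ */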
+
+/* Tries to attach a new reader on ring <ring>. This is
+ * meant to be used by low level appctx code such as CLI or ring forwarding.
+ * For higher level functions, please see the relevant parts in appctx or CLI.
+ * It returns non-zero on success or zero on failure if too many users are
+ * already attached. On success, the caller MUST call ring_detach_appctx()
+ * to detach itself, even if it was never woken up.
+ */
+int ring_attach(struct ring *ring)
+{
+ int users = ring->readers_count;
+
+ do {
+ if (users >= 255)
+ return 0;
+ } while (!_HA_ATOMIC_CAS(&ring->readers_count, &users, users + 1));
+ return 1;
+}
+
+/* detach an appctx from a ring. The appctx is expected to be waiting at offset
+ * <ofs> relative to the beginning of the storage, or ~0 if not waiting yet.
+ * Nothing is done if <ring> is NULL.
+ */
+void ring_detach_appctx(struct ring *ring, struct appctx *appctx, size_t ofs)
+{
+ if (!ring)
+ return;
+
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+ if (ofs != ~0) {
+ /* reader was still attached */
+ if (ofs < b_head_ofs(&ring->buf))
+ ofs += b_size(&ring->buf) - b_head_ofs(&ring->buf);
+ else
+ ofs -= b_head_ofs(&ring->buf);
+
+ BUG_ON(ofs >= b_size(&ring->buf));
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_ATOMIC_DEC(b_peek(&ring->buf, ofs));
+ }
+ HA_ATOMIC_DEC(&ring->readers_count);
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+}
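+
+/* Illustrative lifecycle sketch for the two functions above: a low-level
+ * reader attaches once, tracks its offset (~0 until first positioned), and
+ * always detaches when done, even if it was never woken up:
+ *
+ *     size_t ofs = ~0;
+ *
+ *     if (!ring_attach(ring))
+ *             return 0;                // too many readers already (255 max)
+ *     // ... the read loop maintains <ofs> ...
+ *     ring_detach_appctx(ring, appctx, ofs);
+ */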
+
+/* Tries to attach CLI handler <appctx> as a new reader on ring <ring>. This is
+ * meant to be used when registering a CLI function to dump a buffer, so it
+ * returns zero on success, or non-zero on failure with a message in the appctx
+ * CLI context. It automatically sets the io_handler and io_release callbacks if
+ * they were not set. The <flags> argument takes a combination of RING_WF_*.
+ */
+int ring_attach_cli(struct ring *ring, struct appctx *appctx, uint flags)
+{
+ struct show_ring_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!ring_attach(ring))
+ return cli_err(appctx,
+ "Sorry, too many watchers (255) on this ring buffer. "
+ "What could it have so interesting to attract so many watchers ?");
+
+ if (!appctx->io_handler)
+ appctx->io_handler = cli_io_handler_show_ring;
+ if (!appctx->io_release)
+ appctx->io_release = cli_io_release_show_ring;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->ring = ring;
+ ctx->ofs = ~0; // start from the oldest event
+ ctx->flags = flags;
+ return 0;
+}
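+
+/* Illustrative sketch (command and ring names hypothetical): a CLI parser
+ * for a "show myring" command only needs to delegate to the helper above:
+ *
+ *     static int cli_parse_show_myring(char **args, char *payload,
+ *                                      struct appctx *appctx, void *private)
+ *     {
+ *             return ring_attach_cli(my_ring, appctx, 0);
+ *     }
+ */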
+
+/* This function dumps all events from the ring pointed to by the appctx's
+ * show_ring_ctx into the appctx's output buffer, seeking from the offset
+ * stored in the context (0 for the oldest known event). It looks at the
+ * context flags for boolean options: RING_WF_WAIT_MODE means it must wait
+ * for new data or for any key to be pressed. RING_WF_SEEK_NEW means it must
+ * seek directly to the end to wait for new contents. It returns 0 if the
+ * output buffer is full or more events are expected, in which case it needs
+ * to be called again, otherwise non-zero. It is meant to be used with
+ * cli_io_release_show_ring() to clean up.
+ */
+int cli_io_handler_show_ring(struct appctx *appctx)
+{
+ struct show_ring_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct ring *ring = ctx->ring;
+ struct buffer *buf = &ring->buf;
+ size_t ofs;
+ size_t last_ofs;
+ uint64_t msg_len;
+ size_t len, cnt;
+ int ret;
+
+ /* FIXME: don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ return 1;
+
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+
+ HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(ctx->ofs == ~0)) {
+ /* going to the end means looking at tail-1 */
+ ctx->ofs = b_peek_ofs(buf, (ctx->flags & RING_WF_SEEK_NEW) ? b_data(buf) - 1 : 0);
+ HA_ATOMIC_INC(b_orig(buf) + ctx->ofs);
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs = ctx->ofs - b_head_ofs(buf);
+ if (ctx->ofs < b_head_ofs(buf))
+ ofs += b_size(buf);
+
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ /* in this loop, ofs always points to the counter byte that precedes
+ * the message so that we can take our reference there if we have to
+ * stop before the end (ret=0).
+ */
+ ret = 1;
+ while (ofs + 1 < b_data(buf)) {
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+
+ if (unlikely(msg_len + 1 > b_size(&trash))) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ chunk_reset(&trash);
+ len = b_getblk(buf, trash.area, msg_len, ofs + cnt);
+ trash.data += len;
+ trash.area[trash.data++] = '\n';
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ret = 0;
+ break;
+ }
+ ofs += cnt + msg_len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ last_ofs = b_tail_ofs(buf);
+ ctx->ofs = b_peek_ofs(buf, ofs);
+ HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock);
+
+ if (ret && (ctx->flags & RING_WF_WAIT_MODE)) {
+ /* we've drained everything and are configured to wait for more
+ * data or an event (keypress, close)
+ */
+ if (!sc_oc(sc)->output && !(sc->flags & SC_FL_SHUT_DONE)) {
+ /* let's be woken up once new data arrive */
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+ LIST_APPEND(&ring->waiters, &appctx->wait_entry);
+ ofs = b_tail_ofs(&ring->buf);
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+ if (ofs != last_ofs) {
+ /* more data was added into the ring between the
+ * unlock and the lock, and the writer might not
+ * have seen us. We need to reschedule a read.
+ */
+ applet_have_more_data(appctx);
+ } else
+ applet_have_no_more_data(appctx);
+ ret = 0;
+ }
+ /* always drain all the request */
+ co_skip(sc_oc(sc), sc_oc(sc)->output);
+ }
+
+ applet_expect_no_data(appctx);
+ return ret;
+}
+
+/* must be called after cli_io_handler_show_ring() above */
+void cli_io_release_show_ring(struct appctx *appctx)
+{
+ struct show_ring_ctx *ctx = appctx->svcctx;
+ struct ring *ring = ctx->ring;
+ size_t ofs = ctx->ofs;
+
+ ring_detach_appctx(ring, appctx, ofs);
+}
+
+/* Returns the MAXIMUM payload len that could theoretically fit into the ring
+ * based on ring buffer size.
+ *
+ * Computation logic relies on implementation details from 'ring-t.h'.
+ */
+size_t ring_max_payload(const struct ring *ring)
+{
+ size_t max;
+
+ /* initial max = bufsize - 1 (initial RC) - 1 (payload RC) */
+ max = b_size(&ring->buf) - 1 - 1;
+
+ /* subtract payload VI (varint-encoded size) */
+ max -= varint_bytes(max);
+ return max;
+}
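+
+/* Illustrative sketch: a writer that must avoid truncation can cap its own
+ * configured limit with this helper before calling ring_write():
+ *
+ *     size_t maxlen = MIN(cfg_maxlen, ring_max_payload(ring));
+ *
+ *     ring_write(ring, maxlen, NULL, 0, msg, nmsg);
+ */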
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/sample.c b/src/sample.c
new file mode 100644
index 0000000..89de612
--- /dev/null
+++ b/src/sample.c
@@ -0,0 +1,5173 @@
+/*
+ * Sample management functions.
+ *
+ * Copyright 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <stdio.h>
+
+#include <import/mjson.h>
+#include <import/sha1.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/auth.h>
+#include <haproxy/base64.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/clock.h>
+#include <haproxy/errors.h>
+#include <haproxy/fix.h>
+#include <haproxy/global.h>
+#include <haproxy/hash.h>
+#include <haproxy/http.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/mqtt.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/protobuf.h>
+#include <haproxy/proxy.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/sink.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+#include <haproxy/jwt.h>
+
+/* sample type names */
+const char *smp_to_type[SMP_TYPES] = {
+ [SMP_T_ANY] = "any",
+ [SMP_T_SAME] = "same",
+ [SMP_T_BOOL] = "bool",
+ [SMP_T_SINT] = "sint",
+ [SMP_T_ADDR] = "addr",
+ [SMP_T_IPV4] = "ipv4",
+ [SMP_T_IPV6] = "ipv6",
+ [SMP_T_STR] = "str",
+ [SMP_T_BIN] = "bin",
+ [SMP_T_METH] = "meth",
+};
+
+/* static sample used in sample_process() when <p> is NULL */
+static THREAD_LOCAL struct sample temp_smp;
+
+/* list head of all known sample fetch keywords */
+static struct sample_fetch_kw_list sample_fetches = {
+ .list = LIST_HEAD_INIT(sample_fetches.list)
+};
+
+/* list head of all known sample format conversion keywords */
+static struct sample_conv_kw_list sample_convs = {
+ .list = LIST_HEAD_INIT(sample_convs.list)
+};
+
+const unsigned int fetch_cap[SMP_SRC_ENTRIES] = {
+ [SMP_SRC_CONST] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL_CFG_PARSER |
+ SMP_VAL_CLI_PARSER ),
+
+ [SMP_SRC_INTRN] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL_CLI_PARSER ),
+
+ [SMP_SRC_LISTN] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_FTEND] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L4CLI] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L5CLI] = (SMP_VAL___________ | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_TRACK] = (SMP_VAL_FE_CON_ACC | SMP_VAL_FE_SES_ACC | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L6REQ] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRQHV] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRQHP] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_FE_REQ_CNT |
+ SMP_VAL_FE_HRQ_HDR | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRQBO] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL_FE_HRQ_BDY | SMP_VAL_FE_SET_BCK |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_BKEND] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_BE_REQ_CNT | SMP_VAL_BE_HRQ_HDR | SMP_VAL_BE_HRQ_BDY |
+ SMP_VAL_BE_SET_SRV | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_SERVR] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL_BE_SRV_CON | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L4SRV] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L5SRV] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_L6RES] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL___________ | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRSHV] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL___________ | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRSHP] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL_BE_RES_CNT |
+ SMP_VAL_BE_HRS_HDR | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL_FE_LOG_END | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_HRSBO] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL_BE_HRS_BDY | SMP_VAL_BE_STO_RUL |
+ SMP_VAL_FE_RES_CNT | SMP_VAL_FE_HRS_HDR | SMP_VAL_FE_HRS_BDY |
+ SMP_VAL___________ | SMP_VAL_BE_CHK_RUL | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_RQFIN] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_RSFIN] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_TXFIN] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+
+ [SMP_SRC_SSFIN] = (SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL_FE_LOG_END | SMP_VAL___________ | SMP_VAL___________ |
+ SMP_VAL___________ ),
+};
+
+static const char *fetch_src_names[SMP_SRC_ENTRIES] = {
+ [SMP_SRC_INTRN] = "internal state",
+ [SMP_SRC_LISTN] = "listener",
+ [SMP_SRC_FTEND] = "frontend",
+ [SMP_SRC_L4CLI] = "client address",
+ [SMP_SRC_L5CLI] = "client-side connection",
+ [SMP_SRC_TRACK] = "track counters",
+ [SMP_SRC_L6REQ] = "request buffer",
+ [SMP_SRC_HRQHV] = "HTTP request headers",
+ [SMP_SRC_HRQHP] = "HTTP request",
+ [SMP_SRC_HRQBO] = "HTTP request body",
+ [SMP_SRC_BKEND] = "backend",
+ [SMP_SRC_SERVR] = "server",
+ [SMP_SRC_L4SRV] = "server address",
+ [SMP_SRC_L5SRV] = "server-side connection",
+ [SMP_SRC_L6RES] = "response buffer",
+ [SMP_SRC_HRSHV] = "HTTP response headers",
+ [SMP_SRC_HRSHP] = "HTTP response",
+ [SMP_SRC_HRSBO] = "HTTP response body",
+ [SMP_SRC_RQFIN] = "request buffer statistics",
+ [SMP_SRC_RSFIN] = "response buffer statistics",
+ [SMP_SRC_TXFIN] = "transaction statistics",
+ [SMP_SRC_SSFIN] = "session statistics",
+};
+
+static const char *fetch_ckp_names[SMP_CKP_ENTRIES] = {
+ [SMP_CKP_FE_CON_ACC] = "frontend tcp-request connection rule",
+ [SMP_CKP_FE_SES_ACC] = "frontend tcp-request session rule",
+ [SMP_CKP_FE_REQ_CNT] = "frontend tcp-request content rule",
+ [SMP_CKP_FE_HRQ_HDR] = "frontend http-request header rule",
+ [SMP_CKP_FE_HRQ_BDY] = "frontend http-request body rule",
+ [SMP_CKP_FE_SET_BCK] = "frontend use-backend rule",
+ [SMP_CKP_BE_REQ_CNT] = "backend tcp-request content rule",
+ [SMP_CKP_BE_HRQ_HDR] = "backend http-request header rule",
+ [SMP_CKP_BE_HRQ_BDY] = "backend http-request body rule",
+ [SMP_CKP_BE_SET_SRV] = "backend use-server, balance or stick-match rule",
+ [SMP_CKP_BE_SRV_CON] = "server source selection",
+ [SMP_CKP_BE_RES_CNT] = "backend tcp-response content rule",
+ [SMP_CKP_BE_HRS_HDR] = "backend http-response header rule",
+ [SMP_CKP_BE_HRS_BDY] = "backend http-response body rule",
+ [SMP_CKP_BE_STO_RUL] = "backend stick-store rule",
+ [SMP_CKP_FE_RES_CNT] = "frontend tcp-response content rule",
+ [SMP_CKP_FE_HRS_HDR] = "frontend http-response header rule",
+ [SMP_CKP_FE_HRS_BDY] = "frontend http-response body rule",
+ [SMP_CKP_FE_LOG_END] = "logs",
+ [SMP_CKP_BE_CHK_RUL] = "backend tcp-check rule",
+ [SMP_CKP_CFG_PARSER] = "configuration parser",
+ [SMP_CKP_CLI_PARSER] = "CLI parser",
+};
+
+/* This function returns the most accurate expected type of the data returned
+ * by the sample_expr. It assumes that the <expr> and all of its converters are
+ * properly initialized.
+ */
+int smp_expr_output_type(struct sample_expr *expr)
+{
+ struct sample_conv_expr *cur_smp = NULL;
+ int cur_type = SMP_T_ANY; /* current type in the chain */
+ int next_type = SMP_T_ANY; /* next type in the chain */
+
+ if (!LIST_ISEMPTY(&expr->conv_exprs)) {
+ /* Ignore converters that output SMP_T_SAME when switching to them is
+ * conversion-free (such a converter's output matches its input, thus
+ * only its input is considered).
+ *
+ * We start looking at the end of the conv list and loop backwards
+ * toward the sample fetch for better performance: the last effective
+ * output type is more likely to be found near the end of the chain.
+ */
+ do {
+ struct list *cur_head = (cur_smp) ? &cur_smp->list : &expr->conv_exprs;
+
+ cur_smp = LIST_PREV(cur_head, struct sample_conv_expr *, list);
+ if (cur_smp->conv->out_type != SMP_T_SAME) {
+ /* current converter has effective out_type */
+ cur_type = cur_smp->conv->out_type;
+ goto out;
+ }
+ else if (sample_casts[cur_type][next_type] != c_none)
+ return next_type; /* switching to next type is not conversion-free */
+
+ next_type = cur_smp->conv->in_type;
+ } while (cur_smp != LIST_NEXT(&expr->conv_exprs, struct sample_conv_expr *, list));
+ }
+ /* conv list empty or doesn't have effective out_type,
+ * falling back to sample fetch out_type
+ */
+ cur_type = expr->fetch->out_type;
+ out:
+ if (sample_casts[cur_type][next_type] != c_none)
+ return next_type; /* switching to next type is not conversion-free */
+ return cur_type;
+}
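+
+/* Illustrative example: for a parsed expression such as "req.hdr(host),length",
+ * the last converter ("length") has an effective out_type, so:
+ *
+ *     int type = smp_expr_output_type(expr);
+ *
+ * returns SMP_T_SINT without ever considering the fetch's SMP_T_STR output.
+ */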
+
+
+/* fill the trash with a comma-delimited list of source names for the <use> bit
+ * field which must be composed of a non-null set of SMP_USE_* flags. The return
+ * value is the pointer to the string in the trash buffer.
+ */
+const char *sample_src_names(unsigned int use)
+{
+ int bit;
+
+ trash.data = 0;
+ trash.area[0] = '\0';
+ for (bit = 0; bit < SMP_SRC_ENTRIES; bit++) {
+ if (!(use & ~((1 << bit) - 1)))
+ break; /* no more bits */
+
+ if (!(use & (1 << bit)))
+ continue; /* bit not set */
+
+ trash.data += snprintf(trash.area + trash.data,
+ trash.size - trash.data, "%s%s",
+ (use & ((1 << bit) - 1)) ? "," : "",
+ fetch_src_names[bit]);
+ }
+ return trash.area;
+}
+
+/* return a pointer to the correct sample checkpoint name, or "unknown" when
+ * the flags are invalid. Only the lowest bit is used, higher bits are ignored
+ * if set.
+ */
+const char *sample_ckp_names(unsigned int use)
+{
+ int bit;
+
+ for (bit = 0; bit < SMP_CKP_ENTRIES; bit++)
+ if (use & (1 << bit))
+ return fetch_ckp_names[bit];
+ return "unknown sample check place, please report this bug";
+}
+
+/*
+ * Registers the sample fetch keyword list <kwl> as a list of valid keywords
+ * for next parsing sessions. The fetch keywords capabilities are also computed
+ * from their ->use field.
+ */
+void sample_register_fetches(struct sample_fetch_kw_list *kwl)
+{
+ struct sample_fetch *sf;
+ int bit;
+
+ for (sf = kwl->kw; sf->kw != NULL; sf++) {
+ for (bit = 0; bit < SMP_SRC_ENTRIES; bit++)
+ if (sf->use & (1 << bit))
+ sf->val |= fetch_cap[bit];
+ }
+ LIST_APPEND(&sample_fetches.list, &kwl->list);
+}
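+
+/* Illustrative registration sketch (keyword and function names hypothetical):
+ * keyword lists are normally registered at boot time via an INITCALL:
+ *
+ *     static struct sample_fetch_kw_list smp_kws = {ILH, {
+ *             { "my_fetch", smp_fetch_my, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ *             { NULL, NULL, 0, 0, 0 },
+ *     }};
+ *
+ *     INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+ */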
+
+/*
+ * Registers the sample format conversion keyword list <pckl> as a list of
+ * valid keywords for next parsing sessions.
+ */
+void sample_register_convs(struct sample_conv_kw_list *pckl)
+{
+ LIST_APPEND(&sample_convs.list, &pckl->list);
+}
+
+/*
+ * Returns a pointer to the sample fetch keyword structure identified by the
+ * string of length <len> in buffer <kw>, or NULL if it is not found.
+ */
+struct sample_fetch *find_sample_fetch(const char *kw, int len)
+{
+ int index;
+ struct sample_fetch_kw_list *kwl;
+
+ list_for_each_entry(kwl, &sample_fetches.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strncmp(kwl->kw[index].kw, kw, len) == 0 &&
+ kwl->kw[index].kw[len] == '\0')
+ return &kwl->kw[index];
+ }
+ }
+ return NULL;
+}
+
+/* dump list of registered sample fetch keywords on stdout */
+void smp_dump_fetch_kw(void)
+{
+ struct sample_fetch_kw_list *kwl;
+ struct sample_fetch *kwp, *kw;
+ uint64_t mask;
+ int index;
+ int arg;
+ int bit;
+
+ for (bit = 0; bit <= SMP_CKP_ENTRIES + 1; bit++) {
+ putchar('#');
+ for (index = 0; bit + index <= SMP_CKP_ENTRIES; index++)
+ putchar(' ');
+ for (index = 0; index < bit && index < SMP_CKP_ENTRIES; index++)
+ printf((bit <= SMP_CKP_ENTRIES) ? "/ " : " |");
+ for (index = bit; bit < SMP_CKP_ENTRIES && index < SMP_CKP_ENTRIES + 2; index++)
+ if (index == bit)
+ putchar('_');
+ else if (index == bit + 1)
+ putchar('.');
+ else
+ putchar('-');
+ printf(" %s\n", (bit < SMP_CKP_ENTRIES) ? fetch_ckp_names[bit] : "");
+ }
+
+ for (kw = kwp = NULL;; kwp = kw) {
+ list_for_each_entry(kwl, &sample_fetches.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strordered(kwp ? kwp->kw : NULL,
+ kwl->kw[index].kw,
+ kw != kwp ? kw->kw : NULL))
+ kw = &kwl->kw[index];
+ }
+ }
+
+ if (kw == kwp)
+ break;
+
+ printf("[ ");
+ for (bit = 0; bit < SMP_CKP_ENTRIES; bit++)
+ printf("%s", (kw->val & (1 << bit)) ? "Y " : ". ");
+
+ printf("] %s", kw->kw);
+ if (kw->arg_mask) {
+ mask = kw->arg_mask >> ARGM_BITS;
+ printf("(");
+ for (arg = 0;
+ arg < ARGM_NBARGS && ((mask >> (arg * ARGT_BITS)) & ARGT_MASK);
+ arg++) {
+ if (arg == (kw->arg_mask & ARGM_MASK)) {
+ /* now dumping extra args */
+ printf("[");
+ }
+ if (arg)
+ printf(",");
+ printf("%s", arg_type_names[(mask >> (arg * ARGT_BITS)) & ARGT_MASK]);
+ }
+ if (arg > (kw->arg_mask & ARGM_MASK)) {
+ /* extra args were dumped */
+ printf("]");
+ }
+ printf(")");
+ }
+ printf(": %s", smp_to_type[kw->out_type]);
+ printf("\n");
+ }
+}
+
+/* dump list of registered sample converter keywords on stdout */
+void smp_dump_conv_kw(void)
+{
+ struct sample_conv_kw_list *kwl;
+ struct sample_conv *kwp, *kw;
+ uint64_t mask;
+ int index;
+ int arg;
+
+ for (kw = kwp = NULL;; kwp = kw) {
+ list_for_each_entry(kwl, &sample_convs.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strordered(kwp ? kwp->kw : NULL,
+ kwl->kw[index].kw,
+ kw != kwp ? kw->kw : NULL))
+ kw = &kwl->kw[index];
+ }
+ }
+
+ if (kw == kwp)
+ break;
+
+ printf("%s", kw->kw);
+ if (kw->arg_mask) {
+ mask = kw->arg_mask >> ARGM_BITS;
+ printf("(");
+ for (arg = 0;
+ arg < ARGM_NBARGS && ((mask >> (arg * ARGT_BITS)) & ARGT_MASK);
+ arg++) {
+ if (arg == (kw->arg_mask & ARGM_MASK)) {
+ /* now dumping extra args */
+ printf("[");
+ }
+ if (arg)
+ printf(",");
+ printf("%s", arg_type_names[(mask >> (arg * ARGT_BITS)) & ARGT_MASK]);
+ }
+ if (arg > (kw->arg_mask & ARGM_MASK)) {
+ /* extra args were dumped */
+ printf("]");
+ }
+ printf(")");
+ }
+ printf(": %s => %s", smp_to_type[kw->out_type], smp_to_type[kw->in_type]);
+ printf("\n");
+ }
+}
+
+/* This function browses the list of available sample fetches. <current> is
+ * the last used sample fetch. On the first call, it must be set to NULL.
+ * <idx> is the index of the next sample fetch entry. It is used as a private
+ * value and does not need to be initialized.
+ *
+ * It always returns the next sample_fetch entry, or NULL when the end of
+ * the list is reached.
+ */
+struct sample_fetch *sample_fetch_getnext(struct sample_fetch *current, int *idx)
+{
+ struct sample_fetch_kw_list *kwl;
+ struct sample_fetch *base;
+
+ if (!current) {
+ /* Get first kwl entry. */
+ kwl = LIST_NEXT(&sample_fetches.list, struct sample_fetch_kw_list *, list);
+ (*idx) = 0;
+ } else {
+ /* Get kwl corresponding to the current entry. */
+ base = current + 1 - (*idx);
+ kwl = container_of(base, struct sample_fetch_kw_list, kw);
+ }
+
+ while (1) {
+
+ /* Check if kwl is the last entry. */
+ if (&kwl->list == &sample_fetches.list)
+ return NULL;
+
+ /* idx contains the next keyword. If it is available, return it. */
+ if (kwl->kw[*idx].kw) {
+ (*idx)++;
+ return &kwl->kw[(*idx)-1];
+ }
+
+ /* get next entry in the main list, and return NULL if the end is reached. */
+ kwl = LIST_NEXT(&kwl->list, struct sample_fetch_kw_list *, list);
+
+ /* Set index to 0 and loop once more. */
+ (*idx) = 0;
+ }
+}
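+
+/* Illustrative sketch: enumerating every registered fetch keyword with the
+ * function above:
+ *
+ *     struct sample_fetch *sf = NULL;
+ *     int idx;
+ *
+ *     while ((sf = sample_fetch_getnext(sf, &idx)) != NULL)
+ *             printf("%s\n", sf->kw);
+ */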
+
+/* This function browses the list of available converters. <current> is
+ * the last used converter. On the first call, it must be set to NULL.
+ * <idx> is the index of the next converter entry. It is used as a private
+ * value and does not need to be initialized.
+ *
+ * It always returns the next sample_conv entry, or NULL when the end of
+ * the list is reached.
+ */
+struct sample_conv *sample_conv_getnext(struct sample_conv *current, int *idx)
+{
+ struct sample_conv_kw_list *kwl;
+ struct sample_conv *base;
+
+ if (!current) {
+ /* Get first kwl entry. */
+ kwl = LIST_NEXT(&sample_convs.list, struct sample_conv_kw_list *, list);
+ (*idx) = 0;
+ } else {
+ /* Get kwl corresponding to the current entry. */
+ base = current + 1 - (*idx);
+ kwl = container_of(base, struct sample_conv_kw_list, kw);
+ }
+
+ while (1) {
+ /* Check if kwl is the last entry. */
+ if (&kwl->list == &sample_convs.list)
+ return NULL;
+
+ /* idx contains the next keyword. If it is available, return it. */
+ if (kwl->kw[*idx].kw) {
+ (*idx)++;
+ return &kwl->kw[(*idx)-1];
+ }
+
+ /* get next entry in the main list, and return NULL if the end is reached. */
+ kwl = LIST_NEXT(&kwl->list, struct sample_conv_kw_list *, list);
+
+ /* Set index to 0 and loop once more. */
+ (*idx) = 0;
+ }
+}
+
+/*
+ * Returns a pointer to the sample format conversion keyword structure
+ * identified by the string of length <len> in buffer <kw>, or NULL if it is
+ * not found.
+ */
+struct sample_conv *find_sample_conv(const char *kw, int len)
+{
+ int index;
+ struct sample_conv_kw_list *kwl;
+
+ list_for_each_entry(kwl, &sample_convs.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (strncmp(kwl->kw[index].kw, kw, len) == 0 &&
+ kwl->kw[index].kw[len] == '\0')
+ return &kwl->kw[index];
+ }
+ }
+ return NULL;
+}
+
+/******************************************************************/
+/* Sample casts functions */
+/******************************************************************/
+
+static int c_ip2int(struct sample *smp)
+{
+ smp->data.u.sint = ntohl(smp->data.u.ipv4.s_addr);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+static int c_ip2str(struct sample *smp)
+{
+ struct buffer *trash = get_trash_chunk();
+
+ if (!inet_ntop(AF_INET, (void *)&smp->data.u.ipv4, trash->area, trash->size))
+ return 0;
+
+ trash->data = strlen(trash->area);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+
+ return 1;
+}
+
+static int c_ip2ipv6(struct sample *smp)
+{
+ v4tov6(&smp->data.u.ipv6, &smp->data.u.ipv4);
+ smp->data.type = SMP_T_IPV6;
+ return 1;
+}
+
+static int c_ipv62ip(struct sample *smp)
+{
+ if (!v6tov4(&smp->data.u.ipv4, &smp->data.u.ipv6))
+ return 0;
+ smp->data.type = SMP_T_IPV4;
+ return 1;
+}
+
+static int c_ipv62str(struct sample *smp)
+{
+ struct buffer *trash = get_trash_chunk();
+
+ if (!inet_ntop(AF_INET6, (void *)&smp->data.u.ipv6, trash->area, trash->size))
+ return 0;
+
+ trash->data = strlen(trash->area);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int c_int2ip(struct sample *smp)
+{
+ smp->data.u.ipv4.s_addr = htonl((unsigned int)smp->data.u.sint);
+ smp->data.type = SMP_T_IPV4;
+ return 1;
+}
+
+static int c_int2ipv6(struct sample *smp)
+{
+ smp->data.u.ipv4.s_addr = htonl((unsigned int)smp->data.u.sint);
+ v4tov6(&smp->data.u.ipv6, &smp->data.u.ipv4);
+ smp->data.type = SMP_T_IPV6;
+ return 1;
+}
+
+static int c_str2addr(struct sample *smp)
+{
+ if (!buf2ip(smp->data.u.str.area, smp->data.u.str.data, &smp->data.u.ipv4)) {
+ if (!buf2ip6(smp->data.u.str.area, smp->data.u.str.data, &smp->data.u.ipv6))
+ return 0;
+ smp->data.type = SMP_T_IPV6;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+ }
+ smp->data.type = SMP_T_IPV4;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int c_str2ip(struct sample *smp)
+{
+ if (!buf2ip(smp->data.u.str.area, smp->data.u.str.data, &smp->data.u.ipv4))
+ return 0;
+ smp->data.type = SMP_T_IPV4;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int c_str2ipv6(struct sample *smp)
+{
+ if (!buf2ip6(smp->data.u.str.area, smp->data.u.str.data, &smp->data.u.ipv6))
+ return 0;
+ smp->data.type = SMP_T_IPV6;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/*
+ * A NUL character always enforces the end of the string if it is met.
+ * Data is never changed, so we can ignore the CONST case.
+ */
+static int c_bin2str(struct sample *smp)
+{
+ int i;
+
+ for (i = 0; i < smp->data.u.str.data; i++) {
+ if (!smp->data.u.str.area[i]) {
+ smp->data.u.str.data = i;
+ break;
+ }
+ }
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
+
+static int c_int2str(struct sample *smp)
+{
+ struct buffer *trash = get_trash_chunk();
+ char *pos;
+
+ pos = lltoa_r(smp->data.u.sint, trash->area, trash->size);
+ if (!pos)
+ return 0;
+
+ trash->size = trash->size - (pos - trash->area);
+ trash->area = pos;
+ trash->data = strlen(pos);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/* This function unconditionally duplicates data and removes the "const" flag.
+ * For strings and binary blocks, it also provides a known allocated size with
+ * a length that is capped to the size, and ensures a trailing zero is always
+ * appended for strings. This is necessary for some operations which may
+ * require to extend the length. It returns 0 if it fails, 1 on success.
+ */
+int smp_dup(struct sample *smp)
+{
+ struct buffer *trash;
+
+ switch (smp->data.type) {
+ case SMP_T_BOOL:
+ case SMP_T_SINT:
+ case SMP_T_ADDR:
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ /* These types are not const. */
+ break;
+
+ case SMP_T_METH:
+ if (smp->data.u.meth.meth != HTTP_METH_OTHER)
+ break;
+ __fallthrough;
+
+ case SMP_T_STR:
+ trash = get_trash_chunk();
+ trash->data = smp->data.type == SMP_T_STR ?
+ smp->data.u.str.data : smp->data.u.meth.str.data;
+ if (trash->data > trash->size - 1)
+ trash->data = trash->size - 1;
+
+ memcpy(trash->area, smp->data.type == SMP_T_STR ?
+ smp->data.u.str.area : smp->data.u.meth.str.area,
+ trash->data);
+ trash->area[trash->data] = 0;
+ smp->data.u.str = *trash;
+ break;
+
+ case SMP_T_BIN:
+ trash = get_trash_chunk();
+ trash->data = smp->data.u.str.data;
+ if (trash->data > trash->size)
+ trash->data = trash->size;
+
+ memcpy(trash->area, smp->data.u.str.area, trash->data);
+ smp->data.u.str = *trash;
+ break;
+
+ default:
+ /* Other cases are unexpected. */
+ return 0;
+ }
+
+ /* remove const flag */
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
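+
+/* Illustrative sketch (assuming a non-empty SMP_T_STR sample): a converter
+ * that modifies a string in place must duplicate possibly shared or constant
+ * data first:
+ *
+ *     if (!smp_dup(smp))
+ *             return 0;
+ *     smp->data.u.str.area[0] = toupper((unsigned char)smp->data.u.str.area[0]);
+ */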
+
+int c_none(struct sample *smp)
+{
+ return 1;
+}
+
+/* special converter function used by pseudo types in the compatibility matrix
+ * to inform that the conversion is theoretically allowed at parsing time.
+ *
+ * However, being a pseudo type, it may not be emitted by fetches or converters
+ * so this function should never be called. If it is called anyway, it means
+ * that a pseudo type has been used as a final output type at runtime, which
+ * is considered a bug and should be fixed. To help spot this kind of bug, the
+ * process will crash in this case.
+ */
+int c_pseudo(struct sample *smp)
+{
+ ABORT_NOW(); // die loudly
+ /* never reached */
+ return 0;
+}
+
+static int c_str2int(struct sample *smp)
+{
+ const char *str;
+ const char *end;
+
+ if (smp->data.u.str.data == 0)
+ return 0;
+
+ str = smp->data.u.str.area;
+ end = smp->data.u.str.area + smp->data.u.str.data;
+
+ smp->data.u.sint = read_int64(&str, end);
+ smp->data.type = SMP_T_SINT;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int c_str2meth(struct sample *smp)
+{
+ enum http_meth_t meth;
+ int len;
+
+ meth = find_http_meth(smp->data.u.str.area, smp->data.u.str.data);
+ if (meth == HTTP_METH_OTHER) {
+ len = smp->data.u.str.data;
+ smp->data.u.meth.str.area = smp->data.u.str.area;
+ smp->data.u.meth.str.data = len;
+ }
+ else
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.u.meth.meth = meth;
+ smp->data.type = SMP_T_METH;
+ return 1;
+}
+
+static int c_meth2str(struct sample *smp)
+{
+ int len;
+ enum http_meth_t meth;
+
+ if (smp->data.u.meth.meth == HTTP_METH_OTHER) {
+ /* The method is unknown. Copy the original pointer. */
+ len = smp->data.u.meth.str.data;
+ smp->data.u.str.area = smp->data.u.meth.str.area;
+ smp->data.u.str.data = len;
+ smp->data.type = SMP_T_STR;
+ }
+ else if (smp->data.u.meth.meth < HTTP_METH_OTHER) {
+ /* The method is known, copy the pointer containing the string. */
+ meth = smp->data.u.meth.meth;
+ smp->data.u.str.area = http_known_methods[meth].ptr;
+ smp->data.u.str.data = http_known_methods[meth].len;
+ smp->flags |= SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+ }
+ else {
+ /* Unknown method */
+ return 0;
+ }
+ return 1;
+}
+
+static int c_addr2bin(struct sample *smp)
+{
+ struct buffer *chk = get_trash_chunk();
+
+ if (smp->data.type == SMP_T_IPV4) {
+ chk->data = 4;
+ memcpy(chk->area, &smp->data.u.ipv4, chk->data);
+ }
+ else if (smp->data.type == SMP_T_IPV6) {
+ chk->data = 16;
+ memcpy(chk->area, &smp->data.u.ipv6, chk->data);
+ }
+ else
+ return 0;
+
+ smp->data.u.str = *chk;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+static int c_int2bin(struct sample *smp)
+{
+ struct buffer *chk = get_trash_chunk();
+
+ *(unsigned long long int *) chk->area = my_htonll(smp->data.u.sint);
+ chk->data = 8;
+
+ smp->data.u.str = *chk;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+static int c_bool2bin(struct sample *smp)
+{
+ struct buffer *chk = get_trash_chunk();
+
+ *(unsigned long long int *)chk->area = my_htonll(!!smp->data.u.sint);
+ chk->data = 8;
+ smp->data.u.str = *chk;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+
+/*****************************************************************/
+/* Sample casts matrix: */
+/* sample_casts[from type][to type] */
+/* NULL pointer used for impossible sample casts */
+/*****************************************************************/
+
+sample_cast_fct sample_casts[SMP_TYPES][SMP_TYPES] = {
+/* to: ANY SAME BOOL SINT ADDR IPV4 IPV6 STR BIN METH */
+/* from: ANY */ { c_none, NULL, c_pseudo, c_pseudo, c_pseudo, c_pseudo, c_pseudo, c_pseudo, c_pseudo, c_pseudo },
+/* SAME */ { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL },
+/* BOOL */ { c_none, NULL, c_none, c_none, NULL, NULL, NULL, c_int2str, c_bool2bin, NULL },
+/* SINT */ { c_none, NULL, c_none, c_none, c_int2ip, c_int2ip, c_int2ipv6, c_int2str, c_int2bin, NULL },
+/* ADDR */ { c_none, NULL, NULL, NULL, c_pseudo, c_pseudo, c_pseudo, c_pseudo, c_pseudo, NULL },
+/* IPV4 */ { c_none, NULL, NULL, c_ip2int, c_none, c_none, c_ip2ipv6, c_ip2str, c_addr2bin, NULL },
+/* IPV6 */ { c_none, NULL, NULL, NULL, c_none, c_ipv62ip, c_none, c_ipv62str, c_addr2bin, NULL },
+/* STR */ { c_none, NULL, c_str2int, c_str2int, c_str2addr, c_str2ip, c_str2ipv6, c_none, c_none, c_str2meth },
+/* BIN */ { c_none, NULL, NULL, NULL, NULL, NULL, NULL, c_bin2str, c_none, c_str2meth },
+/* METH */ { c_none, NULL, NULL, NULL, NULL, NULL, NULL, c_meth2str, c_meth2str, c_none }
+};
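+
+/* Illustrative sketch: casting a sample to a target type follows the same
+ * pattern as sample_process_cnv() further below:
+ *
+ *     sample_cast_fct cast = sample_casts[smp->data.type][SMP_T_STR];
+ *
+ *     if (!cast || (cast != c_none && !cast(smp)))
+ *             return 0; // not castable, or the cast itself failed
+ */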
+
+/* Process the converters (if any) for a sample expr after the first fetch
+ * keyword. We have two supported syntaxes for the converters, which can be
+ * combined:
+ * - comma-delimited list of converters just after the keyword and args ;
+ * - one converter per keyword (if <idx> != NULL)
+ * FIXME: should we continue to support this old syntax?
+ * The combination allows each keyword to be a comma-delimited
+ * series of converters.
+ *
+ * We want to process the former first, then the latter. For this we start
+ * from the beginning of the supposed place in the existing conv chain, which
+ * starts at the last comma (<start> which is then referred to as endt).
+ *
+ * If <endptr> is non-null, it will be set to the first unparsed character
+ * (which may be the final '\0') on success. If it is null, the expression
+ * must be properly terminated by a '\0' otherwise an error is reported.
+ *
+ * <expr> should point to the sample expression that is already initialized
+ * with the sample fetch that precedes the converter chain.
+ *
+ * The function returns a positive value for success and 0 for failure, in which
+ * case <err_msg> will point to an allocated string that brings some info
+ * about the failure. It is the caller's responsibility to free it.
+ */
+int sample_parse_expr_cnv(char **str, int *idx, char **endptr, char **err_msg, struct arg_list *al, const char *file, int line,
+ struct sample_expr *expr, const char *start)
+{
+ struct sample_conv *conv;
+ const char *endt = start; /* end of term */
+ const char *begw; /* beginning of word */
+ const char *endw; /* end of word */
+ char *ckw = NULL;
+ unsigned long prev_type = expr->fetch->out_type;
+ int success = 1;
+
+ while (1) {
+ struct sample_conv_expr *conv_expr;
+ int err_arg;
+ int argcnt;
+
+ if (*endt && *endt != ',') {
+ if (endptr) {
+ /* end found, let's stop here */
+ break;
+ }
+ if (ckw)
+ memprintf(err_msg, "missing comma after converter '%s'", ckw);
+ else
+ memprintf(err_msg, "missing comma after fetch keyword");
+ goto out_error;
+ }
+
+ /* FIXME: how long should we support such idiocies? Maybe we
+ * should already warn?
+ */
+ while (*endt == ',') /* skip trailing commas */
+ endt++;
+
+ begw = endt; /* start of converter */
+
+ if (!*begw) {
+ /* none ? skip to next string if idx is set */
+ if (!idx)
+ break; /* end of converters */
+ (*idx)++;
+ begw = str[*idx];
+ if (!begw || !*begw)
+ break;
+ }
+
+ for (endw = begw; is_idchar(*endw); endw++)
+ ;
+
+ ha_free(&ckw);
+ ckw = my_strndup(begw, endw - begw);
+
+ conv = find_sample_conv(begw, endw - begw);
+ if (!conv) {
+ /* we found an isolated keyword that we don't know, it's not ours */
+ if (idx && begw == str[*idx]) {
+ endt = begw;
+ break;
+ }
+ memprintf(err_msg, "unknown converter '%s'", ckw);
+ goto out_error;
+ }
+
+ if (conv->in_type >= SMP_TYPES || conv->out_type >= SMP_TYPES) {
+ memprintf(err_msg, "return type of converter '%s' is unknown", ckw);
+ goto out_error;
+ }
+
+ /* If impossible type conversion */
+ if (!sample_casts[prev_type][conv->in_type]) {
+ memprintf(err_msg, "converter '%s' cannot be applied", ckw);
+ goto out_error;
+ }
+
+ /* Ignore converters that output SMP_T_SAME if switching to them is
+ * conversion-free. (such converter's output match with input, thus only
+ * their input is considered)
+ */
+ if (conv->out_type != SMP_T_SAME)
+ prev_type = conv->out_type;
+ else if (sample_casts[prev_type][conv->in_type] != c_none)
+ prev_type = conv->in_type;
+
+ conv_expr = calloc(1, sizeof(*conv_expr));
+ if (!conv_expr)
+ goto out_error;
+
+ LIST_APPEND(&(expr->conv_exprs), &(conv_expr->list));
+ conv_expr->conv = conv;
+
+ if (al) {
+ al->kw = expr->fetch->kw;
+ al->conv = conv_expr->conv->kw;
+ }
+ argcnt = make_arg_list(endw, -1, conv->arg_mask, &conv_expr->arg_p, err_msg, &endt, &err_arg, al);
+ if (argcnt < 0) {
+ memprintf(err_msg, "invalid arg %d in converter '%s' : %s", err_arg+1, ckw, *err_msg);
+ goto out_error;
+ }
+
+ if (argcnt && !conv->arg_mask) {
+ memprintf(err_msg, "converter '%s' does not support any args", ckw);
+ goto out_error;
+ }
+
+ if (!conv_expr->arg_p)
+ conv_expr->arg_p = empty_arg_list;
+
+ if (conv->val_args && !conv->val_args(conv_expr->arg_p, conv, file, line, err_msg)) {
+ memprintf(err_msg, "invalid args in converter '%s' : %s", ckw, *err_msg);
+ goto out_error;
+ }
+ }
+
+ if (endptr) {
+ /* end found, let's stop here */
+ *endptr = (char *)endt;
+ }
+ out:
+ free(ckw);
+ return success;
+
+ out_error:
+ success = 0;
+ goto out;
+}
+
+/*
+ * Parse a sample expression configuration:
+ * fetch keyword followed by format conversion keywords.
+ *
+ * <al> is an arg_list serving as a list head to report missing dependencies.
+ * It may be NULL if such dependencies are not allowed. Otherwise, the caller
+ * must have set al->ctx if al is set.
+ *
+ * Returns a pointer on allocated sample expression structure or NULL in case
+ * of error, in which case <err_msg> will point to an allocated string that
+ * brings some info about the failure. It is the caller's responsibility to
+ * free it.
+ */
+struct sample_expr *sample_parse_expr(char **str, int *idx, const char *file, int line, char **err_msg, struct arg_list *al, char **endptr)
+{
+ const char *begw; /* beginning of word */
+ const char *endw; /* end of word */
+ const char *endt; /* end of term */
+ struct sample_expr *expr = NULL;
+ struct sample_fetch *fetch;
+ char *fkw = NULL;
+ int err_arg;
+
+ begw = str[*idx];
+ for (endw = begw; is_idchar(*endw); endw++)
+ ;
+
+ if (endw == begw) {
+ memprintf(err_msg, "missing fetch method");
+ goto out_error;
+ }
+
+ /* keep a copy of the current fetch keyword for error reporting */
+ fkw = my_strndup(begw, endw - begw);
+
+ fetch = find_sample_fetch(begw, endw - begw);
+ if (!fetch) {
+ memprintf(err_msg, "unknown fetch method '%s'", fkw);
+ goto out_error;
+ }
+
+ /* At this point, we have :
+ * - begw : beginning of the keyword
+ * - endw : end of the keyword, first character not part of keyword
+ */
+
+ if (fetch->out_type >= SMP_TYPES) {
+ memprintf(err_msg, "returns type of fetch method '%s' is unknown", fkw);
+ goto out_error;
+ }
+
+ expr = calloc(1, sizeof(*expr));
+ if (!expr)
+ goto out_error;
+
+ LIST_INIT(&(expr->conv_exprs));
+ expr->fetch = fetch;
+ expr->arg_p = empty_arg_list;
+
+ /* Note that we call the argument parser even with an empty string,
+ * this allows it to automatically create entries for mandatory
+ * implicit arguments (eg: local proxy name).
+ */
+ if (al) {
+ al->kw = expr->fetch->kw;
+ al->conv = NULL;
+ }
+ if (make_arg_list(endw, -1, fetch->arg_mask, &expr->arg_p, err_msg, &endt, &err_arg, al) < 0) {
+ memprintf(err_msg, "fetch method '%s' : %s", fkw, *err_msg);
+ goto out_error;
+ }
+
+ /* now endt is our first char not part of the arg list, typically the
+ * comma after the sample fetch name or after the closing parenthesis,
+ * or the NUL char.
+ */
+
+ if (!expr->arg_p) {
+ expr->arg_p = empty_arg_list;
+ }
+ else if (fetch->val_args && !fetch->val_args(expr->arg_p, err_msg)) {
+ memprintf(err_msg, "invalid args in fetch method '%s' : %s", fkw, *err_msg);
+ goto out_error;
+ }
+
+ if (!sample_parse_expr_cnv(str, idx, endptr, err_msg, al, file, line, expr, endt))
+ goto out_error;
+
+ out:
+ free(fkw);
+ return expr;
+
+out_error:
+ release_sample_expr(expr);
+ expr = NULL;
+ goto out;
+}
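+
+/* Illustrative sketch (file name and expression are arbitrary): parsing
+ * "src,ipmask(24)" as it would appear on a configuration line:
+ *
+ *     char *args[] = { "src,ipmask(24)", NULL };
+ *     int idx = 0;
+ *     char *err = NULL;
+ *     struct sample_expr *expr;
+ *
+ *     expr = sample_parse_expr(args, &idx, "haproxy.cfg", 1, &err, NULL, NULL);
+ *     if (!expr)
+ *             ha_free(&err); // <err> described the failure
+ */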
+
+/*
+ * Helper function to process the converter list of a given sample expression
+ * <expr> using the sample <p> (which is assumed to be properly initialized)
+ * as input.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int sample_process_cnv(struct sample_expr *expr, struct sample *p)
+{
+ struct sample_conv_expr *conv_expr;
+
+ list_for_each_entry(conv_expr, &expr->conv_exprs, list) {
+ /* we want to ensure that p->type can be casted into
+ * conv_expr->conv->in_type. We have 3 possibilities :
+ * - NULL => not castable.
+ * - c_none => nothing to do (let's optimize it)
+ * - other => apply cast and prepare to fail
+ */
+ if (!sample_casts[p->data.type][conv_expr->conv->in_type])
+ return 0;
+
+ if (sample_casts[p->data.type][conv_expr->conv->in_type] != c_none &&
+ !sample_casts[p->data.type][conv_expr->conv->in_type](p))
+ return 0;
+
+ /* OK cast succeeded */
+
+ if (!conv_expr->conv->process(conv_expr->arg_p, p, conv_expr->conv->private))
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Process a fetch + format conversion as defined by the sample expression
+ * <expr> on request or response considering the <opt> parameter.
+ * Returns a pointer to a typed sample structure containing the result, or
+ * NULL if the sample is not found or when the format conversion failed.
+ * If <p> is not null, the function returns its result in the structure
+ * pointed to by <p>. If <p> is null, it returns a pointer to a static
+ * (thread-local) sample structure.
+ *
+ * Note: the fetch functions are required to properly set the return type. The
+ * conversion functions must do so too. However the cast functions do not need
+ * to since they're made to cast multiple types according to what is required.
+ *
+ * The caller may indicate in <opt> if it considers the result final or not.
+ * The caller needs to check the SMP_F_MAY_CHANGE flag in p->flags to verify
+ * if the result is stable or not, according to the following table :
+ *
+ * return MAY_CHANGE FINAL Meaning for the sample
+ * NULL 0 * Not present and will never be (eg: header)
+ * NULL 1 0 Not present yet, could change (eg: POST param)
+ * NULL 1 1 Not present yet, will not change anymore
+ * smp 0 * Present and will not change (eg: header)
+ * smp 1 0 Present, may change (eg: request length)
+ * smp 1 1 Present, last known value (eg: request length)
+ */
+struct sample *sample_process(struct proxy *px, struct session *sess,
+ struct stream *strm, unsigned int opt,
+ struct sample_expr *expr, struct sample *p)
+{
+ if (p == NULL) {
+ p = &temp_smp;
+ memset(p, 0, sizeof(*p));
+ }
+
+ smp_set_owner(p, px, sess, strm, opt);
+ if (!expr->fetch->process(expr->arg_p, p, expr->fetch->kw, expr->fetch->private))
+ return NULL;
+
+ if (!sample_process_cnv(expr, p))
+ return NULL;
+ return p;
+}
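+
+/* Illustrative sketch: evaluating a parsed expression at runtime, letting the
+ * function use its static per-thread sample:
+ *
+ *     struct sample *smp;
+ *
+ *     smp = sample_process(px, sess, strm, SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
+ *                          expr, NULL);
+ *     if (!smp)
+ *             return 0; // not found, or a conversion failed
+ */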
+
+/*
+ * Resolve all remaining arguments in proxy <p>. Returns the number of
+ * errors or 0 if everything is fine. If at least one error is met, it will
+ * be appended to *err. If *err==NULL it will be allocated first.
+ */
+int smp_resolve_args(struct proxy *p, char **err)
+{
+ struct arg_list *cur, *bak;
+ const char *ctx, *where;
+ const char *conv_ctx, *conv_pre, *conv_pos;
+ struct userlist *ul;
+ struct my_regex *reg;
+ struct arg *arg;
+ int cfgerr = 0;
+ int rflags;
+
+ list_for_each_entry_safe(cur, bak, &p->conf.args.list, list) {
+ struct proxy *px;
+ struct server *srv;
+ struct stktable *t;
+ char *pname, *sname, *stktname;
+ char *err2;
+
+ arg = cur->arg;
+
+ /* prepare output messages */
+ conv_pre = conv_pos = conv_ctx = "";
+ if (cur->conv) {
+ conv_ctx = cur->conv;
+ conv_pre = "conversion keyword '";
+ conv_pos = "' for ";
+ }
+
+ where = "in";
+ ctx = "sample fetch keyword";
+ switch (cur->ctx) {
+ case ARGC_STK: where = "in stick rule in"; break;
+ case ARGC_TRK: where = "in tracking rule in"; break;
+ case ARGC_LOG: where = "in log-format string in"; break;
+ case ARGC_LOGSD: where = "in log-format-sd string in"; break;
+ case ARGC_HRQ: where = "in http-request expression in"; break;
+ case ARGC_HRS: where = "in http-response response in"; break;
+ case ARGC_UIF: where = "in unique-id-format string in"; break;
+ case ARGC_RDR: where = "in redirect format string in"; break;
+ case ARGC_CAP: where = "in capture rule in"; break;
+ case ARGC_ACL: ctx = "ACL keyword"; break;
+ case ARGC_SRV: where = "in server directive in"; break;
+ case ARGC_SPOE: where = "in spoe-message directive in"; break;
+ case ARGC_UBK: where = "in use_backend expression in"; break;
+ case ARGC_USRV: where = "in use-server or balance expression in"; break;
+ case ARGC_HERR: where = "in http-error directive in"; break;
+ case ARGC_OT: where = "in ot-scope directive in"; break;
+ case ARGC_OPT: where = "in option directive in"; break;
+ case ARGC_TCO: where = "in tcp-request connection expression in"; break;
+ case ARGC_TSE: where = "in tcp-request session expression in"; break;
+ case ARGC_TRQ: where = "in tcp-request content expression in"; break;
+ case ARGC_TRS: where = "in tcp-response content expression in"; break;
+ case ARGC_TCK: where = "in tcp-check expression in"; break;
+ case ARGC_CFG: where = "in configuration expression in"; break;
+ case ARGC_CLI: where = "in CLI expression in"; break;
+ }
+
+ /* set a few default settings */
+ px = p;
+ pname = p->id;
+
+ switch (arg->type) {
+ case ARGT_SRV:
+ if (!arg->data.str.data) {
+ memprintf(err, "%sparsing [%s:%d]: missing server name in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ continue;
+ }
+
+ /* we support two formats : "bck/srv" and "srv" */
+ sname = strrchr(arg->data.str.area, '/');
+
+ if (sname) {
+ *sname++ = '\0';
+ pname = arg->data.str.area;
+
+ px = proxy_be_by_name(pname);
+ if (!px) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find proxy '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+ }
+ else {
+ if (px->cap & PR_CAP_DEF) {
+ memprintf(err, "%sparsing [%s:%d]: backend name must be set in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+ sname = arg->data.str.area;
+ }
+
+ srv = findserver(px, sname);
+ if (!srv) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find server '%s' in proxy '%s', referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, sname, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ srv->flags |= SRV_F_NON_PURGEABLE;
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.srv = srv;
+ break;
+
+ case ARGT_FE:
+ if (arg->data.str.data) {
+ pname = arg->data.str.area;
+ px = proxy_fe_by_name(pname);
+ }
+
+ if (!px) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find frontend '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!(px->cap & PR_CAP_FE)) {
+ memprintf(err, "%sparsing [%s:%d]: proxy '%s', referenced in arg %d of %s%s%s%s '%s' %s proxy '%s', has not frontend capability.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.prx = px;
+ break;
+
+ case ARGT_BE:
+ if (arg->data.str.data) {
+ pname = arg->data.str.area;
+ px = proxy_be_by_name(pname);
+ }
+
+ if (!px) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find backend '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!(px->cap & PR_CAP_BE)) {
+ memprintf(err, "%sparsing [%s:%d]: proxy '%s', referenced in arg %d of %s%s%s%s '%s' %s proxy '%s', has not backend capability.\n",
+ *err ? *err : "", cur->file, cur->line, pname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.prx = px;
+ break;
+
+ case ARGT_TAB:
+ if (arg->data.str.data)
+ stktname = arg->data.str.area;
+ else {
+ if (px->cap & PR_CAP_DEF) {
+ memprintf(err, "%sparsing [%s:%d]: table name must be set in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+ stktname = px->id;
+ }
+
+ t = stktable_find_by_name(stktname);
+ if (!t) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find table '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, stktname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!t->size) {
+ memprintf(err, "%sparsing [%s:%d]: no table in proxy '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line, stktname,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (!in_proxies_list(t->proxies_list, p)) {
+ p->next_stkt_ref = t->proxies_list;
+ t->proxies_list = p;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.t = t;
+ break;
+
+ case ARGT_USR:
+ if (!arg->data.str.data) {
+ memprintf(err, "%sparsing [%s:%d]: missing userlist name in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ if (p->uri_auth && p->uri_auth->userlist &&
+ strcmp(p->uri_auth->userlist->name, arg->data.str.area) == 0)
+ ul = p->uri_auth->userlist;
+ else
+ ul = auth_find_userlist(arg->data.str.area);
+
+ if (!ul) {
+ memprintf(err, "%sparsing [%s:%d]: unable to find userlist '%s' referenced in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ arg->data.str.area,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ break;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.usr = ul;
+ break;
+
+ case ARGT_REG:
+ if (!arg->data.str.data) {
+ memprintf(err, "%sparsing [%s:%d]: missing regex in arg %d of %s%s%s%s '%s' %s proxy '%s'.\n",
+ *err ? *err : "", cur->file, cur->line,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id);
+ cfgerr++;
+ continue;
+ }
+
+ rflags = 0;
+ rflags |= (arg->type_flags & ARGF_REG_ICASE) ? REG_ICASE : 0;
+ err2 = NULL;
+
+ if (!(reg = regex_comp(arg->data.str.area, !(rflags & REG_ICASE), 1 /* capture substr */, &err2))) {
+ memprintf(err, "%sparsing [%s:%d]: error in regex '%s' in arg %d of %s%s%s%s '%s' %s proxy '%s' : %s.\n",
+ *err ? *err : "", cur->file, cur->line,
+ arg->data.str.area,
+ cur->arg_pos + 1, conv_pre, conv_ctx, conv_pos, ctx, cur->kw, where, p->id, err2);
+ cfgerr++;
+ continue;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->unresolved = 0;
+ arg->data.reg = reg;
+ break;
+ }
+
+ LIST_DELETE(&cur->list);
+ free(cur);
+ } /* end of args processing */
+
+ return cfgerr;
+}
+
+/*
+ * Process a fetch + format conversion as defined by the sample expression
+ * <expr> on request or response considering the <opt> parameter. The output is
+ * not explicitly set to <smp_type>, but shall be compatible with it as
+ * specified by 'sample_casts' table. If a stable sample can be fetched, or an
+ * unstable one when <opt> contains SMP_OPT_FINAL, the sample is converted and
+ * returned without the SMP_F_MAY_CHANGE flag. If an unstable sample is found
+ * and <opt> does not contain SMP_OPT_FINAL, then the sample is returned as-is
+ * with its SMP_F_MAY_CHANGE flag so that the caller can check it and decide to
+ * take actions (eg: wait longer). If a sample could not be found or could not
+ * be converted, NULL is returned. The caller MUST NOT use the sample if the
+ * SMP_F_MAY_CHANGE flag is present, as it is used only as a hint that there is
+ * still hope to get it after waiting longer, and is not converted to string.
+ * The possible output combinations are the following:
+ *
+ * return MAY_CHANGE FINAL Meaning for the sample
+ * NULL * * Not present and will never be (eg: header)
+ * smp 0 * Final value converted (eg: header)
+ * smp 1 0 Not present yet, may appear later (eg: header)
+ * smp 1 1 never happens (either flag is cleared on output)
+ */
+struct sample *sample_fetch_as_type(struct proxy *px, struct session *sess,
+ struct stream *strm, unsigned int opt,
+ struct sample_expr *expr, int smp_type)
+{
+ struct sample *smp = &temp_smp;
+
+ memset(smp, 0, sizeof(*smp));
+
+ if (!sample_process(px, sess, strm, opt, expr, smp)) {
+ if ((smp->flags & SMP_F_MAY_CHANGE) && !(opt & SMP_OPT_FINAL))
+ return smp;
+ return NULL;
+ }
+
+ if (!sample_casts[smp->data.type][smp_type])
+ return NULL;
+
+ if (sample_casts[smp->data.type][smp_type] != c_none &&
+ !sample_casts[smp->data.type][smp_type](smp))
+ return NULL;
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ return smp;
+}
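+
+/* Usage sketch (illustrative only): how a caller is expected to honour the
+ * combinations documented above. give_up(), wait_and_retry() and
+ * use_string() are hypothetical placeholders:
+ *
+ *   smp = sample_fetch_as_type(px, sess, strm, opt, expr, SMP_T_STR);
+ *   if (!smp)
+ *           give_up();         // not present and never will be
+ *   else if (smp->flags & SMP_F_MAY_CHANGE)
+ *           wait_and_retry();  // not present yet, may appear later
+ *   else
+ *           use_string(smp);   // final value, converted to SMP_T_STR
+ */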
+
+static void release_sample_arg(struct arg *p)
+{
+ struct arg *p_back = p;
+
+ if (!p)
+ return;
+
+ while (p->type != ARGT_STOP) {
+ if (p->type == ARGT_STR || p->unresolved) {
+ chunk_destroy(&p->data.str);
+ p->unresolved = 0;
+ }
+ else if (p->type == ARGT_REG) {
+ regex_free(p->data.reg);
+ p->data.reg = NULL;
+ }
+ p++;
+ }
+
+ if (p_back != empty_arg_list)
+ free(p_back);
+}
+
+void release_sample_expr(struct sample_expr *expr)
+{
+ struct sample_conv_expr *conv_expr, *conv_exprb;
+
+ if (!expr)
+ return;
+
+ list_for_each_entry_safe(conv_expr, conv_exprb, &expr->conv_exprs, list) {
+ LIST_DELETE(&conv_expr->list);
+ release_sample_arg(conv_expr->arg_p);
+ free(conv_expr);
+ }
+
+ release_sample_arg(expr->arg_p);
+ free(expr);
+}
+
+/*****************************************************************/
+/* Sample format convert functions */
+/* These functions set the data type on return. */
+/*****************************************************************/
+
+static int sample_conv_debug(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int i;
+ struct sample tmp;
+ struct buffer *buf;
+ struct sink *sink;
+ struct ist line;
+ char *pfx;
+
+ buf = alloc_trash_chunk();
+ if (!buf)
+ goto end;
+
+ sink = (struct sink *)arg_p[1].data.ptr;
+ BUG_ON(!sink);
+
+ pfx = arg_p[0].data.str.area;
+ BUG_ON(!pfx);
+
+ chunk_printf(buf, "[debug] %s: type=%s ", pfx, smp_to_type[smp->data.type]);
+ if (!sample_casts[smp->data.type][SMP_T_STR])
+ goto nocast;
+
+ /* Copy sample fetch. This puts the sample as const, the
+ * cast will copy data if a transformation is required.
+ */
+ memcpy(&tmp, smp, sizeof(struct sample));
+ tmp.flags = SMP_F_CONST;
+
+ if (!sample_casts[smp->data.type][SMP_T_STR](&tmp))
+ goto nocast;
+
+ /* Display only the printable chars; others are replaced with '.'. */
+ b_putchr(buf, '<');
+ for (i = 0; i < tmp.data.u.str.data; i++) {
+ if (isprint((unsigned char)tmp.data.u.str.area[i]))
+ b_putchr(buf, tmp.data.u.str.area[i]);
+ else
+ b_putchr(buf, '.');
+ }
+ b_putchr(buf, '>');
+
+ done:
+ line = ist2(buf->area, buf->data);
+ sink_write(sink, LOG_HEADER_NONE, 0, &line, 1);
+ end:
+ free_trash_chunk(buf);
+ return 1;
+ nocast:
+ chunk_appendf(buf, "(undisplayable)");
+ goto done;
+}
+
+// This function checks the "debug" converter's arguments.
+static int smp_check_debug(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ const char *name = "buf0";
+ struct sink *sink = NULL;
+
+ if (args[0].type != ARGT_STR) {
+ /* optional prefix */
+ args[0].data.str.area = "";
+ args[0].data.str.data = 0;
+ }
+
+ if (args[1].type == ARGT_STR)
+ name = args[1].data.str.area;
+
+ sink = sink_find(name);
+ if (!sink) {
+ memprintf(err, "No such sink '%s'", name);
+ return 0;
+ }
+
+ chunk_destroy(&args[1].data.str);
+ args[1].type = ARGT_PTR;
+ args[1].data.ptr = sink;
+ return 1;
+}
+
+static int sample_conv_base642bin(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int bin_len;
+
+ trash->data = 0;
+ bin_len = base64dec(smp->data.u.str.area, smp->data.u.str.data,
+ trash->area, trash->size);
+ if (bin_len < 0)
+ return 0;
+
+ trash->data = bin_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int sample_conv_base64url2bin(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int bin_len;
+
+ trash->data = 0;
+ bin_len = base64urldec(smp->data.u.str.area, smp->data.u.str.data,
+ trash->area, trash->size);
+ if (bin_len < 0)
+ return 0;
+
+ trash->data = bin_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int sample_conv_bin2base64(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int b64_len;
+
+ trash->data = 0;
+ b64_len = a2base64(smp->data.u.str.area, smp->data.u.str.data,
+ trash->area, trash->size);
+ if (b64_len < 0)
+ return 0;
+
+ trash->data = b64_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int sample_conv_bin2base64url(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int b64_len;
+
+ trash->data = 0;
+ b64_len = a2base64url(smp->data.u.str.area, smp->data.u.str.data,
+ trash->area, trash->size);
+ if (b64_len < 0)
+ return 0;
+
+ trash->data = b64_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/* This function returns a sample struct filled with the conversion of variable
+ * <var> to sample type <type> (SMP_T_*), via a cast to the target type. If the
+ * variable cannot be retrieved or casted, 0 is returned, otherwise 1.
+ *
+ * Keep in mind that the sample content may be written to a pre-allocated
+ * trash chunk as returned by get_trash_chunk().
+ */
+int sample_conv_var2smp(const struct var_desc *var, struct sample *smp, int type)
+{
+ if (!vars_get_by_desc(var, smp, NULL))
+ return 0;
+ if (!sample_casts[smp->data.type][type])
+ return 0;
+ if (!sample_casts[smp->data.type][type](smp))
+ return 0;
+ return 1;
+}
+
+static int sample_conv_sha1(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ blk_SHA_CTX ctx;
+ struct buffer *trash = get_trash_chunk();
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ blk_SHA1_Init(&ctx);
+ blk_SHA1_Update(&ctx, smp->data.u.str.area, smp->data.u.str.data);
+ blk_SHA1_Final((unsigned char *) trash->area, &ctx);
+
+ trash->data = 20;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/* This function returns a sample struct filled with an <arg> content.
+ * If the <arg> contains a string, it is returned in the sample flagged as
+ * SMP_F_CONST. If the <arg> contains a variable descriptor, the sample is
+ * filled with the content of the variable by using vars_get_by_desc().
+ *
+ * Keep in mind that the sample content may be written to a pre-allocated
+ * trash chunk as returned by get_trash_chunk().
+ *
+ * This function returns 0 if an error occurs, otherwise it returns 1.
+ */
+int sample_conv_var2smp_str(const struct arg *arg, struct sample *smp)
+{
+ switch (arg->type) {
+ case ARGT_STR:
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = arg->data.str;
+ smp->flags = SMP_F_CONST;
+ return 1;
+ case ARGT_VAR:
+ return sample_conv_var2smp(&arg->data.var, smp, SMP_T_STR);
+ default:
+ return 0;
+ }
+}
+
+static int sample_conv_be2dec_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (args[1].data.sint <= 0 || args[1].data.sint > sizeof(unsigned long long)) {
+ memprintf(err, "chunk_size out of [1..%u] range (%lld)", (uint)sizeof(unsigned long long), args[1].data.sint);
+ return 0;
+ }
+
+ if (args[2].data.sint != 0 && args[2].data.sint != 1) {
+ memprintf(err, "Unsupported truncate value (%lld)", args[2].data.sint);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Converts big-endian binary input sample to a string containing an unsigned
+ * integer number per <chunk_size> input bytes separated with <separator>.
+ * Optional <truncate> flag indicates if input is truncated at <chunk_size>
+ * boundaries.
+ * Arguments: separator (string), chunk_size (integer), truncate (0,1)
+ */
+static int sample_conv_be2dec(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ const int last = args[2].data.sint ? smp->data.u.str.data - args[1].data.sint + 1 : smp->data.u.str.data;
+ int max_size = trash->size - 2;
+ int i;
+ int start;
+ int ptr = 0;
+ unsigned long long number;
+ char *pos;
+
+ trash->data = 0;
+
+ while (ptr < last && trash->data <= max_size) {
+ start = trash->data;
+ if (ptr) {
+ /* Add separator */
+ memcpy(trash->area + trash->data, args[0].data.str.area, args[0].data.str.data);
+ trash->data += args[0].data.str.data;
+ }
+ else
+ max_size -= args[0].data.str.data;
+
+ /* Add integer */
+ for (number = 0, i = 0; i < args[1].data.sint && ptr < smp->data.u.str.data; i++)
+ number = (number << 8) + (unsigned char)smp->data.u.str.area[ptr++];
+
+ pos = ulltoa(number, trash->area + trash->data, trash->size - trash->data);
+ if (pos)
+ trash->data = pos - trash->area;
+ else {
+ trash->data = start;
+ break;
+ }
+ }
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
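+
+/* Illustrative examples (not from the sources), assuming the "bin" sample
+ * fetch to build a 7-byte input:
+ *
+ *   bin(01020304050607),be2dec(:,2,0)  ->  "258:772:1286:7"
+ *   bin(01020304050607),be2dec(:,2,1)  ->  "258:772:1286"
+ *
+ * i.e. 0x0102 = 258, 0x0304 = 772, 0x0506 = 1286; with <truncate> set, the
+ * trailing odd byte is dropped.
+ */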
+
+static int sample_conv_be2hex_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (args[1].data.sint <= 0 && (args[0].data.str.data > 0 || args[2].data.sint != 0)) {
+ memprintf(err, "chunk_size needs to be positive (%lld)", args[1].data.sint);
+ return 0;
+ }
+
+ if (args[2].data.sint != 0 && args[2].data.sint != 1) {
+ memprintf(err, "Unsupported truncate value (%lld)", args[2].data.sint);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Converts big-endian binary input sample to a hex string containing two hex
+ * digits per input byte. <separator> is put every <chunk_size> binary input
+ * bytes if specified. Optional <truncate> flag indicates if input is truncated
+ * at <chunk_size> boundaries.
+ * Arguments: separator (string), chunk_size (integer), truncate (0,1)
+ */
+static int sample_conv_be2hex(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int chunk_size = args[1].data.sint;
+ const int last = args[2].data.sint ? smp->data.u.str.data - chunk_size + 1 : smp->data.u.str.data;
+ int i;
+ int max_size;
+ int ptr = 0;
+ unsigned char c;
+
+ trash->data = 0;
+ if (args[0].data.str.data == 0 && args[2].data.sint == 0)
+ chunk_size = smp->data.u.str.data;
+ max_size = trash->size - 2 * chunk_size;
+
+ while (ptr < last && trash->data <= max_size) {
+ if (ptr) {
+ /* Add separator */
+ memcpy(trash->area + trash->data, args[0].data.str.area, args[0].data.str.data);
+ trash->data += args[0].data.str.data;
+ }
+ else
+ max_size -= args[0].data.str.data;
+
+ /* Add hex */
+ for (i = 0; i < chunk_size && ptr < smp->data.u.str.data; i++) {
+ c = smp->data.u.str.area[ptr++];
+ trash->area[trash->data++] = hextab[(c >> 4) & 0xF];
+ trash->area[trash->data++] = hextab[c & 0xF];
+ }
+ }
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
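+
+/* Illustrative examples (not from the sources), using the same 7-byte input
+ * as the be2dec example above:
+ *
+ *   bin(01020304050607),be2hex(:,2,0)  ->  "0102:0304:0506:07"
+ *   bin(01020304050607),be2hex(:,2,1)  ->  "0102:0304:0506"
+ */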
+
+static int sample_conv_bin2hex(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ unsigned char c;
+ int ptr = 0;
+
+ trash->data = 0;
+ while (ptr < smp->data.u.str.data && trash->data <= trash->size - 2) {
+ c = smp->data.u.str.area[ptr++];
+ trash->area[trash->data++] = hextab[(c >> 4) & 0xF];
+ trash->area[trash->data++] = hextab[c & 0xF];
+ }
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int sample_conv_hex2int(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ long long int n = 0;
+ int i, c;
+
+ for (i = 0; i < smp->data.u.str.data; i++) {
+ if ((c = hex2i(smp->data.u.str.area[i])) < 0)
+ return 0;
+ n = (n << 4) + c;
+ }
+
+ smp->data.u.sint = n;
+ smp->data.type = SMP_T_SINT;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+/* hashes the binary input into a 32-bit unsigned int */
+static int sample_conv_djb2(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_djb2(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+static int sample_conv_length(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int i = smp->data.u.str.data;
+ smp->data.u.sint = i;
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+
+static int sample_conv_str2lower(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int i;
+
+ if (!smp_make_rw(smp))
+ return 0;
+
+ for (i = 0; i < smp->data.u.str.data; i++) {
+ if ((smp->data.u.str.area[i] >= 'A') && (smp->data.u.str.area[i] <= 'Z'))
+ smp->data.u.str.area[i] += 'a' - 'A';
+ }
+ return 1;
+}
+
+static int sample_conv_str2upper(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int i;
+
+ if (!smp_make_rw(smp))
+ return 0;
+
+ for (i = 0; i < smp->data.u.str.data; i++) {
+ if ((smp->data.u.str.area[i] >= 'a') && (smp->data.u.str.area[i] <= 'z'))
+ smp->data.u.str.area[i] += 'A' - 'a';
+ }
+ return 1;
+}
+
+/* takes the IPv4 mask in args[0] and an optional IPv6 mask in args[1] */
+static int sample_conv_ipmask(const struct arg *args, struct sample *smp, void *private)
+{
+ /* Attempt to convert to IPv4 to apply the correct mask. */
+ c_ipv62ip(smp);
+
+ if (smp->data.type == SMP_T_IPV4) {
+ smp->data.u.ipv4.s_addr &= args[0].data.ipv4.s_addr;
+ smp->data.type = SMP_T_IPV4;
+ }
+ else if (smp->data.type == SMP_T_IPV6) {
+ /* IPv6 cannot be converted without an IPv6 mask. */
+ if (args[1].type != ARGT_IPV6)
+ return 0;
+
+ write_u64(&smp->data.u.ipv6.s6_addr[0],
+ read_u64(&smp->data.u.ipv6.s6_addr[0]) & read_u64(&args[1].data.ipv6.s6_addr[0]));
+ write_u64(&smp->data.u.ipv6.s6_addr[8],
+ read_u64(&smp->data.u.ipv6.s6_addr[8]) & read_u64(&args[1].data.ipv6.s6_addr[8]));
+ smp->data.type = SMP_T_IPV6;
+ }
+
+ return 1;
+}
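+
+/* Usage sketch (illustrative only): grouping clients by /24 (IPv4) or /64
+ * (IPv6) networks, for instance as a stick-table key:
+ *
+ *   http-request track-sc0 src,ipmask(24,64)
+ */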
+
+/*
+ * This function implements a conversion specifier seeker for %N so it can be
+ * replaced before calling strftime().
+ *
+ * <format> is the input format string which is used as a haystack
+ *
+ * The function fills multiple variables:
+ * <skip> is the length of the conversion specifier which was found (ex: strlen("%N"): 2, strlen("%3N"): 3, strlen("%123N"): 5)
+ * <width> is the width argument, defaulting to 9 (ex: %3N: 3, %4N: 4, %5N: 5, %N: 9)
+ *
+ * Returns a ptr to the first character of the conversion specifier or NULL if not found
+ */
+static const char *lookup_convspec_N(const char *format, int *skip, int *width)
+{
+ const char *p, *needle;
+ const char *digits;
+ int state;
+
+ p = format;
+
+ /* this looks for '%' in a loop. The iteration stops when a %N conversion
+ * specifier is found or there is no '%' anymore */
+lookagain:
+ while (p && *p) {
+ state = 0;
+ digits = NULL;
+
+ p = needle = strchr(p, '%');
+ /* Once we find a % we try to move forward in the string
+ *
+ * state 0: found %
+ * state 1: digits (width)
+ * state 2: N
+ */
+ while (p && *p) {
+ switch (state) {
+ case 0:
+ state = 1;
+ break;
+
+ case 1:
+ if (isdigit((unsigned char)*p) && !digits) /* set the start of the digits */
+ digits = p;
+
+ if (isdigit((unsigned char)*p))
+ break;
+ else
+ state = 2;
+ /* if this is not a number anymore, we
+ * don't want to increment p but try the
+ * next state directly */
+ __fallthrough;
+ case 2:
+ if (*p == 'N')
+ goto found;
+ else
+ /* this was not a %N, start again */
+ goto lookagain;
+ break;
+ }
+ p++;
+ }
+ }
+
+ *skip = 0;
+ *width = 0;
+ return NULL;
+
+found:
+ *skip = p - needle + 1;
+ if (digits)
+ *width = atoi(digits);
+ else
+ *width = 9;
+ return needle;
+}
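+
+/* Illustrative results (not from the sources) for a few haystacks:
+ *
+ *   lookup_convspec_N("%N ago", &skip, &width)  -> ptr to '%', skip=2, width=9
+ *   lookup_convspec_N("t=%5N",  &skip, &width)  -> ptr to '%', skip=3, width=5
+ *   lookup_convspec_N("%H:%M",  &skip, &width)  -> NULL, skip=0, width=0
+ */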
+
+/*
+ * strftime(3) does not implement nanoseconds, but we still want them in our
+ * date format.
+ *
+ * This function implements %N like in date(1), which gives the nanoseconds
+ * part of the timestamp. An optional field width can be specified; a maximum
+ * width of 9 is supported (ex: %3N %6N %9N)
+ *
+ * <format> is the format string
+ * <curr_date> is the date in seconds since the epoch
+ * <ns> is only the nanoseconds part of the timestamp
+ * <local> chooses local time instead of UTC
+ *
+ * Returns the result of strftime() in the trash buffer
+ */
+static struct buffer *conv_time_common(const char *format, time_t curr_date, uint64_t ns, int local)
+{
+ struct buffer *tmp_format = NULL;
+ struct buffer *res = NULL;
+ struct tm tm;
+ const char *p;
+ char ns_str[10] = {};
+ int set = 0;
+
+ if (local)
+ get_localtime(curr_date, &tm);
+ else
+ get_gmtime(curr_date, &tm);
+
+
+ /* we need to iterate in order to replace all the %N in the string */
+
+ p = format;
+ while (*p) {
+ const char *needle;
+ int skip = 0;
+ int cpy = 0;
+ int width = 0;
+
+ /* look for the next %N conversion specifier */
+ if (!(needle = lookup_convspec_N(p, &skip, &width)))
+ break;
+
+ if (width > 9) /* we don't handle more than 9 */
+ width = 9;
+ cpy = needle - p;
+
+ if (!tmp_format)
+ tmp_format = alloc_trash_chunk();
+ if (!tmp_format)
+ goto error;
+
+ if (set != 9) /* if the snprintf wasn't done yet */
+ set = snprintf(ns_str, sizeof(ns_str), "%.9llu", (unsigned long long)ns);
+
+ if (chunk_istcat(tmp_format, ist2(p, cpy)) == 0) /* copy before the %N */
+ goto error;
+ if (chunk_istcat(tmp_format, ist2(ns_str, width)) == 0) /* copy the %N result with the right precision */
+ goto error;
+
+ p += skip + cpy; /* skip the %N */
+ }
+
+
+ if (tmp_format) { /* %N was found */
+ if (chunk_strcat(tmp_format, p) == 0) /* copy the end of the string if needed or just the \0 */
+ goto error;
+ res = get_trash_chunk();
+ res->data = strftime(res->area, res->size, tmp_format->area , &tm);
+ } else {
+ res = get_trash_chunk();
+ res->data = strftime(res->area, res->size, format, &tm);
+ }
+
+error:
+ free_trash_chunk(tmp_format);
+ return res;
+}
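+
+/* Illustrative example (not from the sources): with format "%H:%M:%S.%3N",
+ * a curr_date resolving to 12:34:56 and ns = 123456789, the %N specifier is
+ * rewritten first:
+ *
+ *   tmp_format = "%H:%M:%S.123"    (first 3 digits of "123456789")
+ *
+ * and strftime() then yields "12:34:56.123".
+ */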
+
+
+
+/*
+ * same as sample_conv_ltime but input is us and %N is supported
+ */
+static int sample_conv_us_ltime(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ time_t curr_date = smp->data.u.sint / 1000000; /* convert us to s */
+ uint64_t ns = (smp->data.u.sint % 1000000) * 1000; /* us part to ns */
+
+ /* add offset */
+ if (args[1].type == ARGT_SINT)
+ curr_date += args[1].data.sint;
+
+ temp = conv_time_common(args[0].data.str.area, curr_date, ns, 1);
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
+
+/*
+ * same as sample_conv_ltime but input is ms and %N is supported
+ */
+static int sample_conv_ms_ltime(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ time_t curr_date = smp->data.u.sint / 1000; /* convert ms to s */
+ uint64_t ns = (smp->data.u.sint % 1000) * 1000000; /* ms part to ns */
+
+ /* add offset */
+ if (args[1].type == ARGT_SINT)
+ curr_date += args[1].data.sint;
+
+ temp = conv_time_common(args[0].data.str.area, curr_date, ns, 1);
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
+
+
+/* takes a UINT value on input, supposed to represent the time since epoch,
+ * adds an optional offset found in args[1] and emits a string representing
+ * the local time in the format specified in args[0] using strftime().
+ */
+static int sample_conv_ltime(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ /* With high numbers, the date returned can be negative; the 55-bit mask prevents this. */
+ time_t curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
+ struct tm tm;
+
+ /* add offset */
+ if (args[1].type == ARGT_SINT)
+ curr_date += args[1].data.sint;
+
+ get_localtime(curr_date, &tm);
+
+ temp = get_trash_chunk();
+ temp->data = strftime(temp->area, temp->size, args[0].data.str.area, &tm);
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
+
+/* hashes the binary input into a 32-bit unsigned int */
+static int sample_conv_sdbm(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_sdbm(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/*
+ * same as sample_conv_utime but input is us and %N is supported
+ */
+static int sample_conv_us_utime(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ time_t curr_date = smp->data.u.sint / 1000000; /* convert us to s */
+ uint64_t ns = (smp->data.u.sint % 1000000) * 1000; /* us part to ns */
+
+ /* add offset */
+ if (args[1].type == ARGT_SINT)
+ curr_date += args[1].data.sint;
+
+ temp = conv_time_common(args[0].data.str.area, curr_date, ns, 0);
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
+
+/*
+ * same as sample_conv_utime but input is ms and %N is supported
+ */
+static int sample_conv_ms_utime(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ time_t curr_date = smp->data.u.sint / 1000; /* convert ms to s */
+ uint64_t ns = (smp->data.u.sint % 1000) * 1000000; /* ms part to ns */
+
+ /* add offset */
+ if (args[1].type == ARGT_SINT)
+ curr_date += args[1].data.sint;
+
+ temp = conv_time_common(args[0].data.str.area, curr_date, ns, 0);
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
+
+/* takes a UINT value on input, supposed to represent the time since epoch,
+ * adds an optional offset found in args[1] and emits a string representing
+ * the UTC date in the format specified in args[0] using strftime().
+ */
+static int sample_conv_utime(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ /* With high numbers, the date returned can be negative; the 55-bit mask prevents this. */
+ time_t curr_date = smp->data.u.sint & 0x007fffffffffffffLL;
+ struct tm tm;
+
+ /* add offset */
+ if (args[1].type == ARGT_SINT)
+ curr_date += args[1].data.sint;
+
+ get_gmtime(curr_date, &tm);
+
+ temp = get_trash_chunk();
+ temp->data = strftime(temp->area, temp->size, args[0].data.str.area, &tm);
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+ return 1;
+}
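+
+/* Usage sketch (illustrative only), assuming the "date" sample fetch which
+ * returns seconds since the epoch:
+ *
+ *   http-response set-header X-Day %[date,utime(%Y%m%d)]
+ *
+ * ltime() does the same using the local timezone, and the ms_/us_ variants
+ * above accept milli/microsecond inputs plus %N in the format.
+ */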
+
+/* hashes the binary input into a 32-bit unsigned int */
+static int sample_conv_wt6(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_wt6(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* hashes the binary input into a 32-bit unsigned int using xxh.
+ * The seed of the hash defaults to 0 but can be changed in argument 1.
+ */
+static int sample_conv_xxh32(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned int seed;
+
+ if (arg_p->data.sint)
+ seed = arg_p->data.sint;
+ else
+ seed = 0;
+ smp->data.u.sint = XXH32(smp->data.u.str.area, smp->data.u.str.data,
+ seed);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* hashes the binary input into a 64-bit unsigned int using xxh.
+ * XXH64() returns a 64-bit unsigned integer, but the sample storage
+ * of haproxy only offers 64-bit signed integers, so the value is cast
+ * to signed. This cast doesn't affect the hash distribution.
+ * The seed of the hash defaults to 0 but can be changed in argument 1.
+ */
+static int sample_conv_xxh64(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned long long int seed;
+
+ if (arg_p->data.sint)
+ seed = (unsigned long long int)arg_p->data.sint;
+ else
+ seed = 0;
+ smp->data.u.sint = (long long int)XXH64(smp->data.u.str.area,
+ smp->data.u.str.data, seed);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+static int sample_conv_xxh3(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned long long int seed;
+
+ if (arg_p->data.sint)
+ seed = (unsigned long long int)arg_p->data.sint;
+ else
+ seed = 0;
+ smp->data.u.sint = (long long int)XXH3(smp->data.u.str.area,
+ smp->data.u.str.data, seed);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* hashes the binary input into a 32-bit unsigned int */
+static int sample_conv_crc32(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_crc32(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* hashes the binary input into crc32c (RFC4960, Appendix B [8].) */
+static int sample_conv_crc32c(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = hash_crc32c(smp->data.u.str.area,
+ smp->data.u.str.data);
+ if (arg_p->data.sint)
+ smp->data.u.sint = full_hash(smp->data.u.sint);
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
+
+/* This function escapes special JSON characters. The returned string can be
+ * safely set between two '"' and used as a JSON string. A JSON string is
+ * defined like this:
+ *
+ * any Unicode character except '"' or '\' or control character
+ * \", \\, \/, \b, \f, \n, \r, \t, \u + four-hex-digits
+ *
+ * The enum input_type contains all the allowed modes for decoding the input
+ * string.
+ */
+enum input_type {
+ IT_ASCII = 0,
+ IT_UTF8,
+ IT_UTF8S,
+ IT_UTF8P,
+ IT_UTF8PS,
+};
+
+static int sample_conv_json_check(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ enum input_type type;
+
+ if (strcmp(arg->data.str.area, "") == 0)
+ type = IT_ASCII;
+ else if (strcmp(arg->data.str.area, "ascii") == 0)
+ type = IT_ASCII;
+ else if (strcmp(arg->data.str.area, "utf8") == 0)
+ type = IT_UTF8;
+ else if (strcmp(arg->data.str.area, "utf8s") == 0)
+ type = IT_UTF8S;
+ else if (strcmp(arg->data.str.area, "utf8p") == 0)
+ type = IT_UTF8P;
+ else if (strcmp(arg->data.str.area, "utf8ps") == 0)
+ type = IT_UTF8PS;
+ else {
+ memprintf(err, "Unexpected input code type. "
+ "Allowed value are 'ascii', 'utf8', 'utf8s', 'utf8p' and 'utf8ps'");
+ return 0;
+ }
+
+ chunk_destroy(&arg->data.str);
+ arg->type = ARGT_SINT;
+ arg->data.sint = type;
+ return 1;
+}
+
+static int sample_conv_json(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *temp;
+ char _str[7]; /* \u + 4 hex digits + null char for snprintf. */
+ const char *str;
+ int len;
+ enum input_type input_type = IT_ASCII;
+ unsigned int c;
+ unsigned int ret;
+ char *p;
+
+ input_type = arg_p->data.sint;
+
+ temp = get_trash_chunk();
+ temp->data = 0;
+
+ p = smp->data.u.str.area;
+ while (p < smp->data.u.str.area + smp->data.u.str.data) {
+
+ if (input_type == IT_ASCII) {
+ /* Read input as ASCII. */
+ c = *(unsigned char *)p;
+ p++;
+ }
+ else {
+ /* Read input as UTF8. */
+ ret = utf8_next(p,
+ smp->data.u.str.data - ( p - smp->data.u.str.area),
+ &c);
+ p += utf8_return_length(ret);
+
+ if (input_type == IT_UTF8 && utf8_return_code(ret) != UTF8_CODE_OK)
+ return 0;
+ if (input_type == IT_UTF8S && utf8_return_code(ret) != UTF8_CODE_OK)
+ continue;
+ if (input_type == IT_UTF8P && utf8_return_code(ret) & (UTF8_CODE_INVRANGE|UTF8_CODE_BADSEQ))
+ return 0;
+ if (input_type == IT_UTF8PS && utf8_return_code(ret) & (UTF8_CODE_INVRANGE|UTF8_CODE_BADSEQ))
+ continue;
+
+ /* Check too big values. */
+ if ((unsigned int)c > 0xffff) {
+ if (input_type == IT_UTF8 || input_type == IT_UTF8P)
+ return 0;
+ continue;
+ }
+ }
+
+ /* Convert character. */
+ if (c == '"') {
+ len = 2;
+ str = "\\\"";
+ }
+ else if (c == '\\') {
+ len = 2;
+ str = "\\\\";
+ }
+ else if (c == '/') {
+ len = 2;
+ str = "\\/";
+ }
+ else if (c == '\b') {
+ len = 2;
+ str = "\\b";
+ }
+ else if (c == '\f') {
+ len = 2;
+ str = "\\f";
+ }
+ else if (c == '\r') {
+ len = 2;
+ str = "\\r";
+ }
+ else if (c == '\n') {
+ len = 2;
+ str = "\\n";
+ }
+ else if (c == '\t') {
+ len = 2;
+ str = "\\t";
+ }
+ else if (c > 0xff || !isprint((unsigned char)c)) {
+ /* isprint() is undefined for values that don't fit in an unsigned
+ * char: the man page says c must have the value of an unsigned
+ * char or EOF, hence the filtering above.
+ */
+ len = 6;
+ _str[0] = '\\';
+ _str[1] = 'u';
+ snprintf(&_str[2], 5, "%04x", (unsigned short)c);
+ str = _str;
+ }
+ else {
+ len = 1;
+ _str[0] = c;
+ str = _str;
+ }
+
+ /* Check length */
+ if (temp->data + len > temp->size)
+ return 0;
+
+ /* Copy string. */
+ memcpy(temp->area + temp->data, str, len);
+ temp->data += len;
+ }
+
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.u.str = *temp;
+ smp->data.type = SMP_T_STR;
+
+ return 1;
+}
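+
+/* Illustrative example (not from the sources): with the default "ascii"
+ * input type, the input
+ *
+ *   ab"c<LF>        (<LF> being a raw newline byte)
+ *
+ * is emitted as
+ *
+ *   ab\"c\n
+ *
+ * and a non-printable byte such as 0x01 becomes the \u0001 escape.
+ */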
+
+/* This sample function is designed to extract some bytes from an input buffer.
+ * First arg is the offset.
+ * Optional second arg is the length to extract (the output is truncated to it) */
+static int sample_conv_bytes(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample smp_arg0, smp_arg1;
+ long long start_idx, length;
+
+ // determine the start_idx and length of the output
+ smp_set_owner(&smp_arg0, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(&arg_p[0], &smp_arg0) || smp_arg0.data.u.sint < 0) {
+ /* invalid or negative value */
+ goto fail;
+ }
+
+ if (smp_arg0.data.u.sint >= smp->data.u.str.data) {
+ // arg0 >= the input length
+ if (smp->opt & SMP_OPT_FINAL) {
+ // empty output value on final smp
+ smp->data.u.str.data = 0;
+ goto end;
+ }
+ goto wait;
+ }
+ start_idx = smp_arg0.data.u.sint;
+
+ // length comes from arg1 if present, otherwise it's the remaining length
+ length = smp->data.u.str.data - start_idx;
+ if (arg_p[1].type != ARGT_STOP) {
+ smp_set_owner(&smp_arg1, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(&arg_p[1], &smp_arg1) || smp_arg1.data.u.sint < 0) {
+ // invalid or negative value
+ goto fail;
+ }
+
+ if (smp_arg1.data.u.sint > (smp->data.u.str.data - start_idx)) {
+ // arg1 value is greater than the remaining length
+ if (!(smp->opt & SMP_OPT_FINAL))
+ goto wait;
+ // on a final sample, truncate to the remaining length so that
+ // the offset and length updates below are still applied
+ smp_arg1.data.u.sint = smp->data.u.str.data - start_idx;
+ }
+ length = smp_arg1.data.u.sint;
+ }
+
+ // update the output using the start_idx and length
+ smp->data.u.str.area += start_idx;
+ smp->data.u.str.data = length;
+
+ end:
+ return 1;
+
+ fail:
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ wait:
+ smp->data.u.str.data = 0;
+ return 0;
+}
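+
+/* Illustrative examples (not from the sources), assuming the "str" sample
+ * fetch:
+ *
+ *   str(abcdef),bytes(2,3)  ->  "cde"
+ *   str(abcdef),bytes(4)    ->  "ef"
+ *
+ * Both arguments may also be names of variables holding integers, resolved
+ * through sample_conv_var2smp_sint().
+ */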
+
+static int sample_conv_field_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ struct arg *arg = args;
+
+ if (arg->type != ARGT_SINT) {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ if (!arg->data.sint) {
+ memprintf(err, "Unexpected value 0 for index");
+ return 0;
+ }
+
+ arg++;
+
+ if (arg->type != ARGT_STR) {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ if (!arg->data.str.data) {
+ memprintf(err, "Empty separators list");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* This sample function is designed to return a selected part of a string (a field).
+ * First arg is the index of the field (start at 1)
+ * Second arg is a char list of separators (type string)
+ */
+static int sample_conv_field(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int field;
+ char *start, *end;
+ int i;
+ int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
+
+ if (!arg_p[0].data.sint)
+ return 0;
+
+ if (arg_p[0].data.sint < 0) {
+ field = -1;
+ end = start = smp->data.u.str.area + smp->data.u.str.data;
+ while (start > smp->data.u.str.area) {
+ for (i = 0 ; i < arg_p[1].data.str.data; i++) {
+ if (*(start-1) == arg_p[1].data.str.area[i]) {
+ if (field == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ } else {
+ end = start-1;
+ field--;
+ }
+ break;
+ }
+ }
+ start--;
+ }
+ } else {
+ field = 1;
+ end = start = smp->data.u.str.area;
+ while (end - smp->data.u.str.area < smp->data.u.str.data) {
+ for (i = 0 ; i < arg_p[1].data.str.data; i++) {
+ if (*end == arg_p[1].data.str.area[i]) {
+ if (field == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ } else {
+ start = end+1;
+ field++;
+ }
+ break;
+ }
+ }
+ end++;
+ }
+ }
+
+ /* Field not found */
+ if (field != arg_p[0].data.sint) {
+ smp->data.u.str.data = 0;
+ return 0;
+ }
+found:
+ smp->data.u.str.data = end - start;
+ /* If the resulting string is empty, no need to
+ * change the pointers or update the size */
+ if (!smp->data.u.str.data)
+ return 1;
+
+ /* Compute remaining size if needed
+ Note: smp->data.u.str.size cannot be set to 0 */
+ if (smp->data.u.str.size)
+ smp->data.u.str.size -= start - smp->data.u.str.area;
+
+ smp->data.u.str.area = start;
+
+ return 1;
+}
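+
+/* Illustrative examples (not from the sources); empty fields are counted,
+ * unlike with the "word" converter below:
+ *
+ *   str(f1_f2_f3__f5),field(5,_)    ->  "f5"
+ *   str(f1_f2_f3__f5),field(4,_)    ->  ""
+ *   str(f1_f2_f3__f5),field(2,_,2)  ->  "f2_f3"
+ */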
+
+/* This sample function is designed to return a word from a string.
+ * First arg is the index of the word (start at 1)
+ * Second arg is a char list of words separators (type string)
+ */
+static int sample_conv_word(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ int word;
+ char *start, *end;
+ int i, issep, inword;
+ int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
+
+ if (!arg_p[0].data.sint)
+ return 0;
+
+ word = 0;
+ inword = 0;
+ if (arg_p[0].data.sint < 0) {
+ end = start = smp->data.u.str.area + smp->data.u.str.data;
+ while (start > smp->data.u.str.area) {
+ issep = 0;
+ for (i = 0 ; i < arg_p[1].data.str.data; i++) {
+ if (*(start-1) == arg_p[1].data.str.area[i]) {
+ issep = 1;
+ break;
+ }
+ }
+ if (!inword) {
+ if (!issep) {
+ if (word != arg_p[0].data.sint) {
+ word--;
+ end = start;
+ }
+ inword = 1;
+ }
+ }
+ else if (issep) {
+ if (word == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ }
+ inword = 0;
+ }
+ start--;
+ }
+ } else {
+ end = start = smp->data.u.str.area;
+ while (end - smp->data.u.str.area < smp->data.u.str.data) {
+ issep = 0;
+ for (i = 0 ; i < arg_p[1].data.str.data; i++) {
+ if (*end == arg_p[1].data.str.area[i]) {
+ issep = 1;
+ break;
+ }
+ }
+ if (!inword) {
+ if (!issep) {
+ if (word != arg_p[0].data.sint) {
+ word++;
+ start = end;
+ }
+ inword = 1;
+ }
+ }
+ else if (issep) {
+ if (word == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ }
+ inword = 0;
+ }
+ end++;
+ }
+ }
+
+ /* Word not found */
+ if (word != arg_p[0].data.sint) {
+ smp->data.u.str.data = 0;
+ return 0;
+ }
+found:
+ smp->data.u.str.data = end - start;
+ /* If the resulting string is empty, no need to
+ * change the pointers or update the size */
+ if (!smp->data.u.str.data)
+ return 1;
+
+
+ /* Compute remaining size if needed
+ Note: smp->data.u.str.size cannot be set to 0 */
+ if (smp->data.u.str.size)
+ smp->data.u.str.size -= start - smp->data.u.str.area;
+
+ smp->data.u.str.area = start;
+
+ return 1;
+}
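+
+/* Illustrative examples (not from the sources); consecutive separators are
+ * collapsed, so empty words are skipped:
+ *
+ *   str(f1_f2_f3__f5),word(4,_)    ->  "f5"
+ *   str(f1_f2_f3__f5),word(3,_,2)  ->  "f3__f5"
+ */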
+
+static int sample_conv_param_check(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (arg[1].type == ARGT_STR && arg[1].data.str.data != 1) {
+ memprintf(err, "Delimiter must be exactly 1 character.");
+ return 0;
+ }
+
+ return 1;
+}
+
+static int sample_conv_param(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *pos, *end, *pend, *equal;
+ char delim = '&';
+ const char *name = arg_p[0].data.str.area;
+ size_t name_l = arg_p[0].data.str.data;
+
+ if (arg_p[1].type == ARGT_STR)
+ delim = *arg_p[1].data.str.area;
+
+ pos = smp->data.u.str.area;
+ end = pos + smp->data.u.str.data;
+ while (pos < end) {
+ equal = pos + name_l;
+ /* Parameter not found */
+ if (equal > end)
+ break;
+
+ if (equal == end || *equal == delim) {
+ if (memcmp(pos, name, name_l) == 0) {
+ /* input contains parameter, but no value is supplied */
+ smp->data.u.str.data = 0;
+ return 1;
+ }
+ pos = equal + 1;
+ continue;
+ }
+
+ if (*equal == '=' && memcmp(pos, name, name_l) == 0) {
+ pos = equal + 1;
+ pend = memchr(pos, delim, end - pos);
+ if (pend == NULL)
+ pend = end;
+
+ if (smp->data.u.str.size)
+ smp->data.u.str.size -= pos - smp->data.u.str.area;
+ smp->data.u.str.area = pos;
+ smp->data.u.str.data = pend - pos;
+ return 1;
+ }
+ /* find the next delimiter and set position to character after that */
+ pos = memchr(pos, delim, end - pos);
+ if (pos == NULL)
+ pos = end;
+ else
+ pos++;
+ }
+ /* Parameter not found */
+ smp->data.u.str.data = 0;
+ return 0;
+}
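+
+/* Illustrative examples (not from the sources), with the default '&'
+ * delimiter:
+ *
+ *   str(a=1&b=2&c=3),param(b)  ->  "2"
+ *   str(a=1&b&c=3),param(b)    ->  ""   (parameter present, no value)
+ *   str(a=1&c=3),param(b)      ->  no match, the conversion fails
+ */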
+
+static int sample_conv_regsub_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ struct arg *arg = args;
+ char *p;
+ int len;
+
+ /* arg0 is a regex, it uses type_flag for ICASE and global match */
+ arg[0].type_flags = 0;
+
+ if (arg[2].type != ARGT_STR)
+ return 1;
+
+ p = arg[2].data.str.area;
+ len = arg[2].data.str.data;
+ while (len) {
+ if (*p == 'i') {
+ arg[0].type_flags |= ARGF_REG_ICASE;
+ }
+ else if (*p == 'g') {
+ arg[0].type_flags |= ARGF_REG_GLOB;
+ }
+ else {
+ memprintf(err, "invalid regex flag '%c', only 'i' and 'g' are supported", *p);
+ return 0;
+ }
+ p++;
+ len--;
+ }
+ return 1;
+}
+
+/* This sample function is designed to do the equivalent of s/match/replace/ on
+ * the input string. It applies a regex and restarts from the last matched
+ * location until nothing matches anymore. First arg is the regex to apply to
+ * the input string, second arg is the replacement expression.
+ */
+static int sample_conv_regsub(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *start, *end;
+ struct my_regex *reg = arg_p[0].data.reg;
+ regmatch_t pmatch[MAX_MATCH];
+ struct buffer *trash = get_trash_chunk();
+ struct buffer *output;
+ int flag, max;
+ int found;
+
+ start = smp->data.u.str.area;
+ end = start + smp->data.u.str.data;
+
+ flag = 0;
+ while (1) {
+ /* check for last round which is used to copy remaining parts
+ * when not running in global replacement mode.
+ */
+ found = 0;
+ if ((arg_p[0].type_flags & ARGF_REG_GLOB) || !(flag & REG_NOTBOL)) {
+ /* Note: we can have start == end on empty strings or at the end */
+ found = regex_exec_match2(reg, start, end - start, MAX_MATCH, pmatch, flag);
+ }
+
+ if (!found)
+ pmatch[0].rm_so = end - start;
+
+ /* copy the heading non-matching part (which may also be the tail if nothing matches) */
+ max = trash->size - trash->data;
+ if (max && pmatch[0].rm_so > 0) {
+ if (max > pmatch[0].rm_so)
+ max = pmatch[0].rm_so;
+ memcpy(trash->area + trash->data, start, max);
+ trash->data += max;
+ }
+
+ if (!found)
+ break;
+
+ output = alloc_trash_chunk();
+ if (!output)
+ break;
+
+ output->data = exp_replace(output->area, output->size, start, arg_p[1].data.str.area, pmatch);
+
+ /* replace the matching part, within the room left in trash */
+ max = trash->size - trash->data;
+ if (max) {
+ if (max > output->data)
+ max = output->data;
+ memcpy(trash->area + trash->data,
+ output->area, max);
+ trash->data += max;
+ }
+
+ free_trash_chunk(output);
+
+ /* stop here if we're done with this string */
+ if (start >= end)
+ break;
+
+ /* We have a special case for matches of length 0 (eg: "x*y*").
+ * These ones are considered to match in front of a character,
+ * so we have to copy that character and skip to the next one.
+ */
+ if (!pmatch[0].rm_eo) {
+ if (trash->data < trash->size)
+ trash->area[trash->data++] = start[pmatch[0].rm_eo];
+ pmatch[0].rm_eo++;
+ }
+
+ start += pmatch[0].rm_eo;
+ flag |= REG_NOTBOL;
+ }
+
+ smp->data.u.str = *trash;
+ return 1;
+}
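+
+/* Illustrative examples (not from the sources):
+ *
+ *   str(foobar),regsub(o,0)    ->  "f0obar"   (first match only)
+ *   str(foobar),regsub(o,0,g)  ->  "f00bar"   (global replacement)
+ */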
+
+/* This function checks an operator entry. It expects a string.
+ * The string can be an integer or a variable name.
+ */
+static int check_operator(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ const char *str;
+ const char *end;
+ long long int i;
+
+ /* Try to decode a variable. The 'err' argument is intentionally left
+ * NULL since the operators accept an integer as argument, in which case
+ * the vars_check_arg() call will fail.
+ */
+ if (vars_check_arg(&args[0], NULL))
+ return 1;
+
+ /* Try to convert an integer */
+ str = args[0].data.str.area;
+ end = str + strlen(str);
+ i = read_int64(&str, end);
+ if (*str != '\0') {
+ memprintf(err, "expects an integer or a variable name");
+ return 0;
+ }
+
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = i;
+ return 1;
+}
+
+/* This function returns a sample struct filled with an arg content.
+ * If the arg contains an integer, the integer is returned in the
+ * sample. If the arg contains a variable descriptor, it returns the
+ * variable value.
+ *
+ * This function returns 0 if an error occurs, otherwise it returns 1.
+ */
+int sample_conv_var2smp_sint(const struct arg *arg, struct sample *smp)
+{
+ switch (arg->type) {
+ case ARGT_SINT:
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = arg->data.sint;
+ return 1;
+ case ARGT_VAR:
+ return sample_conv_var2smp(&arg->data.var, smp, SMP_T_SINT);
+ default:
+ return 0;
+ }
+}
+
+/* Takes a SINT on input, applies a bitwise (one's) complement and returns the SINT
+ * result.
+ */
+static int sample_conv_binary_cpl(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.u.sint = ~smp->data.u.sint;
+ return 1;
+}
+
+/* Takes a SINT on input, applies a binary "and" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_binary_and(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+ smp->data.u.sint &= tmp.data.u.sint;
+ return 1;
+}
+
+/* Takes a SINT on input, applies a binary "or" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_binary_or(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+ smp->data.u.sint |= tmp.data.u.sint;
+ return 1;
+}
+
+/* Takes a SINT on input, applies a binary "xor" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_binary_xor(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+ smp->data.u.sint ^= tmp.data.u.sint;
+ return 1;
+}
+
+static inline long long int arith_add(long long int a, long long int b)
+{
+ /* Prevent overflow and perform capped (saturating) arithmetic.
+ * We must ensure that the overflow check itself doesn't
+ * exceed the signed 64-bit limits.
+ *
+ * +----------+----------+
+ * | a<0 | a>=0 |
+ * +------+----------+----------+
+ * | b<0 | MIN-a>b | no check |
+ * +------+----------+----------+
+ * | b>=0 | no check | MAX-a<b |
+ * +------+----------+----------+
+ */
+ if ((a ^ b) >= 0) {
+ /* signs are same. */
+ if (a < 0) {
+ if (LLONG_MIN - a > b)
+ return LLONG_MIN;
+ }
+ else if (LLONG_MAX - a < b)
+ return LLONG_MAX;
+ }
+ return a + b;
+}
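+
+/* Illustrative behaviour at the boundaries (not from the sources):
+ *
+ *   arith_add(LLONG_MAX, 1)   ->  LLONG_MAX   (capped, no wrap-around)
+ *   arith_add(LLONG_MIN, -1)  ->  LLONG_MIN   (capped, no wrap-around)
+ *   arith_add(-5, 3)          ->  -2          (signs differ, no check needed)
+ */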
+
+/* Takes a SINT on input, applies an arithmetic "add" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_arith_add(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+ smp->data.u.sint = arith_add(smp->data.u.sint, tmp.data.u.sint);
+ return 1;
+}
+
+/* Takes a SINT on input, applies an arithmetic "sub" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ */
+static int sample_conv_arith_sub(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+
+ /* We cannot represent -LLONG_MIN because abs(LLONG_MIN) is greater
+ * than abs(LLONG_MAX). So, the following code uses LLONG_MAX in place
+ * of -LLONG_MIN and corrects the result.
+ */
+ if (tmp.data.u.sint == LLONG_MIN) {
+ smp->data.u.sint = arith_add(smp->data.u.sint, LLONG_MAX);
+ if (smp->data.u.sint < LLONG_MAX)
+ smp->data.u.sint++;
+ return 1;
+ }
+
+ /* standard subtraction: we use the "add" function and negate
+ * the second operand.
+ */
+ smp->data.u.sint = arith_add(smp->data.u.sint, -tmp.data.u.sint);
+ return 1;
+}
+
+/* Takes a SINT on input, applies an arithmetic "mul" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ * If the result makes an overflow, then the largest possible quantity is
+ * returned.
+ */
+static int sample_conv_arith_mul(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ struct sample tmp;
+ long long int c;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+
+ /* prevent divide by 0 during the check */
+ if (!smp->data.u.sint || !tmp.data.u.sint) {
+ smp->data.u.sint = 0;
+ return 1;
+ }
+
+ /* Multiplying LLONG_MIN by -1 overflows, and the division used by
+ * the overflow check below would raise a floating point exception
+ * (SIGFPE), so cap the result right away.
+ */
+ if (smp->data.u.sint == LLONG_MIN && tmp.data.u.sint == -1) {
+ smp->data.u.sint = LLONG_MAX;
+ return 1;
+ }
+
+ /* execute standard multiplication. */
+ c = smp->data.u.sint * tmp.data.u.sint;
+
+ /* check for overflow and cap the result. */
+ if (smp->data.u.sint != c / tmp.data.u.sint) {
+ if ((smp->data.u.sint < 0) == (tmp.data.u.sint < 0)) {
+ smp->data.u.sint = LLONG_MAX;
+ return 1;
+ }
+ smp->data.u.sint = LLONG_MIN;
+ return 1;
+ }
+ smp->data.u.sint = c;
+ return 1;
+}
+
+/* Takes a SINT on input, applies an arithmetic "div" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ * If arg_p makes the result overflow, then the largest possible quantity is
+ * returned.
+ */
+static int sample_conv_arith_div(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+
+ if (tmp.data.u.sint) {
+ /* Dividing LLONG_MIN by -1 raises a floating point
+ * exception (SIGFPE), so cap the result.
+ */
+ if (smp->data.u.sint == LLONG_MIN && tmp.data.u.sint == -1) {
+ smp->data.u.sint = LLONG_MAX;
+ return 1;
+ }
+ smp->data.u.sint /= tmp.data.u.sint;
+ return 1;
+ }
+ smp->data.u.sint = LLONG_MAX;
+ return 1;
+}
+
+/* Takes a SINT on input, applies an arithmetic "mod" with the SINT directly in
+ * arg_p or in the variable described in arg_p, and returns the SINT result.
+ * If arg_p makes the result overflow, then 0 is returned.
+ */
+static int sample_conv_arith_mod(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ struct sample tmp;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_sint(arg_p, &tmp))
+ return 0;
+
+ if (tmp.data.u.sint) {
+ /* Dividing LLONG_MIN by -1 raises a floating point
+ * exception (SIGFPE); the remainder is 0 anyway.
+ */
+ if (smp->data.u.sint == LLONG_MIN && tmp.data.u.sint == -1) {
+ smp->data.u.sint = 0;
+ return 1;
+ }
+ smp->data.u.sint %= tmp.data.u.sint;
+ return 1;
+ }
+ smp->data.u.sint = 0;
+ return 1;
+}
+
+/* Takes an SINT on input, applies an arithmetic "neg" and returns the SINT
+ * result.
+ */
+static int sample_conv_arith_neg(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ if (smp->data.u.sint == LLONG_MIN)
+ smp->data.u.sint = LLONG_MAX;
+ else
+ smp->data.u.sint = -smp->data.u.sint;
+ return 1;
+}
+
+/* Takes a SINT on input, returns true if the value is non-zero, otherwise
+ * false. The output is a BOOL.
+ */
+static int sample_conv_arith_bool(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ smp->data.u.sint = !!smp->data.u.sint;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* Takes a SINT on input, returns false if the value is non-zero, otherwise
+ * true. The output is a BOOL.
+ */
+static int sample_conv_arith_not(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ smp->data.u.sint = !smp->data.u.sint;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* Takes a SINT on input, returns true if the value is odd, otherwise false.
+ * The output is a BOOL.
+ */
+static int sample_conv_arith_odd(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ smp->data.u.sint = smp->data.u.sint & 1;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* Takes a SINT on input, returns true if the value is even, otherwise false.
+ * The output is a BOOL.
+ */
+static int sample_conv_arith_even(const struct arg *arg_p,
+ struct sample *smp, void *private)
+{
+ smp->data.u.sint = !(smp->data.u.sint & 1);
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* appends an optional const string, an optional variable contents and another
+ * optional const string to an existing string.
+ */
+static int sample_conv_concat(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash;
+ struct sample tmp;
+ int max;
+
+ trash = alloc_trash_chunk();
+ if (!trash)
+ return 0;
+
+ trash->data = smp->data.u.str.data;
+ if (trash->data > trash->size - 1)
+ trash->data = trash->size - 1;
+
+ memcpy(trash->area, smp->data.u.str.area, trash->data);
+ trash->area[trash->data] = 0;
+
+ /* append first string */
+ max = arg_p[0].data.str.data;
+ if (max > trash->size - 1 - trash->data)
+ max = trash->size - 1 - trash->data;
+
+ if (max) {
+ memcpy(trash->area + trash->data, arg_p[0].data.str.area, max);
+ trash->data += max;
+ trash->area[trash->data] = 0;
+ }
+
+ /* append second string (variable) if it's found and we can turn it
+ * into a string.
+ */
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (arg_p[1].type == ARGT_VAR && vars_get_by_desc(&arg_p[1].data.var, &tmp, NULL) &&
+ (sample_casts[tmp.data.type][SMP_T_STR] == c_none ||
+ sample_casts[tmp.data.type][SMP_T_STR](&tmp))) {
+
+ max = tmp.data.u.str.data;
+ if (max > trash->size - 1 - trash->data)
+ max = trash->size - 1 - trash->data;
+
+ if (max) {
+ memcpy(trash->area + trash->data, tmp.data.u.str.area,
+ max);
+ trash->data += max;
+ trash->area[trash->data] = 0;
+ }
+ }
+
+ /* append third string */
+ max = arg_p[2].data.str.data;
+ if (max > trash->size - 1 - trash->data)
+ max = trash->size - 1 - trash->data;
+
+ if (max) {
+ memcpy(trash->area + trash->data, arg_p[2].data.str.area, max);
+ trash->data += max;
+ trash->area[trash->data] = 0;
+ }
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp_dup(smp);
+ free_trash_chunk(trash);
+ return 1;
+}
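+
+/* Usage sketch (illustrative only); txn.who is a hypothetical variable set
+ * earlier in the configuration:
+ *
+ *   http-request set-var(txn.who) req.hdr(user-agent)
+ *   http-request set-header X-Tag %[str(ua),concat(=,txn.who)]
+ *
+ * which yields "ua=<user-agent value>".
+ */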
+
+/* This function checks the "concat" converter's arguments and extracts the
+ * variable name and its scope.
+ */
+static int smp_check_concat(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ /* Try to decode a variable. */
+ if (args[1].data.str.data > 0 && !vars_check_arg(&args[1], NULL)) {
+ memprintf(err, "failed to register variable name '%s'",
+ args[1].data.str.area);
+ return 0;
+ }
+ return 1;
+}
+
+/* Appends a delimiter (only to a non-empty input) followed by the optional
+ * variable contents concatenated with the optional suffix.
+ */
+static int sample_conv_add_item(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *tmpbuf;
+ struct sample tmp;
+ size_t max;
+ int var_available;
+
+ tmpbuf = alloc_trash_chunk();
+ if (!tmpbuf)
+ return 0;
+
+ tmpbuf->data = smp->data.u.str.data;
+ if (tmpbuf->data > tmpbuf->size - 1)
+ tmpbuf->data = tmpbuf->size - 1;
+
+ memcpy(tmpbuf->area, smp->data.u.str.area, tmpbuf->data);
+ tmpbuf->area[tmpbuf->data] = 0;
+
+ /* Check if variable is found and we can turn into a string. */
+ var_available = 0;
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (arg_p[1].type == ARGT_VAR && vars_get_by_desc(&arg_p[1].data.var, &tmp, NULL) &&
+ (sample_casts[tmp.data.type][SMP_T_STR] == c_none ||
+ sample_casts[tmp.data.type][SMP_T_STR](&tmp)))
+ var_available = 1;
+
+ /* Append delimiter only if input is not empty and either
+ * the variable or the suffix are not empty
+ */
+ if (smp->data.u.str.data && ((var_available && tmp.data.u.str.data) ||
+ arg_p[2].data.str.data)) {
+ max = arg_p[0].data.str.data;
+ if (max > tmpbuf->size - 1 - tmpbuf->data)
+ max = tmpbuf->size - 1 - tmpbuf->data;
+
+ if (max) {
+ memcpy(tmpbuf->area + tmpbuf->data, arg_p[0].data.str.area, max);
+ tmpbuf->data += max;
+ tmpbuf->area[tmpbuf->data] = 0;
+ }
+ }
+
+ /* Append variable contents if variable is found and turned into string. */
+ if (var_available) {
+ max = tmp.data.u.str.data;
+ if (max > tmpbuf->size - 1 - tmpbuf->data)
+ max = tmpbuf->size - 1 - tmpbuf->data;
+
+ if (max) {
+ memcpy(tmpbuf->area + tmpbuf->data, tmp.data.u.str.area, max);
+ tmpbuf->data += max;
+ tmpbuf->area[tmpbuf->data] = 0;
+ }
+ }
+
+ /* Append optional suffix. */
+ max = arg_p[2].data.str.data;
+ if (max > tmpbuf->size - 1 - tmpbuf->data)
+ max = tmpbuf->size - 1 - tmpbuf->data;
+
+ if (max) {
+ memcpy(tmpbuf->area + tmpbuf->data, arg_p[2].data.str.area, max);
+ tmpbuf->data += max;
+ tmpbuf->area[tmpbuf->data] = 0;
+ }
+
+ smp->data.u.str = *tmpbuf;
+ smp->data.type = SMP_T_STR;
+ smp_dup(smp);
+ free_trash_chunk(tmpbuf);
+ return 1;
+}
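+
+/* Illustrative configuration sketch (hypothetical names, syntax assumed from
+ * the converter's argument layout: delimiter, optional variable, suffix):
+ *   http-request set-var(req.tags) "var(req.tags),add_item(',',,api)" if { path_beg /api }
+ *   http-request set-var(req.tags) "var(req.tags),add_item(',',,stats)" if { path_beg /stats }
+ * builds a comma-delimited list, the delimiter only appearing once the
+ * accumulated list is non-empty.
+ */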
+
+/* Checks the "add_item" converter's arguments and extracts the
+ * variable name and its scope.
+ */
+static int smp_check_add_item(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ /* Try to decode a variable. */
+ if (args[1].data.str.data > 0 && !vars_check_arg(&args[1], NULL)) {
+ memprintf(err, "failed to register variable name '%s'",
+ args[1].data.str.area);
+ return 0;
+ }
+
+ if (args[1].data.str.data == 0 && args[2].data.str.data == 0) {
+ memprintf(err, "one of the optional arguments has to be nonempty");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Compares the input string with a variable containing a string. The return
+ * value is compatible with strcmp(3)'s return value.
+ */
+static int sample_conv_strcmp(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+ int max, result;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (arg_p[0].type != ARGT_VAR)
+ return 0;
+
+ if (!sample_conv_var2smp(&arg_p[0].data.var, &tmp, SMP_T_STR))
+ return 0;
+
+ max = MIN(smp->data.u.str.data, tmp.data.u.str.data);
+ result = strncmp(smp->data.u.str.area, tmp.data.u.str.area, max);
+ if (result == 0) {
+ if (smp->data.u.str.data != tmp.data.u.str.data) {
+ if (smp->data.u.str.data < tmp.data.u.str.data) {
+ result = -1;
+ }
+ else {
+ result = 1;
+ }
+ }
+ }
+
+ smp->data.u.sint = result;
+ smp->data.type = SMP_T_SINT;
+ return 1;
+}
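+
+/* Illustrative usage sketch, close to the pattern shown in the converter's
+ * documentation (the variable name is an example): detect a mismatch between
+ * the TLS SNI and the HTTP Host header:
+ *   http-request set-var(txn.host) hdr(host)
+ *   acl sni_host_match ssl_fc_sni,strcmp(txn.host) eq 0
+ */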
+
+/*
+ * This converter takes a Host header value as defined by rfc9110#section-7.2
+ * Host = uri-host [ ":" port ]
+ * It returns the uri-host value in lowercase with the port stripped.
+ */
+static int sample_conv_host_only(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ /* Working cases: hostname00, hostname00:80, 127.0.0.1, 127.0.0.1:80, [::1], [::1]:80 */
+ char *beg = smp->data.u.str.area;
+ char *end = smp->data.u.str.area + smp->data.u.str.data - 1;
+ char *p;
+
+ for (p = end; p >= beg; p--) {
+ if (*p == ':' || *p == ']')
+ break;
+ }
+
+ if (p >= beg && *p == ':')
+ smp->data.u.str.data = p - beg;
+ /* if no port part was found, the hostname is the whole string */
+
+ smp->data.type = SMP_T_STR;
+
+ return sample_conv_str2lower(arg_p, smp, NULL);
+}
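+
+/* Worked examples for the converter above:
+ *   "www.Example.com:8080" -> "www.example.com"
+ *   "127.0.0.1:80"         -> "127.0.0.1"
+ *   "[::1]:80"             -> "[::1]"  (the ']' stops the backward scan)
+ *   "[::1]"                -> "[::1]"  (no port part, string kept whole)
+ */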
+
+/*
+ * This converter takes a Host header value as defined by rfc9110#section-7.2
+ * Host = uri-host [ ":" port ]
+ * It returns the port value as an integer.
+ */
+static int sample_conv_port_only(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ /* Working cases: hostname00, hostname00:80, 127.0.0.1, 127.0.0.1:80, [::1], [::1]:80 */
+ char *beg = smp->data.u.str.area;
+ char *end = smp->data.u.str.area + smp->data.u.str.data - 1;
+ char *p;
+
+ for (p = end; p >= beg; p--) {
+ if (*p == ':' || *p == ']')
+ break;
+ }
+
+ smp->data.type = SMP_T_SINT;
+ if (p >= beg && *p == ':' && ++p <= end) {
+ smp->data.u.sint = strl2ui(p, smp->data.u.str.data + smp->data.u.str.area - p);
+ } else {
+ smp->data.u.sint = 0;
+ }
+ return 1;
+}
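+
+/* Worked examples for the converter above:
+ *   "hostname00:80" -> 80
+ *   "[::1]:8443"    -> 8443
+ *   "example.org"   -> 0  (no port part found)
+ */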
+
+
+/* Takes a boolean as input. Returns the first argument if that boolean is true and
+ * the second argument otherwise.
+ */
+static int sample_conv_iif(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+
+ if (smp->data.u.sint) {
+ smp->data.u.str.data = arg_p[0].data.str.data;
+ smp->data.u.str.area = arg_p[0].data.str.area;
+ }
+ else {
+ smp->data.u.str.data = arg_p[1].data.str.data;
+ smp->data.u.str.area = arg_p[1].data.str.area;
+ }
+
+ return 1;
+}
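+
+/* Illustrative usage, matching the documented example:
+ *   http-request set-header x-forwarded-proto %[ssl_fc,iif(https,http)]
+ * which sets the header to "https" when the frontend connection uses SSL.
+ */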
+
+#define GRPC_MSG_COMPRESS_FLAG_SZ 1 /* 1 byte */
+#define GRPC_MSG_LENGTH_SZ 4 /* 4 bytes */
+#define GRPC_MSG_HEADER_SZ (GRPC_MSG_COMPRESS_FLAG_SZ + GRPC_MSG_LENGTH_SZ)
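+
+/* The constants above describe the standard gRPC message framing:
+ *
+ *   +-----------------+---------------------+==================+
+ *   | compressed (1B) | message length (4B) | protobuf payload |
+ *   +-----------------+---------------------+==================+
+ *
+ * Each protobuf message is thus preceded by GRPC_MSG_HEADER_SZ bytes, the
+ * length being encoded in network byte order (hence the ntohl() call below).
+ */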
+
+/*
+ * Extract the field value of an input binary sample. Takes a mandatory argument:
+ * the protocol buffers field identifier (dotted notation) internally represented
+ * as an array of unsigned integers and its size.
+ * Return 1 if the field was found, 0 if not.
+ */
+static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned char *pos;
+ size_t grpc_left;
+
+ pos = (unsigned char *)smp->data.u.str.area;
+ grpc_left = smp->data.u.str.data;
+
+ while (grpc_left > GRPC_MSG_HEADER_SZ) {
+ size_t grpc_msg_len, left;
+
+ grpc_msg_len = left = ntohl(*(uint32_t *)(pos + GRPC_MSG_COMPRESS_FLAG_SZ));
+
+ pos += GRPC_MSG_HEADER_SZ;
+ grpc_left -= GRPC_MSG_HEADER_SZ;
+
+ if (grpc_left < left)
+ return 0;
+
+ if (protobuf_field_lookup(arg_p, smp, &pos, &left))
+ return 1;
+
+ grpc_left -= grpc_msg_len;
+ }
+
+ return 0;
+}
+
+static int sample_conv_protobuf(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ unsigned char *pos;
+ size_t left;
+
+ pos = (unsigned char *)smp->data.u.str.area;
+ left = smp->data.u.str.data;
+
+ return protobuf_field_lookup(arg_p, smp, &pos, &left);
+}
+
+static int sample_conv_protobuf_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (!args[1].type) {
+ args[1].type = ARGT_SINT;
+ args[1].data.sint = PBUF_T_BINARY;
+ }
+ else {
+ int pbuf_type;
+
+ pbuf_type = protobuf_type(args[1].data.str.area);
+ if (pbuf_type == -1) {
+ memprintf(err, "Wrong protocol buffer type '%s'", args[1].data.str.area);
+ return 0;
+ }
+
+ chunk_destroy(&args[1].data.str);
+ args[1].type = ARGT_SINT;
+ args[1].data.sint = pbuf_type;
+ }
+
+ return 1;
+}
+
+/*
+ * Extract the tag value of an input binary sample. Takes a mandatory argument:
+ * the FIX protocol tag identifier.
+ * Return 1 if the tag was found, 0 if not.
+ */
+static int sample_conv_fix_tag_value(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct ist value;
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ value = fix_tag_value(ist2(smp->data.u.str.area, smp->data.u.str.data),
+ arg_p[0].data.sint);
+ if (!istlen(value)) {
+ if (isttest(value)) {
+ /* value != IST_NULL, need more data */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ }
+ return 0;
+ }
+
+ smp->data.u.str = ist2buf(value);
+ smp->flags |= SMP_F_CONST;
+
+ return 1;
+}
+
+/* This function checks the "fix_tag_value" converter configuration.
+ * It expects a "known" (by HAProxy) tag name or ID.
+ * Tag string names are converted to their ID counterpart because this is the
+ * format they are sent over the wire.
+ */
+static int sample_conv_fix_value_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ struct ist str;
+ unsigned int tag;
+
+ str = ist2(args[0].data.str.area, args[0].data.str.data);
+ tag = fix_tagid(str);
+ if (!tag) {
+ memprintf(err, "Unknown FIX tag name '%s'", args[0].data.str.area);
+ return 0;
+ }
+
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = tag;
+
+ return 1;
+}
+
+/*
+ * Checks that a buffer contains a valid FIX message
+ *
+ * Return 1 if the check could be run, 0 if not.
+ * The result of the analysis itself is stored in <smp> as a boolean.
+ */
+static int sample_conv_fix_is_valid(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct ist msg;
+
+ msg = ist2(smp->data.u.str.area, smp->data.u.str.data);
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ switch (fix_validate_message(msg)) {
+ case FIX_VALID_MESSAGE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+ case FIX_NEED_MORE_DATA:
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ case FIX_INVALID_MESSAGE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+ return 1;
+ }
+ return 0;
+}
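+
+/* Illustrative usage sketch, along the lines of the documented example:
+ *   tcp-request inspect-delay 10s
+ *   tcp-request content reject unless { req.payload(0,0),fix_is_valid }
+ * rejecting connections whose first payload is not a valid FIX message.
+ */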
+
+/*
+ * Extract the field value of an input binary sample containing an MQTT packet.
+ * Takes 2 mandatory arguments:
+ * - packet type
+ * - field name
+ *
+ * return 1 if the field was found, 0 if not.
+ */
+static int sample_conv_mqtt_field_value(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct ist pkt, value;
+ int type, fieldname_id;
+
+ pkt = ist2(smp->data.u.str.area, smp->data.u.str.data);
+ type = arg_p[0].data.sint;
+ fieldname_id = arg_p[1].data.sint;
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ value = mqtt_field_value(pkt, type, fieldname_id);
+ if (!istlen(value)) {
+ if (isttest(value)) {
+ /* value != IST_NULL, need more data */
+ smp->flags |= SMP_F_MAY_CHANGE;
+ }
+ return 0;
+ }
+
+ smp->data.u.str = ist2buf(value);
+ smp->flags |= SMP_F_CONST;
+ return 1;
+}
+
+/*
+ * This function checks the "mqtt_field_value" converter configuration.
+ * It expects a known packet type name or ID and a field name, in this order.
+ *
+ * Args[0] will be turned into an MQTT_CPT_* value for direct matching when
+ * parsing a packet.
+ */
+static int sample_conv_mqtt_field_value_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ int type, fieldname_id;
+
+ /* check the MQTT packet type is valid */
+ type = mqtt_typeid(ist2(args[0].data.str.area, args[0].data.str.data));
+ if (type == MQTT_CPT_INVALID) {
+ memprintf(err, "Unknown MQTT type '%s'", args[0].data.str.area);
+ return 0;
+ }
+
+ /* check the field name belongs to the MQTT packet type */
+ fieldname_id = mqtt_check_type_fieldname(type, ist2(args[1].data.str.area, args[1].data.str.data));
+ if (fieldname_id == MQTT_FN_INVALID) {
+ memprintf(err, "Unknown MQTT field name '%s' for packet type '%s'", args[1].data.str.area,
+ args[0].data.str.area);
+ return 0;
+ }
+
+ /* save numeric counterparts of type and field name */
+ chunk_destroy(&args[0].data.str);
+ chunk_destroy(&args[1].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = type;
+ args[1].type = ARGT_SINT;
+ args[1].data.sint = fieldname_id;
+
+ return 1;
+}
+
+/*
+ * Checks that <smp> contains a valid MQTT message
+ *
+ * The function returns 1 if the check was run to its end, 0 otherwise.
+ * The result of the analysis itself is stored in <smp> as a boolean.
+ */
+static int sample_conv_mqtt_is_valid(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct ist msg;
+
+ msg = ist2(smp->data.u.str.area, smp->data.u.str.data);
+
+ smp->flags &= ~SMP_F_MAY_CHANGE;
+ switch (mqtt_validate_message(msg, NULL)) {
+ case FIX_VALID_MESSAGE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+ case FIX_NEED_MORE_DATA:
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ case FIX_INVALID_MESSAGE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+ return 1;
+ }
+ return 0;
+}
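+
+/* Illustrative usage sketch, mirroring the FIX example above:
+ *   tcp-request inspect-delay 10s
+ *   tcp-request content reject unless { req.payload(0,0),mqtt_is_valid }
+ * rejecting connections whose first payload is not a valid MQTT packet.
+ */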
+
+/* This function checks the "strcmp" converter's arguments and extracts the
+ * variable name and its scope.
+ */
+static int smp_check_strcmp(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (!args[0].data.str.data) {
+ memprintf(err, "missing variable name");
+ return 0;
+ }
+
+ /* Try to decode a variable. */
+ if (vars_check_arg(&args[0], NULL))
+ return 1;
+
+ memprintf(err, "failed to register variable name '%s'",
+ args[0].data.str.area);
+ return 0;
+}
+
+/* Takes a SINT on input and returns its lower 32 bits converted to network
+ * byte order as a 4-byte BIN sample.
+ */
+static int sample_conv_htonl(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *tmp;
+ uint32_t n;
+
+ n = htonl((uint32_t)smp->data.u.sint);
+ tmp = get_trash_chunk();
+
+ memcpy(b_head(tmp), &n, 4);
+ b_add(tmp, 4);
+
+ smp->data.u.str = *tmp;
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* Truncates the input string at the first CR ('\r') or LF ('\n') character. */
+static int sample_conv_cut_crlf(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *p;
+ size_t l;
+
+ p = smp->data.u.str.area;
+ for (l = 0; l < smp->data.u.str.data; l++) {
+ if (*(p+l) == '\r' || *(p+l) == '\n')
+ break;
+ }
+ smp->data.u.str.data = l;
+ return 1;
+}
+
+/* Skips the leading characters of the input string that belong to the
+ * delimiter set given in argument.
+ */
+static int sample_conv_ltrim(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *delimiters, *p;
+ size_t dlen, l;
+
+ delimiters = arg_p[0].data.str.area;
+ dlen = arg_p[0].data.str.data;
+
+ l = smp->data.u.str.data;
+ p = smp->data.u.str.area;
+ while (l && memchr(delimiters, *p, dlen) != NULL) {
+ p++;
+ l--;
+ }
+
+ smp->data.u.str.area = p;
+ smp->data.u.str.data = l;
+ return 1;
+}
+
+/* Skips the trailing characters of the input string that belong to the
+ * delimiter set given in argument.
+ */
+static int sample_conv_rtrim(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ char *delimiters, *p;
+ size_t dlen, l;
+
+ delimiters = arg_p[0].data.str.area;
+ dlen = arg_p[0].data.str.data;
+
+ l = smp->data.u.str.data;
+ p = smp->data.u.str.area + l - 1;
+ while (l && memchr(delimiters, *p, dlen) != NULL) {
+ p--;
+ l--;
+ }
+
+ smp->data.u.str.data = l;
+ return 1;
+}
+
+/* This function checks the "json_query" converter's arguments. */
+static int sample_check_json_query(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (arg[0].data.str.data == 0) {
+ memprintf(err, "json_path must not be empty");
+ return 0;
+ }
+
+ if (arg[1].data.str.data != 0) {
+ if (strcmp(arg[1].data.str.area, "int") != 0) {
+ memprintf(err, "output_type only supports \"int\" as argument");
+ return 0;
+ } else {
+ arg[1].type = ARGT_SINT;
+ arg[1].data.sint = 0;
+ }
+ }
+ return 1;
+}
+
+/* Limit JSON integer values to the range [-(2**53)+1, (2**53)-1] as per
+ * the recommendation for interoperable integers in section 6 of RFC 7159.
+ */
+#define JSON_INT_MAX ((1LL << 53) - 1)
+#define JSON_INT_MIN (-JSON_INT_MAX)
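+
+/* 2^53 - 1 is the largest integer an IEEE-754 double can represent exactly.
+ * Many JSON implementations parse numbers as doubles, so integers outside
+ * this range would not survive a round trip, hence the RFC's recommendation.
+ */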
+
+/* This sample function gets a value from a given JSON string, using the
+ * mjson library to parse it.
+ */
+static int sample_conv_json_query(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ const char *token; /* holds the temporary string from mjson_find */
+ int token_size; /* holds the length of <token> */
+
+ enum mjson_tok token_type;
+
+ token_type = mjson_find(smp->data.u.str.area, smp->data.u.str.data, args[0].data.str.area, &token, &token_size);
+
+ switch (token_type) {
+ case MJSON_TOK_NUMBER:
+ if (args[1].type == ARGT_SINT) {
+ smp->data.u.sint = strtoll(token, NULL, 0);
+
+ if (smp->data.u.sint < JSON_INT_MIN || smp->data.u.sint > JSON_INT_MAX)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+
+ return 1;
+ } else {
+ double double_val;
+
+ if (mjson_get_number(smp->data.u.str.area, smp->data.u.str.data, args[0].data.str.area, &double_val) == 0)
+ return 0;
+
+ trash->data = snprintf(trash->area, trash->size, "%g", double_val);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+
+ return 1;
+ }
+ case MJSON_TOK_TRUE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+
+ return 1;
+ case MJSON_TOK_FALSE:
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+
+ return 1;
+ case MJSON_TOK_STRING: {
+ int len;
+
+ len = mjson_get_string(smp->data.u.str.area, smp->data.u.str.data, args[0].data.str.area, trash->area, trash->size);
+
+ if (len == -1) {
+ /* invalid string */
+ return 0;
+ }
+
+ trash->data = len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+
+ return 1;
+ }
+ case MJSON_TOK_ARRAY: {
+ // We copy the complete array, including square brackets into the return buffer
+ // result looks like: ["manage-account","manage-account-links","view-profile"]
+ trash->data = b_putblk(trash, token, token_size);
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ return 1;
+ }
+ case MJSON_TOK_NULL:
+ case MJSON_TOK_OBJECT:
+ /* We cannot handle these. */
+ return 0;
+ case MJSON_TOK_INVALID:
+ /* Nothing matches the query. */
+ return 0;
+ case MJSON_TOK_KEY:
+ /* This is not a valid return value according to the
+ * mjson documentation, but we handle it to benefit
+ * from '-Wswitch'.
+ */
+ return 0;
+ }
+
+ my_unreachable();
+ return 0;
+}
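+
+/* Illustrative usage sketch (hypothetical field and variable names):
+ *   # with a request body of {"pages":10}, stores 10 in txn.pages
+ *   http-request set-var(txn.pages) req.body,json_query('$.pages','int')
+ * String and array results are returned through the trash chunk as shown
+ * above, and numbers without the "int" hint are rendered with "%g".
+ */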
+
+#ifdef USE_OPENSSL
+static int sample_conv_jwt_verify_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ vars_check_arg(&args[0], NULL);
+ vars_check_arg(&args[1], NULL);
+
+ if (args[0].type == ARGT_STR) {
+ enum jwt_alg alg = jwt_parse_alg(args[0].data.str.area, args[0].data.str.data);
+
+ if (alg == JWT_ALG_DEFAULT) {
+ memprintf(err, "unknown JWT algorithm: %s", args[0].data.str.area);
+ return 0;
+ }
+ }
+
+ if (args[1].type == ARGT_STR) {
+ jwt_tree_load_cert(args[1].data.str.area, args[1].data.str.data, err);
+ }
+
+ return 1;
+}
+
+/* Check that a JWT's signature is correct */
+static int sample_conv_jwt_verify(const struct arg *args, struct sample *smp, void *private)
+{
+ struct sample alg_smp, key_smp;
+ enum jwt_vrfy_status ret;
+
+ smp_set_owner(&alg_smp, smp->px, smp->sess, smp->strm, smp->opt);
+ smp_set_owner(&key_smp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&args[0], &alg_smp))
+ return 0;
+ if (!sample_conv_var2smp_str(&args[1], &key_smp))
+ return 0;
+
+ ret = jwt_verify(&smp->data.u.str, &alg_smp.data.u.str, &key_smp.data.u.str);
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ret;
+ return 1;
+}
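+
+/* Illustrative usage sketch (hypothetical variable and file names), with the
+ * token passed as a Bearer credential:
+ *   http-request set-var(txn.bearer) http_auth_bearer
+ *   http-request deny unless { var(txn.bearer),jwt_verify("RS256","/etc/haproxy/pubkey.pem") -m int 1 }
+ * where a return value of 1 denotes a successful verification.
+ */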
+
+
+/*
+ * Returns the decoded header or payload of a JWT if no parameter is given, or
+ * the value of the specified field of the corresponding JWT subpart if a
+ * parameter is given.
+ */
+static int sample_conv_jwt_member_query(const struct arg *args, struct sample *smp,
+ void *private, enum jwt_elt member)
+{
+ struct jwt_item items[JWT_ELT_MAX] = { { 0 } };
+ unsigned int item_num = member + 1; /* We don't need to tokenize the full token */
+ struct buffer *decoded_header = get_trash_chunk();
+ int retval = 0;
+ int ret;
+
+ jwt_tokenize(&smp->data.u.str, items, &item_num);
+
+ if (item_num < member + 1)
+ goto end;
+
+ ret = base64urldec(items[member].start, items[member].length,
+ decoded_header->area, decoded_header->size);
+ if (ret == -1)
+ goto end;
+
+ decoded_header->data = ret;
+ if (args[0].type != ARGT_STR) {
+ smp->data.u.str = *decoded_header;
+ smp->data.type = SMP_T_STR;
+ goto end;
+ }
+
+ /* We look for a specific field of the header or payload part of the JWT */
+ smp->data.u.str = *decoded_header;
+
+ retval = sample_conv_json_query(args, smp, private);
+
+end:
+ return retval;
+}
+
+/* This function checks the "jwt_header_query" and "jwt_payload_query" converters' arguments.
+ * It is based on the "json_query" converter's check with the only difference
+ * being that the jwt converters can take 0 parameters as well.
+ */
+static int sample_conv_jwt_query_check(struct arg *arg, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (arg[1].data.str.data != 0) {
+ if (strcmp(arg[1].data.str.area, "int") != 0) {
+ memprintf(err, "output_type only supports \"int\" as argument");
+ return 0;
+ } else {
+ arg[1].type = ARGT_SINT;
+ arg[1].data.sint = 0;
+ }
+ }
+ return 1;
+}
+
+/*
+ * If no parameter is given, return the decoded header part of a JWT (the first
+ * base64 encoded part, corresponding to the JOSE header).
+ * If a parameter is given, this converter acts as a "json_query" on this
+ * decoded JSON.
+ */
+static int sample_conv_jwt_header_query(const struct arg *args, struct sample *smp, void *private)
+{
+ return sample_conv_jwt_member_query(args, smp, private, JWT_ELT_JOSE);
+}
+
+/*
+ * If no parameter is given, return the decoded payload part of a JWT (the
+ * second base64 encoded part, which contains all the claims). If a parameter
+ * is given, this converter acts as a "json_query" on this decoded JSON.
+ */
+static int sample_conv_jwt_payload_query(const struct arg *args, struct sample *smp, void *private)
+{
+ return sample_conv_jwt_member_query(args, smp, private, JWT_ELT_CLAIMS);
+}
+
+#endif /* USE_OPENSSL */
+
+/************************************************************************/
+/* All supported sample fetch functions must be declared here */
+/************************************************************************/
+
+
+/* returns the current number of active connections (actconn) */
+static int
+smp_fetch_actconn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = actconn;
+ return 1;
+}
+
+
+/* force TRUE to be returned at the fetch level */
+static int
+smp_fetch_true(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp_make_rw(smp))
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* force FALSE to be returned at the fetch level */
+static int
+smp_fetch_false(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = 0;
+ return 1;
+}
+
+/* retrieve environment variable $1 as a string */
+static int
+smp_fetch_env(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ char *env;
+
+ if (args[0].type != ARGT_STR)
+ return 0;
+
+ env = getenv(args[0].data.str.area);
+ if (!env)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = env;
+ smp->data.u.str.data = strlen(env);
+ return 1;
+}
+
+/* Validates the data unit argument passed to the "date" fetch. Argument 1
+ * supports an optional string representing the unit of the result: "s" for
+ * seconds, "ms" for milliseconds and "us" for microseconds.
+ * Returns 0 on error and non-zero if OK.
+ */
+int smp_check_date_unit(struct arg *args, char **err)
+{
+ if (args[1].type == ARGT_STR) {
+ long long int unit;
+
+ if (strcmp(args[1].data.str.area, "s") == 0) {
+ unit = TIME_UNIT_S;
+ }
+ else if (strcmp(args[1].data.str.area, "ms") == 0) {
+ unit = TIME_UNIT_MS;
+ }
+ else if (strcmp(args[1].data.str.area, "us") == 0) {
+ unit = TIME_UNIT_US;
+ }
+ else {
+ memprintf(err, "expects 's', 'ms' or 'us', got '%s'",
+ args[1].data.str.area);
+ return 0;
+ }
+
+ chunk_destroy(&args[1].data.str);
+ args[1].type = ARGT_SINT;
+ args[1].data.sint = unit;
+ }
+ else if (args[1].type != ARGT_STOP) {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* retrieves the current local date in epoch time, converts it to milliseconds
+ * or microseconds if requested via the optional args[1] unit param, and
+ * applies an optional args[0] offset.
+ */
+static int
+smp_fetch_date(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.sint = date.tv_sec;
+
+ /* report in milliseconds */
+ if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_MS) {
+ smp->data.u.sint *= 1000;
+ smp->data.u.sint += date.tv_usec / 1000;
+ }
+ /* report in microseconds */
+ else if (args[1].type == ARGT_SINT && args[1].data.sint == TIME_UNIT_US) {
+ smp->data.u.sint *= 1000000;
+ smp->data.u.sint += date.tv_usec;
+ }
+
+ /* add offset */
+ if (args[0].type == ARGT_SINT)
+ smp->data.u.sint += args[0].data.sint;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ return 1;
+}
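+
+/* Illustrative usage, similar to the documented example: an Expires header
+ * set to one hour from now using the offset argument:
+ *   http-response set-header Expires %[date(3600),http_date]
+ * while date(0,ms) would report the current instant in milliseconds.
+ */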
+
+/* retrieve the current microsecond part of the date */
+static int
+smp_fetch_date_us(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.sint = date.tv_usec;
+ smp->data.type = SMP_T_SINT;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+
+/* returns the hostname */
+static int
+smp_fetch_hostname(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_CONST;
+ smp->data.u.str.area = hostname;
+ smp->data.u.str.data = strlen(hostname);
+ return 1;
+}
+
+/* returns the number of processes */
+static int
+smp_fetch_nbproc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* returns the PID of the current process */
+static int
+smp_fetch_pid(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = pid;
+ return 1;
+}
+
+
+/* returns the number of the current process (between 1 and nbproc) */
+static int
+smp_fetch_proc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 1;
+ return 1;
+}
+
+/* returns the number of the current thread (between 1 and nbthread) */
+static int
+smp_fetch_thread(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = tid;
+ return 1;
+}
+
+/* generate a random 32-bit integer for whatever purpose, with an optional
+ * range specified in argument.
+ */
+static int
+smp_fetch_rand(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.u.sint = statistical_prng();
+
+ /* reduce if needed. Don't do a modulo, use all bits! */
+ if (args[0].type == ARGT_SINT)
+ smp->data.u.sint = ((u64)smp->data.u.sint * (u64)args[0].data.sint) >> 32;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ return 1;
+}
+
+/* returns true if the current process is stopping */
+static int
+smp_fetch_stopping(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = stopping;
+ return 1;
+}
+
+/* returns the number of calls of the current stream's process_stream() */
+static int
+smp_fetch_cpu_calls(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->task->calls;
+ return 1;
+}
+
+/* returns the average number of nanoseconds spent processing the stream per call */
+static int
+smp_fetch_cpu_ns_avg(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->task->calls ? smp->strm->cpu_time / smp->strm->task->calls : 0;
+ return 1;
+}
+
+/* returns the total number of nanoseconds spent processing the stream */
+static int
+smp_fetch_cpu_ns_tot(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->cpu_time;
+ return 1;
+}
+
+/* returns the average number of nanoseconds per call spent waiting for other tasks to be processed */
+static int
+smp_fetch_lat_ns_avg(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->task->calls ? smp->strm->lat_time / smp->strm->task->calls : 0;
+ return 1;
+}
+
+/* returns the total number of nanoseconds spent waiting for other tasks to be processed */
+static int
+smp_fetch_lat_ns_tot(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = smp->strm->lat_time;
+ return 1;
+}
+
+static int smp_fetch_const_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags |= SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = args[0].data.str.area;
+ smp->data.u.str.data = args[0].data.str.data;
+ return 1;
+}
+
+static int smp_check_const_bool(struct arg *args, char **err)
+{
+ if (strcasecmp(args[0].data.str.area, "true") == 0 ||
+ strcasecmp(args[0].data.str.area, "1") == 0) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = 1;
+ return 1;
+ }
+ if (strcasecmp(args[0].data.str.area, "false") == 0 ||
+ strcasecmp(args[0].data.str.area, "0") == 0) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = 0;
+ return 1;
+ }
+ memprintf(err, "Expects 'true', 'false', '0' or '1'");
+ return 0;
+}
+
+static int smp_fetch_const_bool(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = args[0].data.sint;
+ return 1;
+}
+
+static int smp_fetch_const_int(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args[0].data.sint;
+ return 1;
+}
+
+static int smp_fetch_const_ipv4(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_IPV4;
+ smp->data.u.ipv4 = args[0].data.ipv4;
+ return 1;
+}
+
+static int smp_fetch_const_ipv6(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_IPV6;
+ smp->data.u.ipv6 = args[0].data.ipv6;
+ return 1;
+}
+
+static int smp_check_const_bin(struct arg *args, char **err)
+{
+ char *binstr = NULL;
+ int binstrlen;
+
+ if (!parse_binary(args[0].data.str.area, &binstr, &binstrlen, err))
+ return 0;
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_STR;
+ args[0].data.str.area = binstr;
+ args[0].data.str.data = binstrlen;
+ return 1;
+}
+
+static int smp_fetch_const_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags |= SMP_F_CONST;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str.area = args[0].data.str.area;
+ smp->data.u.str.data = args[0].data.str.data;
+ return 1;
+}
+
+static int smp_check_const_meth(struct arg *args, char **err)
+{
+ enum http_meth_t meth;
+ int i;
+
+ meth = find_http_meth(args[0].data.str.area, args[0].data.str.data);
+ if (meth != HTTP_METH_OTHER) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = meth;
+ } else {
+ /* Check method availability. A method is a token defined as :
+ * tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
+ * "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
+ * token = 1*tchar
+ */
+ for (i = 0; i < args[0].data.str.data; i++) {
+ if (!HTTP_IS_TOKEN(args[0].data.str.area[i])) {
+ memprintf(err, "expects valid method.");
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+static int smp_fetch_const_meth(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_METH;
+ if (args[0].type == ARGT_SINT) {
+ smp->flags &= ~SMP_F_CONST;
+ smp->data.u.meth.meth = args[0].data.sint;
+ smp->data.u.meth.str.area = "";
+ smp->data.u.meth.str.data = 0;
+ } else {
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.meth.meth = HTTP_METH_OTHER;
+ smp->data.u.meth.str.area = args[0].data.str.area;
+ smp->data.u.meth.str.data = args[0].data.str.data;
+ }
+ return 1;
+}
+
+// This function checks the "uuid" sample's arguments.
+// Function won't get called when no parameter is specified (maybe a bug?)
+static int smp_check_uuid(struct arg *args, char **err)
+{
+ if (!args[0].type) {
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = 4;
+ }
+ else if (args[0].data.sint != 4) {
+ memprintf(err, "Unsupported UUID version: '%lld'", args[0].data.sint);
+ return 0;
+ }
+
+ return 1;
+}
+
+// Generate an RFC 4122 UUID (default is v4 = fully random)
+static int smp_fetch_uuid(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (args[0].data.sint == 4 || !args[0].type) {
+ ha_generate_uuid(&trash);
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ smp->data.u.str = trash;
+ return 1;
+ }
+
+ // more implementations of other uuid formats possible here
+ return 0;
+}
+
+/* Check if QUIC support was compiled and was not disabled by "no-quic" global option */
+static int smp_fetch_quic_enabled(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->data.type = SMP_T_BOOL;
+ smp->flags = 0;
+#ifdef USE_QUIC
+ smp->data.u.sint = !(global.tune.options & GTUNE_NO_QUIC);
+#else
+ smp->data.u.sint = 0;
+#endif
+ return smp->data.u.sint;
+}
+
+/* Timing events re{q,s}.timer. */
+static int smp_fetch_reX_timers(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct strm_logs *logs;
+ int t_request = -1;
+
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags = 0;
+
+ logs = &smp->strm->logs;
+
+
+ if ((llong)(logs->request_ts - logs->accept_ts) >= 0)
+ t_request = ns_to_ms(logs->request_ts - logs->accept_ts);
+
+ /* req.timer. */
+ if (kw[2] == 'q') {
+
+ switch (kw[10]) {
+
+ /* req.timer.idle (%Ti) */
+ case 'i':
+ smp->data.u.sint = logs->t_idle;
+ break;
+
+ /* req.timer.tq (%Tq) */
+ case 't':
+ smp->data.u.sint = t_request;
+ break;
+
+ /* req.timer.hdr (%TR) */
+ case 'h':
+ smp->data.u.sint = (t_request >= 0) ? t_request - logs->t_idle - logs->t_handshake : -1;
+ break;
+
+ /* req.timer.queue (%Tw) */
+ case 'q':
+ smp->data.u.sint = (logs->t_queue >= 0) ? logs->t_queue - t_request : -1;
+ break;
+
+ default:
+ goto error;
+
+ }
+ } else {
+ /* res.timer. */
+ switch (kw[10]) {
+ /* res.timer.hdr (%Tr) */
+ case 'h':
+ smp->data.u.sint = (logs->t_data >= 0) ? logs->t_data - logs->t_connect : -1;
+ break;
+
+ /* res.timer.data (%Td) */
+ case 'd':
+ smp->data.u.sint = (logs->t_data >= 0) ? logs->t_close - logs->t_data : -1;
+ break;
+
+ default:
+ goto error;
+
+ }
+
+ }
+
+ return 1;
+error:
+
+ return 0;
+}
+
+
+/* Timing events txn. */
+static int smp_fetch_txn_timers(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct strm_logs *logs;
+
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags = 0;
+
+ logs = &smp->strm->logs;
+
+ /* txn.timer. */
+ switch (kw[10]) {
+
+ /* txn.timer.total (%Ta) */
+ case 't':
+ smp->data.u.sint = logs->t_close - (logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0);
+ break;
+
+
+ /* txn.timer.user (%Tu) */
+ case 'u':
+ smp->data.u.sint = logs->t_close - (logs->t_idle >= 0 ? logs->t_idle : 0);
+ break;
+
+ default:
+ goto error;
+
+ }
+
+ return 1;
+error:
+
+ return 0;
+}
+
+/* Timing events {f,bc}.timer. */
+static int smp_fetch_conn_timers(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct strm_logs *logs;
+
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags = 0;
+
+ logs = &smp->strm->logs;
+
+ if (kw[0] == 'b') {
+ /* bc.timer. */
+ switch (kw[9]) {
+
+ /* bc.timer.connect (%Tc) */
+ case 'c':
+ smp->data.u.sint = (logs->t_connect >= 0) ? logs->t_connect - logs->t_queue : -1;
+ break;
+
+ default:
+ goto error;
+ }
+
+ } else {
+
+ /* fc.timer. */
+ switch (kw[9]) {
+
+ /* fc.timer.handshake (%Th) */
+ case 'h':
+ smp->data.u.sint = logs->t_handshake;
+ break;
+
+ /* fc.timer.total (%Tt) */
+ case 't':
+ smp->data.u.sint = logs->t_close;
+ break;
+
+ default:
+ goto error;
+ }
+
+ }
+
+ return 1;
+error:
+
+ return 0;
+}
+
+/* bytes_{in,out} */
+static int smp_fetch_bytes(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct strm_logs *logs;
+
+ if (!smp->strm)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags = 0;
+
+ logs = &smp->strm->logs;
+ if (!logs)
+ return 0;
+
+ if (kw[6] == 'i') { /* bytes_in */
+ smp->data.u.sint = logs->bytes_in;
+ } else { /* bytes_out */
+ smp->data.u.sint = logs->bytes_out;
+ }
+
+ return 1;
+}
+
+static int sample_conv_bytes_check(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ // arg0 is not optional, must be >= 0
+ if (!check_operator(&args[0], conv, file, line, err)) {
+ return 0;
+ }
+ if (args[0].type != ARGT_VAR) {
+ if (args[0].type != ARGT_SINT || args[0].data.sint < 0) {
+ memprintf(err, "expects a non-negative integer");
+ return 0;
+ }
+ }
+ // arg1 is optional, must be > 0
+ if (args[1].type != ARGT_STOP) {
+ if (!check_operator(&args[1], conv, file, line, err)) {
+ return 0;
+ }
+ if (args[1].type != ARGT_VAR) {
+ if (args[1].type != ARGT_SINT || args[1].data.sint <= 0) {
+ memprintf(err, "expects a positive integer");
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+static struct sample_fetch_kw_list smp_logs_kws = {ILH, {
+ { "bytes_in", smp_fetch_bytes, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "bytes_out", smp_fetch_bytes, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+
+ { "txn.timer.total", smp_fetch_txn_timers, 0, NULL, SMP_T_SINT, SMP_USE_TXFIN }, /* "Ta" */
+ { "txn.timer.user", smp_fetch_txn_timers, 0, NULL, SMP_T_SINT, SMP_USE_TXFIN }, /* "Tu" */
+
+ { "bc.timer.connect", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, /* "Tc" */
+ { "fc.timer.handshake", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, /* "Th" */
+ { "fc.timer.total", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_SSFIN }, /* "Tt" */
+
+ { "req.timer.idle", smp_fetch_reX_timers, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV }, /* "Ti" */
+ { "req.timer.tq", smp_fetch_reX_timers, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV }, /* "Tq" */
+ { "req.timer.hdr", smp_fetch_reX_timers, 0, NULL, SMP_T_SINT, SMP_USE_HRQHV }, /* "TR" */
+ { "req.timer.queue", smp_fetch_reX_timers, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, /* "Tw" */
+ { "res.timer.data", smp_fetch_reX_timers, 0, NULL, SMP_T_SINT, SMP_USE_RSFIN }, /* "Td" */
+ { "res.timer.hdr", smp_fetch_reX_timers, 0, NULL, SMP_T_SINT, SMP_USE_HRSHV }, /* "Tr" */
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_logs_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types should be declared using the
+ * appropriate pseudo-type. If not available it must be declared as the lowest
+ * common denominator, the type that can be cast into all other ones.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "act_conn", smp_fetch_actconn, 0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "always_false", smp_fetch_false, 0, NULL, SMP_T_BOOL, SMP_USE_CONST },
+ { "always_true", smp_fetch_true, 0, NULL, SMP_T_BOOL, SMP_USE_CONST },
+ { "env", smp_fetch_env, ARG1(1,STR), NULL, SMP_T_STR, SMP_USE_CONST },
+ { "date", smp_fetch_date, ARG2(0,SINT,STR), smp_check_date_unit, SMP_T_SINT, SMP_USE_CONST },
+ { "date_us", smp_fetch_date_us, 0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "hostname", smp_fetch_hostname, 0, NULL, SMP_T_STR, SMP_USE_CONST },
+ { "nbproc", smp_fetch_nbproc,0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "pid", smp_fetch_pid, 0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "proc", smp_fetch_proc, 0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "quic_enabled", smp_fetch_quic_enabled, 0, NULL, SMP_T_BOOL, SMP_USE_CONST },
+ { "thread", smp_fetch_thread, 0, NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "rand", smp_fetch_rand, ARG1(0,SINT), NULL, SMP_T_SINT, SMP_USE_CONST },
+ { "stopping", smp_fetch_stopping, 0, NULL, SMP_T_BOOL, SMP_USE_INTRN },
+ { "uuid", smp_fetch_uuid, ARG1(0, SINT), smp_check_uuid, SMP_T_STR, SMP_USE_CONST },
+
+ { "cpu_calls", smp_fetch_cpu_calls, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "cpu_ns_avg", smp_fetch_cpu_ns_avg, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "cpu_ns_tot", smp_fetch_cpu_ns_tot, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "lat_ns_avg", smp_fetch_lat_ns_avg, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "lat_ns_tot", smp_fetch_lat_ns_tot, 0, NULL, SMP_T_SINT, SMP_USE_INTRN },
+
+ { "str", smp_fetch_const_str, ARG1(1,STR), NULL , SMP_T_STR, SMP_USE_CONST },
+ { "bool", smp_fetch_const_bool, ARG1(1,STR), smp_check_const_bool, SMP_T_BOOL, SMP_USE_CONST },
+ { "int", smp_fetch_const_int, ARG1(1,SINT), NULL , SMP_T_SINT, SMP_USE_CONST },
+ { "ipv4", smp_fetch_const_ipv4, ARG1(1,IPV4), NULL , SMP_T_IPV4, SMP_USE_CONST },
+ { "ipv6", smp_fetch_const_ipv6, ARG1(1,IPV6), NULL , SMP_T_IPV6, SMP_USE_CONST },
+ { "bin", smp_fetch_const_bin, ARG1(1,STR), smp_check_const_bin , SMP_T_BIN, SMP_USE_CONST },
+ { "meth", smp_fetch_const_meth, ARG1(1,STR), smp_check_const_meth, SMP_T_METH, SMP_USE_CONST },
+
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "add_item",sample_conv_add_item, ARG3(2,STR,STR,STR), smp_check_add_item, SMP_T_STR, SMP_T_STR },
+ { "debug", sample_conv_debug, ARG2(0,STR,STR), smp_check_debug, SMP_T_ANY, SMP_T_SAME },
+ { "b64dec", sample_conv_base642bin, 0, NULL, SMP_T_STR, SMP_T_BIN },
+ { "base64", sample_conv_bin2base64, 0, NULL, SMP_T_BIN, SMP_T_STR },
+ { "concat", sample_conv_concat, ARG3(1,STR,STR,STR), smp_check_concat, SMP_T_STR, SMP_T_STR },
+ { "ub64enc", sample_conv_bin2base64url,0, NULL, SMP_T_BIN, SMP_T_STR },
+ { "ub64dec", sample_conv_base64url2bin,0, NULL, SMP_T_STR, SMP_T_BIN },
+ { "upper", sample_conv_str2upper, 0, NULL, SMP_T_STR, SMP_T_STR },
+ { "lower", sample_conv_str2lower, 0, NULL, SMP_T_STR, SMP_T_STR },
+ { "length", sample_conv_length, 0, NULL, SMP_T_STR, SMP_T_SINT },
+ { "be2dec", sample_conv_be2dec, ARG3(1,STR,SINT,SINT), sample_conv_be2dec_check, SMP_T_BIN, SMP_T_STR },
+ { "be2hex", sample_conv_be2hex, ARG3(1,STR,SINT,SINT), sample_conv_be2hex_check, SMP_T_BIN, SMP_T_STR },
+ { "hex", sample_conv_bin2hex, 0, NULL, SMP_T_BIN, SMP_T_STR },
+ { "hex2i", sample_conv_hex2int, 0, NULL, SMP_T_STR, SMP_T_SINT },
+ { "ipmask", sample_conv_ipmask, ARG2(1,MSK4,MSK6), NULL, SMP_T_ADDR, SMP_T_ADDR },
+ { "ltime", sample_conv_ltime, ARG2(1,STR,SINT), NULL, SMP_T_SINT, SMP_T_STR },
+ { "ms_ltime", sample_conv_ms_ltime, ARG2(1,STR,SINT), NULL, SMP_T_SINT, SMP_T_STR },
+ { "us_ltime", sample_conv_us_ltime, ARG2(1,STR,SINT), NULL, SMP_T_SINT, SMP_T_STR },
+ { "utime", sample_conv_utime, ARG2(1,STR,SINT), NULL, SMP_T_SINT, SMP_T_STR },
+ { "ms_utime", sample_conv_ms_utime, ARG2(1,STR,SINT), NULL, SMP_T_SINT, SMP_T_STR },
+ { "us_utime", sample_conv_us_utime, ARG2(1,STR,SINT), NULL, SMP_T_SINT, SMP_T_STR },
+ { "crc32", sample_conv_crc32, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "crc32c", sample_conv_crc32c, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "djb2", sample_conv_djb2, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "sdbm", sample_conv_sdbm, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "wt6", sample_conv_wt6, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "xxh3", sample_conv_xxh3, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "xxh32", sample_conv_xxh32, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "xxh64", sample_conv_xxh64, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
+ { "json", sample_conv_json, ARG1(1,STR), sample_conv_json_check, SMP_T_STR, SMP_T_STR },
+ { "bytes", sample_conv_bytes, ARG2(1,STR,STR), sample_conv_bytes_check, SMP_T_BIN, SMP_T_BIN },
+ { "field", sample_conv_field, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
+ { "word", sample_conv_word, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
+ { "param", sample_conv_param, ARG2(1,STR,STR), sample_conv_param_check, SMP_T_STR, SMP_T_STR },
+ { "regsub", sample_conv_regsub, ARG3(2,REG,STR,STR), sample_conv_regsub_check, SMP_T_STR, SMP_T_STR },
+ { "sha1", sample_conv_sha1, 0, NULL, SMP_T_BIN, SMP_T_BIN },
+ { "strcmp", sample_conv_strcmp, ARG1(1,STR), smp_check_strcmp, SMP_T_STR, SMP_T_SINT },
+ { "host_only", sample_conv_host_only, 0, NULL, SMP_T_STR, SMP_T_STR },
+ { "port_only", sample_conv_port_only, 0, NULL, SMP_T_STR, SMP_T_SINT },
+
+ /* gRPC converters. */
+ { "ungrpc", sample_conv_ungrpc, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN },
+ { "protobuf", sample_conv_protobuf, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN },
+
+ /* FIX converters */
+ { "fix_is_valid", sample_conv_fix_is_valid, 0, NULL, SMP_T_BIN, SMP_T_BOOL },
+ { "fix_tag_value", sample_conv_fix_tag_value, ARG1(1,STR), sample_conv_fix_value_check, SMP_T_BIN, SMP_T_BIN },
+
+ /* MQTT converters */
+ { "mqtt_is_valid", sample_conv_mqtt_is_valid, 0, NULL, SMP_T_BIN, SMP_T_BOOL },
+ { "mqtt_field_value", sample_conv_mqtt_field_value, ARG2(2,STR,STR), sample_conv_mqtt_field_value_check, SMP_T_BIN, SMP_T_STR },
+
+ { "iif", sample_conv_iif, ARG2(2, STR, STR), NULL, SMP_T_BOOL, SMP_T_STR },
+
+ { "and", sample_conv_binary_and, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "or", sample_conv_binary_or, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "xor", sample_conv_binary_xor, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "cpl", sample_conv_binary_cpl, 0, NULL, SMP_T_SINT, SMP_T_SINT },
+ { "bool", sample_conv_arith_bool, 0, NULL, SMP_T_SINT, SMP_T_BOOL },
+ { "not", sample_conv_arith_not, 0, NULL, SMP_T_SINT, SMP_T_BOOL },
+ { "odd", sample_conv_arith_odd, 0, NULL, SMP_T_SINT, SMP_T_BOOL },
+ { "even", sample_conv_arith_even, 0, NULL, SMP_T_SINT, SMP_T_BOOL },
+ { "add", sample_conv_arith_add, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "sub", sample_conv_arith_sub, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "mul", sample_conv_arith_mul, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "div", sample_conv_arith_div, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "mod", sample_conv_arith_mod, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },
+ { "neg", sample_conv_arith_neg, 0, NULL, SMP_T_SINT, SMP_T_SINT },
+
+ { "htonl", sample_conv_htonl, 0, NULL, SMP_T_SINT, SMP_T_BIN },
+ { "cut_crlf", sample_conv_cut_crlf, 0, NULL, SMP_T_STR, SMP_T_STR },
+ { "ltrim", sample_conv_ltrim, ARG1(1,STR), NULL, SMP_T_STR, SMP_T_STR },
+ { "rtrim", sample_conv_rtrim, ARG1(1,STR), NULL, SMP_T_STR, SMP_T_STR },
+ { "json_query", sample_conv_json_query, ARG2(1,STR,STR), sample_check_json_query , SMP_T_STR, SMP_T_ANY },
+
+#ifdef USE_OPENSSL
+ /* JSON Web Token converters */
+ { "jwt_header_query", sample_conv_jwt_header_query, ARG2(0,STR,STR), sample_conv_jwt_query_check, SMP_T_BIN, SMP_T_ANY },
+ { "jwt_payload_query", sample_conv_jwt_payload_query, ARG2(0,STR,STR), sample_conv_jwt_query_check, SMP_T_BIN, SMP_T_ANY },
+ { "jwt_verify", sample_conv_jwt_verify, ARG2(2,STR,STR), sample_conv_jwt_verify_check, SMP_T_BIN, SMP_T_SINT },
+#endif
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
diff --git a/src/server.c b/src/server.c
new file mode 100644
index 0000000..829fbb3
--- /dev/null
+++ b/src/server.c
@@ -0,0 +1,6765 @@
+/*
+ * Server management functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2008 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/types.h>
+#include <netinet/tcp.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet-t.h>
+#include <haproxy/backend.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/connection.h>
+#include <haproxy/dict-t.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/mailers.h>
+#include <haproxy/namespace.h>
+#include <haproxy/port_range.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy.h>
+#include <haproxy/queue.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+#include <haproxy/event_hdl.h>
+
+
+static void srv_update_status(struct server *s, int type, int cause);
+static int srv_apply_lastaddr(struct server *srv, int *err_code);
+static void srv_cleanup_connections(struct server *srv);
+
+/* extra keywords used as values for other arguments. They are used as
+ * suggestions for mistyped words.
+ */
+static const char *extra_kw_list[] = {
+ "ipv4", "ipv6", "legacy", "octet-count",
+ "fail-check", "sudden-death", "mark-down",
+ NULL /* must be last */
+};
+
+/* List head of all known server keywords */
+struct srv_kw_list srv_keywords = {
+ .list = LIST_HEAD_INIT(srv_keywords.list)
+};
+
+__decl_thread(HA_SPINLOCK_T idle_conn_srv_lock);
+struct eb_root idle_conn_srv = EB_ROOT;
+struct task *idle_conn_task __read_mostly = NULL;
+struct list servers_list = LIST_HEAD_INIT(servers_list);
+static struct task *server_atomic_sync_task = NULL;
+static event_hdl_async_equeue server_atomic_sync_queue;
+
+/* SERVER DELETE(n)->ADD global tracker:
+ * This is meant to provide the srv->rid (revision id) value.
+ * The revision id makes it possible to differentiate between a previously
+ * existing deleted server and a new server reusing the deleted server's
+ * name/id.
+ *
+ * The start value is 0 (even value).
+ * The LSB is used to indicate that one or multiple server deletions in a
+ * row were performed.
+ * When adding a new server, increment by 1 if the current value is odd
+ * (odd = LSB set), because adding a new server after one or multiple
+ * deletions means we could potentially be reusing old names: increase the
+ * revision id to prevent mixups between old and new names.
+ *
+ * srv->rid is calculated from the counter's even values only.
+ * sizeof(srv_id_reuse_cnt) must be twice sizeof(srv->rid).
+ *
+ * Wraparound is expected and should not cause issues
+ * (with the current design we allow up to 4 billion unique revisions).
+ *
+ * The counter is only used under thread_isolate (cli_add/cli_del),
+ * so there is no need for atomic ops.
+ */
+static uint64_t srv_id_reuse_cnt = 0;
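+
+/* Life cycle sketch of the counter above, assuming rid is derived as cnt / 2
+ * (matching the "even values only" rule described earlier):
+ *   cnt = 0        initial state, first servers get rid 0
+ *   "del server"   cnt becomes 1 (LSB set: deletions happened)
+ *   "add server"   cnt becomes 2, the new server gets rid 1
+ * so a server reusing a deleted name never shares its revision id.
+ */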
+
+/* The server names dictionary */
+struct dict server_key_dict = {
+ .name = "server keys",
+ .values = EB_ROOT_UNIQUE,
+};
+
+static const char *srv_adm_st_chg_cause_str[] = {
+ [SRV_ADM_STCHGC_NONE] = "",
+ [SRV_ADM_STCHGC_DNS_NOENT] = "entry removed from SRV record",
+ [SRV_ADM_STCHGC_DNS_NOIP] = "No IP for server ",
+ [SRV_ADM_STCHGC_DNS_NX] = "DNS NX status",
+ [SRV_ADM_STCHGC_DNS_TIMEOUT] = "DNS timeout status",
+ [SRV_ADM_STCHGC_DNS_REFUSED] = "DNS refused status",
+ [SRV_ADM_STCHGC_DNS_UNSPEC] = "unspecified DNS error",
+ [SRV_ADM_STCHGC_STATS_DISABLE] = "'disable' on stats page",
+ [SRV_ADM_STCHGC_STATS_STOP] = "'stop' on stats page"
+};
+
+const char *srv_adm_st_chg_cause(enum srv_adm_st_chg_cause cause)
+{
+ return srv_adm_st_chg_cause_str[cause];
+}
+
+static const char *srv_op_st_chg_cause_str[] = {
+ [SRV_OP_STCHGC_NONE] = "",
+ [SRV_OP_STCHGC_HEALTH] = "",
+ [SRV_OP_STCHGC_AGENT] = "",
+ [SRV_OP_STCHGC_CLI] = "changed from CLI",
+ [SRV_OP_STCHGC_LUA] = "changed from Lua script",
+ [SRV_OP_STCHGC_STATS_WEB] = "changed from Web interface",
+ [SRV_OP_STCHGC_STATEFILE] = "changed from server-state after a reload"
+};
+
+const char *srv_op_st_chg_cause(enum srv_op_st_chg_cause cause)
+{
+ return srv_op_st_chg_cause_str[cause];
+}
+
+int srv_downtime(const struct server *s)
+{
+ if ((s->cur_state != SRV_ST_STOPPED) || s->last_change >= ns_to_sec(now_ns)) // ignore negative time
+ return s->down_time;
+
+ return ns_to_sec(now_ns) - s->last_change + s->down_time;
+}
+
+int srv_lastsession(const struct server *s)
+{
+ if (s->counters.last_sess)
+ return ns_to_sec(now_ns) - s->counters.last_sess;
+
+ return -1;
+}
+
+int srv_getinter(const struct check *check)
+{
+ const struct server *s = check->server;
+
+ if ((check->state & (CHK_ST_CONFIGURED|CHK_ST_FASTINTER)) == CHK_ST_CONFIGURED &&
+ (check->health == check->rise + check->fall - 1))
+ return check->inter;
+
+ if ((s->next_state == SRV_ST_STOPPED) && check->health == 0)
+ return (check->downinter)?(check->downinter):(check->inter);
+
+ return (check->fastinter)?(check->fastinter):(check->inter);
+}
+
+/* Update server's addr:svc_port tuple in INET context
+ *
+ * Must be called under thread isolation to ensure consistent readings across
+ * all threads (addr:svc_port might be read without srv lock being held).
+ */
+static void _srv_set_inetaddr_port(struct server *srv,
+ const struct sockaddr_storage *addr,
+ unsigned int svc_port, uint8_t mapped_port)
+{
+ ipcpy(addr, &srv->addr);
+ srv->svc_port = svc_port;
+ if (mapped_port)
+ srv->flags |= SRV_F_MAPPORTS;
+ else
+ srv->flags &= ~SRV_F_MAPPORTS;
+
+ if (srv->log_target && srv->log_target->type == LOG_TARGET_DGRAM) {
+ /* server is used as a log target, manually update log target addr for DGRAM */
+ ipcpy(addr, srv->log_target->addr);
+ set_host_port(srv->log_target->addr, svc_port);
+ }
+}
+
+/* same as _srv_set_inetaddr_port() but only updates the addr part
+ */
+static void _srv_set_inetaddr(struct server *srv,
+ const struct sockaddr_storage *addr)
+{
+ _srv_set_inetaddr_port(srv, addr, srv->svc_port, !!(srv->flags & SRV_F_MAPPORTS));
+}
+
+/*
+ * Function executed by server_atomic_sync_task to perform atomic updates on
+ * compatible server struct members that are not guarded by any lock, since
+ * they are not supposed to change often and are subject to being used in
+ * sensitive codepaths.
+ *
+ * Some updates may require thread isolation: we start without isolation
+ * but as soon as we encounter an event that requires isolation, we do so.
+ * Once the event is processed, we keep the isolation until we've processed
+ * the whole batch of events and leave isolation once we're done, as it would
+ * be very costly to try to acquire isolation multiple times in a row.
+ * The task will limit itself to a number of events per run to prevent
+ * thread contention (see: "tune.events.max-events-at-once").
+ *
+ * TODO: if we find out that enforcing isolation is too costly, we may
+ * consider adding thread_isolate_try_full(timeout) or equivalent to the
+ * thread API so that we can do our best not to block harmless threads
+ * for too long if one or multiple threads are still heavily busy. This
+ * would mean that the task would be capable of rescheduling itself to
+ * start again on the current event if it failed to acquire thread
+ * isolation. This would also imply that the event_hdl API allows us
+ * to check an event without popping it from the queue first (remove the
+ * event once it is successfully processed).
+ */
+static void srv_set_addr_desc(struct server *s, int reattach);
+static struct task *server_atomic_sync(struct task *task, void *context, unsigned int state)
+{
+ unsigned int remain = event_hdl_tune.max_events_at_once; // to limit max number of events per batch
+ struct event_hdl_async_event *event;
+
+ /* check for new server events that we care about */
+ while ((event = event_hdl_async_equeue_pop(&server_atomic_sync_queue))) {
+ if (event_hdl_sub_type_equal(event->type, EVENT_HDL_SUB_END)) {
+ /* ending event: no more events to come */
+ event_hdl_async_free_event(event);
+ task_destroy(task);
+ task = NULL;
+ break;
+ }
+
+ if (!remain) {
+ /* STOP: we've already spent all our budget here, and
+ * considering we possibly are under isolation, we cannot
+ * keep blocking other threads any longer.
+ *
+ * Reschedule the task to finish where we left off if
+ * there are remaining events in the queue.
+ */
+ if (!event_hdl_async_equeue_isempty(&server_atomic_sync_queue))
+ task_wakeup(task, TASK_WOKEN_OTHER);
+ break;
+ }
+ remain--;
+
+ /* new event to process */
+ if (event_hdl_sub_type_equal(event->type, EVENT_HDL_SUB_SERVER_INETADDR)) {
+ struct sockaddr_storage new_addr;
+ struct event_hdl_cb_data_server_inetaddr *data = event->data;
+ struct proxy *px;
+ struct server *srv;
+
+ /* server ip:port changed, we must atomically update data members
+ * to prevent invalid reads by other threads.
+ */
+
+ /* check if related server still exists */
+ px = proxy_find_by_id(data->server.safe.proxy_uuid, PR_CAP_BE, 0);
+ if (!px)
+ continue;
+ srv = findserver_unique_id(px, data->server.safe.puid, data->server.safe.rid);
+ if (!srv)
+ continue;
+
+ /* prepare new addr based on event cb data */
+ memset(&new_addr, 0, sizeof(new_addr));
+ new_addr.ss_family = data->safe.next.family;
+ switch (new_addr.ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)&new_addr)->sin_addr.s_addr =
+ data->safe.next.addr.v4.s_addr;
+ break;
+ case AF_INET6:
+ memcpy(&((struct sockaddr_in6 *)&new_addr)->sin6_addr,
+ &data->safe.next.addr.v6,
+ sizeof(struct in6_addr));
+ break;
+ case AF_UNSPEC:
+ /* addr reset, nothing to do */
+ break;
+ default:
+ /* should not happen */
+ break;
+ }
+ /*
+ * This requires thread isolation, which is safe since we're the only
+ * task working for the current subscription and we don't hold locks
+ * or resources that other threads may depend on to complete a running
+ * cycle. Note that we proceed this way because we assume that this
+ * event is rather rare.
+ */
+ if (!thread_isolated())
+ thread_isolate_full();
+
+ /* apply new addr:port combination */
+ _srv_set_inetaddr_port(srv, &new_addr,
+ data->safe.next.port.svc, data->safe.next.port.map);
+
+ /* propagate the changes */
+ if (data->safe.purge_conn) /* force connection cleanup on the given server? */
+ srv_cleanup_connections(srv);
+ srv_set_dyncookie(srv);
+ srv_set_addr_desc(srv, 1);
+ }
+ event_hdl_async_free_event(event);
+ }
+
+ /* some events possibly required thread_isolation:
+ * now that we are done, we must leave thread isolation before
+ * returning
+ */
+ if (thread_isolated())
+ thread_release();
+
+ return task;
+}
+
+/* Try to start the atomic server sync task.
+ *
+ * Returns ERR_NONE on success and a combination of ERR_CODE on failure
+ */
+static int server_atomic_sync_start()
+{
+ struct event_hdl_sub_type subscriptions = EVENT_HDL_SUB_NONE;
+
+ if (server_atomic_sync_task)
+ return ERR_NONE; // nothing to do
+ server_atomic_sync_task = task_new_anywhere();
+ if (!server_atomic_sync_task)
+ goto fail;
+ server_atomic_sync_task->process = server_atomic_sync;
+ event_hdl_async_equeue_init(&server_atomic_sync_queue);
+
+ /* task created, now subscribe to relevant server events in the global list */
+ subscriptions = event_hdl_sub_type_add(subscriptions, EVENT_HDL_SUB_SERVER_INETADDR);
+ if (!event_hdl_subscribe(NULL, subscriptions,
+ EVENT_HDL_ASYNC_TASK(&server_atomic_sync_queue,
+ server_atomic_sync_task,
+ NULL,
+ NULL)))
+ goto fail;
+
+ return ERR_NONE;
+
+ fail:
+ task_destroy(server_atomic_sync_task);
+ server_atomic_sync_task = NULL;
+ return ERR_ALERT | ERR_FATAL;
+}
+REGISTER_POST_CHECK(server_atomic_sync_start);
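+
+/* For reference, a minimal sketch of how another component could consume the
+ * same event family with its own task ("my_queue", "my_task" and "my_sync"
+ * are placeholder names, not existing symbols):
+ *
+ * event_hdl_async_equeue_init(&my_queue);
+ * my_task = task_new_anywhere();
+ * my_task->process = my_sync;
+ * event_hdl_subscribe(NULL,
+ *                     event_hdl_sub_type_add(EVENT_HDL_SUB_NONE,
+ *                                            EVENT_HDL_SUB_SERVER_INETADDR),
+ *                     EVENT_HDL_ASYNC_TASK(&my_queue, my_task, NULL, NULL));
+ *
+ * The task handler then pops events from my_queue exactly like
+ * server_atomic_sync() above and must free each of them with
+ * event_hdl_async_free_event().
+ */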
+
+/* Fill the common server event data struct members.
+ * Must be called with the server lock held or under thread isolation.
+ */
+static inline void _srv_event_hdl_prepare(struct event_hdl_cb_data_server *cb_data,
+ struct server *srv, uint8_t thread_isolate)
+{
+ /* safe data assignments */
+ cb_data->safe.puid = srv->puid;
+ cb_data->safe.rid = srv->rid;
+ cb_data->safe.flags = srv->flags;
+ snprintf(cb_data->safe.name, sizeof(cb_data->safe.name), "%s", srv->id);
+ cb_data->safe.proxy_name[0] = '\0';
+ cb_data->safe.proxy_uuid = -1; /* default value */
+ if (srv->proxy) {
+ cb_data->safe.proxy_uuid = srv->proxy->uuid;
+ snprintf(cb_data->safe.proxy_name, sizeof(cb_data->safe.proxy_name), "%s", srv->proxy->id);
+ }
+ /* unsafe data assignments */
+ cb_data->unsafe.ptr = srv;
+ cb_data->unsafe.thread_isolate = thread_isolate;
+ cb_data->unsafe.srv_lock = !thread_isolate;
+}
+
+/* take an event-check snapshot from a live check */
+void _srv_event_hdl_prepare_checkres(struct event_hdl_cb_data_server_checkres *checkres,
+ struct check *check)
+{
+ checkres->agent = !!(check->state & CHK_ST_AGENT);
+ checkres->result = check->result;
+ checkres->duration = check->duration;
+ checkres->reason.status = check->status;
+ checkres->reason.code = check->code;
+ checkres->health.cur = check->health;
+ checkres->health.rise = check->rise;
+ checkres->health.fall = check->fall;
+}
+
+/* Prepare SERVER_STATE event
+ *
+ * This special event will contain extra hints related to the state change
+ *
+ * Must be called with server lock held
+ */
+void _srv_event_hdl_prepare_state(struct event_hdl_cb_data_server_state *cb_data,
+ struct server *srv, int type, int cause,
+ enum srv_state prev_state, int requeued)
+{
+ /* state event provides additional info about the server state change */
+ cb_data->safe.type = type;
+ cb_data->safe.new_state = srv->cur_state;
+ cb_data->safe.old_state = prev_state;
+ cb_data->safe.requeued = requeued;
+ if (type) {
+ /* administrative */
+ cb_data->safe.adm_st_chg.cause = cause;
+ }
+ else {
+ /* operational */
+ cb_data->safe.op_st_chg.cause = cause;
+ if (cause == SRV_OP_STCHGC_HEALTH || cause == SRV_OP_STCHGC_AGENT) {
+ struct check *check = (cause == SRV_OP_STCHGC_HEALTH) ? &srv->check : &srv->agent;
+
+ /* provide additional check-related state change result */
+ _srv_event_hdl_prepare_checkres(&cb_data->safe.op_st_chg.check, check);
+ }
+ }
+}
+
+/* Prepare SERVER_INETADDR event, prev data is learned from the current
+ * server settings.
+ *
+ * This special event will contain extra hints related to the addr change
+ *
+ * Must be called with the server lock held.
+ */
+static void _srv_event_hdl_prepare_inetaddr(struct event_hdl_cb_data_server_inetaddr *cb_data,
+ struct server *srv,
+ const struct sockaddr_storage *next_addr,
+ unsigned int next_port, uint8_t next_mapports,
+ uint8_t purge_conn)
+{
+ struct sockaddr_storage *prev_addr = &srv->addr;
+ unsigned int prev_port = srv->svc_port;
+ uint8_t prev_mapports = !!(srv->flags & SRV_F_MAPPORTS);
+
+ /* only INET families are supported */
+ BUG_ON((prev_addr->ss_family != AF_UNSPEC &&
+ prev_addr->ss_family != AF_INET && prev_addr->ss_family != AF_INET6) ||
+ (next_addr->ss_family != AF_UNSPEC &&
+ next_addr->ss_family != AF_INET && next_addr->ss_family != AF_INET6));
+
+ /* prev */
+ cb_data->safe.prev.family = prev_addr->ss_family;
+ memset(&cb_data->safe.prev.addr, 0, sizeof(cb_data->safe.prev.addr));
+ if (prev_addr->ss_family == AF_INET)
+ cb_data->safe.prev.addr.v4.s_addr =
+ ((struct sockaddr_in *)prev_addr)->sin_addr.s_addr;
+ else if (prev_addr->ss_family == AF_INET6)
+ memcpy(&cb_data->safe.prev.addr.v6,
+ &((struct sockaddr_in6 *)prev_addr)->sin6_addr,
+ sizeof(struct in6_addr));
+ cb_data->safe.prev.port.svc = prev_port;
+ cb_data->safe.prev.port.map = prev_mapports;
+
+ /* next */
+ cb_data->safe.next.family = next_addr->ss_family;
+ memset(&cb_data->safe.next.addr, 0, sizeof(cb_data->safe.next.addr));
+ if (next_addr->ss_family == AF_INET)
+ cb_data->safe.next.addr.v4.s_addr =
+ ((struct sockaddr_in *)next_addr)->sin_addr.s_addr;
+ else if (next_addr->ss_family == AF_INET6)
+ memcpy(&cb_data->safe.next.addr.v6,
+ &((struct sockaddr_in6 *)next_addr)->sin6_addr,
+ sizeof(struct in6_addr));
+ cb_data->safe.next.port.svc = next_port;
+ cb_data->safe.next.port.map = next_mapports;
+
+ cb_data->safe.purge_conn = purge_conn;
+}
+
+/* server event publishing helper: publish in both global and
+ * server dedicated subscription list.
+ */
+#define _srv_event_hdl_publish(e, d, s) \
+ ({ \
+ /* publish in server dedicated sub list */ \
+ event_hdl_publish(&s->e_subs, e, EVENT_HDL_CB_DATA(&d));\
+ /* publish in global subscription list */ \
+ event_hdl_publish(NULL, e, EVENT_HDL_CB_DATA(&d)); \
+ })
+
+/* General server event publishing:
+ * Use this to publish EVENT_HDL_SUB_SERVER family type event
+ * from srv facility.
+ *
+ * server ptr must be valid.
+ * Must be called with srv lock or under thread_isolate.
+ */
+static void srv_event_hdl_publish(struct event_hdl_sub_type event,
+ struct server *srv, uint8_t thread_isolate)
+{
+ struct event_hdl_cb_data_server cb_data;
+
+ /* prepare event data */
+ _srv_event_hdl_prepare(&cb_data, srv, thread_isolate);
+ _srv_event_hdl_publish(event, cb_data, srv);
+}
+
+/* Publish SERVER_CHECK event
+ *
+ * This special event will contain extra hints related to the check itself
+ *
+ * Must be called with server lock held
+ */
+void srv_event_hdl_publish_check(struct server *srv, struct check *check)
+{
+ struct event_hdl_cb_data_server_check cb_data;
+
+ /* check event provides additional info about the server check */
+ _srv_event_hdl_prepare_checkres(&cb_data.safe.res, check);
+
+ cb_data.unsafe.ptr = check;
+
+ /* prepare event data (common server data) */
+ _srv_event_hdl_prepare((struct event_hdl_cb_data_server *)&cb_data, srv, 0);
+
+ _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_CHECK, cb_data, srv);
+}
+
+/*
+ * Check that we did not get a hash collision.
+ * Unlikely, but it can happen. The server's proxy must be at least
+ * read-locked.
+ */
+static inline void srv_check_for_dup_dyncookie(struct server *s)
+{
+ struct proxy *p = s->proxy;
+ struct server *tmpserv;
+
+ for (tmpserv = p->srv; tmpserv != NULL;
+ tmpserv = tmpserv->next) {
+ if (tmpserv == s)
+ continue;
+ if (tmpserv->next_admin & SRV_ADMF_FMAINT)
+ continue;
+ if (tmpserv->cookie &&
+ strcmp(tmpserv->cookie, s->cookie) == 0) {
+ ha_warning("We generated two equal cookies for two different servers.\n"
+ "Please change the secret key for '%s'.\n",
+ s->proxy->id);
+ }
+ }
+
+}
+
+/*
+ * Must be called with the server lock held, and will read-lock the proxy.
+ */
+void srv_set_dyncookie(struct server *s)
+{
+ struct proxy *p = s->proxy;
+ char *tmpbuf;
+ unsigned long long hash_value;
+ size_t key_len;
+ size_t buffer_len;
+ int addr_len;
+ int port;
+
+ HA_RWLOCK_RDLOCK(PROXY_LOCK, &p->lock);
+
+ if ((s->flags & SRV_F_COOKIESET) ||
+ !(s->proxy->ck_opts & PR_CK_DYNAMIC) ||
+ s->proxy->dyncookie_key == NULL)
+ goto out;
+ key_len = strlen(p->dyncookie_key);
+
+ if (s->addr.ss_family != AF_INET &&
+ s->addr.ss_family != AF_INET6)
+ goto out;
+ /*
+ * Buffer to calculate the cookie value.
+ * The buffer contains the secret key + the server IP address
+ * + the TCP port.
+ */
+ addr_len = (s->addr.ss_family == AF_INET) ? 4 : 16;
+ /*
+ * The TCP port should use only 2 bytes, but is stored in
+ * an unsigned int in struct server, so let's use 4, to be
+ * on the safe side.
+ */
+ buffer_len = key_len + addr_len + 4;
+ tmpbuf = trash.area;
+ memcpy(tmpbuf, p->dyncookie_key, key_len);
+ memcpy(&(tmpbuf[key_len]),
+ s->addr.ss_family == AF_INET ?
+ (void *)&((struct sockaddr_in *)&s->addr)->sin_addr.s_addr :
+ (void *)&(((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr),
+ addr_len);
+ /*
+ * Make sure it's the same across all the load balancers,
+ * no matter their endianness.
+ */
+ port = htonl(s->svc_port);
+ memcpy(&tmpbuf[key_len + addr_len], &port, 4);
+ hash_value = XXH64(tmpbuf, buffer_len, 0);
+ memprintf(&s->cookie, "%016llx", hash_value);
+ if (!s->cookie)
+ goto out;
+ s->cklen = 16;
+
+ /* Don't bother checking if the dyncookie is duplicated if
+ * the server is marked as "disabled": maybe it doesn't have
+ * its real IP yet, but just a placeholder.
+ */
+ if (!(s->next_admin & SRV_ADMF_FMAINT))
+ srv_check_for_dup_dyncookie(s);
+ out:
+ HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &p->lock);
+}
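+
+/* A minimal configuration sketch enabling the dynamic cookies computed above
+ * (backend name and key are illustrative only):
+ *
+ * backend app
+ *     cookie SRVID insert dynamic
+ *     dynamic-cookie-key my-secret-key
+ *     server s1 192.168.0.10:80
+ *     server s2 192.168.0.11:80
+ *
+ * Each server is then assigned a 16-hex-digit cookie derived from
+ * XXH64(key + address + port), so all load balancers sharing the same key
+ * generate identical cookies for a given server.
+ */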
+
+/* Returns true if it's possible to reuse an idle connection from server <srv>
+ * for a websocket stream. This is the case if server is configured to use the
+ * same protocol for both HTTP and websocket streams. This depends on the value
+ * of "proto", "alpn" and "ws" keywords.
+ */
+int srv_check_reuse_ws(struct server *srv)
+{
+ if (srv->mux_proto || srv->use_ssl != 1 || !srv->ssl_ctx.alpn_str) {
+ /* explicit srv.mux_proto, SSL not used, or no ALPN set:
+ * srv.mux_proto is used for mux selection.
+ */
+ const struct ist srv_mux = srv->mux_proto ?
+ srv->mux_proto->token : IST_NULL;
+
+ switch (srv->ws) {
+ /* "auto" means use the same protocol : reuse is possible. */
+ case SRV_WS_AUTO:
+ return 1;
+
+ /* "h2" means use h2 for websocket : reuse is possible if
+ * server mux is h2.
+ */
+ case SRV_WS_H2:
+ if (srv->mux_proto && isteq(srv_mux, ist("h2")))
+ return 1;
+ break;
+
+ /* "h1" means use h1 for websocket : reuse is possible if
+ * server mux is h1.
+ */
+ case SRV_WS_H1:
+ if (!srv->mux_proto || isteq(srv_mux, ist("h1")))
+ return 1;
+ break;
+ }
+ }
+ else {
+ /* ALPN selection.
+ * Based on the assumption that only "h2" and "http/1.1" token
+ * are used on server ALPN.
+ */
+ const struct ist alpn = ist2(srv->ssl_ctx.alpn_str,
+ srv->ssl_ctx.alpn_len);
+
+ switch (srv->ws) {
+ case SRV_WS_AUTO:
+ /* for auto mode, consider reuse as possible if the
+ * server uses a single protocol ALPN
+ */
+ if (!istchr(alpn, ','))
+ return 1;
+ break;
+
+ case SRV_WS_H2:
+ return isteq(alpn, ist("\x02h2"));
+
+ case SRV_WS_H1:
+ return isteq(alpn, ist("\x08http/1.1"));
+ }
+ }
+
+ return 0;
+}
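+
+/* Illustrative settings (address and tokens are examples only):
+ *
+ * server s1 10.0.0.1:443 ssl alpn h2,http/1.1 ws h2
+ *
+ * Here websocket reuse is not possible: "ws h2" requires the ALPN to be
+ * exactly "h2", while a single-protocol "alpn h2", or "ws auto" with any
+ * single-protocol ALPN, would allow reuse.
+ */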
+
+/* Return the proto to use for a websocket stream on <srv> without ALPN. NULL
+ * is a valid value indicating to use the fallback mux.
+ */
+const struct mux_ops *srv_get_ws_proto(struct server *srv)
+{
+ const struct mux_proto_list *mux = NULL;
+
+ switch (srv->ws) {
+ case SRV_WS_AUTO:
+ mux = srv->mux_proto;
+ break;
+
+ case SRV_WS_H1:
+ mux = get_mux_proto(ist("h1"));
+ break;
+
+ case SRV_WS_H2:
+ mux = get_mux_proto(ist("h2"));
+ break;
+ }
+
+ return mux ? mux->mux : NULL;
+}
+
+/*
+ * Must be called with the server lock held. The server is first removed from
+ * the proxy tree if it was already attached. If <reattach> is true, the server
+ * will then be attached in the proxy tree. The proxy lock is held to
+ * manipulate the tree.
+ */
+static void srv_set_addr_desc(struct server *s, int reattach)
+{
+ struct proxy *p = s->proxy;
+ char *key;
+
+ key = sa2str(&s->addr, s->svc_port, s->flags & SRV_F_MAPPORTS);
+
+ if (s->addr_node.key) {
+ if (key && strcmp(key, s->addr_node.key) == 0) {
+ free(key);
+ return;
+ }
+
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+ ebpt_delete(&s->addr_node);
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+
+ free(s->addr_node.key);
+ }
+
+ s->addr_node.key = key;
+
+ if (reattach) {
+ if (s->addr_node.key) {
+ HA_RWLOCK_WRLOCK(PROXY_LOCK, &p->lock);
+ ebis_insert(&p->used_server_addr, &s->addr_node);
+ HA_RWLOCK_WRUNLOCK(PROXY_LOCK, &p->lock);
+ }
+ }
+}
+
+/*
+ * Registers the server keyword list <kwl> as a list of valid keywords for next
+ * parsing sessions.
+ */
+void srv_register_keywords(struct srv_kw_list *kwl)
+{
+ LIST_APPEND(&srv_keywords.list, &kwl->list);
+}
+
+/* Return a pointer to the server keyword <kw>, or NULL if not found. If the
+ * keyword is found with a NULL ->parse() function, then an attempt is made to
+ * find one with a valid ->parse() function. This way it is possible to declare
+ * platform-dependent, known keywords as NULL, then only declare them as valid
+ * if some options are met. Note that if the requested keyword contains an
+ * opening parenthesis, everything from this point is ignored.
+ */
+struct srv_kw *srv_find_kw(const char *kw)
+{
+ int index;
+ const char *kwend;
+ struct srv_kw_list *kwl;
+ struct srv_kw *ret = NULL;
+
+ kwend = strchr(kw, '(');
+ if (!kwend)
+ kwend = kw + strlen(kw);
+
+ list_for_each_entry(kwl, &srv_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if ((strncmp(kwl->kw[index].kw, kw, kwend - kw) == 0) &&
+ kwl->kw[index].kw[kwend-kw] == 0) {
+ if (kwl->kw[index].parse)
+ return &kwl->kw[index]; /* found it! */
+ else
+ ret = &kwl->kw[index]; /* may be OK */
+ }
+ }
+ }
+ return ret;
+}
+
+/* Dumps all registered "server" keywords to the <out> string pointer. The
+ * unsupported keywords are only dumped if their supported form was not
+ * found.
+ */
+void srv_dump_kws(char **out)
+{
+ struct srv_kw_list *kwl;
+ int index;
+
+ if (!out)
+ return;
+
+ *out = NULL;
+ list_for_each_entry(kwl, &srv_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ if (kwl->kw[index].parse ||
+ srv_find_kw(kwl->kw[index].kw) == &kwl->kw[index]) {
+ memprintf(out, "%s[%4s] %s%s%s%s\n", *out ? *out : "",
+ kwl->scope,
+ kwl->kw[index].kw,
+ kwl->kw[index].skip ? " <arg>" : "",
+ kwl->kw[index].default_ok ? " [dflt_ok]" : "",
+ kwl->kw[index].parse ? "" : " (not supported)");
+ }
+ }
+ }
+}
+
+/* Try to find in srv_keywords the word that looks closest to <word> by
+ * counting transitions between letters, digits and other characters. Will
+ * return the best matching word if found, otherwise NULL. The global
+ * <extra_kw_list> array of extra words is compared as well; it is
+ * terminated by a NULL entry.
+ */
+static const char *srv_find_best_kw(const char *word)
+{
+ uint8_t word_sig[1024];
+ uint8_t list_sig[1024];
+ const struct srv_kw_list *kwl;
+ const char *best_ptr = NULL;
+ int dist, best_dist = INT_MAX;
+ const char **extra;
+ int index;
+
+ make_word_fingerprint(word_sig, word);
+ list_for_each_entry(kwl, &srv_keywords.list, list) {
+ for (index = 0; kwl->kw[index].kw != NULL; index++) {
+ make_word_fingerprint(list_sig, kwl->kw[index].kw);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = kwl->kw[index].kw;
+ }
+ }
+ }
+
+ for (extra = extra_kw_list; *extra; extra++) {
+ make_word_fingerprint(list_sig, *extra);
+ dist = word_fingerprint_distance(word_sig, list_sig);
+ if (dist < best_dist) {
+ best_dist = dist;
+ best_ptr = *extra;
+ }
+ }
+
+ if (best_dist > 2 * strlen(word) || (best_ptr && best_dist > 2 * strlen(best_ptr)))
+ best_ptr = NULL;
+
+ return best_ptr;
+}
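+
+/* For example, a misspelled keyword such as "wieght" is at a small
+ * fingerprint distance from "weight" and would be suggested, while a word
+ * whose best distance exceeds twice its own length (or twice the candidate's
+ * length) yields NULL.
+ */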
+
+/* Parse the "backup" server keyword */
+static int srv_parse_backup(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags |= SRV_F_BACKUP;
+ return 0;
+}
+
+
+/* Parse the "cookie" server keyword */
+static int srv_parse_cookie(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->cookie);
+ newsrv->cookie = strdup(arg);
+ newsrv->cklen = strlen(arg);
+ newsrv->flags |= SRV_F_COOKIESET;
+ return 0;
+}
+
+/* Parse the "disabled" server keyword */
+static int srv_parse_disabled(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->next_admin |= SRV_ADMF_CMAINT | SRV_ADMF_FMAINT;
+ newsrv->next_state = SRV_ST_STOPPED;
+ newsrv->check.state |= CHK_ST_PAUSED;
+ newsrv->check.health = 0;
+ return 0;
+}
+
+/* Parse the "enabled" server keyword */
+static int srv_parse_enabled(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->next_admin &= ~SRV_ADMF_CMAINT & ~SRV_ADMF_FMAINT;
+ newsrv->next_state = SRV_ST_RUNNING;
+ newsrv->check.state &= ~CHK_ST_PAUSED;
+ newsrv->check.health = newsrv->check.rise;
+ return 0;
+}
+
+/* Parse the "error-limit" server keyword */
+static int srv_parse_error_limit(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects an integer argument.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->consecutive_errors_limit = atoi(args[*cur_arg + 1]);
+
+ if (newsrv->consecutive_errors_limit <= 0) {
+ memprintf(err, "%s has to be > 0.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "ws" keyword */
+static int srv_parse_ws(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects 'auto', 'h1' or 'h2' value", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(args[*cur_arg + 1], "h1") == 0) {
+ newsrv->ws = SRV_WS_H1;
+ }
+ else if (strcmp(args[*cur_arg + 1], "h2") == 0) {
+ newsrv->ws = SRV_WS_H2;
+ }
+ else if (strcmp(args[*cur_arg + 1], "auto") == 0) {
+ newsrv->ws = SRV_WS_AUTO;
+ }
+ else {
+ memprintf(err, "'%s' has to be 'auto', 'h1' or 'h2'", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "init-addr" server keyword */
+static int srv_parse_init_addr(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *p, *end;
+ int done;
+ struct sockaddr_storage sa;
+
+ newsrv->init_addr_methods = 0;
+ memset(&newsrv->init_addr, 0, sizeof(newsrv->init_addr));
+
+ for (p = args[*cur_arg + 1]; *p; p = end) {
+ /* cut on next comma */
+ for (end = p; *end && *end != ','; end++);
+ if (*end)
+ *(end++) = 0;
+
+ memset(&sa, 0, sizeof(sa));
+ if (strcmp(p, "libc") == 0) {
+ done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_LIBC);
+ }
+ else if (strcmp(p, "last") == 0) {
+ done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_LAST);
+ }
+ else if (strcmp(p, "none") == 0) {
+ done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_NONE);
+ }
+ else if (str2ip2(p, &sa, 0)) {
+ if (is_addr(&newsrv->init_addr)) {
+ memprintf(err, "'%s' : initial address already specified, cannot add '%s'.",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->init_addr = sa;
+ done = srv_append_initaddr(&newsrv->init_addr_methods, SRV_IADDR_IP);
+ }
+ else {
+ memprintf(err, "'%s' : unknown init-addr method '%s', supported methods are 'libc', 'last', 'none'.",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ if (!done) {
+ memprintf(err, "'%s' : too many init-addr methods when trying to add '%s'",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ return 0;
+}
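+
+/* Example resolution policy (hostname is illustrative):
+ *
+ * server s1 app.example.com:80 init-addr last,libc,none
+ *
+ * At startup the address is first taken from the server-state file ("last"),
+ * then from a libc resolution, and if both fail the server starts without an
+ * address ("none") instead of aborting the configuration parsing.
+ */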
+
+/* Parse the "log-bufsize" server keyword */
+static int srv_parse_log_bufsize(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects an integer argument.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->log_bufsize = atoi(args[*cur_arg + 1]);
+
+ if (newsrv->log_bufsize <= 0) {
+ memprintf(err, "%s has to be > 0.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "log-proto" server keyword */
+static int srv_parse_log_proto(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "legacy") == 0)
+ newsrv->log_proto = SRV_LOG_PROTO_LEGACY;
+ else if (strcmp(args[*cur_arg + 1], "octet-count") == 0)
+ newsrv->log_proto = SRV_LOG_PROTO_OCTET_COUNTING;
+ else {
+ memprintf(err, "'%s' expects one of 'legacy' or 'octet-count' but got '%s'",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "maxconn" server keyword */
+static int srv_parse_maxconn(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->maxconn = atol(args[*cur_arg + 1]);
+ return 0;
+}
+
+/* Parse the "maxqueue" server keyword */
+static int srv_parse_maxqueue(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->maxqueue = atol(args[*cur_arg + 1]);
+ return 0;
+}
+
+/* Parse the "minconn" server keyword */
+static int srv_parse_minconn(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->minconn = atol(args[*cur_arg + 1]);
+ return 0;
+}
+
+static int srv_parse_max_reuse(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->max_reuse = atoi(arg);
+
+ return 0;
+}
+
+static int srv_parse_pool_purge_delay(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ const char *res;
+ char *arg;
+ unsigned int time;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ res = parse_time_err(arg, &time, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ memprintf(err, "timer overflow in argument '%s' to '%s' (maximum value is 2147483647 ms or ~24.8 days)",
+ args[*cur_arg+1], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ memprintf(err, "timer underflow in argument '%s' to '%s' (minimum non-null value is 1 ms)",
+ args[*cur_arg+1], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (res) {
+ memprintf(err, "unexpected character '%c' in argument to <%s>.\n",
+ *res, args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->pool_purge_delay = time;
+
+ return 0;
+}
+
+static int srv_parse_pool_low_conn(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->low_idle_conns = atoi(arg);
+ return 0;
+}
+
+static int srv_parse_pool_max_conn(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <value> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->max_idle_conns = atoi(arg);
+ if ((int)newsrv->max_idle_conns < -1) {
+ memprintf(err, "'%s' must be >= -1", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* parse the "id" server keyword */
+static int srv_parse_id(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ struct eb32_node *node;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : expects an integer argument", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->puid = atol(args[*cur_arg + 1]);
+ newsrv->conf.id.key = newsrv->puid;
+
+ if (newsrv->puid <= 0) {
+ memprintf(err, "'%s' : custom id has to be > 0", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ node = eb32_lookup(&curproxy->conf.used_server_id, newsrv->puid);
+ if (node) {
+ struct server *target = container_of(node, struct server, conf.id);
+ memprintf(err, "'%s' : custom id %d already used at %s:%d ('server %s')",
+ args[*cur_arg], newsrv->puid, target->conf.file, target->conf.line,
+ target->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newsrv->flags |= SRV_F_FORCED_ID;
+ return 0;
+}
+
+/* Parse the "namespace" server keyword */
+static int srv_parse_namespace(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+#ifdef USE_NS
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' : expects <name> as argument", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(arg, "*") == 0) {
+ /* Use the namespace associated with the connection (if present). */
+ newsrv->flags |= SRV_F_USE_NS_FROM_PP;
+ return 0;
+ }
+
+ /*
+ * As this parser may be called several times for the same 'default-server'
+ * object, or for a new 'server' instance deriving from a 'default-server'
+ * one with SRV_F_USE_NS_FROM_PP flag enabled, let's reset it.
+ */
+ newsrv->flags &= ~SRV_F_USE_NS_FROM_PP;
+
+ newsrv->netns = netns_store_lookup(arg, strlen(arg));
+ if (!newsrv->netns)
+ newsrv->netns = netns_store_insert(arg);
+
+ if (!newsrv->netns) {
+ memprintf(err, "Cannot open namespace '%s'", arg);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+#else
+ memprintf(err, "'%s': '%s' option not implemented", args[0], args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+/* Parse the "no-backup" server keyword */
+static int srv_parse_no_backup(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags &= ~SRV_F_BACKUP;
+ return 0;
+}
+
+
+/* Disable server PROXY protocol flags. */
+static inline int srv_disable_pp_flags(struct server *srv, unsigned int flags)
+{
+ srv->pp_opts &= ~flags;
+ return 0;
+}
+
+/* Parse the "no-send-proxy" server keyword */
+static int srv_parse_no_send_proxy(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ return srv_disable_pp_flags(newsrv, SRV_PP_V1);
+}
+
+/* Parse the "no-send-proxy-v2" server keyword */
+static int srv_parse_no_send_proxy_v2(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ return srv_disable_pp_flags(newsrv, SRV_PP_V2);
+}
+
+/* Parse the "shard" server keyword */
+static int srv_parse_shard(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->shard = atol(args[*cur_arg + 1]);
+ return 0;
+}
+
+/* Parse the "no-tfo" server keyword */
+static int srv_parse_no_tfo(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags &= ~SRV_F_FASTOPEN;
+ return 0;
+}
+
+/* Parse the "non-stick" server keyword */
+static int srv_parse_non_stick(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags |= SRV_F_NON_STICK;
+ return 0;
+}
+
+/* Enable server PROXY protocol flags. */
+static inline int srv_enable_pp_flags(struct server *srv, unsigned int flags)
+{
+ srv->pp_opts |= flags;
+ return 0;
+}
+/* parse the "proto" server keyword */
+static int srv_parse_proto(char **args, int *cur_arg,
+ struct proxy *px, struct server *newsrv, char **err)
+{
+ struct ist proto;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' : missing value", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ proto = ist(args[*cur_arg + 1]);
+ newsrv->mux_proto = get_mux_proto(proto);
+ if (!newsrv->mux_proto) {
+ memprintf(err, "'%s' : unknown MUX protocol '%s'", args[*cur_arg], args[*cur_arg+1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ return 0;
+}
+
+/* parse the "proxy-v2-options" */
+static int srv_parse_proxy_v2_options(char **args, int *cur_arg,
+ struct proxy *px, struct server *newsrv, char **err)
+{
+ char *p, *n;
+ for (p = args[*cur_arg+1]; p; p = n) {
+ n = strchr(p, ',');
+ if (n)
+ *n++ = '\0';
+ if (strcmp(p, "ssl") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ } else if (strcmp(p, "cert-cn") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_CN;
+ } else if (strcmp(p, "cert-key") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_KEY_ALG;
+ } else if (strcmp(p, "cert-sig") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_SIG_ALG;
+ } else if (strcmp(p, "ssl-cipher") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_SSL;
+ newsrv->pp_opts |= SRV_PP_V2_SSL_CIPHER;
+ } else if (strcmp(p, "authority") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_AUTHORITY;
+ } else if (strcmp(p, "crc32c") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_CRC32C;
+ } else if (strcmp(p, "unique-id") == 0) {
+ newsrv->pp_opts |= SRV_PP_V2_UNIQUE_ID;
+ } else
+ goto fail;
+ }
+ return 0;
+ fail:
+ if (err)
+ memprintf(err, "'%s' : proxy v2 option not implemented", p);
+ return ERR_ALERT | ERR_FATAL;
+}
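+
+/* Example combining PROXY v2 with a few options (values are illustrative):
+ *
+ * server s1 10.0.0.1:80 send-proxy-v2 proxy-v2-options ssl,cert-cn,unique-id
+ *
+ * Each option listed here only adds the corresponding TLV to the PROXY v2
+ * header; "cert-cn", "cert-key", "cert-sig" and "ssl-cipher" implicitly
+ * enable "ssl" as done above.
+ */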
+
+/* Parse the "observe" server keyword */
+static int srv_parse_observe(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <mode> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (strcmp(arg, "none") == 0) {
+ newsrv->observe = HANA_OBS_NONE;
+ }
+ else if (strcmp(arg, "layer4") == 0) {
+ newsrv->observe = HANA_OBS_LAYER4;
+ }
+ else if (strcmp(arg, "layer7") == 0) {
+ if (curproxy->mode != PR_MODE_HTTP) {
+ memprintf(err, "'%s' can only be used in http proxies.\n", arg);
+ return ERR_ALERT;
+ }
+ newsrv->observe = HANA_OBS_LAYER7;
+ }
+ else {
+ memprintf(err, "'%s' expects one of 'none', 'layer4', 'layer7' "
+ "but got '%s'\n", args[*cur_arg], arg);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
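+
+/* Example of health adjusting based on observed traffic (values are
+ * illustrative):
+ *
+ * server s1 10.0.0.1:80 check observe layer7 error-limit 10 on-error mark-down
+ *
+ * After 10 consecutive layer7 errors seen on regular traffic, the server is
+ * marked down until its health checks succeed again.
+ */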
+
+/* Parse the "on-error" server keyword */
+static int srv_parse_on_error(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "fastinter") == 0)
+ newsrv->onerror = HANA_ONERR_FASTINTER;
+ else if (strcmp(args[*cur_arg + 1], "fail-check") == 0)
+ newsrv->onerror = HANA_ONERR_FAILCHK;
+ else if (strcmp(args[*cur_arg + 1], "sudden-death") == 0)
+ newsrv->onerror = HANA_ONERR_SUDDTH;
+ else if (strcmp(args[*cur_arg + 1], "mark-down") == 0)
+ newsrv->onerror = HANA_ONERR_MARKDWN;
+ else {
+ memprintf(err, "'%s' expects one of 'fastinter', "
+ "'fail-check', 'sudden-death' or 'mark-down' but got '%s'",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "on-marked-down" server keyword */
+static int srv_parse_on_marked_down(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "shutdown-sessions") == 0)
+ newsrv->onmarkeddown = HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS;
+ else {
+ memprintf(err, "'%s' expects 'shutdown-sessions' but got '%s'",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "on-marked-up" server keyword */
+static int srv_parse_on_marked_up(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "shutdown-backup-sessions") == 0)
+ newsrv->onmarkedup = HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS;
+ else {
+ memprintf(err, "'%s' expects 'shutdown-backup-sessions' but got '%s'",
+ args[*cur_arg], args[*cur_arg + 1]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
+
+/* Parse the "redir" server keyword */
+static int srv_parse_redir(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'%s' expects <prefix> as argument.\n", args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->rdr_pfx);
+ newsrv->rdr_pfx = strdup(arg);
+ newsrv->rdr_len = strlen(arg);
+
+ return 0;
+}
+
+/* Parse the "resolvers" server keyword */
+static int srv_parse_resolvers(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ free(newsrv->resolvers_id);
+ newsrv->resolvers_id = strdup(args[*cur_arg + 1]);
+ return 0;
+}
+
+/* Parse the "resolve-net" server keyword */
+static int srv_parse_resolve_net(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *p, *e;
+ unsigned char mask;
+ struct resolv_options *opt;
+
+ if (!args[*cur_arg + 1] || args[*cur_arg + 1][0] == '\0') {
+ memprintf(err, "'%s' expects a list of networks.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ opt = &newsrv->resolv_opts;
+
+ /* Split the argument on commas, and convert each ipv4 or ipv6
+ * network string into an in_addr or in6_addr.
+ */
+ p = args[*cur_arg + 1];
+ e = p;
+ while (*p != '\0') {
+ /* If no room available, return error. */
+ if (opt->pref_net_nb >= SRV_MAX_PREF_NET) {
+ memprintf(err, "'%s' exceed %d networks.",
+ args[*cur_arg], SRV_MAX_PREF_NET);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ /* look for end or comma. */
+ while (*e != ',' && *e != '\0')
+ e++;
+ if (*e == ',') {
+ *e = '\0';
+ e++;
+ }
+ if (str2net(p, 0, &opt->pref_net[opt->pref_net_nb].addr.in4,
+ &opt->pref_net[opt->pref_net_nb].mask.in4)) {
+ /* Try to convert input string from ipv4 or ipv6 network. */
+ opt->pref_net[opt->pref_net_nb].family = AF_INET;
+ } else if (str62net(p, &opt->pref_net[opt->pref_net_nb].addr.in6,
+ &mask)) {
+ /* Try to convert input string from ipv6 network. */
+ len2mask6(mask, &opt->pref_net[opt->pref_net_nb].mask.in6);
+ opt->pref_net[opt->pref_net_nb].family = AF_INET6;
+ } else {
+ /* All network conversions fail, return error. */
+ memprintf(err, "'%s' invalid network '%s'.",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ opt->pref_net_nb++;
+ p = e;
+ }
+
+ return 0;
+}
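+
+/* Example (network values are illustrative): prefer addresses belonging to
+ * local ranges when a DNS response returns several records:
+ *
+ * server s1 app.example.com:80 resolvers mydns resolve-net 10.0.0.0/8,fd00::/8
+ */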
+
+/* Parse the "resolve-opts" server keyword */
+static int srv_parse_resolve_opts(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *p, *end;
+
+ for (p = args[*cur_arg + 1]; *p; p = end) {
+ /* cut on next comma */
+ for (end = p; *end && *end != ','; end++);
+ if (*end)
+ *(end++) = 0;
+
+ if (strcmp(p, "allow-dup-ip") == 0) {
+ newsrv->resolv_opts.accept_duplicate_ip = 1;
+ }
+ else if (strcmp(p, "ignore-weight") == 0) {
+ newsrv->resolv_opts.ignore_weight = 1;
+ }
+ else if (strcmp(p, "prevent-dup-ip") == 0) {
+ newsrv->resolv_opts.accept_duplicate_ip = 0;
+ }
+ else {
+ memprintf(err, "'%s' : unknown resolve-opts option '%s', supported methods are 'allow-dup-ip', 'ignore-weight', and 'prevent-dup-ip'.",
+ args[*cur_arg], p);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ return 0;
+}
+
+/* Parse the "resolve-prefer" server keyword */
+static int srv_parse_resolve_prefer(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ if (strcmp(args[*cur_arg + 1], "ipv4") == 0)
+ newsrv->resolv_opts.family_prio = AF_INET;
+ else if (strcmp(args[*cur_arg + 1], "ipv6") == 0)
+ newsrv->resolv_opts.family_prio = AF_INET6;
+ else {
+ memprintf(err, "'%s' expects either ipv4 or ipv6 as argument.",
+ args[*cur_arg]);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ return 0;
+}
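+
+/* Example combining resolution options (names are illustrative):
+ *
+ * server s1 app.example.com:80 resolvers mydns \
+ *     resolve-opts allow-dup-ip,ignore-weight resolve-prefer ipv6
+ *
+ * "allow-dup-ip" lets several servers share an IP returned by the same DNS
+ * answer, "ignore-weight" discards SRV record weights, and "resolve-prefer"
+ * picks the preferred address family when both are available.
+ */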
+
+/* Parse the "send-proxy" server keyword */
+static int srv_parse_send_proxy(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ return srv_enable_pp_flags(newsrv, SRV_PP_V1);
+}
+
+/* Parse the "send-proxy-v2" server keyword */
+static int srv_parse_send_proxy_v2(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ return srv_enable_pp_flags(newsrv, SRV_PP_V2);
+}
+
+/* Parse the "set-proxy-v2-tlv-fmt" server keyword */
+static int srv_parse_set_proxy_v2_tlv_fmt(char **args, int *cur_arg,
+ struct proxy *px, struct server *newsrv, char **err)
+{
+ char *error = NULL, *cmd = NULL;
+ unsigned int tlv_type = 0;
+ struct srv_pp_tlv_list *srv_tlv = NULL;
+
+ cmd = args[*cur_arg];
+ if (!*cmd) {
+ memprintf(err, "'%s' : could not read set-proxy-v2-tlv-fmt command", args[*cur_arg]);
+ goto fail;
+ }
+
+ cmd += strlen("set-proxy-v2-tlv-fmt");
+
+ if (*cmd == '(') {
+ cmd++; /* skip the '(' */
+ errno = 0;
+ tlv_type = strtoul(cmd, &error, 0); /* convert TLV ID */
+ if (unlikely((cmd == error) || (errno != 0))) {
+ /* covers both an empty/invalid number (cmd == error) and an
+ * out-of-range value reported through errno
+ */
+ memprintf(err, "'%s' : could not convert TLV ID", args[*cur_arg]);
+ goto fail;
+ }
+ if (*error != ')') {
+ memprintf(err, "'%s' : expects set-proxy-v2-tlv(<TLV ID>)", args[*cur_arg]);
+ goto fail;
+ }
+ if (tlv_type > 0xFF) {
+ memprintf(err, "'%s' : the maximum allowed TLV ID is %d", args[*cur_arg], 0xFF);
+ goto fail;
+ }
+ }
+
+ srv_tlv = malloc(sizeof(*srv_tlv));
+ if (unlikely(!srv_tlv)) {
+ memprintf(err, "'%s' : failed to parse allocate TLV entry", args[*cur_arg]);
+ goto fail;
+ }
+ srv_tlv->type = tlv_type;
+ srv_tlv->fmt_string = strdup(args[*cur_arg + 1]);
+ if (unlikely(!srv_tlv->fmt_string)) {
+ memprintf(err, "'%s' : failed to save format string for parsing", args[*cur_arg]);
+ goto fail;
+ }
+
+ LIST_APPEND(&newsrv->pp_tlvs, &srv_tlv->list);
+
+ (*cur_arg)++;
+
+ return 0;
+
+ fail:
+ free(srv_tlv);
+ errno = 0;
+ return ERR_ALERT | ERR_FATAL;
+}
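+
+/* Example sending a custom TLV (the ID and format string are illustrative):
+ *
+ * server s1 10.0.0.1:80 send-proxy-v2 set-proxy-v2-tlv-fmt(0xE1) %[fe_name]
+ *
+ * This attaches a TLV of type 0xE1 whose value is the evaluated log-format
+ * expression.
+ */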
+
+/* Parse the "slowstart" server keyword */
+static int srv_parse_slowstart(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ /* slowstart is stored in seconds */
+ unsigned int val;
+ const char *time_err = parse_time_err(args[*cur_arg + 1], &val, TIME_UNIT_MS);
+
+ if (time_err == PARSE_TIME_OVER) {
+ memprintf(err, "overflow in argument <%s> to <%s> of server %s, maximum value is 2147483647 ms (~24.8 days).",
+ args[*cur_arg+1], args[*cur_arg], newsrv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (time_err == PARSE_TIME_UNDER) {
+ memprintf(err, "underflow in argument <%s> to <%s> of server %s, minimum non-null value is 1 ms.",
+ args[*cur_arg+1], args[*cur_arg], newsrv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ else if (time_err) {
+ memprintf(err, "unexpected character '%c' in 'slowstart' argument of server %s.",
+ *time_err, newsrv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->slowstart = (val + 999) / 1000;
+
+ return 0;
+}
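+
+/* Example (duration is illustrative):
+ *
+ * server s1 10.0.0.1:80 check slowstart 30s
+ *
+ * Note that the parsed value is rounded up to whole seconds, so any non-null
+ * value below 1s still yields a 1s warm-up.
+ */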
+
+/* Parse the "source" server keyword */
+static int srv_parse_source(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *errmsg;
+ int port_low, port_high;
+ struct sockaddr_storage *sk;
+
+ errmsg = NULL;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects <addr>[:<port>[-<port>]], and optionally '%s' <addr>, "
+ "and '%s' <name> as argument.\n", args[*cur_arg], "usesrc", "interface");
+ goto err;
+ }
+
+ /* 'sk' is statically allocated (no need to be freed). */
+ sk = str2sa_range(args[*cur_arg + 1], NULL, &port_low, &port_high, NULL, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_RANGE | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ memprintf(err, "'%s %s' : %s\n", args[*cur_arg], args[*cur_arg + 1], errmsg);
+ goto err;
+ }
+
+ newsrv->conn_src.opts |= CO_SRC_BIND;
+ newsrv->conn_src.source_addr = *sk;
+
+ if (port_low != port_high) {
+ int i;
+
+ newsrv->conn_src.sport_range = port_range_alloc_range(port_high - port_low + 1);
+ if (!newsrv->conn_src.sport_range) {
+ ha_alert("Server '%s': Out of memory (sport_range)\n", args[0]);
+ goto err;
+ }
+ for (i = 0; i < newsrv->conn_src.sport_range->size; i++)
+ newsrv->conn_src.sport_range->ports[i] = port_low + i;
+ }
+
+ *cur_arg += 2;
+ while (*(args[*cur_arg])) {
+ if (strcmp(args[*cur_arg], "usesrc") == 0) { /* address to use outside */
+#if defined(CONFIG_HAP_TRANSPARENT)
+ if (!*args[*cur_arg + 1]) {
+ ha_alert("'usesrc' expects <addr>[:<port>], 'client', 'clientip', "
+ "or 'hdr_ip(name,#)' as argument.\n");
+ goto err;
+ }
+ if (strcmp(args[*cur_arg + 1], "client") == 0) {
+ newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ newsrv->conn_src.opts |= CO_SRC_TPROXY_CLI;
+ }
+ else if (strcmp(args[*cur_arg + 1], "clientip") == 0) {
+ newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ newsrv->conn_src.opts |= CO_SRC_TPROXY_CIP;
+ }
+ else if (!strncmp(args[*cur_arg + 1], "hdr_ip(", 7)) {
+ char *name, *end;
+
+ name = args[*cur_arg + 1] + 7;
+ while (isspace((unsigned char)*name))
+ name++;
+
+ end = name;
+ while (*end && !isspace((unsigned char)*end) && *end != ',' && *end != ')')
+ end++;
+
+ newsrv->conn_src.opts &= ~CO_SRC_TPROXY_MASK;
+ newsrv->conn_src.opts |= CO_SRC_TPROXY_DYN;
+ free(newsrv->conn_src.bind_hdr_name);
+ newsrv->conn_src.bind_hdr_name = calloc(1, end - name + 1);
+ if (!newsrv->conn_src.bind_hdr_name) {
+ ha_alert("Server '%s': Out of memory (bind_hdr_name)\n", args[0]);
+ goto err;
+ }
+ newsrv->conn_src.bind_hdr_len = end - name;
+ memcpy(newsrv->conn_src.bind_hdr_name, name, end - name);
+ newsrv->conn_src.bind_hdr_name[end - name] = '\0';
+ newsrv->conn_src.bind_hdr_occ = -1;
+
+ /* now look for an occurrence number */
+ while (isspace((unsigned char)*end))
+ end++;
+ if (*end == ',') {
+ end++;
+ name = end;
+ if (*end == '-')
+ end++;
+ while (isdigit((unsigned char)*end))
+ end++;
+ newsrv->conn_src.bind_hdr_occ = strl2ic(name, end - name);
+ }
+
+ if (newsrv->conn_src.bind_hdr_occ < -MAX_HDR_HISTORY) {
+ ha_alert("usesrc hdr_ip(name,num) does not support negative"
+ " occurrences values smaller than %d.\n", MAX_HDR_HISTORY);
+ goto err;
+ }
+ }
+ else {
+ struct sockaddr_storage *sk;
+ int port1, port2;
+
+ /* 'sk' is statically allocated (no need to be freed). */
+ sk = str2sa_range(args[*cur_arg + 1], NULL, &port1, &port2, NULL, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ ha_alert("'%s %s' : %s\n", args[*cur_arg], args[*cur_arg + 1], errmsg);
+ goto err;
+ }
+
+ newsrv->conn_src.tproxy_addr = *sk;
+ newsrv->conn_src.opts |= CO_SRC_TPROXY_ADDR;
+ }
+ global.last_checks |= LSTCHK_NETADM;
+ *cur_arg += 2;
+ continue;
+#else /* no TPROXY support */
+ ha_alert("'usesrc' not allowed here because support for TPROXY was not compiled in.\n");
+ goto err;
+#endif /* defined(CONFIG_HAP_TRANSPARENT) */
+ } /* "usesrc" */
+
+ if (strcmp(args[*cur_arg], "interface") == 0) { /* specifically bind to this interface */
+#ifdef SO_BINDTODEVICE
+ if (!*args[*cur_arg + 1]) {
+ ha_alert("'%s' : missing interface name.\n", args[0]);
+ goto err;
+ }
+ free(newsrv->conn_src.iface_name);
+ newsrv->conn_src.iface_name = strdup(args[*cur_arg + 1]);
+ newsrv->conn_src.iface_len = strlen(newsrv->conn_src.iface_name);
+ global.last_checks |= LSTCHK_NETADM;
+#else
+ ha_alert("'%s' : '%s' option not implemented.\n", args[0], args[*cur_arg]);
+ goto err;
+#endif
+ *cur_arg += 2;
+ continue;
+ }
+ /* this keyword is not an option of "source" */
+ break;
+ } /* while */
+
+ return 0;
+
+ err:
+ free(errmsg);
+ return ERR_ALERT | ERR_FATAL;
+}
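+
+/* Example of transparent binding (addresses are illustrative):
+ *
+ * server s1 10.0.0.1:80 source 0.0.0.0 usesrc clientip
+ *
+ * Outgoing connections are then bound to the client's own address, which
+ * requires TPROXY support and network admin privileges (LSTCHK_NETADM).
+ */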
+
+/* Parse the "stick" server keyword */
+static int srv_parse_stick(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ newsrv->flags &= ~SRV_F_NON_STICK;
+ return 0;
+}
+
+/* Parse the "track" server keyword */
+static int srv_parse_track(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *arg;
+
+ arg = args[*cur_arg + 1];
+ if (!*arg) {
+ memprintf(err, "'track' expects [<proxy>/]<server> as argument.\n");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ free(newsrv->trackit);
+ newsrv->trackit = strdup(arg);
+
+ return 0;
+}
+
+/* Parse the "socks4" server keyword */
+static int srv_parse_socks4(char **args, int *cur_arg,
+ struct proxy *curproxy, struct server *newsrv, char **err)
+{
+ char *errmsg;
+ int port_low, port_high;
+ struct sockaddr_storage *sk;
+
+ errmsg = NULL;
+
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "'%s' expects <addr>:<port> as argument.\n", args[*cur_arg]);
+ goto err;
+ }
+
+ /* 'sk' is statically allocated (no need to be freed). */
+ sk = str2sa_range(args[*cur_arg + 1], NULL, &port_low, &port_high, NULL, NULL, NULL,
+ &errmsg, NULL, NULL,
+ PA_O_RESOLVE | PA_O_PORT_OK | PA_O_PORT_MAND | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ memprintf(err, "'%s %s' : %s\n", args[*cur_arg], args[*cur_arg + 1], errmsg);
+ goto err;
+ }
+
+ newsrv->flags |= SRV_F_SOCKS4_PROXY;
+ newsrv->socks4_addr = *sk;
+
+ return 0;
+
+ err:
+ free(errmsg);
+ return ERR_ALERT | ERR_FATAL;
+}
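+
+/* Example (proxy address is illustrative):
+ *
+ * server s1 10.0.0.1:80 socks4 127.0.0.1:1080 check-via-socks4
+ *
+ * Connections to the server are then established through the SOCKS4 proxy;
+ * "check-via-socks4" additionally routes health checks through it.
+ */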
+
+
+/* parse the "tfo" server keyword */
+static int srv_parse_tfo(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ newsrv->flags |= SRV_F_FASTOPEN;
+ return 0;
+}
+
+/* parse the "usesrc" server keyword */
+static int srv_parse_usesrc(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ memprintf(err, "'%s' only allowed after a '%s' statement.",
+ "usesrc", "source");
+ return ERR_ALERT | ERR_FATAL;
+}
+
+/* parse the "weight" server keyword */
+static int srv_parse_weight(char **args, int *cur_arg, struct proxy *px, struct server *newsrv, char **err)
+{
+ int w;
+
+ w = atol(args[*cur_arg + 1]);
+ if (w < 0 || w > SRV_UWGHT_MAX) {
+ memprintf(err, "weight of server %s is not within 0 and %d (%d).",
+ newsrv->id, SRV_UWGHT_MAX, w);
+ return ERR_ALERT | ERR_FATAL;
+ }
+ newsrv->uweight = newsrv->iweight = w;
+
+ return 0;
+}
+
+/* Returns 1 if the server has streams pointing to it, and 0 otherwise.
+ *
+ * Must be called with the server lock held.
+ */
+static int srv_has_streams(struct server *srv)
+{
+ int thr;
+
+ for (thr = 0; thr < global.nbthread; thr++)
+ if (!MT_LIST_ISEMPTY(&srv->per_thr[thr].streams))
+ return 1;
+ return 0;
+}
+
+/* Shutdown all connections of a server. The caller must pass a termination
+ * code in <why>, which must be one of SF_ERR_* indicating the reason for the
+ * shutdown.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_shutdown_streams(struct server *srv, int why)
+{
+ struct stream *stream;
+ struct mt_list *elt1, elt2;
+ int thr;
+
+ for (thr = 0; thr < global.nbthread; thr++)
+ mt_list_for_each_entry_safe(stream, &srv->per_thr[thr].streams, by_srv, elt1, elt2)
+ if (stream->srv_conn == srv)
+ stream_shutdown(stream, why);
+}
+
+/* Shutdown all connections of all backup servers of a proxy. The caller must
+ * pass a termination code in <why>, which must be one of SF_ERR_* indicating
+ * the reason for the shutdown.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_shutdown_backup_streams(struct proxy *px, int why)
+{
+ struct server *srv;
+
+ for (srv = px->srv; srv != NULL; srv = srv->next)
+ if (srv->flags & SRV_F_BACKUP)
+ srv_shutdown_streams(srv, why);
+}
+
+static void srv_append_op_chg_cause(struct buffer *msg, struct server *s, enum srv_op_st_chg_cause cause)
+{
+ switch (cause) {
+ case SRV_OP_STCHGC_NONE:
+ break; /* do nothing */
+ case SRV_OP_STCHGC_HEALTH:
+ check_append_info(msg, &s->check);
+ break;
+ case SRV_OP_STCHGC_AGENT:
+ check_append_info(msg, &s->agent);
+ break;
+ default:
+ chunk_appendf(msg, ", %s", srv_op_st_chg_cause(cause));
+ break;
+ }
+}
+
+static void srv_append_adm_chg_cause(struct buffer *msg, struct server *s, enum srv_adm_st_chg_cause cause)
+{
+ if (cause)
+ chunk_appendf(msg, " (%s)", srv_adm_st_chg_cause(cause));
+}
+
+/* Appends some information to a message string related to a server tracking
+ * or requeued connections info.
+ *
+ * If <forced> is null and the server tracks another one, a "via" indication
+ * is appended to report which server the state was inherited from.
+ * If <xferred> is non-negative, some information about requeued sessions is
+ * provided.
+ *
+ * Must be called with the server lock held.
+ */
+static void srv_append_more(struct buffer *msg, struct server *s,
+ int xferred, int forced)
+{
+ if (!forced && s->track) {
+ chunk_appendf(msg, " via %s/%s", s->track->proxy->id, s->track->id);
+ }
+
+ if (xferred >= 0) {
+ if (s->next_state == SRV_ST_STOPPED)
+ chunk_appendf(msg, ". %d active and %d backup servers left.%s"
+ " %d sessions active, %d requeued, %d remaining in queue",
+ s->proxy->srv_act, s->proxy->srv_bck,
+ (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
+ s->cur_sess, xferred, s->queue.length);
+ else
+ chunk_appendf(msg, ". %d active and %d backup servers online.%s"
+ " %d sessions requeued, %d total in queue",
+ s->proxy->srv_act, s->proxy->srv_bck,
+ (s->proxy->srv_bck && !s->proxy->srv_act) ? " Running on backup." : "",
+ xferred, s->queue.length);
+ }
+}
+
+/* Marks server <s> down, regardless of its checks' statuses. The server
+ * transfers queued streams whenever possible to other servers at a sync
+ * point. Maintenance servers are ignored.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_stopped(struct server *s, enum srv_op_st_chg_cause cause)
+{
+ struct server *srv;
+
+ if ((s->cur_admin & SRV_ADMF_MAINT) || s->next_state == SRV_ST_STOPPED)
+ return;
+
+ s->next_state = SRV_ST_STOPPED;
+
+ /* propagate changes */
+ srv_update_status(s, 0, cause);
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_set_stopped(srv, SRV_OP_STCHGC_NONE);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* Marks server <s> up regardless of its checks' statuses and provided it isn't
+ * in maintenance. The server tries to grab requests from the proxy at a sync
+ * point. Maintenance servers are ignored.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_running(struct server *s, enum srv_op_st_chg_cause cause)
+{
+ struct server *srv;
+
+ if (s->cur_admin & SRV_ADMF_MAINT)
+ return;
+
+ if (s->next_state == SRV_ST_STARTING || s->next_state == SRV_ST_RUNNING)
+ return;
+
+ s->next_state = SRV_ST_STARTING;
+
+ if (s->slowstart <= 0)
+ s->next_state = SRV_ST_RUNNING;
+
+ /* propagate changes */
+ srv_update_status(s, 0, cause);
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_set_running(srv, SRV_OP_STCHGC_NONE);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* Marks server <s> stopping regardless of its checks' statuses and provided it
+ * isn't in maintenance. The server tries to redispatch pending requests
+ * to the proxy. Maintenance servers are ignored.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_stopping(struct server *s, enum srv_op_st_chg_cause cause)
+{
+ struct server *srv;
+
+ if (s->cur_admin & SRV_ADMF_MAINT)
+ return;
+
+ if (s->next_state == SRV_ST_STOPPING)
+ return;
+
+ s->next_state = SRV_ST_STOPPING;
+
+ /* propagate changes */
+ srv_update_status(s, 0, cause);
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_set_stopping(srv, SRV_OP_STCHGC_NONE);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* Enables admin flag <mode> (among SRV_ADMF_*) on server <s>. This is used to
+ * enforce either maint mode or drain mode. It is not allowed to set more than
+ * one flag at once. The equivalent "inherited" flag is propagated to all
+ * tracking servers. Maintenance mode disables health checks (but not agent
+ * checks). When either the flag is already set or no flag is passed, nothing
+ * is done. If <cause> is non-null, it will be displayed at the end of the log
+ * lines to justify the state change.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_admin_flag(struct server *s, enum srv_admin mode, enum srv_adm_st_chg_cause cause)
+{
+ struct server *srv;
+
+ if (!mode)
+ return;
+
+ /* stop going down as soon as we meet a server already in the same state */
+ if (s->next_admin & mode)
+ return;
+
+ s->next_admin |= mode;
+
+ /* propagate changes */
+ srv_update_status(s, 1, cause);
+
+ /* stop going down if the equivalent flag was already present (forced or inherited) */
+ if (((mode & SRV_ADMF_MAINT) && (s->next_admin & ~mode & SRV_ADMF_MAINT)) ||
+ ((mode & SRV_ADMF_DRAIN) && (s->next_admin & ~mode & SRV_ADMF_DRAIN)))
+ return;
+
+ /* compute the inherited flag to propagate */
+ if (mode & SRV_ADMF_MAINT)
+ mode = SRV_ADMF_IMAINT;
+ else if (mode & SRV_ADMF_DRAIN)
+ mode = SRV_ADMF_IDRAIN;
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_set_admin_flag(srv, mode, cause);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
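+
+/* For instance (a sketch of the propagation above), forcing maintenance on a
+ * tracked server with
+ *
+ * srv_set_admin_flag(s, SRV_ADMF_FMAINT, cause);
+ *
+ * sets s->next_admin |= SRV_ADMF_FMAINT and then calls
+ * srv_set_admin_flag(srv, SRV_ADMF_IMAINT, cause) on every tracker, so the
+ * whole tracking chain inherits the maintenance state.
+ */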
+
+/* Disables admin flag <mode> (among SRV_ADMF_*) on server <s>. This is used to
+ * stop enforcing either maint mode or drain mode. It is not allowed to set more
+ * than one flag at once. The equivalent "inherited" flag is propagated to all
+ * tracking servers. Leaving maintenance mode re-enables health checks. When
+ * either the flag is already cleared or no flag is passed, nothing is done.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_clr_admin_flag(struct server *s, enum srv_admin mode)
+{
+ struct server *srv;
+
+ if (!mode)
+ return;
+
+ /* stop going down as soon as we see the flag is not there anymore */
+ if (!(s->next_admin & mode))
+ return;
+
+ s->next_admin &= ~mode;
+
+ /* propagate changes */
+ srv_update_status(s, 1, SRV_ADM_STCHGC_NONE);
+
+ /* stop going down if the equivalent flag is still present (forced or inherited) */
+ if (((mode & SRV_ADMF_MAINT) && (s->next_admin & SRV_ADMF_MAINT)) ||
+ ((mode & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN)))
+ return;
+
+ if (mode & SRV_ADMF_MAINT)
+ mode = SRV_ADMF_IMAINT;
+ else if (mode & SRV_ADMF_DRAIN)
+ mode = SRV_ADMF_IDRAIN;
+
+ for (srv = s->trackers; srv; srv = srv->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+ srv_clr_admin_flag(srv, mode);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ }
+}
+
+/* principle: propagate maint and drain to tracking servers. This is useful
+ * upon startup so that inherited states are correct.
+ */
+static void srv_propagate_admin_state(struct server *srv)
+{
+ struct server *srv2;
+
+ if (!srv->trackers)
+ return;
+
+ for (srv2 = srv->trackers; srv2; srv2 = srv2->tracknext) {
+ HA_SPIN_LOCK(SERVER_LOCK, &srv2->lock);
+ if (srv->next_admin & (SRV_ADMF_MAINT | SRV_ADMF_CMAINT))
+ srv_set_admin_flag(srv2, SRV_ADMF_IMAINT, SRV_ADM_STCHGC_NONE);
+
+ if (srv->next_admin & SRV_ADMF_DRAIN)
+ srv_set_admin_flag(srv2, SRV_ADMF_IDRAIN, SRV_ADM_STCHGC_NONE);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv2->lock);
+ }
+}
+
+/* Compute and propagate the admin states for all servers in proxy <px>.
+ * Only servers *not* tracking another one are considered, because other
+ * ones will be handled when the server they track is visited.
+ */
+void srv_compute_all_admin_states(struct proxy *px)
+{
+ struct server *srv;
+
+ for (srv = px->srv; srv; srv = srv->next) {
+ if (srv->track)
+ continue;
+ srv_propagate_admin_state(srv);
+ }
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted, doing so helps
+ * all code contributors.
+ * Optional keywords are also declared with a NULL ->parse() function so that
+ * the config parser can report an appropriate error when a known keyword was
+ * not enabled.
+ * Note: -1 as ->skip value means that the number of arguments are variable.
+ */
+static struct srv_kw_list srv_kws = { "ALL", { }, {
+ { "backup", srv_parse_backup, 0, 1, 1 }, /* Flag as backup server */
+ { "cookie", srv_parse_cookie, 1, 1, 0 }, /* Assign a cookie to the server */
+ { "disabled", srv_parse_disabled, 0, 1, 1 }, /* Start the server in 'disabled' state */
+ { "enabled", srv_parse_enabled, 0, 1, 1 }, /* Start the server in 'enabled' state */
+ { "error-limit", srv_parse_error_limit, 1, 1, 1 }, /* Configure the consecutive count of check failures to consider a server on error */
+ { "ws", srv_parse_ws, 1, 1, 1 }, /* websocket protocol */
+ { "id", srv_parse_id, 1, 0, 1 }, /* set id# of server */
+ { "init-addr", srv_parse_init_addr, 1, 1, 0 }, /* */
+ { "log-bufsize", srv_parse_log_bufsize, 1, 1, 0 }, /* Set the ring bufsize for log server (only for log backends) */
+ { "log-proto", srv_parse_log_proto, 1, 1, 0 }, /* Set the protocol for event messages, only relevant in a log or ring section */
+ { "maxconn", srv_parse_maxconn, 1, 1, 1 }, /* Set the max number of concurrent connection */
+ { "maxqueue", srv_parse_maxqueue, 1, 1, 1 }, /* Set the max number of connection to put in queue */
+ { "max-reuse", srv_parse_max_reuse, 1, 1, 0 }, /* Set the max number of requests on a connection, -1 means unlimited */
+ { "minconn", srv_parse_minconn, 1, 1, 1 }, /* Enable a dynamic maxconn limit */
+ { "namespace", srv_parse_namespace, 1, 1, 0 }, /* Namespace the server socket belongs to (if supported) */
+ { "no-backup", srv_parse_no_backup, 0, 1, 1 }, /* Flag as non-backup server */
+ { "no-send-proxy", srv_parse_no_send_proxy, 0, 1, 1 }, /* Disable use of PROXY V1 protocol */
+ { "no-send-proxy-v2", srv_parse_no_send_proxy_v2, 0, 1, 1 }, /* Disable use of PROXY V2 protocol */
+ { "no-tfo", srv_parse_no_tfo, 0, 1, 1 }, /* Disable use of TCP Fast Open */
+ { "non-stick", srv_parse_non_stick, 0, 1, 0 }, /* Disable stick-table persistence */
+ { "observe", srv_parse_observe, 1, 1, 1 }, /* Enables health adjusting based on observing communication with the server */
+ { "on-error", srv_parse_on_error, 1, 1, 1 }, /* Configure the action on check failure */
+ { "on-marked-down", srv_parse_on_marked_down, 1, 1, 1 }, /* Configure the action when a server is marked down */
+ { "on-marked-up", srv_parse_on_marked_up, 1, 1, 1 }, /* Configure the action when a server is marked up */
+ { "pool-low-conn", srv_parse_pool_low_conn, 1, 1, 1 }, /* Set the min number of orphan idle connecbefore being allowed to pick from other threads */
+ { "pool-max-conn", srv_parse_pool_max_conn, 1, 1, 1 }, /* Set the max number of orphan idle connections, -1 means unlimited */
+ { "pool-purge-delay", srv_parse_pool_purge_delay, 1, 1, 1 }, /* Set the time before we destroy orphan idle connections, defaults to 1s */
+ { "proto", srv_parse_proto, 1, 1, 1 }, /* Set the proto to use for all outgoing connections */
+ { "proxy-v2-options", srv_parse_proxy_v2_options, 1, 1, 1 }, /* options for send-proxy-v2 */
+ { "redir", srv_parse_redir, 1, 1, 0 }, /* Enable redirection mode */
+ { "resolve-net", srv_parse_resolve_net, 1, 1, 0 }, /* Set the preferred network range for name resolution */
+ { "resolve-opts", srv_parse_resolve_opts, 1, 1, 0 }, /* Set options for name resolution */
+ { "resolve-prefer", srv_parse_resolve_prefer, 1, 1, 0 }, /* Set the preferred family for name resolution */
+ { "resolvers", srv_parse_resolvers, 1, 1, 0 }, /* Configure the resolver to use for name resolution */
+ { "send-proxy", srv_parse_send_proxy, 0, 1, 1 }, /* Enforce use of PROXY V1 protocol */
+ { "send-proxy-v2", srv_parse_send_proxy_v2, 0, 1, 1 }, /* Enforce use of PROXY V2 protocol */
+ { "set-proxy-v2-tlv-fmt", srv_parse_set_proxy_v2_tlv_fmt, 0, 1, 1 }, /* Set TLV of PROXY V2 protocol */
+ { "shard", srv_parse_shard, 1, 1, 1 }, /* Server shard (only in peers protocol context) */
+ { "slowstart", srv_parse_slowstart, 1, 1, 1 }, /* Set the warm-up timer for a previously failed server */
+ { "source", srv_parse_source, -1, 1, 1 }, /* Set the source address to be used to connect to the server */
+ { "stick", srv_parse_stick, 0, 1, 0 }, /* Enable stick-table persistence */
+ { "tfo", srv_parse_tfo, 0, 1, 1 }, /* enable TCP Fast Open of server */
+ { "track", srv_parse_track, 1, 1, 1 }, /* Set the current state of the server, tracking another one */
+ { "socks4", srv_parse_socks4, 1, 1, 0 }, /* Set the socks4 proxy of the server*/
+ { "usesrc", srv_parse_usesrc, 0, 1, 1 }, /* safe-guard against usesrc without preceding <source> keyword */
+ { "weight", srv_parse_weight, 1, 1, 1 }, /* Set the load-balancing weight */
+ { NULL, NULL, 0 },
+}};
+
+INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws);
+
+/* Recomputes the server's eweight based on its state, uweight, the current time,
+ * and the proxy's algorithm. To be used after updating sv->uweight. The warmup
+ * state is automatically disabled once the slowstart period has elapsed. If <must_update> is
+ * not zero, the update will be propagated immediately.
+ *
+ * Must be called with the server lock held.
+ */
+void server_recalc_eweight(struct server *sv, int must_update)
+{
+ struct proxy *px = sv->proxy;
+ unsigned w;
+
+ if (ns_to_sec(now_ns) < sv->last_change || ns_to_sec(now_ns) >= sv->last_change + sv->slowstart) {
+ /* go to full throttle if the slowstart interval is reached */
+ if (sv->next_state == SRV_ST_STARTING)
+ sv->next_state = SRV_ST_RUNNING;
+ }
+
+ /* We must take care of not pushing the server to full throttle during slow starts.
+ * It must also start immediately, at least at the minimal step when leaving maintenance.
+ */
+ if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN))
+ w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - sv->last_change) + sv->slowstart) / sv->slowstart;
+ else
+ w = px->lbprm.wdiv;
+
+ sv->next_eweight = (sv->uweight * w + px->lbprm.wmult - 1) / px->lbprm.wmult;
+
+ /* propagate changes only if needed (i.e. not recursively) */
+ if (must_update)
+ srv_update_status(sv, 0, SRV_OP_STCHGC_NONE);
+}
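+
+/* Worked example (a sketch, not tied to a specific LB algorithm's wdiv/wmult
+ * values): with a dynamic algorithm (BE_LB_PROP_DYN), slowstart=60s and 15s
+ * elapsed since last_change, the formula above yields
+ * w = (wdiv*15 + 60)/60, i.e. roughly wdiv/4, so the server runs at about a
+ * quarter of its full effective weight and reaches 100% once the 60s have
+ * elapsed.
+ */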
+
+/*
+ * Parses weight_str and configures sv accordingly.
+ * Returns NULL on success, error message string otherwise.
+ *
+ * Must be called with the server lock held.
+ */
+const char *server_parse_weight_change_request(struct server *sv,
+ const char *weight_str)
+{
+ struct proxy *px;
+ long int w;
+ char *end;
+
+ px = sv->proxy;
+
+ /* if the weight is terminated with '%', it is set relative to
+ * the initial weight, otherwise it is absolute.
+ */
+ if (!*weight_str)
+ return "Require <weight> or <weight%>.\n";
+
+ w = strtol(weight_str, &end, 10);
+ if (end == weight_str)
+ return "Empty weight string empty or preceded by garbage";
+ else if (end[0] == '%' && end[1] == '\0') {
+ if (w < 0)
+ return "Relative weight must be positive.\n";
+ /* Avoid integer overflow */
+ if (w > 25600)
+ w = 25600;
+ w = sv->iweight * w / 100;
+ if (w > 256)
+ w = 256;
+ }
+ else if (w < 0 || w > 256)
+ return "Absolute weight can only be between 0 and 256 inclusive.\n";
+ else if (end[0] != '\0')
+ return "Trailing garbage in weight string";
+
+ if (w && w != sv->iweight && !(px->lbprm.algo & BE_LB_PROP_DYN))
+ return "Backend is using a static LB algorithm and only accepts weights '0%' and '100%'.\n";
+
+ sv->uweight = w;
+ server_recalc_eweight(sv, 1);
+
+ return NULL;
+}
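+
+/* Usage sketch: this helper notably serves the CLI command
+ * "set server <backend>/<server> weight <w>[%]", e.g.
+ * "set server bk1/srv1 weight 50%" sets half of the initial weight while
+ * "set server bk1/srv1 weight 12" sets an absolute weight of 12.
+ */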
+
+/*
+ * Parses <addr_str> and configures <sv> accordingly. <updater> identifies
+ * the source of the change for the associated log message.
+ * Returns:
+ * - error string on error
+ * - NULL on success
+ *
+ * Must be called with the server lock held.
+ */
+const char *server_parse_addr_change_request(struct server *sv,
+ const char *addr_str, const char *updater)
+{
+ unsigned char ip[INET6_ADDRSTRLEN];
+
+ if (inet_pton(AF_INET6, addr_str, ip)) {
+ srv_update_addr(sv, ip, AF_INET6, updater);
+ return NULL;
+ }
+ if (inet_pton(AF_INET, addr_str, ip)) {
+ srv_update_addr(sv, ip, AF_INET, updater);
+ return NULL;
+ }
+
+ return "Could not understand IP address format.\n";
+}
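+
+/* Example inputs (illustrative only): "192.0.2.10" takes the AF_INET branch
+ * and "2001:db8::1" the AF_INET6 one (which is tried first); hostnames are
+ * not resolved here and fall through to the error message above.
+ */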
+
+/*
+ * Must be called with the server lock held.
+ */
+const char *server_parse_maxconn_change_request(struct server *sv,
+ const char *maxconn_str)
+{
+ long int v;
+ char *end;
+
+ if (!*maxconn_str)
+ return "Require <maxconn>.\n";
+
+ v = strtol(maxconn_str, &end, 10);
+ if (end == maxconn_str)
+ return "maxconn string empty or preceded by garbage";
+ else if (end[0] != '\0')
+ return "Trailing garbage in maxconn string";
+
+ if (sv->maxconn == sv->minconn) { // static maxconn
+ sv->maxconn = sv->minconn = v;
+ } else { // dynamic maxconn
+ sv->maxconn = v;
+ }
+
+ if (may_dequeue_tasks(sv, sv->proxy))
+ process_srv_queue(sv);
+
+ return NULL;
+}
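+
+/* Usage sketch: this helper notably serves the CLI command
+ * "set server <backend>/<server> maxconn <value>". Note from the code above
+ * that when minconn equals maxconn the limit is static and both are updated,
+ * otherwise only maxconn moves and the dynamic limit keeps using minconn.
+ */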
+
+static struct sample_expr *srv_sni_sample_parse_expr(struct server *srv, struct proxy *px,
+ const char *file, int linenum, char **err)
+{
+ int idx;
+ const char *args[] = {
+ srv->sni_expr,
+ NULL,
+ };
+
+ idx = 0;
+ px->conf.args.ctx = ARGC_SRV;
+
+ return sample_parse_expr((char **)args, &idx, file, linenum, err, &px->conf.args, NULL);
+}
+
+int server_parse_sni_expr(struct server *newsrv, struct proxy *px, char **err)
+{
+ struct sample_expr *expr;
+
+ expr = srv_sni_sample_parse_expr(newsrv, px, px->conf.file, px->conf.line, err);
+ if (!expr) {
+ memprintf(err, "error detected while parsing sni expression : %s", *err);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!(expr->fetch->val & SMP_VAL_BE_SRV_CON)) {
+ memprintf(err, "error detected while parsing sni expression : "
+ " fetch method '%s' extracts information from '%s', "
+ "none of which is available here.",
+ newsrv->sni_expr, sample_src_names(expr->fetch->use));
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ px->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+ release_sample_expr(newsrv->ssl_ctx.sni);
+ newsrv->ssl_ctx.sni = expr;
+
+ return 0;
+}
+
+static void display_parser_err(const char *file, int linenum, char **args, int cur_arg, int err_code, char **err)
+{
+ char *msg = "error encountered while processing ";
+ char *quote = "'";
+ char *token = args[cur_arg];
+
+ if (err && *err) {
+ indent_msg(err, 2);
+ msg = *err;
+ quote = "";
+ token = "";
+ }
+
+ if (err_code & ERR_WARN && !(err_code & ERR_ALERT))
+ ha_warning("%s%s%s%s.\n", msg, quote, token, quote);
+ else
+ ha_alert("%s%s%s%s.\n", msg, quote, token, quote);
+}
+
+static void srv_conn_src_sport_range_cpy(struct server *srv, const struct server *src)
+{
+ int range_sz;
+
+ range_sz = src->conn_src.sport_range->size;
+ if (range_sz > 0) {
+ srv->conn_src.sport_range = port_range_alloc_range(range_sz);
+ if (srv->conn_src.sport_range != NULL) {
+ int i;
+
+ for (i = 0; i < range_sz; i++) {
+ srv->conn_src.sport_range->ports[i] =
+ src->conn_src.sport_range->ports[i];
+ }
+ }
+ }
+}
+
+/*
+ * Copy <src> server connection source settings to <srv> server, allocating everything needed.
+ */
+static void srv_conn_src_cpy(struct server *srv, const struct server *src)
+{
+ srv->conn_src.opts = src->conn_src.opts;
+ srv->conn_src.source_addr = src->conn_src.source_addr;
+
+ /* Source port range copy. */
+ if (src->conn_src.sport_range != NULL)
+ srv_conn_src_sport_range_cpy(srv, src);
+
+#ifdef CONFIG_HAP_TRANSPARENT
+ if (src->conn_src.bind_hdr_name != NULL) {
+ srv->conn_src.bind_hdr_name = strdup(src->conn_src.bind_hdr_name);
+ srv->conn_src.bind_hdr_len = strlen(src->conn_src.bind_hdr_name);
+ }
+ srv->conn_src.bind_hdr_occ = src->conn_src.bind_hdr_occ;
+ srv->conn_src.tproxy_addr = src->conn_src.tproxy_addr;
+#endif
+ if (src->conn_src.iface_name != NULL)
+ srv->conn_src.iface_name = strdup(src->conn_src.iface_name);
+}
+
+/*
+ * Copy <src> server SSL settings to <srv> server allocating
+ * everything needed.
+ */
+#if defined(USE_OPENSSL)
+static void srv_ssl_settings_cpy(struct server *srv, const struct server *src)
+{
+ /* <src> is the current proxy's default server and SSL is enabled */
+ BUG_ON(src->ssl_ctx.ctx != NULL); /* the SSL_CTX must never be initialized in a default-server */
+
+ if (src == &srv->proxy->defsrv && src->use_ssl == 1)
+ srv->flags |= SRV_F_DEFSRV_USE_SSL;
+
+ if (src->ssl_ctx.ca_file != NULL)
+ srv->ssl_ctx.ca_file = strdup(src->ssl_ctx.ca_file);
+ if (src->ssl_ctx.crl_file != NULL)
+ srv->ssl_ctx.crl_file = strdup(src->ssl_ctx.crl_file);
+ if (src->ssl_ctx.client_crt != NULL)
+ srv->ssl_ctx.client_crt = strdup(src->ssl_ctx.client_crt);
+
+ srv->ssl_ctx.verify = src->ssl_ctx.verify;
+
+ if (src->ssl_ctx.verify_host != NULL)
+ srv->ssl_ctx.verify_host = strdup(src->ssl_ctx.verify_host);
+ if (src->ssl_ctx.ciphers != NULL)
+ srv->ssl_ctx.ciphers = strdup(src->ssl_ctx.ciphers);
+ if (src->ssl_ctx.options)
+ srv->ssl_ctx.options = src->ssl_ctx.options;
+ if (src->ssl_ctx.methods.flags)
+ srv->ssl_ctx.methods.flags = src->ssl_ctx.methods.flags;
+ if (src->ssl_ctx.methods.min)
+ srv->ssl_ctx.methods.min = src->ssl_ctx.methods.min;
+ if (src->ssl_ctx.methods.max)
+ srv->ssl_ctx.methods.max = src->ssl_ctx.methods.max;
+
+ if (src->ssl_ctx.ciphersuites != NULL)
+ srv->ssl_ctx.ciphersuites = strdup(src->ssl_ctx.ciphersuites);
+ if (src->sni_expr != NULL)
+ srv->sni_expr = strdup(src->sni_expr);
+
+ if (src->ssl_ctx.alpn_str) {
+ srv->ssl_ctx.alpn_str = malloc(src->ssl_ctx.alpn_len);
+ if (srv->ssl_ctx.alpn_str) {
+ memcpy(srv->ssl_ctx.alpn_str, src->ssl_ctx.alpn_str,
+ src->ssl_ctx.alpn_len);
+ srv->ssl_ctx.alpn_len = src->ssl_ctx.alpn_len;
+ }
+ }
+
+ if (src->ssl_ctx.npn_str) {
+ srv->ssl_ctx.npn_str = malloc(src->ssl_ctx.npn_len);
+ if (srv->ssl_ctx.npn_str) {
+ memcpy(srv->ssl_ctx.npn_str, src->ssl_ctx.npn_str,
+ src->ssl_ctx.npn_len);
+ srv->ssl_ctx.npn_len = src->ssl_ctx.npn_len;
+ }
+ }
+}
+
+/* Enable or disable ssl on server <s> depending on <use_ssl>.
+ * Do nothing if there is no change to apply.
+ *
+ * Must be called with the server lock held.
+ */
+void srv_set_ssl(struct server *s, int use_ssl)
+{
+ if (s->use_ssl == use_ssl)
+ return;
+
+ s->use_ssl = use_ssl;
+ if (s->use_ssl)
+ s->xprt = xprt_get(XPRT_SSL);
+ else
+ s->xprt = xprt_get(XPRT_RAW);
+}
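+
+/* Usage sketch (assumption, not verified here): this backs the CLI command
+ * "set server <backend>/<server> ssl on|off", which merely swaps the
+ * transport between XPRT_SSL and XPRT_RAW without touching other SSL
+ * settings.
+ */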
+
+#endif /* USE_OPENSSL */
+
+/*
+ * Prepare <srv> for hostname resolution.
+ * May safely be called with a NULL <hostname> (e.g. from a default server), in which case it does nothing.
+ * Returns -1 in case of any allocation failure, 0 if not.
+ */
+int srv_prepare_for_resolution(struct server *srv, const char *hostname)
+{
+ char *hostname_dn;
+ int hostname_len, hostname_dn_len;
+
+ if (!hostname)
+ return 0;
+
+ hostname_len = strlen(hostname);
+ hostname_dn = trash.area;
+ hostname_dn_len = resolv_str_to_dn_label(hostname, hostname_len,
+ hostname_dn, trash.size);
+ if (hostname_dn_len == -1)
+ goto err;
+
+ free(srv->hostname);
+ free(srv->hostname_dn);
+ srv->hostname = strdup(hostname);
+ srv->hostname_dn = strdup(hostname_dn);
+ srv->hostname_dn_len = hostname_dn_len;
+ if (!srv->hostname || !srv->hostname_dn)
+ goto err;
+
+ return 0;
+
+ err:
+ ha_free(&srv->hostname);
+ ha_free(&srv->hostname_dn);
+ return -1;
+}
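+
+/* Example (sketch): for hostname "www.example.com", resolv_str_to_dn_label()
+ * is expected to turn each dot-separated component into a length-prefixed
+ * label ("\3www\7example\3com"), which is what gets stored in
+ * srv->hostname_dn next to the original string in srv->hostname.
+ */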
+
+/*
+ * Copy <src> server settings to <srv> server allocating
+ * everything needed.
+ * This function is not supposed to be called at any time, but only
+ * during server settings parsing or during server allocations from
+ * a server template, and just after having calloc()'ed a new server.
+ * So, <src> may only be a default server (when parsing server settings)
+ * or a server template (during server allocations from a server template).
+ * <srv_tmpl> distinguishes these two cases (must be 1 if <srv> is a template,
+ * 0 if not).
+ */
+void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl)
+{
+ struct srv_pp_tlv_list *srv_tlv = NULL, *new_srv_tlv = NULL;
+
+ /* Connection source settings copy */
+ srv_conn_src_cpy(srv, src);
+
+ if (srv_tmpl) {
+ srv->addr = src->addr;
+ srv->addr_type = src->addr_type;
+ srv->svc_port = src->svc_port;
+ }
+
+ srv->pp_opts = src->pp_opts;
+ if (src->rdr_pfx != NULL) {
+ srv->rdr_pfx = strdup(src->rdr_pfx);
+ srv->rdr_len = src->rdr_len;
+ }
+ if (src->cookie != NULL) {
+ srv->cookie = strdup(src->cookie);
+ srv->cklen = src->cklen;
+ }
+ srv->use_ssl = src->use_ssl;
+ srv->check.addr = src->check.addr;
+ srv->agent.addr = src->agent.addr;
+ srv->check.use_ssl = src->check.use_ssl;
+ srv->check.port = src->check.port;
+ srv->check.sni = src->check.sni;
+ srv->check.alpn_str = src->check.alpn_str;
+ srv->check.alpn_len = src->check.alpn_len;
+ /* Note: 'flags' field has potentially been already initialized. */
+ srv->flags |= src->flags;
+ srv->do_check = src->do_check;
+ srv->do_agent = src->do_agent;
+ srv->check.inter = src->check.inter;
+ srv->check.fastinter = src->check.fastinter;
+ srv->check.downinter = src->check.downinter;
+ srv->agent.use_ssl = src->agent.use_ssl;
+ srv->agent.port = src->agent.port;
+
+ if (src->agent.tcpcheck_rules) {
+ srv->agent.tcpcheck_rules = calloc(1, sizeof(*srv->agent.tcpcheck_rules));
+ if (srv->agent.tcpcheck_rules) {
+ srv->agent.tcpcheck_rules->flags = src->agent.tcpcheck_rules->flags;
+ srv->agent.tcpcheck_rules->list = src->agent.tcpcheck_rules->list;
+ LIST_INIT(&srv->agent.tcpcheck_rules->preset_vars);
+ dup_tcpcheck_vars(&srv->agent.tcpcheck_rules->preset_vars,
+ &src->agent.tcpcheck_rules->preset_vars);
+ }
+ }
+
+ srv->agent.inter = src->agent.inter;
+ srv->agent.fastinter = src->agent.fastinter;
+ srv->agent.downinter = src->agent.downinter;
+ srv->maxqueue = src->maxqueue;
+ srv->ws = src->ws;
+ srv->minconn = src->minconn;
+ srv->maxconn = src->maxconn;
+ srv->slowstart = src->slowstart;
+ srv->observe = src->observe;
+ srv->onerror = src->onerror;
+ srv->onmarkeddown = src->onmarkeddown;
+ srv->onmarkedup = src->onmarkedup;
+ if (src->trackit != NULL)
+ srv->trackit = strdup(src->trackit);
+ srv->consecutive_errors_limit = src->consecutive_errors_limit;
+ srv->uweight = srv->iweight = src->iweight;
+
+ srv->check.send_proxy = src->check.send_proxy;
+ /* health: up, but will fall down at first failure */
+ srv->check.rise = srv->check.health = src->check.rise;
+ srv->check.fall = src->check.fall;
+
+ /* Here we check if 'disabled' is the default server state */
+ if (src->next_admin & (SRV_ADMF_CMAINT | SRV_ADMF_FMAINT)) {
+ srv->next_admin |= SRV_ADMF_CMAINT | SRV_ADMF_FMAINT;
+ srv->next_state = SRV_ST_STOPPED;
+ srv->check.state |= CHK_ST_PAUSED;
+ srv->check.health = 0;
+ }
+
+ /* health: up but will fall down at first failure */
+ srv->agent.rise = srv->agent.health = src->agent.rise;
+ srv->agent.fall = src->agent.fall;
+
+ if (src->resolvers_id != NULL)
+ srv->resolvers_id = strdup(src->resolvers_id);
+ srv->resolv_opts.family_prio = src->resolv_opts.family_prio;
+ srv->resolv_opts.accept_duplicate_ip = src->resolv_opts.accept_duplicate_ip;
+ srv->resolv_opts.ignore_weight = src->resolv_opts.ignore_weight;
+ if (srv->resolv_opts.family_prio == AF_UNSPEC)
+ srv->resolv_opts.family_prio = AF_INET6;
+ memcpy(srv->resolv_opts.pref_net,
+ src->resolv_opts.pref_net,
+ sizeof srv->resolv_opts.pref_net);
+ srv->resolv_opts.pref_net_nb = src->resolv_opts.pref_net_nb;
+
+ srv->init_addr_methods = src->init_addr_methods;
+ srv->init_addr = src->init_addr;
+#if defined(USE_OPENSSL)
+ srv_ssl_settings_cpy(srv, src);
+#endif
+#ifdef TCP_USER_TIMEOUT
+ srv->tcp_ut = src->tcp_ut;
+#endif
+ srv->mux_proto = src->mux_proto;
+ srv->pool_purge_delay = src->pool_purge_delay;
+ srv->low_idle_conns = src->low_idle_conns;
+ srv->max_idle_conns = src->max_idle_conns;
+ srv->max_reuse = src->max_reuse;
+
+ if (srv_tmpl)
+ srv->srvrq = src->srvrq;
+
+ srv->netns = src->netns;
+ srv->check.via_socks4 = src->check.via_socks4;
+ srv->socks4_addr = src->socks4_addr;
+ srv->log_bufsize = src->log_bufsize;
+
+ LIST_INIT(&srv->pp_tlvs);
+
+ list_for_each_entry(srv_tlv, &src->pp_tlvs, list) {
+ new_srv_tlv = malloc(sizeof(*new_srv_tlv));
+ if (unlikely(!new_srv_tlv)) {
+ break;
+ }
+ new_srv_tlv->fmt_string = strdup(srv_tlv->fmt_string);
+ if (unlikely(!new_srv_tlv->fmt_string)) {
+ free(new_srv_tlv);
+ break;
+ }
+ new_srv_tlv->type = srv_tlv->type;
+ LIST_APPEND(&srv->pp_tlvs, &new_srv_tlv->list);
+ }
+}
+
+/* allocate a server and attach it to the global servers_list. Returns
+ * the server on success, otherwise NULL.
+ */
+struct server *new_server(struct proxy *proxy)
+{
+ struct server *srv;
+
+ srv = calloc(1, sizeof *srv);
+ if (!srv)
+ return NULL;
+
+ srv_take(srv);
+
+ srv->obj_type = OBJ_TYPE_SERVER;
+ srv->proxy = proxy;
+ queue_init(&srv->queue, proxy, srv);
+ LIST_APPEND(&servers_list, &srv->global_list);
+ LIST_INIT(&srv->srv_rec_item);
+ LIST_INIT(&srv->ip_rec_item);
+ LIST_INIT(&srv->pp_tlvs);
+ MT_LIST_INIT(&srv->prev_deleted);
+ event_hdl_sub_list_init(&srv->e_subs);
+ srv->rid = 0; /* rid defaults to 0 */
+
+ srv->next_state = SRV_ST_RUNNING; /* early server setup */
+ srv->last_change = ns_to_sec(now_ns);
+
+ srv->check.obj_type = OBJ_TYPE_CHECK;
+ srv->check.status = HCHK_STATUS_INI;
+ srv->check.server = srv;
+ srv->check.proxy = proxy;
+ srv->check.tcpcheck_rules = &proxy->tcpcheck_rules;
+
+ srv->agent.obj_type = OBJ_TYPE_CHECK;
+ srv->agent.status = HCHK_STATUS_INI;
+ srv->agent.server = srv;
+ srv->agent.proxy = proxy;
+ srv->xprt = srv->check.xprt = srv->agent.xprt = xprt_get(XPRT_RAW);
+
+ srv->extra_counters = NULL;
+#ifdef USE_OPENSSL
+ HA_RWLOCK_INIT(&srv->ssl_ctx.lock);
+#endif
+
+ /* please don't put default server settings here, they are set in
+ * proxy_preset_defaults().
+ */
+ return srv;
+}
+
+/* Increment the server refcount. */
+void srv_take(struct server *srv)
+{
+ HA_ATOMIC_INC(&srv->refcount);
+}
+
+/* deallocate common server parameters (may be used by default-servers) */
+void srv_free_params(struct server *srv)
+{
+ free(srv->cookie);
+ free(srv->rdr_pfx);
+ free(srv->hostname);
+ free(srv->hostname_dn);
+ free((char*)srv->conf.file);
+ free(srv->per_thr);
+ free(srv->per_tgrp);
+ free(srv->curr_idle_thr);
+ free(srv->resolvers_id);
+ free(srv->addr_node.key);
+ free(srv->lb_nodes);
+ if (srv->log_target) {
+ deinit_log_target(srv->log_target);
+ free(srv->log_target);
+ }
+
+ if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->destroy_srv)
+ xprt_get(XPRT_SSL)->destroy_srv(srv);
+}
+
+/* Deallocate a server <srv> and its members. <srv> must be allocated. For
+ * dynamic servers, its refcount is decremented first. The free operations are
+ * conducted only if the refcount drops to zero.
+ *
+ * As a convenience, <srv->next> is returned if srv is not NULL. It may be useful
+ * when calling srv_drop on the list of servers.
+ */
+struct server *srv_drop(struct server *srv)
+{
+ struct server *next = NULL;
+
+ if (!srv)
+ goto end;
+
+ next = srv->next;
+
+ /* For dynamic servers, decrement the reference counter. Only free the
+ * server when reaching zero.
+ */
+ if (HA_ATOMIC_SUB_FETCH(&srv->refcount, 1))
+ goto end;
+
+ /* make sure we are removed from our 'next->prev_deleted' list
+ * This doesn't require full thread isolation as we're using mt lists
+ * However this could easily be turned into regular list if required
+ * (with the proper use of thread isolation)
+ */
+ MT_LIST_DELETE(&srv->prev_deleted);
+
+ task_destroy(srv->warmup);
+ task_destroy(srv->srvrq_check);
+
+ free(srv->id);
+ srv_free_params(srv);
+
+ HA_SPIN_DESTROY(&srv->lock);
+
+ LIST_DELETE(&srv->global_list);
+ event_hdl_sub_list_destroy(&srv->e_subs);
+
+ EXTRA_COUNTERS_FREE(srv->extra_counters);
+
+ ha_free(&srv);
+
+ end:
+ return next;
+}
+
+/* Detach server from proxy list. It is supported to call this
+ * even if the server is not yet in the list
+ */
+static void _srv_detach(struct server *srv)
+{
+ struct proxy *be = srv->proxy;
+
+ if (be->srv == srv) {
+ be->srv = srv->next;
+ }
+ else {
+ struct server *prev;
+
+ for (prev = be->srv; prev && prev->next != srv; prev = prev->next)
+ ;
+ if (prev)
+ prev->next = srv->next;
+ }
+}
+
+/* Remove a server <srv> from a tracking list if <srv> is tracking another
+ * server. No special care is taken if <srv> is tracked itself by another one :
+ * this situation should be avoided by the caller.
+ *
+ * Not thread-safe.
+ */
+static void release_server_track(struct server *srv)
+{
+ struct server *strack = srv->track;
+ struct server **base;
+
+ if (!strack)
+ return;
+
+ for (base = &strack->trackers; *base; base = &((*base)->tracknext)) {
+ if (*base == srv) {
+ *base = srv->tracknext;
+ return;
+ }
+ }
+
+ /* srv not found on the tracking list, this should never happen */
+ BUG_ON(!*base);
+}
+
+/*
+ * Parse a range string argument of the form: low[-high]
+ * Set <nb_low> and <nb_high> values so that they may be reused by this loop
+ * for(int i = nb_low; i <= nb_high; i++)... with nb_low >= 1.
+ * Fails if 'low' < 0 or 'high' is present and lower than 'low'.
+ * Returns 0 if succeeded, -1 if not.
+ */
+static int _srv_parse_tmpl_range(struct server *srv, const char *arg,
+ int *nb_low, int *nb_high)
+{
+ char *nb_high_arg;
+
+ *nb_high = 0;
+ chunk_printf(&trash, "%s", arg);
+ *nb_low = atoi(trash.area);
+
+ if ((nb_high_arg = strchr(trash.area, '-'))) {
+ *nb_high_arg++ = '\0';
+ *nb_high = atoi(nb_high_arg);
+ }
+ else {
+ *nb_high += *nb_low;
+ *nb_low = 1;
+ }
+
+ if (*nb_low < 0 || *nb_high < *nb_low)
+ return -1;
+
+ return 0;
+}
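+
+/* Examples (sketch): "1-3" yields nb_low=1 and nb_high=3; a bare "5" yields
+ * nb_low=1 and nb_high=5 (a plain count means "1 up to that number"); "3-1"
+ * and "-2" fail the final check and return -1.
+ */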
+
+/* Set the id of server <srv> from template prefix <prefix> and number <nb>:
+ * for instance, prefix "srv" with nb 3 yields the id "srv3". Any previous id
+ * is freed first.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ */
+static inline void _srv_parse_set_id_from_prefix(struct server *srv,
+ const char *prefix, int nb)
+{
+ chunk_printf(&trash, "%s%d", prefix, nb);
+ free(srv->id);
+ srv->id = strdup(trash.area);
+}
+
+/* Initialize as many servers as possible from the <srv> server template.
+ * Note that a server template is a special server: it is parsed mostly
+ * the same way as a regular server but carries a few extra parameters
+ * (a prefix and a number range).
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ *
+ * Returns the number of servers successfully allocated,
+ * 'srv' template included.
+ */
+static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px)
+{
+ int i;
+ struct server *newsrv;
+
+ for (i = srv->tmpl_info.nb_low + 1; i <= srv->tmpl_info.nb_high; i++) {
+ newsrv = new_server(px);
+ if (!newsrv)
+ goto err;
+
+ newsrv->conf.file = strdup(srv->conf.file);
+ newsrv->conf.line = srv->conf.line;
+
+ srv_settings_cpy(newsrv, srv, 1);
+ srv_prepare_for_resolution(newsrv, srv->hostname);
+
+ if (newsrv->sni_expr) {
+ newsrv->ssl_ctx.sni = srv_sni_sample_parse_expr(newsrv, px, NULL, 0, NULL);
+ if (!newsrv->ssl_ctx.sni)
+ goto err;
+ }
+
+ /* append to list of servers available to receive a hostname */
+ if (newsrv->srvrq)
+ LIST_APPEND(&newsrv->srvrq->attached_servers, &newsrv->srv_rec_item);
+
+ /* Set this new server ID. */
+ _srv_parse_set_id_from_prefix(newsrv, srv->tmpl_info.prefix, i);
+
+ /* Linked backwards first. This will be re-established after parsing. */
+ newsrv->next = px->srv;
+ px->srv = newsrv;
+ }
+ _srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low);
+
+ return i - srv->tmpl_info.nb_low;
+
+ err:
+ _srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low);
+ if (newsrv) {
+ release_sample_expr(newsrv->ssl_ctx.sni);
+ free_check(&newsrv->agent);
+ free_check(&newsrv->check);
+ LIST_DELETE(&newsrv->global_list);
+ }
+ free(newsrv);
+ return i - srv->tmpl_info.nb_low;
+}
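+
+/* Example (illustrative configuration, not from this file): the line
+ *
+ *   server-template websrv 1-3 203.0.113.1:80 check
+ *
+ * is expected to run through this function to produce servers websrv1,
+ * websrv2 and websrv3, each inheriting the template's settings through
+ * srv_settings_cpy() above.
+ */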
+
+/* Ensure server config will work with effective proxy mode
+ *
+ * This function is expected to be called after _srv_parse_init() initialization,
+ * but only once the server's effective proxy mode is known, which is not always
+ * the case at parsing time. In that case the check is deferred to postparsing
+ * through _srv_postparse() below.
+ *
+ * Returns ERR_NONE on success, otherwise a combination of ERR_* codes.
+ */
+static int _srv_check_proxy_mode(struct server *srv, char postparse)
+{
+ int err_code = ERR_NONE;
+
+ if (postparse && !(srv->proxy->cap & PR_CAP_LB))
+ return ERR_NONE; /* nothing to do, the check was already performed during parsing */
+
+ if (srv->conf.file)
+ set_usermsgs_ctx(srv->conf.file, srv->conf.line, NULL);
+
+ if (!srv->proxy) {
+ /* proxy mode not known, cannot perform checks (ie: defaults section) */
+ goto out;
+ }
+
+ if (srv->proxy->mode == PR_MODE_SYSLOG) {
+ /* log backend server (belongs to proxy with mode log enabled):
+ * perform some compatibility checks
+ */
+
+ /* supported address family types are:
+ * - ipv4
+ * - ipv6
+ * (UNSPEC is supported because it means it will be resolved later)
+ */
+ if (srv->addr.ss_family != AF_UNSPEC &&
+ srv->addr.ss_family != AF_INET && srv->addr.ss_family != AF_INET6) {
+ ha_alert("log server address family not supported for log backend server.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* only @tcp or @udp address forms (or equivalent) are supported */
+ if (!(srv->addr_type.xprt_type == PROTO_TYPE_DGRAM && srv->addr_type.proto_type == PROTO_TYPE_DGRAM) &&
+ !(srv->addr_type.xprt_type == PROTO_TYPE_STREAM && srv->addr_type.proto_type == PROTO_TYPE_STREAM)) {
+ ha_alert("log server address type not supported for log backend server.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ else {
+ /* for all other proxy modes: only TCP expected as srv's transport type for now */
+ if (srv->addr_type.xprt_type != PROTO_TYPE_STREAM) {
+ ha_alert("unsupported transport for server address in '%s' backend.\n", proxy_mode_str(srv->proxy->mode));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ out:
+ if (srv->conf.file)
+ reset_usermsgs_ctx();
+
+ return err_code;
+}
+
+/* Perform some server postparsing checks / tasks:
+ * We must be careful that checks / postinits performed within this function
+ * don't depend or conflict with other postcheck functions that are registered
+ * using REGISTER_POST_SERVER_CHECK() hook.
+ *
+ * Returns ERR_NONE on success, otherwise a combination of ERR_* codes.
+ */
+static int _srv_postparse(struct server *srv)
+{
+ int err_code = ERR_NONE;
+
+ err_code |= _srv_check_proxy_mode(srv, 1);
+
+ return err_code;
+}
+REGISTER_POST_SERVER_CHECK(_srv_postparse);
+
+/* Allocate a new server pointed to by <srv> and try to parse the first arguments
+ * in <args> as an address for a server or an address-range for a template or
+ * nothing for a default-server. <cur_arg> is incremented to the next argument.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ *
+ * A mask of errors is returned. On a parsing error, ERR_FATAL is set. In case
+ * of memory exhaustion, ERR_ABORT is set. If the server cannot be allocated,
+ * <srv> will be set to NULL.
+ */
+static int _srv_parse_init(struct server **srv, char **args, int *cur_arg,
+ struct proxy *curproxy,
+ int parse_flags)
+{
+ struct server *newsrv = NULL;
+ const char *err = NULL;
+ int err_code = 0;
+ char *fqdn = NULL;
+ int tmpl_range_low = 0, tmpl_range_high = 0;
+ char *errmsg = NULL;
+
+ *srv = NULL;
+
+ /* There is no mandatory first arguments for default server. */
+ if (parse_flags & SRV_PARSE_PARSE_ADDR) {
+ if (parse_flags & SRV_PARSE_TEMPLATE) {
+ if (!*args[3]) {
+ /* 'server-template' line number of argument check. */
+ ha_alert("'%s' expects <prefix> <nb | range> <addr>[:<port>] as arguments.\n",
+ args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_prefix_char(args[1]);
+ }
+ else {
+ if (!*args[2]) {
+ /* 'server' line number of argument check. */
+ ha_alert("'%s' expects <name> and <addr>[:<port>] as arguments.\n",
+ args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = invalid_char(args[1]);
+ }
+
+ if (err) {
+ ha_alert("character '%c' is not permitted in %s %s '%s'.\n",
+ *err, args[0], !(parse_flags & SRV_PARSE_TEMPLATE) ? "name" : "prefix", args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ *cur_arg = 2;
+ if (parse_flags & SRV_PARSE_TEMPLATE) {
+ /* Parse server-template <nb | range> arg. */
+ if (_srv_parse_tmpl_range(newsrv, args[*cur_arg], &tmpl_range_low, &tmpl_range_high) < 0) {
+ ha_alert("Wrong %s number or range arg '%s'.\n",
+ args[0], args[*cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ (*cur_arg)++;
+ }
+
+ if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER)) {
+ struct sockaddr_storage *sk;
+ int port1, port2, port;
+
+ *srv = newsrv = new_server(curproxy);
+ if (!newsrv) {
+ ha_alert("out of memory.\n");
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+ register_parsing_obj(&newsrv->obj_type);
+
+ if (parse_flags & SRV_PARSE_TEMPLATE) {
+ newsrv->tmpl_info.nb_low = tmpl_range_low;
+ newsrv->tmpl_info.nb_high = tmpl_range_high;
+ }
+
+ if (parse_flags & SRV_PARSE_DYNAMIC)
+ newsrv->flags |= SRV_F_DYNAMIC;
+
+ /* Note: for a server template, its id is its prefix.
+ * This is a temporary id which will be used for server allocations to come
+ * after parsing.
+ */
+ if (!(parse_flags & SRV_PARSE_TEMPLATE))
+ newsrv->id = strdup(args[1]);
+ else
+ newsrv->tmpl_info.prefix = strdup(args[1]);
+
+ /* several ways to check the port component :
+ * - IP => port=+0, relative (IPv4 only)
+ * - IP: => port=+0, relative
+ * - IP:N => port=N, absolute
+ * - IP:+N => port=+N, relative
+ * - IP:-N => port=-N, relative
+ */
+ if (!(parse_flags & SRV_PARSE_PARSE_ADDR))
+ goto skip_addr;
+
+ sk = str2sa_range(args[*cur_arg], &port, &port1, &port2, NULL, NULL, &newsrv->addr_type,
+ &errmsg, NULL, &fqdn,
+ (parse_flags & SRV_PARSE_INITIAL_RESOLVE ? PA_O_RESOLVE : 0) | PA_O_PORT_OK |
+ (parse_flags & SRV_PARSE_IN_PEER_SECTION ? PA_O_PORT_MAND : PA_O_PORT_OFS) |
+ PA_O_STREAM | PA_O_DGRAM | PA_O_XPRT);
+ if (!sk) {
+ ha_alert("%s\n", errmsg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ ha_free(&errmsg);
+ goto out;
+ }
+
+ if (!port1 || !port2) {
+ if (sk->ss_family != AF_CUST_RHTTP_SRV) {
+ /* no port specified, +offset, -offset */
+ newsrv->flags |= SRV_F_MAPPORTS;
+ }
+ else {
+ newsrv->flags |= SRV_F_RHTTP;
+ }
+ }
+
+ /* save hostname and create associated name resolution */
+ if (fqdn) {
+ if (fqdn[0] == '_') { /* SRV record */
+ /* Check if a SRV request already exists, and if not, create it */
+ if ((newsrv->srvrq = find_srvrq_by_name(fqdn, curproxy)) == NULL)
+ newsrv->srvrq = new_resolv_srvrq(newsrv, fqdn);
+ if (newsrv->srvrq == NULL) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ LIST_APPEND(&newsrv->srvrq->attached_servers, &newsrv->srv_rec_item);
+ }
+ else if (srv_prepare_for_resolution(newsrv, fqdn) == -1) {
+ ha_alert("Can't create DNS resolution for server '%s'\n",
+ newsrv->id);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ newsrv->addr = *sk;
+ newsrv->svc_port = port;
+ /*
+ * we don't need to lock the server here, because
+ * we are in the process of initializing.
+ *
+ * Note that the server is not attached into the proxy tree if
+ * this is a dynamic server.
+ */
+ srv_set_addr_desc(newsrv, !(parse_flags & SRV_PARSE_DYNAMIC));
+
+ if (!newsrv->srvrq && !newsrv->hostname &&
+ !protocol_lookup(newsrv->addr.ss_family, PROTO_TYPE_STREAM, 0)) {
+ ha_alert("Unknown protocol family %d '%s'\n",
+ newsrv->addr.ss_family, args[*cur_arg]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ (*cur_arg)++;
+ skip_addr:
+ if (!(parse_flags & SRV_PARSE_DYNAMIC)) {
+ /* Copy default server settings to new server */
+ srv_settings_cpy(newsrv, &curproxy->defsrv, 0);
+ } else {
+ /* Initialize dynamic server weight to 1 */
+ newsrv->uweight = newsrv->iweight = 1;
+
+ /* A dynamic server is disabled on startup */
+ newsrv->next_admin = SRV_ADMF_FMAINT;
+ newsrv->next_state = SRV_ST_STOPPED;
+ server_recalc_eweight(newsrv, 0);
+
+ /* Set default values for checks */
+ newsrv->check.inter = DEF_CHKINTR;
+ newsrv->check.rise = DEF_RISETIME;
+ newsrv->check.fall = DEF_FALLTIME;
+
+ newsrv->agent.inter = DEF_CHKINTR;
+ newsrv->agent.rise = DEF_AGENT_RISETIME;
+ newsrv->agent.fall = DEF_AGENT_FALLTIME;
+ }
+ HA_SPIN_INIT(&newsrv->lock);
+ }
+ else {
+ *srv = newsrv = &curproxy->defsrv;
+ *cur_arg = 1;
+ newsrv->resolv_opts.family_prio = AF_INET6;
+ newsrv->resolv_opts.accept_duplicate_ip = 0;
+ }
+
+ free(fqdn);
+ if (!(curproxy->cap & PR_CAP_LB)) {
+ /* No need to wait for effective proxy mode, it is already known:
+ * Only general purpose user-declared proxies ("listen", "frontend", "backend")
+ * offer the possibility to configure the mode of the proxy. Hopefully for us,
+ * they have the PR_CAP_LB set.
+ */
+ return _srv_check_proxy_mode(newsrv, 0);
+ }
+ return 0;
+
+out:
+ free(fqdn);
+ return err_code;
+}
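+
+/* Address/port forms accepted above, with illustrative examples (a sketch of
+ * the relative port mapping described in the comment block inside the code):
+ *   server s1 192.0.2.1:80     -> fixed port 80
+ *   server s2 192.0.2.2:+1000  -> SRV_F_MAPPORTS, incoming port plus 1000
+ *   server s3 192.0.2.3        -> SRV_F_MAPPORTS, same port as the incoming one
+ */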
+
+/* Parse the server keyword in <args>.
+ * <cur_arg> is incremented past the keyword and its optional value. Note that this
+ * might not be the case if an error is reported.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ *
+ * A mask of errors is returned. ERR_FATAL is set if the parsing should be
+ * interrupted.
+ */
+static int _srv_parse_kw(struct server *srv, char **args, int *cur_arg,
+ struct proxy *curproxy,
+ int parse_flags)
+{
+ int err_code = 0;
+ struct srv_kw *kw;
+ const char *best;
+ char *errmsg = NULL;
+
+ kw = srv_find_kw(args[*cur_arg]);
+ if (!kw) {
+ best = srv_find_best_kw(args[*cur_arg]);
+ if (best)
+ ha_alert("unknown keyword '%s'; did you mean '%s' maybe ?%s\n",
+ args[*cur_arg], best,
+ (parse_flags & SRV_PARSE_PARSE_ADDR) ? "" :
+ " Hint: no address was expected for this server.");
+ else
+ ha_alert("unknown keyword '%s'.%s\n", args[*cur_arg],
+ (parse_flags & SRV_PARSE_PARSE_ADDR) ? "" :
+ " Hint: no address was expected for this server.");
+
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (!kw->parse) {
+ ha_alert("'%s' option is not implemented in this version (check build options)\n",
+ args[*cur_arg]);
+ err_code = ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((parse_flags & SRV_PARSE_DEFAULT_SERVER) && !kw->default_ok) {
+ ha_alert("'%s' option is not accepted in default-server sections\n",
+ args[*cur_arg]);
+ err_code = ERR_ALERT;
+ goto out;
+ }
+ else if ((parse_flags & SRV_PARSE_DYNAMIC) && !kw->dynamic_ok) {
+ ha_alert("'%s' option is not accepted for dynamic server\n",
+ args[*cur_arg]);
+ err_code |= ERR_ALERT;
+ goto out;
+ }
+
+ err_code = kw->parse(args, cur_arg, curproxy, srv, &errmsg);
+ if (err_code) {
+ display_parser_err(NULL, 0, args, *cur_arg, err_code, &errmsg);
+ free(errmsg);
+ }
+
+out:
+ if (kw->skip != -1)
+ *cur_arg += 1 + kw->skip;
+
+ return err_code;
+}
+
+/* This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ */
+static int _srv_parse_sni_expr_init(char **args, int cur_arg,
+ struct server *srv, struct proxy *proxy,
+ char **errmsg)
+{
+ int ret;
+
+ if (!srv->sni_expr)
+ return 0;
+
+ ret = server_parse_sni_expr(srv, proxy, errmsg);
+ if (!ret)
+ return 0;
+
+ return ret;
+}
+
+/* Server initializations finalization.
+ * Initialize health check, agent check, SNI expression and outgoing TLVs if enabled.
+ * Must not be called for a default server instance.
+ *
+ * This function is first intended to be used through parse_server to
+ * initialize a new server on startup.
+ */
+static int _srv_parse_finalize(char **args, int cur_arg,
+ struct server *srv, struct proxy *px,
+ int parse_flags)
+{
+ int ret;
+ char *errmsg = NULL;
+ struct srv_pp_tlv_list *srv_tlv = NULL;
+
+ if (srv->do_check && srv->trackit) {
+ ha_alert("unable to enable checks and tracking at the same time!\n");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if (srv->do_agent && !srv->agent.port) {
+ ha_alert("server %s does not have agent port. Agent check has been disabled.\n",
+ srv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ if ((ret = _srv_parse_sni_expr_init(args, cur_arg, srv, px, &errmsg)) != 0) {
+ if (errmsg) {
+ ha_alert("%s\n", errmsg);
+ free(errmsg);
+ }
+ return ret;
+ }
+
+ /* A dynamic server is disabled on startup. It must not be counted as
+ * an active backend entry.
+ */
+ if (!(parse_flags & SRV_PARSE_DYNAMIC)) {
+ if (srv->flags & SRV_F_BACKUP)
+ px->srv_bck++;
+ else
+ px->srv_act++;
+ }
+
+ list_for_each_entry(srv_tlv, &srv->pp_tlvs, list) {
+ LIST_INIT(&srv_tlv->fmt);
+ if (srv_tlv->fmt_string && unlikely(!parse_logformat_string(srv_tlv->fmt_string,
+ srv->proxy, &srv_tlv->fmt, 0, SMP_VAL_BE_SRV_CON, &errmsg))) {
+ if (errmsg) {
+ ha_alert("%s\n", errmsg);
+ free(errmsg);
+ }
+ return ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ srv_lb_commit_status(srv);
+
+ return 0;
+}
+
+int parse_server(const char *file, int linenum, char **args,
+ struct proxy *curproxy, const struct proxy *defproxy,
+ int parse_flags)
+{
+ struct server *newsrv = NULL;
+ int err_code = 0;
+
+ int cur_arg;
+
+ set_usermsgs_ctx(file, linenum, NULL);
+
+ if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER) && curproxy == defproxy) {
+ ha_alert("'%s' not allowed in 'defaults' section.\n", args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (failifnotcap(curproxy, PR_CAP_BE, file, linenum, args[0], NULL)) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if ((parse_flags & (SRV_PARSE_IN_PEER_SECTION|SRV_PARSE_PARSE_ADDR)) ==
+ (SRV_PARSE_IN_PEER_SECTION|SRV_PARSE_PARSE_ADDR)) {
+ if (!*args[2])
+ return 0;
+ }
+
+ err_code = _srv_parse_init(&newsrv, args, &cur_arg, curproxy,
+ parse_flags);
+
+ /* the servers are linked backwards first */
+ if (newsrv && !(parse_flags & SRV_PARSE_DEFAULT_SERVER)) {
+ newsrv->next = curproxy->srv;
+ curproxy->srv = newsrv;
+ }
+
+ if (err_code & ERR_CODE)
+ goto out;
+
+ if (!newsrv->conf.file) // note: do it only once for default-server
+ newsrv->conf.file = strdup(file);
+ newsrv->conf.line = linenum;
+
+ while (*args[cur_arg]) {
+ err_code = _srv_parse_kw(newsrv, args, &cur_arg, curproxy,
+ parse_flags);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+
+ if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER)) {
+ err_code |= _srv_parse_finalize(args, cur_arg, newsrv, curproxy, parse_flags);
+ if (err_code & ERR_FATAL)
+ goto out;
+ }
+
+ if (parse_flags & SRV_PARSE_TEMPLATE)
+ _srv_parse_tmpl_init(newsrv, curproxy);
+
+ /* If the server id is fixed, insert it in the proxy used_id tree.
+ * This is needed to detect a later duplicate id via srv_parse_id.
+ *
+ * If no id is specified, a dynamic one is generated in
+ * check_config_validity.
+ */
+ if (newsrv->flags & SRV_F_FORCED_ID)
+ eb32_insert(&curproxy->conf.used_server_id, &newsrv->conf.id);
+
+ HA_DIAG_WARNING_COND((curproxy->cap & PR_CAP_LB) && !newsrv->uweight,
+ "configured with weight of 0 will never be selected by load balancing algorithms\n");
+
+ reset_usermsgs_ctx();
+ return 0;
+
+ out:
+ reset_usermsgs_ctx();
+ return err_code;
+}
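+
+/* End-to-end example (assumed configuration, for illustration only):
+ *
+ *   backend bk1
+ *       default-server inter 2s rise 2 fall 3
+ *       server srv1 192.0.2.1:80 check weight 10
+ *       server-template srv 2-4 203.0.113.1:80 check
+ *
+ * parse_server() is called once per line above, with SRV_PARSE_DEFAULT_SERVER
+ * set for the first one and SRV_PARSE_TEMPLATE for the last one.
+ */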
+
+/* Returns a pointer to the first server matching id <id>.
+ * NULL is returned if no match is found.
+ * The lookup is performed in the backend <bk>.
+ */
+struct server *server_find_by_id(struct proxy *bk, int id)
+{
+ struct eb32_node *eb32;
+ struct server *curserver;
+
+ if (!bk || (id == 0))
+ return NULL;
+
+ /* <bk> has no backend capabilities, so it can't have a server */
+ if (!(bk->cap & PR_CAP_BE))
+ return NULL;
+
+ curserver = NULL;
+
+ eb32 = eb32_lookup(&bk->conf.used_server_id, id);
+ if (eb32)
+ curserver = container_of(eb32, struct server, conf.id);
+
+ return curserver;
+}
+
+/* Returns a pointer to the first server matching either name <name>, or id
+ * if <name> starts with a '#'. NULL is returned if no match is found.
+ * The lookup is performed in the backend <bk>.
+ */
+struct server *server_find_by_name(struct proxy *bk, const char *name)
+{
+ struct server *curserver;
+
+ if (!bk || !name)
+ return NULL;
+
+ /* <bk> has no backend capabilities, so it can't have a server */
+ if (!(bk->cap & PR_CAP_BE))
+ return NULL;
+
+ curserver = NULL;
+ if (*name == '#') {
+ curserver = server_find_by_id(bk, atoi(name + 1));
+ if (curserver)
+ return curserver;
+ }
+ else {
+ curserver = bk->srv;
+
+ while (curserver && (strcmp(curserver->id, name) != 0))
+ curserver = curserver->next;
+
+ if (curserver)
+ return curserver;
+ }
+
+ return NULL;
+}
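+
+/* Example (sketch): server_find_by_name(bk, "#3") resolves through
+ * server_find_by_id(bk, 3) thanks to the '#' prefix, while
+ * server_find_by_name(bk, "srv1") walks the proxy's server list comparing
+ * names.
+ */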
+
+struct server *server_find_best_match(struct proxy *bk, char *name, int id, int *diff)
+{
+ struct server *byname;
+ struct server *byid;
+
+ if (!name && !id)
+ return NULL;
+
+ if (diff)
+ *diff = 0;
+
+ byname = byid = NULL;
+
+ if (name) {
+ byname = server_find_by_name(bk, name);
+ if (byname && (!id || byname->puid == id))
+ return byname;
+ }
+
+ /* remaining possibilities :
+ * - name not set
+ * - name set but not found
+ * - name found but ID doesn't match
+ */
+ if (id) {
+ byid = server_find_by_id(bk, id);
+ if (byid) {
+ if (byname) {
+ /* use id only if forced by configuration */
+ if (byid->flags & SRV_F_FORCED_ID) {
+ if (diff)
+ *diff |= 2;
+ return byid;
+ }
+ else {
+ if (diff)
+ *diff |= 1;
+ return byname;
+ }
+ }
+
+ /* remaining possibilities:
+ * - name not set
+ * - name set but not found
+ */
+ if (name && diff)
+ *diff |= 2;
+ return byid;
+ }
+
+ /* id not found */
+ if (byname) {
+ if (diff)
+ *diff |= 1;
+ return byname;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * update a server's current IP address.
+ * ip is a pointer to the new IP address, whose address family is ip_sin_family.
+ * ip is in network format.
+ * updater is a string which contains information about the requester of the update.
+ * updater is used if not NULL.
+ *
+ * A log line and a stderr warning message are generated based on the server's backend options.
+ *
+ * Must be called with the server lock held.
+ */
+int srv_update_addr(struct server *s, void *ip, int ip_sin_family, const char *updater)
+{
+ union {
+ struct event_hdl_cb_data_server_inetaddr addr;
+ struct event_hdl_cb_data_server common;
+ } cb_data;
+ struct sockaddr_storage new_addr = { }; // shut up gcc warning
+
+ /* save the new IP family & address if necessary */
+ switch (ip_sin_family) {
+ case AF_INET:
+ if (s->addr.ss_family == ip_sin_family &&
+ !memcmp(ip, &((struct sockaddr_in *)&s->addr)->sin_addr.s_addr, 4))
+ return 0;
+ break;
+ case AF_INET6:
+ if (s->addr.ss_family == ip_sin_family &&
+ !memcmp(ip, &((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr, 16))
+ return 0;
+ break;
+ };
+
+ /* generates a log line and a warning on stderr */
+ if (1) {
+ /* book enough space for both IPv4 and IPv6 */
+ char oldip[INET6_ADDRSTRLEN];
+ char newip[INET6_ADDRSTRLEN];
+
+ memset(oldip, '\0', INET6_ADDRSTRLEN);
+ memset(newip, '\0', INET6_ADDRSTRLEN);
+
+ /* copy old IP address in a string */
+ switch (s->addr.ss_family) {
+ case AF_INET:
+ inet_ntop(s->addr.ss_family, &((struct sockaddr_in *)&s->addr)->sin_addr, oldip, INET_ADDRSTRLEN);
+ break;
+ case AF_INET6:
+ inet_ntop(s->addr.ss_family, &((struct sockaddr_in6 *)&s->addr)->sin6_addr, oldip, INET6_ADDRSTRLEN);
+ break;
+ default:
+ strlcpy2(oldip, "(none)", sizeof(oldip));
+ break;
+ };
+
+ /* copy new IP address in a string */
+ switch (ip_sin_family) {
+ case AF_INET:
+ inet_ntop(ip_sin_family, ip, newip, INET_ADDRSTRLEN);
+ break;
+ case AF_INET6:
+ inet_ntop(ip_sin_family, ip, newip, INET6_ADDRSTRLEN);
+ break;
+ };
+
+ /* save log line into a buffer */
+ chunk_printf(&trash, "%s/%s changed its IP from %s to %s by %s",
+ s->proxy->id, s->id, oldip, newip, updater);
+
+ /* write the buffer on stderr */
+ ha_warning("%s.\n", trash.area);
+
+ /* send a log */
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area);
+ }
+
+ /* save the new IP family */
+ new_addr.ss_family = ip_sin_family;
+ /* save the new IP address */
+ switch (ip_sin_family) {
+ case AF_INET:
+ memcpy(&((struct sockaddr_in *)&new_addr)->sin_addr.s_addr, ip, 4);
+ break;
+ case AF_INET6:
+ memcpy(((struct sockaddr_in6 *)&new_addr)->sin6_addr.s6_addr, ip, 16);
+ break;
+ };
+
+ _srv_event_hdl_prepare(&cb_data.common, s, 0);
+ _srv_event_hdl_prepare_inetaddr(&cb_data.addr, s,
+ &new_addr, s->svc_port, !!(s->flags & SRV_F_MAPPORTS),
+ 0);
+
+ /* server_atomic_sync_task will apply the changes for us */
+ _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s);
+
+ return 0;
+}
+
+/* update agent health check address and port
+ * addr can be ip4/ip6 or a hostname
+ * if an error occurs, don't apply anything
+ * must be called with the server lock held.
+ */
+const char *srv_update_agent_addr_port(struct server *s, const char *addr, const char *port)
+{
+ struct sockaddr_storage sk;
+ struct buffer *msg;
+ int new_port;
+
+ msg = get_trash_chunk();
+ chunk_reset(msg);
+
+ if (!(s->agent.state & CHK_ST_ENABLED)) {
+ chunk_strcat(msg, "agent checks are not enabled on this server");
+ goto out;
+ }
+ if (addr) {
+ memset(&sk, 0, sizeof(struct sockaddr_storage));
+ if (str2ip(addr, &sk) == NULL) {
+ chunk_appendf(msg, "invalid addr '%s'", addr);
+ goto out;
+ }
+ }
+ if (port) {
+ if (strl2irc(port, strlen(port), &new_port) != 0) {
+ chunk_appendf(msg, "provided port is not an integer");
+ goto out;
+ }
+ if (new_port < 0 || new_port > 65535) {
+ chunk_appendf(msg, "provided port is invalid");
+ goto out;
+ }
+ }
+out:
+ if (msg->data)
+ return msg->area;
+ else {
+ if (addr)
+ set_srv_agent_addr(s, &sk);
+ if (port)
+ set_srv_agent_port(s, new_port);
+ }
+ return NULL;
+}
+
+/* update server health check address and port
+ * addr must be ip4 or ip6, it won't be resolved
+ * if an error occurs, don't apply anything
+ * must be called with the server lock held.
+ */
+const char *srv_update_check_addr_port(struct server *s, const char *addr, const char *port)
+{
+ struct sockaddr_storage sk;
+ struct buffer *msg;
+ int new_port;
+
+ msg = get_trash_chunk();
+ chunk_reset(msg);
+
+ if (!(s->check.state & CHK_ST_ENABLED)) {
+ chunk_strcat(msg, "health checks are not enabled on this server");
+ goto out;
+ }
+ if (addr) {
+ memset(&sk, 0, sizeof(struct sockaddr_storage));
+ if (str2ip2(addr, &sk, 0) == NULL) {
+ chunk_appendf(msg, "invalid addr '%s'", addr);
+ goto out;
+ }
+ }
+ if (port) {
+ if (strl2irc(port, strlen(port), &new_port) != 0) {
+ chunk_appendf(msg, "provided port is not an integer");
+ goto out;
+ }
+ if (new_port < 0 || new_port > 65535) {
+ chunk_appendf(msg, "provided port is invalid");
+ goto out;
+ }
+ /* prevent the update of port to 0 if MAPPORTS are in use */
+ if ((s->flags & SRV_F_MAPPORTS) && new_port == 0) {
+ chunk_appendf(msg, "can't unset 'port' since MAPPORTS is in use");
+ goto out;
+ }
+ }
+out:
+ if (msg->data)
+ return msg->area;
+ else {
+ if (addr)
+ s->check.addr = sk;
+ if (port)
+ s->check.port = new_port;
+ }
+ return NULL;
+}
+
+/*
+ * This function updates a server's addr and port only for AF_INET and AF_INET6 families.
+ *
+ * Caller can pass its name through <updater> to get it integrated in the response message
+ * returned by the function.
+ *
+ * The function first does the following, in that order:
+ * - validates the new addr and/or port
+ * - checks if an update is required (new IP or port is different than current ones)
+ * - checks the update is allowed:
+ * - don't switch from/to a family other than AF_INET and AF_INET6
+ * - allow all changes if no CHECKS are configured
+ * - if CHECK is configured:
+ * - if switch to port map (SRV_F_MAPPORTS), ensure health check have their own ports
+ * - applies required changes to both ADDR and PORT if both 'required' and 'allowed'
+ * conditions are met
+ *
+ * Must be called with the server lock held.
+ */
+const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, char *updater)
+{
+ union {
+ struct event_hdl_cb_data_server_inetaddr addr;
+ struct event_hdl_cb_data_server common;
+ } cb_data;
+ struct sockaddr_storage sa;
+ int ret;
+ char current_addr[INET6_ADDRSTRLEN];
+ uint16_t current_port, new_port = 0;
+ struct buffer *msg;
+ int ip_change = 0;
+ int port_change = 0;
+ uint8_t mapports = !!(s->flags & SRV_F_MAPPORTS);
+
+ msg = get_trash_chunk();
+ chunk_reset(msg);
+
+ if (addr) {
+ memset(&sa, 0, sizeof(struct sockaddr_storage));
+ if (str2ip2(addr, &sa, 0) == NULL) {
+ chunk_printf(msg, "Invalid addr '%s'", addr);
+ goto out;
+ }
+
+ /* changes are allowed on AF_INET* families only */
+ if ((sa.ss_family != AF_INET) && (sa.ss_family != AF_INET6)) {
+ chunk_printf(msg, "Update to families other than AF_INET and AF_INET6 supported only through configuration file");
+ goto out;
+ }
+
+ /* collecting data currently setup */
+ memset(current_addr, '\0', sizeof(current_addr));
+ ret = addr_to_str(&s->addr, current_addr, sizeof(current_addr));
+ /* changes are allowed on AF_INET* families only */
+ if ((ret != AF_INET) && (ret != AF_INET6)) {
+ chunk_printf(msg, "Update for the current server address family is only supported through configuration file");
+ goto out;
+ }
+
+ /* applying ADDR changes if required and allowed
+ * ipcmp returns 0 when both ADDR are the same
+ */
+ if (ipcmp(&s->addr, &sa, 0) == 0) {
+ chunk_appendf(msg, "no need to change the addr");
+ goto port;
+ }
+ ip_change = 1;
+
+ /* update report for caller */
+ chunk_printf(msg, "IP changed from '%s' to '%s'", current_addr, addr);
+ }
+
+ port:
+ if (port) {
+ char sign = '\0';
+ char *endptr;
+
+ if (addr)
+ chunk_appendf(msg, ", ");
+
+ /* collecting data currently setup */
+ current_port = s->svc_port;
+
+ sign = *port;
+ errno = 0;
+ new_port = strtol(port, &endptr, 10);
+ if ((errno != 0) || (port == endptr)) {
+ chunk_appendf(msg, "problem converting port '%s' to an int", port);
+ goto out;
+ }
+
+ /* check if caller triggers a port mapped or offset */
+ if (sign == '-' || (sign == '+')) {
+ /* check if server currently uses port map */
+ if (!(s->flags & SRV_F_MAPPORTS)) {
+ /* a check is configured and we're switching from a fixed port to a
+ * SRV_F_MAPPORTS (mapped) port: prevent the PORT change if the check
+ * doesn't have its own dedicated port while switching to port mapping */
+ if (!s->check.port) {
+ chunk_appendf(msg, "can't change <port> to port map because it is incompatible with current health check port configuration (use 'port' statement from the 'server' directive.");
+ goto out;
+ }
+ /* switch from fixed port to port map mandatorily triggers
+ * a port change */
+ port_change = 1;
+ }
+ /* we're already using port maps */
+ else {
+ port_change = current_port != new_port;
+ }
+ }
+ /* fixed port */
+ else {
+ port_change = current_port != new_port;
+ }
+
+ /* applying PORT changes if required and update response message */
+ if (port_change) {
+ uint16_t new_port_print = new_port;
+
+ /* prepare message */
+ chunk_appendf(msg, "port changed from '");
+ if (s->flags & SRV_F_MAPPORTS)
+ chunk_appendf(msg, "+");
+ chunk_appendf(msg, "%d' to '", current_port);
+
+ if (sign == '-') {
+ mapports = 1;
+ chunk_appendf(msg, "%c", sign);
+ /* only used for result output */
+ new_port_print = -new_port_print;
+ }
+ else if (sign == '+') {
+ mapports = 1;
+ chunk_appendf(msg, "%c", sign);
+ }
+ else {
+ mapports = 0;
+ }
+
+ chunk_appendf(msg, "%d'", new_port_print);
+ }
+ else {
+ chunk_appendf(msg, "no need to change the port");
+ }
+ }
+
+out:
+ if (ip_change || port_change) {
+ _srv_event_hdl_prepare(&cb_data.common, s, 0);
+ _srv_event_hdl_prepare_inetaddr(&cb_data.addr, s,
+ ((ip_change) ? &sa : &s->addr),
+ ((port_change) ? new_port : s->svc_port), mapports,
+ 1);
+
+ /* server_atomic_sync_task will apply the changes for us */
+ _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s);
+ }
+ if (updater)
+ chunk_appendf(msg, " by '%s'", updater);
+ chunk_appendf(msg, "\n");
+ return msg->area;
+}
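+
+/* Usage sketch: this helper notably serves the CLI command
+ * "set server <backend>/<server> addr <ip> [port <port>]", e.g.
+ * "set server bk1/srv1 addr 192.0.2.20 port 8080" updates both, while a
+ * port of "+5" or "-5" switches the server to mapped-port mode as handled
+ * above.
+ */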
+
+/*
+ * update server status based on result of SRV resolution
+ * returns:
+ * 0 if server status is updated
+ * 1 if server status has not changed
+ *
+ * Must be called with the server lock held.
+ */
+int srvrq_update_srv_status(struct server *s, int has_no_ip)
+{
+ if (!s->srvrq)
+ return 1;
+
+ /* since this server has an IP, it can go back in production */
+ if (has_no_ip == 0) {
+ srv_clr_admin_flag(s, SRV_ADMF_RMAINT);
+ return 1;
+ }
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NOENT);
+ return 0;
+}
+
+/*
+ * update server status based on result of name resolution
+ * returns:
+ * 0 if server status is updated
+ * 1 if server status has not changed
+ *
+ * Must be called with the server lock held.
+ */
+int snr_update_srv_status(struct server *s, int has_no_ip)
+{
+ struct resolvers *resolvers = s->resolvers;
+ struct resolv_resolution *resolution = (s->resolv_requester ? s->resolv_requester->resolution : NULL);
+ int exp;
+
+ /* If resolution is NULL we're dealing with an SRV record's additional records */
+ if (resolution == NULL)
+ return srvrq_update_srv_status(s, has_no_ip);
+
+ switch (resolution->status) {
+ case RSLV_STATUS_NONE:
+ /* status when HAProxy has just (re)started.
+ * Nothing to do, since the task is already automatically started */
+ break;
+
+ case RSLV_STATUS_VALID:
+ /*
+ * resume health checks
+ * server will be turned back on if health check is safe
+ */
+ if (has_no_ip) {
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NOIP);
+ return 0;
+ }
+
+ if (!(s->next_admin & SRV_ADMF_RMAINT))
+ return 1;
+ srv_clr_admin_flag(s, SRV_ADMF_RMAINT);
+ chunk_printf(&trash, "Server %s/%s administratively READY thanks to valid DNS answer",
+ s->proxy->id, s->id);
+
+ ha_warning("%s.\n", trash.area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area);
+ return 0;
+
+ case RSLV_STATUS_NX:
+ /* stop server if resolution is NX for a long enough period */
+ exp = tick_add(resolution->last_valid, resolvers->hold.nx);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NX);
+ return 0;
+
+ case RSLV_STATUS_TIMEOUT:
+ /* stop server if resolution is TIMEOUT for a long enough period */
+ exp = tick_add(resolution->last_valid, resolvers->hold.timeout);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_TIMEOUT);
+ return 0;
+
+ case RSLV_STATUS_REFUSED:
+ /* stop server if resolution is REFUSED for a long enough period */
+ exp = tick_add(resolution->last_valid, resolvers->hold.refused);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_REFUSED);
+ return 0;
+
+ default:
+ /* stop server if resolution failed for a long enough period */
+ exp = tick_add(resolution->last_valid, resolvers->hold.other);
+ if (!tick_is_expired(exp, now_ms))
+ break;
+
+ if (s->next_admin & SRV_ADMF_RMAINT)
+ return 1;
+ srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_UNSPEC);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Server Name Resolution valid response callback
+ * It expects:
+ *  - <requester>: the resolv requester attached to the server
+ *  - <counters>: counters of the name server which answered, or NULL
+ * It performs the following actions:
+ *  - ignore the response if the current ip is found and the server family is not met
+ *  - update with the first new ip found if the family is met and the current IP is not found
+ * returns:
+ * 0 on error
+ * 1 when no error or safe ignore
+ *
+ * Must be called with server lock held
+ */
+int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *counters)
+{
+ struct server *s = NULL;
+ struct resolv_resolution *resolution = NULL;
+ void *serverip, *firstip;
+ short server_sin_family, firstip_sin_family;
+ int ret;
+ struct buffer *chk = get_trash_chunk();
+ int has_no_ip = 0;
+
+ s = objt_server(requester->owner);
+ if (!s)
+ return 1;
+
+ if (s->srvrq) {
+ /* If DNS resolution is disabled ignore it.
+ * This is the case if the server was associated to
+ * a SRV record and this record is now expired.
+ */
+ if (s->flags & SRV_F_NO_RESOLUTION)
+ return 1;
+ }
+
+ resolution = (s->resolv_requester ? s->resolv_requester->resolution : NULL);
+ if (!resolution)
+ return 1;
+
+ /* initializing variables */
+ firstip = NULL; /* pointer to the first valid response found */
+ /* it will be used as the new IP if a change is required */
+ firstip_sin_family = AF_UNSPEC;
+ serverip = NULL; /* current server IP address */
+
+ /* initializing server IP pointer */
+ server_sin_family = s->addr.ss_family;
+ switch (server_sin_family) {
+ case AF_INET:
+ serverip = &((struct sockaddr_in *)&s->addr)->sin_addr.s_addr;
+ break;
+
+ case AF_INET6:
+ serverip = &((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr;
+ break;
+
+ case AF_UNSPEC:
+ break;
+
+ default:
+ goto invalid;
+ }
+
+ ret = resolv_get_ip_from_response(&resolution->response, &s->resolv_opts,
+ serverip, server_sin_family, &firstip,
+ &firstip_sin_family, s);
+
+ switch (ret) {
+ case RSLV_UPD_NO:
+ goto update_status;
+
+ case RSLV_UPD_SRVIP_NOT_FOUND:
+ goto save_ip;
+
+ case RSLV_UPD_NO_IP_FOUND:
+ has_no_ip = 1;
+ goto update_status;
+
+ case RSLV_UPD_NAME_ERROR:
+ /* update resolution status to OTHER error type */
+ resolution->status = RSLV_STATUS_OTHER;
+ has_no_ip = 1;
+ goto update_status;
+
+ default:
+ has_no_ip = 1;
+ goto invalid;
+
+ }
+
+ save_ip:
+ if (counters) {
+ counters->app.resolver.update++;
+ /* save the first ip we found */
+ chunk_printf(chk, "%s/%s", counters->pid, counters->id);
+ }
+ else
+ chunk_printf(chk, "DNS cache");
+ srv_update_addr(s, firstip, firstip_sin_family, (char *) chk->area);
+
+ update_status:
+ if (!snr_update_srv_status(s, has_no_ip) && has_no_ip)
+ memset(&s->addr, 0, sizeof(s->addr));
+ return 1;
+
+ invalid:
+ if (counters) {
+ counters->app.resolver.invalid++;
+ goto update_status;
+ }
+ if (!snr_update_srv_status(s, has_no_ip) && has_no_ip)
+ memset(&s->addr, 0, sizeof(s->addr));
+ return 0;
+}
+
+/*
+ * SRV record error management callback
+ * returns:
+ *  0 if we can trash the answer items.
+ *  1 when safely ignored and we must keep the answer items
+ *
+ * Grabs the server's lock.
+ */
+int srvrq_resolution_error_cb(struct resolv_requester *requester, int error_code)
+{
+ struct resolv_srvrq *srvrq;
+ struct resolv_resolution *res;
+ struct resolvers *resolvers;
+ int exp;
+
+ /* SRV records */
+ srvrq = objt_resolv_srvrq(requester->owner);
+ if (!srvrq)
+ return 0;
+
+ resolvers = srvrq->resolvers;
+ res = requester->resolution;
+
+ switch (res->status) {
+
+ case RSLV_STATUS_NX:
+ /* stop server if resolution is NX for a long enough period */
+ exp = tick_add(res->last_valid, resolvers->hold.nx);
+ if (!tick_is_expired(exp, now_ms))
+ return 1;
+ break;
+
+ case RSLV_STATUS_TIMEOUT:
+ /* stop server if resolution is TIMEOUT for a long enough period */
+ exp = tick_add(res->last_valid, resolvers->hold.timeout);
+ if (!tick_is_expired(exp, now_ms))
+ return 1;
+ break;
+
+ case RSLV_STATUS_REFUSED:
+ /* stop server if resolution is REFUSED for a long enough period */
+ exp = tick_add(res->last_valid, resolvers->hold.refused);
+ if (!tick_is_expired(exp, now_ms))
+ return 1;
+ break;
+
+ default:
+ /* stop server if resolution failed for a long enough period */
+ exp = tick_add(res->last_valid, resolvers->hold.other);
+ if (!tick_is_expired(exp, now_ms))
+ return 1;
+ }
+
+ /* Remove any associated server ref */
+ resolv_detach_from_resolution_answer_items(res, requester);
+
+ return 0;
+}
+
+/*
+ * Server Name Resolution error management callback
+ * returns:
+ *  0 if we can trash the answer items.
+ *  1 when safely ignored and we must keep the answer items
+ *
+ * Grabs the server's lock.
+ */
+int snr_resolution_error_cb(struct resolv_requester *requester, int error_code)
+{
+ struct server *s;
+
+ s = objt_server(requester->owner);
+ if (!s)
+ return 0;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+ if (!snr_update_srv_status(s, 1)) {
+ memset(&s->addr, 0, sizeof(s->addr));
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+ resolv_detach_from_resolution_answer_items(requester->resolution, requester);
+ return 0;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+
+ return 1;
+}
+
+/*
+ * Function to check if <ip> is already assigned to a server in the backend
+ * which owns <srv> and is up.
+ * It returns a pointer to the first server found or NULL if <ip> is not yet
+ * assigned.
+ *
+ * Must be called with server lock held
+ */
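+/* Example: if srv1 and srv2 (hypothetical names) resolve the same hostname
+ * and srv2 is up and already holds the candidate address, this function
+ * returns srv2 when called for srv1 with that address.
+ */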
+struct server *snr_check_ip_callback(struct server *srv, void *ip, unsigned char *ip_family)
+{
+ struct server *tmpsrv;
+ struct proxy *be;
+
+ if (!srv)
+ return NULL;
+
+ be = srv->proxy;
+ for (tmpsrv = be->srv; tmpsrv; tmpsrv = tmpsrv->next) {
+		/* skip the current server itself */
+ if (srv == tmpsrv)
+ continue;
+
+ /* We want to compare the IP in the record with the IP of the servers in the
+ * same backend, only if:
+ * * DNS resolution is enabled on the server
+		 * * the hostname used for the resolution by our server is the same as the
+ * one used for the server found in the backend
+ * * the server found in the backend is not our current server
+ */
+ HA_SPIN_LOCK(SERVER_LOCK, &tmpsrv->lock);
+ if ((tmpsrv->hostname_dn == NULL) ||
+ (srv->hostname_dn_len != tmpsrv->hostname_dn_len) ||
+ (strcasecmp(srv->hostname_dn, tmpsrv->hostname_dn) != 0) ||
+ (srv->puid == tmpsrv->puid)) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
+ continue;
+ }
+
+ /* If the server has been taken down, don't consider it */
+ if (tmpsrv->next_admin & SRV_ADMF_RMAINT) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
+ continue;
+ }
+
+ /* At this point, we have 2 different servers using the same DNS hostname
+ * for their respective resolution.
+ */
+ if (*ip_family == tmpsrv->addr.ss_family &&
+ ((tmpsrv->addr.ss_family == AF_INET &&
+ memcmp(ip, &((struct sockaddr_in *)&tmpsrv->addr)->sin_addr, 4) == 0) ||
+ (tmpsrv->addr.ss_family == AF_INET6 &&
+ memcmp(ip, &((struct sockaddr_in6 *)&tmpsrv->addr)->sin6_addr, 16) == 0))) {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
+ return tmpsrv;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &tmpsrv->lock);
+ }
+
+
+ return NULL;
+}
+
+/* Sets the server's address (srv->addr) from srv->hostname using the libc's
+ * resolver. This is suited for initial address configuration. Returns 0 on
+ * success, otherwise a non-zero error code. In case of error, *err_code, if
+ * not NULL, is filled up.
+ */
+int srv_set_addr_via_libc(struct server *srv, int *err_code)
+{
+ struct sockaddr_storage new_addr;
+
+ memset(&new_addr, 0, sizeof(new_addr));
+
+ /* Use the preferred family, if configured */
+ new_addr.ss_family = srv->addr.ss_family;
+ if (str2ip2(srv->hostname, &new_addr, 1) == NULL) {
+ if (err_code)
+ *err_code |= ERR_WARN;
+ return 1;
+ }
+ _srv_set_inetaddr(srv, &new_addr);
+ return 0;
+}
+
+/* Set the server's FQDN (->hostname) from <hostname>.
+ * Returns -1 on failure, 0 on success.
+ *
+ * Must be called with the server lock held.
+ */
+int srv_set_fqdn(struct server *srv, const char *hostname, int resolv_locked)
+{
+ struct resolv_resolution *resolution;
+ char *hostname_dn;
+ int hostname_len, hostname_dn_len;
+
+ /* Note that the server lock is already held. */
+ if (!srv->resolvers)
+ return -1;
+
+ if (!resolv_locked)
+ HA_SPIN_LOCK(DNS_LOCK, &srv->resolvers->lock);
+ /* run time DNS/SRV resolution was not active for this server
+ * and we can't enable it at run time for now.
+ */
+ if (!srv->resolv_requester && !srv->srvrq)
+ goto err;
+
+ chunk_reset(&trash);
+ hostname_len = strlen(hostname);
+ hostname_dn = trash.area;
+ hostname_dn_len = resolv_str_to_dn_label(hostname, hostname_len,
+ hostname_dn, trash.size);
+ if (hostname_dn_len == -1)
+ goto err;
+
+ resolution = (srv->resolv_requester ? srv->resolv_requester->resolution : NULL);
+ if (resolution &&
+ resolution->hostname_dn &&
+ resolution->hostname_dn_len == hostname_dn_len &&
+ strcasecmp(resolution->hostname_dn, hostname_dn) == 0)
+ goto end;
+
+ resolv_unlink_resolution(srv->resolv_requester);
+
+ free(srv->hostname);
+ free(srv->hostname_dn);
+ srv->hostname = strdup(hostname);
+ srv->hostname_dn = strdup(hostname_dn);
+ srv->hostname_dn_len = hostname_dn_len;
+ if (!srv->hostname || !srv->hostname_dn)
+ goto err;
+
+ if (srv->flags & SRV_F_NO_RESOLUTION)
+ goto end;
+
+ if (resolv_link_resolution(srv, OBJ_TYPE_SERVER, 1) == -1)
+ goto err;
+
+ end:
+ if (!resolv_locked)
+ HA_SPIN_UNLOCK(DNS_LOCK, &srv->resolvers->lock);
+ return 0;
+
+ err:
+ if (!resolv_locked)
+ HA_SPIN_UNLOCK(DNS_LOCK, &srv->resolvers->lock);
+ return -1;
+}
+
+/* Sets the server's address (srv->addr) from srv->lastaddr which was filled
+ * from the state file. This is suited for initial address configuration.
+ * Returns 0 on success, otherwise a non-zero error code. In case of error,
+ * *err_code, if not NULL, is filled up.
+ */
+static int srv_apply_lastaddr(struct server *srv, int *err_code)
+{
+ struct sockaddr_storage new_addr;
+
+ memset(&new_addr, 0, sizeof(new_addr));
+
+ /* Use the preferred family, if configured */
+ new_addr.ss_family = srv->addr.ss_family;
+ if (!str2ip2(srv->lastaddr, &new_addr, 0)) {
+ if (err_code)
+ *err_code |= ERR_WARN;
+ return 1;
+ }
+ _srv_set_inetaddr(srv, &new_addr);
+ return 0;
+}
+
+/* returns 0 if no error, otherwise a combination of ERR_* flags */
+static int srv_iterate_initaddr(struct server *srv)
+{
+ char *name = srv->hostname;
+ int return_code = 0;
+ int err_code;
+ unsigned int methods;
+
+ /* If no addr and no hostname set, get the name from the DNS SRV request */
+ if (!name && srv->srvrq)
+ name = srv->srvrq->name;
+
+ methods = srv->init_addr_methods;
+ if (!methods) {
+ /* otherwise default to "last,libc" */
+ srv_append_initaddr(&methods, SRV_IADDR_LAST);
+ srv_append_initaddr(&methods, SRV_IADDR_LIBC);
+ if (srv->resolvers_id) {
+			/* DNS resolution is configured, add "none" so startup does not fail */
+ srv_append_initaddr(&methods, SRV_IADDR_NONE);
+ }
+ }
+
+	/* "-dr" : always append "none" so that server address resolution
+ * failures are silently ignored, this is convenient to validate some
+ * configs out of their environment.
+ */
+ if (global.tune.options & GTUNE_RESOLVE_DONTFAIL)
+ srv_append_initaddr(&methods, SRV_IADDR_NONE);
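+	/* For illustration, the defaults built above correspond to a server
+	 * line such as (hypothetical):
+	 *
+	 *   server srv1 app.example.com:8080 init-addr last,libc,none
+	 *
+	 * ("none" being implied only with resolvers or "-dr"); each
+	 * comma-separated method is tried in turn by the loop below.
+	 */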
+
+ while (methods) {
+ err_code = 0;
+ switch (srv_get_next_initaddr(&methods)) {
+ case SRV_IADDR_LAST:
+ if (!srv->lastaddr)
+ continue;
+ if (srv_apply_lastaddr(srv, &err_code) == 0)
+ goto out;
+ return_code |= err_code;
+ break;
+
+ case SRV_IADDR_LIBC:
+ if (!srv->hostname)
+ continue;
+ if (srv_set_addr_via_libc(srv, &err_code) == 0)
+ goto out;
+ return_code |= err_code;
+ break;
+
+ case SRV_IADDR_NONE:
+ srv_set_admin_flag(srv, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_NONE);
+ if (return_code) {
+ ha_notice("could not resolve address '%s', disabling server.\n",
+ name);
+ }
+ return return_code;
+
+ case SRV_IADDR_IP:
+ _srv_set_inetaddr(srv, &srv->init_addr);
+ if (return_code) {
+ ha_warning("could not resolve address '%s', falling back to configured address.\n",
+ name);
+ }
+ goto out;
+
+ default: /* unhandled method */
+ break;
+ }
+ }
+
+ if (!return_code)
+ ha_alert("no method found to resolve address '%s'.\n", name);
+ else
+ ha_alert("could not resolve address '%s'.\n", name);
+
+ return_code |= ERR_ALERT | ERR_FATAL;
+ return return_code;
+out:
+ srv_set_dyncookie(srv);
+ srv_set_addr_desc(srv, 1);
+ return return_code;
+}
+
+/*
+ * This function parses all backends and all servers within each backend
+ * and performs servers' addr resolution based on information provided by:
+ * - configuration file
+ * - server-state file (states provided by an 'old' haproxy process)
+ *
+ * Returns 0 if no error, otherwise, a combination of ERR_ flags.
+ */
+int srv_init_addr(void)
+{
+ struct proxy *curproxy;
+ int return_code = 0;
+
+ curproxy = proxies_list;
+ while (curproxy) {
+ struct server *srv;
+
+ /* servers are in backend only */
+ if (!(curproxy->cap & PR_CAP_BE) || (curproxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)))
+ goto srv_init_addr_next;
+
+ for (srv = curproxy->srv; srv; srv = srv->next) {
+ set_usermsgs_ctx(srv->conf.file, srv->conf.line, &srv->obj_type);
+ if (srv->hostname || srv->srvrq)
+ return_code |= srv_iterate_initaddr(srv);
+ reset_usermsgs_ctx();
+ }
+
+ srv_init_addr_next:
+ curproxy = curproxy->next;
+ }
+
+ return return_code;
+}
+
+/*
+ * Must be called with the server lock held.
+ */
+const char *srv_update_fqdn(struct server *server, const char *fqdn, const char *updater, int resolv_locked)
+{
+
+ struct buffer *msg;
+
+ msg = get_trash_chunk();
+ chunk_reset(msg);
+
+ if (server->hostname && strcmp(fqdn, server->hostname) == 0) {
+		chunk_appendf(msg, "no need to change the FQDN");
+ goto out;
+ }
+
+ if (strlen(fqdn) > DNS_MAX_NAME_SIZE || invalid_domainchar(fqdn)) {
+ chunk_appendf(msg, "invalid fqdn '%s'", fqdn);
+ goto out;
+ }
+
+ chunk_appendf(msg, "%s/%s changed its FQDN from %s to %s",
+ server->proxy->id, server->id, server->hostname, fqdn);
+
+ if (srv_set_fqdn(server, fqdn, resolv_locked) < 0) {
+ chunk_reset(msg);
+ chunk_appendf(msg, "could not update %s/%s FQDN",
+ server->proxy->id, server->id);
+ goto out;
+ }
+
+ /* Flag as FQDN set from stats socket. */
+ server->next_admin |= SRV_ADMF_HMAINT;
+
+ out:
+ if (updater)
+ chunk_appendf(msg, " by '%s'", updater);
+ chunk_appendf(msg, "\n");
+
+ return msg->area;
+}
+
+
+/* Expects to find a backend and a server in <arg> under the form <backend>/<server>,
+ * and returns the pointer to the server. Otherwise, display adequate error messages
+ * on the CLI, sets the CLI's state to CLI_ST_PRINT and returns NULL. This is only
+ * used for CLI commands requiring a server name.
+ * Important: the <arg> is modified to remove the '/'.
+ */
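+/* Example: with <arg> "bk/srv1" (hypothetical names), the istsplit() call
+ * below yields be_name="bk" and sv_name="srv1".
+ */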
+struct server *cli_find_server(struct appctx *appctx, char *arg)
+{
+ struct proxy *px;
+ struct server *sv;
+ struct ist be_name, sv_name = ist(arg);
+
+ be_name = istsplit(&sv_name, '/');
+ if (!istlen(sv_name)) {
+ cli_err(appctx, "Require 'backend/server'.");
+ return NULL;
+ }
+
+ if (!(px = proxy_be_by_name(ist0(be_name)))) {
+ cli_err(appctx, "No such backend.");
+ return NULL;
+ }
+ if (!(sv = server_find_by_name(px, ist0(sv_name)))) {
+ cli_err(appctx, "No such server.");
+ return NULL;
+ }
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) {
+ cli_err(appctx, "Proxy is disabled.\n");
+ return NULL;
+ }
+
+ return sv;
+}
+
+
+/* grabs the server lock */
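+/* Illustrative invocations on the stats socket (hypothetical backend "bk"
+ * and server "srv1"), matching the keywords parsed below:
+ *
+ *   set server bk/srv1 state drain
+ *   set server bk/srv1 weight 50
+ *   set server bk/srv1 addr 192.0.2.10 port 8080
+ */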
+static int cli_parse_set_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+ const char *warning;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ if (strcmp(args[3], "weight") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = server_parse_weight_change_request(sv, args[4]);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_err(appctx, warning);
+ }
+ else if (strcmp(args[3], "state") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (strcmp(args[4], "ready") == 0)
+ srv_adm_set_ready(sv);
+ else if (strcmp(args[4], "drain") == 0)
+ srv_adm_set_drain(sv);
+ else if (strcmp(args[4], "maint") == 0)
+ srv_adm_set_maint(sv);
+ else
+			cli_err(appctx, "'set server <srv> state' expects 'ready', 'drain' or 'maint'.\n");
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "health") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (sv->track)
+ cli_err(appctx, "cannot change health on a tracking server.\n");
+ else if (strcmp(args[4], "up") == 0) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_running(sv, SRV_OP_STCHGC_CLI);
+ }
+ else if (strcmp(args[4], "stopping") == 0) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_stopping(sv, SRV_OP_STCHGC_CLI);
+ }
+ else if (strcmp(args[4], "down") == 0) {
+ sv->check.health = 0;
+ srv_set_stopped(sv, SRV_OP_STCHGC_CLI);
+ }
+ else
+ cli_err(appctx, "'set server <srv> health' expects 'up', 'stopping', or 'down'.\n");
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "agent") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->agent.state & CHK_ST_ENABLED))
+ cli_err(appctx, "agent checks are not enabled on this server.\n");
+ else if (strcmp(args[4], "up") == 0) {
+ sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
+ srv_set_running(sv, SRV_OP_STCHGC_CLI);
+ }
+ else if (strcmp(args[4], "down") == 0) {
+ sv->agent.health = 0;
+ srv_set_stopped(sv, SRV_OP_STCHGC_CLI);
+ }
+ else
+ cli_err(appctx, "'set server <srv> agent' expects 'up' or 'down'.\n");
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "agent-addr") == 0) {
+ char *addr = NULL;
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> agent-addr requires"
+ " an address and optionally a port.\n");
+ goto out;
+ }
+ addr = args[4];
+ if (strcmp(args[5], "port") == 0)
+ port = args[6];
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_agent_addr_port(sv, addr, port);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "agent-port") == 0) {
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> agent-port requires"
+ " a port.\n");
+ goto out;
+ }
+ port = args[4];
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_agent_addr_port(sv, NULL, port);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "agent-send") == 0) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (!(sv->agent.state & CHK_ST_ENABLED))
+ cli_err(appctx, "agent checks are not enabled on this server.\n");
+ else {
+ if (!set_srv_agent_send(sv, args[4]))
+ cli_err(appctx, "cannot allocate memory for new string.\n");
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "check-addr") == 0) {
+ char *addr = NULL;
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> check-addr requires"
+ " an address and optionally a port.\n");
+ goto out;
+ }
+ addr = args[4];
+ if (strcmp(args[5], "port") == 0)
+ port = args[6];
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_check_addr_port(sv, addr, port);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "check-port") == 0) {
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> check-port requires"
+ " a port.\n");
+ goto out;
+ }
+ port = args[4];
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_check_addr_port(sv, NULL, port);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "addr") == 0) {
+ char *addr = NULL;
+ char *port = NULL;
+ if (strlen(args[4]) == 0) {
+ cli_err(appctx, "set server <b>/<s> addr requires an address and optionally a port.\n");
+ goto out;
+ }
+ else {
+ addr = args[4];
+ }
+ if (strcmp(args[5], "port") == 0) {
+ port = args[6];
+ }
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ warning = srv_update_addr_port(sv, addr, port, "stats socket command");
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ srv_clr_admin_flag(sv, SRV_ADMF_RMAINT);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ }
+ else if (strcmp(args[3], "fqdn") == 0) {
+ if (!*args[4]) {
+ cli_err(appctx, "set server <b>/<s> fqdn requires a FQDN.\n");
+ goto out;
+ }
+ if (!sv->resolvers) {
+ cli_err(appctx, "set server <b>/<s> fqdn failed because no resolution is configured.\n");
+ goto out;
+ }
+ if (sv->srvrq) {
+ cli_err(appctx, "set server <b>/<s> fqdn failed because SRV resolution is configured.\n");
+ goto out;
+ }
+ HA_SPIN_LOCK(DNS_LOCK, &sv->resolvers->lock);
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ /* ensure runtime resolver will process this new fqdn */
+ if (sv->flags & SRV_F_NO_RESOLUTION) {
+ sv->flags &= ~SRV_F_NO_RESOLUTION;
+ }
+ warning = srv_update_fqdn(sv, args[4], "stats socket command", 1);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ HA_SPIN_UNLOCK(DNS_LOCK, &sv->resolvers->lock);
+ if (warning)
+ cli_msg(appctx, LOG_WARNING, warning);
+ }
+ else if (strcmp(args[3], "ssl") == 0) {
+#ifdef USE_OPENSSL
+ if (sv->flags & SRV_F_DYNAMIC) {
+ cli_err(appctx, "'set server <srv> ssl' not supported on dynamic servers\n");
+ goto out;
+ }
+
+ if (sv->ssl_ctx.ctx == NULL) {
+			cli_err(appctx, "'set server <srv> ssl' cannot be set: "
+			                "default-server should define ssl settings\n");
+ goto out;
+ }
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ if (strcmp(args[4], "on") == 0) {
+ srv_set_ssl(sv, 1);
+ } else if (strcmp(args[4], "off") == 0) {
+ srv_set_ssl(sv, 0);
+ } else {
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ cli_err(appctx, "'set server <srv> ssl' expects 'on' or 'off'.\n");
+ goto out;
+ }
+ srv_cleanup_connections(sv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ cli_msg(appctx, LOG_NOTICE, "server ssl setting updated.\n");
+#else
+ cli_msg(appctx, LOG_NOTICE, "server ssl setting not supported.\n");
+#endif
+ } else {
+ cli_err(appctx,
+ "usage: set server <backend>/<server> "
+ "addr | agent | agent-addr | agent-port | agent-send | "
+ "check-addr | check-port | fqdn | health | ssl | "
+ "state | weight\n");
+ }
+ out:
+ return 1;
+}
+
+static int cli_parse_get_weight(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *be;
+ struct server *sv;
+ struct ist be_name, sv_name = ist(args[2]);
+
+ be_name = istsplit(&sv_name, '/');
+ if (!istlen(sv_name))
+ return cli_err(appctx, "Require 'backend/server'.");
+
+ if (!(be = proxy_be_by_name(ist0(be_name))))
+ return cli_err(appctx, "No such backend.");
+ if (!(sv = server_find_by_name(be, ist0(sv_name))))
+ return cli_err(appctx, "No such server.");
+
+	/* report the server's current weight and its initial weight */
+ snprintf(trash.area, trash.size, "%d (initial %d)\n", sv->uweight,
+ sv->iweight);
+ if (applet_putstr(appctx, trash.area) == -1)
+ return 0;
+ return 1;
+}
+
+/* Parse a "set weight" command.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_set_weight(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+ const char *warning;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+
+ warning = server_parse_weight_change_request(sv, args[3]);
+ if (warning)
+ cli_err(appctx, warning);
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+
+ return 1;
+}
+
+/* parse a "set maxconn server" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_set_maxconn_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+ const char *warning;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[3]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+
+ warning = server_parse_maxconn_change_request(sv, args[4]);
+ if (warning)
+ cli_err(appctx, warning);
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+
+ return 1;
+}
+
+/* parse a "disable agent" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_disable_agent(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ sv->agent.state &= ~CHK_ST_ENABLED;
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse a "disable health" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_disable_health(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ sv->check.state &= ~CHK_ST_ENABLED;
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse a "disable server" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_disable_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ srv_adm_set_maint(sv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse an "enable agent" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_enable_agent(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ if (!(sv->agent.state & CHK_ST_CONFIGURED))
+ return cli_err(appctx, "Agent was not configured on this server, cannot enable.\n");
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ sv->agent.state |= CHK_ST_ENABLED;
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse an "enable health" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_enable_health(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ if (!(sv->check.state & CHK_ST_CONFIGURED))
+ return cli_err(appctx, "Health check was not configured on this server, cannot enable.\n");
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ sv->check.state |= CHK_ST_ENABLED;
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* parse an "enable server" command. It always returns 1.
+ *
+ * Grabs the server lock.
+ */
+static int cli_parse_enable_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[2]);
+ if (!sv)
+ return 1;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ srv_adm_set_ready(sv);
+ if (!(sv->flags & SRV_F_COOKIESET)
+ && (sv->proxy->ck_opts & PR_CK_DYNAMIC) &&
+ sv->cookie)
+ srv_check_for_dup_dyncookie(sv);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* Allocates data structure related to load balancing for the server <sv>. It
+ * is only required for dynamic servers.
+ *
+ * At the moment, the server lock is not used as this function is only called
+ * for a dynamic server not yet registered.
+ *
+ * Returns 1 on success, 0 on allocation failure.
+ */
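+/* Sizing sketch (assuming BE_WEIGHT_SCALE is 16): a dynamic server with
+ * "weight 10" gets lb_nodes_tot = 10 * 16 = 160 nodes, each assigned a
+ * hash key below for use by the random and consistent-hash algorithms.
+ */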
+static int srv_alloc_lb(struct server *sv, struct proxy *be)
+{
+ int node;
+
+ sv->lb_tree = (sv->flags & SRV_F_BACKUP) ?
+ &be->lbprm.chash.bck : &be->lbprm.chash.act;
+ sv->lb_nodes_tot = sv->uweight * BE_WEIGHT_SCALE;
+ sv->lb_nodes_now = 0;
+
+ if (((be->lbprm.algo & (BE_LB_KIND | BE_LB_PARM)) == (BE_LB_KIND_RR | BE_LB_RR_RANDOM)) ||
+ ((be->lbprm.algo & (BE_LB_KIND | BE_LB_HASH_TYPE)) == (BE_LB_KIND_HI | BE_LB_HASH_CONS))) {
+ sv->lb_nodes = calloc(sv->lb_nodes_tot, sizeof(*sv->lb_nodes));
+
+ if (!sv->lb_nodes)
+ return 0;
+
+ for (node = 0; node < sv->lb_nodes_tot; node++) {
+ sv->lb_nodes[node].server = sv;
+ sv->lb_nodes[node].node.key = full_hash(sv->puid * SRV_EWGHT_RANGE + node);
+ }
+ }
+
+ return 1;
+}
+
+/* updates the server's weight during a warmup stage. Once the final weight is
+ * reached, the task automatically stops. Note that any server status change
+ * must have updated s->last_change accordingly.
+ */
+static struct task *server_warmup(struct task *t, void *context, unsigned int state)
+{
+ struct server *s = context;
+
+ /* by default, plan on stopping the task */
+ t->expire = TICK_ETERNITY;
+ if ((s->next_admin & SRV_ADMF_MAINT) ||
+ (s->next_state != SRV_ST_STARTING))
+ return t;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &s->lock);
+
+ /* recalculate the weights and update the state */
+ server_recalc_eweight(s, 1);
+
+ /* probably that we can refill this server with a bit more connections */
+ pendconn_grab_from_px(s);
+
+ HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock);
+
+ /* get back there in 1 second or 1/20th of the slowstart interval,
+ * whichever is greater, resulting in small 5% steps.
+ */
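+	/* Worked example: with "slowstart 60s" the wakeup interval is
+	 * MAX(1000, 60000/20) = 3000ms, i.e. 20 steps of ~5% each; with
+	 * "slowstart 10s" the 1s floor applies, giving 10 steps of ~10%.
+	 */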
+ if (s->next_state == SRV_ST_STARTING)
+ t->expire = tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20)));
+ return t;
+}
+
+/* Allocate the slowstart task if the server is configured with a slowstart
+ * timer. If server next_state is SRV_ST_STARTING, the task is scheduled.
+ *
+ * Returns 0 on success else non-zero.
+ */
+static int init_srv_slowstart(struct server *srv)
+{
+ struct task *t;
+
+ if (srv->slowstart) {
+ if ((t = task_new_anywhere()) == NULL) {
+ ha_alert("Cannot activate slowstart for server %s/%s: out of memory.\n", srv->proxy->id, srv->id);
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ /* We need a warmup task that will be called when the server
+ * state switches from down to up.
+ */
+ srv->warmup = t;
+ t->process = server_warmup;
+ t->context = srv;
+
+		/* the server can presumably be in this state only when resumed
+		 * from a server-state file; reschedule the warmup in that case */
+ if (srv->next_state == SRV_ST_STARTING) {
+ task_schedule(srv->warmup,
+ tick_add(now_ms,
+ MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - srv->last_change)) / 20)));
+ }
+ }
+
+ return ERR_NONE;
+}
+REGISTER_POST_SERVER_CHECK(init_srv_slowstart);
+
+/* Memory allocation and initialization of the per_thr field.
+ * Returns 0 if the field has been successfully initialized, -1 on failure.
+ */
+int srv_init_per_thr(struct server *srv)
+{
+ int i;
+
+ srv->per_thr = calloc(global.nbthread, sizeof(*srv->per_thr));
+ srv->per_tgrp = calloc(global.nbtgroups, sizeof(*srv->per_tgrp));
+ if (!srv->per_thr || !srv->per_tgrp)
+ return -1;
+
+ for (i = 0; i < global.nbthread; i++) {
+ srv->per_thr[i].idle_conns = EB_ROOT;
+ srv->per_thr[i].safe_conns = EB_ROOT;
+ srv->per_thr[i].avail_conns = EB_ROOT;
+ MT_LIST_INIT(&srv->per_thr[i].streams);
+
+ LIST_INIT(&srv->per_thr[i].idle_conn_list);
+ }
+
+ return 0;
+}
+
+/* Parse an "add server" command
+ * Returns 0 if the server has been successfully initialized, 1 on failure.
+ */
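+/* Example (illustrative) invocation on the stats socket, with "bk" an
+ * existing backend using a dynamic LB algorithm (hypothetical names):
+ *
+ *   add server bk/srv2 198.51.100.5:80
+ *
+ * further keywords may follow as on a configuration "server" line, within
+ * the subset supported for dynamic servers.
+ */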
+static int cli_parse_add_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *be;
+ struct server *srv;
+ char *be_name, *sv_name;
+ int errcode, argc;
+ int next_id;
+ const int parse_flags = SRV_PARSE_DYNAMIC|SRV_PARSE_PARSE_ADDR;
+
+ usermsgs_clr("CLI");
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ++args;
+
+ sv_name = be_name = args[1];
+ /* split backend/server arg */
+ while (*sv_name && *(++sv_name)) {
+ if (*sv_name == '/') {
+ *sv_name = '\0';
+ ++sv_name;
+ break;
+ }
+ }
+
+ if (!*sv_name)
+ return cli_err(appctx, "Require 'backend/server'.");
+
+ be = proxy_be_by_name(be_name);
+ if (!be)
+ return cli_err(appctx, "No such backend.");
+
+ if (!(be->lbprm.algo & BE_LB_PROP_DYN)) {
+		cli_err(appctx, "Backend must use a dynamic load-balancing algorithm to support dynamic servers.");
+ return 1;
+ }
+
+ if (be->mode == PR_MODE_SYSLOG) {
+		cli_err(appctx, "Dynamic servers cannot be used with log backends.");
+ return 1;
+ }
+
+ /* At this point, some operations might not be thread-safe anymore. This
+ * might be the case for parsing handlers which were designed to run
+ * only at the starting stage on single-thread mode.
+ *
+ * Activate thread isolation to ensure thread-safety.
+ */
+ thread_isolate();
+
+ args[1] = sv_name;
+ errcode = _srv_parse_init(&srv, args, &argc, be, parse_flags);
+ if (errcode)
+ goto out;
+
+ while (*args[argc]) {
+ errcode = _srv_parse_kw(srv, args, &argc, be, parse_flags);
+
+ if (errcode)
+ goto out;
+ }
+
+ errcode = _srv_parse_finalize(args, argc, srv, be, parse_flags);
+ if (errcode)
+ goto out;
+
+ /* A dynamic server does not currently support resolution.
+ *
+ * Initialize it explicitly to the "none" method to ensure no
+ * resolution will ever be executed.
+ */
+ srv->init_addr_methods = SRV_IADDR_NONE;
+
+ if (srv->mux_proto) {
+ int proto_mode = conn_pr_mode_to_proto_mode(be->mode);
+ const struct mux_proto_list *mux_ent;
+
+ mux_ent = conn_get_best_mux_entry(srv->mux_proto->token, PROTO_SIDE_BE, proto_mode);
+
+ if (!mux_ent || !isteq(mux_ent->token, srv->mux_proto->token)) {
+ ha_alert("MUX protocol is not usable for server.\n");
+ goto out;
+ }
+ }
+
+ if (srv_init_per_thr(srv) == -1) {
+ ha_alert("failed to allocate per-thread lists for server.\n");
+ goto out;
+ }
+
+ if (srv->max_idle_conns != 0) {
+ srv->curr_idle_thr = calloc(global.nbthread, sizeof(*srv->curr_idle_thr));
+ if (!srv->curr_idle_thr) {
+ ha_alert("failed to allocate counters for server.\n");
+ goto out;
+ }
+ }
+
+ if (!srv_alloc_lb(srv, be)) {
+ ha_alert("Failed to initialize load-balancing data.\n");
+ goto out;
+ }
+
+ if (!stats_allocate_proxy_counters_internal(&srv->extra_counters,
+ COUNTERS_SV,
+ STATS_PX_CAP_SRV)) {
+ ha_alert("failed to allocate extra counters for server.\n");
+ goto out;
+ }
+
+ /* ensure minconn/maxconn consistency */
+ srv_minmax_conn_apply(srv);
+
+ if (srv->use_ssl == 1 || (srv->proxy->options & PR_O_TCPCHK_SSL) ||
+ srv->check.use_ssl == 1) {
+ if (xprt_get(XPRT_SSL) && xprt_get(XPRT_SSL)->prepare_srv) {
+ if (xprt_get(XPRT_SSL)->prepare_srv(srv))
+ goto out;
+ }
+ }
+
+ if (srv->trackit) {
+ if (srv_apply_track(srv, be))
+ goto out;
+ }
+
+ /* Init check/agent if configured. The check is manually disabled
+	 * because a dynamic server is started in a disabled state. It must be
+	 * manually activated via an "enable health/agent" command.
+ */
+ if (srv->do_check) {
+ if (init_srv_check(srv))
+ goto out;
+
+ srv->check.state &= ~CHK_ST_ENABLED;
+ }
+
+ if (srv->do_agent) {
+ if (init_srv_agent_check(srv))
+ goto out;
+
+ srv->agent.state &= ~CHK_ST_ENABLED;
+ }
+
+ /* Init slowstart if needed. */
+ if (init_srv_slowstart(srv))
+ goto out;
+
+ /* Attach the server to the end of the proxy linked list. Note that this
+ * operation is not thread-safe so this is executed under thread
+ * isolation.
+ *
+ * If a server with the same name is found, reject the new one.
+ */
+
+ /* TODO use a double-linked list for px->srv */
+ if (be->srv) {
+ struct server *next = be->srv;
+
+ while (1) {
+ /* check for duplicate server */
+ if (strcmp(srv->id, next->id) == 0) {
+				ha_alert("A server with the same name already exists in this backend.\n");
+ goto out;
+ }
+
+ if (!next->next)
+ break;
+
+ next = next->next;
+ }
+
+ next->next = srv;
+ }
+ else {
+ srv->next = be->srv;
+ be->srv = srv;
+ }
+
+ /* generate the server id if not manually specified */
+ if (!srv->puid) {
+ next_id = get_next_id(&be->conf.used_server_id, 1);
+ if (!next_id) {
+			ha_alert("Cannot attach server: no more IDs left in the proxy.\n");
+ goto out;
+ }
+
+ srv->conf.id.key = srv->puid = next_id;
+ }
+ srv->conf.name.key = srv->id;
+
+ /* insert the server in the backend trees */
+ eb32_insert(&be->conf.used_server_id, &srv->conf.id);
+ ebis_insert(&be->conf.used_server_name, &srv->conf.name);
+	/* addr_node.key could be NULL if FQDN resolution is postponed (i.e. add server from cli) */
+ if (srv->addr_node.key)
+ ebis_insert(&be->used_server_addr, &srv->addr_node);
+
+	/* check if the LSB (odd bit) of reuse_cnt is set */
+ if (srv_id_reuse_cnt & 1) {
+ /* cnt must be increased */
+ srv_id_reuse_cnt++;
+ }
+ /* srv_id_reuse_cnt is always even at this stage, divide by 2 to
+ * save some space
+ * (sizeof(srv->rid) is half of sizeof(srv_id_reuse_cnt))
+ */
+ srv->rid = (srv_id_reuse_cnt) ? (srv_id_reuse_cnt / 2) : 0;
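+	/* Worked example: starting from 0, a first "add" stores rid=0; a
+	 * "del" sets the LSB (cnt=1); the next "add" bumps cnt to 2 and
+	 * stores rid=1; another del/add cycle yields cnt=4 and rid=2. The
+	 * rid thus counts delete/re-add cycles in half the width of the
+	 * global counter.
+	 */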
+
+	/* from this point on, adding the server cannot fail anymore:
+	 * publish EVENT_HDL_SUB_SERVER_ADD
+	 */
+ srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_ADD, srv, 1);
+
+ thread_release();
+
+ /* Start the check task. The server must be fully initialized.
+ *
+ * <srvpos> and <nbcheck> parameters are set to 1 as there should be no
+ * need to randomly spread the task interval for dynamic servers.
+ */
+ if (srv->check.state & CHK_ST_CONFIGURED) {
+ if (!start_check_task(&srv->check, 0, 1, 1))
+			ha_alert("System might be unstable, consider executing a reload");
+ }
+ if (srv->agent.state & CHK_ST_CONFIGURED) {
+ if (!start_check_task(&srv->agent, 0, 1, 1))
+			ha_alert("System might be unstable, consider executing a reload");
+ }
+
+ ha_notice("New server registered.\n");
+ cli_umsg(appctx, LOG_INFO);
+
+ return 0;
+
+out:
+ if (srv) {
+ if (srv->track)
+ release_server_track(srv);
+
+ if (srv->check.state & CHK_ST_CONFIGURED)
+ free_check(&srv->check);
+ if (srv->agent.state & CHK_ST_CONFIGURED)
+ free_check(&srv->agent);
+
+ /* remove the server from the proxy linked list */
+ _srv_detach(srv);
+ }
+
+ thread_release();
+
+ if (!usermsgs_empty())
+ cli_umsgerr(appctx);
+
+ if (srv)
+ srv_drop(srv);
+
+ return 1;
+}
+
+/* Parse a "del server" command
+ * Returns 0 if the server has been successfully deleted, 1 on failure.
+ */
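+/* Example (illustrative) sequence on the stats socket; the server must be
+ * in maintenance before removal ("bk" and "srv2" are hypothetical names):
+ *
+ *   set server bk/srv2 state maint
+ *   del server bk/srv2
+ */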
+static int cli_parse_delete_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *be;
+ struct server *srv;
+ struct server *prev_del;
+ struct ist be_name, sv_name;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ++args;
+
+ /* The proxy servers list is currently not protected by a lock so this
+ * requires thread isolation. In addition, any place referencing the
+ * server about to be deleted would be unsafe after our operation, so
+ * we must be certain to be alone so that no other thread has even
+ * started to grab a temporary reference to this server.
+ */
+ thread_isolate_full();
+
+ sv_name = ist(args[1]);
+ be_name = istsplit(&sv_name, '/');
+ if (!istlen(sv_name)) {
+ cli_err(appctx, "Require 'backend/server'.");
+ goto out;
+ }
+
+ if (!(be = proxy_be_by_name(ist0(be_name)))) {
+ cli_err(appctx, "No such backend.");
+ goto out;
+ }
+ if (!(srv = server_find_by_name(be, ist0(sv_name)))) {
+ cli_err(appctx, "No such server.");
+ goto out;
+ }
+
+ if (srv->flags & SRV_F_NON_PURGEABLE) {
+ cli_err(appctx, "This server cannot be removed at runtime due to other configuration elements pointing to it.");
+ goto out;
+ }
+
+ /* Only servers in maintenance can be deleted. This ensures that the
+ * server is not present anymore in the lb structures (through
+ * lbprm.set_server_status_down).
+ */
+ if (!(srv->cur_admin & SRV_ADMF_MAINT)) {
+ cli_err(appctx, "Only servers in maintenance mode can be deleted.");
+ goto out;
+ }
+
+ /* Ensure that there is no active/idle/pending connection on the server.
+ *
+ * TODO idle connections should not prevent server deletion. A proper
+ * cleanup function should be implemented to be used here.
+ */
+ if (srv->cur_sess || srv->curr_idle_conns ||
+ !eb_is_empty(&srv->queue.head) || srv_has_streams(srv)) {
+ cli_err(appctx, "Server still has connections attached to it, cannot remove it.");
+ goto out;
+ }
+
+	/* from this point on, the removal cannot fail anymore:
+	 * publish EVENT_HDL_SUB_SERVER_DEL
+	 */
+ srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_DEL, srv, 1);
+
+ /* remove srv from tracking list */
+ if (srv->track)
+ release_server_track(srv);
+
+ /* stop the check task if running */
+ if (srv->check.state & CHK_ST_CONFIGURED)
+ check_purge(&srv->check);
+ if (srv->agent.state & CHK_ST_CONFIGURED)
+ check_purge(&srv->agent);
+
+ /* detach the server from the proxy linked list
+ * The proxy servers list is currently not protected by a lock, so this
+ * requires thread_isolate/release.
+ */
+ _srv_detach(srv);
+
+	/* Some deleted servers could still point to us using their 'next',
+	 * so update them as needed. Note the small race between the POP and
+	 * the APPEND; in this situation it is not an issue since we are
+	 * under full thread isolation.
+	 */
+ while ((prev_del = MT_LIST_POP(&srv->prev_deleted, struct server *, prev_deleted))) {
+ /* update its 'next' ptr */
+ prev_del->next = srv->next;
+ if (srv->next) {
+ /* now it is our 'next' responsibility */
+ MT_LIST_APPEND(&srv->next->prev_deleted, &prev_del->prev_deleted);
+ }
+ }
+
+	/* we ourselves need to inform our 'next' that we still point to it */
+ if (srv->next)
+ MT_LIST_APPEND(&srv->next->prev_deleted, &srv->prev_deleted);
+
+	/* remove srv from the id/name/addr trees */
+ eb32_delete(&srv->conf.id);
+ ebpt_delete(&srv->conf.name);
+ if (srv->addr_node.key)
+ ebpt_delete(&srv->addr_node);
+
+ /* remove srv from idle_node tree for idle conn cleanup */
+ eb32_delete(&srv->idle_node);
+
+	/* Flag the server as deleted: despite being removed from the primary
+	 * server list, the server data may still be accessed through a valid
+	 * pointer. The deleted flag helps detect when a server is in this
+	 * transient removal state, i.e. removed from the list but not yet
+	 * freed/purged from memory.
+	 */
+ srv->flags |= SRV_F_DELETED;
+
+ /* set LSB bit (odd bit) for reuse_cnt */
+ srv_id_reuse_cnt |= 1;
+
+ thread_release();
+
+ ha_notice("Server deleted.\n");
+ srv_drop(srv);
+
+ cli_msg(appctx, LOG_INFO, "Server deleted.");
+
+ return 0;
+
+out:
+ thread_release();
+
+ return 1;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "disable", "agent", NULL }, "disable agent : disable agent checks", cli_parse_disable_agent, NULL },
+ { { "disable", "health", NULL }, "disable health : disable health checks", cli_parse_disable_health, NULL },
+ { { "disable", "server", NULL }, "disable server (DEPRECATED) : disable a server for maintenance (use 'set server' instead)", cli_parse_disable_server, NULL },
+ { { "enable", "agent", NULL }, "enable agent : enable agent checks", cli_parse_enable_agent, NULL },
+ { { "enable", "health", NULL }, "enable health : enable health checks", cli_parse_enable_health, NULL },
+ { { "enable", "server", NULL }, "enable server (DEPRECATED) : enable a disabled server (use 'set server' instead)", cli_parse_enable_server, NULL },
+ { { "set", "maxconn", "server", NULL }, "set maxconn server <bk>/<srv> : change a server's maxconn setting", cli_parse_set_maxconn_server, NULL },
+ { { "set", "server", NULL }, "set server <bk>/<srv> [opts] : change a server's state, weight, address or ssl", cli_parse_set_server },
+ { { "get", "weight", NULL }, "get weight <bk>/<srv> : report a server's current weight", cli_parse_get_weight },
+ { { "set", "weight", NULL }, "set weight <bk>/<srv> (DEPRECATED) : change a server's weight (use 'set server' instead)", cli_parse_set_weight },
+ { { "add", "server", NULL }, "add server <bk>/<srv> : create a new server", cli_parse_add_server, NULL },
+ { { "del", "server", NULL }, "del server <bk>/<srv> : remove a dynamically added server", cli_parse_delete_server, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/* Prepare a server <srv> to track check status of another one. <srv>.<trackit>
+ * field is used to retrieve the identifier of the tracked server, either with
+ * the format "proxy/server" or just "server". <curproxy> must point to the
+ * backend owning <srv>; if no proxy is specified in <trackit>, it will be used
+ * to find the tracked server.
+ *
+ * Returns 0 if the server track has been activated else non-zero.
+ *
+ * Not thread-safe.
+ */
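+/* Example (illustrative) configuration driving this function, with
+ * hypothetical names:
+ *
+ *   server srv1 192.0.2.1:80 track be_main/srv_main
+ *
+ * <trackit> then holds "be_main/srv_main" and is split below into the
+ * proxy and server parts.
+ */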
+int srv_apply_track(struct server *srv, struct proxy *curproxy)
+{
+ struct proxy *px;
+ struct server *strack, *loop;
+ char *pname, *sname;
+
+ if (!srv->trackit)
+ return 1;
+
+ pname = srv->trackit;
+ sname = strrchr(pname, '/');
+
+ if (sname) {
+ *sname++ = '\0';
+ }
+ else {
+ sname = pname;
+ pname = NULL;
+ }
+
+ if (pname) {
+ px = proxy_be_by_name(pname);
+ if (!px) {
+ ha_alert("unable to find required proxy '%s' for tracking.\n",
+ pname);
+ return 1;
+ }
+ }
+ else {
+ px = curproxy;
+ }
+
+ strack = findserver(px, sname);
+ if (!strack) {
+ ha_alert("unable to find required server '%s' for tracking.\n",
+ sname);
+ return 1;
+ }
+
+ if (strack->flags & SRV_F_DYNAMIC) {
+ ha_alert("unable to use %s/%s for tracking as it is a dynamic server.\n",
+ px->id, strack->id);
+ return 1;
+ }
+
+ if (!strack->do_check && !strack->do_agent && !strack->track &&
+ !strack->trackit) {
+ ha_alert("unable to use %s/%s for "
+ "tracking as it does not have any check nor agent enabled.\n",
+ px->id, strack->id);
+ return 1;
+ }
+
+ for (loop = strack->track; loop && loop != srv; loop = loop->track)
+ ;
+
+ if (srv == strack || loop) {
+ ha_alert("unable to track %s/%s as it "
+ "belongs to a tracking chain looping back to %s/%s.\n",
+ px->id, strack->id, px->id,
+ srv == strack ? strack->id : loop->id);
+ return 1;
+ }
+
+ if (curproxy != px &&
+ (curproxy->options & PR_O_DISABLE404) != (px->options & PR_O_DISABLE404)) {
+		ha_alert("unable to use %s/%s for "
+ "tracking: disable-on-404 option inconsistency.\n",
+ px->id, strack->id);
+ return 1;
+ }
+
+ srv->track = strack;
+ srv->tracknext = strack->trackers;
+ strack->trackers = srv;
+ strack->flags |= SRV_F_NON_PURGEABLE;
+
+ ha_free(&srv->trackit);
+
+ return 0;
+}
+
+/* This function propagates srv state change to lb algorithms */
+static void srv_lb_propagate(struct server *s)
+{
+ struct proxy *px = s->proxy;
+
+ if (px->lbprm.update_server_eweight)
+ px->lbprm.update_server_eweight(s);
+ else if (srv_willbe_usable(s)) {
+ if (px->lbprm.set_server_status_up)
+ px->lbprm.set_server_status_up(s);
+ }
+ else {
+ if (px->lbprm.set_server_status_down)
+ px->lbprm.set_server_status_down(s);
+ }
+}
+
+/* directly update server state based on an operational change
+ * (compare current and next state to know which transition to apply)
+ *
+ * The function returns the number of requeued sessions (either taken by
+ * the server or redispatched to other servers) due to the server state
+ * change.
+ */
+static int _srv_update_status_op(struct server *s, enum srv_op_st_chg_cause cause)
+{
+ struct buffer *tmptrash = NULL;
+ int log_level;
+ int srv_was_stopping = (s->cur_state == SRV_ST_STOPPING) || (s->cur_admin & SRV_ADMF_DRAIN);
+ int xferred = 0;
+
+ if ((s->cur_state != SRV_ST_STOPPED) && (s->next_state == SRV_ST_STOPPED)) {
+ srv_lb_propagate(s);
+
+ if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
+ srv_shutdown_streams(s, SF_ERR_DOWN);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is DOWN", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_op_chg_cause(tmptrash, s, cause);
+ srv_append_more(tmptrash, s, xferred, 0);
+
+ ha_warning("%s.\n", tmptrash->area);
+
+ /* we don't send an alert if the server was previously paused */
+ log_level = srv_was_stopping ? LOG_NOTICE : LOG_ALERT;
+ send_log(s->proxy, log_level, "%s.\n",
+ tmptrash->area);
+ send_email_alert(s, log_level, "%s",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+ }
+ else if ((s->cur_state != SRV_ST_STOPPING) && (s->next_state == SRV_ST_STOPPING)) {
+ srv_lb_propagate(s);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is stopping", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_op_chg_cause(tmptrash, s, cause);
+ srv_append_more(tmptrash, s, xferred, 0);
+
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+ }
+ else if (((s->cur_state != SRV_ST_RUNNING) && (s->next_state == SRV_ST_RUNNING))
+ || ((s->cur_state != SRV_ST_STARTING) && (s->next_state == SRV_ST_STARTING))) {
+
+ if (s->next_state == SRV_ST_STARTING && s->warmup)
+ task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
+
+ server_recalc_eweight(s, 0);
+ /* now propagate the status change to any LB algorithms */
+ srv_lb_propagate(s);
+
+ /* If the server is set with "on-marked-up shutdown-backup-sessions",
+ * and it's not a backup server and its effective weight is > 0,
+ * then it can accept new connections, so we shut down all streams
+ * on all backup servers.
+ */
+ if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
+ !(s->flags & SRV_F_BACKUP) && s->next_eweight)
+ srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
+
+ /* check if we can handle some connections queued at the proxy. We
+ * will take as many as we can handle.
+ */
+ xferred = pendconn_grab_from_px(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is UP", s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ srv_append_op_chg_cause(tmptrash, s, cause);
+ srv_append_more(tmptrash, s, xferred, 0);
+
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ send_email_alert(s, LOG_NOTICE, "%s",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+ }
+ else if (s->cur_eweight != s->next_eweight) {
+ /* now propagate the status change to any LB algorithms */
+ srv_lb_propagate(s);
+ }
+ return xferred;
+}
+
+/* deduce and update the server state from an administrative change
+ * (use current and next admin to deduce the administrative transition that
+ * may result in server state update)
+ *
+ * The function returns the number of requeued sessions (either taken by
+ * the server or redispatched to other servers) due to the server state
+ * change.
+ */
+static int _srv_update_status_adm(struct server *s, enum srv_adm_st_chg_cause cause)
+{
+ struct buffer *tmptrash = NULL;
+ int srv_was_stopping = (s->cur_state == SRV_ST_STOPPING) || (s->cur_admin & SRV_ADMF_DRAIN);
+ int xferred = 0;
+
+ /* Maintenance must also disable health checks */
+ if (!(s->cur_admin & SRV_ADMF_MAINT) && (s->next_admin & SRV_ADMF_MAINT)) {
+ if (s->check.state & CHK_ST_ENABLED) {
+ s->check.state |= CHK_ST_PAUSED;
+ s->check.health = 0;
+ }
+
+ if (s->cur_state == SRV_ST_STOPPED) { /* server was already down */
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s was DOWN and now enters maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id);
+ srv_append_adm_chg_cause(tmptrash, s, cause);
+ srv_append_more(tmptrash, s, -1, (s->next_admin & SRV_ADMF_FMAINT));
+
+ if (!(global.mode & MODE_STARTING)) {
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ }
+ free_trash_chunk(tmptrash);
+ }
+ }
+ else { /* server was still running */
+ s->check.health = 0; /* failure */
+
+ s->next_state = SRV_ST_STOPPED;
+ srv_lb_propagate(s);
+
+ if (s->onmarkeddown & HANA_ONMARKEDDOWN_SHUTDOWNSESSIONS)
+ srv_shutdown_streams(s, SF_ERR_DOWN);
+
+ /* force connection cleanup on the given server */
+ srv_cleanup_connections(s);
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is going DOWN for maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+ srv_append_adm_chg_cause(tmptrash, s, cause);
+ srv_append_more(tmptrash, s, xferred, (s->next_admin & SRV_ADMF_FMAINT));
+
+ if (!(global.mode & MODE_STARTING)) {
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, srv_was_stopping ? LOG_NOTICE : LOG_ALERT, "%s.\n",
+ tmptrash->area);
+ }
+ free_trash_chunk(tmptrash);
+ }
+ }
+ }
+ else if ((s->cur_admin & SRV_ADMF_MAINT) && !(s->next_admin & SRV_ADMF_MAINT)) {
+ /* OK here we're leaving maintenance, we have many things to check,
+	 * because the server might be coming back up depending on
+ * its state. In practice, leaving maintenance means that we should
+ * immediately turn to UP (more or less the slowstart) under the
+ * following conditions :
+ * - server is neither checked nor tracked
+ * - server tracks another server which is not checked
+ * - server tracks another server which is already up
+ * Which sums up as something simpler :
+ * "either the tracking server is up or the server's checks are disabled
+ * or up". Otherwise we only re-enable health checks. There's a special
+ * case associated to the stopping state which can be inherited. Note
+ * that the server might still be in drain mode, which is naturally dealt
+ * with by the lower level functions.
+ */
+ if (s->check.state & CHK_ST_ENABLED) {
+ s->check.state &= ~CHK_ST_PAUSED;
+ s->check.health = s->check.rise; /* start OK but check immediately */
+ }
+
+ if ((!s->track || s->track->next_state != SRV_ST_STOPPED) &&
+ (!(s->agent.state & CHK_ST_ENABLED) || (s->agent.health >= s->agent.rise)) &&
+ (!(s->check.state & CHK_ST_ENABLED) || (s->check.health >= s->check.rise))) {
+ if (s->track && s->track->next_state == SRV_ST_STOPPING) {
+ s->next_state = SRV_ST_STOPPING;
+ }
+ else {
+ s->next_state = SRV_ST_STARTING;
+ if (s->slowstart > 0) {
+ if (s->warmup)
+ task_schedule(s->warmup, tick_add(now_ms, MS_TO_TICKS(MAX(1000, s->slowstart / 20))));
+ }
+ else
+ s->next_state = SRV_ST_RUNNING;
+ }
+
+ }
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (!(s->next_admin & SRV_ADMF_FMAINT) && (s->cur_admin & SRV_ADMF_FMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s/%s (leaving forced maintenance)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ if (!(s->next_admin & SRV_ADMF_RMAINT) && (s->cur_admin & SRV_ADMF_RMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s ('%s') is %s/%s (resolves again)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id, s->hostname,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ if (!(s->next_admin & SRV_ADMF_IMAINT) && (s->cur_admin & SRV_ADMF_IMAINT)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s/%s (leaving maintenance)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP",
+ (s->next_admin & SRV_ADMF_DRAIN) ? "DRAIN" : "READY");
+ }
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+
+ server_recalc_eweight(s, 0);
+ /* now propagate the status change to any LB algorithms */
+ srv_lb_propagate(s);
+
+ /* If the server is set with "on-marked-up shutdown-backup-sessions",
+ * and it's not a backup server and its effective weight is > 0,
+ * then it can accept new connections, so we shut down all streams
+ * on all backup servers.
+ */
+ if ((s->onmarkedup & HANA_ONMARKEDUP_SHUTDOWNBACKUPSESSIONS) &&
+ !(s->flags & SRV_F_BACKUP) && s->next_eweight)
+ srv_shutdown_backup_streams(s->proxy, SF_ERR_UP);
+
+ /* check if we can handle some connections queued at the proxy. We
+ * will take as many as we can handle.
+ */
+ xferred = pendconn_grab_from_px(s);
+ }
+ else if (s->next_admin & SRV_ADMF_MAINT) {
+ /* remaining in maintenance mode, let's inform precisely about the
+ * situation.
+ */
+ if (!(s->next_admin & SRV_ADMF_FMAINT) && (s->cur_admin & SRV_ADMF_FMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is leaving forced maintenance but remains in maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+ }
+ if (!(s->next_admin & SRV_ADMF_RMAINT) && (s->cur_admin & SRV_ADMF_RMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s ('%s') resolves again but remains in maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id, s->hostname);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+ }
+ else if (!(s->next_admin & SRV_ADMF_IMAINT) && (s->cur_admin & SRV_ADMF_IMAINT)) {
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s remains in forced maintenance",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+ }
+ /* don't report anything when leaving drain mode and remaining in maintenance */
+ }
+
+ if (!(s->next_admin & SRV_ADMF_MAINT)) {
+ if (!(s->cur_admin & SRV_ADMF_DRAIN) && (s->next_admin & SRV_ADMF_DRAIN)) {
+ /* drain state is applied only if not yet in maint */
+
+ srv_lb_propagate(s);
+
+ /* we might have streams queued on this server and waiting for
+ * a connection. Those which are redispatchable will be queued
+ * to another server or to the proxy itself.
+ */
+ xferred = pendconn_redistribute(s);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ chunk_printf(tmptrash, "%sServer %s/%s enters drain state",
+ s->flags & SRV_F_BACKUP ? "Backup " : "", s->proxy->id, s->id);
+ srv_append_adm_chg_cause(tmptrash, s, cause);
+ srv_append_more(tmptrash, s, xferred, (s->next_admin & SRV_ADMF_FDRAIN));
+
+ if (!(global.mode & MODE_STARTING)) {
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ send_email_alert(s, LOG_NOTICE, "%s",
+ tmptrash->area);
+ }
+ free_trash_chunk(tmptrash);
+ }
+ }
+ else if ((s->cur_admin & SRV_ADMF_DRAIN) && !(s->next_admin & SRV_ADMF_DRAIN)) {
+ /* OK completely leaving drain mode */
+ server_recalc_eweight(s, 0);
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (s->cur_admin & SRV_ADMF_FDRAIN) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s (leaving forced drain)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
+ }
+ else {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s is %s (leaving drain)",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id,
+ (s->next_state == SRV_ST_STOPPED) ? "DOWN" : "UP");
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ }
+
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+
+ /* now propagate the status change to any LB algorithms */
+ srv_lb_propagate(s);
+ }
+ else if ((s->next_admin & SRV_ADMF_DRAIN)) {
+ /* remaining in drain mode after removing one of its flags */
+
+ tmptrash = alloc_trash_chunk();
+ if (tmptrash) {
+ if (!(s->next_admin & SRV_ADMF_FDRAIN)) {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s remains in drain mode",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+
+ if (s->track) /* normally it's mandatory here */
+ chunk_appendf(tmptrash, " via %s/%s",
+ s->track->proxy->id, s->track->id);
+ }
+ else {
+ chunk_printf(tmptrash,
+ "%sServer %s/%s remains in forced drain mode",
+ s->flags & SRV_F_BACKUP ? "Backup " : "",
+ s->proxy->id, s->id);
+ }
+ ha_warning("%s.\n", tmptrash->area);
+ send_log(s->proxy, LOG_NOTICE, "%s.\n",
+ tmptrash->area);
+ free_trash_chunk(tmptrash);
+ }
+ }
+ }
+ return xferred;
+}
+
+/*
+ * This function applies the server's status changes.
+ *
+ * Must be called with the server lock held. This may also be called at init
+ * time as the result of parsing the state file, in which case no lock will be
+ * held, and the server's warmup task can be null.
+ * <type> should be 0 for operational and 1 for administrative
+ * <cause> must be srv_op_st_chg_cause enum for operational and
+ * srv_adm_st_chg_cause enum for administrative
+ */
+static void srv_update_status(struct server *s, int type, int cause)
+{
+ int prev_srv_count = s->proxy->srv_bck + s->proxy->srv_act;
+ enum srv_state srv_prev_state = s->cur_state;
+ union {
+ struct event_hdl_cb_data_server_state state;
+ struct event_hdl_cb_data_server_admin admin;
+ struct event_hdl_cb_data_server common;
+ } cb_data;
+ int requeued;
+
+ /* prepare common server event data */
+ _srv_event_hdl_prepare(&cb_data.common, s, 0);
+
+ if (type) {
+ cb_data.admin.safe.cause = cause;
+ cb_data.admin.safe.old_admin = s->cur_admin;
+ cb_data.admin.safe.new_admin = s->next_admin;
+ requeued = _srv_update_status_adm(s, cause);
+ cb_data.admin.safe.requeued = requeued;
+ /* publish admin change */
+ _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_ADMIN, cb_data.admin, s);
+ }
+ else
+ requeued = _srv_update_status_op(s, cause);
+
+ /* explicitly commit state changes (even if they were already applied
+ * implicitly by some lb state change function), so we don't miss anything
+ */
+ srv_lb_commit_status(s);
+
+ /* check if server stats must be updated due to the server state change */
+ if (srv_prev_state != s->cur_state) {
+ if (srv_prev_state == SRV_ST_STOPPED) {
+ /* server was down and no longer is */
+ if (s->last_change < ns_to_sec(now_ns)) // ignore negative times
+ s->down_time += ns_to_sec(now_ns) - s->last_change;
+ _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_UP, cb_data.common, s);
+ }
+ else if (s->cur_state == SRV_ST_STOPPED) {
+ /* server was up and is currently down */
+ s->counters.down_trans++;
+ _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_DOWN, cb_data.common, s);
+ }
+ s->last_change = ns_to_sec(now_ns);
+
+ /* publish the state change */
+ _srv_event_hdl_prepare_state(&cb_data.state,
+ s, type, cause, srv_prev_state, requeued);
+ _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_STATE, cb_data.state, s);
+ }
+
+ /* check if backend stats must be updated due to the server state change */
+ if (prev_srv_count && s->proxy->srv_bck == 0 && s->proxy->srv_act == 0)
+ set_backend_down(s->proxy); /* backend going down */
+ else if (!prev_srv_count && (s->proxy->srv_bck || s->proxy->srv_act)) {
+ /* backend was down and is back up again:
+ * no helper function, updating last_change and backend downtime stats
+ */
+ if (s->proxy->last_change < ns_to_sec(now_ns)) // ignore negative times
+ s->proxy->down_time += ns_to_sec(now_ns) - s->proxy->last_change;
+ s->proxy->last_change = ns_to_sec(now_ns);
+ }
+}
+
+struct task *srv_cleanup_toremove_conns(struct task *task, void *context, unsigned int state)
+{
+ struct connection *conn;
+
+ while ((conn = MT_LIST_POP(&idle_conns[tid].toremove_conns,
+ struct connection *, toremove_list)) != NULL) {
+ conn->mux->destroy(conn->ctx);
+ }
+
+ return task;
+}
+
+/* Move <toremove_nb> count connections from <list> storage to <toremove_list>
+ * list storage. -1 means moving all of them.
+ *
+ * Returns the number of connections moved.
+ *
+ * Must be called with idle_conns_lock held.
+ */
+static int srv_migrate_conns_to_remove(struct list *list, struct mt_list *toremove_list, int toremove_nb)
+{
+ struct connection *conn;
+ int i = 0;
+
+ while (!LIST_ISEMPTY(list)) {
+ if (toremove_nb != -1 && i >= toremove_nb)
+ break;
+
+ conn = LIST_ELEM(list->n, struct connection *, idle_list);
+ conn_delete_from_tree(conn);
+ MT_LIST_APPEND(toremove_list, &conn->toremove_list);
+ i++;
+ }
+
+ return i;
+}
+
+/* Cleanup connections for a given server. This is useful when entering forced
+ * maintenance or when changing the server's address or port at runtime.
+ */
+static void srv_cleanup_connections(struct server *srv)
+{
+ int did_remove;
+ int i;
+
+ /* nothing to do if pool-max-conn is null */
+ if (!srv->max_idle_conns)
+ return;
+
+ /* check all threads starting with ours */
+ for (i = tid;;) {
+ did_remove = 0;
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+ if (srv_migrate_conns_to_remove(&srv->per_thr[i].idle_conn_list, &idle_conns[i].toremove_conns, -1) > 0)
+ did_remove = 1;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+ if (did_remove)
+ task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
+
+ if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid)
+ break;
+ }
+}
+
+/* removes an idle conn after updating the server idle conns counters */
+void srv_release_conn(struct server *srv, struct connection *conn)
+{
+ if (conn->flags & CO_FL_LIST_MASK) {
+ /* The connection is currently in the server's idle list, so tell it
+ * there's one less connection available in that list.
+ */
+ _HA_ATOMIC_DEC(&srv->curr_idle_conns);
+ _HA_ATOMIC_DEC(conn->flags & CO_FL_SAFE_LIST ? &srv->curr_safe_nb : &srv->curr_idle_nb);
+ _HA_ATOMIC_DEC(&srv->curr_idle_thr[tid]);
+ }
+ else {
+ /* The connection is not private and not in any server's idle
+ * list, so decrement the current number of used connections
+ */
+ _HA_ATOMIC_DEC(&srv->curr_used_conns);
+ }
+
+ /* Remove the connection from any tree (safe, idle or available) */
+ if (conn->hash_node) {
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(conn);
+ conn->flags &= ~CO_FL_LIST_MASK;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+}
+
+/* retrieve a connection from its <hash> in <tree>
+ * returns NULL if no connection found
+ */
+struct connection *srv_lookup_conn(struct eb_root *tree, uint64_t hash)
+{
+ struct eb64_node *node = NULL;
+ struct connection *conn = NULL;
+ struct conn_hash_node *hash_node = NULL;
+
+ node = eb64_lookup(tree, hash);
+ if (node) {
+ hash_node = ebmb_entry(node, struct conn_hash_node, node);
+ conn = hash_node->conn;
+ }
+
+ return conn;
+}
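+
+/* Typical usage (illustrative sketch): look up the first connection matching
+ * a <hash> in a per-thread tree, then walk its duplicates with
+ * srv_lookup_conn_next():
+ *
+ *     struct connection *conn;
+ *
+ *     conn = srv_lookup_conn(&srv->per_thr[tid].safe_conns, hash);
+ *     while (conn && !is_reusable(conn))  // is_reusable() is hypothetical
+ *         conn = srv_lookup_conn_next(conn);
+ */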
+
+/* retrieve the next connection sharing the same hash as <conn>
+ * returns NULL if no connection found
+ */
+struct connection *srv_lookup_conn_next(struct connection *conn)
+{
+ struct eb64_node *node = NULL;
+ struct connection *next_conn = NULL;
+ struct conn_hash_node *hash_node = NULL;
+
+ node = eb64_next_dup(&conn->hash_node->node);
+ if (node) {
+ hash_node = eb64_entry(node, struct conn_hash_node, node);
+ next_conn = hash_node->conn;
+ }
+
+ return next_conn;
+}
+
+/* Add <conn> in <srv> idle trees. Set <is_safe> if connection is deemed safe
+ * for reuse.
+ *
+ * This function is a simple wrapper for tree insertion. It should only be used
+ * internally, or when briefly removing the connection to avoid a takeover on
+ * it before reinserting it with this function. In other contexts, prefer the
+ * full-featured srv_add_to_idle_list().
+ *
+ * Must be called with idle_conns_lock.
+ */
+void _srv_add_idle(struct server *srv, struct connection *conn, int is_safe)
+{
+ struct eb_root *tree = is_safe ? &srv->per_thr[tid].safe_conns :
+ &srv->per_thr[tid].idle_conns;
+
+ /* first insert in idle or safe tree. */
+ eb64_insert(tree, &conn->hash_node->node);
+
+ /* insert in list sorted by connection usage. */
+ LIST_APPEND(&srv->per_thr[tid].idle_conn_list, &conn->idle_list);
+}
+
+/* This adds an idle connection to the server's list if the connection is
+ * reusable, not held by any owner anymore, but still has available streams.
+ */
+int srv_add_to_idle_list(struct server *srv, struct connection *conn, int is_safe)
+{
+ /* we try to keep the connection in the server's idle list
+ * if we don't have too many FD in use, and if the number of
+ * idle+current conns is lower than what was observed before
+ * last purge, or if we already don't have idle conns for the
+ * current thread and we don't exceed last count by global.nbthread.
+ */
+ if (!(conn->flags & CO_FL_PRIVATE) &&
+ srv && srv->pool_purge_delay > 0 &&
+ ((srv->proxy->options & PR_O_REUSE_MASK) != PR_O_REUSE_NEVR) &&
+ ha_used_fds < global.tune.pool_high_count &&
+ (srv->max_idle_conns == -1 || srv->max_idle_conns > srv->curr_idle_conns) &&
+ ((eb_is_empty(&srv->per_thr[tid].safe_conns) &&
+ (is_safe || eb_is_empty(&srv->per_thr[tid].idle_conns))) ||
+ (ha_used_fds < global.tune.pool_low_count &&
+ (srv->curr_used_conns + srv->curr_idle_conns <=
+ MAX(srv->curr_used_conns, srv->est_need_conns) + srv->low_idle_conns ||
+ (conn->flags & CO_FL_REVERSED)))) &&
+ !conn->mux->used_streams(conn) && conn->mux->avail_streams(conn)) {
+ int retadd;
+
+ retadd = _HA_ATOMIC_ADD_FETCH(&srv->curr_idle_conns, 1);
+ if (retadd > srv->max_idle_conns) {
+ _HA_ATOMIC_DEC(&srv->curr_idle_conns);
+ return 0;
+ }
+ _HA_ATOMIC_DEC(&srv->curr_used_conns);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ conn_delete_from_tree(conn);
+
+ if (is_safe) {
+ conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_SAFE_LIST;
+ _srv_add_idle(srv, conn, 1);
+ _HA_ATOMIC_INC(&srv->curr_safe_nb);
+ } else {
+ conn->flags = (conn->flags & ~CO_FL_LIST_MASK) | CO_FL_IDLE_LIST;
+ _srv_add_idle(srv, conn, 0);
+ _HA_ATOMIC_INC(&srv->curr_idle_nb);
+ }
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ _HA_ATOMIC_INC(&srv->curr_idle_thr[tid]);
+
+ __ha_barrier_full();
+ if ((volatile void *)srv->idle_node.node.leaf_p == NULL) {
+ HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
+ if ((volatile void *)srv->idle_node.node.leaf_p == NULL) {
+ srv->idle_node.key = tick_add(srv->pool_purge_delay,
+ now_ms);
+ eb32_insert(&idle_conn_srv, &srv->idle_node);
+ if (!task_in_wq(idle_conn_task) && !task_in_rq(idle_conn_task)) {
+ task_schedule(idle_conn_task, srv->idle_node.key);
+ }
+ }
+ HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/* Insert the <conn> connection into the <srv> server's available list. This is
+ * reserved for backend connections currently in use with usable streams left.
+ */
+void srv_add_to_avail_list(struct server *srv, struct connection *conn)
+{
+ /* connection cannot be in idle list if used as an avail idle conn. */
+ BUG_ON(LIST_INLIST(&conn->idle_list));
+ eb64_insert(&srv->per_thr[tid].avail_conns, &conn->hash_node->node);
+}
+
+struct task *srv_cleanup_idle_conns(struct task *task, void *context, unsigned int state)
+{
+ struct server *srv;
+ struct eb32_node *eb;
+ int i;
+ unsigned int next_wakeup;
+
+ next_wakeup = TICK_ETERNITY;
+ HA_SPIN_LOCK(OTHER_LOCK, &idle_conn_srv_lock);
+ while (1) {
+ int exceed_conns;
+ int to_kill;
+ int curr_idle;
+
+ eb = eb32_lookup_ge(&idle_conn_srv, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+
+ eb = eb32_first(&idle_conn_srv);
+ if (likely(!eb))
+ break;
+ }
+ if (tick_is_lt(now_ms, eb->key)) {
+ /* timer not expired yet, revisit it later */
+ next_wakeup = eb->key;
+ break;
+ }
+ srv = eb32_entry(eb, struct server, idle_node);
+
+ /* Calculate how many idle connections we want to kill :
+ * we want to remove half the difference between the total
+ * of established connections (used or idle) and the max
+ * number of used connections.
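+ * For example (illustrative numbers): with 10 used + 8 idle connections
+ * and max_used = 12, exceed_conns = 18 - 12 = 6, so 3 would be killed.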
+ */
+ curr_idle = srv->curr_idle_conns;
+ if (curr_idle == 0)
+ goto remove;
+ exceed_conns = srv->curr_used_conns + curr_idle - MAX(srv->max_used_conns, srv->est_need_conns);
+ exceed_conns = to_kill = exceed_conns / 2 + (exceed_conns & 1);
+
+ srv->est_need_conns = (srv->est_need_conns + srv->max_used_conns) / 2;
+ if (srv->est_need_conns < srv->max_used_conns)
+ srv->est_need_conns = srv->max_used_conns;
+
+ HA_ATOMIC_STORE(&srv->max_used_conns, srv->curr_used_conns);
+
+ if (exceed_conns <= 0)
+ goto remove;
+
+ /* check all threads starting with ours */
+ for (i = tid;;) {
+ int max_conn;
+ int j;
+ int did_remove = 0;
+
+ max_conn = (exceed_conns * srv->curr_idle_thr[i]) /
+ curr_idle + 1;
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+ j = srv_migrate_conns_to_remove(&srv->per_thr[i].idle_conn_list, &idle_conns[i].toremove_conns, max_conn);
+ if (j > 0)
+ did_remove = 1;
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[i].idle_conns_lock);
+
+ if (did_remove)
+ task_wakeup(idle_conns[i].cleanup_task, TASK_WOKEN_OTHER);
+
+ if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid)
+ break;
+ }
+remove:
+ eb32_delete(&srv->idle_node);
+
+ if (srv->curr_idle_conns) {
+ /* There are still more idle connections, add the
+ * server back in the tree.
+ */
+ srv->idle_node.key = tick_add(srv->pool_purge_delay, now_ms);
+ eb32_insert(&idle_conn_srv, &srv->idle_node);
+ next_wakeup = tick_first(next_wakeup, srv->idle_node.key);
+ }
+ }
+ HA_SPIN_UNLOCK(OTHER_LOCK, &idle_conn_srv_lock);
+
+ task->expire = next_wakeup;
+ return task;
+}
+
+/* Close remaining idle connections. This function is designed to be run on
+ * process shutdown. This guarantees a proper socket shutdown to avoid
+ * TIME_WAIT state. For a quick operation, only the ctrl layer is closed; the
+ * xprt stack is bypassed.
+ *
+ * This function is not thread-safe so it must only be called via a global
+ * deinit function.
+ */
+static void srv_close_idle_conns(struct server *srv)
+{
+ struct eb_root **cleaned_tree;
+ int i;
+
+ for (i = 0; i < global.nbthread; ++i) {
+ struct eb_root *conn_trees[] = {
+ &srv->per_thr[i].idle_conns,
+ &srv->per_thr[i].safe_conns,
+ &srv->per_thr[i].avail_conns,
+ NULL
+ };
+
+ for (cleaned_tree = conn_trees; *cleaned_tree; ++cleaned_tree) {
+ while (!eb_is_empty(*cleaned_tree)) {
+ struct ebmb_node *node = ebmb_first(*cleaned_tree);
+ struct conn_hash_node *conn_hash_node = ebmb_entry(node, struct conn_hash_node, node);
+ struct connection *conn = conn_hash_node->conn;
+
+ if (conn->ctrl->ctrl_close)
+ conn->ctrl->ctrl_close(conn);
+ conn_delete_from_tree(conn);
+ }
+ }
+ }
+}
+
+REGISTER_SERVER_DEINIT(srv_close_idle_conns);
+
+/* config parser for global "tune.idle-pool.shared", accepts "on" or "off" */
+static int cfg_parse_idle_pool_shared(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options |= GTUNE_IDLE_POOL_SHARED;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_IDLE_POOL_SHARED;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
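+
+/* Example usage in the global section (illustrative):
+ *
+ *     global
+ *         tune.idle-pool.shared off
+ */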
+
+/* config parser for global "tune.pool-{low,high}-fd-ratio" */
+static int cfg_parse_pool_fd_ratio(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ int arg = -1;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (*(args[1]) != 0)
+ arg = atoi(args[1]);
+
+ if (arg < 0 || arg > 100) {
+ memprintf(err, "'%s' expects an integer argument between 0 and 100.", args[0]);
+ return -1;
+ }
+
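+ /* index 10 is the character following "tune.pool-", i.e. 'h' or 'l' */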
+ if (args[0][10] == 'h')
+ global.tune.pool_high_ratio = arg;
+ else
+ global.tune.pool_low_ratio = arg;
+ return 0;
+}
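+
+/* Example usage in the global section (illustrative values):
+ *
+ *     global
+ *         tune.pool-high-fd-ratio 25
+ *         tune.pool-low-fd-ratio  20
+ */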
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.idle-pool.shared", cfg_parse_idle_pool_shared },
+ { CFG_GLOBAL, "tune.pool-high-fd-ratio", cfg_parse_pool_fd_ratio },
+ { CFG_GLOBAL, "tune.pool-low-fd-ratio", cfg_parse_pool_fd_ratio },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/server_state.c b/src/server_state.c
new file mode 100644
index 0000000..ebdcf3c
--- /dev/null
+++ b/src/server_state.c
@@ -0,0 +1,947 @@
+/*
+ * Server-state management functions.
+ *
+ * Copyright (C) 2021 HAProxy Technologies, Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+
+#include <import/eb64tree.h>
+#include <import/ebistree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/port_range.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/server.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+
+/* Update a server state using the parameters available in the params list.
+ * The caller must provide a supported version
+ * Grabs the server lock during operation.
+ */
+static void srv_state_srv_update(struct server *srv, int version, char **params)
+{
+ char *p;
+ struct buffer *msg;
+ const char *warning;
+
+ /* fields since version 1
+ * and common to all other upcoming versions
+ */
+ enum srv_state srv_op_state;
+ enum srv_admin srv_admin_state;
+ unsigned srv_uweight, srv_iweight;
+ unsigned long srv_last_time_change;
+ short srv_check_status;
+ enum chk_result srv_check_result;
+ int srv_check_health;
+ int srv_check_state, srv_agent_state;
+ int bk_f_forced_id;
+ int srv_f_forced_id;
+ int fqdn_set_by_cli;
+ const char *fqdn;
+ const char *port_st;
+ unsigned int port_svc;
+ char *srvrecord;
+ char *addr;
+ int partial_apply = 0;
+#ifdef USE_OPENSSL
+ int use_ssl;
+#endif
+
+ fqdn = NULL;
+ port_svc = 0;
+ msg = alloc_trash_chunk();
+ if (!msg)
+ goto end;
+
+ HA_SPIN_LOCK(SERVER_LOCK, &srv->lock);
+
+ /* Only version 1 supported for now, don't check it. Fields are :
+ * srv_addr: params[0]
+ * srv_op_state: params[1]
+ * srv_admin_state: params[2]
+ * srv_uweight: params[3]
+ * srv_iweight: params[4]
+ * srv_last_time_change: params[5]
+ * srv_check_status: params[6]
+ * srv_check_result: params[7]
+ * srv_check_health: params[8]
+ * srv_check_state: params[9]
+ * srv_agent_state: params[10]
+ * bk_f_forced_id: params[11]
+ * srv_f_forced_id: params[12]
+ * srv_fqdn: params[13]
+ * srv_port: params[14]
+ * srvrecord: params[15]
+ * srv_use_ssl: params[16]
+ * srv_check_port: params[17]
+ * srv_check_addr: params[18]
+ * srv_agent_addr: params[19]
+ * srv_agent_port: params[20]
+ */
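+
+ /* A state file line could carry these fields as follows (purely
+ * illustrative values):
+ *   192.168.1.10 2 0 1 1 3600 15 3 4 6 0 0 0 - 80 - 0 0 - - 0
+ */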
+
+ /* validating srv_op_state */
+ p = NULL;
+ errno = 0;
+ srv_op_state = strtol(params[1], &p, 10);
+ if ((p == params[1]) || errno == EINVAL || errno == ERANGE ||
+ (srv_op_state != SRV_ST_STOPPED &&
+ srv_op_state != SRV_ST_STARTING &&
+ srv_op_state != SRV_ST_RUNNING &&
+ srv_op_state != SRV_ST_STOPPING)) {
+ chunk_appendf(msg, ", invalid srv_op_state value '%s'", params[1]);
+ }
+
+ /* validating srv_admin_state */
+ p = NULL;
+ errno = 0;
+ srv_admin_state = strtol(params[2], &p, 10);
+ fqdn_set_by_cli = !!(srv_admin_state & SRV_ADMF_HMAINT);
+
+ /* inherited statuses will be recomputed later.
+ * Also disable SRV_ADMF_HMAINT flag (set from stats socket fqdn).
+ */
+ srv_admin_state &= ~SRV_ADMF_IDRAIN & ~SRV_ADMF_IMAINT & ~SRV_ADMF_HMAINT & ~SRV_ADMF_RMAINT;
+
+ if ((p == params[2]) || errno == EINVAL || errno == ERANGE ||
+ (srv_admin_state != 0 &&
+ srv_admin_state != SRV_ADMF_FMAINT &&
+ srv_admin_state != SRV_ADMF_CMAINT &&
+ srv_admin_state != (SRV_ADMF_CMAINT | SRV_ADMF_FMAINT) &&
+ srv_admin_state != (SRV_ADMF_CMAINT | SRV_ADMF_FDRAIN) &&
+ srv_admin_state != SRV_ADMF_FDRAIN)) {
+ chunk_appendf(msg, ", invalid srv_admin_state value '%s'", params[2]);
+ }
+
+ /* validating srv_uweight */
+ p = NULL;
+ errno = 0;
+ srv_uweight = strtol(params[3], &p, 10);
+ if ((p == params[3]) || errno == EINVAL || errno == ERANGE || (srv_uweight > SRV_UWGHT_MAX))
+ chunk_appendf(msg, ", invalid srv_uweight value '%s'", params[3]);
+
+ /* validating srv_iweight */
+ p = NULL;
+ errno = 0;
+ srv_iweight = strtol(params[4], &p, 10);
+ if ((p == params[4]) || errno == EINVAL || errno == ERANGE || (srv_iweight > SRV_UWGHT_MAX))
+ chunk_appendf(msg, ", invalid srv_iweight value '%s'", params[4]);
+
+ /* validating srv_last_time_change */
+ p = NULL;
+ errno = 0;
+ srv_last_time_change = strtol(params[5], &p, 10);
+ if ((p == params[5]) || errno == EINVAL || errno == ERANGE)
+ chunk_appendf(msg, ", invalid srv_last_time_change value '%s'", params[5]);
+
+ /* validating srv_check_status */
+ p = NULL;
+ errno = 0;
+ srv_check_status = strtol(params[6], &p, 10);
+ if (p == params[6] || errno == EINVAL || errno == ERANGE ||
+ (srv_check_status >= HCHK_STATUS_SIZE))
+ chunk_appendf(msg, ", invalid srv_check_status value '%s'", params[6]);
+
+ /* validating srv_check_result */
+ p = NULL;
+ errno = 0;
+ srv_check_result = strtol(params[7], &p, 10);
+ if ((p == params[7]) || errno == EINVAL || errno == ERANGE ||
+ (srv_check_result != CHK_RES_UNKNOWN &&
+ srv_check_result != CHK_RES_NEUTRAL &&
+ srv_check_result != CHK_RES_FAILED &&
+ srv_check_result != CHK_RES_PASSED &&
+ srv_check_result != CHK_RES_CONDPASS)) {
+ chunk_appendf(msg, ", invalid srv_check_result value '%s'", params[7]);
+ }
+
+ /* validating srv_check_health */
+ p = NULL;
+ errno = 0;
+ srv_check_health = strtol(params[8], &p, 10);
+ if (p == params[8] || errno == EINVAL || errno == ERANGE)
+ chunk_appendf(msg, ", invalid srv_check_health value '%s'", params[8]);
+
+ /* validating srv_check_state */
+ p = NULL;
+ errno = 0;
+ srv_check_state = strtol(params[9], &p, 10);
+ if (p == params[9] || errno == EINVAL || errno == ERANGE ||
+ (srv_check_state & ~(CHK_ST_INPROGRESS | CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_PAUSED | CHK_ST_AGENT)))
+ chunk_appendf(msg, ", invalid srv_check_state value '%s'", params[9]);
+
+ /* validating srv_agent_state */
+ p = NULL;
+ errno = 0;
+ srv_agent_state = strtol(params[10], &p, 10);
+ if (p == params[10] || errno == EINVAL || errno == ERANGE ||
+ (srv_agent_state & ~(CHK_ST_INPROGRESS | CHK_ST_CONFIGURED | CHK_ST_ENABLED | CHK_ST_PAUSED | CHK_ST_AGENT)))
+ chunk_appendf(msg, ", invalid srv_agent_state value '%s'", params[10]);
+
+ /* validating bk_f_forced_id */
+ p = NULL;
+ errno = 0;
+ bk_f_forced_id = strtol(params[11], &p, 10);
+ if (p == params[11] || errno == EINVAL || errno == ERANGE || !((bk_f_forced_id == 0) || (bk_f_forced_id == 1)))
+ chunk_appendf(msg, ", invalid bk_f_forced_id value '%s'", params[11]);
+
+ /* validating srv_f_forced_id */
+ p = NULL;
+ errno = 0;
+ srv_f_forced_id = strtol(params[12], &p, 10);
+ if (p == params[12] || errno == EINVAL || errno == ERANGE || !((srv_f_forced_id == 0) || (srv_f_forced_id == 1)))
+ chunk_appendf(msg, ", invalid srv_f_forced_id value '%s'", params[12]);
+
+ /* validating srv_fqdn */
+ fqdn = params[13];
+ if (fqdn && *fqdn == '-')
+ fqdn = NULL;
+ if (fqdn && (strlen(fqdn) > DNS_MAX_NAME_SIZE || invalid_domainchar(fqdn))) {
+ chunk_appendf(msg, ", invalid srv_fqdn value '%s'", params[13]);
+ fqdn = NULL;
+ }
+
+ port_st = params[14];
+ if (port_st) {
+ port_svc = strl2uic(port_st, strlen(port_st));
+ if (port_svc > USHRT_MAX) {
+ chunk_appendf(msg, ", invalid srv_port value '%s'", port_st);
+ port_st = NULL;
+ }
+ }
+
+ /* SRV record
+ * NOTE: in HAProxy, SRV records must start with an underscore '_'
+ */
+ srvrecord = params[15];
+ if (srvrecord && *srvrecord != '_')
+ srvrecord = NULL;
+
+ /* don't apply anything if one error has been detected */
+ if (msg->data)
+ goto out;
+ partial_apply = 1;
+
+ /* recover operational state and apply it to this server
+ * and all servers tracking this one */
+ srv->check.health = srv_check_health;
+ switch (srv_op_state) {
+ case SRV_ST_STOPPED:
+ srv->check.health = 0;
+ srv_set_stopped(srv, SRV_OP_STCHGC_STATEFILE);
+ break;
+ case SRV_ST_STARTING:
+ /* If rise == 1 there is no STARTING state, let's switch to
+ * RUNNING
+ */
+ if (srv->check.rise == 1) {
+ srv->check.health = srv->check.rise + srv->check.fall - 1;
+ srv_set_running(srv, SRV_OP_STCHGC_NONE);
+ break;
+ }
+ if (srv->check.health < 1 || srv->check.health >= srv->check.rise)
+ srv->check.health = srv->check.rise - 1;
+ srv->next_state = srv_op_state;
+ break;
+ case SRV_ST_STOPPING:
+ /* If fall == 1 there is no STOPPING state, let's switch to
+ * STOPPED
+ */
+ if (srv->check.fall == 1) {
+ srv->check.health = 0;
+ srv_set_stopped(srv, SRV_OP_STCHGC_STATEFILE);
+ break;
+ }
+ if (srv->check.health < srv->check.rise ||
+ srv->check.health > srv->check.rise + srv->check.fall - 2)
+ srv->check.health = srv->check.rise;
+ srv_set_stopping(srv, SRV_OP_STCHGC_STATEFILE);
+ break;
+ case SRV_ST_RUNNING:
+ srv->check.health = srv->check.rise + srv->check.fall - 1;
+ srv_set_running(srv, SRV_OP_STCHGC_NONE);
+ break;
+ }
+
+ /* When applying server state, the following rules apply:
+ * - in case of a configuration change, we apply the setting from the new
+ * configuration, regardless of old running state
+ * - if no configuration change, we apply old running state only if old running
+ * state is different from new configuration state
+ */
+ /* configuration has changed */
+ if ((srv_admin_state & SRV_ADMF_CMAINT) != (srv->next_admin & SRV_ADMF_CMAINT)) {
+ if (srv->next_admin & SRV_ADMF_CMAINT)
+ srv_adm_set_maint(srv);
+ else
+ srv_adm_set_ready(srv);
+ }
+ /* configuration is the same, let's compare the old running state and the new conf state */
+ else {
+ if (srv_admin_state & SRV_ADMF_FMAINT && !(srv->next_admin & SRV_ADMF_CMAINT))
+ srv_adm_set_maint(srv);
+ else if (!(srv_admin_state & SRV_ADMF_FMAINT) && (srv->next_admin & SRV_ADMF_CMAINT))
+ srv_adm_set_ready(srv);
+ }
+ /* apply drain mode if server is currently enabled */
+ if (!(srv->next_admin & SRV_ADMF_FMAINT) && (srv_admin_state & SRV_ADMF_FDRAIN)) {
+ /* The SRV_ADMF_FDRAIN flag is inherited when srv->iweight is 0
+ * (srv->iweight is the weight set up in configuration).
+ * There are two possible reasons for FDRAIN to have been present :
+ * - previous config weight was zero
+ * - "set server b/s drain" was sent to the CLI
+ *
+ * In the first case, we simply want to drop this drain state
+ * if the new weight is not zero anymore, meaning the administrator
+ * has intentionally turned the weight back to a positive value to
+ * enable the server again after an operation. In the second case,
+ * the drain state was forced on the CLI regardless of the config's
+ * weight so we don't want a change to the config weight to lose this
+ * status. What this means is :
+ * - if previous weight was 0 and new one is >0, drop the DRAIN state.
+ * - if the previous weight was >0, keep it.
+ */
+ if (srv_iweight > 0 || srv->iweight == 0)
+ srv_adm_set_drain(srv);
+ }
+
+ srv->last_change = ns_to_sec(now_ns) - srv_last_time_change;
+ srv->check.status = srv_check_status;
+ srv->check.result = srv_check_result;
+
+ /* The only case we want to apply is removing the ENABLED flag, which could
+ * have been done by the "disable health" command over the stats socket
+ */
+ if ((srv->check.state & CHK_ST_CONFIGURED) &&
+ (srv_check_state & CHK_ST_CONFIGURED) &&
+ !(srv_check_state & CHK_ST_ENABLED))
+ srv->check.state &= ~CHK_ST_ENABLED;
+
+ /* The only case we want to apply is removing the ENABLED flag, which could
+ * have been done by the "disable agent" command over the stats socket
+ */
+ if ((srv->agent.state & CHK_ST_CONFIGURED) &&
+ (srv_agent_state & CHK_ST_CONFIGURED) &&
+ !(srv_agent_state & CHK_ST_ENABLED))
+ srv->agent.state &= ~CHK_ST_ENABLED;
+
+ /* We want to apply the previous 'running' weight (srv_uweight) only if there
+ * was no change in the configuration: the previous and new iweight are equal.
+ *
+ * This means that a configuration file change takes precedence over a unix
+ * socket change for the server's weight.
+ *
+ * By default, HAProxy applies the following weight when parsing the
+ * configuration: srv->uweight = srv->iweight
+ */
+ if (srv_iweight == srv->iweight) {
+ srv->uweight = srv_uweight;
+ }
+ server_recalc_eweight(srv, 1);
+
+ /* load server IP address */
+ if (strcmp(params[0], "-") != 0)
+ srv->lastaddr = strdup(params[0]);
+
+ if (fqdn && srv->hostname) {
+ if (strcmp(srv->hostname, fqdn) == 0) {
+ /* Here we reset the 'set from stats socket FQDN' flag
+ * to support such transitions:
+ * Let's say the initial FQDN value is foo1 (in the configuration file).
+ * - FQDN changed from the stats socket, from foo1 to foo2,
+ * - FQDN changed again from the configuration file (to the same foo2
+ *   value previously set from the stats socket),
+ * - reload for any reason other than a FQDN modification,
+ * then the configuration file FQDN matches the fqdn server state file value.
+ * So we must reset the 'set from stats socket FQDN' flag to be consistent with
+ * any further FQDN modification.
+ */
+ srv->next_admin &= ~SRV_ADMF_HMAINT;
+ }
+ else {
+ /* If the FQDN has been changed from the stats socket,
+ * apply fqdn state file value (which is the value set
+ * from stats socket).
+ * Also ensure the runtime resolver will process this resolution.
+ */
+ if (fqdn_set_by_cli) {
+ srv_set_fqdn(srv, fqdn, 0);
+ srv->flags &= ~SRV_F_NO_RESOLUTION;
+ srv->next_admin |= SRV_ADMF_HMAINT;
+ }
+ }
+ }
+ /* If all the conditions below are validated, this means
+ * we're evaluating a server managed by SRV resolution
+ */
+ else if (fqdn && !srv->hostname && srvrecord) {
+ int res;
+ int i;
+ char *tmp;
+
+ /* we can't apply previous state if SRV record has changed */
+ if (!srv->srvrq) {
+ chunk_appendf(msg, ", no SRV resolution for server '%s'. Previous state not applied", srv->id);
+ goto out;
+ }
+ if (strcmp(srv->srvrq->name, srvrecord) != 0) {
+ chunk_appendf(msg, ", SRV record mismatch between configuration ('%s') and state file ('%s') for server '%s'. Previous state not applied", srv->srvrq->name, srvrecord, srv->id);
+ goto out;
+ }
+
+ /* prepare DNS resolution for this server */
+ res = srv_prepare_for_resolution(srv, fqdn);
+ if (res == -1) {
+ chunk_appendf(msg, ", can't allocate memory for DNS resolution for server '%s'", srv->id);
+ goto out;
+ }
+
+ /* Remove from the available list and insert into the tree
+ * since this server has a hostname
+ */
+ LIST_DEL_INIT(&srv->srv_rec_item);
+ srv->host_dn.key = tmp = strdup(srv->hostname_dn);
+
+ /* convert the key to lowercase because the tree
+ * lookup is case-sensitive while DNS names are not
+ */
+ for (i = 0; tmp[i]; i++)
+ tmp[i] = tolower(tmp[i]);
+
+ /* insert in tree and set the srvrq expiration date */
+ ebis_insert(&srv->srvrq->named_servers, &srv->host_dn);
+ task_schedule(srv->srvrq_check, tick_add(now_ms, srv->srvrq->resolvers->hold.timeout));
+
+ /* Unset SRV_F_MAPPORTS for SRV records.
+ * SRV_F_MAPPORTS is unfortunately set by parse_server()
+ * because no ports are provided in the configuration file.
+ * This is because HAProxy will use the port found in the SRV record.
+ */
+ srv->flags &= ~SRV_F_MAPPORTS;
+ }
+
+ if (port_st)
+ srv->svc_port = port_svc;
+
+ if (params[16]) {
+#ifdef USE_OPENSSL
+ use_ssl = strtol(params[16], &p, 10);
+
+ /* configure ssl if connection has been initiated at startup */
+ if (srv->ssl_ctx.ctx != NULL)
+ srv_set_ssl(srv, use_ssl);
+#endif
+ }
+
+ port_st = NULL;
+ if (params[17] && strcmp(params[17], "0") != 0)
+ port_st = params[17];
+ addr = NULL;
+ if (params[18] && strcmp(params[18], "-") != 0)
+ addr = params[18];
+ if (addr || port_st) {
+ warning = srv_update_check_addr_port(srv, addr, port_st);
+ if (warning) {
+ chunk_appendf(msg, ", %s", warning);
+ goto out;
+ }
+ }
+
+ port_st = NULL;
+ if (params[20] && strcmp(params[20], "0") != 0)
+ port_st = params[20];
+ addr = NULL;
+ if (params[19] && strcmp(params[19], "-") != 0)
+ addr = params[19];
+ if (addr || port_st) {
+ warning = srv_update_agent_addr_port(srv, addr, port_st);
+ if (warning) {
+ chunk_appendf(msg, ", %s", warning);
+ goto out;
+ }
+ }
+
+ out:
+ HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
+ if (msg->data) {
+ if (partial_apply == 1)
+ ha_warning("server-state partially applied for server '%s/%s'%s\n",
+ srv->proxy->id, srv->id, msg->area);
+ else
+ ha_warning("server-state application failed for server '%s/%s'%s\n",
+ srv->proxy->id, srv->id, msg->area);
+ }
+ end:
+ free_trash_chunk(msg);
+}
+
+/*
+ * Loop on the proxy's servers and try to load each one's state from <st_tree>
+ * using srv_state_srv_update(). The proxy name and the server name are
+ * concatenated to form the key. If found, the entry is removed from the tree.
+ */
+static void srv_state_px_update(const struct proxy *px, int vsn, struct eb_root *st_tree)
+{
+ struct server_state_line *st_line;
+ struct eb64_node *node;
+ struct server *srv;
+ unsigned long key;
+
+ for (srv = px->srv; srv; srv = srv->next) {
+ chunk_printf(&trash, "%s %s", px->id, srv->id);
+ key = XXH3(trash.area, trash.data, 0);
+ node = eb64_lookup(st_tree, key);
+ if (!node)
+ continue; /* next server */
+ st_line = eb64_entry(node, typeof(*st_line), node);
+ srv_state_srv_update(srv, vsn, st_line->params+4);
+
+ /* the node may be released now */
+ eb64_delete(node);
+ free(st_line->line);
+ free(st_line);
+ }
+}
+
+/*
+ * Read the next line from file <f> and return the server state version if one
+ * is found. If the file is empty, -1 is returned.
+ * If no valid version is found, 0 is returned.
+ * Note that this should be the first read on <f>.
+ */
+static int srv_state_get_version(FILE *f) {
+ char mybuf[SRV_STATE_LINE_MAXLEN];
+ char *endptr;
+ long int vsn;
+
+ /* first character of first line of the file must contain the version of the export */
+ if (fgets(mybuf, SRV_STATE_LINE_MAXLEN, f) == NULL)
+ return -1;
+
+ vsn = strtol(mybuf, &endptr, 10);
+ if (endptr == mybuf || *endptr != '\n') {
+ /* Empty or truncated line */
+ return 0;
+ }
+
+ if (vsn < SRV_STATE_FILE_VERSION_MIN || vsn > SRV_STATE_FILE_VERSION_MAX) {
+ /* Wrong version number */
+ return 0;
+ }
+
+ return vsn;
+}
+
+
+/*
+ * Parses the server state line stored in <buf>, expected to be in version
+ * <version>. Sets <params> accordingly on success. It returns 1 on success,
+ * 0 if the line must be ignored and -1 on error.
+ * The caller must provide a supported version.
+ */
+static int srv_state_parse_line(char *buf, const int version, char **params)
+{
+ int buflen, arg, ret;
+ char *cur;
+
+ buflen = strlen(buf);
+ cur = buf;
+ ret = 1; /* be optimistic and pretend a success */
+
+ /* we need at least one character and a non-truncated line */
+ if (buflen == 0 || buf[buflen - 1] != '\n') {
+ ret = -1;
+ goto out;
+ }
+
+ /* skip blank characters at the beginning of the line */
+ while (*cur == ' ' || *cur == '\t')
+ ++cur;
+
+ /* ignore empty or commented lines */
+ if (!*cur || *cur == '\n' || *cur == '#') {
+ ret = 0;
+ goto out;
+ }
+
+ /* Removes trailing '\n' to ease parsing */
+ buf[buflen - 1] = '\0';
+
+ /* we're now ready to move the line into <params> */
+ memset(params, 0, SRV_STATE_FILE_MAX_FIELDS * sizeof(*params));
+ arg = 0;
+ while (*cur) {
+ /* first of all, stop if there are too many fields */
+ if (arg >= SRV_STATE_FILE_MAX_FIELDS)
+ break;
+
+ /* then skip leading spaces */
+ while (*cur && (*cur == ' ' || *cur == '\t')) {
+ ++cur;
+ if (!*cur)
+ break;
+ }
+
+ /*
+ * idx:
+ * be_id: params[0]
+ * be_name: params[1]
+ * srv_id: params[2]
+ * srv_name: params[3]
+ * v1
+ * srv_addr: params[4]
+ * srv_op_state: params[5]
+ * srv_admin_state: params[6]
+ * srv_uweight: params[7]
+ * srv_iweight: params[8]
+ * srv_last_time_change: params[9]
+ * srv_check_status: params[10]
+ * srv_check_result: params[11]
+ * srv_check_health: params[12]
+ * srv_check_state: params[13]
+ * srv_agent_state: params[14]
+ * bk_f_forced_id: params[15]
+ * srv_f_forced_id: params[16]
+ * srv_fqdn: params[17]
+ * srv_port: params[18]
+ * srvrecord: params[19]
+ *
+ * srv_use_ssl: params[20] (optional field)
+ * srv_check_port: params[21] (optional field)
+ * srv_check_addr: params[22] (optional field)
+ * srv_agent_addr: params[23] (optional field)
+ * srv_agent_port: params[24] (optional field)
+ *
+ */
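+ /* A complete version 1 line could look like this (purely
+ * illustrative values):
+ *   1 be_app 1 srv1 192.168.1.10 2 0 1 1 3600 15 3 4 6 0 0 0 - 80 -
+ */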
+ params[arg++] = cur;
+
+ /* look for the end of the current field */
+ while (*cur && *cur != ' ' && *cur != '\t') {
+ ++cur;
+ if (!*cur)
+ break;
+ }
+
+ /* otherwise, cut the field and move to the next one */
+ *cur++ = '\0';
+ }
+
+ /* if the number of fields does not match the version, then return an error */
+ if (version == 1 &&
+ (arg < SRV_STATE_FILE_MIN_FIELDS_VERSION_1 ||
+ arg > SRV_STATE_FILE_MAX_FIELDS_VERSION_1))
+ ret = -1;
+
+ out:
+ return ret;
+}
+
+
+/*
+ * Parses a server state line using srv_state_parse_line() and stores the
+ * result in <st_tree>. If an error occurs during parsing, the line is
+ * ignored. If <px> is defined, it is used to check the backend id/name against
+ * the parsed params and to compute the key of the line.
+ */
+static int srv_state_parse_and_store_line(char *line, int vsn, struct eb_root *st_tree,
+ struct proxy *px)
+{
+ struct server_state_line *st_line;
+ int ret = 0;
+
+ /* store line in tree and duplicate the line */
+ st_line = calloc(1, sizeof(*st_line));
+ if (st_line == NULL)
+ goto skip_line;
+ st_line->line = strdup(line);
+ if (st_line->line == NULL)
+ goto skip_line;
+
+ ret = srv_state_parse_line(st_line->line, vsn, st_line->params);
+ if (ret <= 0)
+ goto skip_line;
+
+ /* Check backend name against params if <px> is defined */
+ if (px) {
+ int check_id = (atoi(st_line->params[0]) == px->uuid);
+ int check_name = (strcmp(px->id, st_line->params[1]) == 0);
+ int bk_f_forced_id = (atoi(st_line->params[15]) & PR_O_FORCED_ID);
+
+ if (!check_id && !check_name) {
+ /* backend does not match at all: skip the line */
+ goto skip_line;
+ }
+ else if (!check_id) {
+ /* Id mismatch: warn but continue */
+ ha_warning("Proxy '%s': backend ID mismatch: from server state file: '%s', from running config '%d'\n",
+ px->id, st_line->params[0], px->uuid);
+ send_log(px, LOG_NOTICE, "backend ID mismatch: from server state file: '%s', from running config '%d'\n",
+ st_line->params[0], px->uuid);
+ }
+ else if (!check_name) {
+ /* Name mismatch: warn and skip the line, except if the backend id was forced
+ * in the previous configuration */
+ ha_warning("Proxy '%s': backend name mismatch: from server state file: '%s', from running config '%s'\n",
+ px->id, st_line->params[1], px->id);
+ send_log(px, LOG_NOTICE, "backend name mismatch: from server state file: '%s', from running config '%s'\n",
+ st_line->params[1], px->id);
+ if (!bk_f_forced_id)
+ goto skip_line;
+ }
+ }
+
+ /*
+ * The key: "be_name srv_name"
+ * if <px> is defined: be_name == px->id
+ * otherwise: be_name == params[1]
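+ * e.g. the key is hashed from a string like "be_app srv1" (illustrative names)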
+ */
+ chunk_printf(&trash, "%s %s", (px ? px->id : st_line->params[1]), st_line->params[3]);
+ st_line->node.key = XXH3(trash.area, trash.data, 0);
+ if (eb64_insert(st_tree, &st_line->node) != &st_line->node) {
+ /* this is a duplicate key, probably a hand-crafted file, drop it! */
+ goto skip_line;
+ }
+
+ return ret;
+
+ skip_line:
+ /* free up memory in case of error during the processing of the line */
+ if (st_line) {
+ free(st_line->line);
+ free(st_line);
+ }
+ return ret;
+}
+
+/* Helper function to get the server-state file path.
+ * If <filename> starts with a '/', it is considered as an absolute path. In
+ * this case or if <global.server_state_base> is not set, <filename> only is
+ * considered. Otherwise, the <global.server_state_base> is concatenated to
+ * <filename> to produce the file path and copied to <dst_path>. In both cases,
+ * the result must not exceed <maxpathlen>.
+ *
+ * The len is returned on success or -1 if the path is too long. On error, the
+ * caller must not rely on <dst_path>.
+ */
+static inline int srv_state_get_filepath(char *dst_path, int maxpathlen, const char *filename)
+{
+ char *sep;
+ int len = 0;
+
+ /* create the globalfilepath variable */
+ if (*filename == '/' || !global.server_state_base) {
+ /* absolute path or no base directory provided */
+ len = strlcpy2(dst_path, filename, maxpathlen);
+ }
+ else {
+ /* concat base directory and global server-state file */
+ sep = (global.server_state_base[strlen(global.server_state_base)-1] != '/' ? "/": "");
+ len = snprintf(dst_path, maxpathlen, "%s%s%s", global.server_state_base, sep, filename);
+ }
+ return (len < maxpathlen ? len: -1);
+}
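+
+/* For example (illustrative paths): with server_state_base set to
+ * "/etc/haproxy/states" and <filename> "bk1", the resulting path is
+ * "/etc/haproxy/states/bk1", while an absolute <filename> such as
+ * "/run/state" is used as-is.
+ */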
+
+
+/* This function parses all the proxies and only takes care of the backends (since we're looking for servers).
+ * For each proxy, it does the following:
+ *  - opens its server state file (either the global one or a local one)
+ *  - reads the whole file, line by line
+ *  - analyses each line to check if it matches our current backend:
+ * - backend name matches
+ * - backend id matches if id is forced and name doesn't match
+ * - if the server pointed by the line is found, then state is applied
+ *
+ * If the running backend uuid or id differs from the state file, then HAProxy reports
+ * a warning.
+ *
+ * Grabs the server's lock via srv_state_srv_update().
+ */
+void apply_server_state(void)
+{
+ /* tree where global state_file is loaded */
+ struct eb_root global_state_tree = EB_ROOT_UNIQUE;
+ struct proxy *curproxy;
+ struct server_state_line *st_line;
+ struct eb64_node *node, *next_node;
+ FILE *f;
+ char mybuf[SRV_STATE_LINE_MAXLEN];
+ char file[MAXPATHLEN];
+ int local_vsn, global_vsn, len, linenum;
+
+ global_vsn = 0; /* no global file */
+ if (!global.server_state_file)
+ goto no_globalfile;
+ len = srv_state_get_filepath(file, MAXPATHLEN, global.server_state_file);
+ if (len == -1) {
+ ha_warning("config: Can't load global server state file: file too long.\n");
+ goto no_globalfile;
+ }
+
+ /* Load global server state in a tree */
+ errno = 0;
+ f = fopen(file, "r");
+ if (!f) {
+ if (errno == ENOENT)
+ ha_notice("config: Can't open global server state file '%s': %s\n", file, strerror(errno));
+ else
+ ha_warning("config: Can't open global server state file '%s': %s\n", file, strerror(errno));
+ goto no_globalfile;
+ }
+
+ global_vsn = srv_state_get_version(f);
+ if (global_vsn < 1) {
+ if (global_vsn == -1)
+ ha_notice("config: Empty global server state file '%s'.\n",
+ file);
+ if (global_vsn == 0)
+ ha_warning("config: Can't get version of the global server state file '%s'.\n",
+ file);
+ goto close_globalfile;
+ }
+
+ for (linenum = 1; fgets(mybuf, SRV_STATE_LINE_MAXLEN, f); linenum++) {
+ int ret;
+
+ ret = srv_state_parse_and_store_line(mybuf, global_vsn, &global_state_tree, NULL);
+ if (ret == -1) {
+ ha_warning("config: corrupted global server state file '%s' at line %d.\n",
+ file, linenum);
+ global_vsn = 0;
+ break;
+ }
+ }
+
+ close_globalfile:
+ fclose(f);
+
+ no_globalfile:
+ /* parse all proxies and load states from the tree (global file) or from a local file */
+ for (curproxy = proxies_list; curproxy != NULL; curproxy = curproxy->next) {
+ struct eb_root local_state_tree = EB_ROOT_UNIQUE;
+
+ /* Must be an enabled backend with at least a server */
+ if (!(curproxy->cap & PR_CAP_BE) || (curproxy->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) || !curproxy->srv)
+ continue; /* next proxy */
+
+ /* Mode must be specified */
+ BUG_ON(curproxy->load_server_state_from_file == PR_SRV_STATE_FILE_UNSPEC);
+
+ /* No server-state file for this proxy */
+ if (curproxy->load_server_state_from_file == PR_SRV_STATE_FILE_NONE)
+ continue; /* next proxy */
+
+ if (curproxy->load_server_state_from_file == PR_SRV_STATE_FILE_GLOBAL) {
+ /* when the global file is used, we get data from the tree.
+ * Note that in this case we check neither the backend name nor the uuid.
+ * The backend name can't be wrong since it's used as a key to retrieve the
+ * server state line from the tree.
+ */
+ if (global_vsn)
+ srv_state_px_update(curproxy, global_vsn, &global_state_tree);
+ continue; /* next proxy */
+ }
+
+ /*
+ * Here we load a local server state-file
+ */
+
+ /* create file variable */
+ len = srv_state_get_filepath(file, MAXPATHLEN, curproxy->server_state_file_name);
+ if (len == -1) {
+ ha_warning("Proxy '%s': Can't load local server state file: file too long.\n", curproxy->id);
+ continue; /* next proxy */
+ }
+
+ /* Load local server state in a tree */
+ errno = 0;
+ f = fopen(file, "r");
+ if (!f) {
+ if (errno == ENOENT)
+ ha_notice("Proxy '%s': Can't open server state file '%s': %s.\n",
+ curproxy->id, file, strerror(errno));
+ else
+ ha_warning("Proxy '%s': Can't open server state file '%s': %s.\n",
+ curproxy->id, file, strerror(errno));
+ continue; /* next proxy */
+ }
+
+ /* first character of first line of the file must contain the version of the export */
+ local_vsn = srv_state_get_version(f);
+ if (local_vsn < 1) {
+ if (local_vsn == -1)
+ ha_notice("Proxy '%s': Empty server state file '%s'.\n",
+ curproxy->id, file);
+ if (local_vsn == 0)
+ ha_warning("Proxy '%s': Can't get version of the server state file '%s'.\n",
+ curproxy->id, file);
+ goto close_localfile;
+ }
+
+ /* First, parse lines of the local server-state file and store them in an eb-tree */
+ for (linenum = 1; fgets(mybuf, SRV_STATE_LINE_MAXLEN, f); linenum++) {
+ int ret;
+
+ ret = srv_state_parse_and_store_line(mybuf, local_vsn, &local_state_tree, curproxy);
+ if (ret == -1) {
+ ha_warning("Proxy '%s': corrupted server state file '%s' at line %d.\n",
+ curproxy->id, file, linenum);
+ local_vsn = 0;
+ break;
+ }
+ }
+
+ if (local_vsn)
+ srv_state_px_update(curproxy, local_vsn, &local_state_tree);
+
+ /* Remove unused server-state lines */
+ node = eb64_first(&local_state_tree);
+ while (node) {
+ st_line = eb64_entry(node, typeof(*st_line), node);
+ next_node = eb64_next(node);
+ eb64_delete(node);
+
+ if (local_vsn) {
+ /* if no server found, then warn */
+ ha_warning("Proxy '%s': can't find server '%s' in backend '%s'\n",
+ curproxy->id, st_line->params[3], curproxy->id);
+ send_log(curproxy, LOG_NOTICE, "can't find server '%s' in backend '%s'\n",
+ st_line->params[3], curproxy->id);
+ }
+
+ free(st_line->line);
+ free(st_line);
+ node = next_node;
+ }
+
+ close_localfile:
+ fclose(f);
+ }
+
+ node = eb64_first(&global_state_tree);
+ while (node) {
+ st_line = eb64_entry(node, typeof(*st_line), node);
+ next_node = eb64_next(node);
+ eb64_delete(node);
+ free(st_line->line);
+ free(st_line);
+ node = next_node;
+ }
+}
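+
+/* An illustrative configuration making use of the above (paths and names are
+ * examples only):
+ *
+ *     global
+ *         server-state-base /var/lib/haproxy
+ *         server-state-file global-state
+ *
+ *     backend be_app
+ *         load-server-state-from-file global
+ *         server srv1 192.168.1.10:80 check
+ */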
diff --git a/src/session.c b/src/session.c
new file mode 100644
index 0000000..ce9ccbf
--- /dev/null
+++ b/src/session.c
@@ -0,0 +1,528 @@
+/*
+ * Session management functions.
+ *
+ * Copyright 2000-2015 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/ssl_sock-t.h>
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/session.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+
+
+DECLARE_POOL(pool_head_session, "session", sizeof(struct session));
+DECLARE_POOL(pool_head_sess_srv_list, "session server list",
+ sizeof(struct sess_srv_list));
+
+int conn_complete_session(struct connection *conn);
+
+/* Create a new session and assign it to frontend <fe>, listener <li>,
+ * origin <origin>, set the current date and clear the stick counter pointers.
+ * Returns the session upon success or NULL. The session may be released using
+ * session_free(). Note: <li> may be NULL.
+ */
+struct session *session_new(struct proxy *fe, struct listener *li, enum obj_type *origin)
+{
+ struct session *sess;
+
+ sess = pool_alloc(pool_head_session);
+ if (sess) {
+ sess->listener = li;
+ sess->fe = fe;
+ sess->origin = origin;
+ sess->accept_date = date; /* user-visible date for logging */
+ sess->accept_ts = now_ns; /* corrected date for internal use */
+ sess->stkctr = NULL;
+ if (pool_head_stk_ctr) {
+ sess->stkctr = pool_alloc(pool_head_stk_ctr);
+ if (!sess->stkctr)
+ goto out_fail_alloc;
+ memset(sess->stkctr, 0, sizeof(sess->stkctr[0]) * global.tune.nb_stk_ctr);
+ }
+ vars_init_head(&sess->vars, SCOPE_SESS);
+ sess->task = NULL;
+ sess->t_handshake = -1; /* handshake not done yet */
+ sess->t_idle = -1;
+ _HA_ATOMIC_INC(&totalconn);
+ _HA_ATOMIC_INC(&jobs);
+ LIST_INIT(&sess->srv_list);
+ sess->idle_conns = 0;
+ sess->flags = SESS_FL_NONE;
+ sess->src = NULL;
+ sess->dst = NULL;
+ }
+ return sess;
+ out_fail_alloc:
+ pool_free(pool_head_session, sess);
+ return NULL;
+}
+
+void session_free(struct session *sess)
+{
+ struct connection *conn, *conn_back;
+ struct sess_srv_list *srv_list, *srv_list_back;
+
+ if (sess->listener)
+ listener_release(sess->listener);
+ session_store_counters(sess);
+ pool_free(pool_head_stk_ctr, sess->stkctr);
+ vars_prune_per_sess(&sess->vars);
+ conn = objt_conn(sess->origin);
+ if (conn != NULL && conn->mux)
+ conn->mux->destroy(conn->ctx);
+ list_for_each_entry_safe(srv_list, srv_list_back, &sess->srv_list, srv_list) {
+ list_for_each_entry_safe(conn, conn_back, &srv_list->conn_list, session_list) {
+ LIST_DEL_INIT(&conn->session_list);
+ if (conn->mux) {
+ conn->owner = NULL;
+ conn->flags &= ~CO_FL_SESS_IDLE;
+ conn->mux->destroy(conn->ctx);
+ } else {
+ /* We have a connection, but not yet an associated mux.
+ * So destroy it now.
+ */
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ conn_free(conn);
+ }
+ }
+ pool_free(pool_head_sess_srv_list, srv_list);
+ }
+ sockaddr_free(&sess->src);
+ sockaddr_free(&sess->dst);
+ pool_free(pool_head_session, sess);
+ _HA_ATOMIC_DEC(&jobs);
+}
+
+/* callback used from the connection/mux layer to notify that a connection is
+ * going to be released.
+ */
+void conn_session_free(struct connection *conn)
+{
+ session_free(conn->owner);
+ conn->owner = NULL;
+}
+
+/* count a new session to keep frontend, listener and track stats up to date */
+static void session_count_new(struct session *sess)
+{
+ struct stkctr *stkctr;
+ void *ptr;
+ int i;
+
+ proxy_inc_fe_sess_ctr(sess->listener, sess->fe);
+
+ for (i = 0; i < global.tune.nb_stk_ctr; i++) {
+ stkctr = &sess->stkctr[i];
+ if (!stkctr_entry(stkctr))
+ continue;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_SESS_CNT);
+ if (ptr)
+ HA_ATOMIC_INC(&stktable_data_cast(ptr, std_t_uint));
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_SESS_RATE);
+ if (ptr)
+ update_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_SESS_RATE].u, 1);
+ }
+}
+
+/* This function is called from the protocol layer accept() in order to
+ * instantiate a new session on behalf of a given listener and frontend. It
+ * returns a positive value upon success, 0 if the connection can be ignored,
+ * or a negative value upon critical failure. The accepted connection is
+ * closed if we return <= 0. If no handshake is needed, it immediately tries
+ * to instantiate a new stream. The connection must already have been filled
+ * with the incoming connection handle (a fd), a target (the listener) and a
+ * source address.
+ */
+int session_accept_fd(struct connection *cli_conn)
+{
+ struct listener *l = __objt_listener(cli_conn->target);
+ struct proxy *p = l->bind_conf->frontend;
+ int cfd = cli_conn->handle.fd;
+ struct session *sess;
+ int ret;
+
+ ret = -1; /* assume unrecoverable error by default */
+
+ cli_conn->proxy_netns = l->rx.settings->netns;
+
+ /* Active reversed connection has already been initialized before being
+ * accepted. It must not be reset.
+ * TODO use a dedicated accept_fd callback for reverse protocol
+ */
+ if (!cli_conn->xprt) {
+ if (conn_prepare(cli_conn, l->rx.proto, l->bind_conf->xprt) < 0)
+ goto out_free_conn;
+
+ conn_ctrl_init(cli_conn);
+
+ /* wait for a PROXY protocol header */
+ if (l->bind_conf->options & BC_O_ACC_PROXY)
+ cli_conn->flags |= CO_FL_ACCEPT_PROXY;
+
+ /* wait for a NetScaler client IP insertion protocol header */
+ if (l->bind_conf->options & BC_O_ACC_CIP)
+ cli_conn->flags |= CO_FL_ACCEPT_CIP;
+
+ /* Add the handshake pseudo-XPRT */
+ if (cli_conn->flags & (CO_FL_ACCEPT_PROXY | CO_FL_ACCEPT_CIP)) {
+ if (xprt_add_hs(cli_conn) != 0)
+ goto out_free_conn;
+ }
+ }
+
+ sess = session_new(p, l, &cli_conn->obj_type);
+ if (!sess)
+ goto out_free_conn;
+
+ conn_set_owner(cli_conn, sess, NULL);
+
+ /* now evaluate the tcp-request layer4 rules. We only need a session
+ * and no stream for these rules.
+ */
+ if (!LIST_ISEMPTY(&p->tcp_req.l4_rules) && !tcp_exec_l4_rules(sess)) {
+ /* let's do a no-linger now to close with a single RST. */
+ if (!(cli_conn->flags & CO_FL_FDLESS))
+ setsockopt(cfd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger));
+ ret = 0; /* successful termination */
+ goto out_free_sess;
+ }
+ /* TCP rules may flag the connection as needing proxy protocol; now that it's done we can start our xprt */
+ if (conn_xprt_start(cli_conn) < 0)
+ goto out_free_sess;
+
+ /* FIXME/WTA: we should implement the setsockopt() calls at the proto
+ * level instead and let non-inet protocols implement their own equivalent.
+ */
+ if (cli_conn->flags & CO_FL_FDLESS)
+ goto skip_fd_setup;
+
+ /* Adjust some socket options */
+ if (l->rx.addr.ss_family == AF_INET || l->rx.addr.ss_family == AF_INET6) {
+ setsockopt(cfd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one));
+
+ if (p->options & PR_O_TCP_CLI_KA) {
+ setsockopt(cfd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one));
+
+#ifdef TCP_KEEPCNT
+ if (p->clitcpka_cnt)
+ setsockopt(cfd, IPPROTO_TCP, TCP_KEEPCNT, &p->clitcpka_cnt, sizeof(p->clitcpka_cnt));
+#endif
+
+#ifdef TCP_KEEPIDLE
+ if (p->clitcpka_idle)
+ setsockopt(cfd, IPPROTO_TCP, TCP_KEEPIDLE, &p->clitcpka_idle, sizeof(p->clitcpka_idle));
+#endif
+
+#ifdef TCP_KEEPINTVL
+ if (p->clitcpka_intvl)
+ setsockopt(cfd, IPPROTO_TCP, TCP_KEEPINTVL, &p->clitcpka_intvl, sizeof(p->clitcpka_intvl));
+#endif
+ }
+
+ if (p->options & PR_O_TCP_NOLING)
+ HA_ATOMIC_OR(&fdtab[cfd].state, FD_LINGER_RISK);
+
+#if defined(TCP_MAXSEG)
+ if (l->bind_conf->maxseg < 0) {
+ /* we just want to reduce the current MSS by that value */
+ int mss;
+ socklen_t mss_len = sizeof(mss);
+ if (getsockopt(cfd, IPPROTO_TCP, TCP_MAXSEG, &mss, &mss_len) == 0) {
+ mss += l->bind_conf->maxseg; /* remember, it's < 0 */
+ setsockopt(cfd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss));
+ }
+ }
+#endif
+ }
+
+ if (global.tune.client_sndbuf)
+ setsockopt(cfd, SOL_SOCKET, SO_SNDBUF, &global.tune.client_sndbuf, sizeof(global.tune.client_sndbuf));
+
+ if (global.tune.client_rcvbuf)
+ setsockopt(cfd, SOL_SOCKET, SO_RCVBUF, &global.tune.client_rcvbuf, sizeof(global.tune.client_rcvbuf));
+
+ skip_fd_setup:
+ /* OK, now either we have a pending handshake to execute, in which case
+ * we must return to the I/O layer, or we can proceed with the end of
+ * the stream initialization. In case of handshake, we also set the I/O
+ * timeout to the frontend's client timeout and register a task in the
+ * session for this purpose. The connection's owner is left to the
+ * session during this period.
+ *
+ * At this point we set the relation between sess/task/conn this way :
+ *
+ * +----------------- task
+ * | |
+ * orig -- sess <-- context |
+ * | ^ | |
+ * v | | |
+ * conn -- owner ---> task <-----+
+ */
+ if (cli_conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS)) {
+ int timeout;
+ int clt_tmt = p->timeout.client;
+ int hs_tmt = p->timeout.client_hs;
+
+ if (unlikely((sess->task = task_new_here()) == NULL))
+ goto out_free_sess;
+
+ /* Handshake timeout as default timeout */
+ timeout = hs_tmt ? hs_tmt : clt_tmt;
+ sess->task->context = sess;
+ sess->task->nice = l->bind_conf->nice;
+ sess->task->process = session_expire_embryonic;
+ sess->task->expire = tick_add_ifset(now_ms, timeout);
+ task_queue(sess->task);
+ return 1;
+ }
+
+ /* OK let's complete stream initialization since there is no handshake */
+ if (conn_complete_session(cli_conn) >= 0)
+ return 1;
+
+ /* if we reach here we have deliberately decided not to keep this
+ * session (e.g. tcp-request rule), so that's not an error we should
+ * try to protect against.
+ */
+ ret = 0;
+
+ /* error unrolling */
+ out_free_sess:
+ /* prevent call to listener_release during session_free. It will be
+ * done below, for all errors. */
+ sess->listener = NULL;
+ session_free(sess);
+
+ out_free_conn:
+ if (ret < 0 && l->bind_conf->xprt == xprt_get(XPRT_RAW) &&
+ p->mode == PR_MODE_HTTP && l->bind_conf->mux_proto == NULL &&
+ !(cli_conn->flags & CO_FL_FDLESS)) {
+ /* critical error, no more memory, try to emit a 500 response */
+ send(cfd, http_err_msgs[HTTP_ERR_500], strlen(http_err_msgs[HTTP_ERR_500]),
+ MSG_DONTWAIT|MSG_NOSIGNAL);
+ }
+
+ if (cli_conn->mux) {
+ /* Mux is already initialized for active reversed connection. */
+ cli_conn->mux->destroy(cli_conn->ctx);
+ }
+ else {
+ conn_stop_tracking(cli_conn);
+ conn_full_close(cli_conn);
+ conn_free(cli_conn);
+ }
+ listener_release(l);
+ return ret;
+}
+
+
+/* prepare the trash with a log prefix for session <sess>. It only works with
+ * embryonic sessions based on a real connection. This function requires that
+ * sess->origin points to the incoming connection.
+ */
+static void session_prepare_log_prefix(struct session *sess)
+{
+ const struct sockaddr_storage *src;
+ struct tm tm;
+ char pn[INET6_ADDRSTRLEN];
+ int ret;
+ char *end;
+
+ src = sess_src(sess);
+ ret = (src ? addr_to_str(src, pn, sizeof(pn)) : 0);
+ if (ret <= 0)
+ chunk_printf(&trash, "unknown [");
+ else if (ret == AF_UNIX)
+ chunk_printf(&trash, "%s:%d [", pn, sess->listener->luid);
+ else
+ chunk_printf(&trash, "%s:%d [", pn, get_host_port(src));
+
+ get_localtime(sess->accept_date.tv_sec, &tm);
+ end = date2str_log(trash.area + trash.data, &tm, &(sess->accept_date),
+ trash.size - trash.data);
+ trash.data = end - trash.area;
+ if (sess->listener->name)
+ chunk_appendf(&trash, "] %s/%s", sess->fe->id, sess->listener->name);
+ else
+ chunk_appendf(&trash, "] %s/%d", sess->fe->id, sess->listener->luid);
+}
+
+
+/* fill the trash buffer with the string to use for send_log during
+ * session_kill_embryonic(). Add log prefix and error string.
+ *
+ * The function is able to dump an SSL error string when CO_ER_SSL_HANDSHAKE
+ * is met.
+ */
+static void session_build_err_string(struct session *sess)
+{
+ struct connection *conn = __objt_conn(sess->origin);
+ const char *err_msg;
+ struct ssl_sock_ctx __maybe_unused *ssl_ctx;
+
+ err_msg = conn_err_code_str(conn);
+ session_prepare_log_prefix(sess); /* use trash buffer */
+
+#ifdef USE_OPENSSL
+ ssl_ctx = conn_get_ssl_sock_ctx(conn);
+
+ /* when the SSL error code is present during an SSL handshake failure,
+ * try to dump the error string from OpenSSL */
+ if (conn->err_code == CO_ER_SSL_HANDSHAKE && ssl_ctx && ssl_ctx->error_code != 0) {
+ chunk_appendf(&trash, ": SSL handshake failure (");
+ ERR_error_string_n(ssl_ctx->error_code, b_orig(&trash)+b_data(&trash), b_room(&trash));
+ trash.data = strlen(b_orig(&trash));
+ chunk_appendf(&trash, ")\n");
+ }
+
+ else
+#endif /* USE_OPENSSL */
+
+ if (err_msg)
+ chunk_appendf(&trash, ": %s\n", err_msg);
+ else
+ chunk_appendf(&trash, ": unknown connection error (code=%d flags=%08x)\n",
+ conn->err_code, conn->flags);
+
+ return;
+}
+
+
+
+/* This function kills an existing embryonic session. It stops the connection's
+ * transport layer, releases assigned resources, resumes the listener if it was
+ * disabled and finally kills the file descriptor. This function requires that
+ * sess->origin points to the incoming connection.
+ */
+static void session_kill_embryonic(struct session *sess, unsigned int state)
+{
+ int level = LOG_INFO;
+ struct connection *conn = __objt_conn(sess->origin);
+ struct task *task = sess->task;
+ unsigned int log = sess->fe->to_log;
+
+ if (sess->fe->options2 & PR_O2_LOGERRORS)
+ level = LOG_ERR;
+
+ if (log && (sess->fe->options & PR_O_NULLNOLOG)) {
+ /* with "option dontlognull", we don't log connections with no transfer */
+ if (!conn->err_code ||
+ conn->err_code == CO_ER_PRX_EMPTY || conn->err_code == CO_ER_PRX_ABORT ||
+ conn->err_code == CO_ER_CIP_EMPTY || conn->err_code == CO_ER_CIP_ABORT ||
+ conn->err_code == CO_ER_SSL_EMPTY || conn->err_code == CO_ER_SSL_ABORT)
+ log = 0;
+ }
+
+ if (log) {
+ if (!conn->err_code && (state & TASK_WOKEN_TIMER)) {
+ if (conn->flags & CO_FL_ACCEPT_PROXY)
+ conn->err_code = CO_ER_PRX_TIMEOUT;
+ else if (conn->flags & CO_FL_ACCEPT_CIP)
+ conn->err_code = CO_ER_CIP_TIMEOUT;
+ else if (conn->flags & CO_FL_SSL_WAIT_HS)
+ conn->err_code = CO_ER_SSL_TIMEOUT;
+ }
+
+ if (!LIST_ISEMPTY(&sess->fe->logformat_error)) {
+ /* Display a log line following the configured error-log-format. */
+ sess_log(sess);
+ }
+ else {
+ session_build_err_string(sess);
+ send_log(sess->fe, level, "%s", trash.area);
+ }
+ }
+
+ /* kill the connection now */
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ conn_free(conn);
+ sess->origin = NULL;
+
+ task_destroy(task);
+ session_free(sess);
+}
+
+/* Manages the embryonic session timeout. It is only called when the timeout
+ * strikes and performs the required cleanup. It's only exported to make it
+ * resolve in "show tasks".
+ */
+struct task *session_expire_embryonic(struct task *t, void *context, unsigned int state)
+{
+ struct session *sess = context;
+
+ if (!(state & TASK_WOKEN_TIMER))
+ return t;
+
+ session_kill_embryonic(sess, state);
+ return NULL;
+}
+
+/* Finish initializing a session from a connection, or kills it if the
+ * connection shows an error. Returns <0 if the connection was killed. It may
+ * be called either asynchronously when ssl handshake is done with an embryonic
+ * session, or synchronously to finalize the session. The distinction is made
+ * on sess->task which is only set in the embryonic session case.
+ */
+int conn_complete_session(struct connection *conn)
+{
+ struct session *sess = conn->owner;
+
+ sess->t_handshake = ns_to_ms(now_ns - sess->accept_ts);
+
+ if (conn->flags & CO_FL_ERROR)
+ goto fail;
+
+ /* if logs require transport layer information, note it on the connection */
+ if (sess->fe->to_log & LW_XPRT)
+ conn->flags |= CO_FL_XPRT_TRACKED;
+
+ /* we may have some tcp-request-session rules */
+ if (!LIST_ISEMPTY(&sess->fe->tcp_req.l5_rules) && !tcp_exec_l5_rules(sess))
+ goto fail;
+
+ session_count_new(sess);
+ if (!conn->mux) {
+ if (conn_install_mux_fe(conn, NULL) < 0)
+ goto fail;
+ }
+
+ /* the embryonic session's task is not needed anymore */
+ task_destroy(sess->task);
+ sess->task = NULL;
+ conn_set_owner(conn, sess, conn_session_free);
+
+ return 0;
+
+ fail:
+ if (sess->task)
+ session_kill_embryonic(sess, 0);
+ return -1;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/sha1.c b/src/sha1.c
new file mode 100644
index 0000000..b7c2d70
--- /dev/null
+++ b/src/sha1.c
@@ -0,0 +1,308 @@
+/*
+ * Based on the git SHA1 Implementation.
+ *
+ * Copyright (C) 2009-2015, Linus Torvalds and others.
+ *
+ * SHA1 routine optimized to do word accesses rather than byte accesses,
+ * and to avoid unnecessary copies into the context array.
+ *
+ * This was initially based on the Mozilla SHA1 implementation, although
+ * none of the original Mozilla code remains.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* this is only to get definitions for memcpy(), ntohl() and htonl() */
+#include <string.h>
+#include <inttypes.h>
+#include <arpa/inet.h>
+
+#include <import/sha1.h>
+
+/*
+ * Performance might be improved if the CPU architecture is OK with
+ * unaligned 32-bit loads and a fast ntohl() is available.
+ * Otherwise fall back to byte loads and shifts which is portable,
+ * and is faster on architectures with memory alignment issues.
+ */
+
+#if defined(__i386__) || defined(__x86_64__) || \
+ defined(__ppc__) || defined(__ppc64__) || \
+ defined(__powerpc__) || defined(__powerpc64__) || \
+ defined(__s390__) || defined(__s390x__)
+
+#define get_be32(p) ntohl(*(unsigned int *)(p))
+#define put_be32(p, v) do { *(unsigned int *)(p) = htonl(v); } while (0)
+
+#else
+
+static inline uint32_t get_be32(const void *ptr)
+{
+ const unsigned char *p = ptr;
+ return (uint32_t)p[0] << 24 |
+ (uint32_t)p[1] << 16 |
+ (uint32_t)p[2] << 8 |
+ (uint32_t)p[3] << 0;
+}
+
+static inline void put_be32(void *ptr, uint32_t value)
+{
+ unsigned char *p = ptr;
+ p[0] = value >> 24;
+ p[1] = value >> 16;
+ p[2] = value >> 8;
+ p[3] = value >> 0;
+}
+
+#endif
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+
+/*
+ * Force usage of rol or ror by selecting the one with the smaller constant.
+ * It _can_ generate slightly smaller code (a constant of 1 is special), but
+ * perhaps more importantly it's possibly faster on any uarch that does a
+ * rotate with a loop.
+ */
+
+#define SHA_ASM(op, x, n) ({ unsigned int __res; __asm__(op " %1,%0":"=r" (__res):"i" (n), "0" (x)); __res; })
+#define SHA_ROL(x,n) SHA_ASM("rol", x, n)
+#define SHA_ROR(x,n) SHA_ASM("ror", x, n)
+
+#else
+
+#define SHA_ROT(X,l,r) (((X) << (l)) | ((X) >> (r)))
+#define SHA_ROL(X,n) SHA_ROT(X,n,32-(n))
+#define SHA_ROR(X,n) SHA_ROT(X,32-(n),n)
+
+#endif
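+
+/* For instance, with either variant selected above:
+ *	SHA_ROL(0x80000000, 1) == 0x00000001
+ *	SHA_ROR(0x00000001, 1) == 0x80000000
+ */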
+
+/*
+ * If you have 32 registers or more, the compiler can (and should)
+ * try to change the array[] accesses into registers. However, on
+ * machines with less than ~25 registers, that won't really work,
+ * and at least gcc will make an unholy mess of it.
+ *
+ * So to avoid that mess which just slows things down, we force
+ * the stores to memory to actually happen (we might be better off
+ * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
+ * suggested by Artur Skawina - that will also make gcc unable to
+ * try to do the silly "optimize away loads" part because it won't
+ * see what the value will be).
+ *
+ * Ben Herrenschmidt reports that on PPC, the C version comes close
+ * to the optimized asm with this (ie on PPC you don't want that
+ * 'volatile', since there are lots of registers).
+ *
+ * On ARM we get the best code generation by forcing a full memory barrier
+ * between each SHA_ROUND, otherwise gcc happily get wild with spilling and
+ * the stack frame size simply explode and performance goes down the drain.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+ #define setW(x, val) (*(volatile unsigned int *)&W(x) = (val))
+#elif defined(__GNUC__) && defined(__arm__)
+ #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
+#else
+ #define setW(x, val) (W(x) = (val))
+#endif
+
+/* This "rolls" over the 512-bit array */
+#define W(x) (array[(x)&15])
+
+/*
+ * Where do we get the source from? The first 16 iterations get it from
+ * the input data, the next mix it from the 512-bit array.
+ */
+#define SHA_SRC(t) get_be32((unsigned char *) block + (t)*4)
+#define SHA_MIX(t) SHA_ROL(W((t)+13) ^ W((t)+8) ^ W((t)+2) ^ W(t), 1);
+
+#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
+ unsigned int TEMP = input(t); setW(t, TEMP); \
+ E += TEMP + SHA_ROL(A,5) + (fn) + (constant); \
+ B = SHA_ROR(B, 2); } while (0)
+
+#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
+#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
+#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
+
+static void blk_SHA1_Block(blk_SHA_CTX *ctx, const void *block)
+{
+ unsigned int A,B,C,D,E;
+ unsigned int array[16];
+
+ A = ctx->H[0];
+ B = ctx->H[1];
+ C = ctx->H[2];
+ D = ctx->H[3];
+ E = ctx->H[4];
+
+ /* Round 1 - iterations 0-15 take their input from 'block' */
+ T_0_15( 0, A, B, C, D, E);
+ T_0_15( 1, E, A, B, C, D);
+ T_0_15( 2, D, E, A, B, C);
+ T_0_15( 3, C, D, E, A, B);
+ T_0_15( 4, B, C, D, E, A);
+ T_0_15( 5, A, B, C, D, E);
+ T_0_15( 6, E, A, B, C, D);
+ T_0_15( 7, D, E, A, B, C);
+ T_0_15( 8, C, D, E, A, B);
+ T_0_15( 9, B, C, D, E, A);
+ T_0_15(10, A, B, C, D, E);
+ T_0_15(11, E, A, B, C, D);
+ T_0_15(12, D, E, A, B, C);
+ T_0_15(13, C, D, E, A, B);
+ T_0_15(14, B, C, D, E, A);
+ T_0_15(15, A, B, C, D, E);
+
+ /* Round 1 - tail. Input from 512-bit mixing array */
+ T_16_19(16, E, A, B, C, D);
+ T_16_19(17, D, E, A, B, C);
+ T_16_19(18, C, D, E, A, B);
+ T_16_19(19, B, C, D, E, A);
+
+ /* Round 2 */
+ T_20_39(20, A, B, C, D, E);
+ T_20_39(21, E, A, B, C, D);
+ T_20_39(22, D, E, A, B, C);
+ T_20_39(23, C, D, E, A, B);
+ T_20_39(24, B, C, D, E, A);
+ T_20_39(25, A, B, C, D, E);
+ T_20_39(26, E, A, B, C, D);
+ T_20_39(27, D, E, A, B, C);
+ T_20_39(28, C, D, E, A, B);
+ T_20_39(29, B, C, D, E, A);
+ T_20_39(30, A, B, C, D, E);
+ T_20_39(31, E, A, B, C, D);
+ T_20_39(32, D, E, A, B, C);
+ T_20_39(33, C, D, E, A, B);
+ T_20_39(34, B, C, D, E, A);
+ T_20_39(35, A, B, C, D, E);
+ T_20_39(36, E, A, B, C, D);
+ T_20_39(37, D, E, A, B, C);
+ T_20_39(38, C, D, E, A, B);
+ T_20_39(39, B, C, D, E, A);
+
+ /* Round 3 */
+ T_40_59(40, A, B, C, D, E);
+ T_40_59(41, E, A, B, C, D);
+ T_40_59(42, D, E, A, B, C);
+ T_40_59(43, C, D, E, A, B);
+ T_40_59(44, B, C, D, E, A);
+ T_40_59(45, A, B, C, D, E);
+ T_40_59(46, E, A, B, C, D);
+ T_40_59(47, D, E, A, B, C);
+ T_40_59(48, C, D, E, A, B);
+ T_40_59(49, B, C, D, E, A);
+ T_40_59(50, A, B, C, D, E);
+ T_40_59(51, E, A, B, C, D);
+ T_40_59(52, D, E, A, B, C);
+ T_40_59(53, C, D, E, A, B);
+ T_40_59(54, B, C, D, E, A);
+ T_40_59(55, A, B, C, D, E);
+ T_40_59(56, E, A, B, C, D);
+ T_40_59(57, D, E, A, B, C);
+ T_40_59(58, C, D, E, A, B);
+ T_40_59(59, B, C, D, E, A);
+
+ /* Round 4 */
+ T_60_79(60, A, B, C, D, E);
+ T_60_79(61, E, A, B, C, D);
+ T_60_79(62, D, E, A, B, C);
+ T_60_79(63, C, D, E, A, B);
+ T_60_79(64, B, C, D, E, A);
+ T_60_79(65, A, B, C, D, E);
+ T_60_79(66, E, A, B, C, D);
+ T_60_79(67, D, E, A, B, C);
+ T_60_79(68, C, D, E, A, B);
+ T_60_79(69, B, C, D, E, A);
+ T_60_79(70, A, B, C, D, E);
+ T_60_79(71, E, A, B, C, D);
+ T_60_79(72, D, E, A, B, C);
+ T_60_79(73, C, D, E, A, B);
+ T_60_79(74, B, C, D, E, A);
+ T_60_79(75, A, B, C, D, E);
+ T_60_79(76, E, A, B, C, D);
+ T_60_79(77, D, E, A, B, C);
+ T_60_79(78, C, D, E, A, B);
+ T_60_79(79, B, C, D, E, A);
+
+ ctx->H[0] += A;
+ ctx->H[1] += B;
+ ctx->H[2] += C;
+ ctx->H[3] += D;
+ ctx->H[4] += E;
+}
+
+void blk_SHA1_Init(blk_SHA_CTX *ctx)
+{
+ ctx->size = 0;
+
+ /* Initialize H with the magic constants (see FIPS180 for constants) */
+ ctx->H[0] = 0x67452301;
+ ctx->H[1] = 0xefcdab89;
+ ctx->H[2] = 0x98badcfe;
+ ctx->H[3] = 0x10325476;
+ ctx->H[4] = 0xc3d2e1f0;
+}
+
+void blk_SHA1_Update(blk_SHA_CTX *ctx, const void *data, unsigned long len)
+{
+ unsigned int lenW = ctx->size & 63;
+
+ ctx->size += len;
+
+ /* Read the data into W and process blocks as they get full */
+ if (lenW) {
+ unsigned int left = 64 - lenW;
+ if (len < left)
+ left = len;
+ memcpy(lenW + (char *)ctx->W, data, left);
+ lenW = (lenW + left) & 63;
+ len -= left;
+ data = ((const char *)data + left);
+ if (lenW)
+ return;
+ blk_SHA1_Block(ctx, ctx->W);
+ }
+ while (len >= 64) {
+ blk_SHA1_Block(ctx, data);
+ data = ((const char *)data + 64);
+ len -= 64;
+ }
+ if (len)
+ memcpy(ctx->W, data, len);
+}
+
+void blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx)
+{
+ static const unsigned char pad[64] = { 0x80 };
+ unsigned int padlen[2];
+ int i;
+
+ /* Pad with a binary 1 (ie 0x80), then zeroes, then length */
+ padlen[0] = htonl((uint32_t)(ctx->size >> 29));
+ padlen[1] = htonl((uint32_t)(ctx->size << 3));
+
+ i = ctx->size & 63;
+ blk_SHA1_Update(ctx, pad, 1 + (63 & (55 - i)));
+ blk_SHA1_Update(ctx, padlen, 8);
+
+ /* Output hash */
+ for (i = 0; i < 5; i++)
+ put_be32(hashout + i * 4, ctx->H[i]);
+}
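+
+/* A minimal usage sketch of the three functions above: hash a buffer in one
+ * or more Update() calls of arbitrary sizes, then extract the 20-byte digest.
+ *
+ *	unsigned char digest[20];
+ *	blk_SHA_CTX ctx;
+ *
+ *	blk_SHA1_Init(&ctx);
+ *	blk_SHA1_Update(&ctx, "abc", 3);
+ *	blk_SHA1_Final(digest, &ctx);
+ *
+ * Final() flushes the padding and length, so the context must be
+ * re-initialized before hashing another message.
+ */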
diff --git a/src/shctx.c b/src/shctx.c
new file mode 100644
index 0000000..be59053
--- /dev/null
+++ b/src/shctx.c
@@ -0,0 +1,320 @@
+/*
+ * shctx.c - shared context management functions for SSL
+ *
+ * Copyright (C) 2011-2012 EXCELIANCE
+ *
+ * Author: Emeric Brun - emeric@exceliance.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <sys/mman.h>
+#include <arpa/inet.h>
+#include <import/ebmbtree.h>
+#include <haproxy/list.h>
+#include <haproxy/shctx.h>
+
+/*
+ * Reserve a new row if <first> is null: put it in the hot list and set the
+ * refcount to 1. Otherwise append new blocks to the row whose first block
+ * is <first>.
+ *
+ * Blocks are reserved from the avail list and put in the hot list.
+ * Returns the first block put in the hot list, or NULL if not enough blocks
+ * are available.
+ */
+struct shared_block *shctx_row_reserve_hot(struct shared_context *shctx,
+ struct shared_block *first, int data_len)
+{
+ struct shared_block *last = NULL, *block, *sblock;
+ struct shared_block *ret = first;
+ int remain = 1;
+
+ BUG_ON(data_len < 0);
+
+ /* Check the object size limit. */
+ if (shctx->max_obj_size > 0) {
+ if ((first && first->len + data_len > shctx->max_obj_size) ||
+ (!first && data_len > shctx->max_obj_size))
+ goto out;
+ }
+
+ if (first) {
+ /* Check that there is some block to reserve.
+ * In this first block of code we compute the remaining room in the
+ * current list of blocks already reserved for this object.
+ * We return ASAP if there is enough room to copy <data_len> bytes.
+ */
+ last = first->last_reserved;
+ /* Remaining room. */
+ remain = (shctx->block_size * first->block_count - first->len);
+ if (remain) {
+ if (remain > data_len) {
+ return last ? last : first;
+ } else {
+ data_len -= remain;
+ if (data_len <= 0)
+ return last ? last : first;
+ }
+ }
+ }
+
+ shctx_wrlock(shctx);
+
+ /* not enough usable blocks */
+ if (data_len > shctx->nbav * shctx->block_size) {
+ shctx_wrunlock(shctx);
+ goto out;
+ }
+
+
+ if (data_len <= 0 || LIST_ISEMPTY(&shctx->avail)) {
+ ret = NULL;
+ shctx_wrunlock(shctx);
+ goto out;
+ }
+
+ list_for_each_entry_safe(block, sblock, &shctx->avail, list) {
+
+ /* release callback */
+ if (block->len && shctx->free_block)
+ shctx->free_block(block, shctx->cb_data);
+ block->len = 0;
+
+ if (ret) {
+ shctx_block_append_hot(shctx, ret, block);
+ if (!remain) {
+ first->last_append = block;
+ remain = 1;
+ }
+ } else {
+ ret = shctx_block_detach(shctx, block);
+ ret->len = 0;
+ ret->block_count = 0;
+ ret->last_append = NULL;
+ ret->refcount = 1;
+ }
+
+ ++ret->block_count;
+
+ data_len -= shctx->block_size;
+
+ if (data_len <= 0) {
+ ret->last_reserved = block;
+ break;
+ }
+ }
+
+ shctx_wrunlock(shctx);
+
+ if (shctx->reserve_finish)
+ shctx->reserve_finish(shctx);
+
+out:
+ return ret;
+}
+
+/*
+ * If the refcount is 0, move the row to the hot list. Increment the refcount.
+ */
+void shctx_row_detach(struct shared_context *shctx, struct shared_block *first)
+{
+ if (first->refcount <= 0) {
+
+ BUG_ON(!first->last_reserved);
+
+ /* Detach the row from the avail list by linking the first item's prev
+ * to the last item's next. This allows the LIST_SPLICE_END_DETACHED
+ * macro to be used. */
+ first->list.p->n = first->last_reserved->list.n;
+ first->last_reserved->list.n->p = first->list.p;
+
+ first->list.p = &first->last_reserved->list;
+ first->last_reserved->list.n = &first->list;
+
+ shctx->nbav -= first->block_count;
+ }
+
+ first->refcount++;
+}
+
+/*
+ * Decrement the refcount and move the row to the end of the avail list if it reaches 0.
+ */
+void shctx_row_reattach(struct shared_context *shctx, struct shared_block *first)
+{
+ first->refcount--;
+
+ if (first->refcount <= 0) {
+
+ BUG_ON(!first->last_reserved);
+
+ /* Reattach to avail list */
+ first->list.p = &first->last_reserved->list;
+ LIST_SPLICE_END_DETACHED(&shctx->avail, &first->list);
+
+ shctx->nbav += first->block_count;
+ }
+}
+
+
+/*
+ * Append data to the row if there is enough space.
+ * The row should be in the hot list.
+ *
+ * Returns the number of bytes left to copy (0 once everything was appended)
+ * if >= 0, or the missing space needed to contain the data, as a negative
+ * value, if < 0.
+ */
+int shctx_row_data_append(struct shared_context *shctx, struct shared_block *first,
+ unsigned char *data, int len)
+{
+ int remain, start;
+ struct shared_block *block;
+
+ /* not enough room: return the missing space as a negative value */
+ if (len > first->block_count * shctx->block_size - first->len)
+ return (first->block_count * shctx->block_size - first->len) - len;
+
+ block = first->last_append ? first->last_append : first;
+ do {
+ /* end of copy */
+ if (len <= 0)
+ break;
+
+ /* remaining free room in the current block. */
+ remain = (shctx->block_size * first->block_count - first->len) % shctx->block_size;
+ BUG_ON(remain < 0);
+
+ /* if remain == 0, previous buffers are full, or first->len == 0 */
+ if (!remain) {
+ remain = shctx->block_size;
+ start = 0;
+ }
+ else {
+ /* start must be calculated before remain is modified */
+ start = shctx->block_size - remain;
+ BUG_ON(start < 0);
+ }
+
+ /* must not try to copy more than len */
+ remain = MIN(remain, len);
+
+ memcpy(block->data + start, data, remain);
+
+ data += remain;
+ len -= remain;
+ first->len += remain; /* update len in the head of the row */
+ first->last_append = block;
+
+ block = LIST_ELEM(block->list.n, struct shared_block*, list);
+ } while (block != first);
+
+ return len;
+}
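+
+/* Worked example for the space check at the top of the function above
+ * (hypothetical sizes): with block_size=8, block_count=2 and first->len=10,
+ * the row still has 2*8-10 = 6 free bytes. Appending len=9 returns
+ * 6-9 = -3, i.e. blocks covering 3 more bytes must first be reserved with
+ * shctx_row_reserve_hot(). Appending len=5 copies everything and returns 0.
+ */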
+
+/*
+ * Copy <len> bytes of data from a row of blocks and return the number of
+ * bytes remaining to copy. If 0 is returned, all the data was successfully
+ * copied.
+ *
+ * The row should be in the hot list.
+ */
+int shctx_row_data_get(struct shared_context *shctx, struct shared_block *first,
+ unsigned char *dst, int offset, int len)
+{
+ int count = 0, size = 0, start = -1;
+ struct shared_block *block;
+
+ /* can't copy more */
+ if (len > first->len)
+ len = first->len;
+
+ block = first;
+ count = 0;
+ /* Pass through the blocks to copy them */
+ do {
+ if (count >= first->block_count || len <= 0)
+ break;
+
+ count++;
+ /* continue until we are in the right block
+ * corresponding to the offset */
+ if (count < offset / shctx->block_size + 1)
+ continue;
+
+ /* on the first block, data may not begin at offset 0 */
+ if (start == -1)
+ start = offset - (count - 1) * shctx->block_size;
+
+ BUG_ON(start < 0);
+
+ /* size can be lower than a block when copying the last block */
+ size = MIN(shctx->block_size - start, len);
+ BUG_ON(size < 0);
+
+ memcpy(dst, block->data + start, size);
+ dst += size;
+ len -= size;
+ start = 0;
+
+ block = LIST_ELEM(block->list.n, struct shared_block*, list);
+ } while (block != first);
+ return len;
+}
+
+/* Allocate a shared memory context.
+ * <maxblocks> is the maximum number of blocks.
+ * If <maxblocks> is less than or equal to 0, the SSL cache is disabled.
+ * Returns: -1 on allocation failure, <maxblocks> if it performed the context
+ * allocation, and 0 if the cache was already allocated.
+ */
+int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize,
+ unsigned int maxobjsz, int extra)
+{
+ int i;
+ struct shared_context *shctx;
+ int ret;
+ void *cur;
+ int maptype = MAP_SHARED;
+
+ if (maxblocks <= 0)
+ return 0;
+
+ /* make sure to align the records on a pointer size */
+ blocksize = (blocksize + sizeof(void *) - 1) & -sizeof(void *);
+ extra = (extra + sizeof(void *) - 1) & -sizeof(void *);
+
+ shctx = (struct shared_context *)mmap(NULL, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)),
+ PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0);
+ if (!shctx || shctx == MAP_FAILED) {
+ shctx = NULL;
+ ret = SHCTX_E_ALLOC_CACHE;
+ goto err;
+ }
+
+ shctx->nbav = 0;
+
+ LIST_INIT(&shctx->avail);
+ HA_RWLOCK_INIT(&shctx->lock);
+
+ shctx->block_size = blocksize;
+ shctx->max_obj_size = maxobjsz == (unsigned int)-1 ? 0 : maxobjsz;
+
+ /* init the free blocks after the shared context struct */
+ cur = (void *)shctx + sizeof(struct shared_context) + extra;
+ for (i = 0; i < maxblocks; i++) {
+ struct shared_block *cur_block = (struct shared_block *)cur;
+ cur_block->len = 0;
+ cur_block->refcount = 0;
+ cur_block->block_count = 1;
+ LIST_APPEND(&shctx->avail, &cur_block->list);
+ shctx->nbav++;
+ cur += sizeof(struct shared_block) + blocksize;
+ }
+ ret = maxblocks;
+
+err:
+ *orig_shctx = shctx;
+ return ret;
+}
+
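+/* A minimal usage sketch of the API above (illustrative sizes, error
+ * handling reduced to the essentials): reserve a row, append data, read it
+ * back, then hand the row back to the avail list.
+ *
+ *	struct shared_context *shctx;
+ *	struct shared_block *row;
+ *	unsigned char out[5];
+ *
+ *	if (shctx_init(&shctx, 256, 1024, 0, 0) <= 0)
+ *		return;
+ *	row = shctx_row_reserve_hot(shctx, NULL, 5);  // refcount == 1
+ *	if (row) {
+ *		shctx_row_data_append(shctx, row, (unsigned char *)"hello", 5);
+ *		shctx_row_data_get(shctx, row, out, 0, 5);
+ *		shctx_row_reattach(shctx, row);       // refcount back to 0
+ *	}
+ */
+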
diff --git a/src/signal.c b/src/signal.c
new file mode 100644
index 0000000..1bb60eb
--- /dev/null
+++ b/src/signal.c
@@ -0,0 +1,284 @@
+/*
+ * Asynchronous signal delivery functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <signal.h>
+#include <string.h>
+
+#include <haproxy/errors.h>
+#include <haproxy/signal.h>
+#include <haproxy/task.h>
+
+/* Principle: we keep an in-order list of the first occurrence of all received
+ * signals. All occurrences of a same signal are grouped though. The signal
+ * queue does not need to be deeper than the number of signals we can handle.
+ * The handlers will be called asynchronously with the signal number. They can
+ * check the number of calls themselves via this signal's descriptor.
+ */
+
+int signal_queue_len; /* length of signal queue, <= MAX_SIGNAL (1 entry per signal max) */
+int signal_queue[MAX_SIGNAL]; /* in-order queue of received signals */
+struct signal_descriptor signal_state[MAX_SIGNAL];
+sigset_t blocked_sig;
+int signal_pending = 0; /* non-zero if at least one signal remains unprocessed */
+
+DECLARE_STATIC_POOL(pool_head_sig_handlers, "sig_handlers", sizeof(struct sig_handler));
+
+/* Common signal handler, used by all signals. Received signals are queued.
+ * Signal number zero has a specific status, as it cannot be delivered by the
+ * system; any function may call it to perform asynchronous signal delivery.
+ */
+void signal_handler(int sig)
+{
+ if (sig < 0 || sig >= MAX_SIGNAL) {
+ /* unhandled signal */
+ signal(sig, SIG_IGN);
+ qfprintf(stderr, "Received unhandled signal %d. Signal has been disabled.\n", sig);
+ return;
+ }
+
+ if (!signal_state[sig].count) {
+ /* signal was not queued yet */
+ if (signal_queue_len < MAX_SIGNAL)
+ signal_queue[signal_queue_len++] = sig;
+ else
+ qfprintf(stderr, "Signal %d : signal queue is unexpectedly full.\n", sig);
+ }
+
+ signal_state[sig].count++;
+ if (sig)
+ signal(sig, signal_handler); /* re-arm signal */
+
+ /* If the thread is TH_FL_SLEEPING we need to wake it */
+ wake_thread(tid);
+}
+
+/* Call handlers of all pending signals and clear counts and queue length. The
+ * handlers may unregister themselves by calling signal_register() while they
+ * are called, just like it is done with normal signal handlers.
+ * Note that it is more efficient to call the inline version which checks the
+ * queue length before getting here.
+ */
+void __signal_process_queue()
+{
+ int sig, cur_pos = 0;
+ struct signal_descriptor *desc;
+ sigset_t old_sig;
+
+ /* block signal delivery during processing */
+ ha_sigmask(SIG_SETMASK, &blocked_sig, &old_sig);
+
+ /* It is important that we scan the queue forwards so that we can
+ * catch any signal that would have been queued by another signal
+ * handler. That allows real signal handlers to redistribute signals
+ * to tasks subscribed to signal zero.
+ */
+ for (cur_pos = 0; cur_pos < signal_queue_len; cur_pos++) {
+ sig = signal_queue[cur_pos];
+ desc = &signal_state[sig];
+ if (desc->count) {
+ struct sig_handler *sh, *shb;
+ list_for_each_entry_safe(sh, shb, &desc->handlers, list) {
+ if ((sh->flags & SIG_F_TYPE_FCT) && sh->handler)
+ ((void (*)(struct sig_handler *))sh->handler)(sh);
+ else if ((sh->flags & SIG_F_TYPE_TASK) && sh->handler)
+ task_wakeup(sh->handler, TASK_WOKEN_SIGNAL);
+ }
+ desc->count = 0;
+ }
+ }
+ signal_queue_len = 0;
+
+ /* restore signal delivery */
+ ha_sigmask(SIG_SETMASK, &old_sig, NULL);
+}
+
+/* perform minimal initializations */
+static void signal_init()
+{
+ int sig;
+
+ signal_queue_len = 0;
+ memset(signal_queue, 0, sizeof(signal_queue));
+ memset(signal_state, 0, sizeof(signal_state));
+
+ sigfillset(&blocked_sig);
+ sigdelset(&blocked_sig, SIGPROF);
+ /* man sigprocmask: If SIGBUS, SIGFPE, SIGILL, or SIGSEGV are
+ generated while they are blocked, the result is undefined, unless
+ the signal was generated by kill(2),
+ sigqueue(3), or raise(3).
+ Do not ignore WDTSIG or DEBUGSIG either, or it may deadlock the
+ watchdog */
+ sigdelset(&blocked_sig, SIGBUS);
+ sigdelset(&blocked_sig, SIGFPE);
+ sigdelset(&blocked_sig, SIGILL);
+ sigdelset(&blocked_sig, SIGSEGV);
+#ifdef DEBUGSIG
+ sigdelset(&blocked_sig, DEBUGSIG);
+#endif
+#ifdef WDTSIG
+ sigdelset(&blocked_sig, WDTSIG);
+#endif
+ for (sig = 0; sig < MAX_SIGNAL; sig++)
+ LIST_INIT(&signal_state[sig].handlers);
+}
+
+/*
+ * This function should be called to unblock all signals
+ */
+void haproxy_unblock_signals()
+{
+ sigset_t set;
+
+ /* Ensure signals are not blocked. Some shells or service managers may
+ * accidentally block all of our signals unfortunately, causing lots of
+ * zombie processes to remain in the background during reloads.
+ */
+ sigemptyset(&set);
+ ha_sigmask(SIG_SETMASK, &set, NULL);
+}
+
+/* releases all registered signal handlers */
+void deinit_signals()
+{
+ int sig;
+ struct sig_handler *sh, *shb;
+
+ for (sig = 0; sig < MAX_SIGNAL; sig++) {
+ if (sig != SIGPROF)
+ signal(sig, SIG_DFL);
+ list_for_each_entry_safe(sh, shb, &signal_state[sig].handlers, list) {
+ LIST_DELETE(&sh->list);
+ pool_free(pool_head_sig_handlers, sh);
+ }
+ }
+}
+
+/* Register a function and an integer argument on a signal. A pointer to the
+ * newly allocated sig_handler is returned, or NULL in case of any error. The
+ * caller is responsible for unregistering the function when not used anymore.
+ * Note that passing a NULL as the function pointer enables interception of the
+ * signal without processing, which is identical to SIG_IGN. If the signal is
+ * zero (which the system cannot deliver), only internal functions will be able
+ * to notify the registered functions.
+ */
+struct sig_handler *signal_register_fct(int sig, void (*fct)(struct sig_handler *), int arg)
+{
+ struct sig_handler *sh;
+
+ if (sig < 0 || sig >= MAX_SIGNAL)
+ return NULL;
+
+ if (sig)
+ signal(sig, fct ? signal_handler : SIG_IGN);
+
+ if (!fct)
+ return NULL;
+
+ sh = pool_alloc(pool_head_sig_handlers);
+ if (!sh)
+ return NULL;
+
+ sh->handler = fct;
+ sh->arg = arg;
+ sh->flags = SIG_F_TYPE_FCT;
+ LIST_APPEND(&signal_state[sig].handlers, &sh->list);
+ return sh;
+}
+
+/* Register a task and a wake-up reason on a signal. A pointer to the newly
+ * allocated sig_handler is returned, or NULL in case of any error. The caller
+ * is responsible for unregistering the task when not used anymore. Note that
+ * passing a NULL as the task pointer enables interception of the signal
+ * without processing, which is identical to SIG_IGN. If the signal is zero
+ * (which the system cannot deliver), only internal functions will be able to
+ * notify the registered functions.
+ */
+struct sig_handler *signal_register_task(int sig, struct task *task, int reason)
+{
+ struct sig_handler *sh;
+
+ if (sig < 0 || sig >= MAX_SIGNAL)
+ return NULL;
+
+ if (sig)
+ signal(sig, signal_handler);
+
+ if (!task)
+ return NULL;
+
+ sh = pool_alloc(pool_head_sig_handlers);
+ if (!sh)
+ return NULL;
+
+ sh->handler = task;
+ sh->arg = reason & ~TASK_WOKEN_ANY;
+ sh->flags = SIG_F_TYPE_TASK;
+ LIST_APPEND(&signal_state[sig].handlers, &sh->list);
+ return sh;
+}
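+
+/* A short usage sketch (illustrative, with a hypothetical handler): register
+ * a function on the internal signal 0, then deliver it asynchronously from
+ * any code path. signal_process_queue() refers to the inline wrapper
+ * mentioned above __signal_process_queue() that first checks the queue
+ * length.
+ *
+ *	static void on_sig0(struct sig_handler *sh)
+ *	{
+ *		do_something(sh->arg);   // sh->arg is the 42 passed below
+ *	}
+ *	...
+ *	signal_register_fct(0, on_sig0, 42);
+ *	signal_handler(0);       // queue the event
+ *	signal_process_queue();  // run registered handlers
+ */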
+
+/* Immediately unregister a handler so that no further signals may be delivered
+ * to it. The struct is released so the caller may not reference it anymore.
+ */
+void signal_unregister_handler(struct sig_handler *handler)
+{
+ LIST_DELETE(&handler->list);
+ pool_free(pool_head_sig_handlers, handler);
+}
+
+/* Immediately unregister a handler so that no further signals may be delivered
+ * to it. The handler struct does not need to be known, only the function or
+ * task pointer. This method is expensive because it scans the whole list, so it
+ * should only be used for rare cases (eg: exit). The struct is released so the
+ * caller may not reference it anymore.
+ */
+void signal_unregister_target(int sig, void *target)
+{
+ struct sig_handler *sh, *shb;
+
+ if (sig < 0 || sig >= MAX_SIGNAL)
+ return;
+
+ if (!target)
+ return;
+
+ list_for_each_entry_safe(sh, shb, &signal_state[sig].handlers, list) {
+ if (sh->handler == target) {
+ LIST_DELETE(&sh->list);
+ pool_free(pool_head_sig_handlers, sh);
+ break;
+ }
+ }
+}
+
+/*
+ * Immediately unregister every handler assigned to a signal <sig>.
+ * Once the handler list is empty, the signal is ignored with SIG_IGN.
+ */
+
+void signal_unregister(int sig)
+{
+ struct sig_handler *sh, *shb;
+
+ if (sig < 0 || sig >= MAX_SIGNAL)
+ return;
+
+ list_for_each_entry_safe(sh, shb, &signal_state[sig].handlers, list) {
+ LIST_DELETE(&sh->list);
+ pool_free(pool_head_sig_handlers, sh);
+ }
+
+ signal(sig, SIG_IGN);
+}
+
+INITCALL0(STG_PREPARE, signal_init);
diff --git a/src/sink.c b/src/sink.c
new file mode 100644
index 0000000..66c2b8c
--- /dev/null
+++ b/src/sink.c
@@ -0,0 +1,1406 @@
+/*
+ * Event sink management
+ *
+ * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <sys/mman.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <import/ist.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/ring.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/signal.h>
+#include <haproxy/sink.h>
+#include <haproxy/stconn.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+struct list sink_list = LIST_HEAD_INIT(sink_list);
+
+/* sink proxies list */
+struct proxy *sink_proxies_list;
+
+struct sink *cfg_sink;
+
+struct sink *sink_find(const char *name)
+{
+ struct sink *sink;
+
+ list_for_each_entry(sink, &sink_list, sink_list)
+ if (strcmp(sink->name, name) == 0)
+ return sink;
+ return NULL;
+}
+
+/* creates a new sink and adds it to the list; it's still generic and not fully
+ * initialized. Returns NULL on allocation failure. If another one already
+ * exists with the same name, it will be returned. The caller can detect it as
+ * a newly created one has type SINK_TYPE_NEW.
+ */
+static struct sink *__sink_new(const char *name, const char *desc, int fmt)
+{
+ struct sink *sink;
+
+ sink = sink_find(name);
+ if (sink)
+ goto end;
+
+ sink = calloc(1, sizeof(*sink));
+ if (!sink)
+ goto end;
+
+ sink->name = strdup(name);
+ if (!sink->name)
+ goto err;
+
+ sink->desc = strdup(desc);
+ if (!sink->desc)
+ goto err;
+
+ sink->fmt = fmt;
+ sink->type = SINK_TYPE_NEW;
+ sink->maxlen = BUFSIZE;
+ /* address will be filled by the caller if needed */
+ sink->ctx.fd = -1;
+ sink->ctx.dropped = 0;
+ HA_RWLOCK_INIT(&sink->ctx.lock);
+ LIST_APPEND(&sink_list, &sink->sink_list);
+ end:
+ return sink;
+
+ err:
+ ha_free(&sink->name);
+ ha_free(&sink->desc);
+ ha_free(&sink);
+
+ return NULL;
+}
+
+/* creates a sink called <name> of type FD associated to fd <fd>, format <fmt>,
+ * and description <desc>. Returns NULL on allocation failure or conflict.
+ * Perfect duplicates are merged (same type, fd, and name).
+ */
+struct sink *sink_new_fd(const char *name, const char *desc, enum log_fmt fmt, int fd)
+{
+ struct sink *sink;
+
+ sink = __sink_new(name, desc, fmt);
+ if (!sink || (sink->type == SINK_TYPE_FD && sink->ctx.fd == fd))
+ goto end;
+
+ if (sink->type != SINK_TYPE_NEW) {
+ sink = NULL;
+ goto end;
+ }
+
+ sink->type = SINK_TYPE_FD;
+ sink->ctx.fd = fd;
+ end:
+ return sink;
+}
+
+/* creates a sink called <name> of type BUF of size <size>, format <fmt>,
+ * and description <desc>. Returns NULL on allocation failure or conflict.
+ * Perfect duplicates are merged (same type and name). If sizes differ, the
+ * largest one is kept.
+ */
+struct sink *sink_new_buf(const char *name, const char *desc, enum log_fmt fmt, size_t size)
+{
+ struct sink *sink;
+
+ sink = __sink_new(name, desc, fmt);
+ if (!sink)
+ goto fail;
+
+ if (sink->type == SINK_TYPE_BUFFER) {
+ /* such a buffer already exists, we may have to resize it */
+ if (!ring_resize(sink->ctx.ring, size))
+ goto fail;
+ goto end;
+ }
+
+ if (sink->type != SINK_TYPE_NEW) {
+ /* already exists of another type */
+ goto fail;
+ }
+
+ sink->ctx.ring = ring_new(size);
+ if (!sink->ctx.ring) {
+ LIST_DELETE(&sink->sink_list);
+ free(sink->name);
+ free(sink->desc);
+ free(sink);
+ goto fail;
+ }
+
+ sink->type = SINK_TYPE_BUFFER;
+ end:
+ return sink;
+ fail:
+ return NULL;
+}
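+
+/* A minimal usage sketch (illustrative names and sizes): an fd-based sink
+ * writing raw lines to stderr, plus a 1 MB ring-backed sink. LOG_FORMAT_RAW
+ * is handled specially in __sink_write() below.
+ *
+ *	struct sink *err_sink, *buf_sink;
+ *
+ *	err_sink = sink_new_fd("stderr", "console output", LOG_FORMAT_RAW, 2);
+ *	buf_sink = sink_new_buf("events", "in-memory events", LOG_FORMAT_RAW, 1 << 20);
+ *	if (!err_sink || !buf_sink)
+ *		goto fail;
+ */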
+
+/* tries to send <nmsg> message parts from message array <msg> to sink <sink>.
+ * Formatting according to the sink's preference is done here, unless sink->fmt
+ * is unspecified, in which case the caller formatting will be used instead.
+ * Lost messages are NOT accounted for. It is preferable to call sink_write()
+ * instead which will also try to emit the number of dropped messages when there
+ * are any.
+ *
+ * It will stop writing at <maxlen> instead of sink->maxlen if <maxlen> is
+ * positive and lower than sink->maxlen.
+ *
+ * It returns >0 if it could write anything, <=0 otherwise.
+ */
+ssize_t __sink_write(struct sink *sink, struct log_header hdr,
+                     size_t maxlen, const struct ist msg[], size_t nmsg)
+{
+ struct ist *pfx = NULL;
+ size_t npfx = 0;
+
+ if (sink->fmt == LOG_FORMAT_RAW)
+ goto send;
+
+ if (sink->fmt != LOG_FORMAT_UNSPEC)
+ hdr.format = sink->fmt; /* sink format prevails over log one */
+ pfx = build_log_header(hdr, &npfx);
+
+send:
+ if (!maxlen)
+ maxlen = ~0;
+ if (sink->type == SINK_TYPE_FD) {
+ return fd_write_frag_line(sink->ctx.fd, MIN(maxlen, sink->maxlen), pfx, npfx, msg, nmsg, 1);
+ }
+ else if (sink->type == SINK_TYPE_BUFFER) {
+ return ring_write(sink->ctx.ring, MIN(maxlen, sink->maxlen), pfx, npfx, msg, nmsg);
+ }
+ return 0;
+}
+
+/* Tries to emit a message indicating the number of dropped events.
+ * The log header of the original message that we tried to emit is reused
+ * here with the only difference that we override the log level. This is
+ * possible since the announce message will be sent from the same context.
+ *
+ * In case of success, the amount of drops is reduced by as much. It's supposed
+ * to be called under an exclusive lock on the sink to avoid multiple producers
+ * doing the same. On success, >0 is returned, otherwise <=0 on failure.
+ */
+int sink_announce_dropped(struct sink *sink, struct log_header hdr)
+{
+ unsigned int dropped;
+ struct buffer msg;
+ struct ist msgvec[1];
+ char logbuf[64];
+
+ while (unlikely((dropped = sink->ctx.dropped) > 0)) {
+ chunk_init(&msg, logbuf, sizeof(logbuf));
+ chunk_printf(&msg, "%u event%s dropped", dropped, dropped > 1 ? "s" : "");
+ msgvec[0] = ist2(msg.area, msg.data);
+
+ hdr.level = LOG_NOTICE; /* override level but keep original log header data */
+
+ if (__sink_write(sink, hdr, 0, msgvec, 1) <= 0)
+ return 0;
+ /* success! */
+ HA_ATOMIC_SUB(&sink->ctx.dropped, dropped);
+ }
+ return 1;
+}
+
+/* parse the "show events" command, returns 1 if a message is returned, otherwise zero */
+static int cli_parse_show_events(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct sink *sink;
+ uint ring_flags;
+ int arg;
+
+ args++; // make args[1] the 1st arg
+
+ if (!*args[1]) {
+ /* no arg => report the list of supported sinks */
+ chunk_printf(&trash, "Supported events sinks are listed below. Add -w(wait), -n(new). Any key to stop\n");
+ list_for_each_entry(sink, &sink_list, sink_list) {
+ chunk_appendf(&trash, " %-10s : type=%s, %u dropped, %s\n",
+ sink->name,
+ sink->type == SINK_TYPE_NEW ? "init" :
+ sink->type == SINK_TYPE_FD ? "fd" :
+ sink->type == SINK_TYPE_BUFFER ? "buffer" : "?",
+ sink->ctx.dropped, sink->desc);
+ }
+
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+ }
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ sink = sink_find(args[1]);
+ if (!sink)
+ return cli_err(appctx, "No such event sink");
+
+ if (sink->type != SINK_TYPE_BUFFER)
+ return cli_msg(appctx, LOG_NOTICE, "Nothing to report for this sink");
+
+ ring_flags = 0;
+ for (arg = 2; *args[arg]; arg++) {
+ if (strcmp(args[arg], "-w") == 0)
+ ring_flags |= RING_WF_WAIT_MODE;
+ else if (strcmp(args[arg], "-n") == 0)
+ ring_flags |= RING_WF_SEEK_NEW;
+ else if (strcmp(args[arg], "-nw") == 0 || strcmp(args[arg], "-wn") == 0)
+ ring_flags |= RING_WF_WAIT_MODE | RING_WF_SEEK_NEW;
+ else
+ return cli_err(appctx, "unknown option");
+ }
+ return ring_attach_cli(sink->ctx.ring, appctx, ring_flags);
+}
+
+/* Pre-configures a ring proxy to emit connections */
+void sink_setup_proxy(struct proxy *px)
+{
+ px->last_change = ns_to_sec(now_ns);
+ px->cap = PR_CAP_BE;
+ px->maxconn = 0;
+ px->conn_retries = 1;
+ px->timeout.server = TICK_ETERNITY;
+ px->timeout.client = TICK_ETERNITY;
+ px->timeout.connect = TICK_ETERNITY;
+ px->accept = NULL;
+ px->options2 |= PR_O2_INDEPSTR | PR_O2_SMARTCON | PR_O2_SMARTACC;
+ px->next = sink_proxies_list;
+ sink_proxies_list = px;
+}
+
+/*
+ * IO Handler to handle message push to syslog tcp server.
+ * It takes its context from appctx->svcctx.
+ */
+static void sink_forward_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct sink_forward_target *sft = appctx->svcctx;
+ struct sink *sink = sft->sink;
+ struct ring *ring = sink->ctx.ring;
+ struct buffer *buf = &ring->buf;
+ uint64_t msg_len;
+ size_t len, cnt, ofs, last_ofs;
+ int ret = 0;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW))))
+ goto out;
+
+ /* if stopping was requested, close immediately */
+ if (unlikely(stopping))
+ goto close;
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ goto out;
+ }
+
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ if (appctx != sft->appctx) {
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+ goto close;
+ }
+
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+
+ HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(sft->ofs == ~0)) {
+ sft->ofs = b_peek_ofs(buf, 0);
+ HA_ATOMIC_INC(b_orig(buf) + sft->ofs);
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs = sft->ofs - b_head_ofs(buf);
+ if (sft->ofs < b_head_ofs(buf))
+ ofs += b_size(buf);
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ /* in this loop, ofs always points to the counter byte that precedes
+ * the message so that we can take our reference there if we have to
+ * stop before the end (ret=0).
+ */
+ ret = 1;
+ while (ofs + 1 < b_data(buf)) {
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+
+ if (unlikely(msg_len + 1 > b_size(&trash))) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ chunk_reset(&trash);
+ len = b_getblk(buf, trash.area, msg_len, ofs + cnt);
+ trash.data += len;
+ trash.area[trash.data++] = '\n';
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ret = 0;
+ break;
+ }
+ ofs += cnt + msg_len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ last_ofs = b_tail_ofs(buf);
+ sft->ofs = b_peek_ofs(buf, ofs);
+
+ HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock);
+
+ if (ret) {
+ /* let's be woken up once new data arrive */
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+ LIST_APPEND(&ring->waiters, &appctx->wait_entry);
+ ofs = b_tail_ofs(buf);
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+ if (ofs != last_ofs) {
+ /* more data was added into the ring between the
+ * unlock and the lock, and the writer might not
+ * have seen us. We need to reschedule a read.
+ */
+ applet_have_more_data(appctx);
+ } else
+ applet_have_no_more_data(appctx);
+ }
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+
+out:
+ /* always drain data from server */
+ co_skip(sc_oc(sc), sc_oc(sc)->output);
+ return;
+
+close:
+ se_fl_set(appctx->sedesc, SE_FL_EOS|SE_FL_EOI);
+}
+
+/*
+ * IO Handler to handle message push to syslog tcp server
+ * using octet counting frames
+ * It takes its context from appctx->svcctx.
+ */
+static void sink_forward_oc_io_handler(struct appctx *appctx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct sink_forward_target *sft = appctx->svcctx;
+ struct sink *sink = sft->sink;
+ struct ring *ring = sink->ctx.ring;
+ struct buffer *buf = &ring->buf;
+ uint64_t msg_len;
+ size_t len, cnt, ofs;
+ int ret = 0;
+ char *p;
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW))))
+ goto out;
+
+ /* if stopping was requested, close immediately */
+ if (unlikely(stopping))
+ goto close;
+
+ /* if the connection is not established, inform the stream that we want
+ * to be notified whenever the connection completes.
+ */
+ if (sc_opposite(sc)->state < SC_ST_EST) {
+ applet_need_more_data(appctx);
+ se_need_remote_conn(appctx->sedesc);
+ applet_have_more_data(appctx);
+ goto out;
+ }
+
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ if (appctx != sft->appctx) {
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+ goto close;
+ }
+
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+ LIST_DEL_INIT(&appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+
+ HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock);
+
+ /* explanation for the initialization below: it would be better to do
+ * this in the parsing function but this would occasionally result in
+ * dropped events because we'd take a reference on the oldest message
+ * and keep it while being scheduled. Thus instead let's take it the
+ * first time we enter here so that we have a chance to pass many
+ * existing messages before grabbing a reference to a location. This
+ * value cannot be produced after initialization.
+ */
+ if (unlikely(sft->ofs == ~0)) {
+ sft->ofs = b_peek_ofs(buf, 0);
+ HA_ATOMIC_INC(b_orig(buf) + sft->ofs);
+ }
+
+ /* we were already there, adjust the offset to be relative to
+ * the buffer's head and remove us from the counter.
+ */
+ ofs = sft->ofs - b_head_ofs(buf);
+ if (sft->ofs < b_head_ofs(buf))
+ ofs += b_size(buf);
+ BUG_ON(ofs >= buf->size);
+ HA_ATOMIC_DEC(b_peek(buf, ofs));
+
+ /* in this loop, ofs always points to the counter byte that precedes
+ * the message so that we can take our reference there if we have to
+ * stop before the end (ret=0).
+ */
+ ret = 1;
+ while (ofs + 1 < b_data(buf)) {
+ cnt = 1;
+ len = b_peek_varint(buf, ofs + cnt, &msg_len);
+ if (!len)
+ break;
+ cnt += len;
+ BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf));
+
+ chunk_reset(&trash);
+ p = ulltoa(msg_len, trash.area, b_size(&trash));
+ if (p) {
+ trash.data = (p - trash.area) + 1;
+ *p = ' ';
+ }
+
+ if (!p || (trash.data + msg_len > b_size(&trash))) {
+ /* too large a message to ever fit, let's skip it */
+ ofs += cnt + msg_len;
+ continue;
+ }
+
+ trash.data += b_getblk(buf, p + 1, msg_len, ofs + cnt);
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ ret = 0;
+ break;
+ }
+ ofs += cnt + msg_len;
+ }
+
+ HA_ATOMIC_INC(b_peek(buf, ofs));
+ sft->ofs = b_peek_ofs(buf, ofs);
+
+ HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock);
+
+ if (ret) {
+ /* let's be woken up once new data arrive */
+ HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock);
+ LIST_APPEND(&ring->waiters, &appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock);
+ applet_have_no_more_data(appctx);
+ }
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+
+ out:
+ /* always drain data from server */
+ co_skip(sc_oc(sc), sc_oc(sc)->output);
+ return;
+
+close:
+ se_fl_set(appctx->sedesc, SE_FL_EOS|SE_FL_EOI);
+ goto out;
+}
+
+void __sink_forward_session_deinit(struct sink_forward_target *sft)
+{
+ struct sink *sink;
+
+ sink = sft->sink;
+ if (!sink)
+ return;
+
+ HA_RWLOCK_WRLOCK(RING_LOCK, &sink->ctx.ring->lock);
+ LIST_DEL_INIT(&sft->appctx->wait_entry);
+ HA_RWLOCK_WRUNLOCK(RING_LOCK, &sink->ctx.ring->lock);
+
+ sft->appctx = NULL;
+ task_wakeup(sink->forward_task, TASK_WOKEN_MSG);
+}
+
+static int sink_forward_session_init(struct appctx *appctx)
+{
+ struct sink_forward_target *sft = appctx->svcctx;
+ struct stream *s;
+ struct sockaddr_storage *addr = NULL;
+
+ if (!sockaddr_alloc(&addr, &sft->srv->addr, sizeof(sft->srv->addr)))
+ goto out_error;
+ /* srv port should be learned from srv->svc_port not from srv->addr */
+ set_host_port(addr, sft->srv->svc_port);
+
+ if (appctx_finalize_startup(appctx, sft->srv->proxy, &BUF_NULL) == -1)
+ goto out_free_addr;
+
+ s = appctx_strm(appctx);
+ s->scb->dst = addr;
+ s->scb->flags |= (SC_FL_RCV_ONCE|SC_FL_NOLINGER);
+
+ s->target = &sft->srv->obj_type;
+ s->flags = SF_ASSIGNED;
+
+ s->do_log = NULL;
+ s->uniq_id = 0;
+
+ applet_expect_no_data(appctx);
+ sft->appctx = appctx;
+
+ return 0;
+
+ out_free_addr:
+ sockaddr_free(&addr);
+ out_error:
+ return -1;
+}
+
+static void sink_forward_session_release(struct appctx *appctx)
+{
+ struct sink_forward_target *sft = appctx->svcctx;
+
+ if (!sft)
+ return;
+
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ if (sft->appctx == appctx)
+ __sink_forward_session_deinit(sft);
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+}
+
+static struct applet sink_forward_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<SINKFWD>", /* used for logging */
+ .fct = sink_forward_io_handler,
+ .init = sink_forward_session_init,
+ .release = sink_forward_session_release,
+};
+
+static struct applet sink_forward_oc_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<SINKFWDOC>", /* used for logging */
+ .fct = sink_forward_oc_io_handler,
+ .init = sink_forward_session_init,
+ .release = sink_forward_session_release,
+};
+
+/*
+ * Create a new peer session in assigned state (connect will start automatically).
+ * It sets its context into appctx->svcctx.
+ */
+static struct appctx *sink_forward_session_create(struct sink *sink, struct sink_forward_target *sft)
+{
+ struct appctx *appctx;
+ struct applet *applet = &sink_forward_applet;
+
+ if (sft->srv->log_proto == SRV_LOG_PROTO_OCTET_COUNTING)
+ applet = &sink_forward_oc_applet;
+
+ appctx = appctx_new_here(applet, NULL);
+ if (!appctx)
+ goto out_close;
+ appctx->svcctx = (void *)sft;
+
+ if (appctx_init(appctx) == -1)
+ goto out_free_appctx;
+
+ return appctx;
+
+ /* Error unrolling */
+ out_free_appctx:
+ appctx_free_on_early_error(appctx);
+ out_close:
+ return NULL;
+}
+
+/*
+ * Task to handle connections to forward servers
+ */
+static struct task *process_sink_forward(struct task * task, void *context, unsigned int state)
+{
+ struct sink *sink = (struct sink *)context;
+ struct sink_forward_target *sft = sink->sft;
+
+ task->expire = TICK_ETERNITY;
+
+ if (!stopping) {
+ while (sft) {
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ /* if appctx is NULL, start a new session */
+ if (!sft->appctx)
+ sft->appctx = sink_forward_session_create(sink, sft);
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+ sft = sft->next;
+ }
+ }
+ else {
+ while (sft) {
+ HA_SPIN_LOCK(SFT_LOCK, &sft->lock);
+ /* awake applet to perform a clean close */
+ if (sft->appctx)
+ appctx_wakeup(sft->appctx);
+ HA_SPIN_UNLOCK(SFT_LOCK, &sft->lock);
+ sft = sft->next;
+ }
+ }
+
+ return task;
+}
+/*
+ * Init task to manage connections to forward servers
+ *
+ * returns 0 in case of error.
+ */
+int sink_init_forward(struct sink *sink)
+{
+ sink->forward_task = task_new_anywhere();
+ if (!sink->forward_task)
+ return 0;
+
+ sink->forward_task->process = process_sink_forward;
+ sink->forward_task->context = (void *)sink;
+ sink->forward_sighandler = signal_register_task(0, sink->forward_task, 0);
+ task_wakeup(sink->forward_task, TASK_WOKEN_INIT);
+ return 1;
+}
+
+/* This tries to rotate a file-backed ring, but only if it contains contents.
+ * This way empty rings will not cause backups to be overwritten and it's safe
+ * to reload multiple times. That's only best effort, failures are silently
+ * ignored.
+ */
+void sink_rotate_file_backed_ring(const char *name)
+{
+ struct ring ring;
+ char *oldback;
+ int ret;
+ int fd;
+
+ fd = open(name, O_RDONLY);
+ if (fd < 0)
+ return;
+
+ /* check for contents validity */
+ ret = read(fd, &ring, sizeof(ring));
+ close(fd);
+
+ if (ret != sizeof(ring))
+ goto rotate;
+
+ /* contents are present, we want to keep them => rotate. Note that
+ * an empty ring buffer has one byte (the marker).
+ */
+ if (ring.buf.data > 1)
+ goto rotate;
+
+ /* nothing to keep, let's scratch the file and preserve the backup */
+ return;
+
+ rotate:
+ oldback = NULL;
+ memprintf(&oldback, "%s.bak", name);
+ if (oldback) {
+ /* try to rename any possibly existing ring file to
+ * ".bak" and delete remains of older ones. This will
+ * ensure we don't wipe useful debug info upon restart.
+ */
+ unlink(oldback);
+ if (rename(name, oldback) < 0)
+ unlink(oldback);
+ ha_free(&oldback);
+ }
+}
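+
+/* For example, with a backing file "/var/lib/haproxy/ring0" (illustrative
+ * path), a non-empty ring is renamed to "/var/lib/haproxy/ring0.bak",
+ * replacing any previous backup, so that the last non-empty generation
+ * always survives a reload.
+ */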
+
+
+/* helper function to completely deallocate a sink struct
+ */
+static void sink_free(struct sink *sink)
+{
+ struct sink_forward_target *sft_next;
+
+ if (!sink)
+ return;
+ if (sink->type == SINK_TYPE_BUFFER) {
+ if (sink->store) {
+ size_t size = (sink->ctx.ring->buf.size + 4095UL) & -4096UL;
+ void *area = (sink->ctx.ring->buf.area - sizeof(*sink->ctx.ring));
+
+ msync(area, size, MS_SYNC);
+ munmap(area, size);
+ ha_free(&sink->store);
+ }
+ else
+ ring_free(sink->ctx.ring);
+ }
+ LIST_DEL_INIT(&sink->sink_list); // remove from parent list
+ task_destroy(sink->forward_task);
+ free_proxy(sink->forward_px);
+ ha_free(&sink->name);
+ ha_free(&sink->desc);
+ while (sink->sft) {
+ sft_next = sink->sft->next;
+ ha_free(&sink->sft);
+ sink->sft = sft_next;
+ }
+ ha_free(&sink);
+}
+
+/* Helper function to create a new high-level ring buffer (as in a "ring"
+ * section from the config): it creates a new sink of buf type and a new
+ * forward proxy, which is stored in forward_px so we know that the sink is
+ * responsible for it.
+ *
+ * Returns NULL on failure
+ */
+static struct sink *sink_new_ringbuf(const char *id, const char *description,
+ const char *file, int linenum, char **err_msg)
+{
+ struct sink *sink;
+ struct proxy *p = NULL; // forward_px
+
+ /* allocate new proxy to handle forwards */
+ p = calloc(1, sizeof(*p));
+ if (!p) {
+ memprintf(err_msg, "out of memory");
+ goto err;
+ }
+
+ init_new_proxy(p);
+ sink_setup_proxy(p);
+ p->id = strdup(id);
+ p->conf.args.file = p->conf.file = strdup(file);
+ p->conf.args.line = p->conf.line = linenum;
+
+ sink = sink_new_buf(id, description, LOG_FORMAT_RAW, BUFSIZE);
+ if (!sink) {
+ memprintf(err_msg, "unable to create a new sink buffer for ring '%s'", id);
+ goto err;
+ }
+
+ /* link sink to proxy */
+ sink->forward_px = p;
+
+ return sink;
+
+ err:
+ free_proxy(p);
+ return NULL;
+}
+
+/* helper function: add a new server to an existing sink
+ *
+ * Returns 1 on success and 0 on failure
+ */
+static int sink_add_srv(struct sink *sink, struct server *srv)
+{
+ struct sink_forward_target *sft;
+
+ /* allocate new sink_forward_target descriptor */
+ sft = calloc(1, sizeof(*sft));
+ if (!sft) {
+ ha_alert("memory allocation error initializing server '%s' in ring '%s'.\n", srv->id, sink->name);
+ return 0;
+ }
+ sft->srv = srv;
+ sft->appctx = NULL;
+ sft->ofs = ~0; /* init ring offset */
+ sft->sink = sink;
+ sft->next = sink->sft;
+ HA_SPIN_INIT(&sft->lock);
+
+ /* mark server attached to the ring */
+ if (!ring_attach(sink->ctx.ring)) {
+ ha_alert("server '%s' sets too many watchers > 255 on ring '%s'.\n", srv->id, sink->name);
+ ha_free(&sft);
+ return 0;
+ }
+ sink->sft = sft;
+ return 1;
+}
+
+/* Finalize sink struct to ensure configuration consistency and
+ * allocate final struct members
+ *
+ * Returns ERR_NONE on success, ERR_WARN on warning
+ * Returns a composition of ERR_ALERT, ERR_ABORT, ERR_FATAL on failure
+ */
+static int sink_finalize(struct sink *sink)
+{
+ int err_code = ERR_NONE;
+ struct server *srv;
+
+ if (sink && (sink->type == SINK_TYPE_BUFFER)) {
+ if (!sink->maxlen)
+ sink->maxlen = ~0; // maxlen not set: no implicit truncation
+ else if (sink->maxlen > ring_max_payload(sink->ctx.ring)) {
+ /* maxlen set by user however it doesn't fit: set to max value */
+ ha_warning("ring '%s' event max length '%u' exceeds max payload size, forced to '%lu'.\n",
+ sink->name, sink->maxlen, (unsigned long)ring_max_payload(sink->ctx.ring));
+ sink->maxlen = ring_max_payload(sink->ctx.ring);
+ err_code |= ERR_WARN;
+ }
+
+ /* prepare forward server descriptors */
+ if (sink->forward_px) {
+ /* sink proxy is set: register all servers from the proxy */
+ srv = sink->forward_px->srv;
+ while (srv) {
+ if (!sink_add_srv(sink, srv)) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ break;
+ }
+ srv = srv->next;
+ }
+ }
+ /* init forwarding if at least one sft is registered */
+ if (sink->sft && sink_init_forward(sink) == 0) {
+ ha_alert("error when trying to initialize sink buffer forwarding.\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ return err_code;
+}
+
+/*
+ * Parse "ring" section and create corresponding sink buffer.
+ *
+ * The function returns 0 on success, otherwise it returns error
+ * flags.
+ */
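+/* A configuration example that this parser accepts (all values below are
+ * illustrative only):
+ *
+ *   ring buf1
+ *       description "ring forwarded to a remote syslog server"
+ *       format rfc5424
+ *       maxlen 1200
+ *       size 32768
+ *       backing-file /var/lib/haproxy/ring-buf1
+ *       timeout connect 5s
+ *       timeout server 10s
+ *       server srv1 127.0.0.1:6514 log-proto octet-count
+ */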
+int cfg_parse_ring(const char *file, int linenum, char **args, int kwm)
+{
+ int err_code = 0;
+ char *err_msg = NULL;
+ const char *inv;
+
+ if (strcmp(args[0], "ring") == 0) { /* new ring section */
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing ring name.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ inv = invalid_char(args[1]);
+ if (inv) {
+ ha_alert("parsing [%s:%d] : invalid ring name '%s' (character '%c' is not permitted).\n", file, linenum, args[1], *inv);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (sink_find(args[1])) {
+ ha_alert("parsing [%s:%d] : sink named '%s' already exists.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ cfg_sink = sink_new_ringbuf(args[1], args[1], file, linenum, &err_msg);
+ if (!cfg_sink) {
+ ha_alert("parsing [%s:%d] : %s.\n", file, linenum, err_msg);
+ ha_free(&err_msg);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ /* set maxlen value to 0 for now, we rely on this in postparsing
+ * to know if it was explicitly set using the "maxlen" parameter
+ */
+ cfg_sink->maxlen = 0;
+ }
+ else if (strcmp(args[0], "size") == 0) {
+ size_t size;
+
+ if (!cfg_sink || (cfg_sink->type != SINK_TYPE_BUFFER)) {
+ ha_alert("parsing [%s:%d] : 'size' directive not usable with this type of sink.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ size = atol(args[1]);
+ if (!size) {
+ ha_alert("parsing [%s:%d] : invalid size '%s' for new sink buffer.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (cfg_sink->store) {
+ ha_alert("parsing [%s:%d] : cannot resize an already mapped file, please specify 'size' before 'backing-file'.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (size < cfg_sink->ctx.ring->buf.size) {
+ ha_warning("parsing [%s:%d] : ignoring new size '%llu' that is smaller than current size '%llu' for ring '%s'.\n",
+ file, linenum, (ullong)size, (ullong)cfg_sink->ctx.ring->buf.size, cfg_sink->name);
+ err_code |= ERR_WARN;
+ goto err;
+ }
+
+ if (!ring_resize(cfg_sink->ctx.ring, size)) {
+ ha_alert("parsing [%s:%d] : fail to set sink buffer size '%llu' for ring '%s'.\n", file, linenum,
+ (ullong)cfg_sink->ctx.ring->buf.size, cfg_sink->name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ }
+ else if (strcmp(args[0], "backing-file") == 0) {
+ /* This tries to mmap the file <backing> for the ring's current size
+ * and to use it as a backing store for the ring. Existing data are
+ * deleted.
+ */
+ const char *backing = args[1];
+ size_t size;
+ void *area;
+ int fd;
+
+ if (!cfg_sink || (cfg_sink->type != SINK_TYPE_BUFFER)) {
+ ha_alert("parsing [%s:%d] : 'backing-file' only usable with existing rings.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (cfg_sink->store) {
+ ha_alert("parsing [%s:%d] : 'backing-file' already specified for ring '%s' (was '%s').\n", file, linenum, cfg_sink->name, cfg_sink->store);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ /* let's check if the file exists and is not empty. That's the
+ * only condition under which we'll trigger a rotate, so that
+ * config checks, reloads, or restarts that don't emit anything
+ * do not rotate it again.
+ */
+ sink_rotate_file_backed_ring(backing);
+
+ fd = open(backing, O_RDWR | O_CREAT, 0600);
+ if (fd < 0) {
+ ha_alert("parsing [%s:%d] : cannot open backing-file '%s' for ring '%s': %s.\n", file, linenum, backing, cfg_sink->name, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ size = (cfg_sink->ctx.ring->buf.size + 4095UL) & -4096UL;
+ if (ftruncate(fd, size) != 0) {
+ close(fd);
+ ha_alert("parsing [%s:%d] : could not adjust size of backing-file for ring '%s': %s.\n", file, linenum, cfg_sink->name, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ area = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (area == MAP_FAILED) {
+ close(fd);
+ ha_alert("parsing [%s:%d] : failed to use '%s' as a backing file for ring '%s': %s.\n", file, linenum, backing, cfg_sink->name, strerror(errno));
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ /* we don't need the file anymore */
+ close(fd);
+ cfg_sink->store = strdup(backing);
+
+ /* never fails */
+ ring_free(cfg_sink->ctx.ring);
+ cfg_sink->ctx.ring = ring_make_from_area(area, size);
+ }
+ else if (strcmp(args[0],"server") == 0) {
+ if (!cfg_sink || (cfg_sink->type != SINK_TYPE_BUFFER)) {
+ ha_alert("parsing [%s:%d] : unable to create server '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ err_code |= parse_server(file, linenum, args, cfg_sink->forward_px, NULL,
+ SRV_PARSE_PARSE_ADDR|SRV_PARSE_INITIAL_RESOLVE);
+ }
+ else if (strcmp(args[0],"timeout") == 0) {
+ if (!cfg_sink || !cfg_sink->forward_px) {
+ ha_alert("parsing [%s:%d] : unable to set timeout '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (strcmp(args[1], "connect") == 0 ||
+ strcmp(args[1], "server") == 0) {
+ const char *res;
+ unsigned int tout;
+
+ if (!*args[2]) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects <time> as argument.\n",
+ file, linenum, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ res = parse_time_err(args[2], &tout, TIME_UNIT_MS);
+ if (res == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: timer overflow in argument <%s> to <%s %s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ else if (res == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: timer underflow in argument <%s> to <%s %s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[2], args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ else if (res) {
+ ha_alert("parsing [%s:%d]: unexpected character '%c' in argument to <%s %s>.\n",
+ file, linenum, *res, args[0], args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ if (args[1][0] == 'c')
+ cfg_sink->forward_px->timeout.connect = tout;
+ else
+ cfg_sink->forward_px->timeout.server = tout;
+ }
+ }
+ else if (strcmp(args[0],"format") == 0) {
+ if (!cfg_sink) {
+ ha_alert("parsing [%s:%d] : unable to set format '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ cfg_sink->fmt = get_log_format(args[1]);
+ if (cfg_sink->fmt == LOG_FORMAT_UNSPEC) {
+ ha_alert("parsing [%s:%d] : unknown format '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ }
+ else if (strcmp(args[0],"maxlen") == 0) {
+ if (!cfg_sink) {
+ ha_alert("parsing [%s:%d] : unable to set event max length '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ cfg_sink->maxlen = atol(args[1]);
+ if (!cfg_sink->maxlen) {
+ ha_alert("parsing [%s:%d] : invalid size '%s' for new sink buffer.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ }
+ else if (strcmp(args[0],"description") == 0) {
+ if (!cfg_sink) {
+ ha_alert("parsing [%s:%d] : unable to set description '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ if (!*args[1]) {
+ ha_alert("parsing [%s:%d] : missing ring description text.\n", file, linenum);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+ free(cfg_sink->desc);
+
+ cfg_sink->desc = strdup(args[1]);
+ if (!cfg_sink->desc) {
+ ha_alert("parsing [%s:%d] : fail to set description '%s'.\n", file, linenum, args[1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+ }
+ else {
+ ha_alert("parsing [%s:%d] : unknown statement '%s'.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto err;
+ }
+
+err:
+ return err_code;
+}
+
+/* Creates a new sink buffer from a logger.
+ *
+ * It uses the logger's address to declare a forward server for this buffer
+ * and initializes the forwarding.
+ *
+ * The function returns a pointer to the allocated struct sink if allocation
+ * and initialization succeed, otherwise it returns NULL.
+ *
+ * Note: the sink is created using the name specified in
+ * logger->target.ring_name
+ */
+struct sink *sink_new_from_logger(struct logger *logger)
+{
+ struct sink *sink = NULL;
+ struct server *srv = NULL;
+ char *err_msg = NULL;
+
+ /* prepare description for the sink */
+ chunk_reset(&trash);
+ chunk_printf(&trash, "created from log directive declared into '%s' at line %d", logger->conf.file, logger->conf.line);
+
+ /* allocate a new sink buffer */
+ sink = sink_new_ringbuf(logger->target.ring_name, trash.area, logger->conf.file, logger->conf.line, &err_msg);
+ if (!sink) {
+ ha_alert("%s.\n", err_msg);
+ ha_free(&err_msg);
+ goto error;
+ }
+
+ /* ring format normally defaults to RAW, but here we set ring format
+ * to UNSPEC to inherit from caller format in sink_write() since we
+ * cannot customize implicit ring settings
+ */
+ sink->fmt = LOG_FORMAT_UNSPEC;
+
+ /* for the same reason, we disable sink->maxlen to inherit from caller
+ * maxlen in sink_write()
+ */
+ sink->maxlen = 0;
+
+ /* Set default connect and server timeout for sink forward proxy */
+ sink->forward_px->timeout.connect = MS_TO_TICKS(1000);
+ sink->forward_px->timeout.server = MS_TO_TICKS(5000);
+
+ /* allocate a new server to forward messages
+ * from ring buffer
+ */
+ srv = new_server(sink->forward_px);
+ if (!srv)
+ goto error;
+
+ /* init server */
+ srv->id = strdup(logger->target.ring_name);
+ srv->conf.file = strdup(logger->conf.file);
+ srv->conf.line = logger->conf.line;
+ srv->addr = *logger->target.addr;
+ srv->svc_port = get_host_port(logger->target.addr);
+ HA_SPIN_INIT(&srv->lock);
+
+ /* process per thread init */
+ if (srv_init_per_thr(srv) == -1)
+ goto error;
+
+ /* link srv with sink forward proxy: the servers are linked
+ * backwards first into proxy
+ */
+ srv->next = sink->forward_px->srv;
+ sink->forward_px->srv = srv;
+
+ if (sink_finalize(sink) & ERR_CODE)
+ goto error_final;
+
+ return sink;
+ error:
+ srv_drop(srv);
+
+ error_final:
+ sink_free(sink);
+
+ return NULL;
+}
+
+/* This function is pretty similar to sink_new_from_logger(), but instead of
+ * creating a forward proxy and server from a logger struct it uses an
+ * already existing server to create the forwarding sink, so most of the
+ * initialization is bypassed.
+ *
+ * The function returns a pointer to the allocated struct sink if allocation
+ * and initialization succeed, otherwise it returns NULL.
+ *
+ * <from> allows to specify a string that will be inserted into the sink
+ * description to describe where it was created from.
+ *
+ * Note: the sink is created using the name specified in srv->id
+ */
+struct sink *sink_new_from_srv(struct server *srv, const char *from)
+{
+ struct sink *sink = NULL;
+ int bufsize = (srv->log_bufsize) ? srv->log_bufsize : BUFSIZE;
+
+ /* prepare description for the sink */
+ chunk_reset(&trash);
+ chunk_printf(&trash, "created from %s declared into '%s' at line %d", from, srv->conf.file, srv->conf.line);
+
+ /* directly create a sink of BUF type, and use UNSPEC log format to
+ * inherit from caller fmt in sink_write()
+ */
+ sink = sink_new_buf(srv->id, trash.area, LOG_FORMAT_UNSPEC, bufsize);
+ if (!sink) {
+ ha_alert("unable to create a new sink buffer for server '%s'.\n", srv->id);
+ goto error;
+ }
+
+ /* we disable sink->maxlen to inherit from caller
+ * maxlen in sink_write()
+ */
+ sink->maxlen = 0;
+
+ /* add server to sink */
+ if (!sink_add_srv(sink, srv))
+ goto error;
+
+ if (sink_finalize(sink) & ERR_CODE)
+ goto error;
+
+ return sink;
+
+ error:
+ sink_free(sink);
+
+ return NULL;
+}
+
+/*
+ * Post parsing "ring" section.
+ *
+ * The function returns 0 on success, otherwise it returns error
+ * flags.
+ */
+int cfg_post_parse_ring()
+{
+ int err_code;
+
+ err_code = sink_finalize(cfg_sink);
+ cfg_sink = NULL;
+
+ return err_code;
+}
+
+/* function: resolve a single logger target of BUFFER type
+ *
+ * Returns err_code which defaults to ERR_NONE and can be set to a combination
+ * of ERR_WARN, ERR_ALERT, ERR_FATAL and ERR_ABORT in case of errors.
+ * <msg> could be set at any time (it will usually be set on error, but
+ * could also be set when no error occurred to report a diag warning), thus
+ * it is up to the caller to check it and to free it.
+ */
+int sink_resolve_logger_buffer(struct logger *logger, char **msg)
+{
+ struct log_target *target = &logger->target;
+ int err_code = ERR_NONE;
+ struct sink *sink;
+
+ BUG_ON(target->type != LOG_TARGET_BUFFER || (target->flags & LOG_TARGET_FL_RESOLVED));
+ if (target->addr) {
+ sink = sink_new_from_logger(logger);
+ if (!sink) {
+ memprintf(msg, "cannot be initialized (failed to create implicit ring)");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ ha_free(&target->addr); /* we no longer need this */
+ }
+ else {
+ sink = sink_find(target->ring_name);
+ if (!sink) {
+ memprintf(msg, "uses unknown ring named '%s'", target->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ else if (sink->type != SINK_TYPE_BUFFER) {
+ memprintf(msg, "uses incompatible ring '%s'", target->ring_name);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+ /* consistency checks */
+ if (sink && logger->maxlen > ring_max_payload(sink->ctx.ring)) {
+ memprintf(msg, "uses a max length which exceeds ring capacity ('%s' supports %lu bytes at most)",
+ target->ring_name, (unsigned long)ring_max_payload(sink->ctx.ring));
+ }
+ else if (sink && logger->maxlen > sink->maxlen) {
+ memprintf(msg, "uses a ring with a smaller maxlen than the one specified on the log directive ('%s' has maxlen = %d), logs will be truncated according to the lowest maxlen between the two",
+ target->ring_name, sink->maxlen);
+ }
+ end:
+ ha_free(&target->ring_name); /* sink is resolved and will replace ring_name hint */
+ target->sink = sink;
+ return err_code;
+}
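+
+/* For example (illustrative), BUFFER targets typically come from log
+ * directives such as:
+ *
+ *   log ring@buf1 local0          # explicit ring declared in a ring section
+ *   log tcp@127.0.0.1:514 local0  # implicit ring created from the address
+ *
+ * The first form takes the sink_find() path above while the second one goes
+ * through sink_new_from_logger().
+ */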
+
+static void sink_init()
+{
+ sink_new_fd("stdout", "standard output (fd#1)", LOG_FORMAT_RAW, 1);
+ sink_new_fd("stderr", "standard error (fd#2)", LOG_FORMAT_RAW, 2);
+ sink_new_buf("buf0", "in-memory ring buffer", LOG_FORMAT_TIMED, 1048576);
+}
+
+static void sink_deinit()
+{
+ struct sink *sink, *sb;
+
+ list_for_each_entry_safe(sink, sb, &sink_list, sink_list)
+ sink_free(sink);
+}
+
+INITCALL0(STG_REGISTER, sink_init);
+REGISTER_POST_DEINIT(sink_deinit);
+
+static struct cli_kw_list cli_kws = {{ },{
+ { { "show", "events", NULL }, "show events [<sink>] [-w] [-n] : show event sink state", cli_parse_show_events, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/* config parsers for this section */
+REGISTER_CONFIG_SECTION("ring", cfg_parse_ring, cfg_post_parse_ring);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/slz.c b/src/slz.c
new file mode 100644
index 0000000..1560bac
--- /dev/null
+++ b/src/slz.c
@@ -0,0 +1,1421 @@
+/*
+ * Copyright (C) 2013-2015 Willy Tarreau <w@1wt.eu>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <import/slz.h>
+#include <import/slz-tables.h>
+
+/* First, RFC1951-specific declarations and extracts from the RFC.
+ *
+ * RFC1951 - deflate stream format
+
+
+ * Data elements are packed into bytes in order of
+ increasing bit number within the byte, i.e., starting
+ with the least-significant bit of the byte.
+ * Data elements other than Huffman codes are packed
+ starting with the least-significant bit of the data
+ element.
+ * Huffman codes are packed starting with the most-
+ significant bit of the code.
+
+ 3.2.3. Details of block format
+
+ Each block of compressed data begins with 3 header bits
+ containing the following data:
+
+ first bit BFINAL
+ next 2 bits BTYPE
+
+ Note that the header bits do not necessarily begin on a byte
+ boundary, since a block does not necessarily occupy an integral
+ number of bytes.
+
+ BFINAL is set if and only if this is the last block of the data
+ set.
+
+ BTYPE specifies how the data are compressed, as follows:
+
+ 00 - no compression
+ 01 - compressed with fixed Huffman codes
+ 10 - compressed with dynamic Huffman codes
+ 11 - reserved (error)
+
+ 3.2.4. Non-compressed blocks (BTYPE=00)
+
+ Any bits of input up to the next byte boundary are ignored.
+ The rest of the block consists of the following information:
+
+ 0 1 2 3 4...
+ +---+---+---+---+================================+
+ | LEN | NLEN |... LEN bytes of literal data...|
+ +---+---+---+---+================================+
+
+ LEN is the number of data bytes in the block. NLEN is the
+ one's complement of LEN.
+
+ 3.2.5. Compressed blocks (length and distance codes)
+
+ As noted above, encoded data blocks in the "deflate" format
+ consist of sequences of symbols drawn from three conceptually
+ distinct alphabets: either literal bytes, from the alphabet of
+ byte values (0..255), or <length, backward distance> pairs,
+ where the length is drawn from (3..258) and the distance is
+ drawn from (1..32,768). In fact, the literal and length
+ alphabets are merged into a single alphabet (0..285), where
+ values 0..255 represent literal bytes, the value 256 indicates
+ end-of-block, and values 257..285 represent length codes
+ (possibly in conjunction with extra bits following the symbol
+ code) as follows:
+
+Length encoding :
+ Extra Extra Extra
+ Code Bits Length(s) Code Bits Lengths Code Bits Length(s)
+ ---- ---- ------ ---- ---- ------- ---- ---- -------
+ 257 0 3 267 1 15,16 277 4 67-82
+ 258 0 4 268 1 17,18 278 4 83-98
+ 259 0 5 269 2 19-22 279 4 99-114
+ 260 0 6 270 2 23-26 280 4 115-130
+ 261 0 7 271 2 27-30 281 5 131-162
+ 262 0 8 272 2 31-34 282 5 163-194
+ 263 0 9 273 3 35-42 283 5 195-226
+ 264 0 10 274 3 43-50 284 5 227-257
+ 265 1 11,12 275 3 51-58 285 0 258
+ 266 1 13,14 276 3 59-66
+
+Distance encoding :
+ Extra Extra Extra
+ Code Bits Dist Code Bits Dist Code Bits Distance
+ ---- ---- ---- ---- ---- ------ ---- ---- --------
+ 0 0 1 10 4 33-48 20 9 1025-1536
+ 1 0 2 11 4 49-64 21 9 1537-2048
+ 2 0 3 12 5 65-96 22 10 2049-3072
+ 3 0 4 13 5 97-128 23 10 3073-4096
+ 4 1 5,6 14 6 129-192 24 11 4097-6144
+ 5 1 7,8 15 6 193-256 25 11 6145-8192
+ 6 2 9-12 16 7 257-384 26 12 8193-12288
+ 7 2 13-16 17 7 385-512 27 12 12289-16384
+ 8 3 17-24 18 8 513-768 28 13 16385-24576
+ 9 3 25-32 19 8 769-1024 29 13 24577-32768
+
+ 3.2.6. Compression with fixed Huffman codes (BTYPE=01)
+
+ The Huffman codes for the two alphabets are fixed, and are not
+ represented explicitly in the data. The Huffman code lengths
+ for the literal/length alphabet are:
+
+ Lit Value Bits Codes
+ --------- ---- -----
+ 0 - 143 8 00110000 through
+ 10111111
+ 144 - 255 9 110010000 through
+ 111111111
+ 256 - 279 7 0000000 through
+ 0010111
+ 280 - 287 8 11000000 through
+ 11000111
+
+ The code lengths are sufficient to generate the actual codes,
+ as described above; we show the codes in the table for added
+ clarity. Literal/length values 286-287 will never actually
+ occur in the compressed data, but participate in the code
+ construction.
+
+ Distance codes 0-31 are represented by (fixed-length) 5-bit
+ codes, with possible additional bits as shown in the table
+ shown in Paragraph 3.2.5, above. Note that distance codes 30-
+ 31 will never actually occur in the compressed data.
+
+*/
+
+/* back references, built in a way that is optimal for 32/64 bits */
+union ref {
+ struct {
+ uint32_t pos;
+ uint32_t word;
+ } by32;
+ uint64_t by64;
+};
+
+#if defined(USE_64BIT_QUEUE) && defined(UNALIGNED_LE_OK)
+
+/* enqueue code x of <xbits> bits (LSB aligned, at most 24) and copy complete
+ * 32-bit words into output buffer. X must not contain non-zero bits above
+ * xbits.
+ */
+static inline void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
+{
+ uint64_t queue = strm->queue + ((uint64_t)x << strm->qbits);
+ uint32_t qbits = strm->qbits + xbits;
+
+ if (__builtin_expect(qbits >= 32, 1)) {
+ *(uint32_t *)strm->outbuf = queue;
+ queue >>= 32;
+ qbits -= 32;
+ strm->outbuf += 4;
+ }
+
+ strm->queue = queue;
+ strm->qbits = qbits;
+}
+
+#define enqueue8 enqueue24
+
+/* flush the queue and align to next byte */
+static inline void flush_bits(struct slz_stream *strm)
+{
+ if (strm->qbits > 0)
+ *strm->outbuf++ = strm->queue;
+
+ if (strm->qbits > 8)
+ *strm->outbuf++ = strm->queue >> 8;
+
+ if (strm->qbits > 16)
+ *strm->outbuf++ = strm->queue >> 16;
+
+ if (strm->qbits > 24)
+ *strm->outbuf++ = strm->queue >> 24;
+
+ strm->queue = 0;
+ strm->qbits = 0;
+}
+
+#else /* non-64 bit or aligned or big endian */
+
+/* enqueue code x of <xbits> bits (LSB aligned, at most 24) and copy complete
+ * bytes into out buf. X must not contain non-zero bits above xbits. Prefer
+ * enqueue8() when xbits is known to be 8 or less.
+ */
+static void enqueue24(struct slz_stream *strm, uint32_t x, uint32_t xbits)
+{
+ uint32_t queue = strm->queue + (x << strm->qbits);
+ uint32_t qbits = strm->qbits + xbits;
+
+ if (qbits >= 16) {
+#ifndef UNALIGNED_LE_OK
+ strm->outbuf[0] = queue;
+ strm->outbuf[1] = queue >> 8;
+#else
+ *(uint16_t *)strm->outbuf = queue;
+#endif
+ strm->outbuf += 2;
+ queue >>= 16;
+ qbits -= 16;
+ }
+
+ if (qbits >= 8) {
+ qbits -= 8;
+ *strm->outbuf++ = queue;
+ queue >>= 8;
+ }
+ strm->qbits = qbits;
+ strm->queue = queue;
+ return;
+}
+
+/* enqueue code x of <xbits> bits (at most 8) and copy complete bytes into
+ * out buf. X must not contain non-zero bits above xbits.
+ */
+static inline void enqueue8(struct slz_stream *strm, uint32_t x, uint32_t xbits)
+{
+ uint32_t queue = strm->queue + (x << strm->qbits);
+ uint32_t qbits = strm->qbits + xbits;
+
+ if (__builtin_expect((signed)(qbits - 8) >= 0, 1)) {
+ qbits -= 8;
+ *strm->outbuf++ = queue;
+ queue >>= 8;
+ }
+
+ strm->qbits = qbits;
+ strm->queue = queue;
+}
+
+/* align to next byte */
+static inline void flush_bits(struct slz_stream *strm)
+{
+ if (strm->qbits > 0)
+ *strm->outbuf++ = strm->queue;
+
+ if (strm->qbits > 8)
+ *strm->outbuf++ = strm->queue >> 8;
+
+ strm->queue = 0;
+ strm->qbits = 0;
+}
+#endif
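+
+/* A small worked example of the bit queue above (illustrative): starting
+ * from an empty queue, enqueueing code 0x5 over 3 bits then code 0x2 over
+ * 2 bits yields queue = 0x15 (binary 10101) and qbits = 5, since each new
+ * code is shifted left by the number of bits already pending (LSB-first
+ * packing, as required by RFC1951).
+ */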
+
+
+/* only valid if buffer is already aligned */
+static inline void copy_8b(struct slz_stream *strm, uint32_t x)
+{
+ *strm->outbuf++ = x;
+}
+
+/* only valid if buffer is already aligned */
+static inline void copy_16b(struct slz_stream *strm, uint32_t x)
+{
+ strm->outbuf[0] = x;
+ strm->outbuf[1] = x >> 8;
+ strm->outbuf += 2;
+}
+
+/* only valid if buffer is already aligned */
+static inline void copy_32b(struct slz_stream *strm, uint32_t x)
+{
+ strm->outbuf[0] = x;
+ strm->outbuf[1] = x >> 8;
+ strm->outbuf[2] = x >> 16;
+ strm->outbuf[3] = x >> 24;
+ strm->outbuf += 4;
+}
+
+static inline void send_huff(struct slz_stream *strm, uint32_t code)
+{
+ uint32_t bits;
+
+ code = fixed_huff[code];
+ bits = code & 15;
+ code >>= 4;
+ enqueue24(strm, code, bits);
+}
+
+static inline void send_eob(struct slz_stream *strm)
+{
+ enqueue8(strm, 0, 7); // direct encoding of 256 = EOB (cf RFC1951)
+}
+
+/* copies <len> literals from <buf>. <more> indicates that there are data past
+ * buf + <len>. <len> must not be null.
+ */
+static void copy_lit(struct slz_stream *strm, const void *buf, uint32_t len, int more)
+{
+ uint32_t len2;
+
+ do {
+ len2 = len;
+ if (__builtin_expect(len2 > 65535, 0))
+ len2 = 65535;
+
+ len -= len2;
+
+ if (strm->state != SLZ_ST_EOB)
+ send_eob(strm);
+
+ strm->state = (more || len) ? SLZ_ST_EOB : SLZ_ST_DONE;
+
+ enqueue8(strm, !(more || len), 3); // BFINAL = !more ; BTYPE = 00
+ flush_bits(strm);
+ copy_16b(strm, len2); // len2
+ copy_16b(strm, ~len2); // nlen2
+ memcpy(strm->outbuf, buf, len2);
+ buf += len2;
+ strm->outbuf += len2;
+ } while (len);
+}
+
+/* encodes <len> literals from <buf> using the fixed Huffman codes. <more>
+ * indicates that there are data past buf + <len>. <len> must not be null.
+ */
+static void copy_lit_huff(struct slz_stream *strm, const unsigned char *buf, uint32_t len, int more)
+{
+ uint32_t pos;
+
+ /* This ugly construct limits the number of tests and optimizes for the
+ * most common case (more > 0).
+ */
+ if (strm->state == SLZ_ST_EOB) {
+ eob:
+ strm->state = more ? SLZ_ST_FIXED : SLZ_ST_LAST;
+ enqueue8(strm, 2 + !more, 3); // BFINAL = !more ; BTYPE = 01
+ }
+ else if (!more) {
+ send_eob(strm);
+ goto eob;
+ }
+
+ pos = 0;
+ do {
+ send_huff(strm, buf[pos++]);
+ } while (pos < len);
+}
+
+/* format:
+ * bit0..31 = word
+ * bit32..63 = last position in buffer of similar content
+ */
+
+/* This hash provides good average results on HTML contents, and is among the
+ * few which provide almost optimal results on various different pages.
+ */
+static inline uint32_t slz_hash(uint32_t a)
+{
+#if defined(__ARM_FEATURE_CRC32)
+# if defined(__ARM_ARCH_ISA_A64)
+ // 64 bit mode
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(a) : "r"(0));
+# else
+ // 32 bit mode (e.g. armv7 compiler building for armv8)
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(a) : "r"(0));
+# endif
+ return a >> (32 - HASH_BITS);
+#else
+ return ((a << 19) + (a << 6) - a) >> (32 - HASH_BITS);
+#endif
+}
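+
+/* Note: the portable variant above is simply a multiplication spelled out
+ * with shifts: (a << 19) + (a << 6) - a == a * 524351; of the 32-bit result,
+ * only the top HASH_BITS bits are kept.
+ */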
+
+/* This function compares buffers <a> and <b> and reads 32 or 64 bits at a time
+ * during the approach. It makes use of unaligned little endian memory accesses
+ * on capable architectures. <max> is the maximum number of bytes that can be
+ * read, so both <a> and <b> must have at least <max> bytes ahead. <max> may
+ * safely be null or negative if that simplifies computations in the caller.
+ */
+static inline long memmatch(const unsigned char *a, const unsigned char *b, long max)
+{
+ long len = 0;
+
+#ifdef UNALIGNED_LE_OK
+ unsigned long xor;
+
+ while (1) {
+ if ((long)(len + 2 * sizeof(long)) > max) {
+ while (len < max) {
+ if (a[len] != b[len])
+ break;
+ len++;
+ }
+ return len;
+ }
+
+ xor = *(long *)&a[len] ^ *(long *)&b[len];
+ if (xor)
+ break;
+ len += sizeof(long);
+
+ xor = *(long *)&a[len] ^ *(long *)&b[len];
+ if (xor)
+ break;
+ len += sizeof(long);
+ }
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+ /* x86 has bsf. We know that xor is non-null here */
+ asm("bsf %1,%0\n" : "=r"(xor) : "0" (xor));
+ return len + xor / 8;
+#else
+ if (sizeof(long) > 4 && !(xor & 0xffffffff)) {
+ /* This code is optimized out on 32-bit archs, but we still
+ * need to shift in two passes to avoid a warning. It is
+ * properly optimized out as a single shift.
+ */
+ xor >>= 16; xor >>= 16;
+ if (xor & 0xffff) {
+ if (xor & 0xff)
+ return len + 4;
+ return len + 5;
+ }
+ if (xor & 0xffffff)
+ return len + 6;
+ return len + 7;
+ }
+
+ if (xor & 0xffff) {
+ if (xor & 0xff)
+ return len;
+ return len + 1;
+ }
+ if (xor & 0xffffff)
+ return len + 2;
+ return len + 3;
+#endif // x86
+
+#else // UNALIGNED_LE_OK
+ /* This is the generic version for big endian or unaligned-incompatible
+ * architectures.
+ */
+ while (len < max) {
+ if (a[len] != b[len])
+ break;
+ len++;
+ }
+ return len;
+
+#endif
+}
+
+/* sets <count> BYTES to -32769 in <refs> so that any uninitialized entry will
+ * satisfy (pos-last-1 >= 32768) and be ignored. <count> must be a multiple of
+ * 128 bytes and <refs> must be at least one count in length. It's supposed to
+ * be applied to 64-bit aligned data exclusively, which makes it slightly
+ * faster than the regular memset() since no alignment check is performed.
+ */
+static void reset_refs(union ref *refs, long count)
+{
+ /* avoid a shift/mask by casting to void* */
+ union ref *end = (void *)refs + count;
+
+ do {
+ refs[ 0].by64 = -32769;
+ refs[ 1].by64 = -32769;
+ refs[ 2].by64 = -32769;
+ refs[ 3].by64 = -32769;
+ refs[ 4].by64 = -32769;
+ refs[ 5].by64 = -32769;
+ refs[ 6].by64 = -32769;
+ refs[ 7].by64 = -32769;
+ refs[ 8].by64 = -32769;
+ refs[ 9].by64 = -32769;
+ refs[10].by64 = -32769;
+ refs[11].by64 = -32769;
+ refs[12].by64 = -32769;
+ refs[13].by64 = -32769;
+ refs[14].by64 = -32769;
+ refs[15].by64 = -32769;
+ refs += 16;
+ } while (refs < end);
+}
+
+/* Compresses <ilen> bytes from <in> into <out> according to RFC1951. The
+ * output result may be up to 5 bytes larger than the input, to which 2 extra
+ * bytes may be added to send the last chunk due to BFINAL+EOB encoding (10
+ * bits) when <more> is not set. The caller is responsible for ensuring there
+ * is enough room in the output buffer for this. The amount of output bytes is
+ * returned, and no CRC is computed.
+ */
+long slz_rfc1951_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
+{
+ long rem = ilen;
+ unsigned long pos = 0;
+ unsigned long last;
+ uint32_t word = 0;
+ long mlen;
+ uint32_t h;
+ uint64_t ent;
+
+ uint32_t plit = 0;
+ uint32_t bit9 = 0;
+ uint32_t dist, code;
+ union ref refs[1 << HASH_BITS];
+
+ if (!strm->level) {
+ /* force to send as literals (eg to preserve CPU) */
+ strm->outbuf = out;
+ plit = pos = ilen;
+ bit9 = 52; /* force literal dump */
+ goto final_lit_dump;
+ }
+
+ reset_refs(refs, sizeof(refs));
+
+ strm->outbuf = out;
+
+#ifndef UNALIGNED_FASTER
+ word = ((unsigned char)in[pos] << 8) + ((unsigned char)in[pos + 1] << 16) + ((unsigned char)in[pos + 2] << 24);
+#endif
+ while (rem >= 4) {
+#ifndef UNALIGNED_FASTER
+ word = ((unsigned char)in[pos + 3] << 24) + (word >> 8);
+#else
+ word = *(uint32_t *)&in[pos];
+#endif
+ h = slz_hash(word);
+ asm volatile ("" ::); // prevent gcc from trying to be smart with the prefetch
+
+ if (sizeof(long) >= 8) {
+ ent = refs[h].by64;
+ last = (uint32_t)ent;
+ ent >>= 32;
+ refs[h].by64 = ((uint64_t)pos) + ((uint64_t)word << 32);
+ } else {
+ ent = refs[h].by32.word;
+ last = refs[h].by32.pos;
+ refs[h].by32.pos = pos;
+ refs[h].by32.word = word;
+ }
+
+#ifdef FIND_OPTIMAL_MATCH
+ /* Experimental code to see what could be saved with an ideal
+ * longest match lookup algorithm. This one is very slow but
+ * scans the whole window. In short, here are the savings :
+ * file orig fast(ratio) optimal(ratio)
+ * README 5185 3419 (65.9%) 3165 (61.0%) -7.5%
+ * index.html 76799 35662 (46.4%) 29875 (38.9%) -16.3%
+ * rfc1952.c 29383 13442 (45.7%) 11793 (40.1%) -12.3%
+ *
+ * Thus the savings to expect for large files is at best 16%.
+ *
+ * A non-colliding hash gives 33025 instead of 35662 (-7.4%),
+ * and keeping the last two entries gives 31724 (-11.0%).
+ */
+ unsigned long scan;
+ int saved = 0;
+ int bestpos = 0;
+ int bestlen = 0;
+ int firstlen = 0;
+ int max_lookup = 2; // 0 = no limit
+
+ for (scan = pos - 1; scan < pos && (unsigned long)(pos - scan - 1) < 32768; scan--) {
+ int len;
+
+ if (*(uint32_t *)(in + scan) != word)
+ continue;
+
+ len = memmatch(in + pos, in + scan, rem);
+ if (!bestlen)
+ firstlen = len;
+
+ if (len > bestlen) {
+ bestlen = len;
+ bestpos = scan;
+ }
+ if (!--max_lookup)
+ break;
+ }
+ if (bestlen) {
+ //printf("pos=%d last=%d bestpos=%d word=%08x ent=%08x len=%d\n",
+ // (int)pos, (int)last, (int)bestpos, (int)word, (int)ent, bestlen);
+ last = bestpos;
+ ent = word;
+ saved += bestlen - firstlen;
+ }
+ //fprintf(stderr, "first=%d best=%d saved_total=%d\n", firstlen, bestlen, saved);
+#endif
+
+ if ((uint32_t)ent != word) {
+ send_as_lit:
+ rem--;
+ plit++;
+ bit9 += ((unsigned char)word >= 144);
+ pos++;
+ continue;
+ }
+
+ /* We reject pos = last and pos > last+32768 */
+ if ((unsigned long)(pos - last - 1) >= 32768)
+ goto send_as_lit;
+
+ /* Note: cannot encode a length larger than 258 bytes */
+ mlen = memmatch(in + pos + 4, in + last + 4, (rem > 258 ? 258 : rem) - 4) + 4;
+
+ /* found a matching entry */
+
+ if (bit9 >= 52 && mlen < 6)
+ goto send_as_lit;
+
+ /* compute the output code, its size and the length's size in
+ * bits to know if the reference is cheaper than literals.
+ */
+ code = len_fh[mlen];
+
+ /* direct mapping of dist->huffman code */
+ dist = fh_dist_table[pos - last - 1];
+
+ /* if encoding the dist+length is more expensive than sending
+ * the equivalent as bytes, let's keep the literals.
+ */
+ if ((dist & 0x1f) + (code >> 16) + 8 >= 8 * mlen + bit9)
+ goto send_as_lit;
+
+ /* first, copy pending literals */
+ if (plit) {
+ /* Huffman encoding requires 9 bits for octets 144..255, so this
+ * is a waste of space for binary data. Switching between Huffman
+ * and no-comp then huffman consumes 52 bits (7 for EOB + 3 for
+ * block type + 7 for alignment + 32 for LEN+NLEN + 3 for next
+ * block. Only use plain literals if there are more than 52 bits
+ * to save.
+ */
+ if (bit9 >= 52)
+ copy_lit(strm, in + pos - plit, plit, 1);
+ else
+ copy_lit_huff(strm, in + pos - plit, plit, 1);
+
+ plit = 0;
+ }
+
+ /* use mode 01 - fixed huffman */
+ if (strm->state == SLZ_ST_EOB) {
+ strm->state = SLZ_ST_FIXED;
+ enqueue8(strm, 0x02, 3); // BTYPE = 01, BFINAL = 0
+ }
+
+ /* copy the length first */
+ enqueue24(strm, code & 0xFFFF, code >> 16);
+
+ /* in fixed huffman mode, dist is fixed 5 bits */
+ enqueue24(strm, dist >> 5, dist & 0x1f);
+ bit9 = 0;
+ rem -= mlen;
+ pos += mlen;
+
+#ifndef UNALIGNED_FASTER
+#ifdef UNALIGNED_LE_OK
+ word = *(uint32_t *)&in[pos - 1];
+#else
+ word = ((unsigned char)in[pos] << 8) + ((unsigned char)in[pos + 1] << 16) + ((unsigned char)in[pos + 2] << 24);
+#endif
+#endif
+ }
+
+ if (__builtin_expect(rem, 0)) {
+ /* we're reading the 1..3 last bytes */
+ plit += rem;
+ do {
+ bit9 += ((unsigned char)in[pos++] >= 144);
+ } while (--rem);
+ }
+
+ final_lit_dump:
+ /* now copy remaining literals or mark the end */
+ if (plit) {
+ if (bit9 >= 52)
+ copy_lit(strm, in + pos - plit, plit, more);
+ else
+ copy_lit_huff(strm, in + pos - plit, plit, more);
+
+ plit = 0;
+ }
+
+ strm->ilen += ilen;
+ return strm->outbuf - out;
+}
+
+/* Initializes stream <strm> for use with raw deflate (rfc1951). The CRC is
+ * unused but set to zero. The compression level passed in <level> is set. This
+ * value can only be 0 (no compression) or 1 (compression) and other values
+ * will lead to unpredictable behaviour. The function always returns 0.
+ */
+int slz_rfc1951_init(struct slz_stream *strm, int level)
+{
+ strm->state = SLZ_ST_EOB; // no header
+ strm->level = level;
+ strm->format = SLZ_FMT_DEFLATE;
+ strm->crc32 = 0;
+ strm->ilen = 0;
+ strm->qbits = 0;
+ strm->queue = 0;
+ return 0;
+}
+
+/* Flushes any pending data for stream <strm> into buffer <buf>, then emits an
+ * empty literal block to byte-align the output, allowing the queue to be
+ * completely flushed. This requires that the output buffer still has the size of the
+ * queue available (up to 4 bytes), plus one byte for (BFINAL,BTYPE), plus 4
+ * bytes for LEN+NLEN, or a total of 9 bytes in the worst case. The number of
+ * bytes emitted is returned. It is guaranteed that the queue is empty on
+ * return. This may cause some overhead by adding needless 5-byte blocks if
+ * called too often.
+ */
+int slz_rfc1951_flush(struct slz_stream *strm, unsigned char *buf)
+{
+ strm->outbuf = buf;
+
+ /* The queue is always empty on INIT, DONE, and END */
+ if (!strm->qbits)
+ return 0;
+
+ /* we may need to terminate a huffman output. Lit is always in EOB state */
+ if (strm->state != SLZ_ST_EOB) {
+ strm->state = (strm->state == SLZ_ST_LAST) ? SLZ_ST_DONE : SLZ_ST_EOB;
+ send_eob(strm);
+ }
+
+ /* send BFINAL according to state, and BTYPE=00 (lit) */
+ enqueue8(strm, (strm->state == SLZ_ST_DONE) ? 1 : 0, 3);
+ flush_bits(strm); // emit pending bits
+ copy_32b(strm, 0xFFFF0000U); // len=0, nlen=~0
+
+ /* Now the queue is empty, EOB was sent, BFINAL might have been sent if
+ * we completed the last block, and a zero-byte block was sent to byte-
+ * align the output. The last state reflects all this. Let's just
+ * return the number of bytes added to the output buffer.
+ */
+ return strm->outbuf - buf;
+}
+
+/* Flushes any pending data for stream <strm> into buffer <buf>, then sends BTYPE=1
+ * and BFINAL=1 if needed. The stream ends in SLZ_ST_DONE. It returns the number
+ * of bytes emitted. The trailer consists of flushing the possibly pending bits
+ * from the queue (up to 7 bits), then possibly EOB (7 bits), then 3 bits, EOB,
+ * a rounding to the next byte, which amounts to a total of 4 bytes max, that
+ * the caller must ensure are available before calling the function.
+ */
+int slz_rfc1951_finish(struct slz_stream *strm, unsigned char *buf)
+{
+ strm->outbuf = buf;
+
+ if (strm->state == SLZ_ST_FIXED || strm->state == SLZ_ST_LAST) {
+ strm->state = (strm->state == SLZ_ST_LAST) ? SLZ_ST_DONE : SLZ_ST_EOB;
+ send_eob(strm);
+ }
+
+ if (strm->state != SLZ_ST_DONE) {
+ /* send BTYPE=1, BFINAL=1 */
+ enqueue8(strm, 3, 3);
+ send_eob(strm);
+ strm->state = SLZ_ST_DONE;
+ }
+
+ flush_bits(strm);
+ return strm->outbuf - buf;
+}
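+
+/* A minimal usage sketch of the rfc1951 API above (illustrative only, error
+ * handling omitted; <in>, <ilen> and <out> are assumed to exist, and <out>
+ * must be sized for the worst cases documented in slz_rfc1951_encode() and
+ * slz_rfc1951_finish()):
+ *
+ *   struct slz_stream strm;
+ *   long olen;
+ *
+ *   slz_rfc1951_init(&strm, 1);
+ *   olen  = slz_rfc1951_encode(&strm, out, in, ilen, 0);
+ *   olen += slz_rfc1951_finish(&strm, out + olen);
+ */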
+
+/* Now RFC1952-specific declarations and extracts from RFC.
+ * From RFC1952 about the GZIP file format :
+
+A gzip file consists of a series of "members" ...
+
+2.3. Member format
+
+ Each member has the following structure:
+
+ +---+---+---+---+---+---+---+---+---+---+
+ |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
+ +---+---+---+---+---+---+---+---+---+---+
+
+ (if FLG.FEXTRA set)
+
+ +---+---+=================================+
+ | XLEN |...XLEN bytes of "extra field"...| (more-->)
+ +---+---+=================================+
+
+ (if FLG.FNAME set)
+
+ +=========================================+
+ |...original file name, zero-terminated...| (more-->)
+ +=========================================+
+
+ (if FLG.FCOMMENT set)
+
+ +===================================+
+ |...file comment, zero-terminated...| (more-->)
+ +===================================+
+
+ (if FLG.FHCRC set)
+
+ +---+---+
+ | CRC16 |
+ +---+---+
+
+ +=======================+
+ |...compressed blocks...| (more-->)
+ +=======================+
+
+ 0 1 2 3 4 5 6 7
+ +---+---+---+---+---+---+---+---+
+ | CRC32 | ISIZE |
+ +---+---+---+---+---+---+---+---+
+
+
+2.3.1. Member header and trailer
+
+ ID1 (IDentification 1)
+ ID2 (IDentification 2)
+ These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
+ (0x8b, \213), to identify the file as being in gzip format.
+
+ CM (Compression Method)
+ This identifies the compression method used in the file. CM
+ = 0-7 are reserved. CM = 8 denotes the "deflate"
+ compression method, which is the one customarily used by
+ gzip and which is documented elsewhere.
+
+ FLG (FLaGs)
+ This flag byte is divided into individual bits as follows:
+
+ bit 0 FTEXT
+ bit 1 FHCRC
+ bit 2 FEXTRA
+ bit 3 FNAME
+ bit 4 FCOMMENT
+ bit 5 reserved
+ bit 6 reserved
+ bit 7 reserved
+
+ Reserved FLG bits must be zero.
+
+ MTIME (Modification TIME)
+ This gives the most recent modification time of the original
+ file being compressed. The time is in Unix format, i.e.,
+ seconds since 00:00:00 GMT, Jan. 1, 1970. (Note that this
+ may cause problems for MS-DOS and other systems that use
+ local rather than Universal time.) If the compressed data
+ did not come from a file, MTIME is set to the time at which
+ compression started. MTIME = 0 means no time stamp is
+ available.
+
+ XFL (eXtra FLags)
+ These flags are available for use by specific compression
+ methods. The "deflate" method (CM = 8) sets these flags as
+ follows:
+
+ XFL = 2 - compressor used maximum compression,
+ slowest algorithm
+ XFL = 4 - compressor used fastest algorithm
+
+ OS (Operating System)
+ This identifies the type of file system on which compression
+ took place. This may be useful in determining end-of-line
+ convention for text files. The currently defined values are
+ as follows:
+
+ 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
+ 1 - Amiga
+ 2 - VMS (or OpenVMS)
+ 3 - Unix
+ 4 - VM/CMS
+ 5 - Atari TOS
+ 6 - HPFS filesystem (OS/2, NT)
+ 7 - Macintosh
+ 8 - Z-System
+ 9 - CP/M
+ 10 - TOPS-20
+ 11 - NTFS filesystem (NT)
+ 12 - QDOS
+ 13 - Acorn RISCOS
+ 255 - unknown
+
+ ==> A file compressed using "gzip -1" on Unix-like systems can be :
+
+ 1F 8B 08 00 00 00 00 00 04 03
+ <deflate-compressed stream>
+ crc32 size32
+*/
+
+static const unsigned char gzip_hdr[] = { 0x1F, 0x8B, // ID1, ID2
+ 0x08, 0x00, // Deflate, flags (none)
+ 0x00, 0x00, 0x00, 0x00, // mtime: none
+ 0x04, 0x03 }; // fastest comp, OS=Unix
+
+static inline uint32_t crc32_char(uint32_t crc, uint8_t x)
+{
+#if defined(__ARM_FEATURE_CRC32)
+ crc = ~crc;
+# if defined(__ARM_ARCH_ISA_A64)
+ // 64 bit mode
+ __asm__ volatile("crc32b %w0,%w0,%w1" : "+r"(crc) : "r"(x));
+# else
+ // 32 bit mode (e.g. armv7 compiler building for armv8)
+ __asm__ volatile("crc32b %0,%0,%1" : "+r"(crc) : "r"(x));
+# endif
+ crc = ~crc;
+#else
+ crc = crc32_fast[0][(crc ^ x) & 0xff] ^ (crc >> 8);
+#endif
+ return crc;
+}
+
+static inline uint32_t crc32_uint32(uint32_t data)
+{
+#if defined(__ARM_FEATURE_CRC32)
+# if defined(__ARM_ARCH_ISA_A64)
+ // 64 bit mode
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(data) : "r"(~0UL));
+# else
+ // 32 bit mode (e.g. armv7 compiler building for armv8)
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(data) : "r"(~0UL));
+# endif
+ data = ~data;
+#else
+ data = crc32_fast[3][(data >> 0) & 0xff] ^
+ crc32_fast[2][(data >> 8) & 0xff] ^
+ crc32_fast[1][(data >> 16) & 0xff] ^
+ crc32_fast[0][(data >> 24) & 0xff];
+#endif
+ return data;
+}
+
+/* Modified version originally from RFC1952, working with non-inverting CRCs */
+uint32_t slz_crc32_by1(uint32_t crc, const unsigned char *buf, int len)
+{
+ int n;
+
+ for (n = 0; n < len; n++)
+ crc = crc32_char(crc, buf[n]);
+ return crc;
+}
+
+/* This version computes the crc32 of <buf> over <len> bytes, doing most of it
+ * in 32-bit chunks.
+ */
+uint32_t slz_crc32_by4(uint32_t crc, const unsigned char *buf, int len)
+{
+ const unsigned char *end = buf + len;
+
+ while (buf <= end - 16) {
+#ifdef UNALIGNED_LE_OK
+#if defined(__ARM_FEATURE_CRC32)
+ crc = ~crc;
+# if defined(__ARM_ARCH_ISA_A64)
+ // 64 bit mode
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
+ __asm__ volatile("crc32w %w0,%w0,%w1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
+# else
+ // 32 bit mode (e.g. armv7 compiler building for armv8)
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf)));
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 4)));
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 8)));
+ __asm__ volatile("crc32w %0,%0,%1" : "+r"(crc) : "r"(*(uint32_t*)(buf + 12)));
+# endif
+ crc = ~crc;
+#else
+ crc ^= *(uint32_t *)buf;
+ crc = crc32_uint32(crc);
+
+ crc ^= *(uint32_t *)(buf + 4);
+ crc = crc32_uint32(crc);
+
+ crc ^= *(uint32_t *)(buf + 8);
+ crc = crc32_uint32(crc);
+
+ crc ^= *(uint32_t *)(buf + 12);
+ crc = crc32_uint32(crc);
+#endif
+#else
+ crc = crc32_fast[3][(buf[0] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[1] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[2] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[3] ^ (crc >> 24)) & 0xff];
+
+ crc = crc32_fast[3][(buf[4] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[5] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[6] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[7] ^ (crc >> 24)) & 0xff];
+
+ crc = crc32_fast[3][(buf[8] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[9] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[10] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[11] ^ (crc >> 24)) & 0xff];
+
+ crc = crc32_fast[3][(buf[12] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[13] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[14] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[15] ^ (crc >> 24)) & 0xff];
+#endif
+ buf += 16;
+ }
+
+ while (buf <= end - 4) {
+#ifdef UNALIGNED_LE_OK
+ crc ^= *(uint32_t *)buf;
+ crc = crc32_uint32(crc);
+#else
+ crc = crc32_fast[3][(buf[0] ^ (crc >> 0)) & 0xff] ^
+ crc32_fast[2][(buf[1] ^ (crc >> 8)) & 0xff] ^
+ crc32_fast[1][(buf[2] ^ (crc >> 16)) & 0xff] ^
+ crc32_fast[0][(buf[3] ^ (crc >> 24)) & 0xff];
+#endif
+ buf += 4;
+ }
+
+ while (buf < end)
+ crc = crc32_char(crc, *buf++);
+ return crc;
+}
+
+/* uses the most suitable crc32 function to update crc on <buf, len> */
+static inline uint32_t update_crc(uint32_t crc, const void *buf, int len)
+{
+ return slz_crc32_by4(crc, buf, len);
+}
+
+/* Sends the gzip header for stream <strm> into buffer <buf>. When it's done,
+ * the stream state is updated to SLZ_ST_EOB. It returns the number of bytes
+ * emitted which is always 10. The caller is responsible for ensuring there's
+ * always enough room in the buffer.
+ */
+int slz_rfc1952_send_header(struct slz_stream *strm, unsigned char *buf)
+{
+ memcpy(buf, gzip_hdr, sizeof(gzip_hdr));
+ strm->state = SLZ_ST_EOB;
+ return sizeof(gzip_hdr);
+}
+
+/* Encodes the block according to rfc1952. This means that the CRC of the input
+ * block is computed according to the CRC32 algorithm. If the header was never
+ * sent, it may be sent first. The number of output bytes is returned.
+ */
+long slz_rfc1952_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
+{
+ long ret = 0;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ ret += slz_rfc1952_send_header(strm, out);
+
+ strm->crc32 = update_crc(strm->crc32, in, ilen);
+ ret += slz_rfc1951_encode(strm, out + ret, in, ilen, more);
+ return ret;
+}
+
+/* Initializes stream <strm> for use with the gzip format (rfc1952). The
+ * compression level passed in <level> is set. This value can only be 0 (no
+ * compression) or 1 (compression) and other values will lead to unpredictable
+ * behaviour. The function always returns 0.
+ */
+int slz_rfc1952_init(struct slz_stream *strm, int level)
+{
+ strm->state = SLZ_ST_INIT;
+ strm->level = level;
+ strm->format = SLZ_FMT_GZIP;
+ strm->crc32 = 0;
+ strm->ilen = 0;
+ strm->qbits = 0;
+ strm->queue = 0;
+ return 0;
+}
+
+/* Flushes any pending data for stream <strm> into buffer <buf>, then emits an
+ * empty literal block to byte-align the output, allowing the queue to be
+ * completely flushed. Note that if the initial header was never sent, it will be sent
+ * first as well (10 extra bytes). This requires that the output buffer still
+ * has this plus the size of the queue available (up to 4 bytes), plus one byte
+ * for (BFINAL,BTYPE), plus 4 bytes for LEN+NLEN, or a total of 19 bytes in the
+ * worst case. The number of bytes emitted is returned. It is guaranteed that
+ * the queue is empty on return. This may cause some overhead by adding
+ * needless 5-byte blocks if called to often.
+ */
+int slz_rfc1952_flush(struct slz_stream *strm, unsigned char *buf)
+{
+ int sent = 0;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ sent = slz_rfc1952_send_header(strm, buf);
+
+ sent += slz_rfc1951_flush(strm, buf + sent);
+ return sent;
+}
+
+/* Flushes pending bits and sends the gzip trailer for stream <strm> into
+ * buffer <buf>. When it's done, the stream state is updated to SLZ_ST_END. It
+ * returns the number of bytes emitted. The trailer consists of flushing the
+ * possibly pending bits from the queue (up to 24 bits), rounding to the next
+ * byte, then 4 bytes for the CRC and another 4 bytes for the input length.
+ * That may amount to 4+4+4 = 12 bytes, which the caller must ensure are
+ * available before calling the function. Note that if the initial header was
+ * never sent, it will be sent first as well (10 extra bytes).
+ */
+int slz_rfc1952_finish(struct slz_stream *strm, unsigned char *buf)
+{
+ strm->outbuf = buf;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ strm->outbuf += slz_rfc1952_send_header(strm, strm->outbuf);
+
+ slz_rfc1951_finish(strm, strm->outbuf);
+ copy_32b(strm, strm->crc32);
+ copy_32b(strm, strm->ilen);
+ strm->state = SLZ_ST_END;
+
+ return strm->outbuf - buf;
+}
+
+
+/* RFC1950-specific stuff. This is for the Zlib stream format.
+ * From RFC1950 (zlib) :
+ *
+
+ 2.2. Data format
+
+ A zlib stream has the following structure:
+
+ 0 1
+ +---+---+
+ |CMF|FLG| (more-->)
+ +---+---+
+
+
+ (if FLG.FDICT set)
+
+ 0 1 2 3
+ +---+---+---+---+
+ | DICTID | (more-->)
+ +---+---+---+---+
+
+ +=====================+---+---+---+---+
+ |...compressed data...| ADLER32 |
+ +=====================+---+---+---+---+
+
+ Any data which may appear after ADLER32 are not part of the zlib
+ stream.
+
+ CMF (Compression Method and flags)
+ This byte is divided into a 4-bit compression method and a 4-
+ bit information field depending on the compression method.
+
+ bits 0 to 3 CM Compression method
+ bits 4 to 7 CINFO Compression info
+
+ CM (Compression method)
+ This identifies the compression method used in the file. CM = 8
+ denotes the "deflate" compression method with a window size up
+ to 32K. This is the method used by gzip and PNG (see
+ references [1] and [2] in Chapter 3, below, for the reference
+ documents). CM = 15 is reserved. It might be used in a future
+ version of this specification to indicate the presence of an
+ extra field before the compressed data.
+
+ CINFO (Compression info)
+ For CM = 8, CINFO is the base-2 logarithm of the LZ77 window
+ size, minus eight (CINFO=7 indicates a 32K window size). Values
+ of CINFO above 7 are not allowed in this version of the
+ specification. CINFO is not defined in this specification for
+ CM not equal to 8.
+
+ FLG (FLaGs)
+ This flag byte is divided as follows:
+
+ bits 0 to 4 FCHECK (check bits for CMF and FLG)
+ bit 5 FDICT (preset dictionary)
+ bits 6 to 7 FLEVEL (compression level)
+
+ The FCHECK value must be such that CMF and FLG, when viewed as
+ a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG),
+ is a multiple of 31.
+
+
+ FDICT (Preset dictionary)
+ If FDICT is set, a DICT dictionary identifier is present
+ immediately after the FLG byte. The dictionary is a sequence of
+ bytes which are initially fed to the compressor without
+ producing any compressed output. DICT is the Adler-32 checksum
+ of this sequence of bytes (see the definition of ADLER32
+ below). The decompressor can use this identifier to determine
+ which dictionary has been used by the compressor.
+
+ FLEVEL (Compression level)
+ These flags are available for use by specific compression
+ methods. The "deflate" method (CM = 8) sets these flags as
+ follows:
+
+ 0 - compressor used fastest algorithm
+ 1 - compressor used fast algorithm
+ 2 - compressor used default algorithm
+ 3 - compressor used maximum compression, slowest algorithm
+
+ The information in FLEVEL is not needed for decompression; it
+ is there to indicate if recompression might be worthwhile.
+
+ compressed data
+ For compression method 8, the compressed data is stored in the
+ deflate compressed data format as described in the document
+ "DEFLATE Compressed Data Format Specification" by L. Peter
+ Deutsch. (See reference [3] in Chapter 3, below)
+
+ Other compressed data formats are not specified in this version
+ of the zlib specification.
+
+ ADLER32 (Adler-32 checksum)
+ This contains a checksum value of the uncompressed data
+ (excluding any dictionary data) computed according to Adler-32
+ algorithm. This algorithm is a 32-bit extension and improvement
+ of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073
+ standard. See references [4] and [5] in Chapter 3, below)
+
+ Adler-32 is composed of two sums accumulated per byte: s1 is
+ the sum of all bytes, s2 is the sum of all s1 values. Both sums
+ are done modulo 65521. s1 is initialized to 1, s2 to zero. The
+ Adler-32 checksum is stored as s2*65536 + s1 in most-
+ significant-byte first (network) order.
+
+ ==> The stream can start with only 2 bytes :
+     - CMF = 0x78 : CINFO=7 (32kB window), CM=8 (deflate)
+ - FLG = 0x01 : FLEVEL = 0 (fastest), FDICT=0 (no dict), FCHECK=1 so
+ that 0x7801 is a multiple of 31 (30721 = 991 * 31).
+
+ ==> and it ends with only 4 bytes, the Adler-32 checksum in big-endian format.
+
+ */
+
+static const unsigned char zlib_hdr[] = { 0x78, 0x01 }; // 32k win, deflate, chk=1
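+
+/* Illustrative note (not part of the original sources): the FCHECK bits of
+ * such a header can be derived instead of hardcoded. With the FCHECK bits
+ * initially cleared:
+ *
+ *   unsigned int hdr = (0x78 << 8) | 0x00;  // CMF=0x78, FLG with FCHECK=0
+ *   if (hdr % 31)
+ *       hdr += 31 - (hdr % 31);             // hdr is now 0x7801
+ *
+ * which matches zlib_hdr above since 0x7801 = 30721 = 991*31.
+ */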
+
+
+/* Original version from RFC1950, verified and works OK */
+uint32_t slz_adler32_by1(uint32_t crc, const unsigned char *buf, int len)
+{
+ uint32_t s1 = crc & 0xffff;
+ uint32_t s2 = (crc >> 16) & 0xffff;
+ int n;
+
+ for (n = 0; n < len; n++) {
+ s1 = (s1 + buf[n]) % 65521;
+ s2 = (s2 + s1) % 65521;
+ }
+ return (s2 << 16) + s1;
+}
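+
+/* Worked example (added for illustration): for the 3-byte input "abc"
+ * (0x61 0x62 0x63), starting from the initial value 1:
+ *
+ *   s1: 1 -> 98 -> 196 -> 295      (+'a'=97, +'b'=98, +'c'=99)
+ *   s2: 0 -> 98 -> 294 -> 589      (accumulates each new s1)
+ *
+ * so slz_adler32_by1(1, "abc", 3) returns 589*65536 + 295 = 0x024d0127,
+ * the well-known Adler-32 checksum of "abc".
+ */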
+
+/* Computes the adler32 sum on <buf> for <len> bytes. It avoids the expensive
+ * modulus by folding back the excess accumulated between 65521 and 65536,
+ * which is easy to count : for every multiple of 65536 present in a sum, the
+ * value modulo 65521 is offset by (65536-65521) = 15. So for any value, we
+ * can count the accumulated excess by dividing the sum by 65536 and
+ * multiplying this value by (65536-65521). Note that this folding yields a
+ * congruence, not an equality, so a final conditional subtraction of 65521
+ * may still be needed :
+ *    x == (x % 65536) + (x / 65536) * (65536 - 65521)  (mod 65521)
+ *      == (x & 0xffff) + (x >> 16) * 15                (mod 65521)
+ */
+uint32_t slz_adler32_block(uint32_t crc, const unsigned char *buf, long len)
+{
+ long s1 = crc & 0xffff;
+ long s2 = (crc >> 16);
+ long blk;
+ long n;
+
+ do {
+ blk = len;
+		/* ensure we never overflow s2 (limit is about 2^((32-8)/2)) */
+ if (blk > (1U << 12))
+ blk = 1U << 12;
+ len -= blk;
+
+ for (n = 0; n < blk; n++) {
+ s1 = (s1 + buf[n]);
+ s2 = (s2 + s1);
+ }
+
+		/* Largest value here is 2^12 * 255 = 1044480 < 2^20. After
+		 * folding we can still exceed 16 bits once, but not twice,
+		 * because the right-hand side is at most 15*15 = 225, so the
+		 * total is at most 65535+225 = 65760. However we also have
+		 * to take care of the values between 65521 and 65536.
+ */
+ s1 = (s1 & 0xffff) + 15 * (s1 >> 16);
+ if (s1 >= 65521)
+ s1 -= 65521;
+
+		/* For s2, the largest value is estimated at 2^32-1 for
+		 * simplicity, so the right-hand side is about 15*65535
+		 * = 983025. We can overflow twice at most.
+ */
+ s2 = (s2 & 0xffff) + 15 * (s2 >> 16);
+ s2 = (s2 & 0xffff) + 15 * (s2 >> 16);
+ if (s2 >= 65521)
+ s2 -= 65521;
+
+ buf += blk;
+ } while (len);
+ return (s2 << 16) + s1;
+}
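+
+/* A quick numeric check of the folding above (added for illustration): with
+ * x = 200000, x & 0xffff = 3392 and x >> 16 = 3, so one folding step gives
+ * 3392 + 3*15 = 3437, and indeed 200000 % 65521 = 200000 - 3*65521 = 3437.
+ * When the folded value still exceeds 65521, the conditional subtraction in
+ * the loop above finishes the reduction.
+ */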
+
+/* Sends the zlib header for stream <strm> into buffer <buf>. When it's done,
+ * the stream state is updated to SLZ_ST_EOB. It returns the number of bytes
+ * emitted which is always 2. The caller is responsible for ensuring there's
+ * always enough room in the buffer.
+ */
+int slz_rfc1950_send_header(struct slz_stream *strm, unsigned char *buf)
+{
+ memcpy(buf, zlib_hdr, sizeof(zlib_hdr));
+ strm->state = SLZ_ST_EOB;
+ return sizeof(zlib_hdr);
+}
+
+/* Encodes the block according to rfc1950. This means that the checksum of
+ * the input block is computed according to the Adler-32 algorithm (it is
+ * still stored in the crc32 field). If the header was never sent, it is sent
+ * first. The number of output bytes is returned.
+ */
+long slz_rfc1950_encode(struct slz_stream *strm, unsigned char *out, const unsigned char *in, long ilen, int more)
+{
+ long ret = 0;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ ret += slz_rfc1950_send_header(strm, out);
+
+ strm->crc32 = slz_adler32_block(strm->crc32, in, ilen);
+ ret += slz_rfc1951_encode(strm, out + ret, in, ilen, more);
+ return ret;
+}
+
+/* Initializes stream <strm> for use with the zlib format (rfc1950). The
+ * compression level passed in <level> is set. This value can only be 0 (no
+ * compression) or 1 (compression) and other values will lead to unpredictable
+ * behaviour. The function always returns 0.
+ */
+int slz_rfc1950_init(struct slz_stream *strm, int level)
+{
+ strm->state = SLZ_ST_INIT;
+ strm->level = level;
+ strm->format = SLZ_FMT_ZLIB;
+ strm->crc32 = 1; // rfc1950/zlib starts with initial crc=1
+ strm->ilen = 0;
+ strm->qbits = 0;
+ strm->queue = 0;
+ return 0;
+}
+
+/* Flushes any pending data for stream <strm> into buffer <buf>, then emits an
+ * empty literal block to byte-align the output, which completely flushes
+ * the queue. Note that if the initial header was never sent, it will be sent
+ * first as well (2 extra bytes). This requires that the output buffer still
+ * has this plus the size of the queue available (up to 4 bytes), plus one byte
+ * for (BFINAL,BTYPE), plus 4 bytes for LEN+NLEN, or a total of 11 bytes in the
+ * worst case. The number of bytes emitted is returned. It is guaranteed that
+ * the queue is empty on return. This may cause some overhead by adding
+ * needless 5-byte blocks if called too often.
+ */
+int slz_rfc1950_flush(struct slz_stream *strm, unsigned char *buf)
+{
+ int sent = 0;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+ sent = slz_rfc1950_send_header(strm, buf);
+
+ sent += slz_rfc1951_flush(strm, buf + sent);
+ return sent;
+}
+
+/* Flushes pending bits and sends the zlib trailer for stream <strm> into
+ * buffer <buf>. When it's done, the stream state is updated to SLZ_ST_END. It
+ * returns the number of bytes emitted. The trailer consists of flushing the
+ * possibly pending bits from the queue (up to 24 bits), rounding to the next
+ * byte, then 4 bytes for the Adler-32 checksum. That may amount to 4+4 = 8
+ * bytes, which the caller must ensure are available before calling the
+ * function. Note that if
+ * the initial header was never sent, it will be sent first as well (2 extra
+ * bytes).
+ */
+int slz_rfc1950_finish(struct slz_stream *strm, unsigned char *buf)
+{
+ strm->outbuf = buf;
+
+ if (__builtin_expect(strm->state == SLZ_ST_INIT, 0))
+		strm->outbuf += slz_rfc1950_send_header(strm, strm->outbuf);
+
+ slz_rfc1951_finish(strm, strm->outbuf);
+ copy_8b(strm, (strm->crc32 >> 24) & 0xff);
+ copy_8b(strm, (strm->crc32 >> 16) & 0xff);
+ copy_8b(strm, (strm->crc32 >> 8) & 0xff);
+ copy_8b(strm, (strm->crc32 >> 0) & 0xff);
+ strm->state = SLZ_ST_END;
+ return strm->outbuf - buf;
+}
+
+__attribute__((constructor))
+static void __slz_initialize(void)
+{
+#if !defined(__ARM_FEATURE_CRC32)
+ __slz_make_crc_table();
+#endif
+ __slz_prepare_dist_table();
+}
diff --git a/src/sock.c b/src/sock.c
new file mode 100644
index 0000000..7fcdc10
--- /dev/null
+++ b/src/sock.c
@@ -0,0 +1,1072 @@
+/*
+ * Generic code for native (BSD-compatible) sockets
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <net/if.h>
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/connection.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proto_sockpair.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/tools.h>
+
+#define SOCK_XFER_OPT_FOREIGN 0x000000001
+#define SOCK_XFER_OPT_V6ONLY 0x000000002
+#define SOCK_XFER_OPT_DGRAM 0x000000004
+
+/* the list of remaining sockets transferred from an older process */
+struct xfer_sock_list {
+ int fd;
+ int options; /* socket options as SOCK_XFER_OPT_* */
+ char *iface;
+ char *namespace;
+ int if_namelen;
+ int ns_namelen;
+ struct xfer_sock_list *prev;
+ struct xfer_sock_list *next;
+ struct sockaddr_storage addr;
+};
+
+static struct xfer_sock_list *xfer_sock_list;
+
+
+/* Accept an incoming connection from listener <l>, and return it, as well as
+ * a CO_AC_* status code into <status> if not NULL. NULL is returned on error.
+ * <l> must be a valid listener with a valid frontend.
+ */
+struct connection *sock_accept_conn(struct listener *l, int *status)
+{
+#ifdef USE_ACCEPT4
+ static int accept4_broken;
+#endif
+ struct proxy *p = l->bind_conf->frontend;
+ struct connection *conn = NULL;
+ struct sockaddr_storage *addr = NULL;
+ socklen_t laddr;
+ int ret;
+ int cfd;
+
+ if (!sockaddr_alloc(&addr, NULL, 0))
+ goto fail_addr;
+
+	/* All accepted FDs will be marked O_NONBLOCK, and the ones accepted
+ * in the master process as FD_CLOEXEC. It's not done for workers
+ * because 1) workers are not supposed to execute anything so there's
+ * no reason for uselessly slowing down everything, and 2) that would
+ * prevent us from implementing fd passing in the future.
+ */
+#ifdef USE_ACCEPT4
+ laddr = sizeof(*conn->src);
+
+ /* only call accept4() if it's known to be safe, otherwise fallback to
+ * the legacy accept() + fcntl().
+ */
+ if (unlikely(accept4_broken) ||
+ (((cfd = accept4(l->rx.fd, (struct sockaddr*)addr, &laddr,
+ SOCK_NONBLOCK | (master ? SOCK_CLOEXEC : 0))) == -1) &&
+ (errno == ENOSYS || errno == EINVAL || errno == EBADF) &&
+ ((accept4_broken = 1))))
+#endif
+ {
+ laddr = sizeof(*conn->src);
+ if ((cfd = accept(l->rx.fd, (struct sockaddr*)addr, &laddr)) != -1) {
+ fd_set_nonblock(cfd);
+ if (master)
+ fd_set_cloexec(cfd);
+ }
+ }
+
+ if (likely(cfd != -1)) {
+ if (unlikely(cfd >= global.maxsock)) {
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached the configured maximum connection limit. Please check the global 'maxconn' value.\n",
+ p->id);
+ goto fail_conn;
+ }
+
+ /* Perfect, the connection was accepted */
+ conn = conn_new(&l->obj_type);
+ if (!conn)
+ goto fail_conn;
+
+ conn->src = addr;
+ conn->handle.fd = cfd;
+ ret = CO_AC_DONE;
+ goto done;
+ }
+
+ /* error conditions below */
+ sockaddr_free(&addr);
+
+ switch (errno) {
+#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
+ case EWOULDBLOCK:
+#endif
+ case EAGAIN:
+ ret = CO_AC_DONE; /* nothing more to accept */
+ if (fdtab[l->rx.fd].state & (FD_POLL_HUP|FD_POLL_ERR)) {
+ /* the listening socket might have been disabled in a shared
+ * process and we're a collateral victim. We'll just pause for
+ * a while in case it comes back. In the mean time, we need to
+ * clear this sticky flag.
+ */
+ _HA_ATOMIC_AND(&fdtab[l->rx.fd].state, ~(FD_POLL_HUP|FD_POLL_ERR));
+ ret = CO_AC_PAUSE;
+ }
+ fd_cant_recv(l->rx.fd);
+ break;
+
+ case EINVAL:
+ /* might be trying to accept on a shut fd (eg: soft stop) */
+ ret = CO_AC_PAUSE;
+ break;
+
+ case EINTR:
+ case ECONNABORTED:
+ ret = CO_AC_RETRY;
+ break;
+
+ case ENFILE:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ case EMFILE:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ case ENOBUFS:
+ case ENOMEM:
+ if (p)
+ send_log(p, LOG_EMERG,
+ "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n",
+ p->id, global.maxsock);
+ ret = CO_AC_PAUSE;
+ break;
+
+ default:
+ /* unexpected result, let's give up and let other tasks run */
+ ret = CO_AC_YIELD;
+ }
+ done:
+ if (status)
+ *status = ret;
+ return conn;
+
+ fail_conn:
+ sockaddr_free(&addr);
+	/* The accept() call had already succeeded by the time the connection
+	 * allocation failed, so the accepted FD must be closed here. */
+ close(cfd);
+ fail_addr:
+ ret = CO_AC_PAUSE;
+ goto done;
+}
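+
+/* The accept4() handling above boils down to the following fallback idiom,
+ * shown in isolation as an illustrative sketch (not part of the original
+ * sources; <lfd>, <sa> and <sl> are assumed to be a listening FD and an
+ * address buffer with its length):
+ *
+ *   int cfd = accept4(lfd, sa, &sl, SOCK_NONBLOCK);
+ *   if (cfd == -1 && (errno == ENOSYS || errno == EINVAL || errno == EBADF)) {
+ *       accept4_broken = 1;               // remember, never try it again
+ *       if ((cfd = accept(lfd, sa, &sl)) != -1)
+ *           fcntl(cfd, F_SETFL, fcntl(cfd, F_GETFL) | O_NONBLOCK);
+ *   }
+ */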
+
+/* Create a socket to connect to the server in conn->dst (which MUST be valid),
+ * using the configured namespace if needed, or the one passed by the proxy
+ * protocol if required to do so. It ultimately calls socket() or socketat()
+ * and returns the FD or error code.
+ */
+int sock_create_server_socket(struct connection *conn)
+{
+ const struct netns_entry *ns = NULL;
+
+#ifdef USE_NS
+ if (objt_server(conn->target)) {
+ if (__objt_server(conn->target)->flags & SRV_F_USE_NS_FROM_PP)
+ ns = conn->proxy_netns;
+ else
+ ns = __objt_server(conn->target)->netns;
+ }
+#endif
+ return my_socketat(ns, conn->dst->ss_family, SOCK_STREAM, 0);
+}
+
+/* Enables receiving on receiver <rx> once already bound. */
+void sock_enable(struct receiver *rx)
+{
+ if (rx->flags & RX_F_BOUND)
+ fd_want_recv_safe(rx->fd);
+}
+
+/* Disables receiving on receiver <rx> once already bound. */
+void sock_disable(struct receiver *rx)
+{
+ if (rx->flags & RX_F_BOUND)
+ fd_stop_recv(rx->fd);
+}
+
+/* stops, unbinds and possibly closes the FD associated with receiver rx */
+void sock_unbind(struct receiver *rx)
+{
+ /* There are a number of situations where we prefer to keep the FD and
+ * not to close it (unless we're stopping, of course):
+ * - worker process unbinding from a worker's non-suspendable FD (ABNS) => close
+ * - worker process unbinding from a worker's FD with socket transfer enabled => keep
+ * - master process unbinding from a master's inherited FD => keep
+ * - master process unbinding from a master's FD => close
+ * - master process unbinding from a worker's inherited FD => keep
+ * - master process unbinding from a worker's FD => close
+ * - worker process unbinding from a master's FD => close
+ * - worker process unbinding from a worker's FD => close
+ */
+ if (rx->flags & RX_F_BOUND)
+ rx->proto->rx_disable(rx);
+
+ if (!stopping && !master &&
+ !(rx->flags & RX_F_MWORKER) &&
+ !(rx->flags & RX_F_NON_SUSPENDABLE) &&
+ (global.tune.options & GTUNE_SOCKET_TRANSFER))
+ return;
+
+ if (!stopping && master &&
+ rx->flags & RX_F_INHERITED)
+ return;
+
+ rx->flags &= ~RX_F_BOUND;
+ if (rx->fd != -1)
+ fd_delete(rx->fd);
+ rx->fd = -1;
+}
+
+/*
+ * Retrieves the source address for the socket <fd>, with <dir> indicating
+ * if we're a listener (=0) or an initiator (!=0). It returns 0 in case of
+ * success, -1 in case of error. The socket's source address is stored in
+ * <sa> for <salen> bytes.
+ */
+int sock_get_src(int fd, struct sockaddr *sa, socklen_t salen, int dir)
+{
+ if (dir)
+ return getsockname(fd, sa, &salen);
+ else
+ return getpeername(fd, sa, &salen);
+}
+
+/*
+ * Retrieves the original destination address for the socket <fd>, with <dir>
+ * indicating if we're a listener (=0) or an initiator (!=0). It returns 0 in
+ * case of success, -1 in case of error. The socket's source address is stored
+ * in <sa> for <salen> bytes.
+ */
+int sock_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
+{
+ if (dir)
+ return getpeername(fd, sa, &salen);
+ else
+ return getsockname(fd, sa, &salen);
+}
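+
+/* Example use of the helpers above (hypothetical caller, for illustration):
+ * retrieving the address a client connected to on a listener-side FD <fd>:
+ *
+ *   struct sockaddr_storage ss;
+ *
+ *   if (sock_get_dst(fd, (struct sockaddr *)&ss, sizeof(ss), 0) == 0)
+ *       ;  // ss now holds the local (destination) address of the socket
+ */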
+
+/* Try to retrieve exported sockets from worker at CLI <unixsocket>. These
+ * ones will be placed into the xfer_sock_list for later use by function
+ * sock_find_compatible_fd(). Returns 0 on success, -1 on failure.
+ */
+int sock_get_old_sockets(const char *unixsocket)
+{
+ char *cmsgbuf = NULL, *tmpbuf = NULL;
+ int *tmpfd = NULL;
+ struct sockaddr_un addr;
+ struct cmsghdr *cmsg;
+ struct msghdr msghdr;
+ struct iovec iov;
+ struct xfer_sock_list *xfer_sock = NULL;
+ struct timeval tv = { .tv_sec = 1, .tv_usec = 0 };
+ int sock = -1;
+ int ret = -1;
+ int ret2 = -1;
+ int fd_nb;
+ int got_fd = 0;
+ int cur_fd = 0;
+ size_t maxoff = 0, curoff = 0;
+
+ if (strncmp("sockpair@", unixsocket, strlen("sockpair@")) == 0) {
+ /* sockpair for master-worker usage */
+ int sv[2];
+ int dst_fd;
+
+ dst_fd = strtoll(unixsocket + strlen("sockpair@"), NULL, 0);
+
+		if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
+			ha_warning("socketpair(): Cannot create socketpair. Giving up.\n");
+			goto out;
+		}
+
+ if (send_fd_uxst(dst_fd, sv[0]) == -1) {
+ ha_alert("socketpair: Cannot transfer the fd %d over sockpair@%d. Giving up.\n", sv[0], dst_fd);
+ close(sv[0]);
+ close(sv[1]);
+ goto out;
+ }
+
+ close(sv[0]); /* we don't need this side anymore */
+ sock = sv[1];
+
+ } else {
+ /* Unix socket */
+
+ sock = socket(PF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ ha_warning("Failed to connect to the old process socket '%s'\n", unixsocket);
+ goto out;
+ }
+
+ strncpy(addr.sun_path, unixsocket, sizeof(addr.sun_path) - 1);
+ addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
+ addr.sun_family = PF_UNIX;
+
+ ret = connect(sock, (struct sockaddr *)&addr, sizeof(addr));
+ if (ret < 0) {
+ ha_warning("Failed to connect to the old process socket '%s'\n", unixsocket);
+ goto out;
+ }
+
+ }
+ memset(&msghdr, 0, sizeof(msghdr));
+ cmsgbuf = malloc(CMSG_SPACE(sizeof(int)) * MAX_SEND_FD);
+ if (!cmsgbuf) {
+ ha_warning("Failed to allocate memory to send sockets\n");
+ goto out;
+ }
+
+ setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv));
+ iov.iov_base = &fd_nb;
+ iov.iov_len = sizeof(fd_nb);
+ msghdr.msg_iov = &iov;
+ msghdr.msg_iovlen = 1;
+
+ if (send(sock, "_getsocks\n", strlen("_getsocks\n"), 0) != strlen("_getsocks\n")) {
+ ha_warning("Failed to get the number of sockets to be transferred !\n");
+ goto out;
+ }
+
+ /* First, get the number of file descriptors to be received */
+ if (recvmsg(sock, &msghdr, MSG_WAITALL) != sizeof(fd_nb)) {
+ ha_warning("Failed to get the number of sockets to be transferred !\n");
+ goto out;
+ }
+
+ if (fd_nb == 0) {
+ ret2 = 0;
+ goto out;
+ }
+
+ tmpbuf = malloc(fd_nb * (1 + MAXPATHLEN + 1 + IFNAMSIZ + sizeof(int)));
+ if (tmpbuf == NULL) {
+ ha_warning("Failed to allocate memory while receiving sockets\n");
+ goto out;
+ }
+
+ tmpfd = malloc(fd_nb * sizeof(int));
+ if (tmpfd == NULL) {
+ ha_warning("Failed to allocate memory while receiving sockets\n");
+ goto out;
+ }
+
+ msghdr.msg_control = cmsgbuf;
+ msghdr.msg_controllen = CMSG_SPACE(sizeof(int)) * MAX_SEND_FD;
+ iov.iov_len = MAX_SEND_FD * (1 + MAXPATHLEN + 1 + IFNAMSIZ + sizeof(int));
+
+ do {
+ int ret3;
+
+ iov.iov_base = tmpbuf + curoff;
+
+ ret = recvmsg(sock, &msghdr, 0);
+
+ if (ret == -1 && errno == EINTR)
+ continue;
+
+ if (ret <= 0)
+ break;
+
+ /* Send an ack to let the sender know we got the sockets
+ * and it can send some more
+ */
+ do {
+ ret3 = send(sock, &got_fd, sizeof(got_fd), 0);
+ } while (ret3 == -1 && errno == EINTR);
+
+ for (cmsg = CMSG_FIRSTHDR(&msghdr); cmsg != NULL; cmsg = CMSG_NXTHDR(&msghdr, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+ size_t totlen = cmsg->cmsg_len - CMSG_LEN(0);
+
+ if (totlen / sizeof(int) + got_fd > fd_nb) {
+					ha_warning("Got too many sockets!\n");
+ goto out;
+ }
+
+ /*
+ * Be paranoid and use memcpy() to avoid any
+ * potential alignment issue.
+ */
+ memcpy(&tmpfd[got_fd], CMSG_DATA(cmsg), totlen);
+ got_fd += totlen / sizeof(int);
+ }
+ }
+ curoff += ret;
+ } while (got_fd < fd_nb);
+
+ if (got_fd != fd_nb) {
+		ha_warning("We didn't get the expected number of sockets (expected %d, got %d)\n",
+ fd_nb, got_fd);
+ goto out;
+ }
+
+ maxoff = curoff;
+ curoff = 0;
+
+ for (cur_fd = 0; cur_fd < got_fd; cur_fd++) {
+ int fd = tmpfd[cur_fd];
+ socklen_t socklen;
+ int val;
+ int len;
+
+ xfer_sock = calloc(1, sizeof(*xfer_sock));
+ if (!xfer_sock) {
+ ha_warning("Failed to allocate memory in get_old_sockets() !\n");
+ break;
+ }
+ xfer_sock->fd = -1;
+
+ socklen = sizeof(xfer_sock->addr);
+ if (getsockname(fd, (struct sockaddr *)&xfer_sock->addr, &socklen) != 0) {
+ ha_warning("Failed to get socket address\n");
+ ha_free(&xfer_sock);
+ continue;
+ }
+
+ if (curoff >= maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+
+ len = tmpbuf[curoff++];
+ if (len > 0) {
+ /* We have a namespace */
+ if (curoff + len > maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+ xfer_sock->namespace = malloc(len + 1);
+ if (!xfer_sock->namespace) {
+ ha_warning("Failed to allocate memory while transferring sockets\n");
+ goto out;
+ }
+ memcpy(xfer_sock->namespace, &tmpbuf[curoff], len);
+ xfer_sock->namespace[len] = 0;
+ xfer_sock->ns_namelen = len;
+ curoff += len;
+ }
+
+ if (curoff >= maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+
+ len = tmpbuf[curoff++];
+ if (len > 0) {
+ /* We have an interface */
+ if (curoff + len > maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+ xfer_sock->iface = malloc(len + 1);
+ if (!xfer_sock->iface) {
+ ha_warning("Failed to allocate memory while transferring sockets\n");
+ goto out;
+ }
+ memcpy(xfer_sock->iface, &tmpbuf[curoff], len);
+ xfer_sock->iface[len] = 0;
+ xfer_sock->if_namelen = len;
+ curoff += len;
+ }
+
+ if (curoff + sizeof(int) > maxoff) {
+ ha_warning("Inconsistency while transferring sockets\n");
+ goto out;
+ }
+
+ /* we used to have 32 bits of listener options here but we don't
+ * use them anymore.
+ */
+ curoff += sizeof(int);
+
+ /* determine the foreign status directly from the socket itself */
+ if (sock_inet_is_foreign(fd, xfer_sock->addr.ss_family))
+ xfer_sock->options |= SOCK_XFER_OPT_FOREIGN;
+
+ socklen = sizeof(val);
+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, &val, &socklen) == 0 && val == SOCK_DGRAM)
+ xfer_sock->options |= SOCK_XFER_OPT_DGRAM;
+
+#if defined(IPV6_V6ONLY)
+ /* keep only the v6only flag depending on what's currently
+ * active on the socket, and always drop the v4v6 one.
+ */
+ socklen = sizeof(val);
+ if (xfer_sock->addr.ss_family == AF_INET6 &&
+ getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &socklen) == 0 && val > 0)
+ xfer_sock->options |= SOCK_XFER_OPT_V6ONLY;
+#endif
+
+ xfer_sock->fd = fd;
+ if (xfer_sock_list)
+ xfer_sock_list->prev = xfer_sock;
+ xfer_sock->next = xfer_sock_list;
+ xfer_sock->prev = NULL;
+ xfer_sock_list = xfer_sock;
+ xfer_sock = NULL;
+ }
+
+ ret2 = 0;
+out:
+ /* If we failed midway make sure to close the remaining
+ * file descriptors
+ */
+ if (tmpfd != NULL && cur_fd < got_fd) {
+ for (; cur_fd < got_fd; cur_fd++) {
+ close(tmpfd[cur_fd]);
+ }
+ }
+
+ free(tmpbuf);
+ free(tmpfd);
+ free(cmsgbuf);
+
+ if (sock != -1)
+ close(sock);
+
+ if (xfer_sock) {
+ free(xfer_sock->namespace);
+ free(xfer_sock->iface);
+ if (xfer_sock->fd != -1)
+ close(xfer_sock->fd);
+ free(xfer_sock);
+ }
+ return (ret2);
+}
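+
+/* To summarize the exchange implemented above (informal sketch): the new
+ * process connects to the old process' CLI, sends "_getsocks\n", reads an
+ * integer carrying the number of FDs, then loops on recvmsg() collecting
+ * SCM_RIGHTS control messages while the regular data stream carries, for
+ * each socket, { ns_len, ns, if_len, if, 32-bit legacy options }, acking
+ * each batch with a small send(). The core of one SCM_RIGHTS receive looks
+ * like this (error handling omitted):
+ *
+ *   char data;
+ *   struct iovec iov = { .iov_base = &data, .iov_len = 1 };
+ *   union { char buf[CMSG_SPACE(sizeof(int))]; struct cmsghdr align; } u;
+ *   struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
+ *                        .msg_control = u.buf, .msg_controllen = sizeof(u.buf) };
+ *   struct cmsghdr *c;
+ *   int one_fd = -1;
+ *
+ *   if (recvmsg(sock, &mh, 0) > 0 &&
+ *       (c = CMSG_FIRSTHDR(&mh)) != NULL &&
+ *       c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS)
+ *       memcpy(&one_fd, CMSG_DATA(c), sizeof(one_fd));
+ */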
+
+/* When binding the receivers, check if a socket has been sent to us by the
+ * previous process that we could reuse, instead of creating a new one. Note
+ * that some address family-specific options are checked on the listener and
+ * on the socket. Typically for AF_INET and AF_INET6, we check for transparent
+ * mode, and for AF_INET6 we also check for "v4v6" or "v6only". The reused
+ * socket is automatically removed from the list so that it's not proposed
+ * anymore.
+ */
+int sock_find_compatible_fd(const struct receiver *rx)
+{
+ struct xfer_sock_list *xfer_sock = xfer_sock_list;
+ int options = 0;
+ int if_namelen = 0;
+ int ns_namelen = 0;
+ int ret = -1;
+
+ if (!rx->proto->fam->addrcmp)
+ return -1;
+
+ if (rx->proto->proto_type == PROTO_TYPE_DGRAM)
+ options |= SOCK_XFER_OPT_DGRAM;
+
+ if (rx->settings->options & RX_O_FOREIGN)
+ options |= SOCK_XFER_OPT_FOREIGN;
+
+ if (rx->addr.ss_family == AF_INET6) {
+ /* Prepare to match the v6only option against what we really want. Note
+ * that sadly the two options are not exclusive to each other and that
+ * v6only is stronger than v4v6.
+ */
+ if ((rx->settings->options & RX_O_V6ONLY) ||
+ (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
+ options |= SOCK_XFER_OPT_V6ONLY;
+ }
+
+ if (rx->settings->interface)
+ if_namelen = strlen(rx->settings->interface);
+#ifdef USE_NS
+ if (rx->settings->netns)
+ ns_namelen = rx->settings->netns->name_len;
+#endif
+
+ while (xfer_sock) {
+ if ((options == xfer_sock->options) &&
+ (if_namelen == xfer_sock->if_namelen) &&
+ (ns_namelen == xfer_sock->ns_namelen) &&
+ (!if_namelen || strcmp(rx->settings->interface, xfer_sock->iface) == 0) &&
+#ifdef USE_NS
+ (!ns_namelen || strcmp(rx->settings->netns->node.key, xfer_sock->namespace) == 0) &&
+#endif
+ rx->proto->fam->addrcmp(&xfer_sock->addr, &rx->addr) == 0)
+ break;
+ xfer_sock = xfer_sock->next;
+ }
+
+ if (xfer_sock != NULL) {
+ ret = xfer_sock->fd;
+ if (xfer_sock == xfer_sock_list)
+ xfer_sock_list = xfer_sock->next;
+ if (xfer_sock->prev)
+ xfer_sock->prev->next = xfer_sock->next;
+ if (xfer_sock->next)
+ xfer_sock->next->prev = xfer_sock->prev;
+ free(xfer_sock->iface);
+ free(xfer_sock->namespace);
+ free(xfer_sock);
+ }
+ return ret;
+}
+
+/* After all protocols are bound, there may remain some old sockets that have
+ * been removed between the previous config and the new one. These ones must
+ * be dropped, otherwise they will remain open and may prevent a service from
+ * restarting.
+ */
+void sock_drop_unused_old_sockets(void)
+{
+ while (xfer_sock_list != NULL) {
+ struct xfer_sock_list *tmpxfer = xfer_sock_list->next;
+
+ close(xfer_sock_list->fd);
+ free(xfer_sock_list->iface);
+ free(xfer_sock_list->namespace);
+ free(xfer_sock_list);
+ xfer_sock_list = tmpxfer;
+ }
+}
+
+/* Tests if the receiver supports accepting connections. Returns positive on
+ * success, 0 if not possible, negative if the socket is non-recoverable. The
+ * rationale behind this is that inherited FDs may be broken and that shared
+ * FDs might have been paused by another process.
+ */
+int sock_accepting_conn(const struct receiver *rx)
+{
+ int opt_val = 0;
+ socklen_t opt_len = sizeof(opt_val);
+
+ if (getsockopt(rx->fd, SOL_SOCKET, SO_ACCEPTCONN, &opt_val, &opt_len) == -1)
+ return -1;
+
+ return opt_val;
+}
+
+/* This is the FD handler IO callback for stream sockets configured for
+ * accepting incoming connections. It's a pass-through to listener_accept()
+ * which will iterate over the listener protocol's accept_conn() function.
+ * The FD's owner must be a listener.
+ */
+void sock_accept_iocb(int fd)
+{
+ struct listener *l = fdtab[fd].owner;
+
+ if (!l)
+ return;
+
+ BUG_ON(!!master != !!(l->rx.flags & RX_F_MWORKER));
+ listener_accept(l);
+}
+
+/* This completes the initialization of connection <conn> by inserting its FD
+ * into the fdtab, associating it with the regular connection handler. It will
+ * be bound to the current thread only. This call cannot fail.
+ */
+void sock_conn_ctrl_init(struct connection *conn)
+{
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+ fd_insert(conn->handle.fd, conn, sock_conn_iocb, tgid, ti->ltid_bit);
+}
+
+/* This completes the release of connection <conn> by removing its FD from the
+ * fdtab and deleting it. The connection must not use the FD anymore past this
+ * point. The FD may be modified in the connection.
+ */
+void sock_conn_ctrl_close(struct connection *conn)
+{
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+ fd_delete(conn->handle.fd);
+ conn->handle.fd = DEAD_FD_MAGIC;
+}
+
+/* This is the callback which is set when a connection establishment is pending
+ * and we have nothing to send. It may update the FD polling status to indicate
+ * !READY. It returns 0 if it fails in a fatal way or needs to poll to go
+ * further, otherwise it returns non-zero and removes the CO_FL_WAIT_L4_CONN
+ * flag from the connection's flags. In case of error, it sets CO_FL_ERROR and
+ * leaves the error code in errno.
+ */
+int sock_conn_check(struct connection *conn)
+{
+ struct sockaddr_storage *addr;
+ int fd = conn->handle.fd;
+
+ if (conn->flags & CO_FL_ERROR)
+ return 0;
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ if (!(conn->flags & CO_FL_WAIT_L4_CONN))
+ return 1; /* strange we were called while ready */
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (!fd_send_ready(fd) && !(fdtab[fd].state & (FD_POLL_ERR|FD_POLL_HUP)))
+ return 0;
+
+ /* Here we have 2 cases :
+ * - modern pollers, able to report ERR/HUP. If these ones return any
+ * of these flags then it's likely a failure, otherwise it possibly
+ * is a success (i.e. there may have been data received just before
+ * the error was reported).
+ * - select, which doesn't report these and with which it's always
+ * necessary either to try connect() again or to check for SO_ERROR.
+ * In order to simplify everything, we double-check using connect() as
+ * soon as we meet either of these delicate situations. Note that
+ * SO_ERROR would clear the error after reporting it!
+ */
+ if (cur_poller.flags & HAP_POLL_F_ERRHUP) {
+ /* modern poller, able to report ERR/HUP */
+ if ((fdtab[fd].state & (FD_POLL_IN|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_IN)
+ goto done;
+ if ((fdtab[fd].state & (FD_POLL_OUT|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_OUT)
+ goto done;
+ if (!(fdtab[fd].state & (FD_POLL_ERR|FD_POLL_HUP)))
+ goto wait;
+ /* error present, fall through common error check path */
+ }
+
+ /* Use connect() to check the state of the socket. This has the double
+ * advantage of *not* clearing the error (so that health checks can
+ * still use getsockopt(SO_ERROR)) and giving us the following info :
+ * - error
+ * - connecting (EALREADY, EINPROGRESS)
+ * - connected (EISCONN, 0)
+ */
+ addr = conn->dst;
+ if ((conn->flags & CO_FL_SOCKS4) && obj_type(conn->target) == OBJ_TYPE_SERVER)
+ addr = &objt_server(conn->target)->socks4_addr;
+
+ if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
+ if (errno == EALREADY || errno == EINPROGRESS)
+ goto wait;
+
+ if (errno && errno != EISCONN)
+ goto out_error;
+ }
+
+ done:
+ /* The FD is ready now, we'll mark the connection as complete and
+ * forward the event to the transport layer which will notify the
+ * data layer.
+ */
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ fd_may_send(fd);
+ fd_cond_recv(fd);
+ errno = 0; // make health checks happy
+ return 1;
+
+ out_error:
+ /* Write error on the file descriptor. Report it to the connection
+ * and disable polling on this FD.
+ */
+ conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
+ HA_ATOMIC_AND(&fdtab[fd].state, ~FD_LINGER_RISK);
+ fd_stop_both(fd);
+ return 0;
+
+ wait:
+ fd_cant_send(fd);
+ fd_want_send(fd);
+ return 0;
+}
+
+/* I/O callback for fd-based connections. It calls the read/write handlers
+ * provided by the connection's sock_ops, which must be valid.
+ */
+void sock_conn_iocb(int fd)
+{
+ struct connection *conn = fdtab[fd].owner;
+ unsigned int flags;
+ int need_wake = 0;
+ struct tasklet *t;
+
+ if (unlikely(!conn)) {
+ activity[tid].conn_dead++;
+ return;
+ }
+
+	flags = conn->flags & ~CO_FL_ERROR; /* ensure the wake handler is called upon error */
+
+ if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) &&
+ ((fd_send_ready(fd) && fd_send_active(fd)) ||
+ (fd_recv_ready(fd) && fd_recv_active(fd)))) {
+ /* Still waiting for a connection to establish and nothing was
+		 * attempted yet to probe the connection. This will clear the
+ * CO_FL_WAIT_L4_CONN flag on success.
+ */
+ if (!sock_conn_check(conn))
+ goto leave;
+ need_wake = 1;
+ }
+
+ if (fd_send_ready(fd) && fd_send_active(fd)) {
+ /* force reporting of activity by clearing the previous flags :
+ * we'll have at least ERROR or CONNECTED at the end of an I/O,
+ * both of which will be detected below.
+ */
+ flags = 0;
+ if (conn->subs && conn->subs->events & SUB_RETRY_SEND) {
+ t = conn->subs->tasklet;
+ need_wake = 0; // wake will be called after this I/O
+ conn->subs->events &= ~SUB_RETRY_SEND;
+ if (!conn->subs->events)
+ conn->subs = NULL;
+ tasklet_wakeup(t);
+ }
+ fd_stop_send(fd);
+ }
+
+ /* The data transfer starts here and stops on error and handshakes. Note
+ * that we must absolutely test conn->xprt at each step in case it suddenly
+ * changes due to a quick unexpected close().
+ */
+ if (fd_recv_ready(fd) && fd_recv_active(fd)) {
+ /* force reporting of activity by clearing the previous flags :
+ * we'll have at least ERROR or CONNECTED at the end of an I/O,
+ * both of which will be detected below.
+ */
+ flags = 0;
+ if (conn->subs && conn->subs->events & SUB_RETRY_RECV) {
+ t = conn->subs->tasklet;
+ need_wake = 0; // wake will be called after this I/O
+ conn->subs->events &= ~SUB_RETRY_RECV;
+ if (!conn->subs->events)
+ conn->subs = NULL;
+ tasklet_wakeup(t);
+ }
+ fd_stop_recv(fd);
+ }
+
+ leave:
+	/* we may have to finish installing a mux or to wake it up based on
+	 * what was just done above. It may kill the connection so we have to
+	 * be prepared not to use it anymore.
+ */
+ if (conn_notify_mux(conn, flags, need_wake) < 0)
+ return;
+
+ /* commit polling changes in case of error.
+ * WT: it seems that the last case where this could still be relevant
+	 * is if a mux wake function above reports a connection error but does
+ * not stop polling. Shouldn't we enforce this into the mux instead of
+ * having to deal with this ?
+ */
+ if (unlikely(conn->flags & CO_FL_ERROR)) {
+ if (conn_ctrl_ready(conn))
+ fd_stop_both(fd);
+
+ if (conn->subs) {
+ t = conn->subs->tasklet;
+			conn->subs->events = 0;
+			conn->subs = NULL; /* all events cleared, drop the subscription */
+ tasklet_wakeup(t);
+ }
+ }
+}
+
+/* Drains possibly pending incoming data on the file descriptor attached to the
+ * connection. This is used to know whether we need to disable lingering on
+ * close. Returns non-zero if it is safe to close without disabling lingering,
+ * otherwise zero.
+ */
+int sock_drain(struct connection *conn)
+{
+ int turns = 2;
+ int fd = conn->handle.fd;
+ int len;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (fdtab[fd].state & (FD_POLL_ERR|FD_POLL_HUP))
+ goto shut;
+
+ if (!(conn->flags & CO_FL_WANT_DRAIN) && !fd_recv_ready(fd))
+ return 0;
+
+ /* no drain function defined, use the generic one */
+
+ while (turns) {
+#ifdef MSG_TRUNC_CLEARS_INPUT
+ len = recv(fd, NULL, INT_MAX, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_TRUNC);
+ if (len == -1 && errno == EFAULT)
+#endif
+ len = recv(fd, trash.area, trash.size, MSG_DONTWAIT | MSG_NOSIGNAL);
+
+ if (len == 0)
+ goto shut;
+
+ if (len < 0) {
+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ /* connection not closed yet */
+ fd_cant_recv(fd);
+ break;
+ }
+ if (errno == EINTR) /* oops, try again */
+ continue;
+ /* other errors indicate a dead connection, fine. */
+ goto shut;
+ }
+ /* OK we read some data, let's try again once */
+ turns--;
+ }
+
+ /* some data are still present, give up */
+ return 0;
+
+ shut:
+ /* we're certain the connection was shut down */
+ HA_ATOMIC_AND(&fdtab[fd].state, ~FD_LINGER_RISK);
+ return 1;
+}
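+
+/* Sketch of one possible caller of sock_drain() (illustrative only, not from
+ * the original sources): when draining fails, lingering is typically disabled
+ * so that close() immediately resets the connection instead of lingering on
+ * unread data:
+ *
+ *   if (!sock_drain(conn)) {
+ *       struct linger nolinger = { .l_onoff = 1, .l_linger = 0 };
+ *       setsockopt(conn->handle.fd, SOL_SOCKET, SO_LINGER,
+ *                  &nolinger, sizeof(nolinger));
+ *   }
+ *   close(conn->handle.fd);
+ */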
+
+/* Checks the connection's FD for readiness of events <event_type>, which may
+ * only be a combination of SUB_RETRY_RECV and SUB_RETRY_SEND. Those which are
+ * ready are returned. The ones that are not ready are enabled. The caller is
+ * expected to do what is needed to handle ready events and to deal with
+ * subsequent wakeups caused by the requested events' readiness.
+ */
+int sock_check_events(struct connection *conn, int event_type)
+{
+ int ret = 0;
+
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (event_type & SUB_RETRY_RECV) {
+ if (fd_recv_ready(conn->handle.fd))
+ ret |= SUB_RETRY_RECV;
+ else
+ fd_want_recv(conn->handle.fd);
+ }
+
+ if (event_type & SUB_RETRY_SEND) {
+ if (fd_send_ready(conn->handle.fd))
+ ret |= SUB_RETRY_SEND;
+ else
+ fd_want_send(conn->handle.fd);
+ }
+
+ return ret;
+}
+
+/* Ignore readiness events from connection's FD for events of types <event_type>
+ * which may only be a combination of SUB_RETRY_RECV and SUB_RETRY_SEND.
+ */
+void sock_ignore_events(struct connection *conn, int event_type)
+{
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ if (event_type & SUB_RETRY_RECV)
+ fd_stop_recv(conn->handle.fd);
+
+ if (event_type & SUB_RETRY_SEND)
+ fd_stop_send(conn->handle.fd);
+}
+
+/* Live check to see if a socket type supports SO_REUSEPORT for the specified
+ * family and socket() settings. Returns non-zero on success, 0 on failure. Use
+ * protocol_supports_flag() instead, which checks cached flags.
+ */
+int _sock_supports_reuseport(const struct proto_fam *fam, int type, int protocol)
+{
+ int ret = 0;
+#ifdef SO_REUSEPORT
+ struct sockaddr_storage ss;
+ socklen_t sl = sizeof(ss);
+ int fd1, fd2;
+
+ /* for the check, we'll need two sockets */
+ fd1 = fd2 = -1;
+
+ /* ignore custom sockets */
+ if (!fam || fam->sock_domain >= AF_MAX)
+ goto leave;
+
+ fd1 = socket(fam->sock_domain, type, protocol);
+ if (fd1 < 0)
+ goto leave;
+
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
+ goto leave;
+
+ /* bind to any address assigned by the kernel, we'll then try to do it twice */
+ memset(&ss, 0, sizeof(ss));
+ ss.ss_family = fam->sock_family;
+ if (bind(fd1, (struct sockaddr *)&ss, fam->sock_addrlen) < 0)
+ goto leave;
+
+ if (getsockname(fd1, (struct sockaddr *)&ss, &sl) < 0)
+ goto leave;
+
+ fd2 = socket(fam->sock_domain, type, protocol);
+ if (fd2 < 0)
+ goto leave;
+
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0)
+ goto leave;
+
+ if (bind(fd2, (struct sockaddr *)&ss, sl) < 0)
+ goto leave;
+
+ /* OK we could bind twice to the same address:port, REUSEPORT
+ * is supported for this protocol.
+ */
+ ret = 1;
+
+ leave:
+ if (fd2 >= 0)
+ close(fd2);
+ if (fd1 >= 0)
+ close(fd1);
+#endif
+ return ret;
+}
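+
+/* Example (illustrative only): probing plain TCPv4 once at startup, e.g. with
+ * the inet4 family definition from sock_inet.c. In the real code paths the
+ * result is cached in the protocol flags and queried through
+ * protocol_supports_flag() instead of re-running the probe:
+ *
+ *   if (_sock_supports_reuseport(&proto_fam_inet4, SOCK_STREAM, 0))
+ *       ;  // SO_REUSEPORT usable: listeners may be sharded across threads
+ */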
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/sock_inet.c b/src/sock_inet.c
new file mode 100644
index 0000000..028ffaa
--- /dev/null
+++ b/src/sock_inet.c
@@ -0,0 +1,521 @@
+/*
+ * AF_INET/AF_INET6 socket management
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/namespace.h>
+#include <haproxy/receiver-t.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_inet.h>
+#include <haproxy/tools.h>
+
+struct proto_fam proto_fam_inet4 = {
+ .name = "inet4",
+ .sock_domain = PF_INET,
+ .sock_family = AF_INET,
+ .sock_addrlen = sizeof(struct sockaddr_in),
+ .l3_addrlen = 32/8,
+ .addrcmp = sock_inet4_addrcmp,
+ .bind = sock_inet_bind_receiver,
+ .get_src = sock_get_src,
+ .get_dst = sock_inet_get_dst,
+ .set_port = sock_inet_set_port,
+};
+
+struct proto_fam proto_fam_inet6 = {
+ .name = "inet6",
+ .sock_domain = PF_INET6,
+ .sock_family = AF_INET6,
+ .sock_addrlen = sizeof(struct sockaddr_in6),
+ .l3_addrlen = 128/8,
+ .addrcmp = sock_inet6_addrcmp,
+ .bind = sock_inet_bind_receiver,
+ .get_src = sock_get_src,
+ .get_dst = sock_get_dst,
+ .set_port = sock_inet_set_port,
+};
+
+/* PLEASE NOTE for function below:
+ * - sock_inet4_* is solely for AF_INET (IPv4)
+ * - sock_inet6_* is solely for AF_INET6 (IPv6)
+ * - sock_inet_* is for either
+ *
+ * The address family SHOULD always be checked. In some cases a function will
+ * be used in a situation where the address family is guaranteed (e.g. protocol
+ * definitions), so the test may be avoided. This special case must then be
+ * mentioned in the comment before the function definition.
+ */
+
+/* determine if the operating system uses IPV6_V6ONLY by default. 0=no, 1=yes.
+ * It remains 0 if IPv6 is not enabled/configured.
+ */
+int sock_inet6_v6only_default = 0;
+
+/* Default TCPv4/TCPv6 MSS settings. -1=unknown. */
+int sock_inet_tcp_maxseg_default = -1;
+int sock_inet6_tcp_maxseg_default = -1;
+
+/* Compares two AF_INET sockaddr addresses. Returns 0 if they match or non-zero
+ * if they do not match.
+ */
+int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
+{
+ const struct sockaddr_in *a4 = (const struct sockaddr_in *)a;
+ const struct sockaddr_in *b4 = (const struct sockaddr_in *)b;
+
+ if (a->ss_family != b->ss_family)
+ return -1;
+
+ if (a->ss_family != AF_INET)
+ return -1;
+
+ if (a4->sin_port != b4->sin_port)
+ return -1;
+
+ return memcmp(&a4->sin_addr, &b4->sin_addr, sizeof(a4->sin_addr));
+}
+
+/* Compares two AF_INET6 sockaddr addresses. Returns 0 if they match or
+ * non-zero if they do not match.
+ */
+int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
+{
+ const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a;
+ const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b;
+
+ if (a->ss_family != b->ss_family)
+ return -1;
+
+ if (a->ss_family != AF_INET6)
+ return -1;
+
+ if (a6->sin6_port != b6->sin6_port)
+ return -1;
+
+ return memcmp(&a6->sin6_addr, &b6->sin6_addr, sizeof(a6->sin6_addr));
+}
+
+/* Sets the port <port> on IPv4 or IPv6 address <addr>. The address family is
+ * determined from the sockaddr_storage's address family. Nothing is done for
+ * other families.
+ */
+void sock_inet_set_port(struct sockaddr_storage *addr, int port)
+{
+ if (addr->ss_family == AF_INET)
+ ((struct sockaddr_in *)addr)->sin_port = htons(port);
+ else if (addr->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+}
+
+/*
+ * Retrieves the original destination address for the socket <fd> which must be
+ * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener
+ * (=0) or an initiator (!=0). In the case of a listener, if the original
+ * destination address was translated, the original address is retrieved. It
+ * returns 0 in case of success, -1 in case of error. The socket's source
+ * address is stored in <sa> for <salen> bytes.
+ */
+int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
+{
+ if (dir)
+ return getpeername(fd, sa, &salen);
+ else {
+ int ret = getsockname(fd, sa, &salen);
+
+ if (ret < 0)
+ return ret;
+
+#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
+ /* For TPROXY and Netfilter's NAT, we can retrieve the original
+ * IPv4 address before DNAT/REDIRECT. We must not do that with
+ * other families because v6-mapped IPv4 addresses are still
+ * reported as v4.
+ */
+ if (getsockopt(fd, IPPROTO_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
+ return 0;
+#endif
+ return ret;
+ }
+}
+
+/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN
+ * according to the various supported socket options. The socket's address family
+ * must be passed in <family>.
+ */
+int sock_inet_is_foreign(int fd, sa_family_t family)
+{
+ int val __maybe_unused;
+ socklen_t len __maybe_unused;
+
+ switch (family) {
+ case AF_INET:
+#if defined(IP_TRANSPARENT)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(IP_FREEBIND)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(IP_BINDANY)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IP, IP_BINDANY, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(SO_BINDANY)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
+ return 1;
+#endif
+ break;
+
+ case AF_INET6:
+#if defined(IPV6_TRANSPARENT)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(IP_FREEBIND)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(IPV6_BINDANY)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &val, &len) == 0 && val)
+ return 1;
+#endif
+#if defined(SO_BINDANY)
+ val = 0; len = sizeof(val);
+ if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
+ return 1;
+#endif
+ break;
+ }
+ return 0;
+}
+
+/* Attempt all known socket options to prepare an AF_INET4 socket to be bound
+ * to a foreign address. The socket must already exist and must not be bound.
+ * 1 is returned on success, 0 on failure. The caller must check the address
+ * family before calling this function.
+ */
+int sock_inet4_make_foreign(int fd)
+{
+ return
+#if defined(IP_TRANSPARENT)
+ setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(IP_FREEBIND)
+ setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(IP_BINDANY)
+ setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(SO_BINDANY)
+ setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
+#endif
+ 0;
+}
+
+/* Attempt all known socket options to prepare an AF_INET6 socket to be bound
+ * to a foreign address. The socket must already exist and must not be bound.
+ * 1 is returned on success, 0 on failure. The caller must check the address
+ * family before calling this function.
+ */
+int sock_inet6_make_foreign(int fd)
+{
+ return
+#if defined(IPV6_TRANSPARENT)
+ setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(IP_FREEBIND)
+ setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(IPV6_BINDANY)
+ setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 ||
+#endif
+#if defined(SO_BINDANY)
+ setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
+#endif
+ 0;
+}
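+
+/* Typical use of the two helpers above (sketch with assumed variables: <fd>
+ * is a fresh unbound socket and <addr>/<alen> a non-local address to bind):
+ *
+ *   int ok = (addr.ss_family == AF_INET) ? sock_inet4_make_foreign(fd)
+ *                                        : sock_inet6_make_foreign(fd);
+ *   if (ok && bind(fd, (struct sockaddr *)&addr, alen) == 0)
+ *       ;  // socket now bound to the foreign (transparent) address
+ */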
+
+/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
+ * context, respectively. Returns an error code made of ERR_* bits on failure
+ * or ERR_NONE on success. On failure, an error message may be passed into
+ * <errmsg>.
+ */
+int sock_inet_bind_receiver(struct receiver *rx, char **errmsg)
+{
+ int fd, err, ext;
+ /* copy listener addr because sometimes we need to switch family */
+ struct sockaddr_storage addr_inet = rx->addr;
+
+ /* force to classic sock family, not AF_CUST_* */
+ addr_inet.ss_family = rx->proto->fam->sock_family;
+
+ /* ensure we never return garbage */
+ if (errmsg)
+ *errmsg = 0;
+
+ err = ERR_NONE;
+
+ if (rx->flags & RX_F_BOUND)
+ return ERR_NONE;
+
+ if (rx->flags & RX_F_MUST_DUP) {
+ /* this is a secondary receiver that is an exact copy of a
+ * reference which must already be bound (or has failed).
+ * We'll try to dup() the other one's FD and take it. We
+ * try hard not to reconfigure the socket since it's shared.
+ */
+ BUG_ON(!rx->shard_info);
+ if (!(rx->shard_info->ref->flags & RX_F_BOUND)) {
+ /* it's assumed that the first one has already reported
+ * the error, let's not spam with another one, and do
+ * not set ERR_ALERT.
+ */
+ err |= ERR_RETRYABLE;
+ goto bind_ret_err;
+ }
+ /* taking the other one's FD will result in it being marked
+ * extern and being dup()ed. Let's mark the receiver as
+ * inherited so that it properly bypasses all second-stage
+ * setup and avoids being passed to new processes.
+ */
+ rx->flags |= RX_F_INHERITED;
+ rx->fd = rx->shard_info->ref->fd;
+ }
+
+ /* if no FD was assigned yet, we'll have to either find a compatible
+ * one or create a new one.
+ */
+ if (rx->fd == -1)
+ rx->fd = sock_find_compatible_fd(rx);
+
+ /* if the receiver now has an fd assigned, then we were offered the fd
+ * by an external process (most likely the parent), and we don't want
+ * to create a new socket. However we still want to set a few flags on
+ * the socket.
+ */
+ fd = rx->fd;
+ ext = (fd >= 0);
+
+ if (!ext) {
+ fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
+ rx->proto->sock_type, rx->proto->sock_prot);
+ if (fd == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+ }
+
+ if (ext && fd < global.maxsock && fdtab[fd].owner) {
+ /* This FD was already bound so this means that it was already
+ * known and registered before parsing, hence it's an inherited
+ * FD. The only reason why it's already known here is that it
+		 * has been registered multiple times (multiple listeners on the
+		 * same address, or a "shards" directive on the line). There cannot be
+ * multiple listeners on one FD but at least we can create a
+ * new one from the original one. We won't reconfigure it,
+ * however, as this was already done for the first one.
+ */
+ fd = dup(fd);
+ if (fd == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+ }
+
+ if (fd >= global.maxsock) {
+ err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
+ memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
+ goto bind_close_return;
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot make socket non-blocking");
+ goto bind_close_return;
+ }
+
+ if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
+ /* not fatal but should be reported */
+ memprintf(errmsg, "cannot do so_reuseaddr");
+ err |= ERR_ALERT;
+ }
+
+#ifdef SO_REUSEPORT
+	/* OpenBSD and Linux 3.9 support this. Since the constant is also exposed
+	 * by old Linux libcs on kernels that do not support it, the call might
+	 * fail at runtime, in which case the error is silently ignored.
+ */
+ if (!ext && (rx->proto->flags & PROTO_F_REUSEPORT_SUPPORTED))
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+#endif
+
+#ifdef SO_REUSEPORT_LB
+ /* FreeBSD 12 and above use this to load-balance incoming connections.
+ * This is limited to 256 listeners per group however.
+ */
+ if (!ext && (rx->proto->flags & PROTO_F_REUSEPORT_SUPPORTED))
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, &one, sizeof(one));
+#endif
+
+ if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
+ switch (addr_inet.ss_family) {
+ case AF_INET:
+ if (!sock_inet4_make_foreign(fd)) {
+ memprintf(errmsg, "cannot make receiving socket transparent");
+ err |= ERR_ALERT;
+ }
+ break;
+ case AF_INET6:
+ if (!sock_inet6_make_foreign(fd)) {
+ memprintf(errmsg, "cannot make receiving socket transparent");
+ err |= ERR_ALERT;
+ }
+ break;
+ }
+ }
+
+#ifdef SO_BINDTODEVICE
+	/* Note: this might fail if the process lacks CAP_NET_RAW */
+ if (!ext && rx->settings->interface) {
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ rx->settings->interface,
+ strlen(rx->settings->interface) + 1) == -1) {
+ memprintf(errmsg, "cannot bind receiver to device '%s' (%s)", rx->settings->interface, strerror(errno));
+ err |= ERR_WARN;
+ }
+ }
+#endif
+
+#if defined(IPV6_V6ONLY)
+ if (addr_inet.ss_family == AF_INET6 && !ext) {
+ /* Prepare to match the v6only option against what we really want. Note
+ * that sadly the two options are not exclusive to each other and that
+ * v6only is stronger than v4v6.
+ */
+ if ((rx->settings->options & RX_O_V6ONLY) ||
+ (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
+ setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
+ else
+ setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
+ }
+#endif
+
+ if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot bind socket (%s)", strerror(errno));
+ goto bind_close_return;
+ }
+
+ rx->fd = fd;
+ rx->flags |= RX_F_BOUND;
+
+ fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread);
+
+ /* for now, all regularly bound TCP listeners are exportable */
+ if (!(rx->flags & RX_F_INHERITED))
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
+
+ bind_return:
+ if (errmsg && *errmsg) {
+ char pn[INET6_ADDRSTRLEN];
+
+ addr_to_str(&addr_inet, pn, sizeof(pn));
+ memprintf(errmsg, "%s for [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
+ }
+ bind_ret_err:
+ return err;
+
+ bind_close_return:
+ close(fd);
+ goto bind_return;
+}
+
+static void sock_inet_prepare(void)
+{
+ int fd, val;
+ socklen_t len;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd >= 0) {
+#ifdef TCP_MAXSEG
+ /* retrieve the OS' default mss for TCPv4 */
+ len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
+ sock_inet_tcp_maxseg_default = val;
+#endif
+ close(fd);
+ }
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd >= 0) {
+#if defined(IPV6_V6ONLY)
+ /* retrieve the OS' bindv6only value */
+ len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
+ sock_inet6_v6only_default = 1;
+#endif
+
+#ifdef TCP_MAXSEG
+ /* retrieve the OS' default mss for TCPv6 */
+ len = sizeof(val);
+ if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
+ sock_inet6_tcp_maxseg_default = val;
+#endif
+ close(fd);
+ }
+}
+
+INITCALL0(STG_PREPARE, sock_inet_prepare);
+
+
+REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
+#if defined(IP_TRANSPARENT)
+ " IP_TRANSPARENT"
+#endif
+#if defined(IPV6_TRANSPARENT)
+ " IPV6_TRANSPARENT"
+#endif
+#if defined(IP_FREEBIND)
+ " IP_FREEBIND"
+#endif
+#if defined(IP_BINDANY)
+ " IP_BINDANY"
+#endif
+#if defined(IPV6_BINDANY)
+ " IPV6_BINDANY"
+#endif
+#if defined(SO_BINDANY)
+ " SO_BINDANY"
+#endif
+ "");
diff --git a/src/sock_unix.c b/src/sock_unix.c
new file mode 100644
index 0000000..ef749a5
--- /dev/null
+++ b/src/sock_unix.c
@@ -0,0 +1,387 @@
+/*
+ * SOCK_UNIX socket management
+ *
+ * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include <haproxy/api.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/listener.h>
+#include <haproxy/receiver-t.h>
+#include <haproxy/namespace.h>
+#include <haproxy/sock.h>
+#include <haproxy/sock_unix.h>
+#include <haproxy/tools.h>
+
+
+struct proto_fam proto_fam_unix = {
+ .name = "unix",
+ .sock_domain = PF_UNIX,
+ .sock_family = AF_UNIX,
+ .sock_addrlen = sizeof(struct sockaddr_un),
+ .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
+ .addrcmp = sock_unix_addrcmp,
+ .bind = sock_unix_bind_receiver,
+ .get_src = sock_get_src,
+ .get_dst = sock_get_dst,
+};
+
+/* PLEASE NOTE for functions below:
+ *
+ * The address family SHOULD always be checked. In some cases a function will
+ * be used in a situation where the address family is guaranteed (e.g. protocol
+ * definitions), so the test may be avoided. This special case must then be
+ * mentioned in the comment before the function definition.
+ */
+
+
+/* Compares two AF_UNIX sockaddr addresses. Returns 0 if they match or non-zero
+ * if they do not match. It also supports ABNS socket addresses (those starting
+ * with \0). For regular UNIX sockets, it additionally tolerates names ending
+ * exactly with .XXXXX.tmp, which are newly bound sockets about to be
+ * replaced; this suffix is then ignored. Note that our UNIX socket
+ * paths are always zero-terminated.
+ */
+int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
+{
+ const struct sockaddr_un *au = (const struct sockaddr_un *)a;
+ const struct sockaddr_un *bu = (const struct sockaddr_un *)b;
+ int idx, dot, idx2;
+
+ if (a->ss_family != b->ss_family)
+ return -1;
+
+ if (a->ss_family != AF_UNIX)
+ return -1;
+
+ if (au->sun_path[0] != bu->sun_path[0])
+ return -1;
+
+ if (au->sun_path[0] == 0)
+ return memcmp(au->sun_path, bu->sun_path, sizeof(au->sun_path));
+
+ idx = 1; dot = 0;
+ while (au->sun_path[idx] == bu->sun_path[idx]) {
+ if (au->sun_path[idx] == 0)
+ return 0;
+ if (au->sun_path[idx] == '.')
+ dot = idx;
+ idx++;
+ }
+
+ /* Now we have a difference. It's OK if they are within or after a
+ * sequence of digits following a dot, and are followed by ".tmp".
+ *
+ * make sure to perform the check against tempname if the compared
+ * string is in "final" format (does not end with ".XXXX.tmp").
+ *
+ * Examples:
+ * /tmp/test matches with /tmp/test.1822.tmp
+ * /tmp/test.1822.tmp matches with /tmp/test.XXXX.tmp
+ */
+ if (au->sun_path[idx] == 0 || bu->sun_path[idx] == 0) {
+ if (au->sun_path[idx] == '.' || bu->sun_path[idx] == '.')
+ dot = idx; /* try to match against temp path */
+ else
+ return -1; /* invalid temp path */
+ }
+
+ if (!dot)
+ return -1;
+
+ /* First, check in path "a" */
+ if (au->sun_path[idx] != 0) {
+ for (idx2 = dot + 1; idx2 && isdigit((unsigned char)au->sun_path[idx2]); idx2++)
+ ;
+ if (strcmp(au->sun_path + idx2, ".tmp") != 0)
+ return -1;
+ }
+
+ /* Then check in path "b" */
+ if (bu->sun_path[idx] != 0) {
+ for (idx2 = dot + 1; idx2 && isdigit((unsigned char)bu->sun_path[idx2]); idx2++)
+ ;
+ if (strcmp(bu->sun_path + idx2, ".tmp") != 0)
+ return -1;
+ }
+
+ /* OK that's a match */
+ return 0;
+}
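+
+/* Illustrative sketch, not part of the upstream sources: how the ".tmp"
+ * tolerance above behaves for a caller. Assuming two sockaddr_un values set
+ * up as below, the temporary name bound by a starting process still compares
+ * equal to the final name:
+ *
+ *     struct sockaddr_storage a = { 0 }, b = { 0 };
+ *     struct sockaddr_un *au = (struct sockaddr_un *)&a;
+ *     struct sockaddr_un *bu = (struct sockaddr_un *)&b;
+ *
+ *     au->sun_family = bu->sun_family = AF_UNIX;
+ *     strcpy(au->sun_path, "/tmp/test");
+ *     strcpy(bu->sun_path, "/tmp/test.1822.tmp");
+ *     sock_unix_addrcmp(&a, &b);    // returns 0: the pid suffix is ignored
+ */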
+
+/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
+ * context, respectively, with ->bind_thread as the thread mask. Returns an
+ * error code made of ERR_* bits on failure or ERR_NONE on success. On failure,
+ * an error message may be returned in <errmsg>.
+ */
+int sock_unix_bind_receiver(struct receiver *rx, char **errmsg)
+{
+ char tempname[MAXPATHLEN];
+ char backname[MAXPATHLEN];
+ struct sockaddr_un addr;
+ const char *path;
+ int maxpathlen;
+ int fd, err, ext, ret;
+
+ /* ensure we never return garbage */
+ if (errmsg)
+ *errmsg = 0;
+
+ err = ERR_NONE;
+
+ if (rx->flags & RX_F_BOUND)
+ return ERR_NONE;
+
+ if (rx->flags & RX_F_MUST_DUP) {
+ /* this is a secondary receiver that is an exact copy of a
+ * reference which must already be bound (or has failed).
+ * We'll try to dup() the other one's FD and take it. We
+ * try hard not to reconfigure the socket since it's shared.
+ */
+ BUG_ON(!rx->shard_info);
+ if (!(rx->shard_info->ref->flags & RX_F_BOUND)) {
+ /* it's assumed that the first one has already reported
+ * the error, let's not spam with another one, and do
+ * not set ERR_ALERT.
+ */
+ err |= ERR_RETRYABLE;
+ goto bind_ret_err;
+ }
+ /* taking the other one's FD will result in it being marked
+ * extern and being dup()ed. Let's mark the receiver as
+ * inherited so that it properly bypasses all second-stage
+ * setup and avoids being passed to new processes.
+ */
+ rx->flags |= RX_F_INHERITED;
+ rx->fd = rx->shard_info->ref->fd;
+ }
+
+ /* if no FD was assigned yet, we'll have to either find a compatible
+ * one or create a new one.
+ */
+ if (rx->fd == -1)
+ rx->fd = sock_find_compatible_fd(rx);
+
+ path = ((struct sockaddr_un *)&rx->addr)->sun_path;
+ maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path));
+
+ /* if the listener already has an fd assigned, then we were offered the
+ * fd by an external process (most likely the parent), and we don't want
+ * to create a new socket. However we still want to set a few flags on
+ * the socket.
+ */
+ fd = rx->fd;
+ ext = (fd >= 0);
+ if (ext)
+ goto fd_ready;
+
+ if (path[0]) {
+ /* 1. create the temporary and backup socket names */
+ ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid);
+ if (ret < 0 || ret >= maxpathlen) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
+ goto bind_return;
+ }
+
+ ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid);
+ if (ret < 0 || ret >= maxpathlen) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
+ goto bind_return;
+ }
+
+ /* 2. clean existing orphaned entries */
+ if (unlink(tempname) < 0 && errno != ENOENT) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+
+ if (unlink(backname) < 0 && errno != ENOENT) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+
+ /* 3. backup existing socket */
+ if (link(path, backname) < 0 && errno != ENOENT) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+
+ /* Note: this test is redundant with the snprintf one above and
+ * will never trigger, it's just added as the only way to shut
+ * gcc's painfully dumb warning about possibly truncated output
+ * during strncpy(). Don't move it above or smart gcc will not
+ * see it!
+ */
+ if (strlen(tempname) >= sizeof(addr.sun_path)) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
+ goto bind_return;
+ }
+
+ strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1);
+ addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
+ }
+ else {
+ /* first char is zero, it's an abstract socket whose address
+ * is defined by all the bytes past this zero.
+ */
+ memcpy(addr.sun_path, path, sizeof(addr.sun_path));
+ }
+ addr.sun_family = AF_UNIX;
+
+ /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */
+ fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot);
+ if (fd < 0) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+
+ fd_ready:
+ if (ext && fd < global.maxsock && fdtab[fd].owner) {
+ /* This FD was already bound so this means that it was already
+ * known and registered before parsing, hence it's an inherited
+ * FD. The only reason why it's already known here is that it
+ * has been registered multiple times (multiple listeners on the
+ * same, or a "shards" directive on the line). There cannot be
+ * multiple listeners on one FD but at least we can create a
+ * new one from the original one. We won't reconfigure it,
+ * however, as this was already done for the first one.
+ */
+ fd = dup(fd);
+ if (fd == -1) {
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
+ goto bind_return;
+ }
+ }
+
+ if (fd >= global.maxsock) {
+ err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
+ memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
+ goto bind_close_return;
+ }
+
+ if (fd_set_nonblock(fd) == -1) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot make socket non-blocking");
+ goto bind_close_return;
+ }
+
+ if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ /* note that bind() creates the socket <tempname> on the file system */
+ if (errno == EADDRINUSE) {
+ /* the old process might still own it, let's retry */
+ err |= ERR_RETRYABLE | ERR_ALERT;
+ memprintf(errmsg, "cannot bind UNIX socket (already in use)");
+ goto bind_close_return;
+ }
+ else {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno));
+ goto bind_close_return;
+ }
+ }
+
+ /* <uid> and <gid> other than -1 will be used to change the socket owner.
+ * If <mode> is not 0, it will be used to restrict access to the socket.
+ * While it is known not to be portable on every OS, it's still useful
+ * where it works. We also don't change permissions on abstract sockets.
+ */
+ if (!ext && path[0] &&
+ (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) &&
+ (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) ||
+ (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno));
+ goto err_unlink_temp;
+ }
+
+ /* Point of no return: we are ready, we'll switch the sockets. We don't
+ * fear losing the socket <path> because we have a copy of it in
+ * backname. Abstract sockets are not renamed.
+ */
+ if (!ext && path[0] && rename(tempname, path) < 0) {
+ err |= ERR_FATAL | ERR_ALERT;
+ memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno));
+ goto err_rename;
+ }
+
+ /* Cleanup: only unlink if we didn't inherit the fd from the parent */
+ if (!ext && path[0])
+ unlink(backname);
+
+ rx->fd = fd;
+ rx->flags |= RX_F_BOUND;
+
+ if (!path[0]) {
+ /* ABNS sockets do not support suspend, and they conflict with
+ * other ones (no reuseport), so they must always be unbound.
+ */
+ rx->flags |= RX_F_NON_SUSPENDABLE;
+ }
+
+ fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread);
+
+ /* for now, all regularly bound UNIX listeners are exportable */
+ if (!(rx->flags & RX_F_INHERITED))
+ HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
+
+ return err;
+
+ err_rename:
+ ret = rename(backname, path);
+ if (ret < 0 && errno == ENOENT)
+ unlink(path);
+ err_unlink_temp:
+ if (!ext && path[0])
+ unlink(tempname);
+ close(fd);
+ err_unlink_back:
+ if (!ext && path[0])
+ unlink(backname);
+ bind_return:
+ if (errmsg && *errmsg) {
+ if (!ext) {
+ char *path_str;
+
+ path_str = sa2str((struct sockaddr_storage *)&rx->addr, 0, 0);
+ memprintf(errmsg, "%s [%s]", *errmsg, ((path_str) ? path_str : ""));
+ ha_free(&path_str);
+ }
+ else
+ memprintf(errmsg, "%s [fd %d]", *errmsg, fd);
+ }
+ bind_ret_err:
+ return err;
+
+ bind_close_return:
+ close(fd);
+ goto bind_return;
+}
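+
+/* Illustrative sketch, not part of the upstream sources: the bind sequence
+ * above is the usual atomic-replacement idiom for named UNIX sockets.
+ * Stripped of error handling it boils down to:
+ *
+ *     snprintf(tmp, sizeof(tmp), "%s.%d.tmp", path, getpid());
+ *     snprintf(bak, sizeof(bak), "%s.%d.bak", path, getpid());
+ *     link(path, bak);      // keep a rollback copy of the live socket
+ *     bind(fd, (struct sockaddr *)&addr, sizeof(addr)); // creates <tmp>
+ *     rename(tmp, path);    // atomic switch, clients never see a gap
+ *     unlink(bak);          // success: drop the rollback copy
+ */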
diff --git a/src/ssl_ckch.c b/src/ssl_ckch.c
new file mode 100644
index 0000000..ab39755
--- /dev/null
+++ b/src/ssl_ckch.c
@@ -0,0 +1,3968 @@
+/*
+ *
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+#include <unistd.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <import/ebpttree.h>
+#include <import/ebsttree.h>
+
+#include <haproxy/applet.h>
+#include <haproxy/base64.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_ocsp.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+/* Uncommitted CKCH transaction */
+
+static struct {
+ struct ckch_store *new_ckchs;
+ struct ckch_store *old_ckchs;
+ char *path;
+} ckchs_transaction;
+
+/* Uncommitted CA file transaction */
+
+static struct {
+ struct cafile_entry *old_cafile_entry;
+ struct cafile_entry *new_cafile_entry;
+ char *path;
+} cafile_transaction;
+
+/* Uncommitted CRL file transaction */
+
+static struct {
+ struct cafile_entry *old_crlfile_entry;
+ struct cafile_entry *new_crlfile_entry;
+ char *path;
+} crlfile_transaction;
+
+/* CLI context used by "show cafile" */
+struct show_cafile_ctx {
+ struct cafile_entry *cur_cafile_entry;
+ struct cafile_entry *old_cafile_entry;
+ int ca_index;
+ int show_all;
+};
+
+/* CLI context used by "show crlfile" */
+struct show_crlfile_ctx {
+ struct cafile_entry *cafile_entry;
+ struct cafile_entry *old_crlfile_entry;
+ int index;
+};
+
+/* CLI context used by "show cert" */
+struct show_cert_ctx {
+ struct ckch_store *old_ckchs;
+ struct ckch_store *cur_ckchs;
+ int transaction;
+};
+
+/* CLI context used by "commit cert" */
+struct commit_cert_ctx {
+ struct ckch_store *old_ckchs;
+ struct ckch_store *new_ckchs;
+ struct ckch_inst *next_ckchi;
+ char *err;
+ enum {
+ CERT_ST_INIT = 0,
+ CERT_ST_GEN,
+ CERT_ST_INSERT,
+ CERT_ST_SUCCESS,
+ CERT_ST_FIN,
+ CERT_ST_ERROR,
+ } state;
+};
+
+/* CLI context used by "commit cafile" and "commit crlfile" */
+struct commit_cacrlfile_ctx {
+ struct cafile_entry *old_entry;
+ struct cafile_entry *new_entry;
+ struct ckch_inst_link *next_ckchi_link;
+ enum cafile_type cafile_type; /* either CA or CRL, depending on the current command */
+ char *err;
+ enum {
+ CACRL_ST_INIT = 0,
+ CACRL_ST_GEN,
+ CACRL_ST_INSERT,
+ CACRL_ST_SUCCESS,
+ CACRL_ST_FIN,
+ CACRL_ST_ERROR,
+ } state;
+};
+
+
+/******************** cert_key_and_chain functions *************************
+ * These are the functions that fill a cert_key_and_chain structure. For the
+ * functions filling an SSL_CTX from a cert_key_and_chain, see ssl_sock.c.
+ */
+
+/*
+ * Try to parse a Signed Certificate Timestamp List structure. This function
+ * only performs basic checks that the data looks like an SCTL. No signature
+ * validation is performed.
+ */
+static int ssl_sock_parse_sctl(struct buffer *sctl)
+{
+ int ret = 1;
+ int len, pos, sct_len;
+ unsigned char *data;
+
+ if (sctl->data < 2)
+ goto out;
+
+ data = (unsigned char *) sctl->area;
+ len = (data[0] << 8) | data[1];
+
+ if (len + 2 != sctl->data)
+ goto out;
+
+ data = data + 2;
+ pos = 0;
+ while (pos < len) {
+ if (len - pos < 2)
+ goto out;
+
+ sct_len = (data[pos] << 8) | data[pos + 1];
+ if (pos + sct_len + 2 > len)
+ goto out;
+
+ pos += sct_len + 2;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
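+
+/* For reference, summarized from RFC 6962 rather than from this file: the
+ * blob checked above is a TLS vector of TLS vectors, which is exactly what
+ * the length walk validates:
+ *
+ *     uint16 list_len;                // must equal total length - 2
+ *     struct {
+ *             uint16 sct_len;
+ *             opaque sct[sct_len];    // one serialized SCT, not parsed here
+ *     } entries[];                    // packed until list_len is consumed
+ */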
+
+/* Try to load an SCTL from a buffer <buf> if not NULL, or else read the file
+ * <sctl_path>. It fills the ckch->sctl buffer.
+ * Returns 0 on success or != 0 on failure. */
+int ssl_sock_load_sctl_from_file(const char *sctl_path, char *buf, struct ckch_data *data, char **err)
+{
+ int fd = -1;
+ int r = 0;
+ int ret = 1;
+ struct buffer tmp;
+ struct buffer *src;
+ struct buffer *sctl;
+
+ if (buf) {
+ chunk_initstr(&tmp, buf);
+ src = &tmp;
+ } else {
+ fd = open(sctl_path, O_RDONLY);
+ if (fd == -1)
+ goto end;
+
+ trash.data = 0;
+ while (trash.data < trash.size) {
+ r = read(fd, trash.area + trash.data, trash.size - trash.data);
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+ goto end;
+ }
+ else if (r == 0) {
+ break;
+ }
+ trash.data += r;
+ }
+ src = &trash;
+ }
+
+ ret = ssl_sock_parse_sctl(src);
+ if (ret)
+ goto end;
+
+ sctl = calloc(1, sizeof(*sctl));
+ if (!chunk_dup(sctl, src)) {
+ ha_free(&sctl);
+ goto end;
+ }
+ /* no error, fill ckch with new context, old context must be freed */
+ if (data->sctl) {
+ ha_free(&data->sctl->area);
+ free(data->sctl);
+ }
+ data->sctl = sctl;
+ ret = 0;
+end:
+ if (fd != -1)
+ close(fd);
+
+ return ret;
+}
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) || defined OPENSSL_IS_BORINGSSL)
+/*
+ * This function loads the OCSP response in DER format contained in the file
+ * at path 'ocsp_path', or in base64 format from buffer <buf>.
+ *
+ * Returns 0 on success, 1 on error.
+ */
+int ssl_sock_load_ocsp_response_from_file(const char *ocsp_path, char *buf, struct ckch_data *data, char **err)
+{
+ int fd = -1;
+ int r = 0;
+ int ret = 1;
+ struct buffer *ocsp_response;
+ struct buffer *src = NULL;
+
+ if (buf) {
+ int i, j;
+ /* if it's from a buffer it will be base64 */
+
+ /* remove \r and \n from the payload */
+ for (i = 0, j = 0; buf[i]; i++) {
+ if (buf[i] == '\r' || buf[i] == '\n')
+ continue;
+ buf[j++] = buf[i];
+ }
+ buf[j] = 0;
+
+ ret = base64dec(buf, j, trash.area, trash.size);
+ if (ret < 0) {
+ memprintf(err, "Error reading OCSP response in base64 format");
+ goto end;
+ }
+ trash.data = ret;
+ src = &trash;
+ } else {
+ fd = open(ocsp_path, O_RDONLY);
+ if (fd == -1) {
+ memprintf(err, "Error opening OCSP response file");
+ goto end;
+ }
+
+ trash.data = 0;
+ while (trash.data < trash.size) {
+ r = read(fd, trash.area + trash.data, trash.size - trash.data);
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+
+ memprintf(err, "Error reading OCSP response from file");
+ goto end;
+ }
+ else if (r == 0) {
+ break;
+ }
+ trash.data += r;
+ }
+ close(fd);
+ fd = -1;
+ src = &trash;
+ }
+
+ ocsp_response = calloc(1, sizeof(*ocsp_response));
+ if (!chunk_dup(ocsp_response, src)) {
+ ha_free(&ocsp_response);
+ goto end;
+ }
+ /* no error, fill data with new context, old context must be freed */
+ if (data->ocsp_response) {
+ ha_free(&data->ocsp_response->area);
+ free(data->ocsp_response);
+ }
+ data->ocsp_response = ocsp_response;
+ ret = 0;
+end:
+ if (fd != -1)
+ close(fd);
+
+ return ret;
+}
+#endif
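+
+/* Illustrative usage sketch with a hypothetical caller and path, not taken
+ * from the upstream sources: loading a DER response stored on disk next to
+ * its certificate:
+ *
+ *     struct ckch_data *data = ...;
+ *     char *err = NULL;
+ *
+ *     if (ssl_sock_load_ocsp_response_from_file("/etc/ssl/site.pem.ocsp",
+ *                                               NULL, data, &err))
+ *             ha_alert("%s\n", err ? err : "failed to load OCSP response");
+ */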
+
+/*
+ * Try to load into a ckch every file related to it
+ * (PEM, sctl, ocsp, issuer, etc.)
+ *
+ * This function is only used to load files during the configuration parsing,
+ * it is not used with the CLI.
+ *
+ * This allows us to carry the contents of the file without having to read the
+ * file multiple times. The caller must call
+ * ssl_sock_free_cert_key_and_chain_contents.
+ *
+ * returns:
+ * 0 on Success
+ * 1 on SSL Failure
+ */
+int ssl_sock_load_files_into_ckch(const char *path, struct ckch_data *data, char **err)
+{
+ struct buffer *fp = NULL;
+ int ret = 1;
+ struct stat st;
+
+ /* try to load the PEM */
+ if (ssl_sock_load_pem_into_ckch(path, NULL, data , err) != 0) {
+ goto end;
+ }
+
+ fp = alloc_trash_chunk();
+ if (!fp) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ if (!chunk_strcpy(fp, path) || (b_data(fp) > MAXPATHLEN)) {
+ memprintf(err, "%s '%s' filename too long'.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ /* remove the ".crt" extension */
+ if (global_ssl.extra_files_noext) {
+ char *ext;
+
+ /* look for the extension */
+ if ((ext = strrchr(fp->area, '.'))) {
+
+ if (strcmp(ext, ".crt") == 0) {
+ *ext = '\0';
+ fp->data = strlen(fp->area);
+ }
+ }
+
+ }
+
+ if (data->key == NULL) {
+ /* If no private key was found yet and we cannot look for it in extra
+ * files, raise an error.
+ */
+ if (!(global_ssl.extra_files & SSL_GF_KEY)) {
+ memprintf(err, "%sNo Private Key found in '%s'.\n", err && *err ? *err : "", fp->area);
+ goto end;
+ }
+
+ /* try to load an external private key if it wasn't in the PEM */
+ if (!chunk_strcat(fp, ".key") || (b_data(fp) > MAXPATHLEN)) {
+ memprintf(err, "%s '%s' filename too long'.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ if (stat(fp->area, &st) == 0) {
+ if (ssl_sock_load_key_into_ckch(fp->area, NULL, data, err)) {
+ memprintf(err, "%s '%s' is present but cannot be read or parsed'.\n",
+ err && *err ? *err : "", fp->area);
+ goto end;
+ }
+ }
+
+ if (data->key == NULL) {
+ memprintf(err, "%sNo Private Key found in '%s'.\n", err && *err ? *err : "", fp->area);
+ goto end;
+ }
+ /* remove the added extension */
+ *(fp->area + fp->data - strlen(".key")) = '\0';
+ b_sub(fp, strlen(".key"));
+ }
+
+
+ if (!X509_check_private_key(data->cert, data->key)) {
+ memprintf(err, "%sinconsistencies between private key and certificate loaded '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+#ifdef HAVE_SSL_SCTL
+ /* try to load the sctl file */
+ if (global_ssl.extra_files & SSL_GF_SCTL) {
+ struct stat st;
+
+ if (!chunk_strcat(fp, ".sctl") || b_data(fp) > MAXPATHLEN) {
+ memprintf(err, "%s '%s' filename too long'.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ if (stat(fp->area, &st) == 0) {
+ if (ssl_sock_load_sctl_from_file(fp->area, NULL, data, err)) {
+ memprintf(err, "%s '%s.sctl' is present but cannot be read or parsed'.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+ }
+ /* remove the added extension */
+ *(fp->area + fp->data - strlen(".sctl")) = '\0';
+ b_sub(fp, strlen(".sctl"));
+ }
+#endif
+
+ /* try to load an ocsp response file */
+ if (global_ssl.extra_files & SSL_GF_OCSP) {
+ struct stat st;
+
+ if (!chunk_strcat(fp, ".ocsp") || b_data(fp) > MAXPATHLEN) {
+ memprintf(err, "%s '%s' filename too long'.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ if (stat(fp->area, &st) == 0) {
+ if (ssl_sock_load_ocsp_response_from_file(fp->area, NULL, data, err)) {
+ ret = 1;
+ goto end;
+ }
+ }
+ /* remove the added extension */
+ *(fp->area + fp->data - strlen(".ocsp")) = '\0';
+ b_sub(fp, strlen(".ocsp"));
+ }
+
+#ifndef OPENSSL_IS_BORINGSSL /* Useless for BoringSSL */
+ if (data->ocsp_response && (global_ssl.extra_files & SSL_GF_OCSP_ISSUER)) {
+ /* if no issuer was found, try to load an issuer from the .issuer */
+ if (!data->ocsp_issuer) {
+ struct stat st;
+
+ if (!chunk_strcat(fp, ".issuer") || b_data(fp) > MAXPATHLEN) {
+ memprintf(err, "%s '%s' filename too long'.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+
+ if (stat(fp->area, &st) == 0) {
+ if (ssl_sock_load_issuer_file_into_ckch(fp->area, NULL, data, err)) {
+ ret = 1;
+ goto end;
+ }
+
+ if (X509_check_issued(data->ocsp_issuer, data->cert) != X509_V_OK) {
+ memprintf(err, "%s '%s' is not an issuer'.\n",
+ err && *err ? *err : "", fp->area);
+ ret = 1;
+ goto end;
+ }
+ }
+ /* remove the added extension */
+ *(fp->area + fp->data - strlen(".issuer")) = '\0';
+ b_sub(fp, strlen(".issuer"));
+ }
+ }
+#endif
+
+ ret = 0;
+
+end:
+
+ ERR_clear_error();
+
+ /* Something went wrong in one of the reads */
+ if (ret != 0)
+ ssl_sock_free_cert_key_and_chain_contents(data);
+
+ free_trash_chunk(fp);
+
+ return ret;
+}
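+
+/* For reference, derived from the loader above rather than introducing any
+ * new API: with all extra files enabled, a certificate "site.pem" may be
+ * accompanied on disk by:
+ *
+ *     site.pem          // certificate + optional key/chain/DH
+ *     site.pem.key      // private key, when not embedded in the PEM
+ *     site.pem.sctl     // signed certificate timestamp list
+ *     site.pem.ocsp     // DER-encoded OCSP response
+ *     site.pem.issuer   // OCSP issuer certificate
+ *
+ * Each extension is appended to <path>, probed with stat(), then stripped
+ * again before probing the next one.
+ */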
+
+/*
+ * Try to load a private key file from a <path> or a buffer <buf>.
+ *
+ * If it fails you should not attempt to use the ckch; free it instead.
+ *
+ * Returns 0 on success or != 0 on failure.
+ */
+int ssl_sock_load_key_into_ckch(const char *path, char *buf, struct ckch_data *data , char **err)
+{
+ BIO *in = NULL;
+ int ret = 1;
+ EVP_PKEY *key = NULL;
+
+ if (buf) {
+ /* reading from a buffer */
+ in = BIO_new_mem_buf(buf, -1);
+ if (in == NULL) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ } else {
+ /* reading from a file */
+ in = BIO_new(BIO_s_file());
+ if (in == NULL)
+ goto end;
+
+ if (BIO_read_filename(in, path) <= 0)
+ goto end;
+ }
+
+ /* Read Private Key */
+ key = PEM_read_bio_PrivateKey(in, NULL, NULL, NULL);
+ if (key == NULL) {
+ memprintf(err, "%sunable to load private key from file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+ ret = 0;
+
+ SWAP(data->key, key);
+
+end:
+
+ ERR_clear_error();
+ if (in)
+ BIO_free(in);
+ if (key)
+ EVP_PKEY_free(key);
+
+ return ret;
+}
+
+/*
+ * Try to load a PEM file from a <path> or a buffer <buf>.
+ * The PEM must contain at least a certificate; it may also contain
+ * DH parameters, a certificate chain and a private key.
+ *
+ * If it fails you should not attempt to use the ckch; free it instead.
+ *
+ * Returns 0 on success or != 0 on failure.
+ */
+int ssl_sock_load_pem_into_ckch(const char *path, char *buf, struct ckch_data *data , char **err)
+{
+ BIO *in = NULL;
+ int ret = 1;
+ X509 *ca;
+ X509 *cert = NULL;
+ EVP_PKEY *key = NULL;
+ HASSL_DH *dh = NULL;
+ STACK_OF(X509) *chain = NULL;
+
+ if (buf) {
+ /* reading from a buffer */
+ in = BIO_new_mem_buf(buf, -1);
+ if (in == NULL) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ } else {
+ /* reading from a file */
+ in = BIO_new(BIO_s_file());
+ if (in == NULL) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ if (BIO_read_filename(in, path) <= 0) {
+ memprintf(err, "%scannot open the file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+ }
+
+ /* Read Private Key */
+ key = PEM_read_bio_PrivateKey(in, NULL, NULL, NULL);
+ /* no need to check for errors here, because the private key could be loaded later */
+
+#ifndef OPENSSL_NO_DH
+ /* Seek back to beginning of file */
+ if (BIO_reset(in) == -1) {
+ memprintf(err, "%san error occurred while reading the file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+ dh = ssl_sock_get_dh_from_bio(in);
+ ERR_clear_error();
+ /* no need to return an error there, dh is not mandatory */
+#endif
+
+ /* Seek back to beginning of file */
+ if (BIO_reset(in) == -1) {
+ memprintf(err, "%san error occurred while reading the file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+
+ /* Read Certificate */
+ cert = PEM_read_bio_X509_AUX(in, NULL, NULL, NULL);
+ if (cert == NULL) {
+ ret = ERR_get_error();
+ memprintf(err, "%sunable to load certificate from file '%s': %s.\n",
+ err && *err ? *err : "", path, ERR_reason_error_string(ret));
+ goto end;
+ }
+
+ /* Look for a Certificate Chain */
+ while ((ca = PEM_read_bio_X509(in, NULL, NULL, NULL))) {
+ if (chain == NULL)
+ chain = sk_X509_new_null();
+ if (!sk_X509_push(chain, ca)) {
+ X509_free(ca);
+ break;
+ }
+ }
+
+ ret = ERR_get_error();
+ if (ret && !(ERR_GET_LIB(ret) == ERR_LIB_PEM && ERR_GET_REASON(ret) == PEM_R_NO_START_LINE)) {
+ memprintf(err, "%sunable to load certificate chain from file '%s': %s\n",
+ err && *err ? *err : "", path, ERR_reason_error_string(ret));
+ goto end;
+ }
+
+ /* once the PEM is loaded, remove every other previously loaded context from the data */
+ if (data->ocsp_response) {
+ ha_free(&data->ocsp_response->area);
+ ha_free(&data->ocsp_response);
+ }
+
+ if (data->sctl) {
+ ha_free(&data->sctl->area);
+ ha_free(&data->sctl);
+ }
+
+ if (data->ocsp_issuer) {
+ X509_free(data->ocsp_issuer);
+ data->ocsp_issuer = NULL;
+ }
+
+ /* no error, fill data with new context, old context will be freed at the end: */
+ SWAP(data->key, key);
+ SWAP(data->dh, dh);
+ SWAP(data->cert, cert);
+ SWAP(data->chain, chain);
+
+ ret = 0;
+
+end:
+
+ ERR_clear_error();
+ if (in)
+ BIO_free(in);
+ if (key)
+ EVP_PKEY_free(key);
+ if (dh)
+ HASSL_DH_free(dh);
+ if (cert)
+ X509_free(cert);
+ if (chain)
+ sk_X509_pop_free(chain, X509_free);
+
+ return ret;
+}
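+
+/* Minimal sketch of the multi-object PEM read used above, not a substitute
+ * for it: a single BIO is rewound between passes so the key, the leaf and
+ * the chain can each be pulled from the same input:
+ *
+ *     BIO *in = BIO_new_mem_buf(pem, -1);
+ *     EVP_PKEY *key = PEM_read_bio_PrivateKey(in, NULL, NULL, NULL);
+ *     X509 *leaf, *ca;
+ *
+ *     BIO_reset(in);
+ *     leaf = PEM_read_bio_X509_AUX(in, NULL, NULL, NULL);
+ *     while ((ca = PEM_read_bio_X509(in, NULL, NULL, NULL)))
+ *             ;             // push each <ca> onto the chain stack
+ *     BIO_free(in);
+ */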
+
+/* Frees the contents of a cert_key_and_chain
+ */
+void ssl_sock_free_cert_key_and_chain_contents(struct ckch_data *data)
+{
+ if (!data)
+ return;
+
+ /* Free the certificate and set pointer to NULL */
+ if (data->cert)
+ X509_free(data->cert);
+ data->cert = NULL;
+
+ /* Free the key and set pointer to NULL */
+ if (data->key)
+ EVP_PKEY_free(data->key);
+ data->key = NULL;
+
+ /* Free each certificate in the chain */
+ if (data->chain)
+ sk_X509_pop_free(data->chain, X509_free);
+ data->chain = NULL;
+
+ if (data->dh)
+ HASSL_DH_free(data->dh);
+ data->dh = NULL;
+
+ if (data->sctl) {
+ ha_free(&data->sctl->area);
+ ha_free(&data->sctl);
+ }
+
+ if (data->ocsp_response) {
+ ha_free(&data->ocsp_response->area);
+ ha_free(&data->ocsp_response);
+ }
+
+ if (data->ocsp_issuer)
+ X509_free(data->ocsp_issuer);
+ data->ocsp_issuer = NULL;
+
+
+ /* We need to properly remove the reference to the corresponding
+ * certificate_ocsp structure if it exists (which it should).
+ */
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ if (data->ocsp_cid) {
+ struct certificate_ocsp *ocsp = NULL;
+ unsigned char certid[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ unsigned int certid_length = 0;
+
+ if (ssl_ocsp_build_response_key(data->ocsp_cid, (unsigned char*)certid, &certid_length) >= 0) {
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, certid, OCSP_MAX_CERTID_ASN1_LENGTH);
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ssl_sock_free_ocsp(ocsp);
+ }
+
+ OCSP_CERTID_free(data->ocsp_cid);
+ data->ocsp_cid = NULL;
+ }
+#endif
+}
+
+/*
+ * This function copies a cert_key_and_chain in memory.
+ *
+ * It's used to try to apply changes on a ckch before committing them, because
+ * most of the time it's not possible to revert those changes.
+ *
+ * Returns <dst> or NULL.
+ */
+struct ckch_data *ssl_sock_copy_cert_key_and_chain(struct ckch_data *src,
+ struct ckch_data *dst)
+{
+ if (!src || !dst)
+ return NULL;
+
+ if (src->cert) {
+ dst->cert = src->cert;
+ X509_up_ref(src->cert);
+ }
+
+ if (src->key) {
+ dst->key = src->key;
+ EVP_PKEY_up_ref(src->key);
+ }
+
+ if (src->chain) {
+ dst->chain = X509_chain_up_ref(src->chain);
+ }
+
+ if (src->dh) {
+#ifndef USE_OPENSSL_WOLFSSL
+ HASSL_DH_up_ref(src->dh);
+ dst->dh = src->dh;
+#else
+ dst->dh = wolfSSL_DH_dup(src->dh);
+ if (!dst->dh)
+ goto error;
+#endif
+ }
+
+ if (src->sctl) {
+ struct buffer *sctl;
+
+ sctl = calloc(1, sizeof(*sctl));
+ if (!chunk_dup(sctl, src->sctl)) {
+ ha_free(&sctl);
+ goto error;
+ }
+ dst->sctl = sctl;
+ }
+
+ if (src->ocsp_response) {
+ struct buffer *ocsp_response;
+
+ ocsp_response = calloc(1, sizeof(*ocsp_response));
+ if (!chunk_dup(ocsp_response, src->ocsp_response)) {
+ ha_free(&ocsp_response);
+ goto error;
+ }
+ dst->ocsp_response = ocsp_response;
+ }
+
+ if (src->ocsp_issuer) {
+ X509_up_ref(src->ocsp_issuer);
+ dst->ocsp_issuer = src->ocsp_issuer;
+ }
+
+ dst->ocsp_cid = OCSP_CERTID_dup(src->ocsp_cid);
+
+ dst->ocsp_update_mode = src->ocsp_update_mode;
+
+ return dst;
+
+error:
+
+ /* free everything */
+ ssl_sock_free_cert_key_and_chain_contents(dst);
+
+ return NULL;
+}
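+
+/* Illustrative usage sketch with a hypothetical caller, not upstream code:
+ * this copy is what makes "set ssl cert" transactional. Changes are applied
+ * to a duplicate and the live data is only replaced once everything worked:
+ *
+ *     struct ckch_data work = { 0 };
+ *
+ *     if (!ssl_sock_copy_cert_key_and_chain(store->data, &work))
+ *             return 1;     // nothing was touched, nothing to roll back
+ *     // ... apply the update to <work> ...
+ *     // on success swap <work> in; on failure free only its contents
+ */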
+
+/*
+ * Try to load an OCSP issuer certificate from a <path> or a buffer <buf>.
+ * Returns 0 on success or != 0 on failure.
+ */
+int ssl_sock_load_issuer_file_into_ckch(const char *path, char *buf, struct ckch_data *data, char **err)
+{
+ int ret = 1;
+ BIO *in = NULL;
+ X509 *issuer;
+
+ if (buf) {
+ /* reading from a buffer */
+ in = BIO_new_mem_buf(buf, -1);
+ if (in == NULL) {
+ memprintf(err, "%sCan't allocate memory\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ } else {
+ /* reading from a file */
+ in = BIO_new(BIO_s_file());
+ if (in == NULL)
+ goto end;
+
+ if (BIO_read_filename(in, path) <= 0)
+ goto end;
+ }
+
+ issuer = PEM_read_bio_X509_AUX(in, NULL, NULL, NULL);
+ if (!issuer) {
+ memprintf(err, "%s'%s' cannot be read or parsed'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+ /* no error, fill data with new context, old context must be free */
+ if (data->ocsp_issuer)
+ X509_free(data->ocsp_issuer);
+ data->ocsp_issuer = issuer;
+ ret = 0;
+
+end:
+
+ ERR_clear_error();
+ if (in)
+ BIO_free(in);
+
+ return ret;
+}
+
+/******************** ckch_store functions ***********************************
+ * The ckch_store is a structure used to cache and index the SSL files used in
+ * the configuration.
+ */
+
+/*
+ * Free a ckch_store, its ckch, its instances and remove it from the ebtree
+ */
+void ckch_store_free(struct ckch_store *store)
+{
+ struct ckch_inst *inst, *inst_s;
+
+ if (!store)
+ return;
+
+ list_for_each_entry_safe(inst, inst_s, &store->ckch_inst, by_ckchs) {
+ ckch_inst_free(inst);
+ }
+ ebmb_delete(&store->node);
+
+ ssl_sock_free_cert_key_and_chain_contents(store->data);
+ ha_free(&store->data);
+
+ free(store);
+}
+
+/*
+ * Create and initialize a ckch_store.
+ * <filename> is the key name.
+ *
+ * Returns a ckch_store or NULL upon failure.
+ */
+struct ckch_store *ckch_store_new(const char *filename)
+{
+ struct ckch_store *store;
+ int pathlen;
+
+ pathlen = strlen(filename);
+ store = calloc(1, sizeof(*store) + pathlen + 1);
+ if (!store)
+ return NULL;
+
+ memcpy(store->path, filename, pathlen + 1);
+
+ LIST_INIT(&store->ckch_inst);
+ LIST_INIT(&store->crtlist_entry);
+
+ store->data = calloc(1, sizeof(*store->data));
+ if (!store->data)
+ goto error;
+
+ return store;
+error:
+ ckch_store_free(store);
+ return NULL;
+}
+
+/* Allocate and duplicate a ckch_store.
+ * Returns a new ckch_store or NULL */
+struct ckch_store *ckchs_dup(const struct ckch_store *src)
+{
+ struct ckch_store *dst;
+
+ if (!src)
+ return NULL;
+
+ dst = ckch_store_new(src->path);
+ if (!dst)
+ return NULL;
+
+ if (!ssl_sock_copy_cert_key_and_chain(src->data, dst->data))
+ goto error;
+
+ return dst;
+
+error:
+ ckch_store_free(dst);
+
+ return NULL;
+}
+
+/*
+ * Look up a path in the ckchs tree.
+ */
+struct ckch_store *ckchs_lookup(char *path)
+{
+ struct ebmb_node *eb;
+
+ eb = ebst_lookup(&ckchs_tree, path);
+ if (!eb)
+ return NULL;
+
+ return ebmb_entry(eb, struct ckch_store, node);
+}
+
+/*
+ * This function allocates a ckch_store and populates it with certificates from files.
+ */
+struct ckch_store *ckchs_load_cert_file(char *path, char **err)
+{
+ struct ckch_store *ckchs;
+
+ ckchs = ckch_store_new(path);
+ if (!ckchs) {
+ memprintf(err, "%sunable to allocate memory.\n", err && *err ? *err : "");
+ goto end;
+ }
+
+ if (ssl_sock_load_files_into_ckch(path, ckchs->data, err) == 1)
+ goto end;
+
+ /* insert into the ckchs tree */
+ memcpy(ckchs->path, path, strlen(path) + 1);
+ ebst_insert(&ckchs_tree, &ckchs->node);
+ return ckchs;
+
+end:
+ ckch_store_free(ckchs);
+
+ return NULL;
+}
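+
+/* Illustrative usage sketch with a hypothetical caller, not upstream code:
+ * configuration parsing typically checks the cache before touching the disk,
+ * so each file is only read once however many lines reference it:
+ *
+ *     struct ckch_store *st = ckchs_lookup(path);
+ *
+ *     if (!st)
+ *             st = ckchs_load_cert_file(path, &err);
+ *     if (!st)
+ *             return ERR_ALERT | ERR_FATAL;   // <err> holds the reason
+ */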
+
+
+/******************** ckch_inst functions ******************************/
+
+/* unlink a ckch_inst, free all SNIs, free the ckch_inst */
+/* The caller must use the lock of the bind_conf if used with inserted SNIs */
+void ckch_inst_free(struct ckch_inst *inst)
+{
+ struct sni_ctx *sni, *sni_s;
+ struct ckch_inst_link_ref *link_ref, *link_ref_s;
+
+ if (inst == NULL)
+ return;
+
+ list_for_each_entry_safe(sni, sni_s, &inst->sni_ctx, by_ckch_inst) {
+ SSL_CTX_free(sni->ctx);
+ LIST_DELETE(&sni->by_ckch_inst);
+ ebmb_delete(&sni->name);
+ free(sni);
+ }
+ SSL_CTX_free(inst->ctx);
+ inst->ctx = NULL;
+ LIST_DELETE(&inst->by_ckchs);
+ LIST_DELETE(&inst->by_crtlist_entry);
+
+ /* Free the cafile_link_refs list */
+ list_for_each_entry_safe(link_ref, link_ref_s, &inst->cafile_link_refs, list) {
+ if (link_ref->link && LIST_INLIST(&link_ref->link->list)) {
+ /* Try to detach and free the ckch_inst_link only if it
+ * was attached, this way it can be used to loop from
+ * the caller */
+ LIST_DEL_INIT(&link_ref->link->list);
+ ha_free(&link_ref->link);
+ }
+ LIST_DELETE(&link_ref->list);
+ free(link_ref);
+ }
+
+ free(inst);
+}
+
+/* Alloc and init a ckch_inst */
+struct ckch_inst *ckch_inst_new()
+{
+ struct ckch_inst *ckch_inst;
+
+ ckch_inst = calloc(1, sizeof *ckch_inst);
+ if (!ckch_inst)
+ return NULL;
+
+ LIST_INIT(&ckch_inst->sni_ctx);
+ LIST_INIT(&ckch_inst->by_ckchs);
+ LIST_INIT(&ckch_inst->by_crtlist_entry);
+ LIST_INIT(&ckch_inst->cafile_link_refs);
+
+ return ckch_inst;
+}
+
+
+/******************** ssl_store functions ******************************/
+struct eb_root cafile_tree = EB_ROOT;
+
+/*
+ * Returns the cafile_entry found in the cafile_tree indexed by the path 'path'.
+ * If 'oldest_entry' is 1, returns the "original" cafile_entry (since
+ * during a set cafile/commit cafile cycle there might be two entries for any
+ * given path, the original one and the new one set via the CLI but not
+ * committed yet).
+ */
+struct cafile_entry *ssl_store_get_cafile_entry(char *path, int oldest_entry)
+{
+ struct cafile_entry *ca_e = NULL;
+ struct ebmb_node *eb;
+
+ eb = ebst_lookup(&cafile_tree, path);
+ while (eb) {
+ ca_e = ebmb_entry(eb, struct cafile_entry, node);
+ /* The ebst_lookup in a tree that has duplicates returns the
+ * oldest entry first. If we want the latest entry, we need to
+ * iterate over all the duplicates until we find the last one
+ * (in our case there should never be more than two entries for
+ * any given path). */
+ if (oldest_entry)
+ return ca_e;
+ eb = ebmb_next_dup(eb);
+ }
+ return ca_e;
+}
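+
+/* For reference, describing the lookup above rather than adding behaviour:
+ * during a "set ssl ca-file" transaction the tree briefly holds two entries
+ * for the same path, and the flag selects which duplicate is wanted:
+ *
+ *     old = ssl_store_get_cafile_entry(path, 1);  // committed entry
+ *     cur = ssl_store_get_cafile_entry(path, 0);  // newest, maybe uncommitted
+ */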
+
+int ssl_store_add_uncommitted_cafile_entry(struct cafile_entry *entry)
+{
+ return (ebst_insert(&cafile_tree, &entry->node) != &entry->node);
+}
+
+X509_STORE* ssl_store_get0_locations_file(char *path)
+{
+ struct cafile_entry *ca_e = ssl_store_get_cafile_entry(path, 0);
+
+ if (ca_e)
+ return ca_e->ca_store;
+
+ return NULL;
+}
+
+/* Create a cafile_entry object, without adding it to the cafile_tree. */
+struct cafile_entry *ssl_store_create_cafile_entry(char *path, X509_STORE *store, enum cafile_type type)
+{
+ struct cafile_entry *ca_e;
+ int pathlen;
+
+ pathlen = strlen(path);
+
+ ca_e = calloc(1, sizeof(*ca_e) + pathlen + 1);
+ if (ca_e) {
+ memcpy(ca_e->path, path, pathlen + 1);
+ ca_e->ca_store = store;
+ ca_e->type = type;
+ LIST_INIT(&ca_e->ckch_inst_link);
+ }
+ return ca_e;
+}
+
+
+/* Duplicate a cafile_entry.
+ * Allocate the X509_STORE and copy the X509 certificates and CRLs into it.
+ *
+ * Returns the newly allocated cafile_entry or NULL.
+ *
+ */
+struct cafile_entry *ssl_store_dup_cafile_entry(struct cafile_entry *src)
+{
+ struct cafile_entry *dst = NULL;
+ X509_STORE *store = NULL;
+ STACK_OF(X509_OBJECT) *objs;
+ int i;
+
+ if (!src)
+ return NULL;
+
+ if (src->ca_store) {
+ /* if there was a store in the src, copy it */
+ store = X509_STORE_new();
+ if (!store)
+ goto err;
+
+ objs = X509_STORE_get0_objects(src->ca_store);
+ for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
+ X509 *cert;
+ X509_CRL *crl;
+
+ cert = X509_OBJECT_get0_X509(sk_X509_OBJECT_value(objs, i));
+ if (cert) {
+ if (X509_STORE_add_cert(store, cert) == 0) {
+ /* only exits on error if the error is not about duplicate certificates */
+ if (!(ERR_GET_REASON(ERR_get_error()) == X509_R_CERT_ALREADY_IN_HASH_TABLE)) {
+ goto err;
+ }
+ }
+
+ }
+ crl = X509_OBJECT_get0_X509_CRL(sk_X509_OBJECT_value(objs, i));
+ if (crl) {
+ if (X509_STORE_add_crl(store, crl) == 0) {
+ /* only exits on error if the error is not about duplicate certificates */
+ if (!(ERR_GET_REASON(ERR_get_error()) == X509_R_CERT_ALREADY_IN_HASH_TABLE)) {
+ goto err;
+ }
+ }
+
+ }
+ }
+ }
+ dst = ssl_store_create_cafile_entry(src->path, store, src->type);
+
+ return dst;
+
+err:
+ X509_STORE_free(store);
+ ha_free(&dst);
+
+ return NULL;
+}
+
+/* Delete a cafile_entry. The caller is responsible for removing this entry
+ * from the cafile_tree first if it was previously added into it. */
+void ssl_store_delete_cafile_entry(struct cafile_entry *ca_e)
+{
+ struct ckch_inst_link *link, *link_s;
+ if (!ca_e)
+ return;
+
+ X509_STORE_free(ca_e->ca_store);
+
+ list_for_each_entry_safe(link, link_s, &ca_e->ckch_inst_link, list) {
+ struct ckch_inst *inst = link->ckch_inst;
+ struct ckch_inst_link_ref *link_ref, *link_ref_s;
+ list_for_each_entry_safe(link_ref, link_ref_s, &inst->cafile_link_refs, list) {
+ if (link_ref->link == link) {
+ LIST_DELETE(&link_ref->list);
+ free(link_ref);
+ break;
+ }
+ }
+ LIST_DELETE(&link->list);
+ free(link);
+ }
+
+ free(ca_e);
+}
+
+/*
+ * Fill the X509_STORE (ca_e->ca_store) of a cafile_entry <ca_e> from a buffer
+ * <cert_buf> instead of from a file. The <append> field should be set to 1 if
+ * you want to keep the existing X509_STORE and append data to it.
+ *
+ * This function is used when the "set ssl ca-file" CLI command is used.
+ * It can parse CERTIFICATE sections as well as CRL ones.
+ * Returns 0 in case of success, 1 otherwise.
+ *
+ * /!\ Warning: if there was an error, the X509_STORE may have been modified,
+ * so it's better not to use it once 1 has been returned.
+ */
+int ssl_store_load_ca_from_buf(struct cafile_entry *ca_e, char *cert_buf, int append)
+{
+ BIO *bio = NULL;
+ STACK_OF(X509_INFO) *infos;
+ X509_INFO *info;
+ int i;
+ int retval = 1;
+ int retcert = 0;
+
+ if (!ca_e)
+ return 1;
+
+ if (!append) {
+ X509_STORE_free(ca_e->ca_store);
+ ca_e->ca_store = NULL;
+ }
+
+ if (!ca_e->ca_store)
+ ca_e->ca_store = X509_STORE_new();
+
+ if (!ca_e->ca_store)
+ goto end;
+
+ bio = BIO_new_mem_buf(cert_buf, strlen(cert_buf));
+ if (!bio)
+ goto end;
+
+ infos = PEM_X509_INFO_read_bio(bio, NULL, NULL, NULL);
+ if (!infos)
+ goto end;
+
+ for (i = 0; i < sk_X509_INFO_num(infos) && !retcert; i++) {
+ info = sk_X509_INFO_value(infos, i);
+
+ /* X509_STORE_add_cert and X509_STORE_add_crl return 1 on success */
+ if (info->x509)
+ retcert = !X509_STORE_add_cert(ca_e->ca_store, info->x509);
+ if (!retcert && info->crl)
+ retcert = !X509_STORE_add_crl(ca_e->ca_store, info->crl);
+ }
+
+ /* Return an error if we didn't process all the X509_INFO objects or if
+ * there were none at all; set <retval> to 0 if everything went right. */
+ if (!(retcert || (i != sk_X509_INFO_num(infos)) || (sk_X509_INFO_num(infos) == 0)))
+ retval = 0;
+
+ /* Cleanup */
+ sk_X509_INFO_pop_free(infos, X509_INFO_free);
+
+end:
+ BIO_free(bio);
+
+ return retval;
+}
+
+/*
+ * Try to load a ca-file from disk into the ca-file cache.
+ * <shuterror> allows you to stop emitting the errors.
+ * Returns 0 upon error.
+ */
+int __ssl_store_load_locations_file(char *path, int create_if_none, enum cafile_type type, int shuterror)
+{
+ X509_STORE *store = ssl_store_get0_locations_file(path);
+
+ /* If this function is called by the CLI, we should not call the
+ * X509_STORE_load_locations function because it performs forbidden disk
+ * accesses. */
+ if (!store && create_if_none) {
+ STACK_OF(X509_OBJECT) *objs;
+ int cert_count = 0;
+ struct stat buf;
+ struct cafile_entry *ca_e;
+ const char *file = NULL;
+ const char *dir = NULL;
+ unsigned long e;
+
+ store = X509_STORE_new();
+ if (!store) {
+ if (!shuterror)
+ ha_alert("Cannot allocate memory!\n");
+ goto err;
+ }
+
+ if (strcmp(path, "@system-ca") == 0) {
+ dir = X509_get_default_cert_dir();
+ if (!dir) {
+ if (!shuterror)
+ ha_alert("Couldn't get the system CA directory from X509_get_default_cert_dir().\n");
+ goto err;
+ }
+
+ } else {
+
+ if (stat(path, &buf) == -1) {
+ if (!shuterror)
+ ha_alert("Couldn't open the ca-file '%s' (%s).\n", path, strerror(errno));
+ goto err;
+ }
+
+ if (S_ISDIR(buf.st_mode))
+ dir = path;
+ else
+ file = path;
+ }
+
+ if (file) {
+ if (!X509_STORE_load_locations(store, file, NULL)) {
+ e = ERR_get_error();
+ if (!shuterror)
+ ha_alert("Couldn't open the ca-file '%s' (%s).\n", path, ERR_reason_error_string(e));
+ goto err;
+ }
+ } else if (dir) {
+ int n, i;
+ struct dirent **de_list;
+
+ n = scandir(dir, &de_list, 0, alphasort);
+ if (n < 0)
+ goto err;
+
+ for (i= 0; i < n; i++) {
+ char *end;
+ struct dirent *de = de_list[i];
+ BIO *in = NULL;
+ X509 *ca = NULL;
+
+ ERR_clear_error();
+
+ /* we try to load the files that would have
+ * been loaded from a hashed directory by
+ * X509_LOOKUP_hash_dir, so according to "man 1
+ * c_rehash", we should load ".pem", ".crt",
+ * ".cer", or ".crl". Files starting with a dot
+ * are ignored.
+ */
+ end = strrchr(de->d_name, '.');
+ if (!end || de->d_name[0] == '.' ||
+ (strcmp(end, ".pem") != 0 &&
+ strcmp(end, ".crt") != 0 &&
+ strcmp(end, ".cer") != 0 &&
+ strcmp(end, ".crl") != 0)) {
+ free(de);
+ continue;
+ }
+ in = BIO_new(BIO_s_file());
+ if (in == NULL)
+ goto scandir_err;
+
+ chunk_printf(&trash, "%s/%s", dir, de->d_name);
+
+ if (BIO_read_filename(in, trash.area) == 0)
+ goto scandir_err;
+
+ if (PEM_read_bio_X509_AUX(in, &ca, NULL, NULL) == NULL)
+ goto scandir_err;
+
+ if (X509_STORE_add_cert(store, ca) == 0) {
+ /* only exits on error if the error is not about duplicate certificates */
+ if (!(ERR_GET_REASON(ERR_get_error()) == X509_R_CERT_ALREADY_IN_HASH_TABLE)) {
+ goto scandir_err;
+ }
+ }
+
+ X509_free(ca);
+ BIO_free(in);
+ free(de);
+ continue;
+
+scandir_err:
+ e = ERR_get_error();
+ X509_free(ca);
+ BIO_free(in);
+ free(de);
+ /* warn if it can't load one of the files, but don't abort */
+ if (!shuterror)
+ ha_warning("ca-file: '%s' couldn't load '%s' (%s)\n", path, trash.area, ERR_reason_error_string(e));
+
+ }
+ free(de_list);
+ } else {
+ if (!shuterror)
+ ha_alert("ca-file: couldn't load '%s'\n", path);
+ goto err;
+ }
+
+ objs = X509_STORE_get0_objects(store);
+ cert_count = sk_X509_OBJECT_num(objs);
+ if (cert_count == 0) {
+ if (!shuterror)
+ ha_warning("ca-file: 0 CA were loaded from '%s'\n", path);
+ }
+ ca_e = ssl_store_create_cafile_entry(path, store, type);
+ if (!ca_e) {
+ if (!shuterror)
+ ha_alert("Cannot allocate memory!\n");
+ goto err;
+ }
+ ebst_insert(&cafile_tree, &ca_e->node);
+ }
+ return (store != NULL);
+
+err:
+ X509_STORE_free(store);
+ store = NULL;
+ return 0;
+
+}
+
+int ssl_store_load_locations_file(char *path, int create_if_none, enum cafile_type type)
+{
+ return __ssl_store_load_locations_file(path, create_if_none, type, 0);
+}
+
+/*************************** CLI commands ***********************/
+
+/* Type of SSL payloads that can be updated over the CLI */
+
+struct cert_exts cert_exts[] = {
+ { "", CERT_TYPE_PEM, &ssl_sock_load_pem_into_ckch }, /* default mode, no extensions */
+ { "key", CERT_TYPE_KEY, &ssl_sock_load_key_into_ckch },
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) || defined OPENSSL_IS_BORINGSSL)
+ { "ocsp", CERT_TYPE_OCSP, &ssl_sock_load_ocsp_response_from_file },
+#endif
+#ifdef HAVE_SSL_SCTL
+ { "sctl", CERT_TYPE_SCTL, &ssl_sock_load_sctl_from_file },
+#endif
+ { "issuer", CERT_TYPE_ISSUER, &ssl_sock_load_issuer_file_into_ckch },
+ { NULL, CERT_TYPE_MAX, NULL },
+};
+
+
+/* release function of the `show ssl cert' command */
+static void cli_release_show_cert(struct appctx *appctx)
+{
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+}
+
+/* IO handler of "show ssl cert <filename>".
+ * It makes use of a show_cert_ctx context, and ckchs_transaction in read-only.
+ */
+static int cli_io_handler_show_cert(struct appctx *appctx)
+{
+ struct show_cert_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct ebmb_node *node;
+ struct ckch_store *ckchs = NULL;
+
+ if (trash == NULL)
+ return 1;
+
+ if (!ctx->old_ckchs && ckchs_transaction.old_ckchs) {
+ ckchs = ckchs_transaction.old_ckchs;
+ chunk_appendf(trash, "# transaction\n");
+ chunk_appendf(trash, "*%s\n", ckchs->path);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ ctx->old_ckchs = ckchs_transaction.old_ckchs;
+ }
+
+ if (!ctx->cur_ckchs) {
+ chunk_appendf(trash, "# filename\n");
+ node = ebmb_first(&ckchs_tree);
+ } else {
+ node = &ctx->cur_ckchs->node;
+ }
+ while (node) {
+ ckchs = ebmb_entry(node, struct ckch_store, node);
+ chunk_appendf(trash, "%s\n", ckchs->path);
+
+ node = ebmb_next(node);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+ ctx->cur_ckchs = NULL;
+ free_trash_chunk(trash);
+ return 1;
+yield:
+
+ free_trash_chunk(trash);
+ ctx->cur_ckchs = ckchs;
+ return 0; /* should come back */
+}
+
+/*
+ * Extract and format the DNS SAN extensions and copy the result into a chunk.
+ * Returns 0.
+ */
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+static int ssl_sock_get_san_oneline(X509 *cert, struct buffer *out)
+{
+ int i;
+ char *str;
+ STACK_OF(GENERAL_NAME) *names = NULL;
+
+ names = X509_get_ext_d2i(cert, NID_subject_alt_name, NULL, NULL);
+ if (names) {
+ for (i = 0; i < sk_GENERAL_NAME_num(names); i++) {
+ GENERAL_NAME *name = sk_GENERAL_NAME_value(names, i);
+ if (i > 0)
+ chunk_appendf(out, ", ");
+ if (name->type == GEN_DNS) {
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, name->d.dNSName) >= 0) {
+ chunk_appendf(out, "DNS:%s", str);
+ OPENSSL_free(str);
+ }
+ }
+ }
+ sk_GENERAL_NAME_pop_free(names, GENERAL_NAME_free);
+ }
+ return 0;
+}
+#endif
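+
+/* Example of the output produced by the helper above (illustrative values):
+ *
+ *     DNS:example.com, DNS:www.example.com
+ */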
+
+/*
+ * Build the ckch_inst_link that will be chained in the CA file entry and the
+ * corresponding ckch_inst_link_ref that will be chained in the ckch instance.
+ * Returns 0 in case of success.
+ */
+static int do_chain_inst_and_cafile(struct cafile_entry *cafile_entry, struct ckch_inst *ckch_inst)
+{
+ struct ckch_inst_link *new_link;
+ if (!LIST_ISEMPTY(&cafile_entry->ckch_inst_link)) {
+ struct ckch_inst_link *link = LIST_ELEM(cafile_entry->ckch_inst_link.n,
+ typeof(link), list);
+ /* Do not add multiple references to the same
+ * instance in a cafile_entry */
+ if (link->ckch_inst == ckch_inst) {
+ return 1;
+ }
+ }
+
+ new_link = calloc(1, sizeof(*new_link));
+ if (new_link) {
+ struct ckch_inst_link_ref *new_link_ref = calloc(1, sizeof(*new_link_ref));
+ if (!new_link_ref) {
+ free(new_link);
+ return 1;
+ }
+
+ new_link->ckch_inst = ckch_inst;
+ new_link_ref->link = new_link;
+ LIST_INIT(&new_link->list);
+ LIST_INIT(&new_link_ref->list);
+
+ LIST_APPEND(&cafile_entry->ckch_inst_link, &new_link->list);
+ LIST_APPEND(&ckch_inst->cafile_link_refs, &new_link_ref->list);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Link a CA file tree entry to the ckch instance that uses it.
+ * To determine if and which CA file tree entries need to be linked to the
+ * instance, we follow the same logic performed in ssl_sock_prepare_ctx when
+ * processing the verify option.
+ * This function works for a frontend as well as for a backend, depending on the
+ * configuration parameters given (bind_conf or server).
+ */
+void ckch_inst_add_cafile_link(struct ckch_inst *ckch_inst, struct bind_conf *bind_conf,
+ struct ssl_bind_conf *ssl_conf, const struct server *srv)
+{
+ int verify = SSL_VERIFY_NONE;
+
+ if (srv) {
+
+ if (global.ssl_server_verify == SSL_SERVER_VERIFY_REQUIRED)
+ verify = SSL_VERIFY_PEER;
+ switch (srv->ssl_ctx.verify) {
+ case SSL_SOCK_VERIFY_NONE:
+ verify = SSL_VERIFY_NONE;
+ break;
+ case SSL_SOCK_VERIFY_REQUIRED:
+ verify = SSL_VERIFY_PEER;
+ break;
+ }
+ }
+ else {
+ switch ((ssl_conf && ssl_conf->verify) ? ssl_conf->verify : bind_conf->ssl_conf.verify) {
+ case SSL_SOCK_VERIFY_NONE:
+ verify = SSL_VERIFY_NONE;
+ break;
+ case SSL_SOCK_VERIFY_OPTIONAL:
+ verify = SSL_VERIFY_PEER;
+ break;
+ case SSL_SOCK_VERIFY_REQUIRED:
+ verify = SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT;
+ break;
+ }
+ }
+
+ if (verify & SSL_VERIFY_PEER) {
+ struct cafile_entry *ca_file_entry = NULL;
+ struct cafile_entry *ca_verify_file_entry = NULL;
+ struct cafile_entry *crl_file_entry = NULL;
+ if (srv) {
+ if (srv->ssl_ctx.ca_file) {
+ ca_file_entry = ssl_store_get_cafile_entry(srv->ssl_ctx.ca_file, 0);
+
+ }
+ if (srv->ssl_ctx.crl_file) {
+ crl_file_entry = ssl_store_get_cafile_entry(srv->ssl_ctx.crl_file, 0);
+ }
+ }
+ else {
+ char *ca_file = (ssl_conf && ssl_conf->ca_file) ? ssl_conf->ca_file : bind_conf->ssl_conf.ca_file;
+ char *ca_verify_file = (ssl_conf && ssl_conf->ca_verify_file) ? ssl_conf->ca_verify_file : bind_conf->ssl_conf.ca_verify_file;
+ char *crl_file = (ssl_conf && ssl_conf->crl_file) ? ssl_conf->crl_file : bind_conf->ssl_conf.crl_file;
+
+ if (ca_file)
+ ca_file_entry = ssl_store_get_cafile_entry(ca_file, 0);
+ if (ca_verify_file)
+ ca_verify_file_entry = ssl_store_get_cafile_entry(ca_verify_file, 0);
+ if (crl_file)
+ crl_file_entry = ssl_store_get_cafile_entry(crl_file, 0);
+ }
+
+ if (ca_file_entry) {
+ /* If we have a ckch instance that is not already in the
+ * cafile_entry's list, add it to it. */
+ if (do_chain_inst_and_cafile(ca_file_entry, ckch_inst))
+ return;
+
+ }
+ if (ca_verify_file_entry && (ca_file_entry != ca_verify_file_entry)) {
+ /* If we have a ckch instance that is not already in the
+ * cafile_entry's list, add it to it. */
+ if (do_chain_inst_and_cafile(ca_verify_file_entry, ckch_inst))
+ return;
+ }
+ if (crl_file_entry) {
+ /* If we have a ckch instance that is not already in the
+ * cafile_entry's list, add it to it. */
+ if (do_chain_inst_and_cafile(crl_file_entry, ckch_inst))
+ return;
+ }
+ }
+}
+
+
+
+static int show_cert_detail(X509 *cert, STACK_OF(X509) *chain, struct buffer *out)
+{
+ BIO *bio = NULL;
+ struct buffer *tmp = alloc_trash_chunk();
+ int i;
+ int write = -1;
+ unsigned int len = 0;
+ X509_NAME *name = NULL;
+
+ if (!tmp)
+ return -1;
+
+ if (!cert)
+ goto end;
+
+ if (chain == NULL) {
+ struct issuer_chain *issuer;
+ issuer = ssl_get0_issuer_chain(cert);
+ if (issuer) {
+ chain = issuer->chain;
+ chunk_appendf(out, "Chain Filename: ");
+ chunk_appendf(out, "%s\n", issuer->path);
+ }
+ }
+ chunk_appendf(out, "Serial: ");
+ if (ssl_sock_get_serial(cert, tmp) == -1)
+ goto end;
+ dump_binary(out, tmp->area, tmp->data);
+ chunk_appendf(out, "\n");
+
+ chunk_appendf(out, "notBefore: ");
+ chunk_reset(tmp);
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ goto end;
+ if (ASN1_TIME_print(bio, X509_getm_notBefore(cert)) == 0)
+ goto end;
+ if ((write = BIO_read(bio, tmp->area, tmp->size-1)) <= 0)
+ goto end;
+ tmp->area[write] = '\0';
+ BIO_free(bio);
+ bio = NULL;
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ chunk_appendf(out, "notAfter: ");
+ chunk_reset(tmp);
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ goto end;
+ if (ASN1_TIME_print(bio, X509_getm_notAfter(cert)) == 0)
+ goto end;
+ if ((write = BIO_read(bio, tmp->area, tmp->size-1)) <= 0)
+ goto end;
+ tmp->area[write] = '\0';
+ BIO_free(bio);
+ bio = NULL;
+ chunk_appendf(out, "%s\n", tmp->area);
+
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ chunk_appendf(out, "Subject Alternative Name: ");
+ if (ssl_sock_get_san_oneline(cert, out) == -1)
+ goto end;
+ *(out->area + out->data) = '\0';
+ chunk_appendf(out, "\n");
+#endif
+ chunk_reset(tmp);
+ chunk_appendf(out, "Algorithm: ");
+ if (cert_get_pkey_algo(cert, tmp) == 0)
+ goto end;
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ chunk_reset(tmp);
+ chunk_appendf(out, "SHA1 FingerPrint: ");
+ if (X509_digest(cert, EVP_sha1(), (unsigned char *) tmp->area, &len) == 0)
+ goto end;
+ tmp->data = len;
+ dump_binary(out, tmp->area, tmp->data);
+ chunk_appendf(out, "\n");
+
+ chunk_appendf(out, "Subject: ");
+ if ((name = X509_get_subject_name(cert)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(name, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ chunk_appendf(out, "Issuer: ");
+ if ((name = X509_get_issuer_name(cert)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(name, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ /* Displays subject of each certificate in the chain */
+ for (i = 0; i < sk_X509_num(chain); i++) {
+ X509 *ca = sk_X509_value(chain, i);
+
+ chunk_appendf(out, "Chain Subject: ");
+ if ((name = X509_get_subject_name(ca)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(name, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ chunk_appendf(out, "Chain Issuer: ");
+ if ((name = X509_get_issuer_name(ca)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(name, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+ }
+
+end:
+ if (bio)
+ BIO_free(bio);
+ free_trash_chunk(tmp);
+
+ return 0;
+}
+
+/*
+ * Dump the OCSP response key (if it exists) of the certificate stored in
+ * <ckch_store> into buffer <out>.
+ * Returns 0 in case of success.
+ */
+static int ckch_store_show_ocsp_certid(struct ckch_store *ckch_store, struct buffer *out)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ unsigned char key[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ unsigned int key_length = 0;
+ int i;
+
+ if (ssl_ocsp_build_response_key(ckch_store->data->ocsp_cid, (unsigned char*)key, &key_length) >= 0) {
+ /* Dump the CERTID info */
+ chunk_appendf(out, "OCSP Response Key: ");
+ for (i = 0; i < key_length; ++i) {
+ chunk_appendf(out, "%02x", key[i]);
+ }
+ chunk_appendf(out, "\n");
+ }
+#endif
+
+ return 0;
+}
+
+
+/* IO handler of the details "show ssl cert <filename>".
+ * It uses a struct show_cert_ctx and ckchs_transaction in read-only.
+ */
+static int cli_io_handler_show_cert_detail(struct appctx *appctx)
+{
+ struct show_cert_ctx *ctx = appctx->svcctx;
+ struct ckch_store *ckchs = ctx->cur_ckchs;
+ struct buffer *out = alloc_trash_chunk();
+ int retval = 0;
+
+ if (!out)
+ goto end_no_putchk;
+
+ chunk_appendf(out, "Filename: ");
+ if (ckchs == ckchs_transaction.new_ckchs)
+ chunk_appendf(out, "*");
+ chunk_appendf(out, "%s\n", ckchs->path);
+
+ chunk_appendf(out, "Status: ");
+ if (ckchs->data->cert == NULL)
+ chunk_appendf(out, "Empty\n");
+ else if (LIST_ISEMPTY(&ckchs->ckch_inst))
+ chunk_appendf(out, "Unused\n");
+ else
+ chunk_appendf(out, "Used\n");
+
+ retval = show_cert_detail(ckchs->data->cert, ckchs->data->chain, out);
+ if (retval < 0)
+ goto end_no_putchk;
+ else if (retval)
+ goto end;
+
+ ckch_store_show_ocsp_certid(ckchs, out);
+
+end:
+ if (applet_putchk(appctx, out) == -1)
+ goto yield;
+
+end_no_putchk:
+ free_trash_chunk(out);
+ return 1;
+yield:
+ free_trash_chunk(out);
+ return 0; /* should come back */
+}
+
+
+/* IO handler of the details "show ssl cert <filename.ocsp>".
+ * It uses a show_cert_ctx.
+ */
+static int cli_io_handler_show_cert_ocsp_detail(struct appctx *appctx)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ struct show_cert_ctx *ctx = appctx->svcctx;
+ struct ckch_store *ckchs = ctx->cur_ckchs;
+ struct buffer *out = alloc_trash_chunk();
+ int from_transaction = ctx->transaction;
+
+ if (!out)
+ goto end_no_putchk;
+
+ /* If we try to display an ongoing transaction's OCSP response, we
+ * need to dump the ckch's ocsp_response buffer directly.
+ * Otherwise, we must rebuild the certificate's certid in order to
+ * look for the current OCSP response in the tree. */
+ if (from_transaction && ckchs->data->ocsp_response) {
+ if (ssl_ocsp_response_print(ckchs->data->ocsp_response, out))
+ goto end_no_putchk;
+ }
+ else {
+ unsigned char key[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ unsigned int key_length = 0;
+
+ if (ssl_ocsp_build_response_key(ckchs->data->ocsp_cid, (unsigned char*)key, &key_length) < 0)
+ goto end_no_putchk;
+
+ if (ssl_get_ocspresponse_detail(key, out))
+ goto end_no_putchk;
+ }
+
+ if (applet_putchk(appctx, out) == -1)
+ goto yield;
+
+end_no_putchk:
+ free_trash_chunk(out);
+ return 1;
+yield:
+ free_trash_chunk(out);
+ return 0; /* should come back */
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+}
+
+/* parsing function for 'show ssl cert [certfile]' */
+static int cli_parse_show_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_cert_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct ckch_store *ckchs;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+		return 1;
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't show!\nOperations on certificates are currently locked!\n");
+
+ /* check if there is a certificate to lookup */
+ if (*args[3]) {
+ int show_ocsp_detail = 0;
+ int from_transaction = 0;
+ char *end;
+
+ /* We manage the special case "certname.ocsp" through which we
+ * can show the details of an OCSP response. */
+ end = strrchr(args[3], '.');
+ if (end && strcmp(end+1, "ocsp") == 0) {
+ *end = '\0';
+ show_ocsp_detail = 1;
+ }
+
+ if (*args[3] == '*') {
+ from_transaction = 1;
+ if (!ckchs_transaction.new_ckchs)
+ goto error;
+
+ ckchs = ckchs_transaction.new_ckchs;
+
+ if (strcmp(args[3] + 1, ckchs->path) != 0)
+ goto error;
+
+ } else {
+ if ((ckchs = ckchs_lookup(args[3])) == NULL)
+ goto error;
+
+ }
+
+ ctx->cur_ckchs = ckchs;
+ /* use the IO handler that shows details */
+ if (show_ocsp_detail) {
+ ctx->transaction = from_transaction;
+ appctx->io_handler = cli_io_handler_show_cert_ocsp_detail;
+ }
+ else
+ appctx->io_handler = cli_io_handler_show_cert_detail;
+ }
+
+ return 0;
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_err(appctx, "Can't display the certificate: Not found or the certificate is a bundle!\n");
+}
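+
+/* Usage sketch for the parser above (file names are hypothetical):
+ *
+ *   show ssl cert                 -> list the known certificate stores
+ *   show ssl cert site.pem        -> details of "site.pem"
+ *   show ssl cert *site.pem       -> details of the ongoing transaction
+ *   show ssl cert site.pem.ocsp   -> details of its OCSP response
+ */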
+
+/* release function of the `commit ssl cert' command, free things and unlock the spinlock */
+static void cli_release_commit_cert(struct appctx *appctx)
+{
+ struct commit_cert_ctx *ctx = appctx->svcctx;
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ /* free every new sni_ctx and the new store, which are not in the trees so no spinlock there */
+ if (ctx->new_ckchs)
+ ckch_store_free(ctx->new_ckchs);
+ ha_free(&ctx->err);
+}
+
+
+/*
+ * Rebuild a new instance 'new_inst' based on an old instance 'ckchi' and a
+ * specific ckch_store.
+ * Returns 0 in case of success, 1 otherwise.
+ */
+int ckch_inst_rebuild(struct ckch_store *ckch_store, struct ckch_inst *ckchi,
+ struct ckch_inst **new_inst, char **err)
+{
+ int retval = 0;
+ int errcode = 0;
+ struct sni_ctx *sc0, *sc0s;
+ char **sni_filter = NULL;
+ int fcount = 0;
+
+ if (ckchi->crtlist_entry) {
+ sni_filter = ckchi->crtlist_entry->filters;
+ fcount = ckchi->crtlist_entry->fcount;
+ }
+
+ if (ckchi->is_server_instance)
+ errcode |= ckch_inst_new_load_srv_store(ckch_store->path, ckch_store, new_inst, err);
+ else
+ errcode |= ckch_inst_new_load_store(ckch_store->path, ckch_store, ckchi->bind_conf, ckchi->ssl_conf, sni_filter, fcount, new_inst, err);
+
+ if (errcode & ERR_CODE)
+ return 1;
+
+ /* if the previous ckchi was used as the default */
+ if (ckchi->is_default)
+ (*new_inst)->is_default = 1;
+
+ (*new_inst)->is_server_instance = ckchi->is_server_instance;
+ (*new_inst)->server = ckchi->server;
+ /* Create a new SSL_CTX and link it to the new instance. */
+ if ((*new_inst)->is_server_instance) {
+ retval = ssl_sock_prep_srv_ctx_and_inst(ckchi->server, (*new_inst)->ctx, (*new_inst));
+ if (retval)
+ return 1;
+ }
+
+ /* create the link to the crtlist_entry */
+ (*new_inst)->crtlist_entry = ckchi->crtlist_entry;
+
+	/* we need to initialize the SSL_CTXs generated */
+	/* this iterates over the newly generated SNIs in the new instance to prepare their SSL_CTX */
+	list_for_each_entry_safe(sc0, sc0s, &(*new_inst)->sni_ctx, by_ckch_inst) {
+		if (!sc0->order) { /* we initialize only the first SSL_CTX because it's shared by the other sni_ctxs */
+ errcode |= ssl_sock_prep_ctx_and_inst(ckchi->bind_conf, ckchi->ssl_conf, sc0->ctx, *new_inst, err);
+ if (errcode & ERR_CODE)
+ return 1;
+ }
+ }
+
+ return 0;
+}
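+
+/* Minimal usage sketch (hypothetical caller, shown for illustration only;
+ * the "commit ssl cert" I/O handler below follows this very pattern under
+ * the CKCH lock):
+ *
+ *	struct ckch_inst *ckchi, *new_inst;
+ *	char *err = NULL;
+ *
+ *	list_for_each_entry(ckchi, &old_ckchs->ckch_inst, by_ckchs) {
+ *		if (ckch_inst_rebuild(new_ckchs, ckchi, &new_inst, &err))
+ *			goto error;
+ *		LIST_APPEND(&new_ckchs->ckch_inst, &new_inst->by_ckchs);
+ *	}
+ */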
+
+/*
+ * Load all the new SNIs of a newly built ckch instance in the trees, or replace
+ * a server's main ckch instance.
+ */
+static void __ssl_sock_load_new_ckch_instance(struct ckch_inst *ckchi)
+{
+ /* The bind_conf will be null on server ckch_instances. */
+ if (ckchi->is_server_instance) {
+ int i;
+ /* a lock is needed here since we have to free the SSL cache */
+ HA_RWLOCK_WRLOCK(SSL_SERVER_LOCK, &ckchi->server->ssl_ctx.lock);
+ /* free the server current SSL_CTX */
+ SSL_CTX_free(ckchi->server->ssl_ctx.ctx);
+ /* Actual ssl context update */
+ SSL_CTX_up_ref(ckchi->ctx);
+ ckchi->server->ssl_ctx.ctx = ckchi->ctx;
+ ckchi->server->ssl_ctx.inst = ckchi;
+
+ /* flush the session cache of the server */
+ for (i = 0; i < global.nbthread; i++) {
+ ha_free(&ckchi->server->ssl_ctx.reused_sess[i].sni);
+ ha_free(&ckchi->server->ssl_ctx.reused_sess[i].ptr);
+ }
+ HA_RWLOCK_WRUNLOCK(SSL_SERVER_LOCK, &ckchi->server->ssl_ctx.lock);
+
+ } else {
+ HA_RWLOCK_WRLOCK(SNI_LOCK, &ckchi->bind_conf->sni_lock);
+ ssl_sock_load_cert_sni(ckchi, ckchi->bind_conf);
+ HA_RWLOCK_WRUNLOCK(SNI_LOCK, &ckchi->bind_conf->sni_lock);
+ }
+}
+
+/*
+ * Delete a ckch instance that was replaced after a CLI command.
+ */
+static void __ckch_inst_free_locked(struct ckch_inst *ckchi)
+{
+ if (ckchi->is_server_instance) {
+ /* no lock for servers */
+ ckch_inst_free(ckchi);
+ } else {
+ struct bind_conf __maybe_unused *bind_conf = ckchi->bind_conf;
+
+ HA_RWLOCK_WRLOCK(SNI_LOCK, &bind_conf->sni_lock);
+ ckch_inst_free(ckchi);
+ HA_RWLOCK_WRUNLOCK(SNI_LOCK, &bind_conf->sni_lock);
+ }
+}
+
+/* Replace a ckch_store in the ckch tree and insert all the dependencies,
+ * then free the previous dependencies and store.
+ * Used in the case of a certificate update.
+ *
+ * Every dependency must be allocated before using this function.
+ *
+ * This function can't fail as it only updates pointers, and does not allocate
+ * anything.
+ *
+ * /!\ This function must be used under the ckch lock. /!\
+ *
+ * - Insert every dependency (SNI, crtlist_entry, ckch_inst, etc.)
+ * - Delete the old ckch_store from the tree
+ * - Insert the new ckch_store
+ * - Free the old dependencies and the old ckch_store
+ */
+void ckch_store_replace(struct ckch_store *old_ckchs, struct ckch_store *new_ckchs)
+{
+ struct crtlist_entry *entry;
+ struct ckch_inst *ckchi, *ckchis;
+
+ LIST_SPLICE(&new_ckchs->crtlist_entry, &old_ckchs->crtlist_entry);
+ list_for_each_entry(entry, &new_ckchs->crtlist_entry, by_ckch_store) {
+ ebpt_delete(&entry->node);
+ /* change the ptr and reinsert the node */
+ entry->node.key = new_ckchs;
+ ebpt_insert(&entry->crtlist->entries, &entry->node);
+ }
+ /* insert the new ckch_insts in the crtlist_entry */
+ list_for_each_entry(ckchi, &new_ckchs->ckch_inst, by_ckchs) {
+ if (ckchi->crtlist_entry)
+ LIST_INSERT(&ckchi->crtlist_entry->ckch_inst, &ckchi->by_crtlist_entry);
+ }
+ /* First, we insert every new SNIs in the trees, also replace the default_ctx */
+ list_for_each_entry_safe(ckchi, ckchis, &new_ckchs->ckch_inst, by_ckchs) {
+ __ssl_sock_load_new_ckch_instance(ckchi);
+ }
+ /* delete the old sni_ctx, the old ckch_insts and the ckch_store */
+ list_for_each_entry_safe(ckchi, ckchis, &old_ckchs->ckch_inst, by_ckchs) {
+ __ckch_inst_free_locked(ckchi);
+ }
+
+ ckch_store_free(old_ckchs);
+ ebst_insert(&ckchs_tree, &new_ckchs->node);
+}
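+
+/* Illustrative update pipeline (sketch, names hypothetical): a runtime
+ * certificate update duplicates the store, loads the new material into the
+ * copy, rebuilds each instance, then swaps the stores. In the CLI path the
+ * CKCH lock is taken by the parsing function and released by the release
+ * handler, so the whole sequence runs under the lock:
+ *
+ *	new_ckchs = ckchs_dup(old_ckchs);
+ *	(load the new PEM into new_ckchs->data)
+ *	(ckch_inst_rebuild() each instance of old_ckchs)
+ *	ckch_store_replace(old_ckchs, new_ckchs);
+ */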
+
+
+/*
+ * This function tries to create the new ckch_insts and their SNIs
+ *
+ * /!\ don't forget to update __hlua_ckch_commit() if you change things here. /!\
+ */
+static int cli_io_handler_commit_cert(struct appctx *appctx)
+{
+ struct commit_cert_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ int y = 0;
+ struct ckch_store *old_ckchs, *new_ckchs = NULL;
+ struct ckch_inst *ckchi;
+
+ usermsgs_clr("CLI");
+ /* FIXME: Don't watch the other side !*/
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ goto end;
+
+ while (1) {
+ switch (ctx->state) {
+ case CERT_ST_INIT:
+			/* This state just prints the update message */
+ chunk_printf(&trash, "Committing %s", ckchs_transaction.path);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+
+ ctx->state = CERT_ST_GEN;
+ __fallthrough;
+ case CERT_ST_GEN:
+ /*
+ * This state generates the ckch instances with their
+ * sni_ctxs and SSL_CTX.
+ *
+			 * Since the SSL_CTX generation can be CPU-intensive, we
+			 * yield every 10 instances.
+ */
+
+ old_ckchs = ctx->old_ckchs;
+ new_ckchs = ctx->new_ckchs;
+
+ /* get the next ckchi to regenerate */
+ ckchi = ctx->next_ckchi;
+ /* we didn't start yet, set it to the first elem */
+ if (ckchi == NULL)
+ ckchi = LIST_ELEM(old_ckchs->ckch_inst.n, typeof(ckchi), by_ckchs);
+
+			/* walk through the old ckch_insts and create new ones using the updated ckchs */
+ list_for_each_entry_from(ckchi, &old_ckchs->ckch_inst, by_ckchs) {
+ struct ckch_inst *new_inst;
+
+ /* save the next ckchi to compute in case of yield */
+ ctx->next_ckchi = ckchi;
+
+				/* it takes a lot of CPU to create SSL_CTXs, so we yield every 10 CKCH instances */
+ if (y >= 10) {
+ applet_have_more_data(appctx); /* let's come back later */
+ goto yield;
+ }
+
+ /* display one dot per new instance */
+ if (applet_putstr(appctx, ".") == -1)
+ goto yield;
+
+ ctx->err = NULL;
+ if (ckch_inst_rebuild(new_ckchs, ckchi, &new_inst, &ctx->err)) {
+ ctx->state = CERT_ST_ERROR;
+ goto error;
+ }
+
+ /* link the new ckch_inst to the duplicate */
+ LIST_APPEND(&new_ckchs->ckch_inst, &new_inst->by_ckchs);
+ y++;
+ }
+ ctx->state = CERT_ST_INSERT;
+ __fallthrough;
+ case CERT_ST_INSERT:
+ /* The generation is finished, we can insert everything */
+
+ old_ckchs = ctx->old_ckchs;
+ new_ckchs = ctx->new_ckchs;
+
+ /* insert everything and remove the previous objects */
+ ckch_store_replace(old_ckchs, new_ckchs);
+ ctx->new_ckchs = ctx->old_ckchs = NULL;
+ ctx->state = CERT_ST_SUCCESS;
+ __fallthrough;
+ case CERT_ST_SUCCESS:
+ chunk_printf(&trash, "\n%sSuccess!\n", usermsgs_str());
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = CERT_ST_FIN;
+ __fallthrough;
+ case CERT_ST_FIN:
+			/* the transaction is finished, we can set everything to NULL */
+ ckchs_transaction.new_ckchs = NULL;
+ ckchs_transaction.old_ckchs = NULL;
+ ckchs_transaction.path = NULL;
+ goto end;
+
+ case CERT_ST_ERROR:
+ error:
+			chunk_printf(&trash, "\n%s%sFailed!\n", usermsgs_str(), ctx->err ? ctx->err : "");
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = CERT_ST_FIN;
+ break;
+ }
+ }
+end:
+ usermsgs_clr(NULL);
+ /* success: call the release function and don't come back */
+ return 1;
+
+yield:
+ usermsgs_clr(NULL);
+ return 0; /* should come back */
+}
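+
+/* Sample output of the handler above (illustrative, path hypothetical): one
+ * dot is printed per rebuilt instance, so committing a store with three
+ * instances shows:
+ *
+ *   Committing /etc/haproxy/certs/site.pem...
+ *   Success!
+ *
+ * On a rebuild failure the accumulated user messages are printed followed
+ * by "Failed!" instead.
+ */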
+
+/*
+ * Parsing function of 'commit ssl cert'
+ */
+static int cli_parse_commit_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct commit_cert_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'commit ssl cert' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't commit the certificate!\nOperations on certificates are currently locked!\n");
+
+ if (!ckchs_transaction.path) {
+		memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(ckchs_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", ckchs_transaction.path, args[3]);
+ goto error;
+ }
+
+ /* if a certificate is here, a private key must be here too */
+ if (ckchs_transaction.new_ckchs->data->cert && !ckchs_transaction.new_ckchs->data->key) {
+ memprintf(&err, "The transaction must contain at least a certificate and a private key!\n");
+ goto error;
+ }
+
+ if (!X509_check_private_key(ckchs_transaction.new_ckchs->data->cert, ckchs_transaction.new_ckchs->data->key)) {
+ memprintf(&err, "inconsistencies between private key and certificate loaded '%s'.\n", ckchs_transaction.path);
+ goto error;
+ }
+
+ /* init the appctx structure */
+ ctx->state = CERT_ST_INIT;
+ ctx->next_ckchi = NULL;
+ ctx->new_ckchs = ckchs_transaction.new_ckchs;
+ ctx->old_ckchs = ckchs_transaction.old_ckchs;
+
+	/* we don't unlock here, it will be unlocked after the IO handler, in the release handler */
+ return 0;
+
+error:
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "%sCan't commit %s!\n", err ? err : "", args[3]);
+
+ return cli_dynerr(appctx, err);
+}
+
+
+
+
+/*
+ * Parsing function of `set ssl cert`, it updates or creates a temporary ckch.
+ * It uses a set_cert_ctx context, and ckchs_transaction under a lock.
+ */
+static int cli_parse_set_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct ckch_store *new_ckchs = NULL;
+ struct ckch_store *old_ckchs = NULL;
+ char *err = NULL;
+ int i;
+ int errcode = 0;
+ char *end;
+ struct cert_exts *cert_ext = &cert_exts[0]; /* default one, PEM */
+ struct ckch_data *data;
+ struct buffer *buf;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || !payload)
+ return cli_err(appctx, "'set ssl cert' expects a filename and a certificate as a payload\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't update the certificate!\nOperations on certificates are currently locked!\n");
+
+ if ((buf = alloc_trash_chunk()) == NULL) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (!chunk_strcpy(buf, args[3])) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* check which type of file we want to update */
+ for (i = 0; cert_exts[i].ext != NULL; i++) {
+ end = strrchr(buf->area, '.');
+ if (end && *cert_exts[i].ext && (strcmp(end + 1, cert_exts[i].ext) == 0)) {
+ *end = '\0';
+ buf->data = strlen(buf->area);
+ cert_ext = &cert_exts[i];
+ break;
+ }
+ }
+
+ /* if there is an ongoing transaction */
+ if (ckchs_transaction.path) {
+ /* if there is an ongoing transaction, check if this is the same file */
+ if (strcmp(ckchs_transaction.path, buf->area) != 0) {
+ /* we didn't find the transaction, must try more cases below */
+
+ /* if the del-ext option is activated we should try to take a look at a ".crt" too. */
+ if (cert_ext->type != CERT_TYPE_PEM && global_ssl.extra_files_noext) {
+ if (!chunk_strcat(buf, ".crt")) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (strcmp(ckchs_transaction.path, buf->area) != 0) {
+					/* remove the ".crt" suffix for the error message */
+					*(b_orig(buf) + b_data(buf) - strlen(".crt")) = '\0';
+					b_sub(buf, strlen(".crt"));
+
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", ckchs_transaction.path, buf->area);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+ }
+
+ old_ckchs = ckchs_transaction.new_ckchs;
+
+ } else {
+
+ /* lookup for the certificate in the tree */
+ old_ckchs = ckchs_lookup(buf->area);
+
+ if (!old_ckchs) {
+ /* if the del-ext option is activated we should try to take a look at a ".crt" too. */
+ if (cert_ext->type != CERT_TYPE_PEM && global_ssl.extra_files_noext) {
+ if (!chunk_strcat(buf, ".crt")) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ old_ckchs = ckchs_lookup(buf->area);
+ }
+ }
+ }
+
+ if (!old_ckchs) {
+ memprintf(&err, "%sCan't replace a certificate which is not referenced by the configuration!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* duplicate the ckch store */
+ new_ckchs = ckchs_dup(old_ckchs);
+ if (!new_ckchs) {
+ memprintf(&err, "%sCannot allocate memory!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* Reset the OCSP CID */
+ if (cert_ext->type == CERT_TYPE_PEM || cert_ext->type == CERT_TYPE_KEY ||
+ cert_ext->type == CERT_TYPE_ISSUER) {
+ OCSP_CERTID_free(new_ckchs->data->ocsp_cid);
+ new_ckchs->data->ocsp_cid = NULL;
+ }
+
+ data = new_ckchs->data;
+
+ /* apply the change on the duplicate */
+ if (cert_ext->load(buf->area, payload, data, &err) != 0) {
+ memprintf(&err, "%sCan't load the payload\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+	/* we succeeded, we can save the ckchs in the transaction */
+
+ /* if there wasn't a transaction, update the old ckchs */
+ if (!ckchs_transaction.old_ckchs) {
+ ckchs_transaction.old_ckchs = old_ckchs;
+ ckchs_transaction.path = old_ckchs->path;
+ err = memprintf(&err, "Transaction created for certificate %s!\n", ckchs_transaction.path);
+	} else {
+		err = memprintf(&err, "Transaction updated for certificate %s!\n", ckchs_transaction.path);
+	}
+
+ /* free the previous ckchs if there was a transaction */
+ ckch_store_free(ckchs_transaction.new_ckchs);
+
+ ckchs_transaction.new_ckchs = new_ckchs;
+
+	/* the SNI ctxs will be created later in the IO handler */
+
+end:
+ free_trash_chunk(buf);
+
+ if (errcode & ERR_CODE) {
+ ckch_store_free(new_ckchs);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, memprintf(&err, "%sCan't update %s!\n", err ? err : "", args[3]));
+ } else {
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+ }
+ /* TODO: handle the ERR_WARN which are not handled because of the io_handler */
+}
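+
+/* Usage sketch (hypothetical path and socket; the "<<" marker starts a
+ * multi-line CLI payload, terminated by an empty line):
+ *
+ *   $ echo -e "set ssl cert /etc/haproxy/certs/site.pem <<\n$(cat site.pem.new)\n" | \
+ *         socat stdio /var/run/haproxy.sock
+ *   Transaction created for certificate /etc/haproxy/certs/site.pem!
+ */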
+
+/* parsing function of 'abort ssl cert' */
+static int cli_parse_abort_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'abort ssl cert' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't abort!\nOperations on certificates are currently locked!\n");
+
+ if (!ckchs_transaction.path) {
+ memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(ckchs_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to abort a transaction for '%s'\n", ckchs_transaction.path, args[3]);
+ goto error;
+ }
+
+ /* Only free the ckchs there, because the SNI and instances were not generated yet */
+ ckch_store_free(ckchs_transaction.new_ckchs);
+ ckchs_transaction.new_ckchs = NULL;
+ ckchs_transaction.old_ckchs = NULL;
+ ckchs_transaction.path = NULL;
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ err = memprintf(&err, "Transaction aborted for certificate '%s'!\n", args[3]);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ return cli_dynerr(appctx, err);
+}
+
+/* parsing function of 'new ssl cert' */
+static int cli_parse_new_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct ckch_store *store;
+ char *err = NULL;
+ char *path;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'new ssl cert' expects a filename\n");
+
+ path = args[3];
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't create a certificate!\nOperations on certificates are currently locked!\n");
+
+ store = ckchs_lookup(path);
+ if (store != NULL) {
+ memprintf(&err, "Certificate '%s' already exists!\n", path);
+ store = NULL; /* we don't want to free it */
+ goto error;
+ }
+	/* we won't support multi-certificate bundles here */
+ store = ckch_store_new(path);
+ if (!store) {
+ memprintf(&err, "unable to allocate memory.\n");
+ goto error;
+ }
+
+ /* insert into the ckchs tree */
+ ebst_insert(&ckchs_tree, &store->node);
+ memprintf(&err, "New empty certificate store '%s'!\n", args[3]);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+error:
+ free(store);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
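+
+/* Illustrative lifecycle of a runtime-created certificate (names
+ * hypothetical): a store created with "new ssl cert" stays empty and unused
+ * until it is filled and committed:
+ *
+ *   new ssl cert /etc/haproxy/certs/extra.pem
+ *   set ssl cert /etc/haproxy/certs/extra.pem <<payload>>
+ *   commit ssl cert /etc/haproxy/certs/extra.pem
+ *
+ * Once unused again it can be removed with "del ssl cert" below.
+ */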
+
+/* parsing function of 'del ssl cert' */
+static int cli_parse_del_cert(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct ckch_store *store;
+ char *err = NULL;
+ char *filename;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'del ssl cert' expects a certificate name\n");
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't delete the certificate!\nOperations on certificates are currently locked!\n");
+
+ filename = args[3];
+
+ if (ckchs_transaction.path && strcmp(ckchs_transaction.path, filename) == 0) {
+ memprintf(&err, "ongoing transaction for the certificate '%s'", filename);
+ goto error;
+ }
+
+ store = ckchs_lookup(filename);
+ if (store == NULL) {
+ memprintf(&err, "certificate '%s' doesn't exist!\n", filename);
+ goto error;
+ }
+ if (!LIST_ISEMPTY(&store->ckch_inst)) {
+ memprintf(&err, "certificate '%s' in use, can't be deleted!\n", filename);
+ goto error;
+ }
+
+ ebmb_delete(&store->node);
+ ckch_store_free(store);
+
+ memprintf(&err, "Certificate '%s' deleted!\n", filename);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ memprintf(&err, "Can't remove the certificate: %s\n", err ? err : "");
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+
+
+/* parsing function of 'new ssl ca-file' */
+static int cli_parse_new_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *cafile_entry;
+ char *err = NULL;
+ char *path;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'new ssl ca-file' expects a filename\n");
+
+ path = args[3];
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't create a CA file!\nOperations on certificates are currently locked!\n");
+
+ cafile_entry = ssl_store_get_cafile_entry(path, 0);
+ if (cafile_entry) {
+ memprintf(&err, "CA file '%s' already exists!\n", path);
+ goto error;
+ }
+
+ cafile_entry = ssl_store_create_cafile_entry(path, NULL, CAFILE_CERT);
+ if (!cafile_entry) {
+ memprintf(&err, "%sCannot allocate memory!\n",
+ err ? err : "");
+ goto error;
+ }
+
+ /* Add the newly created cafile_entry to the tree so that
+ * any new ckch instance created from now can use it. */
+ if (ssl_store_add_uncommitted_cafile_entry(cafile_entry))
+ goto error;
+
+ memprintf(&err, "New CA file created '%s'!\n", path);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/*
+ * Parsing function of `set ssl ca-file` and `add ssl ca-file`.
+ */
+static int cli_parse_set_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *old_cafile_entry = NULL;
+ struct cafile_entry *new_cafile_entry = NULL;
+ char *err = NULL;
+ int errcode = 0;
+ struct buffer *buf;
+ int add_cmd = 0;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ /* this is "add ssl ca-file" */
+ if (*args[0] == 'a')
+ add_cmd = 1;
+
+ if (!*args[3] || !payload)
+ return cli_err(appctx, "'set ssl ca-file' expects a filename and CAs as a payload\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't update the CA file!\nOperations on certificates are currently locked!\n");
+
+ if ((buf = alloc_trash_chunk()) == NULL) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (!chunk_strcpy(buf, args[3])) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ old_cafile_entry = NULL;
+ new_cafile_entry = NULL;
+
+ /* if there is an ongoing transaction */
+ if (cafile_transaction.path) {
+ /* if there is an ongoing transaction, check if this is the same file */
+ if (strcmp(cafile_transaction.path, buf->area) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", cafile_transaction.path, buf->area);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ old_cafile_entry = cafile_transaction.old_cafile_entry;
+ } else {
+ /* lookup for the certificate in the tree */
+ old_cafile_entry = ssl_store_get_cafile_entry(buf->area, 0);
+ }
+
+ if (!old_cafile_entry) {
+ memprintf(&err, "%sCan't replace a CA file which is not referenced by the configuration!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* if the transaction is new, duplicate the old_ca_file_entry, otherwise duplicate the cafile in the current transaction */
+ if (cafile_transaction.new_cafile_entry)
+ new_cafile_entry = ssl_store_dup_cafile_entry(cafile_transaction.new_cafile_entry);
+ else
+ new_cafile_entry = ssl_store_dup_cafile_entry(old_cafile_entry);
+
+ if (!new_cafile_entry) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+	/* Fill the new entry with the new CAs. The add_cmd variable determines
+	 * whether we flush the X509_STORE or not. */
+ if (ssl_store_load_ca_from_buf(new_cafile_entry, payload, add_cmd)) {
+ memprintf(&err, "%sInvalid payload\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+	/* we succeeded, we can save the ca in the transaction */
+
+ /* if there wasn't a transaction, update the old CA */
+ if (!cafile_transaction.old_cafile_entry) {
+ cafile_transaction.old_cafile_entry = old_cafile_entry;
+ cafile_transaction.path = old_cafile_entry->path;
+ err = memprintf(&err, "transaction created for CA %s!\n", cafile_transaction.path);
+ } else {
+ err = memprintf(&err, "transaction updated for CA %s!\n", cafile_transaction.path);
+ }
+
+ /* free the previous CA if there was a transaction */
+ ssl_store_delete_cafile_entry(cafile_transaction.new_cafile_entry);
+
+ cafile_transaction.new_cafile_entry = new_cafile_entry;
+
+	/* the SNI ctxs will be created later in the IO handler */
+
+end:
+ free_trash_chunk(buf);
+
+ if (errcode & ERR_CODE) {
+ ssl_store_delete_cafile_entry(new_cafile_entry);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, memprintf(&err, "%sCan't update %s!\n", err ? err : "", args[3]));
+ } else {
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+ }
+}
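+
+/* Usage sketch ("set" replaces the whole store, "add" appends to the
+ * pending one; names hypothetical):
+ *
+ *   set ssl ca-file ca.crt <<payload with the full new set of CAs>>
+ *   add ssl ca-file ca.crt <<payload with one extra CA to append>>
+ *   commit ssl ca-file ca.crt
+ */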
+
+
+/*
+ * Parsing function of 'commit ssl ca-file'.
+ * It uses a commit_cacrlfile_ctx that's also shared with "commit ssl crl-file".
+ */
+static int cli_parse_commit_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct commit_cacrlfile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'commit ssl ca-file' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't commit the CA file!\nOperations on certificates are currently locked!\n");
+
+ if (!cafile_transaction.path) {
+		memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(cafile_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", cafile_transaction.path, args[3]);
+ goto error;
+ }
+ /* init the appctx structure */
+ ctx->state = CACRL_ST_INIT;
+ ctx->next_ckchi_link = NULL;
+ ctx->old_entry = cafile_transaction.old_cafile_entry;
+ ctx->new_entry = cafile_transaction.new_cafile_entry;
+ ctx->cafile_type = CAFILE_CERT;
+
+ return 0;
+
+error:
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "%sCan't commit %s!\n", err ? err : "", args[3]);
+
+ return cli_dynerr(appctx, err);
+}
+
+/*
+ * This function tries to create new ckch instances and their SNIs using a newly
+ * set certificate authority (CA file) or a newly set Certificate Revocation
+ * List (CRL), depending on the command being called.
+ */
+static int cli_io_handler_commit_cafile_crlfile(struct appctx *appctx)
+{
+ struct commit_cacrlfile_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ int y = 0;
+ struct cafile_entry *old_cafile_entry = ctx->old_entry;
+ struct cafile_entry *new_cafile_entry = ctx->new_entry;
+ struct ckch_inst_link *ckchi_link;
+ char *path;
+
+ /* FIXME: Don't watch the other side !*/
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ goto end;
+
+ /* The ctx was already validated by the ca-file/crl-file parsing
+ * function. Entries can only be NULL in CACRL_ST_SUCCESS or
+ * CACRL_ST_FIN states
+ */
+ switch (ctx->cafile_type) {
+ case CAFILE_CERT:
+ path = cafile_transaction.path;
+ break;
+ case CAFILE_CRL:
+ path = crlfile_transaction.path;
+ break;
+ default:
+ path = NULL;
+ goto error;
+ }
+
+ while (1) {
+ switch (ctx->state) {
+ case CACRL_ST_INIT:
+			/* This state just prints the update message */
+ chunk_printf(&trash, "Committing %s", path);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+
+ ctx->state = CACRL_ST_GEN;
+ __fallthrough;
+ case CACRL_ST_GEN:
+ /*
+ * This state generates the ckch instances with their
+ * sni_ctxs and SSL_CTX.
+ *
+			 * Since the SSL_CTX generation can be CPU-intensive, we
+			 * yield every 10 instances.
+ */
+
+ /* get the next ckchi to regenerate */
+ ckchi_link = ctx->next_ckchi_link;
+
+ /* we didn't start yet, set it to the first elem */
+ if (ckchi_link == NULL) {
+ ckchi_link = LIST_ELEM(old_cafile_entry->ckch_inst_link.n, typeof(ckchi_link), list);
+ /* Add the newly created cafile_entry to the tree so that
+ * any new ckch instance created from now can use it. */
+ if (ssl_store_add_uncommitted_cafile_entry(new_cafile_entry)) {
+ ctx->state = CACRL_ST_ERROR;
+ goto error;
+ }
+ }
+
+ list_for_each_entry_from(ckchi_link, &old_cafile_entry->ckch_inst_link, list) {
+ struct ckch_inst *new_inst;
+
+ /* save the next ckchi to compute */
+ ctx->next_ckchi_link = ckchi_link;
+
+				/* it takes a lot of CPU to create SSL_CTXs, so we yield every 10 CKCH instances */
+ if (y >= 10) {
+ applet_have_more_data(appctx); /* let's come back later */
+ goto yield;
+ }
+
+ /* display one dot per new instance */
+ if (applet_putstr(appctx, ".") == -1)
+ goto yield;
+
+				/* Rebuild a new ckch instance that uses the same ckch_store
+				 * as a reference ckch instance but will use a new CA file. */
+ ctx->err = NULL;
+ if (ckch_inst_rebuild(ckchi_link->ckch_inst->ckch_store, ckchi_link->ckch_inst, &new_inst, &ctx->err)) {
+ ctx->state = CACRL_ST_ERROR;
+ goto error;
+ }
+
+ y++;
+ }
+
+ ctx->state = CACRL_ST_INSERT;
+ __fallthrough;
+ case CACRL_ST_INSERT:
+ /* The generation is finished, we can insert everything */
+
+ /* insert the new ckch_insts in the crtlist_entry */
+ list_for_each_entry(ckchi_link, &new_cafile_entry->ckch_inst_link, list) {
+ if (ckchi_link->ckch_inst->crtlist_entry)
+ LIST_INSERT(&ckchi_link->ckch_inst->crtlist_entry->ckch_inst,
+ &ckchi_link->ckch_inst->by_crtlist_entry);
+ }
+
+ /* First, we insert every new SNIs in the trees, also replace the default_ctx */
+ list_for_each_entry(ckchi_link, &new_cafile_entry->ckch_inst_link, list) {
+ __ssl_sock_load_new_ckch_instance(ckchi_link->ckch_inst);
+ }
+
+ /* delete the old sni_ctx, the old ckch_insts
+ * and the ckch_store. ckch_inst_free() also
+ * manipulates the list so it's cleaner to loop
+ * until it's empty */
+ while (!LIST_ISEMPTY(&old_cafile_entry->ckch_inst_link)) {
+ ckchi_link = LIST_ELEM(old_cafile_entry->ckch_inst_link.n, typeof(ckchi_link), list);
+
+ LIST_DEL_INIT(&ckchi_link->list); /* must reinit because ckch_inst checks the list */
+ __ckch_inst_free_locked(ckchi_link->ckch_inst);
+ free(ckchi_link);
+ }
+
+ /* Remove the old cafile entry from the tree */
+ ebmb_delete(&old_cafile_entry->node);
+ ssl_store_delete_cafile_entry(old_cafile_entry);
+
+ ctx->old_entry = ctx->new_entry = NULL;
+ ctx->state = CACRL_ST_SUCCESS;
+ __fallthrough;
+ case CACRL_ST_SUCCESS:
+ if (applet_putstr(appctx, "\nSuccess!\n") == -1)
+ goto yield;
+ ctx->state = CACRL_ST_FIN;
+ __fallthrough;
+ case CACRL_ST_FIN:
+			/* the transaction is finished, we can set everything to NULL */
+ switch (ctx->cafile_type) {
+ case CAFILE_CERT:
+ cafile_transaction.old_cafile_entry = NULL;
+ cafile_transaction.new_cafile_entry = NULL;
+ cafile_transaction.path = NULL;
+ break;
+ case CAFILE_CRL:
+ crlfile_transaction.old_crlfile_entry = NULL;
+ crlfile_transaction.new_crlfile_entry = NULL;
+ crlfile_transaction.path = NULL;
+ break;
+ }
+ goto end;
+
+ case CACRL_ST_ERROR:
+ error:
+			chunk_printf(&trash, "\n%sFailed!\n", ctx->err ? ctx->err : "");
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = CACRL_ST_FIN;
+ break;
+ }
+ }
+end:
+ /* success: call the release function and don't come back */
+ return 1;
+yield:
+ return 0; /* should come back */
+}
+
+
+/* parsing function of 'abort ssl ca-file' */
+static int cli_parse_abort_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'abort ssl ca-file' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't abort!\nOperations on certificates are currently locked!\n");
+
+ if (!cafile_transaction.path) {
+ memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(cafile_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to abort a transaction for '%s'\n", cafile_transaction.path, args[3]);
+ goto error;
+ }
+
+ /* Only free the uncommitted cafile_entry here, because the SNI and instances were not generated yet */
+ ssl_store_delete_cafile_entry(cafile_transaction.new_cafile_entry);
+ cafile_transaction.new_cafile_entry = NULL;
+ cafile_transaction.old_cafile_entry = NULL;
+ cafile_transaction.path = NULL;
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+	err = memprintf(&err, "Transaction aborted for the CA file '%s'!\n", args[3]);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ return cli_dynerr(appctx, err);
+}
+
+/* release function of the `commit ssl ca-file' command, free things and unlock the spinlock.
+ * It uses a commit_cacrlfile_ctx context.
+ */
+static void cli_release_commit_cafile(struct appctx *appctx)
+{
+ struct commit_cacrlfile_ctx *ctx = appctx->svcctx;
+ struct cafile_entry *new_cafile_entry = ctx->new_entry;
+
+ /* Remove the uncommitted cafile_entry from the tree. */
+ if (new_cafile_entry) {
+ ebmb_delete(&new_cafile_entry->node);
+ ssl_store_delete_cafile_entry(new_cafile_entry);
+ }
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ ha_free(&ctx->err);
+}
+
+
+/* IO handler for the details of "show ssl ca-file <filename[:index]>".
+ * It uses a show_cafile_ctx context, and the global
+ * cafile_transaction.new_cafile_entry in read-only.
+ */
+static int cli_io_handler_show_cafile_detail(struct appctx *appctx)
+{
+ struct show_cafile_ctx *ctx = appctx->svcctx;
+ struct cafile_entry *cafile_entry = ctx->cur_cafile_entry;
+ struct buffer *out = alloc_trash_chunk();
+ int i = 0;
+ X509 *cert;
+ STACK_OF(X509_OBJECT) *objs;
+ int retval = 0;
+ int ca_index = ctx->ca_index;
+ int show_all = ctx->show_all;
+
+ if (!out)
+ goto end_no_putchk;
+
+ chunk_appendf(out, "Filename: ");
+ if (cafile_entry == cafile_transaction.new_cafile_entry)
+ chunk_appendf(out, "*");
+ chunk_appendf(out, "%s\n", cafile_entry->path);
+
+ chunk_appendf(out, "Status: ");
+ if (!cafile_entry->ca_store)
+ chunk_appendf(out, "Empty\n");
+ else if (LIST_ISEMPTY(&cafile_entry->ckch_inst_link))
+ chunk_appendf(out, "Unused\n");
+ else
+ chunk_appendf(out, "Used\n");
+
+ if (!cafile_entry->ca_store)
+ goto end;
+
+ objs = X509_STORE_get0_objects(cafile_entry->ca_store);
+ for (i = ca_index; i < sk_X509_OBJECT_num(objs); i++) {
+
+ cert = X509_OBJECT_get0_X509(sk_X509_OBJECT_value(objs, i));
+ if (!cert)
+ continue;
+
+		/* certificate indexes start at 1 on the CLI output */
+ chunk_appendf(out, " \nCertificate #%d:\n", i+1);
+ retval = show_cert_detail(cert, NULL, out);
+ if (retval < 0)
+ goto end_no_putchk;
+ else if (retval)
+ goto yield;
+
+ if (applet_putchk(appctx, out) == -1)
+ goto yield;
+
+ if (!show_all) /* only need to dump one certificate */
+ goto end;
+ }
+
+end:
+ free_trash_chunk(out);
+ return 1; /* end, don't come back */
+
+end_no_putchk:
+ free_trash_chunk(out);
+ return 1;
+yield:
+ /* save the current state */
+ ctx->ca_index = i;
+ free_trash_chunk(out);
+ return 0; /* should come back */
+}
+
+
+/* parsing function for 'show ssl ca-file [cafile[:index]]'.
+ * It prepares a show_cafile_ctx context, and checks the global
+ * cafile_transaction under the ckch_lock (read only).
+ */
+static int cli_parse_show_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_cafile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct cafile_entry *cafile_entry;
+ int ca_index = 0;
+ char *colons;
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+		return 1;
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't show!\nOperations on certificates are currently locked!\n");
+
+ ctx->show_all = 1; /* show all certificates */
+ ctx->ca_index = 0;
+ /* check if there is a certificate to lookup */
+ if (*args[3]) {
+
+ /* Look for an optional CA index after the CA file name */
+ colons = strchr(args[3], ':');
+ if (colons) {
+ char *endptr;
+
+ ca_index = strtol(colons + 1, &endptr, 10);
+ /* Indexes start at 1 */
+ if (colons + 1 == endptr || *endptr != '\0' || ca_index <= 0) {
+				memprintf(&err, "wrong CA index after the colon in '%s'!", args[3]);
+ goto error;
+ }
+ *colons = '\0';
+ ctx->ca_index = ca_index - 1; /* we start counting at 0 in the ca_store, but at 1 on the CLI */
+ ctx->show_all = 0; /* show only one certificate */
+ }
+
+ if (*args[3] == '*') {
+ if (!cafile_transaction.new_cafile_entry)
+ goto error;
+
+ cafile_entry = cafile_transaction.new_cafile_entry;
+
+ if (strcmp(args[3] + 1, cafile_entry->path) != 0)
+ goto error;
+
+ } else {
+ /* Get the "original" cafile_entry and not the
+ * uncommitted one if it exists. */
+ if ((cafile_entry = ssl_store_get_cafile_entry(args[3], 1)) == NULL || cafile_entry->type != CAFILE_CERT)
+ goto error;
+ }
+
+ ctx->cur_cafile_entry = cafile_entry;
+ /* use the IO handler that shows details */
+ appctx->io_handler = cli_io_handler_show_cafile_detail;
+ }
+
+ return 0;
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ if (err)
+ return cli_dynerr(appctx, err);
+	return cli_err(appctx, "Can't display the CA file: Not found!\n");
+}
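+
+/* Usage sketch for the parser above (names hypothetical):
+ *
+ *   show ssl ca-file            -> list all CA files with their cert counts
+ *   show ssl ca-file ca.crt     -> details of every certificate of "ca.crt"
+ *   show ssl ca-file ca.crt:2   -> details of its second certificate only
+ *   show ssl ca-file *ca.crt    -> details of the uncommitted transaction
+ */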
+
+
+/* release function of the 'show ssl ca-file' command */
+static void cli_release_show_cafile(struct appctx *appctx)
+{
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+}
+
+
+/* This function returns the number of certificates in a cafile_entry. */
+static int get_certificate_count(struct cafile_entry *cafile_entry)
+{
+ int cert_count = 0;
+ STACK_OF(X509_OBJECT) *objs;
+
+ if (cafile_entry && cafile_entry->ca_store) {
+ objs = X509_STORE_get0_objects(cafile_entry->ca_store);
+ if (objs)
+ cert_count = sk_X509_OBJECT_num(objs);
+ }
+ return cert_count;
+}
+
+/* IO handler of "show ssl ca-file". The command taking a specific CA file name
+ * is managed in cli_io_handler_show_cafile_detail.
+ * It uses a show_cafile_ctx and the global cafile_transaction.new_cafile_entry
+ * in read-only.
+ */
+static int cli_io_handler_show_cafile(struct appctx *appctx)
+{
+ struct show_cafile_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct ebmb_node *node;
+ struct cafile_entry *cafile_entry = NULL;
+
+ if (trash == NULL)
+ return 1;
+
+ if (!ctx->old_cafile_entry && cafile_transaction.old_cafile_entry) {
+ chunk_appendf(trash, "# transaction\n");
+ chunk_appendf(trash, "*%s", cafile_transaction.old_cafile_entry->path);
+ chunk_appendf(trash, " - %d certificate(s)\n", get_certificate_count(cafile_transaction.new_cafile_entry));
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ ctx->old_cafile_entry = cafile_transaction.new_cafile_entry;
+ }
+
+ /* First time in this io_handler. */
+ if (!ctx->cur_cafile_entry) {
+ chunk_appendf(trash, "# filename\n");
+ node = ebmb_first(&cafile_tree);
+ } else {
+ /* We yielded during a previous call. */
+ node = &ctx->cur_cafile_entry->node;
+ }
+
+ while (node) {
+ cafile_entry = ebmb_entry(node, struct cafile_entry, node);
+ if (cafile_entry->type == CAFILE_CERT) {
+ chunk_appendf(trash, "%s", cafile_entry->path);
+
+ chunk_appendf(trash, " - %d certificate(s)\n", get_certificate_count(cafile_entry));
+ }
+
+ node = ebmb_next(node);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+ ctx->cur_cafile_entry = NULL;
+ free_trash_chunk(trash);
+ return 1;
+yield:
+
+ free_trash_chunk(trash);
+ ctx->cur_cafile_entry = cafile_entry;
+ return 0; /* should come back */
+}
+
+/* parsing function of 'del ssl ca-file' */
+static int cli_parse_del_cafile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *cafile_entry;
+ char *err = NULL;
+ char *filename;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'del ssl ca-file' expects a CA file name\n");
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't delete the CA file!\nOperations on certificates are currently locked!\n");
+
+ filename = args[3];
+
+ if (cafile_transaction.path && strcmp(cafile_transaction.path, filename) == 0) {
+ memprintf(&err, "ongoing transaction for the CA file '%s'", filename);
+ goto error;
+ }
+
+ cafile_entry = ssl_store_get_cafile_entry(filename, 0);
+ if (!cafile_entry) {
+ memprintf(&err, "CA file '%s' doesn't exist!\n", filename);
+ goto error;
+ }
+
+ if (!LIST_ISEMPTY(&cafile_entry->ckch_inst_link)) {
+ memprintf(&err, "CA file '%s' in use, can't be deleted!\n", filename);
+ goto error;
+ }
+
+ /* Remove the cafile_entry from the tree */
+ ebmb_delete(&cafile_entry->node);
+ ssl_store_delete_cafile_entry(cafile_entry);
+
+ memprintf(&err, "CA file '%s' deleted!\n", filename);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ memprintf(&err, "Can't remove the CA file: %s\n", err ? err : "");
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/* parsing function of 'new ssl crl-file' */
+static int cli_parse_new_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *cafile_entry;
+ char *err = NULL;
+ char *path;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'new ssl crl-file' expects a filename\n");
+
+ path = args[3];
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't create a CRL file!\nOperations on certificates are currently locked!\n");
+
+ cafile_entry = ssl_store_get_cafile_entry(path, 0);
+ if (cafile_entry) {
+ memprintf(&err, "CRL file '%s' already exists!\n", path);
+ goto error;
+ }
+
+ cafile_entry = ssl_store_create_cafile_entry(path, NULL, CAFILE_CRL);
+ if (!cafile_entry) {
+ memprintf(&err, "%sCannot allocate memory!\n", err ? err : "");
+ goto error;
+ }
+
+ /* Add the newly created cafile_entry to the tree so that
+ * any new ckch instance created from now can use it. */
+ if (ssl_store_add_uncommitted_cafile_entry(cafile_entry))
+ goto error;
+
+ memprintf(&err, "New CRL file created '%s'!\n", path);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/* Parsing function of `set ssl crl-file` */
+static int cli_parse_set_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *old_crlfile_entry = NULL;
+ struct cafile_entry *new_crlfile_entry = NULL;
+ char *err = NULL;
+ int errcode = 0;
+ struct buffer *buf;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || !payload)
+ return cli_err(appctx, "'set ssl crl-file' expects a filename and CRLs as a payload\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't update the CRL file!\nOperations on certificates are currently locked!\n");
+
+ if ((buf = alloc_trash_chunk()) == NULL) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (!chunk_strcpy(buf, args[3])) {
+ memprintf(&err, "%sCan't allocate memory\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ old_crlfile_entry = NULL;
+ new_crlfile_entry = NULL;
+
+ /* if there is an ongoing transaction */
+ if (crlfile_transaction.path) {
+ /* if there is an ongoing transaction, check if this is the same file */
+ if (strcmp(crlfile_transaction.path, buf->area) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", crlfile_transaction.path, buf->area);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ old_crlfile_entry = crlfile_transaction.old_crlfile_entry;
+ }
+ else {
+ /* lookup for the certificate in the tree */
+ old_crlfile_entry = ssl_store_get_cafile_entry(buf->area, 0);
+ }
+
+ if (!old_crlfile_entry) {
+ memprintf(&err, "%sCan't replace a CRL file which is not referenced by the configuration!\n",
+ err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* Create a new cafile_entry without adding it to the cafile tree. */
+ new_crlfile_entry = ssl_store_create_cafile_entry(old_crlfile_entry->path, NULL, CAFILE_CRL);
+ if (!new_crlfile_entry) {
+ memprintf(&err, "%sCannot allocate memory!\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ /* Fill the new entry with the new CRL. */
+ if (ssl_store_load_ca_from_buf(new_crlfile_entry, payload, 0)) {
+ memprintf(&err, "%sInvalid payload\n", err ? err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+	/* we succeeded, we can save the crl in the transaction */
+
+ /* if there wasn't a transaction, update the old CRL */
+ if (!crlfile_transaction.old_crlfile_entry) {
+ crlfile_transaction.old_crlfile_entry = old_crlfile_entry;
+ crlfile_transaction.path = old_crlfile_entry->path;
+ err = memprintf(&err, "transaction created for CRL %s!\n", crlfile_transaction.path);
+ } else {
+ err = memprintf(&err, "transaction updated for CRL %s!\n", crlfile_transaction.path);
+ }
+
+ /* free the previous CRL file if there was a transaction */
+ ssl_store_delete_cafile_entry(crlfile_transaction.new_crlfile_entry);
+
+ crlfile_transaction.new_crlfile_entry = new_crlfile_entry;
+
+	/* the SNI ctxs will be created later in the IO handler */
+
+end:
+ free_trash_chunk(buf);
+
+ if (errcode & ERR_CODE) {
+ ssl_store_delete_cafile_entry(new_crlfile_entry);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, memprintf(&err, "%sCan't update %s!\n", err ? err : "", args[3]));
+ } else {
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+ }
+}
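+
+/* Usage sketch, mirroring the ca-file cycle (names hypothetical):
+ *
+ *   set ssl crl-file crl.pem <<new CRL in PEM form>>
+ *   commit ssl crl-file crl.pem
+ *
+ * or "abort ssl crl-file crl.pem" to discard the pending entry.
+ */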
+
+/* Parsing function of 'commit ssl crl-file'.
+ * It uses a commit_cacrlfile_ctx that's also shared with "commit ssl ca-file".
+ */
+static int cli_parse_commit_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct commit_cacrlfile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+		return cli_err(appctx, "'commit ssl crl-file' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't commit the CRL file!\nOperations on certificates are currently locked!\n");
+
+ if (!crlfile_transaction.path) {
+		memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(crlfile_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to set '%s'\n", crlfile_transaction.path, args[3]);
+ goto error;
+ }
+ /* init the appctx structure */
+ ctx->state = CACRL_ST_INIT;
+ ctx->next_ckchi_link = NULL;
+ ctx->old_entry = crlfile_transaction.old_crlfile_entry;
+ ctx->new_entry = crlfile_transaction.new_crlfile_entry;
+ ctx->cafile_type = CAFILE_CRL;
+
+ return 0;
+
+error:
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "%sCan't commit %s!\n", err ? err : "", args[3]);
+
+ return cli_dynerr(appctx, err);
+}
+
+
+/* release function of the `commit ssl crl-file' command, free things and unlock the spinlock.
+ * it uses a commit_cacrlfile_ctx that's the same as for "commit ssl ca-file".
+ */
+static void cli_release_commit_crlfile(struct appctx *appctx)
+{
+ struct commit_cacrlfile_ctx *ctx = appctx->svcctx;
+ struct cafile_entry *new_crlfile_entry = ctx->new_entry;
+
+ /* Remove the uncommitted cafile_entry from the tree. */
+ if (new_crlfile_entry) {
+ ebmb_delete(&new_crlfile_entry->node);
+ ssl_store_delete_cafile_entry(new_crlfile_entry);
+ }
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ ha_free(&ctx->err);
+}
+
+/* parsing function of 'del ssl crl-file' */
+static int cli_parse_del_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct cafile_entry *cafile_entry;
+ char *err = NULL;
+ char *filename;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'del ssl crl-file' expects a CRL file name\n");
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't delete the CRL file!\nOperations on certificates are currently locked!\n");
+
+ filename = args[3];
+
+ if (crlfile_transaction.path && strcmp(crlfile_transaction.path, filename) == 0) {
+ memprintf(&err, "ongoing transaction for the CRL file '%s'", filename);
+ goto error;
+ }
+
+ cafile_entry = ssl_store_get_cafile_entry(filename, 0);
+ if (!cafile_entry) {
+ memprintf(&err, "CRL file '%s' doesn't exist!\n", filename);
+ goto error;
+ }
+ if (cafile_entry->type != CAFILE_CRL) {
+ memprintf(&err, "'del ssl crl-file' does not work on CA files!\n");
+ goto error;
+ }
+
+ if (!LIST_ISEMPTY(&cafile_entry->ckch_inst_link)) {
+ memprintf(&err, "CRL file '%s' in use, can't be deleted!\n", filename);
+ goto error;
+ }
+
+ /* Remove the cafile_entry from the tree */
+ ebmb_delete(&cafile_entry->node);
+ ssl_store_delete_cafile_entry(cafile_entry);
+
+ memprintf(&err, "CRL file '%s' deleted!\n", filename);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ memprintf(&err, "Can't remove the CRL file: %s\n", err ? err : "");
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ return cli_dynerr(appctx, err);
+}
+
+/* parsing function of 'abort ssl crl-file' */
+static int cli_parse_abort_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3])
+ return cli_err(appctx, "'abort ssl crl-file' expects a filename\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't abort!\nOperations on certificates are currently locked!\n");
+
+ if (!crlfile_transaction.path) {
+ memprintf(&err, "No ongoing transaction!\n");
+ goto error;
+ }
+
+ if (strcmp(crlfile_transaction.path, args[3]) != 0) {
+ memprintf(&err, "The ongoing transaction is about '%s' but you are trying to abort a transaction for '%s'\n", crlfile_transaction.path, args[3]);
+ goto error;
+ }
+
+ /* Only free the uncommitted cafile_entry here, because the SNI and instances were not generated yet */
+ ssl_store_delete_cafile_entry(crlfile_transaction.new_crlfile_entry);
+ crlfile_transaction.new_crlfile_entry = NULL;
+ crlfile_transaction.old_crlfile_entry = NULL;
+ crlfile_transaction.path = NULL;
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+	err = memprintf(&err, "Transaction aborted for the CRL file '%s'!\n", args[3]);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+ return cli_dynerr(appctx, err);
+}
+
+
+/*
+ * Display a Certificate Revocation List's information.
+ * The information displayed is inspired by the output of 'openssl crl -in
+ * crl.pem -text'.
+ * Returns 0 in case of success.
+ */
+static int show_crl_detail(X509_CRL *crl, struct buffer *out)
+{
+ BIO *bio = NULL;
+ struct buffer *tmp = alloc_trash_chunk();
+ long version;
+ X509_NAME *issuer;
+ int write = -1;
+#ifndef USE_OPENSSL_WOLFSSL
+ STACK_OF(X509_REVOKED) *rev = NULL;
+ X509_REVOKED *rev_entry = NULL;
+ int i;
+#endif
+
+ if (!tmp)
+ return -1;
+
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ goto end;
+
+ /* Version (as displayed by 'openssl crl') */
+ version = X509_CRL_get_version(crl);
+ chunk_appendf(out, "Version %ld\n", version + 1);
+
+ /* Signature Algorithm */
+ chunk_appendf(out, "Signature Algorithm: %s\n", OBJ_nid2ln(X509_CRL_get_signature_nid(crl)));
+
+ /* Issuer */
+ chunk_appendf(out, "Issuer: ");
+ if ((issuer = X509_CRL_get_issuer(crl)) == NULL)
+ goto end;
+ if ((ssl_sock_get_dn_oneline(issuer, tmp)) == -1)
+ goto end;
+ *(tmp->area + tmp->data) = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+ /* Last Update */
+ chunk_appendf(out, "Last Update: ");
+ chunk_reset(tmp);
+ if (BIO_reset(bio) == -1)
+ goto end;
+ if (ASN1_TIME_print(bio, X509_CRL_get0_lastUpdate(crl)) == 0)
+ goto end;
+	write = BIO_read(bio, tmp->area, tmp->size-1);
+	if (write < 0)
+		goto end;
+	tmp->area[write] = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+
+ /* Next Update */
+ chunk_appendf(out, "Next Update: ");
+ chunk_reset(tmp);
+ if (BIO_reset(bio) == -1)
+ goto end;
+ if (ASN1_TIME_print(bio, X509_CRL_get0_nextUpdate(crl)) == 0)
+ goto end;
+	write = BIO_read(bio, tmp->area, tmp->size-1);
+	if (write < 0)
+		goto end;
+	tmp->area[write] = '\0';
+ chunk_appendf(out, "%s\n", tmp->area);
+
+#ifndef USE_OPENSSL_WOLFSSL
+ /* Revoked Certificates */
+ rev = X509_CRL_get_REVOKED(crl);
+ if (sk_X509_REVOKED_num(rev) > 0)
+ chunk_appendf(out, "Revoked Certificates:\n");
+ else
+ chunk_appendf(out, "No Revoked Certificates.\n");
+
+ for (i = 0; i < sk_X509_REVOKED_num(rev); i++) {
+ rev_entry = sk_X509_REVOKED_value(rev, i);
+
+ /* Serial Number and Revocation Date */
+ if (BIO_reset(bio) == -1)
+ goto end;
+ BIO_printf(bio , " Serial Number: ");
+ i2a_ASN1_INTEGER(bio, (ASN1_INTEGER*)X509_REVOKED_get0_serialNumber(rev_entry));
+ BIO_printf(bio, "\n Revocation Date: ");
+ if (ASN1_TIME_print(bio, X509_REVOKED_get0_revocationDate(rev_entry)) == 0)
+ goto end;
+ BIO_printf(bio, "\n");
+
+ write = BIO_read(bio, tmp->area, tmp->size-1);
+ tmp->area[write] = '\0';
+ chunk_appendf(out, "%s", tmp->area);
+ }
+#endif /* not USE_OPENSSL_WOLFSSL */
+
+end:
+ free_trash_chunk(tmp);
+ if (bio)
+ BIO_free(bio);
+
+ return 0;
+}
+
+/* IO handler for the details of "show ssl crl-file <filename[:index]>".
+ * It uses show_crlfile_ctx and the global
+ * crlfile_transaction.new_crlfile_entry in read-only mode.
+ */
+static int cli_io_handler_show_crlfile_detail(struct appctx *appctx)
+{
+ struct show_crlfile_ctx *ctx = appctx->svcctx;
+ struct cafile_entry *cafile_entry = ctx->cafile_entry;
+ struct buffer *out = alloc_trash_chunk();
+ int i;
+ X509_CRL *crl;
+ STACK_OF(X509_OBJECT) *objs;
+ int retval = 0;
+ int index = ctx->index;
+
+ if (!out)
+ goto end_no_putchk;
+
+ chunk_appendf(out, "Filename: ");
+ if (cafile_entry == crlfile_transaction.new_crlfile_entry)
+ chunk_appendf(out, "*");
+ chunk_appendf(out, "%s\n", cafile_entry->path);
+
+ chunk_appendf(out, "Status: ");
+ if (!cafile_entry->ca_store)
+ chunk_appendf(out, "Empty\n");
+ else if (LIST_ISEMPTY(&cafile_entry->ckch_inst_link))
+ chunk_appendf(out, "Unused\n");
+ else
+ chunk_appendf(out, "Used\n");
+
+ if (!cafile_entry->ca_store)
+ goto end;
+
+ objs = X509_STORE_get0_objects(cafile_entry->ca_store);
+ for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
+ crl = X509_OBJECT_get0_X509_CRL(sk_X509_OBJECT_value(objs, i));
+ if (!crl)
+ continue;
+
+ /* CRL indexes start at 1 on the CLI output. */
+ if (index && index-1 != i)
+ continue;
+
+ chunk_appendf(out, " \nCertificate Revocation List #%d:\n", i+1);
+ retval = show_crl_detail(crl, out);
+ if (retval < 0)
+ goto end_no_putchk;
+ else if (retval || index)
+ goto end;
+ }
+
+end:
+ if (applet_putchk(appctx, out) == -1)
+ goto yield;
+
+end_no_putchk:
+ free_trash_chunk(out);
+ return 1;
+yield:
+ free_trash_chunk(out);
+ return 0; /* should come back */
+}
+
+/* parsing function for 'show ssl crl-file [crlfile[:index]]'.
+ * It sets the context to a show_crlfile_ctx, and the global
+ * crlfile_transaction.new_crlfile_entry under the ckch_lock.
+ */
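+/* Hypothetical CLI examples:
+ *   show ssl crl-file              lists the known CRL files
+ *   show ssl crl-file crl.pem      shows the details of every CRL in crl.pem
+ *   show ssl crl-file crl.pem:1    shows the details of the first CRL only
+ */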
+static int cli_parse_show_crlfile(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_crlfile_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct cafile_entry *cafile_entry;
+ long index = 0;
+ char *colons;
+ char *err = NULL;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return cli_err(appctx, "Can't allocate memory!\n");
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't show!\nOperations on certificates are currently locked!\n");
+
+ /* check if there is a certificate to lookup */
+ if (*args[3]) {
+
+ /* Look for an optional index after the CRL file name */
+ colons = strchr(args[3], ':');
+ if (colons) {
+ char *endptr;
+
+ index = strtol(colons + 1, &endptr, 10);
+ /* Indexes start at 1 */
+ if (colons + 1 == endptr || *endptr != '\0' || index <= 0) {
+ memprintf(&err, "wrong CRL index after colons in '%s'!", args[3]);
+ goto error;
+ }
+ *colons = '\0';
+ }
+
+ if (*args[3] == '*') {
+ if (!crlfile_transaction.new_crlfile_entry)
+ goto error;
+
+ cafile_entry = crlfile_transaction.new_crlfile_entry;
+
+ if (strcmp(args[3] + 1, cafile_entry->path) != 0)
+ goto error;
+
+ } else {
+ /* Get the "original" cafile_entry and not the
+ * uncommitted one if it exists. */
+ if ((cafile_entry = ssl_store_get_cafile_entry(args[3], 1)) == NULL || cafile_entry->type != CAFILE_CRL)
+ goto error;
+ }
+
+ ctx->cafile_entry = cafile_entry;
+ ctx->index = index;
+ /* use the IO handler that shows details */
+ appctx->io_handler = cli_io_handler_show_crlfile_detail;
+ }
+
+ return 0;
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ if (err)
+ return cli_dynerr(appctx, err);
+ return cli_err(appctx, "Can't display the CRL file : Not found!\n");
+}
+
+/* IO handler of "show ssl crl-file". The command taking a specific CRL file name
+ * is managed in cli_io_handler_show_crlfile_detail. */
+static int cli_io_handler_show_crlfile(struct appctx *appctx)
+{
+ struct show_crlfile_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct ebmb_node *node;
+ struct cafile_entry *cafile_entry = NULL;
+
+ if (trash == NULL)
+ return 1;
+
+ if (!ctx->old_crlfile_entry && crlfile_transaction.old_crlfile_entry) {
+ chunk_appendf(trash, "# transaction\n");
+ chunk_appendf(trash, "*%s\n", crlfile_transaction.old_crlfile_entry->path);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ ctx->old_crlfile_entry = crlfile_transaction.old_crlfile_entry;
+ }
+
+ /* First time in this io_handler. */
+ if (!ctx->cafile_entry) {
+ chunk_appendf(trash, "# filename\n");
+ node = ebmb_first(&cafile_tree);
+ } else {
+ /* We yielded during a previous call. */
+ node = &ctx->cafile_entry->node;
+ }
+
+ while (node) {
+ cafile_entry = ebmb_entry(node, struct cafile_entry, node);
+ if (cafile_entry->type == CAFILE_CRL) {
+ chunk_appendf(trash, "%s\n", cafile_entry->path);
+ }
+
+ node = ebmb_next(node);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+ ctx->cafile_entry = NULL;
+ free_trash_chunk(trash);
+ return 1;
+yield:

+ free_trash_chunk(trash);
+ ctx->cafile_entry = cafile_entry;
+ return 0; /* should come back */
+}
+
+
+/* release function of the 'show ssl crl-file' command */
+static void cli_release_show_crlfile(struct appctx *appctx)
+{
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+}
+
+
+void ckch_deinit()
+{
+ struct eb_node *node, *next;
+ struct ckch_store *store;
+ struct ebmb_node *canode;
+
+ /* deinit the ckch stores */
+ node = eb_first(&ckchs_tree);
+ while (node) {
+ next = eb_next(node);
+ store = ebmb_entry(node, struct ckch_store, node);
+ ckch_store_free(store);
+ node = next;
+ }
+
+ /* deinit the ca-file store */
+ canode = ebmb_first(&cafile_tree);
+ while (canode) {
+ struct cafile_entry *entry = NULL;
+
+ entry = ebmb_entry(canode, struct cafile_entry, node);
+ canode = ebmb_next(canode);
+ ebmb_delete(&entry->node);
+ ssl_store_delete_cafile_entry(entry);
+ }
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "new", "ssl", "cert", NULL }, "new ssl cert <certfile> : create a new certificate file to be used in a crt-list or a directory", cli_parse_new_cert, NULL, NULL },
+ { { "set", "ssl", "cert", NULL }, "set ssl cert <certfile> <payload> : replace a certificate file", cli_parse_set_cert, NULL, NULL },
+ { { "commit", "ssl", "cert", NULL }, "commit ssl cert <certfile> : commit a certificate file", cli_parse_commit_cert, cli_io_handler_commit_cert, cli_release_commit_cert },
+ { { "abort", "ssl", "cert", NULL }, "abort ssl cert <certfile> : abort a transaction for a certificate file", cli_parse_abort_cert, NULL, NULL },
+ { { "del", "ssl", "cert", NULL }, "del ssl cert <certfile> : delete an unused certificate file", cli_parse_del_cert, NULL, NULL },
+ { { "show", "ssl", "cert", NULL }, "show ssl cert [<certfile>] : display the SSL certificates used in memory, or the details of a file", cli_parse_show_cert, cli_io_handler_show_cert, cli_release_show_cert },
+
+ { { "new", "ssl", "ca-file", NULL }, "new ssl ca-file <cafile> : create a new CA file to be used in a crt-list", cli_parse_new_cafile, NULL, NULL },
+ { { "add", "ssl", "ca-file", NULL }, "add ssl ca-file <cafile> <payload> : add a certificate into the CA file", cli_parse_set_cafile, NULL, NULL },
+ { { "set", "ssl", "ca-file", NULL }, "set ssl ca-file <cafile> <payload> : replace a CA file", cli_parse_set_cafile, NULL, NULL },
+ { { "commit", "ssl", "ca-file", NULL }, "commit ssl ca-file <cafile> : commit a CA file", cli_parse_commit_cafile, cli_io_handler_commit_cafile_crlfile, cli_release_commit_cafile },
+ { { "abort", "ssl", "ca-file", NULL }, "abort ssl ca-file <cafile> : abort a transaction for a CA file", cli_parse_abort_cafile, NULL, NULL },
+ { { "del", "ssl", "ca-file", NULL }, "del ssl ca-file <cafile> : delete an unused CA file", cli_parse_del_cafile, NULL, NULL },
+ { { "show", "ssl", "ca-file", NULL }, "show ssl ca-file [<cafile>[:<index>]] : display the SSL CA files used in memory, or the details of a <cafile>, or a single certificate of index <index> of a CA file <cafile>", cli_parse_show_cafile, cli_io_handler_show_cafile, cli_release_show_cafile },
+
+ { { "new", "ssl", "crl-file", NULL }, "new ssl crlfile <crlfile> : create a new CRL file to be used in a crt-list", cli_parse_new_crlfile, NULL, NULL },
+ { { "set", "ssl", "crl-file", NULL }, "set ssl crl-file <crlfile> <payload> : replace a CRL file", cli_parse_set_crlfile, NULL, NULL },
+ { { "commit", "ssl", "crl-file", NULL },"commit ssl crl-file <crlfile> : commit a CRL file", cli_parse_commit_crlfile, cli_io_handler_commit_cafile_crlfile, cli_release_commit_crlfile },
+ { { "abort", "ssl", "crl-file", NULL }, "abort ssl crl-file <crlfile> : abort a transaction for a CRL file", cli_parse_abort_crlfile, NULL, NULL },
+ { { "del", "ssl", "crl-file", NULL }, "del ssl crl-file <crlfile> : delete an unused CRL file", cli_parse_del_crlfile, NULL, NULL },
+ { { "show", "ssl", "crl-file", NULL }, "show ssl crl-file [<crlfile[:<index>>]] : display the SSL CRL files used in memory, or the details of a <crlfile>, or a single CRL of index <index> of CRL file <crlfile>", cli_parse_show_crlfile, cli_io_handler_show_crlfile, cli_release_show_crlfile },
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
diff --git a/src/ssl_crtlist.c b/src/ssl_crtlist.c
new file mode 100644
index 0000000..dcd9171
--- /dev/null
+++ b/src/ssl_crtlist.c
@@ -0,0 +1,1577 @@
+/*
+ *
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syslog.h>
+
+#include <import/ebpttree.h>
+#include <import/ebsttree.h>
+
+#include <haproxy/applet.h>
+#include <haproxy/channel.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_crtlist.h>
+#include <haproxy/ssl_ocsp.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+
+/* CLI context for "show ssl crt-list" or "dump ssl crt-list" */
+struct show_crtlist_ctx {
+ struct ebmb_node *crtlist_node; /* ebmb_node for the current crtlist */
+ struct crtlist_entry *entry; /* current entry */
+ int mode; /* 'd' for dump, 's' for show */
+};
+
+/* CLI context for "add ssl crt-list" */
+struct add_crtlist_ctx {
+ struct crtlist *crtlist;
+ struct crtlist_entry *entry;
+ struct bind_conf_list *bind_conf_node;
+ char *err;
+ enum {
+ ADDCRT_ST_INIT = 0,
+ ADDCRT_ST_GEN,
+ ADDCRT_ST_INSERT,
+ ADDCRT_ST_SUCCESS,
+ ADDCRT_ST_ERROR,
+ ADDCRT_ST_FIN,
+ } state;
+};
+
+/* release ssl bind conf */
+void ssl_sock_free_ssl_conf(struct ssl_bind_conf *conf)
+{
+ if (conf) {
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ ha_free(&conf->npn_str);
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ ha_free(&conf->alpn_str);
+#endif
+ ha_free(&conf->ca_file);
+ ha_free(&conf->ca_verify_file);
+ ha_free(&conf->crl_file);
+ ha_free(&conf->ciphers);
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ ha_free(&conf->ciphersuites);
+#endif
+ ha_free(&conf->curves);
+ ha_free(&conf->ecdhe);
+#if defined(SSL_CTX_set1_sigalgs_list)
+ ha_free(&conf->sigalgs);
+#endif
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ ha_free(&conf->client_sigalgs);
+#endif
+ }
+}
+
+/*
+ * Allocate and copy a ssl_bind_conf structure
+ */
+struct ssl_bind_conf *crtlist_dup_ssl_conf(struct ssl_bind_conf *src)
+{
+ struct ssl_bind_conf *dst;
+
+ if (!src)
+ return NULL;
+
+ dst = calloc(1, sizeof(*dst));
+ if (!dst)
+ return NULL;
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ if (src->npn_str) {
+ dst->npn_str = strdup(src->npn_str);
+ if (!dst->npn_str)
+ goto error;
+ }
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ if (src->alpn_str) {
+ dst->alpn_str = strdup(src->alpn_str);
+ if (!dst->alpn_str)
+ goto error;
+ }
+#endif
+ if (src->ca_file) {
+ dst->ca_file = strdup(src->ca_file);
+ if (!dst->ca_file)
+ goto error;
+ }
+ if (src->ca_verify_file) {
+ dst->ca_verify_file = strdup(src->ca_verify_file);
+ if (!dst->ca_verify_file)
+ goto error;
+ }
+ if (src->crl_file) {
+ dst->crl_file = strdup(src->crl_file);
+ if (!dst->crl_file)
+ goto error;
+ }
+ if (src->ciphers) {
+ dst->ciphers = strdup(src->ciphers);
+ if (!dst->ciphers)
+ goto error;
+ }
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (src->ciphersuites) {
+ dst->ciphersuites = strdup(src->ciphersuites);
+ if (!dst->ciphersuites)
+ goto error;
+ }
+#endif
+ if (src->curves) {
+ dst->curves = strdup(src->curves);
+ if (!dst->curves)
+ goto error;
+ }
+ if (src->ecdhe) {
+ dst->ecdhe = strdup(src->ecdhe);
+ if (!dst->ecdhe)
+ goto error;
+ }
+
+ dst->ssl_methods_cfg.flags = src->ssl_methods_cfg.flags;
+ dst->ssl_methods_cfg.min = src->ssl_methods_cfg.min;
+ dst->ssl_methods_cfg.max = src->ssl_methods_cfg.max;
+
+ dst->ssl_methods.flags = src->ssl_methods.flags;
+ dst->ssl_methods.min = src->ssl_methods.min;
+ dst->ssl_methods.max = src->ssl_methods.max;
+
+#if defined(SSL_CTX_set1_sigalgs_list)
+ if (src->sigalgs) {
+ dst->sigalgs = strdup(src->sigalgs);
+ if (!dst->sigalgs)
+ goto error;
+ }
+#endif
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ if (src->client_sigalgs) {
+ dst->client_sigalgs = strdup(src->client_sigalgs);
+ if (!dst->client_sigalgs)
+ goto error;
+ }
+#endif
+ return dst;
+
+error:
+ ssl_sock_free_ssl_conf(dst);
+ free(dst);
+
+ return NULL;
+}
+
+/* free sni filters */
+void crtlist_free_filters(char **args)
+{
+ int i;
+
+ if (!args)
+ return;
+
+ for (i = 0; args[i]; i++)
+ free(args[i]);
+
+ free(args);
+}
+
+/* Alloc and duplicate a char ** array */
+char **crtlist_dup_filters(char **args, int fcount)
+{
+ char **dst;
+ int i;
+
+ if (fcount == 0)
+ return NULL;
+
+ dst = calloc(fcount + 1, sizeof(*dst));
+ if (!dst)
+ return NULL;
+
+ for (i = 0; i < fcount; i++) {
+ dst[i] = strdup(args[i]);
+ if (!dst[i])
+ goto error;
+ }
+ return dst;
+
+error:
+ crtlist_free_filters(dst);
+ return NULL;
+}
+
+/*
+ * Detach and free a crtlist_entry.
+ * Free the filters, the ssl_conf and call ckch_inst_free() for each ckch_inst
+ */
+void crtlist_entry_free(struct crtlist_entry *entry)
+{
+ struct ckch_inst *inst, *inst_s;
+
+ if (entry == NULL)
+ return;
+
+ ebpt_delete(&entry->node);
+ LIST_DELETE(&entry->by_crtlist);
+ LIST_DELETE(&entry->by_ckch_store);
+ crtlist_free_filters(entry->filters);
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ free(entry->ssl_conf);
+ list_for_each_entry_safe(inst, inst_s, &entry->ckch_inst, by_crtlist_entry) {
+ ckch_inst_free(inst);
+ }
+ free(entry);
+}
+/*
+ * Duplicate a crt_list entry and its content (ssl_conf, filters/fcount)
+ * Return a pointer to the new entry
+ */
+struct crtlist_entry *crtlist_entry_dup(struct crtlist_entry *src)
+{
+ struct crtlist_entry *entry;
+
+ if (src == NULL)
+ return NULL;
+
+ entry = crtlist_entry_new();
+ if (entry == NULL)
+ return NULL;
+
+ if (src->filters) {
+ entry->filters = crtlist_dup_filters(src->filters, src->fcount);
+ if (!entry->filters)
+ goto error;
+ }
+ entry->fcount = src->fcount;
+ if (src->ssl_conf) {
+ entry->ssl_conf = crtlist_dup_ssl_conf(src->ssl_conf);
+ if (!entry->ssl_conf)
+ goto error;
+ }
+ entry->crtlist = src->crtlist;
+
+ return entry;
+
+error:
+
+ crtlist_free_filters(entry->filters);
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ free(entry->ssl_conf);
+ free(entry);
+
+ return NULL;
+}
+
+/*
+ * Allocate and initialize a crtlist_entry
+ */
+struct crtlist_entry *crtlist_entry_new()
+{
+ struct crtlist_entry *entry;
+
+ entry = calloc(1, sizeof(*entry));
+ if (entry == NULL)
+ return NULL;
+
+ LIST_INIT(&entry->ckch_inst);
+
+ /* initialize the nodes so we can LIST_DELETE in any case */
+ LIST_INIT(&entry->by_crtlist);
+ LIST_INIT(&entry->by_ckch_store);
+
+ return entry;
+}
+
+/* Free a crtlist, from the crt_entry to the content of the ssl_conf */
+void crtlist_free(struct crtlist *crtlist)
+{
+ struct crtlist_entry *entry, *s_entry;
+ struct bind_conf_list *bind_conf_node;
+
+ if (crtlist == NULL)
+ return;
+
+ bind_conf_node = crtlist->bind_conf;
+ while (bind_conf_node) {
+ struct bind_conf_list *next = bind_conf_node->next;
+ free(bind_conf_node);
+ bind_conf_node = next;
+ }
+
+ list_for_each_entry_safe(entry, s_entry, &crtlist->ord_entries, by_crtlist) {
+ crtlist_entry_free(entry);
+ }
+ ebmb_delete(&crtlist->node);
+ free(crtlist);
+}
+
+/* Alloc and initialize a struct crtlist
+ * <filename> is the key of the ebmb_node
+ * <unique> initializes the list of entries to be unique (1) or not (0)
+ */
+struct crtlist *crtlist_new(const char *filename, int unique)
+{
+ struct crtlist *newlist;
+
+ newlist = calloc(1, sizeof(*newlist) + strlen(filename) + 1);
+ if (newlist == NULL)
+ return NULL;
+
+ memcpy(newlist->node.key, filename, strlen(filename) + 1);
+ if (unique)
+ newlist->entries = EB_ROOT_UNIQUE;
+ else
+ newlist->entries = EB_ROOT;
+
+ LIST_INIT(&newlist->ord_entries);
+
+ return newlist;
+}
+
+/*
+ * Read a single crt-list line. /!\ It alters the <line> string.
+ * Fill <crt_path> and <crtlist_entry>.
+ * <crtlist_entry> must be allocated and freed by the caller.
+ * <crtlist_entry->ssl_conf> is allocated by the function.
+ * <crtlist_entry->filters> is allocated by the function.
+ * <crt_path> is a pointer into <line>.
+ * Returns an error code.
+ */
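+/* A typical crt-list line looks like this (hypothetical paths and names):
+ *   ssl/www.pem [ssl-min-ver TLSv1.2 alpn h2,http/1.1] www.example.com !www.internal.example.com
+ * where the first word is the certificate path, the optional bracketed block
+ * carries ssl options, and the remaining words are SNI filters.
+ */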
+int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, const char *file, int linenum, int from_cli, char **err)
+{
+ int cfgerr = 0;
+ int arg, newarg, cur_arg, i, ssl_b = 0, ssl_e = 0;
+ char *end;
+ char *args[MAX_CRT_ARGS + 1];
+ struct ssl_bind_conf *ssl_conf = NULL;
+
+ if (!line || !crt_path || !entry)
+ return ERR_ALERT | ERR_FATAL;
+
+ end = line + strlen(line);
+ if (end-line >= CRT_LINESIZE-1 && *(end-1) != '\n') {
+ /* Check if we reached the limit and the last char is not \n.
+ * Watch out for the last line without the terminating '\n'!
+ */
+ memprintf(err, "parsing [%s:%d]: line too long, limit is %d characters",
+ file, linenum, CRT_LINESIZE-1);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ arg = 0;
+ newarg = 1;
+ while (*line) {
+ if (isspace((unsigned char)*line)) {
+ newarg = 1;
+ *line = 0;
+ } else if (*line == '[') {
+ if (ssl_b) {
+ memprintf(err, "parsing [%s:%d]: too many '['", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ if (!arg) {
+ memprintf(err, "parsing [%s:%d]: file must start with a cert", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ ssl_b = arg;
+ newarg = 1;
+ *line = 0;
+ } else if (*line == ']') {
+ if (ssl_e) {
+ memprintf(err, "parsing [%s:%d]: too many ']'", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ if (!ssl_b) {
+ memprintf(err, "parsing [%s:%d]: missing '['", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ ssl_e = arg;
+ newarg = 1;
+ *line = 0;
+ } else if (newarg) {
+ if (arg == MAX_CRT_ARGS) {
+ memprintf(err, "parsing [%s:%d]: too many args ", file, linenum);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ newarg = 0;
+ args[arg++] = line;
+ }
+ line++;
+ }
+ args[arg++] = line;
+
+ /* empty line */
+ if (!*args[0]) {
+ cfgerr |= ERR_NONE;
+ goto error;
+ }
+
+ *crt_path = args[0];
+
+ if (ssl_b) {
+ if (ssl_b > 1) {
+ memprintf(err, "parsing [%s:%d]: malformated line, filters can't be between filename and options!", file, linenum);
+ cfgerr |= ERR_WARN;
+ }
+
+ ssl_conf = calloc(1, sizeof *ssl_conf);
+ if (!ssl_conf) {
+ memprintf(err, "not enough memory!");
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+
+ cur_arg = ssl_b ? ssl_b : 1;
+ while (cur_arg < ssl_e) {
+ newarg = 0;
+ for (i = 0; ssl_crtlist_kws[i].kw != NULL; i++) {
+ if (strcmp(ssl_crtlist_kws[i].kw, args[cur_arg]) == 0) {
+ newarg = 1;
+ cfgerr |= ssl_crtlist_kws[i].parse(args, cur_arg, NULL, ssl_conf, from_cli, err);
+ if (cur_arg + 1 + ssl_crtlist_kws[i].skip > ssl_e) {
+ memprintf(err, "parsing [%s:%d]: ssl args out of '[]' for %s",
+ file, linenum, args[cur_arg]);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ cur_arg += 1 + ssl_crtlist_kws[i].skip;
+ break;
+ }
+ }
+ if (!cfgerr && !newarg) {
+ memprintf(err, "parsing [%s:%d]: unknown ssl keyword %s",
+ file, linenum, args[cur_arg]);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+ entry->linenum = linenum;
+ entry->ssl_conf = ssl_conf;
+ entry->filters = crtlist_dup_filters(&args[cur_arg], arg - cur_arg - 1);
+ entry->fcount = arg - cur_arg - 1;
+
+ return cfgerr;
+
+error:
+ crtlist_free_filters(entry->filters);
+ entry->filters = NULL;
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ ha_free(&entry->ssl_conf);
+ return cfgerr;
+}
+
+
+/* This function parses a crt-list file and stores it in a struct crtlist; each line is stored in a crtlist_entry structure.
+ * Fill the <crtlist> argument with a pointer to a new crtlist struct
+ *
+ * This function tries to open and store certificate files.
+ */
+int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *curproxy, struct crtlist **crtlist, char **err)
+{
+ struct crtlist *newlist;
+ struct crtlist_entry *entry = NULL;
+ char thisline[CRT_LINESIZE];
+ FILE *f;
+ struct stat buf;
+ int linenum = 0;
+ int cfgerr = 0;
+ int missing_lf = -1;
+
+ if ((f = fopen(file, "r")) == NULL) {
+ memprintf(err, "cannot open file '%s' : %s", file, strerror(errno));
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ newlist = crtlist_new(file, 0);
+ if (newlist == NULL) {
+ memprintf(err, "Not enough memory!");
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ while (fgets(thisline, sizeof(thisline), f) != NULL) {
+ char *end;
+ char *line = thisline;
+ char *crt_path;
+ char path[MAXPATHLEN+1];
+ struct ckch_store *ckchs;
+ int found = 0;
+
+ if (missing_lf != -1) {
+ memprintf(err, "parsing [%s:%d]: Stray NUL character at position %d.\n",
+ file, linenum, (missing_lf + 1));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ missing_lf = -1;
+ break;
+ }
+
+ linenum++;
+ end = line + strlen(line);
+ if (end-line == sizeof(thisline)-1 && *(end-1) != '\n') {
+ /* Check if we reached the limit and the last char is not \n.
+ * Watch out for the last line without the terminating '\n'!
+ */
+ memprintf(err, "parsing [%s:%d]: line too long, limit is %d characters",
+ file, linenum, (int)sizeof(thisline)-1);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ break;
+ }
+
+ if (*line == '#' || *line == '\n' || *line == '\r')
+ continue;
+
+ if (end > line && *(end-1) == '\n') {
+ /* kill trailing LF */
+ *(end - 1) = 0;
+ }
+ else {
+ /* mark this line as truncated */
+ missing_lf = end - line;
+ }
+
+ entry = crtlist_entry_new();
+ if (entry == NULL) {
+ memprintf(err, "Not enough memory!");
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ cfgerr |= crtlist_parse_line(thisline, &crt_path, entry, file, linenum, 0, err);
+ if (cfgerr & ERR_CODE)
+ goto error;
+
+ /* empty line */
+ if (!crt_path || !*crt_path) {
+ crtlist_entry_free(entry);
+ entry = NULL;
+ continue;
+ }
+
+ if (*crt_path != '/' && global_ssl.crt_base) {
+ if ((strlen(global_ssl.crt_base) + 1 + strlen(crt_path)) >= sizeof(path) ||
+ snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, crt_path) >= sizeof(path)) {
+ memprintf(err, "parsing [%s:%d]: '%s' : path too long",
+ file, linenum, crt_path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ crt_path = path;
+ }
+
+ /* Look for a ckch_store or create one */
+ ckchs = ckchs_lookup(crt_path);
+ if (ckchs == NULL) {
+ if (stat(crt_path, &buf) == 0) {
+ found++;
+
+ ckchs = ckchs_load_cert_file(crt_path, err);
+ if (ckchs == NULL) {
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ entry->node.key = ckchs;
+ entry->crtlist = newlist;
+ if (entry->ssl_conf)
+ ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update;
+ ebpt_insert(&newlist->entries, &entry->node);
+ LIST_APPEND(&newlist->ord_entries, &entry->by_crtlist);
+ LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store);
+
+ } else if (global_ssl.extra_files & SSL_GF_BUNDLE) {
+ /* If we didn't find the file, this could be a
+ bundle; since 2.3 we don't support multiple
+ certificates in the same OpenSSL store, so we
+ emulate it by loading each file separately. To
+ do so we need to duplicate the entry in the
+ crt-list because it becomes independent. */
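+ /* e.g. a "site.pem" line would make us probe
+ "site.pem.dsa", "site.pem.ecdsa" and
+ "site.pem.rsa" (hypothetical names, one file
+ per supported key type) */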
+ char fp[MAXPATHLEN+1] = {0};
+ int n = 0;
+ struct crtlist_entry *entry_dup = entry; /* use the previously created entry */
+ for (n = 0; n < SSL_SOCK_NUM_KEYTYPES; n++) {
+ struct stat buf;
+ int ret;
+
+ ret = snprintf(fp, sizeof(fp), "%s.%s", crt_path, SSL_SOCK_KEYTYPE_NAMES[n]);
+ if (ret >= sizeof(fp))
+ continue;
+
+ ckchs = ckchs_lookup(fp);
+ if (!ckchs) {
+ if (stat(fp, &buf) == 0) {
+ ckchs = ckchs_load_cert_file(fp, err);
+ if (!ckchs) {
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ } else {
+ continue; /* didn't find this extension, skip */
+ }
+ }
+ found++;
+ linenum++; /* we duplicate the line for this entry in the bundle */
+ if (!entry_dup) { /* if the entry was used, duplicate one */
+ linenum++;
+ entry_dup = crtlist_entry_dup(entry);
+ if (!entry_dup) {
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ entry_dup->linenum = linenum;
+ }
+
+ entry_dup->node.key = ckchs;
+ entry_dup->crtlist = newlist;
+
+ cfgerr |= ocsp_update_check_cfg_consistency(ckchs, entry, crt_path, err);
+ if (cfgerr & ERR_FATAL)
+ goto error;
+
+ if (entry->ssl_conf)
+ ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update;
+ ebpt_insert(&newlist->entries, &entry_dup->node);
+ LIST_APPEND(&newlist->ord_entries, &entry_dup->by_crtlist);
+ LIST_APPEND(&ckchs->crtlist_entry, &entry_dup->by_ckch_store);
+
+ entry_dup = NULL; /* the entry was used, we need a new one next round */
+ }
+#if HA_OPENSSL_VERSION_NUMBER < 0x10101000L
+ if (found) {
+ memprintf(err, "%sCan't load '%s'. Loading a multi certificates bundle requires OpenSSL >= 1.1.1\n",
+ err && *err ? *err : "", crt_path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#endif
+ }
+ if (!found) {
+ memprintf(err, "%sunable to stat SSL certificate from file '%s' : %s.\n",
+ err && *err ? *err : "", crt_path, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+ } else {
+ entry->node.key = ckchs;
+ entry->crtlist = newlist;
+
+ cfgerr |= ocsp_update_check_cfg_consistency(ckchs, entry, crt_path, err);
+ if (cfgerr & ERR_FATAL)
+ goto error;
+
+ if (entry->ssl_conf)
+ ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update;
+ ebpt_insert(&newlist->entries, &entry->node);
+ LIST_APPEND(&newlist->ord_entries, &entry->by_crtlist);
+ LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store);
+ found++;
+ }
+ entry = NULL;
+ }
+
+ if (missing_lf != -1) {
+ memprintf(err, "parsing [%s:%d]: Missing LF on last line, file might have been truncated at position %d.\n",
+ file, linenum, (missing_lf + 1));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+ if (cfgerr & ERR_CODE)
+ goto error;
+
+ newlist->linecount = linenum;
+
+ fclose(f);
+ *crtlist = newlist;
+
+ return cfgerr;
+error:
+ crtlist_entry_free(entry);
+
+ fclose(f);
+ crtlist_free(newlist);
+ return cfgerr;
+}
+
+/* This function reads a directory and stores it in a struct crtlist; each file is stored in a crtlist_entry structure
+ * Fill the <crtlist> argument with a pointer to a new crtlist struct
+ *
+ * This function tries to open and store certificate files.
+ */
+int crtlist_load_cert_dir(char *path, struct bind_conf *bind_conf, struct crtlist **crtlist, char **err)
+{
+ struct crtlist *dir;
+ struct dirent **de_list;
+ int i, n;
+ struct stat buf;
+ char *end;
+ char fp[MAXPATHLEN+1];
+ int cfgerr = 0;
+ struct ckch_store *ckchs;
+
+ dir = crtlist_new(path, 1);
+ if (dir == NULL) {
+ memprintf(err, "not enough memory");
+ return ERR_ALERT | ERR_FATAL;
+ }
+
+ n = scandir(path, &de_list, 0, alphasort);
+ if (n < 0) {
+ memprintf(err, "%sunable to scan directory '%s' : %s.\n",
+ err && *err ? *err : "", path, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ else {
+ for (i = 0; i < n; i++) {
+ struct crtlist_entry *entry;
+ struct dirent *de = de_list[i];
+
+ end = strrchr(de->d_name, '.');
+ if (end && (de->d_name[0] == '.' ||
+ strcmp(end, ".issuer") == 0 || strcmp(end, ".ocsp") == 0 ||
+ strcmp(end, ".sctl") == 0 || strcmp(end, ".key") == 0))
+ goto ignore_entry;
+
+ snprintf(fp, sizeof(fp), "%s/%s", path, de->d_name);
+ if (stat(fp, &buf) != 0) {
+ memprintf(err, "%sunable to stat SSL certificate from file '%s' : %s.\n",
+ err && *err ? *err : "", fp, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto ignore_entry;
+ }
+ if (!S_ISREG(buf.st_mode))
+ goto ignore_entry;
+
+ entry = crtlist_entry_new();
+ if (entry == NULL) {
+ memprintf(err, "not enough memory '%s'", fp);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto ignore_entry;
+ }
+
+ ckchs = ckchs_lookup(fp);
+ if (ckchs == NULL)
+ ckchs = ckchs_load_cert_file(fp, err);
+ if (ckchs == NULL) {
+ free(de);
+ free(entry);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ entry->node.key = ckchs;
+ entry->crtlist = dir;
+ LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store);
+ LIST_APPEND(&dir->ord_entries, &entry->by_crtlist);
+ ebpt_insert(&dir->entries, &entry->node);
+
+ignore_entry:
+ free(de);
+ }
+end:
+ free(de_list);
+ }
+
+ if (cfgerr & ERR_CODE) {
+ /* free the dir and entries on error */
+ crtlist_free(dir);
+ } else {
+ *crtlist = dir;
+ }
+ return cfgerr;
+}
+
+/*
+ * Take an ssl_bind_conf structure and append to <buf> the configuration line
+ * that was used to create it
+ */
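+/* For an entry carrying an ALPN string, an optional verify and a CA file,
+ * the appended text would look like (hypothetical values):
+ *   [alpn h2,http/1.1 verify optional ca-file ca.pem]
+ */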
+static void dump_crtlist_sslconf(struct buffer *buf, const struct ssl_bind_conf *conf)
+{
+ int space = 0;
+
+ if (conf == NULL)
+ return;
+
+ chunk_appendf(buf, " [");
+#ifdef OPENSSL_NPN_NEGOTIATED
+ if (conf->npn_str) {
+ int len = conf->npn_len;
+ char *ptr = conf->npn_str;
+ int comma = 0;
+
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "npn ");
+ while (len) {
+ unsigned short size;
+
+ size = *ptr;
+ ptr++;
+ if (comma)
+ chunk_memcat(buf, ",", 1);
+ chunk_memcat(buf, ptr, size);
+ ptr += size;
+ len -= size + 1;
+ comma = 1;
+ }
+ chunk_memcat(buf, "", 1); /* finish with a \0 */
+ space++;
+ }
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ if (conf->alpn_str) {
+ int len = conf->alpn_len;
+ char *ptr = conf->alpn_str;
+ int comma = 0;
+
+ if (space) chunk_appendf(buf, " ");
+ if (len)
+ chunk_appendf(buf, "alpn ");
+ else
+ chunk_appendf(buf, "no-alpn");
+ while (len) {
+ unsigned short size;
+
+ size = *ptr;
+ ptr++;
+ if (comma)
+ chunk_memcat(buf, ",", 1);
+ chunk_memcat(buf, ptr, size);
+ ptr += size;
+ len -= size + 1;
+ comma = 1;
+ }
+ chunk_memcat(buf, "", 1); /* finish with a \0 */
+ space++;
+ }
+#endif
+ /* verify */
+ {
+ if (conf->verify == SSL_SOCK_VERIFY_NONE) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "verify none");
+ space++;
+ } else if (conf->verify == SSL_SOCK_VERIFY_OPTIONAL) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "verify optional");
+ space++;
+ } else if (conf->verify == SSL_SOCK_VERIFY_REQUIRED) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "verify required");
+ space++;
+ }
+ }
+
+ if (conf->no_ca_names) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "no-ca-names");
+ space++;
+ }
+
+ if (conf->early_data) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "allow-0rtt");
+ space++;
+ }
+ if (conf->ca_file) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ca-file %s", conf->ca_file);
+ space++;
+ }
+ if (conf->crl_file) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "crl-file %s", conf->crl_file);
+ space++;
+ }
+ if (conf->ciphers) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ciphers %s", conf->ciphers);
+ space++;
+ }
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (conf->ciphersuites) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ciphersuites %s", conf->ciphersuites);
+ space++;
+ }
+#endif
+ if (conf->curves) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "curves %s", conf->curves);
+ space++;
+ }
+ if (conf->ecdhe) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ecdhe %s", conf->ecdhe);
+ space++;
+ }
+
+ /* the crt-lists only support ssl-min-ver and ssl-max-ver */
+ if (conf->ssl_methods_cfg.min) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ssl-min-ver %s", methodVersions[conf->ssl_methods_cfg.min].name);
+ space++;
+ }
+
+ if (conf->ssl_methods_cfg.max) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ssl-max-ver %s", methodVersions[conf->ssl_methods_cfg.max].name);
+ space++;
+ }
+
+ if (conf->ocsp_update != SSL_SOCK_OCSP_UPDATE_DFLT) {
+ if (space) chunk_appendf(buf, " ");
+ chunk_appendf(buf, "ocsp-update %s",
+ conf->ocsp_update == SSL_SOCK_OCSP_UPDATE_OFF ? "off" : "on");
+ space++;
+ }
+
+ chunk_appendf(buf, "]");
+
+ return;
+}
+
+/* dump a list of filters */
+static void dump_crtlist_filters(struct buffer *buf, struct crtlist_entry *entry)
+{
+ int i;
+
+ if (!entry->fcount)
+ return;
+
+ for (i = 0; i < entry->fcount; i++) {
+ chunk_appendf(buf, " %s", entry->filters[i]);
+ }
+ return;
+}
+
+/************************** CLI functions ****************************/
+
+
+/* CLI IO handler for '(show|dump) ssl crt-list'.
+ * It uses show_crtlist_ctx for the context.
+ */
+static int cli_io_handler_dump_crtlist(struct appctx *appctx)
+{
+ struct show_crtlist_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct ebmb_node *lnode;
+
+ if (trash == NULL)
+ return 1;
+
+ /* dump the list of crt-lists */
+ lnode = ctx->crtlist_node;
+ if (lnode == NULL)
+ lnode = ebmb_first(&crtlists_tree);
+ while (lnode) {
+ chunk_appendf(trash, "%s\n", lnode->key);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ lnode = ebmb_next(lnode);
+ }
+ free_trash_chunk(trash);
+ return 1;
+yield:
+ ctx->crtlist_node = lnode;
+ free_trash_chunk(trash);
+ return 0;
+}
+
+/* CLI IO handler for '(show|dump) ssl crt-list <filename>' */
+static int cli_io_handler_dump_crtlist_entries(struct appctx *appctx)
+{
+ struct show_crtlist_ctx *ctx = appctx->svcctx;
+ struct buffer *trash = alloc_trash_chunk();
+ struct crtlist *crtlist;
+ struct crtlist_entry *entry;
+
+ if (trash == NULL)
+ return 1;
+
+ crtlist = ebmb_entry(ctx->crtlist_node, struct crtlist, node);
+
+ entry = ctx->entry;
+ if (entry == NULL) {
+ entry = LIST_ELEM((crtlist->ord_entries).n, typeof(entry), by_crtlist);
+ chunk_appendf(trash, "# %s\n", crtlist->node.key);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+ list_for_each_entry_from(entry, &crtlist->ord_entries, by_crtlist) {
+ struct ckch_store *store;
+ const char *filename;
+
+ store = entry->node.key;
+ filename = store->path;
+ chunk_appendf(trash, "%s", filename);
+ if (ctx->mode == 's') /* show */
+ chunk_appendf(trash, ":%d", entry->linenum);
+ dump_crtlist_sslconf(trash, entry->ssl_conf);
+ dump_crtlist_filters(trash, entry);
+ chunk_appendf(trash, "\n");
+
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+ free_trash_chunk(trash);
+ return 1;
+yield:
+ ctx->entry = entry;
+ free_trash_chunk(trash);
+ return 0;
+}
+
+/* CLI argument parser for '(show|dump) ssl crt-list' */
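+/* e.g. "show ssl crt-list" lists the known crt-list files, while
+ * "show ssl crt-list -n /etc/haproxy/list.txt" dumps the entries of that
+ * file with their line numbers (hypothetical path).
+ */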
+static int cli_parse_dump_crtlist(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_crtlist_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ struct ebmb_node *lnode;
+ char *filename = NULL;
+ int mode;
+ char *end;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (*args[3] && strcmp(args[3], "-n") == 0) {
+ mode = 's';
+ filename = args[4];
+ } else {
+ mode = 'd';
+ filename = args[3];
+ }
+
+ if (mode == 's' && !*args[4])
+ return cli_err(appctx, "'show ssl crt-list -n' expects a filename or a directory\n");
+
+ if (filename && *filename) {
+ /* strip trailing slashes, including first one */
+ for (end = filename + strlen(filename) - 1; end >= filename && *end == '/'; end--)
+ *end = 0;
+
+ lnode = ebst_lookup(&crtlists_tree, filename);
+ if (lnode == NULL)
+ return cli_err(appctx, "didn't find the specified filename\n");
+
+ ctx->crtlist_node = lnode;
+ appctx->io_handler = cli_io_handler_dump_crtlist_entries;
+ }
+ ctx->mode = mode;
+
+ return 0;
+}
+
+/* release function of the "add ssl crt-list' command, free things and unlock
+ * the spinlock. It uses the add_crtlist_ctx.
+ */
+static void cli_release_add_crtlist(struct appctx *appctx)
+{
+ struct add_crtlist_ctx *ctx = appctx->svcctx;
+ struct crtlist_entry *entry = ctx->entry;
+
+ if (entry) {
+ struct ckch_inst *inst, *inst_s;
+
+ /* upon error free the ckch_inst and everything inside */
+ ebpt_delete(&entry->node);
+ LIST_DELETE(&entry->by_crtlist);
+ LIST_DELETE(&entry->by_ckch_store);
+
+ list_for_each_entry_safe(inst, inst_s, &entry->ckch_inst, by_crtlist_entry) {
+ ckch_inst_free(inst);
+ }
+ crtlist_free_filters(entry->filters);
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ free(entry->ssl_conf);
+ free(entry);
+ }
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ ha_free(&ctx->err);
+}
+
+
+/* IO Handler for the "add ssl crt-list" command It adds a new entry in the
+ * crt-list and generates the ckch_insts for each bind_conf that uses this crt-list
+ *
+ * The logic is the same as the "commit ssl cert" command but without the
+ * freeing of the old structures, because there are none.
+ *
+ * It uses the add_crtlist_ctx for the context.
+ */
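+/* A successful run typically prints something like (hypothetical paths):
+ *   Inserting certificate 'ssl/new.pem' in crt-list '/etc/haproxy/list.txt'...
+ *   Success!
+ * with one dot emitted per generated instance.
+ */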
+static int cli_io_handler_add_crtlist(struct appctx *appctx)
+{
+ struct add_crtlist_ctx *ctx = appctx->svcctx;
+ struct bind_conf_list *bind_conf_node;
+ struct stconn *sc = appctx_sc(appctx);
+ struct crtlist *crtlist = ctx->crtlist;
+ struct crtlist_entry *entry = ctx->entry;
+ struct ckch_store *store = entry->node.key;
+ struct ckch_inst *new_inst;
+ int i = 0;
+ int errcode = 0;
+
+ /* for each bind_conf which use the crt-list, a new ckch_inst must be
+ * created.
+ */
+ /* FIXME: Don't watch the other side !*/
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE))
+ goto end;
+
+ switch (ctx->state) {
+ case ADDCRT_ST_INIT:
+ /* This state just print the update message */
+ chunk_printf(&trash, "Inserting certificate '%s' in crt-list '%s'", store->path, crtlist->node.key);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = ADDCRT_ST_GEN;
+ __fallthrough;
+ case ADDCRT_ST_GEN:
+ bind_conf_node = ctx->bind_conf_node; /* get the previous ptr from the yield */
+ if (bind_conf_node == NULL)
+ bind_conf_node = crtlist->bind_conf;
+ for (; bind_conf_node; bind_conf_node = bind_conf_node->next) {
+ struct bind_conf *bind_conf = bind_conf_node->bind_conf;
+ struct sni_ctx *sni;
+
+ ctx->bind_conf_node = bind_conf_node;
+
+ /* yield every 10 generations */
+ if (i > 10) {
+ applet_have_more_data(appctx); /* let's come back later */
+ goto yield;
+ }
+
+ /* display one dot for each new instance */
+ if (applet_putstr(appctx, ".") == -1)
+ goto yield;
+
+ /* we don't support multi-cert bundles, only simple ones */
+ ctx->err = NULL;
+ errcode |= ckch_inst_new_load_store(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, &new_inst, &ctx->err);
+ if (errcode & ERR_CODE) {
+ ctx->state = ADDCRT_ST_ERROR;
+ goto error;
+ }
+
+ /* we need to initialize the SSL_CTX generated */
+ /* this iterate on the newly generated SNIs in the new instance to prepare their SSL_CTX */
+ list_for_each_entry(sni, &new_inst->sni_ctx, by_ckch_inst) {
+ if (!sni->order) { /* we initialized only the first SSL_CTX because it's the same in the other sni_ctx's */
+ ctx->err = NULL;
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, new_inst->ssl_conf, sni->ctx, sni->ckch_inst, &ctx->err);
+ if (errcode & ERR_CODE) {
+ ctx->state = ADDCRT_ST_ERROR;
+ goto error;
+ }
+ }
+ }
+
+ i++;
+ LIST_APPEND(&store->ckch_inst, &new_inst->by_ckchs);
+ LIST_APPEND(&entry->ckch_inst, &new_inst->by_crtlist_entry);
+ new_inst->crtlist_entry = entry;
+ }
+ ctx->state = ADDCRT_ST_INSERT;
+ __fallthrough;
+ case ADDCRT_ST_INSERT:
+ /* the insertion is called for every instance of the store, not
+ * only the one we generated.
+ * But ssl_sock_load_cert_sni() skips the SNIs that were already
+ * inserted. Not every instance has a bind_conf: it could be
+ * the store of a server, so we should be careful. */
+
+ list_for_each_entry(new_inst, &store->ckch_inst, by_ckchs) {
+ if (!new_inst->bind_conf) /* this is a server instance */
+ continue;
+ HA_RWLOCK_WRLOCK(SNI_LOCK, &new_inst->bind_conf->sni_lock);
+ ssl_sock_load_cert_sni(new_inst, new_inst->bind_conf);
+ HA_RWLOCK_WRUNLOCK(SNI_LOCK, &new_inst->bind_conf->sni_lock);
+ }
+ entry->linenum = ++crtlist->linecount;
+ ctx->entry = NULL;
+ ctx->state = ADDCRT_ST_SUCCESS;
+ __fallthrough;
+ case ADDCRT_ST_SUCCESS:
+ chunk_reset(&trash);
+ chunk_appendf(&trash, "\n");
+ if (ctx->err)
+ chunk_appendf(&trash, "%s", ctx->err);
+ chunk_appendf(&trash, "Success!\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ ctx->state = ADDCRT_ST_FIN;
+ break;
+
+ case ADDCRT_ST_ERROR:
+ error:
+ chunk_printf(&trash, "\n%sFailed!\n", ctx->err);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto yield;
+ break;
+
+ default:
+ break;
+ }
+
+end:
+ /* success: call the release function and don't come back */
+ return 1;
+yield:
+ return 0; /* should come back */
+}
+
+
+/*
+ * Parse a "add ssl crt-list <crt-list> <certfile>" line.
+ * Filters and options must be passed through the payload.
+ * It sets a struct add_crtlist_ctx.
+ */
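+/* Hypothetical CLI examples:
+ *   add ssl crt-list /etc/haproxy/list.txt ssl/new.pem
+ * or, with options and filters passed as a single-line payload:
+ *   add ssl crt-list /etc/haproxy/list.txt <<
+ *   ssl/new.pem [alpn h2] www.example.com
+ *
+ * (the payload being terminated by an empty line)
+ */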
+static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct add_crtlist_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int cfgerr = 0;
+ struct ckch_store *store;
+ char *err = NULL;
+ char path[MAXPATHLEN+1];
+ char *crtlist_path;
+ char *cert_path = NULL;
+ struct ebmb_node *eb;
+ struct ebpt_node *inserted;
+ struct crtlist *crtlist;
+ struct crtlist_entry *entry = NULL;
+ char *end;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || (!payload && !*args[4]))
+ return cli_err(appctx, "'add ssl crtlist' expects a filename and a certificate name\n");
+
+ crtlist_path = args[3];
+
+ /* strip trailing slashes, including first one */
+ for (end = crtlist_path + strlen(crtlist_path) - 1; end >= crtlist_path && *end == '/'; end--)
+ *end = 0;
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Operations on certificates are currently locked!\n");
+
+ eb = ebst_lookup(&crtlists_tree, crtlist_path);
+ if (!eb) {
+ memprintf(&err, "crt-list '%s' does not exist!", crtlist_path);
+ goto error;
+ }
+ crtlist = ebmb_entry(eb, struct crtlist, node);
+
+ entry = crtlist_entry_new();
+ if (entry == NULL) {
+ memprintf(&err, "Not enough memory!");
+ goto error;
+ }
+
+ if (payload) {
+ char *lf;
+
+ lf = strrchr(payload, '\n');
+ if (lf) {
+ memprintf(&err, "only one line of payload is supported!");
+ goto error;
+ }
+ /* cert_path is filled here */
+ cfgerr |= crtlist_parse_line(payload, &cert_path, entry, "CLI", 1, 1, &err);
+ if (cfgerr & ERR_CODE)
+ goto error;
+ } else {
+ cert_path = args[4];
+ }
+
+ if (!cert_path) {
+ memprintf(&err, "'add ssl crtlist' should contain the certificate name in the payload");
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ if (eb_gettag(crtlist->entries.b[EB_RGHT])) {
+ char *slash;
+
+ slash = strrchr(cert_path, '/');
+ if (!slash) {
+ memprintf(&err, "'%s' is a directory, certificate path '%s' must contain the directory path", (char *)crtlist->node.key, cert_path);
+ goto error;
+ }
+ /* temporarily replace '/' by 0 to do a strcmp */
+ *slash = '\0';
+ if (strcmp(cert_path, (char*)crtlist->node.key) != 0) {
+ *slash = '/';
+ memprintf(&err, "'%s' is a directory, certificate path '%s' must contain the directory path", (char *)crtlist->node.key, cert_path);
+ goto error;
+ }
+ *slash = '/';
+ }
+
+ if (*cert_path != '/' && global_ssl.crt_base) {
+ if ((strlen(global_ssl.crt_base) + 1 + strlen(cert_path)) >= sizeof(path) ||
+ snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, cert_path) >= sizeof(path)) {
+ memprintf(&err, "'%s' : path too long", cert_path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ cert_path = path;
+ }
+
+ store = ckchs_lookup(cert_path);
+ if (store == NULL) {
+ memprintf(&err, "certificate '%s' does not exist!", cert_path);
+ goto error;
+ }
+ if (store->data == NULL || store->data->cert == NULL) {
+ memprintf(&err, "certificate '%s' is empty!", cert_path);
+ goto error;
+ }
+
+ /* No need to check 'ocsp-update' inconsistency on a store that is not
+ * used yet (it was just added through the CLI for instance).
+ */
+ if (!LIST_ISEMPTY(&store->ckch_inst) &&
+ ocsp_update_check_cfg_consistency(store, entry, cert_path, &err))
+ goto error;
+
+ if (entry->ssl_conf)
+ store->data->ocsp_update_mode = entry->ssl_conf->ocsp_update;
+
+ /* check if it's possible to insert this new crtlist_entry */
+ entry->node.key = store;
+ inserted = ebpt_insert(&crtlist->entries, &entry->node);
+ if (inserted != &entry->node) {
+ memprintf(&err, "file already exists in this directory!");
+ goto error;
+ }
+
+ /* this is supposed to be a directory (EB_ROOT_UNIQUE), so no ssl_conf is allowed */
+ if ((entry->ssl_conf || entry->filters) && eb_gettag(crtlist->entries.b[EB_RGHT])) {
+ memprintf(&err, "this is a directory, SSL configuration and filters are not allowed");
+ goto error;
+ }
+
+ LIST_APPEND(&crtlist->ord_entries, &entry->by_crtlist);
+ entry->crtlist = crtlist;
+ LIST_APPEND(&store->crtlist_entry, &entry->by_ckch_store);
+
+ ctx->state = ADDCRT_ST_INIT;
+ ctx->crtlist = crtlist;
+ ctx->entry = entry;
+
+ /* unlock is done in the release handler */
+ return 0;
+
+error:
+ crtlist_entry_free(entry);
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "Can't edit the crt-list: %s\n", err ? err : "");
+ return cli_dynerr(appctx, err);
+}
+
+/* Parse a "del ssl crt-list <crt-list> <certfile>" line. */
+static int cli_parse_del_crtlist(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct ckch_store *store;
+ char *err = NULL;
+ char *crtlist_path, *cert_path;
+ struct ebmb_node *ebmb;
+ struct ebpt_node *ebpt;
+ struct crtlist *crtlist;
+ struct crtlist_entry *entry = NULL;
+ struct ckch_inst *inst, *inst_s;
+ int linenum = 0;
+ char *colons;
+ char *end;
+ int error_message_dumped = 0;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ if (!*args[3] || !*args[4])
+ return cli_err(appctx, "'del ssl crtlist' expects a filename and a certificate name\n");
+
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock))
+ return cli_err(appctx, "Can't delete!\nOperations on certificates are currently locked!\n");
+
+ crtlist_path = args[3];
+ cert_path = args[4];
+
+ colons = strchr(cert_path, ':');
+ if (colons) {
+ char *endptr;
+
+ linenum = strtol(colons + 1, &endptr, 10);
+ if (colons + 1 == endptr || *endptr != '\0') {
+ memprintf(&err, "wrong line number after colons in '%s'!", cert_path);
+ goto error;
+ }
+ *colons = '\0';
+ }
+
+ /* strip trailing slashes, including first one */
+ for (end = crtlist_path + strlen(crtlist_path) - 1; end >= crtlist_path && *end == '/'; end--)
+ *end = 0;
+
+ /* look for crtlist */
+ ebmb = ebst_lookup(&crtlists_tree, crtlist_path);
+ if (!ebmb) {
+ memprintf(&err, "crt-list '%s' does not exist!", crtlist_path);
+ goto error;
+ }
+ crtlist = ebmb_entry(ebmb, struct crtlist, node);
+
+ /* look for store */
+ store = ckchs_lookup(cert_path);
+ if (store == NULL) {
+ memprintf(&err, "certificate '%s' does not exist!", cert_path);
+ goto error;
+ }
+ if (store->data == NULL || store->data->cert == NULL) {
+ memprintf(&err, "certificate '%s' is empty!", cert_path);
+ goto error;
+ }
+
+ ebpt = ebpt_lookup(&crtlist->entries, store);
+ if (!ebpt) {
+ memprintf(&err, "certificate '%s' can't be found in crt-list '%s'!", cert_path, crtlist_path);
+ goto error;
+ }
+
+ /* list the line numbers of entries for errors in err, and select the right ebpt */
+ for (; ebpt; ebpt = ebpt_next_dup(ebpt)) {
+ struct crtlist_entry *tmp;
+
+ tmp = ebpt_entry(ebpt, struct crtlist_entry, node);
+ memprintf(&err, "%s%s%d", err ? err : "", err ? ", " : "", tmp->linenum);
+
+ /* select the entry we wanted */
+ if (linenum == 0 || tmp->linenum == linenum) {
+ if (!entry)
+ entry = tmp;
+ }
+ }
+
+ /* we didn't find the specified entry */
+ if (!entry) {
+ memprintf(&err, "found a certificate '%s' but the line number is incorrect, please specify a correct line number preceded by colons (%s)!", cert_path, err ? err : NULL);
+ goto error;
+ }
+
+ /* we didn't specify a line number but there were several entries */
+ if (linenum == 0 && ebpt_next_dup(&entry->node)) {
+ memprintf(&err, "found the certificate '%s' in several entries, please specify a line number preceded by colons (%s)!", cert_path, err ? err : NULL);
+ goto error;
+ }
+
+ /* Iterate over all the instances in order to see if any of them is a
+ * default instance. If this is the case, the entry won't be deleted. */
+ list_for_each_entry_safe(inst, inst_s, &entry->ckch_inst, by_crtlist_entry) {
+ if (inst->is_default && !inst->bind_conf->strict_sni) {
+ if (!error_message_dumped) {
+ memprintf(&err, "certificate '%s' cannot be deleted, it is used as default certificate by the following frontends:\n", cert_path);
+ error_message_dumped = 1;
+ }
+ memprintf(&err, "%s\t- %s:%d\n", err, inst->bind_conf->file, inst->bind_conf->line);
+ }
+ }
+ if (error_message_dumped)
+ goto error;
+
+ /* detach the entry and free the ckch_inst and everything inside */
+
+ ebpt_delete(&entry->node);
+ LIST_DELETE(&entry->by_crtlist);
+ LIST_DELETE(&entry->by_ckch_store);
+
+ list_for_each_entry_safe(inst, inst_s, &entry->ckch_inst, by_crtlist_entry) {
+ struct sni_ctx *sni, *sni_s;
+ struct ckch_inst_link_ref *link_ref, *link_ref_s;
+
+ HA_RWLOCK_WRLOCK(SNI_LOCK, &inst->bind_conf->sni_lock);
+ list_for_each_entry_safe(sni, sni_s, &inst->sni_ctx, by_ckch_inst) {
+ ebmb_delete(&sni->name);
+ LIST_DELETE(&sni->by_ckch_inst);
+ SSL_CTX_free(sni->ctx);
+ free(sni);
+ }
+ HA_RWLOCK_WRUNLOCK(SNI_LOCK, &inst->bind_conf->sni_lock);
+ LIST_DELETE(&inst->by_ckchs);
+ list_for_each_entry_safe(link_ref, link_ref_s, &inst->cafile_link_refs, list) {
+ LIST_DELETE(&link_ref->link->list);
+ LIST_DELETE(&link_ref->list);
+ free(link_ref);
+ }
+ ckch_inst_free(inst);
+ }
+
+ crtlist_free_filters(entry->filters);
+ ssl_sock_free_ssl_conf(entry->ssl_conf);
+ free(entry->ssl_conf);
+ free(entry);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "Entry '%s' deleted in crtlist '%s'!\n", cert_path, crtlist_path);
+ return cli_dynmsg(appctx, LOG_NOTICE, err);
+
+error:
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ err = memprintf(&err, "Can't delete the entry: %s\n", err ? err : "");
+ return cli_dynerr(appctx, err);
+}
+
+
+/* unlink and free all crt-list and crt-list entries */
+void crtlist_deinit()
+{
+ struct eb_node *node, *next;
+ struct crtlist *crtlist;
+
+ node = eb_first(&crtlists_tree);
+ while (node) {
+ next = eb_next(node);
+ crtlist = ebmb_entry(node, struct crtlist, node);
+ crtlist_free(crtlist);
+ node = next;
+ }
+}
+
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "add", "ssl", "crt-list", NULL }, "add ssl crt-list <list> <cert> [opts]* : add to crt-list file <list> a line <cert> or a payload", cli_parse_add_crtlist, cli_io_handler_add_crtlist, cli_release_add_crtlist },
+ { { "del", "ssl", "crt-list", NULL }, "del ssl crt-list <list> <cert[:line]> : delete a line <cert> from crt-list file <list>", cli_parse_del_crtlist, NULL, NULL },
+ { { "show", "ssl", "crt-list", NULL }, "show ssl crt-list [-n] [<list>] : show the list of crt-lists or the content of a crt-list file <list>", cli_parse_dump_crtlist, cli_io_handler_dump_crtlist, NULL },
+ { { NULL }, NULL, NULL, NULL } }
+};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
diff --git a/src/ssl_ocsp.c b/src/ssl_ocsp.c
new file mode 100644
index 0000000..1adddc4
--- /dev/null
+++ b/src/ssl_ocsp.c
@@ -0,0 +1,1986 @@
+
+/*
+ * SSL/TLS OCSP-related functions
+ *
+ * Copyright (C) 2022 HAProxy Technologies, Remi Tricot-Le Breton <rlebreton@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Acknowledgement:
+ * We'd like to specially thank the Stud project authors for a very clean
+ * and well documented code which helped us understand how the OpenSSL API
+ * ought to be used in non-blocking mode. This is one difficult part which
+ * is not easy to get from the OpenSSL doc, and reading the Stud code made
+ * it much more obvious than the examples in the OpenSSL package. Keep up
+ * the good works, guys !
+ *
+ * Stud is an extremely efficient and scalable SSL/TLS proxy which combines
+ * particularly well with haproxy. For more info about this project, visit :
+ * https://github.com/bumptech/stud
+ *
+ */
+
+/* Note: do NOT include openssl/xxx.h here, do it in openssl-compat.h */
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <netdb.h>
+#include <netinet/tcp.h>
+
+#include <import/ebpttree.h>
+#include <import/ebsttree.h>
+#include <import/lru.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/base64.h>
+#include <haproxy/channel.h>
+#include <haproxy/chunk.h>
+#include <haproxy/cli.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/server.h>
+#include <haproxy/shctx.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_crtlist.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/ssl_ocsp-t.h>
+#include <haproxy/http_client.h>
+
+
+/* ***** READ THIS before adding code here! *****
+ *
+ * Due to API incompatibilities between multiple OpenSSL versions and their
+ * derivatives, it's often tempting to add macros to (re-)define certain
+ * symbols. Please do not do this here, and do it in common/openssl-compat.h
+ * exclusively so that the whole code consistently uses the same macros.
+ *
+ * Whenever possible if a macro is missing in certain versions, it's better
+ * to conditionally define it in openssl-compat.h than using lots of ifdefs.
+ */
+
+#ifndef OPENSSL_NO_OCSP
+int ocsp_ex_index = -1;
+
+int ssl_sock_get_ocsp_arg_kt_index(int evp_keytype)
+{
+ switch (evp_keytype) {
+ case EVP_PKEY_RSA:
+ return 2;
+ case EVP_PKEY_DSA:
+ return 0;
+ case EVP_PKEY_EC:
+ return 1;
+ }
+
+ return -1;
+}
+
+/*
+ * Callback used to set OCSP status extension content in server hello.
+ */
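+/* This callback is meant to be registered on the SSL_CTX through OpenSSL's
+ * SSL_CTX_set_tlsext_status_cb(), e.g.:
+ *   SSL_CTX_set_tlsext_status_cb(ctx, ssl_sock_ocsp_stapling_cbk);
+ * (registration is performed elsewhere in the SSL setup code)
+ */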
+int ssl_sock_ocsp_stapling_cbk(SSL *ssl, void *arg)
+{
+ struct certificate_ocsp *ocsp;
+ struct ocsp_cbk_arg *ocsp_arg;
+ char *ssl_buf;
+ SSL_CTX *ctx;
+ EVP_PKEY *ssl_pkey;
+ int key_type;
+ int index;
+
+ ctx = SSL_get_SSL_CTX(ssl);
+ if (!ctx)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ ocsp_arg = SSL_CTX_get_ex_data(ctx, ocsp_ex_index);
+ if (!ocsp_arg)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ ssl_pkey = SSL_get_privatekey(ssl);
+ if (!ssl_pkey)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ key_type = EVP_PKEY_base_id(ssl_pkey);
+
+ if (ocsp_arg->is_single && ocsp_arg->single_kt == key_type)
+ ocsp = ocsp_arg->s_ocsp;
+ else {
+		/* For multiple certs per context, we have to find the correct
+		 * OCSP response based on the certificate type.
+		 */
+ index = ssl_sock_get_ocsp_arg_kt_index(key_type);
+
+ if (index < 0)
+ return SSL_TLSEXT_ERR_NOACK;
+
+		ocsp = ocsp_arg->m_ocsp[index];
+	}
+
+ if (!ocsp ||
+ !ocsp->response.area ||
+ !ocsp->response.data ||
+ (ocsp->expire < date.tv_sec))
+ return SSL_TLSEXT_ERR_NOACK;
+
+ ssl_buf = OPENSSL_malloc(ocsp->response.data);
+ if (!ssl_buf)
+ return SSL_TLSEXT_ERR_NOACK;
+
+ memcpy(ssl_buf, ocsp->response.area, ocsp->response.data);
+ SSL_set_tlsext_status_ocsp_resp(ssl, (unsigned char*)ssl_buf, ocsp->response.data);
+
+ return SSL_TLSEXT_ERR_OK;
+}
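+
+/* Sketch of how this callback is typically wired to an SSL_CTX elsewhere in
+ * the code (illustration only, assuming <ctx> and <ocsp_arg> were prepared by
+ * the caller):
+ *
+ *    SSL_CTX_set_tlsext_status_cb(ctx, ssl_sock_ocsp_stapling_cbk);
+ *    SSL_CTX_set_ex_data(ctx, ocsp_ex_index, ocsp_arg);
+ */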
+
+#endif /* !defined(OPENSSL_NO_OCSP) */
+
+
+#if (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP)
+
+struct eb_root cert_ocsp_tree = EB_ROOT_UNIQUE;
+
+__decl_thread(HA_SPINLOCK_T ocsp_tree_lock);
+
+struct eb_root ocsp_update_tree = EB_ROOT; /* updatable ocsp responses sorted by next_update in absolute time */
+
+/*
+ * Convert an OCSP_CERTID structure into a char buffer that can be used as a key
+ * in the OCSP response tree. It takes an <ocsp_cid> as parameter and builds a
+ * key of length <key_length> into the <certid> buffer. The key length cannot
+ * exceed OCSP_MAX_CERTID_ASN1_LENGTH bytes.
+ * Returns 1 on success, 0 on failure, and a negative value if <key_length>
+ * is NULL.
+ */
+int ssl_ocsp_build_response_key(OCSP_CERTID *ocsp_cid, unsigned char certid[OCSP_MAX_CERTID_ASN1_LENGTH], unsigned int *key_length)
+{
+ unsigned char *p = NULL;
+ int i;
+
+ if (!key_length)
+ return -1;
+
+ *key_length = 0;
+
+ if (!ocsp_cid)
+ return 0;
+
+ i = i2d_OCSP_CERTID(ocsp_cid, NULL);
+	if (i <= 0 || (i > OCSP_MAX_CERTID_ASN1_LENGTH))
+ return 0;
+
+ p = certid;
+ *key_length = i2d_OCSP_CERTID(ocsp_cid, &p);
+
+ return *key_length > 0;
+}
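+
+/* Usage sketch (illustration only): build the tree key for a given <cid> and
+ * look up the matching response in the OCSP response tree:
+ *
+ *    unsigned char key[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ *    unsigned int key_length;
+ *    struct certificate_ocsp *ocsp;
+ *
+ *    if (ssl_ocsp_build_response_key(cid, key, &key_length) > 0)
+ *            ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, key,
+ *                                                          OCSP_MAX_CERTID_ASN1_LENGTH);
+ */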
+
+/* This function checks that the OCSP response (in DER format) contained in
+ * chunk 'ocsp_response' is valid, and returns an error otherwise.
+ * If 'cid' is not NULL, it is compared to the OCSP certificate ID contained
+ * in the OCSP response, and an error is returned if they do not match.
+ * If the OCSP response is valid:
+ *   - if 'ocsp' is not NULL, the chunk is copied into the OCSP response
+ *     container pointed to by 'ocsp';
+ *   - if 'ocsp' is NULL, the function looks up the OCSP response containers
+ *     tree (using the ASN1 form of the OCSP certificate ID extracted from the
+ *     response as index) and an error is returned if no entry matches. If an
+ *     OCSP response is already present in the container, it is overwritten.
+ *
+ * Note: an OCSP response containing more than one OCSP single response is not
+ * considered valid.
+ *
+ * Returns 0 on success, 1 in case of error.
+ */
+int ssl_sock_load_ocsp_response(struct buffer *ocsp_response,
+ struct certificate_ocsp *ocsp,
+ OCSP_CERTID *cid, char **err)
+{
+ OCSP_RESPONSE *resp;
+ OCSP_BASICRESP *bs = NULL;
+ OCSP_SINGLERESP *sr;
+ OCSP_CERTID *id;
+ unsigned char *p = (unsigned char *) ocsp_response->area;
+	int rc, count_sr;
+ ASN1_GENERALIZEDTIME *revtime, *thisupd, *nextupd = NULL;
+ int reason;
+ int ret = 1;
+#ifdef HAVE_ASN1_TIME_TO_TM
+ struct tm nextupd_tm = {0};
+#endif
+
+ resp = d2i_OCSP_RESPONSE(NULL, (const unsigned char **)&p,
+ ocsp_response->data);
+ if (!resp) {
+ memprintf(err, "Unable to parse OCSP response");
+ goto out;
+ }
+
+ rc = OCSP_response_status(resp);
+ if (rc != OCSP_RESPONSE_STATUS_SUCCESSFUL) {
+ memprintf(err, "OCSP response status not successful");
+ goto out;
+ }
+
+ bs = OCSP_response_get1_basic(resp);
+ if (!bs) {
+ memprintf(err, "Failed to get basic response from OCSP Response");
+ goto out;
+ }
+
+ count_sr = OCSP_resp_count(bs);
+ if (count_sr > 1) {
+ memprintf(err, "OCSP response ignored because contains multiple single responses (%d)", count_sr);
+ goto out;
+ }
+
+ sr = OCSP_resp_get0(bs, 0);
+ if (!sr) {
+ memprintf(err, "Failed to get OCSP single response");
+ goto out;
+ }
+
+ id = (OCSP_CERTID*)OCSP_SINGLERESP_get0_id(sr);
+
+ rc = OCSP_single_get0_status(sr, &reason, &revtime, &thisupd, &nextupd);
+ if (rc != V_OCSP_CERTSTATUS_GOOD && rc != V_OCSP_CERTSTATUS_REVOKED) {
+ memprintf(err, "OCSP single response: certificate status is unknown");
+ goto out;
+ }
+
+ if (!nextupd) {
+ memprintf(err, "OCSP single response: missing nextupdate");
+ goto out;
+ }
+
+ rc = OCSP_check_validity(thisupd, nextupd, OCSP_MAX_RESPONSE_TIME_SKEW, -1);
+ if (!rc) {
+ memprintf(err, "OCSP single response: no longer valid.");
+ goto out;
+ }
+
+ if (cid) {
+ if (OCSP_id_cmp(id, cid)) {
+ memprintf(err, "OCSP single response: Certificate ID does not match certificate and issuer");
+ goto out;
+ }
+ }
+
+ if (!ocsp) {
+ unsigned char key[OCSP_MAX_CERTID_ASN1_LENGTH];
+ unsigned char *p;
+
+ rc = i2d_OCSP_CERTID(id, NULL);
+ if (!rc) {
+ memprintf(err, "OCSP single response: Unable to encode Certificate ID");
+ goto out;
+ }
+
+ if (rc > OCSP_MAX_CERTID_ASN1_LENGTH) {
+ memprintf(err, "OCSP single response: Certificate ID too long");
+ goto out;
+ }
+
+ p = key;
+ memset(key, 0, OCSP_MAX_CERTID_ASN1_LENGTH);
+ i2d_OCSP_CERTID(id, &p);
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, key, OCSP_MAX_CERTID_ASN1_LENGTH);
+ if (!ocsp) {
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ memprintf(err, "OCSP single response: Certificate ID does not match any certificate or issuer");
+ goto out;
+ }
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ }
+
+	/* According to the comments on "chunk_dup", the previous chunk
+	 * buffer will be freed.
+	 */
+ if (!chunk_dup(&ocsp->response, ocsp_response)) {
+ memprintf(err, "OCSP response: Memory allocation error");
+ goto out;
+ }
+
+#ifdef HAVE_ASN1_TIME_TO_TM
+ if (ASN1_TIME_to_tm(nextupd, &nextupd_tm) == 0) {
+ memprintf(err, "OCSP single response: Invalid \"Next Update\" time");
+ goto out;
+ }
+ ocsp->expire = my_timegm(&nextupd_tm) - OCSP_MAX_RESPONSE_TIME_SKEW;
+#else
+ ocsp->expire = asn1_generalizedtime_to_epoch(nextupd) - OCSP_MAX_RESPONSE_TIME_SKEW;
+ if (ocsp->expire < 0) {
+ memprintf(err, "OCSP single response: Invalid \"Next Update\" time");
+ goto out;
+ }
+#endif
+
+ ret = 0;
+out:
+ ERR_clear_error();
+
+ if (bs)
+ OCSP_BASICRESP_free(bs);
+
+ if (resp)
+ OCSP_RESPONSE_free(resp);
+
+ return ret;
+}
+
+/*
+ * External function used to update the OCSP response in the OCSP response
+ * containers tree. The chunk 'ocsp_response' must contain the OCSP response
+ * to update in DER format.
+ *
+ * Returns 0 on success, 1 in case of error.
+ */
+int ssl_sock_update_ocsp_response(struct buffer *ocsp_response, char **err)
+{
+ return ssl_sock_load_ocsp_response(ocsp_response, NULL, NULL, err);
+}
+
+
+
+#if !defined OPENSSL_IS_BORINGSSL
+/*
+ * Decrease the refcount of the struct certificate_ocsp and free it when it is
+ * not used anymore. The entry is also removed from the tree when freed.
+ */
+void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp)
+{
+ if (!ocsp)
+ return;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp->refcount_store--;
+ if (ocsp->refcount_store <= 0) {
+ BUG_ON(ocsp->refcount_instance > 0);
+ ebmb_delete(&ocsp->key);
+ eb64_delete(&ocsp->next_update);
+ X509_free(ocsp->issuer);
+ ocsp->issuer = NULL;
+ sk_X509_pop_free(ocsp->chain, X509_free);
+ ocsp->chain = NULL;
+ chunk_destroy(&ocsp->response);
+ if (ocsp->uri) {
+ ha_free(&ocsp->uri->area);
+ ha_free(&ocsp->uri);
+ }
+
+ free(ocsp);
+ }
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+}
+
+void ssl_sock_free_ocsp_instance(struct certificate_ocsp *ocsp)
+{
+ if (!ocsp)
+ return;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp->refcount_instance--;
+ if (ocsp->refcount_instance <= 0) {
+ eb64_delete(&ocsp->next_update);
+ }
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+}
+
+
+/*
+ * This function dumps the details of an OCSP_CERTID. It is based on
+ * ocsp_certid_print in OpenSSL.
+ */
+static inline int ocsp_certid_print(BIO *bp, OCSP_CERTID *certid, int indent)
+{
+ ASN1_OCTET_STRING *piNameHash = NULL;
+ ASN1_OCTET_STRING *piKeyHash = NULL;
+ ASN1_INTEGER *pSerial = NULL;
+
+ if (OCSP_id_get0_info(&piNameHash, NULL, &piKeyHash, &pSerial, certid)) {
+
+ BIO_printf(bp, "%*sCertificate ID:\n", indent, "");
+ indent += 2;
+ BIO_printf(bp, "%*sIssuer Name Hash: ", indent, "");
+#ifndef USE_OPENSSL_WOLFSSL
+ i2a_ASN1_STRING(bp, piNameHash, 0);
+#else
+ wolfSSL_ASN1_STRING_print(bp, piNameHash);
+#endif
+ BIO_printf(bp, "\n%*sIssuer Key Hash: ", indent, "");
+#ifndef USE_OPENSSL_WOLFSSL
+ i2a_ASN1_STRING(bp, piKeyHash, 0);
+#else
+		wolfSSL_ASN1_STRING_print(bp, piKeyHash);
+#endif
+ BIO_printf(bp, "\n%*sSerial Number: ", indent, "");
+ i2a_ASN1_INTEGER(bp, pSerial);
+ }
+ return 1;
+}
+
+
+enum {
+ SHOW_OCSPRESP_FMT_DFLT,
+ SHOW_OCSPRESP_FMT_TEXT,
+ SHOW_OCSPRESP_FMT_B64
+};
+
+struct show_ocspresp_cli_ctx {
+ struct certificate_ocsp *ocsp;
+ int format;
+};
+
+/*
+ * Dump the details about an OCSP response in DER format stored in
+ * <ocsp_response> into buffer <out>.
+ * Returns 0 in case of success.
+ */
+int ssl_ocsp_response_print(struct buffer *ocsp_response, struct buffer *out)
+{
+ BIO *bio = NULL;
+ int write = -1;
+ OCSP_RESPONSE *resp;
+ const unsigned char *p;
+ int retval = -1;
+
+ if (!ocsp_response)
+ return -1;
+
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ return -1;
+
+ p = (const unsigned char*)ocsp_response->area;
+
+ resp = d2i_OCSP_RESPONSE(NULL, &p, ocsp_response->data);
+ if (!resp) {
+ chunk_appendf(out, "Unable to parse OCSP response");
+ goto end;
+ }
+
+#ifndef USE_OPENSSL_WOLFSSL
+ if (OCSP_RESPONSE_print(bio, resp, 0) != 0) {
+#else
+ if (wolfSSL_d2i_OCSP_RESPONSE_bio(bio, &resp) != 0) {
+#endif
+ struct buffer *trash = get_trash_chunk();
+ struct ist ist_block = IST_NULL;
+ struct ist ist_double_lf = IST_NULL;
+ static struct ist double_lf = IST("\n\n");
+
+ write = BIO_read(bio, trash->area, trash->size - 1);
+ if (write <= 0)
+ goto end;
+ trash->data = write;
+
+ /* Look for empty lines in the 'trash' buffer and add a space to
+ * the beginning to avoid having empty lines in the output
+ * (without changing the appearance of the information
+ * displayed).
+ */
+ ist_block = ist2(b_orig(trash), b_data(trash));
+
+ ist_double_lf = istist(ist_block, double_lf);
+
+ while (istlen(ist_double_lf)) {
+ /* istptr(ist_double_lf) points to the first \n of a
+ * \n\n pattern.
+ */
+ uint empty_line_offset = istptr(ist_double_lf) + 1 - istptr(ist_block);
+
+ /* Write up to the first '\n' of the "\n\n" pattern into
+ * the output buffer.
+ */
+ b_putblk(out, istptr(ist_block), empty_line_offset);
+ /* Add an extra space. */
+ b_putchr(out, ' ');
+
+ /* Keep looking for empty lines in the rest of the data. */
+ ist_block = istadv(ist_block, empty_line_offset);
+
+ ist_double_lf = istist(ist_block, double_lf);
+ }
+
+ retval = (b_istput(out, ist_block) <= 0);
+ }
+
+end:
+ if (bio)
+ BIO_free(bio);
+
+ OCSP_RESPONSE_free(resp);
+
+ return retval;
+}
+
+/*
+ * Dump the contents of an OCSP response in DER format stored in
+ * <ocsp_response> into buffer <out> after converting it to base64.
+ * Returns 0 in case of success.
+ */
+static int ssl_ocsp_response_print_base64(struct buffer *ocsp_response, struct buffer *out)
+{
+ int b64len = 0;
+
+ b64len = a2base64(b_orig(ocsp_response), b_data(ocsp_response),
+ b_orig(out), b_size(out));
+
+ if (b64len < 0)
+ return 1;
+
+ out->data = b64len;
+
+	/* Add an empty line at the end of the output. */
+	chunk_appendf(out, "\n");
+
+ return 0;
+}
+
+/*
+ * Dump the details of the OCSP response of ID <ocsp_certid> into buffer <out>.
+ * Returns 0 in case of success.
+ */
+int ssl_get_ocspresponse_detail(unsigned char *ocsp_certid, struct buffer *out)
+{
+ struct certificate_ocsp *ocsp;
+ int ret = 0;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, ocsp_certid, OCSP_MAX_CERTID_ASN1_LENGTH);
+ if (!ocsp) {
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ return -1;
+ }
+
+ ret = ssl_ocsp_response_print(&ocsp->response, out);
+
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ return ret;
+}
+
+
+/* IO handler for the "show ssl ocsp-response <id>" command.
+ * The current entry is taken from appctx->svcctx.
+ */
+static int cli_io_handler_show_ocspresponse_detail(struct appctx *appctx)
+{
+ struct buffer *trash = get_trash_chunk();
+ struct show_ocspresp_cli_ctx *ctx = appctx->svcctx;
+ struct certificate_ocsp *ocsp = ctx->ocsp;
+ int retval = 0;
+
+ switch (ctx->format) {
+ case SHOW_OCSPRESP_FMT_DFLT:
+ case SHOW_OCSPRESP_FMT_TEXT:
+ retval = ssl_ocsp_response_print(&ocsp->response, trash);
+ break;
+ case SHOW_OCSPRESP_FMT_B64:
+ retval = ssl_ocsp_response_print_base64(&ocsp->response, trash);
+ break;
+ }
+
+ if (retval)
+ return 1;
+
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+
+ appctx->svcctx = NULL;
+ return 1;
+
+yield:
+ return 0;
+}
+
+void ssl_sock_ocsp_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ struct ocsp_cbk_arg *ocsp_arg;
+
+ if (ptr) {
+ ocsp_arg = ptr;
+
+ if (ocsp_arg->is_single) {
+ ssl_sock_free_ocsp_instance(ocsp_arg->s_ocsp);
+ ocsp_arg->s_ocsp = NULL;
+ } else {
+ int i;
+
+ for (i = 0; i < SSL_SOCK_NUM_KEYTYPES; i++) {
+ ssl_sock_free_ocsp_instance(ocsp_arg->m_ocsp[i]);
+ ocsp_arg->m_ocsp[i] = NULL;
+ }
+ }
+ free(ocsp_arg);
+ }
+}
+
+/*
+ * Extract the first OCSP URI (if any) contained in <cert> and write it into
+ * <out>.
+ * Returns 0 in case of success, 1 otherwise.
+ */
+int ssl_ocsp_get_uri_from_cert(X509 *cert, struct buffer *out, char **err)
+{
+ STACK_OF(OPENSSL_STRING) *ocsp_uri_stk = NULL;
+ int ret = 1;
+
+ if (!cert || !out)
+ goto end;
+
+ ocsp_uri_stk = X509_get1_ocsp(cert);
+ if (ocsp_uri_stk == NULL) {
+ memprintf(err, "%sNo OCSP URL stack!\n", *err ? *err : "");
+ goto end;
+ }
+
+ if (!chunk_strcpy(out, sk_OPENSSL_STRING_value(ocsp_uri_stk, 0))) {
+ memprintf(err, "%sOCSP URI too long!\n", *err ? *err : "");
+ goto end;
+ }
+ if (b_data(out) == 0) {
+ memprintf(err, "%sNo OCSP URL!\n", *err ? *err : "");
+ goto end;
+ }
+
+ ret = 0;
+
+end:
+ X509_email_free(ocsp_uri_stk);
+ return ret;
+}
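+
+/* Usage sketch (illustration only): extract the responder URI of <cert> into
+ * a trash chunk:
+ *
+ *    struct buffer *uri = get_trash_chunk();
+ *    char *err = NULL;
+ *
+ *    if (ssl_ocsp_get_uri_from_cert(cert, uri, &err) != 0)
+ *            ha_alert("%s", err ? err : "unknown error");
+ */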
+
+/*
+ * Create the url and request body that make a proper OCSP request for the
+ * <certid>. The <req_url> parameter should already hold the OCSP URI that was
+ * extracted from the corresponding certificate. Depending on the size of the
+ * certid we will either append data to the <req_url> to create a proper URL
+ * that will be sent with a GET command, or the <req_body> will be constructed
+ * in case of a POST.
+ * Returns 0 in case of success.
+ */
+int ssl_ocsp_create_request_details(const OCSP_CERTID *certid, struct buffer *req_url,
+ struct buffer *req_body, char **err)
+{
+ int errcode = -1;
+ OCSP_REQUEST *ocsp;
+ struct buffer *bin_request = get_trash_chunk();
+ unsigned char *outbuf = (unsigned char*)b_orig(bin_request);
+
+ ocsp = OCSP_REQUEST_new();
+ if (ocsp == NULL) {
+ memprintf(err, "%sCan't create OCSP_REQUEST\n", *err ? *err : "");
+ goto end;
+ }
+
+ if (OCSP_request_add0_id(ocsp, (OCSP_CERTID*)certid) == NULL) {
+ memprintf(err, "%sOCSP_request_add0_id() error\n", *err ? *err : "");
+ goto end;
+ }
+
+ bin_request->data = i2d_OCSP_REQUEST(ocsp, &outbuf);
+ if (b_data(bin_request) <= 0) {
+ memprintf(err, "%si2d_OCSP_REQUEST() error\n", *err ? *err : "");
+ goto end;
+ }
+
+ /* HTTP based OCSP requests can use either the GET or the POST method to
+ * submit their requests. To enable HTTP caching, small requests (that
+ * after encoding are less than 255 bytes), MAY be submitted using GET.
+ * If HTTP caching is not important, or the request is greater than 255
+ * bytes, the request SHOULD be submitted using POST.
+ */
+ if (b_data(bin_request) + b_data(req_url) < 0xff) {
+ struct buffer *b64buf = get_trash_chunk();
+ char *ret = NULL;
+ int base64_ret = 0;
+
+ chunk_strcat(req_url, "/");
+
+ base64_ret = a2base64(b_orig(bin_request), b_data(bin_request),
+ b_orig(b64buf), b_size(b64buf));
+
+ if (base64_ret < 0) {
+ memprintf(err, "%sa2base64() error\n", *err ? *err : "");
+ goto end;
+ }
+
+ b64buf->data = base64_ret;
+
+ ret = encode_chunk((char*)b_stop(req_url), b_orig(req_url) + b_size(req_url), '%',
+ query_encode_map, b64buf);
+ if (ret && *ret == '\0') {
+ req_url->data = ret - b_orig(req_url);
+ errcode = 0;
+ }
+ }
+ else {
+ chunk_cpy(req_body, bin_request);
+ errcode = 0;
+ }
+
+end:
+ OCSP_REQUEST_free(ocsp);
+
+ return errcode;
+}
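+
+/* For a small request, the resulting GET URL has this shape (illustrative
+ * values only):
+ *
+ *    http://ocsp.example.com/MFEwTzBNMEswSTAJBgUrDgMCGgUABBQ...
+ *
+ * i.e. the DER-encoded request, base64 encoded then percent-encoded, appended
+ * to the responder URI. Larger requests end up in <req_body> and are sent
+ * with a POST instead.
+ */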
+
+/*
+ * Parse an OCSP_RESPONSE contained in <respbuf> and check its validity in
+ * regard to the contents of <ckch> or the <issuer> certificate.
+ * Certificate_ocsp structure does not keep a reference to the corresponding
+ * ckch_store so outside of a CLI context (see "send ssl ocsp-response"
+ * command), we only have an easy access to the issuer's certificate whose
+ * reference is held in the structure.
+ * Return 0 in case of success, 1 otherwise.
+ */
+int ssl_ocsp_check_response(STACK_OF(X509) *chain, X509 *issuer,
+ struct buffer *respbuf, char **err)
+{
+ int ret = 1;
+ int n;
+ OCSP_RESPONSE *response = NULL;
+ OCSP_BASICRESP *basic = NULL;
+ X509_STORE *store = NULL;
+ const unsigned char *start = (const unsigned char*)b_orig(respbuf);
+
+ if (!chain && !issuer) {
+ memprintf(err, "check_ocsp_response needs a certificate validation chain or an issuer certificate");
+ goto end;
+ }
+
+ response = d2i_OCSP_RESPONSE(NULL, &start, b_data(respbuf));
+ if (!response) {
+ memprintf(err, "d2i_OCSP_RESPONSE() failed");
+ goto end;
+ }
+
+ n = OCSP_response_status(response);
+
+ if (n != OCSP_RESPONSE_STATUS_SUCCESSFUL) {
+ memprintf(err, "OCSP response not successful (%d: %s)",
+ n, OCSP_response_status_str(n));
+ goto end;
+ }
+
+ basic = OCSP_response_get1_basic(response);
+ if (basic == NULL) {
+ memprintf(err, "OCSP_response_get1_basic() failed");
+ goto end;
+ }
+
+ /* Create a temporary store in which we add the certificate's chain
+ * certificates. We assume that all those certificates can be trusted
+ * because they were provided by the user.
+ * The only ssl item that needs to be verified here is the OCSP
+ * response.
+ */
+ store = X509_STORE_new();
+ if (!store) {
+ memprintf(err, "X509_STORE_new() failed");
+ goto end;
+ }
+
+ if (chain) {
+ int i = 0;
+ for (i = 0; i < sk_X509_num(chain); i++) {
+ X509 *cert = sk_X509_value(chain, i);
+ X509_STORE_add_cert(store, cert);
+ }
+ }
+
+ if (issuer)
+ X509_STORE_add_cert(store, issuer);
+
+ if (OCSP_basic_verify(basic, chain, store, OCSP_TRUSTOTHER) != 1) {
+ memprintf(err, "OCSP_basic_verify() failed");
+ goto end;
+ }
+
+ ret = 0;
+
+end:
+ X509_STORE_free(store);
+ OCSP_RESPONSE_free(response);
+ OCSP_BASICRESP_free(basic);
+ return ret;
+}
+
+
+/*
+ * OCSP-UPDATE RELATED FUNCTIONS AND STRUCTURES
+ */
+
+struct task *ocsp_update_task __read_mostly = NULL;
+static struct proxy *httpclient_ocsp_update_px;
+
+static struct ssl_ocsp_task_ctx {
+ struct certificate_ocsp *cur_ocsp;
+ struct httpclient *hc;
+ struct appctx *appctx;
+ int flags;
+ int update_status;
+} ssl_ocsp_task_ctx;
+
+const struct http_hdr ocsp_request_hdrs[] = {
+ { IST("Content-Type"), IST("application/ocsp-request") },
+ { IST_NULL, IST_NULL }
+};
+
+enum {
+ OCSP_UPDT_UNKNOWN = 0,
+ OCSP_UPDT_OK = 1,
+ OCSP_UPDT_ERR_HTTP_STATUS = 2,
+ OCSP_UPDT_ERR_HTTP_HDR = 3,
+ OCSP_UPDT_ERR_CHECK = 4,
+ OCSP_UPDT_ERR_INSERT = 5,
+ OCSP_UPDT_ERR_LAST /* Must be last */
+};
+
+const struct ist ocsp_update_errors[] = {
+ [OCSP_UPDT_UNKNOWN] = IST("Unknown"),
+ [OCSP_UPDT_OK] = IST("Update successful"),
+ [OCSP_UPDT_ERR_HTTP_STATUS] = IST("HTTP error"),
+ [OCSP_UPDT_ERR_HTTP_HDR] = IST("Missing \"ocsp-response\" header"),
+ [OCSP_UPDT_ERR_CHECK] = IST("OCSP response check failure"),
+ [OCSP_UPDT_ERR_INSERT] = IST("Error during insertion")
+};
+
+static struct task *ssl_ocsp_update_responses(struct task *task, void *context, unsigned int state);
+
+/*
+ * Create the main OCSP update task that will iterate over the OCSP responses
+ * stored in ocsp_update_tree and send an OCSP request via the http_client
+ * applet to the corresponding OCSP responder. The task will then be in charge
+ * of processing the response, verifying it and reinserting it in the actual
+ * ocsp response tree if the response is valid.
+ * Returns 0 in case of success.
+ */
+int ssl_create_ocsp_update_task(char **err)
+{
+ if (ocsp_update_task)
+ return 0; /* Already created */
+
+ ocsp_update_task = task_new_anywhere();
+ if (!ocsp_update_task) {
+ memprintf(err, "parsing : failed to allocate global ocsp update task.");
+ return -1;
+ }
+
+ ocsp_update_task->process = ssl_ocsp_update_responses;
+ ocsp_update_task->context = NULL;
+
+ return 0;
+}
+
+static int ssl_ocsp_task_schedule()
+{
+ if (ocsp_update_task)
+ task_schedule(ocsp_update_task, now_ms);
+
+ return 0;
+}
+REGISTER_POST_CHECK(ssl_ocsp_task_schedule);
+
+void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp);
+
+void ssl_destroy_ocsp_update_task(void)
+{
+ struct eb64_node *node, *next;
+ if (!ocsp_update_task)
+ return;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ node = eb64_first(&ocsp_update_tree);
+ while (node) {
+ next = eb64_next(node);
+ eb64_delete(node);
+ node = next;
+ }
+
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ task_destroy(ocsp_update_task);
+ ocsp_update_task = NULL;
+
+ ssl_sock_free_ocsp(ssl_ocsp_task_ctx.cur_ocsp);
+ ssl_ocsp_task_ctx.cur_ocsp = NULL;
+
+ if (ssl_ocsp_task_ctx.hc) {
+ httpclient_stop_and_destroy(ssl_ocsp_task_ctx.hc);
+ ssl_ocsp_task_ctx.hc = NULL;
+ }
+}
+
+static inline void ssl_ocsp_set_next_update(struct certificate_ocsp *ocsp)
+{
+ int update_margin = (ocsp->expire >= SSL_OCSP_UPDATE_MARGIN) ? SSL_OCSP_UPDATE_MARGIN : 0;
+
+ ocsp->next_update.key = MIN(date.tv_sec + global_ssl.ocsp_update.delay_max,
+ ocsp->expire - update_margin);
+
+ /* An already existing valid OCSP response that expires within less than
+ * SSL_OCSP_UPDATE_DELAY_MIN or has no 'Next Update' field should not be
+ * updated more than once every 5 minutes in order to avoid continuous
+ * update of the same response. */
+ if (b_data(&ocsp->response))
+ ocsp->next_update.key = MAX(ocsp->next_update.key,
+ date.tv_sec + global_ssl.ocsp_update.delay_min);
+}
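+
+/* Worked example (values assumed for illustration): with delay_max set to 1h,
+ * delay_min set to 5min and an update margin of 5min, a response expiring in
+ * 30 minutes is scheduled at expire - 5min (25 minutes from now), while a
+ * response expiring in two days is capped at now + 1h.
+ */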
+
+/*
+ * Insert a certificate_ocsp structure into the ocsp_update_tree tree, in which
+ * entries are sorted by absolute date of the next update. The next_update key
+ * will be the smallest out of the actual expire value of the response and
+ * now+1H. This arbitrary 1H value ensures that ocsp responses are updated
+ * periodically even when they have a long expire time, while not overloading
+ * the system too much (in theory). Likewise, a minimum 5 minutes interval is
+ * defined in order to avoid updating too often responses that have a really
+ * short expire time or even no 'Next Update' at all.
+ */
+int ssl_ocsp_update_insert(struct certificate_ocsp *ocsp)
+{
+ /* Set next_update based on current time and the various OCSP
+ * minimum/maximum update times.
+ */
+ ssl_ocsp_set_next_update(ocsp);
+
+ ocsp->fail_count = 0;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp->updating = 0;
+	/* An entry with update_once set to 1 was only supposed to be updated
+	 * once; it does not need to be reinserted into the update tree.
+	 */
+ if (!ocsp->update_once)
+ eb64_insert(&ocsp_update_tree, &ocsp->next_update);
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ return 0;
+}
+
+/*
+ * Reinsert an entry in the update tree. The entry's next update time cannot
+ * occur before now+SSL_OCSP_HTTP_ERR_REPLAY.
+ * This is supposed to be used in case of http error (ocsp responder unreachable
+ * for instance). This ensures that the entry does not get reinserted at the
+ * beginning of the tree every time.
+ */
+int ssl_ocsp_update_insert_after_error(struct certificate_ocsp *ocsp)
+{
+ int replay_delay = 0;
+
+ /*
+ * Set next_update based on current time and the various OCSP
+ * minimum/maximum update times.
+ */
+ ssl_ocsp_set_next_update(ocsp);
+
+ ++ocsp->fail_count;
+
+ /*
+ * The replay delay will be increased for every consecutive update
+ * failure, up to the SSL_OCSP_UPDATE_DELAY_MAX delay. It will ensure
+ * that the replay delay will be one minute for the first failure and
+	 * will be multiplied by 2 for every subsequent failure, while still
+ * being at most 1 hour (with the current default values).
+ */
+ replay_delay = MIN(SSL_OCSP_HTTP_ERR_REPLAY * (1 << ocsp->fail_count),
+ global_ssl.ocsp_update.delay_max);
+
+ if (ocsp->next_update.key < date.tv_sec + replay_delay)
+ ocsp->next_update.key = date.tv_sec + replay_delay;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp->updating = 0;
+	/* An entry with update_once set to 1 was only supposed to be updated
+	 * once; it does not need to be reinserted into the update tree.
+	 */
+ if (!ocsp->update_once)
+ eb64_insert(&ocsp_update_tree, &ocsp->next_update);
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ return 0;
+}
+
+void ocsp_update_response_stline_cb(struct httpclient *hc)
+{
+ struct task *task = hc->caller;
+
+ if (!task)
+ return;
+
+ ssl_ocsp_task_ctx.flags |= HC_F_RES_STLINE;
+ task_wakeup(task, TASK_WOKEN_MSG);
+}
+
+void ocsp_update_response_headers_cb(struct httpclient *hc)
+{
+ struct task *task = hc->caller;
+
+ if (!task)
+ return;
+
+ ssl_ocsp_task_ctx.flags |= HC_F_RES_HDR;
+ task_wakeup(task, TASK_WOKEN_MSG);
+}
+
+void ocsp_update_response_body_cb(struct httpclient *hc)
+{
+ struct task *task = hc->caller;
+
+ if (!task)
+ return;
+
+ ssl_ocsp_task_ctx.flags |= HC_F_RES_BODY;
+ task_wakeup(task, TASK_WOKEN_MSG);
+}
+
+void ocsp_update_response_end_cb(struct httpclient *hc)
+{
+ struct task *task = hc->caller;
+
+ if (!task)
+ return;
+
+ ssl_ocsp_task_ctx.flags |= HC_F_RES_END;
+ task_wakeup(task, TASK_WOKEN_MSG);
+}
+
+
+/*
+ * Send a log line that will use the dedicated proxy's error_logformat string.
+ * It uses the sess_log function instead of, for instance, app_log in order to
+ * benefit from the "generic" items that can be added to a log format line such
+ * as the date and frontend name that can be found at the beginning of the
+ * ocspupdate_log_format line.
+ */
+static void ssl_ocsp_send_log()
+{
+ if (!ssl_ocsp_task_ctx.appctx)
+ return;
+
+ sess_log(ssl_ocsp_task_ctx.appctx->sess);
+}
+
+/*
+ * This is the main function of the ocsp auto update mechanism. It has two
+ * distinct parts and the branching to one or the other is completely based on
+ * the fact that the cur_ocsp pointer of the ssl_ocsp_task_ctx member is set.
+ *
+ * If the pointer is not set, we need to look at the first item of the update
+ * tree and see if it needs to be updated. If it does not, we simply wait
+ * until the time is right and put the task back to sleep. If it does need to
+ * be updated, we
+ * simply build and send the corresponding ocsp request thanks to the
+ * http_client. The task is then sent to sleep with an expire time set to
+ * infinity. The http_client will wake it back up once the response is received
+ * (or a timeout occurs). Just note that during this whole process the
+ * certificate_ocsp object corresponding to the entry being updated is taken out
+ * of the update tree and only stored in the ssl_ocsp_task_ctx context.
+ *
+ * Once the task is woken up by the http_client, it branches on the response
+ * processing part of the function which basically checks that the response is
+ * valid and inserts it into the ocsp_response tree. The task then goes back to
+ * sleep until another entry needs to be updated.
+ */
+static struct task *ssl_ocsp_update_responses(struct task *task, void *context, unsigned int state)
+{
+ unsigned int next_wakeup = 0;
+ struct eb64_node *eb;
+ struct certificate_ocsp *ocsp;
+ struct httpclient *hc = NULL;
+ struct buffer *req_url = NULL;
+ struct buffer *req_body = NULL;
+ OCSP_CERTID *certid = NULL;
+ struct ssl_ocsp_task_ctx *ctx = &ssl_ocsp_task_ctx;
+
+ if (ctx->cur_ocsp) {
+ /* An update is in process */
+ ocsp = ctx->cur_ocsp;
+ hc = ctx->hc;
+ if (ctx->flags & HC_F_RES_STLINE) {
+ if (hc->res.status != 200) {
+ ctx->update_status = OCSP_UPDT_ERR_HTTP_STATUS;
+ goto http_error;
+ }
+ ctx->flags &= ~HC_F_RES_STLINE;
+ }
+
+ if (ctx->flags & HC_F_RES_HDR) {
+ struct http_hdr *hdr;
+ int found = 0;
+ /* Look for "Content-Type" header which should have
+ * "application/ocsp-response" value. */
+ for (hdr = hc->res.hdrs; isttest(hdr->v); hdr++) {
+ if (isteqi(hdr->n, ist("Content-Type")) &&
+ isteqi(hdr->v, ist("application/ocsp-response"))) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ ctx->update_status = OCSP_UPDT_ERR_HTTP_HDR;
+ goto http_error;
+ }
+ ctx->flags &= ~HC_F_RES_HDR;
+ }
+
+		/* If HC_F_RES_BODY is set, we still need the HC_F_RES_END flag
+		 * to be set as well in order to be sure that the body is
+		 * complete. */
+
+ /* we must close only if F_RES_END is the last flag */
+ if (ctx->flags & HC_F_RES_END) {
+
+ /* Process the body that must be complete since
+ * HC_F_RES_END is set. */
+ if (ctx->flags & HC_F_RES_BODY) {
+ if (ssl_ocsp_check_response(ocsp->chain, ocsp->issuer, &hc->res.buf, NULL)) {
+ ctx->update_status = OCSP_UPDT_ERR_CHECK;
+ goto http_error;
+ }
+
+ if (ssl_sock_update_ocsp_response(&hc->res.buf, NULL) != 0) {
+ ctx->update_status = OCSP_UPDT_ERR_INSERT;
+ goto http_error;
+ }
+
+ ctx->flags &= ~HC_F_RES_BODY;
+ }
+
+ ctx->flags &= ~HC_F_RES_END;
+
+ ++ocsp->num_success;
+ ocsp->last_update = date.tv_sec;
+ ctx->update_status = OCSP_UPDT_OK;
+ ocsp->last_update_status = ctx->update_status;
+
+ ssl_ocsp_send_log();
+
+ /* Reinsert the entry into the update list so that it can be updated later */
+ ssl_ocsp_update_insert(ocsp);
+ /* Release the reference kept on the updated ocsp response. */
+ ssl_sock_free_ocsp_instance(ctx->cur_ocsp);
+ ctx->cur_ocsp = NULL;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ /* Set next_wakeup to the new first entry of the tree */
+ eb = eb64_first(&ocsp_update_tree);
+ if (eb) {
+ if (eb->key > date.tv_sec)
+ next_wakeup = (eb->key - date.tv_sec)*1000;
+ else
+ next_wakeup = 0;
+ }
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ goto leave;
+ }
+
+ /* We did not receive the HC_F_RES_END flag yet, wait for it
+ * before trying to update a new ocsp response. */
+ goto wait;
+ } else {
+ /* Look for next entry that needs to be updated. */
+ const unsigned char *p = NULL;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ eb = eb64_first(&ocsp_update_tree);
+ if (!eb) {
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ goto wait;
+ }
+
+ if (eb->key > date.tv_sec) {
+ next_wakeup = (eb->key - date.tv_sec)*1000;
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ goto leave;
+ }
+
+ ocsp = eb64_entry(eb, struct certificate_ocsp, next_update);
+
+ /* Take the current entry out of the update tree, it will be
+ * reinserted after the response is processed. */
+ eb64_delete(&ocsp->next_update);
+
+ ocsp->updating = 1;
+ ocsp->refcount_instance++;
+ ctx->cur_ocsp = ocsp;
+ ocsp->last_update_status = OCSP_UPDT_UNKNOWN;
+
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ req_url = alloc_trash_chunk();
+ if (!req_url) {
+ goto leave;
+ }
+ req_body = alloc_trash_chunk();
+ if (!req_body) {
+ goto leave;
+ }
+
+ p = ocsp->key_data;
+
+ d2i_OCSP_CERTID(&certid, &p, ocsp->key_length);
+ if (!certid)
+ goto leave;
+
+ /* Copy OCSP URI stored in ocsp structure into req_url */
+ chunk_cpy(req_url, ocsp->uri);
+
+ /* Create ocsp request */
+ if (ssl_ocsp_create_request_details(certid, req_url, req_body, NULL) != 0) {
+ goto leave;
+ }
+
+ /* Depending on the processing that occurred in
+ * ssl_ocsp_create_request_details we could either have to send
+ * a GET or a POST request. */
+ hc = httpclient_new_from_proxy(httpclient_ocsp_update_px, task,
+ b_data(req_body) ? HTTP_METH_POST : HTTP_METH_GET,
+ ist2(b_orig(req_url), b_data(req_url)));
+ if (!hc) {
+ goto leave;
+ }
+
+ if (httpclient_req_gen(hc, hc->req.url, hc->req.meth,
+ b_data(req_body) ? ocsp_request_hdrs : NULL,
+ b_data(req_body) ? ist2(b_orig(req_body), b_data(req_body)) : IST_NULL) != ERR_NONE) {
+ goto leave;
+ }
+
+ hc->ops.res_stline = ocsp_update_response_stline_cb;
+ hc->ops.res_headers = ocsp_update_response_headers_cb;
+ hc->ops.res_payload = ocsp_update_response_body_cb;
+ hc->ops.res_end = ocsp_update_response_end_cb;
+
+ if (!(ctx->appctx = httpclient_start(hc))) {
+ goto leave;
+ }
+
+ ctx->flags = 0;
+ ctx->hc = hc;
+
+		/* The entry is kept out of the update tree while ctx->cur_ocsp
+		 * is set, which indicates that an update is in progress. */
+ goto wait;
+ }
+
+leave:
+ if (ctx->cur_ocsp) {
+ /* Something went wrong, reinsert the entry in the tree. */
+ ++ctx->cur_ocsp->num_failure;
+ ssl_ocsp_update_insert_after_error(ctx->cur_ocsp);
+ /* Release the reference kept on the updated ocsp response. */
+ ssl_sock_free_ocsp_instance(ctx->cur_ocsp);
+ ctx->cur_ocsp = NULL;
+ }
+ if (hc)
+ httpclient_stop_and_destroy(hc);
+ ctx->hc = NULL;
+ free_trash_chunk(req_url);
+ free_trash_chunk(req_body);
+ task->expire = tick_add(now_ms, next_wakeup);
+ return task;
+
+wait:
+ free_trash_chunk(req_url);
+ free_trash_chunk(req_body);
+ task->expire = TICK_ETERNITY;
+ return task;
+
+http_error:
+ ssl_ocsp_send_log();
+ /* Reinsert certificate into update list so that it can be updated later */
+ if (ocsp) {
+ ++ocsp->num_failure;
+ ocsp->last_update_status = ctx->update_status;
+ ssl_ocsp_update_insert_after_error(ocsp);
+ }
+
+ if (hc)
+ httpclient_stop_and_destroy(hc);
+ /* Release the reference kept on the updated ocsp response. */
+ ssl_sock_free_ocsp_instance(ctx->cur_ocsp);
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ /* Set next_wakeup to the new first entry of the tree */
+ eb = eb64_first(&ocsp_update_tree);
+ if (eb) {
+ if (eb->key > date.tv_sec)
+ next_wakeup = (eb->key - date.tv_sec)*1000;
+ else
+ next_wakeup = 0;
+ }
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ctx->cur_ocsp = NULL;
+ ctx->hc = NULL;
+ ctx->flags = 0;
+ task->expire = tick_add(now_ms, next_wakeup);
+ return task;
+}
+
+char ocspupdate_log_format[] = "%ci:%cp [%tr] %ft %[ssl_ocsp_certname] %[ssl_ocsp_status] %{+Q}[ssl_ocsp_status_str] %[ssl_ocsp_fail_cnt] %[ssl_ocsp_success_cnt]";
+
+/*
+ * Initialize the proxy for the OCSP update HTTP client with 2 servers, one for
+ * raw HTTP, the other for HTTPS.
+ */
+static int ssl_ocsp_update_precheck()
+{
+ /* initialize the OCSP update dedicated httpclient */
+ httpclient_ocsp_update_px = httpclient_create_proxy("<OCSP-UPDATE>");
+ if (!httpclient_ocsp_update_px)
+ return 1;
+ httpclient_ocsp_update_px->conf.error_logformat_string = strdup(ocspupdate_log_format);
+ httpclient_ocsp_update_px->conf.logformat_string = httpclient_log_format;
+ httpclient_ocsp_update_px->options2 |= PR_O2_NOLOGNORM;
+
+ return 0;
+}
+
+/* initialize the proxy and servers for the HTTP client */
+
+REGISTER_PRE_CHECK(ssl_ocsp_update_precheck);
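+
+/* The update mechanism itself is driven by the configuration; a crt-list line
+ * of this shape (paths are examples) enables it for a given certificate:
+ *
+ *    /etc/haproxy/certs/site.pem [ocsp-update on] example.com
+ */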
+
+
+static int cli_parse_update_ocsp_response(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *err = NULL;
+ struct ckch_store *ckch_store = NULL;
+ struct certificate_ocsp *ocsp = NULL;
+ int update_once = 0;
+ unsigned char key[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ unsigned char *p;
+
+ if (!*args[3]) {
+ memprintf(&err, "'update ssl ocsp-response' expects a filename\n");
+ return cli_dynerr(appctx, err);
+ }
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock)) {
+ memprintf(&err, "%sCan't update the certificate!\nOperations on certificates are currently locked!\n", err ? err : "");
+ goto end;
+ }
+
+ ckch_store = ckchs_lookup(args[3]);
+
+ if (!ckch_store) {
+ memprintf(&err, "%sUnknown certificate! 'update ssl ocsp-response' expects an already known certificate file name.\n", err ? err : "");
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ goto end;
+ }
+
+ p = key;
+ i2d_OCSP_CERTID(ckch_store->data->ocsp_cid, &p);
+
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, key, OCSP_MAX_CERTID_ASN1_LENGTH);
+ if (!ocsp) {
+ memprintf(&err, "%s'update ssl ocsp-response' only works on certificates that already have a known OCSP response.\n", err ? err : "");
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ goto end;
+ }
+
+	/* Only schedule an update if this response is not already being
+	 * updated. */
+ if (!ocsp->updating) {
+ update_once = (ocsp->next_update.node.leaf_p == NULL);
+ eb64_delete(&ocsp->next_update);
+
+ /* Insert the entry at the beginning of the update tree.
+ * We don't need to increase the reference counter on the
+ * certificate_ocsp structure because we would not have a way to
+ * decrease it afterwards since this update operation is asynchronous.
+ * If the corresponding entry were to be destroyed before the update can
+ * be performed, which is pretty unlikely, it would not be such a
+ * problem because that would mean that the OCSP response is not
+ * actually used.
+ */
+ ocsp->next_update.key = 0;
+ eb64_insert(&ocsp_update_tree, &ocsp->next_update);
+ ocsp->update_once = update_once;
+ }
+
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ if (!ocsp_update_task)
+ ssl_create_ocsp_update_task(&err);
+
+ task_wakeup(ocsp_update_task, TASK_WOKEN_MSG);
+
+ free(err);
+
+ return 0;
+
+end:
+ return cli_dynerr(appctx, memprintf(&err, "%sCan't send ocsp request for %s!\n", err ? err : "", args[3]));
+}
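+
+/* CLI usage sketch (socket path is an example):
+ *
+ *    $ echo "update ssl ocsp-response /etc/haproxy/certs/site.pem" | \
+ *          socat /var/run/haproxy.sock -
+ */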
+
+#endif /* !defined OPENSSL_IS_BORINGSSL */
+
+
+#endif /* (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) */
+
+
+static int cli_parse_set_ocspresponse(char **args, char *payload, struct appctx *appctx, void *private)
+{
+#if (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP)
+ char *err = NULL;
+ int i, j, ret;
+
+ if (!payload)
+ payload = args[3];
+
+ /* Expect one parameter: the new response in base64 encoding */
+ if (!*payload)
+ return cli_err(appctx, "'set ssl ocsp-response' expects response in base64 encoding.\n");
+
+ /* remove \r and \n from the payload */
+ for (i = 0, j = 0; payload[i]; i++) {
+ if (payload[i] == '\r' || payload[i] == '\n')
+ continue;
+ payload[j++] = payload[i];
+ }
+ payload[j] = 0;
+
+ ret = base64dec(payload, j, trash.area, trash.size);
+ if (ret < 0)
+ return cli_err(appctx, "'set ssl ocsp-response' received invalid base64 encoded response.\n");
+
+ trash.data = ret;
+ if (ssl_sock_update_ocsp_response(&trash, &err)) {
+ if (err)
+ return cli_dynerr(appctx, memprintf(&err, "%s.\n", err));
+ else
+ return cli_err(appctx, "Failed to update OCSP response.\n");
+ }
+
+ return cli_msg(appctx, LOG_INFO, "OCSP Response updated!\n");
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+
+}
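+
+/* CLI usage sketch (paths are examples): the DER response must be base64
+ * encoded before being passed to the command:
+ *
+ *    $ echo "set ssl ocsp-response $(base64 -w 10000 ocsp.der)" | \
+ *          socat /var/run/haproxy.sock -
+ */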
+
+/* parsing function for 'show ssl ocsp-response [id]'. If an entry is forced,
+ * it's set into appctx->svcctx.
+ */
+static int cli_parse_show_ocspresponse(char **args, char *payload, struct appctx *appctx, void *private)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+
+ struct show_ocspresp_cli_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg_idx = 3;
+
+ if (*args[3]) {
+ struct certificate_ocsp *ocsp = NULL;
+ char key[OCSP_MAX_CERTID_ASN1_LENGTH] = {};
+ int key_length = OCSP_MAX_CERTID_ASN1_LENGTH;
+ char *key_ptr = key;
+ unsigned char *p;
+ struct ckch_store *ckch_store = NULL;
+
+ if (strcmp(args[3], "text") == 0) {
+ ctx->format = SHOW_OCSPRESP_FMT_TEXT;
+ ++arg_idx;
+ } else if (strcmp(args[3], "base64") == 0) {
+ ctx->format = SHOW_OCSPRESP_FMT_B64;
+ ++arg_idx;
+ }
+
+ if (ctx->format != SHOW_OCSPRESP_FMT_DFLT && !*args[arg_idx])
+ return cli_err(appctx, "'show ssl ocsp-response [text|base64]' expects a valid certid.\n");
+
+ /* Try to convert parameter into an OCSP certid first, and consider it
+ * as a filename if it fails. */
+ if (strlen(args[arg_idx]) > OCSP_MAX_CERTID_ASN1_LENGTH*2 ||
+ !parse_binary(args[arg_idx], &key_ptr, &key_length, NULL)) {
+
+ key_ptr = key;
+ key_length = 0;
+
+ /* The operations on the CKCH architecture are locked so we can
+ * manipulate ckch_store and ckch_inst */
+ if (HA_SPIN_TRYLOCK(CKCH_LOCK, &ckch_lock)) {
+ return cli_err(appctx, "Operations on certificates are currently locked!\n");
+ }
+
+ ckch_store = ckchs_lookup(args[arg_idx]);
+
+ if (ckch_store) {
+ p = (unsigned char*)key;
+ key_length = i2d_OCSP_CERTID(ckch_store->data->ocsp_cid, &p);
+ }
+ HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock);
+ }
+
+ if (key_length == 0) {
+ return cli_err(appctx, "'show ssl ocsp-response' expects a valid certid or certificate path.\n");
+ }
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, key, OCSP_MAX_CERTID_ASN1_LENGTH);
+
+ if (!ocsp) {
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ return cli_err(appctx, "Certificate ID or path does not match any certificate.\n");
+ }
+ ocsp->refcount_instance++;
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ ctx->ocsp = ocsp;
+ appctx->io_handler = cli_io_handler_show_ocspresponse_detail;
+ }
+
+ return 0;
+
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+}
+
+/*
+ * IO handler of "show ssl ocsp-response". The command taking a specific ID
+ * is managed in cli_io_handler_show_ocspresponse_detail.
+ * The current entry is taken from appctx->svcctx.
+ */
+static int cli_io_handler_show_ocspresponse(struct appctx *appctx)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ struct buffer *trash = alloc_trash_chunk();
+ struct buffer *tmp = NULL;
+ struct ebmb_node *node;
+ struct certificate_ocsp *ocsp = NULL;
+ BIO *bio = NULL;
+ int write = -1;
+ struct show_ocspresp_cli_ctx *ctx = appctx->svcctx;
+
+ if (trash == NULL)
+ return 1;
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ tmp = alloc_trash_chunk();
+ if (!tmp)
+ goto end;
+
+ if ((bio = BIO_new(BIO_s_mem())) == NULL)
+ goto end;
+
+ if (!ctx->ocsp) {
+ chunk_appendf(trash, "# Certificate IDs\n");
+ node = ebmb_first(&cert_ocsp_tree);
+ } else {
+ node = &ctx->ocsp->key;
+ }
+
+ while (node) {
+ OCSP_CERTID *certid = NULL;
+ const unsigned char *p = NULL;
+ int i;
+
+ ocsp = ebmb_entry(node, struct certificate_ocsp, key);
+
+ /* Dump the key in hexadecimal */
+ chunk_appendf(trash, "Certificate ID key : ");
+ for (i = 0; i < ocsp->key_length; ++i) {
+ chunk_appendf(trash, "%02x", ocsp->key_data[i]);
+ }
+ chunk_appendf(trash, "\n");
+
+ /* Dump the certificate path */
+ chunk_appendf(trash, "Certificate path : %s\n", ocsp->path);
+
+ p = ocsp->key_data;
+
+ /* Decode the certificate ID (serialized into the key). */
+ d2i_OCSP_CERTID(&certid, &p, ocsp->key_length);
+ if (!certid)
+ goto end;
+
+ /* Dump the CERTID info */
+ ocsp_certid_print(bio, certid, 1);
+ OCSP_CERTID_free(certid);
+ write = BIO_read(bio, tmp->area, tmp->size-1);
+ /* strip trailing LFs */
+ while (write > 0 && tmp->area[write-1] == '\n')
+ write--;
+ tmp->area[write] = '\0';
+
+ chunk_appendf(trash, "%s\n", tmp->area);
+
+ node = ebmb_next(node);
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+ }
+
+end:
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ free_trash_chunk(trash);
+ free_trash_chunk(tmp);
+ BIO_free(bio);
+ return 1;
+
+yield:
+ free_trash_chunk(trash);
+ free_trash_chunk(tmp);
+ BIO_free(bio);
+
+ ocsp->refcount_instance++;
+ ctx->ocsp = ocsp;
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+ return 0;
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+}
+
+static void cli_release_show_ocspresponse(struct appctx *appctx)
+{
+ struct show_ocspresp_cli_ctx *ctx = appctx->svcctx;
+
+ if (ctx)
+		ssl_sock_free_ocsp_instance(ctx->ocsp);
+}
+
+/* Check that the ckch_store and the crtlist entry have the same OCSP update
+ * configuration. */
+int ocsp_update_check_cfg_consistency(struct ckch_store *store, struct crtlist_entry *entry, char *crt_path, char **err)
+{
+ int err_code = ERR_NONE;
+
+ if (store->data->ocsp_update_mode != SSL_SOCK_OCSP_UPDATE_DFLT || entry->ssl_conf) {
+ if ((!entry->ssl_conf && store->data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON)
+ || (entry->ssl_conf && store->data->ocsp_update_mode != entry->ssl_conf->ocsp_update)) {
+ memprintf(err, "%sIncompatibilities found in OCSP update mode for certificate %s\n", err && *err ? *err : "", crt_path);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+ return err_code;
+}
+
+struct show_ocsp_updates_ctx {
+ struct certificate_ocsp *cur_ocsp;
+};
+
+/*
+ * Parsing function for 'show ssl ocsp-updates [nb]'.
+ */
+static int cli_parse_show_ocsp_updates(char **args, char *payload, struct appctx *appctx, void *private)
+{
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ struct show_ocsp_updates_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ return 0;
+#else
+ return cli_err(appctx, "HAProxy was compiled against a version of OpenSSL that doesn't support OCSP stapling.\n");
+#endif
+}
+
+/*
+ * Dump information about an ocsp response concerning ocsp auto update.
+ * It uses the following format:
+ * OCSP Certid | Path | Next Update | Last Update | Successes | Failures | Last Update Status | Last Update Status (str)
+ * Return 0 in case of success.
+ */
+static int dump_ocsp_update_info(struct certificate_ocsp *ocsp, struct buffer *out)
+{
+ struct tm tm = {};
+ char *ret;
+ int i;
+ time_t next_update;
+
+ /* Dump OCSP certid */
+ for (i = 0; i < ocsp->key_length; ++i) {
+ chunk_appendf(out, "%02x", ocsp->key_data[i]);
+ }
+
+ chunk_appendf(out, " | ");
+
+ /* Dump path */
+ chunk_appendf(out, "%s", ocsp->path);
+
+ chunk_appendf(out, " | ");
+
+ /* Dump next update time */
+ if (ocsp->next_update.key != 0) {
+ next_update = ocsp->next_update.key;
+ get_localtime(ocsp->next_update.key, &tm);
+ } else {
+ next_update = date.tv_sec;
+ get_localtime(date.tv_sec, &tm);
+ }
+ ret = localdate2str_log(b_orig(out)+b_data(out), next_update, &tm, b_size(out)-b_data(out));
+
+ if (ret == NULL)
+ return 1;
+
+ out->data = (ret - out->area);
+
+ chunk_appendf(out, " | ");
+
+ /* Dump last update time or "-" if no update occurred yet */
+ if (ocsp->last_update) {
+ get_localtime(ocsp->last_update, &tm);
+ ret = localdate2str_log(b_orig(out)+b_data(out), ocsp->last_update, &tm, b_size(out)-b_data(out));
+
+ if (ret == NULL)
+ return 1;
+
+ out->data = (ret - out->area);
+ } else
+ chunk_appendf(out, "-");
+
+ chunk_appendf(out, " | ");
+
+ /* Number of successful updates */
+ chunk_appendf(out, "%d", ocsp->num_success);
+
+ chunk_appendf(out, " | ");
+
+ /* Number of failed updates */
+ chunk_appendf(out, "%d", ocsp->num_failure);
+
+ chunk_appendf(out, " | ");
+
+ /* Last update status */
+ chunk_appendf(out, "%d", ocsp->last_update_status);
+
+ chunk_appendf(out, " | ");
+
+ /* Last update status str */
+ if (ocsp->last_update_status >= OCSP_UPDT_ERR_LAST)
+ chunk_appendf(out, "-");
+ else
+ chunk_appendf(out, "%s", istptr(ocsp_update_errors[ocsp->last_update_status]));
+
+ chunk_appendf(out, "\n");
+
+ return 0;
+}
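+
+/* Example of a resulting line (values are illustrative):
+ *
+ *    303b3009...0101 | /etc/haproxy/certs/site.pem | 11/Apr/2024:10:00:00 +0200 | 11/Apr/2024:09:00:00 +0200 | 4 | 0 | 1 | Update successful
+ */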
+
+static int cli_io_handler_show_ocsp_updates(struct appctx *appctx)
+{
+ struct show_ocsp_updates_ctx *ctx = appctx->svcctx;
+ struct eb64_node *node;
+ struct certificate_ocsp *ocsp = NULL;
+ struct buffer *trash = get_trash_chunk();
+
+ if (!ctx->cur_ocsp) {
+ node = eb64_first(&ocsp_update_tree);
+ chunk_appendf(trash, "OCSP Certid | Path | Next Update | Last Update | Successes | Failures | Last Update Status | Last Update Status (str)\n");
+
+ /* Look for an entry currently being updated */
+ ocsp = ssl_ocsp_task_ctx.cur_ocsp;
+ if (ocsp) {
+ if (dump_ocsp_update_info(ocsp, trash))
+ goto end;
+ }
+
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+
+ } else {
+ node = &((struct certificate_ocsp*)ctx->cur_ocsp)->next_update;
+ }
+
+ while (node) {
+ ocsp = eb64_entry(node, struct certificate_ocsp, next_update);
+
+ chunk_reset(trash);
+ if (dump_ocsp_update_info(ocsp, trash))
+ goto end;
+
+ if (applet_putchk(appctx, trash) == -1) {
+ ctx->cur_ocsp = ocsp;
+ goto yield;
+ }
+
+ node = eb64_next(node);
+ }
+
+end:
+ return 1;
+
+yield:
+ return 0; /* should come back */
+}
+
+static void cli_release_show_ocsp_updates(struct appctx *appctx)
+{
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+}
+
+
+static int
+smp_fetch_ssl_ocsp_certid(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *data = get_trash_chunk();
+ struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp;
+
+ if (!ocsp)
+ return 0;
+
+ dump_binary(data, (char *)ocsp->key_data, ocsp->key_length);
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *data;
+ return 1;
+}
+
+static int
+smp_fetch_ssl_ocsp_certname(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp;
+
+ if (!ocsp)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = ocsp->path;
+ smp->data.u.str.data = strlen(ocsp->path);
+ return 1;
+}
+
+static int
+smp_fetch_ssl_ocsp_status(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp;
+
+ if (!ocsp)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ssl_ocsp_task_ctx.update_status;
+ return 1;
+}
+
+static int
+smp_fetch_ssl_ocsp_status_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp;
+
+ if (!ocsp)
+ return 0;
+
+ if (ssl_ocsp_task_ctx.update_status >= OCSP_UPDT_ERR_LAST)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = ist2buf(ocsp_update_errors[ssl_ocsp_task_ctx.update_status]);
+
+ return 1;
+}
+
+static int
+smp_fetch_ssl_ocsp_fail_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp;
+
+ if (!ocsp)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ocsp->num_failure;
+ return 1;
+}
+
+static int
+smp_fetch_ssl_ocsp_success_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp;
+
+ if (!ocsp)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ocsp->num_success;
+ return 1;
+}
+
+
+static struct cli_kw_list cli_kws = {{ },{
+	{ { "set", "ssl", "ocsp-response", NULL }, "set ssl ocsp-response <resp|payload> : update a certificate's OCSP Response from a base64-encoded DER", cli_parse_set_ocspresponse, NULL },
+
+ { { "show", "ssl", "ocsp-response", NULL },"show ssl ocsp-response [[text|base64] id] : display the IDs of the OCSP responses used in memory, or the details of a single OCSP response (in text or base64 format)", cli_parse_show_ocspresponse, cli_io_handler_show_ocspresponse, cli_release_show_ocspresponse },
+ { { "show", "ssl", "ocsp-updates", NULL }, "show ssl ocsp-updates : display information about the next 'nb' ocsp responses that will be updated automatically", cli_parse_show_ocsp_updates, cli_io_handler_show_ocsp_updates, cli_release_show_ocsp_updates },
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ { { "update", "ssl", "ocsp-response", NULL }, "update ssl ocsp-response <certfile> : send ocsp request and update stored ocsp response", cli_parse_update_ocsp_response, NULL, NULL },
+#endif
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ *
+ * Those fetches only have a valid value during an OCSP update process so they
+ * can only be used in a log format of a log line built by the update process
+ * task itself.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "ssl_ocsp_certid", smp_fetch_ssl_ocsp_certid, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+ { "ssl_ocsp_certname", smp_fetch_ssl_ocsp_certname, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+ { "ssl_ocsp_status", smp_fetch_ssl_ocsp_status, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+ { "ssl_ocsp_status_str", smp_fetch_ssl_ocsp_status_str, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+ { "ssl_ocsp_fail_cnt", smp_fetch_ssl_ocsp_fail_cnt, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+ { "ssl_ocsp_success_cnt", smp_fetch_ssl_ocsp_success_cnt, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ssl_sample.c b/src/ssl_sample.c
new file mode 100644
index 0000000..789637f
--- /dev/null
+++ b/src/ssl_sample.c
@@ -0,0 +1,2389 @@
+/*
+ * This file contains the sample fetches related to the SSL
+ *
+ * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/base64.h>
+#include <haproxy/buf-t.h>
+#include <haproxy/connection.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/sample.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stconn.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+
+
+/***** Below are some sample fetching functions for ACL/patterns *****/
+
+#if defined(HAVE_CRYPTO_memcmp)
+/* Compares bytestring with a variable containing a bytestring. Return value
+ * is `true` if both bytestrings are bytewise identical and `false` otherwise.
+ *
+ * Comparison will be performed in constant time if both bytestrings are of
+ * the same length. If the lengths differ, the execution time will not be
+ * constant.
+ */
+static int sample_conv_secure_memcmp(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample tmp;
+ int result;
+
+ smp_set_owner(&tmp, smp->px, smp->sess, smp->strm, smp->opt);
+ if (arg_p[0].type != ARGT_VAR)
+ return 0;
+
+ if (!sample_conv_var2smp(&arg_p[0].data.var, &tmp, SMP_T_BIN))
+ return 0;
+
+ if (smp->data.u.str.data != tmp.data.u.str.data) {
+ smp->data.u.sint = 0;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+ }
+
+ /* The following comparison is performed in constant time. */
+ result = CRYPTO_memcmp(smp->data.u.str.area, tmp.data.u.str.area, smp->data.u.str.data);
+
+ smp->data.u.sint = result == 0;
+ smp->data.type = SMP_T_BOOL;
+ return 1;
+}
+
+/* This function checks the "secure_memcmp" converter's arguments and extracts the
+ * variable name and its scope.
+ */
+static int smp_check_secure_memcmp(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (!args[0].data.str.data) {
+ memprintf(err, "missing variable name");
+ return 0;
+ }
+
+ /* Try to decode a variable. */
+ if (vars_check_arg(&args[0], NULL))
+ return 1;
+
+ memprintf(err, "failed to register variable name '%s'",
+ args[0].data.str.area);
+ return 0;
+}
+#endif /* HAVE_CRYPTO_memcmp */
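+
+/* Configuration usage sketch for the converter above (header, variable and
+ * token names are examples):
+ *
+ *    http-request set-var(txn.secret) str(my_secret_token)
+ *    http-request deny unless { req.hdr(x-token),secure_memcmp(txn.secret) }
+ */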
+
+static int smp_check_sha2(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (args[0].type == ARGT_STOP)
+ return 1;
+ if (args[0].type != ARGT_SINT) {
+ memprintf(err, "Invalid type '%s'", arg_type_names[args[0].type]);
+ return 0;
+ }
+
+ switch (args[0].data.sint) {
+ case 224:
+ case 256:
+ case 384:
+ case 512:
+ /* this is okay */
+ return 1;
+ default:
+ memprintf(err, "Unsupported number of bits: '%lld'", args[0].data.sint);
+ return 0;
+ }
+}
+
+static int sample_conv_sha2(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ int bits = 256;
+ EVP_MD_CTX *mdctx;
+ const EVP_MD *evp = NULL;
+ unsigned int digest_length = 0;
+ if (arg_p->data.sint)
+ bits = arg_p->data.sint;
+
+ switch (bits) {
+ case 224:
+ evp = EVP_sha224();
+ break;
+ case 256:
+ evp = EVP_sha256();
+ break;
+ case 384:
+ evp = EVP_sha384();
+ break;
+ case 512:
+ evp = EVP_sha512();
+ break;
+ default:
+ return 0;
+ }
+
+ mdctx = EVP_MD_CTX_new();
+ if (!mdctx)
+ return 0;
+ EVP_DigestInit_ex(mdctx, evp, NULL);
+ EVP_DigestUpdate(mdctx, smp->data.u.str.area, smp->data.u.str.data);
+ EVP_DigestFinal_ex(mdctx, (unsigned char*)trash->area, &digest_length);
+ trash->data = digest_length;
+
+ EVP_MD_CTX_free(mdctx);
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
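+
+/* Usage sketch (illustrative; assumes this converter is registered under the
+ * name "sha2" and is combined with the standard "hex" converter):
+ *
+ *   http-response set-header X-Body-SHA256 %[res.body,sha2(256),hex]
+ */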
+
+/* This function checks an <arg> and fills it with a variable type if the
+ * <arg> string contains a valid variable name. If that fails, the function
+ * tries to perform a base64 decode operation on the same string and
+ * fills the <arg> with the decoded content.
+ *
+ * Validation is skipped if the <arg> string is empty.
+ *
+ * This function returns 0 if the variable lookup fails and the specified
+ * <arg> string is not a valid base64 encoded string, as well as if an
+ * unexpected argument type is specified or a memory allocation error
+ * occurs. Otherwise it returns 1.
+ */
+static inline int sample_check_arg_base64(struct arg *arg, char **err)
+{
+ char *dec = NULL;
+ int dec_size;
+
+ if (arg->type != ARGT_STR) {
+ memprintf(err, "unexpected argument type");
+ return 0;
+ }
+
+ if (arg->data.str.data == 0) /* empty */
+ return 1;
+
+ if (vars_check_arg(arg, NULL))
+ return 1;
+
+ if (arg->data.str.data % 4) {
+ memprintf(err, "argument needs to be base64 encoded, and "
+ "can either be a string or a variable");
+ return 0;
+ }
+
+ dec_size = (arg->data.str.data / 4 * 3)
+ - (arg->data.str.area[arg->data.str.data-1] == '=' ? 1 : 0)
+ - (arg->data.str.area[arg->data.str.data-2] == '=' ? 1 : 0);
+
+ if ((dec = malloc(dec_size)) == NULL) {
+ memprintf(err, "memory allocation error");
+ return 0;
+ }
+
+ dec_size = base64dec(arg->data.str.area, arg->data.str.data, dec, dec_size);
+ if (dec_size < 0) {
+ memprintf(err, "argument needs to be base64 encoded, and "
+ "can either be a string or a variable");
+ free(dec);
+ return 0;
+ }
+
+ /* base64 decoded */
+ chunk_destroy(&arg->data.str);
+ arg->data.str.area = dec;
+ arg->data.str.data = dec_size;
+ return 1;
+}
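+
+/* Worked example of the size computation above (illustrative): for the
+ * 8-byte input "Zm9vYg==", data/4*3 = 6, and each of the two trailing '='
+ * padding bytes subtracts one, giving dec_size = 4 (the string "foob").
+ */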
+
+#ifdef EVP_CIPH_GCM_MODE
+static int check_aes_gcm(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ switch(args[0].data.sint) {
+ case 128:
+ case 192:
+ case 256:
+ break;
+ default:
+ memprintf(err, "key size must be 128, 192 or 256 (bits).");
+ return 0;
+ }
+
+ /* Try to decode variables. */
+ if (!sample_check_arg_base64(&args[1], err)) {
+ memprintf(err, "failed to parse nonce : %s", *err);
+ return 0;
+ }
+ if (!sample_check_arg_base64(&args[2], err)) {
+ memprintf(err, "failed to parse key : %s", *err);
+ return 0;
+ }
+ if (!sample_check_arg_base64(&args[3], err)) {
+ memprintf(err, "failed to parse aead_tag : %s", *err);
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Arguments: AES size in bits, nonce, key, tag. The last three arguments are base64 encoded */
+static int sample_conv_aes_gcm_dec(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct sample nonce, key, aead_tag;
+ struct buffer *smp_trash = NULL, *smp_trash_alloc = NULL;
+ EVP_CIPHER_CTX *ctx;
+ int dec_size, ret;
+
+ smp_trash_alloc = alloc_trash_chunk();
+ if (!smp_trash_alloc)
+ return 0;
+
+ /* smp copy */
+ smp_trash_alloc->data = smp->data.u.str.data;
+ if (unlikely(smp_trash_alloc->data > smp_trash_alloc->size))
+ smp_trash_alloc->data = smp_trash_alloc->size;
+ memcpy(smp_trash_alloc->area, smp->data.u.str.area, smp_trash_alloc->data);
+
+ ctx = EVP_CIPHER_CTX_new();
+
+ if (!ctx)
+ goto err;
+
+ smp_trash = alloc_trash_chunk();
+ if (!smp_trash)
+ goto err;
+
+ smp_set_owner(&nonce, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&arg_p[1], &nonce))
+ goto err;
+
+ if (arg_p[1].type == ARGT_VAR) {
+ dec_size = base64dec(nonce.data.u.str.area, nonce.data.u.str.data, smp_trash->area, smp_trash->size);
+ if (dec_size < 0)
+ goto err;
+ smp_trash->data = dec_size;
+ nonce.data.u.str = *smp_trash;
+ }
+
+ /* Set cipher type and mode */
+ switch(arg_p[0].data.sint) {
+ case 128:
+ EVP_DecryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL);
+ break;
+ case 192:
+ EVP_DecryptInit_ex(ctx, EVP_aes_192_gcm(), NULL, NULL, NULL);
+ break;
+ case 256:
+ EVP_DecryptInit_ex(ctx, EVP_aes_256_gcm(), NULL, NULL, NULL);
+ break;
+ }
+
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN, nonce.data.u.str.data, NULL);
+
+ /* Initialise IV */
+ if(!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, (unsigned char *) nonce.data.u.str.area))
+ goto err;
+
+ smp_set_owner(&key, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&arg_p[2], &key))
+ goto err;
+
+ if (arg_p[2].type == ARGT_VAR) {
+ dec_size = base64dec(key.data.u.str.area, key.data.u.str.data, smp_trash->area, smp_trash->size);
+ if (dec_size < 0)
+ goto err;
+ smp_trash->data = dec_size;
+ key.data.u.str = *smp_trash;
+ }
+
+ /* Initialise key */
+ if (!EVP_DecryptInit_ex(ctx, NULL, NULL, (unsigned char *) key.data.u.str.area, NULL))
+ goto err;
+
+ if (!EVP_DecryptUpdate(ctx, (unsigned char *) smp_trash->area, (int *) &smp_trash->data,
+ (unsigned char *) smp_trash_alloc->area, (int) smp_trash_alloc->data))
+ goto err;
+
+ smp_set_owner(&aead_tag, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&arg_p[3], &aead_tag))
+ goto err;
+
+ if (arg_p[3].type == ARGT_VAR) {
+ dec_size = base64dec(aead_tag.data.u.str.area, aead_tag.data.u.str.data, smp_trash_alloc->area, smp_trash_alloc->size);
+ if (dec_size < 0)
+ goto err;
+ smp_trash_alloc->data = dec_size;
+ aead_tag.data.u.str = *smp_trash_alloc;
+ }
+
+ dec_size = smp_trash->data;
+
+ EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, aead_tag.data.u.str.data, (void *) aead_tag.data.u.str.area);
+ ret = EVP_DecryptFinal_ex(ctx, (unsigned char *) smp_trash->area + smp_trash->data, (int *) &smp_trash->data);
+
+ if (ret <= 0)
+ goto err;
+
+ smp->data.u.str.data = dec_size + smp_trash->data;
+ smp->data.u.str.area = smp_trash->area;
+ smp->data.type = SMP_T_BIN;
+ smp_dup(smp);
+ free_trash_chunk(smp_trash_alloc);
+ free_trash_chunk(smp_trash);
+ return 1;
+
+err:
+ free_trash_chunk(smp_trash_alloc);
+ free_trash_chunk(smp_trash);
+ return 0;
+}
+#endif
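+
+/* Usage sketch for the converter above (illustrative; assumes it is
+ * registered under the name "aes_gcm_dec" and that the nonce, key and tag
+ * variables hold base64-encoded values, as enforced by check_aes_gcm()):
+ *
+ *   http-request set-var(txn.dec) req.hdr(x-blob),b64dec,aes_gcm_dec(128,txn.nonce,txn.key,txn.tag)
+ */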
+
+static int check_crypto_digest(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ const EVP_MD *evp = EVP_get_digestbyname(args[0].data.str.area);
+
+ if (evp)
+ return 1;
+
+ memprintf(err, "algorithm must be a valid OpenSSL message digest name.");
+ return 0;
+}
+
+static int sample_conv_crypto_digest(const struct arg *args, struct sample *smp, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+ unsigned char *md = (unsigned char*) trash->area;
+ unsigned int md_len = trash->size;
+ EVP_MD_CTX *ctx = EVP_MD_CTX_new();
+ const EVP_MD *evp = EVP_get_digestbyname(args[0].data.str.area);
+
+ if (!ctx)
+ return 0;
+
+ if (!EVP_DigestInit_ex(ctx, evp, NULL) ||
+ !EVP_DigestUpdate(ctx, smp->data.u.str.area, smp->data.u.str.data) ||
+ !EVP_DigestFinal_ex(ctx, md, &md_len)) {
+ EVP_MD_CTX_free(ctx);
+ return 0;
+ }
+
+ EVP_MD_CTX_free(ctx);
+
+ trash->data = md_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
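+
+/* Usage sketch (illustrative; assumes this converter is registered under the
+ * name "digest" and takes any OpenSSL message digest name as argument):
+ *
+ *   http-request set-header X-Digest %[req.body,digest(sha256),hex]
+ */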
+
+/* Take a numerical X509_V_ERR and return its constant name */
+static int sample_conv_x509_v_err(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ const char *res = x509_v_err_int_to_str(smp->data.u.sint);
+
+ /* if the value was found return its string */
+ if (res) {
+ smp->data.u.str.area = (char *)res;
+ smp->data.u.str.data = strlen(res);
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_CONST;
+
+ return 1;
+ } else {
+ struct buffer *smp_trash = get_trash_chunk();
+
+ /* if the lookup failed, output the number as a string */
+ chunk_printf(smp_trash, "%lld", smp->data.u.sint);
+
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+
+ return 1;
+ }
+
+ return 0;
+}
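+
+/* Usage sketch (illustrative; assumes this converter is registered under the
+ * name "x509_v_err_str"): maps the numeric verify result to its constant
+ * name, e.g. 10 becomes "X509_V_ERR_CERT_HAS_EXPIRED":
+ *
+ *   http-response set-header X-SSL-Verify %[ssl_c_verify,x509_v_err_str]
+ */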
+
+static int check_crypto_hmac(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err)
+{
+ if (!check_crypto_digest(args, conv, file, line, err))
+ return 0;
+
+ if (!sample_check_arg_base64(&args[1], err)) {
+ memprintf(err, "failed to parse key : %s", *err);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int sample_conv_crypto_hmac(const struct arg *args, struct sample *smp, void *private)
+{
+ struct sample key;
+ struct buffer *trash = NULL, *key_trash = NULL;
+ unsigned char *md;
+ unsigned int md_len;
+ const EVP_MD *evp = EVP_get_digestbyname(args[0].data.str.area);
+ int dec_size;
+
+ smp_set_owner(&key, smp->px, smp->sess, smp->strm, smp->opt);
+ if (!sample_conv_var2smp_str(&args[1], &key))
+ return 0;
+
+ if (args[1].type == ARGT_VAR) {
+ key_trash = alloc_trash_chunk();
+ if (!key_trash)
+ goto err;
+
+ dec_size = base64dec(key.data.u.str.area, key.data.u.str.data, key_trash->area, key_trash->size);
+ if (dec_size < 0)
+ goto err;
+ key_trash->data = dec_size;
+ key.data.u.str = *key_trash;
+ }
+
+ trash = alloc_trash_chunk();
+ if (!trash)
+ goto err;
+
+ md = (unsigned char*) trash->area;
+ md_len = trash->size;
+ if (!HMAC(evp, key.data.u.str.area, key.data.u.str.data, (const unsigned char*) smp->data.u.str.area,
+ smp->data.u.str.data, md, &md_len))
+ goto err;
+
+ free_trash_chunk(key_trash);
+
+ trash->data = md_len;
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_BIN;
+ smp_dup(smp);
+ free_trash_chunk(trash);
+ return 1;
+
+err:
+ free_trash_chunk(key_trash);
+ free_trash_chunk(trash);
+ return 0;
+}
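+
+/* Usage sketch (illustrative; assumes this converter is registered under the
+ * name "hmac" and that <b64-key> is a base64-encoded key, as enforced by
+ * check_crypto_hmac() above):
+ *
+ *   http-request set-header X-Sig %[req.body,hmac(sha256,<b64-key>),hex]
+ */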
+
+static int
+smp_fetch_ssl_fc_has_early(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ SSL *ssl;
+ struct connection *conn;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->flags = 0;
+ smp->data.type = SMP_T_BOOL;
+#ifdef OPENSSL_IS_BORINGSSL
+ {
+ smp->data.u.sint = (SSL_in_early_data(ssl) &&
+ SSL_early_data_accepted(ssl));
+ }
+#else
+ smp->data.u.sint = ((conn->flags & CO_FL_EARLY_DATA) &&
+ (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_SSL_WAIT_HS))) ? 1 : 0;
+#endif
+ return 1;
+}
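+
+/* Usage sketch (illustrative): delay the processing of requests received as
+ * TLS 1.3 early data until the handshake completes, to avoid replays:
+ *
+ *   http-request wait-for-handshake if { ssl_fc_has_early }
+ */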
+
+/* boolean, returns true if client cert was present */
+static int
+smp_fetch_ssl_fc_has_crt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = SSL_SOCK_ST_FL_VERIFY_DONE & ctx->xprt_st ? 1 : 0;
+
+ return 1;
+}
+
+/* string, returns the formatted full DN (\C=..\O=..\OU=..\CN=..) of the
+ * client certificate's root CA.
+ */
+#ifdef HAVE_SSL_get0_verified_chain
+static int
+smp_fetch_ssl_r_dn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ X509 *crt = NULL;
+ X509_NAME *name;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ crt = ssl_sock_get_verified_chain_root(ssl);
+ if (!crt)
+ goto out;
+
+ name = X509_get_subject_name(crt);
+ if (!name)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (args[0].type == ARGT_STR && args[0].data.str.data > 0) {
+ int pos = 1;
+
+ if (args[1].type == ARGT_SINT)
+ pos = args[1].data.sint;
+
+ if (ssl_sock_get_dn_entry(name, &args[0].data.str, pos, smp_trash) <= 0)
+ goto out;
+ }
+ else if (args[2].type == ARGT_STR && args[2].data.str.data > 0) {
+ if (ssl_sock_get_dn_formatted(name, &args[2].data.str, smp_trash) <= 0)
+ goto out;
+ }
+ else if (ssl_sock_get_dn_oneline(name, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *smp_trash;
+ ret = 1;
+out:
+ return ret;
+}
+#endif
+
+/* binary, returns a certificate in a binary chunk (der/raw).
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_der(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+
+ X509 *crt = NULL;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (ssl_sock_crt2der(crt, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_BIN;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
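+
+/* Dispatch sketch for the kw[4] tests above (illustrative): with keywords of
+ * the form "ssl_c_der" / "ssl_f_der" / "ssl_s_der", kw[4] is 'c' (peer
+ * certificate on the frontend connection), 'f' (the frontend's own
+ * certificate) or 's' (the server certificate on the backend connection).
+ */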
+
+/* binary, returns the certificate chain in a binary chunk (DER/raw).
+ * The 5th keyword char selects the connection; only the peer certificate
+ * chain is supported.
+ */
+static int
+smp_fetch_ssl_x_chain_der(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ struct buffer *smp_trash;
+ struct buffer *tmp_trash = NULL;
+ struct connection *conn;
+ STACK_OF(X509) *certs = NULL;
+ X509 *crt = NULL;
+ SSL *ssl;
+ int ret = 0;
+ int num_certs;
+ int i;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ if (!conn)
+ return 0;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!cert_peer)
+ return 0;
+
+ certs = SSL_get_peer_cert_chain(ssl);
+ if (!certs)
+ return 0;
+
+ num_certs = sk_X509_num(certs);
+ if (!num_certs)
+ goto out;
+ smp_trash = get_trash_chunk();
+ tmp_trash = alloc_trash_chunk();
+ if (!tmp_trash)
+ goto out;
+ for (i = 0; i < num_certs; i++) {
+ crt = sk_X509_value(certs, i);
+ if (ssl_sock_crt2der(crt, tmp_trash) <= 0)
+ goto out;
+ chunk_cat(smp_trash, tmp_trash);
+ }
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_BIN;
+ ret = 1;
+out:
+ if (tmp_trash)
+ free_trash_chunk(tmp_trash);
+ return ret;
+}
+
+/* binary, returns the certificate's serial number in a binary chunk.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_serial(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (ssl_sock_get_serial(crt, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_BIN;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* binary, returns the client certificate's SHA-1 fingerprint (SHA-1 hash of DER-encoded certificate) in a binary chunk.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_sha1(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ const EVP_MD *digest;
+ int ret = 0;
+ unsigned int len = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ digest = EVP_sha1();
+ X509_digest(crt, digest, (unsigned char *) smp_trash->area, &len);
+ smp_trash->data = len;
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_BIN;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* string, returns the certificate's notAfter date in ASN1_UTCTIME format.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_notafter(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (ssl_sock_get_time(X509_getm_notAfter(crt), smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_STR;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* string, returns the formatted full DN (\C=..\O=..\OU=..\CN=..) of the
+ * certificate's issuer.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_i_dn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ X509_NAME *name;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ name = X509_get_issuer_name(crt);
+ if (!name)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (args[0].type == ARGT_STR && args[0].data.str.data > 0) {
+ int pos = 1;
+
+ if (args[1].type == ARGT_SINT)
+ pos = args[1].data.sint;
+
+ if (ssl_sock_get_dn_entry(name, &args[0].data.str, pos, smp_trash) <= 0)
+ goto out;
+ }
+ else if (args[2].type == ARGT_STR && args[2].data.str.data > 0) {
+ if (ssl_sock_get_dn_formatted(name, &args[2].data.str, smp_trash) <= 0)
+ goto out;
+ }
+ else if (ssl_sock_get_dn_oneline(name, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *smp_trash;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* string, returns the certificate's notBefore date in ASN1_UTCTIME format.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_notbefore(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (ssl_sock_get_time(X509_getm_notBefore(crt), smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *smp_trash;
+ smp->data.type = SMP_T_STR;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* string, returns the formatted full DN (\C=..\O=..\OU=..\CN=..) of the
+ * certificate's subject.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_s_dn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt = NULL;
+ X509_NAME *name;
+ int ret = 0;
+ struct buffer *smp_trash;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ goto out;
+
+ name = X509_get_subject_name(crt);
+ if (!name)
+ goto out;
+
+ smp_trash = get_trash_chunk();
+ if (args[0].type == ARGT_STR && args[0].data.str.data > 0) {
+ int pos = 1;
+
+ if (args[1].type == ARGT_SINT)
+ pos = args[1].data.sint;
+
+ if (ssl_sock_get_dn_entry(name, &args[0].data.str, pos, smp_trash) <= 0)
+ goto out;
+ }
+ else if (args[2].type == ARGT_STR && args[2].data.str.data > 0) {
+ if (ssl_sock_get_dn_formatted(name, &args[2].data.str, smp_trash) <= 0)
+ goto out;
+ }
+ else if (ssl_sock_get_dn_oneline(name, smp_trash) <= 0)
+ goto out;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *smp_trash;
+ ret = 1;
+out:
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer && crt)
+ X509_free(crt);
+ return ret;
+}
+
+/* boolean, returns true if the current session uses a client certificate */
+static int
+smp_fetch_ssl_c_used(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ X509 *crt;
+ struct connection *conn;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ /* SSL_get_peer_certificate() returns a pointer to a refcounted X509 struct */
+ crt = ssl_sock_get_peer_certificate(ssl);
+ if (crt) {
+ X509_free(crt);
+ }
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = (crt != NULL);
+ return 1;
+}
+
+/* integer, returns the certificate version
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_version(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+
+ X509 *crt;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.sint = (unsigned int)(1 + X509_get_version(crt));
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer)
+ X509_free(crt);
+ smp->data.type = SMP_T_SINT;
+
+ return 1;
+}
+
+/* string, returns the certificate's signature algorithm.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_sig_alg(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt;
+ __OPENSSL_110_CONST__ ASN1_OBJECT *algorithm;
+ int nid;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ return 0;
+
+ X509_ALGOR_get0(&algorithm, NULL, NULL, X509_get0_tbs_sigalg(crt));
+ nid = OBJ_obj2nid(algorithm);
+
+ smp->data.u.str.area = (char *)OBJ_nid2sn(nid);
+ if (!smp->data.u.str.area) {
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer)
+ X509_free(crt);
+ return 0;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer)
+ X509_free(crt);
+
+ return 1;
+}
+
+/* string, returns the certificate's key algorithm.
+ * The 5th keyword char is used to know if SSL_get_certificate or SSL_get_peer_certificate
+ * should be used.
+ */
+static int
+smp_fetch_ssl_x_key_alg(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ int cert_peer = (kw[4] == 'c' || kw[4] == 's') ? 1 : 0;
+ int conn_server = (kw[4] == 's') ? 1 : 0;
+ X509 *crt;
+ ASN1_OBJECT *algorithm;
+ int nid;
+ struct connection *conn;
+ SSL *ssl;
+
+ if (conn_server)
+ conn = smp->strm ? sc_conn(smp->strm->scb) : NULL;
+ else
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (cert_peer)
+ crt = ssl_sock_get_peer_certificate(ssl);
+ else
+ crt = SSL_get_certificate(ssl);
+ if (!crt)
+ return 0;
+
+ X509_PUBKEY_get0_param(&algorithm, NULL, NULL, NULL, X509_get_X509_PUBKEY(crt));
+ nid = OBJ_obj2nid(algorithm);
+
+ smp->data.u.str.area = (char *)OBJ_nid2sn(nid);
+ if (!smp->data.u.str.area) {
+ /* SSL_get_peer_certificate() increases the X509 refcount */
+ if (cert_peer)
+ X509_free(crt);
+ return 0;
+ }
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ if (cert_peer)
+ X509_free(crt);
+
+ return 1;
+}
+
+/* boolean, returns true if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = conn_is_ssl(conn);
+ return 1;
+}
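+
+/* Usage sketch (illustrative): the classic plaintext-to-HTTPS redirect on a
+ * frontend listening on both ports:
+ *
+ *   http-request redirect scheme https code 301 if !{ ssl_fc }
+ */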
+
+/* boolean, returns true if the client presents an SNI */
+static int
+smp_fetch_ssl_fc_has_sni(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ struct connection *conn = objt_conn(smp->sess->origin);
+ SSL *ssl = ssl_sock_get_ssl_object(conn);
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = ssl && SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name) != NULL;
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+/* boolean, returns true if the client session has been resumed.
+ * This function is also usable on backend conn if the fetch keyword 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_is_resumed(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = ssl && SSL_session_reused(ssl);
+ return 1;
+}
+
+/*
+ * string, returns the EC curve used for key agreement on the
+ * frontend or backend connection.
+ *
+ * The function used to get the curve name, SSL_get_negotiated_group(), is
+ * only available in OpenSSL 3.0 and onwards.
+ */
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+static int
+smp_fetch_ssl_fc_ec(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+ int __maybe_unused nid;
+ char *curve_name;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ /*
+ * SSL_get0_group_name() is only available from OpenSSL 3.2 onwards. For
+ * OpenSSL >= 3.0 and < 3.2 we keep using SSL_get_negotiated_group() to
+ * get the curve name.
+ */
+ #if (HA_OPENSSL_VERSION_NUMBER >= 0x3020000fL)
+ {
+ const char *group_name = SSL_get0_group_name(ssl);
+ struct buffer *trash;
+ int i;
+
+ if (group_name == NULL)
+ return 0;
+
+ /*
+ * SSL_get0_group_name() returns the group name in lower case, while
+ * OBJ_nid2sn(), used below for older versions, returns the short name
+ * in upper case. To keep the fetch's output consistent across OpenSSL
+ * versions, upper-case a copy in the trash buffer: the string returned
+ * by SSL_get0_group_name() is owned by OpenSSL and must not be
+ * modified in place.
+ */
+ trash = get_trash_chunk();
+ for (i = 0; group_name[i] && i < trash->size - 1; i++)
+ trash->area[i] = toupper((unsigned char)group_name[i]);
+ trash->area[i] = 0;
+ trash->data = i;
+ curve_name = trash->area;
+ }
+ #else
+ nid = SSL_get_negotiated_group(ssl);
+ if (!nid)
+ return 0;
+ curve_name = (char *)OBJ_nid2sn(nid);
+ if (curve_name == NULL)
+ return 0;
+ #endif
+
+ smp->data.u.str.area = curve_name;
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+#endif
+
+/* string, returns the used cipher if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_cipher(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.str.area = (char *)SSL_get_cipher_name(ssl);
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+
+/* integer, returns the algorithm's keysize if front conn. transport layer
+ * is SSL.
+ * This function is also usable on backend conn if the fetch keyword 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_alg_keysize(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+ int sint;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (!SSL_get_cipher_bits(ssl, &sint))
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.sint = sint;
+ smp->data.type = SMP_T_SINT;
+
+ return 1;
+}
+
+/* integer, returns the used keysize if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_use_keysize(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.sint = (unsigned int)SSL_get_cipher_bits(ssl, NULL);
+ if (!smp->data.u.sint)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+
+ return 1;
+}
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+static int
+smp_fetch_ssl_fc_npn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+ unsigned int len = 0;
+
+ smp->flags = SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str.area = NULL;
+ SSL_get0_next_proto_negotiated(ssl,
+ (const unsigned char **)&smp->data.u.str.area,
+ &len);
+
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.u.str.data = len;
+ return 1;
+}
+#endif
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+static int
+smp_fetch_ssl_fc_alpn(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+ unsigned int len = 0;
+
+ smp->flags = SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.str.area = NULL;
+ SSL_get0_alpn_selected(ssl,
+ (const unsigned char **)&smp->data.u.str.area,
+ &len);
+
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.u.str.data = len;
+ return 1;
+}
+#endif
+
+/* string, returns the used protocol if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword 5th
+ * char is 'b'.
+ */
+static int
+smp_fetch_ssl_fc_protocol(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.str.area = (char *)SSL_get_version(ssl);
+ if (!smp->data.u.str.area)
+ return 0;
+
+ smp->data.type = SMP_T_STR;
+ smp->flags = SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+}
+
+/* binary, returns the SSL session id if front conn. transport layer is SSL.
+ * This function is also usable on backend conn if the fetch keyword 5th
+ * char is 'b'.
+ */
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+static int
+smp_fetch_ssl_fc_session_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL_SESSION *ssl_sess;
+ SSL *ssl;
+ unsigned int len = 0;
+
+ smp->flags = SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.type = SMP_T_BIN;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ ssl_sess = SSL_get_session(ssl);
+ if (!ssl_sess)
+ return 0;
+
+ smp->data.u.str.area = (char *)SSL_SESSION_get_id(ssl_sess, &len);
+ if (!smp->data.u.str.area || !len)
+ return 0;
+
+ smp->data.u.str.data = len;
+ return 1;
+}
+#endif
+
+
+#ifdef HAVE_SSL_EXTRACT_RANDOM
+static int
+smp_fetch_ssl_fc_random(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct buffer *data;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ data = get_trash_chunk();
+ if (kw[7] == 'c')
+ data->data = SSL_get_client_random(ssl,
+ (unsigned char *) data->area,
+ data->size);
+ else
+ data->data = SSL_get_server_random(ssl,
+ (unsigned char *) data->area,
+ data->size);
+ if (!data->data)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str = *data;
+
+ return 1;
+}
+
+static int
+smp_fetch_ssl_fc_session_key(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL_SESSION *ssl_sess;
+ struct buffer *data;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ ssl_sess = SSL_get_session(ssl);
+ if (!ssl_sess)
+ return 0;
+
+ data = get_trash_chunk();
+ data->data = SSL_SESSION_get_master_key(ssl_sess,
+ (unsigned char *) data->area,
+ data->size);
+ if (!data->data)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str = *data;
+
+ return 1;
+}
+#endif
+
+static int
+smp_fetch_ssl_fc_sni(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ struct connection *conn;
+ SSL *ssl;
+
+ smp->flags = SMP_F_VOL_SESS | SMP_F_CONST;
+ smp->data.type = SMP_T_STR;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ smp->data.u.str.area = (char *)SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name);
+ if (!smp->data.u.str.area) {
+ /* We might have stored the SNI ourselves, look for it in the
+ * context's ex_data.
+ */
+ smp->data.u.str.area = SSL_get_ex_data(ssl, ssl_client_sni_index);
+
+ if (!smp->data.u.str.area)
+ return 0;
+ }
+
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+
+ return 1;
+#else
+ /* SNI not supported */
+ return 0;
+#endif
+}
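+
+/* Usage sketch (illustrative; the map file and backend names are
+ * hypothetical): route by SNI.
+ *
+ *   use_backend %[ssl_fc_sni,lower,map_dom(/etc/haproxy/sni.map,be_default)]
+ */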
+
+/* binary, returns the TLS client hello cipher list.
+ * Argument: filter_option (0,1); when 1, GREASE values are filtered out.
+ */
+static int
+smp_fetch_ssl_fc_cl_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *smp_trash;
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ if (args[0].data.sint) {
+ smp_trash = get_trash_chunk();
+ exclude_tls_grease(capture->data + capture->ciphersuite_offset, capture->ciphersuite_len, smp_trash);
+ smp->data.u.str.area = smp_trash->area;
+ smp->data.u.str.data = smp_trash->data;
+ smp->flags = SMP_F_VOL_SESS;
+ }
+ else {
+ smp->data.u.str.area = capture->data + capture->ciphersuite_offset;
+ smp->data.u.str.data = capture->ciphersuite_len;
+ smp->flags = SMP_F_VOL_TEST | SMP_F_CONST;
+ }
+
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* binary, returns the TLS client hello cipher list as a hexadecimal string.
+ * Argument: filter_option (0,1); when 1, GREASE values are filtered out.
+ */
+static int
+smp_fetch_ssl_fc_cl_hex(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *data;
+
+ if (!smp_fetch_ssl_fc_cl_bin(args, smp, kw, private))
+ return 0;
+
+ data = get_trash_chunk();
+ dump_binary(data, smp->data.u.str.area, smp->data.u.str.data);
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str = *data;
+ return 1;
+}
+
+/* integer, returns xxh64 hash of tls client hello cipher list. */
+static int
+smp_fetch_ssl_fc_cl_xxh64(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = capture->xxh64;
+ return 1;
+}
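+
+/* Usage sketch (illustrative; assumes the usual fetch name
+ * "ssl_fc_cipherlist_xxh64"): log a compact client-hello fingerprint,
+ * usable for JA3-style TLS client fingerprinting:
+ *
+ *   log-format "%ci:%cp [%tr] %[ssl_fc_cipherlist_xxh64]"
+ */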
+
+static int
+smp_fetch_ssl_fc_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_sock_ctx *ctx;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ ctx = conn_get_ssl_sock_ctx(conn);
+ if (!ctx)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ctx->error_code;
+ return 1;
+}
+
+static int
+smp_fetch_ssl_fc_protocol_hello_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = capture->protocol_version;
+ return 1;
+}
+
+static int
+smp_fetch_ssl_fc_err_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_sock_ctx *ctx;
+ const char *err_code_str;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ ctx = conn_get_ssl_sock_ctx(conn);
+ if (!ctx || !ctx->error_code)
+ return 0;
+
+ err_code_str = ERR_error_string(ctx->error_code, NULL);
+
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str.area = (char*)err_code_str;
+ smp->data.u.str.data = strlen(err_code_str);
+
+ return 1;
+}
+
+/* binary, returns the TLS client hello extensions list.
+ * Argument: filter_option (0,1); when 1, GREASE values are filtered out.
+ */
+static int
+smp_fetch_ssl_fc_ext_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *smp_trash;
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ if (args[0].data.sint) {
+ smp_trash = get_trash_chunk();
+ exclude_tls_grease(capture->data + capture->extensions_offset, capture->extensions_len, smp_trash);
+ smp->data.u.str.area = smp_trash->area;
+ smp->data.u.str.data = smp_trash->data;
+ smp->flags = SMP_F_VOL_SESS;
+ }
+ else {
+ smp->data.u.str.area = capture->data + capture->extensions_offset;
+ smp->data.u.str.data = capture->extensions_len;
+ smp->flags = SMP_F_VOL_TEST | SMP_F_CONST;
+ }
+
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* binary, returns the TLS client hello supported elliptic curves.
+ * Argument: filter_option (0,1); when 1, GREASE values are filtered out.
+ */
+static int
+smp_fetch_ssl_fc_ecl_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct buffer *smp_trash;
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ if (args[0].data.sint) {
+ smp_trash = get_trash_chunk();
+ exclude_tls_grease(capture->data + capture->ec_offset, capture->ec_len, smp_trash);
+ smp->data.u.str.area = smp_trash->area;
+ smp->data.u.str.data = smp_trash->data;
+ smp->flags = SMP_F_VOL_SESS;
+ }
+ else {
+ smp->data.u.str.area = capture->data + capture->ec_offset;
+ smp->data.u.str.data = capture->ec_len;
+ smp->flags = SMP_F_VOL_TEST | SMP_F_CONST;
+ }
+
+ smp->data.type = SMP_T_BIN;
+ return 1;
+}
+
+/* binary, returns the TLS client hello supported elliptic curve point formats */
+static int
+smp_fetch_ssl_fc_ecf_bin(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_capture *capture;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ capture = SSL_get_ex_data(ssl, ssl_capture_ptr_index);
+ if (!capture)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST | SMP_F_CONST;
+ smp->data.type = SMP_T_BIN;
+ smp->data.u.str.area = capture->data + capture->ec_formats_offset;
+ smp->data.u.str.data = capture->ec_formats_len;
+ return 1;
+}
+
+/* Dump the SSL keylog; it only works with "tune.ssl.keylog on" */
+#ifdef HAVE_SSL_KEYLOG
+static int smp_fetch_ssl_x_keylog(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct ssl_keylog *keylog;
+ SSL *ssl;
+ char *src = NULL;
+ const char *sfx;
+
+ if (global_ssl.keylog <= 0)
+ return 0;
+
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ if (!conn)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ keylog = SSL_get_ex_data(ssl, ssl_keylog_index);
+ if (!keylog)
+ return 0;
+
+ sfx = kw + strlen("ssl_xx_");
+
+ if (strcmp(sfx, "client_early_traffic_secret") == 0) {
+ src = keylog->client_early_traffic_secret;
+ } else if (strcmp(sfx, "client_handshake_traffic_secret") == 0) {
+ src = keylog->client_handshake_traffic_secret;
+ } else if (strcmp(sfx, "server_handshake_traffic_secret") == 0) {
+ src = keylog->server_handshake_traffic_secret;
+ } else if (strcmp(sfx, "client_traffic_secret_0") == 0) {
+ src = keylog->client_traffic_secret_0;
+ } else if (strcmp(sfx, "server_traffic_secret_0") == 0) {
+ src = keylog->server_traffic_secret_0;
+ } else if (strcmp(sfx, "exporter_secret") == 0) {
+ src = keylog->exporter_secret;
+ } else if (strcmp(sfx, "early_exporter_secret") == 0) {
+ src = keylog->early_exporter_secret;
+ }
+
+ if (!src || !*src)
+ return 0;
+
+ smp->data.u.str.area = src;
+ smp->data.type = SMP_T_STR;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_CONST;
+ smp->data.u.str.data = strlen(smp->data.u.str.area);
+ return 1;
+}
+#endif
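+
+/* Usage sketch (illustrative; assumes the fetches are registered under the
+ * usual "ssl_fc_*" keylog names): with "tune.ssl.keylog on" in the global
+ * section, emit SSLKEYLOGFILE-compatible lines for TLS 1.3 decryption:
+ *
+ *   log-format "CLIENT_HANDSHAKE_TRAFFIC_SECRET %[ssl_fc_client_random,hex] %[ssl_fc_client_handshake_traffic_secret]"
+ */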
+
+static int
+smp_fetch_ssl_fc_cl_str(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+#if defined(OPENSSL_IS_BORINGSSL) || defined(SSL_CTRL_GET_RAW_CIPHERLIST)
+ struct buffer *data;
+ int i;
+
+ if (!smp_fetch_ssl_fc_cl_bin(args, smp, kw, private))
+ return 0;
+
+ data = get_trash_chunk();
+ for (i = 0; i + 1 < smp->data.u.str.data; i += 2) {
+ const char *str;
+ const SSL_CIPHER *cipher;
+ const unsigned char *bin = (const unsigned char *) smp->data.u.str.area + i;
+ uint16_t id = (bin[0] << 8) | bin[1];
+#if defined(OPENSSL_IS_BORINGSSL)
+ cipher = SSL_get_cipher_by_value(id);
+#else
+ struct connection *conn = __objt_conn(smp->sess->origin);
+ SSL *ssl = ssl_sock_get_ssl_object(conn);
+ cipher = SSL_CIPHER_find(ssl, bin);
+#endif
+ str = SSL_CIPHER_get_name(cipher);
+ if (!str || strcmp(str, "(NONE)") == 0)
+ chunk_appendf(data, "%sUNKNOWN(%04x)", i == 0 ? "" : ",", id);
+ else
+ chunk_appendf(data, "%s%s", i == 0 ? "" : ",", str);
+ }
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *data;
+ return 1;
+#else
+ return smp_fetch_ssl_fc_cl_xxh64(args, smp, kw, private);
+#endif
+}
+
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+static int
+smp_fetch_ssl_fc_unique_id(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ int finished_len;
+ struct buffer *finished_trash;
+ SSL *ssl;
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else
+ conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ smp->flags = 0;
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ finished_trash = get_trash_chunk();
+ if (!SSL_session_reused(ssl))
+ finished_len = SSL_get_peer_finished(ssl,
+ finished_trash->area,
+ finished_trash->size);
+ else
+ finished_len = SSL_get_finished(ssl,
+ finished_trash->area,
+ finished_trash->size);
+
+ if (!finished_len)
+ return 0;
+
+ finished_trash->data = finished_len;
+ smp->flags = SMP_F_VOL_SESS;
+ smp->data.u.str = *finished_trash;
+ smp->data.type = SMP_T_BIN;
+
+ return 1;
+}
+#endif
+
+/* integer, returns the first verify error encountered in the CA chain of the client certificate */
+static int
+smp_fetch_ssl_c_ca_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (conn && conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!ctx)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (unsigned long long int)SSL_SOCK_ST_TO_CA_ERROR(ctx->xprt_st);
+ smp->flags = SMP_F_VOL_SESS;
+
+ return 1;
+}
+
+/* integer, returns the depth of the first verify error in the client certificate's CA chain. */
+static int
+smp_fetch_ssl_c_ca_err_depth(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (conn && conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!ctx)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (long long int)SSL_SOCK_ST_TO_CAEDEPTH(ctx->xprt_st);
+ smp->flags = SMP_F_VOL_SESS;
+
+ return 1;
+}
+
+/* integer, returns the first verify error on the client certificate */
+static int
+smp_fetch_ssl_c_err(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (conn && conn->flags & CO_FL_WAIT_XPRT && !conn->err_code) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ if (!ctx)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (long long int)SSL_SOCK_ST_TO_CRTERROR(ctx->xprt_st);
+ smp->flags = SMP_F_VOL_SESS;
+
+ return 1;
+}
+
+/* integer, returns the verify result on client cert */
+static int
+smp_fetch_ssl_c_verify(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ SSL *ssl;
+
+ conn = objt_conn(smp->sess->origin);
+ ssl = ssl_sock_get_ssl_object(conn);
+ if (!ssl)
+ return 0;
+
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ smp->flags = SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = (long long int)SSL_get_verify_result(ssl);
+ smp->flags = SMP_F_VOL_SESS;
+
+ return 1;
+}
+
+/* Argument validation functions */
+
+/* This function is used to validate the arguments passed to any "x_dn" ssl
+ * keywords. These keywords support specifying a third parameter that must be
+ * either empty or the value "rfc2253". Returns 0 on error, non-zero if OK.
+ */
+int val_dnfmt(struct arg *arg, char **err_msg)
+{
+ if (arg && arg[2].type == ARGT_STR && arg[2].data.str.data > 0 && (strcmp(arg[2].data.str.area, "rfc2253") != 0)) {
+ memprintf(err_msg, "only rfc2253 or a blank value are currently supported as the format argument.");
+ return 0;
+ }
+ return 1;
+}
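+
+/* Usage sketch for the "x_dn" fetches validated above (illustrative): pick a
+ * single DN entry, or leave the entry blank and request the rfc2253 format:
+ *
+ *   acl cn_ok ssl_c_s_dn(CN) -m str client.example.com
+ *   http-request set-header X-Issuer %[ssl_c_i_dn(,0,rfc2253)]
+ */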
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "ssl_bc", smp_fetch_ssl_fc, 0, NULL, SMP_T_BOOL, SMP_USE_L5SRV },
+ { "ssl_bc_alg_keysize", smp_fetch_ssl_fc_alg_keysize, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ { "ssl_bc_alpn", smp_fetch_ssl_fc_alpn, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+#endif
+ { "ssl_bc_cipher", smp_fetch_ssl_fc_cipher, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+ { "ssl_bc_curve", smp_fetch_ssl_fc_ec, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+#endif
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ { "ssl_bc_npn", smp_fetch_ssl_fc_npn, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+#endif
+ { "ssl_bc_is_resumed", smp_fetch_ssl_fc_is_resumed, 0, NULL, SMP_T_BOOL, SMP_USE_L5SRV },
+ { "ssl_bc_protocol", smp_fetch_ssl_fc_protocol, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+ { "ssl_bc_unique_id", smp_fetch_ssl_fc_unique_id, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+ { "ssl_bc_use_keysize", smp_fetch_ssl_fc_use_keysize, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+ { "ssl_bc_session_id", smp_fetch_ssl_fc_session_id, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+#endif
+#ifdef HAVE_SSL_EXTRACT_RANDOM
+ { "ssl_bc_client_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+ { "ssl_bc_server_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+ { "ssl_bc_session_key", smp_fetch_ssl_fc_session_key, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV },
+#endif
+ { "ssl_bc_err", smp_fetch_ssl_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV },
+ { "ssl_bc_err_str", smp_fetch_ssl_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L5SRV },
+ { "ssl_c_ca_err", smp_fetch_ssl_c_ca_err, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_c_ca_err_depth", smp_fetch_ssl_c_ca_err_depth, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_c_der", smp_fetch_ssl_x_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_c_chain_der", smp_fetch_ssl_x_chain_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_c_err", smp_fetch_ssl_c_err, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_c_i_dn", smp_fetch_ssl_x_i_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_key_alg", smp_fetch_ssl_x_key_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_notafter", smp_fetch_ssl_x_notafter, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_notbefore", smp_fetch_ssl_x_notbefore, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#ifdef HAVE_SSL_get0_verified_chain
+ { "ssl_c_r_dn", smp_fetch_ssl_r_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+#endif
+ { "ssl_c_sig_alg", smp_fetch_ssl_x_sig_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_s_dn", smp_fetch_ssl_x_s_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_c_serial", smp_fetch_ssl_x_serial, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_c_sha1", smp_fetch_ssl_x_sha1, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_c_used", smp_fetch_ssl_c_used, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_c_verify", smp_fetch_ssl_c_verify, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_c_version", smp_fetch_ssl_x_version, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_f_der", smp_fetch_ssl_x_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_f_i_dn", smp_fetch_ssl_x_i_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_key_alg", smp_fetch_ssl_x_key_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_notafter", smp_fetch_ssl_x_notafter, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_notbefore", smp_fetch_ssl_x_notbefore, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_sig_alg", smp_fetch_ssl_x_sig_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_s_dn", smp_fetch_ssl_x_s_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_f_serial", smp_fetch_ssl_x_serial, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_f_sha1", smp_fetch_ssl_x_sha1, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_f_version", smp_fetch_ssl_x_version, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc", smp_fetch_ssl_fc, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_fc_alg_keysize", smp_fetch_ssl_fc_alg_keysize, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc_cipher", smp_fetch_ssl_fc_cipher, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+ { "ssl_fc_curve", smp_fetch_ssl_fc_ec, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#endif
+ { "ssl_fc_has_crt", smp_fetch_ssl_fc_has_crt, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_fc_has_early", smp_fetch_ssl_fc_has_early, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_fc_has_sni", smp_fetch_ssl_fc_has_sni, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+ { "ssl_fc_is_resumed", smp_fetch_ssl_fc_is_resumed, 0, NULL, SMP_T_BOOL, SMP_USE_L5CLI },
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ { "ssl_fc_npn", smp_fetch_ssl_fc_npn, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ { "ssl_fc_alpn", smp_fetch_ssl_fc_alpn, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#endif
+ { "ssl_fc_protocol", smp_fetch_ssl_fc_protocol, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+ { "ssl_fc_unique_id", smp_fetch_ssl_fc_unique_id, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+#endif
+ { "ssl_fc_use_keysize", smp_fetch_ssl_fc_use_keysize, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+#if HA_OPENSSL_VERSION_NUMBER > 0x0090800fL
+ { "ssl_fc_session_id", smp_fetch_ssl_fc_session_id, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+#endif
+#ifdef HAVE_SSL_EXTRACT_RANDOM
+ { "ssl_fc_client_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_fc_server_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_fc_session_key", smp_fetch_ssl_fc_session_key, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+#endif
+
+#ifdef HAVE_SSL_KEYLOG
+ { "ssl_fc_client_early_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_client_handshake_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_server_handshake_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_client_traffic_secret_0", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_server_traffic_secret_0", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_exporter_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_early_exporter_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+#endif
+
+ { "ssl_fc_sni", smp_fetch_ssl_fc_sni, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_cipherlist_bin", smp_fetch_ssl_fc_cl_bin, ARG1(0,SINT), NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_cipherlist_hex", smp_fetch_ssl_fc_cl_hex, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_fc_cipherlist_str", smp_fetch_ssl_fc_cl_str, ARG1(0,SINT), NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_cipherlist_xxh", smp_fetch_ssl_fc_cl_xxh64, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc_err", smp_fetch_ssl_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc_err_str", smp_fetch_ssl_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_protocol_hello_id",smp_fetch_ssl_fc_protocol_hello_id,0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { "ssl_fc_extlist_bin", smp_fetch_ssl_fc_ext_bin, ARG1(0,SINT), NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_eclist_bin", smp_fetch_ssl_fc_ecl_bin, ARG1(0,SINT), NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_fc_ecformats_bin", smp_fetch_ssl_fc_ecf_bin, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+
+/* SSL server certificate fetches */
+ { "ssl_s_der", smp_fetch_ssl_x_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_s_chain_der", smp_fetch_ssl_x_chain_der, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_s_key_alg", smp_fetch_ssl_x_key_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_notafter", smp_fetch_ssl_x_notafter, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_notbefore", smp_fetch_ssl_x_notbefore, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_sig_alg", smp_fetch_ssl_x_sig_alg, 0, NULL, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_s_dn", smp_fetch_ssl_x_s_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_i_dn", smp_fetch_ssl_x_i_dn, ARG3(0,STR,SINT,STR),val_dnfmt, SMP_T_STR, SMP_USE_L5CLI },
+ { "ssl_s_serial", smp_fetch_ssl_x_serial, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_s_sha1", smp_fetch_ssl_x_sha1, 0, NULL, SMP_T_BIN, SMP_USE_L5CLI },
+ { "ssl_s_version", smp_fetch_ssl_x_version, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI },
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "sha2", sample_conv_sha2, ARG1(0, SINT), smp_check_sha2, SMP_T_BIN, SMP_T_BIN },
+#ifdef EVP_CIPH_GCM_MODE
+ { "aes_gcm_dec", sample_conv_aes_gcm_dec, ARG4(4,SINT,STR,STR,STR), check_aes_gcm, SMP_T_BIN, SMP_T_BIN },
+#endif
+ { "x509_v_err_str", sample_conv_x509_v_err, 0, NULL, SMP_T_SINT, SMP_T_STR },
+ { "digest", sample_conv_crypto_digest, ARG1(1,STR), check_crypto_digest, SMP_T_BIN, SMP_T_BIN },
+ { "hmac", sample_conv_crypto_hmac, ARG2(2,STR,STR), check_crypto_hmac, SMP_T_BIN, SMP_T_BIN },
+#if defined(HAVE_CRYPTO_memcmp)
+ { "secure_memcmp", sample_conv_secure_memcmp, ARG1(1,STR), smp_check_secure_memcmp, SMP_T_BIN, SMP_T_BOOL },
+#endif
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
+
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct acl_kw_list acl_kws = {ILH, {
+ { "ssl_fc_sni_end", "ssl_fc_sni", PAT_MATCH_END },
+ { "ssl_fc_sni_reg", "ssl_fc_sni", PAT_MATCH_REG },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, acl_register_keywords, &acl_kws);
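+
+/* The two ACL keywords above are aliases which preset the matching method
+ * on the "ssl_fc_sni" fetch; an illustrative sketch:
+ *
+ *     acl site_a ssl_fc_sni_end .example.com
+ *     # equivalent to: acl site_a ssl_fc_sni -m end .example.com
+ */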
diff --git a/src/ssl_sock.c b/src/ssl_sock.c
new file mode 100644
index 0000000..6fbabb4
--- /dev/null
+++ b/src/ssl_sock.c
@@ -0,0 +1,8100 @@
+
+/*
+ * SSL/TLS transport layer over SOCK_STREAM sockets
+ *
+ * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Acknowledgement:
+ * We'd like to specially thank the Stud project authors for the very clean
+ * and well documented code which helped us understand how the OpenSSL API
+ * ought to be used in non-blocking mode. This is one difficult part which
+ * is not easy to get from the OpenSSL doc, and reading the Stud code made
+ * it much more obvious than the examples in the OpenSSL package. Keep up
+ * the good work, guys!
+ *
+ * Stud is an extremely efficient and scalable SSL/TLS proxy which combines
+ * particularly well with haproxy. For more info about this project, visit :
+ * https://github.com/bumptech/stud
+ *
+ */
+
+/* Note: do NOT include openssl/xxx.h here, do it in openssl-compat.h */
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <netdb.h>
+#include <netinet/tcp.h>
+
+#include <import/ebpttree.h>
+#include <import/ebsttree.h>
+#include <import/lru.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/base64.h>
+#include <haproxy/channel.h>
+#include <haproxy/chunk.h>
+#include <haproxy/cli.h>
+#include <haproxy/connection.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/log.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/quic_openssl_compat.h>
+#include <haproxy/quic_tp.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/shctx.h>
+#include <haproxy/ssl_ckch.h>
+#include <haproxy/ssl_crtlist.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/ssl_ocsp.h>
+
+
+/* ***** READ THIS before adding code here! *****
+ *
+ * Due to API incompatibilities between multiple OpenSSL versions and their
+ * derivatives, it's often tempting to add macros to (re-)define certain
+ * symbols. Please do not do this here, and do it in common/openssl-compat.h
+ * exclusively so that the whole code consistently uses the same macros.
+ *
+ * Whenever possible if a macro is missing in certain versions, it's better
+ * to conditionally define it in openssl-compat.h than using lots of ifdefs.
+ */
+
+int nb_engines = 0;
+
+static struct eb_root cert_issuer_tree = EB_ROOT; /* issuers tree from "issuers-chain-path" */
+
+struct global_ssl global_ssl = {
+#ifdef LISTEN_DEFAULT_CIPHERS
+ .listen_default_ciphers = LISTEN_DEFAULT_CIPHERS,
+#endif
+#ifdef CONNECT_DEFAULT_CIPHERS
+ .connect_default_ciphers = CONNECT_DEFAULT_CIPHERS,
+#endif
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ .listen_default_ciphersuites = LISTEN_DEFAULT_CIPHERSUITES,
+ .connect_default_ciphersuites = CONNECT_DEFAULT_CIPHERSUITES,
+#endif
+ .listen_default_ssloptions = BC_SSL_O_NONE,
+ .connect_default_ssloptions = SRV_SSL_O_NONE,
+
+ .listen_default_sslmethods.flags = MC_SSL_O_ALL,
+ .listen_default_sslmethods.min = CONF_TLSV_NONE,
+ .listen_default_sslmethods.max = CONF_TLSV_NONE,
+ .connect_default_sslmethods.flags = MC_SSL_O_ALL,
+ .connect_default_sslmethods.min = CONF_TLSV_NONE,
+ .connect_default_sslmethods.max = CONF_TLSV_NONE,
+
+#ifdef DEFAULT_SSL_MAX_RECORD
+ .max_record = DEFAULT_SSL_MAX_RECORD,
+#endif
+ .hard_max_record = 0,
+ .default_dh_param = SSL_DEFAULT_DH_PARAM,
+ .ctx_cache = DEFAULT_SSL_CTX_CACHE,
+ .capture_buffer_size = 0,
+ .extra_files = SSL_GF_ALL,
+ .extra_files_noext = 0,
+#ifdef HAVE_SSL_KEYLOG
+ .keylog = 0,
+#endif
+#ifndef OPENSSL_NO_OCSP
+ .ocsp_update.delay_max = SSL_OCSP_UPDATE_DELAY_MAX,
+ .ocsp_update.delay_min = SSL_OCSP_UPDATE_DELAY_MIN,
+#endif
+};
+
+static BIO_METHOD *ha_meth;
+
+DECLARE_STATIC_POOL(ssl_sock_ctx_pool, "ssl_sock_ctx", sizeof(struct ssl_sock_ctx));
+
+DECLARE_STATIC_POOL(ssl_sock_client_sni_pool, "ssl_sock_client_sni", TLSEXT_MAXLEN_host_name + 1);
+
+/* ssl stats module */
+enum {
+ SSL_ST_SESS,
+ SSL_ST_REUSED_SESS,
+ SSL_ST_FAILED_HANDSHAKE,
+
+ SSL_ST_STATS_COUNT /* must be the last member of the enum */
+};
+
+static struct name_desc ssl_stats[] = {
+ [SSL_ST_SESS] = { .name = "ssl_sess",
+ .desc = "Total number of ssl sessions established" },
+ [SSL_ST_REUSED_SESS] = { .name = "ssl_reused_sess",
+ .desc = "Total number of ssl sessions reused" },
+ [SSL_ST_FAILED_HANDSHAKE] = { .name = "ssl_failed_handshake",
+ .desc = "Total number of failed handshake" },
+};
+
+static struct ssl_counters {
+ long long sess;
+ long long reused_sess;
+ long long failed_handshake;
+} ssl_counters;
+
+static void ssl_fill_stats(void *data, struct field *stats)
+{
+ struct ssl_counters *counters = data;
+
+ stats[SSL_ST_SESS] = mkf_u64(FN_COUNTER, counters->sess);
+ stats[SSL_ST_REUSED_SESS] = mkf_u64(FN_COUNTER, counters->reused_sess);
+ stats[SSL_ST_FAILED_HANDSHAKE] = mkf_u64(FN_COUNTER, counters->failed_handshake);
+}
+
+static struct stats_module ssl_stats_module = {
+ .name = "ssl",
+ .fill_stats = ssl_fill_stats,
+ .stats = ssl_stats,
+ .stats_count = SSL_ST_STATS_COUNT,
+ .counters = &ssl_counters,
+ .counters_size = sizeof(ssl_counters),
+ .domain_flags = MK_STATS_PROXY_DOMAIN(STATS_PX_CAP_FE|STATS_PX_CAP_LI|STATS_PX_CAP_BE|STATS_PX_CAP_SRV),
+ .clearable = 1,
+};
+
+INITCALL1(STG_REGISTER, stats_register_module, &ssl_stats_module);
+
+/* CLI context for "show tls-keys" */
+struct show_keys_ctx {
+ struct tls_keys_ref *next_ref; /* next reference to be dumped */
+ int names_only; /* non-zero = only show file names */
+ int next_index; /* next index to be dumped */
+ int dump_entries; /* dump entries also */
+ enum {
+ SHOW_KEYS_INIT = 0,
+ SHOW_KEYS_LIST,
+ SHOW_KEYS_DONE,
+ } state; /* phase of the current dump */
+};
+
+/* ssl_sock_io_cb is exported to see it resolved in "show fd" */
+struct task *ssl_sock_io_cb(struct task *, void *, unsigned int);
+static int ssl_sock_handshake(struct connection *conn, unsigned int flag);
+
+/* Methods to implement OpenSSL BIO */
+static int ha_ssl_write(BIO *h, const char *buf, int num)
+{
+ struct buffer tmpbuf;
+ struct ssl_sock_ctx *ctx;
+ uint flags;
+ int ret;
+
+ ctx = BIO_get_data(h);
+ tmpbuf.size = num;
+ tmpbuf.area = (void *)(uintptr_t)buf;
+ tmpbuf.data = num;
+ tmpbuf.head = 0;
+ flags = (ctx->xprt_st & SSL_SOCK_SEND_MORE) ? CO_SFL_MSG_MORE : 0;
+ ret = ctx->xprt->snd_buf(ctx->conn, ctx->xprt_ctx, &tmpbuf, num, flags);
+ BIO_clear_retry_flags(h);
+ if (ret == 0 && !(ctx->conn->flags & (CO_FL_ERROR | CO_FL_SOCK_WR_SH))) {
+ BIO_set_retry_write(h);
+ ret = -1;
+ }
+ return ret;
+}
+
+static int ha_ssl_gets(BIO *h, char *buf, int size)
+{
+	return 0;
+}
+
+static int ha_ssl_puts(BIO *h, const char *str)
+{
+	return ha_ssl_write(h, str, strlen(str));
+}
+
+static int ha_ssl_read(BIO *h, char *buf, int size)
+{
+ struct buffer tmpbuf;
+ struct ssl_sock_ctx *ctx;
+ int ret;
+
+ ctx = BIO_get_data(h);
+ tmpbuf.size = size;
+ tmpbuf.area = buf;
+ tmpbuf.data = 0;
+ tmpbuf.head = 0;
+ ret = ctx->xprt->rcv_buf(ctx->conn, ctx->xprt_ctx, &tmpbuf, size, 0);
+ BIO_clear_retry_flags(h);
+ if (ret == 0 && !(ctx->conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH))) {
+ BIO_set_retry_read(h);
+ ret = -1;
+ }
+
+ return ret;
+}
+
+static long ha_ssl_ctrl(BIO *h, int cmd, long arg1, void *arg2)
+{
+ int ret = 0;
+ switch (cmd) {
+ case BIO_CTRL_DUP:
+ case BIO_CTRL_FLUSH:
+ ret = 1;
+ break;
+ }
+ return ret;
+}
+
+static int ha_ssl_new(BIO *h)
+{
+ BIO_set_init(h, 1);
+ BIO_set_data(h, NULL);
+ BIO_clear_flags(h, ~0);
+ return 1;
+}
+
+static int ha_ssl_free(BIO *data)
+{
+	return 1;
+}
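+
+/* Sketch of how the callbacks above are assembled into <ha_meth> with the
+ * standard OpenSSL 1.1+ accessors (the exact type id and name used by the
+ * real init code may differ):
+ *
+ *     ha_meth = BIO_meth_new(BIO_TYPE_SOURCE_SINK, "ha bio");
+ *     BIO_meth_set_write(ha_meth, ha_ssl_write);
+ *     BIO_meth_set_read(ha_meth, ha_ssl_read);
+ *     BIO_meth_set_puts(ha_meth, ha_ssl_puts);
+ *     BIO_meth_set_gets(ha_meth, ha_ssl_gets);
+ *     BIO_meth_set_ctrl(ha_meth, ha_ssl_ctrl);
+ *     BIO_meth_set_create(ha_meth, ha_ssl_new);
+ *     BIO_meth_set_destroy(ha_meth, ha_ssl_free);
+ */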
+
+
+#if defined(USE_THREAD) && (HA_OPENSSL_VERSION_NUMBER < 0x10100000L)
+
+static HA_RWLOCK_T *ssl_rwlocks;
+
+
+unsigned long ssl_id_function(void)
+{
+ return (unsigned long)tid;
+}
+
+void ssl_locking_function(int mode, int n, const char * file, int line)
+{
+ if (mode & CRYPTO_LOCK) {
+ if (mode & CRYPTO_READ)
+ HA_RWLOCK_RDLOCK(SSL_LOCK, &ssl_rwlocks[n]);
+ else
+ HA_RWLOCK_WRLOCK(SSL_LOCK, &ssl_rwlocks[n]);
+ }
+ else {
+ if (mode & CRYPTO_READ)
+ HA_RWLOCK_RDUNLOCK(SSL_LOCK, &ssl_rwlocks[n]);
+ else
+ HA_RWLOCK_WRUNLOCK(SSL_LOCK, &ssl_rwlocks[n]);
+ }
+}
+
+static int ssl_locking_init(void)
+{
+ int i;
+
+ ssl_rwlocks = malloc(sizeof(HA_RWLOCK_T)*CRYPTO_num_locks());
+ if (!ssl_rwlocks)
+ return -1;
+
+ for (i = 0 ; i < CRYPTO_num_locks() ; i++)
+ HA_RWLOCK_INIT(&ssl_rwlocks[i]);
+
+ CRYPTO_set_id_callback(ssl_id_function);
+ CRYPTO_set_locking_callback(ssl_locking_function);
+
+ return 0;
+}
+
+#endif
+
+__decl_thread(HA_SPINLOCK_T ckch_lock);
+
+/* mimic what X509_STORE_load_locations does with store_ctx */
+static int ssl_set_cert_crl_file(X509_STORE *store_ctx, char *path)
+{
+ X509_STORE *store = NULL;
+ struct cafile_entry *ca_e = ssl_store_get_cafile_entry(path, 0);
+ if (ca_e)
+ store = ca_e->ca_store;
+ if (store_ctx && store) {
+ int i;
+ X509_OBJECT *obj;
+ STACK_OF(X509_OBJECT) *objs = X509_STORE_get0_objects(store);
+ for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
+ obj = sk_X509_OBJECT_value(objs, i);
+ switch (X509_OBJECT_get_type(obj)) {
+ case X509_LU_X509:
+ X509_STORE_add_cert(store_ctx, X509_OBJECT_get0_X509(obj));
+ break;
+ case X509_LU_CRL:
+ X509_STORE_add_crl(store_ctx, X509_OBJECT_get0_X509_CRL(obj));
+ break;
+ default:
+ break;
+ }
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/* SSL_CTX_load_verify_locations substitute, internally calls X509_STORE_load_locations */
+static int ssl_set_verify_locations_file(SSL_CTX *ctx, char *path)
+{
+ X509_STORE *store_ctx = SSL_CTX_get_cert_store(ctx);
+ return ssl_set_cert_crl_file(store_ctx, path);
+}
+
+/*
+  Extract the CA list from a CA file already present in the tree.
+  Duplicate ca_name entries are tracked with an ebtree, which simplifies
+  OpenSSL compatibility.
+  Returns a shared ca_list: SSL_dup_CA_list must be used before setting
+  it on an SSL_CTX.
+*/
+static STACK_OF(X509_NAME)* ssl_get_client_ca_file(char *path)
+{
+ struct ebmb_node *eb;
+ struct cafile_entry *ca_e;
+
+ eb = ebst_lookup(&cafile_tree, path);
+ if (!eb)
+ return NULL;
+ ca_e = ebmb_entry(eb, struct cafile_entry, node);
+
+ if (ca_e->ca_list == NULL) {
+ int i;
+ unsigned long key;
+ struct eb_root ca_name_tree = EB_ROOT;
+ struct eb64_node *node, *back;
+ struct {
+ struct eb64_node node;
+ X509_NAME *xname;
+ } *ca_name;
+ STACK_OF(X509_OBJECT) *objs;
+ STACK_OF(X509_NAME) *skn;
+ X509 *x;
+ X509_NAME *xn;
+
+ skn = sk_X509_NAME_new_null();
+ /* take x509 from cafile_tree */
+ objs = X509_STORE_get0_objects(ca_e->ca_store);
+ for (i = 0; i < sk_X509_OBJECT_num(objs); i++) {
+ x = X509_OBJECT_get0_X509(sk_X509_OBJECT_value(objs, i));
+ if (!x)
+ continue;
+ xn = X509_get_subject_name(x);
+ if (!xn)
+ continue;
+ /* Check for duplicates. */
+ key = X509_NAME_hash(xn);
+ for (node = eb64_lookup(&ca_name_tree, key), ca_name = NULL;
+ node && ca_name == NULL;
+ node = eb64_next(node)) {
+ ca_name = container_of(node, typeof(*ca_name), node);
+ if (X509_NAME_cmp(xn, ca_name->xname) != 0)
+ ca_name = NULL;
+ }
+			/* a duplicate was found, skip it */
+ if (ca_name)
+ continue;
+ ca_name = calloc(1, sizeof *ca_name);
+ xn = X509_NAME_dup(xn);
+ if (!ca_name ||
+ !xn ||
+ !sk_X509_NAME_push(skn, xn)) {
+ free(ca_name);
+ X509_NAME_free(xn);
+ sk_X509_NAME_pop_free(skn, X509_NAME_free);
+ sk_X509_NAME_free(skn);
+ skn = NULL;
+ break;
+ }
+ ca_name->node.key = key;
+ ca_name->xname = xn;
+ eb64_insert(&ca_name_tree, &ca_name->node);
+ }
+ ca_e->ca_list = skn;
+ /* remove temporary ca_name tree */
+ node = eb64_first(&ca_name_tree);
+ while (node) {
+ ca_name = container_of(node, typeof(*ca_name), node);
+ back = eb64_next(node);
+ eb64_delete(node);
+ free(ca_name);
+ node = back;
+ }
+ }
+ return ca_e->ca_list;
+}
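+
+/* Consumption sketch (hypothetical caller): as noted above, the returned
+ * stack is shared, so it must be duplicated before being handed over to
+ * OpenSSL, which takes ownership of the copy:
+ *
+ *     STACK_OF(X509_NAME) *ca_list = ssl_get_client_ca_file(path);
+ *     if (ca_list)
+ *             SSL_CTX_set_client_CA_list(ctx, SSL_dup_CA_list(ca_list));
+ */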
+
+struct pool_head *pool_head_ssl_capture __read_mostly = NULL;
+int ssl_capture_ptr_index = -1;
+int ssl_app_data_index = -1;
+#ifdef USE_QUIC
+int ssl_qc_app_data_index = -1;
+#endif /* USE_QUIC */
+
+#ifdef HAVE_SSL_KEYLOG
+int ssl_keylog_index = -1;
+struct pool_head *pool_head_ssl_keylog __read_mostly = NULL;
+struct pool_head *pool_head_ssl_keylog_str __read_mostly = NULL;
+#endif
+
+int ssl_client_crt_ref_index = -1;
+
+/* Used to store the client's SNI in case of ClientHello callback error */
+int ssl_client_sni_index = -1;
+
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+struct list tlskeys_reference = LIST_HEAD_INIT(tlskeys_reference);
+#endif
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+unsigned int openssl_engines_initialized;
+struct list openssl_engines = LIST_HEAD_INIT(openssl_engines);
+struct ssl_engine_list {
+ struct list list;
+ ENGINE *e;
+};
+#endif
+
+#ifdef HAVE_SSL_PROVIDERS
+struct list openssl_providers = LIST_HEAD_INIT(openssl_providers);
+struct ssl_provider_list {
+ struct list list;
+ OSSL_PROVIDER *provider;
+};
+#endif
+
+#ifndef OPENSSL_NO_DH
+static int ssl_dh_ptr_index = -1;
+static HASSL_DH *global_dh = NULL;
+static HASSL_DH *local_dh_1024 = NULL;
+static HASSL_DH *local_dh_2048 = NULL;
+static HASSL_DH *local_dh_4096 = NULL;
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+static DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen);
+#else
+static void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey);
+#endif
+#endif /* OPENSSL_NO_DH */
+
+#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES)
+/* X509V3 Extensions that will be added on generated certificates */
+#define X509V3_EXT_SIZE 5
+static char *x509v3_ext_names[X509V3_EXT_SIZE] = {
+ "basicConstraints",
+ "nsComment",
+ "subjectKeyIdentifier",
+ "authorityKeyIdentifier",
+ "keyUsage",
+};
+static char *x509v3_ext_values[X509V3_EXT_SIZE] = {
+ "CA:FALSE",
+ "\"OpenSSL Generated Certificate\"",
+ "hash",
+ "keyid,issuer:always",
+ "nonRepudiation,digitalSignature,keyEncipherment"
+};
+/* LRU cache to store generated certificate */
+static struct lru64_head *ssl_ctx_lru_tree = NULL;
+static unsigned int ssl_ctx_lru_seed = 0;
+static unsigned int ssl_ctx_serial;
+__decl_rwlock(ssl_ctx_lru_rwlock);
+
+#endif // SSL_CTRL_SET_TLSEXT_HOSTNAME
+
+/* The order here matters for picking a default context,
+ * keep the most common keytype at the bottom of the list
+ */
+const char *SSL_SOCK_KEYTYPE_NAMES[] = {
+ "dsa",
+ "ecdsa",
+ "rsa"
+};
+
+static struct shared_context *ssl_shctx = NULL; /* ssl shared session cache */
+static struct eb_root *sh_ssl_sess_tree; /* ssl shared session tree */
+
+/* Dedicated callback functions for heartbeat and clienthello.
+ */
+#ifdef TLS1_RT_HEARTBEAT
+static void ssl_sock_parse_heartbeat(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl);
+#endif
+static void ssl_sock_parse_clienthello(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl);
+
+#ifdef HAVE_SSL_KEYLOG
+static void ssl_init_keylog(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl);
+#endif
+
+/* List head of all registered SSL/TLS protocol message callbacks. */
+struct list ssl_sock_msg_callbacks = LIST_HEAD_INIT(ssl_sock_msg_callbacks);
+
+/* Registers the function <func> so that it is called during SSL/TLS
+ * protocol message processing. It returns 0 if the function <func> is not
+ * set or if it fails to allocate memory, and 1 otherwise.
+ */
+int ssl_sock_register_msg_callback(ssl_sock_msg_callback_func func)
+{
+ struct ssl_sock_msg_callback *cbk;
+
+ if (!func)
+ return 0;
+
+ cbk = calloc(1, sizeof(*cbk));
+ if (!cbk) {
+ ha_alert("out of memory in ssl_sock_register_msg_callback().\n");
+ return 0;
+ }
+
+ cbk->func = func;
+
+ LIST_APPEND(&ssl_sock_msg_callbacks, &cbk->list);
+
+ return 1;
+}
+
+/* Used to register dedicated SSL/TLS protocol message callbacks.
+ */
+static int ssl_sock_register_msg_callbacks(void)
+{
+#ifdef TLS1_RT_HEARTBEAT
+ if (!ssl_sock_register_msg_callback(ssl_sock_parse_heartbeat))
+ return ERR_ABORT;
+#endif
+ if (global_ssl.capture_buffer_size > 0) {
+ if (!ssl_sock_register_msg_callback(ssl_sock_parse_clienthello))
+ return ERR_ABORT;
+ }
+#ifdef HAVE_SSL_KEYLOG
+ if (global_ssl.keylog > 0) {
+ if (!ssl_sock_register_msg_callback(ssl_init_keylog))
+ return ERR_ABORT;
+ }
+#endif
+#ifdef USE_QUIC_OPENSSL_COMPAT
+ if (!ssl_sock_register_msg_callback(quic_tls_compat_msg_callback))
+ return ERR_ABORT;
+#endif
+
+ return ERR_NONE;
+}
+
+/* Used to free all SSL/TLS protocol message callbacks that were
+ * registered by using ssl_sock_register_msg_callback().
+ */
+static void ssl_sock_unregister_msg_callbacks(void)
+{
+ struct ssl_sock_msg_callback *cbk, *cbkback;
+
+ list_for_each_entry_safe(cbk, cbkback, &ssl_sock_msg_callbacks, list) {
+ LIST_DELETE(&cbk->list);
+ free(cbk);
+ }
+}
+
+static struct ssl_sock_ctx *ssl_sock_get_ctx(struct connection *conn)
+{
+ if (!conn || conn->xprt != xprt_get(XPRT_SSL) || !conn->xprt_ctx)
+ return NULL;
+
+ return (struct ssl_sock_ctx *)conn->xprt_ctx;
+}
+
+SSL *ssl_sock_get_ssl_object(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ return ctx ? ctx->ssl : NULL;
+}
+/*
+ * This function gives the details of an SSL error. It is used only
+ * when both debug mode and verbose mode are activated. It dumps all
+ * the SSL errors until the stack is empty.
+ */
+static forceinline void ssl_sock_dump_errors(struct connection *conn,
+ struct quic_conn *qc)
+{
+ unsigned long ret;
+
+ if (unlikely(global.mode & MODE_DEBUG)) {
+ while(1) {
+ const char *func = NULL;
+ ERR_peek_error_func(&func);
+
+ ret = ERR_get_error();
+ if (ret == 0)
+ return;
+ if (conn) {
+ fprintf(stderr, "fd[%#x] OpenSSL error[0x%lx] %s: %s\n",
+ conn_fd(conn), ret,
+ func, ERR_reason_error_string(ret));
+ }
+#ifdef USE_QUIC
+ else {
+ /* TODO: we are not sure <conn> is always initialized for QUIC connections */
+ fprintf(stderr, "qc @%p OpenSSL error[0x%lx] %s: %s\n", qc, ret,
+ func, ERR_reason_error_string(ret));
+ }
+#endif
+ }
+ }
+}
+
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+int ssl_init_single_engine(const char *engine_id, const char *def_algorithms)
+{
+ int err_code = ERR_ABORT;
+ ENGINE *engine;
+ struct ssl_engine_list *el;
+
+ /* grab the structural reference to the engine */
+ engine = ENGINE_by_id(engine_id);
+ if (engine == NULL) {
+ ha_alert("ssl-engine %s: failed to get structural reference\n", engine_id);
+ goto fail_get;
+ }
+
+ if (!ENGINE_init(engine)) {
+ /* the engine couldn't initialise, release it */
+ ha_alert("ssl-engine %s: failed to initialize\n", engine_id);
+ goto fail_init;
+ }
+
+ if (ENGINE_set_default_string(engine, def_algorithms) == 0) {
+ ha_alert("ssl-engine %s: failed on ENGINE_set_default_string\n", engine_id);
+ goto fail_set_method;
+ }
+
+ el = calloc(1, sizeof(*el));
+ if (!el)
+ goto fail_alloc;
+ el->e = engine;
+ LIST_INSERT(&openssl_engines, &el->list);
+ nb_engines++;
+ if (global_ssl.async)
+ global.ssl_used_async_engines = nb_engines;
+ return 0;
+
+fail_alloc:
+fail_set_method:
+ /* release the functional reference from ENGINE_init() */
+ ENGINE_finish(engine);
+
+fail_init:
+ /* release the structural reference from ENGINE_by_id() */
+ ENGINE_free(engine);
+
+fail_get:
+ return err_code;
+}
+#endif
+
+#ifdef HAVE_SSL_PROVIDERS
+int ssl_init_provider(const char *provider_name)
+{
+ int err_code = ERR_ABORT;
+ struct ssl_provider_list *prov = NULL;
+
+ prov = calloc(1, sizeof(*prov));
+ if (!prov) {
+ ha_alert("ssl-provider %s: memory allocation failure\n", provider_name);
+ goto error;
+ }
+
+ if ((prov->provider = OSSL_PROVIDER_load(NULL, provider_name)) == NULL) {
+ ha_alert("ssl-provider %s: unknown provider\n", provider_name);
+ goto error;
+ }
+
+ LIST_INSERT(&openssl_providers, &prov->list);
+
+ return 0;
+
+error:
+ ha_free(&prov);
+ return err_code;
+}
+#endif /* HAVE_SSL_PROVIDERS */
+
+#ifdef SSL_MODE_ASYNC
+/*
+ * openssl async fd handler
+ */
+void ssl_async_fd_handler(int fd)
+{
+ struct ssl_sock_ctx *ctx = fdtab[fd].owner;
+
+	/* fd is an async engine fd, we must stop
+	 * polling this fd until it is requested
+	 */
+ fd_stop_recv(fd);
+ fd_cant_recv(fd);
+
+ /* crypto engine is available, let's notify the associated
+ * connection that it can pursue its processing.
+ */
+ tasklet_wakeup(ctx->wait_event.tasklet);
+}
+
+/*
+ * openssl async delayed SSL_free handler
+ */
+void ssl_async_fd_free(int fd)
+{
+ SSL *ssl = fdtab[fd].owner;
+ OSSL_ASYNC_FD all_fd[32];
+ size_t num_all_fds = 0;
+ int i;
+
+	/* We assume that async jobs for a same SSL *
+	 * are serialized. So if we are woken up, it is
+	 * because the running job has just finished
+	 * and we can safely remove all async fds.
+	 */
+ SSL_get_all_async_fds(ssl, NULL, &num_all_fds);
+ if (num_all_fds > 32) {
+		send_log(NULL, LOG_EMERG, "haproxy: openssl returned too many async fds. It seems to be a bug. The process may crash\n");
+ return;
+ }
+
+ SSL_get_all_async_fds(ssl, all_fd, &num_all_fds);
+ for (i=0 ; i < num_all_fds ; i++) {
+ /* We want to remove the fd from the fdtab
+ * but we flag it to disown because the
+ * close is performed by the engine itself
+ */
+ fdtab[all_fd[i]].state |= FD_DISOWN;
+ fd_delete(all_fd[i]);
+ }
+
+	/* Now we can safely call SSL_free, no more pending jobs in the engines */
+ SSL_free(ssl);
+ _HA_ATOMIC_DEC(&global.sslconns);
+ _HA_ATOMIC_DEC(&jobs);
+}
+/*
+ * function used to manage a returned SSL_ERROR_WANT_ASYNC
+ * and enable/disable polling for async fds
+ */
+static inline void ssl_async_process_fds(struct ssl_sock_ctx *ctx)
+{
+ OSSL_ASYNC_FD add_fd[32];
+ OSSL_ASYNC_FD del_fd[32];
+ SSL *ssl = ctx->ssl;
+ size_t num_add_fds = 0;
+ size_t num_del_fds = 0;
+ int i;
+
+ SSL_get_changed_async_fds(ssl, NULL, &num_add_fds, NULL,
+ &num_del_fds);
+ if (num_add_fds > 32 || num_del_fds > 32) {
+		send_log(NULL, LOG_EMERG, "haproxy: openssl returned too many async fds. It seems to be a bug. The process may crash\n");
+ return;
+ }
+
+ SSL_get_changed_async_fds(ssl, add_fd, &num_add_fds, del_fd, &num_del_fds);
+
+ /* We remove unused fds from the fdtab */
+ for (i=0 ; i < num_del_fds ; i++) {
+ /* We want to remove the fd from the fdtab
+ * but we flag it to disown because the
+ * close is performed by the engine itself
+ */
+ fdtab[del_fd[i]].state |= FD_DISOWN;
+ fd_delete(del_fd[i]);
+ }
+
+ /* We add new fds to the fdtab */
+ for (i=0 ; i < num_add_fds ; i++) {
+ fd_insert(add_fd[i], ctx, ssl_async_fd_handler, tgid, ti->ltid_bit);
+ }
+
+ num_add_fds = 0;
+ SSL_get_all_async_fds(ssl, NULL, &num_add_fds);
+ if (num_add_fds > 32) {
+		send_log(NULL, LOG_EMERG, "haproxy: openssl returned too many async fds. It seems to be a bug. The process may crash\n");
+ return;
+ }
+
+ /* We activate the polling for all known async fds */
+ SSL_get_all_async_fds(ssl, add_fd, &num_add_fds);
+ for (i=0 ; i < num_add_fds ; i++) {
+ fd_want_recv(add_fd[i]);
+		/* To ensure that the fd cache won't be used,
+		 * we prefer to catch a real read event,
+		 * because handling an EAGAIN on this fd would
+		 * result in a context switch, and also
+		 * some engines use a fd in blocking mode.
+		 */
+ fd_cant_recv(add_fd[i]);
+ }
+
+}
+#endif
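+
+/* Illustrative global section enabling this async path (the engine name is
+ * hypothetical and requires an OpenSSL build with async support):
+ *
+ *     global
+ *         ssl-engine qat algo RSA
+ *         ssl-mode-async
+ */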
+
+
+/*
+ * Initialize an HMAC context <hctx> using the <key> and <md> parameters.
+ * Returns -1 in case of error, 1 otherwise.
+ */
+static int ssl_hmac_init(MAC_CTX *hctx, unsigned char *key, int key_len, const EVP_MD *md)
+{
+#ifdef HAVE_OSSL_PARAM
+ OSSL_PARAM params[3];
+
+ params[0] = OSSL_PARAM_construct_octet_string(OSSL_MAC_PARAM_KEY, key, key_len);
+ params[1] = OSSL_PARAM_construct_utf8_string(OSSL_MAC_PARAM_DIGEST, (char*)EVP_MD_name(md), 0);
+ params[2] = OSSL_PARAM_construct_end();
+ if (EVP_MAC_CTX_set_params(hctx, params) == 0)
+ return -1; /* error in mac initialisation */
+
+#else
+ HMAC_Init_ex(hctx, key, key_len, md, NULL);
+#endif
+ return 1;
+}
+
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+
+static int ssl_tlsext_ticket_key_cb(SSL *s, unsigned char key_name[16], unsigned char *iv, EVP_CIPHER_CTX *ectx, MAC_CTX *hctx, int enc)
+{
+ struct tls_keys_ref *ref = NULL;
+ union tls_sess_key *keys;
+ int head;
+ int i;
+ int ret = -1; /* error by default */
+ struct connection *conn = SSL_get_ex_data(s, ssl_app_data_index);
+#ifdef USE_QUIC
+ struct quic_conn *qc = SSL_get_ex_data(s, ssl_qc_app_data_index);
+#endif
+
+ if (conn)
+ ref = __objt_listener(conn->target)->bind_conf->keys_ref;
+#ifdef USE_QUIC
+ else if (qc)
+ ref = qc->li->bind_conf->keys_ref;
+#endif
+
+ if (!ref) {
+ /* must never happen */
+ ABORT_NOW();
+ }
+
+ HA_RWLOCK_RDLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+
+ keys = ref->tlskeys;
+ head = ref->tls_ticket_enc_index;
+
+ if (enc) {
+ memcpy(key_name, keys[head].name, 16);
+
+ if(!RAND_pseudo_bytes(iv, EVP_MAX_IV_LENGTH))
+ goto end;
+
+ if (ref->key_size_bits == 128) {
+
+ if(!EVP_EncryptInit_ex(ectx, EVP_aes_128_cbc(), NULL, keys[head].key_128.aes_key, iv))
+ goto end;
+
+ if (ssl_hmac_init(hctx, keys[head].key_128.hmac_key, 16, TLS_TICKET_HASH_FUNCT()) < 0)
+ goto end;
+ ret = 1;
+ }
+ else if (ref->key_size_bits == 256 ) {
+
+ if(!EVP_EncryptInit_ex(ectx, EVP_aes_256_cbc(), NULL, keys[head].key_256.aes_key, iv))
+ goto end;
+
+ if (ssl_hmac_init(hctx, keys[head].key_256.hmac_key, 32, TLS_TICKET_HASH_FUNCT()) < 0)
+ goto end;
+ ret = 1;
+ }
+ } else {
+ for (i = 0; i < TLS_TICKETS_NO; i++) {
+ if (!memcmp(key_name, keys[(head + i) % TLS_TICKETS_NO].name, 16))
+ goto found;
+ }
+ ret = 0;
+ goto end;
+
+ found:
+ if (ref->key_size_bits == 128) {
+ if (ssl_hmac_init(hctx, keys[(head + i) % TLS_TICKETS_NO].key_128.hmac_key, 16, TLS_TICKET_HASH_FUNCT()) < 0)
+ goto end;
+ if(!EVP_DecryptInit_ex(ectx, EVP_aes_128_cbc(), NULL, keys[(head + i) % TLS_TICKETS_NO].key_128.aes_key, iv))
+ goto end;
+ /* 2 for key renewal, 1 if current key is still valid */
+ ret = i ? 2 : 1;
+ }
+ else if (ref->key_size_bits == 256) {
+ if (ssl_hmac_init(hctx, keys[(head + i) % TLS_TICKETS_NO].key_256.hmac_key, 32, TLS_TICKET_HASH_FUNCT()) < 0)
+ goto end;
+ if(!EVP_DecryptInit_ex(ectx, EVP_aes_256_cbc(), NULL, keys[(head + i) % TLS_TICKETS_NO].key_256.aes_key, iv))
+ goto end;
+ /* 2 for key renewal, 1 if current key is still valid */
+ ret = i ? 2 : 1;
+ }
+ }
+
+ end:
+ HA_RWLOCK_RDUNLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ return ret;
+}
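+
+/* The return values above follow the OpenSSL ticket callback contract:
+ * -1 on error, 0 when the key is unknown (forcing a full handshake),
+ * 1 when the current key matched, and 2 when an older key matched and the
+ * ticket should be renewed. An illustrative bind line enabling this path,
+ * with one base64-encoded key per line in the referenced file:
+ *
+ *     bind :443 ssl crt site.pem tls-ticket-keys /etc/haproxy/ticket.keys
+ */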
+
+struct tls_keys_ref *tlskeys_ref_lookup(const char *filename)
+{
+ struct tls_keys_ref *ref;
+
+ list_for_each_entry(ref, &tlskeys_reference, list)
+ if (ref->filename && strcmp(filename, ref->filename) == 0)
+ return ref;
+ return NULL;
+}
+
+struct tls_keys_ref *tlskeys_ref_lookupid(int unique_id)
+{
+ struct tls_keys_ref *ref;
+
+ list_for_each_entry(ref, &tlskeys_reference, list)
+ if (ref->unique_id == unique_id)
+ return ref;
+ return NULL;
+}
+
+/* Update the key in <ref>: if the key size doesn't
+ * match the existing ones, this function returns -1,
+ * otherwise it returns 0 on success.
+ */
+int ssl_sock_update_tlskey_ref(struct tls_keys_ref *ref,
+ struct buffer *tlskey)
+{
+ if (ref->key_size_bits == 128) {
+ if (tlskey->data != sizeof(struct tls_sess_key_128))
+ return -1;
+ }
+ else if (ref->key_size_bits == 256) {
+ if (tlskey->data != sizeof(struct tls_sess_key_256))
+ return -1;
+ }
+ else
+ return -1;
+
+ HA_RWLOCK_WRLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ memcpy((char *) (ref->tlskeys + ((ref->tls_ticket_enc_index + 2) % TLS_TICKETS_NO)),
+ tlskey->area, tlskey->data);
+ ref->tls_ticket_enc_index = (ref->tls_ticket_enc_index + 1) % TLS_TICKETS_NO;
+ HA_RWLOCK_WRUNLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+
+ return 0;
+}
+
+int ssl_sock_update_tlskey(char *filename, struct buffer *tlskey, char **err)
+{
+ struct tls_keys_ref *ref = tlskeys_ref_lookup(filename);
+
+ if(!ref) {
+ memprintf(err, "Unable to locate the referenced filename: %s", filename);
+ return 1;
+ }
+ if (ssl_sock_update_tlskey_ref(ref, tlskey) < 0) {
+ memprintf(err, "Invalid key size");
+ return 1;
+ }
+
+ return 0;
+}
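+
+/* Runtime usage sketch: this function backs the "set ssl tls-key" CLI
+ * command, which rotates the encryption key of an already loaded file:
+ *
+ *     echo "set ssl tls-key /etc/haproxy/ticket.keys <base64-key>" | \
+ *         socat stdio /var/run/haproxy.sock
+ */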
+
+/* This function finalizes the configuration parsing. It sets all the
+ * automatic ids. It's called just after the basic checks. It returns
+ * 0 on success, otherwise ERR_*.
+ */
+static int tlskeys_finalize_config(void)
+{
+ int i = 0;
+ struct tls_keys_ref *ref, *ref2, *ref3;
+ struct list tkr = LIST_HEAD_INIT(tkr);
+
+ list_for_each_entry(ref, &tlskeys_reference, list) {
+ if (ref->unique_id == -1) {
+ /* Look for the first free id. */
+ while (1) {
+ list_for_each_entry(ref2, &tlskeys_reference, list) {
+ if (ref2->unique_id == i) {
+ i++;
+ break;
+ }
+ }
+ if (&ref2->list == &tlskeys_reference)
+ break;
+ }
+
+			/* Use the unique id and increment it for the next entry. */
+ ref->unique_id = i;
+ i++;
+ }
+ }
+
+	/* This sorts the reference list by id. */
+ list_for_each_entry_safe(ref, ref2, &tlskeys_reference, list) {
+ LIST_DELETE(&ref->list);
+ list_for_each_entry(ref3, &tkr, list) {
+ if (ref->unique_id < ref3->unique_id) {
+ LIST_APPEND(&ref3->list, &ref->list);
+ break;
+ }
+ }
+ if (&ref3->list == &tkr)
+ LIST_APPEND(&tkr, &ref->list);
+ }
+
+ /* swap root */
+ LIST_SPLICE(&tlskeys_reference, &tkr);
+ return ERR_NONE;
+}
+#endif /* SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB */
+
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+/*
+ * This function enables the handling of the OCSP status extension on 'ctx'
+ * if an ocsp_response buffer was found in the cert_key_and_chain. To enable
+ * the OCSP status extension, the issuer's certificate is mandatory. It
+ * should be present in ckch->ocsp_issuer.
+ *
+ * In addition, the ckch->ocsp_response buffer is loaded as the DER form of
+ * an OCSP response. If the file is empty or its content is not a valid OCSP
+ * response, the OCSP status extension is enabled but the OCSP response is
+ * ignored (a warning is displayed).
+ *
+ * Returns 1 if no ".ocsp" file was found, 0 if the OCSP status extension
+ * was successfully enabled, or -1 in any other error case.
+ */
+static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data *data, STACK_OF(X509) *chain)
+{
+ X509 *x, *issuer;
+ int i, ret = -1;
+ struct certificate_ocsp *ocsp = NULL, *iocsp;
+ char *warn = NULL;
+ unsigned char *p;
+#ifndef USE_OPENSSL_WOLFSSL
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+ int (*callback) (SSL *, void *);
+#else
+ void (*callback) (void);
+#endif
+#else
+ tlsextStatusCb callback;
+#endif
+ struct buffer *ocsp_uri = get_trash_chunk();
+ char *err = NULL;
+ size_t path_len;
+ int inc_refcount_store = 0;
+
+ x = data->cert;
+ if (!x)
+ goto out;
+
+ ssl_ocsp_get_uri_from_cert(x, ocsp_uri, &err);
+ /* We should have an "OCSP URI" field in order for auto update to work. */
+ if (data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON && b_data(ocsp_uri) == 0)
+ goto out;
+
+ /* In case of ocsp update mode set to 'on', this function might be
+ * called with no known ocsp response. If no ocsp uri can be found in
+ * the certificate, nothing needs to be done here. */
+ if (!data->ocsp_response && !data->ocsp_cid) {
+ if (data->ocsp_update_mode != SSL_SOCK_OCSP_UPDATE_ON || b_data(ocsp_uri) == 0) {
+ ret = 0;
+ goto out;
+ }
+ }
+
+ issuer = data->ocsp_issuer;
+	/* prefer the issuer found in the chain over ocsp_issuer, as has historically been done */
+ if (chain) {
+ /* check if one of the certificate of the chain is the issuer */
+ for (i = 0; i < sk_X509_num(chain); i++) {
+ X509 *ti = sk_X509_value(chain, i);
+ if (X509_check_issued(ti, x) == X509_V_OK) {
+ issuer = ti;
+ break;
+ }
+ }
+ }
+ if (!issuer)
+ goto out;
+
+ if (!data->ocsp_cid) {
+ data->ocsp_cid = OCSP_cert_to_id(0, x, issuer);
+ inc_refcount_store = 1;
+ }
+ if (!data->ocsp_cid)
+ goto out;
+
+ i = i2d_OCSP_CERTID(data->ocsp_cid, NULL);
+ if (!i || (i > OCSP_MAX_CERTID_ASN1_LENGTH))
+ goto out;
+
+ path_len = strlen(path);
+ ocsp = calloc(1, sizeof(*ocsp) + path_len + 1);
+ if (!ocsp)
+ goto out;
+
+ p = ocsp->key_data;
+ ocsp->key_length = i2d_OCSP_CERTID(data->ocsp_cid, &p);
+
+ HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock);
+ iocsp = (struct certificate_ocsp *)ebmb_insert(&cert_ocsp_tree, &ocsp->key, OCSP_MAX_CERTID_ASN1_LENGTH);
+ if (iocsp == ocsp)
+ ocsp = NULL;
+
+#ifndef SSL_CTX_get_tlsext_status_cb
+# define SSL_CTX_get_tlsext_status_cb(ctx, cb) \
+ *cb = (void (*) (void))ctx->tlsext_status_cb;
+#endif
+ SSL_CTX_get_tlsext_status_cb(ctx, &callback);
+
+ if (inc_refcount_store)
+ iocsp->refcount_store++;
+
+ if (!callback) {
+ struct ocsp_cbk_arg *cb_arg;
+ EVP_PKEY *pkey;
+
+ cb_arg = calloc(1, sizeof(*cb_arg));
+ if (!cb_arg)
+ goto out;
+
+ cb_arg->is_single = 1;
+ cb_arg->s_ocsp = iocsp;
+ iocsp->refcount_instance++;
+
+ pkey = X509_get_pubkey(x);
+ cb_arg->single_kt = EVP_PKEY_base_id(pkey);
+ EVP_PKEY_free(pkey);
+
+ SSL_CTX_set_tlsext_status_cb(ctx, ssl_sock_ocsp_stapling_cbk);
+ SSL_CTX_set_ex_data(ctx, ocsp_ex_index, cb_arg); /* we use the ex_data instead of the cb_arg function here, so we can use the cleanup callback to free */
+
+		/*
+		 * If the ctx has a status CB, then we have previously set an
+		 * OCSP staple for this ctx. Update that cb_arg with the new
+		 * cert's staple.
+		 */
+ struct ocsp_cbk_arg *cb_arg;
+ struct certificate_ocsp *tmp_ocsp;
+ int index;
+ int key_type;
+ EVP_PKEY *pkey;
+
+ cb_arg = SSL_CTX_get_ex_data(ctx, ocsp_ex_index);
+
+		/*
+		 * The following few lines convert cb_arg from a single-ocsp
+		 * to a multi-ocsp layout. The order of the operations below
+		 * matters; take care when changing it.
+		 */
+ tmp_ocsp = cb_arg->s_ocsp;
+ index = ssl_sock_get_ocsp_arg_kt_index(cb_arg->single_kt);
+ cb_arg->s_ocsp = NULL;
+ cb_arg->m_ocsp[index] = tmp_ocsp;
+ cb_arg->is_single = 0;
+ cb_arg->single_kt = 0;
+
+ pkey = X509_get_pubkey(x);
+ key_type = EVP_PKEY_base_id(pkey);
+ EVP_PKEY_free(pkey);
+
+ index = ssl_sock_get_ocsp_arg_kt_index(key_type);
+ if (index >= 0 && !cb_arg->m_ocsp[index]) {
+ cb_arg->m_ocsp[index] = iocsp;
+ iocsp->refcount_instance++;
+ }
+ }
+ HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock);
+
+ ret = 0;
+
+ warn = NULL;
+ if (data->ocsp_response && ssl_sock_load_ocsp_response(data->ocsp_response, iocsp, data->ocsp_cid, &warn)) {
+ memprintf(&warn, "Loading: %s. Content will be ignored", warn ? warn : "failure");
+ ha_warning("%s.\n", warn);
+ }
+
+
+ /* Do not insert the same certificate_ocsp structure in the
+ * update tree more than once. */
+ if (!ocsp) {
+ /* Issuer certificate is not included in the certificate
+ * chain, it will have to be treated separately during
+ * ocsp response validation. */
+ if (issuer == data->ocsp_issuer) {
+ iocsp->issuer = issuer;
+ X509_up_ref(issuer);
+ }
+ if (data->chain)
+ iocsp->chain = X509_chain_up_ref(data->chain);
+
+ iocsp->uri = calloc(1, sizeof(*iocsp->uri));
+ if (!chunk_dup(iocsp->uri, ocsp_uri)) {
+ ha_free(&iocsp->uri);
+ goto out;
+ }
+
+ /* Note: if we arrive here, ocsp==NULL because iocsp==ocsp
+ * after the ebmb_insert(), which indicates that we've
+ * just inserted this new node and that it's the one for
+ * which we previously allocated enough room for path_len+1
+ * chars.
+ */
+ memcpy(iocsp->path, path, path_len + 1);
+
+ if (data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) {
+ ssl_ocsp_update_insert(iocsp);
+			/* If we are still during init, the update task is not
+			 * scheduled yet so a wakeup won't do anything.
+			 * Otherwise, if the OCSP was added through the CLI, we
+			 * wake the task up to manage the case of a new entry
+			 * that needs to be updated before the previous first
+			 * entry.
+			 */
+ if (ocsp_update_task)
+ task_wakeup(ocsp_update_task, TASK_WOKEN_MSG);
+ }
+ } else if (iocsp->uri && data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) {
+ /* This unlikely case can happen if a series of "del ssl
+ * crt-list" / "add ssl crt-list" commands are made on the CLI.
+ * In such a case, the OCSP response tree entry will be created
+ * prior to the activation of the ocsp auto update and in such a
+ * case we must "force" insertion in the auto update tree.
+ */
+ if (iocsp->next_update.node.leaf_p == NULL) {
+ ssl_ocsp_update_insert(iocsp);
+			/* If we are still during init, the update task is not
+			 * scheduled yet so a wakeup won't do anything.
+			 * Otherwise, if the OCSP was added through the CLI, we
+			 * wake the task up to manage the case of a new entry
+			 * that needs to be updated before the previous first
+			 * entry.
+			 */
+ if (ocsp_update_task)
+ task_wakeup(ocsp_update_task, TASK_WOKEN_MSG);
+ }
+ }
+
+out:
+ if (ret && data->ocsp_cid) {
+ OCSP_CERTID_free(data->ocsp_cid);
+ data->ocsp_cid = NULL;
+ }
+
+ if (!ret && data->ocsp_response) {
+ ha_free(&data->ocsp_response->area);
+ ha_free(&data->ocsp_response);
+ }
+
+ if (ocsp)
+ ssl_sock_free_ocsp(ocsp);
+
+ if (warn)
+ free(warn);
+
+ free(err);
+
+ return ret;
+}
+
+#endif
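+
+/* Illustrative crt-list entry driving SSL_SOCK_OCSP_UPDATE_ON above
+ * (paths are hypothetical):
+ *
+ *     /etc/haproxy/site.pem [ocsp-update on] example.com
+ */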
+
+#ifdef OPENSSL_IS_BORINGSSL
+static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data *data, STACK_OF(X509) *chain)
+{
+	return SSL_CTX_set_ocsp_response(ctx, (const uint8_t *)data->ocsp_response->area, data->ocsp_response->data);
+}
+#endif
+
+
+#ifdef HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT
+
+#define CT_EXTENSION_TYPE 18
+
+int sctl_ex_index = -1;
+
+int ssl_sock_sctl_add_cbk(SSL *ssl, unsigned ext_type, const unsigned char **out, size_t *outlen, int *al, void *add_arg)
+{
+ struct buffer *sctl = add_arg;
+
+ *out = (unsigned char *) sctl->area;
+ *outlen = sctl->data;
+
+ return 1;
+}
+
+int ssl_sock_sctl_parse_cbk(SSL *s, unsigned int ext_type, const unsigned char *in, size_t inlen, int *al, void *parse_arg)
+{
+ return 1;
+}
+
+static int ssl_sock_load_sctl(SSL_CTX *ctx, struct buffer *sctl)
+{
+ int ret = -1;
+
+ if (!SSL_CTX_add_server_custom_ext(ctx, CT_EXTENSION_TYPE, ssl_sock_sctl_add_cbk, NULL, sctl, ssl_sock_sctl_parse_cbk, NULL))
+ goto out;
+
+ SSL_CTX_set_ex_data(ctx, sctl_ex_index, sctl);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+#endif
+
+void ssl_sock_infocbk(const SSL *ssl, int where, int ret)
+{
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+#ifdef USE_QUIC
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+#endif /* USE_QUIC */
+ struct ssl_sock_ctx *ctx = NULL;
+
+ BIO *write_bio;
+ (void)ret; /* shut gcc stupid warning */
+
+ if (conn)
+ ctx = conn_get_ssl_sock_ctx(conn);
+#ifdef USE_QUIC
+ else if (qc)
+ ctx = qc->xprt_ctx;
+#endif /* USE_QUIC */
+
+ if (!ctx) {
+ /* must never happen */
+ ABORT_NOW();
+ return;
+ }
+
+#ifndef SSL_OP_NO_RENEGOTIATION
+ /* Please note that BoringSSL defines this macro to zero so don't
+ * change this to #if and do not assign a default value to this macro!
+ */
+ if (where & SSL_CB_HANDSHAKE_START) {
+ /* Disable renegotiation (CVE-2009-3555) */
+ if (conn && (conn->flags & (CO_FL_WAIT_L6_CONN | CO_FL_EARLY_SSL_HS | CO_FL_EARLY_DATA)) == 0) {
+ conn->flags |= CO_FL_ERROR;
+ conn->err_code = CO_ER_SSL_RENEG;
+ }
+ }
+#endif
+
+ if ((where & SSL_CB_ACCEPT_LOOP) == SSL_CB_ACCEPT_LOOP) {
+ if (!(ctx->xprt_st & SSL_SOCK_ST_FL_16K_WBFSIZE)) {
+			/* Long certificate chains optimization:
+			   if the write and read BIOs are different, we
+			   consider that buffering was activated,
+			   so we raise the output buffer size from 4k
+			   to 16k. */
+ write_bio = SSL_get_wbio(ssl);
+ if (write_bio != SSL_get_rbio(ssl)) {
+ BIO_set_write_buffer_size(write_bio, 16384);
+ ctx->xprt_st |= SSL_SOCK_ST_FL_16K_WBFSIZE;
+ }
+ }
+ }
+}
+
+/* Callback called for each certificate of the chain during verification.
+   ok is set to 1 if preverify detected no error on the current certificate.
+   Returns 0 to break the handshake, 1 otherwise. */
+int ssl_sock_bind_verifycbk(int ok, X509_STORE_CTX *x_store)
+{
+ SSL *ssl;
+ struct connection *conn;
+ struct ssl_sock_ctx *ctx = NULL;
+ int err, depth;
+ X509 *client_crt;
+ STACK_OF(X509) *certs;
+ struct bind_conf *bind_conf = NULL;
+ struct quic_conn *qc = NULL;
+
+ ssl = X509_STORE_CTX_get_ex_data(x_store, SSL_get_ex_data_X509_STORE_CTX_idx());
+ conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+ client_crt = SSL_get_ex_data(ssl, ssl_client_crt_ref_index);
+
+ if (conn) {
+ bind_conf = __objt_listener(conn->target)->bind_conf;
+ ctx = __conn_get_ssl_sock_ctx(conn);
+ }
+#ifdef USE_QUIC
+ else {
+ qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+ BUG_ON(!qc); /* Must never happen */
+ bind_conf = qc->li->bind_conf;
+ ctx = qc->xprt_ctx;
+ }
+#endif
+
+ BUG_ON(!ctx || !bind_conf);
+ ALREADY_CHECKED(ctx);
+ ALREADY_CHECKED(bind_conf);
+
+ ctx->xprt_st |= SSL_SOCK_ST_FL_VERIFY_DONE;
+
+ depth = X509_STORE_CTX_get_error_depth(x_store);
+ err = X509_STORE_CTX_get_error(x_store);
+
+ if (ok) /* no errors */
+ return ok;
+
+	/* Keep a reference to the client's certificate in order to be able to
+	 * dump some fetch values in a log even when the verification process
+	 * fails. */
+ if (depth == 0) {
+ X509_free(client_crt);
+ client_crt = X509_STORE_CTX_get0_cert(x_store);
+ if (client_crt) {
+ X509_up_ref(client_crt);
+ SSL_set_ex_data(ssl, ssl_client_crt_ref_index, client_crt);
+ }
+ }
+ else {
+ /* An error occurred on a CA certificate of the certificate
+ * chain, we might never call this verify callback on the client
+ * certificate's depth (which is 0) so we try to store the
+ * reference right now. */
+ certs = X509_STORE_CTX_get1_chain(x_store);
+ if (certs) {
+ client_crt = sk_X509_value(certs, 0);
+ if (client_crt) {
+ X509_up_ref(client_crt);
+ SSL_set_ex_data(ssl, ssl_client_crt_ref_index, client_crt);
+ }
+ sk_X509_pop_free(certs, X509_free);
+ }
+ }
+
+ /* check if CA error needs to be ignored */
+ if (depth > 0) {
+ if (!SSL_SOCK_ST_TO_CA_ERROR(ctx->xprt_st)) {
+ ctx->xprt_st |= SSL_SOCK_CA_ERROR_TO_ST(err);
+ ctx->xprt_st |= SSL_SOCK_CAEDEPTH_TO_ST(depth);
+ }
+
+ if (err <= SSL_MAX_VFY_ERROR_CODE &&
+ cert_ignerr_bitfield_get(bind_conf->ca_ignerr_bitfield, err))
+ goto err_ignored;
+
+ /* TODO: for QUIC connection, this error code is lost */
+ if (conn)
+ conn->err_code = CO_ER_SSL_CA_FAIL;
+ return 0;
+ }
+
+ if (!SSL_SOCK_ST_TO_CRTERROR(ctx->xprt_st))
+ ctx->xprt_st |= SSL_SOCK_CRTERROR_TO_ST(err);
+
+ /* check if certificate error needs to be ignored */
+ if (err <= SSL_MAX_VFY_ERROR_CODE &&
+ cert_ignerr_bitfield_get(bind_conf->crt_ignerr_bitfield, err))
+ goto err_ignored;
+
+ /* TODO: for QUIC connection, this error code is lost */
+ if (conn)
+ conn->err_code = CO_ER_SSL_CRT_FAIL;
+ return 0;
+
+ err_ignored:
+ ssl_sock_dump_errors(conn, qc);
+ ERR_clear_error();
+ return 1;
+}
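+
+/* The ignore bitfields consulted above are filled from the bind line; an
+ * illustrative sketch using one of the X509_V_ERR constant names:
+ *
+ *     bind :443 ssl crt site.pem ca-file clients.pem verify required crt-ignore-err X509_V_ERR_CERT_HAS_EXPIRED
+ */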
+
+#ifdef TLS1_RT_HEARTBEAT
+static void ssl_sock_parse_heartbeat(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl)
+{
+ /* test heartbeat received (write_p is set to 0
+ for a received record) */
+ if ((content_type == TLS1_RT_HEARTBEAT) && (write_p == 0)) {
+ struct ssl_sock_ctx *ctx = __conn_get_ssl_sock_ctx(conn);
+ const unsigned char *p = buf;
+ unsigned int payload;
+
+ ctx->xprt_st |= SSL_SOCK_RECV_HEARTBEAT;
+
+ /* Check if this is a CVE-2014-0160 exploitation attempt. */
+ if (*p != TLS1_HB_REQUEST)
+ return;
+
+ if (len < 1 + 2 + 16) /* 1 type + 2 size + 0 payload + 16 padding */
+ goto kill_it;
+
+ payload = (p[1] * 256) + p[2];
+ if (3 + payload + 16 <= len)
+ return; /* OK no problem */
+ kill_it:
+ /* We have a clear heartbleed attack (CVE-2014-0160), the
+ * advertised payload is larger than the advertised packet
+ * length, so we have garbage in the buffer between the
+ * payload and the end of the buffer (p+len). We can't know
+ * if the SSL stack is patched, and we don't know if we can
+ * safely wipe out the area between p+3+len and payload.
+ * So instead, we prevent the response from being sent by
+ * setting the max_send_fragment to 0 and we report an SSL
+ * error, which will kill this connection. It will be reported
+		 * above as SSL_ERROR_SSL while another handshake failure with
+ * a heartbeat message will be reported as SSL_ERROR_SYSCALL.
+ */
+ ssl->max_send_fragment = 0;
+ SSLerr(SSL_F_TLS1_HEARTBEAT, SSL_R_SSL_HANDSHAKE_FAILURE);
+ }
+}
+#endif
+
+static void ssl_sock_parse_clienthello(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl)
+{
+ struct ssl_capture *capture;
+ uchar *msg;
+ uchar *end;
+ uchar *extensions_end;
+ uchar *ec_start = NULL;
+ uchar *ec_formats_start = NULL;
+ uchar *list_end;
+ ushort protocol_version;
+ ushort extension_id;
+ ushort ec_len = 0;
+ uchar ec_formats_len = 0;
+ int offset = 0;
+ int rec_len;
+
+	/* This function is called for "from client" and "to server"
+	 * connections. The combination of write_p == 0 and content_type == 22
+	 * is only available on "from client" connections.
+	 */
+
+	/* "write_p" is set to 0 if the bytes were received,
+	 * otherwise it is set to 1.
+	 */
+ if (write_p != 0)
+ return;
+
+	/* content_type contains the type of the message received or sent
+	 * according to the SSL/TLS protocol spec. This message is
+	 * encoded with one byte. The value 256 (two bytes) is used
+	 * to designate the SSL/TLS record layer. According to
+	 * RFC 6101, the expected messages (other than 256) are:
+	 *  - change_cipher_spec(20)
+	 *  - alert(21)
+	 *  - handshake(22)
+	 *  - application_data(23)
+	 *  - (255)
+	 * We are interested in the handshake, and especially the client
+	 * hello.
+	 */
+ if (content_type != 22)
+ return;
+
+ /* The message length is at least 4 bytes, containing the
+ * message type and the message length.
+ */
+ if (len < 4)
+ return;
+
+	/* The first byte of the handshake message is the type of
+	 * message. The known types are:
+	 *  - hello_request(0)
+	 *  - client_hello(1)
+	 *  - server_hello(2)
+	 *  - certificate(11)
+	 *  - server_key_exchange(12)
+	 *  - certificate_request(13)
+	 *  - server_hello_done(14)
+	 * We are interested in the client hello.
+	 */
+ msg = (unsigned char *)buf;
+ if (msg[0] != 1)
+ return;
+
+	/* The next three bytes are the length of the message. The total
+	 * length must be this decoded length + 4. If the length given as
+	 * argument is smaller, we abort the protocol dissector.
+	 */
+ rec_len = (msg[1] << 16) + (msg[2] << 8) + msg[3];
+ if (len < rec_len + 4)
+ return;
+ msg += 4;
+ end = msg + rec_len;
+ if (end < msg)
+ return;
+
+ /* Expect 2 bytes for protocol version
+ * (1 byte for major and 1 byte for minor)
+ */
+ if (msg + 2 > end)
+ return;
+ protocol_version = (msg[0] << 8) + msg[1];
+ msg += 2;
+
+	/* Expect the random, composed of 4 bytes for the unix time and
+	 * 28 bytes of random payload. So we jump 4 + 28.
+	 */
+ msg += 4 + 28;
+ if (msg > end)
+ return;
+
+	/* Next is the session id:
+	 * if present, we have to jump by length + 1 for the size information;
+	 * if not present, we have to jump by 1 only.
+	 */
+ if (msg[0] > 0)
+ msg += msg[0];
+ msg += 1;
+ if (msg > end)
+ return;
+
+ /* Next two bytes are the ciphersuite length. */
+ if (msg + 2 > end)
+ return;
+ rec_len = (msg[0] << 8) + msg[1];
+ msg += 2;
+ if (msg + rec_len > end || msg + rec_len < msg)
+ return;
+
+ capture = pool_zalloc(pool_head_ssl_capture);
+ if (!capture)
+ return;
+ /* Compute the xxh64 of the ciphersuite. */
+ capture->xxh64 = XXH64(msg, rec_len, 0);
+
+ /* Capture the ciphersuite. */
+ capture->ciphersuite_len = MIN(global_ssl.capture_buffer_size, rec_len);
+ capture->ciphersuite_offset = 0;
+ memcpy(capture->data, msg, capture->ciphersuite_len);
+ msg += rec_len;
+ offset += capture->ciphersuite_len;
+
+ /* Initialize other data */
+ capture->protocol_version = protocol_version;
+
+	/* Next are the compression methods:
+	 * if present, we have to jump by length + 1 for the size information;
+	 * if not present, we have to jump by 1 only.
+	 */
+ if (msg[0] > 0)
+ msg += msg[0];
+ msg += 1;
+ if (msg > end)
+ goto store_capture;
+
+ /* We reached extensions */
+ if (msg + 2 > end)
+ goto store_capture;
+ rec_len = (msg[0] << 8) + msg[1];
+ msg += 2;
+ if (msg + rec_len > end || msg + rec_len < msg)
+ goto store_capture;
+ extensions_end = msg + rec_len;
+ capture->extensions_offset = offset;
+
+ /* Parse each extension */
+ while (msg + 4 < extensions_end) {
+ /* Add 2 bytes of extension_id */
+ if (global_ssl.capture_buffer_size >= offset + 2) {
+ capture->data[offset++] = msg[0];
+ capture->data[offset++] = msg[1];
+ capture->extensions_len += 2;
+ }
+ else
+ break;
+ extension_id = (msg[0] << 8) + msg[1];
+ /* Length of the extension */
+ rec_len = (msg[2] << 8) + msg[3];
+
+ /* Expect 2 bytes extension id + 2 bytes extension size */
+ msg += 2 + 2;
+ if (msg + rec_len > extensions_end || msg + rec_len < msg)
+ goto store_capture;
+ /* TLS Extensions
+ * https://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml */
+ if (extension_id == 0x000a) {
+ /* Elliptic Curves:
+ * https://www.rfc-editor.org/rfc/rfc8422.html
+ * https://www.rfc-editor.org/rfc/rfc7919.html */
+ list_end = msg + rec_len;
+ if (msg + 2 > list_end)
+ goto store_capture;
+ rec_len = (msg[0] << 8) + msg[1];
+ msg += 2;
+
+ if (msg + rec_len > list_end || msg + rec_len < msg)
+ goto store_capture;
+ /* Store location/size of the list */
+ ec_start = msg;
+ ec_len = rec_len;
+ }
+ else if (extension_id == 0x000b) {
+ /* Elliptic Curves Point Formats:
+ * https://www.rfc-editor.org/rfc/rfc8422.html */
+ list_end = msg + rec_len;
+ if (msg + 1 > list_end)
+ goto store_capture;
+ rec_len = msg[0];
+ msg += 1;
+
+ if (msg + rec_len > list_end || msg + rec_len < msg)
+ goto store_capture;
+ /* Store location/size of the list */
+ ec_formats_start = msg;
+ ec_formats_len = rec_len;
+ }
+ msg += rec_len;
+ }
+
+ if (ec_start) {
+ rec_len = ec_len;
+ if (offset + rec_len > global_ssl.capture_buffer_size)
+ rec_len = global_ssl.capture_buffer_size - offset;
+ memcpy(capture->data + offset, ec_start, rec_len);
+ capture->ec_offset = offset;
+ capture->ec_len = rec_len;
+ offset += rec_len;
+ }
+ if (ec_formats_start) {
+ rec_len = ec_formats_len;
+ if (offset + rec_len > global_ssl.capture_buffer_size)
+ rec_len = global_ssl.capture_buffer_size - offset;
+ memcpy(capture->data + offset, ec_formats_start, rec_len);
+ capture->ec_formats_offset = offset;
+ capture->ec_formats_len = rec_len;
+ offset += rec_len;
+ }
+
+ store_capture:
+ SSL_set_ex_data(ssl, ssl_capture_ptr_index, capture);
+}
+
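+/* Note on the capture layout (as built by the parser above): capture->data
+ * holds the raw ciphersuite list first (at ciphersuite_offset), then the
+ * 2-byte id of each parsed extension (at extensions_offset), then the
+ * elliptic curves list (ec_offset) and the EC point formats list
+ * (ec_formats_offset), each possibly truncated to fit within
+ * global_ssl.capture_buffer_size.
+ */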
+
+#ifdef HAVE_SSL_KEYLOG
+static void ssl_init_keylog(struct connection *conn, int write_p, int version,
+ int content_type, const void *buf, size_t len,
+ SSL *ssl)
+{
+ struct ssl_keylog *keylog;
+
+ if (SSL_get_ex_data(ssl, ssl_keylog_index))
+ return;
+
+ keylog = pool_zalloc(pool_head_ssl_keylog);
+ if (!keylog)
+ return;
+
+ if (!SSL_set_ex_data(ssl, ssl_keylog_index, keylog)) {
+ pool_free(pool_head_ssl_keylog, keylog);
+ return;
+ }
+}
+#endif
+
+/* Callback called for SSL protocol-level message analysis */
+void ssl_sock_msgcbk(int write_p, int version, int content_type, const void *buf, size_t len, SSL *ssl, void *arg)
+{
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+ struct ssl_sock_msg_callback *cbk;
+
+ /* Try to call all callback functions that were registered by using
+ * ssl_sock_register_msg_callback().
+ */
+ list_for_each_entry(cbk, &ssl_sock_msg_callbacks, list) {
+ cbk->func(conn, write_p, version, content_type, buf, len, ssl);
+ }
+}
+
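+/* Usage sketch (illustrative, with a hypothetical parser name): a message
+ * parser such as the client hello capture above is hooked at startup with
+ * something like
+ *     ssl_sock_register_msg_callback(my_parse_func);
+ * after which ssl_sock_msgcbk() invokes it for every TLS message seen.
+ */
+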
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+static int ssl_sock_srv_select_protos(SSL *s, unsigned char **out, unsigned char *outlen,
+ const unsigned char *in, unsigned int inlen,
+ void *arg)
+{
+ struct server *srv = arg;
+
+ if (SSL_select_next_proto(out, outlen, in, inlen, (unsigned char *)srv->ssl_ctx.npn_str,
+ srv->ssl_ctx.npn_len) == OPENSSL_NPN_NEGOTIATED)
+ return SSL_TLSEXT_ERR_OK;
+ return SSL_TLSEXT_ERR_NOACK;
+}
+#endif
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+/* This callback is used so that the server advertises the list of
+ * negotiable protocols for NPN.
+ */
+static int ssl_sock_advertise_npn_protos(SSL *s, const unsigned char **data,
+ unsigned int *len, void *arg)
+{
+ struct ssl_bind_conf *conf = arg;
+
+ *data = (const unsigned char *)conf->npn_str;
+ *len = conf->npn_len;
+ return SSL_TLSEXT_ERR_OK;
+}
+#endif
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+/* This callback is used so that the server advertises the list of
+ * negotiable protocols for ALPN.
+ */
+static int ssl_sock_advertise_alpn_protos(SSL *s, const unsigned char **out,
+ unsigned char *outlen,
+ const unsigned char *server,
+ unsigned int server_len, void *arg)
+{
+ struct ssl_bind_conf *conf = arg;
+#ifdef USE_QUIC
+ struct quic_conn *qc = SSL_get_ex_data(s, ssl_qc_app_data_index);
+#endif
+
+ if (SSL_select_next_proto((unsigned char**) out, outlen, (const unsigned char *)conf->alpn_str,
+ conf->alpn_len, server, server_len) != OPENSSL_NPN_NEGOTIATED) {
+#ifdef USE_QUIC
+ if (qc)
+ quic_set_tls_alert(qc, SSL_AD_NO_APPLICATION_PROTOCOL);
+#endif
+ return SSL_TLSEXT_ERR_NOACK;
+ }
+
+#ifdef USE_QUIC
+ if (qc && !quic_set_app_ops(qc, *out, *outlen)) {
+ quic_set_tls_alert(qc, SSL_AD_NO_APPLICATION_PROTOCOL);
+ return SSL_TLSEXT_ERR_NOACK;
+ }
+#endif
+
+ return SSL_TLSEXT_ERR_OK;
+}
+#endif
+
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+#ifndef SSL_NO_GENERATE_CERTIFICATES
+
+/* Configure a DNS SAN extension on a certificate. Returns 0 on success and
+ * non-zero on failure. */
+int ssl_sock_add_san_ext(X509V3_CTX* ctx, X509* cert, const char *servername) {
+ int failure = 0;
+ X509_EXTENSION *san_ext = NULL;
+ CONF *conf = NULL;
+ struct buffer *san_name = get_trash_chunk();
+
+ conf = NCONF_new(NULL);
+ if (!conf) {
+ failure = 1;
+ goto cleanup;
+ }
+
+ /* Build an extension based on the DNS entry above */
+ chunk_appendf(san_name, "DNS:%s", servername);
+ san_ext = X509V3_EXT_nconf_nid(conf, ctx, NID_subject_alt_name, san_name->area);
+ if (!san_ext) {
+ failure = 1;
+ goto cleanup;
+ }
+
+ /* Add the extension */
+ if (!X509_add_ext(cert, san_ext, -1 /* Add to end */)) {
+ failure = 1;
+ goto cleanup;
+ }
+
+ /* Success */
+ failure = 0;
+
+cleanup:
+ if (NULL != san_ext) X509_EXTENSION_free(san_ext);
+ if (NULL != conf) NCONF_free(conf);
+
+ return failure;
+}
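+
+/* Illustrative example: for the hypothetical servername "foo.example.com",
+ * the value built above is the string "DNS:foo.example.com", which
+ * X509V3_EXT_nconf_nid() turns into a subjectAltName extension before it is
+ * appended to the certificate.
+ */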
+
+/* Create an X509 certificate with the specified servername and serial. This
+ * function returns an SSL_CTX object, or NULL if an error occurs. */
+static SSL_CTX *
+ssl_sock_do_create_cert(const char *servername, struct bind_conf *bind_conf, SSL *ssl)
+{
+ X509 *cacert = bind_conf->ca_sign_ckch->cert;
+ EVP_PKEY *capkey = bind_conf->ca_sign_ckch->key;
+ SSL_CTX *ssl_ctx = NULL;
+ X509 *newcrt = NULL;
+ EVP_PKEY *pkey = NULL;
+ SSL *tmp_ssl = NULL;
+ CONF *ctmp = NULL;
+ X509_NAME *name;
+ const EVP_MD *digest;
+ X509V3_CTX ctx;
+ unsigned int i;
+ int key_type;
+
+ /* Get the private key of the default certificate and use it */
+#ifdef HAVE_SSL_CTX_get0_privatekey
+ pkey = SSL_CTX_get0_privatekey(bind_conf->default_ctx);
+#else
+ tmp_ssl = SSL_new(bind_conf->default_ctx);
+ if (tmp_ssl)
+ pkey = SSL_get_privatekey(tmp_ssl);
+#endif
+ if (!pkey)
+ goto mkcert_error;
+
+ /* Create the certificate */
+ if (!(newcrt = X509_new()))
+ goto mkcert_error;
+
+ /* Set version number for the certificate (X509v3) and the serial
+ * number */
+ if (X509_set_version(newcrt, 2L) != 1)
+ goto mkcert_error;
+ ASN1_INTEGER_set(X509_get_serialNumber(newcrt), _HA_ATOMIC_ADD_FETCH(&ssl_ctx_serial, 1));
+
+ /* Set duration for the certificate */
+ if (!X509_gmtime_adj(X509_getm_notBefore(newcrt), (long)-60*60*24) ||
+ !X509_gmtime_adj(X509_getm_notAfter(newcrt),(long)60*60*24*365))
+ goto mkcert_error;
+
+ /* set public key in the certificate */
+ if (X509_set_pubkey(newcrt, pkey) != 1)
+ goto mkcert_error;
+
+ /* Set issuer name from the CA */
+ if (!(name = X509_get_subject_name(cacert)))
+ goto mkcert_error;
+ if (X509_set_issuer_name(newcrt, name) != 1)
+ goto mkcert_error;
+
+	/* Set the subject name using a copy of the issuer name, with the servername added as CN */
+ name = X509_NAME_dup(name);
+ if (X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC,
+ (const unsigned char *)servername,
+ -1, -1, 0) != 1) {
+ X509_NAME_free(name);
+ goto mkcert_error;
+ }
+ if (X509_set_subject_name(newcrt, name) != 1) {
+ X509_NAME_free(name);
+ goto mkcert_error;
+ }
+ X509_NAME_free(name);
+
+ /* Add x509v3 extensions as specified */
+ ctmp = NCONF_new(NULL);
+ X509V3_set_ctx(&ctx, cacert, newcrt, NULL, NULL, 0);
+ for (i = 0; i < X509V3_EXT_SIZE; i++) {
+ X509_EXTENSION *ext;
+
+ if (!(ext = X509V3_EXT_nconf(ctmp, &ctx, x509v3_ext_names[i], x509v3_ext_values[i])))
+ goto mkcert_error;
+ if (!X509_add_ext(newcrt, ext, -1)) {
+ X509_EXTENSION_free(ext);
+ goto mkcert_error;
+ }
+ X509_EXTENSION_free(ext);
+ }
+
+ /* Add SAN extension */
+ if (ssl_sock_add_san_ext(&ctx, newcrt, servername)) {
+ goto mkcert_error;
+ }
+
+ /* Sign the certificate with the CA private key */
+
+ key_type = EVP_PKEY_base_id(capkey);
+
+ if (key_type == EVP_PKEY_DSA)
+ digest = EVP_sha1();
+ else if (key_type == EVP_PKEY_RSA)
+ digest = EVP_sha256();
+ else if (key_type == EVP_PKEY_EC)
+ digest = EVP_sha256();
+ else {
+#ifdef ASN1_PKEY_CTRL_DEFAULT_MD_NID
+ int nid;
+
+ if (EVP_PKEY_get_default_digest_nid(capkey, &nid) <= 0)
+ goto mkcert_error;
+ if (!(digest = EVP_get_digestbynid(nid)))
+ goto mkcert_error;
+#else
+ goto mkcert_error;
+#endif
+ }
+
+ if (!(X509_sign(newcrt, capkey, digest)))
+ goto mkcert_error;
+
+ /* Create and set the new SSL_CTX */
+ if (!(ssl_ctx = SSL_CTX_new(SSLv23_server_method())))
+ goto mkcert_error;
+ if (!SSL_CTX_use_PrivateKey(ssl_ctx, pkey))
+ goto mkcert_error;
+ if (!SSL_CTX_use_certificate(ssl_ctx, newcrt))
+ goto mkcert_error;
+ if (!SSL_CTX_check_private_key(ssl_ctx))
+ goto mkcert_error;
+
+	/* Build the chain with the CA cert followed by the rest of the chain; keep this order */
+#if defined(SSL_CTX_add1_chain_cert)
+ if (!SSL_CTX_add1_chain_cert(ssl_ctx, bind_conf->ca_sign_ckch->cert)) {
+ goto mkcert_error;
+ }
+
+ if (bind_conf->ca_sign_ckch->chain) {
+ for (i = 0; i < sk_X509_num(bind_conf->ca_sign_ckch->chain); i++) {
+ X509 *chain_cert = sk_X509_value(bind_conf->ca_sign_ckch->chain, i);
+ if (!SSL_CTX_add1_chain_cert(ssl_ctx, chain_cert)) {
+ goto mkcert_error;
+ }
+ }
+ }
+#endif
+
+ if (newcrt) X509_free(newcrt);
+
+#ifndef OPENSSL_NO_DH
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+ SSL_CTX_set_tmp_dh_callback(ssl_ctx, ssl_get_tmp_dh_cbk);
+#else
+ ssl_sock_set_tmp_dh_from_pkey(ssl_ctx, pkey);
+#endif
+#endif
+
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+#if defined(SSL_CTX_set1_curves_list)
+ {
+ const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE);
+ if (!SSL_CTX_set1_curves_list(ssl_ctx, ecdhe))
+ goto end;
+ }
+#endif
+#else
+#if defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH)
+ {
+ const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE);
+ EC_KEY *ecc;
+ int nid;
+
+ if ((nid = OBJ_sn2nid(ecdhe)) == NID_undef)
+ goto end;
+ if (!(ecc = EC_KEY_new_by_curve_name(nid)))
+ goto end;
+ SSL_CTX_set_tmp_ecdh(ssl_ctx, ecc);
+ EC_KEY_free(ecc);
+ }
+#endif /* defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) */
+#endif /* HA_OPENSSL_VERSION_NUMBER >= 0x10101000L */
+ end:
+ return ssl_ctx;
+
+ mkcert_error:
+ if (ctmp) NCONF_free(ctmp);
+ if (tmp_ssl) SSL_free(tmp_ssl);
+ if (ssl_ctx) SSL_CTX_free(ssl_ctx);
+ if (newcrt) X509_free(newcrt);
+ return NULL;
+}
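+
+/* To summarize the generation above: the new certificate is signed by the
+ * configured signing CA (ca_sign_ckch), is valid from one day in the past to
+ * one year ahead, carries the servername both as subject CN and as a DNS SAN,
+ * and reuses the private key of the bind line's default certificate so that
+ * no key pair has to be generated on the fly.
+ */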
+
+
+/* Look up a certificate in the LRU cache used to store generated
+ * certificates, and immediately assign it to the SSL session if found. */
+SSL_CTX *
+ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl)
+{
+ struct lru64 *lru = NULL;
+
+ if (ssl_ctx_lru_tree) {
+ HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ lru = lru64_lookup(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0);
+ if (lru && lru->domain) {
+ if (ssl)
+ SSL_set_SSL_CTX(ssl, (SSL_CTX *)lru->data);
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return (SSL_CTX *)lru->data;
+ }
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ }
+ return NULL;
+}
+
+/* Same as <ssl_sock_assign_generated_cert> but without an SSL session. This
+ * function is not thread-safe, it should only be used to check if a certificate
+ * exists in the lru cache (with no guarantee it will not be removed by another
+ * thread). It is kept for backward compatibility. */
+SSL_CTX *
+ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf)
+{
+ return ssl_sock_assign_generated_cert(key, bind_conf, NULL);
+}
+
+/* Set a certificate in the LRU cache used to store generated
+ * certificates. Returns 0 on success, otherwise -1 */
+int
+ssl_sock_set_generated_cert(SSL_CTX *ssl_ctx, unsigned int key, struct bind_conf *bind_conf)
+{
+ struct lru64 *lru = NULL;
+
+ if (ssl_ctx_lru_tree) {
+ HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ lru = lru64_get(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0);
+ if (!lru) {
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return -1;
+ }
+ if (lru->domain && lru->data)
+ lru->free((SSL_CTX *)lru->data);
+ lru64_commit(lru, ssl_ctx, bind_conf->ca_sign_ckch->cert, 0, (void (*)(void *))SSL_CTX_free);
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return 0;
+ }
+ return -1;
+}
+
+/* Compute the key of the certificate. */
+unsigned int
+ssl_sock_generated_cert_key(const void *data, size_t len)
+{
+ return XXH32(data, len, ssl_ctx_lru_seed);
+}
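+
+/* For example, the SNI-based generation path below derives the cache key from
+ * the lowercased servername, key = ssl_sock_generated_cert_key(srv,
+ * strlen(srv)), while the connection-based path hashes the destination
+ * address instead.
+ */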
+
+/* Generate a cert and immediately assign it to the SSL session so that the cert's
+ * refcount is maintained regardless of the cert's presence in the LRU cache.
+ */
+static int
+ssl_sock_generate_certificate(const char *servername, struct bind_conf *bind_conf, SSL *ssl)
+{
+ X509 *cacert = bind_conf->ca_sign_ckch->cert;
+ SSL_CTX *ssl_ctx = NULL;
+ struct lru64 *lru = NULL;
+ unsigned int key;
+
+ key = ssl_sock_generated_cert_key(servername, strlen(servername));
+ if (ssl_ctx_lru_tree) {
+ HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ lru = lru64_get(key, ssl_ctx_lru_tree, cacert, 0);
+ if (lru && lru->domain)
+ ssl_ctx = (SSL_CTX *)lru->data;
+ if (!ssl_ctx && lru) {
+ ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl);
+ lru64_commit(lru, ssl_ctx, cacert, 0, (void (*)(void *))SSL_CTX_free);
+ }
+ SSL_set_SSL_CTX(ssl, ssl_ctx);
+ HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock);
+ return 1;
+ }
+ else {
+ ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl);
+ SSL_set_SSL_CTX(ssl, ssl_ctx);
+ /* No LRU cache, this CTX will be released as soon as the session dies */
+ SSL_CTX_free(ssl_ctx);
+ return 1;
+ }
+ return 0;
+}
+static int
+ssl_sock_generate_certificate_from_conn(struct bind_conf *bind_conf, SSL *ssl)
+{
+ unsigned int key;
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+
+ if (conn_get_dst(conn)) {
+ key = ssl_sock_generated_cert_key(conn->dst, get_addr_len(conn->dst));
+ if (ssl_sock_assign_generated_cert(key, bind_conf, ssl))
+ return 1;
+ }
+ return 0;
+}
+#endif /* !defined SSL_NO_GENERATE_CERTIFICATES */
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL)
+
+static void ctx_set_SSLv3_func(SSL_CTX *ctx, set_context_func c)
+{
+#if SSL_OP_NO_SSLv3
+ c == SET_SERVER ? SSL_CTX_set_ssl_version(ctx, SSLv3_server_method())
+ : SSL_CTX_set_ssl_version(ctx, SSLv3_client_method());
+#endif
+}
+static void ctx_set_TLSv10_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_SERVER ? SSL_CTX_set_ssl_version(ctx, TLSv1_server_method())
+ : SSL_CTX_set_ssl_version(ctx, TLSv1_client_method());
+}
+static void ctx_set_TLSv11_func(SSL_CTX *ctx, set_context_func c) {
+#if SSL_OP_NO_TLSv1_1
+ c == SET_SERVER ? SSL_CTX_set_ssl_version(ctx, TLSv1_1_server_method())
+ : SSL_CTX_set_ssl_version(ctx, TLSv1_1_client_method());
+#endif
+}
+static void ctx_set_TLSv12_func(SSL_CTX *ctx, set_context_func c) {
+#if SSL_OP_NO_TLSv1_2
+ c == SET_SERVER ? SSL_CTX_set_ssl_version(ctx, TLSv1_2_server_method())
+ : SSL_CTX_set_ssl_version(ctx, TLSv1_2_client_method());
+#endif
+}
+/* TLSv1.2 is the last supported version in this context. */
+static void ctx_set_TLSv13_func(SSL_CTX *ctx, set_context_func c) {}
+/* Unusable in this context. */
+static void ssl_set_SSLv3_func(SSL *ssl, set_context_func c) {}
+static void ssl_set_TLSv10_func(SSL *ssl, set_context_func c) {}
+static void ssl_set_TLSv11_func(SSL *ssl, set_context_func c) {}
+static void ssl_set_TLSv12_func(SSL *ssl, set_context_func c) {}
+static void ssl_set_TLSv13_func(SSL *ssl, set_context_func c) {}
+#else /* openssl >= 1.1.0 */
+
+static void ctx_set_SSLv3_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, SSL3_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, SSL3_VERSION);
+}
+static void ssl_set_SSLv3_func(SSL *ssl, set_context_func c) {
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, SSL3_VERSION)
+ : SSL_set_min_proto_version(ssl, SSL3_VERSION);
+}
+static void ctx_set_TLSv10_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, TLS1_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, TLS1_VERSION);
+}
+static void ssl_set_TLSv10_func(SSL *ssl, set_context_func c) {
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, TLS1_VERSION)
+ : SSL_set_min_proto_version(ssl, TLS1_VERSION);
+}
+static void ctx_set_TLSv11_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, TLS1_1_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, TLS1_1_VERSION);
+}
+static void ssl_set_TLSv11_func(SSL *ssl, set_context_func c) {
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, TLS1_1_VERSION)
+ : SSL_set_min_proto_version(ssl, TLS1_1_VERSION);
+}
+static void ctx_set_TLSv12_func(SSL_CTX *ctx, set_context_func c) {
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, TLS1_2_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, TLS1_2_VERSION);
+}
+static void ssl_set_TLSv12_func(SSL *ssl, set_context_func c) {
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, TLS1_2_VERSION)
+ : SSL_set_min_proto_version(ssl, TLS1_2_VERSION);
+}
+static void ctx_set_TLSv13_func(SSL_CTX *ctx, set_context_func c) {
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+ c == SET_MAX ? SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION)
+ : SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION);
+#endif
+}
+static void ssl_set_TLSv13_func(SSL *ssl, set_context_func c) {
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+ c == SET_MAX ? SSL_set_max_proto_version(ssl, TLS1_3_VERSION)
+ : SSL_set_min_proto_version(ssl, TLS1_3_VERSION);
+#endif
+}
+#endif
+static void ctx_set_None_func(SSL_CTX *ctx, set_context_func c) { }
+static void ssl_set_None_func(SSL *ssl, set_context_func c) { }
+
+struct methodVersions methodVersions[] = {
+ {0, 0, ctx_set_None_func, ssl_set_None_func, "NONE"}, /* CONF_TLSV_NONE */
+ {SSL_OP_NO_SSLv3, MC_SSL_O_NO_SSLV3, ctx_set_SSLv3_func, ssl_set_SSLv3_func, "SSLv3"}, /* CONF_SSLV3 */
+ {SSL_OP_NO_TLSv1, MC_SSL_O_NO_TLSV10, ctx_set_TLSv10_func, ssl_set_TLSv10_func, "TLSv1.0"}, /* CONF_TLSV10 */
+ {SSL_OP_NO_TLSv1_1, MC_SSL_O_NO_TLSV11, ctx_set_TLSv11_func, ssl_set_TLSv11_func, "TLSv1.1"}, /* CONF_TLSV11 */
+ {SSL_OP_NO_TLSv1_2, MC_SSL_O_NO_TLSV12, ctx_set_TLSv12_func, ssl_set_TLSv12_func, "TLSv1.2"}, /* CONF_TLSV12 */
+ {SSL_OP_NO_TLSv1_3, MC_SSL_O_NO_TLSV13, ctx_set_TLSv13_func, ssl_set_TLSv13_func, "TLSv1.3"}, /* CONF_TLSV13 */
+};
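+
+/* Illustrative use of the table above: a parsed min/max version pair stored
+ * in an ssl_bind_conf is applied per connection as
+ *     methodVersions[conf->ssl_methods.min].ssl_set_version(ssl, SET_MIN);
+ *     methodVersions[conf->ssl_methods.max].ssl_set_version(ssl, SET_MAX);
+ * which is exactly what the clientHello callback does after an SNI switch.
+ */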
+
+static void ssl_sock_switchctx_set(SSL *ssl, SSL_CTX *ctx)
+{
+ SSL_set_verify(ssl, SSL_CTX_get_verify_mode(ctx), ssl_sock_bind_verifycbk);
+ SSL_set_client_CA_list(ssl, SSL_dup_CA_list(SSL_CTX_get_client_CA_list(ctx)));
+ SSL_set_SSL_CTX(ssl, ctx);
+}
+
+/*
+ * Return the right sni_ctx for a <bind_conf> and a chosen <servername> (which
+ * must be in lowercase). The RSA <have_rsa_sig> and ECDSA <have_ecdsa_sig>
+ * capabilities of the client are also taken into account.
+ *
+ * This function does a lookup in the bind_conf sni tree, so the caller should
+ * lock the tree.
+ */
+static __maybe_unused struct sni_ctx *ssl_sock_chose_sni_ctx(struct bind_conf *s, const char *servername,
+ int have_rsa_sig, int have_ecdsa_sig)
+{
+ struct ebmb_node *node, *n, *node_ecdsa = NULL, *node_rsa = NULL, *node_anonymous = NULL;
+ const char *wildp = NULL;
+ int i;
+
+ /* look for the first dot for wildcard search */
+ for (i = 0; servername[i] != '\0'; i++) {
+ if (servername[i] == '.') {
+ wildp = &servername[i];
+ break;
+ }
+ }
+
+	/* Look for an ECDSA, RSA or DSA certificate, first under the exact
+	 * name and, if not found there, under the wildcard name */
+ for (i = 0; i < 2; i++) {
+ if (i == 0) /* lookup in full qualified names */
+ node = ebst_lookup(&s->sni_ctx, trash.area);
+ else if (i == 1 && wildp) /* lookup in wildcards names */
+ node = ebst_lookup(&s->sni_w_ctx, wildp);
+ else
+ break;
+
+ for (n = node; n; n = ebmb_next_dup(n)) {
+
+			/* look for a non-negated filter */
+ if (!container_of(n, struct sni_ctx, name)->neg) {
+ struct sni_ctx *sni, *sni_tmp;
+ int skip = 0;
+
+ if (i == 1 && wildp) { /* wildcard */
+ /* If this is a wildcard, look for an exclusion on the same crt-list line */
+ sni = container_of(n, struct sni_ctx, name);
+ list_for_each_entry(sni_tmp, &sni->ckch_inst->sni_ctx, by_ckch_inst) {
+ if (sni_tmp->neg && (strcmp((const char *)sni_tmp->name.key, trash.area) == 0)) {
+ skip = 1;
+ break;
+ }
+ }
+ if (skip)
+ continue;
+ }
+
+ switch(container_of(n, struct sni_ctx, name)->kinfo.sig) {
+ case TLSEXT_signature_ecdsa:
+ if (!node_ecdsa)
+ node_ecdsa = n;
+ break;
+ case TLSEXT_signature_rsa:
+ if (!node_rsa)
+ node_rsa = n;
+ break;
+ default: /* TLSEXT_signature_anonymous|dsa */
+ if (!node_anonymous)
+ node_anonymous = n;
+ break;
+ }
+ }
+ }
+ }
+	/* Once the certificates are found, select them depending on what is
+	 * supported in the client and by key_signature priority order: ECDSA >
+	 * RSA > DSA */
+ if (have_ecdsa_sig && node_ecdsa)
+ node = node_ecdsa;
+ else if (have_rsa_sig && node_rsa)
+ node = node_rsa;
+ else if (node_anonymous)
+ node = node_anonymous;
+ else if (node_ecdsa)
+ node = node_ecdsa; /* no ecdsa signature case (< TLSv1.2) */
+ else
+ node = node_rsa; /* no rsa signature case (far far away) */
+
+ if (node)
+ return container_of(node, struct sni_ctx, name);
+
+ return NULL;
+}
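+
+/* For example, with both an RSA and an ECDSA certificate loaded for the
+ * hypothetical name "www.example.com", a TLSv1.2+ client advertising an ecdsa
+ * sigalg together with an ECDSA-capable cipher is served the ECDSA
+ * certificate, while an old client without the signature_algorithms extension
+ * falls back to the RSA one.
+ */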
+
+#ifdef HAVE_SSL_CLIENT_HELLO_CB
+
+int ssl_sock_switchctx_err_cbk(SSL *ssl, int *al, void *priv)
+{
+ struct bind_conf *s = priv;
+ (void)al; /* shut gcc stupid warning */
+
+ if (SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name) || (s->options & BC_O_GENERATE_CERTS))
+ return SSL_TLSEXT_ERR_OK;
+ return SSL_TLSEXT_ERR_NOACK;
+}
+
+#ifdef OPENSSL_IS_BORINGSSL
+int ssl_sock_switchctx_cbk(const struct ssl_early_callback_ctx *ctx)
+{
+ SSL *ssl = ctx->ssl;
+#else
+int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg)
+{
+#endif
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+#ifdef USE_QUIC
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+#endif /* USE_QUIC */
+ struct bind_conf *s = NULL;
+ const uint8_t *extension_data;
+ size_t extension_len;
+ int has_rsa_sig = 0, has_ecdsa_sig = 0;
+ struct sni_ctx *sni_ctx;
+ const char *servername;
+ size_t servername_len;
+ int allow_early = 0;
+ int i;
+
+ if (conn)
+ s = __objt_listener(conn->target)->bind_conf;
+#ifdef USE_QUIC
+ else if (qc)
+ s = qc->li->bind_conf;
+#endif /* USE_QUIC */
+
+ if (!s) {
+ /* must never happen */
+ ABORT_NOW();
+ return 0;
+ }
+
+#ifdef USE_QUIC
+ if (qc) {
+ /* Look for the QUIC transport parameters. */
+#ifdef OPENSSL_IS_BORINGSSL
+ if (!SSL_early_callback_ctx_extension_get(ctx, qc->tps_tls_ext,
+ &extension_data, &extension_len))
+#else
+ if (!SSL_client_hello_get0_ext(ssl, qc->tps_tls_ext,
+ &extension_data, &extension_len))
+#endif
+ {
+			/* This is not redundant. If we only returned 0 without
+			 * setting <*al>, it would have the side effect of generating
+			 * another TLS alert, which would be set after calling
+			 * quic_set_tls_alert().
+			 */
+ *al = SSL_AD_MISSING_EXTENSION;
+ quic_set_tls_alert(qc, SSL_AD_MISSING_EXTENSION);
+ return 0;
+ }
+
+ if (!quic_transport_params_store(qc, 0, extension_data,
+ extension_data + extension_len))
+ goto abort;
+
+ qc->flags |= QUIC_FL_CONN_TX_TP_RECEIVED;
+ }
+#endif /* USE_QUIC */
+
+ if (s->ssl_conf.early_data)
+ allow_early = 1;
+#ifdef OPENSSL_IS_BORINGSSL
+ if (SSL_early_callback_ctx_extension_get(ctx, TLSEXT_TYPE_server_name,
+ &extension_data, &extension_len)) {
+#else
+ if (SSL_client_hello_get0_ext(ssl, TLSEXT_TYPE_server_name, &extension_data, &extension_len)) {
+#endif
+ /*
+ * The server_name extension was given too much extensibility when it
+ * was written, so parsing the normal case is a bit complex.
+ */
+ size_t len;
+ if (extension_len <= 2)
+ goto abort;
+ /* Extract the length of the supplied list of names. */
+ len = (*extension_data++) << 8;
+ len |= *extension_data++;
+ if (len + 2 != extension_len)
+ goto abort;
+ /*
+ * The list in practice only has a single element, so we only consider
+ * the first one.
+ */
+ if (len == 0 || *extension_data++ != TLSEXT_NAMETYPE_host_name)
+ goto abort;
+ extension_len = len - 1;
+ /* Now we can finally pull out the byte array with the actual hostname. */
+ if (extension_len <= 2)
+ goto abort;
+ len = (*extension_data++) << 8;
+ len |= *extension_data++;
+ if (len == 0 || len + 2 > extension_len || len > TLSEXT_MAXLEN_host_name
+ || memchr(extension_data, 0, len) != NULL)
+ goto abort;
+ servername = (char *)extension_data;
+ servername_len = len;
+ } else {
+#if (!defined SSL_NO_GENERATE_CERTIFICATES)
+ if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate_from_conn(s, ssl)) {
+ goto allow_early;
+ }
+#endif
+		/* without the SNI extension, use the default_ctx (needs SSL_TLSEXT_ERR_NOACK) */
+ if (!s->strict_sni) {
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+ goto abort;
+ }
+
+ /* extract/check clientHello information */
+#ifdef OPENSSL_IS_BORINGSSL
+ if (SSL_early_callback_ctx_extension_get(ctx, TLSEXT_TYPE_signature_algorithms, &extension_data, &extension_len)) {
+#else
+ if (SSL_client_hello_get0_ext(ssl, TLSEXT_TYPE_signature_algorithms, &extension_data, &extension_len)) {
+#endif
+ uint8_t sign;
+ size_t len;
+ if (extension_len < 2)
+ goto abort;
+ len = (*extension_data++) << 8;
+ len |= *extension_data++;
+ if (len + 2 != extension_len)
+ goto abort;
+ if (len % 2 != 0)
+ goto abort;
+ for (; len > 0; len -= 2) {
+ extension_data++; /* hash */
+ sign = *extension_data++;
+ switch (sign) {
+ case TLSEXT_signature_rsa:
+ has_rsa_sig = 1;
+ break;
+ case TLSEXT_signature_ecdsa:
+ has_ecdsa_sig = 1;
+ break;
+ default:
+ continue;
+ }
+ if (has_ecdsa_sig && has_rsa_sig)
+ break;
+ }
+ } else {
+ /* without TLSEXT_TYPE_signature_algorithms extension (< TLSv1.2) */
+ has_rsa_sig = 1;
+ }
+	if (has_ecdsa_sig) { /* in the very rare case: an ecdsa sigalg but no ECDSA cipher */
+ const SSL_CIPHER *cipher;
+ uint32_t cipher_id;
+ size_t len;
+ const uint8_t *cipher_suites;
+ has_ecdsa_sig = 0;
+#ifdef OPENSSL_IS_BORINGSSL
+ len = ctx->cipher_suites_len;
+ cipher_suites = ctx->cipher_suites;
+#else
+ len = SSL_client_hello_get0_ciphers(ssl, &cipher_suites);
+#endif
+ if (len % 2 != 0)
+ goto abort;
+ for (; len != 0; len -= 2, cipher_suites += 2) {
+#ifdef OPENSSL_IS_BORINGSSL
+ uint16_t cipher_suite = (cipher_suites[0] << 8) | cipher_suites[1];
+ cipher = SSL_get_cipher_by_value(cipher_suite);
+#else
+ cipher = SSL_CIPHER_find(ssl, cipher_suites);
+#endif
+ if (!cipher)
+ continue;
+
+ cipher_id = SSL_CIPHER_get_id(cipher);
+ /* skip the SCSV "fake" signaling ciphersuites because they are NID_auth_any (RFC 7507) */
+ if (cipher_id == SSL3_CK_SCSV || cipher_id == SSL3_CK_FALLBACK_SCSV)
+ continue;
+
+ if (SSL_CIPHER_get_auth_nid(cipher) == NID_auth_ecdsa
+ || SSL_CIPHER_get_auth_nid(cipher) == NID_auth_any) {
+ has_ecdsa_sig = 1;
+ break;
+ }
+ }
+ }
+
+	/* we need to transform this into a NULL-terminated string in lowercase */
+ for (i = 0; i < trash.size && i < servername_len; i++)
+ trash.area[i] = tolower(servername[i]);
+ trash.area[i] = 0;
+ servername = trash.area;
+
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ sni_ctx = ssl_sock_chose_sni_ctx(s, servername, has_rsa_sig, has_ecdsa_sig);
+ if (sni_ctx) {
+ /* switch ctx */
+ struct ssl_bind_conf *conf = sni_ctx->conf;
+ ssl_sock_switchctx_set(ssl, sni_ctx->ctx);
+ if (conf) {
+ methodVersions[conf->ssl_methods.min].ssl_set_version(ssl, SET_MIN);
+ methodVersions[conf->ssl_methods.max].ssl_set_version(ssl, SET_MAX);
+ if (conf->early_data)
+ allow_early = 1;
+ }
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+#if (!defined SSL_NO_GENERATE_CERTIFICATES)
+ if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate(servername, s, ssl)) {
+ /* switch ctx done in ssl_sock_generate_certificate */
+ goto allow_early;
+ }
+#endif
+ if (!s->strict_sni) {
+		/* no certificate matched, use the default_ctx */
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+
+	/* We are about to raise a handshake error so the servername extension
+ * callback will never be called and the SNI will never be stored in the
+ * SSL context. In order for the ssl_fc_sni sample fetch to still work
+ * in such a case, we store the SNI ourselves as an ex_data information
+ * in the SSL context.
+ */
+ {
+ char *client_sni = pool_alloc(ssl_sock_client_sni_pool);
+ if (client_sni) {
+ strncpy(client_sni, servername, TLSEXT_MAXLEN_host_name);
+ client_sni[TLSEXT_MAXLEN_host_name] = '\0';
+ SSL_set_ex_data(ssl, ssl_client_sni_index, client_sni);
+ }
+ }
+
+	/* other cases fall back on abort, if strict-sni is set but no node was found */
+
+ abort:
+ /* abort handshake (was SSL_TLSEXT_ERR_ALERT_FATAL) */
+ if (conn)
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+#ifdef OPENSSL_IS_BORINGSSL
+ return ssl_select_cert_error;
+#else
+ *al = SSL_AD_UNRECOGNIZED_NAME;
+ return 0;
+#endif
+
+allow_early:
+#ifdef OPENSSL_IS_BORINGSSL
+ if (allow_early)
+ SSL_set_early_data_enabled(ssl, 1);
+#else
+ if (!allow_early)
+ SSL_set_max_early_data(ssl, 0);
+#endif
+ return 1;
+}
+
+#else /* ! HAVE_SSL_CLIENT_HELLO_CB */
+
+/* Sets the SSL ctx of <ssl> to match the advertised server name. Returns a
+ * warning when no match is found, in which case the default (first) cert
+ * will keep being used.
+ */
+int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv)
+{
+ const char *servername;
+ const char *wildp = NULL;
+ struct ebmb_node *node, *n;
+ struct bind_conf *s = priv;
+#ifdef USE_QUIC
+ const uint8_t *extension_data;
+ size_t extension_len;
+ struct quic_conn *qc = SSL_get_ex_data(ssl, ssl_qc_app_data_index);
+#endif /* USE_QUIC */
+ int i;
+ (void)al; /* shut gcc stupid warning */
+
+#ifdef USE_QUIC
+ if (qc) {
+
+ /* Look for the QUIC transport parameters. */
+ SSL_get_peer_quic_transport_params(ssl, &extension_data, &extension_len);
+ if (extension_len == 0) {
+			/* This is not redundant. If we only returned 0 without
+			 * setting <*al>, it would have the side effect of generating
+			 * another TLS alert, which would be set after calling
+			 * quic_set_tls_alert().
+			 */
+ *al = SSL_AD_MISSING_EXTENSION;
+ quic_set_tls_alert(qc, SSL_AD_MISSING_EXTENSION);
+ return SSL_TLSEXT_ERR_NOACK;
+ }
+
+ if (!quic_transport_params_store(qc, 0, extension_data,
+ extension_data + extension_len))
+ return SSL_TLSEXT_ERR_NOACK;
+
+ qc->flags |= QUIC_FL_CONN_TX_TP_RECEIVED;
+ }
+#endif /* USE_QUIC */
+
+ servername = SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name);
+ if (!servername) {
+#if (!defined SSL_NO_GENERATE_CERTIFICATES)
+ if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate_from_conn(s, ssl))
+ return SSL_TLSEXT_ERR_OK;
+#endif
+ if (s->strict_sni)
+ return SSL_TLSEXT_ERR_ALERT_FATAL;
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_NOACK;
+ }
+
+ for (i = 0; i < trash.size; i++) {
+ if (!servername[i])
+ break;
+ trash.area[i] = tolower((unsigned char)servername[i]);
+ if (!wildp && (trash.area[i] == '.'))
+ wildp = &trash.area[i];
+ }
+ trash.area[i] = 0;
+
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ node = NULL;
+ /* lookup in full qualified names */
+ for (n = ebst_lookup(&s->sni_ctx, trash.area); n; n = ebmb_next_dup(n)) {
+		/* look for a non-negated filter */
+ if (!container_of(n, struct sni_ctx, name)->neg) {
+ node = n;
+ break;
+ }
+ }
+ if (!node && wildp) {
+ /* lookup in wildcards names */
+ for (n = ebst_lookup(&s->sni_w_ctx, wildp); n; n = ebmb_next_dup(n)) {
+			/* look for a non-negated filter */
+ if (!container_of(n, struct sni_ctx, name)->neg) {
+ node = n;
+ break;
+ }
+ }
+ }
+ if (!node) {
+#if (!defined SSL_NO_GENERATE_CERTIFICATES)
+ if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate(servername, s, ssl)) {
+ /* switch ctx done in ssl_sock_generate_certificate */
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_OK;
+ }
+#endif
+ if (s->strict_sni) {
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_ALERT_FATAL;
+ }
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_OK;
+ }
+
+ /* switch ctx */
+ ssl_sock_switchctx_set(ssl, container_of(node, struct sni_ctx, name)->ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ return SSL_TLSEXT_ERR_OK;
+}
+#endif /* (!) OPENSSL_IS_BORINGSSL */
+#endif /* SSL_CTRL_SET_TLSEXT_HOSTNAME */
+
+#if 0 && defined(USE_OPENSSL_WOLFSSL)
+/* This implements the equivalent of the clientHello callback but using the
+ * cert_cb. WolfSSL is able to extract the sigalgs and ciphers of the client
+ * by using the API provided in https://github.com/wolfSSL/wolfssl/pull/6963
+ *
+ * Not activated for now since the PR is not merged.
+ */
+static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg)
+{
+ struct bind_conf *s = arg;
+ int has_rsa_sig = 0, has_ecdsa_sig = 0;
+ const char *servername;
+ struct sni_ctx *sni_ctx;
+ int i;
+
+ if (!s) {
+ /* must never happen */
+ ABORT_NOW();
+ return 0;
+ }
+
+ servername = SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name);
+ if (!servername) {
+		/* without the SNI extension, use the default_ctx (needs SSL_TLSEXT_ERR_NOACK) */
+ if (!s->strict_sni) {
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+ goto abort;
+ }
+
+ /* extract sigalgs and ciphers */
+ {
+ const byte* suites = NULL;
+ word16 suiteSz = 0;
+ const byte* hashSigAlgo = NULL;
+ word16 hashSigAlgoSz = 0;
+ word16 idx = 0;
+
+ wolfSSL_get_client_suites_sigalgs(ssl, &suites, &suiteSz, &hashSigAlgo, &hashSigAlgoSz);
+ if (suites == NULL || suiteSz == 0 || hashSigAlgo == NULL || hashSigAlgoSz == 0)
+ return 0;
+
+ if (SSL_version(ssl) != TLS1_3_VERSION) {
+ for (idx = 0; idx < suiteSz; idx += 2) {
+ WOLFSSL_CIPHERSUITE_INFO info;
+ info = wolfSSL_get_ciphersuite_info(suites[idx], suites[idx+1]);
+ if (info.rsaAuth)
+ has_rsa_sig = 1;
+ else if (info.eccAuth)
+ has_ecdsa_sig = 1;
+ }
+ }
+
+ if (hashSigAlgoSz > 0) {
+ /* sigalgs extension takes precedence over ciphersuites */
+ has_ecdsa_sig = 0;
+ has_rsa_sig = 0;
+ }
+ for (idx = 0; idx < hashSigAlgoSz; idx += 2) {
+ int hashAlgo;
+ int sigAlgo;
+
+ wolfSSL_get_sigalg_info(hashSigAlgo[idx+0], hashSigAlgo[idx+1], &hashAlgo, &sigAlgo);
+
+ if (sigAlgo == RSAk || sigAlgo == RSAPSSk)
+ has_rsa_sig = 1;
+ else if (sigAlgo == ECDSAk)
+ has_ecdsa_sig = 1;
+ }
+ }
+
+	/* we need to transform this into a NULL-terminated string in lowercase */
+ for (i = 0; i < trash.size && servername[i] != '\0'; i++)
+ trash.area[i] = tolower(servername[i]);
+ trash.area[i] = 0;
+ servername = trash.area;
+
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ sni_ctx = ssl_sock_chose_sni_ctx(s, servername, has_rsa_sig, has_ecdsa_sig);
+ if (sni_ctx) {
+ /* switch ctx */
+ struct ssl_bind_conf *conf = sni_ctx->conf;
+ ssl_sock_switchctx_set(ssl, sni_ctx->ctx);
+ if (conf) {
+ methodVersions[conf->ssl_methods.min].ssl_set_version(ssl, SET_MIN);
+ methodVersions[conf->ssl_methods.max].ssl_set_version(ssl, SET_MAX);
+ }
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ if (!s->strict_sni) {
+ /* no certificate match, is the default_ctx */
+ HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock);
+ ssl_sock_switchctx_set(ssl, s->default_ctx);
+ HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock);
+ goto allow_early;
+ }
+
+	/* We are about to raise a handshake error so the servername extension
+ * callback will never be called and the SNI will never be stored in the
+ * SSL context. In order for the ssl_fc_sni sample fetch to still work
+ * in such a case, we store the SNI ourselves as an ex_data information
+ * in the SSL context.
+ */
+ {
+ char *client_sni = pool_alloc(ssl_sock_client_sni_pool);
+ if (client_sni) {
+ strncpy(client_sni, servername, TLSEXT_MAXLEN_host_name);
+ client_sni[TLSEXT_MAXLEN_host_name] = '\0';
+ SSL_set_ex_data(ssl, ssl_client_sni_index, client_sni);
+ }
+ }
+
+	/* other cases fall back on abort, if strict-sni is set but no node was found */
+
+ abort:
+ /* abort handshake (was SSL_TLSEXT_ERR_ALERT_FATAL) */
+ return 0;
+
+allow_early:
+ return 1;
+}
+#endif
+
+#ifndef OPENSSL_NO_DH
+
+static inline HASSL_DH *ssl_new_dh_fromdata(BIGNUM *p, BIGNUM *g)
+{
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+ OSSL_PARAM_BLD *tmpl = NULL;
+ OSSL_PARAM *params = NULL;
+ EVP_PKEY_CTX *ctx = NULL;
+ EVP_PKEY *pkey = NULL;
+
+ if ((tmpl = OSSL_PARAM_BLD_new()) == NULL
+ || !OSSL_PARAM_BLD_push_BN(tmpl, OSSL_PKEY_PARAM_FFC_P, p)
+ || !OSSL_PARAM_BLD_push_BN(tmpl, OSSL_PKEY_PARAM_FFC_G, g)
+ || (params = OSSL_PARAM_BLD_to_param(tmpl)) == NULL) {
+ goto end;
+ }
+ ctx = EVP_PKEY_CTX_new_from_name(NULL, "DH", NULL);
+ if (ctx == NULL
+ || !EVP_PKEY_fromdata_init(ctx)
+ || !EVP_PKEY_fromdata(ctx, &pkey, EVP_PKEY_KEY_PARAMETERS, params)) {
+ goto end;
+ }
+
+end:
+ EVP_PKEY_CTX_free(ctx);
+ OSSL_PARAM_free(params);
+ OSSL_PARAM_BLD_free(tmpl);
+ BN_free(p);
+ BN_free(g);
+ return pkey;
+#else
+
+ HASSL_DH *dh = DH_new();
+
+ if (!dh)
+ return NULL;
+
+ DH_set0_pqg(dh, p, NULL, g);
+
+ return dh;
+#endif
+}
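+
+/* Ownership note (derived from the code above): with OpenSSL >= 3.0, <p> and
+ * <g> are always freed before returning since EVP_PKEY_fromdata() duplicates
+ * them, whereas the legacy DH_set0_pqg() path transfers their ownership to
+ * the DH object.
+ */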
+
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+static inline HASSL_DH *ssl_get_dh_by_nid(int nid)
+{
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+ OSSL_PARAM params[2];
+ EVP_PKEY *pkey = NULL;
+ EVP_PKEY_CTX *pctx = EVP_PKEY_CTX_new_from_name(NULL, "DH", NULL);
+ const char *named_group = NULL;
+
+ if (!pctx)
+ goto end;
+
+ named_group = OBJ_nid2ln(nid);
+
+ if (!named_group)
+ goto end;
+
+ params[0] = OSSL_PARAM_construct_utf8_string("group", (char*)named_group, 0);
+ params[1] = OSSL_PARAM_construct_end();
+
+ if (EVP_PKEY_keygen_init(pctx) && EVP_PKEY_CTX_set_params(pctx, params))
+ EVP_PKEY_generate(pctx, &pkey);
+
+end:
+ EVP_PKEY_CTX_free(pctx);
+ return pkey;
+#else
+
+ HASSL_DH *dh = NULL;
+ dh = DH_new_by_nid(nid);
+ return dh;
+#endif
+}
+#endif
+
+
+static HASSL_DH * ssl_get_dh_1024(void)
+{
+ static unsigned char dh1024_p[]={
+ 0xFA,0xF9,0x2A,0x22,0x2A,0xA7,0x7F,0xE1,0x67,0x4E,0x53,0xF7,
+ 0x56,0x13,0xC3,0xB1,0xE3,0x29,0x6B,0x66,0x31,0x6A,0x7F,0xB3,
+ 0xC2,0x68,0x6B,0xCB,0x1D,0x57,0x39,0x1D,0x1F,0xFF,0x1C,0xC9,
+ 0xA6,0xA4,0x98,0x82,0x31,0x5D,0x25,0xFF,0x8A,0xE0,0x73,0x96,
+ 0x81,0xC8,0x83,0x79,0xC1,0x5A,0x04,0xF8,0x37,0x0D,0xA8,0x3D,
+ 0xAE,0x74,0xBC,0xDB,0xB6,0xA4,0x75,0xD9,0x71,0x8A,0xA0,0x17,
+ 0x9E,0x2D,0xC8,0xA8,0xDF,0x2C,0x5F,0x82,0x95,0xF8,0x92,0x9B,
+ 0xA7,0x33,0x5F,0x89,0x71,0xC8,0x2D,0x6B,0x18,0x86,0xC4,0x94,
+ 0x22,0xA5,0x52,0x8D,0xF6,0xF6,0xD2,0x37,0x92,0x0F,0xA5,0xCC,
+ 0xDB,0x7B,0x1D,0x3D,0xA1,0x31,0xB7,0x80,0x8F,0x0B,0x67,0x5E,
+ 0x36,0xA5,0x60,0x0C,0xF1,0x95,0x33,0x8B,
+ };
+ static unsigned char dh1024_g[]={
+ 0x02,
+ };
+
+ BIGNUM *p;
+ BIGNUM *g;
+
+ HASSL_DH *dh = NULL;
+
+ p = BN_bin2bn(dh1024_p, sizeof dh1024_p, NULL);
+ g = BN_bin2bn(dh1024_g, sizeof dh1024_g, NULL);
+
+ if (p && g)
+ dh = ssl_new_dh_fromdata(p, g);
+
+ return dh;
+}
+
+static HASSL_DH *ssl_get_dh_2048(void)
+{
+#if (HA_OPENSSL_VERSION_NUMBER < 0x10101000L)
+ static unsigned char dh2048_p[]={
+ 0xEC,0x86,0xF8,0x70,0xA0,0x33,0x16,0xEC,0x05,0x1A,0x73,0x59,
+ 0xCD,0x1F,0x8B,0xF8,0x29,0xE4,0xD2,0xCF,0x52,0xDD,0xC2,0x24,
+ 0x8D,0xB5,0x38,0x9A,0xFB,0x5C,0xA4,0xE4,0xB2,0xDA,0xCE,0x66,
+ 0x50,0x74,0xA6,0x85,0x4D,0x4B,0x1D,0x30,0xB8,0x2B,0xF3,0x10,
+ 0xE9,0xA7,0x2D,0x05,0x71,0xE7,0x81,0xDF,0x8B,0x59,0x52,0x3B,
+ 0x5F,0x43,0x0B,0x68,0xF1,0xDB,0x07,0xBE,0x08,0x6B,0x1B,0x23,
+ 0xEE,0x4D,0xCC,0x9E,0x0E,0x43,0xA0,0x1E,0xDF,0x43,0x8C,0xEC,
+ 0xBE,0xBE,0x90,0xB4,0x51,0x54,0xB9,0x2F,0x7B,0x64,0x76,0x4E,
+ 0x5D,0xD4,0x2E,0xAE,0xC2,0x9E,0xAE,0x51,0x43,0x59,0xC7,0x77,
+ 0x9C,0x50,0x3C,0x0E,0xED,0x73,0x04,0x5F,0xF1,0x4C,0x76,0x2A,
+ 0xD8,0xF8,0xCF,0xFC,0x34,0x40,0xD1,0xB4,0x42,0x61,0x84,0x66,
+ 0x42,0x39,0x04,0xF8,0x68,0xB2,0x62,0xD7,0x55,0xED,0x1B,0x74,
+ 0x75,0x91,0xE0,0xC5,0x69,0xC1,0x31,0x5C,0xDB,0x7B,0x44,0x2E,
+ 0xCE,0x84,0x58,0x0D,0x1E,0x66,0x0C,0xC8,0x44,0x9E,0xFD,0x40,
+ 0x08,0x67,0x5D,0xFB,0xA7,0x76,0x8F,0x00,0x11,0x87,0xE9,0x93,
+ 0xF9,0x7D,0xC4,0xBC,0x74,0x55,0x20,0xD4,0x4A,0x41,0x2F,0x43,
+ 0x42,0x1A,0xC1,0xF2,0x97,0x17,0x49,0x27,0x37,0x6B,0x2F,0x88,
+ 0x7E,0x1C,0xA0,0xA1,0x89,0x92,0x27,0xD9,0x56,0x5A,0x71,0xC1,
+ 0x56,0x37,0x7E,0x3A,0x9D,0x05,0xE7,0xEE,0x5D,0x8F,0x82,0x17,
+ 0xBC,0xE9,0xC2,0x93,0x30,0x82,0xF9,0xF4,0xC9,0xAE,0x49,0xDB,
+ 0xD0,0x54,0xB4,0xD9,0x75,0x4D,0xFA,0x06,0xB8,0xD6,0x38,0x41,
+ 0xB7,0x1F,0x77,0xF3,
+ };
+ static unsigned char dh2048_g[]={
+ 0x02,
+ };
+
+ BIGNUM *p;
+ BIGNUM *g;
+
+ HASSL_DH *dh = NULL;
+
+ p = BN_bin2bn(dh2048_p, sizeof dh2048_p, NULL);
+ g = BN_bin2bn(dh2048_g, sizeof dh2048_g, NULL);
+
+ if (p && g)
+ dh = ssl_new_dh_fromdata(p, g);
+
+ return dh;
+#else
+ return ssl_get_dh_by_nid(NID_ffdhe2048);
+#endif
+}
+
+static HASSL_DH *ssl_get_dh_4096(void)
+{
+#if (HA_OPENSSL_VERSION_NUMBER < 0x10101000L)
+ static unsigned char dh4096_p[]={
+ 0xDE,0x16,0x94,0xCD,0x99,0x58,0x07,0xF1,0xF7,0x32,0x96,0x11,
+ 0x04,0x82,0xD4,0x84,0x72,0x80,0x99,0x06,0xCA,0xF0,0xA3,0x68,
+ 0x07,0xCE,0x64,0x50,0xE7,0x74,0x45,0x20,0x80,0x5E,0x4D,0xAD,
+ 0xA5,0xB6,0xED,0xFA,0x80,0x6C,0x3B,0x35,0xC4,0x9A,0x14,0x6B,
+ 0x32,0xBB,0xFD,0x1F,0x17,0x8E,0xB7,0x1F,0xD6,0xFA,0x3F,0x7B,
+ 0xEE,0x16,0xA5,0x62,0x33,0x0D,0xED,0xBC,0x4E,0x58,0xE5,0x47,
+ 0x4D,0xE9,0xAB,0x8E,0x38,0xD3,0x6E,0x90,0x57,0xE3,0x22,0x15,
+ 0x33,0xBD,0xF6,0x43,0x45,0xB5,0x10,0x0A,0xBE,0x2C,0xB4,0x35,
+ 0xB8,0x53,0x8D,0xAD,0xFB,0xA7,0x1F,0x85,0x58,0x41,0x7A,0x79,
+ 0x20,0x68,0xB3,0xE1,0x3D,0x08,0x76,0xBF,0x86,0x0D,0x49,0xE3,
+ 0x82,0x71,0x8C,0xB4,0x8D,0x81,0x84,0xD4,0xE7,0xBE,0x91,0xDC,
+ 0x26,0x39,0x48,0x0F,0x35,0xC4,0xCA,0x65,0xE3,0x40,0x93,0x52,
+ 0x76,0x58,0x7D,0xDD,0x51,0x75,0xDC,0x69,0x61,0xBF,0x47,0x2C,
+ 0x16,0x68,0x2D,0xC9,0x29,0xD3,0xE6,0xC0,0x99,0x48,0xA0,0x9A,
+ 0xC8,0x78,0xC0,0x6D,0x81,0x67,0x12,0x61,0x3F,0x71,0xBA,0x41,
+ 0x1F,0x6C,0x89,0x44,0x03,0xBA,0x3B,0x39,0x60,0xAA,0x28,0x55,
+ 0x59,0xAE,0xB8,0xFA,0xCB,0x6F,0xA5,0x1A,0xF7,0x2B,0xDD,0x52,
+ 0x8A,0x8B,0xE2,0x71,0xA6,0x5E,0x7E,0xD8,0x2E,0x18,0xE0,0x66,
+ 0xDF,0xDD,0x22,0x21,0x99,0x52,0x73,0xA6,0x33,0x20,0x65,0x0E,
+ 0x53,0xE7,0x6B,0x9B,0xC5,0xA3,0x2F,0x97,0x65,0x76,0xD3,0x47,
+ 0x23,0x77,0x12,0xB6,0x11,0x7B,0x24,0xED,0xF1,0xEF,0xC0,0xE2,
+ 0xA3,0x7E,0x67,0x05,0x3E,0x96,0x4D,0x45,0xC2,0x18,0xD1,0x73,
+ 0x9E,0x07,0xF3,0x81,0x6E,0x52,0x63,0xF6,0x20,0x76,0xB9,0x13,
+ 0xD2,0x65,0x30,0x18,0x16,0x09,0x16,0x9E,0x8F,0xF1,0xD2,0x10,
+ 0x5A,0xD3,0xD4,0xAF,0x16,0x61,0xDA,0x55,0x2E,0x18,0x5E,0x14,
+ 0x08,0x54,0x2E,0x2A,0x25,0xA2,0x1A,0x9B,0x8B,0x32,0xA9,0xFD,
+ 0xC2,0x48,0x96,0xE1,0x80,0xCA,0xE9,0x22,0x17,0xBB,0xCE,0x3E,
+ 0x9E,0xED,0xC7,0xF1,0x1F,0xEC,0x17,0x21,0xDC,0x7B,0x82,0x48,
+ 0x8E,0xBB,0x4B,0x9D,0x5B,0x04,0x04,0xDA,0xDB,0x39,0xDF,0x01,
+ 0x40,0xC3,0xAA,0x26,0x23,0x89,0x75,0xC6,0x0B,0xD0,0xA2,0x60,
+ 0x6A,0xF1,0xCC,0x65,0x18,0x98,0x1B,0x52,0xD2,0x74,0x61,0xCC,
+ 0xBD,0x60,0xAE,0xA3,0xA0,0x66,0x6A,0x16,0x34,0x92,0x3F,0x41,
+ 0x40,0x31,0x29,0xC0,0x2C,0x63,0xB2,0x07,0x8D,0xEB,0x94,0xB8,
+ 0xE8,0x47,0x92,0x52,0x93,0x6A,0x1B,0x7E,0x1A,0x61,0xB3,0x1B,
+ 0xF0,0xD6,0x72,0x9B,0xF1,0xB0,0xAF,0xBF,0x3E,0x65,0xEF,0x23,
+ 0x1D,0x6F,0xFF,0x70,0xCD,0x8A,0x4C,0x8A,0xA0,0x72,0x9D,0xBE,
+ 0xD4,0xBB,0x24,0x47,0x4A,0x68,0xB5,0xF5,0xC6,0xD5,0x7A,0xCD,
+ 0xCA,0x06,0x41,0x07,0xAD,0xC2,0x1E,0xE6,0x54,0xA7,0xAD,0x03,
+ 0xD9,0x12,0xC1,0x9C,0x13,0xB1,0xC9,0x0A,0x43,0x8E,0x1E,0x08,
+ 0xCE,0x50,0x82,0x73,0x5F,0xA7,0x55,0x1D,0xD9,0x59,0xAC,0xB5,
+ 0xEA,0x02,0x7F,0x6C,0x5B,0x74,0x96,0x98,0x67,0x24,0xA3,0x0F,
+ 0x15,0xFC,0xA9,0x7D,0x3E,0x67,0xD1,0x70,0xF8,0x97,0xF3,0x67,
+ 0xC5,0x8C,0x88,0x44,0x08,0x02,0xC7,0x2B,
+ };
+ static unsigned char dh4096_g[]={
+ 0x02,
+ };
+
+ BIGNUM *p;
+ BIGNUM *g;
+
+ HASSL_DH *dh = NULL;
+
+ p = BN_bin2bn(dh4096_p, sizeof dh4096_p, NULL);
+ g = BN_bin2bn(dh4096_g, sizeof dh4096_g, NULL);
+
+ if (p && g)
+ dh = ssl_new_dh_fromdata(p, g);
+
+ return dh;
+#else
+ return ssl_get_dh_by_nid(NID_ffdhe4096);
+#endif
+}
+
+static HASSL_DH *ssl_get_tmp_dh(EVP_PKEY *pkey)
+{
+ HASSL_DH *dh = NULL;
+ int type;
+ int keylen = 0;
+
+ type = pkey ? EVP_PKEY_base_id(pkey) : EVP_PKEY_NONE;
+
+ if (type == EVP_PKEY_EC) {
+ keylen = global_ssl.default_dh_param;
+ }
+
+ /* The keylen supplied by OpenSSL can only be 512 or 1024.
+ See ssl3_send_server_key_exchange() in ssl/s3_srvr.c
+ */
+ if (type == EVP_PKEY_RSA || type == EVP_PKEY_DSA) {
+ keylen = EVP_PKEY_bits(pkey);
+ }
+
+ if (keylen > global_ssl.default_dh_param) {
+ keylen = global_ssl.default_dh_param;
+ }
+
+ if (keylen >= 4096) {
+ if (!local_dh_4096)
+ local_dh_4096 = ssl_get_dh_4096();
+ dh = local_dh_4096;
+ }
+ else if (keylen >= 2048) {
+ if (!local_dh_2048)
+ local_dh_2048 = ssl_get_dh_2048();
+ dh = local_dh_2048;
+ }
+ else {
+ if (!local_dh_1024)
+ local_dh_1024 = ssl_get_dh_1024();
+ dh = local_dh_1024;
+ }
+
+ return dh;
+}
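+
+/* Worked example (illustrative): with "tune.ssl.default-dh-param 2048" and an
+ * RSA-4096 server key, keylen starts at 4096 (EVP_PKEY_bits), is capped to
+ * 2048 by default_dh_param, and the shared local_dh_2048 group is returned.
+ */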
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+/* Returns Diffie-Hellman parameters matching the private key length
+ but not exceeding global_ssl.default_dh_param */
+static HASSL_DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen)
+{
+ EVP_PKEY *pkey = SSL_get_privatekey(ssl);
+
+ return ssl_get_tmp_dh(pkey);
+}
+#endif
+
+static int ssl_sock_set_tmp_dh(SSL_CTX *ctx, HASSL_DH *dh)
+{
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+ return SSL_CTX_set_tmp_dh(ctx, dh);
+#else
+ int retval = 0;
+ HASSL_DH_up_ref(dh);
+
+ retval = SSL_CTX_set0_tmp_dh_pkey(ctx, dh);
+
+ if (!retval)
+ HASSL_DH_free(dh);
+
+ return retval;
+#endif
+}
+
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+static void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey)
+{
+ HASSL_DH *dh = NULL;
+ if (pkey && (dh = ssl_get_tmp_dh(pkey))) {
+ HASSL_DH_up_ref(dh);
+ if (!SSL_CTX_set0_tmp_dh_pkey(ctx, dh))
+ HASSL_DH_free(dh);
+ }
+}
+#endif
+
+HASSL_DH *ssl_sock_get_dh_from_bio(BIO *bio)
+{
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL)
+ HASSL_DH *dh = NULL;
+ OSSL_DECODER_CTX *dctx = NULL;
+ const char *format = "PEM";
+ const char *keytype = "DH";
+
+ dctx = OSSL_DECODER_CTX_new_for_pkey(&dh, format, NULL, keytype,
+ OSSL_KEYMGMT_SELECT_DOMAIN_PARAMETERS,
+ NULL, NULL);
+
+ if (dctx == NULL || OSSL_DECODER_CTX_get_num_decoders(dctx) == 0)
+ goto end;
+
+ /* The DH parameters might not be the first section found in the PEM
+ * file so we need to iterate over all of them until we find the right
+ * one.
+ */
+ while (!BIO_eof(bio) && !dh)
+ OSSL_DECODER_from_bio(dctx, bio);
+
+end:
+ OSSL_DECODER_CTX_free(dctx);
+ return dh;
+#else
+ HASSL_DH *dh = NULL;
+
+ dh = PEM_read_bio_DHparams(bio, NULL, NULL, NULL);
+
+ return dh;
+#endif
+}
+
+static HASSL_DH * ssl_sock_get_dh_from_file(const char *filename)
+{
+ HASSL_DH *dh = NULL;
+ BIO *in = BIO_new(BIO_s_file());
+
+ if (in == NULL)
+ goto end;
+
+ if (BIO_read_filename(in, filename) <= 0)
+ goto end;
+
+ dh = ssl_sock_get_dh_from_bio(in);
+
+end:
+ if (in)
+ BIO_free(in);
+
+ ERR_clear_error();
+
+ return dh;
+}
+
+int ssl_sock_load_global_dh_param_from_file(const char *filename)
+{
+ global_dh = ssl_sock_get_dh_from_file(filename);
+
+ if (global_dh) {
+ return 0;
+ }
+
+ return -1;
+}
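+
+/* This is what backs the global "ssl-dh-param-file" directive: the PEM file
+ * is parsed once at startup and the resulting parameters are reused by every
+ * SSL_CTX that carries no DH parameters of its own.
+ */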
+#endif
+
+/* This function allocates a sni_ctx and adds it to the ckch_inst */
+static int ckch_inst_add_cert_sni(SSL_CTX *ctx, struct ckch_inst *ckch_inst,
+ struct bind_conf *s, struct ssl_bind_conf *conf,
+ struct pkey_info kinfo, char *name, int order)
+{
+ struct sni_ctx *sc;
+ int wild = 0, neg = 0;
+
+ if (*name == '!') {
+ neg = 1;
+ name++;
+ }
+ if (*name == '*') {
+ wild = 1;
+ name++;
+ }
+ /* !* filter is a nop */
+ if (neg && wild)
+ return order;
+ if (*name) {
+ int j, len;
+ len = strlen(name);
+ for (j = 0; j < len && j < trash.size; j++)
+ trash.area[j] = tolower((unsigned char)name[j]);
+ if (j >= trash.size)
+ return -1;
+ trash.area[j] = 0;
+
+ sc = malloc(sizeof(struct sni_ctx) + len + 1);
+ if (!sc)
+ return -1;
+ memcpy(sc->name.key, trash.area, len + 1);
+ SSL_CTX_up_ref(ctx);
+ sc->ctx = ctx;
+ sc->conf = conf;
+ sc->kinfo = kinfo;
+ sc->order = order++;
+ sc->neg = neg;
+ sc->wild = wild;
+ sc->name.node.leaf_p = NULL;
+ sc->ckch_inst = ckch_inst;
+ LIST_APPEND(&ckch_inst->sni_ctx, &sc->by_ckch_inst);
+ }
+ return order;
+}
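+
+/* Filter syntax example (hypothetical names): on a crt-list line, the filters
+ * "*.example.com !www.example.com" produce one wildcard sni_ctx (wild=1)
+ * keyed on ".example.com" and one negated sni_ctx (neg=1) keyed on
+ * "www.example.com", the latter excluding that host from the wildcard match.
+ */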
+
+/*
+ * Insert the sni_ctxs that are listed in the ckch_inst into the bind_conf's
+ * sni_ctx tree. This function can't return an error.
+ *
+ * *CAUTION*: The caller must lock the sni tree if called in multithreading mode
+ */
+void ssl_sock_load_cert_sni(struct ckch_inst *ckch_inst, struct bind_conf *bind_conf)
+{
+
+ struct sni_ctx *sc0, *sc0b, *sc1;
+ struct ebmb_node *node;
+
+ list_for_each_entry_safe(sc0, sc0b, &ckch_inst->sni_ctx, by_ckch_inst) {
+
+ /* ignore if sc0 was already inserted in a tree */
+ if (sc0->name.node.leaf_p)
+ continue;
+
+ /* Check for duplicates. */
+ if (sc0->wild)
+ node = ebst_lookup(&bind_conf->sni_w_ctx, (char *)sc0->name.key);
+ else
+ node = ebst_lookup(&bind_conf->sni_ctx, (char *)sc0->name.key);
+
+ for (; node; node = ebmb_next_dup(node)) {
+ sc1 = ebmb_entry(node, struct sni_ctx, name);
+ if (sc1->ctx == sc0->ctx && sc1->conf == sc0->conf
+ && sc1->neg == sc0->neg && sc1->wild == sc0->wild) {
+ /* it's a duplicate, we should remove and free it */
+ LIST_DELETE(&sc0->by_ckch_inst);
+ SSL_CTX_free(sc0->ctx);
+ ha_free(&sc0);
+ break;
+ }
+ }
+
+ /* if duplicate, ignore the insertion */
+ if (!sc0)
+ continue;
+
+ if (sc0->wild)
+ ebst_insert(&bind_conf->sni_w_ctx, &sc0->name);
+ else
+ ebst_insert(&bind_conf->sni_ctx, &sc0->name);
+ }
+
+ /* replace the default_ctx if required with the instance's ctx. */
+ if (ckch_inst->is_default) {
+ SSL_CTX_free(bind_conf->default_ctx);
+ SSL_CTX_up_ref(ckch_inst->ctx);
+ bind_conf->default_ctx = ckch_inst->ctx;
+ bind_conf->default_inst = ckch_inst;
+ }
+}
+
+/*
+ * tree used to store the ckchs ordered by filename/bundle name
+ */
+struct eb_root ckchs_tree = EB_ROOT_UNIQUE;
+
+/* tree of crtlist (crt-list/directory) */
+struct eb_root crtlists_tree = EB_ROOT_UNIQUE;
+
+/* Loads Diffie-Hellman parameters from a ckchs into an SSL_CTX.
+ * If there is no DH parameter available in the ckchs, the global
+ * DH parameter is loaded into the SSL_CTX, and if there is no
+ * DH parameter available in the ckchs nor globally, the default
+ * DH parameters are applied to the SSL_CTX.
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if a reason for the error is available in err
+ * ERR_WARN if a warning is available in err
+ * The value 0 means there is no error nor warning and
+ * the operation succeeded.
+ */
+#ifndef OPENSSL_NO_DH
+static int ssl_sock_load_dh_params(SSL_CTX *ctx, const struct ckch_data *data,
+ const char *path, char **err)
+{
+ int ret = 0;
+ HASSL_DH *dh = NULL;
+
+ if (data && data->dh) {
+ dh = data->dh;
+ if (!ssl_sock_set_tmp_dh(ctx, dh)) {
+ memprintf(err, "%sunable to load the DH parameter specified in '%s'",
+ err && *err ? *err : "", path);
+ memprintf(err, "%s, DH ciphers won't be available.\n",
+ err && *err ? *err : "");
+ ret |= ERR_WARN;
+ goto end;
+ }
+
+ if (ssl_dh_ptr_index >= 0) {
+ /* store a pointer to the DH params to avoid complaining about
+ ssl-default-dh-param not being set for this SSL_CTX */
+ SSL_CTX_set_ex_data(ctx, ssl_dh_ptr_index, dh);
+ }
+ }
+ else if (global_dh) {
+ if (!ssl_sock_set_tmp_dh(ctx, global_dh)) {
+ memprintf(err, "%sunable to use the global DH parameter for certificate '%s'",
+ err && *err ? *err : "", path);
+ memprintf(err, "%s, DH ciphers won't be available.\n",
+ err && *err ? *err : "");
+ ret |= ERR_WARN;
+ goto end;
+ }
+ }
+ else {
+ /* Clear openssl global errors stack */
+ ERR_clear_error();
+
+ /* We do not want DHE ciphers to be added to the cipher list
+ * unless there is an explicit global dh option in the conf.
+ */
+ if (global_ssl.default_dh_param) {
+ if (global_ssl.default_dh_param <= 1024) {
+ /* we are limited to DH parameter of 1024 bits anyway */
+ if (local_dh_1024 == NULL)
+ local_dh_1024 = ssl_get_dh_1024();
+
+ if (local_dh_1024 == NULL) {
+ memprintf(err, "%sunable to load default 1024 bits DH parameter for certificate '%s'.\n",
+ err && *err ? *err : "", path);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (!ssl_sock_set_tmp_dh(ctx, local_dh_1024)) {
+ memprintf(err, "%sunable to load default 1024 bits DH parameter for certificate '%s'.\n",
+ err && *err ? *err : "", path);
+ memprintf(err, "%s, DH ciphers won't be available.\n",
+ err && *err ? *err : "");
+ ret |= ERR_WARN;
+ goto end;
+ }
+ }
+ else {
+#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL)
+ SSL_CTX_set_tmp_dh_callback(ctx, ssl_get_tmp_dh_cbk);
+#else
+ ssl_sock_set_tmp_dh_from_pkey(ctx, data ? data->key : NULL);
+#endif
+ }
+ }
+ }
+
+end:
+ ERR_clear_error();
+ return ret;
+}
+#endif
+
+
+/* Load a certificate chain into an SSL context.
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason for the error is available in err
+ * ERR_WARN if a warning is available in err
+ * The caller is responsible for freeing the newly built or newly refcounted
+ * find_chain element.
+ * The value 0 means there is no error nor warning and
+ * the operation succeeded.
+ */
+static int ssl_sock_load_cert_chain(const char *path, const struct ckch_data *data,
+ SSL_CTX *ctx, STACK_OF(X509) **find_chain, char **err)
+{
+ int errcode = 0;
+ int ret;
+
+ ERR_clear_error();
+
+ if (find_chain == NULL) {
+ errcode |= ERR_FATAL;
+ goto end;
+ }
+
+ if (!SSL_CTX_use_certificate(ctx, data->cert)) {
+ ret = ERR_get_error();
+ memprintf(err, "%sunable to load SSL certificate into SSL Context '%s': %s.\n",
+ err && *err ? *err : "", path, ERR_reason_error_string(ret));
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+
+ if (data->chain) {
+ *find_chain = X509_chain_up_ref(data->chain);
+ } else {
+ /* Find Certificate Chain in global */
+ struct issuer_chain *issuer;
+ issuer = ssl_get0_issuer_chain(data->cert);
+ if (issuer)
+ *find_chain = X509_chain_up_ref(issuer->chain);
+ }
+
+ if (!*find_chain) {
+ /* always put a null chain stack in the SSL_CTX so it does not
+ * try to build the chain from the verify store */
+ *find_chain = sk_X509_new_null();
+ }
+
+ /* Load all certs in the data into the ctx_chain for the ssl_ctx */
+#ifdef SSL_CTX_set1_chain
+ if (!SSL_CTX_set1_chain(ctx, *find_chain)) {
+ ret = ERR_get_error();
+ memprintf(err, "%sunable to load chain certificate into SSL Context '%s': %s.\n",
+ err && *err ? *err : "", path, ERR_reason_error_string(ret));
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+#else
+ { /* legacy compat (< openssl 1.0.2) */
+ X509 *ca;
+ while ((ca = sk_X509_shift(*find_chain)))
+ if (!SSL_CTX_add_extra_chain_cert(ctx, ca)) {
+ memprintf(err, "%sunable to load chain certificate into SSL Context '%s'.\n",
+ err && *err ? *err : "", path);
+ X509_free(ca);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+#endif
+
+#ifdef SSL_CTX_build_cert_chain
+ /* remove the Root CA from the SSL_CTX if the option is activated */
+ if (global_ssl.skip_self_issued_ca) {
+ if (!SSL_CTX_build_cert_chain(ctx, SSL_BUILD_CHAIN_FLAG_NO_ROOT|SSL_BUILD_CHAIN_FLAG_UNTRUSTED|SSL_BUILD_CHAIN_FLAG_IGNORE_ERROR)) {
+ memprintf(err, "%sunable to load chain certificate into SSL Context '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+#endif
+
+end:
+ return errcode;
+}
+
+
+/* Loads the info in ckch into ctx.
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason for the error is available in err
+ * ERR_WARN if a warning is available in err
+ * The value 0 means there is no error nor warning and
+ * the operation succeeded.
+ */
+static int ssl_sock_put_ckch_into_ctx(const char *path, struct ckch_data *data, SSL_CTX *ctx, char **err)
+{
+ int errcode = 0;
+ STACK_OF(X509) *find_chain = NULL;
+
+ ERR_clear_error();
+
+ if (SSL_CTX_use_PrivateKey(ctx, data->key) <= 0) {
+ int ret;
+
+ ret = ERR_get_error();
+ memprintf(err, "%sunable to load SSL private key into SSL Context '%s': %s.\n",
+ err && *err ? *err : "", path, ERR_reason_error_string(ret));
+ errcode |= ERR_ALERT | ERR_FATAL;
+ return errcode;
+ }
+
+ /* Load certificate chain */
+ errcode |= ssl_sock_load_cert_chain(path, data, ctx, &find_chain, err);
+ if (errcode & ERR_CODE)
+ goto end;
+
+#ifndef OPENSSL_NO_DH
+ /* store a NULL pointer to indicate we have not yet loaded
+ a custom DH param file */
+ if (ssl_dh_ptr_index >= 0) {
+ SSL_CTX_set_ex_data(ctx, ssl_dh_ptr_index, NULL);
+ }
+
+ errcode |= ssl_sock_load_dh_params(ctx, data, path, err);
+ if (errcode & ERR_CODE) {
+ memprintf(err, "%sunable to load DH parameters from file '%s'.\n",
+ err && *err ? *err : "", path);
+ goto end;
+ }
+#endif
+
+#ifdef HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT
+ if (sctl_ex_index >= 0 && data->sctl) {
+ if (ssl_sock_load_sctl(ctx, data->sctl) < 0) {
+ memprintf(err, "%s '%s.sctl' is present but cannot be read or parsed'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+ }
+#endif
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) || defined OPENSSL_IS_BORINGSSL)
+ /* Load OCSP Info into context
+ * If OCSP update mode is set to 'on', an entry will be created in the
+ * ocsp tree even if no ocsp_response was known during init, unless the
+ * frontend's conf disables ocsp update explicitly.
+ */
+ if (ssl_sock_load_ocsp(path, ctx, data, find_chain) < 0) {
+ if (data->ocsp_response)
+ memprintf(err, "%s '%s.ocsp' is present and activates OCSP but it is impossible to compute the OCSP certificate ID (maybe the issuer could not be found)'.\n",
+ err && *err ? *err : "", path);
+ else
+ memprintf(err, "%s '%s' has an OCSP auto-update set to 'on' but an error occurred (maybe the OCSP URI or the issuer could not be found)'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto end;
+ }
+#endif
+
+ end:
+ sk_X509_pop_free(find_chain, X509_free);
+ return errcode;
+}
+
+
+/* Loads the info of a ckch built out of a backend certificate into an SSL ctx
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ * The value 0 means there is no error nor warning and
+ * the operation succeeded.
+ */
+static int ssl_sock_put_srv_ckch_into_ctx(const char *path, const struct ckch_data *data,
+ SSL_CTX *ctx, char **err)
+{
+ int errcode = 0;
+ STACK_OF(X509) *find_chain = NULL;
+
+ /* Load the private key */
+ if (SSL_CTX_use_PrivateKey(ctx, data->key) <= 0) {
+ memprintf(err, "%sunable to load SSL private key into SSL Context '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ }
+
+ /* Load certificate chain */
+ errcode |= ssl_sock_load_cert_chain(path, data, ctx, &find_chain, err);
+ if (errcode & ERR_CODE)
+ goto end;
+
+ if (SSL_CTX_check_private_key(ctx) <= 0) {
+ memprintf(err, "%sinconsistencies between private key and certificate loaded from PEM file '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ }
+
+end:
+ sk_X509_pop_free(find_chain, X509_free);
+ return errcode;
+}
+
+
+/*
+ * This function allocates a ckch_inst and creates its SNIs
+ *
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ */
+int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct bind_conf *bind_conf,
+ struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, struct ckch_inst **ckchi, char **err)
+{
+ SSL_CTX *ctx;
+ int i;
+ int order = 0;
+ X509_NAME *xname;
+ char *str;
+ EVP_PKEY *pkey;
+ struct pkey_info kinfo = { .sig = TLSEXT_signature_anonymous, .bits = 0 };
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ STACK_OF(GENERAL_NAME) *names;
+#endif
+ struct ckch_data *data;
+ struct ckch_inst *ckch_inst = NULL;
+ int errcode = 0;
+
+ *ckchi = NULL;
+
+ if (!ckchs || !ckchs->data)
+ return ERR_FATAL;
+
+ data = ckchs->data;
+
+ ctx = SSL_CTX_new(SSLv23_server_method());
+ if (!ctx) {
+ memprintf(err, "%sunable to allocate SSL context for cert '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ errcode |= ssl_sock_put_ckch_into_ctx(path, data, ctx, err);
+ if (errcode & ERR_CODE)
+ goto error;
+
+ ckch_inst = ckch_inst_new();
+ if (!ckch_inst) {
+ memprintf(err, "%sunable to allocate SSL context for cert '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ pkey = X509_get_pubkey(data->cert);
+ if (pkey) {
+ kinfo.bits = EVP_PKEY_bits(pkey);
+ switch(EVP_PKEY_base_id(pkey)) {
+ case EVP_PKEY_RSA:
+ kinfo.sig = TLSEXT_signature_rsa;
+ break;
+ case EVP_PKEY_EC:
+ kinfo.sig = TLSEXT_signature_ecdsa;
+ break;
+ case EVP_PKEY_DSA:
+ kinfo.sig = TLSEXT_signature_dsa;
+ break;
+ }
+ EVP_PKEY_free(pkey);
+ }
+
+ if (fcount) {
+ while (fcount--) {
+ order = ckch_inst_add_cert_sni(ctx, ckch_inst, bind_conf, ssl_conf, kinfo, sni_filter[fcount], order);
+ if (order < 0) {
+ memprintf(err, "%sunable to create a sni context.\n", err && *err ? *err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+ }
+ else {
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ names = X509_get_ext_d2i(data->cert, NID_subject_alt_name, NULL, NULL);
+ if (names) {
+ for (i = 0; i < sk_GENERAL_NAME_num(names); i++) {
+ GENERAL_NAME *name = sk_GENERAL_NAME_value(names, i);
+ if (name->type == GEN_DNS) {
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, name->d.dNSName) >= 0) {
+ order = ckch_inst_add_cert_sni(ctx, ckch_inst, bind_conf, ssl_conf, kinfo, str, order);
+ OPENSSL_free(str);
+ if (order < 0) {
+ memprintf(err, "%sunable to create a sni context.\n", err && *err ? *err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+ }
+ }
+ sk_GENERAL_NAME_pop_free(names, GENERAL_NAME_free);
+ }
+#endif /* SSL_CTRL_SET_TLSEXT_HOSTNAME */
+ xname = X509_get_subject_name(data->cert);
+ i = -1;
+ while ((i = X509_NAME_get_index_by_NID(xname, NID_commonName, i)) != -1) {
+ X509_NAME_ENTRY *entry = X509_NAME_get_entry(xname, i);
+ ASN1_STRING *value;
+
+ value = X509_NAME_ENTRY_get_data(entry);
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, value) >= 0) {
+ order = ckch_inst_add_cert_sni(ctx, ckch_inst, bind_conf, ssl_conf, kinfo, str, order);
+ OPENSSL_free(str);
+ if (order < 0) {
+ memprintf(err, "%sunable to create a sni context.\n", err && *err ? *err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+ }
+ }
+ }
+ /* we must not free the SSL_CTX anymore below, since it's already in
+ * the tree, so it will be discovered and cleaned in time.
+ */
+
+#ifndef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ if (bind_conf->default_ctx) {
+ memprintf(err, "%sthis version of openssl cannot load multiple SSL certificates.\n",
+ err && *err ? *err : "");
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+#endif
+ if (!bind_conf->default_ctx) {
+ bind_conf->default_ctx = ctx;
+ bind_conf->default_ssl_conf = ssl_conf;
+ ckch_inst->is_default = 1;
+ SSL_CTX_up_ref(ctx);
+ bind_conf->default_inst = ckch_inst;
+ }
+
+ /* Always keep a reference to the newly constructed SSL_CTX in the
+ * instance. This way if the instance has no SNIs, the SSL_CTX will
+ * still be linked. */
+ SSL_CTX_up_ref(ctx);
+ ckch_inst->ctx = ctx;
+
+ /* everything succeeded, the ckch instance can be used */
+ ckch_inst->bind_conf = bind_conf;
+ ckch_inst->ssl_conf = ssl_conf;
+ ckch_inst->ckch_store = ckchs;
+
+ SSL_CTX_free(ctx); /* we need to free the ctx since we incremented the refcount where it's used */
+
+ *ckchi = ckch_inst;
+ return errcode;
+
+error:
+ /* free the allocated sni_ctxs */
+ if (ckch_inst) {
+ if (ckch_inst->is_default)
+ SSL_CTX_free(ctx);
+
+ ckch_inst_free(ckch_inst);
+ ckch_inst = NULL;
+ }
+ SSL_CTX_free(ctx);
+
+ return errcode;
+}
+
+
+/*
+ * This function allocates a ckch_inst that will be used on the backend side
+ * (server line)
+ *
+ * Returns a bitfield containing the flags:
+ * ERR_FATAL in any fatal error case
+ * ERR_ALERT if the reason of the error is available in err
+ * ERR_WARN if a warning is available in err
+ */
+int ckch_inst_new_load_srv_store(const char *path, struct ckch_store *ckchs,
+ struct ckch_inst **ckchi, char **err)
+{
+ SSL_CTX *ctx;
+ struct ckch_data *data;
+ struct ckch_inst *ckch_inst = NULL;
+ int errcode = 0;
+
+ *ckchi = NULL;
+
+ if (!ckchs || !ckchs->data)
+ return ERR_FATAL;
+
+ data = ckchs->data;
+
+ ctx = SSL_CTX_new(SSLv23_client_method());
+ if (!ctx) {
+ memprintf(err, "%sunable to allocate SSL context for cert '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ errcode |= ssl_sock_put_srv_ckch_into_ctx(path, data, ctx, err);
+ if (errcode & ERR_CODE)
+ goto error;
+
+ ckch_inst = ckch_inst_new();
+ if (!ckch_inst) {
+ memprintf(err, "%sunable to allocate SSL context for cert '%s'.\n",
+ err && *err ? *err : "", path);
+ errcode |= ERR_ALERT | ERR_FATAL;
+ goto error;
+ }
+
+ /* everything succeeded, the ckch instance can be used */
+ ckch_inst->bind_conf = NULL;
+ ckch_inst->ssl_conf = NULL;
+ ckch_inst->ckch_store = ckchs;
+ ckch_inst->ctx = ctx;
+ ckch_inst->is_server_instance = 1;
+
+ *ckchi = ckch_inst;
+ return errcode;
+
+error:
+ SSL_CTX_free(ctx);
+
+ return errcode;
+}
+
+/* Returns a set of ERR_* flags possibly with an error in <err>. */
+static int ssl_sock_load_ckchs(const char *path, struct ckch_store *ckchs,
+ struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf,
+ char **sni_filter, int fcount, struct ckch_inst **ckch_inst, char **err)
+{
+ int errcode = 0;
+
+ /* we found the ckchs in the tree, we can use it directly */
+ errcode |= ckch_inst_new_load_store(path, ckchs, bind_conf, ssl_conf, sni_filter, fcount, ckch_inst, err);
+
+ if (errcode & ERR_CODE)
+ return errcode;
+
+ ssl_sock_load_cert_sni(*ckch_inst, bind_conf);
+
+ /* success: add the instance to the ckch_store's list of instances */
+ LIST_APPEND(&ckchs->ckch_inst, &((*ckch_inst)->by_ckchs));
+ return errcode;
+}
+
+/* This function generates a <struct ckch_inst *> for a <struct server *>, and
+ * fill the SSL_CTX of the server.
+ *
+ * Returns a set of ERR_* flags possibly with an error in <err>. */
+static int ssl_sock_load_srv_ckchs(const char *path, struct ckch_store *ckchs,
+ struct server *server, struct ckch_inst **ckch_inst, char **err)
+{
+ int errcode = 0;
+
+ /* we found the ckchs in the tree, we can use it directly */
+ errcode |= ckch_inst_new_load_srv_store(path, ckchs, ckch_inst, err);
+
+ if (errcode & ERR_CODE)
+ return errcode;
+
+ (*ckch_inst)->server = server;
+ /* Keep the reference to the SSL_CTX in the server. */
+ SSL_CTX_up_ref((*ckch_inst)->ctx);
+ server->ssl_ctx.ctx = (*ckch_inst)->ctx;
+ /* success: add the instance to the ckch_store's list of instances */
+ LIST_APPEND(&ckchs->ckch_inst, &((*ckch_inst)->by_ckchs));
+ return errcode;
+}
+
+
+/* Make sure openssl opens /dev/urandom before the chroot. The work is only
+ * done once. Zero is returned if the operation fails. No error is returned
+ * if the RNG is reported as not implemented, because we expect that openssl
+ * will fall back to another method when needed.
+ */
+int ssl_initialize_random(void)
+{
+ unsigned char random;
+ static int random_initialized = 0;
+
+ if (!random_initialized && RAND_bytes(&random, 1) != 0)
+ random_initialized = 1;
+
+ return random_initialized;
+}
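+
+/* For illustration only (not part of the build): a minimal sketch of the
+ * intended ordering, with a hypothetical chroot path and error handling.
+ * The point is that the RNG must be primed while /dev/urandom is still
+ * reachable, i.e. before chroot().
+ */
+#if 0
+static void example_startup_order(void)
+{
+ if (!ssl_initialize_random()) /* opens /dev/urandom now */
+ exit(1);
+ if (chroot("/var/empty") != 0) /* /dev/urandom is unreachable after this */
+ exit(1);
+}
+#endif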
+
+/* Load a crt-list file, this is done in 2 parts:
+ * - store the content of the file in a crtlist structure with crtlist_entry structures
+ * - generate the instances by iterating on entries in the crtlist struct
+ *
+ * Nothing is locked here; this function is used by the configuration parser.
+ *
+ * Returns a set of ERR_* flags possibly with an error in <err>.
+ */
+int ssl_sock_load_cert_list_file(char *file, int dir, struct bind_conf *bind_conf, struct proxy *curproxy, char **err)
+{
+ struct crtlist *crtlist = NULL;
+ struct ebmb_node *eb;
+ struct crtlist_entry *entry = NULL;
+ struct bind_conf_list *bind_conf_node = NULL;
+ int cfgerr = 0;
+ char *end;
+
+ bind_conf_node = malloc(sizeof(*bind_conf_node));
+ if (!bind_conf_node) {
+ memprintf(err, "%sCan't alloc memory!\n", err && *err ? *err : "");
+ cfgerr |= ERR_FATAL | ERR_ALERT;
+ goto error;
+ }
+ bind_conf_node->next = NULL;
+ bind_conf_node->bind_conf = bind_conf;
+
+ /* strip trailing slashes, including first one */
+ for (end = file + strlen(file) - 1; end >= file && *end == '/'; end--)
+ *end = 0;
+
+ /* look for an existing crtlist or create one */
+ eb = ebst_lookup(&crtlists_tree, file);
+ if (eb) {
+ crtlist = ebmb_entry(eb, struct crtlist, node);
+ } else {
+ /* load a crt-list OR a directory */
+ if (dir)
+ cfgerr |= crtlist_load_cert_dir(file, bind_conf, &crtlist, err);
+ else
+ cfgerr |= crtlist_parse_file(file, bind_conf, curproxy, &crtlist, err);
+
+ if (!(cfgerr & ERR_CODE))
+ ebst_insert(&crtlists_tree, &crtlist->node);
+ }
+
+ if (cfgerr & ERR_CODE) {
+ cfgerr |= ERR_FATAL | ERR_ALERT;
+ goto error;
+ }
+
+ /* generate a ckch instance for each crtlist_entry */
+ list_for_each_entry(entry, &crtlist->ord_entries, by_crtlist) {
+ struct ckch_store *store;
+ struct ckch_inst *ckch_inst = NULL;
+
+ store = entry->node.key;
+ cfgerr |= ssl_sock_load_ckchs(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, &ckch_inst, err);
+ if (cfgerr & ERR_CODE) {
+ memprintf(err, "error processing line %d in file '%s' : %s", entry->linenum, file, *err);
+ goto error;
+ }
+ LIST_APPEND(&entry->ckch_inst, &ckch_inst->by_crtlist_entry);
+ ckch_inst->crtlist_entry = entry;
+ }
+
+ /* add the bind_conf to the list */
+ bind_conf_node->next = crtlist->bind_conf;
+ crtlist->bind_conf = bind_conf_node;
+
+ return cfgerr;
+error:
+ {
+ struct crtlist_entry *lastentry;
+ struct ckch_inst *inst, *s_inst;
+
+ lastentry = entry; /* which entry we tried to generate last */
+ if (lastentry) {
+ list_for_each_entry(entry, &crtlist->ord_entries, by_crtlist) {
+ if (entry == lastentry) /* last entry we tried to generate, no need to go further */
+ break;
+
+ list_for_each_entry_safe(inst, s_inst, &entry->ckch_inst, by_crtlist_entry) {
+
+ /* this was not generated for this bind_conf, skip */
+ if (inst->bind_conf != bind_conf)
+ continue;
+
+ /* free the sni_ctx and instance */
+ ckch_inst_free(inst);
+ }
+ }
+ }
+ free(bind_conf_node);
+ }
+ return cfgerr;
+}
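+
+/* For illustration, a hypothetical crt-list file consumed by the function
+ * above could look like this (one certificate per line, optional ssl
+ * options between brackets, then optional SNI filters, '!' negating one):
+ *
+ *   common.pem
+ *   site1.pem [alpn h2,http/1.1] www.example.com
+ *   wildcard.pem *.example.org !static.example.org
+ */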
+
+/* Returns a set of ERR_* flags possibly with an error in <err>. */
+int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err)
+{
+ struct stat buf;
+ int cfgerr = 0;
+ struct ckch_store *ckchs;
+ struct ckch_inst *ckch_inst = NULL;
+ int found = 0; /* did we find a file to load? */
+
+ if ((ckchs = ckchs_lookup(path))) {
+ /* we found the ckchs in the tree, we can use it directly */
+ cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err);
+ found++;
+ } else if (stat(path, &buf) == 0) {
+ found++;
+ if (S_ISDIR(buf.st_mode) == 0) {
+ ckchs = ckchs_load_cert_file(path, err);
+ if (!ckchs)
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err);
+ } else {
+ cfgerr |= ssl_sock_load_cert_list_file(path, 1, bind_conf, bind_conf->frontend, err);
+ }
+ } else {
+ /* stat failed, could be a bundle */
+ if (global_ssl.extra_files & SSL_GF_BUNDLE) {
+ char fp[MAXPATHLEN+1] = {0};
+ int n = 0;
+
+ /* Load all possible certs and keys in separate ckch_store */
+ for (n = 0; n < SSL_SOCK_NUM_KEYTYPES; n++) {
+ struct stat buf;
+ int ret;
+
+ ret = snprintf(fp, sizeof(fp), "%s.%s", path, SSL_SOCK_KEYTYPE_NAMES[n]);
+ if (ret >= sizeof(fp)) /* output was truncated by snprintf(): skip this path */
+ continue;
+
+ if ((ckchs = ckchs_lookup(fp))) {
+ cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err);
+ found++;
+ } else {
+ if (stat(fp, &buf) == 0) {
+ found++;
+ ckchs = ckchs_load_cert_file(fp, err);
+ if (!ckchs)
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err);
+ }
+ }
+ }
+#if HA_OPENSSL_VERSION_NUMBER < 0x10101000L
+ if (found) {
+ memprintf(err, "%sCan't load '%s'. Loading a multi certificates bundle requires OpenSSL >= 1.1.1\n",
+ err && *err ? *err : "", path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#endif
+ }
+ }
+ if (!found) {
+ memprintf(err, "%sunable to stat SSL certificate from file '%s' : %s.\n",
+ err && *err ? *err : "", path, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+ return cfgerr;
+}
+
+
+/* Create a full ssl context and ckch instance that will be used for a specific
+ * backend server (server configuration line).
+ * Returns a set of ERR_* flags possibly with an error in <err>.
+ */
+int ssl_sock_load_srv_cert(char *path, struct server *server, int create_if_none, char **err)
+{
+ struct stat buf;
+ int cfgerr = 0;
+ struct ckch_store *ckchs;
+ int found = 0; /* did we find a file to load? */
+
+ if ((ckchs = ckchs_lookup(path))) {
+ /* we found the ckchs in the tree, we can use it directly */
+ cfgerr |= ssl_sock_load_srv_ckchs(path, ckchs, server, &server->ssl_ctx.inst, err);
+ found++;
+ } else {
+ if (!create_if_none) {
+ memprintf(err, "%sunable to stat SSL certificate '%s'.\n",
+ err && *err ? *err : "", path);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (stat(path, &buf) == 0) {
+ /* We do not manage directories on backend side. */
+ if (S_ISDIR(buf.st_mode) == 0) {
+ ++found;
+ ckchs = ckchs_load_cert_file(path, err);
+ if (!ckchs)
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ cfgerr |= ssl_sock_load_srv_ckchs(path, ckchs, server, &server->ssl_ctx.inst, err);
+ }
+ }
+ }
+ if (!found) {
+ memprintf(err, "%sunable to stat SSL certificate from file '%s' : %s.\n",
+ err && *err ? *err : "", path, strerror(errno));
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+out:
+ return cfgerr;
+}
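+
+/* For illustration, this path is typically reached from a configuration
+ * such as (hypothetical names and paths):
+ *
+ *   backend app
+ *       server srv1 192.0.2.10:443 ssl crt /etc/haproxy/client.pem verify none
+ */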
+
+/* Create an initial CTX used to start the SSL connection before switchctx */
+static int
+ssl_sock_initial_ctx(struct bind_conf *bind_conf)
+{
+ SSL_CTX *ctx = NULL;
+ long options =
+ SSL_OP_ALL | /* all known workarounds for bugs */
+ SSL_OP_NO_SSLv2 |
+ SSL_OP_NO_COMPRESSION |
+ SSL_OP_SINGLE_DH_USE |
+ SSL_OP_SINGLE_ECDH_USE |
+ SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION |
+ SSL_OP_PRIORITIZE_CHACHA |
+ SSL_OP_CIPHER_SERVER_PREFERENCE;
+ long mode =
+ SSL_MODE_ENABLE_PARTIAL_WRITE |
+ SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER |
+ SSL_MODE_RELEASE_BUFFERS |
+ SSL_MODE_SMALL_BUFFERS;
+ struct tls_version_filter *conf_ssl_methods = &bind_conf->ssl_conf.ssl_methods;
+ int i, min, max, hole;
+ int flags = MC_SSL_O_ALL;
+ int cfgerr = 0;
+ const int default_min_ver = CONF_TLSV12;
+
+ ctx = SSL_CTX_new(SSLv23_server_method());
+ bind_conf->initial_ctx = ctx;
+
+ if (conf_ssl_methods->flags && (conf_ssl_methods->min || conf_ssl_methods->max))
+ ha_warning("Proxy '%s': no-sslv3/no-tlsv1x are ignored for bind '%s' at [%s:%d]. "
+ "Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n",
+ bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ else
+ flags = conf_ssl_methods->flags;
+
+ min = conf_ssl_methods->min;
+ max = conf_ssl_methods->max;
+
+ /* the default minimum is TLSv1.2 */
+ if (!min) {
+ if (!max || (max >= default_min_ver)) {
+ min = default_min_ver;
+ } else {
+ ha_warning("Proxy '%s': Ambiguous configuration for bind '%s' at [%s:%d]: the ssl-min-ver value is not configured and the ssl-max-ver value is lower than the default ssl-min-ver value (%s). "
+ "Setting the ssl-min-ver to %s. Use 'ssl-min-ver' to fix this.\n",
+ bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line, methodVersions[default_min_ver].name, methodVersions[max].name);
+ min = max;
+ }
+ }
+ /* The real min and max are determined from the configuration and openssl's capabilities */
+ if (min)
+ flags |= (methodVersions[min].flag - 1);
+ if (max)
+ flags |= ~((methodVersions[max].flag << 1) - 1);
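+ /* Worked example, assuming one-hot per-version flag bits in increasing
+ * order: if TLSv1.2's bit is 0x08, then min == TLSv1.2 sets 0x08 - 1 = 0x07
+ * (disables everything below it), and max == TLSv1.2 sets
+ * ~((0x08 << 1) - 1) = ~0x0F (disables everything above it), leaving only
+ * bit 0x08 cleared, i.e. enabled. The loop below scans for such cleared bits.
+ */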
+ /* find min, max and holes */
+ min = max = CONF_TLSV_NONE;
+ hole = 0;
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ /* version is supported by openssl && version not disabled in configuration */
+ if (methodVersions[i].option && !(flags & methodVersions[i].flag)) {
+ if (min) {
+ if (hole) {
+ ha_warning("Proxy '%s': SSL/TLS versions range not contiguous for bind '%s' at [%s:%d]. "
+ "Hole found for %s. Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n",
+ bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line,
+ methodVersions[hole].name);
+ hole = 0;
+ }
+ max = i;
+ }
+ else {
+ min = max = i;
+ }
+ }
+ else {
+ if (min)
+ hole = i;
+ }
+ if (!min) {
+ ha_alert("Proxy '%s': all SSL/TLS versions are disabled for bind '%s' at [%s:%d].\n",
+ bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr += 1;
+ }
+ /* save real min/max in bind_conf */
+ conf_ssl_methods->min = min;
+ conf_ssl_methods->max = max;
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL)
+ /* Keep the force-xxx implementation as it was in older haproxy versions. It's a
+ precautionary measure to avoid any surprise with older openssl versions. */
+ if (min == max)
+ methodVersions[min].ctx_set_version(ctx, SET_SERVER);
+ else
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++) {
+ /* clear every version flag in case SSL_CTX_new()
+ * returns an SSL_CTX with disabled versions */
+ SSL_CTX_clear_options(ctx, methodVersions[i].option);
+
+ if (flags & methodVersions[i].flag)
+ options |= methodVersions[i].option;
+
+ }
+#else /* openssl >= 1.1.0 */
+ /* setting the max version is required to cap the TLS version or activate new TLS (v1.3) */
+ methodVersions[min].ctx_set_version(ctx, SET_MIN);
+ methodVersions[max].ctx_set_version(ctx, SET_MAX);
+#endif
+
+ if (bind_conf->ssl_options & BC_SSL_O_NO_TLS_TICKETS)
+ options |= SSL_OP_NO_TICKET;
+ if (bind_conf->ssl_options & BC_SSL_O_PREF_CLIE_CIPH)
+ options &= ~SSL_OP_CIPHER_SERVER_PREFERENCE;
+
+#ifdef SSL_OP_NO_RENEGOTIATION
+ options |= SSL_OP_NO_RENEGOTIATION;
+#endif
+
+ SSL_CTX_set_options(ctx, options);
+
+#ifdef SSL_MODE_ASYNC
+ if (global_ssl.async)
+ mode |= SSL_MODE_ASYNC;
+#endif
+ SSL_CTX_set_mode(ctx, mode);
+ if (global_ssl.life_time)
+ SSL_CTX_set_timeout(ctx, global_ssl.life_time);
+
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+# ifdef OPENSSL_IS_BORINGSSL
+ SSL_CTX_set_select_certificate_cb(ctx, ssl_sock_switchctx_cbk);
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk);
+# elif defined(HAVE_SSL_CLIENT_HELLO_CB)
+# if defined(SSL_OP_NO_ANTI_REPLAY)
+ if (bind_conf->ssl_conf.early_data)
+ SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY);
+# endif /* ! SSL_OP_NO_ANTI_REPLAY */
+ SSL_CTX_set_client_hello_cb(ctx, ssl_sock_switchctx_cbk, NULL);
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk);
+# elif 0 && defined(USE_OPENSSL_WOLFSSL)
+ SSL_CTX_set_cert_cb(ctx, ssl_sock_switchctx_wolfSSL_cbk, bind_conf);
+# else
+ /* ! OPENSSL_IS_BORINGSSL && ! HAVE_SSL_CLIENT_HELLO_CB */
+ SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_cbk);
+# endif
+ SSL_CTX_set_tlsext_servername_arg(ctx, bind_conf);
+#endif /* ! SSL_CTRL_SET_TLSEXT_HOSTNAME */
+ return cfgerr;
+}
+
+
+static inline void sh_ssl_sess_free_blocks(struct shared_block *first, void *data)
+{
+ struct sh_ssl_sess_hdr *sh_ssl_sess = (struct sh_ssl_sess_hdr *)first->data;
+ if (first->len > 0)
+ sh_ssl_sess_tree_delete(sh_ssl_sess);
+}
+
+/* return first block from sh_ssl_sess */
+static inline struct shared_block *sh_ssl_sess_first_block(struct sh_ssl_sess_hdr *sh_ssl_sess)
+{
+ return (struct shared_block *)((unsigned char *)sh_ssl_sess - offsetof(struct shared_block, data));
+
+}
+
+/* store a session into the cache
+ * s_id : session id padded with zero to SSL_MAX_SSL_SESSION_ID_LENGTH
+ * data: asn1 encoded session
+ * data_len: asn1 encoded session length
+ * Returns 1 if the session was stored (else 0)
+ */
+static int sh_ssl_sess_store(unsigned char *s_id, unsigned char *data, int data_len)
+{
+ struct shared_block *first;
+ struct sh_ssl_sess_hdr *sh_ssl_sess, *oldsh_ssl_sess;
+
+ first = shctx_row_reserve_hot(ssl_shctx, NULL, data_len + sizeof(struct sh_ssl_sess_hdr));
+ if (!first) {
+ /* Could not retrieve enough free blocks to store that session */
+ return 0;
+ }
+
+ shctx_wrlock(ssl_shctx);
+
+ /* STORE the key in the first elem */
+ sh_ssl_sess = (struct sh_ssl_sess_hdr *)first->data;
+ memcpy(sh_ssl_sess->key_data, s_id, SSL_MAX_SSL_SESSION_ID_LENGTH);
+ first->len = sizeof(struct sh_ssl_sess_hdr);
+
+ /* this returns the node that already exists in the tree,
+ or the current node if none does; it never returns NULL */
+ oldsh_ssl_sess = sh_ssl_sess_tree_insert(sh_ssl_sess);
+ if (oldsh_ssl_sess != sh_ssl_sess) {
+ /* NOTE: the row cannot be in use here because we hold the write lock */
+ /* release the reserved row */
+ first->len = 0; /* reset the len so that the release callback is not called on it */
+ shctx_row_reattach(ssl_shctx, first);
+ /* replace the previous session already in the tree */
+ sh_ssl_sess = oldsh_ssl_sess;
+ /* ignore the previous session data, only use the header */
+ first = sh_ssl_sess_first_block(sh_ssl_sess);
+ shctx_row_detach(ssl_shctx, first);
+ first->len = sizeof(struct sh_ssl_sess_hdr);
+ }
+
+ if (shctx_row_data_append(ssl_shctx, first, data, data_len) < 0) {
+ shctx_row_reattach(ssl_shctx, first);
+ shctx_wrunlock(ssl_shctx); /* don't leak the write lock on the error path */
+ return 0;
+ }
+
+ shctx_row_reattach(ssl_shctx, first);
+
+ shctx_wrunlock(ssl_shctx);
+
+ return 1;
+}
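+
+/* For illustration, the resulting cache row layout is:
+ *
+ *   first->data: [ struct sh_ssl_sess_hdr (zero-padded session id key) ]
+ *                [ ASN1-encoded SSL_SESSION, <data_len> bytes          ]
+ *
+ * the header fills the start of the row and the encoded session is appended
+ * right after it by shctx_row_data_append().
+ */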
+
+/* SSL callback used when a new session is created while connecting to a server */
+static int ssl_sess_new_srv_cb(SSL *ssl, SSL_SESSION *sess)
+{
+ struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+ struct server *s;
+ uint old_tid;
+
+ s = __objt_server(conn->target);
+
+ /* RWLOCK: only read-lock the SSL cache even when writing to it, because there
+ * is one cache per thread; the lock only prevents it from being flushed from
+ * the CLI by another thread. However, we also write-lock our session element while
+ * updating it to make sure no other thread is reading it while we're copying
+ * or releasing it.
+ */
+
+ if (!(s->ssl_ctx.options & SRV_SSL_O_NO_REUSE)) {
+ int len;
+ unsigned char *ptr;
+ const char *sni;
+
+ /* determine the required len to store this new session */
+ len = i2d_SSL_SESSION(sess, NULL);
+ sni = SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name);
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+
+ ptr = s->ssl_ctx.reused_sess[tid].ptr;
+
+ /* we're updating the possibly shared session right now */
+ HA_RWLOCK_WRLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.reused_sess[tid].sess_lock);
+
+ if (!ptr || s->ssl_ctx.reused_sess[tid].allocated_size < len) {
+ /* insufficient storage, reallocate */
+ len = (len + 7) & -8; /* round up to the next multiple of 8 (e.g. 121 -> 128) */
+ ptr = realloc(ptr, len);
+ if (!ptr)
+ free(s->ssl_ctx.reused_sess[tid].ptr);
+ s->ssl_ctx.reused_sess[tid].ptr = ptr;
+ s->ssl_ctx.reused_sess[tid].allocated_size = len;
+ }
+
+ if (ptr) {
+ /* store the new session into ptr and advance it; save the
+ * resulting size. It's guaranteed to be equal to the returned
+ * len above, and the pointer to be advanced by as much.
+ */
+ s->ssl_ctx.reused_sess[tid].size = i2d_SSL_SESSION(sess, &ptr);
+ }
+
+ /* done updating the session */
+
+ /* Now we'll try to add or remove this entry as a valid one:
+ * - if no entry is set and we have one, let's share it
+ * - if our entry was set and we have no more, let's clear it
+ */
+ old_tid = HA_ATOMIC_LOAD(&s->ssl_ctx.last_ssl_sess_tid); // 0=none, >0 = tid + 1
+ if (!s->ssl_ctx.reused_sess[tid].ptr && old_tid == tid + 1)
+ HA_ATOMIC_CAS(&s->ssl_ctx.last_ssl_sess_tid, &old_tid, 0); // no more valid
+ else if (s->ssl_ctx.reused_sess[tid].ptr && !old_tid)
+ HA_ATOMIC_CAS(&s->ssl_ctx.last_ssl_sess_tid, &old_tid, tid + 1);
+
+ if (s->ssl_ctx.reused_sess[tid].sni) {
+ /* if the new sni is empty or isn't the same as the old one */
+ if ((!sni) || strcmp(s->ssl_ctx.reused_sess[tid].sni, sni) != 0) {
+ ha_free(&s->ssl_ctx.reused_sess[tid].sni);
+ if (sni)
+ s->ssl_ctx.reused_sess[tid].sni = strdup(sni);
+ }
+ } else if (sni) {
+ /* if there wasn't an old sni but there is a new one */
+ s->ssl_ctx.reused_sess[tid].sni = strdup(sni);
+ }
+ HA_RWLOCK_WRUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.reused_sess[tid].sess_lock);
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ } else {
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+
+ if (s->ssl_ctx.reused_sess[tid].ptr) {
+ HA_RWLOCK_WRLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.reused_sess[tid].sess_lock);
+ ha_free(&s->ssl_ctx.reused_sess[tid].ptr);
+ HA_RWLOCK_WRUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.reused_sess[tid].sess_lock);
+ }
+
+ old_tid = HA_ATOMIC_LOAD(&s->ssl_ctx.last_ssl_sess_tid); // 0=none, >0 = tid + 1
+ if (old_tid == tid + 1)
+ HA_ATOMIC_CAS(&s->ssl_ctx.last_ssl_sess_tid, &old_tid, 0); // no more valid
+
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ }
+
+ return 0;
+}
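+
+/* For illustration, <last_ssl_sess_tid> encodes "which thread last stored a
+ * valid session": 0 means none, N means thread N-1. E.g. after thread 2
+ * stores a session the field holds 3; when thread 2's entry is cleared it
+ * is CASed back to 0, as done in both branches above.
+ */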
+
+
+/* SSL callback used on new session creation */
+int sh_ssl_sess_new_cb(SSL *ssl, SSL_SESSION *sess)
+{
+ unsigned char encsess[SHSESS_MAX_DATA_LEN]; /* encoded session */
+ unsigned char encid[SSL_MAX_SSL_SESSION_ID_LENGTH]; /* encoded id */
+ unsigned char *p;
+ int data_len;
+ unsigned int sid_length;
+ const unsigned char *sid_data;
+
+ /* The session id is already stored in the key and is known,
+ * so we don't store it again, in order to save space.
+ * note: SSL_SESSION_set1_id is using
+ * a memcpy so we need to use a different pointer
+ * than sid_data or sid_ctx_data to avoid valgrind
+ * complaining.
+ */
+
+ sid_data = SSL_SESSION_get_id(sess, &sid_length);
+
+ /* copy the value into another buffer */
+ memcpy(encid, sid_data, sid_length);
+
+ /* pad with 0 */
+ if (sid_length < SSL_MAX_SSL_SESSION_ID_LENGTH)
+ memset(encid + sid_length, 0, SSL_MAX_SSL_SESSION_ID_LENGTH-sid_length);
+
+ /* force length to zero to avoid ASN1 encoding */
+ SSL_SESSION_set1_id(sess, encid, 0);
+
+ /* also force the context length to zero to avoid its ASN1 encoding */
+ SSL_SESSION_set1_id_context(sess, (const unsigned char *)SHCTX_APPNAME, 0);
+
+ /* check if buffer is large enough for the ASN1 encoded session */
+ data_len = i2d_SSL_SESSION(sess, NULL);
+ if (data_len > SHSESS_MAX_DATA_LEN)
+ goto err;
+
+ p = encsess;
+
+ /* process ASN1 session encoding before the lock */
+ i2d_SSL_SESSION(sess, &p);
+
+
+ /* store to cache */
+ sh_ssl_sess_store(encid, encsess, data_len);
+err:
+ /* reset original length values */
+ SSL_SESSION_set1_id(sess, encid, sid_length);
+ SSL_SESSION_set1_id_context(sess, (const unsigned char *)SHCTX_APPNAME, strlen(SHCTX_APPNAME));
+
+ return 0; /* do not increment session reference count */
+}
+
+/* SSL callback used to look up an existing session when none was found in the internal cache */
+SSL_SESSION *sh_ssl_sess_get_cb(SSL *ssl, __OPENSSL_110_CONST__ unsigned char *key, int key_len, int *do_copy)
+{
+ struct sh_ssl_sess_hdr *sh_ssl_sess;
+ unsigned char data[SHSESS_MAX_DATA_LEN], *p;
+ unsigned char tmpkey[SSL_MAX_SSL_SESSION_ID_LENGTH];
+ SSL_SESSION *sess;
+ struct shared_block *first;
+
+ _HA_ATOMIC_INC(&global.shctx_lookups);
+
+ /* allow the session to be freed automatically by openssl */
+ *do_copy = 0;
+
+ /* the tree key is the zero-padded session id */
+ if (key_len < SSL_MAX_SSL_SESSION_ID_LENGTH) {
+ memcpy(tmpkey, key, key_len);
+ memset(tmpkey + key_len, 0, SSL_MAX_SSL_SESSION_ID_LENGTH - key_len);
+ key = tmpkey;
+ }
+
+ /* lock cache */
+ shctx_wrlock(ssl_shctx);
+
+ /* lookup for session */
+ sh_ssl_sess = sh_ssl_sess_tree_lookup(key);
+ if (!sh_ssl_sess) {
+ /* no session found: unlock cache and exit */
+ shctx_wrunlock(ssl_shctx);
+ _HA_ATOMIC_INC(&global.shctx_misses);
+ return NULL;
+ }
+
+ /* sh_ssl_sess (shared_block->data) is at the end of shared_block */
+ first = sh_ssl_sess_first_block(sh_ssl_sess);
+
+ shctx_row_data_get(ssl_shctx, first, data, sizeof(struct sh_ssl_sess_hdr), first->len-sizeof(struct sh_ssl_sess_hdr));
+
+ shctx_wrunlock(ssl_shctx);
+
+ /* decode ASN1 session */
+ p = data;
+ sess = d2i_SSL_SESSION(NULL, (const unsigned char **)&p, first->len-sizeof(struct sh_ssl_sess_hdr));
+ /* Reset session id and session id context */
+ if (sess) {
+ SSL_SESSION_set1_id(sess, key, key_len);
+ SSL_SESSION_set1_id_context(sess, (const unsigned char *)SHCTX_APPNAME, strlen(SHCTX_APPNAME));
+ }
+
+ return sess;
+}
+
+
+/* SSL callback used to signal that a session is no longer used in the internal cache */
+void sh_ssl_sess_remove_cb(SSL_CTX *ctx, SSL_SESSION *sess)
+{
+ struct sh_ssl_sess_hdr *sh_ssl_sess;
+ unsigned char tmpkey[SSL_MAX_SSL_SESSION_ID_LENGTH];
+ unsigned int sid_length;
+ const unsigned char *sid_data;
+ (void)ctx;
+
+ sid_data = SSL_SESSION_get_id(sess, &sid_length);
+ /* the tree key is the zero-padded session id */
+ if (sid_length < SSL_MAX_SSL_SESSION_ID_LENGTH) {
+ memcpy(tmpkey, sid_data, sid_length);
+ memset(tmpkey+sid_length, 0, SSL_MAX_SSL_SESSION_ID_LENGTH - sid_length);
+ sid_data = tmpkey;
+ }
+
+ shctx_wrlock(ssl_shctx);
+
+ /* lookup for session */
+ sh_ssl_sess = sh_ssl_sess_tree_lookup(sid_data);
+ if (sh_ssl_sess) {
+ /* free session */
+ sh_ssl_sess_tree_delete(sh_ssl_sess);
+ }
+
+ /* unlock cache */
+ shctx_wrunlock(ssl_shctx);
+}
+
+/* Set session cache mode to server and disable openssl internal cache.
+ * Set shared cache callbacks on an ssl context.
+ * The shared context MUST be initialized first. */
+void ssl_set_shctx(SSL_CTX *ctx)
+{
+ SSL_CTX_set_session_id_context(ctx, (const unsigned char *)SHCTX_APPNAME, strlen(SHCTX_APPNAME));
+
+ if (!ssl_shctx) {
+ SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_OFF);
+ return;
+ }
+
+ SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_SERVER |
+ SSL_SESS_CACHE_NO_INTERNAL |
+ SSL_SESS_CACHE_NO_AUTO_CLEAR);
+
+ /* Set callbacks */
+ SSL_CTX_sess_set_new_cb(ctx, sh_ssl_sess_new_cb);
+ SSL_CTX_sess_set_get_cb(ctx, sh_ssl_sess_get_cb);
+ SSL_CTX_sess_set_remove_cb(ctx, sh_ssl_sess_remove_cb);
+}
+
+/*
+ * https://developer.mozilla.org/en-US/docs/Mozilla/Projects/NSS/Key_Log_Format
+ *
+ * The format is:
+ * * <Label> <space> <ClientRandom> <space> <Secret>
+ * We only need to copy the secret as there is a sample fetch for the ClientRandom
+ */
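+/* For illustration, a hypothetical TLS 1.3 keylog line looks like
+ * (hex strings shortened):
+ *
+ *   CLIENT_TRAFFIC_SECRET_0 a1b2..64 hex chars..c3d4 5e6f..secret hex..
+ *
+ * only the trailing secret is kept by the callback below.
+ */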
+
+#ifdef HAVE_SSL_KEYLOG
+void SSL_CTX_keylog(const SSL *ssl, const char *line)
+{
+ struct ssl_keylog *keylog;
+ char *lastarg = NULL;
+ char *dst = NULL;
+
+#ifdef USE_QUIC_OPENSSL_COMPAT
+ quic_tls_compat_keylog_callback(ssl, line);
+#endif
+ keylog = SSL_get_ex_data(ssl, ssl_keylog_index);
+ if (!keylog)
+ return;
+
+ lastarg = strrchr(line, ' ');
+ if (lastarg == NULL || ++lastarg == NULL)
+ return;
+
+ dst = pool_alloc(pool_head_ssl_keylog_str);
+ if (!dst)
+ return;
+
+ strncpy(dst, lastarg, SSL_KEYLOG_MAX_SECRET_SIZE-1);
+ dst[SSL_KEYLOG_MAX_SECRET_SIZE-1] = '\0';
+
+ if (strncmp(line, "CLIENT_RANDOM ", strlen("CLIENT_RANDOM ")) == 0) {
+ if (keylog->client_random)
+ goto error;
+ keylog->client_random = dst;
+
+ } else if (strncmp(line, "CLIENT_EARLY_TRAFFIC_SECRET ", strlen("CLIENT_EARLY_TRAFFIC_SECRET ")) == 0) {
+ if (keylog->client_early_traffic_secret)
+ goto error;
+ keylog->client_early_traffic_secret = dst;
+
+ } else if (strncmp(line, "CLIENT_HANDSHAKE_TRAFFIC_SECRET ", strlen("CLIENT_HANDSHAKE_TRAFFIC_SECRET ")) == 0) {
+ if (keylog->client_handshake_traffic_secret)
+ goto error;
+ keylog->client_handshake_traffic_secret = dst;
+
+ } else if (strncmp(line, "SERVER_HANDSHAKE_TRAFFIC_SECRET ", strlen("SERVER_HANDSHAKE_TRAFFIC_SECRET ")) == 0) {
+ if (keylog->server_handshake_traffic_secret)
+ goto error;
+ keylog->server_handshake_traffic_secret = dst;
+
+ } else if (strncmp(line, "CLIENT_TRAFFIC_SECRET_0 ", strlen("CLIENT_TRAFFIC_SECRET_0 ")) == 0) {
+ if (keylog->client_traffic_secret_0)
+ goto error;
+ keylog->client_traffic_secret_0 = dst;
+
+ } else if (strncmp(line, "SERVER_TRAFFIC_SECRET_0 ", strlen("SERVER_TRAFFIC_SECRET_0 ")) == 0) {
+ if (keylog->server_traffic_secret_0)
+ goto error;
+ keylog->server_traffic_secret_0 = dst;
+
+ } else if (strncmp(line, "EARLY_EXPORTER_SECRET ", strlen("EARLY_EXPORTER_SECRET ")) == 0) {
+ if (keylog->early_exporter_secret)
+ goto error;
+ keylog->early_exporter_secret = dst;
+
+ } else if (strncmp(line, "EXPORTER_SECRET ", strlen("EXPORTER_SECRET ")) == 0) {
+ if (keylog->exporter_secret)
+ goto error;
+ keylog->exporter_secret = dst;
+ } else {
+ goto error;
+ }
+
+ return;
+
+error:
+ pool_free(pool_head_ssl_keylog_str, dst);
+
+ return;
+}
+#endif
+
+/*
+ * This function applies the SSL configuration to an SSL_CTX.
+ * It returns an error code and fills the <err> buffer.
+ */
+static int ssl_sock_prepare_ctx(struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf, SSL_CTX *ctx, char **err)
+{
+ struct proxy *curproxy = bind_conf->frontend;
+ int cfgerr = 0;
+ int verify = SSL_VERIFY_NONE;
+ struct ssl_bind_conf __maybe_unused *ssl_conf_cur;
+ const char *conf_ciphers;
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ const char *conf_ciphersuites;
+#endif
+ const char *conf_curves = NULL;
+ X509_STORE *store = SSL_CTX_get_cert_store(ctx);
+#if defined(SSL_CTX_set1_sigalgs_list)
+ const char *conf_sigalgs = NULL;
+#endif
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ const char *conf_client_sigalgs = NULL;
+#endif
+
+ if (ssl_conf) {
+ struct tls_version_filter *conf_ssl_methods = &ssl_conf->ssl_methods;
+ int i, min, max;
+ int flags = MC_SSL_O_ALL;
+
+ /* The real min and max are determined from the configuration and openssl's capabilities */
+ min = conf_ssl_methods->min ? conf_ssl_methods->min : bind_conf->ssl_conf.ssl_methods.min;
+ max = conf_ssl_methods->max ? conf_ssl_methods->max : bind_conf->ssl_conf.ssl_methods.max;
+ if (min)
+ flags |= (methodVersions[min].flag - 1);
+ if (max)
+ flags |= ~((methodVersions[max].flag << 1) - 1);
+ min = max = CONF_TLSV_NONE;
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ if (methodVersions[i].option && !(flags & methodVersions[i].flag)) {
+ if (min)
+ max = i;
+ else
+ min = max = i;
+ }
+ /* save real min/max */
+ conf_ssl_methods->min = min;
+ conf_ssl_methods->max = max;
+ if (!min) {
+ memprintf(err, "%sProxy '%s': all SSL/TLS versions are disabled for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", bind_conf->frontend->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+
+ switch ((ssl_conf && ssl_conf->verify) ? ssl_conf->verify : bind_conf->ssl_conf.verify) {
+ case SSL_SOCK_VERIFY_NONE:
+ verify = SSL_VERIFY_NONE;
+ break;
+ case SSL_SOCK_VERIFY_OPTIONAL:
+ verify = SSL_VERIFY_PEER;
+ break;
+ case SSL_SOCK_VERIFY_REQUIRED:
+ verify = SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT;
+ break;
+ }
+ SSL_CTX_set_verify(ctx, verify, ssl_sock_bind_verifycbk);
+ if (verify & SSL_VERIFY_PEER) {
+ char *ca_file = (ssl_conf && ssl_conf->ca_file) ? ssl_conf->ca_file : bind_conf->ssl_conf.ca_file;
+ char *ca_verify_file = (ssl_conf && ssl_conf->ca_verify_file) ? ssl_conf->ca_verify_file : bind_conf->ssl_conf.ca_verify_file;
+ char *crl_file = (ssl_conf && ssl_conf->crl_file) ? ssl_conf->crl_file : bind_conf->ssl_conf.crl_file;
+ if (ca_file || ca_verify_file) {
+ /* set CAfile to verify */
+ if (ca_file && !ssl_set_verify_locations_file(ctx, ca_file)) {
+ memprintf(err, "%sProxy '%s': unable to set CA file '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, ca_file, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ if (ca_verify_file && !ssl_set_verify_locations_file(ctx, ca_verify_file)) {
+ memprintf(err, "%sProxy '%s': unable to set CA-no-names file '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, ca_verify_file, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ if (ca_file && !((ssl_conf && ssl_conf->no_ca_names) || bind_conf->ssl_conf.no_ca_names)) {
+ /* set CA names for client cert request, function returns void */
+ SSL_CTX_set_client_CA_list(ctx, SSL_dup_CA_list(ssl_get_client_ca_file(ca_file)));
+ }
+#ifdef USE_OPENSSL_WOLFSSL
+ /* WolfSSL activates CRL checks by default so we need to disable them */
+ X509_STORE_set_flags(store, 0);
+#endif
+ }
+ else {
+ memprintf(err, "%sProxy '%s': verify is enabled but no CA file specified for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#ifdef X509_V_FLAG_CRL_CHECK
+ if (crl_file) {
+
+ if (!ssl_set_cert_crl_file(store, crl_file)) {
+ memprintf(err, "%sProxy '%s': unable to configure CRL file '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, crl_file, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ else {
+ X509_STORE_set_flags(store, X509_V_FLAG_CRL_CHECK|X509_V_FLAG_CRL_CHECK_ALL);
+ }
+ }
+#endif
+ ERR_clear_error();
+ }
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+ if (bind_conf->keys_ref) {
+ if (!SSL_CTX_set_tlsext_ticket_key_evp_cb(ctx, ssl_tlsext_ticket_key_cb)) {
+ memprintf(err, "%sProxy '%s': unable to set callback for TLS ticket validation for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+#endif
+
+ ssl_set_shctx(ctx);
+ conf_ciphers = (ssl_conf && ssl_conf->ciphers) ? ssl_conf->ciphers : bind_conf->ssl_conf.ciphers;
+ if (conf_ciphers &&
+ !SSL_CTX_set_cipher_list(ctx, conf_ciphers)) {
+ memprintf(err, "%sProxy '%s': unable to set SSL cipher list to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, conf_ciphers, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ conf_ciphersuites = (ssl_conf && ssl_conf->ciphersuites) ? ssl_conf->ciphersuites : bind_conf->ssl_conf.ciphersuites;
+ if (conf_ciphersuites &&
+ !SSL_CTX_set_ciphersuites(ctx, conf_ciphersuites)) {
+ memprintf(err, "%sProxy '%s': unable to set TLS 1.3 cipher suites to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, conf_ciphersuites, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#endif
+
+#ifndef OPENSSL_NO_DH
+ if (!local_dh_1024)
+ local_dh_1024 = ssl_get_dh_1024();
+ if (!local_dh_2048)
+ local_dh_2048 = ssl_get_dh_2048();
+ if (!local_dh_4096)
+ local_dh_4096 = ssl_get_dh_4096();
+#endif /* OPENSSL_NO_DH */
+
+ SSL_CTX_set_info_callback(ctx, ssl_sock_infocbk);
+#ifdef SSL_CTRL_SET_MSG_CALLBACK
+ SSL_CTX_set_msg_callback(ctx, ssl_sock_msgcbk);
+#endif
+#ifdef HAVE_SSL_KEYLOG
+ /* only activate the keylog callback if required, to avoid a performance penalty */
+ if (global_ssl.keylog > 0)
+ SSL_CTX_set_keylog_callback(ctx, SSL_CTX_keylog);
+#endif
+
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ ssl_conf_cur = NULL;
+ if (ssl_conf && ssl_conf->npn_str)
+ ssl_conf_cur = ssl_conf;
+ else if (bind_conf->ssl_conf.npn_str)
+ ssl_conf_cur = &bind_conf->ssl_conf;
+ if (ssl_conf_cur)
+ SSL_CTX_set_next_protos_advertised_cb(ctx, ssl_sock_advertise_npn_protos, ssl_conf_cur);
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ ssl_conf_cur = NULL;
+ if (ssl_conf && ssl_conf->alpn_str)
+ ssl_conf_cur = ssl_conf;
+ else if (bind_conf->ssl_conf.alpn_str)
+ ssl_conf_cur = &bind_conf->ssl_conf;
+ if (ssl_conf_cur && ssl_conf_cur->alpn_len)
+ SSL_CTX_set_alpn_select_cb(ctx, ssl_sock_advertise_alpn_protos, ssl_conf_cur);
+#endif
+#if defined(SSL_CTX_set1_curves_list)
+ conf_curves = (ssl_conf && ssl_conf->curves) ? ssl_conf->curves : bind_conf->ssl_conf.curves;
+ if (conf_curves) {
+ if (!SSL_CTX_set1_curves_list(ctx, conf_curves)) {
+ memprintf(err, "%sProxy '%s': unable to set SSL curves list to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, conf_curves, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ (void)SSL_CTX_set_ecdh_auto(ctx, 1);
+ }
+#endif /* defined(SSL_CTX_set1_curves_list) */
+
+ if (!conf_curves) {
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L)
+#if defined(SSL_CTX_set1_curves_list)
+ const char *ecdhe = (ssl_conf && ssl_conf->ecdhe) ? ssl_conf->ecdhe :
+ (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe :
+ NULL);
+
+ if (ecdhe && SSL_CTX_set1_curves_list(ctx, ecdhe) == 0) {
+ memprintf(err, "%sProxy '%s': unable to set elliptic named curve to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, ecdhe, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+#endif /* defined(SSL_CTX_set1_curves_list) */
+#else
+#if defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH)
+ int i;
+ EC_KEY *ecdh;
+
+ const char *ecdhe = (ssl_conf && ssl_conf->ecdhe) ? ssl_conf->ecdhe :
+ (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe :
+ ECDHE_DEFAULT_CURVE);
+
+ i = OBJ_sn2nid(ecdhe);
+ if (!i || ((ecdh = EC_KEY_new_by_curve_name(i)) == NULL)) {
+ memprintf(err, "%sProxy '%s': unable to set elliptic named curve to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, ecdhe, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ else {
+ SSL_CTX_set_tmp_ecdh(ctx, ecdh);
+ EC_KEY_free(ecdh);
+ }
+#endif /* defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) */
+#endif /* HA_OPENSSL_VERSION_NUMBER >= 0x10101000L */
+ }
+
+#if defined(SSL_CTX_set1_sigalgs_list)
+ conf_sigalgs = (ssl_conf && ssl_conf->sigalgs) ? ssl_conf->sigalgs : bind_conf->ssl_conf.sigalgs;
+ if (conf_sigalgs) {
+ if (!SSL_CTX_set1_sigalgs_list(ctx, conf_sigalgs)) {
+ memprintf(err, "%sProxy '%s': unable to set SSL Signature Algorithm list to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, conf_sigalgs, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+#endif
+
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ conf_client_sigalgs = (ssl_conf && ssl_conf->client_sigalgs) ? ssl_conf->client_sigalgs : bind_conf->ssl_conf.client_sigalgs;
+ if (conf_client_sigalgs) {
+ if (!SSL_CTX_set1_client_sigalgs_list(ctx, conf_client_sigalgs)) {
+ memprintf(err, "%sProxy '%s': unable to set SSL Signature Algorithm list to '%s' for bind '%s' at [%s:%d].\n",
+ err && *err ? *err : "", curproxy->id, conf_client_sigalgs, bind_conf->arg, bind_conf->file, bind_conf->line);
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+ }
+ }
+#endif
+
+#ifdef USE_QUIC_OPENSSL_COMPAT
+ if (!quic_tls_compat_init(bind_conf, ctx))
+ cfgerr |= ERR_ALERT | ERR_FATAL;
+#endif
+
+ return cfgerr;
+}
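+
+/* For illustration, the settings applied above typically originate from a
+ * bind line such as (hypothetical path and values):
+ *
+ *   bind :443 ssl crt /etc/haproxy/site.pem alpn h2,http/1.1 ssl-min-ver TLSv1.2
+ */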
+
+
+/*
+ * Prepare the SSL_CTX based on the bind line configuration.
+ * Since the CA file loading is made depending on the verify option of the bind
+ * line, the link between the SSL_CTX and the CA file tree entry is made here.
+ * If we want to create a link between the CA file entry and the corresponding
+ * ckch instance (for CA file hot update), it needs to be done after
+ * ssl_sock_prepare_ctx.
+ * Returns 0 in case of success.
+ */
+int ssl_sock_prep_ctx_and_inst(struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf,
+ SSL_CTX *ctx, struct ckch_inst *ckch_inst, char **err)
+{
+ int errcode = 0;
+
+ errcode |= ssl_sock_prepare_ctx(bind_conf, ssl_conf, ctx, err);
+ if (!errcode && ckch_inst)
+ ckch_inst_add_cafile_link(ckch_inst, bind_conf, ssl_conf, NULL);
+
+ return errcode;
+}
+
+static int ssl_sock_srv_hostcheck(const char *pattern, const char *hostname)
+{
+ const char *pattern_wildcard, *pattern_left_label_end, *hostname_left_label_end;
+ size_t prefixlen, suffixlen;
+
+ /* Trivial case */
+ if (strcasecmp(pattern, hostname) == 0)
+ return 1;
+
+ /* The rest of this logic is based on RFC 6125, section 6.4.3
+ * (http://tools.ietf.org/html/rfc6125#section-6.4.3) */
+
+ pattern_wildcard = NULL;
+ pattern_left_label_end = pattern;
+ while (*pattern_left_label_end != '.') {
+ switch (*pattern_left_label_end) {
+ case 0:
+ /* End of label not found */
+ return 0;
+ case '*':
+ /* If there is more than one wildcard */
+ if (pattern_wildcard)
+ return 0;
+ pattern_wildcard = pattern_left_label_end;
+ break;
+ }
+ pattern_left_label_end++;
+ }
+
+ /* If it's not trivial and there is no wildcard, it can't
+ * match */
+ if (!pattern_wildcard)
+ return 0;
+
+ /* Make sure all labels match except the leftmost */
+ hostname_left_label_end = strchr(hostname, '.');
+ if (!hostname_left_label_end
+ || strcasecmp(pattern_left_label_end, hostname_left_label_end) != 0)
+ return 0;
+
+ /* Make sure the leftmost label of the hostname is long enough
+ * that the wildcard can match */
+ if (hostname_left_label_end - hostname < (pattern_left_label_end - pattern) - 1)
+ return 0;
+
+ /* Finally compare the string on either side of the
+ * wildcard */
+ prefixlen = pattern_wildcard - pattern;
+ suffixlen = pattern_left_label_end - (pattern_wildcard + 1);
+ if ((prefixlen && (strncasecmp(pattern, hostname, prefixlen) != 0))
+ || (suffixlen && (strncasecmp(pattern_wildcard + 1, hostname_left_label_end - suffixlen, suffixlen) != 0)))
+ return 0;
+
+ return 1;
+}
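+
+/* For illustration, with the RFC 6125 rules implemented above:
+ *
+ *   ssl_sock_srv_hostcheck("*.example.com", "foo.example.com")      -> 1
+ *   ssl_sock_srv_hostcheck("f*o.example.com", "foo.example.com")    -> 1
+ *   ssl_sock_srv_hostcheck("*.example.com", "foo.bar.example.com")  -> 0
+ *   ssl_sock_srv_hostcheck("*.example.com", "example.com")          -> 0
+ */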
+
+static int ssl_sock_srv_verifycbk(int ok, X509_STORE_CTX *ctx)
+{
+ SSL *ssl;
+ struct connection *conn;
+ struct ssl_sock_ctx *ssl_ctx;
+ const char *servername;
+ const char *sni;
+
+ int depth;
+ X509 *cert;
+ STACK_OF(GENERAL_NAME) *alt_names;
+ int i;
+ X509_NAME *cert_subject;
+ char *str;
+
+ if (ok == 0)
+ return ok;
+
+ ssl = X509_STORE_CTX_get_ex_data(ctx, SSL_get_ex_data_X509_STORE_CTX_idx());
+ conn = SSL_get_ex_data(ssl, ssl_app_data_index);
+ ssl_ctx = __conn_get_ssl_sock_ctx(conn);
+
+ /* We're checking if the provided hostnames match the desired one. The
+ * desired hostname comes from the SNI we presented if any, or if not
+ * provided then it may have been explicitly stated using a "verifyhost"
+ * directive. If neither is set, we don't care about the name so the
+ * verification is OK.
+ */
+ servername = SSL_get_servername(ssl_ctx->ssl, TLSEXT_NAMETYPE_host_name);
+ sni = servername;
+ if (!servername) {
+ servername = __objt_server(conn->target)->ssl_ctx.verify_host;
+ if (!servername)
+ return ok;
+ }
+
+ /* We only need to verify the CN on the actual server cert,
+ * not the indirect CAs */
+ depth = X509_STORE_CTX_get_error_depth(ctx);
+ if (depth != 0)
+ return ok;
+
+ /* At this point, the cert is *not* OK unless we can find a
+ * hostname match */
+ ok = 0;
+
+ cert = X509_STORE_CTX_get_current_cert(ctx);
+ /* It seems like this might happen if verify peer isn't set */
+ if (!cert)
+ return ok;
+
+ alt_names = X509_get_ext_d2i(cert, NID_subject_alt_name, NULL, NULL);
+ if (alt_names) {
+ for (i = 0; !ok && i < sk_GENERAL_NAME_num(alt_names); i++) {
+ GENERAL_NAME *name = sk_GENERAL_NAME_value(alt_names, i);
+ if (name->type == GEN_DNS) {
+#if HA_OPENSSL_VERSION_NUMBER < 0x00907000L
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, name->d.ia5) >= 0) {
+#else
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, name->d.dNSName) >= 0) {
+#endif
+ ok = ssl_sock_srv_hostcheck(str, servername);
+ OPENSSL_free(str);
+ }
+ }
+ }
+ sk_GENERAL_NAME_pop_free(alt_names, GENERAL_NAME_free);
+ }
+
+ cert_subject = X509_get_subject_name(cert);
+ i = -1;
+ while (!ok && (i = X509_NAME_get_index_by_NID(cert_subject, NID_commonName, i)) != -1) {
+ X509_NAME_ENTRY *entry = X509_NAME_get_entry(cert_subject, i);
+ ASN1_STRING *value;
+ value = X509_NAME_ENTRY_get_data(entry);
+ if (ASN1_STRING_to_UTF8((unsigned char **)&str, value) >= 0) {
+ ok = ssl_sock_srv_hostcheck(str, servername);
+ OPENSSL_free(str);
+ }
+ }
+
+ /* report the mismatch and indicate if SNI was used or not */
+ if (!ok && !conn->err_code)
+ conn->err_code = sni ? CO_ER_SSL_MISMATCH_SNI : CO_ER_SSL_MISMATCH;
+ return ok;
+}
+
+/* prepare ssl context from servers options. Returns an error count */
+int ssl_sock_prepare_srv_ctx(struct server *srv)
+{
+ int cfgerr = 0;
+ SSL_CTX *ctx;
+ /* Automatic memory computations need to know we use SSL here.
+ * If this is an internal proxy, don't use it for the computation */
+ if (!(srv->proxy->cap & PR_CAP_INT))
+ global.ssl_used_backend = 1;
+
+ /* Initiate SSL context for current server */
+ if (!srv->ssl_ctx.reused_sess) {
+ if ((srv->ssl_ctx.reused_sess = calloc(1, global.nbthread*sizeof(*srv->ssl_ctx.reused_sess))) == NULL) {
+ ha_alert("out of memory.\n");
+ cfgerr++;
+ return cfgerr;
+ }
+ }
+ if (srv->use_ssl == 1)
+ srv->xprt = &ssl_sock;
+
+ if (srv->ssl_ctx.client_crt) {
+ const int create_if_none = srv->flags & SRV_F_DYNAMIC ? 0 : 1;
+ char *err = NULL;
+ int err_code = 0;
+
+ /* If there is a crt keyword there, the SSL_CTX will be created here. */
+ err_code = ssl_sock_load_srv_cert(srv->ssl_ctx.client_crt, srv, create_if_none, &err);
+ if (err_code != ERR_NONE) {
+ if ((err_code & ERR_WARN) && !(err_code & ERR_ALERT))
+ ha_warning("%s", err);
+ else
+ ha_alert("%s", err);
+
+ if (err_code & (ERR_FATAL|ERR_ABORT))
+ cfgerr++;
+ }
+ ha_free(&err);
+ }
+
+ ctx = srv->ssl_ctx.ctx;
+
+ /* The context will be uninitialized if there wasn't any "cert" option
+ * in the server line. */
+ if (!ctx) {
+ ctx = SSL_CTX_new(SSLv23_client_method());
+ if (!ctx) {
+ ha_alert("unable to allocate ssl context.\n");
+ cfgerr++;
+ return cfgerr;
+ }
+
+ srv->ssl_ctx.ctx = ctx;
+ }
+
+ cfgerr += ssl_sock_prep_srv_ctx_and_inst(srv, srv->ssl_ctx.ctx, srv->ssl_ctx.inst);
+
+ return cfgerr;
+}
+
+/* Initialize an SSL context that will be used on the backend side.
+ * Returns an error count.
+ */
+static int ssl_sock_prepare_srv_ssl_ctx(const struct server *srv, SSL_CTX *ctx)
+{
+ struct proxy *curproxy = srv->proxy;
+ int cfgerr = 0;
+ long options =
+ SSL_OP_ALL | /* all known workarounds for bugs */
+ SSL_OP_NO_SSLv2 |
+ SSL_OP_NO_COMPRESSION;
+ long mode =
+ SSL_MODE_ENABLE_PARTIAL_WRITE |
+ SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER |
+ SSL_MODE_RELEASE_BUFFERS |
+ SSL_MODE_SMALL_BUFFERS;
+ int verify = SSL_VERIFY_NONE;
+ const struct tls_version_filter *conf_ssl_methods = &srv->ssl_ctx.methods;
+ int i, min, max, hole;
+ int flags = MC_SSL_O_ALL;
+#if defined(SSL_CTX_set1_sigalgs_list)
+ const char *conf_sigalgs = NULL;
+#endif
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ const char *conf_client_sigalgs = NULL;
+#endif
+#if defined(SSL_CTX_set1_curves_list)
+ const char *conf_curves = NULL;
+#endif
+
+ if (conf_ssl_methods->flags && (conf_ssl_methods->min || conf_ssl_methods->max))
+ ha_warning("no-sslv3/no-tlsv1x are ignored for this server. "
+ "Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n");
+ else
+ flags = conf_ssl_methods->flags;
+
+ /* The real min and max are determined from the configuration and openssl's capabilities */
+ if (conf_ssl_methods->min)
+ flags |= (methodVersions[conf_ssl_methods->min].flag - 1);
+ if (conf_ssl_methods->max)
+ flags |= ~((methodVersions[conf_ssl_methods->max].flag << 1) - 1);
+
+ /* find min, max and holes */
+ min = max = CONF_TLSV_NONE;
+ hole = 0;
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ /* version is supported by openssl && version not disabled in configuration */
+ if (methodVersions[i].option && !(flags & methodVersions[i].flag)) {
+ if (min) {
+ if (hole) {
+ ha_warning("%s '%s': SSL/TLS versions range not contiguous for server '%s'. "
+ "Hole found for %s. Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n",
+ proxy_type_str(curproxy), curproxy->id, srv->id,
+ methodVersions[hole].name);
+ hole = 0;
+ }
+ max = i;
+ }
+ else {
+ min = max = i;
+ }
+ }
+ else {
+ if (min)
+ hole = i;
+ }
+ if (!min) {
+ ha_alert("%s '%s': all SSL/TLS versions are disabled for server '%s'.\n",
+ proxy_type_str(curproxy), curproxy->id, srv->id);
+ cfgerr += 1;
+ }
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL)
+ /* Keep the force-xxx implementation as it was in older haproxy versions. It's a
+ precautionary measure to avoid any surprise with older openssl versions. */
+ if (min == max)
+ methodVersions[min].ctx_set_version(ctx, SET_CLIENT);
+ else
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ if (flags & methodVersions[i].flag)
+ options |= methodVersions[i].option;
+#else /* openssl >= 1.1.0 */
+ /* setting the max version is required to cap the TLS version or activate new TLS (v1.3) */
+ methodVersions[min].ctx_set_version(ctx, SET_MIN);
+ methodVersions[max].ctx_set_version(ctx, SET_MAX);
+#endif
+
+ if (srv->ssl_ctx.options & SRV_SSL_O_NO_TLS_TICKETS)
+ options |= SSL_OP_NO_TICKET;
+ SSL_CTX_set_options(ctx, options);
+
+#ifdef SSL_MODE_ASYNC
+ if (global_ssl.async)
+ mode |= SSL_MODE_ASYNC;
+#endif
+ SSL_CTX_set_mode(ctx, mode);
+
+ if (global.ssl_server_verify == SSL_SERVER_VERIFY_REQUIRED)
+ verify = SSL_VERIFY_PEER;
+ switch (srv->ssl_ctx.verify) {
+ case SSL_SOCK_VERIFY_NONE:
+ verify = SSL_VERIFY_NONE;
+ break;
+ case SSL_SOCK_VERIFY_REQUIRED:
+ verify = SSL_VERIFY_PEER;
+ break;
+ }
+ SSL_CTX_set_verify(ctx, verify,
+ (srv->ssl_ctx.verify_host || (verify & SSL_VERIFY_PEER)) ? ssl_sock_srv_verifycbk : NULL);
+ if (verify & SSL_VERIFY_PEER) {
+ if (srv->ssl_ctx.ca_file) {
+ /* set CAfile to verify */
+ if (!ssl_set_verify_locations_file(ctx, srv->ssl_ctx.ca_file)) {
+ ha_alert("unable to set CA file '%s'.\n",
+ srv->ssl_ctx.ca_file);
+ cfgerr++;
+ }
+ }
+ else {
+ if (global.ssl_server_verify == SSL_SERVER_VERIFY_REQUIRED)
+ ha_alert("verify is enabled by default but no CA file specified. If you're running on a LAN where you're certain to trust the server's certificate, please set an explicit 'verify none' statement on the 'server' line, or use 'ssl-server-verify none' in the global section to disable server-side verifications by default.\n");
+ else
+ ha_alert("verify is enabled but no CA file specified.\n");
+ cfgerr++;
+ }
+#ifdef X509_V_FLAG_CRL_CHECK
+ if (srv->ssl_ctx.crl_file) {
+ X509_STORE *store = SSL_CTX_get_cert_store(ctx);
+
+ if (!ssl_set_cert_crl_file(store, srv->ssl_ctx.crl_file)) {
+ ha_alert("unable to configure CRL file '%s'.\n",
+ srv->ssl_ctx.crl_file);
+ cfgerr++;
+ }
+ else {
+ X509_STORE_set_flags(store, X509_V_FLAG_CRL_CHECK|X509_V_FLAG_CRL_CHECK_ALL);
+ }
+ }
+#endif
+ }
+
+ SSL_CTX_set_session_cache_mode(ctx, SSL_SESS_CACHE_CLIENT | SSL_SESS_CACHE_NO_INTERNAL_STORE);
+ SSL_CTX_sess_set_new_cb(ctx, ssl_sess_new_srv_cb);
+ if (srv->ssl_ctx.ciphers &&
+ !SSL_CTX_set_cipher_list(ctx, srv->ssl_ctx.ciphers)) {
+ ha_alert("unable to set SSL cipher list to '%s'.\n",
+ srv->ssl_ctx.ciphers);
+ cfgerr++;
+ }
+
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (srv->ssl_ctx.ciphersuites &&
+ !SSL_CTX_set_ciphersuites(ctx, srv->ssl_ctx.ciphersuites)) {
+ ha_alert("unable to set TLS 1.3 cipher suites to '%s'.\n",
+ srv->ssl_ctx.ciphersuites);
+ cfgerr++;
+ }
+#endif
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ if (srv->ssl_ctx.npn_str)
+ SSL_CTX_set_next_proto_select_cb(ctx, ssl_sock_srv_select_protos, (struct server*)srv);
+#endif
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ if (srv->ssl_ctx.alpn_str && srv->ssl_ctx.alpn_len)
+ SSL_CTX_set_alpn_protos(ctx, (unsigned char *)srv->ssl_ctx.alpn_str, srv->ssl_ctx.alpn_len);
+#endif
+
+#if defined(SSL_CTX_set1_sigalgs_list)
+ conf_sigalgs = srv->ssl_ctx.sigalgs;
+ if (conf_sigalgs) {
+ if (!SSL_CTX_set1_sigalgs_list(ctx, conf_sigalgs)) {
+ ha_alert("Proxy '%s': unable to set SSL Signature Algorithm list to '%s' for server '%s'.\n",
+ curproxy->id, conf_sigalgs, srv->id);
+ cfgerr++;
+ }
+ }
+#endif
+#if defined(SSL_CTX_set1_client_sigalgs_list)
+ conf_client_sigalgs = srv->ssl_ctx.client_sigalgs;
+ if (conf_client_sigalgs) {
+ if (!SSL_CTX_set1_client_sigalgs_list(ctx, conf_client_sigalgs)) {
+ ha_alert("Proxy '%s': unable to set SSL Client Signature Algorithm list to '%s' for server '%s'.\n",
+ curproxy->id, conf_client_sigalgs, srv->id);
+ cfgerr++;
+ }
+ }
+#endif
+
+#if defined(SSL_CTX_set1_curves_list)
+ conf_curves = srv->ssl_ctx.curves;
+ if (conf_curves) {
+ if (!SSL_CTX_set1_curves_list(ctx, conf_curves)) {
+ ha_alert("Proxy '%s': unable to set SSL curves list to '%s' for server '%s'.\n",
+ curproxy->id, conf_curves, srv->id);
+ cfgerr++;
+ }
+ }
+#endif /* defined(SSL_CTX_set1_curves_list) */
+
+ return cfgerr;
+}
+
+/*
+ * Prepare the server's SSL_CTX based on the server line configuration.
+ * Since the CA file loading is made depending on the verify option of the
+ * server line, the link between the SSL_CTX and the CA file tree entry is
+ * made here.
+ * If we want to create a link between the CA file entry and the corresponding
+ * ckch instance (for CA file hot update), it needs to be done after
+ * ssl_sock_prepare_srv_ssl_ctx.
+ * Returns an error count.
+ */
+int ssl_sock_prep_srv_ctx_and_inst(const struct server *srv, SSL_CTX *ctx,
+ struct ckch_inst *ckch_inst)
+{
+ int cfgerr = 0;
+
+ cfgerr += ssl_sock_prepare_srv_ssl_ctx(srv, ctx);
+ if (!cfgerr && ckch_inst)
+ ckch_inst_add_cafile_link(ckch_inst, NULL, NULL, srv);
+
+ return cfgerr;
+}
+
+
+/*
+ * Create an initial CTX used to start the SSL connections.
+ * May be used by the QUIC xprt, which makes use of SSL sessions initialized from SSL_CTXs.
+ * Returns 0 on success, or a value > 0 on failure.
+ */
+#ifdef USE_QUIC
+static int ssl_initial_ctx(struct bind_conf *bind_conf)
+{
+ if (bind_conf->xprt == xprt_get(XPRT_QUIC))
+ return ssl_quic_initial_ctx(bind_conf);
+ else
+ return ssl_sock_initial_ctx(bind_conf);
+}
+#else
+static int ssl_initial_ctx(struct bind_conf *bind_conf)
+{
+ return ssl_sock_initial_ctx(bind_conf);
+}
+#endif
+
+/* Walks down the two trees in bind_conf and prepares all certs. The pointer may
+ * be NULL, in which case nothing is done. Returns the number of errors
+ * encountered.
+ */
+int ssl_sock_prepare_all_ctx(struct bind_conf *bind_conf)
+{
+ struct ebmb_node *node;
+ struct sni_ctx *sni;
+ int err = 0;
+ int errcode = 0;
+ char *errmsg = NULL;
+
+ /* Automatic memory computations need to know we use SSL there */
+ global.ssl_used_frontend = 1;
+
+ /* Create the initial_ctx used to start the SSL connection before the switchctx is done */
+ if (!bind_conf->initial_ctx) {
+ err += ssl_initial_ctx(bind_conf);
+ /* Calling this function here should not be necessary, but before
+ removing the call, all the initialisation related to initial_ctx
+ first needs to be checked and moved into ssl_initial_ctx. */
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, NULL, bind_conf->initial_ctx, NULL, &errmsg);
+ }
+ if (bind_conf->default_ctx) {
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, bind_conf->default_ssl_conf, bind_conf->default_ctx, bind_conf->default_inst, &errmsg);
+ }
+
+ node = ebmb_first(&bind_conf->sni_ctx);
+ while (node) {
+ sni = ebmb_entry(node, struct sni_ctx, name);
+ if (!sni->order && sni->ctx != bind_conf->default_ctx) {
+ /* only initialize the CTX on its first occurrence and
+ if it is not the default_ctx */
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, sni->conf, sni->ctx, sni->ckch_inst, &errmsg);
+ }
+ node = ebmb_next(node);
+ }
+
+ node = ebmb_first(&bind_conf->sni_w_ctx);
+ while (node) {
+ sni = ebmb_entry(node, struct sni_ctx, name);
+ if (!sni->order && sni->ctx != bind_conf->default_ctx) {
+ /* only initialize the CTX on its first occurrence and
+ if it is not the default_ctx */
+ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, sni->conf, sni->ctx, sni->ckch_inst, &errmsg);
+ }
+ node = ebmb_next(node);
+ }
+
+ if (errcode & ERR_WARN) {
+ ha_warning("%s", errmsg);
+ } else if (errcode & ERR_CODE) {
+ ha_alert("%s", errmsg);
+ err++;
+ }
+
+ free(errmsg);
+ return err;
+}
+
+/* Prepares all the contexts for a bind_conf and allocates the shared SSL
+ * session cache if needed. Returns < 0 on error, 0 on success. Warnings and
+ * alerts are emitted directly, as the rest of the stack below does the same.
+ */
+int ssl_sock_prepare_bind_conf(struct bind_conf *bind_conf)
+{
+ struct proxy *px = bind_conf->frontend;
+ int alloc_ctx;
+ int err;
+
+ if (!(bind_conf->options & BC_O_USE_SSL)) {
+ if (bind_conf->default_ctx) {
+ ha_warning("Proxy '%s': A certificate was specified but SSL was not enabled on bind '%s' at [%s:%d] (use 'ssl').\n",
+ px->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ }
+ return 0;
+ }
+ if (!bind_conf->default_ctx) {
+ if (bind_conf->strict_sni && !(bind_conf->options & BC_O_GENERATE_CERTS)) {
+ ha_warning("Proxy '%s': no SSL certificate specified for bind '%s' at [%s:%d], ssl connections will fail (use 'crt').\n",
+ px->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ }
+ else {
+ ha_alert("Proxy '%s': no SSL certificate specified for bind '%s' at [%s:%d] (use 'crt').\n",
+ px->id, bind_conf->arg, bind_conf->file, bind_conf->line);
+ return -1;
+ }
+ }
+ if (!ssl_shctx && global.tune.sslcachesize) {
+ alloc_ctx = shctx_init(&ssl_shctx, global.tune.sslcachesize,
+ sizeof(struct sh_ssl_sess_hdr) + SHSESS_BLOCK_MIN_SIZE, -1,
+ sizeof(*sh_ssl_sess_tree));
+ if (alloc_ctx <= 0) {
+ if (alloc_ctx == SHCTX_E_INIT_LOCK)
+ ha_alert("Unable to initialize the lock for the shared SSL session cache. You can retry using the global statement 'tune.ssl.force-private-cache' but it could increase CPU usage due to renegotiations if nbproc > 1.\n");
+ else
+ ha_alert("Unable to allocate SSL session cache.\n");
+ return -1;
+ }
+ /* free block callback */
+ ssl_shctx->free_block = sh_ssl_sess_free_blocks;
+ /* init the root tree within the extra space */
+ sh_ssl_sess_tree = (void *)ssl_shctx + sizeof(struct shared_context);
+ *sh_ssl_sess_tree = EB_ROOT_UNIQUE;
+ }
+ err = 0;
+ /* initialize all certificate contexts */
+ err += ssl_sock_prepare_all_ctx(bind_conf);
+
+ /* initialize CA variables if the certificates generation is enabled */
+ err += ssl_sock_load_ca(bind_conf);
+
+ return -err;
+}
+
+/* release the ssl context allocated for servers. Most of the fields freed
+ * here must also be allocated in srv_ssl_settings_cpy() */
+void ssl_sock_free_srv_ctx(struct server *srv)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ ha_free(&srv->ssl_ctx.alpn_str);
+#endif
+#ifdef OPENSSL_NPN_NEGOTIATED
+ ha_free(&srv->ssl_ctx.npn_str);
+#endif
+ if (srv->ssl_ctx.reused_sess) {
+ int i;
+
+ for (i = 0; i < global.nbthread; i++) {
+ ha_free(&srv->ssl_ctx.reused_sess[i].ptr);
+ ha_free(&srv->ssl_ctx.reused_sess[i].sni);
+ }
+ ha_free(&srv->ssl_ctx.reused_sess);
+ }
+
+ if (srv->ssl_ctx.ctx) {
+ SSL_CTX_free(srv->ssl_ctx.ctx);
+ srv->ssl_ctx.ctx = NULL;
+ }
+
+ ha_free(&srv->ssl_ctx.ca_file);
+ ha_free(&srv->ssl_ctx.crl_file);
+ ha_free(&srv->ssl_ctx.client_crt);
+ ha_free(&srv->ssl_ctx.verify_host);
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ ha_free(&srv->sni_expr);
+ release_sample_expr(srv->ssl_ctx.sni);
+ srv->ssl_ctx.sni = NULL;
+#endif
+ ha_free(&srv->ssl_ctx.ciphers);
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ ha_free(&srv->ssl_ctx.ciphersuites);
+#endif
+ /* If there is a certificate we must unlink the ckch instance */
+ ckch_inst_free(srv->ssl_ctx.inst);
+}
+
+/* Walks down the two trees in bind_conf and frees all the certs. The pointer may
+ * be NULL, in which case nothing is done. The default_ctx is nullified too.
+ */
+void ssl_sock_free_all_ctx(struct bind_conf *bind_conf)
+{
+ struct ebmb_node *node, *back;
+ struct sni_ctx *sni;
+
+ node = ebmb_first(&bind_conf->sni_ctx);
+ while (node) {
+ sni = ebmb_entry(node, struct sni_ctx, name);
+ back = ebmb_next(node);
+ ebmb_delete(node);
+ SSL_CTX_free(sni->ctx);
+ LIST_DELETE(&sni->by_ckch_inst);
+ free(sni);
+ node = back;
+ }
+
+ node = ebmb_first(&bind_conf->sni_w_ctx);
+ while (node) {
+ sni = ebmb_entry(node, struct sni_ctx, name);
+ back = ebmb_next(node);
+ ebmb_delete(node);
+ SSL_CTX_free(sni->ctx);
+ LIST_DELETE(&sni->by_ckch_inst);
+ free(sni);
+ node = back;
+ }
+
+ SSL_CTX_free(bind_conf->initial_ctx);
+ bind_conf->initial_ctx = NULL;
+ SSL_CTX_free(bind_conf->default_ctx);
+ bind_conf->default_ctx = NULL;
+ bind_conf->default_inst = NULL;
+ bind_conf->default_ssl_conf = NULL;
+}
+
+
+void ssl_sock_deinit()
+{
+ crtlist_deinit(); /* must be free'd before the ckchs */
+ ckch_deinit();
+}
+REGISTER_POST_DEINIT(ssl_sock_deinit);
+
+/* Destroys all the contexts for a bind_conf. This is used during deinit(). */
+void ssl_sock_destroy_bind_conf(struct bind_conf *bind_conf)
+{
+ ssl_sock_free_ca(bind_conf);
+ ssl_sock_free_all_ctx(bind_conf);
+ ssl_sock_free_ssl_conf(&bind_conf->ssl_conf);
+ free(bind_conf->ca_sign_file);
+ free(bind_conf->ca_sign_pass);
+ if (bind_conf->keys_ref && !--bind_conf->keys_ref->refcount) {
+ free(bind_conf->keys_ref->filename);
+ free(bind_conf->keys_ref->tlskeys);
+ LIST_DELETE(&bind_conf->keys_ref->list);
+ free(bind_conf->keys_ref);
+ }
+ bind_conf->keys_ref = NULL;
+ bind_conf->ca_sign_pass = NULL;
+ bind_conf->ca_sign_file = NULL;
+}
+
+/* Load CA cert file and private key used to generate certificates */
+int
+ssl_sock_load_ca(struct bind_conf *bind_conf)
+{
+ struct proxy *px = bind_conf->frontend;
+ struct ckch_data *data = NULL;
+ int ret = 0;
+ char *err = NULL;
+
+ if (!(bind_conf->options & BC_O_GENERATE_CERTS))
+ return ret;
+
+#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES)
+ if (global_ssl.ctx_cache) {
+ ssl_ctx_lru_tree = lru64_new(global_ssl.ctx_cache);
+ }
+ ssl_ctx_lru_seed = (unsigned int)time(NULL);
+ ssl_ctx_serial = now_ms;
+#endif
+
+ if (!bind_conf->ca_sign_file) {
+ ha_alert("Proxy '%s': cannot enable certificate generation, "
+ "no CA certificate File configured at [%s:%d].\n",
+ px->id, bind_conf->file, bind_conf->line);
+ goto failed;
+ }
+
+ /* Allocate cert structure */
+ data = calloc(1, sizeof(*data));
+ if (!data) {
+ ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain allocation failure\n",
+ px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line);
+ goto failed;
+ }
+
+ /* Try to parse file */
+ if (ssl_sock_load_files_into_ckch(bind_conf->ca_sign_file, data, &err)) {
+ ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain loading failed: %s\n",
+ px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line, err);
+ free(err);
+ goto failed;
+ }
+
+ /* Fail if missing cert or pkey */
+ if ((!data->cert) || (!data->key)) {
+ ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain missing certificate or private key\n",
+ px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line);
+ goto failed;
+ }
+
+ /* Final assignment to bind */
+ bind_conf->ca_sign_ckch = data;
+ return ret;
+
+ failed:
+ if (data) {
+ ssl_sock_free_cert_key_and_chain_contents(data);
+ free(data);
+ }
+
+ bind_conf->options &= ~BC_O_GENERATE_CERTS;
+ ret++;
+ return ret;
+}
+
+/* Release the CA cert and private key used to generate certificates */
+void
+ssl_sock_free_ca(struct bind_conf *bind_conf)
+{
+ if (bind_conf->ca_sign_ckch) {
+ ssl_sock_free_cert_key_and_chain_contents(bind_conf->ca_sign_ckch);
+ ha_free(&bind_conf->ca_sign_ckch);
+ }
+}
+
+/*
+ * Try to allocate the BIO and SSL session objects of <conn> connection with <bio> and
+ * <ssl> as addresses, <bio_meth> as the BIO method and <ssl_ctx> as the SSL context whose settings are inherited.
+ * Connect the allocated BIO to the allocated SSL session. Also set <ctx> as the address of custom
+ * data for the BIO and store <conn> as user data of the SSL session object.
+ * It is the caller's responsibility to check the validity of all the pointers
+ * passed as parameters to this function.
+ * Returns 0 on success, -1 on failure. On failure, sets the ->err_code member of <conn> to
+ * CO_ER_SSL_NO_MEM.
+ */
+int ssl_bio_and_sess_init(struct connection *conn, SSL_CTX *ssl_ctx,
+ SSL **ssl, BIO **bio, BIO_METHOD *bio_meth, void *ctx)
+{
+ int retry = 1;
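+ /* on allocation failure below, reclaim memory from the pools with
+ * pool_gc() and retry a single time before giving up */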
+
+ retry:
+ /* Alloc a new SSL session. */
+ *ssl = SSL_new(ssl_ctx);
+ if (!*ssl) {
+ if (!retry--)
+ goto err;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ *bio = BIO_new(bio_meth);
+ if (!*bio) {
+ SSL_free(*ssl);
+ *ssl = NULL;
+ if (!retry--)
+ goto err;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ BIO_set_data(*bio, ctx);
+ SSL_set_bio(*ssl, *bio, *bio);
+
+ /* set connection pointer. */
+ if (!SSL_set_ex_data(*ssl, ssl_app_data_index, conn)) {
+ SSL_free(*ssl);
+ *ssl = NULL;
+ if (!retry--)
+ goto err;
+
+ pool_gc(NULL);
+ goto retry;
+ }
+
+ return 0;
+
+ err:
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ return -1;
+}
+
+/* This function is called when all the XPRT have been initialized. We can
+ * now attempt to start the SSL handshake.
+ */
+static int ssl_sock_start(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (ctx->xprt->start) {
+ int ret;
+
+ ret = ctx->xprt->start(conn, ctx->xprt_ctx);
+ if (ret < 0)
+ return ret;
+ }
+ tasklet_wakeup(ctx->wait_event.tasklet);
+
+ return 0;
+}
+
+/* Similar to increment_actconn() but for SSL connections. */
+int increment_sslconn()
+{
+ unsigned int count, next_sslconn;
+
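+ /* lock-free increment: re-read the counter and retry the CAS until it
+ * succeeds; return the new count, or 0 if global.maxsslconn is reached */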
+ do {
+ count = global.sslconns;
+ if (global.maxsslconn && count >= global.maxsslconn) {
+ /* maxsslconn reached */
+ next_sslconn = 0;
+ goto end;
+ }
+
+ /* try to increment sslconns */
+ next_sslconn = count + 1;
+ } while (!_HA_ATOMIC_CAS(&global.sslconns, &count, next_sslconn) && __ha_cpu_relax());
+
+ end:
+ return next_sslconn;
+}
+
+/*
+ * This function is called if the SSL context is not yet allocated. The function
+ * is designed to be called before any other data-layer operation and sets the
+ * handshake flag on the connection. It is safe to call it multiple times.
+ * It returns 0 on success and -1 in error case.
+ */
+static int ssl_sock_init(struct connection *conn, void **xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx;
+ int next_sslconn = 0;
+
+ /* already initialized */
+ if (*xprt_ctx)
+ return 0;
+
+ ctx = pool_alloc(ssl_sock_ctx_pool);
+ if (!ctx) {
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ return -1;
+ }
+ ctx->wait_event.tasklet = tasklet_new();
+ if (!ctx->wait_event.tasklet) {
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ pool_free(ssl_sock_ctx_pool, ctx);
+ return -1;
+ }
+ ctx->wait_event.tasklet->process = ssl_sock_io_cb;
+ ctx->wait_event.tasklet->context = ctx;
+ ctx->wait_event.tasklet->state |= TASK_HEAVY; // assign it to the bulk queue during handshake
+ ctx->wait_event.events = 0;
+ ctx->sent_early_data = 0;
+ ctx->early_buf = BUF_NULL;
+ ctx->conn = conn;
+ ctx->subs = NULL;
+ ctx->xprt_st = 0;
+ ctx->xprt_ctx = NULL;
+ ctx->error_code = 0;
+
+ next_sslconn = increment_sslconn();
+ if (!next_sslconn) {
+ conn->err_code = CO_ER_SSL_TOO_MANY;
+ goto err;
+ }
+
+ /* Only work with sockets for now, this should be adapted when we
+ * add QUIC support.
+ */
+ ctx->xprt = xprt_get(XPRT_RAW);
+ if (ctx->xprt->init) {
+ if (ctx->xprt->init(conn, &ctx->xprt_ctx) != 0)
+ goto err;
+ }
+
+ /* In client mode, initiate the SSL session in connect state,
+ otherwise in accept state */
+ if (objt_server(conn->target)) {
+ struct server *srv = __objt_server(conn->target);
+
+ if (ssl_bio_and_sess_init(conn, srv->ssl_ctx.ctx,
+ &ctx->ssl, &ctx->bio, ha_meth, ctx) == -1)
+ goto err;
+
+ SSL_set_connect_state(ctx->ssl);
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &srv->ssl_ctx.lock);
+ if (srv->ssl_ctx.reused_sess[tid].ptr) {
+ /* let's recreate a session from (ptr,size) and assign
+ * it to ctx->ssl. Its refcount will be updated by the
+ * creation and by the assignment, so after assigning
+ * it or failing to, we must always free it to decrement
+ * the refcount.
+ */
+ const unsigned char *ptr = srv->ssl_ctx.reused_sess[tid].ptr;
+ SSL_SESSION *sess = d2i_SSL_SESSION(NULL, &ptr, srv->ssl_ctx.reused_sess[tid].size);
+
+ if (sess && !SSL_set_session(ctx->ssl, sess)) {
+ uint old_tid = HA_ATOMIC_LOAD(&srv->ssl_ctx.last_ssl_sess_tid); // 0=none, >0 = tid + 1
+ if (old_tid == tid + 1)
+ HA_ATOMIC_CAS(&srv->ssl_ctx.last_ssl_sess_tid, &old_tid, 0); // no more valid
+ SSL_SESSION_free(sess);
+ HA_RWLOCK_WRLOCK(SSL_SERVER_LOCK, &srv->ssl_ctx.reused_sess[tid].sess_lock);
+ ha_free(&srv->ssl_ctx.reused_sess[tid].ptr);
+ HA_RWLOCK_WRTORD(SSL_SERVER_LOCK, &srv->ssl_ctx.reused_sess[tid].sess_lock);
+ if (srv->ssl_ctx.reused_sess[tid].sni)
+ SSL_set_tlsext_host_name(ctx->ssl, srv->ssl_ctx.reused_sess[tid].sni);
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &srv->ssl_ctx.reused_sess[tid].sess_lock);
+ } else if (sess) {
+ /* already assigned, not needed anymore */
+ SSL_SESSION_free(sess);
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &srv->ssl_ctx.reused_sess[tid].sess_lock);
+ if (srv->ssl_ctx.reused_sess[tid].sni)
+ SSL_set_tlsext_host_name(ctx->ssl, srv->ssl_ctx.reused_sess[tid].sni);
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &srv->ssl_ctx.reused_sess[tid].sess_lock);
+ }
+ } else {
+ /* No session available yet, let's see if we can pick one
+ * from another thread. If old_tid is non-null, it designates
+ * the index of a recently updated thread that might still have
+ * a usable session. All threads are collectively responsible
+ * for resetting the index if it fails.
+ */
+ const unsigned char *ptr;
+ SSL_SESSION *sess;
+ uint old_tid = HA_ATOMIC_LOAD(&srv->ssl_ctx.last_ssl_sess_tid); // 0=none, >0 = tid + 1
+
+ if (old_tid) {
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &srv->ssl_ctx.reused_sess[old_tid-1].sess_lock);
+
+ ptr = srv->ssl_ctx.reused_sess[old_tid-1].ptr;
+ if (ptr) {
+ sess = d2i_SSL_SESSION(NULL, &ptr, srv->ssl_ctx.reused_sess[old_tid-1].size);
+ if (sess) {
+ if (!SSL_set_session(ctx->ssl, sess))
+ HA_ATOMIC_CAS(&srv->ssl_ctx.last_ssl_sess_tid, &old_tid, 0); // no more valid
+ SSL_SESSION_free(sess);
+ }
+ }
+
+ if (srv->ssl_ctx.reused_sess[old_tid-1].sni)
+ SSL_set_tlsext_host_name(ctx->ssl, srv->ssl_ctx.reused_sess[old_tid-1].sni);
+
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &srv->ssl_ctx.reused_sess[old_tid-1].sess_lock);
+ }
+ }
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &srv->ssl_ctx.lock);
+
+ /* leave init state and start handshake */
+ conn->flags |= CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN;
+
+ _HA_ATOMIC_INC(&global.totalsslconns);
+ *xprt_ctx = ctx;
+ return 0;
+ }
+ else if (objt_listener(conn->target)) {
+ struct bind_conf *bc = __objt_listener(conn->target)->bind_conf;
+
+ if (ssl_bio_and_sess_init(conn, bc->initial_ctx,
+ &ctx->ssl, &ctx->bio, ha_meth, ctx) == -1)
+ goto err;
+
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ if (bc->ssl_conf.early_data) {
+ b_alloc(&ctx->early_buf);
+ SSL_set_max_early_data(ctx->ssl,
+ /* Only allow early data if we managed to allocate
+ * a buffer.
+ */
+ (!b_is_null(&ctx->early_buf)) ?
+ global.tune.bufsize - global.tune.maxrewrite : 0);
+ }
+#endif
+
+ SSL_set_accept_state(ctx->ssl);
+
+ /* leave init state and start handshake */
+ conn->flags |= CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN;
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ if (bc->ssl_conf.early_data)
+ conn->flags |= CO_FL_EARLY_SSL_HS;
+#endif
+
+ _HA_ATOMIC_INC(&global.totalsslconns);
+ *xprt_ctx = ctx;
+ return 0;
+ }
+ /* don't know how to handle such a target */
+ conn->err_code = CO_ER_SSL_NO_TARGET;
+err:
+ if (next_sslconn)
+ _HA_ATOMIC_DEC(&global.sslconns);
+ if (ctx && ctx->wait_event.tasklet)
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(ssl_sock_ctx_pool, ctx);
+ return -1;
+}
+
+
+/* This is the callback which is used when an SSL handshake is pending. It
+ * updates the FD status if it wants some polling before being called again.
+ * It returns 0 if it fails in a fatal way or needs to poll to go further,
+ * otherwise it returns non-zero and removes itself from the connection's
+ * flags (the bit is provided in <flag> by the caller).
+ */
+static int ssl_sock_handshake(struct connection *conn, unsigned int flag)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ int ret;
+ struct ssl_counters *counters = NULL;
+ struct ssl_counters *counters_px = NULL;
+ struct listener *li;
+ struct server *srv;
+ socklen_t lskerr;
+ int skerr;
+
+
+ if (!conn_ctrl_ready(conn))
+ return 0;
+
+ /* get counters */
+ switch (obj_type(conn->target)) {
+ case OBJ_TYPE_LISTENER:
+ li = __objt_listener(conn->target);
+ counters = EXTRA_COUNTERS_GET(li->extra_counters, &ssl_stats_module);
+ counters_px = EXTRA_COUNTERS_GET(li->bind_conf->frontend->extra_counters_fe,
+ &ssl_stats_module);
+ break;
+
+ case OBJ_TYPE_SERVER:
+ srv = __objt_server(conn->target);
+ counters = EXTRA_COUNTERS_GET(srv->extra_counters, &ssl_stats_module);
+ counters_px = EXTRA_COUNTERS_GET(srv->proxy->extra_counters_be,
+ &ssl_stats_module);
+ break;
+
+ default:
+ break;
+ }
+
+ if (!ctx)
+ goto out_error;
+
+ /* don't start calculating a handshake on a dead connection */
+ if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH))
+ goto out_error;
+
+ /* FIXME/WT: for now we don't have a clear way to inspect the connection
+ * status from the lower layers, so let's check the FD directly. Ideally
+ * the xprt layers should provide some status indicating their knowledge
+ * of shutdowns or error.
+ */
+ BUG_ON(conn->flags & CO_FL_FDLESS);
+
+ skerr = 0;
+ lskerr = sizeof(skerr);
+ if ((getsockopt(conn->handle.fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr) < 0) ||
+ skerr != 0)
+ goto out_error;
+
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ /*
+ * Check if we have early data. If we do, we have to read it
+ * before SSL_do_handshake() is called, and there's no way to
+ * detect early data except by trying to read it.
+ */
+ if (conn->flags & CO_FL_EARLY_SSL_HS) {
+ size_t read_data = 0;
+
+ while (1) {
+ ret = SSL_read_early_data(ctx->ssl,
+ b_tail(&ctx->early_buf), b_room(&ctx->early_buf),
+ &read_data);
+ if (ret == SSL_READ_EARLY_DATA_ERROR)
+ goto check_error;
+ if (read_data > 0) {
+ conn->flags |= CO_FL_EARLY_DATA;
+ b_add(&ctx->early_buf, read_data);
+ }
+ if (ret == SSL_READ_EARLY_DATA_FINISH) {
+ conn->flags &= ~CO_FL_EARLY_SSL_HS;
+ if (!b_data(&ctx->early_buf))
+ b_free(&ctx->early_buf);
+ break;
+ }
+ }
+ }
+#endif
+ /* If we use SSL_do_handshake to process a reneg initiated by
+ * the remote peer, it sometimes returns SSL_ERROR_SSL.
+ * Usually SSL_write and SSL_read are used and process implicitly
+ * the reneg handshake.
+ * Here we use SSL_peek as a workaround for reneg.
+ */
+ if (!(conn->flags & CO_FL_WAIT_L6_CONN) && SSL_renegotiate_pending(ctx->ssl)) {
+ char c;
+
+ ret = SSL_peek(ctx->ssl, &c, 1);
+ if (ret <= 0) {
+ /* handshake may have not been completed, let's find why */
+ ret = SSL_get_error(ctx->ssl, ret);
+
+ if (ret == SSL_ERROR_WANT_WRITE) {
+ /* SSL handshake needs to write, L4 connection may not be ready */
+ if (!(ctx->wait_event.events & SUB_RETRY_SEND))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND, &ctx->wait_event);
+ return 0;
+ }
+ else if (ret == SSL_ERROR_WANT_READ) {
+ /* handshake may have been completed but we have
+ * no more data to read.
+ */
+ if (!SSL_renegotiate_pending(ctx->ssl)) {
+ ret = 1;
+ goto reneg_ok;
+ }
+ /* SSL handshake needs to read, L4 connection is ready */
+ if (!(ctx->wait_event.events & SUB_RETRY_RECV))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_RECV, &ctx->wait_event);
+ return 0;
+ }
+#ifdef SSL_MODE_ASYNC
+ else if (ret == SSL_ERROR_WANT_ASYNC) {
+ ssl_async_process_fds(ctx);
+ return 0;
+ }
+#endif
+ else if (ret == SSL_ERROR_SYSCALL) {
+ /* if errno is zero, then the connection was successfully established */
+ if (!errno && conn->flags & CO_FL_WAIT_L4_CONN)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ if (!conn->err_code) {
+#if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
+ /* do not handle empty handshakes in BoringSSL or LibreSSL */
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+#else
+ int empty_handshake;
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x1010000fL)
+ /* use SSL_get_state() in OpenSSL >= 1.1.0; SSL_state() is broken */
+ OSSL_HANDSHAKE_STATE state = SSL_get_state((SSL *)ctx->ssl);
+ empty_handshake = state == TLS_ST_BEFORE;
+#else
+ /* access packet_length directly in OpenSSL <= 1.0.2; SSL_state() is broken */
+ empty_handshake = !ctx->ssl->packet_length;
+#endif
+ if (empty_handshake) {
+ if (!errno) {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_EMPTY;
+ }
+ else {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_ABORT;
+ }
+ }
+ else {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+ }
+#endif /* BoringSSL or LibreSSL */
+ }
+ goto out_error;
+ }
+ else {
+ /* Fail on all other handshake errors */
+ /* Note: OpenSSL may leave unread bytes in the socket's
+ * buffer, causing an RST to be emitted upon close() on
+ * TCP sockets. We first try to drain possibly pending
+ * data to avoid this as much as possible.
+ */
+ conn_ctrl_drain(conn);
+ if (!conn->err_code)
+ conn->err_code = (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT) ?
+ CO_ER_SSL_KILLED_HB : CO_ER_SSL_HANDSHAKE;
+ goto out_error;
+ }
+ }
+ /* read some data: consider handshake completed */
+ goto reneg_ok;
+ }
+ ret = SSL_do_handshake(ctx->ssl);
+check_error:
+ if (ret != 1) {
+ /* handshake did not complete, let's find why */
+ ret = SSL_get_error(ctx->ssl, ret);
+
+ if (!ctx->error_code)
+ ctx->error_code = ERR_peek_error();
+
+ if (ret == SSL_ERROR_WANT_WRITE) {
+ /* SSL handshake needs to write, L4 connection may not be ready */
+ if (!(ctx->wait_event.events & SUB_RETRY_SEND))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND, &ctx->wait_event);
+ return 0;
+ }
+ else if (ret == SSL_ERROR_WANT_READ) {
+ /* SSL handshake needs to read, L4 connection is ready */
+ if (!(ctx->wait_event.events & SUB_RETRY_RECV))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx,
+ SUB_RETRY_RECV, &ctx->wait_event);
+ return 0;
+ }
+#ifdef SSL_MODE_ASYNC
+ else if (ret == SSL_ERROR_WANT_ASYNC) {
+ ssl_async_process_fds(ctx);
+ return 0;
+ }
+#endif
+ else if (ret == SSL_ERROR_SYSCALL) {
+ /* if errno is zero, then the connection was successfully established */
+ if (!errno && conn->flags & CO_FL_WAIT_L4_CONN)
+ conn->flags &= ~CO_FL_WAIT_L4_CONN;
+ if (!conn->err_code) {
+#if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
+ /* do not handle empty handshakes in BoringSSL or LibreSSL */
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+#else
+ int empty_handshake;
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x1010000fL)
+ /* use SSL_get_state() in OpenSSL >= 1.1.0; SSL_state() is broken */
+ OSSL_HANDSHAKE_STATE state = SSL_get_state(ctx->ssl);
+ empty_handshake = state == TLS_ST_BEFORE;
+#else
+ /* access packet_length directly in OpenSSL <= 1.0.2; SSL_state() is broken */
+ empty_handshake = !ctx->ssl->packet_length;
+#endif
+ if (empty_handshake) {
+ if (!errno) {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_EMPTY;
+ }
+ else {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_ABORT;
+ }
+ }
+ else {
+ if (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT)
+ conn->err_code = CO_ER_SSL_HANDSHAKE_HB;
+ else
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+ }
+#endif /* BoringSSL or LibreSSL */
+ }
+ goto out_error;
+
+ } else if (ret == SSL_ERROR_ZERO_RETURN) {
+ /* The peer has closed the SSL session for writing by
+ * sending a close_notify alert */
+ conn_ctrl_drain(conn);
+ conn->err_code = CO_ER_SSL_EMPTY;
+ goto out_error;
+
+ }
+ else {
+ /* Fail on all other handshake errors */
+ /* Note: OpenSSL may leave unread bytes in the socket's
+ * buffer, causing an RST to be emitted upon close() on
+ * TCP sockets. We first try to drain possibly pending
+ * data to avoid this as much as possible.
+ */
+ conn_ctrl_drain(conn);
+ if (!conn->err_code)
+ conn->err_code = (ctx->xprt_st & SSL_SOCK_RECV_HEARTBEAT) ?
+ CO_ER_SSL_KILLED_HB : CO_ER_SSL_HANDSHAKE;
+ goto out_error;
+ }
+ }
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ else {
+ /*
+ * If the server refused the early data, we have to send a
+ * 425 to the client, as we no longer have the data to send
+ * it again.
+ */
+ if ((conn->flags & CO_FL_EARLY_DATA) && (objt_server(conn->target))) {
+ if (SSL_get_early_data_status(ctx->ssl) == SSL_EARLY_DATA_REJECTED) {
+ conn->err_code = CO_ER_SSL_EARLY_FAILED;
+ goto out_error;
+ }
+ }
+ }
+#endif
+
+
+reneg_ok:
+
+#ifdef SSL_MODE_ASYNC
+ /* ASYNC engine API doesn't support moving read/write
+ * buffers. So we disable ASYNC mode right after
+ * the handshake to avoid buffer overflow.
+ */
+ if (global_ssl.async)
+ SSL_clear_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ /* Handshake succeeded */
+ if (!SSL_session_reused(ctx->ssl)) {
+ if (objt_server(conn->target)) {
+ update_freq_ctr(&global.ssl_be_keys_per_sec, 1);
+ if (global.ssl_be_keys_per_sec.curr_ctr > global.ssl_be_keys_max)
+ global.ssl_be_keys_max = global.ssl_be_keys_per_sec.curr_ctr;
+ }
+ else {
+ update_freq_ctr(&global.ssl_fe_keys_per_sec, 1);
+ if (global.ssl_fe_keys_per_sec.curr_ctr > global.ssl_fe_keys_max)
+ global.ssl_fe_keys_max = global.ssl_fe_keys_per_sec.curr_ctr;
+ }
+
+ if (counters) {
+ HA_ATOMIC_INC(&counters->sess);
+ HA_ATOMIC_INC(&counters_px->sess);
+ }
+ }
+ else if (counters) {
+ HA_ATOMIC_INC(&counters->reused_sess);
+ HA_ATOMIC_INC(&counters_px->reused_sess);
+ }
+
+ /* The connection is now established at both layers, it's time to leave */
+ conn->flags &= ~(flag | CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN);
+ return 1;
+
+ out_error:
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+
+ /* free resumed session if exists */
+ if (objt_server(conn->target)) {
+ struct server *s = __objt_server(conn->target);
+ /* RWLOCK: only rdlock the SSL cache even when writing to it, because there
+ * is one cache per thread; the lock only prevents it from being flushed
+ * from the CLI by another thread */
+
+ HA_RWLOCK_RDLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ if (s->ssl_ctx.reused_sess[tid].ptr)
+ ha_free(&s->ssl_ctx.reused_sess[tid].ptr);
+ HA_RWLOCK_RDUNLOCK(SSL_SERVER_LOCK, &s->ssl_ctx.lock);
+ }
+
+ if (counters) {
+ HA_ATOMIC_INC(&counters->failed_handshake);
+ HA_ATOMIC_INC(&counters_px->failed_handshake);
+ }
+
+ /* Fail on all other handshake errors */
+ conn->flags |= CO_FL_ERROR;
+ if (!conn->err_code)
+ conn->err_code = CO_ER_SSL_HANDSHAKE;
+ return 0;
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0,
+ * unless the transport layer was already released.
+ */
+static int ssl_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (!ctx)
+ return -1;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(ctx->subs && ctx->subs != es);
+
+ ctx->subs = es;
+ es->events |= event_type;
+
+ /* we may have to subscribe to lower layers for new events */
+ event_type &= ~ctx->wait_event.events;
+ if (event_type && !(conn->flags & CO_FL_SSL_WAIT_HS))
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, event_type, &ctx->wait_event);
+ return 0;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int ssl_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(ctx->subs && ctx->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ ctx->subs = NULL;
+
+ /* If we subscribed, and we're not doing the handshake,
+ * then we subscribed because the upper layer asked for it,
+ * as the upper layer is no longer interested, we can
+ * unsubscribe too.
+ */
+ event_type &= ctx->wait_event.events;
+ if (event_type && !(ctx->conn->flags & CO_FL_SSL_WAIT_HS))
+ conn_unsubscribe(conn, ctx->xprt_ctx, event_type, &ctx->wait_event);
+
+ return 0;
+}
+
+/* The connection has been taken over, so destroy the old tasklet and create
+ * a new one. The original thread ID must be passed in <orig_tid>.
+ * It should be called with the takeover lock for the old thread held.
+ * Returns 0 on success, and -1 on failure
+ */
+static int ssl_takeover(struct connection *conn, void *xprt_ctx, int orig_tid)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+ struct tasklet *tl = tasklet_new();
+
+ if (!tl)
+ return -1;
+
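+ /* detach the old tasklet by clearing its context and wake it on its
+ * original thread so that it can free itself there (see the TASK_F_USR1
+ * handling in ssl_sock_io_cb) */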
+ ctx->wait_event.tasklet->context = NULL;
+ tasklet_wakeup_on(ctx->wait_event.tasklet, orig_tid);
+ ctx->wait_event.tasklet = tl;
+ ctx->wait_event.tasklet->process = ssl_sock_io_cb;
+ ctx->wait_event.tasklet->context = ctx;
+ return 0;
+}
+
+/* notify the next xprt that the connection is about to become idle and that it
+ * may be stolen at any time after the function returns and that any tasklet in
+ * the chain must be careful before dereferencing its context.
+ */
+static void ssl_set_idle(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (!ctx || !ctx->wait_event.tasklet)
+ return;
+
+ HA_ATOMIC_OR(&ctx->wait_event.tasklet->state, TASK_F_USR1);
+ if (ctx->xprt)
+ xprt_set_idle(conn, ctx->xprt, ctx->xprt_ctx);
+}
+
+/* notify the next xprt that the connection is not idle anymore and that it may
+ * not be stolen before the next xprt_set_idle().
+ */
+static void ssl_set_used(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (!ctx || !ctx->wait_event.tasklet)
+ return;
+
+ HA_ATOMIC_OR(&ctx->wait_event.tasklet->state, TASK_F_USR1);
+ if (ctx->xprt)
+ xprt_set_used(conn, ctx->xprt, ctx->xprt_ctx);
+}
+
+/* Use the provided XPRT as an underlying XPRT, and provide the old one.
+ * Returns 0 on success, and non-zero on failure.
+ */
+static int ssl_add_xprt(struct connection *conn, void *xprt_ctx, void *toadd_ctx, const struct xprt_ops *toadd_ops, void **oldxprt_ctx, const struct xprt_ops **oldxprt_ops)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (oldxprt_ops != NULL)
+ *oldxprt_ops = ctx->xprt;
+ if (oldxprt_ctx != NULL)
+ *oldxprt_ctx = ctx->xprt_ctx;
+ ctx->xprt = toadd_ops;
+ ctx->xprt_ctx = toadd_ctx;
+ return 0;
+}
+
+/* Remove the specified xprt. If it is our underlying XPRT, remove it and
+ * return 0, otherwise just call the remove_xprt method from the underlying
+ * XPRT.
+ */
+static int ssl_remove_xprt(struct connection *conn, void *xprt_ctx, void *toremove_ctx, const struct xprt_ops *newops, void *newctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (ctx->xprt_ctx == toremove_ctx) {
+ ctx->xprt_ctx = newctx;
+ ctx->xprt = newops;
+ return 0;
+ }
+ return (ctx->xprt->remove_xprt(conn, ctx->xprt_ctx, toremove_ctx, newops, newctx));
+}
+
+struct task *ssl_sock_io_cb(struct task *t, void *context, unsigned int state)
+{
+ struct tasklet *tl = (struct tasklet *)t;
+ struct ssl_sock_ctx *ctx = context;
+ struct connection *conn;
+ int conn_in_list;
+ int ret = 0;
+
+ if (state & TASK_F_USR1) {
+ /* the tasklet was idling on an idle connection, it might have
+ * been stolen, let's be careful!
+ */
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ if (tl->context == NULL) {
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ tasklet_free(tl);
+ return NULL;
+ }
+ conn = ctx->conn;
+ conn_in_list = conn->flags & CO_FL_LIST_MASK;
+ if (conn_in_list)
+ conn_delete_from_tree(conn);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ } else {
+ conn = ctx->conn;
+ conn_in_list = 0;
+ }
+
+ /* First, if we're doing a handshake, try that */
+ if (ctx->conn->flags & CO_FL_SSL_WAIT_HS) {
+ ssl_sock_handshake(ctx->conn, CO_FL_SSL_WAIT_HS);
+ if (!(ctx->conn->flags & CO_FL_SSL_WAIT_HS)) {
+ /* handshake completed, leave the bulk queue */
+ _HA_ATOMIC_AND(&tl->state, ~TASK_HEAVY);
+ }
+ }
+ /* If we had an error, or the handshake is done and I/O is available,
+ * let the upper layer know.
+ * If no mux was set up yet, then call conn_create_mux(), as
+ * we can't be sure conn_fd_handler() will be called again.
+ */
+ if ((ctx->conn->flags & CO_FL_ERROR) ||
+ !(ctx->conn->flags & CO_FL_SSL_WAIT_HS)) {
+ int woke = 0;
+
+ /* On error, wake any waiter */
+ if (ctx->subs) {
+ tasklet_wakeup(ctx->subs->tasklet);
+ ctx->subs->events = 0;
+ woke = 1;
+ ctx->subs = NULL;
+ }
+
+ /* If we're the first xprt for the connection, let the
+ * upper layers know. If we have no mux, create it,
+ * and once we have a mux, call its wake method if we didn't
+ * woke a tasklet already.
+ */
+ if (ctx->conn->xprt_ctx == ctx) {
+ if (!ctx->conn->mux)
+ ret = conn_create_mux(ctx->conn);
+ if (ret >= 0 && !woke && ctx->conn->mux && ctx->conn->mux->wake)
+ ret = ctx->conn->mux->wake(ctx->conn);
+ goto leave;
+ }
+ }
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ /* If we have early data and somebody wants to receive, let them */
+ else if (b_data(&ctx->early_buf) && ctx->subs &&
+ ctx->subs->events & SUB_RETRY_RECV) {
+ tasklet_wakeup(ctx->subs->tasklet);
+ ctx->subs->events &= ~SUB_RETRY_RECV;
+ if (!ctx->subs->events)
+ ctx->subs = NULL;
+ }
+#endif
+leave:
+ if (!ret && conn_in_list) {
+ struct server *srv = objt_server(conn->target);
+
+ HA_SPIN_LOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ _srv_add_idle(srv, conn, conn_in_list == CO_FL_SAFE_LIST);
+ HA_SPIN_UNLOCK(IDLE_CONNS_LOCK, &idle_conns[tid].idle_conns_lock);
+ }
+ return t;
+}
+
+/* Receive up to <count> bytes from connection <conn>'s socket and store them
+ * into buffer <buf>. Only one call to recv() is performed, unless the
+ * buffer wraps, in which case a second call may be performed. The connection's
+ * flags are updated with whatever special event is detected (error, read0,
+ * empty). The caller is responsible for taking care of those events and
+ * avoiding the call if inappropriate. The function does not call the
+ * connection's polling update function, so the caller is responsible for this.
+ */
+static size_t ssl_sock_to_buf(struct connection *conn, void *xprt_ctx, struct buffer *buf, size_t count, int flags)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+ ssize_t ret;
+ size_t try, done = 0;
+
+ if (!ctx)
+ goto out_error;
+
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
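+ /* serve any early data buffered during the handshake before reading
+ * from the SSL object itself */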
+ if (b_data(&ctx->early_buf)) {
+ try = b_contig_space(buf);
+ if (try > b_data(&ctx->early_buf))
+ try = b_data(&ctx->early_buf);
+ memcpy(b_tail(buf), b_head(&ctx->early_buf), try);
+ b_add(buf, try);
+ b_del(&ctx->early_buf, try);
+ if (b_data(&ctx->early_buf) == 0)
+ b_free(&ctx->early_buf);
+ return try;
+ }
+#endif
+
+ if (conn->flags & (CO_FL_WAIT_XPRT | CO_FL_SSL_WAIT_HS))
+ /* a handshake was requested */
+ return 0;
+
+ /* read the largest possible block. For this, we perform only one call
+ * to recv() unless the buffer wraps and we exactly fill the first hunk,
+ * in which case we accept to do it once again. A new attempt is made on
+ * EINTR too.
+ */
+ while (count > 0) {
+
+ try = b_contig_space(buf);
+ if (!try)
+ break;
+
+ if (try > count)
+ try = count;
+
+ ret = SSL_read(ctx->ssl, b_tail(buf), try);
+
+ if (conn->flags & CO_FL_ERROR) {
+ /* CO_FL_ERROR may be set by ssl_sock_infocbk */
+ goto out_error;
+ }
+ if (ret > 0) {
+ b_add(buf, ret);
+ done += ret;
+ count -= ret;
+ }
+ else {
+ ret = SSL_get_error(ctx->ssl, ret);
+ if (ret == SSL_ERROR_WANT_WRITE) {
+ /* handshake is running, and it needs to enable write */
+ conn->flags |= CO_FL_SSL_WAIT_HS;
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND, &ctx->wait_event);
+#ifdef SSL_MODE_ASYNC
+ /* Async mode can be re-enabled, because we're leaving data state.*/
+ if (global_ssl.async)
+ SSL_set_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ break;
+ }
+ else if (ret == SSL_ERROR_WANT_READ) {
+ if (SSL_renegotiate_pending(ctx->ssl)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx,
+ SUB_RETRY_RECV,
+ &ctx->wait_event);
+ /* handshake is running, and it may need to re-enable read */
+ conn->flags |= CO_FL_SSL_WAIT_HS;
+#ifdef SSL_MODE_ASYNC
+ /* Async mode can be re-enabled, because we're leaving data state.*/
+ if (global_ssl.async)
+ SSL_set_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ break;
+ }
+ break;
+ } else if (ret == SSL_ERROR_ZERO_RETURN)
+ goto read0;
+ else if (ret == SSL_ERROR_SSL) {
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ if (ctx && !ctx->error_code)
+ ctx->error_code = ERR_peek_error();
+ conn->err_code = CO_ERR_SSL_FATAL;
+ }
+ /* For SSL_ERROR_SYSCALL, make sure to clear the error
+ * stack before shutting down the connection for
+ * reading. */
+ if (ret == SSL_ERROR_SYSCALL && (!errno || errno == EAGAIN || errno == EWOULDBLOCK))
+ goto clear_ssl_error;
+ /* otherwise it's a real error */
+ goto out_error;
+ }
+ }
+ leave:
+ return done;
+
+ clear_ssl_error:
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+ read0:
+ conn_sock_read0(conn);
+ goto leave;
+
+ out_error:
+ conn->flags |= CO_FL_ERROR;
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+ goto leave;
+}
+
+
+/* Send up to <count> pending bytes from buffer <buf> to connection <conn>'s
+ * socket. <flags> may contain some CO_SFL_* flags to hint the system about
+ * other pending data for example, but this flag is ignored at the moment.
+ * Only one call to send() is performed, unless the buffer wraps, in which case
+ * a second call may be performed. The connection's flags are updated with
+ * whatever special event is detected (error, empty). The caller is responsible
+ * for taking care of those events and avoiding the call if inappropriate. The
+ * function does not call the connection's polling update function, so the caller
+ * is responsible for this. The buffer's output is not adjusted either; it's
+ * up to the caller to update the buffer's contents based on the return value.
+ */
+static size_t ssl_sock_from_buf(struct connection *conn, void *xprt_ctx, const struct buffer *buf, size_t count, int flags)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+ ssize_t ret;
+ size_t try, done;
+
+ done = 0;
+
+ if (!ctx)
+ goto out_error;
+
+ if (conn->flags & (CO_FL_WAIT_XPRT | CO_FL_SSL_WAIT_HS | CO_FL_EARLY_SSL_HS))
+ /* a handshake was requested */
+ return 0;
+
+ /* send the largest possible block. For this we perform only one call
+ * to send() unless the buffer wraps and we exactly fill the first hunk,
+ * in which case we accept to do it once again.
+ */
+ while (count) {
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ size_t written_data;
+#endif
+
+ try = b_contig_data(buf, done);
+ if (try > count)
+ try = count;
+
+ if (global_ssl.hard_max_record && try > global_ssl.hard_max_record)
+ try = global_ssl.hard_max_record;
+
+ if (!(flags & CO_SFL_STREAMER) &&
+ !(ctx->xprt_st & SSL_SOCK_SEND_UNLIMITED) &&
+ global_ssl.max_record && try > global_ssl.max_record) {
+ try = global_ssl.max_record;
+ }
+ else {
+ /* we need to keep the information about the fact that
+ * we're not limiting the upcoming send(), because if it
+ * fails, we'll have to retry with at least as many data.
+ */
+ ctx->xprt_st |= SSL_SOCK_SEND_UNLIMITED;
+ }
+
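+ /* flag the context when more data is expected to follow this send so
+ * that the underlying transport may coalesce it with subsequent data */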
+ if (try < count || flags & CO_SFL_MSG_MORE)
+ ctx->xprt_st |= SSL_SOCK_SEND_MORE;
+ else
+ ctx->xprt_st &= ~SSL_SOCK_SEND_MORE;
+
+#ifdef SSL_READ_EARLY_DATA_SUCCESS
+ if (!SSL_is_init_finished(ctx->ssl) && conn_is_back(conn)) {
+ unsigned int max_early;
+
+ if (objt_listener(conn->target))
+ max_early = SSL_get_max_early_data(ctx->ssl);
+ else {
+ if (SSL_get0_session(ctx->ssl))
+ max_early = SSL_SESSION_get_max_early_data(SSL_get0_session(ctx->ssl));
+ else
+ max_early = 0;
+ }
+
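+ /* clamp <try> so that the total early data sent on this connection
+ * never exceeds the peer's advertised limit; once the quota is
+ * exhausted, switch to completing the regular handshake instead */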
+ if (try + ctx->sent_early_data > max_early) {
+ try -= (try + ctx->sent_early_data) - max_early;
+ if (try <= 0) {
+ conn->flags |= CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN;
+ tasklet_wakeup(ctx->wait_event.tasklet);
+ break;
+ }
+ }
+ ret = SSL_write_early_data(ctx->ssl, b_peek(buf, done), try, &written_data);
+ if (ret == 1) {
+ ret = written_data;
+ ctx->sent_early_data += ret;
+ if (objt_server(conn->target)) {
+ conn->flags |= CO_FL_SSL_WAIT_HS | CO_FL_WAIT_L6_CONN | CO_FL_EARLY_DATA;
+ /* Initiate the handshake, now */
+ tasklet_wakeup(ctx->wait_event.tasklet);
+ }
+
+ }
+
+ } else
+#endif
+ ret = SSL_write(ctx->ssl, b_peek(buf, done), try);
+
+ if (conn->flags & CO_FL_ERROR) {
+ /* CO_FL_ERROR may be set by ssl_sock_infocbk */
+ goto out_error;
+ }
+ if (ret > 0) {
+ /* A send succeeded, so we can consider ourself connected */
+ conn->flags &= ~CO_FL_WAIT_L4L6;
+ ctx->xprt_st &= ~SSL_SOCK_SEND_UNLIMITED;
+ count -= ret;
+ done += ret;
+ }
+ else {
+ ret = SSL_get_error(ctx->ssl, ret);
+
+ if (ret == SSL_ERROR_WANT_WRITE) {
+ if (SSL_renegotiate_pending(ctx->ssl)) {
+ /* handshake is running, and it may need to re-enable write */
+ conn->flags |= CO_FL_SSL_WAIT_HS;
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND, &ctx->wait_event);
+#ifdef SSL_MODE_ASYNC
+ /* Async mode can be re-enabled, because we're leaving data state.*/
+ if (global_ssl.async)
+ SSL_set_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ break;
+ }
+
+ break;
+ }
+ else if (ret == SSL_ERROR_WANT_READ) {
+ /* handshake is running, and it needs to enable read */
+ conn->flags |= CO_FL_SSL_WAIT_HS;
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx,
+ SUB_RETRY_RECV,
+ &ctx->wait_event);
+#ifdef SSL_MODE_ASYNC
+ /* Async mode can be re-enabled, because we're leaving data state.*/
+ if (global_ssl.async)
+ SSL_set_mode(ctx->ssl, SSL_MODE_ASYNC);
+#endif
+ break;
+ }
+ else if (ret == SSL_ERROR_SSL || ret == SSL_ERROR_SYSCALL) {
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (ctx && !ctx->error_code)
+ ctx->error_code = ERR_peek_error();
+ conn->err_code = CO_ERR_SSL_FATAL;
+ }
+ goto out_error;
+ }
+ }
+ leave:
+ return done;
+
+ out_error:
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+
+ conn->flags |= CO_FL_ERROR;
+ goto leave;
+}
+
+void ssl_sock_close(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (ctx) {
+ if (ctx->wait_event.events != 0)
+ ctx->xprt->unsubscribe(ctx->conn, ctx->xprt_ctx,
+ ctx->wait_event.events,
+ &ctx->wait_event);
+ if (ctx->subs) {
+ ctx->subs->events = 0;
+ tasklet_wakeup(ctx->subs->tasklet);
+ }
+
+ if (ctx->xprt->close)
+ ctx->xprt->close(conn, ctx->xprt_ctx);
+#ifdef SSL_MODE_ASYNC
+ if (global_ssl.async) {
+ OSSL_ASYNC_FD all_fd[32], afd;
+ size_t num_all_fds = 0;
+ int i;
+
+ SSL_get_all_async_fds(ctx->ssl, NULL, &num_all_fds);
+ if (num_all_fds > 32) {
+ send_log(NULL, LOG_EMERG, "haproxy: openssl returned too many async fds. This looks like a bug. The process may crash\n");
+ return;
+ }
+
+ SSL_get_all_async_fds(ctx->ssl, all_fd, &num_all_fds);
+
+ /* If an async job is pending, we must try to
+ catch its end using polling before calling
+ SSL_free */
+ if (num_all_fds && SSL_waiting_for_async(ctx->ssl)) {
+ for (i=0 ; i < num_all_fds ; i++) {
+ /* switch to a handler designed to
+ * handle the SSL_free
+ */
+ afd = all_fd[i];
+ fdtab[afd].iocb = ssl_async_fd_free;
+ fdtab[afd].owner = ctx->ssl;
+ fd_want_recv(afd);
+ /* To ensure that the fd cache won't be used
+ * and we'll catch a real RD event.
+ */
+ fd_cant_recv(afd);
+ }
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(ssl_sock_ctx_pool, ctx);
+ _HA_ATOMIC_INC(&jobs);
+ return;
+ }
+ /* Else we can remove the fds from the fdtab
+ * and call SSL_free.
+ * note: we flag the fds with FD_DISOWN before deleting
+ * them because they are owned by the engine, which is
+ * responsible for closing them.
+ */
+ for (i=0 ; i < num_all_fds ; i++) {
+ /* We want to remove the fd from the fdtab
+ * but we flag it to disown because the
+ * close is performed by the engine itself
+ */
+ fdtab[all_fd[i]].state |= FD_DISOWN;
+ fd_delete(all_fd[i]);
+ }
+ }
+#endif
+ SSL_free(ctx->ssl);
+ b_free(&ctx->early_buf);
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(ssl_sock_ctx_pool, ctx);
+ _HA_ATOMIC_DEC(&global.sslconns);
+ }
+}
+
+/* This function tries to perform a clean shutdown on an SSL connection, and in
+ * any case, flags the connection as reusable if no handshake was in progress.
+ */
+static void ssl_sock_shutw(struct connection *conn, void *xprt_ctx, int clean)
+{
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+
+ if (conn->flags & (CO_FL_WAIT_XPRT | CO_FL_SSL_WAIT_HS))
+ return;
+ if (!clean)
+ /* don't send close_notify on SSL_shutdown */
+ SSL_set_quiet_shutdown(ctx->ssl, 1);
+ /* no handshake was in progress, try a clean ssl shutdown */
+ if (SSL_shutdown(ctx->ssl) <= 0) {
+ /* Clear openssl global errors stack */
+ ssl_sock_dump_errors(conn, NULL);
+ ERR_clear_error();
+ }
+}
+
+
+/* used for ppv2 pkey algo (can be used for logging) */
+int ssl_sock_get_pkey_algo(struct connection *conn, struct buffer *out)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ X509 *crt;
+
+ if (!ctx)
+ return 0;
+ crt = SSL_get_certificate(ctx->ssl);
+ if (!crt)
+ return 0;
+
+ return cert_get_pkey_algo(crt, out);
+}
+
+/* used for ppv2 cert signature (can be used for logging) */
+const char *ssl_sock_get_cert_sig(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ __OPENSSL_110_CONST__ ASN1_OBJECT *algorithm;
+ X509 *crt;
+
+ if (!ctx)
+ return NULL;
+ crt = SSL_get_certificate(ctx->ssl);
+ if (!crt)
+ return NULL;
+ X509_ALGOR_get0(&algorithm, NULL, NULL, X509_get0_tbs_sigalg(crt));
+ return OBJ_nid2sn(OBJ_obj2nid(algorithm));
+}
+
+/* used for ppv2 authority */
+const char *ssl_sock_get_sni(struct connection *conn)
+{
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return NULL;
+ return SSL_get_servername(ctx->ssl, TLSEXT_NAMETYPE_host_name);
+#else
+ return NULL;
+#endif
+}
+
+/* used for logging/ppv2, may be changed for a sample fetch later */
+const char *ssl_sock_get_cipher_name(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return NULL;
+ return SSL_get_cipher_name(ctx->ssl);
+}
+
+/* used for logging/ppv2, may be changed for a sample fetch later */
+const char *ssl_sock_get_proto_version(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return NULL;
+ return SSL_get_version(ctx->ssl);
+}
+
+void ssl_sock_set_alpn(struct connection *conn, const unsigned char *alpn, int len)
+{
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return;
+ SSL_set_alpn_protos(ctx->ssl, alpn, len);
+#endif
+}
+
+/* Sets advertised SNI for outgoing connections. Please set <hostname> to NULL
+ * to disable SNI.
+ */
+void ssl_sock_set_servername(struct connection *conn, const char *hostname)
+{
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ char *prev_name;
+
+ if (!ctx)
+ return;
+
+ BUG_ON(!(conn->flags & CO_FL_WAIT_L6_CONN));
+ BUG_ON(!(conn->flags & CO_FL_SSL_WAIT_HS));
+
+ /* if the SNI changes, we must destroy the reusable session so that a
+ * new connection will present a new SNI. Compare it with the SNI
+ * previously stored in the reused_sess. If the session was reused,
+ * the associated SNI (if any) has already been assigned to the SSL
+ * during ssl_sock_init() so SSL_get_servername() will properly
+ * retrieve the currently known hostname for the SSL.
+ */
+
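+ /* note: the condition below relies on short-circuit evaluation so that
+ * strcmp() is only reached when both <hostname> and <prev_name> are
+ * non-NULL */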
+ prev_name = (char *)SSL_get_servername(ctx->ssl, TLSEXT_NAMETYPE_host_name);
+ if ((!prev_name && hostname) ||
+ !hostname ||
+ strcmp(hostname, prev_name) != 0) {
+ SSL_set_session(ctx->ssl, NULL);
+ SSL_set_tlsext_host_name(ctx->ssl, hostname);
+ }
+#endif
+}
+
+/* Extract peer certificate's common name into the chunk dest
+ * Returns
+ * the len of the extracted common name
+ * or 0 if no CN found in DN
+ * or -1 in error cases (i.e. no peer certificate)
+ */
+int ssl_sock_get_remote_common_name(struct connection *conn,
+ struct buffer *dest)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ X509 *crt = NULL;
+ X509_NAME *name;
+ const char find_cn[] = "CN";
+ const struct buffer find_cn_chunk = {
+ .area = (char *)&find_cn,
+ .data = sizeof(find_cn)-1
+ };
+ int result = -1;
+
+ if (!ctx)
+ goto out;
+
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ crt = SSL_get_peer_certificate(ctx->ssl);
+ if (!crt)
+ goto out;
+
+ name = X509_get_subject_name(crt);
+ if (!name)
+ goto out;
+
+ result = ssl_sock_get_dn_entry(name, &find_cn_chunk, 1, dest);
+out:
+ if (crt)
+ X509_free(crt);
+
+ return result;
+}
+
+/* returns 1 if client passed a certificate for this session, 0 if not */
+int ssl_sock_get_cert_used_sess(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+ X509 *crt = NULL;
+
+ if (!ctx)
+ return 0;
+
+ /* SSL_get_peer_certificate increases the X509 refcount */
+ crt = SSL_get_peer_certificate(ctx->ssl);
+ if (!crt)
+ return 0;
+
+ X509_free(crt);
+ return 1;
+}
+
+/* returns 1 if client passed a certificate for this connection, 0 if not */
+int ssl_sock_get_cert_used_conn(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return 0;
+ return SSL_SOCK_ST_FL_VERIFY_DONE & ctx->xprt_st ? 1 : 0;
+}
+
+/* returns result from SSL verify */
+unsigned int ssl_sock_get_verify_result(struct connection *conn)
+{
+ struct ssl_sock_ctx *ctx = conn_get_ssl_sock_ctx(conn);
+
+ if (!ctx)
+ return (unsigned int)X509_V_ERR_APPLICATION_VERIFICATION;
+ return (unsigned int)SSL_get_verify_result(ctx->ssl);
+}
+
+/* Returns the application layer protocol name in <str> and <len> when known.
+ * Zero is returned if the protocol name was not found, otherwise non-zero is
+ * returned. The string is allocated in the SSL context and doesn't have to be
+ * freed by the caller. NPN is also checked if available since older versions
+ * of openssl (1.0.1) which are more common in field only support this one.
+ */
+int ssl_sock_get_alpn(const struct connection *conn, void *xprt_ctx, const char **str, int *len)
+{
+#if defined(TLSEXT_TYPE_application_layer_protocol_negotiation) || \
+ defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ struct ssl_sock_ctx *ctx = xprt_ctx;
+ if (!ctx)
+ return 0;
+
+ *str = NULL;
+
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ SSL_get0_alpn_selected(ctx->ssl, (const unsigned char **)str, (unsigned *)len);
+ if (*str)
+ return 1;
+#endif
+#if defined(OPENSSL_NPN_NEGOTIATED) && !defined(OPENSSL_NO_NEXTPROTONEG)
+ SSL_get0_next_proto_negotiated(ctx->ssl, (const unsigned char **)str, (unsigned *)len);
+ if (*str)
+ return 1;
+#endif
+#endif
+ return 0;
+}
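+
+/* Editor's illustration, not upstream: typical use of ssl_sock_get_alpn()
+ * to test the negotiated protocol. <str> points into the SSL context and
+ * must not be freed by the caller.
+ */
+#if 0
+static int example_negotiated_h2(struct connection *conn, void *xprt_ctx)
+{
+ const char *str = NULL;
+ int len = 0;
+
+ if (!ssl_sock_get_alpn(conn, xprt_ctx, &str, &len))
+ return 0; /* neither ALPN nor NPN was negotiated */
+ return len == 2 && memcmp(str, "h2", 2) == 0;
+}
+#endif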
+
+/* "issuers-chain-path" load chain certificate in global */
+int ssl_load_global_issuer_from_BIO(BIO *in, char *fp, char **err)
+{
+ X509 *ca;
+ X509_NAME *name = NULL;
+ ASN1_OCTET_STRING *skid = NULL;
+ STACK_OF(X509) *chain = NULL;
+ struct issuer_chain *issuer;
+ struct eb64_node *node;
+ char *path;
+ u64 key;
+ int ret = 0;
+
+ while ((ca = PEM_read_bio_X509(in, NULL, NULL, NULL))) {
+ if (chain == NULL) {
+ chain = sk_X509_new_null();
+ skid = X509_get_ext_d2i(ca, NID_subject_key_identifier, NULL, NULL);
+ name = X509_get_subject_name(ca);
+ }
+ if (!sk_X509_push(chain, ca)) {
+ X509_free(ca);
+ goto end;
+ }
+ }
+ if (!chain) {
+ memprintf(err, "unable to load issuers-chain %s : pem certificate not found.\n", fp);
+ goto end;
+ }
+ if (!skid) {
+ memprintf(err, "unable to load issuers-chain %s : SubjectKeyIdentifier not found.\n", fp);
+ goto end;
+ }
+ if (!name) {
+ memprintf(err, "unable to load issuers-chain %s : SubjectName not found.\n", fp);
+ goto end;
+ }
+ key = XXH3(ASN1_STRING_get0_data(skid), ASN1_STRING_length(skid), 0);
+ for (node = eb64_lookup(&cert_issuer_tree, key); node; node = eb64_next(node)) {
+ issuer = container_of(node, typeof(*issuer), node);
+ if (!X509_NAME_cmp(name, X509_get_subject_name(sk_X509_value(issuer->chain, 0)))) {
+ memprintf(err, "duplicate issuers-chain %s: %s already in store\n", fp, issuer->path);
+ goto end;
+ }
+ }
+ issuer = calloc(1, sizeof *issuer);
+ path = strdup(fp);
+ if (!issuer || !path) {
+ free(issuer);
+ free(path);
+ goto end;
+ }
+ issuer->node.key = key;
+ issuer->path = path;
+ issuer->chain = chain;
+ chain = NULL;
+ eb64_insert(&cert_issuer_tree, &issuer->node);
+ ret = 1;
+ end:
+ if (skid)
+ ASN1_OCTET_STRING_free(skid);
+ if (chain)
+ sk_X509_pop_free(chain, X509_free);
+ return ret;
+}
+
+struct issuer_chain* ssl_get0_issuer_chain(X509 *cert)
+{
+ AUTHORITY_KEYID *akid;
+ struct issuer_chain *issuer = NULL;
+
+ akid = X509_get_ext_d2i(cert, NID_authority_key_identifier, NULL, NULL);
+ if (akid && akid->keyid) {
+ struct eb64_node *node;
+ u64 hk;
+ hk = XXH3(ASN1_STRING_get0_data(akid->keyid), ASN1_STRING_length(akid->keyid), 0);
+ for (node = eb64_lookup(&cert_issuer_tree, hk); node; node = eb64_next(node)) {
+ struct issuer_chain *ti = container_of(node, typeof(*issuer), node);
+ if (X509_check_issued(sk_X509_value(ti->chain, 0), cert) == X509_V_OK) {
+ issuer = ti;
+ break;
+ }
+ }
+ }
+ AUTHORITY_KEYID_free(akid);
+ return issuer;
+}
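+
+/* Editor's note, not upstream: ssl_load_global_issuer_from_BIO() indexes
+ * each chain by the XXH3 hash of its leading certificate's Subject Key
+ * Identifier; ssl_get0_issuer_chain() looks a certificate's Authority Key
+ * Identifier up in that same tree and confirms the match with
+ * X509_check_issued(), so a hash collision cannot attach a wrong chain.
+ */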
+
+void ssl_free_global_issuers(void)
+{
+ struct eb64_node *node, *back;
+ struct issuer_chain *issuer;
+
+ node = eb64_first(&cert_issuer_tree);
+ while (node) {
+ issuer = container_of(node, typeof(*issuer), node);
+ back = eb64_next(node);
+ eb64_delete(node);
+ free(issuer->path);
+ sk_X509_pop_free(issuer->chain, X509_free);
+ free(issuer);
+ node = back;
+ }
+}
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+static int ssl_check_async_engine_count(void) {
+ int err_code = ERR_NONE;
+
+ if (global_ssl.async && (openssl_engines_initialized > 32)) {
+ ha_alert("ssl-mode-async only supports a maximum of 32 engines.\n");
+ err_code = ERR_ABORT;
+ }
+ return err_code;
+}
+#endif
+
+/* "show fd" helper to dump ssl internals. Warning: the output buffer is often
+ * the common trash! It returns non-zero if the connection entry looks suspicious.
+ */
+static int ssl_sock_show_fd(struct buffer *buf, const struct connection *conn, const void *ctx)
+{
+ const struct ssl_sock_ctx *sctx = ctx;
+ int ret = 0;
+
+ if (!sctx)
+ return ret;
+
+ if (sctx->conn != conn) {
+ chunk_appendf(&trash, " xctx.conn=%p(BOGUS)", sctx->conn);
+ ret = 1;
+ }
+ chunk_appendf(&trash, " xctx.st=%d .err=%ld", sctx->xprt_st, sctx->error_code);
+
+ if (sctx->xprt) {
+ chunk_appendf(&trash, " .xprt=%s", sctx->xprt->name);
+ if (sctx->xprt_ctx)
+ chunk_appendf(&trash, " .xctx=%p", sctx->xprt_ctx);
+ }
+
+ chunk_appendf(&trash, " .wait.ev=%d", sctx->wait_event.events);
+
+ /* as soon as a shutdown is reported the lower layer unregisters its
+ * subscriber, so the situations below are transient and rare enough to
+ * be reported as suspicious. In any case they shouldn't last.
+ */
+ if ((sctx->wait_event.events & 1) && (conn->flags & (CO_FL_SOCK_RD_SH|CO_FL_ERROR)))
+ ret = 1;
+ if ((sctx->wait_event.events & 2) && (conn->flags & (CO_FL_SOCK_WR_SH|CO_FL_ERROR)))
+ ret = 1;
+
+ chunk_appendf(&trash, " .subs=%p", sctx->subs);
+ if (sctx->subs) {
+ chunk_appendf(&trash, "(ev=%d tl=%p", sctx->subs->events, sctx->subs->tasklet);
+ if (sctx->subs->tasklet->calls >= 1000000)
+ ret = 1;
+ chunk_appendf(&trash, " tl.calls=%d tl.ctx=%p tl.fct=",
+ sctx->subs->tasklet->calls,
+ sctx->subs->tasklet->context);
+ resolve_sym_name(&trash, NULL, sctx->subs->tasklet->process);
+ chunk_appendf(&trash, ")");
+ }
+ chunk_appendf(&trash, " .sent_early=%d", sctx->sent_early_data);
+ chunk_appendf(&trash, " .early_in=%d", (int)sctx->early_buf.data);
+ return ret;
+}
+
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+/* This function is used with TLS ticket keys management. It allows browsing
+ * each reference. The variable <ref> must point to the current node's list
+ * element (starting at the root), and <end> must point to the root node.
+ */
+static inline
+struct tls_keys_ref *tlskeys_list_get_next(struct list *ref, struct list *end)
+{
+ /* Get next list entry. */
+ ref = ref->n;
+
+ /* If the entry is the last of the list, return NULL. */
+ if (ref == end)
+ return NULL;
+
+ return LIST_ELEM(ref, struct tls_keys_ref *, list);
+}
+
+static inline
+struct tls_keys_ref *tlskeys_ref_lookup_ref(const char *reference)
+{
+ int id;
+ char *error;
+
+ /* If the reference starts with a '#', it is a numeric id. */
+ if (reference[0] == '#') {
+ /* Try to convert the numeric id. If the conversion fails, the lookup fails. */
+ id = strtol(reference + 1, &error, 10);
+ if (*error != '\0')
+ return NULL;
+
+ /* Perform the unique id lookup. */
+ return tlskeys_ref_lookupid(id);
+ }
+
+ /* Perform the string lookup. */
+ return tlskeys_ref_lookup(reference);
+}
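+
+/* Editor's illustration, not upstream; the file name below is hypothetical.
+ * Both reference forms accepted by the CLI:
+ */
+#if 0
+static struct tls_keys_ref *example_tlskeys_lookup(void)
+{
+ struct tls_keys_ref *by_id = tlskeys_ref_lookup_ref("#1");
+ struct tls_keys_ref *by_file = tlskeys_ref_lookup_ref("/etc/haproxy/ticket.keys");
+
+ return by_id ? by_id : by_file;
+}
+#endif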
+#endif
+
+
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+
+/* dumps all tls keys. Relies on the show_keys_ctx context from the appctx. */
+static int cli_io_handler_tlskeys_files(struct appctx *appctx)
+{
+ struct show_keys_ctx *ctx = appctx->svcctx;
+
+ switch (ctx->state) {
+ case SHOW_KEYS_INIT:
+ /* Display the column headers. If the message cannot be sent,
+ * quit the function by returning 0. It will be called again
+ * later and will restart at the "SHOW_KEYS_INIT" state.
+ */
+ chunk_reset(&trash);
+
+ if (ctx->dump_entries)
+ chunk_appendf(&trash, "# id secret\n");
+ else
+ chunk_appendf(&trash, "# id (file)\n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ return 0;
+
+ /* Now we start browsing the references lists.
+ * Note that the following call to LIST_ELEM returns a bad pointer. The only
+ * valid field of this pointer is <list>. It is used with the function
+ * tlskeys_list_get_next() to return the first available entry.
+ */
+ if (ctx->next_ref == NULL)
+ ctx->next_ref = tlskeys_list_get_next(&tlskeys_reference, &tlskeys_reference);
+
+ ctx->state = SHOW_KEYS_LIST;
+ __fallthrough;
+
+ case SHOW_KEYS_LIST:
+ while (ctx->next_ref) {
+ struct tls_keys_ref *ref = ctx->next_ref;
+
+ chunk_reset(&trash);
+ if (ctx->dump_entries && ctx->next_index == 0)
+ chunk_appendf(&trash, "# ");
+
+ if (ctx->next_index == 0)
+ chunk_appendf(&trash, "%d (%s)\n", ref->unique_id, ref->filename);
+
+ if (ctx->dump_entries) {
+ int head;
+
+ HA_RWLOCK_RDLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ head = ref->tls_ticket_enc_index;
+ while (ctx->next_index < TLS_TICKETS_NO) {
+ struct buffer *t2 = get_trash_chunk();
+
+ chunk_reset(t2);
+ /* should never fail here because we dump only a key in the t2 buffer */
+ if (ref->key_size_bits == 128) {
+ t2->data = a2base64((char *)(ref->tlskeys + (head + 2 + ctx->next_index) % TLS_TICKETS_NO),
+ sizeof(struct tls_sess_key_128),
+ t2->area, t2->size);
+ chunk_appendf(&trash, "%d.%d %s\n", ref->unique_id, ctx->next_index,
+ t2->area);
+ }
+ else if (ref->key_size_bits == 256) {
+ t2->data = a2base64((char *)(ref->tlskeys + (head + 2 + ctx->next_index) % TLS_TICKETS_NO),
+ sizeof(struct tls_sess_key_256),
+ t2->area, t2->size);
+ chunk_appendf(&trash, "%d.%d %s\n", ref->unique_id, ctx->next_index,
+ t2->area);
+ }
+ else {
+ /* This case should never happen */
+ chunk_appendf(&trash, "%d.%d <unknown>\n", ref->unique_id, ctx->next_index);
+ }
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream. We add ourselves into
+ * this stream's users so that it can remove us upon termination.
+ */
+ HA_RWLOCK_RDUNLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ return 0;
+ }
+ ctx->next_index++;
+ }
+ HA_RWLOCK_RDUNLOCK(TLSKEYS_REF_LOCK, &ref->lock);
+ ctx->next_index = 0;
+ }
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream. We add ourselves into
+ * this stream's users so that it can remove us upon termination.
+ */
+ return 0;
+ }
+
+ if (ctx->names_only == 0) /* don't display everything if not necessary */
+ break;
+
+ /* get next list entry and check the end of the list */
+ ctx->next_ref = tlskeys_list_get_next(&ref->list, &tlskeys_reference);
+ }
+ ctx->state = SHOW_KEYS_DONE;
+ __fallthrough;
+
+ default:
+ return 1;
+ }
+ return 0;
+}
+
+/* Prepares a "show_keys_ctx" and sets the appropriate io_handler if needed */
+static int cli_parse_show_tlskeys(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_keys_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ /* no parameter, shows only file list */
+ if (!*args[2]) {
+ ctx->names_only = 1;
+ return 0;
+ }
+
+ if (args[2][0] == '*') {
+ /* list every TLS ticket keys */
+ ctx->names_only = 1;
+ } else {
+ ctx->next_ref = tlskeys_ref_lookup_ref(args[2]);
+ if (!ctx->next_ref)
+ return cli_err(appctx, "'show tls-keys' unable to locate referenced filename\n");
+ }
+
+ ctx->dump_entries = 1;
+ return 0;
+}
+
+static int cli_parse_set_tlskeys(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct tls_keys_ref *ref;
+ int ret;
+
+ /* Expect two parameters: the filename and the new TLS key in base64 encoding */
+ if (!*args[3] || !*args[4])
+ return cli_err(appctx, "'set ssl tls-key' expects a filename and the new TLS key in base64 encoding.\n");
+
+ ref = tlskeys_ref_lookup_ref(args[3]);
+ if (!ref)
+ return cli_err(appctx, "'set ssl tls-key' unable to locate referenced filename\n");
+
+ ret = base64dec(args[4], strlen(args[4]), trash.area, trash.size);
+ if (ret < 0)
+ return cli_err(appctx, "'set ssl tls-key' received invalid base64 encoded TLS key.\n");
+
+ trash.data = ret;
+ if (ssl_sock_update_tlskey_ref(ref, &trash) < 0)
+ return cli_err(appctx, "'set ssl tls-key' received a key of wrong size.\n");
+
+ return cli_msg(appctx, LOG_INFO, "TLS ticket key updated!\n");
+}
+#endif
+
+
+#ifdef HAVE_SSL_PROVIDERS
+struct provider_name {
+ const char *name;
+ struct list list;
+};
+
+
+static int ssl_provider_get_name_cb(OSSL_PROVIDER *provider, void *cbdata)
+{
+ struct list *provider_names = cbdata;
+ struct provider_name *item = NULL;
+ const char *name = OSSL_PROVIDER_get0_name(provider);
+
+ if (!provider_names)
+ return 0;
+
+ item = calloc(1, sizeof(*item));
+
+ if (!item)
+ return 0;
+
+ item->name = name;
+ LIST_APPEND(provider_names, &item->list);
+
+ return 1;
+}
+
+static void ssl_provider_get_name_list(struct list *provider_names)
+{
+ if (!provider_names)
+ return;
+
+ OSSL_PROVIDER_do_all(NULL, ssl_provider_get_name_cb, provider_names);
+}
+
+static void ssl_provider_clear_name_list(struct list *provider_names)
+{
+ struct provider_name *item = NULL, *item_s = NULL;
+
+ if (provider_names) {
+ list_for_each_entry_safe(item, item_s, provider_names, list) {
+ LIST_DELETE(&item->list);
+ free(item);
+ }
+ }
+}
+
+static int cli_io_handler_show_providers(struct appctx *appctx)
+{
+ struct buffer *trash = get_trash_chunk();
+ struct list provider_names;
+ struct provider_name *name;
+
+ LIST_INIT(&provider_names);
+
+ chunk_appendf(trash, "Loaded providers : \n");
+
+ ssl_provider_get_name_list(&provider_names);
+
+ list_for_each_entry(name, &provider_names, list) {
+ chunk_appendf(trash, "\t- %s\n", name->name);
+ }
+
+ ssl_provider_clear_name_list(&provider_names);
+
+ if (applet_putchk(appctx, trash) == -1)
+ goto yield;
+
+ return 1;
+
+yield:
+ return 0;
+}
+#endif
+
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+ { { "show", "tls-keys", NULL }, "show tls-keys [id|*] : show tls keys references or dump tls ticket keys when id specified", cli_parse_show_tlskeys, cli_io_handler_tlskeys_files },
+ { { "set", "ssl", "tls-key", NULL }, "set ssl tls-key [id|file] <key> : set the next TLS key for the <id> or <file> listener to <key>", cli_parse_set_tlskeys, NULL },
+#endif
+#ifdef HAVE_SSL_PROVIDERS
+ { { "show", "ssl", "providers", NULL }, "show ssl providers : show loaded SSL providers", NULL, cli_io_handler_show_providers },
+#endif
+ { { NULL }, NULL, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/* transport-layer operations for SSL sockets */
+struct xprt_ops ssl_sock = {
+ .snd_buf = ssl_sock_from_buf,
+ .rcv_buf = ssl_sock_to_buf,
+ .subscribe = ssl_subscribe,
+ .unsubscribe = ssl_unsubscribe,
+ .remove_xprt = ssl_remove_xprt,
+ .add_xprt = ssl_add_xprt,
+ .rcv_pipe = NULL,
+ .snd_pipe = NULL,
+ .shutr = NULL,
+ .shutw = ssl_sock_shutw,
+ .close = ssl_sock_close,
+ .init = ssl_sock_init,
+ .start = ssl_sock_start,
+ .prepare_bind_conf = ssl_sock_prepare_bind_conf,
+ .destroy_bind_conf = ssl_sock_destroy_bind_conf,
+ .prepare_srv = ssl_sock_prepare_srv_ctx,
+ .destroy_srv = ssl_sock_free_srv_ctx,
+ .get_alpn = ssl_sock_get_alpn,
+ .takeover = ssl_takeover,
+ .set_idle = ssl_set_idle,
+ .set_used = ssl_set_used,
+ .get_ssl_sock_ctx = ssl_sock_get_ctx,
+ .name = "SSL",
+ .show_fd = ssl_sock_show_fd,
+};
+
+enum act_return ssl_action_wait_for_hs(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *conn;
+
+ conn = objt_conn(sess->origin);
+
+ if (conn) {
+ if (conn->flags & (CO_FL_EARLY_SSL_HS | CO_FL_SSL_WAIT_HS)) {
+ sc_ep_set(s->scf, SE_FL_WAIT_FOR_HS);
+ s->req.flags |= CF_READ_EVENT;
+ return ACT_RET_YIELD;
+ }
+ }
+ return (ACT_RET_CONT);
+}
+
+static enum act_parse_ret ssl_parse_wait_for_hs(const char **args, int *orig_arg, struct proxy *px, struct act_rule *rule, char **err)
+{
+ rule->action_ptr = ssl_action_wait_for_hs;
+
+ return ACT_RET_PRS_OK;
+}
+
+static struct action_kw_list http_req_actions = {ILH, {
+ { "wait-for-handshake", ssl_parse_wait_for_hs },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+
+#ifdef HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT
+
+static void ssl_sock_sctl_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ if (ptr) {
+ chunk_destroy(ptr);
+ free(ptr);
+ }
+}
+
+#endif
+
+
+static void ssl_sock_capture_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ pool_free(pool_head_ssl_capture, ptr);
+}
+
+#ifdef HAVE_SSL_KEYLOG
+static void ssl_sock_keylog_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ struct ssl_keylog *keylog;
+
+ if (!ptr)
+ return;
+
+ keylog = ptr;
+
+ pool_free(pool_head_ssl_keylog_str, keylog->client_random);
+ pool_free(pool_head_ssl_keylog_str, keylog->client_early_traffic_secret);
+ pool_free(pool_head_ssl_keylog_str, keylog->client_handshake_traffic_secret);
+ pool_free(pool_head_ssl_keylog_str, keylog->server_handshake_traffic_secret);
+ pool_free(pool_head_ssl_keylog_str, keylog->client_traffic_secret_0);
+ pool_free(pool_head_ssl_keylog_str, keylog->server_traffic_secret_0);
+ pool_free(pool_head_ssl_keylog_str, keylog->exporter_secret);
+ pool_free(pool_head_ssl_keylog_str, keylog->early_exporter_secret);
+
+ pool_free(pool_head_ssl_keylog, ptr);
+}
+#endif
+
+static void ssl_sock_clt_crt_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ if (!ptr)
+ return;
+
+ X509_free((X509*)ptr);
+}
+
+static void ssl_sock_clt_sni_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int idx, long argl, void *argp)
+{
+ pool_free(ssl_sock_client_sni_pool, ptr);
+}
+
+static void __ssl_sock_init(void)
+{
+#if (!defined(OPENSSL_NO_COMP) && !defined(SSL_OP_NO_COMPRESSION))
+ STACK_OF(SSL_COMP)* cm;
+ int n;
+#endif
+
+ if (global_ssl.listen_default_ciphers)
+ global_ssl.listen_default_ciphers = strdup(global_ssl.listen_default_ciphers);
+ if (global_ssl.connect_default_ciphers)
+ global_ssl.connect_default_ciphers = strdup(global_ssl.connect_default_ciphers);
+#ifdef HAVE_SSL_CTX_SET_CIPHERSUITES
+ if (global_ssl.listen_default_ciphersuites)
+ global_ssl.listen_default_ciphersuites = strdup(global_ssl.listen_default_ciphersuites);
+ if (global_ssl.connect_default_ciphersuites)
+ global_ssl.connect_default_ciphersuites = strdup(global_ssl.connect_default_ciphersuites);
+#endif
+
+ xprt_register(XPRT_SSL, &ssl_sock);
+#if HA_OPENSSL_VERSION_NUMBER < 0x10100000L
+ SSL_library_init();
+#endif
+#if (!defined(OPENSSL_NO_COMP) && !defined(SSL_OP_NO_COMPRESSION))
+ cm = SSL_COMP_get_compression_methods();
+ n = sk_SSL_COMP_num(cm);
+ while (n--) {
+ (void) sk_SSL_COMP_pop(cm);
+ }
+#endif
+
+#if defined(USE_THREAD) && (HA_OPENSSL_VERSION_NUMBER < 0x10100000L)
+ ssl_locking_init();
+#endif
+#ifdef HAVE_SSL_CTX_ADD_SERVER_CUSTOM_EXT
+ sctl_ex_index = SSL_CTX_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_sctl_free_func);
+#endif
+
+#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL)
+ ocsp_ex_index = SSL_CTX_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_ocsp_free_func);
+#endif
+
+ ssl_app_data_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, NULL);
+ ssl_capture_ptr_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_capture_free_func);
+#ifdef USE_QUIC
+ ssl_qc_app_data_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, NULL);
+#endif /* USE_QUIC */
+#ifdef HAVE_SSL_KEYLOG
+ ssl_keylog_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_keylog_free_func);
+#endif
+ ssl_client_crt_ref_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_clt_crt_free_func);
+ ssl_client_sni_index = SSL_get_ex_new_index(0, NULL, NULL, NULL, ssl_sock_clt_sni_free_func);
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+ ENGINE_load_builtin_engines();
+ hap_register_post_check(ssl_check_async_engine_count);
+#endif
+#if (defined SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB && TLS_TICKETS_NO > 0)
+ hap_register_post_check(tlskeys_finalize_config);
+#endif
+
+ global.ssl_session_max_cost = SSL_SESSION_MAX_COST;
+ global.ssl_handshake_max_cost = SSL_HANDSHAKE_MAX_COST;
+
+ hap_register_post_deinit(ssl_free_global_issuers);
+
+#ifndef OPENSSL_NO_DH
+ ssl_dh_ptr_index = SSL_CTX_get_ex_new_index(0, NULL, NULL, NULL, NULL);
+ hap_register_post_deinit(ssl_free_dh);
+#endif
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+ hap_register_post_deinit(ssl_free_engines);
+#endif
+#ifdef HAVE_SSL_PROVIDERS
+ hap_register_post_deinit(ssl_unload_providers);
+#endif
+#if HA_OPENSSL_VERSION_NUMBER < 0x3000000fL
+ /* Load SSL string for the verbose & debug mode. */
+ ERR_load_SSL_strings();
+#endif
+ ha_meth = BIO_meth_new(0x666, "ha methods");
+ if (ha_meth != NULL) {
+ BIO_meth_set_write(ha_meth, ha_ssl_write);
+ BIO_meth_set_read(ha_meth, ha_ssl_read);
+ BIO_meth_set_ctrl(ha_meth, ha_ssl_ctrl);
+ BIO_meth_set_create(ha_meth, ha_ssl_new);
+ BIO_meth_set_destroy(ha_meth, ha_ssl_free);
+ BIO_meth_set_puts(ha_meth, ha_ssl_puts);
+ BIO_meth_set_gets(ha_meth, ha_ssl_gets);
+ }
+
+ HA_SPIN_INIT(&ckch_lock);
+
+ HA_SPIN_INIT(&ocsp_tree_lock);
+
+ /* Try to register dedicated SSL/TLS protocol message callbacks for
+ * heartbleed attack (CVE-2014-0160) and clienthello.
+ */
+ hap_register_post_check(ssl_sock_register_msg_callbacks);
+
+ /* Try to free all callbacks that were registered by using
+ * ssl_sock_register_msg_callback().
+ */
+ hap_register_post_deinit(ssl_sock_unregister_msg_callbacks);
+}
+INITCALL0(STG_REGISTER, __ssl_sock_init);
+
+/* Compute and register the version string */
+static void ssl_register_build_options(void)
+{
+ char *ptr = NULL;
+ int i;
+
+ memprintf(&ptr, "Built with OpenSSL version : "
+#ifdef OPENSSL_IS_BORINGSSL
+ "BoringSSL");
+#else /* OPENSSL_IS_BORINGSSL */
+ OPENSSL_VERSION_TEXT
+ "\nRunning on OpenSSL version : %s%s",
+ OpenSSL_version(OPENSSL_VERSION),
+ ((OPENSSL_VERSION_NUMBER ^ OpenSSL_version_num()) >> 8) ? " (VERSIONS DIFFER!)" : "");
+#endif
+ memprintf(&ptr, "%s\nOpenSSL library supports TLS extensions : "
+#if HA_OPENSSL_VERSION_NUMBER < 0x00907000L
+ "no (library version too old)"
+#elif defined(OPENSSL_NO_TLSEXT)
+ "no (disabled via OPENSSL_NO_TLSEXT)"
+#else
+ "yes"
+#endif
+ "", ptr);
+
+ memprintf(&ptr, "%s\nOpenSSL library supports SNI : "
+#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME
+ "yes"
+#else
+#ifdef OPENSSL_NO_TLSEXT
+ "no (because of OPENSSL_NO_TLSEXT)"
+#else
+ "no (version might be too old, 0.9.8f min needed)"
+#endif
+#endif
+ "", ptr);
+
+ memprintf(&ptr, "%s\nOpenSSL library supports :", ptr);
+ for (i = CONF_TLSV_MIN; i <= CONF_TLSV_MAX; i++)
+ if (methodVersions[i].option)
+ memprintf(&ptr, "%s %s", ptr, methodVersions[i].name);
+
+#ifdef HAVE_SSL_PROVIDERS
+ {
+ struct list provider_names;
+ struct provider_name *name;
+ LIST_INIT(&provider_names);
+ ssl_provider_get_name_list(&provider_names);
+
+ memprintf(&ptr, "%s\nOpenSSL providers loaded :", ptr);
+
+ list_for_each_entry(name, &provider_names, list) {
+ memprintf(&ptr, "%s %s", ptr, name->name);
+ }
+
+ ssl_provider_clear_name_list(&provider_names);
+ }
+#endif
+
+ hap_register_build_opts(ptr, 1);
+}
+
+INITCALL0(STG_REGISTER, ssl_register_build_options);
+
+#if defined(USE_ENGINE) && !defined(OPENSSL_NO_ENGINE)
+void ssl_free_engines(void) {
+ struct ssl_engine_list *wl, *wlb;
+ /* free up engine list */
+ list_for_each_entry_safe(wl, wlb, &openssl_engines, list) {
+ ENGINE_finish(wl->e);
+ ENGINE_free(wl->e);
+ LIST_DELETE(&wl->list);
+ free(wl);
+ }
+}
+#endif
+
+#ifdef HAVE_SSL_PROVIDERS
+void ssl_unload_providers(void) {
+ struct ssl_provider_list *prov, *provb;
+ list_for_each_entry_safe(prov, provb, &openssl_providers, list) {
+ OSSL_PROVIDER_unload(prov->provider);
+ LIST_DELETE(&prov->list);
+ free(prov);
+ }
+}
+#endif
+
+#ifndef OPENSSL_NO_DH
+void ssl_free_dh(void) {
+ if (local_dh_1024) {
+ HASSL_DH_free(local_dh_1024);
+ local_dh_1024 = NULL;
+ }
+ if (local_dh_2048) {
+ HASSL_DH_free(local_dh_2048);
+ local_dh_2048 = NULL;
+ }
+ if (local_dh_4096) {
+ HASSL_DH_free(local_dh_4096);
+ local_dh_4096 = NULL;
+ }
+ if (global_dh) {
+ HASSL_DH_free(global_dh);
+ global_dh = NULL;
+ }
+}
+#endif
+
+static void __ssl_sock_deinit(void)
+{
+#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES)
+ if (ssl_ctx_lru_tree) {
+ lru64_destroy(ssl_ctx_lru_tree);
+ HA_RWLOCK_DESTROY(&ssl_ctx_lru_rwlock);
+ }
+#endif
+
+#if (HA_OPENSSL_VERSION_NUMBER < 0x10100000L)
+ ERR_remove_state(0);
+ ERR_free_strings();
+
+ EVP_cleanup();
+#endif
+
+#if (HA_OPENSSL_VERSION_NUMBER >= 0x00907000L) && (HA_OPENSSL_VERSION_NUMBER < 0x10100000L)
+ CRYPTO_cleanup_all_ex_data();
+#endif
+ BIO_meth_free(ha_meth);
+
+#if !defined OPENSSL_NO_OCSP
+ ssl_destroy_ocsp_update_task();
+#endif
+}
+REGISTER_POST_DEINIT(__ssl_sock_deinit);
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/ssl_utils.c b/src/ssl_utils.c
new file mode 100644
index 0000000..4a85b89
--- /dev/null
+++ b/src/ssl_utils.c
@@ -0,0 +1,702 @@
+/*
+ * Utility functions for SSL:
+ * Mostly generic functions that retrieve information from certificates
+ *
+ * Copyright (C) 2012 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2020 HAProxy Technologies, William Lallemand <wlallemand@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <haproxy/api.h>
+#include <haproxy/buf-t.h>
+#include <haproxy/chunk.h>
+#include <haproxy/openssl-compat.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+
+/* fill a buffer with the algorithm and size of a public key */
+int cert_get_pkey_algo(X509 *crt, struct buffer *out)
+{
+ int bits = 0;
+ int sig = TLSEXT_signature_anonymous;
+ int len = -1;
+ EVP_PKEY *pkey;
+
+ pkey = X509_get_pubkey(crt);
+ if (pkey) {
+ bits = EVP_PKEY_bits(pkey);
+ switch(EVP_PKEY_base_id(pkey)) {
+ case EVP_PKEY_RSA:
+ sig = TLSEXT_signature_rsa;
+ break;
+ case EVP_PKEY_EC:
+ sig = TLSEXT_signature_ecdsa;
+ break;
+ case EVP_PKEY_DSA:
+ sig = TLSEXT_signature_dsa;
+ break;
+ }
+ EVP_PKEY_free(pkey);
+ }
+
+ switch(sig) {
+ case TLSEXT_signature_rsa:
+ len = chunk_printf(out, "RSA%d", bits);
+ break;
+ case TLSEXT_signature_ecdsa:
+ len = chunk_printf(out, "EC%d", bits);
+ break;
+ case TLSEXT_signature_dsa:
+ len = chunk_printf(out, "DSA%d", bits);
+ break;
+ default:
+ return 0;
+ }
+ if (len < 0)
+ return 0;
+ return 1;
+}
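+
+/* Editor's note, not upstream: for example, a certificate carrying a
+ * 2048-bit RSA public key makes the function above print "RSA2048" into
+ * <out>, while a P-256 EC key yields "EC256".
+ */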
+
+/* Extract a serial from a cert, and copy it to a chunk.
+ * Returns 1 if serial is found and copied, 0 if no serial found and
+ * -1 if output is not large enough.
+ */
+int ssl_sock_get_serial(X509 *crt, struct buffer *out)
+{
+ ASN1_INTEGER *serial;
+
+ serial = X509_get_serialNumber(crt);
+ if (!serial)
+ return 0;
+
+ if (out->size < serial->length)
+ return -1;
+
+ memcpy(out->area, serial->data, serial->length);
+ out->data = serial->length;
+ return 1;
+}
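+
+/* Editor's illustration, not upstream: the serial is copied as raw
+ * big-endian bytes; its length ends up in out->data and callers convert
+ * it to hex themselves when needed.
+ */
+#if 0
+static int example_serial_len(X509 *crt)
+{
+ struct buffer *out = get_trash_chunk();
+
+ if (ssl_sock_get_serial(crt, out) <= 0)
+ return -1;
+ return (int)out->data; /* number of serial bytes copied */
+}
+#endif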
+
+/* Extract a cert to der, and copy it to a chunk.
+ * Returns 1 if the cert is found and copied, 0 on der conversion failure
+ * and -1 if the output is not large enough.
+ */
+int ssl_sock_crt2der(X509 *crt, struct buffer *out)
+{
+ int len;
+ unsigned char *p = (unsigned char *) out->area;
+
+ len = i2d_X509(crt, NULL);
+ if (len <= 0)
+ return 0;
+
+ if (out->size < len)
+ return -1;
+
+ i2d_X509(crt, &p);
+ out->data = len;
+ return 1;
+}
+
+
+/* Copy a date into struct buffer <out>, normalized to ASN1_UTCTIME format.
+ * Returns 1 if a date is found and copied, 0 if no valid time is found
+ * and -1 if the output is not large enough.
+ */
+int ssl_sock_get_time(ASN1_TIME *tm, struct buffer *out)
+{
+ if (tm->type == V_ASN1_GENERALIZEDTIME) {
+ ASN1_GENERALIZEDTIME *gentm = (ASN1_GENERALIZEDTIME *)tm;
+
+ if (gentm->length < 12)
+ return 0;
+ if (gentm->data[0] != 0x32 || gentm->data[1] != 0x30)
+ return 0;
+ if (out->size < gentm->length-2)
+ return -1;
+
+ memcpy(out->area, gentm->data+2, gentm->length-2);
+ out->data = gentm->length-2;
+ return 1;
+ }
+ else if (tm->type == V_ASN1_UTCTIME) {
+ ASN1_UTCTIME *utctm = (ASN1_UTCTIME *)tm;
+
+ if (utctm->length < 10)
+ return 0;
+ if (utctm->data[0] >= 0x35)
+ return 0;
+ if (out->size < utctm->length)
+ return -1;
+
+ memcpy(out->area, utctm->data, utctm->length);
+ out->data = utctm->length;
+ return 1;
+ }
+
+ return 0;
+}
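+
+/* Editor's note (worked example, not upstream): the GENERALIZEDTIME
+ * "20240413121805Z" is emitted in UTCTIME form as "240413121805Z", the
+ * leading "20" century digits being dropped. This is why GENERALIZEDTIME
+ * values outside years 20xx and UTCTIME values with a first year digit
+ * >= '5' (i.e. 1950-1999) are rejected above: every accepted date
+ * normalizes to an unambiguous two-digit year.
+ */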
+
+/* Extract an entry from a X509_NAME and copy its value to an output chunk.
+ * Returns 1 if entry found, 0 if entry not found, or -1 if output not large enough.
+ */
+int ssl_sock_get_dn_entry(X509_NAME *a, const struct buffer *entry, int pos,
+ struct buffer *out)
+{
+ X509_NAME_ENTRY *ne;
+ ASN1_OBJECT *obj;
+ ASN1_STRING *data;
+ const unsigned char *data_ptr;
+ int data_len;
+ int i, j, n;
+ int cur = 0;
+ const char *s;
+ char tmp[128];
+ int name_count;
+
+ name_count = X509_NAME_entry_count(a);
+
+ out->data = 0;
+ for (i = 0; i < name_count; i++) {
+ if (pos < 0)
+ j = (name_count-1) - i;
+ else
+ j = i;
+
+ ne = X509_NAME_get_entry(a, j);
+ obj = X509_NAME_ENTRY_get_object(ne);
+ data = X509_NAME_ENTRY_get_data(ne);
+ data_ptr = ASN1_STRING_get0_data(data);
+ data_len = ASN1_STRING_length(data);
+ n = OBJ_obj2nid(obj);
+ if ((n == NID_undef) || ((s = OBJ_nid2sn(n)) == NULL)) {
+ i2t_ASN1_OBJECT(tmp, sizeof(tmp), obj);
+ s = tmp;
+ }
+
+ if (chunk_strcasecmp(entry, s) != 0)
+ continue;
+
+ if (pos < 0)
+ cur--;
+ else
+ cur++;
+
+ if (cur != pos)
+ continue;
+
+ if (data_len > out->size)
+ return -1;
+
+ memcpy(out->area, data_ptr, data_len);
+ out->data = data_len;
+ return 1;
+ }
+
+ return 0;
+
+}
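+
+/* Editor's illustration, not upstream: fetching the first CN of a subject
+ * DN, the same call ssl_sock_get_remote_common_name() performs. A negative
+ * <pos> would count matches backwards from the end of the DN.
+ */
+#if 0
+static int example_first_cn(X509_NAME *subject, struct buffer *out)
+{
+ static const struct buffer cn = { .area = (char *)"CN", .data = 2 };
+
+ return ssl_sock_get_dn_entry(subject, &cn, 1, out);
+}
+#endif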
+
+/*
+ * Extract the DN in the specified format from the X509_NAME and copy result to a chunk.
+ * Currently supports rfc2253 for returning LDAP V3 DNs.
+ * Returns 1 if dn entries exist, 0 if no dn entry was found.
+ */
+int ssl_sock_get_dn_formatted(X509_NAME *a, const struct buffer *format, struct buffer *out)
+{
+ BIO *bio = NULL;
+ int ret = 0;
+ int data_len = 0;
+
+ if (chunk_strcmp(format, "rfc2253") == 0) {
+ bio = BIO_new(BIO_s_mem());
+ if (bio == NULL)
+ goto out;
+
+ if (X509_NAME_print_ex(bio, a, 0, XN_FLAG_RFC2253) < 0)
+ goto out;
+
+ if ((data_len = BIO_read(bio, out->area, out->size)) <= 0)
+ goto out;
+
+ out->data = data_len;
+
+ ret = 1;
+ }
+out:
+ if (bio)
+ BIO_free(bio);
+ return ret;
+}
+
+/* Extract and format the full DN from a X509_NAME and copy the result into a chunk.
+ * Returns 1 if DN entries exist, 0 if no DN entry is found, or -1 if the output is not large enough.
+ */
+int ssl_sock_get_dn_oneline(X509_NAME *a, struct buffer *out)
+{
+ X509_NAME_ENTRY *ne;
+ ASN1_OBJECT *obj;
+ ASN1_STRING *data;
+ const unsigned char *data_ptr;
+ int data_len;
+ int i, n, ln;
+ int l = 0;
+ const char *s;
+ char *p;
+ char tmp[128];
+ int name_count;
+
+
+ name_count = X509_NAME_entry_count(a);
+
+ out->data = 0;
+ p = out->area;
+ for (i = 0; i < name_count; i++) {
+ ne = X509_NAME_get_entry(a, i);
+ obj = X509_NAME_ENTRY_get_object(ne);
+ data = X509_NAME_ENTRY_get_data(ne);
+ data_ptr = ASN1_STRING_get0_data(data);
+ data_len = ASN1_STRING_length(data);
+ n = OBJ_obj2nid(obj);
+ if ((n == NID_undef) || ((s = OBJ_nid2sn(n)) == NULL)) {
+ i2t_ASN1_OBJECT(tmp, sizeof(tmp), obj);
+ s = tmp;
+ }
+ ln = strlen(s);
+
+ l += 1 + ln + 1 + data_len;
+ if (l > out->size)
+ return -1;
+ out->data = l;
+
+ *(p++)='/';
+ memcpy(p, s, ln);
+ p += ln;
+ *(p++)='=';
+ memcpy(p, data_ptr, data_len);
+ p += data_len;
+ }
+
+ if (!out->data)
+ return 0;
+
+ return 1;
+}
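+
+/* Editor's note, not upstream: for a subject such as C=FR, O=HAProxy,
+ * CN=www.example.com, the function above produces the one-line form
+ * "/C=FR/O=HAProxy/CN=www.example.com" in <out>.
+ */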
+
+
+extern int ssl_client_crt_ref_index;
+
+/*
+ * This function fetches the peer certificate of an SSL connection.
+ * We first try the dedicated SSL_get_peer_certificate() function, and fall
+ * back to the client certificate reference that might have been stored in
+ * the SSL structure's ex_data during the verification process.
+ * Returns NULL in case of failure.
+ */
+X509* ssl_sock_get_peer_certificate(SSL *ssl)
+{
+ X509* cert;
+
+ cert = SSL_get_peer_certificate(ssl);
+ /* Get the client certificate reference stored in the SSL
+ * structure's ex_data during the verification process. */
+ if (!cert) {
+ cert = SSL_get_ex_data(ssl, ssl_client_crt_ref_index);
+ if (cert)
+ X509_up_ref(cert);
+ }
+
+ return cert;
+}
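+
+/* Editor's note, not upstream: whichever branch supplied the certificate,
+ * its reference count was bumped (by SSL_get_peer_certificate() or by the
+ * explicit X509_up_ref()), so every caller must X509_free() the result.
+ */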
+
+/*
+ * This function fetches the X509* of the root CA of the client certificate
+ * from the verified chain. We use SSL_get0_verified_chain() and walk the
+ * X509 stack up to the first self-issued certificate, or return the last
+ * one examined when none is self-issued.
+ *
+ * Returns NULL in case of failure.
+ */
+#ifdef HAVE_SSL_get0_verified_chain
+X509* ssl_sock_get_verified_chain_root(SSL *ssl)
+{
+ STACK_OF(X509) *chain = NULL;
+ X509 *crt = NULL;
+ int i;
+
+ chain = SSL_get0_verified_chain(ssl);
+ if (!chain)
+ return NULL;
+
+ for (i = 0; i < sk_X509_num(chain); i++) {
+ crt = sk_X509_value(chain, i);
+
+ if (X509_check_issued(crt, crt) == X509_V_OK)
+ break;
+ }
+
+ return crt;
+}
+#endif
+
+/*
+ * Take an OpenSSL version in text format and return a numeric openssl version
+ * Return 0 if it failed to parse the version
+ *
+ * https://www.openssl.org/docs/man1.1.1/man3/OPENSSL_VERSION_NUMBER.html
+ *
+ * MNNFFPPS: major minor fix patch status
+ *
+ * The status nibble has one of the values 0 for development, 1 to e for betas
+ * 1 to 14, and f for release.
+ *
+ * for example
+ *
+ * 0x0090821f 0.9.8zh
+ * 0x1000215f 1.0.2u
+ * 0x30000000 3.0.0-alpha17
+ * 0x30000002 3.0.0-beta2
+ * 0x3000000e 3.0.0-beta14
+ * 0x3000000f 3.0.0
+ */
+unsigned int openssl_version_parser(const char *version)
+{
+ unsigned int numversion;
+ unsigned int major = 0, minor = 0, fix = 0, patch = 0, status = 0;
+ char *p, *end;
+
+ p = (char *)version;
+
+ if (!p || !*p)
+ return 0;
+
+ major = strtol(p, &end, 10);
+ if (*end != '.' || major > 0xf)
+ goto error;
+ p = end + 1;
+
+ minor = strtol(p, &end, 10);
+ if (*end != '.' || minor > 0xff)
+ goto error;
+ p = end + 1;
+
+ fix = strtol(p, &end, 10);
+ if (fix > 0xff)
+ goto error;
+ p = end;
+
+ if (!*p) {
+ /* end of the string, that's a release */
+ status = 0xf;
+ } else if (*p == '-') {
+ /* after the hyphen, only a beta increments the status
+ * counter; all other versions are considered "dev" and
+ * do not increment anything */
+ p++;
+
+ if (!strncmp(p, "beta", 4)) {
+ p += 4;
+ status = strtol(p, &end, 10);
+ if (status > 14)
+ goto error;
+ }
+ } else {
+ /* that's a patch release */
+ patch = 1;
+
+ /* add the value of each letter */
+ while (*p) {
+ patch += (*p & ~0x20) - 'A';
+ p++;
+ }
+ status = 0xf;
+ }
+
+end:
+ numversion = ((major & 0xf) << 28) | ((minor & 0xff) << 20) | ((fix & 0xff) << 12) | ((patch & 0xff) << 4) | (status & 0xf);
+ return numversion;
+
+error:
+ return 0;
+
+}
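+
+/* Editor's illustration, not upstream: a few conversions matching the
+ * table in the header comment above. BUG_ON() comes from haproxy/bug.h.
+ */
+#if 0
+static void example_version_checks(void)
+{
+ /* patch 'u' counts as 21 (0x15), release status is 0xf */
+ BUG_ON(openssl_version_parser("1.0.2u") != 0x1000215fU);
+ /* only betas increment the status nibble */
+ BUG_ON(openssl_version_parser("3.0.0-beta2") != 0x30000002U);
+ /* a plain release gets status 0xf */
+ BUG_ON(openssl_version_parser("3.0.0") != 0x3000000fU);
+}
+#endif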
+
+/* Exclude GREASE (RFC8701) values from input buffer */
+void exclude_tls_grease(char *input, int len, struct buffer *output)
+{
+ int ptr = 0;
+
+ while (ptr < len - 1) {
+ if (input[ptr] != input[ptr+1] || (input[ptr] & 0x0f) != 0x0a) {
+ if (output->data <= output->size - 2) {
+ memcpy(output->area + output->data, input + ptr, 2);
+ output->data += 2;
+ } else
+ break;
+ }
+ ptr += 2;
+ }
+ if (output->size - output->data > 0 && len - ptr > 0)
+ output->area[output->data++] = input[ptr];
+}
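+
+/* Editor's note (worked example, not upstream): with the 8 input bytes
+ * 0x1a 0x1a 0x13 0x01 0xfa 0xfa 0x13 0x02, the pairs 0x1a1a and 0xfafa
+ * match the GREASE pattern (two equal bytes whose low nibble is 0xa) and
+ * are skipped, so the output receives 0x13 0x01 0x13 0x02. A trailing odd
+ * byte, if any, is copied verbatim by the final test.
+ */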
+
+/*
+ * The following generates an array <x509_v_codes> in which the X509_V_ERR_*
+ * codes are populated with their string equivalent. Depending on the version
+ * of the SSL library, some codes do not exist; these will be populated as
+ * -1 in the array.
+ *
+ * The list was taken from
+ * https://github.com/openssl/openssl/blob/master/include/openssl/x509_vfy.h.in
+ * and must be updated when new constants are introduced.
+ */
+
+#undef _Q
+#define _Q(x) (#x)
+#undef V
+#define V(x) { .code = -1, .value = _Q(x), .string = #x }
+
+static struct x509_v_codes {
+ int code; // integer value of the code or -1 if undefined
+ const char *value; // value of the macro as a string or its name
+ const char *string; // name of the macro
+} x509_v_codes[] = {
+ V(X509_V_OK),
+ V(X509_V_ERR_UNSPECIFIED),
+ V(X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT),
+ V(X509_V_ERR_UNABLE_TO_GET_CRL),
+ V(X509_V_ERR_UNABLE_TO_DECRYPT_CERT_SIGNATURE),
+ V(X509_V_ERR_UNABLE_TO_DECRYPT_CRL_SIGNATURE),
+ V(X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY),
+ V(X509_V_ERR_CERT_SIGNATURE_FAILURE),
+ V(X509_V_ERR_CRL_SIGNATURE_FAILURE),
+ V(X509_V_ERR_CERT_NOT_YET_VALID),
+ V(X509_V_ERR_CERT_HAS_EXPIRED),
+ V(X509_V_ERR_CRL_NOT_YET_VALID),
+ V(X509_V_ERR_CRL_HAS_EXPIRED),
+ V(X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD),
+ V(X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD),
+ V(X509_V_ERR_ERROR_IN_CRL_LAST_UPDATE_FIELD),
+ V(X509_V_ERR_ERROR_IN_CRL_NEXT_UPDATE_FIELD),
+ V(X509_V_ERR_OUT_OF_MEM),
+ V(X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT),
+ V(X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN),
+ V(X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY),
+ V(X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE),
+ V(X509_V_ERR_CERT_CHAIN_TOO_LONG),
+ V(X509_V_ERR_CERT_REVOKED),
+ V(X509_V_ERR_NO_ISSUER_PUBLIC_KEY),
+ V(X509_V_ERR_PATH_LENGTH_EXCEEDED),
+ V(X509_V_ERR_INVALID_PURPOSE),
+ V(X509_V_ERR_CERT_UNTRUSTED),
+ V(X509_V_ERR_CERT_REJECTED),
+ V(X509_V_ERR_SUBJECT_ISSUER_MISMATCH),
+ V(X509_V_ERR_AKID_SKID_MISMATCH),
+ V(X509_V_ERR_AKID_ISSUER_SERIAL_MISMATCH),
+ V(X509_V_ERR_KEYUSAGE_NO_CERTSIGN),
+ V(X509_V_ERR_UNABLE_TO_GET_CRL_ISSUER),
+ V(X509_V_ERR_UNHANDLED_CRITICAL_EXTENSION),
+ V(X509_V_ERR_KEYUSAGE_NO_CRL_SIGN),
+ V(X509_V_ERR_UNHANDLED_CRITICAL_CRL_EXTENSION),
+ V(X509_V_ERR_INVALID_NON_CA),
+ V(X509_V_ERR_PROXY_PATH_LENGTH_EXCEEDED),
+ V(X509_V_ERR_KEYUSAGE_NO_DIGITAL_SIGNATURE),
+ V(X509_V_ERR_PROXY_CERTIFICATES_NOT_ALLOWED),
+ V(X509_V_ERR_INVALID_EXTENSION),
+ V(X509_V_ERR_INVALID_POLICY_EXTENSION),
+ V(X509_V_ERR_NO_EXPLICIT_POLICY),
+ V(X509_V_ERR_DIFFERENT_CRL_SCOPE),
+ V(X509_V_ERR_UNSUPPORTED_EXTENSION_FEATURE),
+ V(X509_V_ERR_UNNESTED_RESOURCE),
+ V(X509_V_ERR_PERMITTED_VIOLATION),
+ V(X509_V_ERR_EXCLUDED_VIOLATION),
+ V(X509_V_ERR_SUBTREE_MINMAX),
+ V(X509_V_ERR_APPLICATION_VERIFICATION),
+ V(X509_V_ERR_UNSUPPORTED_CONSTRAINT_TYPE),
+ V(X509_V_ERR_UNSUPPORTED_CONSTRAINT_SYNTAX),
+ V(X509_V_ERR_UNSUPPORTED_NAME_SYNTAX),
+ V(X509_V_ERR_CRL_PATH_VALIDATION_ERROR),
+ V(X509_V_ERR_PATH_LOOP),
+ V(X509_V_ERR_SUITE_B_INVALID_VERSION),
+ V(X509_V_ERR_SUITE_B_INVALID_ALGORITHM),
+ V(X509_V_ERR_SUITE_B_INVALID_CURVE),
+ V(X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM),
+ V(X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED),
+ V(X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256),
+ V(X509_V_ERR_HOSTNAME_MISMATCH),
+ V(X509_V_ERR_EMAIL_MISMATCH),
+ V(X509_V_ERR_IP_ADDRESS_MISMATCH),
+ V(X509_V_ERR_DANE_NO_MATCH),
+ V(X509_V_ERR_EE_KEY_TOO_SMALL),
+ V(X509_V_ERR_CA_KEY_TOO_SMALL),
+ V(X509_V_ERR_CA_MD_TOO_WEAK),
+ V(X509_V_ERR_INVALID_CALL),
+ V(X509_V_ERR_STORE_LOOKUP),
+ V(X509_V_ERR_NO_VALID_SCTS),
+ V(X509_V_ERR_PROXY_SUBJECT_NAME_VIOLATION),
+ V(X509_V_ERR_OCSP_VERIFY_NEEDED),
+ V(X509_V_ERR_OCSP_VERIFY_FAILED),
+ V(X509_V_ERR_OCSP_CERT_UNKNOWN),
+ V(X509_V_ERR_UNSUPPORTED_SIGNATURE_ALGORITHM),
+ V(X509_V_ERR_SIGNATURE_ALGORITHM_MISMATCH),
+ V(X509_V_ERR_SIGNATURE_ALGORITHM_INCONSISTENCY),
+ V(X509_V_ERR_INVALID_CA),
+ V(X509_V_ERR_PATHLEN_INVALID_FOR_NON_CA),
+ V(X509_V_ERR_PATHLEN_WITHOUT_KU_KEY_CERT_SIGN),
+ V(X509_V_ERR_KU_KEY_CERT_SIGN_INVALID_FOR_NON_CA),
+ V(X509_V_ERR_ISSUER_NAME_EMPTY),
+ V(X509_V_ERR_SUBJECT_NAME_EMPTY),
+ V(X509_V_ERR_MISSING_AUTHORITY_KEY_IDENTIFIER),
+ V(X509_V_ERR_MISSING_SUBJECT_KEY_IDENTIFIER),
+ V(X509_V_ERR_EMPTY_SUBJECT_ALT_NAME),
+ V(X509_V_ERR_EMPTY_SUBJECT_SAN_NOT_CRITICAL),
+ V(X509_V_ERR_CA_BCONS_NOT_CRITICAL),
+ V(X509_V_ERR_AUTHORITY_KEY_IDENTIFIER_CRITICAL),
+ V(X509_V_ERR_SUBJECT_KEY_IDENTIFIER_CRITICAL),
+ V(X509_V_ERR_CA_CERT_MISSING_KEY_USAGE),
+ V(X509_V_ERR_EXTENSIONS_REQUIRE_VERSION_3),
+ V(X509_V_ERR_EC_KEY_EXPLICIT_PARAMS),
+ { 0, NULL, NULL },
+};
+
+/*
+ * Return the X509_V_ERR code corresponding to the name of the constant.
+ * See https://github.com/openssl/openssl/blob/master/include/openssl/x509_vfy.h.in
+ * If not found, return -1
+ */
+int x509_v_err_str_to_int(const char *str)
+{
+ int i;
+
+ for (i = 0; x509_v_codes[i].string; i++) {
+ if (strcmp(str, x509_v_codes[i].string) == 0) {
+ return x509_v_codes[i].code;
+ }
+ }
+
+ return -1;
+}
+
+/*
+ * Return the constant name corresponding to the X509_V_ERR code
+ * See https://github.com/openssl/openssl/blob/master/include/openssl/x509_vfy.h.in
+ * If not found, return NULL;
+ */
+const char *x509_v_err_int_to_str(int code)
+{
+ int i;
+
+ if (code == -1)
+ return NULL;
+
+ for (i = 0; x509_v_codes[i].string; i++) {
+ if (x509_v_codes[i].code == code) {
+ return x509_v_codes[i].string;
+ }
+ }
+ return NULL;
+}
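+
+/* Editor's illustration, not upstream: round-tripping a verify error
+ * between its numeric code and its constant name. It assumes the constant
+ * exists in the linked OpenSSL and that init_x509_v_err_tab() has run.
+ */
+#if 0
+static void example_x509_code_roundtrip(void)
+{
+ int code = x509_v_err_str_to_int("X509_V_ERR_CERT_HAS_EXPIRED");
+
+ /* <code> is the constant's value (10 on current OpenSSL) and the
+ * name converts back identically */
+ BUG_ON(strcmp(x509_v_err_int_to_str(code), "X509_V_ERR_CERT_HAS_EXPIRED") != 0);
+}
+#endif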
+
+void init_x509_v_err_tab(void)
+{
+ int i;
+
+ for (i = 0; x509_v_codes[i].string; i++) {
+ /* either the macro exists or it's equal to its own name */
+ if (strcmp(x509_v_codes[i].string, x509_v_codes[i].value) == 0)
+ continue;
+ x509_v_codes[i].code = atoi(x509_v_codes[i].value);
+ }
+}
+
+INITCALL0(STG_REGISTER, init_x509_v_err_tab);
+
+
+/*
+ * This function returns the number of seconds elapsed between the Epoch,
+ * 1970-01-01 00:00:00 +0000 (UTC), and the date presented in
+ * ASN1_GENERALIZEDTIME format.
+ *
+ * On parsing error, it returns -1.
+ */
+long asn1_generalizedtime_to_epoch(ASN1_GENERALIZEDTIME *d)
+{
+ long epoch;
+ char *p, *end;
+ const unsigned short month_offset[12] = {
+ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
+ };
+ unsigned long year, month;
+
+ if (!d || (d->type != V_ASN1_GENERALIZEDTIME)) return -1;
+
+ p = (char *)d->data;
+ end = p + d->length;
+
+ if (end - p < 4) return -1;
+ year = 1000 * (p[0] - '0') + 100 * (p[1] - '0') + 10 * (p[2] - '0') + p[3] - '0';
+ p += 4;
+ if (end - p < 2) return -1;
+ month = 10 * (p[0] - '0') + p[1] - '0';
+ if (month < 1 || month > 12) return -1;
+ /* Compute the number of seconds between 1 Jan 1970 and the beginning of
+ the current month, accounting for leap years and whether the current
+ month falls before March */
+ epoch = ( ((year - 1970) * 365)
+ + ((year - (month < 3)) / 4 - (year - (month < 3)) / 100 + (year - (month < 3)) / 400)
+ - ((1970 - 1) / 4 - (1970 - 1) / 100 + (1970 - 1) / 400)
+ + month_offset[month-1]
+ ) * 24 * 60 * 60;
+ p += 2;
+ if (end - p < 2) return -1;
+ /* Add the number of seconds of completed days of current month */
+ epoch += (10 * (p[0] - '0') + p[1] - '0' - 1) * 24 * 60 * 60;
+ p += 2;
+ if (end - p < 2) return -1;
+ /* Add the completed hours of the current day */
+ epoch += (10 * (p[0] - '0') + p[1] - '0') * 60 * 60;
+ p += 2;
+ if (end - p < 2) return -1;
+ /* Add the completed minutes of the current hour */
+ epoch += (10 * (p[0] - '0') + p[1] - '0') * 60;
+ p += 2;
+ if (p == end) return -1;
+ /* Test whether seconds are present */
+ if (p[0] < '0' || p[0] > '9')
+ goto nosec;
+ if (end - p < 2) return -1;
+ /* Add the seconds of the current minute */
+ epoch += 10 * (p[0] - '0') + p[1] - '0';
+ p += 2;
+ if (p == end) return -1;
+ /* Ignore seconds float part if present */
+ if (p[0] == '.') {
+ do {
+ if (++p == end) return -1;
+ } while (p[0] >= '0' && p[0] <= '9');
+ }
+
+nosec:
+ if (p[0] == 'Z') {
+ if (end - p != 1) return -1;
+ return epoch;
+ }
+ else if (p[0] == '+') {
+ if (end - p != 5) return -1;
+ /* Apply timezone offset */
+ return epoch - ((10 * (p[1] - '0') + p[2] - '0') * 60 * 60 + (10 * (p[3] - '0') + p[4] - '0')) * 60;
+ }
+ else if (p[0] == '-') {
+ if (end - p != 5) return -1;
+ /* Apply timezone offset */
+ return epoch + ((10 * (p[1] - '0') + p[2] - '0') * 60 * 60 + (10 * (p[3] - '0') + p[4] - '0')) * 60;
+ }
+
+ return -1;
+}
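+
+/* Editor's note (worked example, not upstream): for "20240101000000Z",
+ * year=2024 and month=1, so the day count is (2024-1970)*365 = 19710 plus
+ * (2023/4 - 2023/100 + 2023/400) - (1969/4 - 1969/100 + 1969/400)
+ * = 490 - 477 = 13 leap days, i.e. 19723 days, and the result is
+ * 19723 * 86400 = 1704067200, the epoch of 2024-01-01 00:00:00 UTC.
+ */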
diff --git a/src/stats.c b/src/stats.c
new file mode 100644
index 0000000..0ed5758
--- /dev/null
+++ b/src/stats.c
@@ -0,0 +1,5521 @@
+/*
+ * Functions dedicated to statistics output and the stats socket
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2009 Krzysztof Piotr Oledzki <ole@ans.pl>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pwd.h>
+#include <grp.h>
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/applet.h>
+#include <haproxy/backend.h>
+#include <haproxy/base64.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/clock.h>
+#include <haproxy/compression.h>
+#include <haproxy/debug.h>
+#include <haproxy/errors.h>
+#include <haproxy/fd.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/log.h>
+#include <haproxy/map-t.h>
+#include <haproxy/pattern-t.h>
+#include <haproxy/pipe.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/stats.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/ticks.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_auth-t.h>
+#include <haproxy/version.h>
+
+
+/* status codes available for the stats admin page (strictly 4 chars long) */
+const char *stat_status_codes[STAT_STATUS_SIZE] = {
+ [STAT_STATUS_DENY] = "DENY",
+ [STAT_STATUS_DONE] = "DONE",
+ [STAT_STATUS_ERRP] = "ERRP",
+ [STAT_STATUS_EXCD] = "EXCD",
+ [STAT_STATUS_NONE] = "NONE",
+ [STAT_STATUS_PART] = "PART",
+ [STAT_STATUS_UNKN] = "UNKN",
+ [STAT_STATUS_IVAL] = "IVAL",
+};
+
+/* These are the field names for each INF_* field position. Please pay attention
+ * to always use the exact same name except that the strings for new names must
+ * be lower case or CamelCase while the enum entries must be upper case.
+ */
+const struct name_desc info_fields[INF_TOTAL_FIELDS] = {
+ [INF_NAME] = { .name = "Name", .desc = "Product name" },
+ [INF_VERSION] = { .name = "Version", .desc = "Product version" },
+ [INF_RELEASE_DATE] = { .name = "Release_date", .desc = "Date of latest source code update" },
+ [INF_NBTHREAD] = { .name = "Nbthread", .desc = "Number of started threads (global.nbthread)" },
+ [INF_NBPROC] = { .name = "Nbproc", .desc = "Number of started worker processes (historical, always 1)" },
+ [INF_PROCESS_NUM] = { .name = "Process_num", .desc = "Relative worker process number (1)" },
+ [INF_PID] = { .name = "Pid", .desc = "This worker process identifier for the system" },
+ [INF_UPTIME] = { .name = "Uptime", .desc = "How long ago this worker process was started (days+hours+minutes+seconds)" },
+ [INF_UPTIME_SEC] = { .name = "Uptime_sec", .desc = "How long ago this worker process was started (seconds)" },
+ [INF_START_TIME_SEC] = { .name = "Start_time_sec", .desc = "Start time in seconds" },
+ [INF_MEMMAX_MB] = { .name = "Memmax_MB", .desc = "Worker process's hard limit on memory usage in MB (-m on command line)" },
+ [INF_MEMMAX_BYTES] = { .name = "Memmax_bytes", .desc = "Worker process's hard limit on memory usage in bytes (-m on command line)" },
+ [INF_POOL_ALLOC_MB] = { .name = "PoolAlloc_MB", .desc = "Amount of memory allocated in pools (in MB)" },
+ [INF_POOL_ALLOC_BYTES] = { .name = "PoolAlloc_bytes", .desc = "Amount of memory allocated in pools (in bytes)" },
+ [INF_POOL_USED_MB] = { .name = "PoolUsed_MB", .desc = "Amount of pool memory currently used (in MB)" },
+ [INF_POOL_USED_BYTES] = { .name = "PoolUsed_bytes", .desc = "Amount of pool memory currently used (in bytes)" },
+ [INF_POOL_FAILED] = { .name = "PoolFailed", .desc = "Number of failed pool allocations since this worker was started" },
+ [INF_ULIMIT_N] = { .name = "Ulimit-n", .desc = "Hard limit on the number of per-process file descriptors" },
+ [INF_MAXSOCK] = { .name = "Maxsock", .desc = "Hard limit on the number of per-process sockets" },
+ [INF_MAXCONN] = { .name = "Maxconn", .desc = "Hard limit on the number of per-process connections (configured or imposed by Ulimit-n)" },
+ [INF_HARD_MAXCONN] = { .name = "Hard_maxconn", .desc = "Hard limit on the number of per-process connections (imposed by Memmax_MB or Ulimit-n)" },
+ [INF_CURR_CONN] = { .name = "CurrConns", .desc = "Current number of connections on this worker process" },
+ [INF_CUM_CONN] = { .name = "CumConns", .desc = "Total number of connections on this worker process since started" },
+ [INF_CUM_REQ] = { .name = "CumReq", .desc = "Total number of requests on this worker process since started" },
+ [INF_MAX_SSL_CONNS] = { .name = "MaxSslConns", .desc = "Hard limit on the number of per-process SSL endpoints (front+back), 0=unlimited" },
+ [INF_CURR_SSL_CONNS] = { .name = "CurrSslConns", .desc = "Current number of SSL endpoints on this worker process (front+back)" },
+ [INF_CUM_SSL_CONNS] = { .name = "CumSslConns", .desc = "Total number of SSL endpoints on this worker process since started (front+back)" },
+ [INF_MAXPIPES] = { .name = "Maxpipes", .desc = "Hard limit on the number of pipes for splicing, 0=unlimited" },
+ [INF_PIPES_USED] = { .name = "PipesUsed", .desc = "Current number of pipes in use in this worker process" },
+ [INF_PIPES_FREE] = { .name = "PipesFree", .desc = "Current number of allocated and available pipes in this worker process" },
+ [INF_CONN_RATE] = { .name = "ConnRate", .desc = "Number of front connections created on this worker process over the last second" },
+ [INF_CONN_RATE_LIMIT] = { .name = "ConnRateLimit", .desc = "Hard limit for ConnRate (global.maxconnrate)" },
+ [INF_MAX_CONN_RATE] = { .name = "MaxConnRate", .desc = "Highest ConnRate reached on this worker process since started (in connections per second)" },
+ [INF_SESS_RATE] = { .name = "SessRate", .desc = "Number of sessions created on this worker process over the last second" },
+ [INF_SESS_RATE_LIMIT] = { .name = "SessRateLimit", .desc = "Hard limit for SessRate (global.maxsessrate)" },
+ [INF_MAX_SESS_RATE] = { .name = "MaxSessRate", .desc = "Highest SessRate reached on this worker process since started (in sessions per second)" },
+ [INF_SSL_RATE] = { .name = "SslRate", .desc = "Number of SSL connections created on this worker process over the last second" },
+ [INF_SSL_RATE_LIMIT] = { .name = "SslRateLimit", .desc = "Hard limit for SslRate (global.maxsslrate)" },
+ [INF_MAX_SSL_RATE] = { .name = "MaxSslRate", .desc = "Highest SslRate reached on this worker process since started (in connections per second)" },
+ [INF_SSL_FRONTEND_KEY_RATE] = { .name = "SslFrontendKeyRate", .desc = "Number of SSL keys created on frontends in this worker process over the last second" },
+ [INF_SSL_FRONTEND_MAX_KEY_RATE] = { .name = "SslFrontendMaxKeyRate", .desc = "Highest SslFrontendKeyRate reached on this worker process since started (in SSL keys per second)" },
+ [INF_SSL_FRONTEND_SESSION_REUSE_PCT] = { .name = "SslFrontendSessionReuse_pct", .desc = "Percent of frontend SSL connections which did not require a new key" },
+ [INF_SSL_BACKEND_KEY_RATE] = { .name = "SslBackendKeyRate", .desc = "Number of SSL keys created on backends in this worker process over the last second" },
+ [INF_SSL_BACKEND_MAX_KEY_RATE] = { .name = "SslBackendMaxKeyRate", .desc = "Highest SslBackendKeyRate reached on this worker process since started (in SSL keys per second)" },
+ [INF_SSL_CACHE_LOOKUPS] = { .name = "SslCacheLookups", .desc = "Total number of SSL session ID lookups in the SSL session cache on this worker since started" },
+ [INF_SSL_CACHE_MISSES] = { .name = "SslCacheMisses", .desc = "Total number of SSL session ID lookups that didn't find a session in the SSL session cache on this worker since started" },
+ [INF_COMPRESS_BPS_IN] = { .name = "CompressBpsIn", .desc = "Number of bytes submitted to the HTTP compressor in this worker process over the last second" },
+ [INF_COMPRESS_BPS_OUT] = { .name = "CompressBpsOut", .desc = "Number of bytes emitted by the HTTP compressor in this worker process over the last second" },
+ [INF_COMPRESS_BPS_RATE_LIM] = { .name = "CompressBpsRateLim", .desc = "Limit of CompressBpsOut beyond which HTTP compression is automatically disabled" },
+ [INF_ZLIB_MEM_USAGE] = { .name = "ZlibMemUsage", .desc = "Amount of memory currently used by HTTP compression on the current worker process (in bytes)" },
+ [INF_MAX_ZLIB_MEM_USAGE] = { .name = "MaxZlibMemUsage", .desc = "Limit on the amount of memory used by HTTP compression above which it is automatically disabled (in bytes, see global.maxzlibmem)" },
+ [INF_TASKS] = { .name = "Tasks", .desc = "Total number of tasks in the current worker process (active + sleeping)" },
+ [INF_RUN_QUEUE] = { .name = "Run_queue", .desc = "Total number of active tasks+tasklets in the current worker process" },
+ [INF_IDLE_PCT] = { .name = "Idle_pct", .desc = "Percentage of last second spent waiting in the current worker thread" },
+ [INF_NODE] = { .name = "node", .desc = "Node name (global.node)" },
+ [INF_DESCRIPTION] = { .name = "description", .desc = "Node description (global.description)" },
+ [INF_STOPPING] = { .name = "Stopping", .desc = "1 if the worker process is currently stopping, otherwise zero" },
+ [INF_JOBS] = { .name = "Jobs", .desc = "Current number of active jobs on the current worker process (frontend connections, master connections, listeners)" },
+ [INF_UNSTOPPABLE_JOBS] = { .name = "Unstoppable Jobs", .desc = "Current number of unstoppable jobs on the current worker process (master connections)" },
+ [INF_LISTENERS] = { .name = "Listeners", .desc = "Current number of active listeners on the current worker process" },
+ [INF_ACTIVE_PEERS] = { .name = "ActivePeers", .desc = "Current number of verified active peers connections on the current worker process" },
+ [INF_CONNECTED_PEERS] = { .name = "ConnectedPeers", .desc = "Current number of peers having passed the connection step on the current worker process" },
+ [INF_DROPPED_LOGS] = { .name = "DroppedLogs", .desc = "Total number of dropped logs for current worker process since started" },
+ [INF_BUSY_POLLING] = { .name = "BusyPolling", .desc = "1 if busy-polling is currently in use on the worker process, otherwise zero (config.busy-polling)" },
+ [INF_FAILED_RESOLUTIONS] = { .name = "FailedResolutions", .desc = "Total number of failed DNS resolutions in current worker process since started" },
+ [INF_TOTAL_BYTES_OUT] = { .name = "TotalBytesOut", .desc = "Total number of bytes emitted by current worker process since started" },
+ [INF_TOTAL_SPLICED_BYTES_OUT] = { .name = "TotalSplicedBytesOut", .desc = "Total number of bytes emitted by current worker process through a kernel pipe since started" },
+ [INF_BYTES_OUT_RATE] = { .name = "BytesOutRate", .desc = "Number of bytes emitted by current worker process over the last second" },
+ [INF_DEBUG_COMMANDS_ISSUED] = { .name = "DebugCommandsIssued", .desc = "Number of debug commands issued on this process (anything > 0 is unsafe)" },
+ [INF_CUM_LOG_MSGS] = { .name = "CumRecvLogs", .desc = "Total number of log messages received by log-forwarding listeners on this worker process since started" },
+ [INF_BUILD_INFO] = { .name = "Build info", .desc = "Build info" },
+ [INF_TAINTED] = { .name = "Tainted", .desc = "Experimental features used" },
+ [INF_WARNINGS] = { .name = "TotalWarnings", .desc = "Total warnings issued" },
+ [INF_MAXCONN_REACHED] = { .name = "MaxconnReached", .desc = "Number of times an accepted connection resulted in Maxconn being reached" },
+ [INF_BOOTTIME_MS] = { .name = "BootTime_ms", .desc = "How long it took to parse and process the config before being ready (milliseconds)" },
+ [INF_NICED_TASKS] = { .name = "Niced_tasks", .desc = "Total number of active tasks+tasklets in the current worker process (Run_queue) that are niced" },
+};
+
+const struct name_desc stat_fields[ST_F_TOTAL_FIELDS] = {
+ [ST_F_PXNAME] = { .name = "pxname", .desc = "Proxy name" },
+ [ST_F_SVNAME] = { .name = "svname", .desc = "Server name" },
+ [ST_F_QCUR] = { .name = "qcur", .desc = "Number of current queued connections" },
+ [ST_F_QMAX] = { .name = "qmax", .desc = "Highest value of queued connections encountered since process started" },
+ [ST_F_SCUR] = { .name = "scur", .desc = "Number of current sessions on the frontend, backend or server" },
+ [ST_F_SMAX] = { .name = "smax", .desc = "Highest value of current sessions encountered since process started" },
+ [ST_F_SLIM] = { .name = "slim", .desc = "Frontend/listener/server's maxconn, backend's fullconn" },
+ [ST_F_STOT] = { .name = "stot", .desc = "Total number of sessions since process started" },
+ [ST_F_BIN] = { .name = "bin", .desc = "Total number of request bytes since process started" },
+ [ST_F_BOUT] = { .name = "bout", .desc = "Total number of response bytes since process started" },
+ [ST_F_DREQ] = { .name = "dreq", .desc = "Total number of denied requests since process started" },
+ [ST_F_DRESP] = { .name = "dresp", .desc = "Total number of denied responses since process started" },
+ [ST_F_EREQ] = { .name = "ereq", .desc = "Total number of invalid requests since process started" },
+ [ST_F_ECON] = { .name = "econ", .desc = "Total number of failed connections to server since the worker process started" },
+ [ST_F_ERESP] = { .name = "eresp", .desc = "Total number of invalid responses since the worker process started" },
+ [ST_F_WRETR] = { .name = "wretr", .desc = "Total number of server connection retries since the worker process started" },
+ [ST_F_WREDIS] = { .name = "wredis", .desc = "Total number of server redispatches due to connection failures since the worker process started" },
+ [ST_F_STATUS] = { .name = "status", .desc = "Frontend/listen status: OPEN/WAITING/FULL/STOP; backend: UP/DOWN; server: last check status" },
+ [ST_F_WEIGHT] = { .name = "weight", .desc = "Server's effective weight, or sum of active servers' effective weights for a backend" },
+ [ST_F_ACT] = { .name = "act", .desc = "Total number of active UP servers with a non-zero weight" },
+ [ST_F_BCK] = { .name = "bck", .desc = "Total number of backup UP servers with a non-zero weight" },
+ [ST_F_CHKFAIL] = { .name = "chkfail", .desc = "Total number of failed individual health checks per server/backend, since the worker process started" },
+ [ST_F_CHKDOWN] = { .name = "chkdown", .desc = "Total number of failed checks causing UP to DOWN server transitions, per server/backend, since the worker process started" },
+ [ST_F_LASTCHG] = { .name = "lastchg", .desc = "How long ago the last server state changed, in seconds" },
+ [ST_F_DOWNTIME] = { .name = "downtime", .desc = "Total time spent in DOWN state, for server or backend" },
+ [ST_F_QLIMIT] = { .name = "qlimit", .desc = "Limit on the number of connections in queue, for servers only (maxqueue argument)" },
+ [ST_F_PID] = { .name = "pid", .desc = "Relative worker process number (1)" },
+ [ST_F_IID] = { .name = "iid", .desc = "Frontend or Backend numeric identifier ('id' setting)" },
+ [ST_F_SID] = { .name = "sid", .desc = "Server numeric identifier ('id' setting)" },
+ [ST_F_THROTTLE] = { .name = "throttle", .desc = "Throttling ratio applied to a server's maxconn and weight during the slowstart period (0 to 100%)" },
+ [ST_F_LBTOT] = { .name = "lbtot", .desc = "Total number of requests routed by load balancing since the worker process started (ignores queue pop and stickiness)" },
+ [ST_F_TRACKED] = { .name = "tracked", .desc = "Name of the other server this server tracks for its state" },
+ [ST_F_TYPE] = { .name = "type", .desc = "Type of the object (Listener, Frontend, Backend, Server)" },
+ [ST_F_RATE] = { .name = "rate", .desc = "Total number of sessions processed by this object over the last second (sessions for listeners/frontends, requests for backends/servers)" },
+ [ST_F_RATE_LIM] = { .name = "rate_lim", .desc = "Limit on the number of sessions accepted in a second (frontend only, 'rate-limit sessions' setting)" },
+ [ST_F_RATE_MAX] = { .name = "rate_max", .desc = "Highest value of sessions per second observed since the worker process started" },
+ [ST_F_CHECK_STATUS] = { .name = "check_status", .desc = "Status report of the server's latest health check, prefixed with '*' if a check is currently in progress" },
+ [ST_F_CHECK_CODE] = { .name = "check_code", .desc = "HTTP/SMTP/LDAP status code reported by the latest server health check" },
+ [ST_F_CHECK_DURATION] = { .name = "check_duration", .desc = "Total duration of the latest server health check, in milliseconds" },
+ [ST_F_HRSP_1XX] = { .name = "hrsp_1xx", .desc = "Total number of HTTP responses with status 100-199 returned by this object since the worker process started" },
+ [ST_F_HRSP_2XX] = { .name = "hrsp_2xx", .desc = "Total number of HTTP responses with status 200-299 returned by this object since the worker process started" },
+ [ST_F_HRSP_3XX] = { .name = "hrsp_3xx", .desc = "Total number of HTTP responses with status 300-399 returned by this object since the worker process started" },
+ [ST_F_HRSP_4XX] = { .name = "hrsp_4xx", .desc = "Total number of HTTP responses with status 400-499 returned by this object since the worker process started" },
+ [ST_F_HRSP_5XX] = { .name = "hrsp_5xx", .desc = "Total number of HTTP responses with status 500-599 returned by this object since the worker process started" },
+ [ST_F_HRSP_OTHER] = { .name = "hrsp_other", .desc = "Total number of HTTP responses with status <100, >599 returned by this object since the worker process started (error -1 included)" },
+ [ST_F_HANAFAIL] = { .name = "hanafail", .desc = "Total number of failed checks caused by an 'on-error' directive after an 'observe' condition matched" },
+ [ST_F_REQ_RATE] = { .name = "req_rate", .desc = "Number of HTTP requests processed over the last second on this object" },
+	[ST_F_REQ_RATE_MAX]                  = { .name = "req_rate_max",                .desc = "Highest value of HTTP requests per second observed since the worker process started" },
+ [ST_F_REQ_TOT] = { .name = "req_tot", .desc = "Total number of HTTP requests processed by this object since the worker process started" },
+ [ST_F_CLI_ABRT] = { .name = "cli_abrt", .desc = "Total number of requests or connections aborted by the client since the worker process started" },
+ [ST_F_SRV_ABRT] = { .name = "srv_abrt", .desc = "Total number of requests or connections aborted by the server since the worker process started" },
+ [ST_F_COMP_IN] = { .name = "comp_in", .desc = "Total number of bytes submitted to the HTTP compressor for this object since the worker process started" },
+ [ST_F_COMP_OUT] = { .name = "comp_out", .desc = "Total number of bytes emitted by the HTTP compressor for this object since the worker process started" },
+ [ST_F_COMP_BYP] = { .name = "comp_byp", .desc = "Total number of bytes that bypassed HTTP compression for this object since the worker process started (CPU/memory/bandwidth limitation)" },
+ [ST_F_COMP_RSP] = { .name = "comp_rsp", .desc = "Total number of HTTP responses that were compressed for this object since the worker process started" },
+ [ST_F_LASTSESS] = { .name = "lastsess", .desc = "How long ago some traffic was seen on this object on this worker process, in seconds" },
+ [ST_F_LAST_CHK] = { .name = "last_chk", .desc = "Short description of the latest health check report for this server (see also check_desc)" },
+ [ST_F_LAST_AGT] = { .name = "last_agt", .desc = "Short description of the latest agent check report for this server (see also agent_desc)" },
+ [ST_F_QTIME] = { .name = "qtime", .desc = "Time spent in the queue, in milliseconds, averaged over the 1024 last requests (backend/server)" },
+ [ST_F_CTIME] = { .name = "ctime", .desc = "Time spent waiting for a connection to complete, in milliseconds, averaged over the 1024 last requests (backend/server)" },
+ [ST_F_RTIME] = { .name = "rtime", .desc = "Time spent waiting for a server response, in milliseconds, averaged over the 1024 last requests (backend/server)" },
+ [ST_F_TTIME] = { .name = "ttime", .desc = "Total request+response time (request+queue+connect+response+processing), in milliseconds, averaged over the 1024 last requests (backend/server)" },
+ [ST_F_AGENT_STATUS] = { .name = "agent_status", .desc = "Status report of the server's latest agent check, prefixed with '*' if a check is currently in progress" },
+ [ST_F_AGENT_CODE] = { .name = "agent_code", .desc = "Status code reported by the latest server agent check" },
+ [ST_F_AGENT_DURATION] = { .name = "agent_duration", .desc = "Total duration of the latest server agent check, in milliseconds" },
+ [ST_F_CHECK_DESC] = { .name = "check_desc", .desc = "Textual description of the latest health check report for this server" },
+ [ST_F_AGENT_DESC] = { .name = "agent_desc", .desc = "Textual description of the latest agent check report for this server" },
+ [ST_F_CHECK_RISE] = { .name = "check_rise", .desc = "Number of successful health checks before declaring a server UP (server 'rise' setting)" },
+ [ST_F_CHECK_FALL] = { .name = "check_fall", .desc = "Number of failed health checks before declaring a server DOWN (server 'fall' setting)" },
+	[ST_F_CHECK_HEALTH]                  = { .name = "check_health",                .desc = "Current server health check level (0..fall-1=DOWN, fall..fall+rise-1=UP)" },
+ [ST_F_AGENT_RISE] = { .name = "agent_rise", .desc = "Number of successful agent checks before declaring a server UP (server 'rise' setting)" },
+ [ST_F_AGENT_FALL] = { .name = "agent_fall", .desc = "Number of failed agent checks before declaring a server DOWN (server 'fall' setting)" },
+	[ST_F_AGENT_HEALTH]                  = { .name = "agent_health",                .desc = "Current server agent check level (0..fall-1=DOWN, fall..fall+rise-1=UP)" },
+ [ST_F_ADDR] = { .name = "addr", .desc = "Server's address:port, shown only if show-legends is set, or at levels oper/admin for the CLI" },
+ [ST_F_COOKIE] = { .name = "cookie", .desc = "Backend's cookie name or Server's cookie value, shown only if show-legends is set, or at levels oper/admin for the CLI" },
+ [ST_F_MODE] = { .name = "mode", .desc = "'mode' setting (tcp/http/health/cli)" },
+ [ST_F_ALGO] = { .name = "algo", .desc = "Backend's load balancing algorithm, shown only if show-legends is set, or at levels oper/admin for the CLI" },
+ [ST_F_CONN_RATE] = { .name = "conn_rate", .desc = "Number of new connections accepted over the last second on the frontend for this worker process" },
+ [ST_F_CONN_RATE_MAX] = { .name = "conn_rate_max", .desc = "Highest value of connections per second observed since the worker process started" },
+ [ST_F_CONN_TOT] = { .name = "conn_tot", .desc = "Total number of new connections accepted on this frontend since the worker process started" },
+ [ST_F_INTERCEPTED] = { .name = "intercepted", .desc = "Total number of HTTP requests intercepted on the frontend (redirects/stats/services) since the worker process started" },
+ [ST_F_DCON] = { .name = "dcon", .desc = "Total number of incoming connections blocked on a listener/frontend by a tcp-request connection rule since the worker process started" },
+ [ST_F_DSES] = { .name = "dses", .desc = "Total number of incoming sessions blocked on a listener/frontend by a tcp-request connection rule since the worker process started" },
+ [ST_F_WREW] = { .name = "wrew", .desc = "Total number of failed HTTP header rewrites since the worker process started" },
+ [ST_F_CONNECT] = { .name = "connect", .desc = "Total number of outgoing connection attempts on this backend/server since the worker process started" },
+	[ST_F_REUSE]                         = { .name = "reuse",                       .desc = "Total number of reused connections on this backend/server since the worker process started" },
+ [ST_F_CACHE_LOOKUPS] = { .name = "cache_lookups", .desc = "Total number of HTTP requests looked up in the cache on this frontend/backend since the worker process started" },
+	[ST_F_CACHE_HITS]                    = { .name = "cache_hits",                  .desc = "Total number of HTTP requests found in the cache and served from it on this frontend/backend since the worker process started" },
+ [ST_F_SRV_ICUR] = { .name = "srv_icur", .desc = "Current number of idle connections available for reuse on this server" },
+	[ST_F_SRV_ILIM]                      = { .name = "srv_ilim",                    .desc = "Limit on the number of available idle connections on this server (server 'pool_max_conn' directive)" },
+ [ST_F_QT_MAX] = { .name = "qtime_max", .desc = "Maximum observed time spent in the queue, in milliseconds (backend/server)" },
+ [ST_F_CT_MAX] = { .name = "ctime_max", .desc = "Maximum observed time spent waiting for a connection to complete, in milliseconds (backend/server)" },
+ [ST_F_RT_MAX] = { .name = "rtime_max", .desc = "Maximum observed time spent waiting for a server response, in milliseconds (backend/server)" },
+ [ST_F_TT_MAX] = { .name = "ttime_max", .desc = "Maximum observed total request+response time (request+queue+connect+response+processing), in milliseconds (backend/server)" },
+ [ST_F_EINT] = { .name = "eint", .desc = "Total number of internal errors since process started"},
+ [ST_F_IDLE_CONN_CUR] = { .name = "idle_conn_cur", .desc = "Current number of unsafe idle connections"},
+ [ST_F_SAFE_CONN_CUR] = { .name = "safe_conn_cur", .desc = "Current number of safe idle connections"},
+ [ST_F_USED_CONN_CUR] = { .name = "used_conn_cur", .desc = "Current number of connections in use"},
+ [ST_F_NEED_CONN_EST] = { .name = "need_conn_est", .desc = "Estimated needed number of connections"},
+ [ST_F_UWEIGHT] = { .name = "uweight", .desc = "Server's user weight, or sum of active servers' user weights for a backend" },
+ [ST_F_AGG_SRV_CHECK_STATUS] = { .name = "agg_server_check_status", .desc = "[DEPRECATED] Backend's aggregated gauge of servers' status" },
+	[ST_F_AGG_SRV_STATUS]                = { .name = "agg_server_status",           .desc = "Backend's aggregated gauge of servers' status" },
+ [ST_F_AGG_CHECK_STATUS] = { .name = "agg_check_status", .desc = "Backend's aggregated gauge of servers' state check status" },
+ [ST_F_SRID] = { .name = "srid", .desc = "Server id revision, to prevent server id reuse mixups" },
+ [ST_F_SESS_OTHER] = { .name = "sess_other", .desc = "Total number of sessions other than HTTP since process started" },
+ [ST_F_H1SESS] = { .name = "h1sess", .desc = "Total number of HTTP/1 sessions since process started" },
+ [ST_F_H2SESS] = { .name = "h2sess", .desc = "Total number of HTTP/2 sessions since process started" },
+ [ST_F_H3SESS] = { .name = "h3sess", .desc = "Total number of HTTP/3 sessions since process started" },
+	[ST_F_REQ_OTHER]                     = { .name = "req_other",                   .desc = "Total number of requests other than HTTP processed by this object since the worker process started" },
+	[ST_F_H1REQ]                         = { .name = "h1req",                       .desc = "Total number of HTTP/1 requests processed by this object since the worker process started" },
+	[ST_F_H2REQ]                         = { .name = "h2req",                       .desc = "Total number of HTTP/2 requests processed by this object since the worker process started" },
+	[ST_F_H3REQ]                         = { .name = "h3req",                       .desc = "Total number of HTTP/3 requests processed by this object since the worker process started" },
+ [ST_F_PROTO] = { .name = "proto", .desc = "Protocol" },
+};
+
+/* one line of info */
+THREAD_LOCAL struct field info[INF_TOTAL_FIELDS];
+
+/* description of statistics (static and dynamic) */
+static struct name_desc *stat_f[STATS_DOMAIN_COUNT];
+static size_t stat_count[STATS_DOMAIN_COUNT];
+
+/* one line for stats */
+THREAD_LOCAL struct field *stat_l[STATS_DOMAIN_COUNT];
+
+/* list of all registered stats modules */
+static struct list stats_module_list[STATS_DOMAIN_COUNT] = {
+ LIST_HEAD_INIT(stats_module_list[STATS_DOMAIN_PROXY]),
+ LIST_HEAD_INIT(stats_module_list[STATS_DOMAIN_RESOLVERS]),
+};
+
+THREAD_LOCAL void *trash_counters;
+static THREAD_LOCAL struct buffer trash_chunk = BUF_NULL;
+
+
+static inline uint8_t stats_get_domain(uint32_t domain)
+{
+ return domain >> STATS_DOMAIN & STATS_DOMAIN_MASK;
+}
+
+static inline enum stats_domain_px_cap stats_px_get_cap(uint32_t domain)
+{
+ return domain >> STATS_PX_CAP & STATS_PX_CAP_MASK;
+}
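+
+/* Note for the two helpers above: <domain> packs two values in one word, the
+ * stats domain at bit offset STATS_DOMAIN and the proxy capabilities at bit
+ * offset STATS_PX_CAP, each extracted with its *_MASK constant (all assumed
+ * to be defined in the stats headers).
+ */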
+
+static void stats_dump_json_schema(struct buffer *out);
+
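+/* Copies the pending data from <trash_chunk> to the output. When <htx> is
+ * non-NULL the chunk is appended to the HTX message and accounted for on the
+ * channel, otherwise it is pushed through the applet API. Returns 1 on
+ * success, 0 when the output lacks room, in which case more room is requested
+ * and the chunk is left untouched so the caller may retry later.
+ */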
+int stats_putchk(struct appctx *appctx, struct htx *htx)
+{
+ struct stconn *sc = appctx_sc(appctx);
+ struct channel *chn = sc_ic(sc);
+ struct buffer *chk = &trash_chunk;
+
+ if (htx) {
+ if (chk->data >= channel_htx_recv_max(chn, htx)) {
+ sc_need_room(sc, chk->data);
+ return 0;
+ }
+ if (!htx_add_data_atonce(htx, ist2(chk->area, chk->data))) {
+ sc_need_room(sc, 0);
+ return 0;
+ }
+ channel_add_input(chn, chk->data);
+ chk->data = 0;
+ }
+ else {
+ if (applet_putchk(appctx, chk) == -1)
+ return 0;
+ }
+ return 1;
+}
+
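+/* Returns a pointer to the scope string inside the stats applet's request
+ * URI, <ctx->scope_str> holding the offset of that string within the URI.
+ * The request start line must still be present in the channel buffer, which
+ * is asserted by the BUG_ON() below.
+ */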
+static const char *stats_scope_ptr(struct appctx *appctx, struct stconn *sc)
+{
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct channel *req = sc_oc(sc);
+ struct htx *htx = htxbuf(&req->buf);
+ struct htx_blk *blk;
+ struct ist uri;
+
+ blk = htx_get_head_blk(htx);
+ BUG_ON(!blk || htx_get_blk_type(blk) != HTX_BLK_REQ_SL);
+ ALREADY_CHECKED(blk);
+ uri = htx_sl_req_uri(htx_get_blk_ptr(htx, blk));
+ return uri.ptr + ctx->scope_str;
+}
+
+/*
+ * http_stats_io_handler()
+ * -> stats_dump_stat_to_buffer() // same as above, but used for CSV or HTML
+ * -> stats_dump_csv_header() // emits the CSV headers (same as above)
+ * -> stats_dump_json_header() // emits the JSON headers (same as above)
+ * -> stats_dump_html_head() // emits the HTML headers
+ * -> stats_dump_html_info() // emits the equivalent of "show info" at the top
+ * -> stats_dump_proxy_to_buffer() // same as above, valid for CSV and HTML
+ * -> stats_dump_html_px_hdr()
+ * -> stats_dump_fe_stats()
+ * -> stats_dump_li_stats()
+ * -> stats_dump_sv_stats()
+ * -> stats_dump_be_stats()
+ * -> stats_dump_html_px_end()
+ * -> stats_dump_html_end() // emits HTML trailer
+ * -> stats_dump_json_end() // emits JSON trailer
+ */
+
+
+/* Dumps the stats CSV header to the local trash buffer. The caller is
+ * responsible for clearing it if needed.
+ * NOTE: Some tools happen to rely on the field position instead of its name,
+ * so please only append new fields at the end, never in the middle.
+ */
+static void stats_dump_csv_header(enum stats_domain domain)
+{
+ int field;
+
+ chunk_appendf(&trash_chunk, "# ");
+ if (stat_f[domain]) {
+ for (field = 0; field < stat_count[domain]; ++field) {
+ chunk_appendf(&trash_chunk, "%s,", stat_f[domain][field].name);
+
+ /* print special delimiter on proxy stats to mark end of
+ static fields */
+ if (domain == STATS_DOMAIN_PROXY && field + 1 == ST_F_TOTAL_FIELDS)
+ chunk_appendf(&trash_chunk, "-,");
+ }
+ }
+
+ chunk_appendf(&trash_chunk, "\n");
+}
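+
+/* For illustration, in the proxy domain the header emitted above begins:
+ *   # pxname,svname,qcur,qmax,scur,smax,slim,stot,bin,bout,...
+ * following stat_fields[] order, with a lone "-," appended right after the
+ * last static field to mark where module-registered fields start.
+ */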
+
+/* Emits a stats field without any surrounding element, encoded so that it
+ * remains safe within CSV output. Returns non-zero on success, 0 if the
+ * buffer is full.
+ */
+int stats_emit_raw_data_field(struct buffer *out, const struct field *f)
+{
+ switch (field_format(f, 0)) {
+ case FF_EMPTY: return 1;
+ case FF_S32: return chunk_appendf(out, "%d", f->u.s32);
+ case FF_U32: return chunk_appendf(out, "%u", f->u.u32);
+ case FF_S64: return chunk_appendf(out, "%lld", (long long)f->u.s64);
+ case FF_U64: return chunk_appendf(out, "%llu", (unsigned long long)f->u.u64);
+ case FF_FLT: {
+ size_t prev_data = out->data;
+ out->data = flt_trim(out->area, prev_data, chunk_appendf(out, "%f", f->u.flt));
+ return out->data;
+ }
+ case FF_STR: return csv_enc_append(field_str(f, 0), 1, 2, out) != NULL;
+ default: return chunk_appendf(out, "[INCORRECT_FIELD_TYPE_%08x]", f->type);
+ }
+}
+
+const char *field_to_html_str(const struct field *f)
+{
+ switch (field_format(f, 0)) {
+ case FF_S32: return U2H(f->u.s32);
+ case FF_S64: return U2H(f->u.s64);
+ case FF_U64: return U2H(f->u.u64);
+ case FF_U32: return U2H(f->u.u32);
+ case FF_FLT: return F2H(f->u.flt);
+ case FF_STR: return field_str(f, 0);
+ case FF_EMPTY:
+ default:
+ return "";
+ }
+}
+
+/* Emits a stats field prefixed with its type. No CSV encoding is performed;
+ * the output is expected to stand on its own line. Returns non-zero on
+ * success, 0 if the buffer is full.
+ */
+int stats_emit_typed_data_field(struct buffer *out, const struct field *f)
+{
+ switch (field_format(f, 0)) {
+ case FF_EMPTY: return 1;
+ case FF_S32: return chunk_appendf(out, "s32:%d", f->u.s32);
+ case FF_U32: return chunk_appendf(out, "u32:%u", f->u.u32);
+ case FF_S64: return chunk_appendf(out, "s64:%lld", (long long)f->u.s64);
+ case FF_U64: return chunk_appendf(out, "u64:%llu", (unsigned long long)f->u.u64);
+ case FF_FLT: {
+ size_t prev_data = out->data;
+ out->data = flt_trim(out->area, prev_data, chunk_appendf(out, "flt:%f", f->u.flt));
+ return out->data;
+ }
+ case FF_STR: return chunk_appendf(out, "str:%s", field_str(f, 0));
+ default: return chunk_appendf(out, "%08x:?", f->type);
+ }
+}
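+
+/* Illustrative outputs of the encoder above:
+ *   FF_U32 -> "u32:128"      FF_S64 -> "s64:-42"
+ *   FF_FLT -> "flt:0.5"      FF_STR -> "str:OPEN"
+ * (the float is trimmed of its trailing zeros by flt_trim()).
+ */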
+
+/* Limit JSON integer values to the range [-(2**53)+1, (2**53)-1] as per
+ * the recommendation for interoperable integers in section 6 of RFC 7159.
+ */
+#define JSON_INT_MAX ((1LL << 53) - 1)
+#define JSON_INT_MIN (0 - JSON_INT_MAX)
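+
+/* This gives JSON_INT_MAX = 2^53 - 1 = 9007199254740991 and JSON_INT_MIN its
+ * negative, i.e. the exact integer range of an IEEE-754 double, so every
+ * integer emitted below survives parsers that store JSON numbers as doubles.
+ * The constants are kept signed so the s64 range check below compares
+ * correctly.
+ */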
+
+/* Emits a stats field value and its type in JSON.
+ * Returns non-zero on success, 0 on error.
+ */
+int stats_emit_json_data_field(struct buffer *out, const struct field *f)
+{
+ int old_len;
+ char buf[20];
+ const char *type, *value = buf, *quote = "";
+
+ switch (field_format(f, 0)) {
+ case FF_EMPTY: return 1;
+ case FF_S32: type = "\"s32\"";
+ snprintf(buf, sizeof(buf), "%d", f->u.s32);
+ break;
+ case FF_U32: type = "\"u32\"";
+ snprintf(buf, sizeof(buf), "%u", f->u.u32);
+ break;
+	case FF_S64:  if (f->u.s64 < JSON_INT_MIN || f->u.s64 > JSON_INT_MAX)
+		              return 0;
+		      type = "\"s64\"";
+		      snprintf(buf, sizeof(buf), "%lld", (long long)f->u.s64);
+ break;
+ case FF_U64: if (f->u.u64 > JSON_INT_MAX)
+ return 0;
+ type = "\"u64\"";
+ snprintf(buf, sizeof(buf), "%llu",
+ (unsigned long long) f->u.u64);
+ break;
+ case FF_FLT: type = "\"flt\"";
+ flt_trim(buf, 0, snprintf(buf, sizeof(buf), "%f", f->u.flt));
+ break;
+ case FF_STR: type = "\"str\"";
+ value = field_str(f, 0);
+ quote = "\"";
+ break;
+ default: snprintf(buf, sizeof(buf), "%u", f->type);
+ type = buf;
+ value = "unknown";
+ quote = "\"";
+ break;
+ }
+
+ old_len = out->data;
+ chunk_appendf(out, ",\"value\":{\"type\":%s,\"value\":%s%s%s}",
+ type, quote, value, quote);
+ return !(old_len == out->data);
+}
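+
+/* For illustration, an FF_U64 field holding 1234 appends:
+ *   ,"value":{"type":"u64","value":1234}
+ * and an FF_STR field holding "OPEN" appends:
+ *   ,"value":{"type":"str","value":"OPEN"}
+ */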
+
+/* Emits an encoding of the field type as 3 characters followed by a delimiter.
+ * Returns non-zero on success, 0 if the buffer is full.
+ */
+int stats_emit_field_tags(struct buffer *out, const struct field *f,
+ char delim)
+{
+ char origin, nature, scope;
+
+ switch (field_origin(f, 0)) {
+ case FO_METRIC: origin = 'M'; break;
+ case FO_STATUS: origin = 'S'; break;
+ case FO_KEY: origin = 'K'; break;
+ case FO_CONFIG: origin = 'C'; break;
+ case FO_PRODUCT: origin = 'P'; break;
+ default: origin = '?'; break;
+ }
+
+ switch (field_nature(f, 0)) {
+ case FN_GAUGE: nature = 'G'; break;
+ case FN_LIMIT: nature = 'L'; break;
+ case FN_MIN: nature = 'm'; break;
+ case FN_MAX: nature = 'M'; break;
+ case FN_RATE: nature = 'R'; break;
+ case FN_COUNTER: nature = 'C'; break;
+ case FN_DURATION: nature = 'D'; break;
+ case FN_AGE: nature = 'A'; break;
+ case FN_TIME: nature = 'T'; break;
+ case FN_NAME: nature = 'N'; break;
+ case FN_OUTPUT: nature = 'O'; break;
+ case FN_AVG: nature = 'a'; break;
+ default: nature = '?'; break;
+ }
+
+ switch (field_scope(f, 0)) {
+ case FS_PROCESS: scope = 'P'; break;
+ case FS_SERVICE: scope = 'S'; break;
+ case FS_SYSTEM: scope = 's'; break;
+ case FS_CLUSTER: scope = 'C'; break;
+ default: scope = '?'; break;
+ }
+
+ return chunk_appendf(out, "%c%c%c%c", origin, nature, scope, delim);
+}
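+
+/* Example: a process-wide counter metric emitted with delimiter ':' yields
+ * "MCP:" (origin 'M'etric, nature 'C'ounter, scope 'P'rocess).
+ */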
+
+/* Emits an encoding of the field type as JSON.
+ * Returns non-zero on success, 0 if the buffer is full.
+ */
+int stats_emit_json_field_tags(struct buffer *out, const struct field *f)
+{
+ const char *origin, *nature, *scope;
+ int old_len;
+
+ switch (field_origin(f, 0)) {
+ case FO_METRIC: origin = "Metric"; break;
+ case FO_STATUS: origin = "Status"; break;
+ case FO_KEY: origin = "Key"; break;
+ case FO_CONFIG: origin = "Config"; break;
+ case FO_PRODUCT: origin = "Product"; break;
+ default: origin = "Unknown"; break;
+ }
+
+ switch (field_nature(f, 0)) {
+ case FN_GAUGE: nature = "Gauge"; break;
+ case FN_LIMIT: nature = "Limit"; break;
+ case FN_MIN: nature = "Min"; break;
+ case FN_MAX: nature = "Max"; break;
+ case FN_RATE: nature = "Rate"; break;
+ case FN_COUNTER: nature = "Counter"; break;
+ case FN_DURATION: nature = "Duration"; break;
+ case FN_AGE: nature = "Age"; break;
+ case FN_TIME: nature = "Time"; break;
+ case FN_NAME: nature = "Name"; break;
+ case FN_OUTPUT: nature = "Output"; break;
+ case FN_AVG: nature = "Avg"; break;
+ default: nature = "Unknown"; break;
+ }
+
+ switch (field_scope(f, 0)) {
+ case FS_PROCESS: scope = "Process"; break;
+ case FS_SERVICE: scope = "Service"; break;
+ case FS_SYSTEM: scope = "System"; break;
+ case FS_CLUSTER: scope = "Cluster"; break;
+ default: scope = "Unknown"; break;
+ }
+
+ old_len = out->data;
+ chunk_appendf(out, "\"tags\":{"
+ "\"origin\":\"%s\","
+ "\"nature\":\"%s\","
+ "\"scope\":\"%s\""
+ "}", origin, nature, scope);
+ return !(old_len == out->data);
+}
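+
+/* The same process-wide counter metric as in the example above serializes to:
+ *   "tags":{"origin":"Metric","nature":"Counter","scope":"Process"}
+ */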
+
+/* Dump all fields from <stats> into <out> using CSV format */
+static int stats_dump_fields_csv(struct buffer *out,
+ const struct field *stats, size_t stats_count,
+ struct show_stat_ctx *ctx)
+{
+ int domain = ctx->domain;
+ int field;
+
+ for (field = 0; field < stats_count; ++field) {
+ if (!stats_emit_raw_data_field(out, &stats[field]))
+ return 0;
+ if (!chunk_strcat(out, ","))
+ return 0;
+
+ /* print special delimiter on proxy stats to mark end of
+ static fields */
+ if (domain == STATS_DOMAIN_PROXY && field + 1 == ST_F_TOTAL_FIELDS) {
+ if (!chunk_strcat(out, "-,"))
+ return 0;
+ }
+ }
+
+ chunk_strcat(out, "\n");
+ return 1;
+}
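+
+/* Each value is emitted raw and followed by a comma, FF_EMPTY fields
+ * contributing only their separator, so a frontend line starts like this
+ * (illustrative names and values):
+ *   fe-http,FRONTEND,,,12,64,100,...
+ * with "-," inserted after the last static field in the proxy domain.
+ */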
+
+/* Dump all fields from <stats> into <out> using a typed "field:desc:type:value" format */
+static int stats_dump_fields_typed(struct buffer *out,
+ const struct field *stats,
+ size_t stats_count,
+                                   struct show_stat_ctx *ctx)
+{
+ int flags = ctx->flags;
+ int domain = ctx->domain;
+ int field;
+
+ for (field = 0; field < stats_count; ++field) {
+ if (!stats[field].type)
+ continue;
+
+ switch (domain) {
+ case STATS_DOMAIN_PROXY:
+ chunk_appendf(out, "%c.%u.%u.%d.%s.%u:",
+ stats[ST_F_TYPE].u.u32 == STATS_TYPE_FE ? 'F' :
+ stats[ST_F_TYPE].u.u32 == STATS_TYPE_BE ? 'B' :
+ stats[ST_F_TYPE].u.u32 == STATS_TYPE_SO ? 'L' :
+ stats[ST_F_TYPE].u.u32 == STATS_TYPE_SV ? 'S' :
+ '?',
+ stats[ST_F_IID].u.u32, stats[ST_F_SID].u.u32,
+ field,
+ stat_f[domain][field].name,
+ stats[ST_F_PID].u.u32);
+ break;
+
+ case STATS_DOMAIN_RESOLVERS:
+ chunk_appendf(out, "N.%d.%s:", field,
+ stat_f[domain][field].name);
+ break;
+
+ default:
+ break;
+ }
+
+ if (!stats_emit_field_tags(out, &stats[field], ':'))
+ return 0;
+ if (!stats_emit_typed_data_field(out, &stats[field]))
+ return 0;
+
+ if (flags & STAT_SHOW_FDESC &&
+ !chunk_appendf(out, ":\"%s\"", stat_f[domain][field].desc)) {
+ return 0;
+ }
+
+ if (!chunk_strcat(out, "\n"))
+ return 0;
+ }
+ return 1;
+}
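+
+/* A complete typed line thus looks like (illustrative values and tags):
+ *   F.2.0.7.stot.1:MCP:u64:1234
+ * i.e. the object type, iid, sid, field position, field name and pid,
+ * followed by the three tag characters, the typed value, and the quoted
+ * field description when STAT_SHOW_FDESC is set.
+ */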
+
+/* Dump all fields from <stats> into <out> using the "show info json" format */
+static int stats_dump_json_info_fields(struct buffer *out,
+ const struct field *info,
+ struct show_stat_ctx *ctx)
+{
+ int started = (ctx->field) ? 1 : 0;
+ int ready_data = 0;
+
+ if (!started && !chunk_strcat(out, "["))
+ return 0;
+
+ for (; ctx->field < INF_TOTAL_FIELDS; ctx->field++) {
+ int old_len;
+ int field = ctx->field;
+
+ if (!field_format(info, field))
+ continue;
+
+ if (started && !chunk_strcat(out, ","))
+ goto err;
+ started = 1;
+
+ old_len = out->data;
+ chunk_appendf(out,
+ "{\"field\":{\"pos\":%d,\"name\":\"%s\"},"
+ "\"processNum\":%u,",
+ field, info_fields[field].name,
+ info[INF_PROCESS_NUM].u.u32);
+ if (old_len == out->data)
+ goto err;
+
+ if (!stats_emit_json_field_tags(out, &info[field]))
+ goto err;
+
+ if (!stats_emit_json_data_field(out, &info[field]))
+ goto err;
+
+ if (!chunk_strcat(out, "}"))
+ goto err;
+ ready_data = out->data;
+ }
+
+ if (!chunk_strcat(out, "]\n"))
+ goto err;
+ ctx->field = 0; /* we're done */
+ return 1;
+
+err:
+ if (!ready_data) {
+		/* not enough buffer space for a single entry */
+ chunk_reset(out);
+ chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}\n");
+ return 0; /* hard error */
+ }
+ /* push ready data and wait for a new buffer to complete the dump */
+ out->data = ready_data;
+ return 1;
+}
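+
+/* Note that <ctx->field> persists across invocations: when the buffer fills
+ * up, the output is truncated back to <ready_data> (the last fully emitted
+ * entry) and the dump resumes from the same field with the next buffer.
+ */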
+
+static void stats_print_proxy_field_json(struct buffer *out,
+ const struct field *stat,
+ const char *name,
+ int pos,
+ uint32_t field_type,
+ uint32_t iid,
+ uint32_t sid,
+ uint32_t pid)
+{
+ const char *obj_type;
+ switch (field_type) {
+ case STATS_TYPE_FE: obj_type = "Frontend"; break;
+ case STATS_TYPE_BE: obj_type = "Backend"; break;
+ case STATS_TYPE_SO: obj_type = "Listener"; break;
+ case STATS_TYPE_SV: obj_type = "Server"; break;
+ default: obj_type = "Unknown"; break;
+ }
+
+ chunk_appendf(out,
+ "{"
+ "\"objType\":\"%s\","
+ "\"proxyId\":%u,"
+ "\"id\":%u,"
+ "\"field\":{\"pos\":%d,\"name\":\"%s\"},"
+ "\"processNum\":%u,",
+ obj_type, iid, sid, pos, name, pid);
+}
+
+static void stats_print_rslv_field_json(struct buffer *out,
+ const struct field *stat,
+ const char *name,
+ int pos)
+{
+ chunk_appendf(out,
+ "{"
+ "\"field\":{\"pos\":%d,\"name\":\"%s\"},",
+ pos, name);
+}
+
+
+/* Dump all fields from <stats> into <out> using the JSON format */
+static int stats_dump_fields_json(struct buffer *out,
+ const struct field *stats, size_t stats_count,
+ struct show_stat_ctx *ctx)
+{
+ int flags = ctx->flags;
+ int domain = ctx->domain;
+ int started = (ctx->field) ? 1 : 0;
+ int ready_data = 0;
+
+ if (!started && (flags & STAT_STARTED) && !chunk_strcat(out, ","))
+ return 0;
+ if (!started && !chunk_strcat(out, "["))
+ return 0;
+
+ for (; ctx->field < stats_count; ctx->field++) {
+ int old_len;
+ int field = ctx->field;
+
+ if (!stats[field].type)
+ continue;
+
+ if (started && !chunk_strcat(out, ","))
+ goto err;
+ started = 1;
+
+ old_len = out->data;
+ if (domain == STATS_DOMAIN_PROXY) {
+ stats_print_proxy_field_json(out, &stats[field],
+ stat_f[domain][field].name,
+ field,
+ stats[ST_F_TYPE].u.u32,
+ stats[ST_F_IID].u.u32,
+ stats[ST_F_SID].u.u32,
+ stats[ST_F_PID].u.u32);
+ } else if (domain == STATS_DOMAIN_RESOLVERS) {
+ stats_print_rslv_field_json(out, &stats[field],
+ stat_f[domain][field].name,
+ field);
+ }
+
+ if (old_len == out->data)
+ goto err;
+
+ if (!stats_emit_json_field_tags(out, &stats[field]))
+ goto err;
+
+ if (!stats_emit_json_data_field(out, &stats[field]))
+ goto err;
+
+ if (!chunk_strcat(out, "}"))
+ goto err;
+ ready_data = out->data;
+ }
+
+ if (!chunk_strcat(out, "]"))
+ goto err;
+
+ ctx->field = 0; /* we're done */
+ return 1;
+
+err:
+ if (!ready_data) {
+		/* not enough buffer space for a single entry */
+ chunk_reset(out);
+ if (ctx->flags & STAT_STARTED)
+ chunk_strcat(out, ",");
+ chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}");
+ return 0; /* hard error */
+ }
+ /* push ready data and wait for a new buffer to complete the dump */
+ out->data = ready_data;
+ return 1;
+}
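+
+/* For illustration, one proxy-domain entry emitted above expands to:
+ *   {"objType":"Frontend","proxyId":2,"id":0,
+ *    "field":{"pos":7,"name":"stot"},"processNum":1,
+ *    "tags":{"origin":"Metric","nature":"Counter","scope":"Process"},
+ *    "value":{"type":"u64","value":1234}}
+ * (values and tags illustrative; entries are comma-separated inside "[...]").
+ */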
+
+/* Dump all fields from <stats> into <out> using the HTML format. A column is
+ * reserved for the checkbox if STAT_ADMIN is set in <flags>. Some extra info
+ * is provided if STAT_SHLGNDS is present in <flags>. The statistics from
+ * extra modules are displayed at the end of the lines if STAT_SHMODULES is
+ * present in <flags>.
+ */
+static int stats_dump_fields_html(struct buffer *out,
+ const struct field *stats,
+ struct show_stat_ctx *ctx)
+{
+ struct buffer src;
+ struct stats_module *mod;
+ int flags = ctx->flags;
+ int i = 0, j = 0;
+
+ if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_FE) {
+ chunk_appendf(out,
+ /* name, queue */
+ "<tr class=\"frontend\">");
+
+ if (flags & STAT_ADMIN) {
+ /* Column sub-heading for Enable or Disable server */
+ chunk_appendf(out, "<td></td>");
+ }
+
+ chunk_appendf(out,
+ "<td class=ac>"
+ "<a name=\"%s/Frontend\"></a>"
+ "<a class=lfsb href=\"#%s/Frontend\">Frontend</a></td>"
+ "<td colspan=3></td>"
+ "",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_PXNAME));
+
+ chunk_appendf(out,
+ /* sessions rate : current */
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Current connection rate:</th><td>%s/s</td></tr>"
+ "<tr><th>Current session rate:</th><td>%s/s</td></tr>"
+ "",
+ U2H(stats[ST_F_RATE].u.u32),
+ U2H(stats[ST_F_CONN_RATE].u.u32),
+ U2H(stats[ST_F_RATE].u.u32));
+
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0)
+ chunk_appendf(out,
+ "<tr><th>Current request rate:</th><td>%s/s</td></tr>",
+ U2H(stats[ST_F_REQ_RATE].u.u32));
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions rate : max */
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Max connection rate:</th><td>%s/s</td></tr>"
+ "<tr><th>Max session rate:</th><td>%s/s</td></tr>"
+ "",
+ U2H(stats[ST_F_RATE_MAX].u.u32),
+ U2H(stats[ST_F_CONN_RATE_MAX].u.u32),
+ U2H(stats[ST_F_RATE_MAX].u.u32));
+
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0)
+ chunk_appendf(out,
+ "<tr><th>Max request rate:</th><td>%s/s</td></tr>",
+ U2H(stats[ST_F_REQ_RATE_MAX].u.u32));
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions rate : limit */
+ "<td>%s</td>",
+ LIM2A(stats[ST_F_RATE_LIM].u.u32, "-"));
+
+ chunk_appendf(out,
+ /* sessions: current, max, limit, total */
+ "<td>%s</td><td>%s</td><td>%s</td>"
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Cum. connections:</th><td>%s</td></tr>"
+ "<tr><th>Cum. sessions:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32),
+ U2H(stats[ST_F_STOT].u.u64),
+ U2H(stats[ST_F_CONN_TOT].u.u64),
+ U2H(stats[ST_F_STOT].u.u64));
+
+ /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) {
+ chunk_appendf(out,
+ "<tr><th>- HTTP/1 sessions:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP/2 sessions:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP/3 sessions:</th><td>%s</td></tr>"
+ "<tr><th>- other sessions:</th><td>%s</td></tr>"
+ "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP/1 requests:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP/2 requests:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP/3 requests:</th><td>%s</td></tr>"
+ "<tr><th>- other requests:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_H1SESS].u.u64),
+ U2H(stats[ST_F_H2SESS].u.u64),
+ U2H(stats[ST_F_H3SESS].u.u64),
+ U2H(stats[ST_F_SESS_OTHER].u.u64),
+ U2H(stats[ST_F_REQ_TOT].u.u64),
+ U2H(stats[ST_F_H1REQ].u.u64),
+ U2H(stats[ST_F_H2REQ].u.u64),
+ U2H(stats[ST_F_H3REQ].u.u64),
+ U2H(stats[ST_F_REQ_OTHER].u.u64));
+
+ chunk_appendf(out,
+ "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>"
+ "<tr><th>&nbsp;&nbsp;Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- other responses:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_HRSP_1XX].u.u64),
+ U2H(stats[ST_F_HRSP_2XX].u.u64),
+ U2H(stats[ST_F_COMP_RSP].u.u64),
+ stats[ST_F_HRSP_2XX].u.u64 ?
+ (int)(100 * stats[ST_F_COMP_RSP].u.u64 / stats[ST_F_HRSP_2XX].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_3XX].u.u64),
+ U2H(stats[ST_F_HRSP_4XX].u.u64),
+ U2H(stats[ST_F_HRSP_5XX].u.u64),
+ U2H(stats[ST_F_HRSP_OTHER].u.u64));
+
+ chunk_appendf(out,
+ "<tr><th>Intercepted requests:</th><td>%s</td></tr>"
+ "<tr><th>Cache lookups:</th><td>%s</td></tr>"
+ "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>"
+ "<tr><th>Internal errors:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_INTERCEPTED].u.u64),
+ U2H(stats[ST_F_CACHE_LOOKUPS].u.u64),
+ U2H(stats[ST_F_CACHE_HITS].u.u64),
+ stats[ST_F_CACHE_LOOKUPS].u.u64 ?
+ (int)(100 * stats[ST_F_CACHE_HITS].u.u64 / stats[ST_F_CACHE_LOOKUPS].u.u64) : 0,
+ U2H(stats[ST_F_WREW].u.u64),
+ U2H(stats[ST_F_EINT].u.u64));
+ }
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions: lbtot, lastsess */
+ "<td></td><td></td>"
+ /* bytes : in */
+ "<td>%s</td>"
+ "",
+ U2H(stats[ST_F_BIN].u.u64));
+
+ chunk_appendf(out,
+ /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */
+ "<td>%s%s<div class=tips><table class=det>"
+ "<tr><th>Response bytes in:</th><td>%s</td></tr>"
+ "<tr><th>Compression in:</th><td>%s</td></tr>"
+ "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Compression bypass:</th><td>%s</td></tr>"
+ "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "</table></div>%s</td>",
+ (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "<u>":"",
+ U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_COMP_IN].u.u64),
+ U2H(stats[ST_F_COMP_OUT].u.u64),
+ stats[ST_F_COMP_IN].u.u64 ? (int)(stats[ST_F_COMP_OUT].u.u64 * 100 / stats[ST_F_COMP_IN].u.u64) : 0,
+ U2H(stats[ST_F_COMP_BYP].u.u64),
+ U2H(stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64),
+ stats[ST_F_BOUT].u.u64 ? (int)((stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64) * 100 / stats[ST_F_BOUT].u.u64) : 0,
+ (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "</u>":"");
+
+ chunk_appendf(out,
+ /* denied: req, resp */
+ "<td>%s</td><td>%s</td>"
+ /* errors : request, connect, response */
+ "<td>%s</td><td></td><td></td>"
+ /* warnings: retries, redispatches */
+ "<td></td><td></td>"
+ /* server status : reflect frontend status */
+ "<td class=ac>%s</td>"
+ /* rest of server: nothing */
+ "<td class=ac colspan=8></td>"
+ "",
+ U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64),
+ U2H(stats[ST_F_EREQ].u.u64),
+ field_str(stats, ST_F_STATUS));
+
+ if (flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(out, "<td>");
+
+ if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE) {
+ chunk_appendf(out,
+ "<u>%s<div class=tips><table class=det>",
+ mod->name);
+ for (j = 0; j < mod->stats_count; ++j) {
+ chunk_appendf(out,
+ "<tr><th>%s</th><td>%s</td></tr>",
+ mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i]));
+ ++i;
+ }
+ chunk_appendf(out, "</table></div></u>");
+ } else {
+ i += mod->stats_count;
+ }
+
+ chunk_appendf(out, "</td>");
+ }
+ }
+
+ chunk_appendf(out, "</tr>");
+ }
+ else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_SO) {
+ chunk_appendf(out, "<tr class=socket>");
+ if (flags & STAT_ADMIN) {
+ /* Column sub-heading for Enable or Disable server */
+ chunk_appendf(out, "<td></td>");
+ }
+
+ chunk_appendf(out,
+ /* frontend name, listener name */
+ "<td class=ac><a name=\"%s/+%s\"></a>%s"
+ "<a class=lfsb href=\"#%s/+%s\">%s</a>"
+ "",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME),
+ (flags & STAT_SHLGNDS)?"<u>":"",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), field_str(stats, ST_F_SVNAME));
+
+ if (flags & STAT_SHLGNDS) {
+ chunk_appendf(out, "<div class=tips>");
+
+ if (isdigit((unsigned char)*field_str(stats, ST_F_ADDR)))
+ chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_F_ADDR));
+ else if (*field_str(stats, ST_F_ADDR) == '[')
+ chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_F_ADDR));
+ else if (*field_str(stats, ST_F_ADDR))
+ chunk_appendf(out, "%s, ", field_str(stats, ST_F_ADDR));
+
+ chunk_appendf(out, "proto=%s, ", field_str(stats, ST_F_PROTO));
+
+ /* id */
+ chunk_appendf(out, "id: %d</div>", stats[ST_F_SID].u.u32);
+ }
+
+ chunk_appendf(out,
+ /* queue */
+ "%s</td><td colspan=3></td>"
+ /* sessions rate: current, max, limit */
+ "<td colspan=3>&nbsp;</td>"
+ /* sessions: current, max, limit, total, lbtot, lastsess */
+ "<td>%s</td><td>%s</td><td>%s</td>"
+ "<td>%s</td><td>&nbsp;</td><td>&nbsp;</td>"
+ /* bytes: in, out */
+ "<td>%s</td><td>%s</td>"
+ "",
+ (flags & STAT_SHLGNDS)?"</u>":"",
+ U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32),
+ U2H(stats[ST_F_STOT].u.u64), U2H(stats[ST_F_BIN].u.u64), U2H(stats[ST_F_BOUT].u.u64));
+
+ chunk_appendf(out,
+ /* denied: req, resp */
+ "<td>%s</td><td>%s</td>"
+ /* errors: request, connect, response */
+ "<td>%s</td><td></td><td></td>"
+ /* warnings: retries, redispatches */
+ "<td></td><td></td>"
+ /* server status: reflect listener status */
+ "<td class=ac>%s</td>"
+ /* rest of server: nothing */
+ "<td class=ac colspan=8></td>"
+ "",
+ U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64),
+ U2H(stats[ST_F_EREQ].u.u64),
+ field_str(stats, ST_F_STATUS));
+
+ if (flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(out, "<td>");
+
+ if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI) {
+ chunk_appendf(out,
+ "<u>%s<div class=tips><table class=det>",
+ mod->name);
+ for (j = 0; j < mod->stats_count; ++j) {
+ chunk_appendf(out,
+ "<tr><th>%s</th><td>%s</td></tr>",
+ mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i]));
+ ++i;
+ }
+ chunk_appendf(out, "</table></div></u>");
+ } else {
+ i += mod->stats_count;
+ }
+
+ chunk_appendf(out, "</td>");
+ }
+ }
+
+ chunk_appendf(out, "</tr>");
+ }
+ else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_SV) {
+ const char *style;
+
+ /* determine the style to use depending on the server's state,
+ * its health and weight. There isn't a 1-to-1 mapping between
+ * state and styles for the cases where the server is (still)
+ * up. The reason is that we don't want to report nolb and
+ * drain with the same color.
+ */
+
+ if (strcmp(field_str(stats, ST_F_STATUS), "DOWN") == 0 ||
+ strcmp(field_str(stats, ST_F_STATUS), "DOWN (agent)") == 0) {
+ style = "down";
+ }
+ else if (strncmp(field_str(stats, ST_F_STATUS), "DOWN ", strlen("DOWN ")) == 0) {
+ style = "going_up";
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "DRAIN") == 0) {
+ style = "draining";
+ }
+ else if (strncmp(field_str(stats, ST_F_STATUS), "NOLB ", strlen("NOLB ")) == 0) {
+ style = "going_down";
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "NOLB") == 0) {
+ style = "nolb";
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "no check") == 0) {
+ style = "no_check";
+ }
+ else if (!stats[ST_F_CHKFAIL].type ||
+ stats[ST_F_CHECK_HEALTH].u.u32 == stats[ST_F_CHECK_RISE].u.u32 + stats[ST_F_CHECK_FALL].u.u32 - 1) {
+ /* no check or max health = UP */
+ if (stats[ST_F_WEIGHT].u.u32)
+ style = "up";
+ else
+ style = "draining";
+ }
+ else {
+ style = "going_down";
+ }
+
+ if (strncmp(field_str(stats, ST_F_STATUS), "MAINT", 5) == 0)
+ chunk_appendf(out, "<tr class=\"maintain\">");
+ else
+ chunk_appendf(out,
+ "<tr class=\"%s_%s\">",
+ (stats[ST_F_BCK].u.u32) ? "backup" : "active", style);
+
+
+ if (flags & STAT_ADMIN)
+ chunk_appendf(out,
+ "<td><input class='%s-checkbox' type=\"checkbox\" name=\"s\" value=\"%s\"></td>",
+ field_str(stats, ST_F_PXNAME),
+ field_str(stats, ST_F_SVNAME));
+
+ chunk_appendf(out,
+ "<td class=ac><a name=\"%s/%s\"></a>%s"
+ "<a class=lfsb href=\"#%s/%s\">%s</a>"
+ "",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME),
+ (flags & STAT_SHLGNDS) ? "<u>" : "",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), field_str(stats, ST_F_SVNAME));
+
+ if (flags & STAT_SHLGNDS) {
+ chunk_appendf(out, "<div class=tips>");
+
+ if (isdigit((unsigned char)*field_str(stats, ST_F_ADDR)))
+ chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_F_ADDR));
+ else if (*field_str(stats, ST_F_ADDR) == '[')
+ chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_F_ADDR));
+ else if (*field_str(stats, ST_F_ADDR))
+ chunk_appendf(out, "%s, ", field_str(stats, ST_F_ADDR));
+
+ /* id */
+ chunk_appendf(out, "id: %d, rid: %d", stats[ST_F_SID].u.u32, stats[ST_F_SRID].u.u32);
+
+ /* cookie */
+ if (stats[ST_F_COOKIE].type) {
+ chunk_appendf(out, ", cookie: '");
+ chunk_initstr(&src, field_str(stats, ST_F_COOKIE));
+ chunk_htmlencode(out, &src);
+ chunk_appendf(out, "'");
+ }
+
+ chunk_appendf(out, "</div>");
+ }
+
+ chunk_appendf(out,
+ /* queue : current, max, limit */
+ "%s</td><td>%s</td><td>%s</td><td>%s</td>"
+ /* sessions rate : current, max, limit */
+ "<td>%s</td><td>%s</td><td></td>"
+ "",
+ (flags & STAT_SHLGNDS) ? "</u>" : "",
+ U2H(stats[ST_F_QCUR].u.u32), U2H(stats[ST_F_QMAX].u.u32), LIM2A(stats[ST_F_QLIMIT].u.u32, "-"),
+ U2H(stats[ST_F_RATE].u.u32), U2H(stats[ST_F_RATE_MAX].u.u32));
+
+ chunk_appendf(out,
+ /* sessions: current, max, limit, total */
+ "<td><u>%s<div class=tips>"
+ "<table class=det>"
+ "<tr><th>Current active connections:</th><td>%s</td></tr>"
+ "<tr><th>Current used connections:</th><td>%s</td></tr>"
+ "<tr><th>Current idle connections:</th><td>%s</td></tr>"
+ "<tr><th>- unsafe:</th><td>%s</td></tr>"
+ "<tr><th>- safe:</th><td>%s</td></tr>"
+ "<tr><th>Estimated need of connections:</th><td>%s</td></tr>"
+ "<tr><th>Active connections limit:</th><td>%s</td></tr>"
+ "<tr><th>Idle connections limit:</th><td>%s</td></tr>"
+ "</table></div></u>"
+ "</td><td>%s</td><td>%s</td>"
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Cum. sessions:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_SCUR].u.u32),
+ U2H(stats[ST_F_SCUR].u.u32),
+ U2H(stats[ST_F_USED_CONN_CUR].u.u32),
+ U2H(stats[ST_F_SRV_ICUR].u.u32),
+ U2H(stats[ST_F_IDLE_CONN_CUR].u.u32),
+ U2H(stats[ST_F_SAFE_CONN_CUR].u.u32),
+ U2H(stats[ST_F_NEED_CONN_EST].u.u32),
+
+ LIM2A(stats[ST_F_SLIM].u.u32, "-"),
+ stats[ST_F_SRV_ILIM].type ? U2H(stats[ST_F_SRV_ILIM].u.u32) : "-",
+ U2H(stats[ST_F_SMAX].u.u32), LIM2A(stats[ST_F_SLIM].u.u32, "-"),
+ U2H(stats[ST_F_STOT].u.u64),
+ U2H(stats[ST_F_STOT].u.u64));
+
+ /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) {
+ chunk_appendf(out,
+ "<tr><th>New connections:</th><td>%s</td></tr>"
+ "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 1xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 2xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 3xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 4xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 5xx responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- other responses:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>"
+ "<tr><th>Internal error:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_CONNECT].u.u64),
+ U2H(stats[ST_F_REUSE].u.u64),
+ (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64) ?
+ (int)(100 * stats[ST_F_REUSE].u.u64 / (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64)) : 0,
+ U2H(stats[ST_F_REQ_TOT].u.u64),
+ U2H(stats[ST_F_HRSP_1XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_1XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_2XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_2XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_3XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_3XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_4XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_4XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_5XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_5XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_OTHER].u.u64), stats[ST_F_REQ_TOT].u.u64 ?
+ (int)(100 * stats[ST_F_HRSP_OTHER].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0,
+ U2H(stats[ST_F_WREW].u.u64),
+ U2H(stats[ST_F_EINT].u.u64));
+ }
+
+ chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. conn.</th></tr>");
+ chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_QT_MAX].u.u32), U2H(stats[ST_F_QTIME].u.u32));
+ chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_CT_MAX].u.u32), U2H(stats[ST_F_CTIME].u.u32));
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0)
+ chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_RT_MAX].u.u32), U2H(stats[ST_F_RTIME].u.u32));
+ chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_TT_MAX].u.u32), U2H(stats[ST_F_TTIME].u.u32));
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions: lbtot, last */
+ "<td>%s</td><td>%s</td>",
+ U2H(stats[ST_F_LBTOT].u.u64),
+ human_time(stats[ST_F_LASTSESS].u.s32, 1));
+
+ chunk_appendf(out,
+ /* bytes : in, out */
+ "<td>%s</td><td>%s</td>"
+ /* denied: req, resp */
+ "<td></td><td>%s</td>"
+ /* errors : request, connect */
+ "<td></td><td>%s</td>"
+ /* errors : response */
+ "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>"
+ /* warnings: retries, redispatches */
+ "<td>%lld</td><td>%lld</td>"
+ "",
+ U2H(stats[ST_F_BIN].u.u64), U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_DRESP].u.u64),
+ U2H(stats[ST_F_ECON].u.u64),
+ U2H(stats[ST_F_ERESP].u.u64),
+ (long long)stats[ST_F_CLI_ABRT].u.u64,
+ (long long)stats[ST_F_SRV_ABRT].u.u64,
+ (long long)stats[ST_F_WRETR].u.u64,
+ (long long)stats[ST_F_WREDIS].u.u64);
+
+ /* status, last change */
+ chunk_appendf(out, "<td class=ac>");
+
+ /* FIXME!!!!
+ * LASTCHG should contain the last change for *this* server and must be computed
+ * properly above, as was done below, ie: this server if maint, otherwise ref server
+ * if tracking. Note that ref is either local or remote depending on tracking.
+ */
+
+
+ if (strncmp(field_str(stats, ST_F_STATUS), "MAINT", 5) == 0) {
+ chunk_appendf(out, "%s MAINT", human_time(stats[ST_F_LASTCHG].u.u32, 1));
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "no check") == 0) {
+ chunk_strcat(out, "<i>no check</i>");
+ }
+ else {
+ chunk_appendf(out, "%s %s", human_time(stats[ST_F_LASTCHG].u.u32, 1), field_str(stats, ST_F_STATUS));
+ if (strncmp(field_str(stats, ST_F_STATUS), "DOWN", 4) == 0) {
+ if (stats[ST_F_CHECK_HEALTH].u.u32)
+ chunk_strcat(out, " &uarr;");
+ }
+ else if (stats[ST_F_CHECK_HEALTH].u.u32 < stats[ST_F_CHECK_RISE].u.u32 + stats[ST_F_CHECK_FALL].u.u32 - 1)
+ chunk_strcat(out, " &darr;");
+ }
+
+ if (strncmp(field_str(stats, ST_F_STATUS), "DOWN", 4) == 0 &&
+ stats[ST_F_AGENT_STATUS].type && !stats[ST_F_AGENT_HEALTH].u.u32) {
+ chunk_appendf(out,
+ "</td><td class=ac><u> %s",
+ field_str(stats, ST_F_AGENT_STATUS));
+
+ if (stats[ST_F_AGENT_CODE].type)
+ chunk_appendf(out, "/%d", stats[ST_F_AGENT_CODE].u.u32);
+
+ if (stats[ST_F_AGENT_DURATION].type)
+ chunk_appendf(out, " in %lums", (long)stats[ST_F_AGENT_DURATION].u.u64);
+
+ chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_F_AGENT_DESC));
+
+ if (*field_str(stats, ST_F_LAST_AGT)) {
+ chunk_appendf(out, ": ");
+ chunk_initstr(&src, field_str(stats, ST_F_LAST_AGT));
+ chunk_htmlencode(out, &src);
+ }
+ chunk_appendf(out, "</div></u>");
+ }
+ else if (stats[ST_F_CHECK_STATUS].type) {
+ chunk_appendf(out,
+ "</td><td class=ac><u> %s",
+ field_str(stats, ST_F_CHECK_STATUS));
+
+ if (stats[ST_F_CHECK_CODE].type)
+ chunk_appendf(out, "/%d", stats[ST_F_CHECK_CODE].u.u32);
+
+ if (stats[ST_F_CHECK_DURATION].type)
+ chunk_appendf(out, " in %lums", (long)stats[ST_F_CHECK_DURATION].u.u64);
+
+ chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_F_CHECK_DESC));
+
+ if (*field_str(stats, ST_F_LAST_CHK)) {
+ chunk_appendf(out, ": ");
+ chunk_initstr(&src, field_str(stats, ST_F_LAST_CHK));
+ chunk_htmlencode(out, &src);
+ }
+ chunk_appendf(out, "</div></u>");
+ }
+ else
+ chunk_appendf(out, "</td><td>");
+
+ chunk_appendf(out,
+ /* weight / uweight */
+ "</td><td class=ac>%d/%d</td>"
+ /* act, bck */
+ "<td class=ac>%s</td><td class=ac>%s</td>"
+ "",
+ stats[ST_F_WEIGHT].u.u32, stats[ST_F_UWEIGHT].u.u32,
+ stats[ST_F_BCK].u.u32 ? "-" : "Y",
+ stats[ST_F_BCK].u.u32 ? "Y" : "-");
+
+ /* check failures: unique, fatal, down time */
+ if (strcmp(field_str(stats, ST_F_STATUS), "MAINT (resolution)") == 0) {
+ chunk_appendf(out, "<td class=ac colspan=3>resolution</td>");
+ }
+ else if (stats[ST_F_CHKFAIL].type) {
+ chunk_appendf(out, "<td><u>%lld", (long long)stats[ST_F_CHKFAIL].u.u64);
+
+ if (stats[ST_F_HANAFAIL].type)
+ chunk_appendf(out, "/%lld", (long long)stats[ST_F_HANAFAIL].u.u64);
+
+ chunk_appendf(out,
+ "<div class=tips>Failed Health Checks%s</div></u></td>"
+ "<td>%lld</td><td>%s</td>"
+ "",
+ stats[ST_F_HANAFAIL].type ? "/Health Analyses" : "",
+ (long long)stats[ST_F_CHKDOWN].u.u64, human_time(stats[ST_F_DOWNTIME].u.u32, 1));
+ }
+ else if (strcmp(field_str(stats, ST_F_STATUS), "MAINT") != 0 && field_format(stats, ST_F_TRACKED) == FF_STR) {
+			/* tracking a server (hence inherited maint would appear as "MAINT (via ...)") */
+ chunk_appendf(out,
+ "<td class=ac colspan=3><a class=lfsb href=\"#%s\">via %s</a></td>",
+ field_str(stats, ST_F_TRACKED), field_str(stats, ST_F_TRACKED));
+ }
+ else
+ chunk_appendf(out, "<td colspan=3></td>");
+
+ /* throttle */
+ if (stats[ST_F_THROTTLE].type)
+ chunk_appendf(out, "<td class=ac>%d %%</td>\n", stats[ST_F_THROTTLE].u.u32);
+ else
+ chunk_appendf(out, "<td class=ac>-</td>");
+
+ if (flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(out, "<td>");
+
+ if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV) {
+ chunk_appendf(out,
+ "<u>%s<div class=tips><table class=det>",
+ mod->name);
+ for (j = 0; j < mod->stats_count; ++j) {
+ chunk_appendf(out,
+ "<tr><th>%s</th><td>%s</td></tr>",
+ mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i]));
+ ++i;
+ }
+ chunk_appendf(out, "</table></div></u>");
+ } else {
+ i += mod->stats_count;
+ }
+
+ chunk_appendf(out, "</td>");
+ }
+ }
+
+ chunk_appendf(out, "</tr>\n");
+ }
+ else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_BE) {
+ chunk_appendf(out, "<tr class=\"backend\">");
+ if (flags & STAT_ADMIN) {
+ /* Column sub-heading for Enable or Disable server */
+ chunk_appendf(out, "<td></td>");
+ }
+ chunk_appendf(out,
+ "<td class=ac>"
+ /* name */
+ "%s<a name=\"%s/Backend\"></a>"
+ "<a class=lfsb href=\"#%s/Backend\">Backend</a>"
+ "",
+ (flags & STAT_SHLGNDS)?"<u>":"",
+ field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_PXNAME));
+
+ if (flags & STAT_SHLGNDS) {
+ /* balancing */
+ chunk_appendf(out, "<div class=tips>balancing: %s",
+ field_str(stats, ST_F_ALGO));
+
+ /* cookie */
+ if (stats[ST_F_COOKIE].type) {
+ chunk_appendf(out, ", cookie: '");
+ chunk_initstr(&src, field_str(stats, ST_F_COOKIE));
+ chunk_htmlencode(out, &src);
+ chunk_appendf(out, "'");
+ }
+ chunk_appendf(out, "</div>");
+ }
+
+ chunk_appendf(out,
+ "%s</td>"
+ /* queue : current, max */
+ "<td>%s</td><td>%s</td><td></td>"
+ /* sessions rate : current, max, limit */
+ "<td>%s</td><td>%s</td><td></td>"
+ "",
+ (flags & STAT_SHLGNDS)?"</u>":"",
+ U2H(stats[ST_F_QCUR].u.u32), U2H(stats[ST_F_QMAX].u.u32),
+ U2H(stats[ST_F_RATE].u.u32), U2H(stats[ST_F_RATE_MAX].u.u32));
+
+ chunk_appendf(out,
+ /* sessions: current, max, limit, total */
+ "<td>%s</td><td>%s</td><td>%s</td>"
+ "<td><u>%s<div class=tips><table class=det>"
+ "<tr><th>Cum. sessions:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32),
+ U2H(stats[ST_F_STOT].u.u64),
+ U2H(stats[ST_F_STOT].u.u64));
+
+ /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) {
+ chunk_appendf(out,
+ "<tr><th>New connections:</th><td>%s</td></tr>"
+ "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>"
+ "<tr><th>&nbsp;&nbsp;Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>"
+ "<tr><th>- other responses:</th><td>%s</td></tr>"
+ "<tr><th>Cache lookups:</th><td>%s</td></tr>"
+ "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>"
+ "<tr><th>Internal errors:</th><td>%s</td></tr>"
+ "",
+ U2H(stats[ST_F_CONNECT].u.u64),
+ U2H(stats[ST_F_REUSE].u.u64),
+ (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64) ?
+ (int)(100 * stats[ST_F_REUSE].u.u64 / (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64)) : 0,
+ U2H(stats[ST_F_REQ_TOT].u.u64),
+ U2H(stats[ST_F_HRSP_1XX].u.u64),
+ U2H(stats[ST_F_HRSP_2XX].u.u64),
+ U2H(stats[ST_F_COMP_RSP].u.u64),
+ stats[ST_F_HRSP_2XX].u.u64 ?
+ (int)(100 * stats[ST_F_COMP_RSP].u.u64 / stats[ST_F_HRSP_2XX].u.u64) : 0,
+ U2H(stats[ST_F_HRSP_3XX].u.u64),
+ U2H(stats[ST_F_HRSP_4XX].u.u64),
+ U2H(stats[ST_F_HRSP_5XX].u.u64),
+ U2H(stats[ST_F_HRSP_OTHER].u.u64),
+ U2H(stats[ST_F_CACHE_LOOKUPS].u.u64),
+ U2H(stats[ST_F_CACHE_HITS].u.u64),
+ stats[ST_F_CACHE_LOOKUPS].u.u64 ?
+ (int)(100 * stats[ST_F_CACHE_HITS].u.u64 / stats[ST_F_CACHE_LOOKUPS].u.u64) : 0,
+ U2H(stats[ST_F_WREW].u.u64),
+ U2H(stats[ST_F_EINT].u.u64));
+ }
+
+ chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. conn.</th></tr>");
+ chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_QT_MAX].u.u32), U2H(stats[ST_F_QTIME].u.u32));
+ chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_CT_MAX].u.u32), U2H(stats[ST_F_CTIME].u.u32));
+ if (strcmp(field_str(stats, ST_F_MODE), "http") == 0)
+ chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_RT_MAX].u.u32), U2H(stats[ST_F_RTIME].u.u32));
+ chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>",
+ U2H(stats[ST_F_TT_MAX].u.u32), U2H(stats[ST_F_TTIME].u.u32));
+
+ chunk_appendf(out,
+ "</table></div></u></td>"
+ /* sessions: lbtot, last */
+ "<td>%s</td><td>%s</td>"
+ /* bytes: in */
+ "<td>%s</td>"
+ "",
+ U2H(stats[ST_F_LBTOT].u.u64),
+ human_time(stats[ST_F_LASTSESS].u.s32, 1),
+ U2H(stats[ST_F_BIN].u.u64));
+
+ chunk_appendf(out,
+ /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */
+ "<td>%s%s<div class=tips><table class=det>"
+ "<tr><th>Response bytes in:</th><td>%s</td></tr>"
+ "<tr><th>Compression in:</th><td>%s</td></tr>"
+ "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "<tr><th>Compression bypass:</th><td>%s</td></tr>"
+ "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>"
+ "</table></div>%s</td>",
+ (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "<u>":"",
+ U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_BOUT].u.u64),
+ U2H(stats[ST_F_COMP_IN].u.u64),
+ U2H(stats[ST_F_COMP_OUT].u.u64),
+ stats[ST_F_COMP_IN].u.u64 ? (int)(stats[ST_F_COMP_OUT].u.u64 * 100 / stats[ST_F_COMP_IN].u.u64) : 0,
+ U2H(stats[ST_F_COMP_BYP].u.u64),
+ U2H(stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64),
+ stats[ST_F_BOUT].u.u64 ? (int)((stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64) * 100 / stats[ST_F_BOUT].u.u64) : 0,
+ (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "</u>":"");
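+	/* Note: "Total bytes saved" above is comp_in minus comp_out, i.e. the
+	 * volume removed by compression, and its percentage is computed
+	 * against the bytes effectively sent out (bout).
+	 */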
+
+ chunk_appendf(out,
+ /* denied: req, resp */
+ "<td>%s</td><td>%s</td>"
+ /* errors : request, connect */
+ "<td></td><td>%s</td>"
+ /* errors : response */
+ "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>"
+ /* warnings: retries, redispatches */
+ "<td>%lld</td><td>%lld</td>"
+			  /* backend status: reflect the backend status (up/down): we display UP
+			   * if the backend has known working servers or if it has no server at
+			   * all (e.g. for stats). Then we display the total weight, the number
+			   * of active servers and the number of backup servers. */
+ "<td class=ac>%s %s</td><td class=ac>&nbsp;</td><td class=ac>%d/%d</td>"
+ "<td class=ac>%d</td><td class=ac>%d</td>"
+ "",
+ U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64),
+ U2H(stats[ST_F_ECON].u.u64),
+ U2H(stats[ST_F_ERESP].u.u64),
+ (long long)stats[ST_F_CLI_ABRT].u.u64,
+ (long long)stats[ST_F_SRV_ABRT].u.u64,
+ (long long)stats[ST_F_WRETR].u.u64, (long long)stats[ST_F_WREDIS].u.u64,
+ human_time(stats[ST_F_LASTCHG].u.u32, 1),
+ strcmp(field_str(stats, ST_F_STATUS), "DOWN") ? field_str(stats, ST_F_STATUS) : "<font color=\"red\"><b>DOWN</b></font>",
+ stats[ST_F_WEIGHT].u.u32, stats[ST_F_UWEIGHT].u.u32,
+ stats[ST_F_ACT].u.u32, stats[ST_F_BCK].u.u32);
+
+ chunk_appendf(out,
+ /* rest of backend: nothing, down transitions, total downtime, throttle */
+ "<td class=ac>&nbsp;</td><td>%d</td>"
+ "<td>%s</td>"
+ "<td></td>",
+ stats[ST_F_CHKDOWN].u.u32,
+ stats[ST_F_DOWNTIME].type ? human_time(stats[ST_F_DOWNTIME].u.u32, 1) : "&nbsp;");
+
+ if (flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(out, "<td>");
+
+ if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE) {
+ chunk_appendf(out,
+ "<u>%s<div class=tips><table class=det>",
+ mod->name);
+ for (j = 0; j < mod->stats_count; ++j) {
+ chunk_appendf(out,
+ "<tr><th>%s</th><td>%s</td></tr>",
+ mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i]));
+ ++i;
+ }
+ chunk_appendf(out, "</table></div></u>");
+ } else {
+ i += mod->stats_count;
+ }
+
+ chunk_appendf(out, "</td>");
+ }
+ }
+
+ chunk_appendf(out, "</tr>");
+ }
+
+ return 1;
+}
+
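+/* Dumps one stats line into the shared trash chunk, using the output format
+ * requested in the applet's show_stat context: HTML, typed, JSON, or CSV by
+ * default. Returns the value of the underlying dump function (non-zero on
+ * success).
+ */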
+int stats_dump_one_line(const struct field *stats, size_t stats_count,
+ struct appctx *appctx)
+{
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ int ret;
+
+ if (ctx->flags & STAT_FMT_HTML)
+ ret = stats_dump_fields_html(&trash_chunk, stats, ctx);
+ else if (ctx->flags & STAT_FMT_TYPED)
+ ret = stats_dump_fields_typed(&trash_chunk, stats, stats_count, ctx);
+ else if (ctx->flags & STAT_FMT_JSON)
+ ret = stats_dump_fields_json(&trash_chunk, stats, stats_count, ctx);
+ else
+ ret = stats_dump_fields_csv(&trash_chunk, stats, stats_count, ctx);
+
+ return ret;
+}
+
+/* Fill <stats> with the frontend statistics. <stats> is a preallocated array
+ * of length <len>. If <selected_field> is != NULL, only this field is filled.
+ * The length of the array must be at least ST_F_TOTAL_FIELDS; if it is less
+ * than this value, or if the selected field is not implemented for frontends,
+ * the function returns 0, otherwise it returns 1.
+ */
+int stats_fill_fe_stats(struct proxy *px, struct field *stats, int len,
+ enum stat_field *selected_field)
+{
+ enum stat_field current_field = (selected_field != NULL ? *selected_field : 0);
+
+ if (len < ST_F_TOTAL_FIELDS)
+ return 0;
+
+ for (; current_field < ST_F_TOTAL_FIELDS; current_field++) {
+ struct field metric = { 0 };
+
+ switch (current_field) {
+ case ST_F_PXNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id);
+ break;
+ case ST_F_SVNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "FRONTEND");
+ break;
+ case ST_F_MODE:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode));
+ break;
+ case ST_F_SCUR:
+ metric = mkf_u32(0, px->feconn);
+ break;
+ case ST_F_SMAX:
+ metric = mkf_u32(FN_MAX, px->fe_counters.conn_max);
+ break;
+ case ST_F_SLIM:
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->maxconn);
+ break;
+ case ST_F_STOT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess);
+ break;
+ case ST_F_BIN:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.bytes_in);
+ break;
+ case ST_F_BOUT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.bytes_out);
+ break;
+ case ST_F_DREQ:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_req);
+ break;
+ case ST_F_DRESP:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_resp);
+ break;
+ case ST_F_EREQ:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.failed_req);
+ break;
+ case ST_F_DCON:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_conn);
+ break;
+ case ST_F_DSES:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_sess);
+ break;
+ case ST_F_STATUS: {
+ const char *state;
+
+ if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))
+ state = "STOP";
+ else if (px->flags & PR_FL_PAUSED)
+ state = "PAUSED";
+ else
+ state = "OPEN";
+ metric = mkf_str(FO_STATUS, state);
+ break;
+ }
+ case ST_F_PID:
+ metric = mkf_u32(FO_KEY, 1);
+ break;
+ case ST_F_IID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid);
+ break;
+ case ST_F_SID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, 0);
+ break;
+ case ST_F_TYPE:
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_FE);
+ break;
+ case ST_F_RATE:
+ metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_sess_per_sec));
+ break;
+ case ST_F_RATE_LIM:
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->fe_sps_lim);
+ break;
+ case ST_F_RATE_MAX:
+ metric = mkf_u32(FN_MAX, px->fe_counters.sps_max);
+ break;
+ case ST_F_WREW:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.failed_rewrites);
+ break;
+ case ST_F_EINT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.internal_errors);
+ break;
+ case ST_F_HRSP_1XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[1]);
+ break;
+ case ST_F_HRSP_2XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[2]);
+ break;
+ case ST_F_HRSP_3XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[3]);
+ break;
+ case ST_F_HRSP_4XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[4]);
+ break;
+ case ST_F_HRSP_5XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[5]);
+ break;
+ case ST_F_HRSP_OTHER:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[0]);
+ break;
+ case ST_F_INTERCEPTED:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.intercepted_req);
+ break;
+ case ST_F_CACHE_LOOKUPS:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cache_lookups);
+ break;
+ case ST_F_CACHE_HITS:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cache_hits);
+ break;
+ case ST_F_REQ_RATE:
+ metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_req_per_sec));
+ break;
+ case ST_F_REQ_RATE_MAX:
+ metric = mkf_u32(FN_MAX, px->fe_counters.p.http.rps_max);
+ break;
+ case ST_F_REQ_TOT: {
+ int i;
+ uint64_t total_req;
+ size_t nb_reqs =
+ sizeof(px->fe_counters.p.http.cum_req) / sizeof(*px->fe_counters.p.http.cum_req);
+
+ total_req = 0;
+ for (i = 0; i < nb_reqs; i++)
+ total_req += px->fe_counters.p.http.cum_req[i];
+ metric = mkf_u64(FN_COUNTER, total_req);
+ break;
+ }
+ case ST_F_COMP_IN:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_in[COMP_DIR_RES]);
+ break;
+ case ST_F_COMP_OUT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_out[COMP_DIR_RES]);
+ break;
+ case ST_F_COMP_BYP:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_byp[COMP_DIR_RES]);
+ break;
+ case ST_F_COMP_RSP:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.comp_rsp);
+ break;
+ case ST_F_CONN_RATE:
+ metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_conn_per_sec));
+ break;
+ case ST_F_CONN_RATE_MAX:
+ metric = mkf_u32(FN_MAX, px->fe_counters.cps_max);
+ break;
+ case ST_F_CONN_TOT:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_conn);
+ break;
+ case ST_F_SESS_OTHER: {
+ int i;
+ uint64_t total_sess;
+ size_t nb_sess =
+ sizeof(px->fe_counters.cum_sess_ver) / sizeof(*px->fe_counters.cum_sess_ver);
+
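+			/* "other" sessions are those not attributed to a known
+			 * HTTP version: total cumulated sessions minus the
+			 * per-version counters, clamped at zero as a safety
+			 * against transiently inconsistent counters.
+			 */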
+ total_sess = px->fe_counters.cum_sess;
+ for (i = 0; i < nb_sess; i++)
+ total_sess -= px->fe_counters.cum_sess_ver[i];
+ total_sess = (int64_t)total_sess < 0 ? 0 : total_sess;
+ metric = mkf_u64(FN_COUNTER, total_sess);
+ break;
+ }
+ case ST_F_H1SESS:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[0]);
+ break;
+ case ST_F_H2SESS:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[1]);
+ break;
+ case ST_F_H3SESS:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[2]);
+ break;
+ case ST_F_REQ_OTHER:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[0]);
+ break;
+ case ST_F_H1REQ:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[1]);
+ break;
+ case ST_F_H2REQ:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[2]);
+ break;
+ case ST_F_H3REQ:
+ metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[3]);
+ break;
+ default:
+ /* not used for frontends. If a specific metric
+ * is requested, return an error. Otherwise continue.
+ */
+ if (selected_field != NULL)
+ return 0;
+ continue;
+ }
+ stats[current_field] = metric;
+ if (selected_field != NULL)
+ break;
+ }
+ return 1;
+}
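+
+/* Usage sketch (illustrative only, not part of the build): retrieving a
+ * single frontend field, here the current session count, into a
+ * caller-provided array:
+ *
+ *	struct field stats[ST_F_TOTAL_FIELDS];
+ *	enum stat_field fld = ST_F_SCUR;
+ *
+ *	if (stats_fill_fe_stats(px, stats, ST_F_TOTAL_FIELDS, &fld))
+ *		printf("scur=%u\n", stats[ST_F_SCUR].u.u32);
+ */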
+
+/* Dumps a frontend's line to the local trash buffer for the current proxy <px>
+ * and uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed. Returns non-zero if it emits
+ * anything, zero otherwise.
+ */
+static int stats_dump_fe_stats(struct stconn *sc, struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct field *stats = stat_l[STATS_DOMAIN_PROXY];
+ struct stats_module *mod;
+ size_t stats_count = ST_F_TOTAL_FIELDS;
+
+ if (!(px->cap & PR_CAP_FE))
+ return 0;
+
+ if ((ctx->flags & STAT_BOUND) && !(ctx->type & (1 << STATS_TYPE_FE)))
+ return 0;
+
+ memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]);
+
+ if (!stats_fill_fe_stats(px, stats, ST_F_TOTAL_FIELDS, NULL))
+ return 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ void *counters;
+
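+		/* each module owns a fixed block of fields appended after the
+		 * standard ones; modules without frontend capability still
+		 * advance stats_count so that the following blocks keep their
+		 * expected offsets.
+		 */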
+ if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE)) {
+ stats_count += mod->stats_count;
+ continue;
+ }
+
+ counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, mod);
+ mod->fill_stats(counters, stats + stats_count);
+ stats_count += mod->stats_count;
+ }
+
+ return stats_dump_one_line(stats, stats_count, appctx);
+}
+
+/* Fill <stats> with the listener statistics. <stats> is a preallocated array
+ * of length <len>. The length of the array must be at least
+ * ST_F_TOTAL_FIELDS; if it is less than this value, the function returns 0,
+ * otherwise it returns 1. If <selected_field> is != NULL, only this field is
+ * filled. <flags> can take the value STAT_SHLGNDS.
+ */
+int stats_fill_li_stats(struct proxy *px, struct listener *l, int flags,
+ struct field *stats, int len, enum stat_field *selected_field)
+{
+ enum stat_field current_field = (selected_field != NULL ? *selected_field : 0);
+ struct buffer *out = get_trash_chunk();
+
+ if (len < ST_F_TOTAL_FIELDS)
+ return 0;
+
+ if (!l->counters)
+ return 0;
+
+ chunk_reset(out);
+
+ for (; current_field < ST_F_TOTAL_FIELDS; current_field++) {
+ struct field metric = { 0 };
+
+ switch (current_field) {
+ case ST_F_PXNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id);
+ break;
+ case ST_F_SVNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, l->name);
+ break;
+ case ST_F_MODE:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode));
+ break;
+ case ST_F_SCUR:
+ metric = mkf_u32(0, l->nbconn);
+ break;
+ case ST_F_SMAX:
+ metric = mkf_u32(FN_MAX, l->counters->conn_max);
+ break;
+ case ST_F_SLIM:
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, l->bind_conf->maxconn);
+ break;
+ case ST_F_STOT:
+ metric = mkf_u64(FN_COUNTER, l->counters->cum_conn);
+ break;
+ case ST_F_BIN:
+ metric = mkf_u64(FN_COUNTER, l->counters->bytes_in);
+ break;
+ case ST_F_BOUT:
+ metric = mkf_u64(FN_COUNTER, l->counters->bytes_out);
+ break;
+ case ST_F_DREQ:
+ metric = mkf_u64(FN_COUNTER, l->counters->denied_req);
+ break;
+ case ST_F_DRESP:
+ metric = mkf_u64(FN_COUNTER, l->counters->denied_resp);
+ break;
+ case ST_F_EREQ:
+ metric = mkf_u64(FN_COUNTER, l->counters->failed_req);
+ break;
+ case ST_F_DCON:
+ metric = mkf_u64(FN_COUNTER, l->counters->denied_conn);
+ break;
+ case ST_F_DSES:
+ metric = mkf_u64(FN_COUNTER, l->counters->denied_sess);
+ break;
+ case ST_F_STATUS:
+ metric = mkf_str(FO_STATUS, li_status_st[get_li_status(l)]);
+ break;
+ case ST_F_PID:
+ metric = mkf_u32(FO_KEY, 1);
+ break;
+ case ST_F_IID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid);
+ break;
+ case ST_F_SID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, l->luid);
+ break;
+ case ST_F_TYPE:
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SO);
+ break;
+ case ST_F_WREW:
+ metric = mkf_u64(FN_COUNTER, l->counters->failed_rewrites);
+ break;
+ case ST_F_EINT:
+ metric = mkf_u64(FN_COUNTER, l->counters->internal_errors);
+ break;
+ case ST_F_ADDR:
+ if (flags & STAT_SHLGNDS) {
+ char str[INET6_ADDRSTRLEN];
+ int port;
+
+ port = get_host_port(&l->rx.addr);
+ switch (addr_to_str(&l->rx.addr, str, sizeof(str))) {
+ case AF_INET:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_appendf(out, "%s:%d", str, port);
+ break;
+ case AF_INET6:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_appendf(out, "[%s]:%d", str, port);
+ break;
+ case AF_UNIX:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, "unix");
+ break;
+ case -1:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_strcat(out, strerror(errno));
+ break;
+ default: /* address family not supported */
+ break;
+ }
+ }
+ break;
+ case ST_F_PROTO:
+ metric = mkf_str(FO_STATUS, l->rx.proto->name);
+ break;
+ default:
+			/* not used for listeners. If a specific metric
+			 * is requested, return an error. Otherwise continue.
+			 */
+ if (selected_field != NULL)
+ return 0;
+ continue;
+ }
+ stats[current_field] = metric;
+ if (selected_field != NULL)
+ break;
+ }
+ return 1;
+}
+
+/* Dumps a line for listener <l> and proxy <px> to the local trash buffer and
+ * uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed. Returns non-zero if it emits
+ * anything, zero otherwise.
+ */
+static int stats_dump_li_stats(struct stconn *sc, struct proxy *px, struct listener *l)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct field *stats = stat_l[STATS_DOMAIN_PROXY];
+ struct stats_module *mod;
+ size_t stats_count = ST_F_TOTAL_FIELDS;
+
+ memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]);
+
+ if (!stats_fill_li_stats(px, l, ctx->flags, stats,
+ ST_F_TOTAL_FIELDS, NULL))
+ return 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ void *counters;
+
+ if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI)) {
+ stats_count += mod->stats_count;
+ continue;
+ }
+
+ counters = EXTRA_COUNTERS_GET(l->extra_counters, mod);
+ mod->fill_stats(counters, stats + stats_count);
+ stats_count += mod->stats_count;
+ }
+
+ return stats_dump_one_line(stats, stats_count, appctx);
+}
+
+enum srv_stats_state {
+ SRV_STATS_STATE_DOWN = 0,
+ SRV_STATS_STATE_DOWN_AGENT,
+ SRV_STATS_STATE_GOING_UP,
+ SRV_STATS_STATE_UP_GOING_DOWN,
+ SRV_STATS_STATE_UP,
+ SRV_STATS_STATE_NOLB_GOING_DOWN,
+ SRV_STATS_STATE_NOLB,
+ SRV_STATS_STATE_DRAIN_GOING_DOWN,
+ SRV_STATS_STATE_DRAIN,
+ SRV_STATS_STATE_DRAIN_AGENT,
+ SRV_STATS_STATE_NO_CHECK,
+
+ SRV_STATS_STATE_COUNT, /* Must be last */
+};
+
+static const char *srv_hlt_st[SRV_STATS_STATE_COUNT] = {
+ [SRV_STATS_STATE_DOWN] = "DOWN",
+ [SRV_STATS_STATE_DOWN_AGENT] = "DOWN (agent)",
+ [SRV_STATS_STATE_GOING_UP] = "DOWN %d/%d",
+ [SRV_STATS_STATE_UP_GOING_DOWN] = "UP %d/%d",
+ [SRV_STATS_STATE_UP] = "UP",
+ [SRV_STATS_STATE_NOLB_GOING_DOWN] = "NOLB %d/%d",
+ [SRV_STATS_STATE_NOLB] = "NOLB",
+ [SRV_STATS_STATE_DRAIN_GOING_DOWN] = "DRAIN %d/%d",
+ [SRV_STATS_STATE_DRAIN] = "DRAIN",
+ [SRV_STATS_STATE_DRAIN_AGENT] = "DRAIN (agent)",
+ [SRV_STATS_STATE_NO_CHECK] = "no check"
+};
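+
+/* Note: the "%d/%d" entries above are printf-style templates: they are filled
+ * with the current health counter and the rise/fall threshold when the status
+ * string is built (see ST_F_STATUS in stats_fill_sv_stats() below).
+ */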
+
+/* Helper to compute the state of server <sv>. <ref> must point to the end of
+ * the tracking chain, i.e. the server whose checks are effectively used.
+ */
+static void stats_fill_sv_stats_computestate(struct server *sv, struct server *ref,
+ enum srv_stats_state *state)
+{
+ if (sv->cur_state == SRV_ST_RUNNING || sv->cur_state == SRV_ST_STARTING) {
+ if ((ref->check.state & CHK_ST_ENABLED) &&
+ (ref->check.health < ref->check.rise + ref->check.fall - 1)) {
+ *state = SRV_STATS_STATE_UP_GOING_DOWN;
+ } else {
+ *state = SRV_STATS_STATE_UP;
+ }
+
+ if (sv->cur_admin & SRV_ADMF_DRAIN) {
+ if (ref->agent.state & CHK_ST_ENABLED)
+ *state = SRV_STATS_STATE_DRAIN_AGENT;
+ else if (*state == SRV_STATS_STATE_UP_GOING_DOWN)
+ *state = SRV_STATS_STATE_DRAIN_GOING_DOWN;
+ else
+ *state = SRV_STATS_STATE_DRAIN;
+ }
+
+ if (*state == SRV_STATS_STATE_UP && !(ref->check.state & CHK_ST_ENABLED)) {
+ *state = SRV_STATS_STATE_NO_CHECK;
+ }
+ }
+ else if (sv->cur_state == SRV_ST_STOPPING) {
+ if ((!(sv->check.state & CHK_ST_ENABLED) && !sv->track) ||
+ (ref->check.health == ref->check.rise + ref->check.fall - 1)) {
+ *state = SRV_STATS_STATE_NOLB;
+ } else {
+ *state = SRV_STATS_STATE_NOLB_GOING_DOWN;
+ }
+ }
+ else { /* stopped */
+ if ((ref->agent.state & CHK_ST_ENABLED) && !ref->agent.health) {
+ *state = SRV_STATS_STATE_DOWN_AGENT;
+ } else if ((ref->check.state & CHK_ST_ENABLED) && !ref->check.health) {
+ *state = SRV_STATS_STATE_DOWN; /* DOWN */
+ } else if ((ref->agent.state & CHK_ST_ENABLED) || (ref->check.state & CHK_ST_ENABLED)) {
+ *state = SRV_STATS_STATE_GOING_UP;
+ } else {
+ *state = SRV_STATS_STATE_DOWN; /* DOWN, unchecked */
+ }
+ }
+}
+
+/* Fill <stats> with the statistics of server <sv>. <stats> is a preallocated
+ * array of length <len>. If <selected_field> is != NULL, only this field is
+ * filled. The length of the array must be at least ST_F_TOTAL_FIELDS; if it
+ * is less than this value, or if the selected field is not implemented for
+ * servers, the function returns 0, otherwise it returns 1. <flags> can take
+ * the value STAT_SHLGNDS.
+ */
+int stats_fill_sv_stats(struct proxy *px, struct server *sv, int flags,
+ struct field *stats, int len,
+ enum stat_field *selected_field)
+{
+ enum stat_field current_field = (selected_field != NULL ? *selected_field : 0);
+ struct server *via = sv->track ? sv->track : sv;
+ struct server *ref = via;
+ enum srv_stats_state state = 0;
+ char str[INET6_ADDRSTRLEN];
+ struct buffer *out = get_trash_chunk();
+ char *fld_status;
+ long long srv_samples_counter;
+ unsigned int srv_samples_window = TIME_STATS_SAMPLES;
+
+ if (len < ST_F_TOTAL_FIELDS)
+ return 0;
+
+ chunk_reset(out);
+
+ /* compute state for later use */
+ if (selected_field == NULL || *selected_field == ST_F_STATUS ||
+ *selected_field == ST_F_CHECK_RISE || *selected_field == ST_F_CHECK_FALL ||
+ *selected_field == ST_F_CHECK_HEALTH || *selected_field == ST_F_HANAFAIL) {
+ /* we have "via" which is the tracked server as described in the configuration,
+ * and "ref" which is the checked server and the end of the chain.
+ */
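+		/* e.g. (hypothetical chain): if s1 tracks s2 which tracks s3,
+		 * then for s1, "via" is s2 and "ref" is s3, whose checks are
+		 * the ones that actually matter.
+		 */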
+ while (ref->track)
+ ref = ref->track;
+ stats_fill_sv_stats_computestate(sv, ref, &state);
+ }
+
+	/* compute time values for later use */
+ if (selected_field == NULL || *selected_field == ST_F_QTIME ||
+ *selected_field == ST_F_CTIME || *selected_field == ST_F_RTIME ||
+ *selected_field == ST_F_TTIME) {
+ srv_samples_counter = (px->mode == PR_MODE_HTTP) ? sv->counters.p.http.cum_req : sv->counters.cum_lbconn;
+ if (srv_samples_counter < TIME_STATS_SAMPLES && srv_samples_counter > 0)
+ srv_samples_window = srv_samples_counter;
+ }
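+	/* Note: swrate_avg() below averages over at most TIME_STATS_SAMPLES
+	 * samples; while fewer samples have been accumulated, the window is
+	 * shrunk accordingly so that early averages are not diluted by
+	 * missing samples.
+	 */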
+
+ for (; current_field < ST_F_TOTAL_FIELDS; current_field++) {
+ struct field metric = { 0 };
+
+ switch (current_field) {
+ case ST_F_PXNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id);
+ break;
+ case ST_F_SVNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, sv->id);
+ break;
+ case ST_F_MODE:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode));
+ break;
+ case ST_F_QCUR:
+ metric = mkf_u32(0, sv->queue.length);
+ break;
+ case ST_F_QMAX:
+ metric = mkf_u32(FN_MAX, sv->counters.nbpend_max);
+ break;
+ case ST_F_SCUR:
+ metric = mkf_u32(0, sv->cur_sess);
+ break;
+ case ST_F_SMAX:
+ metric = mkf_u32(FN_MAX, sv->counters.cur_sess_max);
+ break;
+ case ST_F_SLIM:
+ if (sv->maxconn)
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, sv->maxconn);
+ break;
+ case ST_F_SRV_ICUR:
+ metric = mkf_u32(0, sv->curr_idle_conns);
+ break;
+ case ST_F_SRV_ILIM:
+ if (sv->max_idle_conns != -1)
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, sv->max_idle_conns);
+ break;
+ case ST_F_STOT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.cum_sess);
+ break;
+ case ST_F_BIN:
+ metric = mkf_u64(FN_COUNTER, sv->counters.bytes_in);
+ break;
+ case ST_F_BOUT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.bytes_out);
+ break;
+ case ST_F_DRESP:
+ metric = mkf_u64(FN_COUNTER, sv->counters.denied_resp);
+ break;
+ case ST_F_ECON:
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_conns);
+ break;
+ case ST_F_ERESP:
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_resp);
+ break;
+ case ST_F_WRETR:
+ metric = mkf_u64(FN_COUNTER, sv->counters.retries);
+ break;
+ case ST_F_WREDIS:
+ metric = mkf_u64(FN_COUNTER, sv->counters.redispatches);
+ break;
+ case ST_F_WREW:
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_rewrites);
+ break;
+ case ST_F_EINT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.internal_errors);
+ break;
+ case ST_F_CONNECT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.connect);
+ break;
+ case ST_F_REUSE:
+ metric = mkf_u64(FN_COUNTER, sv->counters.reuse);
+ break;
+ case ST_F_IDLE_CONN_CUR:
+ metric = mkf_u32(0, sv->curr_idle_nb);
+ break;
+ case ST_F_SAFE_CONN_CUR:
+ metric = mkf_u32(0, sv->curr_safe_nb);
+ break;
+ case ST_F_USED_CONN_CUR:
+ metric = mkf_u32(0, sv->curr_used_conns);
+ break;
+ case ST_F_NEED_CONN_EST:
+ metric = mkf_u32(0, sv->est_need_conns);
+ break;
+ case ST_F_STATUS:
+ fld_status = chunk_newstr(out);
+ if (sv->cur_admin & SRV_ADMF_RMAINT)
+ chunk_appendf(out, "MAINT (resolution)");
+ else if (sv->cur_admin & SRV_ADMF_IMAINT)
+ chunk_appendf(out, "MAINT (via %s/%s)", via->proxy->id, via->id);
+ else if (sv->cur_admin & SRV_ADMF_MAINT)
+ chunk_appendf(out, "MAINT");
+ else
+ chunk_appendf(out,
+ srv_hlt_st[state],
+ (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health),
+ (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.fall) : (ref->check.rise));
+
+ metric = mkf_str(FO_STATUS, fld_status);
+ break;
+ case ST_F_LASTCHG:
+ metric = mkf_u32(FN_AGE, ns_to_sec(now_ns) - sv->last_change);
+ break;
+ case ST_F_WEIGHT:
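+			/* report the effective weight, scaled by wmult/wdiv;
+			 * the "+ wdiv - 1" makes the division round up */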
+ metric = mkf_u32(FN_AVG, (sv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv);
+ break;
+ case ST_F_UWEIGHT:
+ metric = mkf_u32(FN_AVG, sv->uweight);
+ break;
+ case ST_F_ACT:
+ metric = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 0 : 1);
+ break;
+ case ST_F_BCK:
+ metric = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 1 : 0);
+ break;
+ case ST_F_CHKFAIL:
+ if (sv->check.state & CHK_ST_ENABLED)
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_checks);
+ break;
+ case ST_F_CHKDOWN:
+ if (sv->check.state & CHK_ST_ENABLED)
+ metric = mkf_u64(FN_COUNTER, sv->counters.down_trans);
+ break;
+ case ST_F_DOWNTIME:
+ if (sv->check.state & CHK_ST_ENABLED)
+ metric = mkf_u32(FN_COUNTER, srv_downtime(sv));
+ break;
+ case ST_F_QLIMIT:
+ if (sv->maxqueue)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->maxqueue);
+ break;
+ case ST_F_PID:
+ metric = mkf_u32(FO_KEY, 1);
+ break;
+ case ST_F_IID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid);
+ break;
+ case ST_F_SID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, sv->puid);
+ break;
+ case ST_F_SRID:
+ metric = mkf_u32(FN_COUNTER, sv->rid);
+ break;
+ case ST_F_THROTTLE:
+ if (sv->cur_state == SRV_ST_STARTING && !server_is_draining(sv))
+ metric = mkf_u32(FN_AVG, server_throttle_rate(sv));
+ break;
+ case ST_F_LBTOT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.cum_lbconn);
+ break;
+ case ST_F_TRACKED:
+ if (sv->track) {
+ char *fld_track = chunk_newstr(out);
+ chunk_appendf(out, "%s/%s", sv->track->proxy->id, sv->track->id);
+ metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, fld_track);
+ }
+ break;
+ case ST_F_TYPE:
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SV);
+ break;
+ case ST_F_RATE:
+ metric = mkf_u32(FN_RATE, read_freq_ctr(&sv->sess_per_sec));
+ break;
+ case ST_F_RATE_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.sps_max);
+ break;
+ case ST_F_CHECK_STATUS:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) {
+ const char *fld_chksts;
+
+ fld_chksts = chunk_newstr(out);
+ chunk_strcat(out, "* "); // for check in progress
+ chunk_strcat(out, get_check_status_info(sv->check.status));
+ if (!(sv->check.state & CHK_ST_INPROGRESS))
+ fld_chksts += 2; // skip "* "
+ metric = mkf_str(FN_OUTPUT, fld_chksts);
+ }
+ break;
+ case ST_F_CHECK_CODE:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED &&
+ sv->check.status >= HCHK_STATUS_L57DATA)
+ metric = mkf_u32(FN_OUTPUT, sv->check.code);
+ break;
+ case ST_F_CHECK_DURATION:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED &&
+ sv->check.status >= HCHK_STATUS_CHECKED)
+ metric = mkf_u64(FN_DURATION, MAX(sv->check.duration, 0));
+ break;
+ case ST_F_CHECK_DESC:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_str(FN_OUTPUT, get_check_status_description(sv->check.status));
+ break;
+ case ST_F_LAST_CHK:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_str(FN_OUTPUT, sv->check.desc);
+ break;
+ case ST_F_CHECK_RISE:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.rise);
+ break;
+ case ST_F_CHECK_FALL:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.fall);
+ break;
+ case ST_F_CHECK_HEALTH:
+ if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.health);
+ break;
+ case ST_F_AGENT_STATUS:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) {
+ const char *fld_chksts;
+
+ fld_chksts = chunk_newstr(out);
+ chunk_strcat(out, "* "); // for check in progress
+ chunk_strcat(out, get_check_status_info(sv->agent.status));
+ if (!(sv->agent.state & CHK_ST_INPROGRESS))
+ fld_chksts += 2; // skip "* "
+ metric = mkf_str(FN_OUTPUT, fld_chksts);
+ }
+ break;
+ case ST_F_AGENT_CODE:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED &&
+ (sv->agent.status >= HCHK_STATUS_L57DATA))
+ metric = mkf_u32(FN_OUTPUT, sv->agent.code);
+ break;
+ case ST_F_AGENT_DURATION:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u64(FN_DURATION, sv->agent.duration);
+ break;
+ case ST_F_AGENT_DESC:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_str(FN_OUTPUT, get_check_status_description(sv->agent.status));
+ break;
+ case ST_F_LAST_AGT:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_str(FN_OUTPUT, sv->agent.desc);
+ break;
+ case ST_F_AGENT_RISE:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.rise);
+ break;
+ case ST_F_AGENT_FALL:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.fall);
+ break;
+ case ST_F_AGENT_HEALTH:
+ if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED)
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.health);
+ break;
+ case ST_F_REQ_TOT:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.cum_req);
+ break;
+ case ST_F_HRSP_1XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[1]);
+ break;
+ case ST_F_HRSP_2XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[2]);
+ break;
+ case ST_F_HRSP_3XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[3]);
+ break;
+ case ST_F_HRSP_4XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[4]);
+ break;
+ case ST_F_HRSP_5XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[5]);
+ break;
+ case ST_F_HRSP_OTHER:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[0]);
+ break;
+ case ST_F_HANAFAIL:
+ if (ref->observe)
+ metric = mkf_u64(FN_COUNTER, sv->counters.failed_hana);
+ break;
+ case ST_F_CLI_ABRT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.cli_aborts);
+ break;
+ case ST_F_SRV_ABRT:
+ metric = mkf_u64(FN_COUNTER, sv->counters.srv_aborts);
+ break;
+ case ST_F_LASTSESS:
+ metric = mkf_s32(FN_AGE, srv_lastsession(sv));
+ break;
+ case ST_F_QTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.q_time, srv_samples_window));
+ break;
+ case ST_F_CTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.c_time, srv_samples_window));
+ break;
+ case ST_F_RTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.d_time, srv_samples_window));
+ break;
+ case ST_F_TTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.t_time, srv_samples_window));
+ break;
+ case ST_F_QT_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.qtime_max);
+ break;
+ case ST_F_CT_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.ctime_max);
+ break;
+ case ST_F_RT_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.dtime_max);
+ break;
+ case ST_F_TT_MAX:
+ metric = mkf_u32(FN_MAX, sv->counters.ttime_max);
+ break;
+ case ST_F_ADDR:
+ if (flags & STAT_SHLGNDS) {
+ switch (addr_to_str(&sv->addr, str, sizeof(str))) {
+ case AF_INET:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_appendf(out, "%s:%d", str, sv->svc_port);
+ break;
+ case AF_INET6:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_appendf(out, "[%s]:%d", str, sv->svc_port);
+ break;
+ case AF_UNIX:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, "unix");
+ break;
+ case -1:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out));
+ chunk_strcat(out, strerror(errno));
+ break;
+ default: /* address family not supported */
+ break;
+ }
+ }
+ break;
+ case ST_F_COOKIE:
+ if (flags & STAT_SHLGNDS && sv->cookie)
+ metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, sv->cookie);
+ break;
+ default:
+ /* not used for servers. If a specific metric
+ * is requested, return an error. Otherwise continue.
+ */
+ if (selected_field != NULL)
+ return 0;
+ continue;
+ }
+ stats[current_field] = metric;
+ if (selected_field != NULL)
+ break;
+ }
+ return 1;
+}
+
+/* Dumps a line for server <sv> and proxy <px> to the local trash buffer and
+ * uses the state from stream connector <sc>. The
+ * caller is responsible for clearing the local trash buffer if needed. Returns
+ * non-zero if it emits anything, zero otherwise.
+ */
+static int stats_dump_sv_stats(struct stconn *sc, struct proxy *px, struct server *sv)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct stats_module *mod;
+ struct field *stats = stat_l[STATS_DOMAIN_PROXY];
+ size_t stats_count = ST_F_TOTAL_FIELDS;
+
+ memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]);
+
+ if (!stats_fill_sv_stats(px, sv, ctx->flags, stats,
+ ST_F_TOTAL_FIELDS, NULL))
+ return 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ void *counters;
+
+ if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY)
+ continue;
+
+ if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV)) {
+ stats_count += mod->stats_count;
+ continue;
+ }
+
+ counters = EXTRA_COUNTERS_GET(sv->extra_counters, mod);
+ mod->fill_stats(counters, stats + stats_count);
+ stats_count += mod->stats_count;
+ }
+
+ return stats_dump_one_line(stats, stats_count, appctx);
+}
+
+/* Helper to compute, for backend <px>, the number of servers which are not
+ * stopped (<nbup>), the total number of servers (<nbsrv>) and the sum of the
+ * user weights of the servers currently eligible for load balancing (<totuw>).
+ */
+static void stats_fill_be_stats_computesrv(struct proxy *px, int *nbup, int *nbsrv, int *totuw)
+{
+ int nbup_tmp, nbsrv_tmp, totuw_tmp;
+ const struct server *srv;
+
+ nbup_tmp = nbsrv_tmp = totuw_tmp = 0;
+ for (srv = px->srv; srv; srv = srv->next) {
+ if (srv->cur_state != SRV_ST_STOPPED) {
+ nbup_tmp++;
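+			/* count this server's user weight only if it belongs
+			 * to the tier currently used for load balancing:
+			 * active servers when at least one is usable, backup
+			 * servers otherwise (hence the XOR below) */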
+ if (srv_currently_usable(srv) &&
+ (!px->srv_act ^ !(srv->flags & SRV_F_BACKUP)))
+ totuw_tmp += srv->uweight;
+ }
+ nbsrv_tmp++;
+ }
+
+ HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock);
+ if (!px->srv_act && px->lbprm.fbck)
+ totuw_tmp = px->lbprm.fbck->uweight;
+ HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock);
+
+	/* use tmp variables then assign the results to make gcc happy */
+ *nbup = nbup_tmp;
+ *nbsrv = nbsrv_tmp;
+ *totuw = totuw_tmp;
+}
+
+/* Fill <stats> with the backend statistics. <stats> is a preallocated array
+ * of length <len>. If <selected_field> is != NULL, only this field is filled.
+ * The length of the array must be at least ST_F_TOTAL_FIELDS; if it is less
+ * than this value, or if the selected field is not implemented for backends,
+ * the function returns 0, otherwise it returns 1. <flags> can take the value
+ * STAT_SHLGNDS.
+ */
+int stats_fill_be_stats(struct proxy *px, int flags, struct field *stats, int len,
+ enum stat_field *selected_field)
+{
+ enum stat_field current_field = (selected_field != NULL ? *selected_field : 0);
+ long long be_samples_counter;
+ unsigned int be_samples_window = TIME_STATS_SAMPLES;
+ struct buffer *out = get_trash_chunk();
+ int nbup, nbsrv, totuw;
+ char *fld;
+
+ if (len < ST_F_TOTAL_FIELDS)
+ return 0;
+
+ nbup = nbsrv = totuw = 0;
+	/* precompute some server-derived values for later use, when either all
+	 * fields are selected or one of the fields listed below needs them */
+ if (selected_field == NULL || *selected_field == ST_F_STATUS ||
+ *selected_field == ST_F_UWEIGHT)
+ stats_fill_be_stats_computesrv(px, &nbup, &nbsrv, &totuw);
+
+ /* same here but specific to time fields */
+ if (selected_field == NULL || *selected_field == ST_F_QTIME ||
+ *selected_field == ST_F_CTIME || *selected_field == ST_F_RTIME ||
+ *selected_field == ST_F_TTIME) {
+ be_samples_counter = (px->mode == PR_MODE_HTTP) ? px->be_counters.p.http.cum_req : px->be_counters.cum_lbconn;
+ if (be_samples_counter < TIME_STATS_SAMPLES && be_samples_counter > 0)
+ be_samples_window = be_samples_counter;
+ }
+
+ for (; current_field < ST_F_TOTAL_FIELDS; current_field++) {
+ struct field metric = { 0 };
+
+ switch (current_field) {
+ case ST_F_PXNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id);
+ break;
+ case ST_F_SVNAME:
+ metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "BACKEND");
+ break;
+ case ST_F_MODE:
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode));
+ break;
+ case ST_F_QCUR:
+ metric = mkf_u32(0, px->queue.length);
+ break;
+ case ST_F_QMAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.nbpend_max);
+ break;
+ case ST_F_SCUR:
+ metric = mkf_u32(0, px->beconn);
+ break;
+ case ST_F_SMAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.conn_max);
+ break;
+ case ST_F_SLIM:
+ metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->fullconn);
+ break;
+ case ST_F_STOT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.cum_conn);
+ break;
+ case ST_F_BIN:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.bytes_in);
+ break;
+ case ST_F_BOUT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.bytes_out);
+ break;
+ case ST_F_DREQ:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.denied_req);
+ break;
+ case ST_F_DRESP:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.denied_resp);
+ break;
+ case ST_F_ECON:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.failed_conns);
+ break;
+ case ST_F_ERESP:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.failed_resp);
+ break;
+ case ST_F_WRETR:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.retries);
+ break;
+ case ST_F_WREDIS:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.redispatches);
+ break;
+ case ST_F_WREW:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.failed_rewrites);
+ break;
+ case ST_F_EINT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.internal_errors);
+ break;
+ case ST_F_CONNECT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.connect);
+ break;
+ case ST_F_REUSE:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.reuse);
+ break;
+ case ST_F_STATUS:
+ fld = chunk_newstr(out);
+ chunk_appendf(out, "%s", (px->lbprm.tot_weight > 0 || !px->srv) ? "UP" : "DOWN");
+ if (flags & (STAT_HIDE_MAINT|STAT_HIDE_DOWN))
+ chunk_appendf(out, " (%d/%d)", nbup, nbsrv);
+ metric = mkf_str(FO_STATUS, fld);
+ break;
+ case ST_F_AGG_SRV_CHECK_STATUS: // DEPRECATED
+ case ST_F_AGG_SRV_STATUS:
+ metric = mkf_u32(FN_GAUGE, 0);
+ break;
+ case ST_F_AGG_CHECK_STATUS:
+ metric = mkf_u32(FN_GAUGE, 0);
+ break;
+ case ST_F_WEIGHT:
+ metric = mkf_u32(FN_AVG, (px->lbprm.tot_weight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv);
+ break;
+ case ST_F_UWEIGHT:
+ metric = mkf_u32(FN_AVG, totuw);
+ break;
+ case ST_F_ACT:
+ metric = mkf_u32(0, px->srv_act);
+ break;
+ case ST_F_BCK:
+ metric = mkf_u32(0, px->srv_bck);
+ break;
+ case ST_F_CHKDOWN:
+ metric = mkf_u64(FN_COUNTER, px->down_trans);
+ break;
+ case ST_F_LASTCHG:
+ metric = mkf_u32(FN_AGE, ns_to_sec(now_ns) - px->last_change);
+ break;
+ case ST_F_DOWNTIME:
+ if (px->srv)
+ metric = mkf_u32(FN_COUNTER, be_downtime(px));
+ break;
+ case ST_F_PID:
+ metric = mkf_u32(FO_KEY, 1);
+ break;
+ case ST_F_IID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid);
+ break;
+ case ST_F_SID:
+ metric = mkf_u32(FO_KEY|FS_SERVICE, 0);
+ break;
+ case ST_F_LBTOT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.cum_lbconn);
+ break;
+ case ST_F_TYPE:
+ metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_BE);
+ break;
+ case ST_F_RATE:
+ metric = mkf_u32(0, read_freq_ctr(&px->be_sess_per_sec));
+ break;
+ case ST_F_RATE_MAX:
+ metric = mkf_u32(0, px->be_counters.sps_max);
+ break;
+ case ST_F_COOKIE:
+ if (flags & STAT_SHLGNDS && px->cookie_name)
+ metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, px->cookie_name);
+ break;
+ case ST_F_ALGO:
+ if (flags & STAT_SHLGNDS)
+ metric = mkf_str(FO_CONFIG|FS_SERVICE, backend_lb_algo_str(px->lbprm.algo & BE_LB_ALGO));
+ break;
+ case ST_F_REQ_TOT:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cum_req);
+ break;
+ case ST_F_HRSP_1XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[1]);
+ break;
+ case ST_F_HRSP_2XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[2]);
+ break;
+ case ST_F_HRSP_3XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[3]);
+ break;
+ case ST_F_HRSP_4XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[4]);
+ break;
+ case ST_F_HRSP_5XX:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[5]);
+ break;
+ case ST_F_HRSP_OTHER:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[0]);
+ break;
+ case ST_F_CACHE_LOOKUPS:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cache_lookups);
+ break;
+ case ST_F_CACHE_HITS:
+ if (px->mode == PR_MODE_HTTP)
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cache_hits);
+ break;
+ case ST_F_CLI_ABRT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.cli_aborts);
+ break;
+ case ST_F_SRV_ABRT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.srv_aborts);
+ break;
+ case ST_F_COMP_IN:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.comp_in[COMP_DIR_RES]);
+ break;
+ case ST_F_COMP_OUT:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.comp_out[COMP_DIR_RES]);
+ break;
+ case ST_F_COMP_BYP:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.comp_byp[COMP_DIR_RES]);
+ break;
+ case ST_F_COMP_RSP:
+ metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.comp_rsp);
+ break;
+ case ST_F_LASTSESS:
+ metric = mkf_s32(FN_AGE, be_lastsession(px));
+ break;
+ case ST_F_QTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.q_time, be_samples_window));
+ break;
+ case ST_F_CTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.c_time, be_samples_window));
+ break;
+ case ST_F_RTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.d_time, be_samples_window));
+ break;
+ case ST_F_TTIME:
+ metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.t_time, be_samples_window));
+ break;
+ case ST_F_QT_MAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.qtime_max);
+ break;
+ case ST_F_CT_MAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.ctime_max);
+ break;
+ case ST_F_RT_MAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.dtime_max);
+ break;
+ case ST_F_TT_MAX:
+ metric = mkf_u32(FN_MAX, px->be_counters.ttime_max);
+ break;
+ default:
+ /* not used for backends. If a specific metric
+ * is requested, return an error. Otherwise continue.
+ */
+ if (selected_field != NULL)
+ return 0;
+ continue;
+ }
+ stats[current_field] = metric;
+ if (selected_field != NULL)
+ break;
+ }
+ return 1;
+}
+
+/* Dumps a line for backend <px> to the local trash buffer and uses the
+ * state from stream connector <sc>. The caller is responsible for clearing the
+ * local trash buffer if needed. Returns non-zero if it emits anything, zero
+ * otherwise.
+ */
+static int stats_dump_be_stats(struct stconn *sc, struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct field *stats = stat_l[STATS_DOMAIN_PROXY];
+ struct stats_module *mod;
+ size_t stats_count = ST_F_TOTAL_FIELDS;
+
+ if (!(px->cap & PR_CAP_BE))
+ return 0;
+
+ if ((ctx->flags & STAT_BOUND) && !(ctx->type & (1 << STATS_TYPE_BE)))
+ return 0;
+
+ memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]);
+
+ if (!stats_fill_be_stats(px, ctx->flags, stats, ST_F_TOTAL_FIELDS, NULL))
+ return 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ struct extra_counters *counters;
+
+ if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY)
+ continue;
+
+ if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE)) {
+ stats_count += mod->stats_count;
+ continue;
+ }
+
+ counters = EXTRA_COUNTERS_GET(px->extra_counters_be, mod);
+ mod->fill_stats(counters, stats + stats_count);
+ stats_count += mod->stats_count;
+ }
+
+ return stats_dump_one_line(stats, stats_count, appctx);
+}
+
+/* Dumps the HTML table header for proxy <px> to the local trash buffer and
+ * uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed.
+ */
+static void stats_dump_html_px_hdr(struct stconn *sc, struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN];
+ struct stats_module *mod;
+ int stats_module_len = 0;
+
+ if (px->cap & PR_CAP_BE && px->srv && (ctx->flags & STAT_ADMIN)) {
+		/* A form to enable/disable this proxy's servers */
+
+ /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ scope_txt[0] = 0;
+ if (ctx->scope_len) {
+ const char *scope_ptr = stats_scope_ptr(appctx, sc);
+
+ strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt));
+ memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len);
+ scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0;
+ }
+
+ chunk_appendf(&trash_chunk,
+ "<form method=\"post\">");
+ }
+
+ /* print a new table */
+ chunk_appendf(&trash_chunk,
+ "<table class=\"tbl\" width=\"100%%\">\n"
+ "<tr class=\"titre\">"
+ "<th class=\"pxname\" width=\"10%%\">");
+
+ chunk_appendf(&trash_chunk,
+ "<a name=\"%s\"></a>%s"
+ "<a class=px href=\"#%s\">%s</a>",
+ px->id,
+ (ctx->flags & STAT_SHLGNDS) ? "<u>":"",
+ px->id, px->id);
+
+ if (ctx->flags & STAT_SHLGNDS) {
+ /* cap, mode, id */
+ chunk_appendf(&trash_chunk, "<div class=tips>cap: %s, mode: %s, id: %d",
+ proxy_cap_str(px->cap), proxy_mode_str(px->mode),
+ px->uuid);
+ chunk_appendf(&trash_chunk, "</div>");
+ }
+
+ chunk_appendf(&trash_chunk,
+ "%s</th>"
+ "<th class=\"%s\" width=\"90%%\">%s</th>"
+ "</tr>\n"
+ "</table>\n"
+ "<table class=\"tbl\" width=\"100%%\">\n"
+ "<tr class=\"titre\">",
+ (ctx->flags & STAT_SHLGNDS) ? "</u>":"",
+ px->desc ? "desc" : "empty", px->desc ? px->desc : "");
+
+ if (ctx->flags & STAT_ADMIN) {
+ /* Column heading for Enable or Disable server */
+ if ((px->cap & PR_CAP_BE) && px->srv)
+ chunk_appendf(&trash_chunk,
+ "<th rowspan=2 width=1><input type=\"checkbox\" "
+ "onclick=\"for(c in document.getElementsByClassName('%s-checkbox')) "
+ "document.getElementsByClassName('%s-checkbox').item(c).checked = this.checked\"></th>",
+ px->id,
+ px->id);
+ else
+ chunk_appendf(&trash_chunk, "<th rowspan=2></th>");
+ }
+
+ chunk_appendf(&trash_chunk,
+ "<th rowspan=2></th>"
+ "<th colspan=3>Queue</th>"
+ "<th colspan=3>Session rate</th><th colspan=6>Sessions</th>"
+ "<th colspan=2>Bytes</th><th colspan=2>Denied</th>"
+ "<th colspan=3>Errors</th><th colspan=2>Warnings</th>"
+ "<th colspan=9>Server</th>");
+
+ if (ctx->flags & STAT_SHMODULES) {
+		// count the modules to compute the colspan attribute
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ ++stats_module_len;
+ }
+ chunk_appendf(&trash_chunk, "<th colspan=%d>Extra modules</th>",
+ stats_module_len);
+ }
+
+ chunk_appendf(&trash_chunk,
+ "</tr>\n"
+ "<tr class=\"titre\">"
+ "<th>Cur</th><th>Max</th><th>Limit</th>"
+ "<th>Cur</th><th>Max</th><th>Limit</th><th>Cur</th><th>Max</th>"
+ "<th>Limit</th><th>Total</th><th>LbTot</th><th>Last</th><th>In</th><th>Out</th>"
+ "<th>Req</th><th>Resp</th><th>Req</th><th>Conn</th>"
+ "<th>Resp</th><th>Retr</th><th>Redis</th>"
+ "<th>Status</th><th>LastChk</th><th>Wght</th><th>Act</th>"
+ "<th>Bck</th><th>Chk</th><th>Dwn</th><th>Dwntme</th>"
+ "<th>Thrtle</th>\n");
+
+ if (ctx->flags & STAT_SHMODULES) {
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ chunk_appendf(&trash_chunk, "<th>%s</th>", mod->name);
+ }
+ }
+
+ chunk_appendf(&trash_chunk, "</tr>");
+}
+
+/* Dumps the HTML table trailer for proxy <px> to the local trash buffer and
+ * uses the state from stream connector <sc>. The caller is responsible for
+ * clearing the local trash buffer if needed.
+ */
+static void stats_dump_html_px_end(struct stconn *sc, struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+
+ chunk_appendf(&trash_chunk, "</table>");
+
+ if ((px->cap & PR_CAP_BE) && px->srv && (ctx->flags & STAT_ADMIN)) {
+		/* close the form used to enable/disable this proxy's servers */
+ chunk_appendf(&trash_chunk,
+ "Choose the action to perform on the checked servers : "
+ "<select name=action>"
+ "<option value=\"\"></option>"
+ "<option value=\"ready\">Set state to READY</option>"
+ "<option value=\"drain\">Set state to DRAIN</option>"
+ "<option value=\"maint\">Set state to MAINT</option>"
+ "<option value=\"dhlth\">Health: disable checks</option>"
+ "<option value=\"ehlth\">Health: enable checks</option>"
+ "<option value=\"hrunn\">Health: force UP</option>"
+ "<option value=\"hnolb\">Health: force NOLB</option>"
+ "<option value=\"hdown\">Health: force DOWN</option>"
+ "<option value=\"dagent\">Agent: disable checks</option>"
+ "<option value=\"eagent\">Agent: enable checks</option>"
+ "<option value=\"arunn\">Agent: force UP</option>"
+ "<option value=\"adown\">Agent: force DOWN</option>"
+ "<option value=\"shutdown\">Kill Sessions</option>"
+ "</select>"
+ "<input type=\"hidden\" name=\"b\" value=\"#%d\">"
+ "&nbsp;<input type=\"submit\" value=\"Apply\">"
+ "</form>",
+ px->uuid);
+ }
+
+ chunk_appendf(&trash_chunk, "<p>\n");
+}
+
+/*
+ * Dumps statistics for a proxy. The output is sent to the stream connector's
+ * input buffer. Returns 0 if it had to stop dumping data because of lack of
+ * buffer space, or non-zero if everything completed. This function is used
+ * both by the CLI and the HTTP entry points, and is able to dump the output
+ * in HTML or CSV formats.
+ */
+int stats_dump_proxy_to_buffer(struct stconn *sc, struct htx *htx,
+ struct proxy *px)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct channel *rep = sc_ic(sc);
+	struct server *sv, *svs; /* server, and the end of its "track" chain whose state is used */
+ struct listener *l;
+ struct uri_auth *uri = NULL;
+ int current_field;
+ int px_st = ctx->px_st;
+
+ if (ctx->http_px)
+ uri = ctx->http_px->uri_auth;
+ chunk_reset(&trash_chunk);
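+	/* The dump may stop at any step below when the output buffer is full;
+	 * in that case ctx->px_st keeps the current state and ctx->field the
+	 * current field so that the next call resumes exactly where the
+	 * previous one stopped.
+	 */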
+more:
+ current_field = ctx->field;
+
+ switch (ctx->px_st) {
+ case STAT_PX_ST_INIT:
+ /* we are on a new proxy */
+ if (uri && uri->scope) {
+ /* we have a limited scope, we have to check the proxy name */
+ struct stat_scope *scope;
+ int len;
+
+ len = strlen(px->id);
+ scope = uri->scope;
+
+ while (scope) {
+ /* match exact proxy name */
+ if (scope->px_len == len && !memcmp(px->id, scope->px_id, len))
+ break;
+
+ /* match '.' which means 'self' proxy */
+ if (strcmp(scope->px_id, ".") == 0 && px == ctx->http_px)
+ break;
+ scope = scope->next;
+ }
+
+ /* proxy name not found : don't dump anything */
+ if (scope == NULL)
+ return 1;
+ }
+
+ /* if the user has requested a limited output and the proxy
+ * name does not match, skip it.
+ */
+ if (ctx->scope_len) {
+ const char *scope_ptr = stats_scope_ptr(appctx, sc);
+
+ if (strnistr(px->id, strlen(px->id), scope_ptr, ctx->scope_len) == NULL)
+ return 1;
+ }
+
+ if ((ctx->flags & STAT_BOUND) &&
+ (ctx->iid != -1) &&
+ (px->uuid != ctx->iid))
+ return 1;
+
+ ctx->px_st = STAT_PX_ST_TH;
+ __fallthrough;
+
+ case STAT_PX_ST_TH:
+ if (ctx->flags & STAT_FMT_HTML) {
+ stats_dump_html_px_hdr(sc, px);
+ if (!stats_putchk(appctx, htx))
+ goto full;
+ }
+
+ ctx->px_st = STAT_PX_ST_FE;
+ __fallthrough;
+
+ case STAT_PX_ST_FE:
+ /* print the frontend */
+ if (stats_dump_fe_stats(sc, px)) {
+ if (!stats_putchk(appctx, htx))
+ goto full;
+ ctx->flags |= STAT_STARTED;
+ if (ctx->field)
+ goto more;
+ }
+
+ current_field = 0;
+ ctx->obj2 = px->conf.listeners.n;
+ ctx->px_st = STAT_PX_ST_LI;
+ __fallthrough;
+
+ case STAT_PX_ST_LI:
+ /* obj2 points to listeners list as initialized above */
+ for (; ctx->obj2 != &px->conf.listeners; ctx->obj2 = l->by_fe.n) {
+ if (htx) {
+ if (htx_almost_full(htx)) {
+ sc_need_room(sc, htx->size / 2);
+ goto full;
+ }
+ }
+ else {
+ if (buffer_almost_full(&rep->buf)) {
+ sc_need_room(sc, b_size(&rep->buf) / 2);
+ goto full;
+ }
+ }
+
+ l = LIST_ELEM(ctx->obj2, struct listener *, by_fe);
+ if (!l->counters)
+ continue;
+
+ if (ctx->flags & STAT_BOUND) {
+ if (!(ctx->type & (1 << STATS_TYPE_SO)))
+ break;
+
+ if (ctx->sid != -1 && l->luid != ctx->sid)
+ continue;
+ }
+
+			/* print the listener */
+ if (stats_dump_li_stats(sc, px, l)) {
+ if (!stats_putchk(appctx, htx))
+ goto full;
+ ctx->flags |= STAT_STARTED;
+ if (ctx->field)
+ goto more;
+ }
+ current_field = 0;
+ }
+
+ ctx->obj2 = px->srv; /* may be NULL */
+ ctx->px_st = STAT_PX_ST_SV;
+ __fallthrough;
+
+ case STAT_PX_ST_SV:
+ /* check for dump resumption */
+ if (px_st == STAT_PX_ST_SV) {
+ struct server *cur = ctx->obj2;
+
+ /* re-entrant dump */
+ BUG_ON(!cur);
+ if (cur->flags & SRV_F_DELETED) {
+ /* the server could have been marked as deleted
+ * between two dumping attempts, skip it.
+ */
+ cur = cur->next;
+ }
+ srv_drop(ctx->obj2); /* drop old srv taken on last dumping attempt */
+ ctx->obj2 = cur; /* could be NULL */
+ /* back to normal */
+ }
+
+ /* obj2 points to servers list as initialized above.
+ *
+ * A server may be removed during the stats dumping.
+ * Temporarily increment its refcount to prevent its
+ * anticipated cleaning. Call srv_drop() to release it.
+ */
+ for (; ctx->obj2 != NULL;
+ ctx->obj2 = srv_drop(sv)) {
+
+ sv = ctx->obj2;
+ srv_take(sv);
+
+ if (htx) {
+ if (htx_almost_full(htx)) {
+ sc_need_room(sc, htx->size / 2);
+ goto full;
+ }
+ }
+ else {
+ if (buffer_almost_full(&rep->buf)) {
+ sc_need_room(sc, b_size(&rep->buf) / 2);
+ goto full;
+ }
+ }
+
+ if (ctx->flags & STAT_BOUND) {
+ if (!(ctx->type & (1 << STATS_TYPE_SV))) {
+ srv_drop(sv);
+ break;
+ }
+
+ if (ctx->sid != -1 && sv->puid != ctx->sid)
+ continue;
+ }
+
+ /* do not report disabled servers */
+ if (ctx->flags & STAT_HIDE_MAINT &&
+ sv->cur_admin & SRV_ADMF_MAINT) {
+ continue;
+ }
+
+ svs = sv;
+ while (svs->track)
+ svs = svs->track;
+
+ /* do not report servers which are DOWN and not changing state */
+ if ((ctx->flags & STAT_HIDE_DOWN) &&
+ ((sv->cur_admin & SRV_ADMF_MAINT) || /* server is in maintenance */
+ (sv->cur_state == SRV_ST_STOPPED && /* server is down */
+ (!((svs->agent.state | svs->check.state) & CHK_ST_ENABLED) ||
+ ((svs->agent.state & CHK_ST_ENABLED) && !svs->agent.health) ||
+ ((svs->check.state & CHK_ST_ENABLED) && !svs->check.health))))) {
+ continue;
+ }
+
+ if (stats_dump_sv_stats(sc, px, sv)) {
+ if (!stats_putchk(appctx, htx))
+ goto full;
+ ctx->flags |= STAT_STARTED;
+ if (ctx->field)
+ goto more;
+ }
+ current_field = 0;
+ } /* for sv */
+
+ ctx->px_st = STAT_PX_ST_BE;
+ __fallthrough;
+
+ case STAT_PX_ST_BE:
+ /* print the backend */
+ if (stats_dump_be_stats(sc, px)) {
+ if (!stats_putchk(appctx, htx))
+ goto full;
+ ctx->flags |= STAT_STARTED;
+ if (ctx->field)
+ goto more;
+ }
+
+ current_field = 0;
+ ctx->px_st = STAT_PX_ST_END;
+ __fallthrough;
+
+ case STAT_PX_ST_END:
+ if (ctx->flags & STAT_FMT_HTML) {
+ stats_dump_html_px_end(sc, px);
+ if (!stats_putchk(appctx, htx))
+ goto full;
+ }
+
+ ctx->px_st = STAT_PX_ST_FIN;
+ __fallthrough;
+
+ case STAT_PX_ST_FIN:
+ return 1;
+
+ default:
+ /* unknown state, we should put an abort() here ! */
+ return 1;
+ }
+
+ full:
+ /* restore previous field */
+ ctx->field = current_field;
+ return 0;
+}
+
+/* Dumps the HTTP stats head block to the local trash buffer and uses the
+ * per-uri parameters from the parent proxy. The caller is responsible for
+ * clearing the local trash buffer if needed.
+ */
+static void stats_dump_html_head(struct appctx *appctx)
+{
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct uri_auth *uri;
+
+ BUG_ON(!ctx->http_px);
+ uri = ctx->http_px->uri_auth;
+
+ /* WARNING! This must fit in the first buffer !!! */
+ chunk_appendf(&trash_chunk,
+ "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n"
+ "\"http://www.w3.org/TR/html4/loose.dtd\">\n"
+ "<html><head><title>Statistics Report for " PRODUCT_NAME "%s%s</title>\n"
+ "<link rel=\"icon\" href=\"data:,\">\n"
+ "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\">\n"
+ "<style type=\"text/css\"><!--\n"
+ "body {"
+ " font-family: arial, helvetica, sans-serif;"
+ " font-size: 12px;"
+ " font-weight: normal;"
+ " color: black;"
+ " background: white;"
+ "}\n"
+ "th,td {"
+ " font-size: 10px;"
+ "}\n"
+ "h1 {"
+ " font-size: x-large;"
+ " margin-bottom: 0.5em;"
+ "}\n"
+ "h2 {"
+ " font-family: helvetica, arial;"
+ " font-size: x-large;"
+ " font-weight: bold;"
+ " font-style: italic;"
+ " color: #6020a0;"
+ " margin-top: 0em;"
+ " margin-bottom: 0em;"
+ "}\n"
+ "h3 {"
+ " font-family: helvetica, arial;"
+ " font-size: 16px;"
+ " font-weight: bold;"
+ " color: #b00040;"
+ " background: #e8e8d0;"
+ " margin-top: 0em;"
+ " margin-bottom: 0em;"
+ "}\n"
+ "li {"
+ " margin-top: 0.25em;"
+ " margin-right: 2em;"
+ "}\n"
+ ".hr {margin-top: 0.25em;"
+ " border-color: black;"
+ " border-bottom-style: solid;"
+ "}\n"
+ ".titre {background: #20D0D0;color: #000000; font-weight: bold; text-align: center;}\n"
+ ".total {background: #20D0D0;color: #ffff80;}\n"
+ ".frontend {background: #e8e8d0;}\n"
+ ".socket {background: #d0d0d0;}\n"
+ ".backend {background: #e8e8d0;}\n"
+ ".active_down {background: #ff9090;}\n"
+ ".active_going_up {background: #ffd020;}\n"
+ ".active_going_down {background: #ffffa0;}\n"
+ ".active_up {background: #c0ffc0;}\n"
+ ".active_nolb {background: #20a0ff;}\n"
+ ".active_draining {background: #20a0FF;}\n"
+ ".active_no_check {background: #e0e0e0;}\n"
+ ".backup_down {background: #ff9090;}\n"
+ ".backup_going_up {background: #ff80ff;}\n"
+ ".backup_going_down {background: #c060ff;}\n"
+ ".backup_up {background: #b0d0ff;}\n"
+ ".backup_nolb {background: #90b0e0;}\n"
+ ".backup_draining {background: #cc9900;}\n"
+ ".backup_no_check {background: #e0e0e0;}\n"
+ ".maintain {background: #c07820;}\n"
+ ".rls {letter-spacing: 0.2em; margin-right: 1px;}\n" /* right letter spacing (used for grouping digits) */
+ "\n"
+ "a.px:link {color: #ffff40; text-decoration: none;}"
+ "a.px:visited {color: #ffff40; text-decoration: none;}"
+ "a.px:hover {color: #ffffff; text-decoration: none;}"
+ "a.lfsb:link {color: #000000; text-decoration: none;}"
+ "a.lfsb:visited {color: #000000; text-decoration: none;}"
+ "a.lfsb:hover {color: #505050; text-decoration: none;}"
+ "\n"
+ "table.tbl { border-collapse: collapse; border-style: none;}\n"
+ "table.tbl td { text-align: right; border-width: 1px 1px 1px 1px; border-style: solid solid solid solid; padding: 2px 3px; border-color: gray; white-space: nowrap;}\n"
+ "table.tbl td.ac { text-align: center;}\n"
+ "table.tbl th { border-width: 1px; border-style: solid solid solid solid; border-color: gray;}\n"
+ "table.tbl th.pxname { background: #b00040; color: #ffff40; font-weight: bold; border-style: solid solid none solid; padding: 2px 3px; white-space: nowrap;}\n"
+ "table.tbl th.empty { border-style: none; empty-cells: hide; background: white;}\n"
+ "table.tbl th.desc { background: white; border-style: solid solid none solid; text-align: left; padding: 2px 3px;}\n"
+ "\n"
+ "table.lgd { border-collapse: collapse; border-width: 1px; border-style: none none none solid; border-color: black;}\n"
+ "table.lgd td { border-width: 1px; border-style: solid solid solid solid; border-color: gray; padding: 2px;}\n"
+ "table.lgd td.noborder { border-style: none; padding: 2px; white-space: nowrap;}\n"
+ "table.det { border-collapse: collapse; border-style: none; }\n"
+ "table.det th { text-align: left; border-width: 0px; padding: 0px 1px 0px 0px; font-style:normal;font-size:11px;font-weight:bold;font-family: sans-serif;}\n"
+ "table.det td { text-align: right; border-width: 0px; padding: 0px 0px 0px 4px; white-space: nowrap; font-style:normal;font-size:11px;font-weight:normal;}\n"
+ "u {text-decoration:none; border-bottom: 1px dotted black;}\n"
+ "div.tips {\n"
+ " display:block;\n"
+ " visibility:hidden;\n"
+ " z-index:2147483647;\n"
+ " position:absolute;\n"
+ " padding:2px 4px 3px;\n"
+ " background:#f0f060; color:#000000;\n"
+ " border:1px solid #7040c0;\n"
+ " white-space:nowrap;\n"
+ " font-style:normal;font-size:11px;font-weight:normal;\n"
+ " -moz-border-radius:3px;-webkit-border-radius:3px;border-radius:3px;\n"
+ " -moz-box-shadow:gray 2px 2px 3px;-webkit-box-shadow:gray 2px 2px 3px;box-shadow:gray 2px 2px 3px;\n"
+ "}\n"
+ "u:hover div.tips {visibility:visible;}\n"
+ "@media (prefers-color-scheme: dark) {\n"
+ " body { font-family: arial, helvetica, sans-serif; font-size: 12px; font-weight: normal; color: #e8e6e3; background: #131516;}\n"
+ " h1 { color: #a265e0!important; }\n"
+ " h2 { color: #a265e0; }\n"
+ " h3 { color: #ff5190; background-color: #3e3e1f; }\n"
+ " a { color: #3391ff; }\n"
+ " input { background-color: #2f3437; }\n"
+ " .hr { border-color: #8c8273; }\n"
+ " .titre { background-color: #1aa6a6; color: #e8e6e3; }\n"
+ " .frontend {background: #2f3437;}\n"
+ " .socket {background: #2a2d2f;}\n"
+ " .backend {background: #2f3437;}\n"
+ " .active_down {background: #760000;}\n"
+ " .active_going_up {background: #b99200;}\n"
+ " .active_going_down {background: #6c6c00;}\n"
+ " .active_up {background: #165900;}\n"
+ " .active_nolb {background: #006ab9;}\n"
+ " .active_draining {background: #006ab9;}\n"
+ " .active_no_check {background: #2a2d2f;}\n"
+ " .backup_down {background: #760000;}\n"
+ " .backup_going_up {background: #7f007f;}\n"
+ " .backup_going_down {background: #580092;}\n"
+ " .backup_up {background: #2e3234;}\n"
+ " .backup_nolb {background: #1e3c6a;}\n"
+ " .backup_draining {background: #a37a00;}\n"
+ " .backup_no_check {background: #2a2d2f;}\n"
+ " .maintain {background: #9a601a;}\n"
+ " a.px:link {color: #d8d83b; text-decoration: none;}\n"
+ " a.px:visited {color: #d8d83b; text-decoration: none;}\n"
+ " a.px:hover {color: #ffffff; text-decoration: none;}\n"
+ " a.lfsb:link {color: #e8e6e3; text-decoration: none;}\n"
+ " a.lfsb:visited {color: #e8e6e3; text-decoration: none;}\n"
+ " a.lfsb:hover {color: #b5afa6; text-decoration: none;}\n"
+ " table.tbl th.empty { background-color: #181a1b; }\n"
+ " table.tbl th.desc { background: #181a1b; }\n"
+ " table.tbl th.pxname { background-color: #8d0033; color: #ffff46; }\n"
+ " table.tbl th { border-color: #808080; }\n"
+ " table.tbl td { border-color: #808080; }\n"
+ " u {text-decoration:none; border-bottom: 1px dotted #e8e6e3;}\n"
+ " div.tips {\n"
+ " background:#8e8e0d;\n"
+ " color:#e8e6e3;\n"
+ " border-color: #4e2c86;\n"
+ " -moz-box-shadow: #60686c 2px 2px 3px;\n"
+ " -webkit-box-shadow: #60686c 2px 2px 3px;\n"
+ " box-shadow: #60686c 2px 2px 3px;\n"
+ " }\n"
+ "}\n"
+ "-->\n"
+ "</style></head>\n",
+ (ctx->flags & STAT_SHNODE) ? " on " : "",
+ (ctx->flags & STAT_SHNODE) ? (uri && uri->node ? uri->node : global.node) : ""
+ );
+}
+
+/* Dumps the HTML stats information block to the local trash buffer and uses
+ * the state from stream connector <sc> and per-uri parameter from the parent
+ * proxy. The caller is responsible for clearing the local trash buffer if
+ * needed.
+ */
+static void stats_dump_html_info(struct stconn *sc)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ unsigned int up = ns_to_sec(now_ns - start_time_ns);
+ char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN];
+ const char *scope_ptr = stats_scope_ptr(appctx, sc);
+ struct uri_auth *uri;
+ unsigned long long bps;
+ int thr;
+
+ BUG_ON(!ctx->http_px);
+ uri = ctx->http_px->uri_auth;
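+	/* sum the per-thread output rates; each out_32bps tick accounts for
+	 * 32 bytes, hence the multiplication by 32 to obtain bytes per second.
+	 */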
+ for (bps = thr = 0; thr < global.nbthread; thr++)
+ bps += 32ULL * read_freq_ctr(&ha_thread_ctx[thr].out_32bps);
+
+ /* Turn the bytes per second to bits per second and take care of the
+ * usual ethernet overhead in order to help figure how far we are from
+ * interface saturation since it's the only case which usually matters.
+ * For this we count the total size of an Ethernet frame on the wire
+ * including preamble and IFG (1538) for the largest TCP segment it
+ * transports (1448 with TCP timestamps). This is not valid for smaller
+ * packets (under-estimated), but it gives a reasonably accurate
+ * estimation of how far we are from uplink saturation.
+ */
+ bps = bps * 8 * 1538 / 1448;
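+	/* e.g. with 100 MB/s of payload leaving the process, <bps> first
+	 * becomes 800 Mbit/s, then 800 * 1538 / 1448 ~= 849.7 Mbit/s estimated
+	 * on the wire (illustrative figures).
+	 */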
+
+	/* WARNING! this has to fit in the first packet too.
+	 * We are around 3.5 kB, and adding entries will
+	 * become tricky if we want to support 4kB buffers!
+	 */
+ chunk_appendf(&trash_chunk,
+ "<body><h1><a href=\"" PRODUCT_URL "\" style=\"text-decoration: none;\">"
+ PRODUCT_NAME "%s</a></h1>\n"
+ "<h2>Statistics Report for pid %d%s%s%s%s</h2>\n"
+ "<hr width=\"100%%\" class=\"hr\">\n"
+ "<h3>&gt; General process information</h3>\n"
+ "<table border=0><tr><td align=\"left\" nowrap width=\"1%%\">\n"
+ "<p><b>pid = </b> %d (process #%d, nbproc = %d, nbthread = %d)<br>\n"
+ "<b>uptime = </b> %dd %dh%02dm%02ds; warnings = %u<br>\n"
+ "<b>system limits:</b> memmax = %s%s; ulimit-n = %d<br>\n"
+ "<b>maxsock = </b> %d; <b>maxconn = </b> %d; <b>reached = </b> %llu; <b>maxpipes = </b> %d<br>\n"
+ "current conns = %d; current pipes = %d/%d; conn rate = %d/sec; bit rate = %.3f %cbps<br>\n"
+ "Running tasks: %d/%d (%d niced); idle = %d %%<br>\n"
+ "</td><td align=\"center\" nowrap>\n"
+ "<table class=\"lgd\"><tr>\n"
+ "<td class=\"active_up\">&nbsp;</td><td class=\"noborder\">active UP </td>"
+ "<td class=\"backup_up\">&nbsp;</td><td class=\"noborder\">backup UP </td>"
+ "</tr><tr>\n"
+ "<td class=\"active_going_down\"></td><td class=\"noborder\">active UP, going down </td>"
+ "<td class=\"backup_going_down\"></td><td class=\"noborder\">backup UP, going down </td>"
+ "</tr><tr>\n"
+ "<td class=\"active_going_up\"></td><td class=\"noborder\">active DOWN, going up </td>"
+ "<td class=\"backup_going_up\"></td><td class=\"noborder\">backup DOWN, going up </td>"
+ "</tr><tr>\n"
+ "<td class=\"active_down\"></td><td class=\"noborder\">active or backup DOWN &nbsp;</td>"
+ "<td class=\"active_no_check\"></td><td class=\"noborder\">not checked </td>"
+ "</tr><tr>\n"
+ "<td class=\"maintain\"></td><td class=\"noborder\" colspan=\"3\">active or backup DOWN for maintenance (MAINT) &nbsp;</td>"
+ "</tr><tr>\n"
+ "<td class=\"active_draining\"></td><td class=\"noborder\" colspan=\"3\">active or backup SOFT STOPPED for maintenance &nbsp;</td>"
+ "</tr></table>\n"
+ "Note: \"NOLB\"/\"DRAIN\" = UP with load-balancing disabled."
+ "</td>"
+ "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">"
+ "<b>Display option:</b><ul style=\"margin-top: 0.25em;\">"
+ "",
+ (ctx->flags & STAT_HIDEVER) ? "" : (stats_version_string),
+ pid, (ctx->flags & STAT_SHNODE) ? " on " : "",
+ (ctx->flags & STAT_SHNODE) ? (uri->node ? uri->node : global.node) : "",
+ (ctx->flags & STAT_SHDESC) ? ": " : "",
+ (ctx->flags & STAT_SHDESC) ? (uri->desc ? uri->desc : global.desc) : "",
+ pid, 1, 1, global.nbthread,
+ up / 86400, (up % 86400) / 3600,
+ (up % 3600) / 60, (up % 60),
+ HA_ATOMIC_LOAD(&tot_warnings),
+ global.rlimit_memmax ? ultoa(global.rlimit_memmax) : "unlimited",
+ global.rlimit_memmax ? " MB" : "",
+ global.rlimit_nofile,
+ global.maxsock, global.maxconn, HA_ATOMIC_LOAD(&maxconn_reached), global.maxpipes,
+ actconn, pipes_used, pipes_used+pipes_free, read_freq_ctr(&global.conn_per_sec),
+ bps >= 1000000000UL ? (bps / 1000000000.0) : bps >= 1000000UL ? (bps / 1000000.0) : (bps / 1000.0),
+ bps >= 1000000000UL ? 'G' : bps >= 1000000UL ? 'M' : 'k',
+ total_run_queues(), total_allocated_tasks(), total_niced_running_tasks(), clock_report_idle());
+
+ /* scope_txt = search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ memcpy(scope_txt, scope_ptr, ctx->scope_len);
+ scope_txt[ctx->scope_len] = '\0';
+
+ chunk_appendf(&trash_chunk,
+ "<li><form method=\"GET\">Scope : <input value=\"%s\" name=\"" STAT_SCOPE_INPUT_NAME "\" size=\"8\" maxlength=\"%d\" tabindex=\"1\"/></form>\n",
+ (ctx->scope_len > 0) ? scope_txt : "",
+ STAT_SCOPE_TXT_MAXLEN);
+
+ /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ scope_txt[0] = 0;
+ if (ctx->scope_len) {
+ strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt));
+ memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len);
+ scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0;
+ }
+
+ if (ctx->flags & STAT_HIDE_DOWN)
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Show all servers</a><br>\n",
+ uri->uri_prefix,
+ "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ else
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Hide 'DOWN' servers</a><br>\n",
+ uri->uri_prefix,
+ ";up",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+
+ if (uri->refresh > 0) {
+ if (ctx->flags & STAT_NO_REFRESH)
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Enable refresh</a><br>\n",
+ uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ "",
+ scope_txt);
+ else
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Disable refresh</a><br>\n",
+ uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ ";norefresh",
+ scope_txt);
+ }
+
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s%s%s%s\">Refresh now</a><br>\n",
+ uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s;csv%s%s\">CSV export</a><br>\n",
+ uri->uri_prefix,
+ (uri->refresh > 0) ? ";norefresh" : "",
+ scope_txt);
+
+ chunk_appendf(&trash_chunk,
+ "<li><a href=\"%s;json%s%s\">JSON export</a> (<a href=\"%s;json-schema\">schema</a>)<br>\n",
+ uri->uri_prefix,
+ (uri->refresh > 0) ? ";norefresh" : "",
+ scope_txt, uri->uri_prefix);
+
+ chunk_appendf(&trash_chunk,
+ "</ul></td>"
+ "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">"
+ "<b>External resources:</b><ul style=\"margin-top: 0.25em;\">\n"
+ "<li><a href=\"" PRODUCT_URL "\">Primary site</a><br>\n"
+ "<li><a href=\"" PRODUCT_URL_UPD "\">Updates (v" PRODUCT_BRANCH ")</a><br>\n"
+ "<li><a href=\"" PRODUCT_URL_DOC "\">Online manual</a><br>\n"
+ "</ul>"
+ "</td>"
+ "</tr></table>\n"
+ ""
+ );
+
+ if (ctx->st_code) {
+ switch (ctx->st_code) {
+ case STAT_STATUS_DONE:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_up>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Action processed successfully."
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_NONE:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_going_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Nothing has changed."
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_PART:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_going_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Action partially processed.<br>"
+ "Some server names are probably unknown or ambiguous (duplicated names in the backend)."
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_ERRP:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Action not processed because of invalid parameters."
+ "<ul>"
+ "<li>The action is maybe unknown.</li>"
+ "<li>Invalid key parameter (empty or too long).</li>"
+ "<li>The backend name is probably unknown or ambiguous (duplicated names).</li>"
+ "<li>Some server names are probably unknown or ambiguous (duplicated names in the backend).</li>"
+ "</ul>"
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_EXCD:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "<b>Action not processed : the buffer couldn't store all the data.<br>"
+ "You should retry with less servers at a time.</b>"
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_DENY:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "<b>Action denied.</b>"
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ case STAT_STATUS_IVAL:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_down>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "<b>Invalid requests (unsupported method or chunked encoded request).</b>"
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ break;
+ default:
+ chunk_appendf(&trash_chunk,
+ "<p><div class=active_no_check>"
+ "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> "
+ "Unexpected result."
+ "</div>\n", uri->uri_prefix,
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
+ }
+ chunk_appendf(&trash_chunk, "<p>\n");
+ }
+}
+
+/* Dumps the HTML stats trailer block to the local trash buffer. The caller is
+ * responsible for clearing the local trash buffer if needed.
+ */
+static void stats_dump_html_end()
+{
+ chunk_appendf(&trash_chunk, "</body></html>\n");
+}
+
+/* Dumps the stats JSON header to the local trash buffer. The caller is
+ * responsible for clearing it if needed.
+ */
+static void stats_dump_json_header()
+{
+ chunk_strcat(&trash_chunk, "[");
+}
+
+
+/* Dumps the JSON stats trailer block to the local trash buffer. The caller is
+ * responsible for clearing the local trash buffer if needed.
+ */
+static void stats_dump_json_end()
+{
+ chunk_strcat(&trash_chunk, "]\n");
+}
+
+/* Walks the proxy list, using <ctx->obj1> as a pointer to the current proxy
+ * and <ctx->obj2> as a pointer to the current server/listener.
+ */
+static int stats_dump_proxies(struct stconn *sc,
+ struct htx *htx)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct channel *rep = sc_ic(sc);
+ struct proxy *px;
+
+ /* dump proxies */
+ while (ctx->obj1) {
+ if (htx) {
+ if (htx_almost_full(htx)) {
+ sc_need_room(sc, htx->size / 2);
+ goto full;
+ }
+ }
+ else {
+ if (buffer_almost_full(&rep->buf)) {
+ sc_need_room(sc, b_size(&rep->buf) / 2);
+ goto full;
+ }
+ }
+
+ px = ctx->obj1;
+		/* Skip the global frontend proxies and non-networked ones.
+		 * Also skip proxies that were disabled in the configuration.
+		 * This allows retrieving stats from "old" proxies after a reload.
+		 */
+ if (!(px->flags & PR_FL_DISABLED) && px->uuid > 0 &&
+ (px->cap & (PR_CAP_FE | PR_CAP_BE)) && !(px->cap & PR_CAP_INT)) {
+ if (stats_dump_proxy_to_buffer(sc, htx, px) == 0)
+ return 0;
+ }
+
+ ctx->obj1 = px->next;
+ ctx->px_st = STAT_PX_ST_INIT;
+ ctx->field = 0;
+ }
+
+ return 1;
+
+ full:
+ return 0;
+}
+
+/* This function dumps statistics onto the stream connector's read buffer in
+ * either CSV or HTML format. It returns 0 if it had to stop writing data and
+ * an I/O is needed, 1 if the dump is finished and the stream must be closed,
+ * or -1 in case of any error. This function is used by both the CLI and the
+ * HTTP handlers.
+ */
+static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ enum stats_domain domain = ctx->domain;
+
+ chunk_reset(&trash_chunk);
+
+ switch (ctx->state) {
+ case STAT_STATE_INIT:
+ ctx->state = STAT_STATE_HEAD; /* let's start producing data */
+ __fallthrough;
+
+ case STAT_STATE_HEAD:
+ if (ctx->flags & STAT_FMT_HTML)
+ stats_dump_html_head(appctx);
+ else if (ctx->flags & STAT_JSON_SCHM)
+ stats_dump_json_schema(&trash_chunk);
+ else if (ctx->flags & STAT_FMT_JSON)
+ stats_dump_json_header();
+ else if (!(ctx->flags & STAT_FMT_TYPED))
+ stats_dump_csv_header(ctx->domain);
+
+ if (!stats_putchk(appctx, htx))
+ goto full;
+
+ if (ctx->flags & STAT_JSON_SCHM) {
+ ctx->state = STAT_STATE_FIN;
+ return 1;
+ }
+ ctx->state = STAT_STATE_INFO;
+ __fallthrough;
+
+ case STAT_STATE_INFO:
+ if (ctx->flags & STAT_FMT_HTML) {
+ stats_dump_html_info(sc);
+ if (!stats_putchk(appctx, htx))
+ goto full;
+ }
+
+ if (domain == STATS_DOMAIN_PROXY)
+ ctx->obj1 = proxies_list;
+
+ ctx->px_st = STAT_PX_ST_INIT;
+ ctx->field = 0;
+ ctx->state = STAT_STATE_LIST;
+ __fallthrough;
+
+ case STAT_STATE_LIST:
+ switch (domain) {
+ case STATS_DOMAIN_RESOLVERS:
+ if (!stats_dump_resolvers(sc, stat_l[domain],
+ stat_count[domain],
+ &stats_module_list[domain])) {
+ return 0;
+ }
+ break;
+
+ case STATS_DOMAIN_PROXY:
+ default:
+ /* dump proxies */
+ if (!stats_dump_proxies(sc, htx))
+ return 0;
+ break;
+ }
+
+ ctx->state = STAT_STATE_END;
+ __fallthrough;
+
+ case STAT_STATE_END:
+ if (ctx->flags & (STAT_FMT_HTML|STAT_FMT_JSON)) {
+ if (ctx->flags & STAT_FMT_HTML)
+ stats_dump_html_end();
+ else
+ stats_dump_json_end();
+ if (!stats_putchk(appctx, htx))
+ goto full;
+ }
+
+ ctx->state = STAT_STATE_FIN;
+ __fallthrough;
+
+ case STAT_STATE_FIN:
+ return 1;
+
+ default:
+ /* unknown state ! */
+ ctx->state = STAT_STATE_FIN;
+ return -1;
+ }
+
+ full:
+ return 0;
+
+}
+
+/* We reached the stats page through a POST request. The appctx is
+ * expected to have already been allocated by the caller.
+ * Parse the posted data and enable/disable servers if necessary.
+ * Returns 1 if the request was parsed, or zero if more data is needed.
+ */
+static int stats_process_http_post(struct stconn *sc)
+{
+ struct stream *s = __sc_strm(sc);
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+
+ struct proxy *px = NULL;
+ struct server *sv = NULL;
+
+ char key[LINESIZE];
+ int action = ST_ADM_ACTION_NONE;
+ int reprocess = 0;
+
+ int total_servers = 0;
+ int altered_servers = 0;
+
+ char *first_param, *cur_param, *next_param, *end_params;
+ char *st_cur_param = NULL;
+ char *st_next_param = NULL;
+
+ struct buffer *temp = get_trash_chunk();
+
+ struct htx *htx = htxbuf(&s->req.buf);
+ struct htx_blk *blk;
+
+ /* we need more data */
+ if (s->txn->req.msg_state < HTTP_MSG_DONE) {
+ /* check if we can receive more */
+ if (htx_free_data_space(htx) <= global.tune.maxrewrite) {
+ ctx->st_code = STAT_STATUS_EXCD;
+ goto out;
+ }
+ goto wait;
+ }
+
+ /* The request was fully received. Copy data */
+ blk = htx_get_head_blk(htx);
+ while (blk) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA) {
+ struct ist v = htx_get_blk_value(htx, blk);
+
+ if (!chunk_memcat(temp, v.ptr, v.len)) {
+ ctx->st_code = STAT_STATUS_EXCD;
+ goto out;
+ }
+ }
+ blk = htx_get_next_blk(htx, blk);
+ }
+
+ first_param = temp->area;
+ end_params = temp->area + temp->data;
+ cur_param = next_param = end_params;
+ *end_params = '\0';
+
+ ctx->st_code = STAT_STATUS_NONE;
+
+	/*
+	 * Parse the parameters in reverse order so that only the last value is
+	 * stored. In the HTML form, the backend and the action come at the end.
+	 */
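+	/* e.g. a posted body such as "s=srv1&s=srv2&action=maint&b=app"
+	 * (hypothetical names) is scanned from the right, so <b> and <action>
+	 * are already known by the time the "s" parameters are (re)processed.
+	 */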
+ while (cur_param > first_param) {
+ char *value;
+ int poffset, plen;
+
+ cur_param--;
+
+ if ((*cur_param == '&') || (cur_param == first_param)) {
+ reprocess_servers:
+ /* Parse the key */
+ poffset = (cur_param != first_param ? 1 : 0);
+ plen = next_param - cur_param + (cur_param == first_param ? 1 : 0);
+ if ((plen > 0) && (plen <= sizeof(key))) {
+ strncpy(key, cur_param + poffset, plen);
+ key[plen - 1] = '\0';
+ } else {
+ ctx->st_code = STAT_STATUS_ERRP;
+ goto out;
+ }
+
+ /* Parse the value */
+ value = key;
+ while (*value != '\0' && *value != '=') {
+ value++;
+ }
+ if (*value == '=') {
+ /* Ok, a value is found, we can mark the end of the key */
+ *value++ = '\0';
+ }
+ if (url_decode(key, 1) < 0 || url_decode(value, 1) < 0)
+ break;
+
+ /* Now we can check the key to see what to do */
+ if (!px && (strcmp(key, "b") == 0)) {
+ if ((px = proxy_be_by_name(value)) == NULL) {
+ /* the backend name is unknown or ambiguous (duplicate names) */
+ ctx->st_code = STAT_STATUS_ERRP;
+ goto out;
+ }
+ }
+ else if (!action && (strcmp(key, "action") == 0)) {
+ if (strcmp(value, "ready") == 0) {
+ action = ST_ADM_ACTION_READY;
+ }
+ else if (strcmp(value, "drain") == 0) {
+ action = ST_ADM_ACTION_DRAIN;
+ }
+ else if (strcmp(value, "maint") == 0) {
+ action = ST_ADM_ACTION_MAINT;
+ }
+ else if (strcmp(value, "shutdown") == 0) {
+ action = ST_ADM_ACTION_SHUTDOWN;
+ }
+ else if (strcmp(value, "dhlth") == 0) {
+ action = ST_ADM_ACTION_DHLTH;
+ }
+ else if (strcmp(value, "ehlth") == 0) {
+ action = ST_ADM_ACTION_EHLTH;
+ }
+ else if (strcmp(value, "hrunn") == 0) {
+ action = ST_ADM_ACTION_HRUNN;
+ }
+ else if (strcmp(value, "hnolb") == 0) {
+ action = ST_ADM_ACTION_HNOLB;
+ }
+ else if (strcmp(value, "hdown") == 0) {
+ action = ST_ADM_ACTION_HDOWN;
+ }
+ else if (strcmp(value, "dagent") == 0) {
+ action = ST_ADM_ACTION_DAGENT;
+ }
+ else if (strcmp(value, "eagent") == 0) {
+ action = ST_ADM_ACTION_EAGENT;
+ }
+ else if (strcmp(value, "arunn") == 0) {
+ action = ST_ADM_ACTION_ARUNN;
+ }
+ else if (strcmp(value, "adown") == 0) {
+ action = ST_ADM_ACTION_ADOWN;
+ }
+ /* else these are the old supported methods */
+ else if (strcmp(value, "disable") == 0) {
+ action = ST_ADM_ACTION_DISABLE;
+ }
+ else if (strcmp(value, "enable") == 0) {
+ action = ST_ADM_ACTION_ENABLE;
+ }
+ else if (strcmp(value, "stop") == 0) {
+ action = ST_ADM_ACTION_STOP;
+ }
+ else if (strcmp(value, "start") == 0) {
+ action = ST_ADM_ACTION_START;
+ }
+ else {
+ ctx->st_code = STAT_STATUS_ERRP;
+ goto out;
+ }
+ }
+ else if (strcmp(key, "s") == 0) {
+ if (!(px && action)) {
+ /*
+ * Indicates that we'll need to reprocess the parameters
+ * as soon as backend and action are known
+ */
+ if (!reprocess) {
+ st_cur_param = cur_param;
+ st_next_param = next_param;
+ }
+ reprocess = 1;
+ }
+ else if ((sv = findserver(px, value)) != NULL) {
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ switch (action) {
+ case ST_ADM_ACTION_DISABLE:
+ if (!(sv->cur_admin & SRV_ADMF_FMAINT)) {
+ altered_servers++;
+ total_servers++;
+ srv_set_admin_flag(sv, SRV_ADMF_FMAINT, SRV_ADM_STCHGC_STATS_DISABLE);
+ }
+ break;
+ case ST_ADM_ACTION_ENABLE:
+ if (sv->cur_admin & SRV_ADMF_FMAINT) {
+ altered_servers++;
+ total_servers++;
+ srv_clr_admin_flag(sv, SRV_ADMF_FMAINT);
+ }
+ break;
+ case ST_ADM_ACTION_STOP:
+ if (!(sv->cur_admin & SRV_ADMF_FDRAIN)) {
+ srv_set_admin_flag(sv, SRV_ADMF_FDRAIN, SRV_ADM_STCHGC_STATS_STOP);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_START:
+ if (sv->cur_admin & SRV_ADMF_FDRAIN) {
+ srv_clr_admin_flag(sv, SRV_ADMF_FDRAIN);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_DHLTH:
+ if (sv->check.state & CHK_ST_CONFIGURED) {
+ sv->check.state &= ~CHK_ST_ENABLED;
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_EHLTH:
+ if (sv->check.state & CHK_ST_CONFIGURED) {
+ sv->check.state |= CHK_ST_ENABLED;
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_HRUNN:
+ if (!(sv->track)) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_HNOLB:
+ if (!(sv->track)) {
+ sv->check.health = sv->check.rise + sv->check.fall - 1;
+ srv_set_stopping(sv, SRV_OP_STCHGC_STATS_WEB);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_HDOWN:
+ if (!(sv->track)) {
+ sv->check.health = 0;
+ srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_DAGENT:
+ if (sv->agent.state & CHK_ST_CONFIGURED) {
+ sv->agent.state &= ~CHK_ST_ENABLED;
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_EAGENT:
+ if (sv->agent.state & CHK_ST_CONFIGURED) {
+ sv->agent.state |= CHK_ST_ENABLED;
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_ARUNN:
+ if (sv->agent.state & CHK_ST_ENABLED) {
+ sv->agent.health = sv->agent.rise + sv->agent.fall - 1;
+ srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_ADOWN:
+ if (sv->agent.state & CHK_ST_ENABLED) {
+ sv->agent.health = 0;
+ srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ case ST_ADM_ACTION_READY:
+ srv_adm_set_ready(sv);
+ altered_servers++;
+ total_servers++;
+ break;
+ case ST_ADM_ACTION_DRAIN:
+ srv_adm_set_drain(sv);
+ altered_servers++;
+ total_servers++;
+ break;
+ case ST_ADM_ACTION_MAINT:
+ srv_adm_set_maint(sv);
+ altered_servers++;
+ total_servers++;
+ break;
+ case ST_ADM_ACTION_SHUTDOWN:
+ if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ srv_shutdown_streams(sv, SF_ERR_KILLED);
+ altered_servers++;
+ total_servers++;
+ }
+ break;
+ }
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ } else {
+ /* the server name is unknown or ambiguous (duplicate names) */
+ total_servers++;
+ }
+ }
+ if (reprocess && px && action) {
+ /* Now, we know the backend and the action chosen by the user.
+ * We can safely restart from the first server parameter
+ * to reprocess them
+ */
+ cur_param = st_cur_param;
+ next_param = st_next_param;
+ reprocess = 0;
+ goto reprocess_servers;
+ }
+
+ next_param = cur_param;
+ }
+ }
+
+ if (total_servers == 0) {
+ ctx->st_code = STAT_STATUS_NONE;
+ }
+ else if (altered_servers == 0) {
+ ctx->st_code = STAT_STATUS_ERRP;
+ }
+ else if (altered_servers == total_servers) {
+ ctx->st_code = STAT_STATUS_DONE;
+ }
+ else {
+ ctx->st_code = STAT_STATUS_PART;
+ }
+ out:
+ return 1;
+ wait:
+ ctx->st_code = STAT_STATUS_NONE;
+ return 0;
+}
+
+
+static int stats_send_http_headers(struct stconn *sc, struct htx *htx)
+{
+ struct stream *s = __sc_strm(sc);
+ struct uri_auth *uri;
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct htx_sl *sl;
+ unsigned int flags;
+
+ BUG_ON(!ctx->http_px);
+ uri = ctx->http_px->uri_auth;
+
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_ENC|HTX_SL_F_XFER_LEN|HTX_SL_F_CHNK);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("200"), ist("OK"));
+ if (!sl)
+ goto full;
+ sl->info.res.status = 200;
+
+ if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")))
+ goto full;
+ if (ctx->flags & STAT_FMT_HTML) {
+ if (!htx_add_header(htx, ist("Content-Type"), ist("text/html")))
+ goto full;
+ }
+ else if (ctx->flags & (STAT_FMT_JSON|STAT_JSON_SCHM)) {
+ if (!htx_add_header(htx, ist("Content-Type"), ist("application/json")))
+ goto full;
+ }
+ else {
+ if (!htx_add_header(htx, ist("Content-Type"), ist("text/plain")))
+ goto full;
+ }
+
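+	/* e.g. a "stats refresh 10s" directive would emit "Refresh: 10" here,
+	 * assuming <uri->refresh> holds the interval in seconds.
+	 */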
+ if (uri->refresh > 0 && !(ctx->flags & STAT_NO_REFRESH)) {
+ const char *refresh = U2A(uri->refresh);
+ if (!htx_add_header(htx, ist("Refresh"), ist(refresh)))
+ goto full;
+ }
+
+ if (ctx->flags & STAT_CHUNKED) {
+ if (!htx_add_header(htx, ist("Transfer-Encoding"), ist("chunked")))
+ goto full;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto full;
+
+ channel_add_input(&s->res, htx->data);
+ return 1;
+
+ full:
+ htx_reset(htx);
+ sc_need_room(sc, 0);
+ return 0;
+}
+
+
+static int stats_send_http_redirect(struct stconn *sc, struct htx *htx)
+{
+ char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN];
+ struct stream *s = __sc_strm(sc);
+ struct uri_auth *uri;
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct htx_sl *sl;
+ unsigned int flags;
+
+ BUG_ON(!ctx->http_px);
+ uri = ctx->http_px->uri_auth;
+
+ /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */
+ scope_txt[0] = 0;
+ if (ctx->scope_len) {
+ const char *scope_ptr = stats_scope_ptr(appctx, sc);
+
+ strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt));
+ memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len);
+ scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0;
+ }
+
+	/* We don't want to land on the posted stats page because a refresh will
+	 * repost the data. We don't want this to happen by accident, so we
+	 * redirect the browser to the stats page with a GET.
+	 */
+ chunk_printf(&trash, "%s;st=%s%s%s%s",
+ uri->uri_prefix,
+ ((ctx->st_code > STAT_STATUS_INIT) &&
+ (ctx->st_code < STAT_STATUS_SIZE) &&
+ stat_status_codes[ctx->st_code]) ?
+ stat_status_codes[ctx->st_code] :
+ stat_status_codes[STAT_STATUS_UNKN],
+ (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "",
+ (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "",
+ scope_txt);
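+	/* the resulting Location may look like (illustrative values, assuming
+	 * STAT_SCOPE_PATTERN expands to "?scope="):
+	 *   /my-stats;st=DONE;up;norefresh?scope=web
+	 */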
+
+ flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN|HTX_SL_F_CHNK);
+ sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("303"), ist("See Other"));
+ if (!sl)
+ goto full;
+ sl->info.res.status = 303;
+
+ if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) ||
+ !htx_add_header(htx, ist("Content-Type"), ist("text/plain")) ||
+ !htx_add_header(htx, ist("Content-Length"), ist("0")) ||
+ !htx_add_header(htx, ist("Location"), ist2(trash.area, trash.data)))
+ goto full;
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH))
+ goto full;
+
+ channel_add_input(&s->res, htx->data);
+ return 1;
+
+full:
+ htx_reset(htx);
+ sc_need_room(sc, 0);
+ return 0;
+}
+
+/* This I/O handler runs as an applet embedded in a stream connector. It is
+ * used to send HTTP stats over a TCP socket. The mechanism is very simple.
+ * appctx->st0 contains the operation in progress (dump, done). The handler
+ * automatically unregisters itself once transfer is complete.
+ */
+static void http_stats_io_handler(struct appctx *appctx)
+{
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct channel *req = sc_oc(sc);
+ struct channel *res = sc_ic(sc);
+ struct htx *req_htx, *res_htx;
+
+ /* only proxy stats are available via http */
+ ctx->domain = STATS_DOMAIN_PROXY;
+
+ res_htx = htx_from_buf(&res->buf);
+
+ if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) {
+ appctx->st0 = STAT_HTTP_END;
+ goto out;
+ }
+
+	/* Check if the response buffer is available. */
+ if (!b_size(&res->buf)) {
+ sc_need_room(sc, 0);
+ goto out;
+ }
+
+ /* all states are processed in sequence */
+ if (appctx->st0 == STAT_HTTP_HEAD) {
+ if (stats_send_http_headers(sc, res_htx)) {
+ if (s->txn->meth == HTTP_METH_HEAD)
+ appctx->st0 = STAT_HTTP_DONE;
+ else
+ appctx->st0 = STAT_HTTP_DUMP;
+ }
+ }
+
+ if (appctx->st0 == STAT_HTTP_DUMP) {
+ trash_chunk = b_make(trash.area, res->buf.size, 0, 0);
+ /* adjust buffer size to take htx overhead into account,
+ * make sure to perform this call on an empty buffer
+ */
+ trash_chunk.size = buf_room_for_htx_data(&trash_chunk);
+ if (stats_dump_stat_to_buffer(sc, res_htx))
+ appctx->st0 = STAT_HTTP_DONE;
+ }
+
+ if (appctx->st0 == STAT_HTTP_POST) {
+ if (stats_process_http_post(sc))
+ appctx->st0 = STAT_HTTP_LAST;
+ else if (s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ appctx->st0 = STAT_HTTP_DONE;
+ }
+
+ if (appctx->st0 == STAT_HTTP_LAST) {
+ if (stats_send_http_redirect(sc, res_htx))
+ appctx->st0 = STAT_HTTP_DONE;
+ }
+
+ if (appctx->st0 == STAT_HTTP_DONE) {
+		/* no more data is expected. If the response buffer is empty,
+		 * be sure to add something (an EOT block in this case) so that
+		 * there is something to send. It is important to make sure the
+		 * EOM flag will be handled by the endpoint.
+		 */
+ if (htx_is_empty(res_htx)) {
+ if (!htx_add_endof(res_htx, HTX_BLK_EOT)) {
+ sc_need_room(sc, sizeof(struct htx_blk) + 1);
+ goto out;
+ }
+ channel_add_input(res, 1);
+ }
+ res_htx->flags |= HTX_FL_EOM;
+ se_fl_set(appctx->sedesc, SE_FL_EOI);
+ appctx->st0 = STAT_HTTP_END;
+ }
+
+ if (appctx->st0 == STAT_HTTP_END) {
+ se_fl_set(appctx->sedesc, SE_FL_EOS);
+ applet_will_consume(appctx);
+ }
+
+ out:
+ /* we have left the request in the buffer for the case where we
+ * process a POST, and this automatically re-enables activity on
+ * read. It's better to indicate that we want to stop reading when
+ * we're sending, so that we know there's at most one direction
+ * deciding to wake the applet up. It saves it from looping when
+ * emitting large blocks into small TCP windows.
+ */
+ htx_to_buf(res_htx, &res->buf);
+ if (appctx->st0 == STAT_HTTP_END) {
+ /* eat the whole request */
+ if (co_data(req)) {
+ req_htx = htx_from_buf(&req->buf);
+ co_htx_skip(req, req_htx, co_data(req));
+ htx_to_buf(req_htx, &req->buf);
+ }
+ }
+ else if (co_data(res))
+ applet_wont_consume(appctx);
+}
+
+/* Dump all fields from <info> into <out> using the "show info" format (name: value) */
+static int stats_dump_info_fields(struct buffer *out,
+ const struct field *info,
+ struct show_stat_ctx *ctx)
+{
+ int flags = ctx->flags;
+ int field;
+
+ for (field = 0; field < INF_TOTAL_FIELDS; field++) {
+ if (!field_format(info, field))
+ continue;
+
+ if (!chunk_appendf(out, "%s: ", info_fields[field].name))
+ return 0;
+ if (!stats_emit_raw_data_field(out, &info[field]))
+ return 0;
+ if ((flags & STAT_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", info_fields[field].desc))
+ return 0;
+ if (!chunk_strcat(out, "\n"))
+ return 0;
+ }
+ return 1;
+}
+
+/* Dump all fields from <info> into <out> using the "show info typed" format */
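+/* e.g. this produces lines of the form (illustrative values):
+ *   0.Name.1:POS:str:HAProxy
+ *   1.Version.1:POS:str:2.9.5
+ */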
+static int stats_dump_typed_info_fields(struct buffer *out,
+ const struct field *info,
+ struct show_stat_ctx *ctx)
+{
+ int flags = ctx->flags;
+ int field;
+
+ for (field = 0; field < INF_TOTAL_FIELDS; field++) {
+ if (!field_format(info, field))
+ continue;
+
+ if (!chunk_appendf(out, "%d.%s.%u:", field, info_fields[field].name, info[INF_PROCESS_NUM].u.u32))
+ return 0;
+ if (!stats_emit_field_tags(out, &info[field], ':'))
+ return 0;
+ if (!stats_emit_typed_data_field(out, &info[field]))
+ return 0;
+ if ((flags & STAT_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", info_fields[field].desc))
+ return 0;
+ if (!chunk_strcat(out, "\n"))
+ return 0;
+ }
+ return 1;
+}
+
+/* Fill <info> with HAProxy global info. <info> is a preallocated array of
+ * length <len>. The length of the array must be at least INF_TOTAL_FIELDS;
+ * if it is less than this value the function returns 0, otherwise it
+ * returns 1. Some fields' presence or precision may depend on some of the
+ * STAT_* flags present in <flags>.
+ */
+int stats_fill_info(struct field *info, int len, uint flags)
+{
+ struct buffer *out = get_trash_chunk();
+ uint64_t glob_out_bytes, glob_spl_bytes, glob_out_b32;
+ uint up_sec, up_usec;
+ ullong up;
+ ulong boot;
+ int thr;
+
+#ifdef USE_OPENSSL
+ double ssl_sess_rate = read_freq_ctr_flt(&global.ssl_per_sec);
+ double ssl_key_rate = read_freq_ctr_flt(&global.ssl_fe_keys_per_sec);
+ double ssl_reuse = 0;
+
+ if (ssl_key_rate < ssl_sess_rate)
+ ssl_reuse = 100.0 * (1.0 - ssl_key_rate / ssl_sess_rate);
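+	/* e.g. 90 sessions/s with only 30 new keys/s computed by the frontend
+	 * yields 100 * (1 - 30/90) ~= 66.7% of sessions reusing a cached key
+	 * (illustrative figures).
+	 */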
+#endif
+
+ /* sum certain per-thread totals (mostly byte counts) */
+ glob_out_bytes = glob_spl_bytes = glob_out_b32 = 0;
+ for (thr = 0; thr < global.nbthread; thr++) {
+ glob_out_bytes += HA_ATOMIC_LOAD(&ha_thread_ctx[thr].out_bytes);
+ glob_spl_bytes += HA_ATOMIC_LOAD(&ha_thread_ctx[thr].spliced_out_bytes);
+ glob_out_b32 += read_freq_ctr(&ha_thread_ctx[thr].out_32bps);
+ }
+ glob_out_b32 *= 32; // values are 32-byte units
+
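+	/* uptime is measured in nanoseconds: derive the whole seconds and the
+	 * microsecond remainder used by the optional float representation.
+	 */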
+ up = now_ns - start_time_ns;
+ up_sec = ns_to_sec(up);
+ up_usec = (up / 1000U) % 1000000U;
+
+ boot = tv_ms_remain(&start_date, &ready_date);
+
+ if (len < INF_TOTAL_FIELDS)
+ return 0;
+
+ chunk_reset(out);
+ memset(info, 0, sizeof(*info) * len);
+
+ info[INF_NAME] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, PRODUCT_NAME);
+ info[INF_VERSION] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version);
+ info[INF_BUILD_INFO] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version);
+ info[INF_RELEASE_DATE] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_date);
+
+ info[INF_NBTHREAD] = mkf_u32(FO_CONFIG|FS_SERVICE, global.nbthread);
+ info[INF_NBPROC] = mkf_u32(FO_CONFIG|FS_SERVICE, 1);
+ info[INF_PROCESS_NUM] = mkf_u32(FO_KEY, 1);
+ info[INF_PID] = mkf_u32(FO_STATUS, pid);
+
+ info[INF_UPTIME] = mkf_str(FN_DURATION, chunk_newstr(out));
+ chunk_appendf(out, "%ud %uh%02um%02us", up_sec / 86400, (up_sec % 86400) / 3600, (up_sec % 3600) / 60, (up_sec % 60));
+
+ info[INF_UPTIME_SEC] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_DURATION, up_sec + up_usec / 1000000.0) : mkf_u32(FN_DURATION, up_sec);
+ info[INF_START_TIME_SEC] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_DURATION, start_date.tv_sec + start_date.tv_usec / 1000000.0) : mkf_u32(FN_DURATION, start_date.tv_sec);
+ info[INF_MEMMAX_MB] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax);
+ info[INF_MEMMAX_BYTES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax * 1048576L);
+ info[INF_POOL_ALLOC_MB] = mkf_u32(0, (unsigned)(pool_total_allocated() / 1048576L));
+ info[INF_POOL_ALLOC_BYTES] = mkf_u64(0, pool_total_allocated());
+ info[INF_POOL_USED_MB] = mkf_u32(0, (unsigned)(pool_total_used() / 1048576L));
+ info[INF_POOL_USED_BYTES] = mkf_u64(0, pool_total_used());
+ info[INF_POOL_FAILED] = mkf_u32(FN_COUNTER, pool_total_failures());
+ info[INF_ULIMIT_N] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_nofile);
+ info[INF_MAXSOCK] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxsock);
+ info[INF_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxconn);
+ info[INF_HARD_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.hardmaxconn);
+ info[INF_CURR_CONN] = mkf_u32(0, actconn);
+ info[INF_CUM_CONN] = mkf_u32(FN_COUNTER, totalconn);
+ info[INF_CUM_REQ] = mkf_u32(FN_COUNTER, global.req_count);
+#ifdef USE_OPENSSL
+ info[INF_MAX_SSL_CONNS] = mkf_u32(FN_MAX, global.maxsslconn);
+ info[INF_CURR_SSL_CONNS] = mkf_u32(0, global.sslconns);
+ info[INF_CUM_SSL_CONNS] = mkf_u32(FN_COUNTER, global.totalsslconns);
+#endif
+ info[INF_MAXPIPES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxpipes);
+ info[INF_PIPES_USED] = mkf_u32(0, pipes_used);
+ info[INF_PIPES_FREE] = mkf_u32(0, pipes_free);
+ info[INF_CONN_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.conn_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.conn_per_sec));
+ info[INF_CONN_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.cps_lim);
+ info[INF_MAX_CONN_RATE] = mkf_u32(FN_MAX, global.cps_max);
+ info[INF_SESS_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.sess_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.sess_per_sec));
+ info[INF_SESS_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.sps_lim);
+ info[INF_MAX_SESS_RATE] = mkf_u32(FN_RATE, global.sps_max);
+
+#ifdef USE_OPENSSL
+ info[INF_SSL_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_sess_rate) : mkf_u32(FN_RATE, ssl_sess_rate);
+ info[INF_SSL_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.ssl_lim);
+ info[INF_MAX_SSL_RATE] = mkf_u32(FN_MAX, global.ssl_max);
+ info[INF_SSL_FRONTEND_KEY_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_key_rate) : mkf_u32(0, ssl_key_rate);
+ info[INF_SSL_FRONTEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_fe_keys_max);
+ info[INF_SSL_FRONTEND_SESSION_REUSE_PCT] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_reuse) : mkf_u32(0, ssl_reuse);
+ info[INF_SSL_BACKEND_KEY_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.ssl_be_keys_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.ssl_be_keys_per_sec));
+ info[INF_SSL_BACKEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_be_keys_max);
+ info[INF_SSL_CACHE_LOOKUPS] = mkf_u32(FN_COUNTER, global.shctx_lookups);
+ info[INF_SSL_CACHE_MISSES] = mkf_u32(FN_COUNTER, global.shctx_misses);
+#endif
+ info[INF_COMPRESS_BPS_IN] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_in)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_in));
+ info[INF_COMPRESS_BPS_OUT] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_out)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_out));
+ info[INF_COMPRESS_BPS_RATE_LIM] = mkf_u32(FO_CONFIG|FN_LIMIT, global.comp_rate_lim);
+#ifdef USE_ZLIB
+ info[INF_ZLIB_MEM_USAGE] = mkf_u32(0, zlib_used_memory);
+ info[INF_MAX_ZLIB_MEM_USAGE] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxzlibmem);
+#endif
+ info[INF_TASKS] = mkf_u32(0, total_allocated_tasks());
+ info[INF_RUN_QUEUE] = mkf_u32(0, total_run_queues());
+ info[INF_IDLE_PCT] = mkf_u32(FN_AVG, clock_report_idle());
+ info[INF_NODE] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.node);
+ if (global.desc)
+ info[INF_DESCRIPTION] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.desc);
+ info[INF_STOPPING] = mkf_u32(0, stopping);
+ info[INF_JOBS] = mkf_u32(0, jobs);
+ info[INF_UNSTOPPABLE_JOBS] = mkf_u32(0, unstoppable_jobs);
+ info[INF_LISTENERS] = mkf_u32(0, listeners);
+ info[INF_ACTIVE_PEERS] = mkf_u32(0, active_peers);
+ info[INF_CONNECTED_PEERS] = mkf_u32(0, connected_peers);
+ info[INF_DROPPED_LOGS] = mkf_u32(0, dropped_logs);
+ info[INF_BUSY_POLLING] = mkf_u32(0, !!(global.tune.options & GTUNE_BUSY_POLLING));
+ info[INF_FAILED_RESOLUTIONS] = mkf_u32(0, resolv_failed_resolutions);
+ info[INF_TOTAL_BYTES_OUT] = mkf_u64(0, glob_out_bytes);
+ info[INF_TOTAL_SPLICED_BYTES_OUT] = mkf_u64(0, glob_spl_bytes);
+ info[INF_BYTES_OUT_RATE] = mkf_u64(FN_RATE, glob_out_b32);
+ info[INF_DEBUG_COMMANDS_ISSUED] = mkf_u32(0, debug_commands_issued);
+ info[INF_CUM_LOG_MSGS] = mkf_u32(FN_COUNTER, cum_log_messages);
+
+ info[INF_TAINTED] = mkf_str(FO_STATUS, chunk_newstr(out));
+ chunk_appendf(out, "%#x", get_tainted());
+ info[INF_WARNINGS] = mkf_u32(FN_COUNTER, HA_ATOMIC_LOAD(&tot_warnings));
+ info[INF_MAXCONN_REACHED] = mkf_u32(FN_COUNTER, HA_ATOMIC_LOAD(&maxconn_reached));
+ info[INF_BOOTTIME_MS] = mkf_u32(FN_DURATION, boot);
+ info[INF_NICED_TASKS] = mkf_u32(0, total_niced_running_tasks());
+
+ return 1;
+}
+
+/* This function dumps information onto the stream connector's read buffer.
+ * It returns 0 as long as it does not complete, non-zero upon completion.
+ * No state is used.
+ */
+static int stats_dump_info_to_buffer(struct stconn *sc)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_stat_ctx *ctx = appctx->svcctx;
+ int ret;
+ int current_field;
+
+ if (!stats_fill_info(info, INF_TOTAL_FIELDS, ctx->flags))
+ return 0;
+
+ chunk_reset(&trash_chunk);
+more:
+ current_field = ctx->field;
+
+ if (ctx->flags & STAT_FMT_TYPED)
+ ret = stats_dump_typed_info_fields(&trash_chunk, info, ctx);
+ else if (ctx->flags & STAT_FMT_JSON)
+ ret = stats_dump_json_info_fields(&trash_chunk, info, ctx);
+ else
+ ret = stats_dump_info_fields(&trash_chunk, info, ctx);
+
+ if (applet_putchk(appctx, &trash_chunk) == -1) {
+ /* restore previous field */
+ ctx->field = current_field;
+ return 0;
+ }
+ if (ret && ctx->field) {
+ /* partial dump */
+ goto more;
+ }
+ ctx->field = 0;
+ return 1;
+}
+
+/* This function dumps the JSON schema into the <out> buffer. When the buffer
+ * is too short to hold the whole schema, an error object is emitted instead.
+ *
+ * Integer values are bounded to the range [-(2**53)+1, (2**53)-1] as per the
+ * recommendation for interoperable integers in section 6 of RFC 7159.
+ */
+static void stats_dump_json_schema(struct buffer *out)
+{
+
+ int old_len = out->data;
+
+ chunk_strcat(out,
+ "{"
+ "\"$schema\":\"http://json-schema.org/draft-04/schema#\","
+ "\"oneOf\":["
+ "{"
+ "\"title\":\"Info\","
+ "\"type\":\"array\","
+ "\"items\":{"
+ "\"title\":\"InfoItem\","
+ "\"type\":\"object\","
+ "\"properties\":{"
+ "\"field\":{\"$ref\":\"#/definitions/field\"},"
+ "\"processNum\":{\"$ref\":\"#/definitions/processNum\"},"
+ "\"tags\":{\"$ref\":\"#/definitions/tags\"},"
+ "\"value\":{\"$ref\":\"#/definitions/typedValue\"}"
+ "},"
+ "\"required\":[\"field\",\"processNum\",\"tags\","
+ "\"value\"]"
+ "}"
+ "},"
+ "{"
+ "\"title\":\"Stat\","
+ "\"type\":\"array\","
+ "\"items\":{"
+ "\"title\":\"InfoItem\","
+ "\"type\":\"object\","
+ "\"properties\":{"
+ "\"objType\":{"
+ "\"enum\":[\"Frontend\",\"Backend\",\"Listener\","
+ "\"Server\",\"Unknown\"]"
+ "},"
+ "\"proxyId\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0"
+ "},"
+ "\"id\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0"
+ "},"
+ "\"field\":{\"$ref\":\"#/definitions/field\"},"
+ "\"processNum\":{\"$ref\":\"#/definitions/processNum\"},"
+ "\"tags\":{\"$ref\":\"#/definitions/tags\"},"
+ "\"typedValue\":{\"$ref\":\"#/definitions/typedValue\"}"
+ "},"
+ "\"required\":[\"objType\",\"proxyId\",\"id\","
+ "\"field\",\"processNum\",\"tags\","
+ "\"value\"]"
+ "}"
+ "},"
+ "{"
+ "\"title\":\"Error\","
+ "\"type\":\"object\","
+ "\"properties\":{"
+ "\"errorStr\":{"
+ "\"type\":\"string\""
+ "}"
+ "},"
+ "\"required\":[\"errorStr\"]"
+ "}"
+ "],"
+ "\"definitions\":{"
+ "\"field\":{"
+ "\"type\":\"object\","
+ "\"pos\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0"
+ "},"
+ "\"name\":{"
+ "\"type\":\"string\""
+ "},"
+ "\"required\":[\"pos\",\"name\"]"
+ "},"
+ "\"processNum\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":1"
+ "},"
+ "\"tags\":{"
+ "\"type\":\"object\","
+ "\"origin\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"Metric\",\"Status\",\"Key\","
+ "\"Config\",\"Product\",\"Unknown\"]"
+ "},"
+ "\"nature\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"Gauge\",\"Limit\",\"Min\",\"Max\","
+ "\"Rate\",\"Counter\",\"Duration\","
+ "\"Age\",\"Time\",\"Name\",\"Output\","
+ "\"Avg\", \"Unknown\"]"
+ "},"
+ "\"scope\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"Cluster\",\"Process\",\"Service\","
+ "\"System\",\"Unknown\"]"
+ "},"
+ "\"required\":[\"origin\",\"nature\",\"scope\"]"
+ "},"
+ "\"typedValue\":{"
+ "\"type\":\"object\","
+ "\"oneOf\":["
+ "{\"$ref\":\"#/definitions/typedValue/definitions/s32Value\"},"
+ "{\"$ref\":\"#/definitions/typedValue/definitions/s64Value\"},"
+ "{\"$ref\":\"#/definitions/typedValue/definitions/u32Value\"},"
+ "{\"$ref\":\"#/definitions/typedValue/definitions/u64Value\"},"
+ "{\"$ref\":\"#/definitions/typedValue/definitions/strValue\"}"
+ "],"
+ "\"definitions\":{"
+ "\"s32Value\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"s32\"]"
+ "},"
+ "\"value\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":-2147483648,"
+ "\"maximum\":2147483647"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"s64Value\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"s64\"]"
+ "},"
+ "\"value\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":-9007199254740991,"
+ "\"maximum\":9007199254740991"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"u32Value\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"u32\"]"
+ "},"
+ "\"value\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0,"
+ "\"maximum\":4294967295"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"u64Value\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"u64\"]"
+ "},"
+ "\"value\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0,"
+ "\"maximum\":9007199254740991"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"strValue\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"str\"]"
+ "},"
+ "\"value\":{\"type\":\"string\"}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "},"
+ "\"unknownValue\":{"
+ "\"properties\":{"
+ "\"type\":{"
+ "\"type\":\"integer\","
+ "\"minimum\":0"
+ "},"
+ "\"value\":{"
+ "\"type\":\"string\","
+ "\"enum\":[\"unknown\"]"
+ "}"
+ "},"
+ "\"required\":[\"type\",\"value\"]"
+ "}"
+ "}"
+ "}"
+ "}"
+ "}");
+
+ if (old_len == out->data) {
+ chunk_reset(out);
+ chunk_appendf(out,
+ "{\"errorStr\":\"output buffer too short\"}");
+ }
+ chunk_appendf(out, "\n");
+}
+
+/* This function dumps the schema onto the stream connector's read buffer.
+ * It returns 0 as long as it does not complete, non-zero upon completion.
+ * No state is used.
+ */
+static int stats_dump_json_schema_to_buffer(struct appctx *appctx)
+{
+
+ chunk_reset(&trash_chunk);
+
+ stats_dump_json_schema(&trash_chunk);
+
+ if (applet_putchk(appctx, &trash_chunk) == -1)
+ return 0;
+
+ return 1;
+}
+
+static int cli_parse_clear_counters(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy *px;
+ struct server *sv;
+ struct listener *li;
+ struct stats_module *mod;
+ int clrall = 0;
+
+ if (strcmp(args[2], "all") == 0)
+ clrall = 1;
+
+ /* check permissions */
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER) ||
+ (clrall && !cli_has_level(appctx, ACCESS_LVL_ADMIN)))
+ return 1;
+
+ for (px = proxies_list; px; px = px->next) {
+ if (clrall) {
+ memset(&px->be_counters, 0, sizeof(px->be_counters));
+ memset(&px->fe_counters, 0, sizeof(px->fe_counters));
+ }
+ else {
+ px->be_counters.conn_max = 0;
+ px->be_counters.p.http.rps_max = 0;
+ px->be_counters.sps_max = 0;
+ px->be_counters.cps_max = 0;
+ px->be_counters.nbpend_max = 0;
+ px->be_counters.qtime_max = 0;
+ px->be_counters.ctime_max = 0;
+ px->be_counters.dtime_max = 0;
+ px->be_counters.ttime_max = 0;
+
+ px->fe_counters.conn_max = 0;
+ px->fe_counters.p.http.rps_max = 0;
+ px->fe_counters.sps_max = 0;
+ px->fe_counters.cps_max = 0;
+ }
+
+ for (sv = px->srv; sv; sv = sv->next)
+ if (clrall)
+ memset(&sv->counters, 0, sizeof(sv->counters));
+ else {
+ sv->counters.cur_sess_max = 0;
+ sv->counters.nbpend_max = 0;
+ sv->counters.sps_max = 0;
+ sv->counters.qtime_max = 0;
+ sv->counters.ctime_max = 0;
+ sv->counters.dtime_max = 0;
+ sv->counters.ttime_max = 0;
+ }
+
+ list_for_each_entry(li, &px->conf.listeners, by_fe)
+ if (li->counters) {
+ if (clrall)
+ memset(li->counters, 0, sizeof(*li->counters));
+ else
+ li->counters->conn_max = 0;
+ }
+ }
+
+ global.cps_max = 0;
+ global.sps_max = 0;
+ global.ssl_max = 0;
+ global.ssl_fe_keys_max = 0;
+ global.ssl_be_keys_max = 0;
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ if (!mod->clearable && !clrall)
+ continue;
+
+ for (px = proxies_list; px; px = px->next) {
+ enum stats_domain_px_cap mod_cap = stats_px_get_cap(mod->domain_flags);
+
+ if (px->cap & PR_CAP_FE && mod_cap & STATS_PX_CAP_FE) {
+ EXTRA_COUNTERS_INIT(px->extra_counters_fe,
+ mod,
+ mod->counters,
+ mod->counters_size);
+ }
+
+ if (px->cap & PR_CAP_BE && mod_cap & STATS_PX_CAP_BE) {
+ EXTRA_COUNTERS_INIT(px->extra_counters_be,
+ mod,
+ mod->counters,
+ mod->counters_size);
+ }
+
+ if (mod_cap & STATS_PX_CAP_SRV) {
+ for (sv = px->srv; sv; sv = sv->next) {
+ EXTRA_COUNTERS_INIT(sv->extra_counters,
+ mod,
+ mod->counters,
+ mod->counters_size);
+ }
+ }
+
+ if (mod_cap & STATS_PX_CAP_LI) {
+ list_for_each_entry(li, &px->conf.listeners, by_fe) {
+ EXTRA_COUNTERS_INIT(li->extra_counters,
+ mod,
+ mod->counters,
+ mod->counters_size);
+ }
+ }
+ }
+ }
+
+ resolv_stats_clear_counters(clrall, &stats_module_list[STATS_DOMAIN_RESOLVERS]);
+
+ memset(activity, 0, sizeof(activity));
+ return 1;
+}
+
+
+static int cli_parse_show_info(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_stat_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg = 2;
+
+ ctx->scope_str = 0;
+ ctx->scope_len = 0;
+ ctx->flags = 0;
+ ctx->field = 0; /* explicit default value */
+
+ while (*args[arg]) {
+ if (strcmp(args[arg], "typed") == 0)
+ ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_TYPED;
+ else if (strcmp(args[arg], "json") == 0)
+ ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_JSON;
+ else if (strcmp(args[arg], "desc") == 0)
+ ctx->flags |= STAT_SHOW_FDESC;
+ else if (strcmp(args[arg], "float") == 0)
+ ctx->flags |= STAT_USE_FLOAT;
+ arg++;
+ }
+ return 0;
+}
+
+
+static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_stat_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int arg = 2;
+
+ ctx->scope_str = 0;
+ ctx->scope_len = 0;
+ ctx->http_px = NULL; // not under http context
+ ctx->flags = STAT_SHNODE | STAT_SHDESC;
+
+ if ((strm_li(appctx_strm(appctx))->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER)
+ ctx->flags |= STAT_SHLGNDS;
+
+ /* proxy is the default domain */
+ ctx->domain = STATS_DOMAIN_PROXY;
+ if (strcmp(args[arg], "domain") == 0) {
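+		/* note: the <args> pointer itself is advanced below so that the
+		 * fixed index <arg> keeps designating the next token to parse
+		 */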
+ ++args;
+
+ if (strcmp(args[arg], "proxy") == 0) {
+ ++args;
+ } else if (strcmp(args[arg], "resolvers") == 0) {
+ ctx->domain = STATS_DOMAIN_RESOLVERS;
+ ++args;
+ } else {
+ return cli_err(appctx, "Invalid statistics domain.\n");
+ }
+ }
+
+ if (ctx->domain == STATS_DOMAIN_PROXY
+ && *args[arg] && *args[arg+1] && *args[arg+2]) {
+ struct proxy *px;
+
+ px = proxy_find_by_name(args[arg], 0, 0);
+ if (px)
+ ctx->iid = px->uuid;
+ else
+ ctx->iid = atoi(args[arg]);
+
+ if (!ctx->iid)
+ return cli_err(appctx, "No such proxy.\n");
+
+ ctx->flags |= STAT_BOUND;
+ ctx->type = atoi(args[arg+1]);
+ ctx->sid = atoi(args[arg+2]);
+ arg += 3;
+ }
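+	/* e.g. "show stat px1 4 -1" (hypothetical proxy name) would restrict
+	 * the dump to the servers of <px1>, -1 meaning all server IDs, with
+	 * <type> being the usual 1=frontend, 2=backend, 4=server bit mask.
+	 */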
+
+ while (*args[arg]) {
+ if (strcmp(args[arg], "typed") == 0)
+ ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_TYPED;
+ else if (strcmp(args[arg], "json") == 0)
+ ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_JSON;
+ else if (strcmp(args[arg], "desc") == 0)
+ ctx->flags |= STAT_SHOW_FDESC;
+ else if (strcmp(args[arg], "no-maint") == 0)
+ ctx->flags |= STAT_HIDE_MAINT;
+ else if (strcmp(args[arg], "up") == 0)
+ ctx->flags |= STAT_HIDE_DOWN;
+ arg++;
+ }
+
+ return 0;
+}
+
+static int cli_io_handler_dump_info(struct appctx *appctx)
+{
+ trash_chunk = b_make(trash.area, trash.size, 0, 0);
+ return stats_dump_info_to_buffer(appctx_sc(appctx));
+}
+
+/* This I/O handler runs as an applet embedded in a stream connector. It is
+ * used to send raw stats over a socket.
+ */
+static int cli_io_handler_dump_stat(struct appctx *appctx)
+{
+ trash_chunk = b_make(trash.area, trash.size, 0, 0);
+ return stats_dump_stat_to_buffer(appctx_sc(appctx), NULL);
+}
+
+static int cli_io_handler_dump_json_schema(struct appctx *appctx)
+{
+ trash_chunk = b_make(trash.area, trash.size, 0, 0);
+ return stats_dump_json_schema_to_buffer(appctx);
+}
+
+int stats_allocate_proxy_counters_internal(struct extra_counters **counters,
+ int type, int px_cap)
+{
+ struct stats_module *mod;
+
+ EXTRA_COUNTERS_REGISTER(counters, type, alloc_failed);
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ if (!(stats_px_get_cap(mod->domain_flags) & px_cap))
+ continue;
+
+ EXTRA_COUNTERS_ADD(mod, *counters, mod->counters, mod->counters_size);
+ }
+
+ EXTRA_COUNTERS_ALLOC(*counters, alloc_failed);
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ if (!(stats_px_get_cap(mod->domain_flags) & px_cap))
+ continue;
+
+ EXTRA_COUNTERS_INIT(*counters, mod, mod->counters, mod->counters_size);
+ }
+
+ return 1;
+
+ alloc_failed:
+ return 0;
+}
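+
+/* The two-pass scheme above works as follows: EXTRA_COUNTERS_REGISTER() and
+ * EXTRA_COUNTERS_ADD() only accumulate the total size required by all the
+ * modules, EXTRA_COUNTERS_ALLOC() then performs a single allocation, and the
+ * second loop's EXTRA_COUNTERS_INIT() copies each module's default counters
+ * into its slice of the block. A rough plain-C sketch of the idea (names are
+ * illustrative, not the real macro expansion):
+ *
+ *   size_t sz = 0;
+ *   list_for_each_entry(mod, &mods, list)
+ *       sz += mod->counters_size;              // pass 1: size only
+ *   char *blob = calloc(1, sz);                // one block for all modules
+ *   size_t off = 0;
+ *   list_for_each_entry(mod, &mods, list) {    // pass 2: seed defaults
+ *       memcpy(blob + off, mod->counters, mod->counters_size);
+ *       off += mod->counters_size;
+ *   }
+ */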
+
+/* Initialize and allocate all extra counters for a proxy and its attached
+ * servers/listeners with all already registered stats module
+ */
+int stats_allocate_proxy_counters(struct proxy *px)
+{
+ struct server *sv;
+ struct listener *li;
+
+ if (px->cap & PR_CAP_FE) {
+ if (!stats_allocate_proxy_counters_internal(&px->extra_counters_fe,
+ COUNTERS_FE,
+ STATS_PX_CAP_FE)) {
+ return 0;
+ }
+ }
+
+ if (px->cap & PR_CAP_BE) {
+ if (!stats_allocate_proxy_counters_internal(&px->extra_counters_be,
+ COUNTERS_BE,
+ STATS_PX_CAP_BE)) {
+ return 0;
+ }
+ }
+
+ for (sv = px->srv; sv; sv = sv->next) {
+ if (!stats_allocate_proxy_counters_internal(&sv->extra_counters,
+ COUNTERS_SV,
+ STATS_PX_CAP_SRV)) {
+ return 0;
+ }
+ }
+
+ list_for_each_entry(li, &px->conf.listeners, by_fe) {
+ if (!stats_allocate_proxy_counters_internal(&li->extra_counters,
+ COUNTERS_LI,
+ STATS_PX_CAP_LI)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+void stats_register_module(struct stats_module *m)
+{
+ const uint8_t domain = stats_get_domain(m->domain_flags);
+
+ LIST_APPEND(&stats_module_list[domain], &m->list);
+ stat_count[domain] += m->stats_count;
+}
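+
+/* Sketch of how a module is expected to register itself; the module name and
+ * field values below are hypothetical, only the registration pattern mirrors
+ * what in-tree modules do:
+ *
+ *   static struct stats_module my_stats_module = {
+ *       .name = "my_mod",
+ *       .stats = my_stats, .stats_count = MY_STATS_COUNT,
+ *       .counters = &my_counters, .counters_size = sizeof(my_counters),
+ *       (remaining fields elided)
+ *   };
+ *   INITCALL1(STG_REGISTER, stats_register_module, &my_stats_module);
+ */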
+
+static int allocate_stats_px_postcheck(void)
+{
+ struct stats_module *mod;
+ size_t i = ST_F_TOTAL_FIELDS;
+ int err_code = 0;
+ struct proxy *px;
+
+ stat_count[STATS_DOMAIN_PROXY] += ST_F_TOTAL_FIELDS;
+
+ stat_f[STATS_DOMAIN_PROXY] = malloc(stat_count[STATS_DOMAIN_PROXY] * sizeof(struct name_desc));
+ if (!stat_f[STATS_DOMAIN_PROXY]) {
+ ha_alert("stats: cannot allocate all fields for proxy statistics\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ return err_code;
+ }
+
+ memcpy(stat_f[STATS_DOMAIN_PROXY], stat_fields,
+ ST_F_TOTAL_FIELDS * sizeof(struct name_desc));
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) {
+ memcpy(stat_f[STATS_DOMAIN_PROXY] + i,
+ mod->stats,
+ mod->stats_count * sizeof(struct name_desc));
+ i += mod->stats_count;
+ }
+
+ for (px = proxies_list; px; px = px->next) {
+ if (!stats_allocate_proxy_counters(px)) {
+ ha_alert("stats: cannot allocate all counters for proxy statistics\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ return err_code;
+ }
+ }
+
+ /* wait for the per-thread alloc hook to perform the corresponding stat_l allocation */
+
+ return err_code;
+}
+
+REGISTER_CONFIG_POSTPARSER("allocate-stats-px", allocate_stats_px_postcheck);
+
+static int allocate_stats_rslv_postcheck(void)
+{
+ struct stats_module *mod;
+ size_t i = 0;
+ int err_code = 0;
+
+ stat_f[STATS_DOMAIN_RESOLVERS] = malloc(stat_count[STATS_DOMAIN_RESOLVERS] * sizeof(struct name_desc));
+ if (!stat_f[STATS_DOMAIN_RESOLVERS]) {
+ ha_alert("stats: cannot allocate all fields for resolver statistics\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ return err_code;
+ }
+
+ list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_RESOLVERS], list) {
+ memcpy(stat_f[STATS_DOMAIN_RESOLVERS] + i,
+ mod->stats,
+ mod->stats_count * sizeof(struct name_desc));
+ i += mod->stats_count;
+ }
+
+ if (!resolv_allocate_counters(&stats_module_list[STATS_DOMAIN_RESOLVERS])) {
+ ha_alert("stats: cannot allocate all counters for resolver statistics\n");
+ err_code |= ERR_ALERT | ERR_FATAL;
+ return err_code;
+ }
+
+ /* wait for the per-thread alloc hook to perform the corresponding stat_l allocation */
+
+ return err_code;
+}
+
+REGISTER_CONFIG_POSTPARSER("allocate-stats-resolver", allocate_stats_rslv_postcheck);
+
+static int allocate_stat_lines_per_thread(void)
+{
+ int domains[] = { STATS_DOMAIN_PROXY, STATS_DOMAIN_RESOLVERS }, i;
+
+ for (i = 0; i < STATS_DOMAIN_COUNT; ++i) {
+ const int domain = domains[i];
+
+ stat_l[domain] = malloc(stat_count[domain] * sizeof(struct field));
+ if (!stat_l[domain])
+ return 0;
+ }
+ return 1;
+}
+
+REGISTER_PER_THREAD_ALLOC(allocate_stat_lines_per_thread);
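+
+/* This alloc hook is paired with deinit_stat_lines_per_thread() below: each
+ * thread gets its own stat_l[] line buffer, so concurrent dumps running on
+ * different threads never share these scratch lines.
+ */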
+
+static int allocate_trash_counters(void)
+{
+ struct stats_module *mod;
+ int domains[] = { STATS_DOMAIN_PROXY, STATS_DOMAIN_RESOLVERS }, i;
+ size_t max_counters_size = 0;
+
+ /* calculate the greatest counters size used by any stats module */
+ for (i = 0; i < STATS_DOMAIN_COUNT; ++i) {
+ list_for_each_entry(mod, &stats_module_list[domains[i]], list) {
+ max_counters_size = mod->counters_size > max_counters_size ?
+ mod->counters_size : max_counters_size;
+ }
+ }
+
+ /* allocate the trash with the size of the greatest counters */
+ if (max_counters_size) {
+ trash_counters = malloc(max_counters_size);
+ if (!trash_counters) {
+ ha_alert("stats: cannot allocate trash counters for statistics\n");
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+REGISTER_PER_THREAD_ALLOC(allocate_trash_counters);
+
+static void deinit_stat_lines_per_thread(void)
+{
+ int domains[] = { STATS_DOMAIN_PROXY, STATS_DOMAIN_RESOLVERS }, i;
+
+ for (i = 0; i < STATS_DOMAIN_COUNT; ++i) {
+ const int domain = domains[i];
+
+ ha_free(&stat_l[domain]);
+ }
+}
+
+
+REGISTER_PER_THREAD_FREE(deinit_stat_lines_per_thread);
+
+static void deinit_stats(void)
+{
+ int domains[] = { STATS_DOMAIN_PROXY, STATS_DOMAIN_RESOLVERS }, i;
+
+ for (i = 0; i < STATS_DOMAIN_COUNT; ++i) {
+ const int domain = domains[i];
+
+ if (stat_f[domain])
+ free(stat_f[domain]);
+ }
+}
+
+REGISTER_POST_DEINIT(deinit_stats);
+
+static void free_trash_counters(void)
+{
+ if (trash_counters)
+ free(trash_counters);
+}
+
+REGISTER_PER_THREAD_FREE(free_trash_counters);
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "clear", "counters", NULL }, "clear counters [all] : clear max statistics counters (or all counters)", cli_parse_clear_counters, NULL, NULL },
+ { { "show", "info", NULL }, "show info [desc|json|typed|float]* : report information about the running process", cli_parse_show_info, cli_io_handler_dump_info, NULL },
+ { { "show", "stat", NULL }, "show stat [desc|json|no-maint|typed|up]*: report counters for each proxy and server", cli_parse_show_stat, cli_io_handler_dump_stat, NULL },
+ { { "show", "schema", "json", NULL }, "show schema json : report schema used for stats", NULL, cli_io_handler_dump_json_schema, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
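+
+/* Note on the table above: each entry pairs an optional parse callback with
+ * an optional I/O handler. "show schema json" needs no parser since it takes
+ * no argument, while "clear counters" needs no I/O handler since it completes
+ * synchronously within its parser.
+ */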
+
+struct applet http_stats_applet = {
+ .obj_type = OBJ_TYPE_APPLET,
+ .name = "<STATS>", /* used for logging */
+ .fct = http_stats_io_handler,
+ .release = NULL,
+};
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/stconn.c b/src/stconn.c
new file mode 100644
index 0000000..8e3ae7e
--- /dev/null
+++ b/src/stconn.c
@@ -0,0 +1,2050 @@
+/*
+ * stream connector management functions
+ *
+ * Copyright 2021 Christopher Faulet <cfaulet@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/connection.h>
+#include <haproxy/check.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/pipe.h>
+#include <haproxy/pool.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/xref.h>
+
+DECLARE_POOL(pool_head_connstream, "stconn", sizeof(struct stconn));
+DECLARE_POOL(pool_head_sedesc, "sedesc", sizeof(struct sedesc));
+
+/* functions used by default on a detached stream connector */
+static void sc_app_abort(struct stconn *sc);
+static void sc_app_shut(struct stconn *sc);
+static void sc_app_chk_rcv(struct stconn *sc);
+static void sc_app_chk_snd(struct stconn *sc);
+
+/* functions used on a mux-based stream connector */
+static void sc_app_abort_conn(struct stconn *sc);
+static void sc_app_shut_conn(struct stconn *sc);
+static void sc_app_chk_rcv_conn(struct stconn *sc);
+static void sc_app_chk_snd_conn(struct stconn *sc);
+
+/* functions used on an applet-based stream connector */
+static void sc_app_abort_applet(struct stconn *sc);
+static void sc_app_shut_applet(struct stconn *sc);
+static void sc_app_chk_rcv_applet(struct stconn *sc);
+static void sc_app_chk_snd_applet(struct stconn *sc);
+
+static int sc_conn_process(struct stconn *sc);
+static int sc_conn_recv(struct stconn *sc);
+static int sc_conn_send(struct stconn *sc);
+static int sc_applet_process(struct stconn *sc);
+
+/* stream connector operations for connections */
+struct sc_app_ops sc_app_conn_ops = {
+ .chk_rcv = sc_app_chk_rcv_conn,
+ .chk_snd = sc_app_chk_snd_conn,
+ .abort = sc_app_abort_conn,
+ .shutdown= sc_app_shut_conn,
+ .wake = sc_conn_process,
+ .name = "STRM",
+};
+
+/* stream connector operations for embedded tasks */
+struct sc_app_ops sc_app_embedded_ops = {
+ .chk_rcv = sc_app_chk_rcv,
+ .chk_snd = sc_app_chk_snd,
+ .abort = sc_app_abort,
+ .shutdown= sc_app_shut,
+ .wake = NULL, /* may never be used */
+ .name = "NONE", /* may never be used */
+};
+
+/* stream connector operations for applets */
+struct sc_app_ops sc_app_applet_ops = {
+ .chk_rcv = sc_app_chk_rcv_applet,
+ .chk_snd = sc_app_chk_snd_applet,
+ .abort = sc_app_abort_applet,
+ .shutdown= sc_app_shut_applet,
+ .wake = sc_applet_process,
+ .name = "STRM",
+};
+
+/* stream connector for health checks on connections */
+struct sc_app_ops sc_app_check_ops = {
+ .chk_rcv = NULL,
+ .chk_snd = NULL,
+ .abort = NULL,
+ .shutdown= NULL,
+ .wake = wake_srv_chk,
+ .name = "CHCK",
+};
+
+/* Initializes an endpoint */
+void sedesc_init(struct sedesc *sedesc)
+{
+ sedesc->se = NULL;
+ sedesc->conn = NULL;
+ sedesc->sc = NULL;
+ sedesc->lra = TICK_ETERNITY;
+ sedesc->fsb = TICK_ETERNITY;
+ sedesc->xref.peer = NULL;
+ se_fl_setall(sedesc, SE_FL_NONE);
+
+ sedesc->iobuf.pipe = NULL;
+ sedesc->iobuf.buf = NULL;
+ sedesc->iobuf.offset = sedesc->iobuf.data = 0;
+ sedesc->iobuf.flags = IOBUF_FL_NONE;
+}
+
+/* Tries to alloc an endpoint and initialize it. Returns NULL on failure. */
+struct sedesc *sedesc_new()
+{
+ struct sedesc *sedesc;
+
+ sedesc = pool_alloc(pool_head_sedesc);
+ if (unlikely(!sedesc))
+ return NULL;
+
+ sedesc_init(sedesc);
+ return sedesc;
+}
+
+/* Releases an endpoint. It is the caller's responsibility to ensure it is
+ * safe to do so and that it is not shared with another entity.
+ */
+void sedesc_free(struct sedesc *sedesc)
+{
+ if (sedesc) {
+ if (sedesc->iobuf.pipe)
+ put_pipe(sedesc->iobuf.pipe);
+ pool_free(pool_head_sedesc, sedesc);
+ }
+}
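+
+/* Typical endpoint descriptor lifecycle, as a sketch (error handling elided;
+ * the real callers are the stconn and mux layers below in this file):
+ *
+ *   struct sedesc *sd = sedesc_new();   // pool alloc + sedesc_init()
+ *   ...                                 // attach to an stconn and a mux
+ *   sedesc_free(sd);                    // releases the pipe, then the pool
+ */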
+
+/* Tries to allocate a new stconn and initialize its main fields. On
+ * failure, nothing is allocated and NULL is returned. It is an internal
+ * function. The caller must, at least, set the SE_FL_ORPHAN or SE_FL_DETACHED
+ * flag.
+ */
+static struct stconn *sc_new(struct sedesc *sedesc)
+{
+ struct stconn *sc;
+
+ sc = pool_alloc(pool_head_connstream);
+
+ if (unlikely(!sc))
+ goto alloc_error;
+
+ sc->obj_type = OBJ_TYPE_SC;
+ sc->flags = SC_FL_NONE;
+ sc->state = SC_ST_INI;
+ sc->ioto = TICK_ETERNITY;
+ sc->room_needed = 0;
+ sc->app = NULL;
+ sc->app_ops = NULL;
+ sc->src = NULL;
+ sc->dst = NULL;
+ sc->wait_event.tasklet = NULL;
+ sc->wait_event.events = 0;
+
+ /* If there is no endpoint, allocate a new one now */
+ if (!sedesc) {
+ sedesc = sedesc_new();
+ if (unlikely(!sedesc))
+ goto alloc_error;
+ }
+ sc->sedesc = sedesc;
+ sedesc->sc = sc;
+
+ return sc;
+
+ alloc_error:
+ pool_free(pool_head_connstream, sc);
+ return NULL;
+}
+
+/* Creates a new stream connector and its associated stream from a mux. <sd> must
+ * be defined. It returns NULL on error. On success, the new stream connector is
+ * returned. In this case, SE_FL_ORPHAN flag is removed.
+ */
+struct stconn *sc_new_from_endp(struct sedesc *sd, struct session *sess, struct buffer *input)
+{
+ struct stconn *sc;
+
+ sc = sc_new(sd);
+ if (unlikely(!sc))
+ return NULL;
+ if (unlikely(!stream_new(sess, sc, input))) {
+ sd->sc = NULL;
+ if (sc->sedesc != sd) {
+ /* none was provided so sc_new() allocated one */
+ sedesc_free(sc->sedesc);
+ }
+ pool_free(pool_head_connstream, sc);
+ se_fl_set(sd, SE_FL_ORPHAN);
+ return NULL;
+ }
+ se_fl_clr(sd, SE_FL_ORPHAN);
+ return sc;
+}
+
+/* Creates a new stream connector from a stream. There is no endpoint here, thus it
+ * will be created by sc_new(). So the SE_FL_DETACHED flag is set. It returns
+ * NULL on error. On success, the new stream connector is returned.
+ */
+struct stconn *sc_new_from_strm(struct stream *strm, unsigned int flags)
+{
+ struct stconn *sc;
+
+ sc = sc_new(NULL);
+ if (unlikely(!sc))
+ return NULL;
+ sc->flags |= flags;
+ sc_ep_set(sc, SE_FL_DETACHED);
+ sc->app = &strm->obj_type;
+ sc->app_ops = &sc_app_embedded_ops;
+ return sc;
+}
+
+/* Creates a new stream connector from a health-check. There is no endpoint here,
+ * thus it will be created by sc_new(). So the SE_FL_DETACHED flag is set. It
+ * returns NULL on error. On success, the new stream connector is returned.
+ */
+struct stconn *sc_new_from_check(struct check *check, unsigned int flags)
+{
+ struct stconn *sc;
+
+ sc = sc_new(NULL);
+ if (unlikely(!sc))
+ return NULL;
+ sc->flags |= flags;
+ sc_ep_set(sc, SE_FL_DETACHED);
+ sc->app = &check->obj_type;
+ sc->app_ops = &sc_app_check_ops;
+ return sc;
+}
+
+/* Releases a stconn previously allocated by sc_new(), as well as its
+ * endpoint, if it exists. This function is called internally or on error path.
+ */
+void sc_free(struct stconn *sc)
+{
+ sockaddr_free(&sc->src);
+ sockaddr_free(&sc->dst);
+ if (sc->sedesc) {
+ BUG_ON(!sc_ep_test(sc, SE_FL_DETACHED));
+ sedesc_free(sc->sedesc);
+ }
+ tasklet_free(sc->wait_event.tasklet);
+ pool_free(pool_head_connstream, sc);
+}
+
+/* Conditionally removes a stream connector if it is detached and if no app
+ * layer is defined. Except on the error path, this one must be used. If it is
+ * released, the pointer to the SC is set to NULL.
+ */
+static void sc_free_cond(struct stconn **scp)
+{
+ struct stconn *sc = *scp;
+
+ if (!sc->app && (!sc->sedesc || sc_ep_test(sc, SE_FL_DETACHED))) {
+ sc_free(sc);
+ *scp = NULL;
+ }
+}
+
+
+/* Attaches a stconn to a mux endpoint and sets the endpoint ctx. Returns
+ * -1 on error and 0 on success. SE_FL_DETACHED flag is removed. This function is
+ * called from a mux when it is attached to a stream or a health-check.
+ */
+int sc_attach_mux(struct stconn *sc, void *sd, void *ctx)
+{
+ struct connection *conn = ctx;
+ struct sedesc *sedesc = sc->sedesc;
+
+ if (sc_strm(sc)) {
+ if (!sc->wait_event.tasklet) {
+ sc->wait_event.tasklet = tasklet_new();
+ if (!sc->wait_event.tasklet)
+ return -1;
+ sc->wait_event.tasklet->process = sc_conn_io_cb;
+ sc->wait_event.tasklet->context = sc;
+ sc->wait_event.events = 0;
+ }
+
+ sc->app_ops = &sc_app_conn_ops;
+ xref_create(&sc->sedesc->xref, &sc_opposite(sc)->sedesc->xref);
+ }
+ else if (sc_check(sc)) {
+ if (!sc->wait_event.tasklet) {
+ sc->wait_event.tasklet = tasklet_new();
+ if (!sc->wait_event.tasklet)
+ return -1;
+ sc->wait_event.tasklet->process = srv_chk_io_cb;
+ sc->wait_event.tasklet->context = sc;
+ sc->wait_event.events = 0;
+ }
+
+ sc->app_ops = &sc_app_check_ops;
+ }
+
+ sedesc->se = sd;
+ sedesc->conn = ctx;
+ se_fl_set(sedesc, SE_FL_T_MUX);
+ se_fl_clr(sedesc, SE_FL_DETACHED);
+ if (!conn->ctx)
+ conn->ctx = sc;
+ return 0;
+}
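+
+/* Sketch of the expected call from a mux's attach path; the h1s/h1c names
+ * are illustrative, loosely modeled on how an HTTP/1 stream would hook
+ * itself up:
+ *
+ *   if (sc_attach_mux(sd->sc, h1s, h1c->conn) < 0)
+ *       goto fail;   // tasklet allocation failed
+ */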
+
+/* Attaches a stconn to an applet endpoint and sets the endpoint ctx. The
+ * SE_FL_DETACHED flag is removed. This function is called by a stream when
+ * a backend applet is registered.
+ */
+static void sc_attach_applet(struct stconn *sc, void *sd)
+{
+ sc->sedesc->se = sd;
+ sc_ep_set(sc, SE_FL_T_APPLET);
+ sc_ep_clr(sc, SE_FL_DETACHED);
+ if (sc_strm(sc)) {
+ sc->app_ops = &sc_app_applet_ops;
+ xref_create(&sc->sedesc->xref, &sc_opposite(sc)->sedesc->xref);
+ }
+}
+
+/* Attaches a stconn to an app layer and sets the relevant
+ * callbacks. Returns -1 on error and 0 on success. SE_FL_ORPHAN flag is
+ * removed. This function is called by a stream when it is created to attach it
+ * on the stream connector on the client side.
+ */
+int sc_attach_strm(struct stconn *sc, struct stream *strm)
+{
+ sc->app = &strm->obj_type;
+ sc_ep_clr(sc, SE_FL_ORPHAN);
+ sc_ep_report_read_activity(sc);
+ if (sc_ep_test(sc, SE_FL_T_MUX)) {
+ sc->wait_event.tasklet = tasklet_new();
+ if (!sc->wait_event.tasklet)
+ return -1;
+ sc->wait_event.tasklet->process = sc_conn_io_cb;
+ sc->wait_event.tasklet->context = sc;
+ sc->wait_event.events = 0;
+
+ sc->app_ops = &sc_app_conn_ops;
+ }
+ else if (sc_ep_test(sc, SE_FL_T_APPLET)) {
+ sc->app_ops = &sc_app_applet_ops;
+ }
+ else {
+ sc->app_ops = &sc_app_embedded_ops;
+ }
+ return 0;
+}
+
+/* Detaches the stconn from the endpoint, if any. For a connection, if a mux
+ * owns the connection, its ->detach() callback is called. Otherwise, it means
+ * the stream connector owns the connection; in this case the connection is
+ * closed and released. For an applet, the appctx is released. If still
+ * allocated, the endpoint is reset and flagged as detached. If the app layer
+ * is also detached, the stream connector is released.
+ */
+static void sc_detach_endp(struct stconn **scp)
+{
+ struct stconn *sc = *scp;
+ struct xref *peer;
+
+ if (!sc)
+ return;
+
+
+ /* Remove my link in the original objects. */
+ peer = xref_get_peer_and_lock(&sc->sedesc->xref);
+ if (peer)
+ xref_disconnect(&sc->sedesc->xref, peer);
+
+ if (sc_ep_test(sc, SE_FL_T_MUX)) {
+ struct connection *conn = __sc_conn(sc);
+ struct sedesc *sedesc = sc->sedesc;
+
+ if (conn->mux) {
+ if (sc->wait_event.events != 0)
+ conn->mux->unsubscribe(sc, sc->wait_event.events, &sc->wait_event);
+ se_fl_set(sedesc, SE_FL_ORPHAN);
+ sedesc->sc = NULL;
+ sc->sedesc = NULL;
+ conn->mux->detach(sedesc);
+ }
+ else {
+ /* It's too early to have a mux, let's just destroy
+ * the connection
+ */
+ conn_stop_tracking(conn);
+ conn_full_close(conn);
+ if (conn->destroy_cb)
+ conn->destroy_cb(conn);
+ conn_free(conn);
+ }
+ }
+ else if (sc_ep_test(sc, SE_FL_T_APPLET)) {
+ struct appctx *appctx = __sc_appctx(sc);
+
+ sc_ep_set(sc, SE_FL_ORPHAN);
+ sc->sedesc->sc = NULL;
+ sc->sedesc = NULL;
+ appctx_shut(appctx);
+ appctx_free(appctx);
+ }
+
+ if (sc->sedesc) {
+ /* the SD wasn't used and can be recycled */
+ sc->sedesc->se = NULL;
+ sc->sedesc->conn = NULL;
+ sc->sedesc->flags = 0;
+ sc_ep_set(sc, SE_FL_DETACHED);
+ }
+
+ /* FIXME: Reset the SC for now, but this must be reviewed. SC flags are only
+ * connection-related for now but this will evolve.
+ */
+ sc->flags &= SC_FL_ISBACK;
+ if (sc_strm(sc))
+ sc->app_ops = &sc_app_embedded_ops;
+ else
+ sc->app_ops = NULL;
+ sc_free_cond(scp);
+}
+
+/* Detaches the stconn from the app layer. If no endpoint remains attached
+ * to the stconn, it is released.
+ */
+static void sc_detach_app(struct stconn **scp)
+{
+ struct stconn *sc = *scp;
+
+ if (!sc)
+ return;
+
+ sc->app = NULL;
+ sc->app_ops = NULL;
+ sockaddr_free(&sc->src);
+ sockaddr_free(&sc->dst);
+
+ tasklet_free(sc->wait_event.tasklet);
+ sc->wait_event.tasklet = NULL;
+ sc->wait_event.events = 0;
+ sc_free_cond(scp);
+}
+
+/* Destroy the stconn. It is detached from its endpoint and its
+ * application. After this call, the stconn must be considered as released.
+ */
+void sc_destroy(struct stconn *sc)
+{
+ sc_detach_endp(&sc);
+ sc_detach_app(&sc);
+ BUG_ON_HOT(sc);
+}
+
+/* Resets the stream connector endpoint. This happens when the app layer wants
+ * to renew its endpoint, for a connection retry for instance. If a mux or an
+ * applet is attached, a new endpoint is created. Returns -1 on error and 0 on
+ * success.
+ */
+int sc_reset_endp(struct stconn *sc)
+{
+ struct sedesc *new_sd;
+
+ BUG_ON(!sc->app);
+
+ if (!__sc_endp(sc)) {
+ /* endpoint not attached or attached to a mux with no
+ * target. Thus the endpoint will not be released but just
+ * reset. The app is still attached, the sc will not be
+ * released.
+ */
+ sc_detach_endp(&sc);
+ return 0;
+ }
+
+ /* allocate the new endpoint first to be able to set error if it
+ * fails */
+ new_sd = sedesc_new();
+ if (unlikely(!new_sd))
+ return -1;
+
+ /* The app is still attached, the sc will not be released */
+ sc_detach_endp(&sc);
+ BUG_ON(!sc);
+ BUG_ON(sc->sedesc);
+ sc->sedesc = new_sd;
+ sc->sedesc->sc = sc;
+ sc_ep_set(sc, SE_FL_DETACHED);
+ return 0;
+}
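+
+/* Context sketch: this is typically exercised on the backend side when a
+ * connection attempt is retried, along the lines of (illustrative):
+ *
+ *   if (sc_reset_endp(sc) < 0)
+ *       return;   // no fresh sedesc could be allocated, abort the retry
+ *   // sc is now flagged SE_FL_DETACHED, ready for a new connect attempt
+ */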
+
+
+/* Create an applet to handle a stream connector as a new appctx. The SC will
+ * wake it up every time it is solicited. The appctx must be deleted by the task
+ * handler using sc_detach_endp(), possibly from within the function itself.
+ * It also pre-initializes the applet's context and returns it (or NULL in case
+ * it could not be allocated).
+ */
+struct appctx *sc_applet_create(struct stconn *sc, struct applet *app)
+{
+ struct appctx *appctx;
+
+ appctx = appctx_new_here(app, sc->sedesc);
+ if (!appctx)
+ return NULL;
+ sc_attach_applet(sc, appctx);
+ appctx->t->nice = __sc_strm(sc)->task->nice;
+ applet_need_more_data(appctx);
+ appctx_wakeup(appctx);
+
+ sc->state = SC_ST_RDY;
+ return appctx;
+}
+
+/* Conditionally forward the close to the write side. It returns 1 if it can
+ * be forwarded; it is then the caller's responsibility to forward the close to
+ * the write side. Otherwise, 0 is returned. In this case, the SC_FL_SHUT_WANTED
+ * flag may be set on the consumer SC if we are only waiting for the outgoing
+ * data to be flushed.
+ */
+static inline int sc_cond_forward_shut(struct stconn *sc)
+{
+ /* The close must not be forwarded */
+ if (!(sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) || !(sc->flags & SC_FL_NOHALF))
+ return 0;
+
+ if (co_data(sc_ic(sc)) && !(sc_ic(sc)->flags & CF_WRITE_TIMEOUT)) {
+ /* the shutdown cannot be forwarded now because
+ * we should flush outgoing data first. But instruct the output
+ * channel it should be done ASAP.
+ */
+ sc_schedule_shutdown(sc);
+ return 0;
+ }
+
+ /* the close can be immediately forwarded to the write side */
+ return 1;
+}
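+
+/* Worked example of the logic above: with SC_FL_NOHALF set and an abort or
+ * EOS already reported, a read-side close with no pending output data
+ * (co_data() == 0) returns 1 and may be forwarded at once, while the same
+ * close with unsent output data only schedules the shutdown and returns 0,
+ * unless a write timeout already fired on the output channel.
+ */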
+
+
+static inline int sc_is_fastfwd_supported(struct stconn *sc)
+{
+ return (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD) &&
+ sc_ep_test(sc, SE_FL_MAY_FASTFWD_PROD) &&
+ sc_ep_test(sc_opposite(sc), SE_FL_MAY_FASTFWD_CONS) &&
+ sc_ic(sc)->to_forward);
+}
+
+/*
+ * This function performs a shutdown-read on a detached stream connector in a
+ * connected or init state (it does nothing for other states). It either shuts
+ * the read side or marks itself as closed. The buffer flags are updated to
+ * reflect the new state. If the stream connector has SC_FL_NOHALF, we also
+ * forward the close to the write side. The owner task is woken up if it exists.
+ */
+static void sc_app_abort(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ return;
+
+ sc->flags |= SC_FL_ABRT_DONE;
+ ic->flags |= CF_READ_EVENT;
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (sc->flags & SC_FL_SHUT_DONE) {
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+ else if (sc_cond_forward_shut(sc))
+ return sc_app_shut(sc);
+
+ /* note that if the task exists, it must unregister itself once it runs */
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+}
+
+/*
+ * This function performs a shutdown-write on a detached stream connector in a
+ * connected or init state (it does nothing for other states). It either shuts
+ * the write side or marks itself as closed. The buffer flags are updated to
+ * reflect the new state. It also closes everything if the SC was marked as
+ * being in an error state. The owner task is woken up if it exists.
+ */
+static void sc_app_shut(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ sc->flags &= ~SC_FL_SHUT_WANTED;
+ if (sc->flags & SC_FL_SHUT_DONE)
+ return;
+ sc->flags |= SC_FL_SHUT_DONE;
+ oc->flags |= CF_WRITE_EVENT;
+ sc_set_hcto(sc);
+
+ switch (sc->state) {
+ case SC_ST_RDY:
+ case SC_ST_EST:
+ /* we have to shut before closing, otherwise some short messages
+ * may never leave the system, especially when there are remaining
+ * unread data in the socket input buffer, or when nolinger is set.
+ * However, if SC_FL_NOLINGER is explicitly set, we know there is
+ * no risk so we close both sides immediately.
+ */
+ if (!(sc->flags & (SC_FL_ERROR|SC_FL_NOLINGER|SC_FL_EOS|SC_FL_ABRT_DONE)) &&
+ !(ic->flags & CF_DONT_READ))
+ return;
+
+ __fallthrough;
+ case SC_ST_CON:
+ case SC_ST_CER:
+ case SC_ST_QUE:
+ case SC_ST_TAR:
+ /* Note that none of these states may happen with applets */
+ sc->state = SC_ST_DIS;
+ __fallthrough;
+ default:
+ sc->flags &= ~SC_FL_NOLINGER;
+ sc->flags |= SC_FL_ABRT_DONE;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+
+ /* note that if the task exists, it must unregister itself once it runs */
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+}
+
+/* default chk_rcv function for scheduled tasks */
+static void sc_app_chk_rcv(struct stconn *sc)
+{
+ if (sc_ep_have_ff_data(sc_opposite(sc))) {
+ /* stop reading */
+ sc_need_room(sc, -1);
+ }
+ else {
+ /* (re)start reading */
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+ }
+}
+
+/* default chk_snd function for scheduled tasks */
+static void sc_app_chk_snd(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ if (unlikely(sc->state != SC_ST_EST || (sc->flags & SC_FL_SHUT_DONE)))
+ return;
+
+ if (!sc_ep_test(sc, SE_FL_WAIT_DATA) || /* not waiting for data */
+ (!co_data(oc) && !sc_ep_have_ff_data(sc))) /* called with nothing to send ! */
+ return;
+
+ /* Otherwise there are remaining data to be sent in the buffer,
+ * so we tell the handler.
+ */
+ sc_ep_clr(sc, SE_FL_WAIT_DATA);
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+}
+
+/*
+ * This function performs a shutdown-read on a stream connector attached to
+ * a connection in a connected or init state (it does nothing for other
+ * states). It either shuts the read side or marks itself as closed. The buffer
+ * flags are updated to reflect the new state. If the stream connector has
+ * SC_FL_NOHALF, we also forward the close to the write side. If a control
+ * layer is defined, then it is supposed to be a socket layer and file
+ * descriptors are then shutdown or closed accordingly. The function
+ * automatically disables polling if needed.
+ */
+static void sc_app_abort_conn(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_conn(sc));
+
+ if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ return;
+ sc->flags |= SC_FL_ABRT_DONE;
+ ic->flags |= CF_READ_EVENT;
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (sc->flags & SC_FL_SHUT_DONE) {
+ sc_conn_shut(sc);
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+ else if (sc_cond_forward_shut(sc))
+ return sc_app_shut_conn(sc);
+}
+
+/*
+ * This function performs a shutdown-write on a stream connector attached to
+ * a connection in a connected or init state (it does nothing for other
+ * states). It either shuts the write side or marks itself as closed. The
+ * buffer flags are updated to reflect the new state. It also closes
+ * everything if the SC was marked as being in an error state. If there is a
+ * data-layer shutdown, it is called.
+ */
+static void sc_app_shut_conn(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_conn(sc));
+
+ sc->flags &= ~SC_FL_SHUT_WANTED;
+ if (sc->flags & SC_FL_SHUT_DONE)
+ return;
+ sc->flags |= SC_FL_SHUT_DONE;
+ oc->flags |= CF_WRITE_EVENT;
+ sc_set_hcto(sc);
+
+ switch (sc->state) {
+ case SC_ST_RDY:
+ case SC_ST_EST:
+ /* we have to shut before closing, otherwise some short messages
+ * may never leave the system, especially when there are remaining
+ * unread data in the socket input buffer, or when nolinger is set.
+ * However, if SC_FL_NOLINGER is explicitly set, we know there is
+ * no risk so we close both sides immediately.
+ */
+ if (sc->flags & SC_FL_NOLINGER) {
+ /* unclean data-layer shutdown, typically an aborted request
+ * or a forwarded shutdown from a client to a server due to
+ * option abortonclose. No need for the TLS layer to try to
+ * emit a shutdown message.
+ */
+ sc_conn_shutw(sc, CO_SHW_SILENT);
+ }
+ else {
+ /* clean data-layer shutdown. This only happens on the
+ * frontend side, or on the backend side when forwarding
+ * a client close in TCP mode or in HTTP TUNNEL mode
+ * while option abortonclose is set. We want the TLS
+ * layer to try to signal it to the peer before we close.
+ */
+ sc_conn_shutw(sc, CO_SHW_NORMAL);
+
+ if (!(sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && !(ic->flags & CF_DONT_READ))
+ return;
+ }
+
+ __fallthrough;
+ case SC_ST_CON:
+ /* we may have to close a pending connection, and mark the
+ * response buffer as abort
+ */
+ sc_conn_shut(sc);
+ __fallthrough;
+ case SC_ST_CER:
+ case SC_ST_QUE:
+ case SC_ST_TAR:
+ sc->state = SC_ST_DIS;
+ __fallthrough;
+ default:
+ sc->flags &= ~SC_FL_NOLINGER;
+ sc->flags |= SC_FL_ABRT_DONE;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+}
+
+/* This function is used for inter-stream connector calls. It is called by the
+ * consumer to inform the producer side that it may be interested in checking
+ * for free space in the buffer. Note that it intentionally does not update
+ * timeouts, so that we can still check them later at wake-up. This function is
+ * dedicated to connection-based stream connectors.
+ */
+static void sc_app_chk_rcv_conn(struct stconn *sc)
+{
+ BUG_ON(!sc_conn(sc));
+
+ /* (re)start reading */
+ if (sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ tasklet_wakeup(sc->wait_event.tasklet);
+}
+
+
+/* This function is used for inter-stream connector calls. It is called by the
+ * producer to inform the consumer side that it may be interested in checking
+ * for data in the buffer. Note that it intentionally does not update timeouts,
+ * so that we can still check them later at wake-up.
+ */
+static void sc_app_chk_snd_conn(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_conn(sc));
+
+ if (unlikely(!sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST) ||
+ (sc->flags & SC_FL_SHUT_DONE)))
+ return;
+
+ if (unlikely(!co_data(oc) && !sc_ep_have_ff_data(sc))) /* called with nothing to send ! */
+ return;
+
+ if (!sc_ep_have_ff_data(sc) && /* data wants to be fast-forwarded ASAP */
+ !sc_ep_test(sc, SE_FL_WAIT_DATA)) /* not waiting for data */
+ return;
+
+ if (!(sc->wait_event.events & SUB_RETRY_SEND))
+ sc_conn_send(sc);
+
+ if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING) || sc_is_conn_error(sc)) {
+ /* Write error on the file descriptor */
+ BUG_ON(sc_ep_test(sc, SE_FL_EOS|SE_FL_ERROR|SE_FL_ERR_PENDING) == (SE_FL_EOS|SE_FL_ERR_PENDING));
+ goto out_wakeup;
+ }
+
+ /* OK, so now we know that some data might have been sent, and that we may
+ * have to poll first. We have to do that too if the buffer is not empty.
+ */
+ if (!co_data(oc)) {
+ /* the connection is established but we can't write. Either the
+ * buffer is empty, or we just refrain from sending because the
+ * ->o limit was reached. Maybe we just wrote the last
+ * chunk and need to close.
+ */
+ if ((oc->flags & CF_AUTO_CLOSE) &&
+ ((sc->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) == SC_FL_SHUT_WANTED) &&
+ sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST)) {
+ sc_shutdown(sc);
+ goto out_wakeup;
+ }
+
+ if ((sc->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) == 0)
+ sc_ep_set(sc, SE_FL_WAIT_DATA);
+ }
+ else {
+ /* Otherwise there are remaining data to be sent in the buffer,
+ * which means we have to poll before doing so.
+ */
+ sc_ep_clr(sc, SE_FL_WAIT_DATA);
+ }
+
+ /* in case of special condition (error, shutdown, end of write...), we
+ * have to notify the task.
+ */
+ if (likely((sc->flags & SC_FL_SHUT_DONE) ||
+ ((oc->flags & CF_WRITE_EVENT) && sc->state < SC_ST_EST) ||
+ ((oc->flags & CF_WAKE_WRITE) &&
+ ((!co_data(oc) && !oc->to_forward) ||
+ !sc_state_in(sc->state, SC_SB_EST))))) {
+ out_wakeup:
+ if (!(sc->flags & SC_FL_DONT_WAKE))
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO);
+ }
+}
+
+/*
+ * This function performs a shutdown-read on a stream connector attached to an
+ * applet in a connected or init state (it does nothing for other states). It
+ * either shuts the read side or marks itself as closed. The buffer flags are
+ * updated to reflect the new state. If the stream connector has SC_FL_NOHALF,
+ * we also forward the close to the write side. The owner task is woken up if
+ * it exists.
+ */
+static void sc_app_abort_applet(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ return;
+ sc->flags |= SC_FL_ABRT_DONE;
+ ic->flags |= CF_READ_EVENT;
+
+ /* Note: on abort, we don't call the applet */
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (sc->flags & SC_FL_SHUT_DONE) {
+ appctx_shut(__sc_appctx(sc));
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+ else if (sc_cond_forward_shut(sc))
+ return sc_app_shut_applet(sc);
+}
+
+/*
+ * This function performs a shutdown-write on a stream connector attached to an
+ * applet in a connected or init state (it does nothing for other states). It
+ * either shuts the write side or marks itself as closed. The buffer flags are
+ * updated to reflect the new state. It also closes everything if the SC was
+ * marked as being in an error state. The owner task is woken up if it exists.
+ */
+static void sc_app_shut_applet(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ sc->flags &= ~SC_FL_SHUT_WANTED;
+ if (sc->flags & SC_FL_SHUT_DONE)
+ return;
+ sc->flags |= SC_FL_SHUT_DONE;
+ oc->flags |= CF_WRITE_EVENT;
+ sc_set_hcto(sc);
+
+ /* on shutw we always wake the applet up */
+ appctx_wakeup(__sc_appctx(sc));
+
+ switch (sc->state) {
+ case SC_ST_RDY:
+ case SC_ST_EST:
+ /* we have to shut before closing, otherwise some short messages
+ * may never leave the system, especially when there are remaining
+ * unread data in the socket input buffer, or when nolinger is set.
+ * However, if SC_FL_NOLINGER is explicitly set, we know there is
+ * no risk so we close both sides immediately.
+ */
+ if (!(sc->flags & (SC_FL_ERROR|SC_FL_NOLINGER|SC_FL_EOS|SC_FL_ABRT_DONE)) &&
+ !(ic->flags & CF_DONT_READ))
+ return;
+
+ __fallthrough;
+ case SC_ST_CON:
+ case SC_ST_CER:
+ case SC_ST_QUE:
+ case SC_ST_TAR:
+ /* Note that none of these states may happen with applets */
+ appctx_shut(__sc_appctx(sc));
+ sc->state = SC_ST_DIS;
+ __fallthrough;
+ default:
+ sc->flags &= ~SC_FL_NOLINGER;
+ sc->flags |= SC_FL_ABRT_DONE;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+}
+
+/* chk_rcv function for applets */
+static void sc_app_chk_rcv_applet(struct stconn *sc)
+{
+ BUG_ON(!sc_appctx(sc));
+
+ if (!sc_ep_have_ff_data(sc_opposite(sc))) {
+ /* (re)start reading */
+ appctx_wakeup(__sc_appctx(sc));
+ }
+}
+
+/* chk_snd function for applets */
+static void sc_app_chk_snd_applet(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ if (unlikely(sc->state != SC_ST_EST || (sc->flags & SC_FL_SHUT_DONE)))
+ return;
+
+ /* we only wake the applet up if it was waiting for some data and is ready to consume it */
+ if (!sc_ep_test(sc, SE_FL_WAIT_DATA|SE_FL_WONT_CONSUME))
+ return;
+
+ if (co_data(oc) || sc_ep_have_ff_data(sc)) {
+ /* (re)start sending */
+ appctx_wakeup(__sc_appctx(sc));
+ }
+}
+
+
+/* This function is designed to be called from within the stream handler to
+ * update the input channel's expiration timer and the stream connector's
+ * Rx flags based on the channel's flags. It needs to be called only once
+ * after the channel's flags have settled down, and before they are cleared,
+ * though it doesn't harm to call it as often as desired (it just slightly
+ * hurts performance). It must not be called from outside of the stream
+ * handler, as what it does will be used to compute the stream task's
+ * expiration.
+ */
+void sc_update_rx(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ return;
+
+ /* Unblock the SC if it needs room and the free space is large enough (0
+ * means it can always be unblocked). Do not unblock it if -1 was
+ * specified.
+ */
+ if (!sc->room_needed || (sc->room_needed > 0 && channel_recv_max(ic) >= sc->room_needed))
+ sc_have_room(sc);
+
+ /* Read not closed, update FD status and timeout for reads */
+ if (ic->flags & CF_DONT_READ)
+ sc_wont_read(sc);
+ else
+ sc_will_read(sc);
+
+ sc_chk_rcv(sc);
+}
+
+/* This function is designed to be called from within the stream handler to
+ * update the output channel's expiration timer and the stream connector's
+ * Tx flags based on the channel's flags. It needs to be called only once
+ * after the channel's flags have settled down, and before they are cleared,
+ * though it doesn't harm to call it as often as desired (it just slightly
+ * hurts performance). It must not be called from outside of the stream
+ * handler, as what it does will be used to compute the stream task's
+ * expiration.
+ */
+void sc_update_tx(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ if (sc->flags & SC_FL_SHUT_DONE)
+ return;
+
+ /* Write not closed, update FD status and timeout for writes */
+ if (!co_data(oc)) {
+ /* stop writing */
+ if (!sc_ep_test(sc, SE_FL_WAIT_DATA)) {
+ if ((sc->flags & SC_FL_SHUT_WANTED) == 0)
+ sc_ep_set(sc, SE_FL_WAIT_DATA);
+ }
+ return;
+ }
+
+ /* (re)start writing */
+ sc_ep_clr(sc, SE_FL_WAIT_DATA);
+}
+
+/* This function is the equivalent of sc_update() except that it's designed to
+ * be called from outside the stream handlers, typically the lower layers
+ * (applets, connections) after I/O completion. After updating the stream
+ * connector and timeouts, it will try to forward what can be forwarded, then
+ * wake the associated task up if an important event requires special handling.
+ * It may update SE_FL_WAIT_DATA and/or SC_FL_NEED_ROOM, which the callers are
+ * encouraged to watch in order to take appropriate action.
+ * It should not be called from within the stream itself, sc_update() is
+ * designed for this. Please do not statify this function, it's often present
+ * in backtraces, it's useful to recognize it.
+ */
+void sc_notify(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+ struct stconn *sco = sc_opposite(sc);
+ struct task *task = sc_strm_task(sc);
+
+ /* process consumer side */
+ if (!co_data(oc)) {
+ struct connection *conn = sc_conn(sc);
+
+ if (((sc->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) == SC_FL_SHUT_WANTED) &&
+ (sc->state == SC_ST_EST) && (!conn || !(conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS))))
+ sc_shutdown(sc);
+ }
+
+ /* indicate that we may be waiting for data from the output channel or
+ * we're about to close and can't expect more data if SC_FL_SHUT_WANTED is there.
+ */
+ if (!(sc->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)))
+ sc_ep_set(sc, SE_FL_WAIT_DATA);
+ else if ((sc->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) == SC_FL_SHUT_WANTED)
+ sc_ep_clr(sc, SE_FL_WAIT_DATA);
+
+ if (oc->flags & CF_DONT_READ)
+ sc_wont_read(sco);
+ else
+ sc_will_read(sco);
+
+ /* Notify the other side when we've injected data into the IC that
+ * needs to be forwarded. We can do fast-forwarding as soon as there
+ * are output data, but we avoid doing this if some of the data are
+ * not yet scheduled for being forwarded, because it is very likely
+ * that it will be done again immediately afterwards once the following
+ * data are parsed (eg: HTTP chunking). We only clear SC_FL_NEED_ROOM
+ * once we've emptied *some* of the output buffer, and not just when
+ * there is available room, because applets are often forced to stop
+ * before the buffer is full. We must not stop based on input data
+ * alone because an HTTP parser might need more data to complete the
+ * parsing.
+ */
+ if (sc_ep_have_ff_data(sc_opposite(sc)) ||
+ (co_data(ic) && sc_ep_test(sco, SE_FL_WAIT_DATA) &&
+ (!(sc->flags & SC_FL_SND_EXP_MORE) || channel_full(ic, co_data(ic)) || channel_input_data(ic) == 0))) {
+ int new_len, last_len;
+
+ last_len = co_data(ic) + sc_ep_ff_data(sco);
+ sc_chk_snd(sco);
+ new_len = co_data(ic) + sc_ep_ff_data(sco);
+
+ /* check if the consumer has freed some space either in the
+ * buffer or in the pipe.
+ */
+ if (!sc->room_needed || (new_len < last_len && (sc->room_needed < 0 || channel_recv_max(ic) >= sc->room_needed)))
+ sc_have_room(sc);
+ }
+
+ if (!(ic->flags & CF_DONT_READ))
+ sc_will_read(sc);
+
+ sc_chk_rcv(sc);
+ sc_chk_rcv(sco);
+
+ /* wake the task up only when needed */
+ if (/* changes on the production side that must be handled:
+ * - An error on receipt: SC_FL_ERROR
+ * - A read event: shutdown for reads (CF_READ_EVENT + EOS/ABRT_DONE)
+ * end of input (CF_READ_EVENT + SC_FL_EOI)
+ * data received and no fast-forwarding (CF_READ_EVENT + !to_forward)
+ * read event while consumer side is not established (CF_READ_EVENT + sco->state != SC_ST_EST)
+ */
+ ((ic->flags & CF_READ_EVENT) && ((sc->flags & SC_FL_EOI) || (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) || !ic->to_forward || sco->state != SC_ST_EST)) ||
+ (sc->flags & SC_FL_ERROR) ||
+
+ /* changes on the consumption side */
+ sc_ep_test(sc, SE_FL_ERR_PENDING) ||
+ ((oc->flags & CF_WRITE_EVENT) &&
+ ((sc->state < SC_ST_EST) ||
+ (sc->flags & SC_FL_SHUT_DONE) ||
+ (((oc->flags & CF_WAKE_WRITE) ||
+ (!(oc->flags & CF_AUTO_CLOSE) &&
+ !(sc->flags & (SC_FL_SHUT_WANTED|SC_FL_SHUT_DONE)))) &&
+ (sco->state != SC_ST_EST ||
+ (!co_data(oc) && !oc->to_forward)))))) {
+ task_wakeup(task, TASK_WOKEN_IO);
+ }
+ else {
+ /* Update expiration date for the task and requeue it if not already expired */
+ if (!tick_is_expired(task->expire, now_ms)) {
+ task->expire = tick_first(task->expire, sc_ep_rcv_ex(sc));
+ task->expire = tick_first(task->expire, sc_ep_snd_ex(sc));
+ task->expire = tick_first(task->expire, sc_ep_rcv_ex(sco));
+ task->expire = tick_first(task->expire, sc_ep_snd_ex(sco));
+ task->expire = tick_first(task->expire, ic->analyse_exp);
+ task->expire = tick_first(task->expire, oc->analyse_exp);
+ task->expire = tick_first(task->expire, __sc_strm(sc)->conn_exp);
+
+ /* WARNING: Don't forget to remove this BUG_ON before 2.9.0 */
+ BUG_ON(tick_is_expired(task->expire, now_ms));
+ task_queue(task);
+ }
+ }
+
+ if (ic->flags & CF_READ_EVENT)
+ sc->flags &= ~SC_FL_RCV_ONCE;
+}
+
+/*
+ * This function propagates an end-of-stream received on a socket-based connection.
+ * It updates the stream connector. If the stream connector has SC_FL_NOHALF,
+ * the close is also forwarded to the write side as an abort.
+ */
+static void sc_conn_eos(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_conn(sc));
+
+ if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ return;
+ sc->flags |= SC_FL_EOS;
+ ic->flags |= CF_READ_EVENT;
+ sc_ep_report_read_activity(sc);
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (sc->flags & SC_FL_SHUT_DONE)
+ goto do_close;
+
+ if (sc_cond_forward_shut(sc)) {
+ /* we want to immediately forward this close to the write side */
+ /* force flag on ssl to keep stream in cache */
+ sc_conn_shutw(sc, CO_SHW_SILENT);
+ goto do_close;
+ }
+
+ /* otherwise that's just a normal read shutdown */
+ return;
+
+ do_close:
+ /* OK we completely close the socket here just as if we went through sc_shut[rw]() */
+ sc_conn_shut(sc);
+
+ sc->flags &= ~SC_FL_SHUT_WANTED;
+ sc->flags |= SC_FL_SHUT_DONE;
+
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ return;
+}
+
+/*
+ * This is the callback which is called by the connection layer to receive data
+ * into the buffer from the connection. It iterates over the mux layer's
+ * rcv_buf function. Please do not statify this function, it's often present in
+ * backtraces, it's useful to recognize it.
+ */
+int sc_conn_recv(struct stconn *sc)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct channel *ic = sc_ic(sc);
+ int ret, max, cur_read = 0;
+ int read_poll = MAX_READ_POLL_LOOPS;
+ int flags = 0;
+
+ /* If not established yet, do nothing. */
+ if (sc->state != SC_ST_EST)
+ return 0;
+
+ /* If another call to sc_conn_recv() failed, and we subscribed to
+ * recv events already, give up now.
+ */
+ if ((sc->wait_event.events & SUB_RETRY_RECV) || sc_waiting_room(sc))
+ return 0;
+
+ /* maybe we were called immediately after an asynchronous abort */
+ if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ return 1;
+
+ /* we must wait because the mux is not installed yet */
+ if (!conn->mux)
+ return 0;
+
+ /* stop immediately on errors. Note that we DON'T want to stop on
+ * POLL_ERR, as the poller might report a write error while there
+ * are still data available in the recv buffer. This typically
+ * happens when we send too large a request to a backend server
+ * which rejects it before reading it all.
+ */
+ if (!sc_ep_test(sc, SE_FL_RCV_MORE)) {
+ if (!conn_xprt_ready(conn))
+ return 0;
+ if (sc_ep_test(sc, SE_FL_ERROR))
+ goto end_recv;
+ }
+
+ /* prepare to detect if the mux needs more room */
+ sc_ep_clr(sc, SE_FL_WANT_ROOM);
+
+ if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) && !co_data(ic) &&
+ global.tune.idle_timer &&
+ (unsigned short)(now_ms - ic->last_read) >= global.tune.idle_timer) {
+ /* The buffer was empty and nothing was transferred for more
+ * than one second. This was caused by a pause and not by
+ * congestion. Reset any streaming mode to reduce latency.
+ */
+ ic->xfer_small = 0;
+ ic->xfer_large = 0;
+ ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
+ }
+
+#if defined(USE_LINUX_SPLICE)
+ /* Detect if the splicing is possible depending on the stream policy */
+ if ((global.tune.options & GTUNE_USE_SPLICE) &&
+ (ic->to_forward >= MIN_SPLICE_FORWARD) &&
+ ((!(sc->flags & SC_FL_ISBACK) && ((strm_fe(__sc_strm(sc))->options2|__sc_strm(sc)->be->options2) & PR_O2_SPLIC_REQ)) ||
+ ((sc->flags & SC_FL_ISBACK) && ((strm_fe(__sc_strm(sc))->options2|__sc_strm(sc)->be->options2) & PR_O2_SPLIC_RTR)) ||
+ ((ic->flags & CF_STREAMER_FAST) && ((strm_sess(__sc_strm(sc))->fe->options2|__sc_strm(sc)->be->options2) & PR_O2_SPLIC_AUT))))
+ flags |= CO_RFL_MAY_SPLICE;
+#endif
+
+ /* First, let's see if we may fast-forward data from a side to the other
+ * one without using the channel buffer.
+ */
+ if (sc_is_fastfwd_supported(sc)) {
+ if (channel_data(ic)) {
+ /* We're embarrassed, there are already data pending in
+ * the buffer and we don't want to have them at two
+ * locations at a time. Let's indicate we need some
+ * place and ask the consumer to hurry.
+ */
+ flags |= CO_RFL_BUF_FLUSH;
+ goto abort_fastfwd;
+ }
+ ret = conn->mux->fastfwd(sc, ic->to_forward, flags);
+ if (ret < 0)
+ goto abort_fastfwd;
+ else if (ret > 0) {
+ if (ic->to_forward != CHN_INFINITE_FORWARD)
+ ic->to_forward -= ret;
+ ic->total += ret;
+ cur_read += ret;
+ ic->flags |= CF_READ_EVENT;
+ }
+
+ if (sc_ep_test(sc, SE_FL_EOS | SE_FL_ERROR))
+ goto end_recv;
+
+ if (sc_ep_test(sc, SE_FL_WANT_ROOM))
+ sc_need_room(sc, -1);
+
+ if (sc_ep_test(sc, SE_FL_MAY_FASTFWD_PROD) && ic->to_forward)
+ goto done_recv;
+ }
+
+ abort_fastfwd:
+ /* now we'll need an input buffer for the stream */
+ if (!sc_alloc_ibuf(sc, &(__sc_strm(sc)->buffer_wait)))
+ goto end_recv;
+
+ /* For an HTX stream, if the buffer is stuck (no output data with some
+ * input data) and if the HTX message is fragmented or if its free space
+ * wraps, we force an HTX defragmentation. It is a way to get a
+ * contiguous free space and to let the mux copy as much data as
+ * possible.
+ *
+ * NOTE: A possible optimization would be to let the mux decide if a defrag
+ * is required or not, depending on the amount of data to be transferred.
+ */
+ if (IS_HTX_STRM(__sc_strm(sc)) && !co_data(ic)) {
+ struct htx *htx = htxbuf(&ic->buf);
+
+ if (htx_is_not_empty(htx) && ((htx->flags & HTX_FL_FRAGMENTED) || htx_space_wraps(htx)))
+ htx_defrag(htx, NULL, 0);
+ }
+
+ /* Instruct the mux that it must subscribe for read events */
+ if (!(sc->flags & SC_FL_ISBACK) && /* for frontend conns only */
+ (sc_opposite(sc)->state != SC_ST_INI) && /* before backend connection setup */
+ (__sc_strm(sc)->be->options & PR_O_ABRT_CLOSE)) /* if abortonclose option is set for the current backend */
+ flags |= CO_RFL_KEEP_RECV;
+
+ /* Important note: if we're called with POLL_IN|POLL_HUP, it means the read polling
+ * was enabled, which implies that the recv buffer was not full. So we have a guarantee
+ * that if such an event is not handled above in splice, it will be handled here by
+ * recv().
+ */
+ while (sc_ep_test(sc, SE_FL_RCV_MORE) ||
+ (!(conn->flags & CO_FL_HANDSHAKE) &&
+ (!sc_ep_test(sc, SE_FL_ERROR | SE_FL_EOS)) && !(sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)))) {
+ int cur_flags = flags;
+
+ /* Compute transient CO_RFL_* flags */
+ if (co_data(ic)) {
+ cur_flags |= (CO_RFL_BUF_WET | CO_RFL_BUF_NOT_STUCK);
+ }
+
+ /* <max> may be null. This is the mux responsibility to set
+ * SE_FL_RCV_MORE on the SC if more space is needed.
+ */
+ max = channel_recv_max(ic);
+ ret = conn->mux->rcv_buf(sc, &ic->buf, max, cur_flags);
+
+ if (sc_ep_test(sc, SE_FL_WANT_ROOM)) {
+ /* SE_FL_WANT_ROOM must not be reported if the channel's
+ * buffer is empty.
+ */
+ BUG_ON(c_empty(ic));
+
+ sc_need_room(sc, channel_recv_max(ic) + 1);
+ /* Add READ_PARTIAL because some data are pending but
+ * cannot be xferred to the channel
+ */
+ ic->flags |= CF_READ_EVENT;
+ sc_ep_report_read_activity(sc);
+ }
+
+ if (ret <= 0) {
+ /* if we refrained from reading because we asked for a
+ * flush to satisfy rcv_pipe(), we must not subscribe
+ * and instead report that there's not enough room
+ * here to proceed.
+ */
+ if (flags & CO_RFL_BUF_FLUSH)
+ sc_need_room(sc, -1);
+ break;
+ }
+
+ cur_read += ret;
+
+ /* if we're allowed to directly forward data, we must update ->o */
+ if (ic->to_forward && !(sc_opposite(sc)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))) {
+ unsigned long fwd = ret;
+ if (ic->to_forward != CHN_INFINITE_FORWARD) {
+ if (fwd > ic->to_forward)
+ fwd = ic->to_forward;
+ ic->to_forward -= fwd;
+ }
+ c_adv(ic, fwd);
+ }
+
+ ic->flags |= CF_READ_EVENT;
+ ic->total += ret;
+
+ /* End-of-input reached, we can leave. In this case, it is
+ * important to break the loop to not block the SC because of
+ * the channel's policies. This way, we are still able to receive
+ * shutdowns.
+ */
+ if (sc_ep_test(sc, SE_FL_EOI))
+ break;
+
+ if ((sc->flags & SC_FL_RCV_ONCE) || --read_poll <= 0) {
+ /* we don't expect to read more data */
+ sc_wont_read(sc);
+ break;
+ }
+
+ /* if too many bytes were missing from last read, it means that
+ * it's pointless trying to read again because the system does
+ * not have them in buffers.
+ */
+ if (ret < max) {
+ /* if a streamer has read few data, it may be because we
+ * have exhausted system buffers. It's not worth trying
+ * again.
+ */
+ if (ic->flags & CF_STREAMER) {
+ /* we're stopped by the channel's policy */
+ sc_wont_read(sc);
+ break;
+ }
+
+ /* if we read a large block smaller than what we requested,
+ * it's almost certain we'll never get anything more.
+ */
+ if (ret >= global.tune.recv_enough) {
+ /* we're stopped by the channel's policy */
+ sc_wont_read(sc);
+ break;
+ }
+ }
+
+ /* if we are waiting for more space, don't try to read more data
+ * right now.
+ */
+ if (sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM))
+ break;
+ } /* while !flags */
+
+ done_recv:
+ if (!cur_read)
+ se_have_no_more_data(sc->sedesc);
+ else {
+ if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) &&
+ (cur_read <= ic->buf.size / 2)) {
+ ic->xfer_large = 0;
+ ic->xfer_small++;
+ if (ic->xfer_small >= 3) {
+ /* we have read less than half of the buffer in
+ * one pass, and this happened at least 3 times.
+ * This is definitely not a streamer.
+ */
+ ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
+ }
+ else if (ic->xfer_small >= 2) {
+ /* if the buffer has been at least half full twice,
+ * we receive faster than we send, so at least it
+ * is not a "fast streamer".
+ */
+ ic->flags &= ~CF_STREAMER_FAST;
+ }
+ }
+ else if (!(ic->flags & CF_STREAMER_FAST) && (cur_read >= channel_data_limit(ic))) {
+ /* we read a full buffer at once */
+ ic->xfer_small = 0;
+ ic->xfer_large++;
+ if (ic->xfer_large >= 3) {
+ /* we call this buffer a fast streamer if it manages
+ * to be filled in one call 3 consecutive times.
+ */
+ ic->flags |= (CF_STREAMER | CF_STREAMER_FAST);
+ }
+ }
+ else {
+ ic->xfer_small = 0;
+ ic->xfer_large = 0;
+ }
+ ic->last_read = now_ms;
+ sc_ep_report_read_activity(sc);
+ }
+
+ end_recv:
+ ret = (cur_read != 0);
+
+ /* Report EOI on the channel if it was reached from the mux point of
+ * view. */
+ if (sc_ep_test(sc, SE_FL_EOI) && !(sc->flags & SC_FL_EOI)) {
+ sc_ep_report_read_activity(sc);
+ sc->flags |= SC_FL_EOI;
+ ic->flags |= CF_READ_EVENT;
+ ret = 1;
+ }
+
+ if (sc_ep_test(sc, SE_FL_EOS)) {
+ /* we received a shutdown */
+ if (ic->flags & CF_AUTO_CLOSE)
+ sc_schedule_shutdown(sc_opposite(sc));
+ sc_conn_eos(sc);
+ ret = 1;
+ }
+
+ if (sc_ep_test(sc, SE_FL_ERROR)) {
+ sc->flags |= SC_FL_ERROR;
+ ret = 1;
+ }
+ else if (!cur_read &&
+ !(sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM)) &&
+ !(sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))) {
+ /* Subscribe to receive events if we're blocking on I/O */
+ conn->mux->subscribe(sc, SUB_RETRY_RECV, &sc->wait_event);
+ se_have_no_more_data(sc->sedesc);
+ }
+ else {
+ se_have_more_data(sc->sedesc);
+ ret = 1;
+ }
+
+ return ret;
+}
+
+/* This tries to perform a synchronous receive on the stream connector to
+ * try to collect the last arrived data. In practice it's only implemented
+ * for connection-based stconns. Returns 0 if nothing was done, non-zero if
+ * new data or a shutdown were collected. This may result in some delayed
+ * receive calls being programmed and performed later, though it doesn't
+ * provide any such guarantee.
+ */
+int sc_conn_sync_recv(struct stconn *sc)
+{
+ if (!sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST))
+ return 0;
+
+ if (!sc_mux_ops(sc))
+ return 0; // only mux-based stconns are supported
+
+ if (sc->wait_event.events & SUB_RETRY_RECV)
+ return 0; // already subscribed
+
+ if (!sc_is_recv_allowed(sc))
+ return 0; // already failed
+
+ return sc_conn_recv(sc);
+}
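+
+/* Sketch of the intended call site: the stream handler may invoke this
+ * opportunistically after processing, e.g. (illustrative):
+ *
+ *   if (sc_conn_sync_recv(scf))
+ *       latest_read = now_ms;   // fresh data or a shutdown was collected
+ */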
+
+/*
+ * This function is called to send buffer data to a stream socket.
+ * It calls the mux layer's snd_buf function. It relies on the
+ * caller to commit polling changes. The caller should check conn->flags
+ * for errors. Please do not statify this function, it's often present in
+ * backtraces, it's useful to recognize it.
+ */
+int sc_conn_send(struct stconn *sc)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct stconn *sco = sc_opposite(sc);
+ struct stream *s = __sc_strm(sc);
+ struct channel *oc = sc_oc(sc);
+ int ret;
+ int did_send = 0;
+
+ if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING) || sc_is_conn_error(sc)) {
+ /* We're probably there because the tasklet was woken up,
+ * but process_stream() ran before, detected there was an
+ * error and put the SC back to SC_ST_TAR. There's still
+ * CO_FL_ERROR on the connection but we don't want to add
+ * SE_FL_ERROR back, so give up
+ */
+ if (sc->state < SC_ST_CON)
+ return 0;
+ BUG_ON(sc_ep_test(sc, SE_FL_EOS|SE_FL_ERROR|SE_FL_ERR_PENDING) == (SE_FL_EOS|SE_FL_ERR_PENDING));
+ return 1;
+ }
+
+ /* We're already waiting to be able to send, give up */
+ if (sc->wait_event.events & SUB_RETRY_SEND)
+ return 0;
+
+ /* we might have been called just after an asynchronous shutw */
+ if (sc->flags & SC_FL_SHUT_DONE)
+ return 1;
+
+ /* we must wait because the mux is not installed yet */
+ if (!conn->mux)
+ return 0;
+
+ if (sc_ep_have_ff_data(sc)) {
+ unsigned int send_flag = 0;
+
+ if ((!(sc->flags & (SC_FL_SND_ASAP|SC_FL_SND_NEVERWAIT)) &&
+ ((oc->to_forward && oc->to_forward != CHN_INFINITE_FORWARD) ||
+ (sc->flags & SC_FL_SND_EXP_MORE) ||
+ (IS_HTX_STRM(s) &&
+ (!(sco->flags & (SC_FL_EOI|SC_FL_EOS|SC_FL_ABRT_DONE)) && htx_expect_more(htxbuf(&oc->buf)))))) ||
+ ((oc->flags & CF_ISRESP) &&
+ (oc->flags & CF_AUTO_CLOSE) &&
+ (sc->flags & SC_FL_SHUT_WANTED)))
+ send_flag |= CO_SFL_MSG_MORE;
+
+ if (oc->flags & CF_STREAMER)
+ send_flag |= CO_SFL_STREAMER;
+
+ ret = conn->mux->resume_fastfwd(sc, send_flag);
+ if (ret > 0)
+ did_send = 1;
+
+ if (sc_ep_have_ff_data(sc))
+ goto end;
+ }
+
+ /* At this point, the pipe is empty, but we may still have data pending
+ * in the normal buffer.
+ */
+ if (co_data(oc)) {
+ /* when we're here, we already know that there is no spliced
+ * data left, and that there are sendable buffered data.
+ */
+
+ /* check if we want to inform the kernel that we're interested in
+ * sending more data after this call. We want this if :
+ * - we're about to close after this last send and want to merge
+ * the ongoing FIN with the last segment.
+ * - we know we can't send everything at once and must get back
+ * here because of unaligned data
+ * - there is still a finite amount of data to forward
+ * The test is arranged so that the most common case does only 2
+ * tests.
+ */
+ unsigned int send_flag = 0;
+
+ if ((!(sc->flags & (SC_FL_SND_ASAP|SC_FL_SND_NEVERWAIT)) &&
+ ((oc->to_forward && oc->to_forward != CHN_INFINITE_FORWARD) ||
+ (sc->flags & SC_FL_SND_EXP_MORE) ||
+ (IS_HTX_STRM(s) &&
+ (!(sco->flags & (SC_FL_EOI|SC_FL_EOS|SC_FL_ABRT_DONE)) && htx_expect_more(htxbuf(&oc->buf)))))) ||
+ ((oc->flags & CF_ISRESP) &&
+ (oc->flags & CF_AUTO_CLOSE) &&
+ (sc->flags & SC_FL_SHUT_WANTED)))
+ send_flag |= CO_SFL_MSG_MORE;
+
+ if (oc->flags & CF_STREAMER)
+ send_flag |= CO_SFL_STREAMER;
+
+ if (s->txn && s->txn->flags & TX_L7_RETRY && !b_data(&s->txn->l7_buffer)) {
+ /* If we want to be able to do L7 retries, copy
+ * the data we're about to send, so that we are able
+ * to resend them if needed
+ */
+ /* Try to allocate a buffer if we had none.
+ * If it fails, the next test will just
+ * disable the l7 retries by setting
+ * l7_conn_retries to 0.
+ */
+ if (s->txn->req.msg_state != HTTP_MSG_DONE)
+ s->txn->flags &= ~TX_L7_RETRY;
+ else {
+ if (b_alloc(&s->txn->l7_buffer) == NULL)
+ s->txn->flags &= ~TX_L7_RETRY;
+ else {
+ memcpy(b_orig(&s->txn->l7_buffer),
+ b_orig(&oc->buf),
+ b_size(&oc->buf));
+ s->txn->l7_buffer.head = co_data(oc);
+ b_add(&s->txn->l7_buffer, co_data(oc));
+ }
+
+ }
+ }
+
+ ret = conn->mux->snd_buf(sc, &oc->buf, co_data(oc), send_flag);
+ if (ret > 0) {
+ did_send = 1;
+ c_rew(oc, ret);
+ c_realign_if_empty(oc);
+
+ if (!co_data(oc)) {
+ /* Always clear both flags once everything has been sent, they're one-shot */
+ sc->flags &= ~(SC_FL_SND_ASAP|SC_FL_SND_EXP_MORE);
+ }
+ /* if some data remain in the buffer, it's only because the
+ * system buffers are full, we will try next time.
+ */
+ }
+ }
+
+ end:
+ if (did_send) {
+ oc->flags |= CF_WRITE_EVENT | CF_WROTE_DATA;
+ if (sc->state == SC_ST_CON)
+ sc->state = SC_ST_RDY;
+ }
+
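+	/* report room to the opposite SC if it was not waiting for a specific
+	 * amount, or if this send released enough of it (room_needed < 0 means
+	 * any amount will do).
+	 */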
+ if (!sco->room_needed || (did_send && (sco->room_needed < 0 || channel_recv_max(sc_oc(sc)) >= sco->room_needed)))
+ sc_have_room(sco);
+
+ if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING)) {
+ oc->flags |= CF_WRITE_EVENT;
+ BUG_ON(sc_ep_test(sc, SE_FL_EOS|SE_FL_ERROR|SE_FL_ERR_PENDING) == (SE_FL_EOS|SE_FL_ERR_PENDING));
+ if (sc_ep_test(sc, SE_FL_ERROR))
+ sc->flags |= SC_FL_ERROR;
+ return 1;
+ }
+
+ /* FIXME: Must be reviewed for FF */
+ if (!co_data(oc) && !sc_ep_have_ff_data(sc)) {
+ if (did_send)
+ sc_ep_report_send_activity(sc);
+ /* If fast-forwarding is blocked, unblock it now to check for
+ * receive on the other side
+ */
+ if (sc->sedesc->iobuf.flags & IOBUF_FL_FF_BLOCKED) {
+ sc->sedesc->iobuf.flags &= ~IOBUF_FL_FF_BLOCKED;
+ sc_have_room(sco);
+ did_send = 1;
+ }
+ }
+ else {
+ /* We couldn't send all of our data, let the mux know we'd like to send more */
+ conn->mux->subscribe(sc, SUB_RETRY_SEND, &sc->wait_event);
+ if (sc_state_in(sc->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO))
+ sc_ep_report_blocked_send(sc, did_send);
+ }
+
+ return did_send;
+}
+
+/* perform a synchronous send() for the stream connector. The CF_WRITE_EVENT
+ * flag is cleared prior to the attempt, and will possibly be updated in case
+ * of success.
+ */
+void sc_conn_sync_send(struct stconn *sc)
+{
+ struct channel *oc = sc_oc(sc);
+
+ oc->flags &= ~CF_WRITE_EVENT;
+
+ if (sc->flags & SC_FL_SHUT_DONE)
+ return;
+
+ if (!co_data(oc))
+ return;
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (!sc_mux_ops(sc))
+ return;
+
+ sc_conn_send(sc);
+}
+
+/* Called by I/O handlers after completion. It propagates
+ * connection flags to the stream connector, updates the stream (which may or
+ * may not take this opportunity to try to forward data), then updates the
+ * connection's polling based on the channels and stream connector's final
+ * states. The function always returns 0. Please do not statify this function,
+ * it's often present in backtraces, it's useful to recognize it.
+ */
+int sc_conn_process(struct stconn *sc)
+{
+ struct connection *conn = __sc_conn(sc);
+ struct channel *ic = sc_ic(sc);
+ struct channel *oc = sc_oc(sc);
+
+ BUG_ON(!conn);
+
+ /* If we have data to send, try it now */
+ if ((co_data(oc) || sc_ep_have_ff_data(sc)) &&
+ !(sc->wait_event.events & SUB_RETRY_SEND))
+ sc_conn_send(sc);
+
+ /* First step, report to the stream connector what was detected at the
+ * connection layer : errors and connection establishment.
+	 * Only add SC_FL_ERROR if we're connected or attempting to
+	 * connect. We may get there because we got woken up, but only ran
+	 * after process_stream() noticed there was an error and decided
+	 * to retry to connect; the connection may still have CO_FL_ERROR,
+	 * and we don't want to add SC_FL_ERROR back.
+ *
+ * Note: This test is only required because sc_conn_process is also the SI
+ * wake callback. Otherwise sc_conn_recv()/sc_conn_send() already take
+ * care of it.
+ */
+
+ if (sc->state >= SC_ST_CON) {
+ if (sc_is_conn_error(sc))
+ sc->flags |= SC_FL_ERROR;
+ }
+
+ /* If we had early data, and the handshake ended, then
+ * we can remove the flag, and attempt to wake the task up,
+ * in the event there's an analyser waiting for the end of
+ * the handshake.
+ */
+ if (!(conn->flags & (CO_FL_WAIT_XPRT | CO_FL_EARLY_SSL_HS)) &&
+ sc_ep_test(sc, SE_FL_WAIT_FOR_HS)) {
+ sc_ep_clr(sc, SE_FL_WAIT_FOR_HS);
+ task_wakeup(sc_strm_task(sc), TASK_WOKEN_MSG);
+ }
+
+ if (!sc_state_in(sc->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO) &&
+ (conn->flags & CO_FL_WAIT_XPRT) == 0) {
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ oc->flags |= CF_WRITE_EVENT;
+ if (sc->state == SC_ST_CON)
+ sc->state = SC_ST_RDY;
+ }
+
+ /* Report EOS on the channel if it was reached from the mux point of
+ * view.
+ *
+ * Note: This test is only required because sc_conn_process is also the SI
+ * wake callback. Otherwise sc_conn_recv()/sc_conn_send() already take
+ * care of it.
+ */
+ if (sc_ep_test(sc, SE_FL_EOS) && !(sc->flags & SC_FL_EOS)) {
+ /* we received a shutdown */
+ if (ic->flags & CF_AUTO_CLOSE)
+ sc_schedule_shutdown(sc_opposite(sc));
+ sc_conn_eos(sc);
+ }
+
+ /* Report EOI on the channel if it was reached from the mux point of
+ * view.
+ *
+ * Note: This test is only required because sc_conn_process is also the SI
+ * wake callback. Otherwise sc_conn_recv()/sc_conn_send() already take
+ * care of it.
+ */
+ if (sc_ep_test(sc, SE_FL_EOI) && !(sc->flags & SC_FL_EOI)) {
+ sc->flags |= SC_FL_EOI;
+ ic->flags |= CF_READ_EVENT;
+ sc_ep_report_read_activity(sc);
+ }
+
+ if (sc_ep_test(sc, SE_FL_ERROR))
+ sc->flags |= SC_FL_ERROR;
+
+ /* Second step : update the stream connector and channels, try to forward any
+ * pending data, then possibly wake the stream up based on the new
+ * stream connector status.
+ */
+ sc_notify(sc);
+ stream_release_buffers(__sc_strm(sc));
+ return 0;
+}
+
+/* This is the ->process() function for any stream connector's wait_event task.
+ * It's assigned during the stream connector's initialization, for any type of
+ * stream connector. Thus it is always safe to perform a tasklet_wakeup() on a
+ * stream connector, as the presence of the SC is checked there.
+ */
+struct task *sc_conn_io_cb(struct task *t, void *ctx, unsigned int state)
+{
+ struct stconn *sc = ctx;
+ int ret = 0;
+
+ if (!sc_conn(sc))
+ return t;
+
+ if (!(sc->wait_event.events & SUB_RETRY_SEND) && (co_data(sc_oc(sc)) || sc_ep_have_ff_data(sc) || (sc->sedesc->iobuf.flags & IOBUF_FL_FF_BLOCKED)))
+ ret = sc_conn_send(sc);
+ if (!(sc->wait_event.events & SUB_RETRY_RECV))
+ ret |= sc_conn_recv(sc);
+ if (ret != 0)
+ sc_conn_process(sc);
+
+ stream_release_buffers(__sc_strm(sc));
+ return t;
+}
+
+/*
+ * This function propagates an end-of-stream received from an applet. It
+ * updates the stream connector. If it is already shut, the applet is
+ * released. Otherwise, we try to forward the shutdown, immediately or ASAP.
+ */
+static void sc_applet_eos(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE))
+ return;
+ sc->flags |= SC_FL_EOS;
+ ic->flags |= CF_READ_EVENT;
+ sc_ep_report_read_activity(sc);
+
+ /* Note: on abort, we don't call the applet */
+
+ if (!sc_state_in(sc->state, SC_SB_CON|SC_SB_RDY|SC_SB_EST))
+ return;
+
+ if (sc->flags & SC_FL_SHUT_DONE) {
+ appctx_shut(__sc_appctx(sc));
+ sc->state = SC_ST_DIS;
+ if (sc->flags & SC_FL_ISBACK)
+ __sc_strm(sc)->conn_exp = TICK_ETERNITY;
+ }
+ else if (sc_cond_forward_shut(sc))
+ return sc_app_shut_applet(sc);
+}
+
+/* Callback to be used by applet handlers upon completion. It updates the stream
+ * (which may or may not take this opportunity to try to forward data), then
+ * may re-enable the applet based on the channels and stream connector's final
+ * states. Please do not statify this function, it's often present in backtraces,
+ * it's useful to recognize it.
+ */
+int sc_applet_process(struct stconn *sc)
+{
+ struct channel *ic = sc_ic(sc);
+
+ BUG_ON(!sc_appctx(sc));
+
+ /* Report EOI on the channel if it was reached from the applet point of
+ * view. */
+ if (sc_ep_test(sc, SE_FL_EOI) && !(sc->flags & SC_FL_EOI)) {
+ sc_ep_report_read_activity(sc);
+ sc->flags |= SC_FL_EOI;
+ ic->flags |= CF_READ_EVENT;
+ }
+
+ if (sc_ep_test(sc, SE_FL_ERROR))
+ sc->flags |= SC_FL_ERROR;
+
+ if (sc_ep_test(sc, SE_FL_EOS)) {
+ /* we received a shutdown */
+ sc_applet_eos(sc);
+ }
+
+ BUG_ON(sc_ep_test(sc, SE_FL_HAVE_NO_DATA|SE_FL_EOI) == SE_FL_EOI);
+
+ /* If the applet wants to write and the channel is closed, it's a
+ * broken pipe and it must be reported.
+ */
+ if (!sc_ep_test(sc, SE_FL_HAVE_NO_DATA) && (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)))
+ sc_ep_set(sc, SE_FL_ERROR);
+
+	/* automatically mark the applet as having data available if it
+	 * reported being blocked by the channel.
+ */
+ if ((sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM)) ||
+ sc_ep_test(sc, SE_FL_APPLET_NEED_CONN))
+ applet_have_more_data(__sc_appctx(sc));
+
+ /* update the stream connector, channels, and possibly wake the stream up */
+ sc_notify(sc);
+ stream_release_buffers(__sc_strm(sc));
+
+ /* sc_notify may have passed through chk_snd and released some blocking
+ * flags. Process_stream will consider those flags to wake up the
+	 * appctx, but in case the task is not in the runqueue we may have to
+	 * wake up the appctx immediately.
+ */
+ if (sc_is_recv_allowed(sc) || sc_is_send_allowed(sc))
+ appctx_wakeup(__sc_appctx(sc));
+ return 0;
+}
+
+
+/* Prepares an endpoint upgrade. We don't know at this stage if the upgrade will
+ * succeed or not and if the stconn will be reused by the new endpoint. Thus,
+ * for now, only pretend the stconn is detached.
+ */
+void sc_conn_prepare_endp_upgrade(struct stconn *sc)
+{
+ BUG_ON(!sc_conn(sc) || !sc->app);
+ sc_ep_clr(sc, SE_FL_T_MUX);
+ sc_ep_set(sc, SE_FL_DETACHED);
+}
+
+/* Endpoint upgrade failed. Restore the stconn state. */
+void sc_conn_abort_endp_upgrade(struct stconn *sc)
+{
+ sc_ep_set(sc, SE_FL_T_MUX);
+ sc_ep_clr(sc, SE_FL_DETACHED);
+}
+
+/* Commit the endpoint upgrade. If the stconn is attached, it means the new
+ * endpoint uses it, so we do nothing. Otherwise, the stconn will be destroyed
+ * with the overlying stream, so we must commit the detach.
+ */
+void sc_conn_commit_endp_upgrade(struct stconn *sc)
+{
+ if (!sc_ep_test(sc, SE_FL_DETACHED))
+ return;
+ sc_detach_endp(&sc);
+ /* Because it was already set as detached, the sedesc must be preserved */
+ BUG_ON(!sc);
+ BUG_ON(!sc->sedesc);
+}
+
+/* return the frontend or backend mux stream ID.
+ */
+static int
+smp_fetch_sid(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn;
+ struct stconn *sc;
+ int64_t sid = 0;
+
+ if (!smp->strm)
+ return 0;
+
+ sc = (kw[0] == 'f' ? smp->strm->scf : smp->strm->scb);
+ conn = sc_conn(sc);
+
+ /* No connection */
+ if (!conn)
+ return 0;
+
+	/* No mux installed yet, this may change */
+ if (!conn->mux) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+	/* without an sctl operation, sid remains 0 */
+ if (conn->mux->sctl) {
+ if (conn->mux->sctl(sc, MUX_SCTL_SID, &sid) == -1)
+ return 0;
+ }
+
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = sid;
+
+ return 1;
+}
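+
+/* Usage example (illustrative, not part of the source): the "fs.id" and
+ * "bs.id" fetches registered below may be used from a configuration to
+ * expose the mux stream ID, e.g.:
+ *
+ *	http-response set-header x-stream-id %[fs.id]
+ */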
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types should be declared using the
+ * appropriate pseudo-type. If not available it must be declared as the lowest
+ * common denominator, the type that can be cast into all other ones.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ { "bs.id", smp_fetch_sid, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ },
+	{ "fs.id", smp_fetch_sid, 0, NULL, SMP_T_SINT, SMP_USE_L6RES },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
diff --git a/src/stick_table.c b/src/stick_table.c
new file mode 100644
index 0000000..6427568
--- /dev/null
+++ b/src/stick_table.c
@@ -0,0 +1,5658 @@
+/*
+ * Stick tables management functions.
+ *
+ * Copyright 2009-2010 EXCELIANCE, Emeric Brun <ebrun@exceliance.fr>
+ * Copyright (C) 2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <string.h>
+#include <errno.h>
+
+#include <import/ebmbtree.h>
+#include <import/ebsttree.h>
+#include <import/ebistree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/dict.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/peers.h>
+#include <haproxy/pool.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+
+/* structure used to return a table key built from a sample */
+static THREAD_LOCAL struct stktable_key static_table_key;
+static int (*smp_fetch_src)(const struct arg *, struct sample *, const char *, void *);
+struct pool_head *pool_head_stk_ctr __read_mostly = NULL;
+struct stktable *stktables_list;
+struct eb_root stktable_by_name = EB_ROOT;
+
+#define round_ptr_size(i) (((i) + (sizeof(void *) - 1)) &~ (sizeof(void *) - 1))
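+/* e.g. with 8-byte pointers, round_ptr_size(13) = round_ptr_size(16) = 16;
+ * the data block size is rounded up to a multiple of a pointer size so that
+ * the stksess stored right after it remains properly aligned.
+ */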
+
+/* This function inserts stktable <t> into the tree of known stick-tables.
+ * The stick-table ID is used as the storing key so it must already have
+ * been initialized.
+ */
+void stktable_store_name(struct stktable *t)
+{
+ t->name.key = t->id;
+ ebis_insert(&stktable_by_name, &t->name);
+}
+
+struct stktable *stktable_find_by_name(const char *name)
+{
+ struct ebpt_node *node;
+ struct stktable *t;
+
+ node = ebis_lookup(&stktable_by_name, name);
+ if (node) {
+ t = container_of(node, struct stktable, name);
+ if (strcmp(t->id, name) == 0)
+ return t;
+ }
+
+ return NULL;
+}
+
+/*
+ * Free an allocated sticky session <ts>, and decrease sticky sessions counter
+ * in table <t>. It's safe to call it with or without the table lock held.
+ */
+void __stksess_free(struct stktable *t, struct stksess *ts)
+{
+ HA_ATOMIC_DEC(&t->current);
+ pool_free(t->pool, (void *)ts - round_ptr_size(t->data_size));
+}
+
+/*
+ * Free an allocated sticky session <ts>, and decrease sticky sessions counter
+ * in table <t>.
+ * This function locks the table
+ */
+void stksess_free(struct stktable *t, struct stksess *ts)
+{
+ void *data;
+ data = stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY);
+ if (data) {
+ dict_entry_unref(&server_key_dict, stktable_data_cast(data, std_t_dict));
+ stktable_data_cast(data, std_t_dict) = NULL;
+ }
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock);
+ __stksess_free(t, ts);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock);
+}
+
+/*
+ * Kill a stksess (only if its ref_cnt is zero). This must be called under the
+ * write lock. Returns zero if it could not be deleted, non-zero otherwise.
+ */
+int __stksess_kill(struct stktable *t, struct stksess *ts)
+{
+ if (HA_ATOMIC_LOAD(&ts->ref_cnt))
+ return 0;
+
+ eb32_delete(&ts->exp);
+ if (ts->upd.node.leaf_p) {
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ eb32_delete(&ts->upd);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ }
+ ebmb_delete(&ts->key);
+ __stksess_free(t, ts);
+ return 1;
+}
+
+/*
+ * Decrease the refcount if decrefcnt is not 0, and try to kill the stksess.
+ * Returns non-zero if deleted, zero otherwise.
+ * This function locks the table
+ */
+int stksess_kill(struct stktable *t, struct stksess *ts, int decrefcnt)
+{
+ int ret;
+
+ if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0)
+ return 0;
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+ ret = __stksess_kill(t, ts);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return ret;
+}
+
+/*
+ * Initialize or update the key in the sticky session <ts> present in table <t>
+ * from the value present in <key>.
+ */
+void stksess_setkey(struct stktable *t, struct stksess *ts, struct stktable_key *key)
+{
+ if (t->type != SMP_T_STR)
+ memcpy(ts->key.key, key->key, t->key_size);
+ else {
+ memcpy(ts->key.key, key->key, MIN(t->key_size - 1, key->key_len));
+ ts->key.key[MIN(t->key_size - 1, key->key_len)] = 0;
+ }
+}
+
+/* return a shard number for key <key> of len <len> present in table <t>. This
+ * takes into account the presence or absence of a peers section with shards
+ * and the number of shards, the table's hash_seed, and of course the key. The
+ * caller must pass a valid <key> and <len>. The shard number to be used by the
+ * entry is returned (from 1 to nb_shards, otherwise 0 for none).
+ */
+int stktable_get_key_shard(struct stktable *t, const void *key, size_t len)
+{
+ /* no peers section or no shards in the peers section */
+ if (!t->peers.p || !t->peers.p->nb_shards)
+ return 0;
+
+ return XXH64(key, len, t->hash_seed) % t->peers.p->nb_shards + 1;
+}
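+
+/* For example, with a peers section declaring "shards 4", an entry whose
+ * key hashes to 42 through XXH64() with the table's seed is assigned shard
+ * (42 % 4) + 1 = 3; zero is only returned when sharding doesn't apply.
+ */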
+
+/*
+ * Set the shard for <key> key of <ts> sticky session attached to <t> stick table.
+ * Use zero for stick-table without peers synchronisation.
+ */
+static void stksess_setkey_shard(struct stktable *t, struct stksess *ts,
+ struct stktable_key *key)
+{
+ size_t keylen;
+
+ if (t->type == SMP_T_STR)
+ keylen = key->key_len;
+ else
+ keylen = t->key_size;
+
+ ts->shard = stktable_get_key_shard(t, key->key, keylen);
+}
+
+/*
+ * Init sticky session <ts> of table <t>. The data parts are cleared and <ts>
+ * is returned.
+ */
+static struct stksess *__stksess_init(struct stktable *t, struct stksess *ts)
+{
+ memset((void *)ts - t->data_size, 0, t->data_size);
+ ts->ref_cnt = 0;
+ ts->shard = 0;
+ ts->key.node.leaf_p = NULL;
+ ts->exp.node.leaf_p = NULL;
+ ts->upd.node.leaf_p = NULL;
+ ts->expire = tick_add(now_ms, MS_TO_TICKS(t->expire));
+ HA_RWLOCK_INIT(&ts->lock);
+ return ts;
+}
+
+/*
+ * Trash oldest <to_batch> sticky sessions from table <t>
+ * Returns number of trashed sticky sessions. It may actually trash less
+ * than expected if finding these requires too long a search time (e.g.
+ * most of them have ts->ref_cnt>0).
+ */
+int __stktable_trash_oldest(struct stktable *t, int to_batch)
+{
+ struct stksess *ts;
+ struct eb32_node *eb;
+ int max_search = to_batch * 2; // no more than 50% misses
+ int batched = 0;
+ int looped = 0;
+
+ eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK);
+
+ while (batched < to_batch) {
+
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now if we
+ * have not yet visited it.
+ */
+ if (looped)
+ break;
+ looped = 1;
+ eb = eb32_first(&t->exps);
+ if (likely(!eb))
+ break;
+ }
+
+ if (--max_search < 0)
+ break;
+
+ /* timer looks expired, detach it from the queue */
+ ts = eb32_entry(eb, struct stksess, exp);
+ eb = eb32_next(eb);
+
+ /* don't delete an entry which is currently referenced */
+ if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
+ continue;
+
+ eb32_delete(&ts->exp);
+
+ if (ts->expire != ts->exp.key) {
+ if (!tick_isset(ts->expire))
+ continue;
+
+ ts->exp.key = ts->expire;
+ eb32_insert(&t->exps, &ts->exp);
+
+ /* the update might have jumped beyond the next element,
+ * possibly causing a wrapping. We need to check whether
+ * the next element should be used instead. If the next
+ * element doesn't exist it means we're on the right
+ * side and have to check the first one then. If it
+ * exists and is closer, we must use it, otherwise we
+ * use the current one.
+ */
+ if (!eb)
+ eb = eb32_first(&t->exps);
+
+ if (!eb || tick_is_lt(ts->exp.key, eb->key))
+ eb = &ts->exp;
+
+ continue;
+ }
+
+ /* session expired, trash it */
+ ebmb_delete(&ts->key);
+ if (ts->upd.node.leaf_p) {
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ eb32_delete(&ts->upd);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ }
+ __stksess_free(t, ts);
+ batched++;
+ }
+
+ return batched;
+}
+
+/*
+ * Trash oldest <to_batch> sticky sessions from table <t>
+ * Returns number of trashed sticky sessions.
+ * This function locks the table
+ */
+int stktable_trash_oldest(struct stktable *t, int to_batch)
+{
+ int ret;
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+ ret = __stktable_trash_oldest(t, to_batch);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return ret;
+}
+/*
+ * Allocate and initialise a new sticky session.
+ * The new sticky session is returned or NULL in case of lack of memory.
+ * Sticky sessions should only be allocated this way, and must be freed using
+ * stksess_free(). Table <t>'s sticky session counter is increased. If <key>
+ * is not NULL, it is assigned to the new session. It must be called unlocked
+ * as it may rely on a lock to trash older entries.
+ */
+struct stksess *stksess_new(struct stktable *t, struct stktable_key *key)
+{
+ struct stksess *ts;
+ unsigned int current;
+
+ current = HA_ATOMIC_FETCH_ADD(&t->current, 1);
+
+ if (unlikely(current >= t->size)) {
+ /* the table was already full, we may have to purge entries */
+ if (t->nopurge || !stktable_trash_oldest(t, (t->size >> 8) + 1)) {
+ HA_ATOMIC_DEC(&t->current);
+ return NULL;
+ }
+ }
+
+ ts = pool_alloc(t->pool);
+ if (ts) {
+ ts = (void *)ts + round_ptr_size(t->data_size);
+ __stksess_init(t, ts);
+ if (key) {
+ stksess_setkey(t, ts, key);
+ stksess_setkey_shard(t, ts, key);
+ }
+ }
+
+ return ts;
+}
+
+/*
+ * Looks in table <t> for a sticky session matching key <key>.
+ * Returns a pointer to the requested sticky session or NULL if none was found.
+ */
+struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key)
+{
+ struct ebmb_node *eb;
+
+ if (t->type == SMP_T_STR)
+ eb = ebst_lookup_len(&t->keys, key->key, key->key_len+1 < t->key_size ? key->key_len : t->key_size-1);
+ else
+ eb = ebmb_lookup(&t->keys, key->key, t->key_size);
+
+ if (unlikely(!eb)) {
+ /* no session found */
+ return NULL;
+ }
+
+ return ebmb_entry(eb, struct stksess, key);
+}
+
+/*
+ * Looks in table <t> for a sticky session matching key <key>.
+ * Returns a pointer to the requested sticky session or NULL if none was found.
+ * The refcount of the found entry is increased and this function
+ * is protected using the table lock
+ */
+struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key)
+{
+ struct stksess *ts;
+
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock);
+ ts = __stktable_lookup_key(t, key);
+ if (ts)
+ HA_ATOMIC_INC(&ts->ref_cnt);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return ts;
+}
+
+/*
+ * Looks in table <t> for a sticky session with same key as <ts>.
+ * Returns a pointer to the requested sticky session or NULL if none was found.
+ */
+struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts)
+{
+ struct ebmb_node *eb;
+
+ if (t->type == SMP_T_STR)
+ eb = ebst_lookup(&(t->keys), (char *)ts->key.key);
+ else
+ eb = ebmb_lookup(&(t->keys), ts->key.key, t->key_size);
+
+ if (unlikely(!eb))
+ return NULL;
+
+ return ebmb_entry(eb, struct stksess, key);
+}
+
+/*
+ * Looks in table <t> for a sticky session with same key as <ts>.
+ * Returns a pointer to the requested sticky session or NULL if none was found.
+ * The refcount of the found entry is increased and this function
+ * is protected using the table lock
+ */
+struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts)
+{
+ struct stksess *lts;
+
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock);
+ lts = __stktable_lookup(t, ts);
+ if (lts)
+ HA_ATOMIC_INC(&lts->ref_cnt);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ return lts;
+}
+
+/* Update the expiration timer for <ts> but do not touch its expiration node.
+ * The table's expiration timer is updated if set.
+ * The node will be also inserted into the update tree if needed, at a position
+ * depending if the update is a local or coming from a remote node.
+ * If <decrefcnt> is set, the ts entry's ref_cnt will be decremented. The table's
+ * write lock may be taken.
+ */
+void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, int expire, int decrefcnt)
+{
+ struct eb32_node * eb;
+ int use_wrlock = 0;
+ int do_wakeup = 0;
+
+ if (expire != HA_ATOMIC_LOAD(&ts->expire)) {
+		/* we'll need to set the expiration and wake up the expiration timer. */
+ HA_ATOMIC_STORE(&ts->expire, expire);
+ stktable_requeue_exp(t, ts);
+ }
+
+ /* If sync is enabled */
+ if (t->sync_task) {
+ try_lock_again:
+ /* We'll need to reliably check that the entry is in the tree.
+ * It's only inserted/deleted using a write lock so a read lock
+ * is sufficient to verify this. We may then need to upgrade it
+ * to perform an update (which is rare under load), and if the
+ * upgrade fails, we'll try again with a write lock directly.
+ */
+ if (use_wrlock)
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ else
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->updt_lock);
+
+ if (local) {
+ /* Check if this entry is not in the tree or not
+ * scheduled for at least one peer.
+ */
+ if (!ts->upd.node.leaf_p
+ || (int)(t->commitupdate - ts->upd.key) >= 0
+ || (int)(ts->upd.key - t->localupdate) >= 0) {
+ /* Time to upgrade the read lock to write lock if needed */
+ if (!use_wrlock) {
+ if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) {
+ /* failed, try again */
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ use_wrlock = 1;
+ goto try_lock_again;
+ }
+ HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock);
+ use_wrlock = 1;
+ }
+
+ /* here we're write-locked */
+
+ ts->upd.key = ++t->update;
+ t->localupdate = t->update;
+ eb32_delete(&ts->upd);
+ eb = eb32_insert(&t->updates, &ts->upd);
+ if (eb != &ts->upd) {
+ eb32_delete(eb);
+ eb32_insert(&t->updates, &ts->upd);
+ }
+ }
+ do_wakeup = 1;
+ }
+ else {
+ /* If this entry is not in the tree */
+
+ if (!ts->upd.node.leaf_p) {
+ /* Time to upgrade the read lock to write lock if needed */
+ if (!use_wrlock) {
+ if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) {
+ /* failed, try again */
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ use_wrlock = 1;
+ goto try_lock_again;
+ }
+ HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock);
+ use_wrlock = 1;
+ }
+
+ /* here we're write-locked */
+
+				ts->upd.key = (++t->update) + 2147483648U;
+ eb = eb32_insert(&t->updates, &ts->upd);
+ if (eb != &ts->upd) {
+ eb32_delete(eb);
+ eb32_insert(&t->updates, &ts->upd);
+ }
+ }
+ }
+
+ /* drop the lock now */
+ if (use_wrlock)
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ else
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ }
+
+ if (decrefcnt)
+ HA_ATOMIC_DEC(&ts->ref_cnt);
+
+ if (do_wakeup)
+ task_wakeup(t->sync_task, TASK_WOKEN_MSG);
+}
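+
+/* Note on the update keys used above: local updates take the plain
+ * ++t->update value while remote ones are offset by 2^31, i.e. half a
+ * 32-bit wrap away, which keeps them outside the local-update window
+ * checked against <localupdate>/<commitupdate>, so they are never
+ * mistaken for local updates pending propagation to the peers.
+ */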
+
+/* Update the expiration timer for <ts> but do not touch its expiration node.
+ * The table's expiration timer is updated using the date of expiration coming from
+ * <t> stick-table configuration.
+ * The node will be also inserted into the update tree if needed, at a position
+ * considering the update is coming from a remote node
+ */
+void stktable_touch_remote(struct stktable *t, struct stksess *ts, int decrefcnt)
+{
+ stktable_touch_with_exp(t, ts, 0, ts->expire, decrefcnt);
+}
+
+/* Update the expiration timer for <ts> but do not touch its expiration node.
+ * The table's expiration timer is updated using the date of expiration coming from
+ * <t> stick-table configuration.
+ * The node will be also inserted into the update tree if needed, at a position
+ * considering the update was made locally
+ */
+void stktable_touch_local(struct stktable *t, struct stksess *ts, int decrefcnt)
+{
+ int expire = tick_add(now_ms, MS_TO_TICKS(t->expire));
+
+ stktable_touch_with_exp(t, ts, 1, expire, decrefcnt);
+}
+/* Just decrease the ref_cnt of the current session. Does nothing if <ts> is NULL.
+ * Note that we still need to take the read lock because a number of other places
+ * (including in Lua and peers) update the ref_cnt non-atomically under the write
+ * lock.
+ */
+static void stktable_release(struct stktable *t, struct stksess *ts)
+{
+ if (!ts)
+ return;
+ HA_ATOMIC_DEC(&ts->ref_cnt);
+}
+
+/* Insert new sticky session <ts> in the table. It is assumed that it does not
+ * yet exist (the caller must check this). The table's timeout is updated if it
+ * is set. <ts> is returned if properly inserted, otherwise the one already
+ * present if any.
+ */
+struct stksess *__stktable_store(struct stktable *t, struct stksess *ts)
+{
+ struct ebmb_node *eb;
+
+ eb = ebmb_insert(&t->keys, &ts->key, t->key_size);
+ if (likely(eb == &ts->key)) {
+ ts->exp.key = ts->expire;
+ eb32_insert(&t->exps, &ts->exp);
+ }
+ return ebmb_entry(eb, struct stksess, key); // most commonly this is <ts>
+}
+
+/* requeues the table's expiration task to take the recently added <ts> into
+ * account. The task's expire field is updated through an atomic CAS loop
+ * while the table's lock is held.
+ */
+void stktable_requeue_exp(struct stktable *t, const struct stksess *ts)
+{
+ int old_exp, new_exp;
+ int expire = ts->expire;
+
+ if (!t->expire)
+ return;
+
+ /* set the task's expire to the newest expiration date. */
+ old_exp = HA_ATOMIC_LOAD(&t->exp_task->expire);
+ new_exp = tick_first(expire, old_exp);
+
+ /* let's not go further if we're already up to date */
+ if (new_exp == old_exp)
+ return;
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+
+ while (new_exp != old_exp &&
+ !HA_ATOMIC_CAS(&t->exp_task->expire, &old_exp, new_exp)) {
+ __ha_cpu_relax();
+ new_exp = tick_first(expire, old_exp);
+ }
+
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+
+ task_queue(t->exp_task);
+}
+
+/* Returns a valid or initialized stksess for the specified stktable_key in the
+ * specified table, or NULL if the key was NULL, or if no entry was found nor
+ * could be created. The entry's expiration is updated. This function locks the
+ * table, and the refcount of the entry is increased.
+ */
+struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *key)
+{
+ struct stksess *ts, *ts2;
+
+ if (!key)
+ return NULL;
+
+ ts = stktable_lookup_key(table, key);
+ if (ts)
+ return ts;
+
+	/* No such entry exists, let's try to create a new one. This doesn't
+	 * require locking yet.
+ */
+
+ ts = stksess_new(table, key);
+ if (!ts)
+ return NULL;
+
+ /* Now we're certain to have a ts. We need to store it. For this we'll
+ * need an exclusive access. We don't need an atomic upgrade, this is
+ * rare and an unlock+lock sequence will do the job fine. Given that
+	 * this will not be atomic, the missing entry might appear in the
+	 * meantime, so we have to be careful that the one we try to insert
+	 * is the one we find.
+ */
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock);
+
+ ts2 = __stktable_store(table, ts);
+
+ HA_ATOMIC_INC(&ts2->ref_cnt);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock);
+
+ if (unlikely(ts2 != ts)) {
+ /* another entry was added in the mean time, let's
+ * switch to it.
+ */
+ __stksess_free(table, ts);
+ ts = ts2;
+ }
+
+ stktable_requeue_exp(table, ts);
+ return ts;
+}
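+
+/* Typical usage sketch (illustrative only, <key> coming e.g. from
+ * smp_to_stkey()): get or create an entry, update one of its data fields
+ * under the entry's lock, then release it while scheduling its propagation:
+ *
+ *	struct stksess *ts = stktable_get_entry(t, key);
+ *
+ *	if (ts) {
+ *		void *ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC0);
+ *		if (ptr) {
+ *			HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+ *			stktable_data_cast(ptr, std_t_uint)++;
+ *			HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ *		}
+ *		stktable_touch_local(t, ts, 1);
+ *	}
+ */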
+
+/* Look up an entry with the same key and store the submitted
+ * stksess if not found. This function locks the table either shared or
+ * exclusively, and the refcount of the entry is increased.
+ */
+struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts)
+{
+ struct stksess *ts;
+
+ HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->lock);
+ ts = __stktable_lookup(table, nts);
+ if (ts) {
+ HA_ATOMIC_INC(&ts->ref_cnt);
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock);
+ return ts;
+ }
+ ts = nts;
+
+ /* let's increment it before switching to exclusive */
+ HA_ATOMIC_INC(&ts->ref_cnt);
+
+ if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->lock) != 0) {
+ /* upgrade to seek lock failed, let's drop and take */
+ HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock);
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock);
+ }
+ else
+ HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->lock);
+
+ /* now we're write-locked */
+
+ __stktable_store(table, ts);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock);
+
+ stktable_requeue_exp(table, ts);
+ return ts;
+}
+
+/*
+ * Task processing function to trash expired sticky sessions. A pointer to the
+ * task itself is returned since it never dies.
+ */
+struct task *process_table_expire(struct task *task, void *context, unsigned int state)
+{
+ struct stktable *t = context;
+ struct stksess *ts;
+ struct eb32_node *eb;
+ int updt_locked = 0;
+ int looped = 0;
+ int exp_next;
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+ eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK);
+
+ while (1) {
+ if (unlikely(!eb)) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now if we
+ * have not yet visited it.
+ */
+ if (looped)
+ break;
+ looped = 1;
+ eb = eb32_first(&t->exps);
+ if (likely(!eb))
+ break;
+ }
+
+ if (likely(tick_is_lt(now_ms, eb->key))) {
+ /* timer not expired yet, revisit it later */
+ exp_next = eb->key;
+ goto out_unlock;
+ }
+
+ /* timer looks expired, detach it from the queue */
+ ts = eb32_entry(eb, struct stksess, exp);
+ eb = eb32_next(eb);
+
+ /* don't delete an entry which is currently referenced */
+ if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0)
+ continue;
+
+ eb32_delete(&ts->exp);
+
+ if (!tick_is_expired(ts->expire, now_ms)) {
+ if (!tick_isset(ts->expire))
+ continue;
+
+ ts->exp.key = ts->expire;
+ eb32_insert(&t->exps, &ts->exp);
+
+ /* the update might have jumped beyond the next element,
+ * possibly causing a wrapping. We need to check whether
+ * the next element should be used instead. If the next
+ * element doesn't exist it means we're on the right
+ * side and have to check the first one then. If it
+ * exists and is closer, we must use it, otherwise we
+ * use the current one.
+ */
+ if (!eb)
+ eb = eb32_first(&t->exps);
+
+ if (!eb || tick_is_lt(ts->exp.key, eb->key))
+ eb = &ts->exp;
+ continue;
+ }
+
+ /* session expired, trash it */
+ ebmb_delete(&ts->key);
+ if (ts->upd.node.leaf_p) {
+ if (!updt_locked) {
+ updt_locked = 1;
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ }
+ eb32_delete(&ts->upd);
+ }
+ __stksess_free(t, ts);
+ }
+
+ /* We have found no task to expire in any tree */
+ exp_next = TICK_ETERNITY;
+
+out_unlock:
+ task->expire = exp_next;
+ if (updt_locked)
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+ return task;
+}
+
+/* Perform minimal stick table initialization. In case of error, the
+ * function will return 0 and <err_msg> will contain hints about the
+ * error; it is up to the caller to free it.
+ *
+ * Returns 1 on success
+ */
+int stktable_init(struct stktable *t, char **err_msg)
+{
+ int peers_retval = 0;
+
+ t->hash_seed = XXH64(t->id, t->idlen, 0);
+
+ if (t->size) {
+ t->keys = EB_ROOT_UNIQUE;
+ memset(&t->exps, 0, sizeof(t->exps));
+ t->updates = EB_ROOT_UNIQUE;
+ HA_RWLOCK_INIT(&t->lock);
+
+ t->pool = create_pool("sticktables", sizeof(struct stksess) + round_ptr_size(t->data_size) + t->key_size, MEM_F_SHARED);
+
+		if (t->expire) {
+ t->exp_task = task_new_anywhere();
+ if (!t->exp_task)
+ goto mem_error;
+ t->exp_task->process = process_table_expire;
+ t->exp_task->context = (void *)t;
+ }
+ if (t->peers.p && t->peers.p->peers_fe && !(t->peers.p->peers_fe->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ peers_retval = peers_register_table(t->peers.p, t);
+ }
+
+ if (t->pool == NULL || peers_retval)
+ goto mem_error;
+ }
+ if (t->write_to.name) {
+ struct stktable *table;
+
+ /* postresolve write_to table */
+ table = stktable_find_by_name(t->write_to.name);
+ if (!table) {
+ memprintf(err_msg, "write-to: table '%s' doesn't exist", t->write_to.name);
+ ha_free(&t->write_to.name); /* no longer need this */
+ return 0;
+ }
+ ha_free(&t->write_to.name); /* no longer need this */
+ if (table->write_to.ptr) {
+ memprintf(err_msg, "write-to: table '%s' is already used as a source table", table->id);
+ return 0;
+ }
+ if (table->type != t->type) {
+ memprintf(err_msg, "write-to: cannot mix table types ('%s' has '%s' type and '%s' has '%s' type)",
+ table->id, stktable_types[table->type].kw,
+ t->id, stktable_types[t->type].kw);
+ return 0;
+ }
+ if (table->key_size != t->key_size) {
+ memprintf(err_msg, "write-to: cannot mix key sizes ('%s' has '%ld' key_size and '%s' has '%ld' key_size)",
+ table->id, (long)table->key_size,
+ t->id, (long)t->key_size);
+ return 0;
+ }
+
+ t->write_to.t = table;
+ }
+ return 1;
+
+ mem_error:
+ memprintf(err_msg, "memory allocation error");
+ return 0;
+}
+
+/* Performs stick table cleanup: it's meant to be called after the table
+ * has been initialized with stktable_init(), otherwise it will lead to undefined
+ * behavior.
+ *
+ * However it does not free the table pointer itself
+ */
+void stktable_deinit(struct stktable *t)
+{
+ if (!t)
+ return;
+ task_destroy(t->exp_task);
+ pool_destroy(t->pool);
+}
+
+/*
+ * Configuration keywords of known table types
+ */
+struct stktable_type stktable_types[SMP_TYPES] = {
+ [SMP_T_SINT] = { "integer", 0, 4 },
+ [SMP_T_IPV4] = { "ip", 0, 4 },
+ [SMP_T_IPV6] = { "ipv6", 0, 16 },
+ [SMP_T_STR] = { "string", STK_F_CUSTOM_KEYSIZE, 32 },
+ [SMP_T_BIN] = { "binary", STK_F_CUSTOM_KEYSIZE, 32 }
+};
+
+/*
+ * Parse table type configuration.
+ * Returns 0 on successful parsing, else 1.
+ * <myidx> is set at next configuration <args> index.
+ */
+int stktable_parse_type(char **args, int *myidx, unsigned long *type, size_t *key_size, const char *file, int linenum)
+{
+ for (*type = 0; *type < SMP_TYPES; (*type)++) {
+ if (!stktable_types[*type].kw)
+ continue;
+ if (strcmp(args[*myidx], stktable_types[*type].kw) != 0)
+ continue;
+
+ *key_size = stktable_types[*type].default_size;
+ (*myidx)++;
+
+ if (stktable_types[*type].flags & STK_F_CUSTOM_KEYSIZE) {
+ if (strcmp("len", args[*myidx]) == 0) {
+ char *stop;
+
+ (*myidx)++;
+ *key_size = strtol(args[*myidx], &stop, 10);
+ if (*stop != '\0' || !*key_size) {
+ ha_alert("parsing [%s:%d] : 'len' expects a positive integer argument.\n", file, linenum);
+ return 1;
+ }
+ if (*type == SMP_T_STR) {
+ /* null terminated string needs +1 for '\0'. */
+ (*key_size)++;
+ }
+ (*myidx)++;
+ }
+ }
+ return 0;
+ }
+ ha_alert("parsing [%s:%d] : %s: unknown type '%s'.\n", file, linenum, args[0], args[*myidx]);
+ return 1;
+}
+
+/* Reserve some space for data type <type>. There are two optional
+ * arguments, <sa> and <sa2>, to configure this data type; they may be
+ * NULL if unused for a given type.
+ * Returns PE_NONE (0) if OK or an error code among :
+ * - PE_ENUM_OOR if <type> does not exist
+ * - PE_EXIST if <type> is already registered
+ * - PE_ARG_NOT_USE if <sa>/<sa2> was provided but not expected
+ * - PE_ARG_MISSING if <sa>/<sa2> was expected but not provided
+ * - PE_ARG_VALUE_OOR if type is an array and <sa> is out of the array size range.
+ */
+int stktable_alloc_data_type(struct stktable *t, int type, const char *sa, const char *sa2)
+{
+ if (type >= STKTABLE_DATA_TYPES)
+ return PE_ENUM_OOR;
+
+ if (t->data_ofs[type])
+ /* already allocated */
+ return PE_EXIST;
+
+ t->data_nbelem[type] = 1;
+ if (stktable_data_types[type].is_array) {
+ /* arrays take their element count on first argument */
+ if (!sa)
+ return PE_ARG_MISSING;
+ t->data_nbelem[type] = atoi(sa);
+ if (!t->data_nbelem[type] || (t->data_nbelem[type] > STKTABLE_MAX_DT_ARRAY_SIZE))
+ return PE_ARG_VALUE_OOR;
+ sa = sa2;
+ }
+
+ switch (stktable_data_types[type].arg_type) {
+ case ARG_T_NONE:
+ if (sa)
+ return PE_ARG_NOT_USED;
+ break;
+ case ARG_T_INT:
+ if (!sa)
+ return PE_ARG_MISSING;
+ t->data_arg[type].i = atoi(sa);
+ break;
+ case ARG_T_DELAY:
+ if (!sa)
+ return PE_ARG_MISSING;
+ sa = parse_time_err(sa, &t->data_arg[type].u, TIME_UNIT_MS);
+ if (sa)
+ return PE_ARG_INVC; /* invalid char */
+ break;
+ }
+
+ t->data_size += t->data_nbelem[type] * stktable_type_size(stktable_data_types[type].std_type);
+ t->data_ofs[type] = -t->data_size;
+ return PE_NONE;
+}
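+
+/* Example (illustrative): with "store gpc(3),conn_rate(10s)", the store
+ * parser in parse_stick_table() below calls this function twice: once with
+ * type=STKTABLE_DT_GPC and sa="3" (the array's element count), and once
+ * with type=STKTABLE_DT_CONN_RATE and sa="10s" (an ARG_T_DELAY period).
+ */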
+
+/*
+ * Parse a line with <linenum> as number in <file> configuration file to configure
+ * the stick-table with <t> as address and <id> as ID.
+ * <peers> provides the "peers" section pointer only if this function is called
+ * from a "peers" section.
+ * <nid> is the stick-table name which is sent over the network. It must be equal
+ * to <id> if this stick-table is parsed from a proxy section, and prefixed by <peers>
+ * "peers" section name followed by a '/' character if parsed from a "peers" section.
+ * It is the responsibility of the caller to check this.
+ * Return an error status with ERR_* flags set if required, 0 if no error was encountered.
+ */
+int parse_stick_table(const char *file, int linenum, char **args,
+ struct stktable *t, char *id, char *nid, struct peers *peers)
+{
+ int err_code = 0;
+ int idx = 1;
+ unsigned int val;
+
+ if (!id || !*id) {
+ ha_alert("parsing [%s:%d] : %s: ID not provided.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_ABORT;
+ goto out;
+ }
+
+ /* Store the "peers" section if this function is called from a "peers" section. */
+ if (peers) {
+ t->peers.p = peers;
+ idx++;
+ }
+
+ t->id = id;
+ t->idlen = strlen(id);
+ t->nid = nid;
+ t->type = (unsigned int)-1;
+ t->conf.file = file;
+ t->conf.line = linenum;
+ t->write_to.name = NULL;
+
+ while (*args[idx]) {
+ const char *err;
+
+ if (strcmp(args[idx], "size") == 0) {
+ idx++;
+ if (!*(args[idx])) {
+ ha_alert("parsing [%s:%d] : %s: missing argument after '%s'.\n",
+ file, linenum, args[0], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if ((err = parse_size_err(args[idx], &t->size))) {
+ ha_alert("parsing [%s:%d] : %s: unexpected character '%c' in argument of '%s'.\n",
+ file, linenum, args[0], *err, args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ idx++;
+ }
+		/* This argument does not exist in a "peers" section. */
+ else if (!peers && strcmp(args[idx], "peers") == 0) {
+ idx++;
+ if (!*(args[idx])) {
+ ha_alert("parsing [%s:%d] : %s: missing argument after '%s'.\n",
+ file, linenum, args[0], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ ha_free(&t->peers.name);
+ t->peers.name = strdup(args[idx++]);
+ }
+ else if (strcmp(args[idx], "expire") == 0) {
+ idx++;
+ if (!*(args[idx])) {
+ ha_alert("parsing [%s:%d] : %s: missing argument after '%s'.\n",
+ file, linenum, args[0], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ err = parse_time_err(args[idx], &val, TIME_UNIT_MS);
+ if (err == PARSE_TIME_OVER) {
+ ha_alert("parsing [%s:%d]: %s: timer overflow in argument <%s> to <%s>, maximum value is 2147483647 ms (~24.8 days).\n",
+ file, linenum, args[0], args[idx], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err == PARSE_TIME_UNDER) {
+ ha_alert("parsing [%s:%d]: %s: timer underflow in argument <%s> to <%s>, minimum non-null value is 1 ms.\n",
+ file, linenum, args[0], args[idx], args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (err) {
+ ha_alert("parsing [%s:%d] : %s: unexpected character '%c' in argument of '%s'.\n",
+ file, linenum, args[0], *err, args[idx-1]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ t->expire = val;
+ idx++;
+ }
+ else if (strcmp(args[idx], "nopurge") == 0) {
+ t->nopurge = 1;
+ idx++;
+ }
+ else if (strcmp(args[idx], "type") == 0) {
+ idx++;
+ if (stktable_parse_type(args, &idx, &t->type, &t->key_size, file, linenum) != 0) {
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ /* idx already points to next arg */
+ }
+ else if (strcmp(args[idx], "store") == 0) {
+ int type, err;
+ char *cw, *nw, *sa, *sa2;
+
+ idx++;
+ nw = args[idx];
+ while (*nw) {
+ /* the "store" keyword supports a comma-separated list */
+ cw = nw;
+ sa = NULL; /* store arg */
+ sa2 = NULL;
+ while (*nw && *nw != ',') {
+ if (*nw == '(') {
+ *nw = 0;
+ sa = ++nw;
+ while (*nw != ')') {
+ if (!*nw) {
+ ha_alert("parsing [%s:%d] : %s: missing closing parenthesis after store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ if (*nw == ',') {
+ *nw = '\0';
+ sa2 = nw + 1;
+ }
+ nw++;
+ }
+ *nw = '\0';
+ }
+ nw++;
+ }
+ if (*nw)
+ *nw++ = '\0';
+ type = stktable_get_data_type(cw);
+ if (type < 0) {
+ ha_alert("parsing [%s:%d] : %s: unknown store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ err = stktable_alloc_data_type(t, type, sa, sa2);
+ switch (err) {
+ case PE_NONE: break;
+ case PE_EXIST:
+ ha_warning("parsing [%s:%d]: %s: store option '%s' already enabled, ignored.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_WARN;
+ break;
+
+ case PE_ARG_MISSING:
+ ha_alert("parsing [%s:%d] : %s: missing argument to store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+
+ case PE_ARG_NOT_USED:
+ ha_alert("parsing [%s:%d] : %s: unexpected argument to store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ case PE_ARG_VALUE_OOR:
+ ha_alert("parsing [%s:%d] : %s: array size is out of allowed range (1-%d) for store option '%s'.\n",
+ file, linenum, args[0], STKTABLE_MAX_DT_ARRAY_SIZE, cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+
+ default:
+ ha_alert("parsing [%s:%d] : %s: error when processing store option '%s'.\n",
+ file, linenum, args[0], cw);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ idx++;
+ if (t->data_ofs[STKTABLE_DT_GPT] && t->data_ofs[STKTABLE_DT_GPT0]) {
+				ha_alert("parsing [%s:%d] : %s: simultaneous usage of 'gpt' and 'gpt0' in the same table is not permitted as 'gpt' overrides 'gpt0'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (t->data_ofs[STKTABLE_DT_GPC] && (t->data_ofs[STKTABLE_DT_GPC0] || t->data_ofs[STKTABLE_DT_GPC1])) {
+				ha_alert("parsing [%s:%d] : %s: simultaneous usage of 'gpc' and 'gpc[0/1]' in the same table is not permitted as 'gpc' overrides 'gpc[0/1]'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ else if (t->data_ofs[STKTABLE_DT_GPC_RATE] && (t->data_ofs[STKTABLE_DT_GPC0_RATE] || t->data_ofs[STKTABLE_DT_GPC1_RATE])) {
+				ha_alert("parsing [%s:%d] : %s: simultaneous usage of 'gpc_rate' and 'gpc[0/1]_rate' in the same table is not permitted as 'gpc_rate' overrides 'gpc[0/1]_rate'.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+ else if (strcmp(args[idx], "srvkey") == 0) {
+ char *keytype;
+ idx++;
+ keytype = args[idx];
+ if (strcmp(keytype, "name") == 0) {
+ t->server_key_type = STKTABLE_SRV_NAME;
+ }
+ else if (strcmp(keytype, "addr") == 0) {
+ t->server_key_type = STKTABLE_SRV_ADDR;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : %s : unknown server key type '%s'.\n",
+ file, linenum, args[0], keytype);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+
+ }
+ idx++;
+ }
+ else if (strcmp(args[idx], "write-to") == 0) {
+ char *write_to;
+
+ idx++;
+ write_to = args[idx];
+ if (!write_to[0]) {
+ ha_alert("parsing [%s:%d] : %s : write-to requires table name.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+
+ }
+ ha_free(&t->write_to.name);
+ t->write_to.name = strdup(write_to);
+ idx++;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : %s: unknown argument '%s'.\n",
+ file, linenum, args[0], args[idx]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
+
+ if (!t->size) {
+ ha_alert("parsing [%s:%d] : %s: missing size.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ if (t->type == (unsigned int)-1) {
+ ha_alert("parsing [%s:%d] : %s: missing type.\n",
+ file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ out:
+ return err_code;
+}
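+
+/* Example of a declaration accepted by the parser above (illustrative):
+ *
+ *	stick-table type ip size 200k expire 30m store conn_rate(10s),gpc0
+ *
+ * i.e. an IPv4-keyed table of 200k entries whose idle entries expire after
+ * 30 minutes, storing a 10-second connection rate and the gpc0 counter.
+ */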
+
+/* Prepares a stktable_key from a sample <smp> to search into table <t>.
+ * Note that the sample *is* modified and that the returned key may point
+ * to it, so the sample must not be modified afterwards before the lookup.
+ * Returns NULL if the sample could not be converted (eg: no matching type),
+ * otherwise a pointer to the static stktable_key filled with what is needed
+ * for the lookup.
+ */
+struct stktable_key *smp_to_stkey(struct sample *smp, struct stktable *t)
+{
+ /* Convert sample. */
+ if (!sample_convert(smp, t->type))
+ return NULL;
+
+ /* Fill static_table_key. */
+ switch (t->type) {
+
+ case SMP_T_IPV4:
+ static_table_key.key = &smp->data.u.ipv4;
+ static_table_key.key_len = 4;
+ break;
+
+ case SMP_T_IPV6:
+ static_table_key.key = &smp->data.u.ipv6;
+ static_table_key.key_len = 16;
+ break;
+
+ case SMP_T_SINT:
+		/* The stick table requires a 32-bit unsigned int while "sint"
+		 * is a signed 64-bit int, so we can convert it in place.
+		 */
+ smp->data.u.sint = (unsigned int)smp->data.u.sint;
+ static_table_key.key = &smp->data.u.sint;
+ static_table_key.key_len = 4;
+ break;
+
+ case SMP_T_STR:
+ if (!smp_make_safe(smp))
+ return NULL;
+ static_table_key.key = smp->data.u.str.area;
+ static_table_key.key_len = smp->data.u.str.data;
+ break;
+
+ case SMP_T_BIN:
+ if (smp->data.u.str.data < t->key_size) {
+ /* This type needs padding with 0. */
+ if (!smp_make_rw(smp))
+ return NULL;
+
+ if (smp->data.u.str.size < t->key_size)
+ if (!smp_dup(smp))
+ return NULL;
+ if (smp->data.u.str.size < t->key_size)
+ return NULL;
+ memset(smp->data.u.str.area + smp->data.u.str.data, 0,
+ t->key_size - smp->data.u.str.data);
+ smp->data.u.str.data = t->key_size;
+ }
+ static_table_key.key = smp->data.u.str.area;
+ static_table_key.key_len = smp->data.u.str.data;
+ break;
+
+ default: /* impossible case. */
+ return NULL;
+ }
+
+ return &static_table_key;
+}
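+
+/* Usage sketch (illustrative): the returned key points into the thread-local
+ * static_table_key (and possibly into <smp> itself), so it must be consumed
+ * before the sample is modified again:
+ *
+ *	struct stktable_key *key = smp_to_stkey(smp, t);
+ *	struct stksess *ts = key ? stktable_lookup_key(t, key) : NULL;
+ *
+ *	... use the entry ...
+ *
+ *	if (ts)
+ *		stktable_release(t, ts);
+ */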
+
+/*
+ * Process a fetch + format conversion as defined by the sample expression <expr>
+ * on request or response considering the <opt> parameter. Returns either NULL if
+ * no key could be extracted, or a pointer to the converted result stored in
+ * static_table_key in format <table_type>. If <smp> is not NULL, it will be reset
+ * and its flags will be initialized so that the caller gets a copy of the input
+ * sample, and knows why it was not accepted (eg: SMP_F_MAY_CHANGE is present
+ * without SMP_OPT_FINAL). The output will be usable like this :
+ *
+ * return MAY_CHANGE FINAL Meaning for the sample
+ * NULL 0 * Not present and will never be (eg: header)
+ * NULL 1 0 Not present or unstable, could change (eg: req_len)
+ * NULL 1 1 Not present, will not change anymore
+ * smp 0 * Present and will not change (eg: header)
+ * smp 1 0 not possible
+ * smp 1 1 Present, last known value (eg: request length)
+ */
+struct stktable_key *stktable_fetch_key(struct stktable *t, struct proxy *px, struct session *sess, struct stream *strm,
+ unsigned int opt, struct sample_expr *expr, struct sample *smp)
+{
+ if (smp)
+ memset(smp, 0, sizeof(*smp));
+
+ smp = sample_process(px, sess, strm, opt, expr, smp);
+ if (!smp)
+ return NULL;
+
+ if ((smp->flags & SMP_F_MAY_CHANGE) && !(opt & SMP_OPT_FINAL))
+ return NULL; /* we can only use stable samples */
+
+ return smp_to_stkey(smp, t);
+}
+
+/*
+ * Returns 1 if sample expression <expr> result can be converted to table key of
+ * type <table_type>, otherwise zero. Used in configuration check.
+ */
+int stktable_compatible_sample(struct sample_expr *expr, unsigned long table_type)
+{
+ int out_type;
+
+ if (table_type >= SMP_TYPES || !stktable_types[table_type].kw)
+ return 0;
+
+ out_type = smp_expr_output_type(expr);
+
+ /* Convert sample. */
+ if (!sample_casts[out_type][table_type])
+ return 0;
+
+ return 1;
+}
+
+/* Extra data types processing: after the last one, some room may remain
+ * before STKTABLE_DATA_TYPES that may be used to register extra data types
+ * at run time.
+ */
+struct stktable_data_type stktable_data_types[STKTABLE_DATA_TYPES] = {
+ [STKTABLE_DT_SERVER_ID] = { .name = "server_id", .std_type = STD_T_SINT, .as_is = 1 },
+ [STKTABLE_DT_GPT0] = { .name = "gpt0", .std_type = STD_T_UINT, .as_is = 1 },
+ [STKTABLE_DT_GPC0] = { .name = "gpc0", .std_type = STD_T_UINT },
+ [STKTABLE_DT_GPC0_RATE] = { .name = "gpc0_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_CONN_CNT] = { .name = "conn_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_CONN_RATE] = { .name = "conn_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_CONN_CUR] = { .name = "conn_cur", .std_type = STD_T_UINT, .is_local = 1 },
+ [STKTABLE_DT_SESS_CNT] = { .name = "sess_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_SESS_RATE] = { .name = "sess_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_HTTP_REQ_CNT] = { .name = "http_req_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_HTTP_REQ_RATE] = { .name = "http_req_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_HTTP_ERR_CNT] = { .name = "http_err_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_HTTP_ERR_RATE] = { .name = "http_err_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_BYTES_IN_CNT] = { .name = "bytes_in_cnt", .std_type = STD_T_ULL },
+ [STKTABLE_DT_BYTES_IN_RATE] = { .name = "bytes_in_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_BYTES_OUT_CNT] = { .name = "bytes_out_cnt", .std_type = STD_T_ULL },
+ [STKTABLE_DT_BYTES_OUT_RATE]= { .name = "bytes_out_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_GPC1] = { .name = "gpc1", .std_type = STD_T_UINT },
+ [STKTABLE_DT_GPC1_RATE] = { .name = "gpc1_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_SERVER_KEY] = { .name = "server_key", .std_type = STD_T_DICT, .as_is = 1 },
+ [STKTABLE_DT_HTTP_FAIL_CNT] = { .name = "http_fail_cnt", .std_type = STD_T_UINT },
+ [STKTABLE_DT_HTTP_FAIL_RATE]= { .name = "http_fail_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY },
+ [STKTABLE_DT_GPT] = { .name = "gpt", .std_type = STD_T_UINT, .is_array = 1, .as_is = 1 },
+ [STKTABLE_DT_GPC] = { .name = "gpc", .std_type = STD_T_UINT, .is_array = 1 },
+ [STKTABLE_DT_GPC_RATE] = { .name = "gpc_rate", .std_type = STD_T_FRQP, .is_array = 1, .arg_type = ARG_T_DELAY },
+};
+
+/* Registers stick-table extra data type with index <idx>, name <name>, type
+ * <std_type> and arg type <arg_type>. If the index is negative, the next free
+ * index is automatically allocated. The allocated index is returned, or -1 if
+ * no free index was found or <name> was already registered. The <name> is used
+ * directly as a pointer, so if it's not stable, the caller must allocate it.
+ */
+int stktable_register_data_store(int idx, const char *name, int std_type, int arg_type)
+{
+ if (idx < 0) {
+ for (idx = 0; idx < STKTABLE_DATA_TYPES; idx++) {
+ if (!stktable_data_types[idx].name)
+ break;
+
+ if (strcmp(stktable_data_types[idx].name, name) == 0)
+ return -1;
+ }
+ }
+
+ if (idx >= STKTABLE_DATA_TYPES)
+ return -1;
+
+ if (stktable_data_types[idx].name != NULL)
+ return -1;
+
+ stktable_data_types[idx].name = name;
+ stktable_data_types[idx].std_type = std_type;
+ stktable_data_types[idx].arg_type = arg_type;
+ return idx;
+}
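+
+/* Editorial registration sketch, with a hypothetical "my_cnt" data type; the
+ * name must point to storage that stays valid since only the pointer is kept:
+ *
+ *   static int my_cnt_dt;
+ *   my_cnt_dt = stktable_register_data_store(-1, "my_cnt", STD_T_UINT, ARG_T_STOP);
+ *   if (my_cnt_dt < 0)
+ *       ... no free slot, or the name was already registered ...
+ */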
+
+/*
+ * Returns the data type number for the stktable_data_type whose name is <name>,
+ * or a negative value if not found.
+ */
+int stktable_get_data_type(char *name)
+{
+ int type;
+
+ for (type = 0; type < STKTABLE_DATA_TYPES; type++) {
+ if (!stktable_data_types[type].name)
+ continue;
+ if (strcmp(name, stktable_data_types[type].name) == 0)
+ return type;
+ }
+ /* For backwards compatibility */
+ if (strcmp(name, "server_name") == 0)
+ return STKTABLE_DT_SERVER_KEY;
+ return -1;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns true if found, false otherwise. The input
+ * type is STR so that input samples are converted to string (since all types
+ * can be converted to strings), then the function casts the string again into
+ * the table's type. This is a double conversion, but in the future we might
+ * support automatic input types to perform the cast on the fly.
+ */
+static int sample_conv_in_table(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->data.type = SMP_T_BOOL;
+ smp->data.u.sint = !!ts;
+ smp->flags = SMP_F_VOL_TEST;
+ stktable_release(t, ts);
+ return 1;
+}
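+
+/* Editorial usage sketch: this converter is exposed as "in_table", e.g. with
+ * a hypothetical table "abusers":
+ *
+ *   http-request deny if { src,in_table(abusers) }
+ */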
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the data rate received from clients in bytes/s
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_bytes_in_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_BYTES_IN_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_BYTES_IN_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of connections for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_conn_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the number of concurrent connections for the
+ * key if the key is present in the table, otherwise zero, so that comparisons
+ * can be easily performed. If the inspected parameter is not stored in the
+ * table, <not found> is returned.
+ */
+static int sample_conv_table_conn_cur(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_CUR);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the rate of incoming connections from the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_conn_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_CONN_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the expiration delay for the key if the key
+ * is present in the table, otherwise the default value provided as second
+ * argument, if any; if no default value was provided, <not found> is returned.
+ */
+static int sample_conv_table_expire(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) { /* key not present */
+ if (arg_p[1].type == ARGT_STOP)
+ return 0;
+
+ /* default value */
+ smp->data.u.sint = arg_p[1].data.sint;
+ return 1;
+ }
+
+ smp->data.u.sint = tick_remain(now_ms, ts->expire);
+
+ stktable_release(t, ts);
+ return 1;
+}
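+
+/* Editorial usage sketch: exposed as "table_expire", whose optional second
+ * argument covers missing keys, e.g. with a hypothetical table "st_src":
+ *
+ *   http-response set-header X-Expire %[src,table_expire(st_src,-1)]
+ */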
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the time the key remains unused if the key
+ * is present in the table, otherwise the default value provided as second
+ * argument, if any; if no default value was provided, <not found> is returned.
+ */
+static int sample_conv_table_idle(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) { /* key not present */
+ if (arg_p[1].type == ARGT_STOP)
+ return 0;
+
+ /* default value */
+ smp->data.u.sint = arg_p[1].data.sint;
+ return 1;
+ }
+
+ smp->data.u.sint = tick_remain(tick_remain(now_ms, ts->expire), t->expire);
+
+ stktable_release(t, ts);
+ return 1;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the data rate sent to clients in bytes/s
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_bytes_out_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_BYTES_OUT_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_BYTES_OUT_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
+ * it up into this table. Returns the value of the GPT[arg_p(0)] tag for the key
+ * if the key is present in the table, otherwise false, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+ unsigned int idx;
+
+ idx = arg_p[0].data.sint;
+
+ t = arg_p[1].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPT, idx);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
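+
+/* Editorial usage sketch: exposed as "table_gpt", taking the tag index as
+ * first argument, e.g. with a hypothetical table "st_src":
+ *
+ *   acl flagged src,table_gpt(0,st_src) gt 0
+ */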
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the value of the GPT0 tag for the key
+ * if the key is present in the table, otherwise false, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpt0(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPT0);
+ if (!ptr)
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPT, 0);
+
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
+ * it up into this table. Returns the value of the GPC[arg_p(0)] counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+ unsigned int idx;
+
+ idx = arg_p[0].data.sint;
+
+ t = arg_p[1].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC, idx);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg_p(1), and looks
+ * it up into this table. Returns the event rate of the GPC[arg_p(0)] counter
+ * for the key if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not
+ * stored in the table, <not found> is returned.
+ */
+static int sample_conv_table_gpc_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+ unsigned int idx;
+
+ idx = arg_p[0].data.sint;
+
+ t = arg_p[1].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC_RATE, idx);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the value of the GPC0 counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc0(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC0);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC, 0);
+ }
+
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the event rate of the GPC0 counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc0_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC0_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC0_RATE].u);
+ else {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC_RATE, 0);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC_RATE].u);
+ }
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
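+
+/* Editorial usage sketch: exposed as "table_gpc0_rate"; thanks to the
+ * fallback above it also works on tables that only store the "gpc_rate"
+ * array, e.g. with a hypothetical table "st_src":
+ *
+ *   acl abuser src,table_gpc0_rate(st_src) gt 100
+ */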
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the value of the GPC1 counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc1(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC1);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC, 1);
+ }
+
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the event rate of the GPC1 counter for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_gpc1_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GPC1_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC1_RATE].u);
+ else {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(t, ts, STKTABLE_DT_GPC_RATE, 1);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_GPC_RATE].u);
+ }
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of HTTP request errors
+ * for the key if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not stored
+ * in the table, <not found> is returned.
+ */
+static int sample_conv_table_http_err_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the HTTP request error rate for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_http_err_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_ERR_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of HTTP response failures
+ * for the key if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not stored
+ * in the table, <not found> is returned.
+ */
+static int sample_conv_table_http_fail_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the HTTP response failure rate for the key
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_http_fail_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_FAIL_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of HTTP requests for the
+ * key if the key is present in the table, otherwise zero, so that comparisons
+ * can be easily performed. If the inspected parameter is not stored in the
+ * table, <not found> is returned.
+ */
+static int sample_conv_table_http_req_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_REQ_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the HTTP request rate for the key if the key is
+ * present in the table, otherwise zero, so that comparisons can be easily
+ * performed. If the inspected parameter is not stored in the table, <not found>
+ * is returned.
+ */
+static int sample_conv_table_http_req_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_REQ_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_HTTP_REQ_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the volume of data received from clients in kbytes
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_kbytes_in(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_BYTES_IN_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_ull) >> 10;
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the volume of data sent to clients in kbytes
+ * if the key is present in the table, otherwise zero, so that comparisons can
+ * be easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_kbytes_out(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_BYTES_OUT_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_ull) >> 10;
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the server ID associated with the key if the
+ * key is present in the table, otherwise zero, so that comparisons can be
+ * easily performed. If the inspected parameter is not stored in the table,
+ * <not found> is returned.
+ */
+static int sample_conv_table_server_id(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_ID);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_sint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the cumulated number of sessions for the
+ * key if the key is present in the table, otherwise zero, so that comparisons
+ * can be easily performed. If the inspected parameter is not stored in the
+ * table, <not found> is returned.
+ */
+static int sample_conv_table_sess_cnt(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_SESS_CNT);
+ if (ptr)
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the session rate for the key if the key is
+ * present in the table, otherwise zero, so that comparisons can be easily
+ * performed. If the inspected parameter is not stored in the table, <not found>
+ * is returned.
+ */
+static int sample_conv_table_sess_rate(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+ void *ptr;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts) /* key not present */
+ return 1;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_SESS_RATE);
+ if (ptr)
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[STKTABLE_DT_SESS_RATE].u);
+
+ stktable_release(t, ts);
+ return !!ptr;
+}
+
+/* Casts sample <smp> to the type of the table specified in arg(0), and looks
+ * it up into this table. Returns the number of concurrent connections tracking
+ * the same key if the key is present in the table, otherwise zero, so that
+ * comparisons can be easily performed. If the inspected parameter is not
+ * stored in the table, <not found> is returned.
+ */
+static int sample_conv_table_trackers(const struct arg *arg_p, struct sample *smp, void *private)
+{
+ struct stktable *t;
+ struct stktable_key *key;
+ struct stksess *ts;
+
+ t = arg_p[0].data.t;
+
+ key = smp_to_stkey(smp, t);
+ if (!key)
+ return 0;
+
+ ts = stktable_lookup_key(t, key);
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!ts)
+ return 1;
+
+ smp->data.u.sint = HA_ATOMIC_LOAD(&ts->ref_cnt);
+
+ stktable_release(t, ts);
+ return 1;
+}
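+
+/* Editorial usage sketch: exposed as "table_trackers", e.g. with a
+ * hypothetical table "st_src":
+ *
+ *   acl popular src,table_trackers(st_src) gt 10
+ */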
+
+/* This function increments the gpc counter at index 'rule->arg.gpc.idx' of the
+ * array on the track-sc counter of index 'rule->arg.gpc.sc' stored in the
+ * <stream>, or directly in the session <sess> if <stream> is NULL.
+ *
+ * This function always returns ACT_RET_CONT and the <flags> parameter is unused.
+ */
+static enum act_return action_inc_gpc(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stksess *ts;
+ struct stkctr *stkctr;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s)
+ stkctr = &s->stkctr[rule->arg.gpc.sc];
+ else
+ stkctr = &sess->stkctr[rule->arg.gpc.sc];
+
+ ts = stkctr_entry(stkctr);
+ if (ts) {
+ void *ptr1, *ptr2;
+
+ /* First, update gpc_rate if it's tracked. Second, update its gpc if tracked. */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC_RATE, rule->arg.gpc.idx);
+ ptr2 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC, rule->arg.gpc.idx);
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u, 1);
+
+ if (ptr2)
+ stktable_data_cast(ptr2, std_t_uint)++;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+ }
+ return ACT_RET_CONT;
+}
+
+/* Same as action_inc_gpc() but for gpc0 only */
+static enum act_return action_inc_gpc0(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stksess *ts;
+ struct stkctr *stkctr;
+ unsigned int period = 0;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s)
+ stkctr = &s->stkctr[rule->arg.gpc.sc];
+ else
+ stkctr = &sess->stkctr[rule->arg.gpc.sc];
+
+ ts = stkctr_entry(stkctr);
+ if (ts) {
+ void *ptr1, *ptr2;
+
+ /* First, update gpc0_rate if it's tracked. Second, update its gpc0 if tracked. */
+ ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPC0_RATE);
+ if (ptr1) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC0_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC_RATE, 0);
+ if (ptr1)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPC0);
+ if (!ptr2) {
+ /* fallback on the gpc array */
+ ptr2 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC, 0);
+ }
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ period, 1);
+
+ if (ptr2)
+ stktable_data_cast(ptr2, std_t_uint)++;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+ }
+ return ACT_RET_CONT;
+}
+
+/* Same as action_inc_gpc() but for gpc1 only */
+static enum act_return action_inc_gpc1(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stksess *ts;
+ struct stkctr *stkctr = NULL;
+ unsigned int period = 0;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s && s->stkctr)
+ stkctr = &s->stkctr[rule->arg.gpc.sc];
+ else if (sess->stkctr)
+ stkctr = &sess->stkctr[rule->arg.gpc.sc];
+ else
+ return ACT_RET_CONT;
+
+ ts = stkctr_entry(stkctr);
+ if (ts) {
+ void *ptr1, *ptr2;
+
+ /* First, update gpc1_rate if it's tracked. Second, update its gpc1 if tracked. */
+ ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPC1_RATE);
+ if (ptr1) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC1_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC_RATE, 1);
+ if (ptr1)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPC1);
+ if (!ptr2) {
+ /* fallback on the gpc array */
+ ptr2 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC, 1);
+ }
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ period, 1);
+
+ if (ptr2)
+ stktable_data_cast(ptr2, std_t_uint)++;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+ }
+ return ACT_RET_CONT;
+}
+
+/* This function is a common parser for actions incrementing the GPC
+ * (General Purpose Counters). It understands the formats:
+ *
+ * sc-inc-gpc(<gpc IDX>,<track ID>)
+ * sc-inc-gpc0([<track ID>])
+ * sc-inc-gpc1([<track ID>])
+ *
+ * It returns ACT_RET_PRS_ERR if it fails and <err> is filled with an error
+ * message. Otherwise it returns ACT_RET_PRS_OK.
+ */
+static enum act_parse_ret parse_inc_gpc(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ const char *cmd_name = args[*arg-1];
+ char *error;
+
+ if (!global.tune.nb_stk_ctr) {
+ memprintf(err, "Cannot use '%s', stick-counters are disabled via tune.stick-counters", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ cmd_name += strlen("sc-inc-gpc");
+ if (*cmd_name == '(') {
+ cmd_name++; /* skip the '(' */
+		rule->arg.gpc.idx = strtoul(cmd_name, &error, 10); /* Convert the GPC index. */
+ if (*error != ',') {
+ memprintf(err, "Missing gpc ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ else {
+ cmd_name = error + 1; /* skip the ',' */
+			rule->arg.gpc.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpc.sc >= global.tune.nb_stk_ctr) {
+ memprintf(err, "invalid stick table track ID '%s'. The max allowed ID is %d (tune.stick-counters)",
+ args[*arg-1], global.tune.nb_stk_ctr-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ rule->action_ptr = action_inc_gpc;
+ }
+	else if (*cmd_name == '0' || *cmd_name == '1') {
+ char c = *cmd_name;
+
+ cmd_name++;
+ if (*cmd_name == '\0') {
+ /* default stick table id. */
+ rule->arg.gpc.sc = 0;
+ } else {
+ /* parse the stick table id. */
+ if (*cmd_name != '(') {
+ memprintf(err, "invalid stick table track ID. Expects %s(<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ cmd_name++; /* jump the '(' */
+			rule->arg.gpc.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID. Expects %s(<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpc.sc >= global.tune.nb_stk_ctr) {
+ memprintf(err, "invalid stick table track ID. The max allowed ID is %d (tune.stick-counters)",
+ global.tune.nb_stk_ctr-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ if (c == '1')
+ rule->action_ptr = action_inc_gpc1;
+ else
+ rule->action_ptr = action_inc_gpc0;
+ }
+ else {
+		/* malformed gpc ID */
+ memprintf(err, "invalid gpc ID '%s'. Expects sc-inc-gpc(<GPC ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->action = ACT_CUSTOM;
+ return ACT_RET_PRS_OK;
+}
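+
+/* Editorial usage sketch: the three forms accepted by the parser above map
+ * to the three actions, for example:
+ *
+ *   tcp-request connection sc-inc-gpc(2,0)
+ *   tcp-request connection sc-inc-gpc0
+ *   tcp-request connection sc-inc-gpc1(1)
+ */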
+
+/* This function sets the gpt at index 'rule->arg.gpt.idx' of the array on the
+ * track-sc counter of index 'rule->arg.gpt.sc' stored in the <stream>, or
+ * directly in the session <sess> if <stream> is NULL. The gpt is set to the
+ * value computed by the expression 'rule->arg.gpt.expr' or, if
+ * 'rule->arg.gpt.expr' is NULL, directly to the value of 'rule->arg.gpt.value'.
+ *
+ * This function always returns ACT_RET_CONT and the <flags> parameter is unused.
+ */
+static enum act_return action_set_gpt(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ void *ptr;
+ struct stksess *ts;
+ struct stkctr *stkctr = NULL;
+ unsigned int value = 0;
+ struct sample *smp;
+ int smp_opt_dir;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s && s->stkctr)
+ stkctr = &s->stkctr[rule->arg.gpt.sc];
+ else if (sess->stkctr)
+ stkctr = &sess->stkctr[rule->arg.gpt.sc];
+ else
+ return ACT_RET_CONT;
+
+ ts = stkctr_entry(stkctr);
+ if (!ts)
+ return ACT_RET_CONT;
+
+ /* Store the sample in the required sc, and ignore errors. */
+ ptr = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPT, rule->arg.gpt.idx);
+ if (ptr) {
+
+ if (!rule->arg.gpt.expr)
+ value = (unsigned int)(rule->arg.gpt.value);
+ else {
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_SES: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ default:
+ send_log(px, LOG_ERR, "stick table: internal error while setting gpt%u.", rule->arg.gpt.idx);
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: internal error while executing setting gpt%u.\n", rule->arg.gpt.idx);
+ return ACT_RET_CONT;
+ }
+
+ /* Fetch and cast the expression. */
+ smp = sample_fetch_as_type(px, sess, s, smp_opt_dir|SMP_OPT_FINAL, rule->arg.gpt.expr, SMP_T_SINT);
+ if (!smp) {
+ send_log(px, LOG_WARNING, "stick table: invalid expression or data type while setting gpt%u.", rule->arg.gpt.idx);
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: invalid expression or data type while setting gpt%u.\n", rule->arg.gpt.idx);
+ return ACT_RET_CONT;
+ }
+ value = (unsigned int)(smp->data.u.sint);
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_data_cast(ptr, std_t_uint) = value;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* Same as action_set_gpt() but for gpt0 only. Always returns ACT_RET_CONT. */
+static enum act_return action_set_gpt0(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ void *ptr;
+ struct stksess *ts;
+ struct stkctr *stkctr = NULL;
+ unsigned int value = 0;
+ struct sample *smp;
+ int smp_opt_dir;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s && s->stkctr)
+ stkctr = &s->stkctr[rule->arg.gpt.sc];
+ else if (sess->stkctr)
+ stkctr = &sess->stkctr[rule->arg.gpt.sc];
+ else
+ return ACT_RET_CONT;
+
+ ts = stkctr_entry(stkctr);
+ if (!ts)
+ return ACT_RET_CONT;
+
+ /* Store the sample in the required sc, and ignore errors. */
+ ptr = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GPT0);
+ if (!ptr)
+ ptr = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPT, 0);
+
+ if (ptr) {
+ if (!rule->arg.gpt.expr)
+ value = (unsigned int)(rule->arg.gpt.value);
+ else {
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_SES: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ default:
+ send_log(px, LOG_ERR, "stick table: internal error while setting gpt0.");
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: internal error while executing setting gpt0.\n");
+ return ACT_RET_CONT;
+ }
+
+ /* Fetch and cast the expression. */
+ smp = sample_fetch_as_type(px, sess, s, smp_opt_dir|SMP_OPT_FINAL, rule->arg.gpt.expr, SMP_T_SINT);
+ if (!smp) {
+ send_log(px, LOG_WARNING, "stick table: invalid expression or data type while setting gpt0.");
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: invalid expression or data type while setting gpt0.\n");
+ return ACT_RET_CONT;
+ }
+ value = (unsigned int)(smp->data.u.sint);
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_data_cast(ptr, std_t_uint) = value;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* This function is a parser for the "sc-set-gpt" and "sc-set-gpt0" actions.
+ * It understands the formats:
+ *
+ * sc-set-gpt(<gpt IDX>,<track ID>) <expression>
+ * sc-set-gpt0(<track ID>) <expression>
+ *
+ * It returns ACT_RET_PRS_ERR if it fails and <err> is filled with an error message.
+ * Otherwise, it returns ACT_RET_PRS_OK and the variable 'rule->arg.gpt.expr'
+ * is filled with the pointer to the expression to execute, or NULL if the arg
+ * is directly an integer stored into 'rule->arg.gpt.value'.
+ */
+static enum act_parse_ret parse_set_gpt(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ const char *cmd_name = args[*arg-1];
+ char *error;
+ int smp_val;
+
+ if (!global.tune.nb_stk_ctr) {
+ memprintf(err, "Cannot use '%s', stick-counters are disabled via tune.stick-counters", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ cmd_name += strlen("sc-set-gpt");
+ if (*cmd_name == '(') {
+ cmd_name++; /* skip the '(' */
+		rule->arg.gpt.idx = strtoul(cmd_name, &error, 10); /* Convert the GPT index. */
+ if (*error != ',') {
+ memprintf(err, "Missing gpt ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ else {
+ cmd_name = error + 1; /* skip the ',' */
+			rule->arg.gpt.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpt.sc >= global.tune.nb_stk_ctr) {
+ memprintf(err, "invalid stick table track ID '%s'. The max allowed ID is %d",
+ args[*arg-1], global.tune.nb_stk_ctr-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ rule->action_ptr = action_set_gpt;
+ }
+ else if (*cmd_name == '0') {
+ cmd_name++;
+ if (*cmd_name == '\0') {
+ /* default stick table id. */
+ rule->arg.gpt.sc = 0;
+ } else {
+ /* parse the stick table id. */
+ if (*cmd_name != '(') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt0(<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ cmd_name++; /* jump the '(' */
+			rule->arg.gpt.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects sc-set-gpt0(<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpt.sc >= global.tune.nb_stk_ctr) {
+ memprintf(err, "invalid stick table track ID '%s'. The max allowed ID is %d",
+ args[*arg-1], global.tune.nb_stk_ctr-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ rule->action_ptr = action_set_gpt0;
+ }
+ else {
+		/* malformed gpt ID */
+ memprintf(err, "invalid gpt ID '%s'. Expects sc-set-gpt(<GPT ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* value may be either an integer or an expression */
+ rule->arg.gpt.expr = NULL;
+ rule->arg.gpt.value = strtol(args[*arg], &error, 10);
+ if (*error == '\0') {
+ /* valid integer, skip it */
+ (*arg)++;
+ } else {
+ rule->arg.gpt.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.gpt.expr)
+ return ACT_RET_PRS_ERR;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON: smp_val = SMP_VAL_FE_CON_ACC; break;
+ case ACT_F_TCP_REQ_SES: smp_val = SMP_VAL_FE_SES_ACC; break;
+ case ACT_F_TCP_REQ_CNT: smp_val = SMP_VAL_FE_REQ_CNT; break;
+ case ACT_F_TCP_RES_CNT: smp_val = SMP_VAL_BE_RES_CNT; break;
+ case ACT_F_HTTP_REQ: smp_val = SMP_VAL_FE_HRQ_HDR; break;
+ case ACT_F_HTTP_RES: smp_val = SMP_VAL_BE_HRS_HDR; break;
+ default:
+ memprintf(err, "internal error, unexpected rule->from=%d, please report this bug!", rule->from);
+ return ACT_RET_PRS_ERR;
+ }
+ if (!(rule->arg.gpt.expr->fetch->val & smp_val)) {
+ memprintf(err, "fetch method '%s' extracts information from '%s', none of which is available here", args[*arg-1],
+ sample_src_names(rule->arg.gpt.expr->fetch->use));
+ free(rule->arg.gpt.expr);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ rule->action = ACT_CUSTOM;
+
+ return ACT_RET_PRS_OK;
+}
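+
+/* Editorial usage sketch: the value may be an integer or a sample
+ * expression, for example:
+ *
+ *   http-request sc-set-gpt(0,0) 1
+ *   http-request sc-set-gpt0(1) int(42)
+ */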
+
+/* This function updates the gpc at index 'rule->arg.gpc.idx' of the array on
+ * the track-sc counter of index 'rule->arg.gpc.sc' stored in the <stream>, or
+ * directly in the session <sess> if <stream> is NULL. The gpc is increased by
+ * the value computed by the expression 'rule->arg.gpc.expr' or, if
+ * 'rule->arg.gpc.expr' is NULL, directly by the value of 'rule->arg.gpc.value'.
+ *
+ * This function always returns ACT_RET_CONT and the <flags> parameter is unused.
+ */
+static enum act_return action_add_gpc(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ void *ptr1, *ptr2;
+ struct stksess *ts;
+ struct stkctr *stkctr;
+ unsigned int value = 0;
+ struct sample *smp;
+ int smp_opt_dir;
+
+ /* Extract the stksess, return OK if no stksess available. */
+ if (s)
+ stkctr = &s->stkctr[rule->arg.gpc.sc];
+ else
+ stkctr = &sess->stkctr[rule->arg.gpc.sc];
+
+ ts = stkctr_entry(stkctr);
+ if (!ts)
+ return ACT_RET_CONT;
+
+ /* First, update gpc_rate if it's tracked. Second, update its gpc if tracked. */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC_RATE, rule->arg.gpc.idx);
+ ptr2 = stktable_data_ptr_idx(stkctr->table, ts, STKTABLE_DT_GPC, rule->arg.gpc.idx);
+
+ if (ptr1 || ptr2) {
+ if (!rule->arg.gpc.expr)
+ value = (unsigned int)(rule->arg.gpc.value);
+ else {
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_SES: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: smp_opt_dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: smp_opt_dir = SMP_OPT_DIR_RES; break;
+ default:
+ send_log(px, LOG_ERR, "stick table: internal error while setting gpc%u.", rule->arg.gpc.idx);
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: internal error while executing setting gpc%u.\n", rule->arg.gpc.idx);
+ return ACT_RET_CONT;
+ }
+
+ /* Fetch and cast the expression. */
+ smp = sample_fetch_as_type(px, sess, s, smp_opt_dir|SMP_OPT_FINAL, rule->arg.gpc.expr, SMP_T_SINT);
+ if (!smp) {
+ send_log(px, LOG_WARNING, "stick table: invalid expression or data type while setting gpc%u.", rule->arg.gpc.idx);
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("stick table: invalid expression or data type while setting gpc%u.\n", rule->arg.gpc.idx);
+ return ACT_RET_CONT;
+ }
+ value = (unsigned int)(smp->data.u.sint);
+ }
+
+ if (value) {
+ /* only update the value if non-null increment */
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (ptr1)
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u, value);
+
+ if (ptr2)
+ stktable_data_cast(ptr2, std_t_uint) += value;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ }
+ /* always touch the table so that it doesn't expire */
+ stktable_touch_local(stkctr->table, ts, 0);
+ }
+
+ return ACT_RET_CONT;
+}
+
+/* This function is a parser for the "sc-add-gpc" action. It understands the
+ * format:
+ *
+ * sc-add-gpc(<gpc IDX>,<track ID>) <expression>
+ *
+ * It returns ACT_RET_PRS_ERR if it fails and <err> is filled with an error message.
+ * Otherwise, it returns ACT_RET_PRS_OK and the variable 'rule->arg.gpc.expr'
+ * is filled with the pointer to the expression to execute, or NULL if the arg
+ * is directly an integer stored into 'rule->arg.gpc.value'.
+ */
+static enum act_parse_ret parse_add_gpc(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ const char *cmd_name = args[*arg-1];
+ char *error;
+ int smp_val;
+
+ cmd_name += strlen("sc-add-gpc");
+ if (*cmd_name != '(') {
+ memprintf(err, "Missing or invalid arguments for '%s'. Expects sc-add-gpc(<GPC ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ cmd_name++; /* skip the '(' */
+	rule->arg.gpc.idx = strtoul(cmd_name, &error, 10); /* Convert the GPC index. */
+ if (*error != ',') {
+ memprintf(err, "Missing gpc ID. Expects %s(<GPC ID>,<Track ID>)", args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ else {
+ cmd_name = error + 1; /* skip the ',' */
+		rule->arg.gpc.sc = strtol(cmd_name, &error, 10); /* Convert the track ID. */
+ if (*error != ')') {
+ memprintf(err, "invalid stick table track ID '%s'. Expects %s(<GPC ID>,<Track ID>)", cmd_name, args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (rule->arg.gpc.sc >= MAX_SESS_STKCTR) {
+ memprintf(err, "invalid stick table track ID '%s' for '%s'. The max allowed ID is %d",
+ cmd_name, args[*arg-1], MAX_SESS_STKCTR-1);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+ rule->action_ptr = action_add_gpc;
+
+ /* value may be either an integer or an expression */
+ rule->arg.gpc.expr = NULL;
+ rule->arg.gpc.value = strtol(args[*arg], &error, 10);
+ if (*error == '\0') {
+ /* valid integer, skip it */
+ (*arg)++;
+ } else {
+ rule->arg.gpc.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.gpc.expr)
+ return ACT_RET_PRS_ERR;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON: smp_val = SMP_VAL_FE_CON_ACC; break;
+ case ACT_F_TCP_REQ_SES: smp_val = SMP_VAL_FE_SES_ACC; break;
+ case ACT_F_TCP_REQ_CNT: smp_val = SMP_VAL_FE_REQ_CNT; break;
+ case ACT_F_TCP_RES_CNT: smp_val = SMP_VAL_BE_RES_CNT; break;
+ case ACT_F_HTTP_REQ: smp_val = SMP_VAL_FE_HRQ_HDR; break;
+ case ACT_F_HTTP_RES: smp_val = SMP_VAL_BE_HRS_HDR; break;
+ default:
+ memprintf(err, "internal error, unexpected rule->from=%d, please report this bug!", rule->from);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!(rule->arg.gpc.expr->fetch->val & smp_val)) {
+ memprintf(err, "fetch method '%s' extracts information from '%s', none of which is available here", args[*arg-1],
+ sample_src_names(rule->arg.gpc.expr->fetch->use));
+ free(rule->arg.gpc.expr);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ rule->action = ACT_CUSTOM;
+
+ return ACT_RET_PRS_OK;
+}
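+
+/* Editorial usage sketch: the increment may likewise be an integer or a
+ * sample expression, for example:
+ *
+ *   http-request sc-add-gpc(0,0) 10
+ *   http-request sc-add-gpc(1,0) req.hdr_cnt
+ */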
+
+/* set temp integer to the number of used entries in the table pointed to by expr.
+ * Accepts exactly 1 argument of type table.
+ */
+static int
+smp_fetch_table_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = args->data.t->current;
+ return 1;
+}
+
+/* set temp integer to the number of free entries in the table pointed to by expr.
+ * Accepts exactly 1 argument of type table.
+ */
+static int
+smp_fetch_table_avl(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stktable *t;
+
+ t = args->data.t;
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = t->size - t->current;
+ return 1;
+}
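+
+/* Editorial usage sketch: the two fetches above are exposed as "table_cnt"
+ * and "table_avl", e.g. to refuse new work when a hypothetical table
+ * "st_src" is nearly full:
+ *
+ *   http-request deny if { table_avl(st_src) lt 100 }
+ */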
+
+/* Returns a pointer to a stkctr depending on the fetch keyword name.
+ * It is designed to be called as sc[0-9]_*, sc_* or src_* exclusively.
+ * sc[0-9]_* will return a pointer to the respective field in the
+ * stream <l4>. sc_* requires a UINT argument specifying the stick
+ * counter number. src_* will fill a locally allocated structure with
+ * the table and entry corresponding to what is specified with src_*.
+ * NULL may be returned if the designated stkctr is not tracked. For
+ * the sc_* and sc[0-9]_* forms, an optional table argument may be
+ * passed. When present, the currently tracked key is then looked up
+ * in the specified table instead of the current table. The purpose is
+ * to be able to convert multiple values per key (eg: have gpc0 from
+ * multiple tables). <strm> is allowed to be NULL, in which case only
+ * the session will be consulted.
+ */
+struct stkctr *
+smp_fetch_sc_stkctr(struct session *sess, struct stream *strm, const struct arg *args, const char *kw, struct stkctr *stkctr)
+{
+ struct stkctr *stkptr;
+ struct stksess *stksess;
+ unsigned int num = kw[2] - '0';
+ int arg = 0;
+
+ if (num == '_' - '0') {
+ /* sc_* variant, args[0] = ctr# (mandatory) */
+ num = args[arg++].data.sint;
+ }
+ else if (num > 9) { /* src_* variant, args[0] = table */
+ struct stktable_key *key;
+ struct connection *conn = objt_conn(sess->origin);
+ struct sample smp;
+
+ if (!conn)
+ return NULL;
+
+ /* Fetch source address in a sample. */
+ smp.px = NULL;
+ smp.sess = sess;
+ smp.strm = strm;
+ if (!smp_fetch_src || !smp_fetch_src(empty_arg_list, &smp, "src", NULL))
+ return NULL;
+
+ /* Converts into key. */
+ key = smp_to_stkey(&smp, args->data.t);
+ if (!key)
+ return NULL;
+
+ stkctr->table = args->data.t;
+ stkctr_set_entry(stkctr, stktable_lookup_key(stkctr->table, key));
+ return stkctr;
+ }
+
+ /* Here, <num> contains the counter number from 0 to 9 for
+ * the sc[0-9]_ form, or even higher using sc_(num) if needed.
+	 * args[arg] is the first optional argument. We first look up the
+	 * ctr from the stream, then from the session if it was not there.
+ * But we must be sure the counter does not exceed global.tune.nb_stk_ctr.
+ */
+ if (num >= global.tune.nb_stk_ctr)
+ return NULL;
+
+ stkptr = NULL;
+ if (strm && strm->stkctr)
+ stkptr = &strm->stkctr[num];
+ if (!strm || !stkptr || !stkctr_entry(stkptr)) {
+ if (sess->stkctr)
+ stkptr = &sess->stkctr[num];
+ else
+ return NULL;
+ if (!stkctr_entry(stkptr))
+ return NULL;
+ }
+
+ stksess = stkctr_entry(stkptr);
+ if (!stksess)
+ return NULL;
+
+ if (unlikely(args[arg].type == ARGT_TAB)) {
+ /* an alternate table was specified, let's look up the same key there */
+ stkctr->table = args[arg].data.t;
+ stkctr_set_entry(stkctr, stktable_lookup(stkctr->table, stksess));
+ return stkctr;
+ }
+ return stkptr;
+}
+
+/* same as smp_fetch_sc_stkctr() but dedicated to src_* and can create
+ * the entry if it doesn't exist yet. This is needed for a few fetch
+ * functions which need to create an entry, such as src_inc_gpc* and
+ * src_clr_gpc*.
+ */
+struct stkctr *
+smp_create_src_stkctr(struct session *sess, struct stream *strm, const struct arg *args, const char *kw, struct stkctr *stkctr)
+{
+ struct stktable_key *key;
+ struct connection *conn = objt_conn(sess->origin);
+ struct sample smp;
+
+ if (strncmp(kw, "src_", 4) != 0)
+ return NULL;
+
+ if (!conn)
+ return NULL;
+
+ /* Fetch source address in a sample. */
+ smp.px = NULL;
+ smp.sess = sess;
+ smp.strm = strm;
+ if (!smp_fetch_src || !smp_fetch_src(empty_arg_list, &smp, "src", NULL))
+ return NULL;
+
+ /* Converts into key. */
+ key = smp_to_stkey(&smp, args->data.t);
+ if (!key)
+ return NULL;
+
+ stkctr->table = args->data.t;
+ stkctr_set_entry(stkctr, stktable_get_entry(stkctr->table, key));
+ return stkctr;
+}
+
+/* set <smp> to a boolean indicating whether the requested stream counter is
+ * currently being tracked or not.
+ * Supports being called as "sc[0-9]_tracked" only.
+ */
+static int
+smp_fetch_sc_tracked(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_BOOL;
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ smp->data.u.sint = !!stkctr;
+
+ /* release the ref count */
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+
+ return 1;
+}
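+
+/* Illustrative example (not from the sources): sc0_tracked simply reports
+ * whether a track-sc0 rule already stored an entry for this connection:
+ *
+ *    tcp-request content track-sc0 src
+ *    http-request deny if !{ sc0_tracked }
+ */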
+
+/* set <smp> to the value of the General Purpose Tag at the index given as
+ * first argument, from the stream's tracked frontend counters or from the src.
+ * Supports being called as "sc_get_gpt(<gpt-idx>,<sc-idx>[,<table>])" or
+ * "src_get_gpt(<gpt-idx>[,<table>])" only. Value zero is returned if
+ * the key is new or gpt is not stored.
+ */
+static int
+smp_fetch_sc_get_gpt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPT, idx);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
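+
+/* Illustrative example (not from the sources): with a table declaring a gpt
+ * array, e.g. "store gpt(3)", the tag at a given index may be read back as:
+ *
+ *    acl flagged sc_get_gpt(2,0) eq 1     # tag index 2, tracked counter 0
+ *    acl flagged src_get_gpt(2,st_src) eq 1
+ */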
+
+/* set <smp> to the General Purpose Tag 0 value from the stream's tracked
+ * frontend counters or from the src.
+ * Supports being called as "sc[0-9]_get_gpt0" or "src_get_gpt0" only. Value
+ * zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_get_gpt0(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPT0);
+ if (!ptr)
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPT, 0);
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the GPC[args(0)]'s value from the stream's tracked
+ * frontend counters or from the src.
+ * Supports being called as "sc_get_gpc(<gpc-idx>,<sc-idx>[,<table>])" or
+ * "src_get_gpc(<gpc-idx>[,<table>])" only. Value
+ * Value zero is returned if the key is new or gpc is not stored.
+ */
+static int
+smp_fetch_sc_get_gpc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, idx);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the General Purpose Counter 0 value from the stream's tracked
+ * frontend counters or from the src.
+ * Supports being called as "sc[0-9]_get_gpc0" or "src_get_gpc0" only. Value
+ * zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_get_gpc0(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 0);
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the General Purpose Counter 1 value from the stream's tracked
+ * frontend counters or from the src.
+ * Supports being called as "sc[0-9]_get_gpc1" or "src_get_gpc1" only. Value
+ * zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_get_gpc1(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 1);
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the GPC[args(0)]'s event rate from the stream's
+ * tracked frontend counters or from the src.
+ * Supports being called as "sc_gpc_rate(<gpc-idx>,<sc-idx>[,<table])"
+ * or "src_gpc_rate(<gpc-idx>[,<table>])" only.
+ * Value zero is returned if the key is new or gpc_rate is not stored.
+ */
+static int
+smp_fetch_sc_gpc_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, idx);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
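+
+/* Illustrative example (not from the sources): gpc_rate is an array type
+ * taking both a count and a period in the table declaration, e.g.:
+ *
+ *    stick-table type ip size 100k expire 30m store gpc_rate(3,10s)
+ *
+ * after which sc_gpc_rate(0,0) reads the event rate of gpc index 0 over
+ * the last 10s for the first tracked counter.
+ */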
+
+/* set <smp> to the General Purpose Counter 0's event rate from the stream's
+ * tracked frontend counters or from the src.
+ * Supports being called as "sc[0-9]_gpc0_rate" or "src_gpc0_rate" only.
+ * Value zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_gpc0_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int period;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0_RATE);
+ if (ptr) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC0_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, 0);
+ if (ptr)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), period);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the General Purpose Counter 1's event rate from the stream's
+ * tracked frontend counters or from the src.
+ * Supports being called as "sc[0-9]_gpc1_rate" or "src_gpc1_rate" only.
+ * Value zero is returned if the key is new.
+ */
+static int
+smp_fetch_sc_gpc1_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int period;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1_RATE);
+ if (ptr) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC1_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, 1);
+ if (ptr)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), period);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* Increment the GPC[args(0)] value from the stream's tracked
+ * frontend counters and return it into temp integer.
+ * Supports being called as "sc_inc_gpc(<gpc-idx>,<sc-idx>[,<table>])"
+ * or "src_inc_gpc(<gpc-idx>[,<table>])" only.
+ */
+static int
+smp_fetch_sc_inc_gpc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+		void *ptr1, *ptr2;
+
+		/* First, update the gpc rate at this index if it's tracked,
+		 * then update the gpc itself if tracked. Returns the gpc's
+		 * new value if stored, otherwise the rate's curr_ctr.
+		 */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, idx);
+ ptr2 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, idx);
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (ptr1) {
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u, 1);
+ smp->data.u.sint = (&stktable_data_cast(ptr1, std_t_frqp))->curr_ctr;
+ }
+
+ if (ptr2)
+ smp->data.u.sint = ++stktable_data_cast(ptr2, std_t_uint);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ else if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
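+
+/* Illustrative example (not from the sources): a fetch-based increment is
+ * typically hidden behind an ACL so that the counter is only bumped when
+ * the rule is evaluated, e.g. counting sensitive requests per source (the
+ * names "mark_abuser" and "st_src" are hypothetical):
+ *
+ *    acl mark_abuser src_inc_gpc(0,st_src) gt 5
+ *    http-request deny if { path_beg /login } mark_abuser
+ */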
+
+/* Increment the General Purpose Counter 0 value from the stream's tracked
+ * frontend counters and return it into temp integer.
+ * Supports being called as "sc[0-9]_inc_gpc0" or "src_inc_gpc0" only.
+ */
+static int
+smp_fetch_sc_inc_gpc0(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int period = 0;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+		void *ptr1, *ptr2;
+
+		/* First, update gpc0_rate if it's tracked, then update gpc0
+		 * itself if tracked. Returns gpc0's new value if stored,
+		 * otherwise the rate's curr_ctr.
+		 */
+ ptr1 = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0_RATE);
+ if (ptr1) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC0_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, 0);
+ if (ptr1)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ ptr2 = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0);
+ if (!ptr2) {
+ /* fallback on the gpc array */
+ ptr2 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 0);
+ }
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (ptr1) {
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ period, 1);
+ smp->data.u.sint = (&stktable_data_cast(ptr1, std_t_frqp))->curr_ctr;
+ }
+
+ if (ptr2)
+ smp->data.u.sint = ++stktable_data_cast(ptr2, std_t_uint);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ else if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* Increment the General Purpose Counter 1 value from the stream's tracked
+ * frontend counters and return it into temp integer.
+ * Supports being called as "sc[0-9]_inc_gpc1" or "src_inc_gpc1" only.
+ */
+static int
+smp_fetch_sc_inc_gpc1(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int period = 0;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+		void *ptr1, *ptr2;
+
+		/* First, update gpc1_rate if it's tracked, then update gpc1
+		 * itself if tracked. Returns gpc1's new value if stored,
+		 * otherwise the rate's curr_ctr.
+		 */
+ ptr1 = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1_RATE);
+ if (ptr1) {
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC1_RATE].u;
+ }
+ else {
+ /* fallback on the gpc array */
+ ptr1 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC_RATE, 1);
+ if (ptr1)
+ period = stkctr->table->data_arg[STKTABLE_DT_GPC_RATE].u;
+ }
+
+ ptr2 = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1);
+ if (!ptr2) {
+ /* fallback on the gpc array */
+ ptr2 = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 1);
+ }
+
+ if (ptr1 || ptr2) {
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (ptr1) {
+ update_freq_ctr_period(&stktable_data_cast(ptr1, std_t_frqp),
+ period, 1);
+ smp->data.u.sint = (&stktable_data_cast(ptr1, std_t_frqp))->curr_ctr;
+ }
+
+ if (ptr2)
+ smp->data.u.sint = ++stktable_data_cast(ptr2, std_t_uint);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ else if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* Clear the GPC[args(0)] value from the stream's tracked
+ * frontend counters and return its previous value into temp integer.
+ * Supports being called as "sc_clr_gpc(<gpc-idx>,<sc-idx>[,<table>])"
+ * or "src_clr_gpc(<gpc-idx>[,<table>])" only.
+ */
+static int
+smp_fetch_sc_clr_gpc(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+ unsigned int idx;
+
+ idx = args[0].data.sint;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args + 1, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, idx);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+ stktable_data_cast(ptr, std_t_uint) = 0;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ return 1;
+}
+
+/* Clear the General Purpose Counter 0 value from the stream's tracked
+ * frontend counters and return its previous value into temp integer.
+ * Supports being called as "sc[0-9]_clr_gpc0" or "src_clr_gpc0" only.
+ */
+static int
+smp_fetch_sc_clr_gpc0(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC0);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 0);
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+ stktable_data_cast(ptr, std_t_uint) = 0;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ return 1;
+}
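+
+/* Illustrative example (not from the sources): reading-and-resetting gpc0
+ * through this fetch lets a periodic consumer drain the counter, e.g.
+ * exposing it once in a response header (header name is hypothetical):
+ *
+ *    http-response set-header X-Drained-GPC0 %[sc0_clr_gpc0]
+ */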
+
+/* Clear the General Purpose Counter 1 value from the stream's tracked
+ * frontend counters and return its previous value into temp integer.
+ * Supports being called as "sc[0-9]_clr_gpc1" or "src_clr_gpc1" only.
+ */
+static int
+smp_fetch_sc_clr_gpc1(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+
+ if (!stkctr_entry(stkctr))
+ stkctr = smp_create_src_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+
+ if (stkctr && stkctr_entry(stkctr)) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC1);
+ if (!ptr) {
+ /* fallback on the gpc array */
+ ptr = stktable_data_ptr_idx(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GPC, 1);
+ }
+
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+ stktable_data_cast(ptr, std_t_uint) = 0;
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ /* If data was modified, we need to touch to re-schedule sync */
+ stktable_touch_local(stkctr->table, stkctr_entry(stkctr), (stkctr == &tmpstkctr) ? 1 : 0);
+ }
+ return 1;
+}
+
+/* set <smp> to the cumulated number of connections from the stream's tracked
+ * frontend counters. Supports being called as "sc[0-9]_conn_cnt" or
+ * "src_conn_cnt" only.
+ */
+static int
+smp_fetch_sc_conn_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_CONN_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the connection rate from the stream's tracked frontend
+ * counters. Supports being called as "sc[0-9]_conn_rate" or "src_conn_rate"
+ * only.
+ */
+static int
+smp_fetch_sc_conn_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_CONN_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_CONN_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
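+
+/* Illustrative example (not from the sources): the classic connection rate
+ * limit built on this fetch, assuming conn_rate(10s) is stored in the
+ * frontend's own stick-table:
+ *
+ *    tcp-request connection track-sc0 src
+ *    tcp-request connection reject if { sc0_conn_rate gt 50 }
+ */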
+
+/* set temp integer to the number of connections from the stream's source
+ * address in the table passed as argument, after incrementing it.
+ * Accepts exactly 1 argument of type table.
+ */
+static int
+smp_fetch_src_updt_conn_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = objt_conn(smp->sess->origin);
+ struct stksess *ts;
+ struct stktable_key *key;
+ void *ptr;
+ struct stktable *t;
+
+ if (!conn)
+ return 0;
+
+ /* Fetch source address in a sample. */
+ if (!smp_fetch_src || !smp_fetch_src(empty_arg_list, smp, "src", NULL))
+ return 0;
+
+ /* Converts into key. */
+ key = smp_to_stkey(smp, args->data.t);
+ if (!key)
+ return 0;
+
+ t = args->data.t;
+
+ if ((ts = stktable_get_entry(t, key)) == NULL)
+ /* entry does not exist and could not be created */
+ return 0;
+
+ ptr = stktable_data_ptr(t, ts, STKTABLE_DT_CONN_CNT);
+ if (!ptr) {
+ return 0; /* parameter not stored in this table */
+ }
+
+ smp->data.type = SMP_T_SINT;
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+
+ smp->data.u.sint = ++stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ smp->flags = SMP_F_VOL_TEST;
+
+ stktable_touch_local(t, ts, 1);
+
+	/* the entry was touched above to update its expiry and schedule a peers sync */
+ return 1;
+}
+
+/* set <smp> to the number of concurrent connections from the stream's tracked
+ * frontend counters. Supports being called as "sc[0-9]_conn_cur" or
+ * "src_conn_cur" only.
+ */
+static int
+smp_fetch_sc_conn_cur(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_CONN_CUR);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the cumulated number of streams from the stream's tracked
+ * frontend counters. Supports being called as "sc[0-9]_sess_cnt" or
+ * "src_sess_cnt" only.
+ */
+static int
+smp_fetch_sc_sess_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_SESS_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the stream rate from the stream's tracked frontend counters.
+ * Supports being called as "sc[0-9]_sess_rate" or "src_sess_rate" only.
+ */
+static int
+smp_fetch_sc_sess_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_SESS_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_SESS_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the cumulated number of HTTP requests from the stream's tracked
+ * frontend counters. Supports being called as "sc[0-9]_http_req_cnt" or
+ * "src_http_req_cnt" only.
+ */
+static int
+smp_fetch_sc_http_req_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_REQ_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the HTTP request rate from the stream's tracked frontend
+ * counters. Supports being called as "sc[0-9]_http_req_rate" or
+ * "src_http_req_rate" only.
+ */
+static int
+smp_fetch_sc_http_req_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_REQ_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_HTTP_REQ_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
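+
+/* Illustrative example (not from the sources): HTTP request rate limiting
+ * per source address, assuming http_req_rate(10s) is stored in the table:
+ *
+ *    tcp-request content track-sc0 src
+ *    http-request deny deny_status 429 if { sc0_http_req_rate gt 100 }
+ */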
+
+/* set <smp> to the cumulated number of HTTP requests errors from the stream's
+ * tracked frontend counters. Supports being called as "sc[0-9]_http_err_cnt" or
+ * "src_http_err_cnt" only.
+ */
+static int
+smp_fetch_sc_http_err_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_ERR_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the HTTP request error rate from the stream's tracked frontend
+ * counters. Supports being called as "sc[0-9]_http_err_rate" or
+ * "src_http_err_rate" only.
+ */
+static int
+smp_fetch_sc_http_err_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_ERR_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_HTTP_ERR_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the cumulated number of HTTP response failures from the stream's
+ * tracked frontend counters. Supports being called as "sc[0-9]_http_fail_cnt" or
+ * "src_http_fail_cnt" only.
+ */
+static int
+smp_fetch_sc_http_fail_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_FAIL_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_uint);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the HTTP response failure rate from the stream's tracked frontend
+ * counters. Supports being called as "sc[0-9]_http_fail_rate" or
+ * "src_http_fail_rate" only.
+ */
+static int
+smp_fetch_sc_http_fail_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_HTTP_FAIL_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_HTTP_FAIL_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the number of kbytes received from clients, as found in the
+ * stream's tracked frontend counters. Supports being called as
+ * "sc[0-9]_kbytes_in" or "src_kbytes_in" only.
+ */
+static int
+smp_fetch_sc_kbytes_in(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_IN_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_ull) >> 10;
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the data rate received from clients in bytes/s, as found
+ * in the stream's tracked frontend counters. Supports being called as
+ * "sc[0-9]_bytes_in_rate" or "src_bytes_in_rate" only.
+ */
+static int
+smp_fetch_sc_bytes_in_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_IN_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_BYTES_IN_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the number of kbytes sent to clients, as found in the
+ * stream's tracked frontend counters. Supports being called as
+ * "sc[0-9]_kbytes_out" or "src_kbytes_out" only.
+ */
+static int
+smp_fetch_sc_kbytes_out(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_OUT_CNT);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = stktable_data_cast(ptr, std_t_ull) >> 10;
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the data rate sent to clients in bytes/s, as found in the
+ * stream's tracked frontend counters. Supports being called as
+ * "sc[0-9]_bytes_out_rate" or "src_bytes_out_rate" only.
+ */
+static int
+smp_fetch_sc_bytes_out_rate(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = 0;
+ if (stkctr_entry(stkctr) != NULL) {
+ void *ptr;
+
+ ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_BYTES_OUT_RATE);
+ if (!ptr) {
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ return 0; /* parameter not stored */
+ }
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ stkctr->table->data_arg[STKTABLE_DT_BYTES_OUT_RATE].u);
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock);
+
+ if (stkctr == &tmpstkctr)
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ return 1;
+}
+
+/* set <smp> to the number of active trackers on the SC entry in the stream's
+ * tracked frontend counters. Supports being called as "sc[0-9]_trackers" only.
+ */
+static int
+smp_fetch_sc_trackers(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct stkctr tmpstkctr;
+ struct stkctr *stkctr;
+
+ stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr);
+ if (!stkctr)
+ return 0;
+
+ smp->flags = SMP_F_VOL_TEST;
+ smp->data.type = SMP_T_SINT;
+ if (stkctr == &tmpstkctr) {
+ smp->data.u.sint = stkctr_entry(stkctr) ? (HA_ATOMIC_LOAD(&stkctr_entry(stkctr)->ref_cnt) - 1) : 0;
+ stktable_release(stkctr->table, stkctr_entry(stkctr));
+ }
+ else {
+ smp->data.u.sint = stkctr_entry(stkctr) ? HA_ATOMIC_LOAD(&stkctr_entry(stkctr)->ref_cnt) : 0;
+ }
+
+ return 1;
+}
+
+
+/* The functions below are used to manipulate table contents from the CLI.
+ * There are 3 main actions, "clear", "set" and "show". The code is shared
+ * between all actions, and the action is encoded in the void *private in
+ * the appctx as well as in the keyword registration, among one of the
+ * following values.
+ */
+
+enum {
+ STK_CLI_ACT_CLR,
+ STK_CLI_ACT_SET,
+ STK_CLI_ACT_SHOW,
+};
+
+/* Dump the status of a table to a stream connector's
+ * read buffer. It returns 0 if the output buffer is full
+ * and needs to be called again, otherwise non-zero.
+ */
+static int table_dump_head_to_buffer(struct buffer *msg,
+ struct appctx *appctx,
+ struct stktable *t, struct stktable *target)
+{
+ struct stream *s = __sc_strm(appctx_sc(appctx));
+
+ chunk_appendf(msg, "# table: %s, type: %s, size:%d, used:%d\n",
+ t->id, stktable_types[t->type].kw, t->size, t->current);
+
+ /* any other information should be dumped here */
+
+ if (target && (strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) < ACCESS_LVL_OPER)
+ chunk_appendf(msg, "# contents not dumped due to insufficient privileges\n");
+
+ if (applet_putchk(appctx, msg) == -1)
+ return 0;
+
+ return 1;
+}
+
+/* Dump a table entry to a stream connector's
+ * read buffer. It returns 0 if the output buffer is full
+ * and needs to be called again, otherwise non-zero.
+ */
+static int table_dump_entry_to_buffer(struct buffer *msg,
+ struct appctx *appctx,
+ struct stktable *t, struct stksess *entry)
+{
+ int dt;
+
+ chunk_appendf(msg, "%p:", entry);
+
+ if (t->type == SMP_T_IPV4) {
+ char addr[INET_ADDRSTRLEN];
+ inet_ntop(AF_INET, (const void *)&entry->key.key, addr, sizeof(addr));
+ chunk_appendf(msg, " key=%s", addr);
+ }
+ else if (t->type == SMP_T_IPV6) {
+ char addr[INET6_ADDRSTRLEN];
+ inet_ntop(AF_INET6, (const void *)&entry->key.key, addr, sizeof(addr));
+ chunk_appendf(msg, " key=%s", addr);
+ }
+ else if (t->type == SMP_T_SINT) {
+ chunk_appendf(msg, " key=%u", read_u32(entry->key.key));
+ }
+ else if (t->type == SMP_T_STR) {
+ chunk_appendf(msg, " key=");
+ dump_text(msg, (const char *)entry->key.key, t->key_size);
+ }
+ else {
+ chunk_appendf(msg, " key=");
+ dump_binary(msg, (const char *)entry->key.key, t->key_size);
+ }
+
+ chunk_appendf(msg, " use=%d exp=%d shard=%d", HA_ATOMIC_LOAD(&entry->ref_cnt) - 1, tick_remain(now_ms, entry->expire), entry->shard);
+
+ for (dt = 0; dt < STKTABLE_DATA_TYPES; dt++) {
+ void *ptr;
+
+ if (t->data_ofs[dt] == 0)
+ continue;
+ if (stktable_data_types[dt].is_array) {
+ char tmp[16] = {};
+ const char *name_pfx = stktable_data_types[dt].name;
+ const char *name_sfx = NULL;
+ unsigned int idx = 0;
+ int i = 0;
+
+			/* split the name to insert the index before the first '_',
+			 * for example: 'gpc3_rate' if the array name is 'gpc_rate'.
+			 */
+ for (i = 0 ; i < (sizeof(tmp) - 1); i++) {
+ if (!name_pfx[i])
+ break;
+ if (name_pfx[i] == '_') {
+ name_pfx = &tmp[0];
+ name_sfx = &stktable_data_types[dt].name[i];
+ break;
+ }
+ tmp[i] = name_pfx[i];
+ }
+
+ ptr = stktable_data_ptr_idx(t, entry, dt, idx);
+ while (ptr) {
+ if (stktable_data_types[dt].arg_type == ARG_T_DELAY)
+ chunk_appendf(msg, " %s%u%s(%u)=", name_pfx, idx, name_sfx ? name_sfx : "", t->data_arg[dt].u);
+ else
+ chunk_appendf(msg, " %s%u%s=", name_pfx, idx, name_sfx ? name_sfx : "");
+ switch (stktable_data_types[dt].std_type) {
+ case STD_T_SINT:
+ chunk_appendf(msg, "%d", stktable_data_cast(ptr, std_t_sint));
+ break;
+ case STD_T_UINT:
+ chunk_appendf(msg, "%u", stktable_data_cast(ptr, std_t_uint));
+ break;
+ case STD_T_ULL:
+ chunk_appendf(msg, "%llu", stktable_data_cast(ptr, std_t_ull));
+ break;
+ case STD_T_FRQP:
+ chunk_appendf(msg, "%u",
+ read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[dt].u));
+ break;
+ }
+ ptr = stktable_data_ptr_idx(t, entry, dt, ++idx);
+ }
+ continue;
+ }
+ if (stktable_data_types[dt].arg_type == ARG_T_DELAY)
+ chunk_appendf(msg, " %s(%u)=", stktable_data_types[dt].name, t->data_arg[dt].u);
+ else
+ chunk_appendf(msg, " %s=", stktable_data_types[dt].name);
+
+ ptr = stktable_data_ptr(t, entry, dt);
+ switch (stktable_data_types[dt].std_type) {
+ case STD_T_SINT:
+ chunk_appendf(msg, "%d", stktable_data_cast(ptr, std_t_sint));
+ break;
+ case STD_T_UINT:
+ chunk_appendf(msg, "%u", stktable_data_cast(ptr, std_t_uint));
+ break;
+ case STD_T_ULL:
+ chunk_appendf(msg, "%llu", stktable_data_cast(ptr, std_t_ull));
+ break;
+ case STD_T_FRQP:
+ chunk_appendf(msg, "%u",
+ read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ t->data_arg[dt].u));
+ break;
+ case STD_T_DICT: {
+ struct dict_entry *de;
+ de = stktable_data_cast(ptr, std_t_dict);
+ chunk_appendf(msg, "%s", de ? (char *)de->value.key : "-");
+ break;
+ }
+ }
+ }
+ chunk_appendf(msg, "\n");
+
+ if (applet_putchk(appctx, msg) == -1)
+ return 0;
+
+ return 1;
+}
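+
+/* For illustration only (the values below are made up): an entry dumped by
+ * the function above looks like this on the CLI, with one "name=value" pair
+ * per stored data type and arrays expanded with their index:
+ *
+ *    0x7f0d4c02a8e0: key=192.0.2.1 use=0 exp=29541 shard=0 gpc0=3 conn_rate(10000)=2
+ */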
+
+/* appctx context used by the "show table" command */
+struct show_table_ctx {
+ void *target; /* table we want to dump, or NULL for all */
+ struct stktable *t; /* table being currently dumped (first if NULL) */
+ struct stksess *entry; /* last entry we were trying to dump (or first if NULL) */
+ long long value[STKTABLE_FILTER_LEN]; /* value to compare against */
+ signed char data_type[STKTABLE_FILTER_LEN]; /* type of data to compare, or -1 if none */
+ signed char data_op[STKTABLE_FILTER_LEN]; /* operator (STD_OP_*) when data_type set */
+ enum {
+ STATE_NEXT = 0, /* px points to next table, entry=NULL */
+ STATE_DUMP, /* px points to curr table, entry is valid, refcount held */
+ STATE_DONE, /* done dumping */
+ } state;
+ char action; /* action on the table : one of STK_CLI_ACT_* */
+};
+
+/* Processes a single table entry matching a specific key passed in argument.
+ * Returns 0 if it wants to be called again, 1 if it has ended processing.
+ */
+static int table_process_entry_per_key(struct appctx *appctx, char **args)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+ struct stktable *t = ctx->target;
+ struct stksess *ts;
+ struct sample key;
+ long long value;
+ int data_type;
+ int cur_arg;
+ void *ptr;
+ struct freq_ctr *frqp;
+
+ if (!*args[4])
+ return cli_err(appctx, "Key value expected\n");
+
+ memset(&key, 0, sizeof(key));
+ key.data.type = SMP_T_STR;
+ key.data.u.str.area = args[4];
+ key.data.u.str.data = strlen(args[4]);
+
+ switch (t->type) {
+ case SMP_T_IPV4:
+ case SMP_T_IPV6:
+ /* prefer input format over table type when parsing ip addresses,
+ * then let smp_to_stkey() do the conversion for us when needed
+ */
+ BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]);
+ if (!sample_casts[key.data.type][SMP_T_ADDR](&key))
+ return cli_err(appctx, "Invalid key\n");
+ break;
+ case SMP_T_SINT:
+ case SMP_T_STR:
+ break;
+ default:
+ switch (ctx->action) {
+ case STK_CLI_ACT_SHOW:
+ return cli_err(appctx, "Showing keys from tables of type other than ip, ipv6, string and integer is not supported\n");
+ case STK_CLI_ACT_CLR:
+ return cli_err(appctx, "Removing keys from tables of type other than ip, ipv6, string and integer is not supported\n");
+ case STK_CLI_ACT_SET:
+ return cli_err(appctx, "Inserting keys into tables of type other than ip, ipv6, string and integer is not supported\n");
+ default:
+ return cli_err(appctx, "Unknown action\n");
+ }
+ }
+
+ /* try to convert key according to table type
+ * (it will fill static_table_key on success)
+ */
+ if (!smp_to_stkey(&key, t))
+ return cli_err(appctx, "Invalid key\n");
+
+ /* check permissions */
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ switch (ctx->action) {
+ case STK_CLI_ACT_SHOW:
+ ts = stktable_lookup_key(t, &static_table_key);
+ if (!ts)
+ return 1;
+ chunk_reset(&trash);
+ if (!table_dump_head_to_buffer(&trash, appctx, t, t)) {
+ stktable_release(t, ts);
+ return 0;
+ }
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
+ if (!table_dump_entry_to_buffer(&trash, appctx, t, ts)) {
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_release(t, ts);
+ return 0;
+ }
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_release(t, ts);
+ break;
+
+ case STK_CLI_ACT_CLR:
+ ts = stktable_lookup_key(t, &static_table_key);
+ if (!ts)
+ return 1;
+
+ if (!stksess_kill(t, ts, 1)) {
+ /* don't delete an entry which is currently referenced */
+ return cli_err(appctx, "Entry currently in use, cannot remove\n");
+ }
+ break;
+
+ case STK_CLI_ACT_SET:
+ ts = stktable_get_entry(t, &static_table_key);
+ if (!ts) {
+ /* don't delete an entry which is currently referenced */
+ return cli_err(appctx, "Unable to allocate a new entry\n");
+ }
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+ for (cur_arg = 5; *args[cur_arg]; cur_arg += 2) {
+ if (strncmp(args[cur_arg], "data.", 5) != 0) {
+ cli_err(appctx, "\"data.<type>\" followed by a value expected\n");
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ return 1;
+ }
+
+ data_type = stktable_get_data_type(args[cur_arg] + 5);
+ if (data_type < 0) {
+ cli_err(appctx, "Unknown data type\n");
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ return 1;
+ }
+
+ if (!t->data_ofs[data_type]) {
+ cli_err(appctx, "Data type not stored in this table\n");
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ return 1;
+ }
+
+ if (!*args[cur_arg+1] || strl2llrc(args[cur_arg+1], strlen(args[cur_arg+1]), &value) != 0) {
+ cli_err(appctx, "Require a valid integer value to store\n");
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ return 1;
+ }
+
+ ptr = stktable_data_ptr(t, ts, data_type);
+
+ switch (stktable_data_types[data_type].std_type) {
+ case STD_T_SINT:
+ stktable_data_cast(ptr, std_t_sint) = value;
+ break;
+ case STD_T_UINT:
+ stktable_data_cast(ptr, std_t_uint) = value;
+ break;
+ case STD_T_ULL:
+ stktable_data_cast(ptr, std_t_ull) = value;
+ break;
+ case STD_T_FRQP:
+ /* We set both the current and previous values. That way
+ * the reported frequency is stable during all the period
+ * then slowly fades out. This allows external tools to
+ * push measures without having to update them too often.
+ */
+ frqp = &stktable_data_cast(ptr, std_t_frqp);
+				/* First bit is reserved for the freq_ctr lock.
+				 * Note: here we're still protected by the stksess
+				 * lock so we don't need to update the freq_ctr
+				 * using its internal lock.
+				 */
+ frqp->curr_tick = now_ms & ~0x1;
+ frqp->prev_ctr = 0;
+ frqp->curr_ctr = value;
+ break;
+ }
+ }
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+ stktable_touch_local(t, ts, 1);
+ break;
+
+ default:
+ return cli_err(appctx, "Unknown action\n");
+ }
+ return 1;
+}
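+
+/* Illustrative CLI usage (the table name and key are hypothetical) of the
+ * three per-key actions handled above:
+ *
+ *    show table st_src key 192.0.2.1
+ *    set table st_src key 192.0.2.1 data.gpc0 5 data.conn_cnt 0
+ *    clear table st_src key 192.0.2.1
+ */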
+
+/* Prepares the appctx fields with the data-based filters from the command line.
+ * Returns 0 if the dump can proceed, 1 if it has ended processing.
+ */
+static int table_prepare_data_request(struct appctx *appctx, char **args)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+ int i;
+ char *err = NULL;
+
+ if (ctx->action != STK_CLI_ACT_SHOW && ctx->action != STK_CLI_ACT_CLR)
+ return cli_err(appctx, "content-based lookup is only supported with the \"show\" and \"clear\" actions\n");
+
+ for (i = 0; i < STKTABLE_FILTER_LEN; i++) {
+ if (i > 0 && !*args[3+3*i]) // number of filter entries can be less than STKTABLE_FILTER_LEN
+ break;
+ /* condition on stored data value */
+ ctx->data_type[i] = stktable_get_data_type(args[3+3*i] + 5);
+ if (ctx->data_type[i] < 0)
+ return cli_dynerr(appctx, memprintf(&err, "Filter entry #%i: Unknown data type\n", i + 1));
+
+ if (!((struct stktable *)ctx->target)->data_ofs[ctx->data_type[i]])
+ return cli_dynerr(appctx, memprintf(&err, "Filter entry #%i: Data type not stored in this table\n", i + 1));
+
+ ctx->data_op[i] = get_std_op(args[4+3*i]);
+ if (ctx->data_op[i] < 0)
+			return cli_dynerr(appctx, memprintf(&err, "Filter entry #%i: Require an operator among \"eq\", \"ne\", \"le\", \"ge\", \"lt\", \"gt\"\n", i + 1));
+
+ if (!*args[5+3*i] || strl2llrc(args[5+3*i], strlen(args[5+3*i]), &ctx->value[i]) != 0)
+ return cli_dynerr(appctx, memprintf(&err, "Filter entry #%i: Require a valid integer value to compare against\n", i + 1));
+ }
+
+ if (*args[3+3*i]) {
+ return cli_dynerr(appctx, memprintf(&err, "Detected extra data in filter, %ith word of input, after '%s'\n", 3+3*i + 1, args[2+3*i]));
+ }
+
+ /* OK we're done, all the fields are set */
+ return 0;
+}
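+
+/* Illustrative CLI usage (not from the sources): data-based filters may be
+ * chained, each one made of "data.<type> <operator> <value>", e.g.:
+ *
+ *    show table st_src data.conn_rate gt 100
+ *    clear table st_src data.gpc0 eq 0 data.conn_cur eq 0
+ */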
+
+/* returns 0 if it wants to be called again, 1 if it has ended processing */
+static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_table_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+ int i;
+
+ for (i = 0; i < STKTABLE_FILTER_LEN; i++)
+ ctx->data_type[i] = -1;
+ ctx->target = NULL;
+ ctx->entry = NULL;
+ ctx->action = (long)private; // keyword argument, one of STK_CLI_ACT_*
+
+ if (*args[2]) {
+ ctx->t = ctx->target = stktable_find_by_name(args[2]);
+ if (!ctx->target)
+ return cli_err(appctx, "No such table\n");
+ }
+ else {
+ ctx->t = stktables_list;
+ if (ctx->action != STK_CLI_ACT_SHOW)
+ goto err_args;
+ return 0;
+ }
+
+ if (strcmp(args[3], "key") == 0)
+ return table_process_entry_per_key(appctx, args);
+ else if (strncmp(args[3], "data.", 5) == 0)
+ return table_prepare_data_request(appctx, args);
+ else if (*args[3])
+ goto err_args;
+
+ return 0;
+
+err_args:
+ switch (ctx->action) {
+ case STK_CLI_ACT_SHOW:
+ return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> and key <key>\n");
+ case STK_CLI_ACT_CLR:
+ return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key>\n");
+ case STK_CLI_ACT_SET:
+ return cli_err(appctx, "Required arguments: <table> key <key> [data.<store_data_type> <value>]*\n");
+ default:
+ return cli_err(appctx, "Unknown action\n");
+ }
+}
+
+/* This function is used to deal with table operations (dump or clear depending
+ * on the action stored in ctx->action). It returns 0 if the output buffer is
+ * full and it needs to be called again, otherwise non-zero.
+ */
+static int cli_io_handler_table(struct appctx *appctx)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct stream *s = __sc_strm(sc);
+ struct ebmb_node *eb;
+ int skip_entry;
+ int show = ctx->action == STK_CLI_ACT_SHOW;
+
+ /*
+ * We have 3 possible states in ctx->state :
+	 *   - STATE_NEXT : the table pointer points to the next table to
+	 *     dump, the entry pointer is NULL ;
+	 *   - STATE_DUMP : the table pointer points to the current table
+ * and the entry pointer points to the next entry to be dumped,
+ * and the refcount on the next entry is held ;
+ * - STATE_DONE : nothing left to dump, the buffer may contain some
+ * data though.
+ */
+	/* FIXME: Don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) {
+ /* in case of abort, remove any refcount we might have set on an entry */
+ if (ctx->state == STATE_DUMP) {
+ stksess_kill_if_expired(ctx->t, ctx->entry, 1);
+ }
+ return 1;
+ }
+
+ chunk_reset(&trash);
+
+ while (ctx->state != STATE_DONE) {
+ switch (ctx->state) {
+ case STATE_NEXT:
+ if (!ctx->t ||
+ (ctx->target &&
+ ctx->t != ctx->target)) {
+ ctx->state = STATE_DONE;
+ break;
+ }
+
+ if (ctx->t->size) {
+ if (show && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target))
+ return 0;
+
+ if (ctx->target &&
+ (strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER) {
+ /* dump entries only if table explicitly requested */
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ eb = ebmb_first(&ctx->t->keys);
+ if (eb) {
+ ctx->entry = ebmb_entry(eb, struct stksess, key);
+ HA_ATOMIC_INC(&ctx->entry->ref_cnt);
+ ctx->state = STATE_DUMP;
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ break;
+ }
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ }
+ }
+ ctx->t = ctx->t->next;
+ break;
+
+ case STATE_DUMP:
+ skip_entry = 0;
+
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ctx->entry->lock);
+
+ if (ctx->data_type[0] >= 0) {
+ /* we're filtering on some data contents */
+ void *ptr;
+ int dt, i;
+ signed char op;
+ long long data, value;
+
+
+ for (i = 0; i < STKTABLE_FILTER_LEN; i++) {
+ if (ctx->data_type[i] == -1)
+ break;
+ dt = ctx->data_type[i];
+ ptr = stktable_data_ptr(ctx->t,
+ ctx->entry,
+ dt);
+
+ data = 0;
+ switch (stktable_data_types[dt].std_type) {
+ case STD_T_SINT:
+ data = stktable_data_cast(ptr, std_t_sint);
+ break;
+ case STD_T_UINT:
+ data = stktable_data_cast(ptr, std_t_uint);
+ break;
+ case STD_T_ULL:
+ data = stktable_data_cast(ptr, std_t_ull);
+ break;
+ case STD_T_FRQP:
+ data = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp),
+ ctx->t->data_arg[dt].u);
+ break;
+ }
+
+ op = ctx->data_op[i];
+ value = ctx->value[i];
+
+ /* skip the entry if the data does not match the test and the value */
+ if ((data < value &&
+ (op == STD_OP_EQ || op == STD_OP_GT || op == STD_OP_GE)) ||
+ (data == value &&
+ (op == STD_OP_NE || op == STD_OP_GT || op == STD_OP_LT)) ||
+ (data > value &&
+ (op == STD_OP_EQ || op == STD_OP_LT || op == STD_OP_LE))) {
+ skip_entry = 1;
+ break;
+ }
+ }
+ }
+
+ if (show && !skip_entry &&
+ !table_dump_entry_to_buffer(&trash, appctx, ctx->t, ctx->entry)) {
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock);
+ return 0;
+ }
+
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock);
+
+ HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ HA_ATOMIC_DEC(&ctx->entry->ref_cnt);
+
+ eb = ebmb_next(&ctx->entry->key);
+ if (eb) {
+ struct stksess *old = ctx->entry;
+ ctx->entry = ebmb_entry(eb, struct stksess, key);
+ if (show)
+ __stksess_kill_if_expired(ctx->t, old);
+ else if (!skip_entry && !ctx->entry->ref_cnt)
+ __stksess_kill(ctx->t, old);
+ HA_ATOMIC_INC(&ctx->entry->ref_cnt);
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+ break;
+ }
+
+
+ if (show)
+ __stksess_kill_if_expired(ctx->t, ctx->entry);
+ else if (!skip_entry && !HA_ATOMIC_LOAD(&ctx->entry->ref_cnt))
+ __stksess_kill(ctx->t, ctx->entry);
+
+ HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock);
+
+ ctx->t = ctx->t->next;
+ ctx->state = STATE_NEXT;
+ break;
+
+ default:
+ break;
+ }
+ }
+ return 1;
+}
+
+static void cli_release_show_table(struct appctx *appctx)
+{
+ struct show_table_ctx *ctx = appctx->svcctx;
+
+ if (ctx->state == STATE_DUMP) {
+ stksess_kill_if_expired(ctx->t, ctx->entry, 1);
+ }
+}
+
+static int stk_parse_stick_counters(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *error;
+ int counters;
+
+ counters = strtol(args[1], &error, 10);
+ if (*error != 0) {
+ memprintf(err, "%s: '%s' is an invalid number", args[0], args[1]);
+ return -1;
+ }
+
+ if (counters < 0) {
+ memprintf(err, "%s: the number of stick-counters may not be negative (was %d)", args[0], counters);
+ return -1;
+ }
+
+ global.tune.nb_stk_ctr = counters;
+ return 0;
+}
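+
+/* An illustrative use of this keyword in the global section:
+ *
+ *   global
+ *       tune.stick-counters 12
+ *
+ * The value sizes the per-session/stream stick-counter arrays allocated from
+ * the pool created below.
+ */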
+
+/* This function creates the stk_ctr pool after configuration parsing. It
+ * returns 0 on success, otherwise ERR_*. If nb_stk_ctr is 0, the pool remains
+ * NULL.
+ */
+static int stkt_create_stk_ctr_pool(void)
+{
+ if (!global.tune.nb_stk_ctr)
+ return 0;
+
+ pool_head_stk_ctr = create_pool("stk_ctr", sizeof(*((struct session*)0)->stkctr) * global.tune.nb_stk_ctr, MEM_F_SHARED);
+ if (!pool_head_stk_ctr) {
+ ha_alert("out of memory while creating the stick-counters pool.\n");
+ return ERR_ABORT;
+ }
+ return 0;
+}
+
+static void stkt_late_init(void)
+{
+ struct sample_fetch *f;
+
+ f = find_sample_fetch("src", strlen("src"));
+ if (f)
+ smp_fetch_src = f->process;
+ hap_register_post_check(stkt_create_stk_ctr_pool);
+}
+
+INITCALL0(STG_INIT, stkt_late_init);
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "clear", "table", NULL }, "clear table <table> [<filter>]* : remove an entry from a table (filter: data/key)", cli_parse_table_req, cli_io_handler_table, cli_release_show_table, (void *)STK_CLI_ACT_CLR },
+ { { "set", "table", NULL }, "set table <table> key <k> [data.* <v>]* : update or create a table entry's data", cli_parse_table_req, cli_io_handler_table, NULL, (void *)STK_CLI_ACT_SET },
+ { { "show", "table", NULL }, "show table <table> [<filter>]* : report table usage stats or dump this table's contents (filter: data/key)", cli_parse_table_req, cli_io_handler_table, cli_release_show_table, (void *)STK_CLI_ACT_SHOW },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
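+
+/* The same set of sc-* actions is registered below for every rule set able to
+ * manipulate stick counters (tcp-request connection/session/content,
+ * tcp-response content, http-request, http-response, http-after-response).
+ * KWF_MATCH_PREFIX lets parenthesized forms such as "sc-inc-gpc0(2)" match
+ * their base keyword.
+ */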
+
+static struct action_kw_list tcp_conn_kws = { { }, {
+ { "sc-add-gpc", parse_add_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_conn_keywords_register, &tcp_conn_kws);
+
+static struct action_kw_list tcp_sess_kws = { { }, {
+ { "sc-add-gpc", parse_add_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_sess_kws);
+
+static struct action_kw_list tcp_req_kws = { { }, {
+ { "sc-add-gpc", parse_add_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_kws);
+
+static struct action_kw_list tcp_res_kws = { { }, {
+ { "sc-add-gpc", parse_add_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_kws);
+
+static struct action_kw_list http_req_kws = { { }, {
+ { "sc-add-gpc", parse_add_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
+
+static struct action_kw_list http_res_kws = { { }, {
+ { "sc-add-gpc", parse_add_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_kws);
+
+static struct action_kw_list http_after_res_kws = { { }, {
+ { "sc-add-gpc", parse_add_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc0", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-inc-gpc1", parse_inc_gpc, KWF_MATCH_PREFIX },
+ { "sc-set-gpt", parse_set_gpt, KWF_MATCH_PREFIX },
+ { "sc-set-gpt0", parse_set_gpt, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_after_res_keywords_register, &http_after_res_kws);
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, {
+ { "sc_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_clr_gpc", smp_fetch_sc_clr_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "sc_conn_cnt", smp_fetch_sc_conn_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_conn_cur", smp_fetch_sc_conn_cur, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_conn_rate", smp_fetch_sc_conn_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpt", smp_fetch_sc_get_gpt, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpt0", smp_fetch_sc_get_gpt0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpc", smp_fetch_sc_get_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpc0", smp_fetch_sc_get_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_get_gpc1", smp_fetch_sc_get_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN },
+ { "sc_gpc_rate", smp_fetch_sc_gpc_rate, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_err_rate", smp_fetch_sc_http_err_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_http_req_rate", smp_fetch_sc_http_req_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_inc_gpc", smp_fetch_sc_inc_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_kbytes_in", smp_fetch_sc_kbytes_in, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc_kbytes_out", smp_fetch_sc_kbytes_out, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc_sess_cnt", smp_fetch_sc_sess_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_sess_rate", smp_fetch_sc_sess_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc_tracked", smp_fetch_sc_tracked, ARG2(1,SINT,TAB), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+ { "sc_trackers", smp_fetch_sc_trackers, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_conn_cnt", smp_fetch_sc_conn_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_conn_cur", smp_fetch_sc_conn_cur, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_conn_rate", smp_fetch_sc_conn_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_err_rate", smp_fetch_sc_http_err_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_http_req_rate", smp_fetch_sc_http_req_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_kbytes_in", smp_fetch_sc_kbytes_in, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc0_kbytes_out", smp_fetch_sc_kbytes_out, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc0_sess_cnt", smp_fetch_sc_sess_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_sess_rate", smp_fetch_sc_sess_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc0_tracked", smp_fetch_sc_tracked, ARG1(0,TAB), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+ { "sc0_trackers", smp_fetch_sc_trackers, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_clr_gpc", smp_fetch_sc_clr_gpc, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_conn_cnt", smp_fetch_sc_conn_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_conn_cur", smp_fetch_sc_conn_cur, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_conn_rate", smp_fetch_sc_conn_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_err_rate", smp_fetch_sc_http_err_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_http_req_rate", smp_fetch_sc_http_req_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_kbytes_in", smp_fetch_sc_kbytes_in, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc1_kbytes_out", smp_fetch_sc_kbytes_out, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc1_sess_cnt", smp_fetch_sc_sess_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_sess_rate", smp_fetch_sc_sess_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc1_tracked", smp_fetch_sc_tracked, ARG1(0,TAB), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+ { "sc1_trackers", smp_fetch_sc_trackers, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_conn_cnt", smp_fetch_sc_conn_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_conn_cur", smp_fetch_sc_conn_cur, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_conn_rate", smp_fetch_sc_conn_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_err_rate", smp_fetch_sc_http_err_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_http_req_rate", smp_fetch_sc_http_req_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_kbytes_in", smp_fetch_sc_kbytes_in, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc2_kbytes_out", smp_fetch_sc_kbytes_out, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "sc2_sess_cnt", smp_fetch_sc_sess_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_sess_rate", smp_fetch_sc_sess_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "sc2_tracked", smp_fetch_sc_tracked, ARG1(0,TAB), NULL, SMP_T_BOOL, SMP_USE_INTRN, },
+ { "sc2_trackers", smp_fetch_sc_trackers, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "src_bytes_in_rate", smp_fetch_sc_bytes_in_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_bytes_out_rate", smp_fetch_sc_bytes_out_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_clr_gpc", smp_fetch_sc_clr_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_clr_gpc0", smp_fetch_sc_clr_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_clr_gpc1", smp_fetch_sc_clr_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_conn_cnt", smp_fetch_sc_conn_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_conn_cur", smp_fetch_sc_conn_cur, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_conn_rate", smp_fetch_sc_conn_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpt" , smp_fetch_sc_get_gpt, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpc", smp_fetch_sc_get_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_gpc_rate", smp_fetch_sc_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_err_rate", smp_fetch_sc_http_err_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_fail_cnt", smp_fetch_sc_http_fail_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_fail_rate", smp_fetch_sc_http_fail_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_req_cnt", smp_fetch_sc_http_req_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_http_req_rate", smp_fetch_sc_http_req_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_inc_gpc", smp_fetch_sc_inc_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_inc_gpc0", smp_fetch_sc_inc_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_inc_gpc1", smp_fetch_sc_inc_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_kbytes_in", smp_fetch_sc_kbytes_in, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_kbytes_out", smp_fetch_sc_kbytes_out, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_sess_cnt", smp_fetch_sc_sess_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_sess_rate", smp_fetch_sc_sess_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "src_updt_conn_cnt", smp_fetch_src_updt_conn_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, },
+ { "table_avl", smp_fetch_table_avl, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "table_cnt", smp_fetch_table_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_fetch_keywords);
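+
+/* Illustrative use of these fetches in a configuration (the table name is an
+ * example only):
+ *
+ *   tcp-request connection track-sc0 src table st_src
+ *   http-request deny deny_status 429 if { sc0_http_req_rate gt 100 }
+ */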
+
+/* Note: must not be declared <const> as its list will be overwritten */
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "in_table", sample_conv_in_table, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_BOOL },
+ { "table_bytes_in_rate", sample_conv_table_bytes_in_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_bytes_out_rate", sample_conv_table_bytes_out_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_conn_cnt", sample_conv_table_conn_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_conn_cur", sample_conv_table_conn_cur, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_conn_rate", sample_conv_table_conn_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_expire", sample_conv_table_expire, ARG2(1,TAB,SINT), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpt", sample_conv_table_gpt, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpt0", sample_conv_table_gpt0, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc", sample_conv_table_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc0", sample_conv_table_gpc0, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc1", sample_conv_table_gpc1, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc_rate", sample_conv_table_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc0_rate", sample_conv_table_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_gpc1_rate", sample_conv_table_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_err_cnt", sample_conv_table_http_err_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_err_rate", sample_conv_table_http_err_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_fail_cnt", sample_conv_table_http_fail_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_fail_rate", sample_conv_table_http_fail_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_req_cnt", sample_conv_table_http_req_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_http_req_rate", sample_conv_table_http_req_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_idle", sample_conv_table_idle, ARG2(1,TAB,SINT), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_kbytes_in", sample_conv_table_kbytes_in, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_kbytes_out", sample_conv_table_kbytes_out, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_server_id", sample_conv_table_server_id, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_sess_cnt", sample_conv_table_sess_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_sess_rate", sample_conv_table_sess_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { "table_trackers", sample_conv_table_trackers, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
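+
+/* Illustrative use of these converters (the table name is an example only);
+ * the input sample is used as the key to look up in the designated table:
+ *
+ *   http-request set-var(req.rate) src,table_http_req_rate(st_src)
+ */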
+
+static struct cfg_kw_list cfg_kws = {{ },{
+ { CFG_GLOBAL, "tune.stick-counters", stk_parse_stick_counters },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/stream.c b/src/stream.c
new file mode 100644
index 0000000..a3c0c93
--- /dev/null
+++ b/src/stream.c
@@ -0,0 +1,4045 @@
+/*
+ * Stream management functions.
+ *
+ * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <import/ebistree.h>
+
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/activity.h>
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/arg.h>
+#include <haproxy/backend.h>
+#include <haproxy/capture.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/connection.h>
+#include <haproxy/dict.h>
+#include <haproxy/dynbuf.h>
+#include <haproxy/fd.h>
+#include <haproxy/filters.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/frontend.h>
+#include <haproxy/global.h>
+#include <haproxy/hlua.h>
+#include <haproxy/http_ana.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/log.h>
+#include <haproxy/pipe.h>
+#include <haproxy/pool.h>
+#include <haproxy/proxy.h>
+#include <haproxy/queue.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sample.h>
+#include <haproxy/session.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream.h>
+#include <haproxy/task.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+#include <haproxy/vars.h>
+
+
+DECLARE_POOL(pool_head_stream, "stream", sizeof(struct stream));
+DECLARE_POOL(pool_head_uniqueid, "uniqueid", UNIQUEID_LEN);
+
+/* incremented by each "show sess" to set a delimiter between streams */
+unsigned stream_epoch = 0;
+
+/* List of all use-service keywords. */
+static struct list service_keywords = LIST_HEAD_INIT(service_keywords);
+
+
+/* trace source and events */
+static void strm_trace(enum trace_level level, uint64_t mask,
+ const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4);
+
+/* The event representation is split like this:
+ * strm - stream
+ * sc - stream connector
+ * http - HTTP analysis
+ * tcp - TCP analysis
+ *
+ * STRM_EV_* macros are defined in <proto/stream.h>
+ */
+static const struct trace_event strm_trace_events[] = {
+ { .mask = STRM_EV_STRM_NEW, .name = "strm_new", .desc = "new stream" },
+ { .mask = STRM_EV_STRM_FREE, .name = "strm_free", .desc = "release stream" },
+ { .mask = STRM_EV_STRM_ERR, .name = "strm_err", .desc = "error during stream processing" },
+ { .mask = STRM_EV_STRM_ANA, .name = "strm_ana", .desc = "stream analyzers" },
+ { .mask = STRM_EV_STRM_PROC, .name = "strm_proc", .desc = "stream processing" },
+
+ { .mask = STRM_EV_CS_ST, .name = "sc_state", .desc = "processing connector states" },
+
+ { .mask = STRM_EV_HTTP_ANA, .name = "http_ana", .desc = "HTTP analyzers" },
+	{ .mask = STRM_EV_HTTP_ERR, .name = "http_err", .desc = "error during HTTP analysis" },
+
+	{ .mask = STRM_EV_TCP_ANA, .name = "tcp_ana", .desc = "TCP analyzers" },
+	{ .mask = STRM_EV_TCP_ERR, .name = "tcp_err", .desc = "error during TCP analysis" },
+
+	{ .mask = STRM_EV_FLT_ANA, .name = "flt_ana", .desc = "Filter analyzers" },
+	{ .mask = STRM_EV_FLT_ERR, .name = "flt_err", .desc = "error during filter analysis" },
+ {}
+};
+
+static const struct name_desc strm_trace_lockon_args[4] = {
+ /* arg1 */ { /* already used by the stream */ },
+ /* arg2 */ { },
+ /* arg3 */ { },
+ /* arg4 */ { }
+};
+
+static const struct name_desc strm_trace_decoding[] = {
+#define STRM_VERB_CLEAN 1
+ { .name="clean", .desc="only user-friendly stuff, generally suitable for level \"user\"" },
+#define STRM_VERB_MINIMAL 2
+ { .name="minimal", .desc="report info on streams and connectors" },
+#define STRM_VERB_SIMPLE 3
+ { .name="simple", .desc="add info on request and response channels" },
+#define STRM_VERB_ADVANCED 4
+ { .name="advanced", .desc="add info on channel's buffer for data and developer levels only" },
+#define STRM_VERB_COMPLETE 5
+ { .name="complete", .desc="add info on channel's buffer" },
+ { /* end */ }
+};
+
+struct trace_source trace_strm = {
+ .name = IST("stream"),
+ .desc = "Applicative stream",
+ .arg_def = TRC_ARG1_STRM, // TRACE()'s first argument is always a stream
+ .default_cb = strm_trace,
+ .known_events = strm_trace_events,
+ .lockon_args = strm_trace_lockon_args,
+ .decoding = strm_trace_decoding,
+ .report_events = ~0, // report everything by default
+};
+
+#define TRACE_SOURCE &trace_strm
+INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE);
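+
+/* The source registered above is typically driven from the CLI, e.g.:
+ *
+ *   trace stream sink buf0
+ *   trace stream level developer
+ *   trace stream verbosity advanced
+ *   trace stream start now
+ */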
+
+/* the stream traces always expect that arg1, if non-null, is a stream (from
+ * which we can derive everything), that arg2, if non-null, is an HTTP
+ * transaction, and that arg3, if non-null, is an HTTP message.
+ */
+static void strm_trace(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ const struct stream *s = a1;
+ const struct http_txn *txn = a2;
+ const struct http_msg *msg = a3;
+ struct task *task;
+ const struct channel *req, *res;
+ struct htx *htx;
+
+ if (!s || src->verbosity < STRM_VERB_CLEAN)
+ return;
+
+ task = s->task;
+ req = &s->req;
+ res = &s->res;
+ htx = (msg ? htxbuf(&msg->chn->buf) : NULL);
+
+ /* General info about the stream (htx/tcp, id...) */
+ chunk_appendf(&trace_buf, " : [%u,%s]",
+ s->uniq_id, ((s->flags & SF_HTX) ? "HTX" : "TCP"));
+ if (isttest(s->unique_id)) {
+ chunk_appendf(&trace_buf, " id=");
+ b_putist(&trace_buf, s->unique_id);
+ }
+
+ /* Front and back stream connector state */
+ chunk_appendf(&trace_buf, " SC=(%s,%s)",
+ sc_state_str(s->scf->state), sc_state_str(s->scb->state));
+
+ /* If txn is defined, HTTP req/rep states */
+ if (txn)
+ chunk_appendf(&trace_buf, " HTTP=(%s,%s)",
+ h1_msg_state_str(txn->req.msg_state), h1_msg_state_str(txn->rsp.msg_state));
+ if (msg)
+ chunk_appendf(&trace_buf, " %s", ((msg->chn->flags & CF_ISRESP) ? "RESPONSE" : "REQUEST"));
+
+ if (src->verbosity == STRM_VERB_CLEAN)
+ return;
+
+ /* If msg defined, display status-line if possible (verbosity > MINIMAL) */
+ if (src->verbosity > STRM_VERB_MINIMAL && htx && htx_nbblks(htx)) {
+ const struct htx_blk *blk = __htx_get_head_blk(htx);
+ const struct htx_sl *sl = htx_get_blk_ptr(htx, blk);
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_REQ_SL || type == HTX_BLK_RES_SL)
+ chunk_appendf(&trace_buf, " - \"%.*s %.*s %.*s\"",
+ HTX_SL_P1_LEN(sl), HTX_SL_P1_PTR(sl),
+ HTX_SL_P2_LEN(sl), HTX_SL_P2_PTR(sl),
+ HTX_SL_P3_LEN(sl), HTX_SL_P3_PTR(sl));
+ }
+
+ chunk_appendf(&trace_buf, " - t=%p t.exp=%d s=(%p,0x%08x,0x%x)",
+ task, tick_isset(task->expire) ? TICKS_TO_MS(task->expire - now_ms) : TICK_ETERNITY, s, s->flags, s->conn_err_type);
+
+	/* If txn is defined, info about HTTP msgs, otherwise info about the SCs. */
+ if (txn) {
+ chunk_appendf(&trace_buf, " txn.flags=0x%08x, http.flags=(0x%08x,0x%08x) status=%d",
+ txn->flags, txn->req.flags, txn->rsp.flags, txn->status);
+ }
+ else {
+ chunk_appendf(&trace_buf, " scf=(%p,%d,0x%08x,0x%x) scb=(%p,%d,0x%08x,0x%x) scf.exp(r,w)=(%d,%d) scb.exp(r,w)=(%d,%d) retries=%d",
+ s->scf, s->scf->state, s->scf->flags, s->scf->sedesc->flags,
+ s->scb, s->scb->state, s->scb->flags, s->scb->sedesc->flags,
+ tick_isset(sc_ep_rcv_ex(s->scf)) ? TICKS_TO_MS(sc_ep_rcv_ex(s->scf) - now_ms) : TICK_ETERNITY,
+ tick_isset(sc_ep_snd_ex(s->scf)) ? TICKS_TO_MS(sc_ep_snd_ex(s->scf) - now_ms) : TICK_ETERNITY,
+ tick_isset(sc_ep_rcv_ex(s->scb)) ? TICKS_TO_MS(sc_ep_rcv_ex(s->scb) - now_ms) : TICK_ETERNITY,
+ tick_isset(sc_ep_snd_ex(s->scb)) ? TICKS_TO_MS(sc_ep_snd_ex(s->scb) - now_ms) : TICK_ETERNITY,
+ s->conn_retries);
+ }
+
+ if (src->verbosity == STRM_VERB_MINIMAL)
+ return;
+
+
+ /* If txn defined, don't display all channel info */
+ if (src->verbosity == STRM_VERB_SIMPLE || txn) {
+ chunk_appendf(&trace_buf, " req=(%p .fl=0x%08x .exp=%d)",
+ req, req->flags, tick_isset(req->analyse_exp) ? TICKS_TO_MS(req->analyse_exp - now_ms) : TICK_ETERNITY);
+ chunk_appendf(&trace_buf, " res=(%p .fl=0x%08x .exp=%d)",
+ res, res->flags, tick_isset(res->analyse_exp) ? TICKS_TO_MS(res->analyse_exp - now_ms) : TICK_ETERNITY);
+ }
+ else {
+ chunk_appendf(&trace_buf, " req=(%p .fl=0x%08x .ana=0x%08x .exp=%u .o=%lu .tot=%llu .to_fwd=%u)",
+ req, req->flags, req->analysers, req->analyse_exp,
+ (long)req->output, req->total, req->to_forward);
+ chunk_appendf(&trace_buf, " res=(%p .fl=0x%08x .ana=0x%08x .exp=%u .o=%lu .tot=%llu .to_fwd=%u)",
+ res, res->flags, res->analysers, res->analyse_exp,
+ (long)res->output, res->total, res->to_forward);
+ }
+
+ if (src->verbosity == STRM_VERB_SIMPLE ||
+ (src->verbosity == STRM_VERB_ADVANCED && src->level < TRACE_LEVEL_DATA))
+ return;
+
+ /* channels' buffer info */
+ if (s->flags & SF_HTX) {
+ struct htx *rqhtx = htxbuf(&req->buf);
+ struct htx *rphtx = htxbuf(&res->buf);
+
+ chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)",
+ rqhtx->data, rqhtx->size, htx_nbblks(rqhtx),
+ rphtx->data, rphtx->size, htx_nbblks(rphtx));
+ }
+ else {
+ chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)",
+ (unsigned int)b_data(&req->buf), b_orig(&req->buf),
+ (unsigned int)b_head_ofs(&req->buf), (unsigned int)b_size(&req->buf),
+ (unsigned int)b_data(&res->buf), b_orig(&res->buf),
+ (unsigned int)b_head_ofs(&res->buf), (unsigned int)b_size(&res->buf));
+ }
+
+ /* If msg defined, display htx info if defined (level > USER) */
+ if (src->level > TRACE_LEVEL_USER && htx && htx_nbblks(htx)) {
+ int full = 0;
+
+ /* Full htx info (level > STATE && verbosity > SIMPLE) */
+ if (src->level > TRACE_LEVEL_STATE) {
+ if (src->verbosity == STRM_VERB_COMPLETE)
+ full = 1;
+ }
+
+ chunk_memcat(&trace_buf, "\n\t", 2);
+ htx_dump(&trace_buf, htx, full);
+ }
+}
+
+/* Upgrade an existing stream for stream connector <sc>. Return < 0 on error.
+ * This is only valid right after a TCP to H1 upgrade. The stream is
+ * "reactivated" by removing the SF_IGNORE flag, and the right mode must be
+ * set. On success, the <input> buffer is transferred to the stream and thus
+ * points to BUF_NULL. On error, it is unchanged and it is the caller's
+ * responsibility to release it (this never happens for now).
+ */
+int stream_upgrade_from_sc(struct stconn *sc, struct buffer *input)
+{
+ struct stream *s = __sc_strm(sc);
+ const struct mux_ops *mux = sc_mux_ops(sc);
+
+ if (mux) {
+ if (mux->flags & MX_FL_HTX)
+ s->flags |= SF_HTX;
+ }
+
+ if (!b_is_null(input)) {
+ /* Xfer the input buffer to the request channel. <input> will
+		 * then point to BUF_NULL. From this point, it is the stream's
+		 * responsibility to release it.
+ */
+ s->req.buf = *input;
+ *input = BUF_NULL;
+ s->req.total = (IS_HTX_STRM(s) ? htxbuf(&s->req.buf)->data : b_data(&s->req.buf));
+ sc_ep_report_read_activity(s->scf);
+ }
+
+ s->req.flags |= CF_READ_EVENT; /* Always report a read event */
+ s->flags &= ~SF_IGNORE;
+
+ task_wakeup(s->task, TASK_WOKEN_INIT);
+ return 0;
+}
+
+/* Callback used to wake up a stream when an input buffer is available. The
+ * stream <s>'s stream connectors are checked for a failed buffer allocation
+ * as indicated by the presence of the SC_FL_NEED_BUFF flag and the lack of a
+ * buffer, and an input buffer is assigned there (at most one). The function
+ * returns 1 and wakes the stream up if a buffer was taken, otherwise zero.
+ * It's designed to be called from __offer_buffer().
+ */
+int stream_buf_available(void *arg)
+{
+ struct stream *s = arg;
+
+ if (!s->req.buf.size && !sc_ep_have_ff_data(s->scb) && s->scf->flags & SC_FL_NEED_BUFF &&
+ b_alloc(&s->req.buf))
+ sc_have_buff(s->scf);
+ else if (!s->res.buf.size && !sc_ep_have_ff_data(s->scf) && s->scb->flags & SC_FL_NEED_BUFF &&
+ b_alloc(&s->res.buf))
+ sc_have_buff(s->scb);
+ else
+ return 0;
+
+ task_wakeup(s->task, TASK_WOKEN_RES);
+ return 1;
+
+}
+
+/* This function is called from the session handler which detects the end of
+ * handshake, in order to complete initialization of a valid stream. It must be
+ * called with a completely initialized session. It returns the pointer to
+ * the newly created stream, or NULL in case of fatal error. The client-facing
+ * end point is assigned to <origin>, which must be valid. The stream's task
+ * is configured with a nice value inherited from the listener's nice if any.
+ * The task's context is set to the new stream, and its function is set to
+ * process_stream(). Target and analysers are null. <input> is used as input
+ * buffer for the request channel and may contain data. On success, it is
+ * transferred to the stream and <input> is set to BUF_NULL. On error, the
+ * <input> buffer is unchanged and it is the caller's responsibility to
+ * release it.
+ */
+struct stream *stream_new(struct session *sess, struct stconn *sc, struct buffer *input)
+{
+ struct stream *s;
+ struct task *t;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_NEW);
+ if (unlikely((s = pool_alloc(pool_head_stream)) == NULL))
+ goto out_fail_alloc;
+
+ /* minimum stream initialization required for an embryonic stream is
+ * fairly low. We need very little to execute L4 ACLs, then we need a
+ * task to make the client-side connection live on its own.
+ * - flags
+ * - stick-entry tracking
+ */
+ s->flags = 0;
+ s->logs.logwait = sess->fe->to_log;
+ s->logs.level = 0;
+ s->logs.request_ts = 0;
+ s->logs.t_queue = -1;
+ s->logs.t_connect = -1;
+ s->logs.t_data = -1;
+ s->logs.t_close = 0;
+ s->logs.bytes_in = s->logs.bytes_out = 0;
+ s->logs.prx_queue_pos = 0; /* we get the number of pending conns before us */
+ s->logs.srv_queue_pos = 0; /* we will get this number soon */
+ s->obj_type = OBJ_TYPE_STREAM;
+
+ s->logs.accept_date = sess->accept_date;
+ s->logs.accept_ts = sess->accept_ts;
+ s->logs.t_handshake = sess->t_handshake;
+ s->logs.t_idle = sess->t_idle;
+
+ /* default logging function */
+ s->do_log = strm_log;
+
+ /* default error reporting function, may be changed by analysers */
+ s->srv_error = default_srv_error;
+
+ /* Initialise the current rule list pointer to NULL. We are sure that
+	 * no rule list matches the NULL pointer.
+ */
+ s->current_rule_list = NULL;
+ s->current_rule = NULL;
+ s->rules_exp = TICK_ETERNITY;
+ s->last_rule_file = NULL;
+ s->last_rule_line = 0;
+
+ s->stkctr = NULL;
+ if (pool_head_stk_ctr) {
+ s->stkctr = pool_alloc(pool_head_stk_ctr);
+ if (!s->stkctr)
+ goto out_fail_alloc;
+
+ /* Copy SC counters for the stream. We don't touch refcounts because
+ * any reference we have is inherited from the session. Since the stream
+ * doesn't exist without the session, the session's existence guarantees
+ * we don't lose the entry. During the store operation, the stream won't
+ * touch these ones.
+ */
+ memcpy(s->stkctr, sess->stkctr, sizeof(s->stkctr[0]) * global.tune.nb_stk_ctr);
+ }
+
+ s->sess = sess;
+
+ s->stream_epoch = _HA_ATOMIC_LOAD(&stream_epoch);
+ s->uniq_id = _HA_ATOMIC_FETCH_ADD(&global.req_count, 1);
+
+ /* OK, we're keeping the stream, so let's properly initialize the stream */
+ LIST_INIT(&s->back_refs);
+
+ LIST_INIT(&s->buffer_wait.list);
+ s->buffer_wait.target = s;
+ s->buffer_wait.wakeup_cb = stream_buf_available;
+
+ s->lat_time = s->cpu_time = 0;
+ s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0;
+ s->pcli_next_pid = 0;
+ s->pcli_flags = 0;
+ s->unique_id = IST_NULL;
+
+ if ((t = task_new_here()) == NULL)
+ goto out_fail_alloc;
+
+ s->task = t;
+ s->pending_events = 0;
+ s->conn_retries = 0;
+ s->conn_exp = TICK_ETERNITY;
+ s->conn_err_type = STRM_ET_NONE;
+ s->prev_conn_state = SC_ST_INI;
+ t->process = process_stream;
+ t->context = s;
+ t->expire = TICK_ETERNITY;
+ if (sess->listener)
+ t->nice = sess->listener->bind_conf->nice;
+
+ /* Note: initially, the stream's backend points to the frontend.
+ * This changes later when switching rules are executed or
+ * when the default backend is assigned.
+ */
+ s->be = sess->fe;
+ s->req_cap = NULL;
+ s->res_cap = NULL;
+
+	/* Initialize all the variable contexts even if not used.
+	 * This permits pruning these contexts without errors.
+	 *
+	 * We need to make sure that those lists are not re-initialized
+	 * by stream-dependent underlying code because we could lose
+ * track of already defined variables, leading to data inconsistency
+ * and memory leaks...
+ *
+ * For reference: we had a very old bug caused by vars_txn and
+ * vars_reqres being accidentally re-initialized in http_create_txn()
+ * (https://github.com/haproxy/haproxy/issues/1935)
+ */
+ vars_init_head(&s->vars_txn, SCOPE_TXN);
+ vars_init_head(&s->vars_reqres, SCOPE_REQ);
+
+ /* Set SF_HTX flag for HTTP frontends. */
+ if (sess->fe->mode == PR_MODE_HTTP)
+ s->flags |= SF_HTX;
+
+ s->scf = sc;
+ if (sc_attach_strm(s->scf, s) < 0)
+ goto out_fail_attach_scf;
+
+ s->scb = sc_new_from_strm(s, SC_FL_ISBACK);
+ if (!s->scb)
+ goto out_fail_alloc_scb;
+
+ sc_set_state(s->scf, SC_ST_EST);
+
+ if (likely(sess->fe->options2 & PR_O2_INDEPSTR))
+ s->scf->flags |= SC_FL_INDEP_STR;
+
+ if (likely(sess->fe->options2 & PR_O2_INDEPSTR))
+ s->scb->flags |= SC_FL_INDEP_STR;
+
+ if (sc_ep_test(sc, SE_FL_WEBSOCKET))
+ s->flags |= SF_WEBSOCKET;
+ if (sc_conn(sc)) {
+ const struct mux_ops *mux = sc_mux_ops(sc);
+
+ if (mux && mux->flags & MX_FL_HTX)
+ s->flags |= SF_HTX;
+ }
+
+ stream_init_srv_conn(s);
+ s->target = sess->fe->default_target;
+
+ s->pend_pos = NULL;
+ s->priority_class = 0;
+ s->priority_offset = 0;
+
+ /* init store persistence */
+ s->store_count = 0;
+
+ channel_init(&s->req);
+ s->req.flags |= CF_READ_EVENT; /* the producer is already connected */
+ s->req.analysers = sess->listener ? sess->listener->bind_conf->analysers : sess->fe->fe_req_ana;
+
+ if (IS_HTX_STRM(s)) {
+ /* Be sure to have HTTP analysers because in case of
+ * "destructive" stream upgrade, they may be missing (e.g
+ * TCP>H2)
+ */
+ s->req.analysers |= AN_REQ_WAIT_HTTP|AN_REQ_HTTP_PROCESS_FE;
+ }
+
+ if (!sess->fe->fe_req_ana) {
+ channel_auto_connect(&s->req); /* don't wait to establish connection */
+ channel_auto_close(&s->req); /* let the producer forward close requests */
+ }
+
+ s->scf->ioto = sess->fe->timeout.client;
+ s->req.analyse_exp = TICK_ETERNITY;
+
+ channel_init(&s->res);
+ s->res.flags |= CF_ISRESP;
+ s->res.analysers = 0;
+
+ if (sess->fe->options2 & PR_O2_NODELAY) {
+ s->scf->flags |= SC_FL_SND_NEVERWAIT;
+ s->scb->flags |= SC_FL_SND_NEVERWAIT;
+ }
+
+ s->scb->ioto = TICK_ETERNITY;
+ s->res.analyse_exp = TICK_ETERNITY;
+
+ s->txn = NULL;
+ s->hlua = NULL;
+
+ s->resolv_ctx.requester = NULL;
+ s->resolv_ctx.hostname_dn = NULL;
+ s->resolv_ctx.hostname_dn_len = 0;
+ s->resolv_ctx.parent = NULL;
+
+ s->tunnel_timeout = TICK_ETERNITY;
+
+ LIST_APPEND(&th_ctx->streams, &s->list);
+
+ if (flt_stream_init(s) < 0 || flt_stream_start(s) < 0)
+ goto out_fail_accept;
+
+ /* just in case the caller would have pre-disabled it */
+ se_will_consume(s->scf->sedesc);
+
+ if (sess->fe->accept && sess->fe->accept(s) < 0)
+ goto out_fail_accept;
+
+ if (!b_is_null(input)) {
+ /* Xfer the input buffer to the request channel. <input> will
+		 * then point to BUF_NULL. From this point, it is the stream's
+		 * responsibility to release it.
+ */
+ s->req.buf = *input;
+ *input = BUF_NULL;
+ s->req.total = (IS_HTX_STRM(s) ? htxbuf(&s->req.buf)->data : b_data(&s->req.buf));
+ sc_ep_report_read_activity(s->scf);
+ }
+
+ /* it is important not to call the wakeup function directly but to
+ * pass through task_wakeup(), because this one knows how to apply
+	 * priorities to tasks. With multiple threads we must be sure that
+	 * the stream is fully initialized before calling task_wakeup(), so
+	 * it is only done last, once everything is set up.
+ */
+ DBG_TRACE_LEAVE(STRM_EV_STRM_NEW, s);
+ task_wakeup(s->task, TASK_WOKEN_INIT);
+ return s;
+
+ /* Error unrolling */
+ out_fail_accept:
+ flt_stream_release(s, 0);
+ LIST_DELETE(&s->list);
+ sc_free(s->scb);
+ out_fail_alloc_scb:
+ out_fail_attach_scf:
+ task_destroy(t);
+ out_fail_alloc:
+ if (s)
+ pool_free(pool_head_stk_ctr, s->stkctr);
+ pool_free(pool_head_stream, s);
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_NEW|STRM_EV_STRM_ERR);
+ return NULL;
+}
+
+/*
+ * frees the context associated with a stream. It must have been removed first.
+ */
+void stream_free(struct stream *s)
+{
+ struct session *sess = strm_sess(s);
+ struct proxy *fe = sess->fe;
+ struct bref *bref, *back;
+ int i;
+
+ DBG_TRACE_POINT(STRM_EV_STRM_FREE, s);
+
+ /* detach the stream from its own task before even releasing it so
+ * that walking over a task list never exhibits a dying stream.
+ */
+ s->task->context = NULL;
+ __ha_barrier_store();
+
+ pendconn_free(s);
+
+ if (objt_server(s->target)) { /* there may be requests left pending in queue */
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ _HA_ATOMIC_DEC(&__objt_server(s->target)->cur_sess);
+ }
+ if (may_dequeue_tasks(__objt_server(s->target), s->be))
+ process_srv_queue(__objt_server(s->target));
+ }
+
+ if (unlikely(s->srv_conn)) {
+ /* the stream still has a reserved slot on a server, but
+		 * it should normally only be the same one as above,
+ * so this should not happen in fact.
+ */
+ sess_change_server(s, NULL);
+ }
+
+ /* We may still be present in the buffer wait queue */
+ if (LIST_INLIST(&s->buffer_wait.list))
+ LIST_DEL_INIT(&s->buffer_wait.list);
+
+ if (s->req.buf.size || s->res.buf.size) {
+ int count = !!s->req.buf.size + !!s->res.buf.size;
+
+ b_free(&s->req.buf);
+ b_free(&s->res.buf);
+ offer_buffers(NULL, count);
+ }
+
+ pool_free(pool_head_uniqueid, s->unique_id.ptr);
+ s->unique_id = IST_NULL;
+
+ flt_stream_stop(s);
+ flt_stream_release(s, 0);
+
+ hlua_ctx_destroy(s->hlua);
+ s->hlua = NULL;
+ if (s->txn)
+ http_destroy_txn(s);
+
+ /* ensure the client-side transport layer is destroyed */
+ /* Be sure it is useless !! */
+ /* if (cli_cs) */
+ /* cs_close(cli_cs); */
+
+ for (i = 0; i < s->store_count; i++) {
+ if (!s->store[i].ts)
+ continue;
+ stksess_free(s->store[i].table, s->store[i].ts);
+ s->store[i].ts = NULL;
+ }
+
+ if (s->resolv_ctx.requester) {
+ __decl_thread(struct resolvers *resolvers = s->resolv_ctx.parent->arg.resolv.resolvers);
+
+ HA_SPIN_LOCK(DNS_LOCK, &resolvers->lock);
+ ha_free(&s->resolv_ctx.hostname_dn);
+ s->resolv_ctx.hostname_dn_len = 0;
+ resolv_unlink_resolution(s->resolv_ctx.requester);
+ HA_SPIN_UNLOCK(DNS_LOCK, &resolvers->lock);
+
+ pool_free(resolv_requester_pool, s->resolv_ctx.requester);
+ s->resolv_ctx.requester = NULL;
+ }
+
+ if (fe) {
+ if (s->req_cap) {
+ struct cap_hdr *h;
+ for (h = fe->req_cap; h; h = h->next)
+ pool_free(h->pool, s->req_cap[h->index]);
+ pool_free(fe->req_cap_pool, s->req_cap);
+ }
+
+ if (s->res_cap) {
+ struct cap_hdr *h;
+ for (h = fe->rsp_cap; h; h = h->next)
+ pool_free(h->pool, s->res_cap[h->index]);
+ pool_free(fe->rsp_cap_pool, s->res_cap);
+ }
+ }
+
+ /* Cleanup all variable contexts. */
+ if (!LIST_ISEMPTY(&s->vars_txn.head))
+ vars_prune(&s->vars_txn, s->sess, s);
+ if (!LIST_ISEMPTY(&s->vars_reqres.head))
+ vars_prune(&s->vars_reqres, s->sess, s);
+
+ stream_store_counters(s);
+ pool_free(pool_head_stk_ctr, s->stkctr);
+
+ list_for_each_entry_safe(bref, back, &s->back_refs, users) {
+ /* we have to unlink all watchers. We must not relink them if
+ * this stream was the last one in the list. This is safe to do
+ * here because we're touching our thread's list so we know
+ * that other streams are not active, and the watchers will
+ * only touch their node under thread isolation.
+ */
+ LIST_DEL_INIT(&bref->users);
+ if (s->list.n != &th_ctx->streams)
+ LIST_APPEND(&LIST_ELEM(s->list.n, struct stream *, list)->back_refs, &bref->users);
+ bref->ref = s->list.n;
+ __ha_barrier_store();
+ }
+ LIST_DELETE(&s->list);
+
+ sc_destroy(s->scb);
+ sc_destroy(s->scf);
+
+ pool_free(pool_head_stream, s);
+
+ /* We may want to free the maximum amount of pools if the proxy is stopping */
+ if (fe && unlikely(fe->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) {
+ pool_flush(pool_head_buffer);
+ pool_flush(pool_head_http_txn);
+ pool_flush(pool_head_requri);
+ pool_flush(pool_head_capture);
+ pool_flush(pool_head_stream);
+ pool_flush(pool_head_session);
+ pool_flush(pool_head_connection);
+ pool_flush(pool_head_pendconn);
+ pool_flush(fe->req_cap_pool);
+ pool_flush(fe->rsp_cap_pool);
+ }
+}
+
+
+/* Allocates a work buffer for stream <s>. It is meant to be called inside
+ * process_stream(). It will only allocate the side needed for the function
+ * to work, i.e. the response buffer, so that an error message may be
+ * built and returned. Response buffers may be allocated from the reserve; this
+ * is critical to ensure that a response may always flow and will never block a
+ * server from releasing a connection. Returns 0 in case of failure, non-zero
+ * otherwise.
+ */
+static int stream_alloc_work_buffer(struct stream *s)
+{
+ if (b_alloc(&s->res.buf))
+ return 1;
+ return 0;
+}
+
+/* releases unused buffers after processing. Typically used at the end of the
+ * update() functions. It will try to wake up as many tasks/applets as the
+ * number of buffers that it releases. In practice, most often streams are
+ * blocked on a single buffer, so it makes sense to try to wake two up when two
+ * buffers are released at once.
+ */
+void stream_release_buffers(struct stream *s)
+{
+ int offer = 0;
+
+ if (c_size(&s->req) && c_empty(&s->req)) {
+ offer++;
+ b_free(&s->req.buf);
+ }
+ if (c_size(&s->res) && c_empty(&s->res)) {
+ offer++;
+ b_free(&s->res.buf);
+ }
+
+ /* if we're certain to have at least 1 buffer available, and there is
+	 * someone waiting, we can wake up a waiter and offer it a buffer.
+ */
+ if (offer)
+ offer_buffers(s, offer);
+}
+
+void stream_process_counters(struct stream *s)
+{
+ struct session *sess = s->sess;
+ unsigned long long bytes;
+ int i;
+
+ bytes = s->req.total - s->logs.bytes_in;
+ s->logs.bytes_in = s->req.total;
+ if (bytes) {
+ _HA_ATOMIC_ADD(&sess->fe->fe_counters.bytes_in, bytes);
+ _HA_ATOMIC_ADD(&s->be->be_counters.bytes_in, bytes);
+
+ if (objt_server(s->target))
+ _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.bytes_in, bytes);
+
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_ADD(&sess->listener->counters->bytes_in, bytes);
+
+ for (i = 0; i < global.tune.nb_stk_ctr; i++) {
+ if (!stkctr_inc_bytes_in_ctr(&s->stkctr[i], bytes))
+ stkctr_inc_bytes_in_ctr(&sess->stkctr[i], bytes);
+ }
+ }
+
+ bytes = s->res.total - s->logs.bytes_out;
+ s->logs.bytes_out = s->res.total;
+ if (bytes) {
+ _HA_ATOMIC_ADD(&sess->fe->fe_counters.bytes_out, bytes);
+ _HA_ATOMIC_ADD(&s->be->be_counters.bytes_out, bytes);
+
+ if (objt_server(s->target))
+ _HA_ATOMIC_ADD(&__objt_server(s->target)->counters.bytes_out, bytes);
+
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_ADD(&sess->listener->counters->bytes_out, bytes);
+
+ for (i = 0; i < global.tune.nb_stk_ctr; i++) {
+ if (!stkctr_inc_bytes_out_ctr(&s->stkctr[i], bytes))
+ stkctr_inc_bytes_out_ctr(&sess->stkctr[i], bytes);
+ }
+ }
+}
+
+/* Abort processing on both channels at the same time */
+void stream_abort(struct stream *s)
+{
+ channel_abort(&s->req);
+ channel_abort(&s->res);
+}
+
+/*
+ * Returns a message to the client; the connection is shut down for read,
+ * and the request is cleared so that no server connection can be initiated.
+ * The buffer is marked for read shutdown on the other side to protect the
+ * message, and the buffer write is enabled. The message is contained in a
+ * "chunk". If it is null, then an empty message is used. The reply buffer does
+ * not need to be empty before this, and its contents will not be overwritten.
+ * The primary goal of this function is to return error messages to a client.
+ */
+void stream_retnclose(struct stream *s, const struct buffer *msg)
+{
+ struct channel *ic = &s->req;
+ struct channel *oc = &s->res;
+
+ channel_auto_read(ic);
+ channel_abort(ic);
+ channel_erase(ic);
+ channel_truncate(oc);
+
+ if (likely(msg && msg->data))
+ co_inject(oc, msg->area, msg->data);
+
+ channel_auto_read(oc);
+ channel_auto_close(oc);
+ sc_schedule_abort(s->scb);
+}
+
+int stream_set_timeout(struct stream *s, enum act_timeout_name name, int timeout)
+{
+ switch (name) {
+ case ACT_TIMEOUT_CLIENT:
+ s->scf->ioto = timeout;
+ return 1;
+
+ case ACT_TIMEOUT_SERVER:
+ s->scb->ioto = timeout;
+ return 1;
+
+ case ACT_TIMEOUT_TUNNEL:
+ s->tunnel_timeout = timeout;
+ return 1;
+
+ default:
+ return 0;
+ }
+}
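+
+/* This is the backend of the "set-timeout" action; an illustrative use in a
+ * configuration would be:
+ *
+ *   http-request set-timeout server 10s
+ *   http-request set-timeout tunnel 1h
+ */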
+
+/*
+ * This function handles the transition between the SC_ST_CON state and the
+ * SC_ST_EST state. It must only be called after switching from SC_ST_CON (or
+ * SC_ST_INI or SC_ST_RDY) to SC_ST_EST, but only when a ->proto is defined.
+ * Note that it will switch the interface to SC_ST_DIS if we already have
+ * the SC_FL_ABRT_DONE flag, which means we were able to forward the request, and
+ * receive the response, before process_stream() had the opportunity to
+ * make the switch from SC_ST_CON to SC_ST_EST. When that happens, we want
+ * to go through back_establish() anyway, to make sure the analysers run.
+ * Timeouts are cleared. Errors are reported on the channel so that analysers
+ * can handle them.
+ */
+void back_establish(struct stream *s)
+{
+ struct connection *conn = sc_conn(s->scb);
+ struct channel *req = &s->req;
+ struct channel *rep = &s->res;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+ /* First, centralize the timers information, and clear any irrelevant
+ * timeout.
+ */
+ s->logs.t_connect = ns_to_ms(now_ns - s->logs.accept_ts);
+ s->conn_exp = TICK_ETERNITY;
+ s->flags &= ~SF_CONN_EXP;
+
+ /* errors faced after sending data need to be reported */
+ if ((s->scb->flags & SC_FL_ERROR) && req->flags & CF_WROTE_DATA) {
+ s->req.flags |= CF_WRITE_EVENT;
+ s->res.flags |= CF_READ_EVENT;
+ s->conn_err_type = STRM_ET_DATA_ERR;
+ DBG_TRACE_STATE("read/write error", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ }
+
+ if (objt_server(s->target))
+ health_adjust(__objt_server(s->target), HANA_STATUS_L4_OK);
+
+ if (!IS_HTX_STRM(s)) { /* let's allow immediate data connection in this case */
+ /* if the user wants to log as soon as possible, without counting
+ * bytes from the server, then this is the right moment. */
+ if (!LIST_ISEMPTY(&strm_fe(s)->logformat) && !(s->logs.logwait & LW_BYTES)) {
+ /* note: no pend_pos here, session is established */
+ s->logs.t_close = s->logs.t_connect; /* to get a valid end date */
+ s->do_log(s);
+ }
+ }
+ else {
+ s->scb->flags |= SC_FL_RCV_ONCE; /* a single read is enough to get response headers */
+ }
+
+ rep->analysers |= strm_fe(s)->fe_rsp_ana | s->be->be_rsp_ana;
+
+ se_have_more_data(s->scb->sedesc);
+ rep->flags |= CF_READ_EVENT; /* producer is now attached */
+ sc_ep_report_read_activity(s->scb);
+ if (conn) {
+ /* real connections have timeouts: if already defined, it means
+ * that a set-timeout rule has been executed, so do not overwrite
+ * them
+ */
+ if (!tick_isset(s->scb->ioto))
+ s->scb->ioto = s->be->timeout.server;
+ if (!tick_isset(s->tunnel_timeout))
+ s->tunnel_timeout = s->be->timeout.tunnel;
+
+ /* The connection is now established, try to read data from the
+ * underlying layer, and subscribe to recv events. We use a
+ * delayed recv here to give a chance to the data to flow back
+ * by the time we process other tasks.
+ */
+ sc_chk_rcv(s->scb);
+ }
+ /* If we managed to get the whole response, and we don't have anything
+ * left to send, or can't, switch to SC_ST_DIS now. */
+ if ((s->scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) || (s->scf->flags & SC_FL_SHUT_DONE)) {
+ s->scb->state = SC_ST_DIS;
+ DBG_TRACE_STATE("response channel shutdwn for read/write", STRM_EV_STRM_PROC|STRM_EV_CS_ST|STRM_EV_STRM_ERR, s);
+ }
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_PROC|STRM_EV_CS_ST, s);
+}
+
+/* Set correct stream termination flags in case no analyser has done it. It
+ * also counts a failed request if the server state has not reached the request
+ * stage.
+ */
+void sess_set_term_flags(struct stream *s)
+{
+ if (!(s->flags & SF_FINST_MASK)) {
+ if (s->scb->state == SC_ST_INI) {
+ /* anything before REQ in fact */
+ _HA_ATOMIC_INC(&strm_fe(s)->fe_counters.failed_req);
+ if (strm_li(s) && strm_li(s)->counters)
+ _HA_ATOMIC_INC(&strm_li(s)->counters->failed_req);
+
+ s->flags |= SF_FINST_R;
+ }
+ else if (s->scb->state == SC_ST_QUE)
+ s->flags |= SF_FINST_Q;
+ else if (sc_state_in(s->scb->state, SC_SB_REQ|SC_SB_TAR|SC_SB_ASS|SC_SB_CON|SC_SB_CER|SC_SB_RDY))
+ s->flags |= SF_FINST_C;
+ else if (s->scb->state == SC_ST_EST || s->prev_conn_state == SC_ST_EST)
+ s->flags |= SF_FINST_D;
+ else
+ s->flags |= SF_FINST_L;
+ }
+}
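+
+/* Note: the SF_FINST_* values set above become the second character of the
+ * termination state shown in logs: 'R' (request), 'Q' (queue), 'C' (connect),
+ * 'D' (data) and 'L' (last data still being transmitted).
+ */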
+
+/* This function handles the use-service action ruleset. It executes
+ * the associated ACL and sets an applet as the stream or txn final node.
+ * It returns ACT_RET_ERR if an error occurs, leaving the proxy in a
+ * consistent state. It returns ACT_RET_STOP on success because
+ * use-service must be a terminal action, and ACT_RET_YIELD
+ * if the initialisation function requires more data.
+ */
+enum act_return process_use_service(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+
+{
+ struct appctx *appctx;
+
+ /* Initialises the applet if it is required. */
+ if (flags & ACT_OPT_FIRST) {
+ /* Register the applet. This call schedules the applet. */
+ s->target = &rule->applet.obj_type;
+ appctx = sc_applet_create(s->scb, objt_applet(s->target));
+ if (unlikely(!appctx))
+ return ACT_RET_ERR;
+
+ /* Finish initialisation of the context. */
+ appctx->rule = rule;
+ if (appctx_init(appctx) == -1)
+ return ACT_RET_ERR;
+ }
+ else
+ appctx = __sc_appctx(s->scb);
+
+ if (rule->from != ACT_F_HTTP_REQ) {
+ if (sess->fe == s->be) /* report it if the request was intercepted by the frontend */
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.intercepted_req);
+
+ /* The SF_ASSIGNED flag prevents server assignment. */
+ s->flags |= SF_ASSIGNED;
+ }
+
+ /* Now we can schedule the applet. */
+ applet_need_more_data(appctx);
+ appctx_wakeup(appctx);
+ return ACT_RET_STOP;
+}
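+
+/* Illustrative trigger (hypothetical configuration, not from this file):
+ *
+ *     http-request use-service prometheus-exporter if { path /metrics }
+ *
+ * When such a rule matches, this function attaches the named applet to the
+ * stream and makes it the final node of the transaction.
+ */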
+
+/* This stream analyser checks the switching rules and changes the backend
+ * if appropriate. The default_backend rule is also considered, then the
+ * target backend's forced persistence rules are evaluated last, if any.
+ * It returns 1 if the processing can continue on next analysers, or zero if it
+ * either needs more data or wants to immediately abort the request.
+ */
+static int process_switching_rules(struct stream *s, struct channel *req, int an_bit)
+{
+ struct persist_rule *prst_rule;
+ struct session *sess = s->sess;
+ struct proxy *fe = sess->fe;
+
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA, s);
+
+ /* now check whether we have some switching rules for this request */
+ if (!(s->flags & SF_BE_ASSIGNED)) {
+ struct switching_rule *rule;
+
+ list_for_each_entry(rule, &fe->switching_rules, list) {
+ int ret = 1;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, fe, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* If the backend name is dynamic, try to resolve the name.
+ * If we can't resolve the name, or if any error occurs, break
+ * the loop and fall back to the default backend.
+ */
+ struct proxy *backend = NULL;
+
+ if (rule->dynamic) {
+ struct buffer *tmp;
+
+ tmp = alloc_trash_chunk();
+ if (!tmp)
+ goto sw_failed;
+
+ if (build_logline(s, tmp->area, tmp->size, &rule->be.expr))
+ backend = proxy_be_by_name(tmp->area);
+
+ free_trash_chunk(tmp);
+ tmp = NULL;
+
+ if (!backend)
+ break;
+ }
+ else
+ backend = rule->be.backend;
+
+ if (!stream_set_backend(s, backend))
+ goto sw_failed;
+ break;
+ }
+ }
+
+ /* To ensure correct connection accounting on the backend, we
+ * have to assign one if it was not set (eg: a listen). This
+ * measure also takes care of correctly setting the default
+ * backend if any. Don't do anything if an upgrade is already in
+ * progress.
+ */
+ if (!(s->flags & (SF_BE_ASSIGNED|SF_IGNORE)))
+ if (!stream_set_backend(s, fe->defbe.be ? fe->defbe.be : s->be))
+ goto sw_failed;
+
+ /* No backend assigned but no error reported. It happens when a
+ * TCP stream is upgraded to HTTP/2.
+ */
+ if ((s->flags & (SF_BE_ASSIGNED|SF_IGNORE)) == SF_IGNORE) {
+ DBG_TRACE_DEVEL("leaving with no backend because of a destructive upgrade", STRM_EV_STRM_ANA, s);
+ return 0;
+ }
+
+ }
+
+ /* we don't want to run the TCP or HTTP filters again if the backend has not changed */
+ if (fe == s->be) {
+ s->req.analysers &= ~AN_REQ_INSPECT_BE;
+ s->req.analysers &= ~AN_REQ_HTTP_PROCESS_BE;
+ s->req.analysers &= ~AN_REQ_FLT_START_BE;
+ }
+
+ /* as soon as we know the backend, we must check if we have a matching forced or ignored
+ * persistence rule, and report that in the stream.
+ */
+ list_for_each_entry(prst_rule, &s->be->persist_rules, list) {
+ int ret = 1;
+
+ if (prst_rule->cond) {
+ ret = acl_exec_cond(prst_rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (prst_rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* no rule, or the rule matches */
+ if (prst_rule->type == PERSIST_TYPE_FORCE) {
+ s->flags |= SF_FORCE_PRST;
+ } else {
+ s->flags |= SF_IGNORE_PRST;
+ }
+ break;
+ }
+ }
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+
+ sw_failed:
+ /* immediately abort this request in case of allocation failure */
+ stream_abort(s);
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+
+ if (s->txn)
+ s->txn->status = 500;
+ s->req.analysers &= AN_REQ_FLT_END;
+ s->req.analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_ANA|STRM_EV_STRM_ERR, s);
+ return 0;
+}
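+
+/* Illustrative configuration handled above (hypothetical, not from this
+ * file): a dynamic rule such as
+ *
+ *     use_backend %[req.hdr(host),lower] if { hdr(host) -m found }
+ *
+ * takes the rule->dynamic branch: the log-format expression is rendered into
+ * a trash chunk, then resolved with proxy_be_by_name().
+ */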
+
+/* This stream analyser works on a request. It applies all use-server rules
+ * on it and always returns 1. The data must already be present in the
+ * buffer, otherwise they won't match.
+ */
+static int process_server_rules(struct stream *s, struct channel *req, int an_bit)
+{
+ struct proxy *px = s->be;
+ struct session *sess = s->sess;
+ struct server_rule *rule;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA, s);
+
+ if (!(s->flags & SF_ASSIGNED)) {
+ list_for_each_entry(rule, &px->server_rules, list) {
+ int ret;
+
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+
+ if (ret) {
+ struct server *srv;
+
+ if (rule->dynamic) {
+ struct buffer *tmp = get_trash_chunk();
+
+ if (!build_logline(s, tmp->area, tmp->size, &rule->expr))
+ break;
+
+ srv = findserver(s->be, tmp->area);
+ if (!srv)
+ break;
+ }
+ else
+ srv = rule->srv.ptr;
+
+ if ((srv->cur_state != SRV_ST_STOPPED) ||
+ (px->options & PR_O_PERSIST) ||
+ (s->flags & SF_FORCE_PRST)) {
+ s->flags |= SF_DIRECT | SF_ASSIGNED;
+ s->target = &srv->obj_type;
+ break;
+ }
+ /* if the server is not UP, let's go on with next rules
+ * just in case another one is suited.
+ */
+ }
+ }
+ }
+
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+}
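+
+/* Illustrative configuration handled above (hypothetical): in a backend,
+ *
+ *     use-server srv_static if { path_beg /static }
+ *
+ * pins the stream to srv_static via SF_DIRECT|SF_ASSIGNED, provided the
+ * server is usable or persistence is forced.
+ */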
+
+static inline void sticking_rule_find_target(struct stream *s,
+ struct stktable *t, struct stksess *ts)
+{
+ struct proxy *px = s->be;
+ struct eb32_node *node;
+ struct dict_entry *de;
+ void *ptr;
+ struct server *srv;
+
+ /* Look for the server name previously stored in <t> stick-table */
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
+ ptr = __stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY);
+ de = stktable_data_cast(ptr, std_t_dict);
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (de) {
+ struct ebpt_node *node;
+
+ if (t->server_key_type == STKTABLE_SRV_NAME) {
+ node = ebis_lookup(&px->conf.used_server_name, de->value.key);
+ if (node) {
+ srv = container_of(node, struct server, conf.name);
+ goto found;
+ }
+ } else if (t->server_key_type == STKTABLE_SRV_ADDR) {
+ HA_RWLOCK_RDLOCK(PROXY_LOCK, &px->lock);
+ node = ebis_lookup(&px->used_server_addr, de->value.key);
+ HA_RWLOCK_RDUNLOCK(PROXY_LOCK, &px->lock);
+ if (node) {
+ srv = container_of(node, struct server, addr_node);
+ goto found;
+ }
+ }
+ }
+
+ /* Look for the server ID */
+ HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &ts->lock);
+ ptr = __stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_ID);
+ node = eb32_lookup(&px->conf.used_server_id, stktable_data_cast(ptr, std_t_sint));
+ HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ if (!node)
+ return;
+
+ srv = container_of(node, struct server, conf.id);
+ found:
+ if ((srv->cur_state != SRV_ST_STOPPED) ||
+ (px->options & PR_O_PERSIST) || (s->flags & SF_FORCE_PRST)) {
+ s->flags |= SF_DIRECT | SF_ASSIGNED;
+ s->target = &srv->obj_type;
+ }
+}
+
+/* This stream analyser works on a request. It applies all sticking rules
+ * on it and always returns 1. The data must already be present in the
+ * buffer, otherwise they won't match.
+ */
+static int process_sticking_rules(struct stream *s, struct channel *req, int an_bit)
+{
+ struct proxy *px = s->be;
+ struct session *sess = s->sess;
+ struct sticking_rule *rule;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA, s);
+
+ list_for_each_entry(rule, &px->sticking_rules, list) {
+ int ret = 1;
+ int i;
+
+ /* Only the first stick store-request of each table is applied
+ * and other ones are ignored. The purpose is to allow complex
+ * configurations which look for multiple entries by decreasing
+ * order of precision and to stop at the first which matches.
+ * An example could be a store of the IP address from an HTTP
+ * header first, then from the source if not found.
+ */
+ if (rule->flags & STK_IS_STORE) {
+ for (i = 0; i < s->store_count; i++) {
+ if (rule->table.t == s->store[i].table)
+ break;
+ }
+
+ if (i != s->store_count)
+ continue;
+ }
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ struct stktable_key *key;
+
+ key = stktable_fetch_key(rule->table.t, px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->expr, NULL);
+ if (!key)
+ continue;
+
+ if (rule->flags & STK_IS_MATCH) {
+ struct stksess *ts;
+
+ if ((ts = stktable_lookup_key(rule->table.t, key)) != NULL) {
+ if (!(s->flags & SF_ASSIGNED))
+ sticking_rule_find_target(s, rule->table.t, ts);
+ stktable_touch_local(rule->table.t, ts, 1);
+ }
+ }
+ if (rule->flags & STK_IS_STORE) {
+ if (s->store_count < (sizeof(s->store) / sizeof(s->store[0]))) {
+ struct stksess *ts;
+
+ ts = stksess_new(rule->table.t, key);
+ if (ts) {
+ s->store[s->store_count].table = rule->table.t;
+ s->store[s->store_count++].ts = ts;
+ }
+ }
+ }
+ }
+ }
+
+ req->analysers &= ~an_bit;
+ req->analyse_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+}
+
+/* This stream analyser works on a response. It applies all store rules on
+ * it and always returns 1. The data must already be present in the buffer,
+ * otherwise they won't match.
+ */
+static int process_store_rules(struct stream *s, struct channel *rep, int an_bit)
+{
+ struct proxy *px = s->be;
+ struct session *sess = s->sess;
+ struct sticking_rule *rule;
+ int i;
+ int nbreq = s->store_count;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA, s);
+
+ list_for_each_entry(rule, &px->storersp_rules, list) {
+ int ret = 1;
+
+ /* Only the first stick store-response of each table is applied
+ * and other ones are ignored. The purpose is to allow complex
+ * configurations which look for multiple entries by decreasing
+ * order of precision and to stop at the first which matches.
+ * An example could be a store of a set-cookie value, with a
+ * fallback to a parameter found in a 302 redirect.
+ *
+ * The store-response rules are not allowed to override the
+ * store-request rules for the same table, but they may coexist.
+ * Thus we can have up to one store-request entry and one store-
+ * response entry for the same table at any time.
+ */
+ for (i = nbreq; i < s->store_count; i++) {
+ if (rule->table.t == s->store[i].table)
+ break;
+ }
+
+ /* skip existing entries for this table */
+ if (i < s->store_count)
+ continue;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, px, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ struct stktable_key *key;
+
+ key = stktable_fetch_key(rule->table.t, px, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, rule->expr, NULL);
+ if (!key)
+ continue;
+
+ if (s->store_count < (sizeof(s->store) / sizeof(s->store[0]))) {
+ struct stksess *ts;
+
+ ts = stksess_new(rule->table.t, key);
+ if (ts) {
+ s->store[s->store_count].table = rule->table.t;
+ s->store[s->store_count++].ts = ts;
+ }
+ }
+ }
+ }
+
+ /* process store request and store response */
+ for (i = 0; i < s->store_count; i++) {
+ struct stksess *ts;
+ void *ptr;
+ char *key;
+ struct dict_entry *de;
+ struct stktable *t = s->store[i].table;
+
+ if (!objt_server(s->target) || (__objt_server(s->target)->flags & SRV_F_NON_STICK)) {
+ stksess_free(s->store[i].table, s->store[i].ts);
+ s->store[i].ts = NULL;
+ continue;
+ }
+
+ ts = stktable_set_entry(t, s->store[i].ts);
+ if (ts != s->store[i].ts) {
+ /* the entry already existed, we can free ours */
+ stksess_free(t, s->store[i].ts);
+ }
+ s->store[i].ts = NULL;
+
+ if (t->server_key_type == STKTABLE_SRV_NAME)
+ key = __objt_server(s->target)->id;
+ else if (t->server_key_type == STKTABLE_SRV_ADDR)
+ key = __objt_server(s->target)->addr_node.key;
+ else
+ key = NULL;
+
+ HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock);
+ ptr = __stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_ID);
+ stktable_data_cast(ptr, std_t_sint) = __objt_server(s->target)->puid;
+
+ if (key) {
+ de = dict_insert(&server_key_dict, key);
+ if (de) {
+ ptr = __stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY);
+ stktable_data_cast(ptr, std_t_dict) = de;
+ }
+ }
+
+ HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock);
+
+ stktable_touch_local(t, ts, 1);
+ }
+ s->store_count = 0; /* everything is stored */
+
+ rep->analysers &= ~an_bit;
+ rep->analyse_exp = TICK_ETERNITY;
+
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA, s);
+ return 1;
+}
+
+/* Set the stream to HTTP mode, if necessary. The minimal request HTTP analysers
+ * are set and the client mux is upgraded. It returns 1 if the stream processing
+ * may continue, or 0 if it should be stopped, which happens on error or if
+ * the upgrade required a new stream. The mux protocol may be specified.
+ */
+int stream_set_http_mode(struct stream *s, const struct mux_proto_list *mux_proto)
+{
+ struct stconn *sc = s->scf;
+ struct connection *conn;
+
+ /* Already an HTTP stream */
+ if (IS_HTX_STRM(s))
+ return 1;
+
+ s->req.analysers |= AN_REQ_WAIT_HTTP|AN_REQ_HTTP_PROCESS_FE;
+
+ if (unlikely(!s->txn && !http_create_txn(s)))
+ return 0;
+
+ conn = sc_conn(sc);
+ if (conn) {
+ se_have_more_data(s->scf->sedesc);
+ /* Make sure we're unsubscribed; the new
+ * mux will probably want to subscribe to
+ * the underlying XPRT
+ */
+ if (s->scf->wait_event.events)
+ conn->mux->unsubscribe(sc, s->scf->wait_event.events, &(s->scf->wait_event));
+
+ if (conn->mux->flags & MX_FL_NO_UPG)
+ return 0;
+
+ sc_conn_prepare_endp_upgrade(sc);
+ if (conn_upgrade_mux_fe(conn, sc, &s->req.buf,
+ (mux_proto ? mux_proto->token : ist("")),
+ PROTO_MODE_HTTP) == -1) {
+ sc_conn_abort_endp_upgrade(sc);
+ return 0;
+ }
+ sc_conn_commit_endp_upgrade(sc);
+
+ s->req.flags &= ~(CF_READ_EVENT|CF_AUTO_CONNECT);
+ s->req.total = 0;
+ s->flags |= SF_IGNORE;
+ if (sc_ep_test(sc, SE_FL_DETACHED)) {
+ /* If the stream connector is detached, it means it was not
+ * reused by the new mux. So destroy it, disable
+ * logging, and abort the stream processing. Thus the
+ * stream will be silently destroyed. The new mux will
+ * create new streams.
+ */
+ s->logs.logwait = 0;
+ s->logs.level = 0;
+ stream_abort(s);
+ s->req.analysers &= AN_REQ_FLT_END;
+ s->req.analyse_exp = TICK_ETERNITY;
+ }
+ }
+
+ return 1;
+}
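+
+/* Illustrative trigger (hypothetical configuration): in a TCP frontend,
+ *
+ *     tcp-request content switch-mode http if { req.proto_http }
+ *
+ * would end up here to install the HTTP analysers and upgrade the client mux
+ * in place.
+ */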
+
+
+/* Updates at once the channel flags and timers of both stream connectors of
+ * the same stream, to complete the work after the analysers, then updates the
+ * data layer below. This will ensure that any synchronous update performed at
+ * the data layer will be reflected in the channel flags and/or stream
+ * connector.
+ * Note that this does not change the stream connector's current state, though
+ * it updates the previous state to the current one.
+ */
+void stream_update_both_sc(struct stream *s)
+{
+ struct stconn *scf = s->scf;
+ struct stconn *scb = s->scb;
+ struct channel *req = &s->req;
+ struct channel *res = &s->res;
+
+ req->flags &= ~(CF_READ_EVENT|CF_WRITE_EVENT);
+ res->flags &= ~(CF_READ_EVENT|CF_WRITE_EVENT);
+
+ s->prev_conn_state = scb->state;
+
+ /* let's recompute both sides states */
+ if (sc_state_in(scf->state, SC_SB_RDY|SC_SB_EST))
+ sc_update(scf);
+
+ if (sc_state_in(scb->state, SC_SB_RDY|SC_SB_EST))
+ sc_update(scb);
+
+ /* stream connectors are processed outside of process_stream() and must be
+ * handled at the latest moment.
+ */
+ if (sc_appctx(scf)) {
+ if (sc_is_recv_allowed(scf) || sc_is_send_allowed(scf))
+ appctx_wakeup(__sc_appctx(scf));
+ }
+ if (sc_appctx(scb)) {
+ if (sc_is_recv_allowed(scb) || sc_is_send_allowed(scb))
+ appctx_wakeup(__sc_appctx(scb));
+ }
+}
+
+/* check SC and channel timeouts, and close the corresponding stream connectors
+ * for future reads or writes.
+ * Note: this will also concern upper layers but we do not touch any other
+ * flag. We must be careful and correctly detect state changes when calling
+ * them.
+ */
+static void stream_handle_timeouts(struct stream *s)
+{
+ stream_check_conn_timeout(s);
+
+ sc_check_timeouts(s->scf);
+ channel_check_timeout(&s->req);
+ sc_check_timeouts(s->scb);
+ channel_check_timeout(&s->res);
+
+ if (unlikely(!(s->scb->flags & SC_FL_SHUT_DONE) && (s->req.flags & CF_WRITE_TIMEOUT))) {
+ s->scb->flags |= SC_FL_NOLINGER;
+ sc_shutdown(s->scb);
+ }
+
+ if (unlikely(!(s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && (s->req.flags & CF_READ_TIMEOUT))) {
+ if (s->scf->flags & SC_FL_NOHALF)
+ s->scf->flags |= SC_FL_NOLINGER;
+ sc_abort(s->scf);
+ }
+ if (unlikely(!(s->scf->flags & SC_FL_SHUT_DONE) && (s->res.flags & CF_WRITE_TIMEOUT))) {
+ s->scf->flags |= SC_FL_NOLINGER;
+ sc_shutdown(s->scf);
+ }
+
+ if (unlikely(!(s->scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && (s->res.flags & CF_READ_TIMEOUT))) {
+ if (s->scb->flags & SC_FL_NOHALF)
+ s->scb->flags |= SC_FL_NOLINGER;
+ sc_abort(s->scb);
+ }
+
+ if (HAS_FILTERS(s))
+ flt_stream_check_timeouts(s);
+}
+
+/* if the current task's wake_date was set, it's being profiled, thus we may
+ * report latencies and CPU usages in logs, so it's desirable to update the
+ * latency when entering process_stream().
+ */
+static void stream_cond_update_cpu_latency(struct stream *s)
+{
+ uint32_t lat = th_ctx->sched_call_date - th_ctx->sched_wake_date;
+
+ s->lat_time += lat;
+}
+
+/* if the current task's wake_date was set, it's being profiled, thus we may
+ * report latencies and CPU usages in logs, so it's desirable to do that before
+ * logging in order to report accurate CPU usage. In this case we count that
+ * final part and reset the wake date so that the scheduler doesn't do it a
+ * second time, and by doing so we also avoid an extra call to clock_gettime().
+ * The CPU usage will be off by the little time needed to run over stream_free()
+ * but that's only marginal.
+ */
+static void stream_cond_update_cpu_usage(struct stream *s)
+{
+ uint32_t cpu;
+
+ /* stats are only registered for non-zero wake dates */
+ if (likely(!th_ctx->sched_wake_date))
+ return;
+
+ cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
+ s->cpu_time += cpu;
+ HA_ATOMIC_ADD(&th_ctx->sched_profile_entry->cpu_time, cpu);
+ th_ctx->sched_wake_date = 0;
+}
+
+/* this function is called directly by the scheduler for tasks whose
+ * ->process points to process_stream(), and is used to keep latencies
+ * and CPU usage measurements accurate.
+ */
+void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu)
+{
+ struct stream *s = t->context;
+ s->lat_time += lat;
+ s->cpu_time += cpu;
+}
+
+
+/* This macro is very specific to the function below. See the comments in
+ * process_stream() below to understand the logic and the tests.
+ */
+#define UPDATE_ANALYSERS(real, list, back, flag) { \
+ list = (((list) & ~(flag)) | ~(back)) & (real); \
+ back = real; \
+ if (!(list)) \
+ break; \
+ if (((list) ^ ((list) & ((list) - 1))) < (flag)) \
+ continue; \
+}
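+
+/* Note on the bit trick above: the expression
+ * ((list) ^ ((list) & ((list) - 1))) isolates the lowest set bit of <list>.
+ * For example with list = 0x0a (bits 1 and 3), list - 1 = 0x09 and
+ * list & (list - 1) = 0x08, so the XOR yields 0x02. If that lowest pending
+ * analyser bit is below the current <flag>, an earlier analyser was
+ * re-enabled and the loop must be restarted from it.
+ */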
+
+/* The 2 following macros call an analyzer for the specified channel if the
+ * right flag is set. The first one is used for "filterable" analyzers. If a
+ * stream has some registered filters, pre and post analyze callbacks are
+ * called. The second one is used for other analyzers (AN_REQ/RES_FLT_* and
+ * AN_REQ/RES_HTTP_XFER_BODY) */
+#define FLT_ANALYZE(strm, chn, fun, list, back, flag, ...) \
+ { \
+ if ((list) & (flag)) { \
+ if (HAS_FILTERS(strm)) { \
+ if (!flt_pre_analyze((strm), (chn), (flag))) \
+ break; \
+ if (!fun((strm), (chn), (flag), ##__VA_ARGS__)) \
+ break; \
+ if (!flt_post_analyze((strm), (chn), (flag))) \
+ break; \
+ } \
+ else { \
+ if (!fun((strm), (chn), (flag), ##__VA_ARGS__)) \
+ break; \
+ } \
+ UPDATE_ANALYSERS((chn)->analysers, (list), \
+ (back), (flag)); \
+ } \
+ }
+
+#define ANALYZE(strm, chn, fun, list, back, flag, ...) \
+ { \
+ if ((list) & (flag)) { \
+ if (!fun((strm), (chn), (flag), ##__VA_ARGS__)) \
+ break; \
+ UPDATE_ANALYSERS((chn)->analysers, (list), \
+ (back), (flag)); \
+ } \
+ }
+
+/* Processes the client, server, request and response jobs of a stream task,
+ * then puts it back to the wait queue in a clean state, or cleans up its
+ * resources if it must be deleted. Returns in <next> the date the task wants
+ * to be woken up, or TICK_ETERNITY. In order not to call all functions too
+ * many times for nothing, the request and response buffer flags are monitored
+ * and each function is called only if at least another function has changed at
+ * least one flag it is interested in.
+ */
+struct task *process_stream(struct task *t, void *context, unsigned int state)
+{
+ struct server *srv;
+ struct stream *s = context;
+ struct session *sess = s->sess;
+ unsigned int scf_flags, scb_flags;
+ unsigned int rqf_last, rpf_last;
+ unsigned int rq_prod_last, rq_cons_last;
+ unsigned int rp_cons_last, rp_prod_last;
+ unsigned int req_ana_back, res_ana_back;
+ struct channel *req, *res;
+ struct stconn *scf, *scb;
+ unsigned int rate;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_PROC, s);
+
+ activity[tid].stream_calls++;
+ stream_cond_update_cpu_latency(s);
+
+ req = &s->req;
+ res = &s->res;
+
+ scf = s->scf;
+ scb = s->scb;
+
+ /* First, attempt to receive pending data from I/O layers */
+ sc_conn_sync_recv(scf);
+ sc_conn_sync_recv(scb);
+
+ /* Let's check if we're looping without making any progress, e.g. due
+ * to a bogus analyser or the fact that we're ignoring a read0. The
+ * call_rate counter only counts calls with no progress made.
+ */
+ if (!((req->flags | res->flags) & (CF_READ_EVENT|CF_WRITE_EVENT))) {
+ rate = update_freq_ctr(&s->call_rate, 1);
+ if (rate >= 100000 && s->call_rate.prev_ctr) // make sure to wait at least a full second
+ stream_dump_and_crash(&s->obj_type, read_freq_ctr(&s->call_rate));
+ }
+
+ /* this data may be no longer valid, clear it */
+ if (s->txn)
+ memset(&s->txn->auth, 0, sizeof(s->txn->auth));
+
+ /* This flag must explicitly be set every time */
+ req->flags &= ~CF_WAKE_WRITE;
+ res->flags &= ~CF_WAKE_WRITE;
+
+ /* Keep a copy of req/rep flags so that we can detect shutdowns */
+ rqf_last = req->flags & ~CF_MASK_ANALYSER;
+ rpf_last = res->flags & ~CF_MASK_ANALYSER;
+
+ /* we don't want the stream connector functions to recursively wake us up */
+ scf->flags |= SC_FL_DONT_WAKE;
+ scb->flags |= SC_FL_DONT_WAKE;
+
+ /* Keep a copy of SC flags */
+ scf_flags = scf->flags;
+ scb_flags = scb->flags;
+
+ /* update pending events */
+ s->pending_events |= (state & TASK_WOKEN_ANY);
+
+ /* 1a: Check for low level timeouts if needed. We just set a flag on
+ * stream connectors when their timeouts have expired.
+ */
+ if (unlikely(s->pending_events & TASK_WOKEN_TIMER)) {
+ stream_handle_timeouts(s);
+
+ /* Once in a while we're woken up because the task expires. But
+ * this does not necessarily mean that a timeout has been reached.
+ * So let's not run a whole stream processing if only an expiration
+ * timeout needs to be refreshed.
+ */
+ if (!((scf->flags | scb->flags) & (SC_FL_ERROR|SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_SHUT_DONE)) &&
+ !((req->flags | res->flags) & (CF_READ_EVENT|CF_READ_TIMEOUT|CF_WRITE_EVENT|CF_WRITE_TIMEOUT)) &&
+ !(s->flags & SF_CONN_EXP) &&
+ ((s->pending_events & TASK_WOKEN_ANY) == TASK_WOKEN_TIMER)) {
+ scf->flags &= ~SC_FL_DONT_WAKE;
+ scb->flags &= ~SC_FL_DONT_WAKE;
+ goto update_exp_and_leave;
+ }
+ }
+
+ resync_stconns:
+ /* below we may emit error messages, so we have to ensure that we have
+ * our buffers properly allocated. If the allocation fails, an error is
+ * triggered.
+ *
+ * NOTE: An error is returned because the mechanism to queue entities
+ * waiting for a buffer is totally broken for now. However, this
+ * part must be refactored. When it is handled, this part
+ * must be reviewed too.
+ */
+ if (!stream_alloc_work_buffer(s)) {
+ scf->flags |= SC_FL_ERROR;
+ s->conn_err_type = STRM_ET_CONN_RES;
+
+ scb->flags |= SC_FL_ERROR;
+ s->conn_err_type = STRM_ET_CONN_RES;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_RESOURCE;
+ sess_set_term_flags(s);
+ }
+
+ /* 1b: check for low-level errors reported at the stream connector.
+ * First we check if it's a retryable error (in which case we don't
+ * want to tell the buffer). Otherwise we report the error one level
+ * upper by setting flags into the buffers. Note that the side towards
+ * the client cannot have connect (hence retryable) errors. Also, the
+ * connection setup code must be able to deal with any type of abort.
+ */
+ srv = objt_server(s->target);
+ if (unlikely(scf->flags & SC_FL_ERROR)) {
+ if (sc_state_in(scf->state, SC_SB_EST|SC_SB_DIS)) {
+ sc_abort(scf);
+ sc_shutdown(scf);
+ //sc_report_error(scf); TODO: Be sure it is useless
+ if (!(req->analysers) && !(res->analysers)) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_CLICL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_D;
+ }
+ }
+ }
+
+ if (unlikely(scb->flags & SC_FL_ERROR)) {
+ if (sc_state_in(scb->state, SC_SB_EST|SC_SB_DIS)) {
+ sc_abort(scb);
+ sc_shutdown(scb);
+ //sc_report_error(scb); TODO: Be sure it is useless
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.failed_resp);
+ if (!(req->analysers) && !(res->analysers)) {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_SRVCL;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_D;
+ }
+ }
+ /* note: maybe we should process connection errors here? */
+ }
+
+ if (sc_state_in(scb->state, SC_SB_CON|SC_SB_RDY)) {
+ /* we were trying to establish a connection on the server side,
+ * maybe it succeeded, maybe it failed, maybe we timed out, ...
+ */
+ if (scb->state == SC_ST_RDY)
+ back_handle_st_rdy(s);
+ else if (s->scb->state == SC_ST_CON)
+ back_handle_st_con(s);
+
+ if (scb->state == SC_ST_CER)
+ back_handle_st_cer(s);
+ else if (scb->state == SC_ST_EST)
+ back_establish(s);
+
+ /* state is now one of SC_ST_CON (still in progress), SC_ST_EST
+ * (established), SC_ST_DIS (abort), SC_ST_CLO (last error),
+ * SC_ST_ASS/SC_ST_TAR/SC_ST_REQ for retryable errors.
+ */
+ }
+
+ rq_prod_last = scf->state;
+ rq_cons_last = scb->state;
+ rp_cons_last = scf->state;
+ rp_prod_last = scb->state;
+
+ /* Check for connection closure */
+ DBG_TRACE_POINT(STRM_EV_STRM_PROC, s);
+
+ /* nothing special to be done on client side */
+ if (unlikely(scf->state == SC_ST_DIS)) {
+ scf->state = SC_ST_CLO;
+
+ /* This is needed only when debugging is enabled, to indicate
+ * client-side close.
+ */
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) ||
+ (global.mode & MODE_VERBOSE)))) {
+ chunk_printf(&trash, "%08x:%s.clicls[%04x:%04x]\n",
+ s->uniq_id, s->be->id,
+ (unsigned short)conn_fd(sc_conn(scf)),
+ (unsigned short)conn_fd(sc_conn(scb)));
+ DISGUISE(write(1, trash.area, trash.data));
+ }
+ }
+
+ /* When a server-side connection is released, we have to count it and
+ * check for pending connections on this server.
+ */
+ if (unlikely(scb->state == SC_ST_DIS)) {
+ scb->state = SC_ST_CLO;
+ srv = objt_server(s->target);
+ if (srv) {
+ if (s->flags & SF_CURR_SESS) {
+ s->flags &= ~SF_CURR_SESS;
+ _HA_ATOMIC_DEC(&srv->cur_sess);
+ }
+ sess_change_server(s, NULL);
+ if (may_dequeue_tasks(srv, s->be))
+ process_srv_queue(srv);
+ }
+
+ /* This is needed only when debugging is enabled, to indicate
+ * server-side close.
+ */
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) ||
+ (global.mode & MODE_VERBOSE)))) {
+ if (s->prev_conn_state == SC_ST_EST) {
+ chunk_printf(&trash, "%08x:%s.srvcls[%04x:%04x]\n",
+ s->uniq_id, s->be->id,
+ (unsigned short)conn_fd(sc_conn(scf)),
+ (unsigned short)conn_fd(sc_conn(scb)));
+ DISGUISE(write(1, trash.area, trash.data));
+ }
+ }
+ }
+
+ /*
+ * Note: of the transient states (REQ, CER, DIS), only REQ may remain
+ * at this point.
+ */
+
+ resync_request:
+ /* Analyse request */
+ if (((req->flags & ~rqf_last) & CF_MASK_ANALYSER) ||
+ ((scf->flags ^ scf_flags) & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) ||
+ ((scb->flags ^ scb_flags) & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) ||
+ (req->analysers && (scb->flags & SC_FL_SHUT_DONE)) ||
+ scf->state != rq_prod_last ||
+ scb->state != rq_cons_last ||
+ s->pending_events & TASK_WOKEN_MSG) {
+ unsigned int scf_flags_ana = scf->flags;
+ unsigned int scb_flags_ana = scb->flags;
+
+ if (sc_state_in(scf->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO)) {
+ int max_loops = global.tune.maxpollevents;
+ unsigned int ana_list;
+ unsigned int ana_back;
+
+ /* it's up to the analysers to stop new connections,
+ * disable reading or closing. Note: if an analyser
+ * disables any of these bits, it is responsible for
+ * enabling them again when it disables itself, so
+ * that other analysers are called in similar conditions.
+ */
+ channel_auto_read(req);
+ channel_auto_connect(req);
+ channel_auto_close(req);
+
+ /* We will call all analysers for which a bit is set in
+ * req->analysers, following the bit order from LSB
+ * to MSB. The analysers must remove themselves from
+ * the list when not needed. Any analyser may return 0
+ * to break out of the loop, either because of missing
+ * data to take a decision, or because it decides to
+ * kill the stream. We loop at least once through each
+ * analyser, and we may loop again if other analysers
+ * are added in the middle.
+ *
+ * We build a list of analysers to run. We evaluate all
+ * of these analysers in the order of the lower bit to
+ * the higher bit. This ordering is very important.
+ * An analyser will often add/remove other analysers,
+ * including itself. Any changes to itself have no effect
+ * on the loop. If it removes any other analysers, we
+ * want those analysers not to be called anymore during
+ * this loop. If it adds an analyser that is located
+ * after itself, we want it to be scheduled for being
+ * processed during the loop. If it adds an analyser
+ * which is located before it, we want it to switch to
+ * it immediately, even if it has already been called
+ * once but removed since.
+ *
+ * In order to achieve this, we compare the analyser
+ * list after the call with a copy of it before the
+ * call. The work list is fed with analyser bits that
+ * appeared during the call. Then we compare previous
+ * work list with the new one, and check the bits that
+ * appeared. If the lowest of these bits is lower than
+ * the current bit, it means we have enabled a previous
+ * analyser and must immediately loop again.
+ */
+
+ ana_list = ana_back = req->analysers;
+ while (ana_list && max_loops--) {
+ /* Warning! ensure that analysers are always placed in ascending order! */
+ ANALYZE (s, req, flt_start_analyze, ana_list, ana_back, AN_REQ_FLT_START_FE);
+ FLT_ANALYZE(s, req, tcp_inspect_request, ana_list, ana_back, AN_REQ_INSPECT_FE);
+ FLT_ANALYZE(s, req, http_wait_for_request, ana_list, ana_back, AN_REQ_WAIT_HTTP);
+ FLT_ANALYZE(s, req, http_wait_for_request_body, ana_list, ana_back, AN_REQ_HTTP_BODY);
+ FLT_ANALYZE(s, req, http_process_req_common, ana_list, ana_back, AN_REQ_HTTP_PROCESS_FE, sess->fe);
+ FLT_ANALYZE(s, req, process_switching_rules, ana_list, ana_back, AN_REQ_SWITCHING_RULES);
+ ANALYZE (s, req, flt_start_analyze, ana_list, ana_back, AN_REQ_FLT_START_BE);
+ FLT_ANALYZE(s, req, tcp_inspect_request, ana_list, ana_back, AN_REQ_INSPECT_BE);
+ FLT_ANALYZE(s, req, http_process_req_common, ana_list, ana_back, AN_REQ_HTTP_PROCESS_BE, s->be);
+ FLT_ANALYZE(s, req, http_process_tarpit, ana_list, ana_back, AN_REQ_HTTP_TARPIT);
+ FLT_ANALYZE(s, req, process_server_rules, ana_list, ana_back, AN_REQ_SRV_RULES);
+ FLT_ANALYZE(s, req, http_process_request, ana_list, ana_back, AN_REQ_HTTP_INNER);
+ FLT_ANALYZE(s, req, tcp_persist_rdp_cookie, ana_list, ana_back, AN_REQ_PRST_RDP_COOKIE);
+ FLT_ANALYZE(s, req, process_sticking_rules, ana_list, ana_back, AN_REQ_STICKING_RULES);
+ ANALYZE (s, req, flt_analyze_http_headers, ana_list, ana_back, AN_REQ_FLT_HTTP_HDRS);
+ ANALYZE (s, req, http_request_forward_body, ana_list, ana_back, AN_REQ_HTTP_XFER_BODY);
+ ANALYZE (s, req, pcli_wait_for_request, ana_list, ana_back, AN_REQ_WAIT_CLI);
+ ANALYZE (s, req, flt_xfer_data, ana_list, ana_back, AN_REQ_FLT_XFER_DATA);
+ ANALYZE (s, req, flt_end_analyze, ana_list, ana_back, AN_REQ_FLT_END);
+ break;
+ }
+ }
+
+ rq_prod_last = scf->state;
+ rq_cons_last = scb->state;
+ req->flags &= ~CF_WAKE_ONCE;
+ rqf_last = req->flags;
+ scf_flags = (scf_flags & ~(SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) | (scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED));
+ scb_flags = (scb_flags & ~(SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) | (scb->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED));
+
+ if (((scf->flags ^ scf_flags_ana) & (SC_FL_EOS|SC_FL_ABRT_DONE)) || ((scb->flags ^ scb_flags_ana) & SC_FL_SHUT_DONE))
+ goto resync_request;
+ }
+
+ /* we'll monitor the request analysers while parsing the response,
+ * because some response analysers may indirectly enable new request
+ * analysers (eg: HTTP keep-alive).
+ */
+ req_ana_back = req->analysers;
+
+ resync_response:
+ /* Analyse response */
+
+ if (((res->flags & ~rpf_last) & CF_MASK_ANALYSER) ||
+ ((scb->flags ^ scb_flags) & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) ||
+ ((scf->flags ^ scf_flags) & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) ||
+ (res->analysers && (scf->flags & SC_FL_SHUT_DONE)) ||
+ scf->state != rp_cons_last ||
+ scb->state != rp_prod_last ||
+ s->pending_events & TASK_WOKEN_MSG) {
+ unsigned int scb_flags_ana = scb->flags;
+ unsigned int scf_flags_ana = scf->flags;
+
+ if (sc_state_in(scb->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO)) {
+ int max_loops = global.tune.maxpollevents;
+ unsigned int ana_list;
+ unsigned int ana_back;
+
+ /* it's up to the analysers to disable reading or
+ * closing. Note: if an analyser disables any of these
+ * bits, it is responsible for enabling them again when
+ * it disables itself, so that other analysers are called
+ * in similar conditions.
+ */
+ channel_auto_read(res);
+ channel_auto_close(res);
+
+ /* We will call all analysers for which a bit is set in
+ * res->analysers, following the bit order from LSB
+ * to MSB. The analysers must remove themselves from
+ * the list when not needed. Any analyser may return 0
+ * to break out of the loop, either because of missing
+ * data to take a decision, or because it decides to
+ * kill the stream. We loop at least once through each
+ * analyser, and we may loop again if other analysers
+ * are added in the middle.
+ */
+
+ ana_list = ana_back = res->analysers;
+ while (ana_list && max_loops--) {
+ /* Warning! ensure that analysers are always placed in ascending order! */
+ ANALYZE (s, res, flt_start_analyze, ana_list, ana_back, AN_RES_FLT_START_FE);
+ ANALYZE (s, res, flt_start_analyze, ana_list, ana_back, AN_RES_FLT_START_BE);
+ FLT_ANALYZE(s, res, tcp_inspect_response, ana_list, ana_back, AN_RES_INSPECT);
+ FLT_ANALYZE(s, res, http_wait_for_response, ana_list, ana_back, AN_RES_WAIT_HTTP);
+ FLT_ANALYZE(s, res, process_store_rules, ana_list, ana_back, AN_RES_STORE_RULES);
+ FLT_ANALYZE(s, res, http_process_res_common, ana_list, ana_back, AN_RES_HTTP_PROCESS_BE, s->be);
+ ANALYZE (s, res, flt_analyze_http_headers, ana_list, ana_back, AN_RES_FLT_HTTP_HDRS);
+ ANALYZE (s, res, http_response_forward_body, ana_list, ana_back, AN_RES_HTTP_XFER_BODY);
+ ANALYZE (s, res, pcli_wait_for_response, ana_list, ana_back, AN_RES_WAIT_CLI);
+ ANALYZE (s, res, flt_xfer_data, ana_list, ana_back, AN_RES_FLT_XFER_DATA);
+ ANALYZE (s, res, flt_end_analyze, ana_list, ana_back, AN_RES_FLT_END);
+ break;
+ }
+ }
+
+ rp_cons_last = scf->state;
+ rp_prod_last = scb->state;
+ res->flags &= ~CF_WAKE_ONCE;
+ rpf_last = res->flags;
+ scb_flags = (scb_flags & ~(SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) | (scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED));
+ scf_flags = (scf_flags & ~(SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) | (scf->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED));
+
+ if (((scb->flags ^ scb_flags_ana) & (SC_FL_EOS|SC_FL_ABRT_DONE)) || ((scf->flags ^ scf_flags_ana) & SC_FL_SHUT_DONE))
+ goto resync_response;
+ }
+
+ /* we'll monitor the response analysers because some response analysers
+ * may be enabled/disabled later
+ */
+ res_ana_back = res->analysers;
+
+ /* maybe someone has added some request analysers, so we must check and loop */
+ if (req->analysers & ~req_ana_back)
+ goto resync_request;
+
+ if ((req->flags & ~rqf_last) & CF_MASK_ANALYSER)
+ goto resync_request;
+
+ /* FIXME: here we should call protocol handlers which rely on
+ * both buffers.
+ */
+
+
+ /*
+ * Now we propagate unhandled errors to the stream. Normally
+ * we're just in a data phase here since it means we have not
+ * seen any analyser who could set an error status.
+ */
+ srv = objt_server(s->target);
+ if (unlikely(!(s->flags & SF_ERR_MASK))) {
+ if ((scf->flags & SC_FL_ERROR) || req->flags & (CF_READ_TIMEOUT|CF_WRITE_TIMEOUT)) {
+ /* Report it if the client got an error or a read timeout expired */
+ req->analysers &= AN_REQ_FLT_END;
+ channel_auto_close(req);
+ if (scf->flags & SC_FL_ERROR) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ s->flags |= SF_ERR_CLICL;
+ }
+ else if (req->flags & CF_READ_TIMEOUT) {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ s->flags |= SF_ERR_CLITO;
+ }
+ else {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ s->flags |= SF_ERR_SRVTO;
+ }
+ sess_set_term_flags(s);
+
+ /* Abort the request if a client error occurred while
+ * the backend stream connector is in the SC_ST_INI
+ * state. It is switched into the SC_ST_CLO state and
+ * the request channel is erased. */
+ if (scb->state == SC_ST_INI) {
+ s->scb->state = SC_ST_CLO;
+ channel_abort(req);
+ if (IS_HTX_STRM(s))
+ channel_htx_erase(req, htxbuf(&req->buf));
+ else
+ channel_erase(req);
+ }
+ }
+ else if ((scb->flags & SC_FL_ERROR) || res->flags & (CF_READ_TIMEOUT|CF_WRITE_TIMEOUT)) {
+ /* Report it if the server got an error or a read timeout expired */
+ res->analysers &= AN_RES_FLT_END;
+ channel_auto_close(res);
+ if (scb->flags & SC_FL_ERROR) {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ s->flags |= SF_ERR_SRVCL;
+ }
+ else if (res->flags & CF_READ_TIMEOUT) {
+ _HA_ATOMIC_INC(&s->be->be_counters.srv_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.srv_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->srv_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.srv_aborts);
+ s->flags |= SF_ERR_SRVTO;
+ }
+ else {
+ _HA_ATOMIC_INC(&s->be->be_counters.cli_aborts);
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.cli_aborts);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->cli_aborts);
+ if (srv)
+ _HA_ATOMIC_INC(&srv->counters.cli_aborts);
+ s->flags |= SF_ERR_CLITO;
+ }
+ sess_set_term_flags(s);
+ }
+ }
+
+ /*
+ * Here we take care of forwarding unhandled data. This also includes
+ * connection establishments and shutdown requests.
+ */
+
+
+ /* If no one is interested in analysing data, it's time to forward
+ * everything. We configure the buffer to forward indefinitely.
+ * Note that we're checking SC_FL_ABRT_WANTED as an indication of a possible
+ * recent call to channel_abort().
+ */
+ if (unlikely((!req->analysers || (req->analysers == AN_REQ_FLT_END && !(req->flags & CF_FLT_ANALYZE))) &&
+ !(scf->flags & SC_FL_ABRT_WANTED) && !(scb->flags & SC_FL_SHUT_DONE) &&
+ (sc_state_in(scf->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO)) &&
+ (req->to_forward != CHN_INFINITE_FORWARD))) {
+ /* This buffer is freewheeling, there's no analyser
+ * attached to it. If any data are left in it, we'll permit them to
+ * move.
+ */
+ channel_auto_read(req);
+ channel_auto_connect(req);
+ channel_auto_close(req);
+
+ if (IS_HTX_STRM(s)) {
+ struct htx *htx = htxbuf(&req->buf);
+
+ /* We'll let data flow between the producer (if still connected)
+ * to the consumer.
+ */
+ co_set_data(req, htx->data);
+ if ((global.tune.options & GTUNE_USE_FAST_FWD) &&
+ !(scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && !(scb->flags & SC_FL_SHUT_WANTED))
+ channel_htx_forward_forever(req, htx);
+ }
+ else {
+ /* We'll let data flow between the producer (if still connected)
+ * to the consumer (which might possibly not be connected yet).
+ */
+ c_adv(req, ci_data(req));
+ if ((global.tune.options & GTUNE_USE_FAST_FWD) &&
+ !(scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && !(scb->flags & SC_FL_SHUT_WANTED))
+ channel_forward_forever(req);
+ }
+ }
+
+ /* reflect what the L7 analysers have seen last */
+ rqf_last = req->flags;
+ scf_flags = (scf_flags & ~(SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) | (scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED));
+ scb_flags = (scb_flags & ~(SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) | (scb->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED));
+
+ /* it's possible that an upper layer has requested a connection setup or abort.
+ * There are 2 situations where we decide to establish a new connection:
+ * - there are data scheduled for emission in the buffer
+ * - the CF_AUTO_CONNECT flag is set (active connection)
+ */
+ if (scb->state == SC_ST_INI) {
+ if (!(scb->flags & SC_FL_SHUT_DONE)) {
+ if ((req->flags & CF_AUTO_CONNECT) || co_data(req)) {
+ /* If we have an appctx, there is no connect method, so we
+ * immediately switch to the connected state, otherwise we
+ * perform a connection request.
+ */
+ scb->state = SC_ST_REQ; /* new connection requested */
+ s->conn_retries = 0;
+ if ((s->be->retry_type &~ PR_RE_CONN_FAILED) &&
+ (s->be->mode == PR_MODE_HTTP) &&
+ !(s->txn->flags & TX_D_L7_RETRY))
+ s->txn->flags |= TX_L7_RETRY;
+
+ if (s->be->options & PR_O_ABRT_CLOSE) {
+ struct connection *conn = sc_conn(scf);
+
+ if (conn && conn->mux && conn->mux->ctl)
+ conn->mux->ctl(conn, MUX_CTL_SUBS_RECV, NULL);
+ }
+ }
+ }
+ else {
+ s->scb->state = SC_ST_CLO; /* shutw+ini = abort */
+ sc_schedule_shutdown(scb);
+ sc_schedule_abort(scb);
+ }
+ }
+
+
+ /* we may have a pending connection request, or a connection waiting
+ * for completion.
+ */
+ if (sc_state_in(scb->state, SC_SB_REQ|SC_SB_QUE|SC_SB_TAR|SC_SB_ASS)) {
+ /* prune the request variables and swap to the response variables. */
+ if (s->vars_reqres.scope != SCOPE_RES) {
+ if (!LIST_ISEMPTY(&s->vars_reqres.head))
+ vars_prune(&s->vars_reqres, s->sess, s);
+ vars_init_head(&s->vars_reqres, SCOPE_RES);
+ }
+
+ do {
+ /* nb: step 1 might switch from QUE to ASS, but we first want
+ * to give a chance to step 2 to perform a redirect if needed.
+ */
+ if (scb->state != SC_ST_REQ)
+ back_try_conn_req(s);
+ if (scb->state == SC_ST_REQ)
+ back_handle_st_req(s);
+
+ /* get a chance to complete an immediate connection setup */
+ if (scb->state == SC_ST_RDY)
+ goto resync_stconns;
+
+ /* applets directly go to the ESTABLISHED state. Similarly,
+ * servers experience the same fate when their connection
+ * is reused.
+ */
+ if (unlikely(scb->state == SC_ST_EST))
+ back_establish(s);
+
+ srv = objt_server(s->target);
+ if (scb->state == SC_ST_ASS && srv && srv->rdr_len && (s->flags & SF_REDIRECTABLE))
+ http_perform_server_redirect(s, scb);
+ } while (scb->state == SC_ST_ASS);
+ }
+
+ /* Let's see if we can send the pending request now */
+ sc_conn_sync_send(scb);
+
+ /*
+ * Now forward all shutdown requests between both sides of the request buffer
+ */
+
+ /* first, let's check if the request buffer needs to shutdown(write), which may
+ * happen either because the input is closed or because we want to force a close
+ * once the server has begun to respond. If a half-closed timeout is set, we adjust
+ * the other side's timeout as well. However this has no effect during the
+ * connection setup unless the backend has abortonclose set.
+ */
+ if (unlikely((req->flags & CF_AUTO_CLOSE) && (scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) &&
+ !(scb->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) &&
+ (scb->state != SC_ST_CON || (s->be->options & PR_O_ABRT_CLOSE)))) {
+ sc_schedule_shutdown(scb);
+ }
+
+ /* shutdown(write) pending */
+ if (unlikely((scb->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) == SC_FL_SHUT_WANTED &&
+ (!co_data(req) || (req->flags & CF_WRITE_TIMEOUT)))) {
+ if (scf->flags & SC_FL_ERROR)
+ scb->flags |= SC_FL_NOLINGER;
+ sc_shutdown(scb);
+ }
+
+ /* shutdown(write) done on server side, we must stop the client too */
+ if (unlikely((scb->flags & SC_FL_SHUT_DONE) && !(scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED))) &&
+ !req->analysers)
+ sc_schedule_abort(scf);
+
+ /* shutdown(read) pending */
+ if (unlikely((scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) == SC_FL_ABRT_WANTED)) {
+ if (scf->flags & SC_FL_NOHALF)
+ scf->flags |= SC_FL_NOLINGER;
+ sc_abort(scf);
+ }
+
+ /* Benchmarks have shown that it's optimal to do a full resync now */
+ if (scf->state == SC_ST_DIS ||
+ sc_state_in(scb->state, SC_SB_RDY|SC_SB_DIS) ||
+ ((scf->flags & SC_FL_ERROR) && scf->state != SC_ST_CLO) ||
+ ((scb->flags & SC_FL_ERROR) && scb->state != SC_ST_CLO))
+ goto resync_stconns;
+
+ /* otherwise we want to check if we need to resync the req buffer or not */
+ if (((scf->flags ^ scf_flags) & (SC_FL_EOS|SC_FL_ABRT_DONE)) || ((scb->flags ^ scb_flags) & SC_FL_SHUT_DONE))
+ goto resync_request;
+
+ /* perform output updates to the response buffer */
+
+ /* If no one is interested in analysing data, it's time to forward
+ * everything. We configure the buffer to forward indefinitely.
+ * Note that we're checking SC_FL_ABRT_WANTED as an indication of a possible
+ * recent call to channel_abort().
+ */
+ if (unlikely((!res->analysers || (res->analysers == AN_RES_FLT_END && !(res->flags & CF_FLT_ANALYZE))) &&
+ !(scf->flags & SC_FL_ABRT_WANTED) && !(scb->flags & SC_FL_SHUT_WANTED) &&
+ sc_state_in(scb->state, SC_SB_EST|SC_SB_DIS|SC_SB_CLO) &&
+ (res->to_forward != CHN_INFINITE_FORWARD))) {
+ /* This buffer is freewheeling, there's no analyser
+ * attached to it. If any data are left in it, we'll permit them to
+ * move.
+ */
+ channel_auto_read(res);
+ channel_auto_close(res);
+
+ if (IS_HTX_STRM(s)) {
+ struct htx *htx = htxbuf(&res->buf);
+
+ /* We'll let data flow between the producer (if still connected)
+ * to the consumer.
+ */
+ co_set_data(res, htx->data);
+ if ((global.tune.options & GTUNE_USE_FAST_FWD) &&
+ !(scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && !(scb->flags & SC_FL_SHUT_WANTED))
+ channel_htx_forward_forever(res, htx);
+ }
+ else {
+ /* We'll let data flow between the producer (if still connected)
+ * to the consumer.
+ */
+ c_adv(res, ci_data(res));
+ if ((global.tune.options & GTUNE_USE_FAST_FWD) &&
+ !(scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && !(scb->flags & SC_FL_SHUT_WANTED))
+ channel_forward_forever(res);
+ }
+
+ /* if we have no analyser anymore in any direction and have a
+ * tunnel timeout set, use it now. Note that we must respect
+ * the half-closed timeouts as well.
+ */
+ if (!req->analysers && s->tunnel_timeout) {
+ scf->ioto = scb->ioto = s->tunnel_timeout;
+
+ if (!IS_HTX_STRM(s)) {
+ if ((scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_SHUT_DONE)) && tick_isset(sess->fe->timeout.clientfin))
+ scf->ioto = sess->fe->timeout.clientfin;
+ if ((scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_SHUT_DONE)) && tick_isset(s->be->timeout.serverfin))
+ scb->ioto = s->be->timeout.serverfin;
+ }
+ }
+ }
+
+ /* reflect what the L7 analysers have seen last */
+ rpf_last = res->flags;
+ scb_flags = (scb_flags & ~(SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) | (scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED));
+ scf_flags = (scf_flags & ~(SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) | (scf->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED));
+
+ /* Let's see if we can send the pending response now */
+ sc_conn_sync_send(scf);
+
+ /*
+ * Now forward all shutdown requests between both sides of the buffer
+ */
+
+ /*
+ * FIXME: this is probably where we should produce error responses.
+ */
+
+ /* first, let's check if the response buffer needs to shutdown(write) */
+ if (unlikely((res->flags & CF_AUTO_CLOSE) && (scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) &&
+ !(scf->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)))) {
+ sc_schedule_shutdown(scf);
+ }
+
+ /* shutdown(write) pending */
+ if (unlikely((scf->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) == SC_FL_SHUT_WANTED &&
+ (!co_data(res) || (res->flags & CF_WRITE_TIMEOUT)))) {
+ sc_shutdown(scf);
+ }
+
+ /* shutdown(write) done on the client side, we must stop the server too */
+ if (unlikely((scf->flags & SC_FL_SHUT_DONE) && !(scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED))) &&
+ !res->analysers)
+ sc_schedule_abort(scb);
+
+ /* shutdown(read) pending */
+ if (unlikely((scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) == SC_FL_ABRT_WANTED)) {
+ if (scb->flags & SC_FL_NOHALF)
+ scb->flags |= SC_FL_NOLINGER;
+ sc_abort(scb);
+ }
+
+ if (scf->state == SC_ST_DIS ||
+ sc_state_in(scb->state, SC_SB_RDY|SC_SB_DIS) ||
+ ((scf->flags & SC_FL_ERROR) && scf->state != SC_ST_CLO) ||
+ ((scb->flags & SC_FL_ERROR) && scb->state != SC_ST_CLO))
+ goto resync_stconns;
+
+ if ((req->flags & ~rqf_last) & CF_MASK_ANALYSER)
+ goto resync_request;
+
+ if (((scb->flags ^ scb_flags) & (SC_FL_EOS|SC_FL_ABRT_DONE|SC_FL_ABRT_WANTED)) ||
+ ((scf->flags ^ scf_flags) & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) ||
+ (res->analysers ^ res_ana_back))
+ goto resync_response;
+
+ if ((((req->flags ^ rqf_last) | (res->flags ^ rpf_last)) & CF_MASK_ANALYSER) ||
+ (req->analysers ^ req_ana_back))
+ goto resync_request;
+
+ /* we're interested in getting wakeups again */
+ scf->flags &= ~SC_FL_DONT_WAKE;
+ scb->flags &= ~SC_FL_DONT_WAKE;
+
+ if (likely((scf->state != SC_ST_CLO) || !sc_state_in(scb->state, SC_SB_INI|SC_SB_CLO) ||
+ (req->analysers & AN_REQ_FLT_END) || (res->analysers & AN_RES_FLT_END))) {
+ if ((sess->fe->options & PR_O_CONTSTATS) && (s->flags & SF_BE_ASSIGNED) && !(s->flags & SF_IGNORE))
+ stream_process_counters(s);
+
+ stream_update_both_sc(s);
+
+ /* Reset pending events now */
+ s->pending_events = 0;
+
+ update_exp_and_leave:
+ /* Note: please ensure that if you branch here you disable SC_FL_DONT_WAKE */
+ if (!req->analysers)
+ req->analyse_exp = TICK_ETERNITY;
+ if (!res->analysers)
+ res->analyse_exp = TICK_ETERNITY;
+
+ if ((sess->fe->options & PR_O_CONTSTATS) && (s->flags & SF_BE_ASSIGNED) &&
+ (!tick_isset(req->analyse_exp) || tick_is_expired(req->analyse_exp, now_ms)))
+ req->analyse_exp = tick_add(now_ms, 5000);
+
+ t->expire = (tick_is_expired(t->expire, now_ms) ? 0 : t->expire);
+ t->expire = tick_first(t->expire, sc_ep_rcv_ex(scf));
+ t->expire = tick_first(t->expire, sc_ep_snd_ex(scf));
+ t->expire = tick_first(t->expire, sc_ep_rcv_ex(scb));
+ t->expire = tick_first(t->expire, sc_ep_snd_ex(scb));
+ t->expire = tick_first(t->expire, req->analyse_exp);
+ t->expire = tick_first(t->expire, res->analyse_exp);
+ t->expire = tick_first(t->expire, s->conn_exp);
+
+ if (unlikely(tick_is_expired(t->expire, now_ms))) {
+ /* Some events prevented the timeouts from being handled, but nothing
+ * evolved. So handle them now and resync the stconns.
+ */
+ stream_handle_timeouts(s);
+ goto resync_stconns;
+ }
+
+ s->pending_events &= ~(TASK_WOKEN_TIMER | TASK_WOKEN_RES);
+ stream_release_buffers(s);
+
+ DBG_TRACE_DEVEL("queuing", STRM_EV_STRM_PROC, s);
+ return t; /* nothing more to do */
+ }
+
+ DBG_TRACE_DEVEL("releasing", STRM_EV_STRM_PROC, s);
+
+ if (s->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_DEC(&s->be->beconn);
+
+ if (unlikely((global.mode & MODE_DEBUG) &&
+ (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
+ chunk_printf(&trash, "%08x:%s.closed[%04x:%04x]\n",
+ s->uniq_id, s->be->id,
+ (unsigned short)conn_fd(sc_conn(scf)),
+ (unsigned short)conn_fd(sc_conn(scb)));
+ DISGUISE(write(1, trash.area, trash.data));
+ }
+
+ if (!(s->flags & SF_IGNORE)) {
+ s->logs.t_close = ns_to_ms(now_ns - s->logs.accept_ts);
+
+ stream_process_counters(s);
+
+ if (s->txn && s->txn->status) {
+ int n;
+
+ n = s->txn->status / 100;
+ if (n < 1 || n > 5)
+ n = 0;
+
+ if (sess->fe->mode == PR_MODE_HTTP) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[n]);
+ }
+ if ((s->flags & SF_BE_ASSIGNED) &&
+ (s->be->mode == PR_MODE_HTTP)) {
+ _HA_ATOMIC_INC(&s->be->be_counters.p.http.rsp[n]);
+ _HA_ATOMIC_INC(&s->be->be_counters.p.http.cum_req);
+ }
+ }
+
+ /* let's do a final log if we need it */
+ if (!LIST_ISEMPTY(&sess->fe->logformat) && s->logs.logwait &&
+ !(s->flags & SF_MONITOR) &&
+ (!(sess->fe->options & PR_O_NULLNOLOG) || req->total)) {
+ /* we may need to know the position in the queue */
+ pendconn_free(s);
+
+ stream_cond_update_cpu_usage(s);
+ s->do_log(s);
+ }
+
+ /* update time stats for this stream */
+ stream_update_time_stats(s);
+ }
+
+ /* the task MUST not be in the run queue anymore */
+ stream_free(s);
+ task_destroy(t);
+ return NULL;
+}
+
+/* Update the stream's backend and server time stats */
+void stream_update_time_stats(struct stream *s)
+{
+ int t_request;
+ int t_queue;
+ int t_connect;
+ int t_data;
+ int t_close;
+ struct server *srv;
+ unsigned int samples_window;
+
+ t_request = 0;
+ t_queue = s->logs.t_queue;
+ t_connect = s->logs.t_connect;
+ t_close = s->logs.t_close;
+ t_data = s->logs.t_data;
+
+ if (s->be->mode != PR_MODE_HTTP)
+ t_data = t_connect;
+
+ if (t_connect < 0 || t_data < 0)
+ return;
+
+ if ((llong)(s->logs.request_ts - s->logs.accept_ts) >= 0)
+ t_request = ns_to_ms(s->logs.request_ts - s->logs.accept_ts);
+
+ t_data -= t_connect;
+ t_connect -= t_queue;
+ t_queue -= t_request;
+
+ srv = objt_server(s->target);
+ if (srv) {
+ samples_window = (((s->be->mode == PR_MODE_HTTP) ?
+ srv->counters.p.http.cum_req : srv->counters.cum_lbconn) > TIME_STATS_SAMPLES) ? TIME_STATS_SAMPLES : 0;
+ swrate_add_dynamic(&srv->counters.q_time, samples_window, t_queue);
+ swrate_add_dynamic(&srv->counters.c_time, samples_window, t_connect);
+ swrate_add_dynamic(&srv->counters.d_time, samples_window, t_data);
+ swrate_add_dynamic(&srv->counters.t_time, samples_window, t_close);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.qtime_max, t_queue);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.ctime_max, t_connect);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.dtime_max, t_data);
+ HA_ATOMIC_UPDATE_MAX(&srv->counters.ttime_max, t_close);
+ }
+ samples_window = (((s->be->mode == PR_MODE_HTTP) ?
+ s->be->be_counters.p.http.cum_req : s->be->be_counters.cum_lbconn) > TIME_STATS_SAMPLES) ? TIME_STATS_SAMPLES : 0;
+ swrate_add_dynamic(&s->be->be_counters.q_time, samples_window, t_queue);
+ swrate_add_dynamic(&s->be->be_counters.c_time, samples_window, t_connect);
+ swrate_add_dynamic(&s->be->be_counters.d_time, samples_window, t_data);
+ swrate_add_dynamic(&s->be->be_counters.t_time, samples_window, t_close);
+ HA_ATOMIC_UPDATE_MAX(&s->be->be_counters.qtime_max, t_queue);
+ HA_ATOMIC_UPDATE_MAX(&s->be->be_counters.ctime_max, t_connect);
+ HA_ATOMIC_UPDATE_MAX(&s->be->be_counters.dtime_max, t_data);
+ HA_ATOMIC_UPDATE_MAX(&s->be->be_counters.ttime_max, t_close);
+}
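+
+/* Worked example (assumed figures, for illustration only): with accept at
+ * t=0ms, the full request received at t=2ms, the queue left at t=7ms, the
+ * connection established at t=12ms and the first response data at t=30ms,
+ * the logged cumulative values are t_queue=7, t_connect=12 and t_data=30.
+ * The three subtractions above then yield the per-phase durations fed to the
+ * sliding-window averages: t_data=18 (response time), t_connect=5 (pure
+ * connect time) and t_queue=5 (pure queuing time).
+ */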
+
+/*
+ * This function adjusts sess->srv_conn and maintains the previous and new
+ * server's served stream counts. Setting newsrv to NULL is enough to release
+ * current connection slot. This function also notifies any LB algo which might
+ * expect to be informed about any change in the number of active streams on a
+ * server.
+ */
+void sess_change_server(struct stream *strm, struct server *newsrv)
+{
+ struct server *oldsrv = strm->srv_conn;
+
+ if (oldsrv == newsrv)
+ return;
+
+ if (oldsrv) {
+ _HA_ATOMIC_DEC(&oldsrv->served);
+ _HA_ATOMIC_DEC(&oldsrv->proxy->served);
+ __ha_barrier_atomic_store();
+ if (oldsrv->proxy->lbprm.server_drop_conn)
+ oldsrv->proxy->lbprm.server_drop_conn(oldsrv);
+ stream_del_srv_conn(strm);
+ }
+
+ if (newsrv) {
+ _HA_ATOMIC_INC(&newsrv->served);
+ _HA_ATOMIC_INC(&newsrv->proxy->served);
+ __ha_barrier_atomic_store();
+ if (newsrv->proxy->lbprm.server_take_conn)
+ newsrv->proxy->lbprm.server_take_conn(newsrv);
+ stream_add_srv_conn(strm, newsrv);
+ }
+}
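+
+/* Usage sketch (illustrative): as the comment above notes, releasing the
+ * stream's current server slot only requires passing a NULL server:
+ *
+ *   sess_change_server(strm, NULL);
+ */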
+
+/* Handle server-side errors for default protocols. It is called whenever a
+ * connection setup is aborted or a request is aborted in queue. It sets the
+ * stream termination flags so that the caller does not have to worry about
+ * them. It's installed as ->srv_error for the server-side stream connector.
+ */
+void default_srv_error(struct stream *s, struct stconn *sc)
+{
+ int err_type = s->conn_err_type;
+ int err = 0, fin = 0;
+
+ if (err_type & STRM_ET_QUEUE_ABRT) {
+ err = SF_ERR_CLICL;
+ fin = SF_FINST_Q;
+ }
+ else if (err_type & STRM_ET_CONN_ABRT) {
+ err = SF_ERR_CLICL;
+ fin = SF_FINST_C;
+ }
+ else if (err_type & STRM_ET_QUEUE_TO) {
+ err = SF_ERR_SRVTO;
+ fin = SF_FINST_Q;
+ }
+ else if (err_type & STRM_ET_QUEUE_ERR) {
+ err = SF_ERR_SRVCL;
+ fin = SF_FINST_Q;
+ }
+ else if (err_type & STRM_ET_CONN_TO) {
+ err = SF_ERR_SRVTO;
+ fin = SF_FINST_C;
+ }
+ else if (err_type & STRM_ET_CONN_ERR) {
+ err = SF_ERR_SRVCL;
+ fin = SF_FINST_C;
+ }
+ else if (err_type & STRM_ET_CONN_RES) {
+ err = SF_ERR_RESOURCE;
+ fin = SF_FINST_C;
+ }
+ else /* STRM_ET_CONN_OTHER and others */ {
+ err = SF_ERR_INTERNAL;
+ fin = SF_FINST_C;
+ }
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= err;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= fin;
+}
+
+/* kill a stream and set the termination flags to <why> (one of SF_ERR_*) */
+void stream_shutdown(struct stream *stream, int why)
+{
+ if (stream->scb->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))
+ return;
+
+ sc_schedule_shutdown(stream->scb);
+ sc_schedule_abort(stream->scb);
+ stream->task->nice = 1024;
+ if (!(stream->flags & SF_ERR_MASK))
+ stream->flags |= why;
+ task_wakeup(stream->task, TASK_WOKEN_OTHER);
+}
+
+/* Dumps the state of the stream or applet context designated by <obj>, which
+ * was found spinning at loop rate <rate>, then aborts hoping to retrieve a
+ * core.
+ */
+void stream_dump_and_crash(enum obj_type *obj, int rate)
+{
+ struct stream *s;
+ char *msg = NULL;
+ const void *ptr;
+
+ ptr = s = objt_stream(obj);
+ if (!s) {
+ const struct appctx *appctx = objt_appctx(obj);
+ if (!appctx)
+ return;
+ ptr = appctx;
+ s = appctx_strm(appctx);
+ if (!s)
+ return;
+ }
+
+ chunk_reset(&trash);
+ chunk_printf(&trash, " ");
+ strm_dump_to_buffer(&trash, s, " ", HA_ATOMIC_LOAD(&global.anon_key));
+
+ if (ptr != s) { // that's an appctx
+ const struct appctx *appctx = ptr;
+
+ chunk_appendf(&trash, " applet=%p(", appctx->applet);
+ resolve_sym_name(&trash, NULL, appctx->applet);
+ chunk_appendf(&trash, ")");
+
+ chunk_appendf(&trash, " handler=%p(", appctx->applet->fct);
+ resolve_sym_name(&trash, NULL, appctx->applet->fct);
+ chunk_appendf(&trash, ")");
+ }
+
+ memprintf(&msg,
+ "A bogus %s [%p] is spinning at %d calls per second and refuses to die, "
+ "aborting now! Please report this error to developers:\n"
+ "%s\n",
+ obj_type_name(obj), ptr, rate, trash.area);
+
+ ha_alert("%s", msg);
+ send_log(NULL, LOG_EMERG, "%s", msg);
+ ABORT_NOW();
+}
+
+/* initialize the required structures */
+static void init_stream()
+{
+ int thr;
+
+ for (thr = 0; thr < MAX_THREADS; thr++)
+ LIST_INIT(&ha_thread_ctx[thr].streams);
+}
+INITCALL0(STG_INIT, init_stream);
+
+/* Generates a unique ID based on the given <format>, stores it in the given <strm> and
+ * returns the unique ID.
+ *
+ * If this function fails to allocate memory, IST_NULL is returned.
+ *
+ * If an ID is already stored within the stream, nothing happens and the
+ * existing unique ID is returned.
+ */
+struct ist stream_generate_unique_id(struct stream *strm, struct list *format)
+{
+ if (isttest(strm->unique_id)) {
+ return strm->unique_id;
+ }
+ else {
+ char *unique_id;
+ int length;
+ if ((unique_id = pool_alloc(pool_head_uniqueid)) == NULL)
+ return IST_NULL;
+
+ length = build_logline(strm, unique_id, UNIQUEID_LEN, format);
+ strm->unique_id = ist2(unique_id, length);
+
+ return strm->unique_id;
+ }
+}
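+
+/* Usage sketch (illustrative; assumes the caller holds a valid stream whose
+ * frontend has a configured unique-id format list):
+ *
+ *   struct ist id = stream_generate_unique_id(s, &sess->fe->format_unique_id);
+ *   if (isttest(id)) {
+ *       // id.ptr/id.len may now be used, e.g. to emit a unique-id header
+ *   }
+ */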
+
+/************************************************************************/
+/* All supported ACL keywords must be declared here. */
+/************************************************************************/
+static enum act_return stream_action_set_log_level(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ s->logs.level = (uintptr_t)rule->arg.act.p[0];
+ return ACT_RET_CONT;
+}
+
+
+/* Parse a "set-log-level" action. It takes the level value as argument. It
+ * returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret stream_parse_set_log_level(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int level;
+
+ if (!*args[*cur_arg]) {
+ bad_log_level:
+ memprintf(err, "expects exactly 1 argument (log level name or 'silent')");
+ return ACT_RET_PRS_ERR;
+ }
+ if (strcmp(args[*cur_arg], "silent") == 0)
+ level = -1;
+ else if ((level = get_log_level(args[*cur_arg]) + 1) == 0)
+ goto bad_log_level;
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = stream_action_set_log_level;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)level;
+ return ACT_RET_PRS_OK;
+}
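+
+/* Configuration sketch (illustrative): once registered below, this action is
+ * typically used from rulesets such as:
+ *
+ *   tcp-request content set-log-level silent if { src 10.0.0.0/8 }
+ *   http-response set-log-level err if { status ge 500 }
+ */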
+
+static enum act_return stream_action_set_nice(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ s->task->nice = (uintptr_t)rule->arg.act.p[0];
+ return ACT_RET_CONT;
+}
+
+
+/* Parse a "set-nice" action. It takes the nice value as argument. It returns
+ * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret stream_parse_set_nice(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int nice;
+
+ if (!*args[*cur_arg]) {
+ memprintf(err, "expects exactly 1 argument (integer value)");
+ return ACT_RET_PRS_ERR;
+ }
+
+ nice = atoi(args[*cur_arg]);
+ if (nice < -1024)
+ nice = -1024;
+ else if (nice > 1024)
+ nice = 1024;
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = stream_action_set_nice;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)nice;
+ return ACT_RET_PRS_OK;
+}
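+
+/* Configuration sketch (illustrative): the parsed value is clamped to the
+ * [-1024, 1024] range, e.g.:
+ *
+ *   http-request set-nice 1024 if { path_beg /bulk }
+ */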
+
+
+static enum act_return tcp_action_switch_stream_mode(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ enum pr_mode mode = (uintptr_t)rule->arg.act.p[0];
+ const struct mux_proto_list *mux_proto = rule->arg.act.p[1];
+
+ if (!IS_HTX_STRM(s) && mode == PR_MODE_HTTP) {
+ if (!stream_set_http_mode(s, mux_proto)) {
+ stream_abort(s);
+ return ACT_RET_ABRT;
+ }
+ }
+ return ACT_RET_STOP;
+}
+
+
+static int check_tcp_switch_stream_mode(struct act_rule *rule, struct proxy *px, char **err)
+{
+ const struct mux_proto_list *mux_ent;
+ const struct mux_proto_list *mux_proto = rule->arg.act.p[1];
+ enum pr_mode pr_mode = (uintptr_t)rule->arg.act.p[0];
+ enum proto_proxy_mode mode = conn_pr_mode_to_proto_mode(pr_mode);
+
+ if (pr_mode == PR_MODE_HTTP)
+ px->options |= PR_O_HTTP_UPG;
+
+ if (mux_proto) {
+ mux_ent = conn_get_best_mux_entry(mux_proto->token, PROTO_SIDE_FE, mode);
+ if (!mux_ent || !isteq(mux_ent->token, mux_proto->token)) {
+ memprintf(err, "MUX protocol '%.*s' is not compatible with the selected mode",
+ (int)mux_proto->token.len, mux_proto->token.ptr);
+ return 0;
+ }
+ }
+ else {
+ mux_ent = conn_get_best_mux_entry(IST_NULL, PROTO_SIDE_FE, mode);
+ if (!mux_ent) {
+ memprintf(err, "Unable to find compatible MUX protocol with the selected mode");
+ return 0;
+ }
+ }
+
+ /* Update the mux */
+ rule->arg.act.p[1] = (void *)mux_ent;
+ return 1;
+
+}
+
+static enum act_parse_ret stream_parse_switch_mode(const char **args, int *cur_arg,
+ struct proxy *px, struct act_rule *rule,
+ char **err)
+{
+ const struct mux_proto_list *mux_proto = NULL;
+ struct ist proto;
+ enum pr_mode mode;
+
+ /* must have at least the mode */
+ if (*(args[*cur_arg]) == 0) {
+ memprintf(err, "'%s %s' expects a mode as argument.", args[0], args[*cur_arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (!(px->cap & PR_CAP_FE)) {
+ memprintf(err, "'%s %s' not allowed because %s '%s' has no frontend capability",
+ args[0], args[*cur_arg-1], proxy_type_str(px), px->id);
+ return ACT_RET_PRS_ERR;
+ }
+ /* Check the mode. For now "tcp" is disabled because downgrade is not
+ * supported and PT is the only TCP mux.
+ */
+ if (strcmp(args[*cur_arg], "http") == 0)
+ mode = PR_MODE_HTTP;
+ else {
+ memprintf(err, "'%s %s' expects a valid mode (got '%s').", args[0], args[*cur_arg-1], args[*cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* check the proto, if specified */
+ if (*(args[*cur_arg+1]) && strcmp(args[*cur_arg+1], "proto") == 0) {
+ if (*(args[*cur_arg+2]) == 0) {
+ memprintf(err, "'%s %s': '%s' expects a protocol as argument.",
+ args[0], args[*cur_arg-1], args[*cur_arg+1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ proto = ist(args[*cur_arg + 2]);
+ mux_proto = get_mux_proto(proto);
+ if (!mux_proto) {
+ memprintf(err, "'%s %s': '%s' expects a valid MUX protocol, if specified (got '%s')",
+ args[0], args[*cur_arg-1], args[*cur_arg+1], args[*cur_arg+2]);
+ return ACT_RET_PRS_ERR;
+ }
+ *cur_arg += 2;
+ }
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = tcp_action_switch_stream_mode;
+ rule->check_ptr = check_tcp_switch_stream_mode;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)mode;
+ rule->arg.act.p[1] = (void *)mux_proto;
+ return ACT_RET_PRS_OK;
+}
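+
+/* Configuration sketch (illustrative): upgrading a TCP stream to HTTP,
+ * optionally forcing a specific multiplexer (assumes the h2 mux is built in):
+ *
+ *   tcp-request content switch-mode http if HTTP
+ *   tcp-request content switch-mode http proto h2
+ */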
+
+/* Parse a "use-service" action. It looks up the service keyword and lets its
+ * own parser handle the remaining arguments. It returns ACT_RET_PRS_OK on
+ * success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret stream_parse_use_service(const char **args, int *cur_arg,
+ struct proxy *px, struct act_rule *rule,
+ char **err)
+{
+ struct action_kw *kw;
+
+ /* Check if the service name exists. */
+ if (*(args[*cur_arg]) == 0) {
+ memprintf(err, "'%s' expects a service name.", args[0]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* look up the keyword corresponding to a service. */
+ kw = action_lookup(&service_keywords, args[*cur_arg]);
+ if (!kw) {
+ memprintf(err, "'%s' unknown service name.", args[1]);
+ return ACT_RET_PRS_ERR;
+ }
+ (*cur_arg)++;
+
+ /* executes specific rule parser. */
+ rule->kw = kw;
+ if (kw->parse((const char **)args, cur_arg, px, rule, err) == ACT_RET_PRS_ERR)
+ return ACT_RET_PRS_ERR;
+
+ /* Register processing function. */
+ rule->action_ptr = process_use_service;
+ rule->action = ACT_CUSTOM;
+
+ return ACT_RET_PRS_OK;
+}
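+
+/* Configuration sketch (illustrative; assumes the named service is
+ * registered, e.g. the built-in Prometheus exporter when compiled in):
+ *
+ *   http-request use-service prometheus-exporter if { path /metrics }
+ */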
+
+void service_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&service_keywords, &kw_list->list);
+}
+
+struct action_kw *service_find(const char *kw)
+{
+ return action_lookup(&service_keywords, kw);
+}
+
+/* Lists the known services on <out>. If <out> is null, emit them on stdout one
+ * per line.
+ */
+void list_services(FILE *out)
+{
+ const struct action_kw *akwp, *akwn;
+ struct action_kw_list *kw_list;
+ int found = 0;
+ int i;
+
+ if (out)
+ fprintf(out, "Available services :");
+
+ for (akwn = akwp = NULL;; akwp = akwn) {
+ list_for_each_entry(kw_list, &service_keywords, list) {
+ for (i = 0; kw_list->kw[i].kw != NULL; i++) {
+ if (strordered(akwp ? akwp->kw : NULL,
+ kw_list->kw[i].kw,
+ akwn != akwp ? akwn->kw : NULL))
+ akwn = &kw_list->kw[i];
+ found = 1;
+ }
+ }
+ if (akwn == akwp)
+ break;
+ if (out)
+ fprintf(out, " %s", akwn->kw);
+ else
+ printf("%s\n", akwn->kw);
+ }
+ if (!found && out)
+ fprintf(out, " none\n");
+}
+
+/* appctx context used by the "show sess" command */
+/* flags used for show_sess_ctx.flags */
+#define CLI_SHOWSESS_F_SUSP 0x00000001 /* show only suspicious streams */
+
+struct show_sess_ctx {
+ struct bref bref; /* back-reference from the session being dumped */
+ void *target; /* session we want to dump, or NULL for all */
+ unsigned int thr; /* the thread number being explored (0..MAX_THREADS-1) */
+ unsigned int uid; /* if non-null, the uniq_id of the session being dumped */
+ unsigned int min_age; /* minimum age of streams to dump */
+ unsigned int flags; /* CLI_SHOWSESS_* */
+ int section; /* section of the session being dumped */
+ int pos; /* last position of the current session's buffer */
+};
+
+/* This function appends a complete dump of a stream state onto the buffer,
+ * possibly anonymizing using the specified anon_key. The caller is responsible
+ * for ensuring that enough room remains in the buffer to dump a complete
+ * stream at once. Each new output line will be prefixed with <pfx> if non-null,
+ * which is used to preserve indenting.
+ */
+void strm_dump_to_buffer(struct buffer *buf, const struct stream *strm, const char *pfx, uint32_t anon_key)
+{
+ struct stconn *scf, *scb;
+ struct tm tm;
+ extern const char *monthname[12];
+ char pn[INET6_ADDRSTRLEN];
+ struct connection *conn;
+ struct appctx *tmpctx;
+
+ pfx = pfx ? pfx : "";
+
+ get_localtime(strm->logs.accept_date.tv_sec, &tm);
+ chunk_appendf(buf,
+ "%p: [%02d/%s/%04d:%02d:%02d:%02d.%06d] id=%u proto=%s",
+ strm,
+ tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, (int)(strm->logs.accept_date.tv_usec),
+ strm->uniq_id,
+ strm_li(strm) ? strm_li(strm)->rx.proto->name : "?");
+
+ conn = objt_conn(strm_orig(strm));
+ switch (conn && conn_get_src(conn) ? addr_to_str(conn->src, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(buf, " source=%s:%d\n",
+ HA_ANON_STR(anon_key, pn), get_host_port(conn->src));
+ break;
+ case AF_UNIX:
+ chunk_appendf(buf, " source=unix:%d\n", strm_li(strm)->luid);
+ break;
+ default:
+ /* no more information to print right now */
+ chunk_appendf(buf, "\n");
+ break;
+ }
+
+ chunk_appendf(buf,
+ "%s flags=0x%x, conn_retries=%d, conn_exp=%s conn_et=0x%03x srv_conn=%p, pend_pos=%p waiting=%d epoch=%#x\n", pfx,
+ strm->flags, strm->conn_retries,
+ strm->conn_exp ?
+ tick_is_expired(strm->conn_exp, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(strm->conn_exp - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ strm->conn_err_type, strm->srv_conn, strm->pend_pos,
+ LIST_INLIST(&strm->buffer_wait.list), strm->stream_epoch);
+
+ chunk_appendf(buf,
+ "%s frontend=%s (id=%u mode=%s), listener=%s (id=%u)", pfx,
+ HA_ANON_STR(anon_key, strm_fe(strm)->id), strm_fe(strm)->uuid, proxy_mode_str(strm_fe(strm)->mode),
+ strm_li(strm) ? strm_li(strm)->name ? strm_li(strm)->name : "?" : "?",
+ strm_li(strm) ? strm_li(strm)->luid : 0);
+
+ switch (conn && conn_get_dst(conn) ? addr_to_str(conn->dst, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(buf, " addr=%s:%d\n",
+ HA_ANON_STR(anon_key, pn), get_host_port(conn->dst));
+ break;
+ case AF_UNIX:
+ chunk_appendf(buf, " addr=unix:%d\n", strm_li(strm)->luid);
+ break;
+ default:
+ /* no more information to print right now */
+ chunk_appendf(buf, "\n");
+ break;
+ }
+
+ if (strm->be->cap & PR_CAP_BE)
+ chunk_appendf(buf,
+ "%s backend=%s (id=%u mode=%s)", pfx,
+ HA_ANON_STR(anon_key, strm->be->id),
+ strm->be->uuid, proxy_mode_str(strm->be->mode));
+ else
+ chunk_appendf(buf, "%s backend=<NONE> (id=-1 mode=-)", pfx);
+
+ conn = sc_conn(strm->scb);
+ switch (conn && conn_get_src(conn) ? addr_to_str(conn->src, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(buf, " addr=%s:%d\n",
+ HA_ANON_STR(anon_key, pn), get_host_port(conn->src));
+ break;
+ case AF_UNIX:
+ chunk_appendf(buf, " addr=unix\n");
+ break;
+ default:
+ /* no more information to print right now */
+ chunk_appendf(buf, "\n");
+ break;
+ }
+
+ if (strm->be->cap & PR_CAP_BE)
+ chunk_appendf(buf,
+ "%s server=%s (id=%u)", pfx,
+ objt_server(strm->target) ? HA_ANON_STR(anon_key, __objt_server(strm->target)->id) : "<none>",
+ objt_server(strm->target) ? __objt_server(strm->target)->puid : 0);
+ else
+ chunk_appendf(buf, "%s server=<NONE> (id=-1)", pfx);
+
+ switch (conn && conn_get_dst(conn) ? addr_to_str(conn->dst, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(buf, " addr=%s:%d\n",
+ HA_ANON_STR(anon_key, pn), get_host_port(conn->dst));
+ break;
+ case AF_UNIX:
+ chunk_appendf(buf, " addr=unix\n");
+ break;
+ default:
+ /* no more information to print right now */
+ chunk_appendf(buf, "\n");
+ break;
+ }
+
+ chunk_appendf(buf,
+ "%s task=%p (state=0x%02x nice=%d calls=%u rate=%u exp=%s tid=%d(%d/%d)%s", pfx,
+ strm->task,
+ strm->task->state,
+ strm->task->nice, strm->task->calls, read_freq_ctr(&strm->call_rate),
+ strm->task->expire ?
+ tick_is_expired(strm->task->expire, now_ms) ? "<PAST>" :
+ human_time(TICKS_TO_MS(strm->task->expire - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ strm->task->tid,
+ ha_thread_info[strm->task->tid].tgid,
+ ha_thread_info[strm->task->tid].ltid,
+ task_in_rq(strm->task) ? ", running" : "");
+
+ chunk_appendf(buf,
+ " age=%s)\n",
+ human_time(ns_to_sec(now_ns) - ns_to_sec(strm->logs.request_ts), 1));
+
+ if (strm->txn)
+ chunk_appendf(buf,
+ "%s txn=%p flags=0x%x meth=%d status=%d req.st=%s rsp.st=%s req.f=0x%02x rsp.f=0x%02x\n", pfx,
+ strm->txn, strm->txn->flags, strm->txn->meth, strm->txn->status,
+ h1_msg_state_str(strm->txn->req.msg_state), h1_msg_state_str(strm->txn->rsp.msg_state),
+ strm->txn->req.flags, strm->txn->rsp.flags);
+
+ scf = strm->scf;
+ chunk_appendf(buf, "%s scf=%p flags=0x%08x ioto=%s state=%s endp=%s,%p,0x%08x sub=%d", pfx,
+ scf, scf->flags, human_time(scf->ioto, TICKS_TO_MS(1000)), sc_state_str(scf->state),
+ (sc_ep_test(scf, SE_FL_T_MUX) ? "CONN" : (sc_ep_test(scf, SE_FL_T_APPLET) ? "APPCTX" : "NONE")),
+ scf->sedesc->se, sc_ep_get(scf), scf->wait_event.events);
+ chunk_appendf(buf, " rex=%s",
+ sc_ep_rcv_ex(scf) ? human_time(TICKS_TO_MS(sc_ep_rcv_ex(scf) - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ chunk_appendf(buf, " wex=%s",
+ sc_ep_snd_ex(scf) ? human_time(TICKS_TO_MS(sc_ep_snd_ex(scf) - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ chunk_appendf(buf, " rto=%s",
+ tick_isset(scf->sedesc->lra) ? human_time(TICKS_TO_MS(tick_add(scf->sedesc->lra, scf->ioto) - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ chunk_appendf(buf, " wto=%s\n",
+ tick_isset(scf->sedesc->fsb) ? human_time(TICKS_TO_MS(tick_add(scf->sedesc->fsb, scf->ioto) - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(buf, "%s iobuf.flags=0x%08x .pipe=%d .buf=%u@%p+%u/%u\n", pfx,
+ scf->sedesc->iobuf.flags,
+ scf->sedesc->iobuf.pipe ? scf->sedesc->iobuf.pipe->data : 0,
+ scf->sedesc->iobuf.buf ? (unsigned int)b_data(scf->sedesc->iobuf.buf): 0,
+ scf->sedesc->iobuf.buf ? b_orig(scf->sedesc->iobuf.buf): NULL,
+ scf->sedesc->iobuf.buf ? (unsigned int)b_head_ofs(scf->sedesc->iobuf.buf): 0,
+ scf->sedesc->iobuf.buf ? (unsigned int)b_size(scf->sedesc->iobuf.buf): 0);
+
+ if ((conn = sc_conn(scf)) != NULL) {
+ if (conn->mux && conn->mux->show_sd) {
+ char muxpfx[100] = "";
+
+ snprintf(muxpfx, sizeof(muxpfx), "%s ", pfx);
+ chunk_appendf(buf, "%s ", pfx);
+ conn->mux->show_sd(buf, scf->sedesc, muxpfx);
+ chunk_appendf(buf, "\n");
+ }
+
+ chunk_appendf(buf,
+ "%s co0=%p ctrl=%s xprt=%s mux=%s data=%s target=%s:%p\n", pfx,
+ conn,
+ conn_get_ctrl_name(conn),
+ conn_get_xprt_name(conn),
+ conn_get_mux_name(conn),
+ sc_get_data_name(scf),
+ obj_type_name(conn->target),
+ obj_base_ptr(conn->target));
+
+ chunk_appendf(buf,
+ "%s flags=0x%08x fd=%d fd.state=%02x updt=%d fd.tmask=0x%lx\n", pfx,
+ conn->flags,
+ conn_fd(conn),
+ conn_fd(conn) >= 0 ? fdtab[conn->handle.fd].state : 0,
+ conn_fd(conn) >= 0 ? !!(fdtab[conn->handle.fd].update_mask & ti->ltid_bit) : 0,
+ conn_fd(conn) >= 0 ? fdtab[conn->handle.fd].thread_mask: 0);
+ }
+ else if ((tmpctx = sc_appctx(scf)) != NULL) {
+ chunk_appendf(buf,
+ "%s app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n", pfx,
+ tmpctx,
+ tmpctx->st0,
+ tmpctx->st1,
+ tmpctx->applet->name,
+ tmpctx->t->tid,
+ tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
+ }
+
+ scb = strm->scb;
+ chunk_appendf(buf, "%s scb=%p flags=0x%08x ioto=%s state=%s endp=%s,%p,0x%08x sub=%d", pfx,
+ scb, scb->flags, human_time(scb->ioto, TICKS_TO_MS(1000)), sc_state_str(scb->state),
+ (sc_ep_test(scb, SE_FL_T_MUX) ? "CONN" : (sc_ep_test(scb, SE_FL_T_APPLET) ? "APPCTX" : "NONE")),
+ scb->sedesc->se, sc_ep_get(scb), scb->wait_event.events);
+ chunk_appendf(buf, " rex=%s",
+ sc_ep_rcv_ex(scb) ? human_time(TICKS_TO_MS(sc_ep_rcv_ex(scb) - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ chunk_appendf(buf, " wex=%s",
+ sc_ep_snd_ex(scb) ? human_time(TICKS_TO_MS(sc_ep_snd_ex(scb) - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ chunk_appendf(buf, " rto=%s",
+ tick_isset(scb->sedesc->lra) ? human_time(TICKS_TO_MS(tick_add(scb->sedesc->lra, scb->ioto) - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+ chunk_appendf(buf, " wto=%s\n",
+ tick_isset(scb->sedesc->fsb) ? human_time(TICKS_TO_MS(tick_add(scb->sedesc->fsb, scb->ioto) - now_ms), TICKS_TO_MS(1000)) : "<NEVER>");
+
+ chunk_appendf(buf, "%s iobuf.flags=0x%08x .pipe=%d .buf=%u@%p+%u/%u\n", pfx,
+ scb->sedesc->iobuf.flags,
+ scb->sedesc->iobuf.pipe ? scb->sedesc->iobuf.pipe->data : 0,
+ scb->sedesc->iobuf.buf ? (unsigned int)b_data(scb->sedesc->iobuf.buf): 0,
+ scb->sedesc->iobuf.buf ? b_orig(scb->sedesc->iobuf.buf): NULL,
+ scb->sedesc->iobuf.buf ? (unsigned int)b_head_ofs(scb->sedesc->iobuf.buf): 0,
+ scb->sedesc->iobuf.buf ? (unsigned int)b_size(scb->sedesc->iobuf.buf): 0);
+
+ if ((conn = sc_conn(scb)) != NULL) {
+ if (conn->mux && conn->mux->show_sd) {
+ char muxpfx[100] = "";
+
+ snprintf(muxpfx, sizeof(muxpfx), "%s ", pfx);
+ chunk_appendf(buf, "%s ", pfx);
+ conn->mux->show_sd(buf, scb->sedesc, muxpfx);
+ chunk_appendf(buf, "\n");
+ }
+
+ chunk_appendf(buf,
+ "%s co1=%p ctrl=%s xprt=%s mux=%s data=%s target=%s:%p\n", pfx,
+ conn,
+ conn_get_ctrl_name(conn),
+ conn_get_xprt_name(conn),
+ conn_get_mux_name(conn),
+ sc_get_data_name(scb),
+ obj_type_name(conn->target),
+ obj_base_ptr(conn->target));
+
+ chunk_appendf(buf,
+ "%s flags=0x%08x fd=%d fd.state=%02x updt=%d fd.tmask=0x%lx\n", pfx,
+ conn->flags,
+ conn_fd(conn),
+ conn_fd(conn) >= 0 ? fdtab[conn->handle.fd].state : 0,
+ conn_fd(conn) >= 0 ? !!(fdtab[conn->handle.fd].update_mask & ti->ltid_bit) : 0,
+ conn_fd(conn) >= 0 ? fdtab[conn->handle.fd].thread_mask: 0);
+ }
+ else if ((tmpctx = sc_appctx(scb)) != NULL) {
+ chunk_appendf(buf,
+ "%s app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n", pfx,
+ tmpctx,
+ tmpctx->st0,
+ tmpctx->st1,
+ tmpctx->applet->name,
+ tmpctx->t->tid,
+ tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
+ }
+
+ if (HAS_FILTERS(strm)) {
+ const struct filter *flt;
+
+ chunk_appendf(buf, "%s filters={", pfx);
+ list_for_each_entry(flt, &strm->strm_flt.filters, list) {
+ if (flt->list.p != &strm->strm_flt.filters)
+ chunk_appendf(buf, ", ");
+ chunk_appendf(buf, "%p=\"%s\"", flt, FLT_ID(flt));
+ }
+ chunk_appendf(buf, "}\n");
+ }
+
+ chunk_appendf(buf,
+ "%s req=%p (f=0x%06x an=0x%x tofwd=%d total=%lld)\n"
+ "%s an_exp=%s buf=%p data=%p o=%u p=%u i=%u size=%u\n",
+ pfx,
+ &strm->req,
+ strm->req.flags, strm->req.analysers,
+ strm->req.to_forward, strm->req.total,
+ pfx,
+ strm->req.analyse_exp ?
+ human_time(TICKS_TO_MS(strm->req.analyse_exp - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ &strm->req.buf,
+ b_orig(&strm->req.buf), (unsigned int)co_data(&strm->req),
+ (unsigned int)ci_head_ofs(&strm->req), (unsigned int)ci_data(&strm->req),
+ (unsigned int)strm->req.buf.size);
+
+ if (IS_HTX_STRM(strm)) {
+ struct htx *htx = htxbuf(&strm->req.buf);
+
+ chunk_appendf(buf,
+ "%s htx=%p flags=0x%x size=%u data=%u used=%u wrap=%s extra=%llu\n", pfx,
+ htx, htx->flags, htx->size, htx->data, htx_nbblks(htx),
+ (htx->tail >= htx->head) ? "NO" : "YES",
+ (unsigned long long)htx->extra);
+ }
+ if (HAS_FILTERS(strm) && strm->strm_flt.current[0]) {
+ const struct filter *flt = strm->strm_flt.current[0];
+
+ chunk_appendf(buf, "%s current_filter=%p (id=\"%s\" flags=0x%x pre=0x%x post=0x%x) \n", pfx,
+ flt, flt->config->id, flt->flags, flt->pre_analyzers, flt->post_analyzers);
+ }
+
+ chunk_appendf(buf,
+ "%s res=%p (f=0x%06x an=0x%x tofwd=%d total=%lld)\n"
+ "%s an_exp=%s buf=%p data=%p o=%u p=%u i=%u size=%u\n",
+ pfx,
+ &strm->res,
+ strm->res.flags, strm->res.analysers,
+ strm->res.to_forward, strm->res.total,
+ pfx,
+ strm->res.analyse_exp ?
+ human_time(TICKS_TO_MS(strm->res.analyse_exp - now_ms),
+ TICKS_TO_MS(1000)) : "<NEVER>",
+ &strm->res.buf,
+ b_orig(&strm->res.buf), (unsigned int)co_data(&strm->res),
+ (unsigned int)ci_head_ofs(&strm->res), (unsigned int)ci_data(&strm->res),
+ (unsigned int)strm->res.buf.size);
+
+ if (IS_HTX_STRM(strm)) {
+ struct htx *htx = htxbuf(&strm->res.buf);
+
+ chunk_appendf(buf,
+ "%s htx=%p flags=0x%x size=%u data=%u used=%u wrap=%s extra=%llu\n", pfx,
+ htx, htx->flags, htx->size, htx->data, htx_nbblks(htx),
+ (htx->tail >= htx->head) ? "NO" : "YES",
+ (unsigned long long)htx->extra);
+ }
+
+ if (HAS_FILTERS(strm) && strm->strm_flt.current[1]) {
+ const struct filter *flt = strm->strm_flt.current[1];
+
+ chunk_appendf(buf, "%s current_filter=%p (id=\"%s\" flags=0x%x pre=0x%x post=0x%x) \n", pfx,
+ flt, flt->config->id, flt->flags, flt->pre_analyzers, flt->post_analyzers);
+ }
+
+ if (strm->current_rule_list && strm->current_rule) {
+ const struct act_rule *rule = strm->current_rule;
+ chunk_appendf(buf, "%s current_rule=\"%s\" [%s:%d]\n", pfx, rule->kw->kw, rule->conf.file, rule->conf.line);
+ }
+}
+
+/* This function dumps a complete stream state onto the stream connector's
+ * read buffer. The stream has to be passed in <strm>. It returns 0 if the
+ * output buffer is full and it needs to be called again, otherwise non-zero.
+ * It is designed to be called from cli_io_handler_dump_sess() below.
+ */
+static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm)
+{
+ struct appctx *appctx = __sc_appctx(sc);
+ struct show_sess_ctx *ctx = appctx->svcctx;
+
+ chunk_reset(&trash);
+
+ if (ctx->section > 0 && ctx->uid != strm->uniq_id) {
+ /* stream changed, no need to go any further */
+ chunk_appendf(&trash, " *** session terminated while we were watching it ***\n");
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+ goto done;
+ }
+
+ switch (ctx->section) {
+ case 0: /* main status of the stream */
+ ctx->uid = strm->uniq_id;
+ ctx->section = 1;
+ __fallthrough;
+
+ case 1:
+ strm_dump_to_buffer(&trash, strm, "", appctx->cli_anon_key);
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+
+ /* use other states to dump the contents */
+ }
+ /* end of dump */
+ done:
+ ctx->uid = 0;
+ ctx->section = 0;
+ return 1;
+ full:
+ return 0;
+}
+
+static int cli_parse_show_sess(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct show_sess_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx));
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ /* now all sessions by default */
+ ctx->target = NULL;
+ ctx->min_age = 0;
+ ctx->section = 0; /* start with stream status */
+ ctx->pos = 0;
+ ctx->thr = 0;
+
+ if (*args[2] && strcmp(args[2], "older") == 0) {
+ unsigned timeout;
+ const char *res;
+
+ if (!*args[3])
+ return cli_err(appctx, "Expects a minimum age (in seconds by default).\n");
+
+ res = parse_time_err(args[3], &timeout, TIME_UNIT_S);
+ if (res != 0)
+ return cli_err(appctx, "Invalid age.\n");
+
+ ctx->min_age = timeout;
+ ctx->target = (void *)-1; /* show all matching entries */
+ }
+ else if (*args[2] && strcmp(args[2], "susp") == 0) {
+ ctx->flags |= CLI_SHOWSESS_F_SUSP;
+ ctx->target = (void *)-1; /* show all matching entries */
+ }
+ else if (*args[2] && strcmp(args[2], "all") == 0)
+ ctx->target = (void *)-1;
+ else if (*args[2])
+ ctx->target = (void *)strtoul(args[2], NULL, 0);
+
+ /* The back-ref must be reset, it will be detected and set by
+ * the dump code upon first invocation.
+ */
+ LIST_INIT(&ctx->bref.users);
+
+ /* let's set our own stream's epoch to the current one and increment
+ * it so that we know which streams were already there before us.
+ */
+ appctx_strm(appctx)->stream_epoch = _HA_ATOMIC_FETCH_ADD(&stream_epoch, 1);
+ return 0;
+}
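+
+/* CLI usage sketch (illustrative; the socket path is an assumption, any
+ * configured stats socket works):
+ *
+ *   $ echo "show sess" | socat stdio /var/run/haproxy.sock
+ *   $ echo "show sess older 30" | socat stdio /var/run/haproxy.sock
+ *   $ echo "show sess 0x7f2d54a8b200" | socat stdio /var/run/haproxy.sock
+ *
+ * The first form lists one line per stream, the second restricts the dump to
+ * streams older than 30 seconds, and the last fully dumps a single stream
+ * whose pointer was taken from a previous listing.
+ */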
+
+/* This function dumps all streams' states onto the stream connector's
+ * read buffer. It returns 0 if the output buffer is full and it needs
+ * to be called again, otherwise non-zero. It proceeds in an isolated
+ * thread so there is no thread safety issue here.
+ */
+static int cli_io_handler_dump_sess(struct appctx *appctx)
+{
+ struct show_sess_ctx *ctx = appctx->svcctx;
+ struct stconn *sc = appctx_sc(appctx);
+ struct connection *conn;
+
+ thread_isolate();
+
+ if (ctx->thr >= global.nbthread) {
+ /* already terminated */
+ goto done;
+ }
+
+ /* FIXME: Don't watch the other side! */
+ if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) {
+ /* If we're forced to shut down, we might have to remove our
+ * reference to the last stream being dumped.
+ */
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ LIST_DELETE(&ctx->bref.users);
+ LIST_INIT(&ctx->bref.users);
+ }
+ goto done;
+ }
+
+ chunk_reset(&trash);
+
+ /* first, let's detach the back-ref from a possible previous stream */
+ if (!LIST_ISEMPTY(&ctx->bref.users)) {
+ LIST_DELETE(&ctx->bref.users);
+ LIST_INIT(&ctx->bref.users);
+ } else if (!ctx->bref.ref) {
+ /* first call, start with first stream */
+ ctx->bref.ref = ha_thread_ctx[ctx->thr].streams.n;
+ }
+
+ /* and start from where we stopped */
+ while (1) {
+ char pn[INET6_ADDRSTRLEN];
+ struct stream *curr_strm;
+ int done = 0;
+
+ if (ctx->bref.ref == &ha_thread_ctx[ctx->thr].streams)
+ done = 1;
+ else {
+ /* check if we've found a stream created after issuing the "show sess" */
+ curr_strm = LIST_ELEM(ctx->bref.ref, struct stream *, list);
+ if ((int)(curr_strm->stream_epoch - appctx_strm(appctx)->stream_epoch) > 0)
+ done = 1;
+ }
+
+ if (done) {
+ ctx->thr++;
+ if (ctx->thr >= global.nbthread)
+ break;
+ ctx->bref.ref = ha_thread_ctx[ctx->thr].streams.n;
+ continue;
+ }
+
+ if (ctx->min_age) {
+ uint age = ns_to_sec(now_ns) - ns_to_sec(curr_strm->logs.request_ts);
+ if (age < ctx->min_age)
+ goto next_sess;
+ }
+
+ if (ctx->flags & CLI_SHOWSESS_F_SUSP) {
+ /* only show suspicious streams. Non-suspicious ones have a valid
+ * expiration date in the future and a valid front endpoint.
+ */
+ if (curr_strm->task->expire &&
+ !tick_is_expired(curr_strm->task->expire, now_ms) &&
+ curr_strm->scf && curr_strm->scf->sedesc && curr_strm->scf->sedesc->se)
+ goto next_sess;
+ }
+
+ if (ctx->target) {
+ if (ctx->target != (void *)-1 && ctx->target != curr_strm)
+ goto next_sess;
+
+ LIST_APPEND(&curr_strm->back_refs, &ctx->bref.users);
+ /* call the proper dump() function and return if we're missing space */
+ if (!stats_dump_full_strm_to_buffer(sc, curr_strm))
+ goto full;
+
+ /* stream dump complete */
+ LIST_DELETE(&ctx->bref.users);
+ LIST_INIT(&ctx->bref.users);
+ if (ctx->target != (void *)-1) {
+ ctx->target = NULL;
+ break;
+ }
+ else
+ goto next_sess;
+ }
+
+ chunk_appendf(&trash,
+ "%p: proto=%s",
+ curr_strm,
+ strm_li(curr_strm) ? strm_li(curr_strm)->rx.proto->name : "?");
+
+ conn = objt_conn(strm_orig(curr_strm));
+ switch (conn && conn_get_src(conn) ? addr_to_str(conn->src, pn, sizeof(pn)) : AF_UNSPEC) {
+ case AF_INET:
+ case AF_INET6:
+ chunk_appendf(&trash,
+ " src=%s:%d fe=%s be=%s srv=%s",
+ HA_ANON_CLI(pn),
+ get_host_port(conn->src),
+ HA_ANON_CLI(strm_fe(curr_strm)->id),
+ (curr_strm->be->cap & PR_CAP_BE) ? HA_ANON_CLI(curr_strm->be->id) : "<NONE>",
+ objt_server(curr_strm->target) ? HA_ANON_CLI(__objt_server(curr_strm->target)->id) : "<none>"
+ );
+ break;
+ case AF_UNIX:
+ chunk_appendf(&trash,
+ " src=unix:%d fe=%s be=%s srv=%s",
+ strm_li(curr_strm)->luid,
+ HA_ANON_CLI(strm_fe(curr_strm)->id),
+ (curr_strm->be->cap & PR_CAP_BE) ? HA_ANON_CLI(curr_strm->be->id) : "<NONE>",
+ objt_server(curr_strm->target) ? HA_ANON_CLI(__objt_server(curr_strm->target)->id) : "<none>"
+ );
+ break;
+ }
+
+ chunk_appendf(&trash,
+ " ts=%02x epoch=%#x age=%s calls=%u rate=%u cpu=%llu lat=%llu",
+ curr_strm->task->state, curr_strm->stream_epoch,
+ human_time(ns_to_sec(now_ns) - ns_to_sec(curr_strm->logs.request_ts), 1),
+ curr_strm->task->calls, read_freq_ctr(&curr_strm->call_rate),
+ (unsigned long long)curr_strm->cpu_time, (unsigned long long)curr_strm->lat_time);
+
+ chunk_appendf(&trash,
+ " rq[f=%06xh,i=%u,an=%02xh",
+ curr_strm->req.flags,
+ (unsigned int)ci_data(&curr_strm->req),
+ curr_strm->req.analysers);
+
+ chunk_appendf(&trash,
+ ",ax=%s]",
+ curr_strm->req.analyse_exp ?
+ human_time(TICKS_TO_MS(curr_strm->req.analyse_exp - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ chunk_appendf(&trash,
+ " rp[f=%06xh,i=%u,an=%02xh",
+ curr_strm->res.flags,
+ (unsigned int)ci_data(&curr_strm->res),
+ curr_strm->res.analysers);
+ chunk_appendf(&trash,
+ ",ax=%s]",
+ curr_strm->res.analyse_exp ?
+ human_time(TICKS_TO_MS(curr_strm->res.analyse_exp - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ conn = sc_conn(curr_strm->scf);
+ chunk_appendf(&trash," scf=[%d,%1xh,fd=%d",
+ curr_strm->scf->state, curr_strm->scf->flags, conn_fd(conn));
+ chunk_appendf(&trash, ",rex=%s",
+ sc_ep_rcv_ex(curr_strm->scf) ?
+ human_time(TICKS_TO_MS(sc_ep_rcv_ex(curr_strm->scf) - now_ms),
+ TICKS_TO_MS(1000)) : "");
+ chunk_appendf(&trash,",wex=%s]",
+ sc_ep_snd_ex(curr_strm->scf) ?
+ human_time(TICKS_TO_MS(sc_ep_snd_ex(curr_strm->scf) - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ conn = sc_conn(curr_strm->scb);
+ chunk_appendf(&trash, " scb=[%d,%1xh,fd=%d",
+ curr_strm->scb->state, curr_strm->scb->flags, conn_fd(conn));
+ chunk_appendf(&trash, ",rex=%s",
+ sc_ep_rcv_ex(curr_strm->scb) ?
+ human_time(TICKS_TO_MS(sc_ep_rcv_ex(curr_strm->scb) - now_ms),
+ TICKS_TO_MS(1000)) : "");
+ chunk_appendf(&trash, ",wex=%s]",
+ sc_ep_snd_ex(curr_strm->scb) ?
+ human_time(TICKS_TO_MS(sc_ep_snd_ex(curr_strm->scb) - now_ms),
+ TICKS_TO_MS(1000)) : "");
+
+ chunk_appendf(&trash,
+ " exp=%s rc=%d c_exp=%s",
+ curr_strm->task->expire ?
+ human_time(TICKS_TO_MS(curr_strm->task->expire - now_ms),
+ TICKS_TO_MS(1000)) : "",
+ curr_strm->conn_retries,
+ curr_strm->conn_exp ?
+ human_time(TICKS_TO_MS(curr_strm->conn_exp - now_ms),
+ TICKS_TO_MS(1000)) : "");
+ if (task_in_rq(curr_strm->task))
+ chunk_appendf(&trash, " run(nice=%d)", curr_strm->task->nice);
+
+ chunk_appendf(&trash, "\n");
+
+ if (applet_putchk(appctx, &trash) == -1) {
+ /* let's try again later from this stream. We add ourselves into
+ * this stream's users so that it can remove us upon termination.
+ */
+ LIST_APPEND(&curr_strm->back_refs, &ctx->bref.users);
+ goto full;
+ }
+
+ next_sess:
+ ctx->bref.ref = curr_strm->list.n;
+ }
+
+ if (ctx->target && ctx->target != (void *)-1) {
+ /* specified stream not found */
+ if (ctx->section > 0)
+ chunk_appendf(&trash, " *** session terminated while we were watching it ***\n");
+ else
+ chunk_appendf(&trash, "Session not found.\n");
+
+ if (applet_putchk(appctx, &trash) == -1)
+ goto full;
+
+ ctx->target = NULL;
+ ctx->uid = 0;
+ goto done;
+ }
+
+ done:
+ thread_release();
+ return 1;
+ full:
+ thread_release();
+ return 0;
+}
+
+static void cli_release_show_sess(struct appctx *appctx)
+{
+ struct show_sess_ctx *ctx = appctx->svcctx;
+
+ if (ctx->thr < global.nbthread) {
+ /* a dump was aborted, either in error or timeout. We need to
+ * safely detach from the target stream's list. It's mandatory
+ * to lock because a stream on the target thread could be moving
+ * our node.
+ */
+ thread_isolate();
+ if (!LIST_ISEMPTY(&ctx->bref.users))
+ LIST_DELETE(&ctx->bref.users);
+ thread_release();
+ }
+}
+
+/* Parses the "shutdown session" directive, it always returns 1 */
+static int cli_parse_shutdown_session(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct stream *strm, *ptr;
+ int thr;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ ptr = (void *)strtoul(args[2], NULL, 0);
+ if (!ptr)
+ return cli_err(appctx, "Session pointer expected (use 'show sess').\n");
+
+ strm = NULL;
+
+ thread_isolate();
+
+ /* first, look for the requested stream in the stream table */
+ for (thr = 0; strm != ptr && thr < global.nbthread; thr++) {
+ list_for_each_entry(strm, &ha_thread_ctx[thr].streams, list) {
+ if (strm == ptr) {
+ stream_shutdown(strm, SF_ERR_KILLED);
+ break;
+ }
+ }
+ }
+
+ thread_release();
+
+ /* do we have the stream ? */
+ if (strm != ptr)
+ return cli_err(appctx, "No such session (use 'show sess').\n");
+
+ return 1;
+}
+
+/* Parses the "shutdown session server" directive, it always returns 1 */
+static int cli_parse_shutdown_sessions_server(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct server *sv;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_ADMIN))
+ return 1;
+
+ sv = cli_find_server(appctx, args[3]);
+ if (!sv)
+ return 1;
+
+ /* kill all the streams that are on this server */
+ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock);
+ srv_shutdown_streams(sv, SF_ERR_KILLED);
+ HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock);
+ return 1;
+}
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "show", "sess", NULL }, "show sess [<id>|all|susp|older <age>] : report the list of current sessions or dump this exact session", cli_parse_show_sess, cli_io_handler_dump_sess, cli_release_show_sess },
+ { { "shutdown", "session", NULL }, "shutdown session [id] : kill a specific session", cli_parse_shutdown_session, NULL, NULL },
+ { { "shutdown", "sessions", "server" }, "shutdown sessions server <bk>/<srv> : kill sessions on a server", cli_parse_shutdown_sessions_server, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+/* main configuration keyword registration. */
+static struct action_kw_list stream_tcp_req_keywords = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { "set-nice", stream_parse_set_nice },
+ { "switch-mode", stream_parse_switch_mode },
+ { "use-service", stream_parse_use_service },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &stream_tcp_req_keywords);
+
+/* main configuration keyword registration. */
+static struct action_kw_list stream_tcp_res_keywords = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { "set-nice", stream_parse_set_nice },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &stream_tcp_res_keywords);
+
+static struct action_kw_list stream_http_req_keywords = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { "set-nice", stream_parse_set_nice },
+ { "use-service", stream_parse_use_service },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &stream_http_req_keywords);
+
+static struct action_kw_list stream_http_res_keywords = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { "set-nice", stream_parse_set_nice },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &stream_http_res_keywords);
+
+static struct action_kw_list stream_http_after_res_actions = { ILH, {
+ { "set-log-level", stream_parse_set_log_level },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_after_res_keywords_register, &stream_http_after_res_actions);
+
+static int smp_fetch_cur_client_timeout(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm)
+ return 0;
+
+ smp->data.u.sint = TICKS_TO_MS(smp->strm->scf->ioto);
+ return 1;
+}
+
+static int smp_fetch_cur_server_timeout(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm)
+ return 0;
+
+ smp->data.u.sint = TICKS_TO_MS(smp->strm->scb->ioto);
+ return 1;
+}
+
+static int smp_fetch_cur_tunnel_timeout(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm)
+ return 0;
+
+ smp->data.u.sint = TICKS_TO_MS(smp->strm->tunnel_timeout);
+ return 1;
+}
+
+static int smp_fetch_last_rule_file(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_STR;
+ if (!smp->strm || !smp->strm->last_rule_file)
+ return 0;
+
+ smp->flags |= SMP_F_CONST;
+ smp->data.u.str.area = (char *)smp->strm->last_rule_file;
+ smp->data.u.str.data = strlen(smp->strm->last_rule_file);
+ return 1;
+}
+
+static int smp_fetch_last_rule_line(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm || !smp->strm->last_rule_line)
+ return 0;
+
+ smp->data.u.sint = smp->strm->last_rule_line;
+ return 1;
+}
+
+static int smp_fetch_sess_term_state(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ struct buffer *trash = get_trash_chunk();
+
+ smp->flags = SMP_F_VOLATILE;
+ smp->data.type = SMP_T_STR;
+ if (!smp->strm)
+ return 0;
+
+ trash->area[trash->data++] = sess_term_cond[(smp->strm->flags & SF_ERR_MASK) >> SF_ERR_SHIFT];
+ trash->area[trash->data++] = sess_fin_state[(smp->strm->flags & SF_FINST_MASK) >> SF_FINST_SHIFT];
+
+ smp->data.u.str = *trash;
+ smp->data.type = SMP_T_STR;
+ smp->flags &= ~SMP_F_CONST;
+ return 1;
+}
+
+static int smp_fetch_conn_retries(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm)
+ return 0;
+
+ if (!sc_state_in(smp->strm->scb->state, SC_SB_DIS|SC_SB_CLO))
+ smp->flags |= SMP_F_VOL_TEST;
+ smp->data.u.sint = smp->strm->conn_retries;
+ return 1;
+}
+
+static int smp_fetch_id32(const struct arg *args, struct sample *smp, const char *km, void *private)
+{
+ smp->flags = SMP_F_VOL_TXN;
+ smp->data.type = SMP_T_SINT;
+ if (!smp->strm)
+ return 0;
+ smp->data.u.sint = smp->strm->uniq_id;
+ return 1;
+}
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Please take care of keeping this list alphabetically sorted.
+ */
+static struct sample_fetch_kw_list smp_kws = {ILH, {
+ { "cur_client_timeout", smp_fetch_cur_client_timeout, 0, NULL, SMP_T_SINT, SMP_USE_FTEND, },
+ { "cur_server_timeout", smp_fetch_cur_server_timeout, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "cur_tunnel_timeout", smp_fetch_cur_tunnel_timeout, 0, NULL, SMP_T_SINT, SMP_USE_BKEND, },
+ { "last_rule_file", smp_fetch_last_rule_file, 0, NULL, SMP_T_STR, SMP_USE_INTRN, },
+ { "last_rule_line", smp_fetch_last_rule_line, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "txn.conn_retries", smp_fetch_conn_retries, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV, },
+ { "txn.id32", smp_fetch_id32, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
+ { "txn.sess_term_state",smp_fetch_sess_term_state, 0, NULL, SMP_T_STR, SMP_USE_INTRN, },
+ { NULL, NULL, 0, 0, 0 },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
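+
+/* Configuration sketch (illustrative): these fetches are mostly useful in
+ * log formats or response headers, e.g.:
+ *
+ *   log-format "ts=%[txn.sess_term_state] retries=%[txn.conn_retries]"
+ *   http-after-response set-header X-Rule %[last_rule_file]:%[last_rule_line]
+ */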
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/task.c b/src/task.c
new file mode 100644
index 0000000..1ab5212
--- /dev/null
+++ b/src/task.c
@@ -0,0 +1,979 @@
+/*
+ * Task management functions.
+ *
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <string.h>
+
+#include <import/eb32tree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/activity.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/list.h>
+#include <haproxy/pool.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+
+extern struct task *process_stream(struct task *t, void *context, unsigned int state);
+extern void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu);
+
+DECLARE_POOL(pool_head_task, "task", sizeof(struct task));
+DECLARE_POOL(pool_head_tasklet, "tasklet", sizeof(struct tasklet));
+
+/* This is the memory pool containing all the signal structs. These
+ * struct are used to store each required signal between two tasks.
+ */
+DECLARE_POOL(pool_head_notification, "notification", sizeof(struct notification));
+
+/* The lock protecting all wait queues at once. For now we have no better
+ * alternative since a task may have to be removed from a queue and placed
+ * into another one. Storing the WQ index into the task doesn't seem to be
+ * sufficient either.
+ */
+__decl_aligned_rwlock(wq_lock);
+
+/* Flags the task <t> for immediate destruction and puts it into its first
+ * thread's shared tasklet list if not yet queued/running. This will bypass
+ * the priority scheduling and make the task show up as fast as possible in
+ * the other thread's queue. Note that this operation isn't idempotent and is
+ * not supposed to be run on the same task from multiple threads at once. It's
+ * the caller's responsibility to make sure it is the only one able to kill the
+ * task.
+ */
+void task_kill(struct task *t)
+{
+ unsigned int state = t->state;
+ unsigned int thr;
+
+ BUG_ON(state & TASK_KILLED);
+
+ while (1) {
+ while (state & (TASK_RUNNING | TASK_QUEUED)) {
+ /* task already in the queue and about to be executed,
+ * or even currently running. Just add the flag and be
+ * done with it, the process loop will detect it and kill
+ * it. The CAS will fail if we arrive too late.
+ */
+ if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_KILLED))
+ return;
+ }
+
+ /* We'll have to wake it up, but we must also secure it so that
+ * it doesn't vanish under us. TASK_QUEUED guarantees nobody will
+ * add past us.
+ */
+ if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_QUEUED | TASK_KILLED)) {
+ /* Bypass the tree and go directly into the shared tasklet list.
+ * Note: that's a task so it must be accounted for as such. Pick
+ * the task's first thread for the job.
+ */
+ thr = t->tid >= 0 ? t->tid : tid;
+
+ /* Beware: tasks that have never run don't have their ->list empty yet! */
+ MT_LIST_APPEND(&ha_thread_ctx[thr].shared_tasklet_list,
+ list_to_mt_list(&((struct tasklet *)t)->list));
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].rq_total);
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].tasks_in_list);
+ wake_thread(thr);
+ return;
+ }
+ }
+}
+
+/* Equivalent of task_kill for tasklets. Mark the tasklet <t> for destruction.
+ * It will be deleted on the next scheduler invocation. This function is
+ * thread-safe : a thread can kill a tasklet of another thread.
+ */
+void tasklet_kill(struct tasklet *t)
+{
+ unsigned int state = t->state;
+ unsigned int thr;
+
+ BUG_ON(state & TASK_KILLED);
+
+ while (1) {
+ while (state & (TASK_IN_LIST)) {
+ /* Tasklet already in the list ready to be executed. Add
+ * the killed flag and wait for the process loop to
+ * detect it.
+ */
+ if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_KILLED))
+ return;
+ }
+
+ /* Mark the tasklet as killed and wake the thread to process it
+ * as soon as possible.
+ */
+ if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_IN_LIST | TASK_KILLED)) {
+ thr = t->tid >= 0 ? t->tid : tid;
+ MT_LIST_APPEND(&ha_thread_ctx[thr].shared_tasklet_list,
+ list_to_mt_list(&t->list));
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].rq_total);
+ wake_thread(thr);
+ return;
+ }
+ }
+}
+
+/* Do not call this one, please use tasklet_wakeup_on() instead, as this one is
+ * the slow path of tasklet_wakeup_on() which performs some preliminary checks
+ * and sets TASK_IN_LIST before calling this one. A negative <thr> designates
+ * the current thread.
+ */
+void __tasklet_wakeup_on(struct tasklet *tl, int thr)
+{
+ if (likely(thr < 0)) {
+ /* this tasklet runs on the caller thread */
+ if (tl->state & TASK_HEAVY) {
+ LIST_APPEND(&th_ctx->tasklets[TL_HEAVY], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_HEAVY;
+ }
+ else if (tl->state & TASK_SELF_WAKING) {
+ LIST_APPEND(&th_ctx->tasklets[TL_BULK], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if ((struct task *)tl == th_ctx->current) {
+ _HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
+ LIST_APPEND(&th_ctx->tasklets[TL_BULK], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if (th_ctx->current_queue < 0) {
+ LIST_APPEND(&th_ctx->tasklets[TL_URGENT], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_URGENT;
+ }
+ else {
+ LIST_APPEND(&th_ctx->tasklets[th_ctx->current_queue], &tl->list);
+ th_ctx->tl_class_mask |= 1 << th_ctx->current_queue;
+ }
+ _HA_ATOMIC_INC(&th_ctx->rq_total);
+ } else {
+ /* this tasklet runs on a specific thread. */
+ MT_LIST_APPEND(&ha_thread_ctx[thr].shared_tasklet_list, list_to_mt_list(&tl->list));
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].rq_total);
+ wake_thread(thr);
+ }
+}
+
+/* Do not call this one, please use tasklet_wakeup_after() instead, as this one is
+ * the slow path of tasklet_wakeup_after() which performs some preliminary checks
+ * and sets TASK_IN_LIST before calling this one.
+ */
+struct list *__tasklet_wakeup_after(struct list *head, struct tasklet *tl)
+{
+ BUG_ON(tl->tid >= 0 && tid != tl->tid);
+ /* this tasklet runs on the caller thread */
+ if (!head) {
+ if (tl->state & TASK_HEAVY) {
+ LIST_INSERT(&th_ctx->tasklets[TL_HEAVY], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_HEAVY;
+ }
+ else if (tl->state & TASK_SELF_WAKING) {
+ LIST_INSERT(&th_ctx->tasklets[TL_BULK], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if ((struct task *)tl == th_ctx->current) {
+ _HA_ATOMIC_OR(&tl->state, TASK_SELF_WAKING);
+ LIST_INSERT(&th_ctx->tasklets[TL_BULK], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_BULK;
+ }
+ else if (th_ctx->current_queue < 0) {
+ LIST_INSERT(&th_ctx->tasklets[TL_URGENT], &tl->list);
+ th_ctx->tl_class_mask |= 1 << TL_URGENT;
+ }
+ else {
+ LIST_INSERT(&th_ctx->tasklets[th_ctx->current_queue], &tl->list);
+ th_ctx->tl_class_mask |= 1 << th_ctx->current_queue;
+ }
+ }
+ else {
+ LIST_APPEND(head, &tl->list);
+ }
+ _HA_ATOMIC_INC(&th_ctx->rq_total);
+ return &tl->list;
+}
+
+/* Puts the task <t> in the run queue at a position depending on t->nice. The
+ * nice value offsets the queue key by <nice> times the configured runqueue
+ * depth, so a nice of -1024 strongly boosts the task's position while a nice
+ * of 1024 strongly delays it. The state flags are cleared, so the caller will
+ * have to set its flags after this call.
+ * The task must not already be in the run queue. If unsure, use the safer
+ * task_wakeup() function.
+ */
+void __task_wakeup(struct task *t)
+{
+ struct eb_root *root = &th_ctx->rqueue;
+ int thr __maybe_unused = t->tid >= 0 ? t->tid : tid;
+
+#ifdef USE_THREAD
+ if (thr != tid) {
+ root = &ha_thread_ctx[thr].rqueue_shared;
+
+ _HA_ATOMIC_INC(&ha_thread_ctx[thr].rq_total);
+ HA_SPIN_LOCK(TASK_RQ_LOCK, &ha_thread_ctx[thr].rqsh_lock);
+
+ t->rq.key = _HA_ATOMIC_ADD_FETCH(&ha_thread_ctx[thr].rqueue_ticks, 1);
+ __ha_barrier_store();
+ } else
+#endif
+ {
+ _HA_ATOMIC_INC(&th_ctx->rq_total);
+ t->rq.key = _HA_ATOMIC_ADD_FETCH(&th_ctx->rqueue_ticks, 1);
+ }
+
+ if (likely(t->nice)) {
+ int offset;
+
+ _HA_ATOMIC_INC(&tg_ctx->niced_tasks);
+ offset = t->nice * (int)global.tune.runqueue_depth;
+ t->rq.key += offset;
+ }
+
+ if (_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING)
+ t->wake_date = now_mono_time();
+
+ eb32_insert(root, &t->rq);
+
+#ifdef USE_THREAD
+ if (thr != tid) {
+ HA_SPIN_UNLOCK(TASK_RQ_LOCK, &ha_thread_ctx[thr].rqsh_lock);
+
+ /* If all threads that are supposed to handle this task are sleeping,
+ * wake one.
+ */
+ wake_thread(thr);
+ }
+#endif
+ return;
+}
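+
+/* Usage sketch (illustrative): callers do not invoke __task_wakeup() directly
+ * but go through task_wakeup(), which records the wake cause in the state
+ * flags and only falls back here when the task is not already queued:
+ *
+ *   task_wakeup(t, TASK_WOKEN_MSG);
+ */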
+
+/*
+ * __task_queue()
+ *
+ * Inserts a task into wait queue <wq> at the position given by its expiration
+ * date. It does not matter if the task was already in the wait queue or not,
+ * as it will be unlinked. The task MUST NOT have an infinite expiration timer.
+ * Last, tasks must not be queued further than the end of the tree, which is
+ * between <now_ms> and <now_ms> + 2^31 ms (now+24days in 32bit).
+ *
+ * This function should not be used directly, it is meant to be called by the
+ * inline version of task_queue() which performs a few cheap preliminary tests
+ * before deciding to call __task_queue(). Moreover this function doesn't care
+ * at all about locking so the caller must be careful when deciding whether to
+ * lock or not around this call.
+ */
+void __task_queue(struct task *task, struct eb_root *wq)
+{
+#ifdef USE_THREAD
+ BUG_ON((wq == &tg_ctx->timers && task->tid >= 0) ||
+ (wq == &th_ctx->timers && task->tid < 0) ||
+ (wq != &tg_ctx->timers && wq != &th_ctx->timers));
+#endif
+ /* if this happens the process is doomed anyway, so better catch it now
+ * so that we have the caller in the stack.
+ */
+ BUG_ON(task->expire == TICK_ETERNITY);
+
+ if (likely(task_in_wq(task)))
+ __task_unlink_wq(task);
+
+ /* the task is not in the queue now */
+ task->wq.key = task->expire;
+#ifdef DEBUG_CHECK_INVALID_EXPIRATION_DATES
+ if (tick_is_lt(task->wq.key, now_ms))
+ /* we're queuing too far away or in the past (most likely) */
+ return;
+#endif
+
+ eb32_insert(wq, &task->wq);
+}
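+
+/* Usage sketch (illustrative): callers normally set the expiration date and
+ * go through the inline task_queue() wrapper rather than calling this
+ * function directly:
+ *
+ *   t->expire = tick_add(now_ms, MS_TO_TICKS(5000));
+ *   task_queue(t);
+ */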
+
+/*
+ * Extracts all expired timers from the timer queue, and wakes up all
+ * associated tasks.
+ */
+void wake_expired_tasks()
+{
+ struct thread_ctx * const tt = th_ctx; // thread's tasks
+ int max_processed = global.tune.runqueue_depth;
+ struct task *task;
+ struct eb32_node *eb;
+ __decl_thread(int key);
+
+ while (1) {
+ if (max_processed-- <= 0)
+ goto leave;
+
+ eb = eb32_lookup_ge(&tt->timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&tt->timers);
+ if (likely(!eb))
+ break;
+ }
+
+ /* It is possible that this task was left at an earlier place in the
+ * tree because a recent call to task_queue() has not moved it. This
+ * happens when the new expiration date is later than the old one.
+ * Since it is very unlikely that we reach a timeout anyway, it's a
+ * lot cheaper to proceed like this because we almost never update
+ * the tree. We may also find disabled expiration dates there. Since
+ * we have detached the task from the tree, we simply call task_queue
+ * to take care of this. Note that we might occasionally requeue it at
+ * the same place, before <eb>, so we have to check if this happens,
+ * and adjust <eb>, otherwise we may skip it which is not what we want.
+ * We may also not requeue the task (and not point eb at it) if its
+ * expiration time is not set. We also make sure we leave the real
+ * expiration date for the next task in the queue so that when calling
+ * next_timer_expiry() we're guaranteed to see the next real date and
+ * not the next apparent date. This is in order to avoid useless
+ * wakeups.
+ */
+
+ task = eb32_entry(eb, struct task, wq);
+ if (tick_is_expired(task->expire, now_ms)) {
+ /* expired task, wake it up */
+ __task_unlink_wq(task);
+ _task_wakeup(task, TASK_WOKEN_TIMER, 0);
+ }
+ else if (task->expire != eb->key) {
+ /* task is not expired but its key doesn't match so let's
+ * update it and skip to next apparently expired task.
+ */
+ __task_unlink_wq(task);
+ if (tick_isset(task->expire))
+ __task_queue(task, &tt->timers);
+ }
+ else {
+ /* task not expired and correctly placed. It may not be eternal. */
+ BUG_ON(task->expire == TICK_ETERNITY);
+ break;
+ }
+ }
+
+#ifdef USE_THREAD
+ if (eb_is_empty(&tg_ctx->timers))
+ goto leave;
+
+ HA_RWLOCK_RDLOCK(TASK_WQ_LOCK, &wq_lock);
+ eb = eb32_lookup_ge(&tg_ctx->timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ eb = eb32_first(&tg_ctx->timers);
+ if (likely(!eb)) {
+ HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
+ goto leave;
+ }
+ }
+ key = eb->key;
+
+ if (tick_is_lt(now_ms, key)) {
+ HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
+ goto leave;
+ }
+
+ /* There's really something of interest here, let's visit the queue */
+
+ if (HA_RWLOCK_TRYRDTOSK(TASK_WQ_LOCK, &wq_lock)) {
+ /* if we failed to grab the lock it means another thread is
+ * already doing the same here, so let it do the job.
+ */
+ HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
+ goto leave;
+ }
+
+ while (1) {
+ lookup_next:
+ if (max_processed-- <= 0)
+ break;
+ eb = eb32_lookup_ge(&tg_ctx->timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&tg_ctx->timers);
+ if (likely(!eb))
+ break;
+ }
+
+ task = eb32_entry(eb, struct task, wq);
+
+ /* Check for any competing run of the task (quite rare but may
+ * involve a dangerous concurrent access on task->expire). In
+ * order to protect against this, we'll take an exclusive access
+ * on TASK_RUNNING before checking/touching task->expire. If the
+ * task is already RUNNING on another thread, it will deal by
+ * itself with the requeuing so we must not do anything and
+ * simply quit the loop for now, because we cannot wait with the
+ * WQ lock held as this would prevent the running thread from
+ * requeuing the task. One annoying effect of holding RUNNING
+ * here is that a concurrent task_wakeup() will refrain from
+ * waking it up. This forces us to check for a wakeup after
+ * releasing the flag.
+ */
+ if (HA_ATOMIC_FETCH_OR(&task->state, TASK_RUNNING) & TASK_RUNNING)
+ break;
+
+ if (tick_is_expired(task->expire, now_ms)) {
+ /* expired task, wake it up */
+ HA_RWLOCK_SKTOWR(TASK_WQ_LOCK, &wq_lock);
+ __task_unlink_wq(task);
+ HA_RWLOCK_WRTOSK(TASK_WQ_LOCK, &wq_lock);
+ task_drop_running(task, TASK_WOKEN_TIMER);
+ }
+ else if (task->expire != eb->key) {
+ /* task is not expired but its key doesn't match so let's
+ * update it and skip to next apparently expired task.
+ */
+ HA_RWLOCK_SKTOWR(TASK_WQ_LOCK, &wq_lock);
+ __task_unlink_wq(task);
+ if (tick_isset(task->expire))
+ __task_queue(task, &tg_ctx->timers);
+ HA_RWLOCK_WRTOSK(TASK_WQ_LOCK, &wq_lock);
+ task_drop_running(task, 0);
+ goto lookup_next;
+ }
+ else {
+ /* task not expired and correctly placed. It may not be eternal. */
+ BUG_ON(task->expire == TICK_ETERNITY);
+ task_drop_running(task, 0);
+ break;
+ }
+ }
+
+ HA_RWLOCK_SKUNLOCK(TASK_WQ_LOCK, &wq_lock);
+#endif
+leave:
+ return;
+}
+
+/* Checks the next timer for the current thread by looking into its own timer
+ * list and the global one. It may return TICK_ETERNITY if no timer is present.
+ * Note that the next timer might very well be slightly in the past.
+ */
+int next_timer_expiry()
+{
+ struct thread_ctx * const tt = th_ctx; // thread's tasks
+ struct eb32_node *eb;
+ int ret = TICK_ETERNITY;
+ __decl_thread(int key = TICK_ETERNITY);
+
+ /* first check in the thread-local timers */
+ eb = eb32_lookup_ge(&tt->timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb) {
+ /* we might have reached the end of the tree, typically because
+ * <now_ms> is in the first half and we're first scanning the last
+ * half. Let's loop back to the beginning of the tree now.
+ */
+ eb = eb32_first(&tt->timers);
+ }
+
+ if (eb)
+ ret = eb->key;
+
+#ifdef USE_THREAD
+ if (!eb_is_empty(&tg_ctx->timers)) {
+ HA_RWLOCK_RDLOCK(TASK_WQ_LOCK, &wq_lock);
+ eb = eb32_lookup_ge(&tg_ctx->timers, now_ms - TIMER_LOOK_BACK);
+ if (!eb)
+ eb = eb32_first(&tg_ctx->timers);
+ if (eb)
+ key = eb->key;
+ HA_RWLOCK_RDUNLOCK(TASK_WQ_LOCK, &wq_lock);
+ if (eb)
+ ret = tick_first(ret, key);
+ }
+#endif
+ return ret;
+}
+
+/* Walks over tasklet lists th_ctx->tasklets[0..TL_CLASSES-1] and runs at most
+ * budget[TL_*] of them. Returns the number of entries effectively processed
+ * (tasks and tasklets merged). The count of tasks in the list for the current
+ * thread is adjusted.
+ */
+unsigned int run_tasks_from_lists(unsigned int budgets[])
+{
+ struct task *(*process)(struct task *t, void *ctx, unsigned int state);
+ struct list *tl_queues = th_ctx->tasklets;
+ struct task *t;
+ uint8_t budget_mask = (1 << TL_CLASSES) - 1;
+ struct sched_activity *profile_entry = NULL;
+ unsigned int done = 0;
+ unsigned int queue;
+ unsigned int state;
+ void *ctx;
+
+ for (queue = 0; queue < TL_CLASSES;) {
+ th_ctx->current_queue = queue;
+
+ /* global.tune.sched.low-latency is set */
+ if (global.tune.options & GTUNE_SCHED_LOW_LATENCY) {
+ if (unlikely(th_ctx->tl_class_mask & budget_mask & ((1 << queue) - 1))) {
+ /* a lower queue index has tasks again and still has a
+ * budget to run them. Let's switch to it now.
+ */
+ queue = (th_ctx->tl_class_mask & 1) ? 0 :
+ (th_ctx->tl_class_mask & 2) ? 1 : 2;
+ continue;
+ }
+
+ if (unlikely(queue > TL_URGENT &&
+ budget_mask & (1 << TL_URGENT) &&
+ !MT_LIST_ISEMPTY(&th_ctx->shared_tasklet_list))) {
+ /* an urgent tasklet arrived from another thread */
+ break;
+ }
+
+ if (unlikely(queue > TL_NORMAL &&
+ budget_mask & (1 << TL_NORMAL) &&
+ (!eb_is_empty(&th_ctx->rqueue) || !eb_is_empty(&th_ctx->rqueue_shared)))) {
+ /* a task was woken up by a bulk tasklet or another thread */
+ break;
+ }
+ }
+
+ if (LIST_ISEMPTY(&tl_queues[queue])) {
+ th_ctx->tl_class_mask &= ~(1 << queue);
+ queue++;
+ continue;
+ }
+
+ if (!budgets[queue]) {
+ budget_mask &= ~(1 << queue);
+ queue++;
+ continue;
+ }
+
+ budgets[queue]--;
+ activity[tid].ctxsw++;
+
+ t = (struct task *)LIST_ELEM(tl_queues[queue].n, struct tasklet *, list);
+ ctx = t->context;
+ process = t->process;
+ t->calls++;
+
+ th_ctx->sched_wake_date = t->wake_date;
+ if (th_ctx->sched_wake_date) {
+ uint32_t now_ns = now_mono_time();
+ uint32_t lat = now_ns - th_ctx->sched_wake_date;
+
+ t->wake_date = 0;
+ th_ctx->sched_call_date = now_ns;
+ profile_entry = sched_activity_entry(sched_activity, t->process, t->caller);
+ th_ctx->sched_profile_entry = profile_entry;
+ HA_ATOMIC_ADD(&profile_entry->lat_time, lat);
+ HA_ATOMIC_INC(&profile_entry->calls);
+ }
+ __ha_barrier_store();
+
+ th_ctx->current = t;
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_STUCK); // this thread is still running
+
+ _HA_ATOMIC_DEC(&th_ctx->rq_total);
+ LIST_DEL_INIT(&((struct tasklet *)t)->list);
+ __ha_barrier_store();
+
+ if (t->state & TASK_F_TASKLET) {
+ /* this is a tasklet */
+ state = _HA_ATOMIC_FETCH_AND(&t->state, TASK_PERSISTENT);
+ __ha_barrier_atomic_store();
+
+ if (likely(!(state & TASK_KILLED))) {
+ process(t, ctx, state);
+ }
+ else {
+ done++;
+ th_ctx->current = NULL;
+ pool_free(pool_head_tasklet, t);
+ __ha_barrier_store();
+ continue;
+ }
+ } else {
+ /* This is a regular task */
+
+ /* We must be the exclusive owner of the TASK_RUNNING bit, and
+ * have to be careful that the task is not being manipulated on
+ * another thread finding it expired in wake_expired_tasks().
+ * The TASK_RUNNING bit will be set during these operations,
+ * they are extremely rare and do not last long so the best to
+ * do here is to wait.
+ */
+ state = _HA_ATOMIC_LOAD(&t->state);
+ do {
+ while (unlikely(state & TASK_RUNNING)) {
+ __ha_cpu_relax();
+ state = _HA_ATOMIC_LOAD(&t->state);
+ }
+ } while (!_HA_ATOMIC_CAS(&t->state, &state, (state & TASK_PERSISTENT) | TASK_RUNNING));
+
+ __ha_barrier_atomic_store();
+
+ _HA_ATOMIC_DEC(&ha_thread_ctx[tid].tasks_in_list);
+
+ /* Note for below: if TASK_KILLED arrived before we've read the state, we
+ * directly free the task. Otherwise it will be seen after processing and
+ * it's freed on the exit path.
+ */
+ if (likely(!(state & TASK_KILLED) && process == process_stream))
+ t = process_stream(t, ctx, state);
+ else if (!(state & TASK_KILLED) && process != NULL)
+ t = process(t, ctx, state);
+ else {
+ task_unlink_wq(t);
+ __task_free(t);
+ th_ctx->current = NULL;
+ __ha_barrier_store();
+ /* We don't want max_processed to be decremented if
+ * we're just freeing a destroyed task, we should only
+ * do so if we really ran a task.
+ */
+ continue;
+ }
+
+ /* If there is a pending state we have to wake up the task
+ * immediately, else we defer it into wait queue
+ */
+ if (t != NULL) {
+ state = _HA_ATOMIC_LOAD(&t->state);
+ if (unlikely(state & TASK_KILLED)) {
+ task_unlink_wq(t);
+ __task_free(t);
+ }
+ else {
+ task_queue(t);
+ task_drop_running(t, 0);
+ }
+ }
+ }
+
+ th_ctx->current = NULL;
+ __ha_barrier_store();
+
+ /* stats are only registered for non-zero wake dates */
+ if (unlikely(th_ctx->sched_wake_date))
+ HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
+ done++;
+ }
+ th_ctx->current_queue = -1;
+
+ return done;
+}
+
+/* The run queue is chronologically sorted in a tree. An insertion counter is
+ * used to assign a position to each task. This counter may be combined with
+ * other variables (eg: nice value) to set the final position in the tree. The
+ * counter may wrap without a problem, of course. We then limit the number of
+ * tasks processed to 200 in any case, so that general latency remains low and
+ * so that task positions have a chance to be considered. The function scans
+ * both the global and local run queues and picks the most urgent task between
+ * the two. We need to grab the global runqueue lock to touch it so it's taken
+ * on the very first access to the global run queue and is released as soon as
+ * it reaches the end.
+ */
+void process_runnable_tasks()
+{
+ struct thread_ctx * const tt = th_ctx;
+ struct eb32_node *lrq; // next local run queue entry
+ struct eb32_node *grq; // next global run queue entry
+ struct task *t;
+ const unsigned int default_weights[TL_CLASSES] = {
+ [TL_URGENT] = 64, // ~50% of CPU bandwidth for I/O
+ [TL_NORMAL] = 48, // ~37% of CPU bandwidth for tasks
+ [TL_BULK] = 16, // ~13% of CPU bandwidth for self-wakers
+ [TL_HEAVY] = 1, // never more than 1 heavy task at once
+ };
+ unsigned int max[TL_CLASSES]; // max to be run per class
+ unsigned int max_total; // sum of max above
+ struct mt_list *tmp_list;
+ unsigned int queue;
+ int max_processed;
+ int lpicked, gpicked;
+ int heavy_queued = 0;
+ int budget;
+
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_STUCK); // this thread is still running
+
+ if (!thread_has_tasks()) {
+ activity[tid].empty_rq++;
+ return;
+ }
+
+ max_processed = global.tune.runqueue_depth;
+
+ if (likely(tg_ctx->niced_tasks))
+ max_processed = (max_processed + 3) / 4;
+
+ if (max_processed < th_ctx->rq_total && th_ctx->rq_total <= 2*max_processed) {
+ /* If the run queue exceeds the budget by up to 50%, let's cut it
+ * into two identical halves to improve latency.
+ */
+ max_processed = th_ctx->rq_total / 2;
+ }
+
+ not_done_yet:
+ max[TL_URGENT] = max[TL_NORMAL] = max[TL_BULK] = 0;
+
+ /* urgent tasklets list gets a default weight of ~50% */
+ if ((tt->tl_class_mask & (1 << TL_URGENT)) ||
+ !MT_LIST_ISEMPTY(&tt->shared_tasklet_list))
+ max[TL_URGENT] = default_weights[TL_URGENT];
+
+ /* normal tasklets list gets a default weight of ~37% */
+ if ((tt->tl_class_mask & (1 << TL_NORMAL)) ||
+ !eb_is_empty(&th_ctx->rqueue) || !eb_is_empty(&th_ctx->rqueue_shared))
+ max[TL_NORMAL] = default_weights[TL_NORMAL];
+
+ /* bulk tasklets list gets a default weight of ~13% */
+ if ((tt->tl_class_mask & (1 << TL_BULK)))
+ max[TL_BULK] = default_weights[TL_BULK];
+
+ /* heavy tasks are processed only once and never refilled in a
+ * call round. That budget is not lost either as we don't reset
+ * it unless consumed.
+ */
+ if (!heavy_queued) {
+ if ((tt->tl_class_mask & (1 << TL_HEAVY)))
+ max[TL_HEAVY] = default_weights[TL_HEAVY];
+ else
+ max[TL_HEAVY] = 0;
+ heavy_queued = 1;
+ }
+
+ /* Now compute a fair share of the weights. Total may slightly exceed
+ * 100% due to rounding, this is not a problem. Note that while in
+ * theory the sum cannot be zero as we cannot get there without tasklets
+ * to process, in practice it can happen when multiple writers
+ * conflict and roll back on MT_LIST_TRY_APPEND(shared_tasklet_list), causing
+ * a first MT_LIST_ISEMPTY() to succeed for thread_has_task() and the
+ * one above to finally fail. This is extremely rare and not a problem.
+ */
+ max_total = max[TL_URGENT] + max[TL_NORMAL] + max[TL_BULK] + max[TL_HEAVY];
+ if (!max_total)
+ goto leave;
+
+ for (queue = 0; queue < TL_CLASSES; queue++)
+ max[queue] = ((unsigned)max_processed * max[queue] + max_total - 1) / max_total;
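+
+ /* Worked example (illustrative figures only): with max_processed = 200
+ * and all four classes active, max_total = 64+48+16+1 = 129, so the
+ * rounded-up shares become URGENT: 100, NORMAL: 75, BULK: 25 and
+ * HEAVY: 2, i.e. 202 in total, slightly above 100% as noted above
+ * (the heavy share is further capped just below).
+ */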
+
+ /* The heavy queue must never process more than very few tasks at once
+ * anyway. We set the limit to 1 if running on low_latency scheduling,
+ * given that we know that other values can have an impact on latency
+ * (~500us end-to-end connection achieved at 130kcps in SSL), 1 + one
+ * per 1024 tasks if there is at least one non-heavy task while still
+ * respecting the ratios above, or 1 + one per 128 tasks if only heavy
+ * tasks are present. This allows excess SSL handshakes to be drained
+ * more efficiently when the queue becomes congested.
+ */
+ if (max[TL_HEAVY] > 1) {
+ if (global.tune.options & GTUNE_SCHED_LOW_LATENCY)
+ budget = 1;
+ else if (tt->tl_class_mask & ~(1 << TL_HEAVY))
+ budget = 1 + tt->rq_total / 1024;
+ else
+ budget = 1 + tt->rq_total / 128;
+
+ if (max[TL_HEAVY] > budget)
+ max[TL_HEAVY] = budget;
+ }
+
+ lrq = grq = NULL;
+
+ /* pick up to max[TL_NORMAL] regular tasks from prio-ordered run queues */
+ /* Note: the grq lock is always held when grq is not null */
+ lpicked = gpicked = 0;
+ budget = max[TL_NORMAL] - tt->tasks_in_list;
+ while (lpicked + gpicked < budget) {
+ if (!eb_is_empty(&th_ctx->rqueue_shared) && !grq) {
+#ifdef USE_THREAD
+ HA_SPIN_LOCK(TASK_RQ_LOCK, &th_ctx->rqsh_lock);
+ grq = eb32_lookup_ge(&th_ctx->rqueue_shared, _HA_ATOMIC_LOAD(&tt->rqueue_ticks) - TIMER_LOOK_BACK);
+ if (unlikely(!grq)) {
+ grq = eb32_first(&th_ctx->rqueue_shared);
+ if (!grq)
+ HA_SPIN_UNLOCK(TASK_RQ_LOCK, &th_ctx->rqsh_lock);
+ }
+#endif
+ }
+
+ /* If a global task is available for this thread, it's in grq
+ * now and the global RQ is locked.
+ */
+
+ if (!lrq) {
+ lrq = eb32_lookup_ge(&tt->rqueue, _HA_ATOMIC_LOAD(&tt->rqueue_ticks) - TIMER_LOOK_BACK);
+ if (unlikely(!lrq))
+ lrq = eb32_first(&tt->rqueue);
+ }
+
+ if (!lrq && !grq)
+ break;
+
+ if (likely(!grq || (lrq && (int)(lrq->key - grq->key) <= 0))) {
+ t = eb32_entry(lrq, struct task, rq);
+ lrq = eb32_next(lrq);
+ eb32_delete(&t->rq);
+ lpicked++;
+ }
+#ifdef USE_THREAD
+ else {
+ t = eb32_entry(grq, struct task, rq);
+ grq = eb32_next(grq);
+ eb32_delete(&t->rq);
+
+ if (unlikely(!grq)) {
+ grq = eb32_first(&th_ctx->rqueue_shared);
+ if (!grq)
+ HA_SPIN_UNLOCK(TASK_RQ_LOCK, &th_ctx->rqsh_lock);
+ }
+ gpicked++;
+ }
+#endif
+ if (t->nice)
+ _HA_ATOMIC_DEC(&tg_ctx->niced_tasks);
+
+ /* Add it to the local task list */
+ LIST_APPEND(&tt->tasklets[TL_NORMAL], &((struct tasklet *)t)->list);
+ }
+
+ /* release the rqueue lock */
+ if (grq) {
+ HA_SPIN_UNLOCK(TASK_RQ_LOCK, &th_ctx->rqsh_lock);
+ grq = NULL;
+ }
+
+ if (lpicked + gpicked) {
+ tt->tl_class_mask |= 1 << TL_NORMAL;
+ _HA_ATOMIC_ADD(&tt->tasks_in_list, lpicked + gpicked);
+ activity[tid].tasksw += lpicked + gpicked;
+ }
+
+ /* Merge the list of tasklets woken up by other threads into the
+ * main list.
+ */
+ tmp_list = MT_LIST_BEHEAD(&tt->shared_tasklet_list);
+ if (tmp_list) {
+ LIST_SPLICE_END_DETACHED(&tt->tasklets[TL_URGENT], (struct list *)tmp_list);
+ if (!LIST_ISEMPTY(&tt->tasklets[TL_URGENT]))
+ tt->tl_class_mask |= 1 << TL_URGENT;
+ }
+
+ /* execute tasklets in each queue */
+ max_processed -= run_tasks_from_lists(max);
+
+ /* some tasks may have woken other ones up */
+ if (max_processed > 0 && thread_has_tasks())
+ goto not_done_yet;
+
+ leave:
+ if (tt->tl_class_mask)
+ activity[tid].long_rq++;
+}
+
+/*
+ * Delete every task before running the master polling loop
+ */
+void mworker_cleantasks()
+{
+ struct task *t;
+ int i;
+ struct eb32_node *tmp_wq = NULL;
+ struct eb32_node *tmp_rq = NULL;
+
+#ifdef USE_THREAD
+ /* cleanup the global run queue */
+ tmp_rq = eb32_first(&th_ctx->rqueue_shared);
+ while (tmp_rq) {
+ t = eb32_entry(tmp_rq, struct task, rq);
+ tmp_rq = eb32_next(tmp_rq);
+ task_destroy(t);
+ }
+ /* cleanup the timers queue */
+ tmp_wq = eb32_first(&tg_ctx->timers);
+ while (tmp_wq) {
+ t = eb32_entry(tmp_wq, struct task, wq);
+ tmp_wq = eb32_next(tmp_wq);
+ task_destroy(t);
+ }
+#endif
+ /* clean the per thread run queue */
+ for (i = 0; i < global.nbthread; i++) {
+ tmp_rq = eb32_first(&ha_thread_ctx[i].rqueue);
+ while (tmp_rq) {
+ t = eb32_entry(tmp_rq, struct task, rq);
+ tmp_rq = eb32_next(tmp_rq);
+ task_destroy(t);
+ }
+ /* cleanup the per thread timers queue */
+ tmp_wq = eb32_first(&ha_thread_ctx[i].timers);
+ while (tmp_wq) {
+ t = eb32_entry(tmp_wq, struct task, wq);
+ tmp_wq = eb32_next(tmp_wq);
+ task_destroy(t);
+ }
+ }
+}
+
+/* perform minimal initializations */
+static void init_task()
+{
+ int i, q;
+
+ for (i = 0; i < MAX_TGROUPS; i++)
+ memset(&ha_tgroup_ctx[i].timers, 0, sizeof(ha_tgroup_ctx[i].timers));
+
+ for (i = 0; i < MAX_THREADS; i++) {
+ for (q = 0; q < TL_CLASSES; q++)
+ LIST_INIT(&ha_thread_ctx[i].tasklets[q]);
+ MT_LIST_INIT(&ha_thread_ctx[i].shared_tasklet_list);
+ }
+}
+
+/* config parser for global "tune.sched.low-latency", accepts "on" or "off" */
+static int cfg_parse_tune_sched_low_latency(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (strcmp(args[1], "on") == 0)
+ global.tune.options |= GTUNE_SCHED_LOW_LATENCY;
+ else if (strcmp(args[1], "off") == 0)
+ global.tune.options &= ~GTUNE_SCHED_LOW_LATENCY;
+ else {
+ memprintf(err, "'%s' expects either 'on' or 'off' but got '%s'.", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
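+
+/* Example configuration snippet accepted by the parser above (keyword
+ * registered just below):
+ *
+ *     global
+ *         tune.sched.low-latency on
+ */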
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "tune.sched.low-latency", cfg_parse_tune_sched_low_latency },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+INITCALL0(STG_PREPARE, init_task);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tcp_act.c b/src/tcp_act.c
new file mode 100644
index 0000000..8b44047
--- /dev/null
+++ b/src/tcp_act.c
@@ -0,0 +1,749 @@
+/*
+ * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/action-t.h>
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/channel.h>
+#include <haproxy/connection.h>
+#include <haproxy/global.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/proto_tcp.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/server.h>
+#include <haproxy/session.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/tools.h>
+
+static enum act_return tcp_action_attach_srv(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct server *srv = rule->arg.attach_srv.srv;
+ struct sample *name_smp;
+ struct connection *conn = objt_conn(sess->origin);
+ if (!conn)
+ return ACT_RET_ABRT;
+
+ conn_set_reverse(conn, &srv->obj_type);
+
+ if (rule->arg.attach_srv.name) {
+ name_smp = sample_fetch_as_type(sess->fe, sess, s,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
+ rule->arg.attach_srv.name, SMP_T_STR);
+ /* TODO: strdup the sample's buffer */
+ if (name_smp) {
+ struct buffer *buf = &name_smp->data.u.str;
+ char *area = malloc(b_data(buf));
+
+ if (!area)
+ return ACT_RET_ERR;
+
+ conn->reverse.name = b_make(area, b_data(buf), 0, 0);
+ b_ncat(&conn->reverse.name, buf, b_data(buf));
+ }
+ }
+
+ return ACT_RET_CONT;
+}
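+
+/* A hypothetical configuration sketch for the action above; the action is
+ * experimental (see tcp_parse_attach_srv() below) and the backend/server
+ * names are illustrative only:
+ *
+ *     global
+ *         expose-experimental-directives
+ *
+ *     frontend fe_reverse
+ *         tcp-request session attach-srv be_reverse/srv1 name ssl_fc_sni
+ */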
+
+/*
+ * Execute the "set-src" action. May be called from {tcp,http}request.
+ * It only changes the address and tries to preserve the original port. If the
+ * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
+ */
+static enum act_return tcp_action_req_set_src(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *cli_conn;
+ struct sockaddr_storage *src;
+ struct sample *smp;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ cli_conn = objt_conn(sess->origin);
+ if (!cli_conn || !conn_get_src(cli_conn))
+ goto end;
+ src = cli_conn->src;
+ break;
+
+ case ACT_F_TCP_REQ_SES:
+ if (!sess_get_src(sess))
+ goto end;
+ src = sess->src;
+ break;
+
+ case ACT_F_TCP_REQ_CNT:
+ case ACT_F_HTTP_REQ:
+ if (!sc_get_src(s->scf))
+ goto end;
+ src = s->scf->src;
+ break;
+
+ default:
+ goto end;
+ }
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
+ if (smp) {
+ int port = get_net_port(src);
+
+ if (smp->data.type == SMP_T_IPV4) {
+ ((struct sockaddr_in *)src)->sin_family = AF_INET;
+ ((struct sockaddr_in *)src)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
+ ((struct sockaddr_in *)src)->sin_port = port;
+ } else if (smp->data.type == SMP_T_IPV6) {
+ ((struct sockaddr_in6 *)src)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)src)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)src)->sin6_port = port;
+ }
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
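+
+/* Hypothetical usage sketches for the "set-src" action implemented above
+ * (the sample expressions are illustrative):
+ *
+ *     tcp-request connection set-src src,ipmask(24)
+ *     http-request set-src hdr(x-forwarded-for)
+ */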
+
+/*
+ * Execute the "set-dst" action. May be called from {tcp,http}request.
+ * It only changes the address and tries to preserve the original port. If the
+ * previous family was neither AF_INET nor AF_INET6, the port is set to zero.
+ */
+static enum act_return tcp_action_req_set_dst(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *cli_conn;
+ struct sockaddr_storage *dst;
+ struct sample *smp;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ cli_conn = objt_conn(sess->origin);
+ if (!cli_conn || !conn_get_dst(cli_conn))
+ goto end;
+ dst = cli_conn->dst;
+ break;
+
+ case ACT_F_TCP_REQ_SES:
+ if (!sess_get_dst(sess))
+ goto end;
+ dst = sess->dst;
+ break;
+
+ case ACT_F_TCP_REQ_CNT:
+ case ACT_F_HTTP_REQ:
+ if (!sc_get_dst(s->scf))
+ goto end;
+ dst = s->scf->dst;
+ break;
+
+ default:
+ goto end;
+ }
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_ADDR);
+ if (smp) {
+ int port = get_net_port(dst);
+
+ if (smp->data.type == SMP_T_IPV4) {
+ ((struct sockaddr_in *)dst)->sin_family = AF_INET;
+ ((struct sockaddr_in *)dst)->sin_addr.s_addr = smp->data.u.ipv4.s_addr;
+ ((struct sockaddr_in *)dst)->sin_port = port;
+ } else if (smp->data.type == SMP_T_IPV6) {
+ ((struct sockaddr_in6 *)dst)->sin6_family = AF_INET6;
+ memcpy(&((struct sockaddr_in6 *)dst)->sin6_addr, &smp->data.u.ipv6, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)dst)->sin6_port = port;
+ }
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
+
+/*
+ * Execute the "set-src-port" action. May be called from {tcp,http}request.
+ * We must test the sin_family before setting the port. If the address family
+ * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
+ * and the port is assigned.
+ */
+static enum act_return tcp_action_req_set_src_port(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *cli_conn;
+ struct sockaddr_storage *src;
+ struct sample *smp;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ cli_conn = objt_conn(sess->origin);
+ if (!cli_conn || !conn_get_src(cli_conn))
+ goto end;
+ src = cli_conn->src;
+ break;
+
+ case ACT_F_TCP_REQ_SES:
+ if (!sess_get_src(sess))
+ goto end;
+ src = sess->src;
+ break;
+
+ case ACT_F_TCP_REQ_CNT:
+ case ACT_F_HTTP_REQ:
+ if (!sc_get_src(s->scf))
+ goto end;
+ src = s->scf->src;
+ break;
+
+ default:
+ goto end;
+ }
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
+ if (smp) {
+ if (src->ss_family == AF_INET6) {
+ ((struct sockaddr_in6 *)src)->sin6_port = htons(smp->data.u.sint);
+ } else {
+ if (src->ss_family != AF_INET) {
+ src->ss_family = AF_INET;
+ ((struct sockaddr_in *)src)->sin_addr.s_addr = 0;
+ }
+ ((struct sockaddr_in *)src)->sin_port = htons(smp->data.u.sint);
+ }
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
+
+/*
+ * Execute the "set-dst-port" action. May be called from {tcp,http}request.
+ * We must test the sin_family before setting the port. If the address family
+ * is neither AF_INET nor AF_INET6, the address is forced to AF_INET "0.0.0.0"
+ * and the port is assigned.
+ */
+static enum act_return tcp_action_req_set_dst_port(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct connection *cli_conn;
+ struct sockaddr_storage *dst;
+ struct sample *smp;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ cli_conn = objt_conn(sess->origin);
+ if (!cli_conn || !conn_get_dst(cli_conn))
+ goto end;
+ dst = cli_conn->dst;
+ break;
+
+ case ACT_F_TCP_REQ_SES:
+ if (!sess_get_dst(sess))
+ goto end;
+ dst = sess->dst;
+ break;
+
+ case ACT_F_TCP_REQ_CNT:
+ case ACT_F_HTTP_REQ:
+ if (!sc_get_dst(s->scf))
+ goto end;
+ dst = s->scf->dst;
+ break;
+
+ default:
+ goto end;
+ }
+
+ smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
+ if (smp) {
+ if (dst->ss_family == AF_INET6) {
+ ((struct sockaddr_in6 *)dst)->sin6_port = htons(smp->data.u.sint);
+ } else {
+ if (dst->ss_family != AF_INET) {
+ dst->ss_family = AF_INET;
+ ((struct sockaddr_in *)dst)->sin_addr.s_addr = 0;
+ }
+ ((struct sockaddr_in *)dst)->sin_port = htons(smp->data.u.sint);
+ }
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
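+
+/* Hypothetical usage sketches for the "set-dst" and "set-dst-port" actions
+ * above (the expressions are illustrative):
+ *
+ *     http-request set-dst hdr(x-dst)
+ *     http-request set-dst-port int(4000)
+ */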
+
+/* Executes the "silent-drop" action. May be called from {tcp,http}{request,response}.
+ * If rule->arg.act.p[0] is 0, TCP_REPAIR is tried first, with a fallback to
+ * sending a RST with TTL 1 towards the client. If it is [1-255], we will skip
+ * TCP_REPAIR and prepare the socket to send a RST with the requested TTL when
+ * the connection is killed by channel_abort().
+ */
+static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *strm, int flags)
+{
+ struct connection *conn = objt_conn(sess->origin);
+ unsigned int ttl __maybe_unused = (uintptr_t)rule->arg.act.p[0];
+ char tcp_repair_enabled __maybe_unused;
+
+ if (ttl == 0) {
+ tcp_repair_enabled = 1;
+ ttl = 1;
+ } else {
+ tcp_repair_enabled = 0;
+ }
+
+ if (!conn)
+ goto out;
+
+ if (!conn_ctrl_ready(conn))
+ goto out;
+
+#ifdef TCP_QUICKACK
+ /* drain is needed only to send the quick ACK */
+ conn_ctrl_drain(conn);
+
+ /* re-enable quickack if it was disabled to ack all data and avoid
+ * retransmits from the client that might trigger a real reset.
+ */
+ setsockopt(conn->handle.fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one));
+#endif
+ /* lingering must absolutely be disabled so that we don't send a
+ * shutdown(); this is critical to the TCP_REPAIR trick. When no stream
+ * is present, returning with ERR will cause lingering to be disabled.
+ */
+ if (strm)
+ strm->scf->flags |= SC_FL_NOLINGER;
+
+ if (conn->flags & CO_FL_FDLESS)
+ goto out;
+
+ /* We're on the client-facing side, so we must forcibly disable lingering
+ * to ensure an RST is used exclusively and any pending data is killed.
+ */
+ HA_ATOMIC_OR(&fdtab[conn->handle.fd].state, FD_LINGER_RISK);
+
+#ifdef TCP_REPAIR
+ /* Try to put the socket in repair mode if sending an RST was not requested
+ * by the config. This often fails due to the missing CAP_NET_ADMIN capability.
+ */
+ if (tcp_repair_enabled && (setsockopt(conn->handle.fd, IPPROTO_TCP, TCP_REPAIR, &one, sizeof(one)) == 0)) {
+ /* socket will be quiet now */
+ goto out;
+ }
+#endif
+
+ /* Either TCP_REPAIR is not defined, it failed (eg: permissions), or it was
+ * not attempted because an RST with a specific TTL was requested. Set the
+ * TTL of the client connection before the connection is killed by
+ * channel_abort(), so that the RST packet emitted by the TCP/IP stack
+ * carries it.
+ */
+#ifdef IP_TTL
+ if (conn->src && conn->src->ss_family == AF_INET)
+ setsockopt(conn->handle.fd, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl));
+#endif
+#ifdef IPV6_UNICAST_HOPS
+ if (conn->src && conn->src->ss_family == AF_INET6)
+ setsockopt(conn->handle.fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl));
+#endif
+ out:
+ /* kill the stream if any */
+ if (strm) {
+ stream_abort(strm);
+ strm->req.analysers &= AN_REQ_FLT_END;
+ strm->res.analysers &= AN_RES_FLT_END;
+ if (strm->flags & SF_BE_ASSIGNED)
+ _HA_ATOMIC_INC(&strm->be->be_counters.denied_req);
+ if (!(strm->flags & SF_ERR_MASK))
+ strm->flags |= SF_ERR_PRXCOND;
+ if (!(strm->flags & SF_FINST_MASK))
+ strm->flags |= SF_FINST_R;
+ }
+
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+
+ return ACT_RET_ABRT;
+}
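+
+/* A hypothetical usage sketch for the action above ("rst-ttl" is parsed by
+ * tcp_parse_silent_drop() below; the ACL is illustrative):
+ *
+ *     tcp-request content silent-drop rst-ttl 3 if { src -f abusers.lst }
+ */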
+
+
+#if defined(SO_MARK) || defined(SO_USER_COOKIE) || defined(SO_RTABLE)
+static enum act_return tcp_action_set_mark(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ conn_set_mark(objt_conn(sess->origin), (uintptr_t)rule->arg.act.p[0]);
+ return ACT_RET_CONT;
+}
+#endif
+
+#ifdef IP_TOS
+static enum act_return tcp_action_set_tos(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ conn_set_tos(objt_conn(sess->origin), (uintptr_t)rule->arg.act.p[0]);
+ return ACT_RET_CONT;
+}
+#endif
+
+/*
+ * Release the sample expr when releasing the attach-srv action
+ */
+static void release_attach_srv_action(struct act_rule *rule)
+{
+ ha_free(&rule->arg.attach_srv.srvname);
+ release_sample_expr(rule->arg.attach_srv.name);
+}
+
+/*
+ * Release the sample expr when releasing a set src/dst action
+ */
+static void release_set_src_dst_action(struct act_rule *rule)
+{
+ release_sample_expr(rule->arg.expr);
+}
+
+static int tcp_check_attach_srv(struct act_rule *rule, struct proxy *px, char **err)
+{
+ struct proxy *be = NULL;
+ struct server *srv = NULL;
+ char *name = rule->arg.attach_srv.srvname;
+ struct ist be_name, sv_name;
+
+ if (px->mode != PR_MODE_HTTP) {
+ memprintf(err, "attach-srv rule requires HTTP proxy mode");
+ return 0;
+ }
+
+ sv_name = ist(name);
+ be_name = istsplit(&sv_name, '/');
+ if (!istlen(sv_name)) {
+ memprintf(err, "attach-srv rule: invalid server name '%s'", name);
+ return 0;
+ }
+
+ if (!(be = proxy_be_by_name(ist0(be_name)))) {
+ memprintf(err, "attach-srv rule: no such backend '%s/%s'", ist0(be_name), ist0(sv_name));
+ return 0;
+ }
+ if (!(srv = server_find_by_name(be, ist0(sv_name)))) {
+ memprintf(err, "attach-srv rule: no such server '%s/%s'", ist0(be_name), ist0(sv_name));
+ return 0;
+ }
+
+ if ((rule->arg.attach_srv.name && (!srv->use_ssl || !srv->sni_expr)) ||
+ (!rule->arg.attach_srv.name && srv->use_ssl && srv->sni_expr)) {
+ memprintf(err, "attach-srv rule: connection will never be used; either specify name argument in conjunction with defined SSL SNI on targeted server or none of these");
+ return 0;
+ }
+
+ rule->arg.attach_srv.srv = srv;
+
+ return 1;
+}
+
+static enum act_parse_ret tcp_parse_attach_srv(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ char *srvname;
+ struct sample_expr *expr;
+
+ /* TODO duplicated code from check_kw_experimental() */
+ if (!experimental_directives_allowed) {
+ memprintf(err, "parsing [%s:%d] : '%s' action is experimental, must be allowed via a global 'expose-experimental-directives'",
+ px->conf.args.file, px->conf.args.line, args[2]);
+ return ACT_RET_PRS_ERR;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = tcp_action_attach_srv;
+ rule->release_ptr = release_attach_srv_action;
+ rule->check_ptr = tcp_check_attach_srv;
+ rule->arg.attach_srv.srvname = NULL;
+ rule->arg.attach_srv.name = NULL;
+
+ srvname = my_strndup(args[*cur_arg], strlen(args[*cur_arg]));
+ if (!srvname)
+ goto err;
+ rule->arg.attach_srv.srvname = srvname;
+
+ ++(*cur_arg);
+
+ if (strcmp(args[*cur_arg], "name") == 0) {
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "missing name value");
+ return ACT_RET_PRS_ERR;
+ }
+ ++(*cur_arg);
+
+ expr = sample_parse_expr((char **)args, cur_arg, px->conf.args.file, px->conf.args.line,
+ err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ rule->arg.attach_srv.name = expr;
+ rule->release_ptr = release_attach_srv_action;
+ }
+
+ return ACT_RET_PRS_OK;
+
+ err:
+ ha_free(&rule->arg.attach_srv.srvname);
+ release_sample_expr(rule->arg.attach_srv.name);
+ return ACT_RET_PRS_ERR;
+}
+
+/* parse "set-{src,dst}[-port]" action */
+static enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ int cur_arg;
+ struct sample_expr *expr;
+ unsigned int where;
+
+ cur_arg = *orig_arg;
+ expr = sample_parse_expr((char **)args, &cur_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL);
+ if (!expr)
+ return ACT_RET_PRS_ERR;
+
+ where = 0;
+ if (px->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_HRQ_HDR;
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ args[cur_arg-1], sample_src_names(expr->fetch->use));
+ free(expr);
+ return ACT_RET_PRS_ERR;
+ }
+ rule->arg.expr = expr;
+ rule->action = ACT_CUSTOM;
+
+ if (strcmp(args[*orig_arg - 1], "set-src") == 0) {
+ rule->action_ptr = tcp_action_req_set_src;
+ } else if (strcmp(args[*orig_arg - 1], "set-src-port") == 0) {
+ rule->action_ptr = tcp_action_req_set_src_port;
+ } else if (strcmp(args[*orig_arg - 1], "set-dst") == 0) {
+ rule->action_ptr = tcp_action_req_set_dst;
+ } else if (strcmp(args[*orig_arg - 1], "set-dst-port") == 0) {
+ rule->action_ptr = tcp_action_req_set_dst_port;
+ } else {
+ return ACT_RET_PRS_ERR;
+ }
+
+ rule->release_ptr = release_set_src_dst_action;
+ (*orig_arg)++;
+
+ return ACT_RET_PRS_OK;
+}
+
+
+/* Parse a "set-mark" action. It takes the MARK value as argument. It returns
+ * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret tcp_parse_set_mark(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+#if defined(SO_MARK) || defined(SO_USER_COOKIE) || defined(SO_RTABLE)
+ char *endp;
+ unsigned int mark;
+
+ if (!*args[*cur_arg]) {
+ memprintf(err, "expects exactly 1 argument (integer/hex value)");
+ return ACT_RET_PRS_ERR;
+ }
+ mark = strtoul(args[*cur_arg], &endp, 0);
+ if (endp && *endp != '\0') {
+ memprintf(err, "invalid character starting at '%s' (integer/hex value expected)", endp);
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = tcp_action_set_mark;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)mark;
+ global.last_checks |= LSTCHK_NETADM;
+ return ACT_RET_PRS_OK;
+#else
+ memprintf(err, "not supported on this platform (SO_MARK|SO_USER_COOKIE|SO_RTABLE undefined)");
+ return ACT_RET_PRS_ERR;
+#endif
+}
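+
+/* Hypothetical usage sketch for the parser above (the mark is accepted in
+ * decimal or hex; the value is illustrative):
+ *
+ *     tcp-request connection set-mark 0x1ab
+ */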
+
+
+/* Parse a "set-tos" action. It takes the TOS value as argument. It returns
+ * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error.
+ */
+static enum act_parse_ret tcp_parse_set_tos(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+#ifdef IP_TOS
+ char *endp;
+ int tos;
+
+ if (!*args[*cur_arg]) {
+ memprintf(err, "expects exactly 1 argument (integer/hex value)");
+ return ACT_RET_PRS_ERR;
+ }
+ tos = strtol(args[*cur_arg], &endp, 0);
+ if (endp && *endp != '\0') {
+ memprintf(err, "invalid character starting at '%s' (integer/hex value expected)", endp);
+ return ACT_RET_PRS_ERR;
+ }
+
+ (*cur_arg)++;
+
+ /* Register processing function. */
+ rule->action_ptr = tcp_action_set_tos;
+ rule->action = ACT_CUSTOM;
+ rule->arg.act.p[0] = (void *)(uintptr_t)tos;
+ return ACT_RET_PRS_OK;
+#else
+ memprintf(err, "not supported on this platform (IP_TOS undefined)");
+ return ACT_RET_PRS_ERR;
+#endif
+}
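+
+/* Hypothetical usage sketch for the parser above (the TOS value and ACL are
+ * illustrative; 184 is 0xB8):
+ *
+ *     http-request set-tos 184 if { path -m beg /videos }
+ */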
+
+/* Parse a "silent-drop" action. It may take 2 optional arguments to define a
+ * "rst-ttl" parameter. It returns ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR
+ * on error.
+ */
+static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *cur_arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ unsigned int rst_ttl = 0;
+ char *endp;
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = tcp_exec_action_silent_drop;
+
+ if (strcmp(args[*cur_arg], "rst-ttl") == 0) {
+ if (!*args[*cur_arg + 1]) {
+ memprintf(err, "missing rst-ttl value\n");
+ return ACT_RET_PRS_ERR;
+ }
+
+ rst_ttl = (unsigned int)strtoul(args[*cur_arg + 1], &endp, 0);
+
+ if (endp && *endp != '\0') {
+ memprintf(err, "invalid character starting at '%s' (value 1-255 expected)\n",
+ endp);
+ return ACT_RET_PRS_ERR;
+ }
+ if ((rst_ttl == 0) || (rst_ttl > 255)) {
+ memprintf(err, "valid rst-ttl values are [1-255]\n");
+ return ACT_RET_PRS_ERR;
+ }
+
+ *cur_arg += 2;
+ }
+
+ rule->arg.act.p[0] = (void *)(uintptr_t)rst_ttl;
+ return ACT_RET_PRS_OK;
+}
+
+
+static struct action_kw_list tcp_req_conn_actions = {ILH, {
+ { "set-dst" , tcp_parse_set_src_dst },
+ { "set-dst-port", tcp_parse_set_src_dst },
+ { "set-mark", tcp_parse_set_mark },
+ { "set-src", tcp_parse_set_src_dst },
+ { "set-src-port", tcp_parse_set_src_dst },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_conn_keywords_register, &tcp_req_conn_actions);
+
+static struct action_kw_list tcp_req_sess_actions = {ILH, {
+ { "attach-srv" , tcp_parse_attach_srv },
+ { "set-dst" , tcp_parse_set_src_dst },
+ { "set-dst-port", tcp_parse_set_src_dst },
+ { "set-mark", tcp_parse_set_mark },
+ { "set-src", tcp_parse_set_src_dst },
+ { "set-src-port", tcp_parse_set_src_dst },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_req_sess_actions);
+
+static struct action_kw_list tcp_req_cont_actions = {ILH, {
+ { "set-src", tcp_parse_set_src_dst },
+ { "set-src-port", tcp_parse_set_src_dst },
+ { "set-dst" , tcp_parse_set_src_dst },
+ { "set-dst-port", tcp_parse_set_src_dst },
+ { "set-mark", tcp_parse_set_mark },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_actions);
+
+static struct action_kw_list tcp_res_cont_actions = {ILH, {
+ { "set-mark", tcp_parse_set_mark },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_cont_actions);
+
+static struct action_kw_list http_req_actions = {ILH, {
+ { "set-dst", tcp_parse_set_src_dst },
+ { "set-dst-port", tcp_parse_set_src_dst },
+ { "set-mark", tcp_parse_set_mark },
+ { "set-src", tcp_parse_set_src_dst },
+ { "set-src-port", tcp_parse_set_src_dst },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions);
+
+static struct action_kw_list http_res_actions = {ILH, {
+ { "set-mark", tcp_parse_set_mark },
+ { "set-tos", tcp_parse_set_tos },
+ { "silent-drop", tcp_parse_silent_drop },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_actions);
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tcp_rules.c b/src/tcp_rules.c
new file mode 100644
index 0000000..9ce6c90
--- /dev/null
+++ b/src/tcp_rules.c
@@ -0,0 +1,1428 @@
+/*
+ * "tcp" rules processing
+ *
+ * Copyright 2000-2016 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <haproxy/acl.h>
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/arg-t.h>
+#include <haproxy/capture-t.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/channel.h>
+#include <haproxy/connection.h>
+#include <haproxy/global.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/proxy.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/stconn.h>
+#include <haproxy/stick_table.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+
+
+#define TRACE_SOURCE &trace_strm
+
+/* List head of all known action keywords for "tcp-request connection" */
+struct list tcp_req_conn_keywords = LIST_HEAD_INIT(tcp_req_conn_keywords);
+struct list tcp_req_sess_keywords = LIST_HEAD_INIT(tcp_req_sess_keywords);
+struct list tcp_req_cont_keywords = LIST_HEAD_INIT(tcp_req_cont_keywords);
+struct list tcp_res_cont_keywords = LIST_HEAD_INIT(tcp_res_cont_keywords);
+
+/*
+ * Register keywords.
+ */
+void tcp_req_conn_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_req_conn_keywords, &kw_list->list);
+}
+
+void tcp_req_sess_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_req_sess_keywords, &kw_list->list);
+}
+
+void tcp_req_cont_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_req_cont_keywords, &kw_list->list);
+}
+
+void tcp_res_cont_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_res_cont_keywords, &kw_list->list);
+}
+
+/*
+ * Return the struct tcp_req_action_kw associated to a keyword.
+ */
+struct action_kw *tcp_req_conn_action(const char *kw)
+{
+ return action_lookup(&tcp_req_conn_keywords, kw);
+}
+
+struct action_kw *tcp_req_sess_action(const char *kw)
+{
+ return action_lookup(&tcp_req_sess_keywords, kw);
+}
+
+struct action_kw *tcp_req_cont_action(const char *kw)
+{
+ return action_lookup(&tcp_req_cont_keywords, kw);
+}
+
+struct action_kw *tcp_res_cont_action(const char *kw)
+{
+ return action_lookup(&tcp_res_cont_keywords, kw);
+}
+
+/* This function performs the TCP request analysis on the current request. It
+ * returns 1 if the processing can continue with the next analysers, or zero
+ * if it needs more data, encounters an error, or wants to immediately abort
+ * the request. It relies on buffer flags, and updates s->req->analysers. The
+ * function may be called for frontend rules and backend rules. It only relies
+ * on the backend pointer so this works for both cases.
+ */
+int tcp_inspect_request(struct stream *s, struct channel *req, int an_bit)
+{
+ struct list *def_rules, *rules;
+ struct session *sess = s->sess;
+ struct act_rule *rule;
+ int partial;
+ int act_opts = 0;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+
+ def_rules = ((s->be->defpx &&
+ (sess->fe->mode == PR_MODE_TCP || sess->fe->mode == PR_MODE_HTTP) &&
+ (an_bit == AN_REQ_INSPECT_FE || s->be->defpx != sess->fe->defpx)) ? &s->be->defpx->tcp_req.inspect_rules : NULL);
+ rules = &s->be->tcp_req.inspect_rules;
+
+ /* We don't know whether we have enough data, so we must proceed
+ * as follows:
+ * - iterate through all rules in their declaration order
+ * - if one rule returns MISS, it means the inspect delay is
+ * not over yet, then return immediately, otherwise consider
+ * it as a non-match.
+ * - if one rule returns OK, then return OK
+ * - if one rule returns KO, then return KO
+ */
+
+ if ((s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) || channel_full(req, global.tune.maxrewrite) ||
+ sc_waiting_room(s->scf) ||
+ !s->be->tcp_req.inspect_delay || tick_is_expired(s->rules_exp, now_ms)) {
+ partial = SMP_OPT_FINAL;
+ /* Action may yield while the inspect_delay is not expired and there is no read error */
+ if ((s->scf->flags & SC_FL_ERROR) || !s->be->tcp_req.inspect_delay || tick_is_expired(s->rules_exp, now_ms))
+ act_opts |= ACT_OPT_FINAL;
+ }
+ else
+ partial = 0;
+
+ /* If "the current_rule_list" match the executed rule list, we are in
+ * resume condition. If a resume is needed it is always in the action
+ * and never in the ACL or converters. In this case, we initialise the
+ * current rule, and go to the action execution point.
+ */
+ if (s->current_rule) {
+ rule = s->current_rule;
+ s->current_rule = NULL;
+ if ((def_rules && s->current_rule_list == def_rules) || s->current_rule_list == rules)
+ goto resume_execution;
+ }
+ s->current_rule_list = ((!def_rules || s->current_rule_list == def_rules) ? rules : def_rules);
+
+ restart:
+ list_for_each_entry(rule, s->current_rule_list, list) {
+ enum acl_test_res ret = ACL_TEST_PASS;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_REQ | partial);
+ if (ret == ACL_TEST_MISS)
+ goto missing_data;
+
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ act_opts |= ACT_OPT_FIRST;
+resume_execution:
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ switch (rule->action_ptr(rule, s->be, s->sess, s, act_opts)) {
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_STOP:
+ case ACT_RET_DONE:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_YIELD:
+ s->current_rule = rule;
+ if (act_opts & ACT_OPT_FINAL) {
+ send_log(s->be, LOG_WARNING,
+ "Internal error: yield not allowed if the inspect-delay expired "
+ "for the tcp-request content actions.");
+ goto internal;
+ }
+ goto missing_data;
+ case ACT_RET_DENY:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto deny;
+ case ACT_RET_ABRT:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto abort;
+ case ACT_RET_ERR:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto internal;
+ case ACT_RET_INV:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto invalid;
+ }
+ continue; /* eval the next rule */
+ }
+
+ /* If not action function defined, check for known actions */
+ if (rule->action == ACT_ACTION_ALLOW) {
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ else if (rule->action == ACT_ACTION_DENY) {
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto deny;
+ }
+ }
+ }
+
+ if (def_rules && s->current_rule_list == def_rules) {
+ s->current_rule_list = rules;
+ goto restart;
+ }
+
+ end:
+ /* if we get there, it means we have no rule which matches, or
+ * we have an explicit accept, so we apply the default accept.
+ */
+ req->analysers &= ~an_bit;
+ s->current_rule = s->current_rule_list = NULL;
+ req->analyse_exp = s->rules_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 1;
+
+ missing_data:
+ channel_dont_connect(req);
+ /* just set the request timeout once at the beginning of the request */
+ if (!tick_isset(s->rules_exp) && s->be->tcp_req.inspect_delay)
+ s->rules_exp = tick_add(now_ms, s->be->tcp_req.inspect_delay);
+ req->analyse_exp = tick_first((tick_is_expired(req->analyse_exp, now_ms) ? 0 : req->analyse_exp), s->rules_exp);
+ DBG_TRACE_DEVEL("waiting for more data", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 0;
+
+ deny:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_req);
+ goto reject;
+
+ internal:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->internal_errors);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ goto reject;
+
+ invalid:
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.failed_req);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->failed_req);
+
+ reject:
+ sc_must_kill_conn(s->scf);
+ stream_abort(s);
+
+ abort:
+ req->analysers &= AN_REQ_FLT_END;
+ s->current_rule = s->current_rule_list = NULL;
+ req->analyse_exp = s->rules_exp = TICK_ETERNITY;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_R;
+ DBG_TRACE_DEVEL("leaving on error|deny|abort", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_TCP_ERR, s);
+ return 0;
+}
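+
+/* A hypothetical ruleset exercised by the analyser above (the delay and ACL
+ * are illustrative); the inspect-delay bounds how long the missing_data path
+ * may keep the analyser subscribed:
+ *
+ *     frontend fe_tls
+ *         mode tcp
+ *         tcp-request inspect-delay 5s
+ *         tcp-request content accept if { req.ssl_hello_type 1 }
+ *         tcp-request content reject
+ */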
+
+/* This function performs the TCP response analysis on the current response. It
+ * returns 1 if the processing can continue with the next analysers, or zero
+ * if it needs more data, encounters an error, or wants to immediately abort
+ * the response. It relies on buffer flags, and updates s->rep->analysers. The
+ * function may be called for backend rules.
+ */
+int tcp_inspect_response(struct stream *s, struct channel *rep, int an_bit)
+{
+ struct list *def_rules, *rules;
+ struct session *sess = s->sess;
+ struct act_rule *rule;
+ int partial;
+ int act_opts = 0;
+
+ DBG_TRACE_ENTER(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+
+ def_rules = (s->be->defpx && (s->be->mode == PR_MODE_TCP || s->be->mode == PR_MODE_HTTP) ? &s->be->defpx->tcp_rep.inspect_rules : NULL);
+ rules = &s->be->tcp_rep.inspect_rules;
+
+ /* We don't know whether we have enough data, so we must proceed
+ * as follows:
+ * - iterate through all rules in their declaration order
+ * - if one rule returns MISS, it means the inspect delay is
+ * not over yet, then return immediately, otherwise consider
+ * it as a non-match.
+ * - if one rule returns OK, then return OK
+ * - if one rule returns KO, then return KO
+ */
+ if ((s->scb->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) || channel_full(rep, global.tune.maxrewrite) ||
+ sc_waiting_room(s->scb) ||
+ !s->be->tcp_rep.inspect_delay || tick_is_expired(s->rules_exp, now_ms)) {
+ partial = SMP_OPT_FINAL;
+ /* Action may yield while the inspect_delay is not expired and there is no read error */
+ if ((s->scb->flags & SC_FL_ERROR) || !s->be->tcp_rep.inspect_delay || tick_is_expired(s->rules_exp, now_ms))
+ act_opts |= ACT_OPT_FINAL;
+ }
+ else
+ partial = 0;
+
+ /* If "the current_rule_list" match the executed rule list, we are in
+ * resume condition. If a resume is needed it is always in the action
+ * and never in the ACL or converters. In this case, we initialise the
+ * current rule, and go to the action execution point.
+ */
+ if (s->current_rule) {
+ rule = s->current_rule;
+ s->current_rule = NULL;
+ if ((def_rules && s->current_rule_list == def_rules) || s->current_rule_list == rules)
+ goto resume_execution;
+ }
+ s->current_rule_list = ((!def_rules || s->current_rule_list == def_rules) ? rules : def_rules);
+
+ restart:
+ list_for_each_entry(rule, s->current_rule_list, list) {
+ enum acl_test_res ret = ACL_TEST_PASS;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, s->be, sess, s, SMP_OPT_DIR_RES | partial);
+ if (ret == ACL_TEST_MISS)
+ goto missing_data;
+
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ act_opts |= ACT_OPT_FIRST;
+resume_execution:
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ switch (rule->action_ptr(rule, s->be, s->sess, s, act_opts)) {
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_STOP:
+ case ACT_RET_DONE:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ case ACT_RET_YIELD:
+ s->current_rule = rule;
+ if (act_opts & ACT_OPT_FINAL) {
+ send_log(s->be, LOG_WARNING,
+ "Internal error: yield not allowed if the inspect-delay expired "
+ "for the tcp-response content actions.");
+ goto internal;
+ }
+ channel_dont_close(rep);
+ goto missing_data;
+ case ACT_RET_DENY:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto deny;
+ case ACT_RET_ABRT:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto abort;
+ case ACT_RET_ERR:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto internal;
+ case ACT_RET_INV:
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto invalid;
+ }
+ continue; /* eval the next rule */
+ }
+
+ /* If not action function defined, check for known actions */
+ if (rule->action == ACT_ACTION_ALLOW) {
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ else if (rule->action == ACT_ACTION_DENY) {
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto deny;
+ }
+ else if (rule->action == ACT_TCP_CLOSE) {
+ s->scb->flags |= SC_FL_NOLINGER | SC_FL_NOHALF;
+ sc_must_kill_conn(s->scb);
+ sc_abort(s->scb);
+ sc_shutdown(s->scb);
+ s->last_rule_file = rule->conf.file;
+ s->last_rule_line = rule->conf.line;
+ goto end;
+ }
+ }
+ }
+
+ if (def_rules && s->current_rule_list == def_rules) {
+ s->current_rule_list = rules;
+ goto restart;
+ }
+
+ end:
+ /* if we get there, it means we have no rule which matches, or
+ * we have an explicit accept, so we apply the default accept.
+ */
+ rep->analysers &= ~an_bit;
+ s->current_rule = s->current_rule_list = NULL;
+ rep->analyse_exp = s->rules_exp = TICK_ETERNITY;
+ DBG_TRACE_LEAVE(STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 1;
+
+ missing_data:
+ /* just set the analyser timeout once at the beginning of the response */
+ if (!tick_isset(s->rules_exp) && s->be->tcp_rep.inspect_delay)
+ s->rules_exp = tick_add(now_ms, s->be->tcp_rep.inspect_delay);
+ rep->analyse_exp = tick_first((tick_is_expired(rep->analyse_exp, now_ms) ? 0 : rep->analyse_exp), s->rules_exp);
+ DBG_TRACE_DEVEL("waiting for more data", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA, s);
+ return 0;
+
+ deny:
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.denied_resp);
+ _HA_ATOMIC_INC(&s->be->be_counters.denied_resp);
+ if (s->sess->listener && s->sess->listener->counters)
+ _HA_ATOMIC_INC(&s->sess->listener->counters->denied_resp);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.denied_resp);
+ goto reject;
+
+ internal:
+ _HA_ATOMIC_INC(&s->sess->fe->fe_counters.internal_errors);
+ _HA_ATOMIC_INC(&s->be->be_counters.internal_errors);
+ if (s->sess->listener && s->sess->listener->counters)
+ _HA_ATOMIC_INC(&s->sess->listener->counters->internal_errors);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors);
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_INTERNAL;
+ goto reject;
+
+ invalid:
+ _HA_ATOMIC_INC(&s->be->be_counters.failed_resp);
+ if (objt_server(s->target))
+ _HA_ATOMIC_INC(&__objt_server(s->target)->counters.failed_resp);
+
+ reject:
+ sc_must_kill_conn(s->scb);
+ stream_abort(s);
+
+ abort:
+ rep->analysers &= AN_RES_FLT_END;
+ s->current_rule = s->current_rule_list = NULL;
+ rep->analyse_exp = s->rules_exp = TICK_ETERNITY;
+
+ if (!(s->flags & SF_ERR_MASK))
+ s->flags |= SF_ERR_PRXCOND;
+ if (!(s->flags & SF_FINST_MASK))
+ s->flags |= SF_FINST_D;
+ DBG_TRACE_DEVEL("leaving on error", STRM_EV_STRM_ANA|STRM_EV_TCP_ANA|STRM_EV_TCP_ERR, s);
+ return 0;
+}
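+
+/* Illustrative configuration exercised by the analyser above (a hypothetical
+ * example, not part of this source tree): a backend inspecting the first
+ * bytes of the server response before forwarding them:
+ *
+ *   backend be_pop3
+ *       tcp-response inspect-delay 5s
+ *       tcp-response content accept if { res.payload(0,3) -m str +OK }
+ *       tcp-response content reject
+ *
+ * Rules are evaluated in order: "accept" removes the analyser and lets the
+ * response pass, while "reject" takes the deny path above.
+ */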
+
+
+/* This function performs the TCP layer4 analysis on the current request. It
+ * returns 0 if a reject rule matches, otherwise 1 if either an accept rule
+ * matches or if no more rule matches. It can only use rules which don't need
+ * any data. This only works on connection-based client-facing stream connectors.
+ */
+int tcp_exec_l4_rules(struct session *sess)
+{
+ struct proxy *px = sess->fe;
+ struct act_rule *rule;
+ struct connection *conn = objt_conn(sess->origin);
+ int result = 1;
+ enum acl_test_res ret;
+
+ if (!conn)
+ return result;
+
+ if (sess->fe->defpx && (sess->fe->mode == PR_MODE_TCP || sess->fe->mode == PR_MODE_HTTP))
+ px = sess->fe->defpx;
+
+ restart:
+ list_for_each_entry(rule, &px->tcp_req.l4_rules, list) {
+ ret = ACL_TEST_PASS;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, sess->fe, sess, NULL, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ switch (rule->action_ptr(rule, sess->fe, sess, NULL, ACT_OPT_FINAL | ACT_OPT_FIRST)) {
+ case ACT_RET_YIELD:
+					/* yield is not allowed at this point. If this return code is
+					 * used, it is a bug, so emit a warning and stop processing the rules.
+ */
+ send_log(sess->fe, LOG_WARNING,
+ "Internal error: yield not allowed with tcp-request connection actions.");
+ /* fall through */
+ case ACT_RET_STOP:
+ case ACT_RET_DONE:
+ goto end;
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_DENY:
+ case ACT_RET_ABRT:
+ case ACT_RET_ERR:
+ case ACT_RET_INV:
+ result = 0;
+ goto end;
+ }
+ continue; /* eval the next rule */
+ }
+
+			/* If no action function is defined, check for known actions */
+ if (rule->action == ACT_ACTION_ALLOW) {
+ goto end;
+ }
+ else if (rule->action == ACT_ACTION_DENY) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_conn);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_conn);
+
+ result = 0;
+ goto end;
+ }
+ else if (rule->action == ACT_TCP_EXPECT_PX) {
+ if (!(conn->flags & CO_FL_HANDSHAKE)) {
+ if (xprt_add_hs(conn) < 0) {
+ result = 0;
+ goto end;
+ }
+ }
+ conn->flags |= CO_FL_ACCEPT_PROXY;
+ }
+ else if (rule->action == ACT_TCP_EXPECT_CIP) {
+ if (!(conn->flags & CO_FL_HANDSHAKE)) {
+ if (xprt_add_hs(conn) < 0) {
+ result = 0;
+ goto end;
+ }
+ }
+ conn->flags |= CO_FL_ACCEPT_CIP;
+ }
+ }
+ }
+
+ if (px != sess->fe) {
+ px = sess->fe;
+ goto restart;
+ }
+ end:
+ return result;
+}
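+
+/* Illustrative use of the layer4 rules evaluated above (hypothetical
+ * configuration, for documentation only). Only data-less rules make sense
+ * here since no payload has been received yet:
+ *
+ *   frontend fe_main
+ *       tcp-request connection reject if { src -f /etc/haproxy/blocklist.lst }
+ *       tcp-request connection expect-proxy layer4 if { src 10.0.0.0/8 }
+ */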
+
+/* This function performs the TCP layer5 analysis on the current request. It
+ * returns 0 if a reject rule matches, otherwise 1 if either an accept rule
+ * matches or if no more rule matches. It can only use rules which don't need
+ * any data. This only works on session-based client-facing stream connectors.
+ * An example of valid use case is to track a stick-counter on the source
+ * address extracted from the proxy protocol.
+ */
+int tcp_exec_l5_rules(struct session *sess)
+{
+ struct proxy *px = sess->fe;
+ struct act_rule *rule;
+ int result = 1;
+ enum acl_test_res ret;
+
+ if (sess->fe->defpx && (sess->fe->mode == PR_MODE_TCP || sess->fe->mode == PR_MODE_HTTP))
+ px = sess->fe->defpx;
+
+ restart:
+ list_for_each_entry(rule, &px->tcp_req.l5_rules, list) {
+ ret = ACL_TEST_PASS;
+
+ if (rule->cond) {
+ ret = acl_exec_cond(rule->cond, sess->fe, sess, NULL, SMP_OPT_DIR_REQ|SMP_OPT_FINAL);
+ ret = acl_pass(ret);
+ if (rule->cond->pol == ACL_COND_UNLESS)
+ ret = !ret;
+ }
+
+ if (ret) {
+ /* Always call the action function if defined */
+ if (rule->action_ptr) {
+ switch (rule->action_ptr(rule, sess->fe, sess, NULL, ACT_OPT_FINAL | ACT_OPT_FIRST)) {
+ case ACT_RET_YIELD:
+					/* yield is not allowed at this point. If this return code is
+					 * used, it is a bug, so emit a warning and stop processing the rules.
+ */
+ send_log(sess->fe, LOG_WARNING,
+ "Internal error: yield not allowed with tcp-request session actions.");
+ /* fall through */
+ case ACT_RET_STOP:
+ case ACT_RET_DONE:
+ goto end;
+ case ACT_RET_CONT:
+ break;
+ case ACT_RET_DENY:
+ case ACT_RET_ABRT:
+ case ACT_RET_ERR:
+ case ACT_RET_INV:
+ result = 0;
+ goto end;
+ }
+ continue; /* eval the next rule */
+ }
+
+			/* If no action function is defined, check for known actions */
+ if (rule->action == ACT_ACTION_ALLOW) {
+ goto end;
+ }
+ else if (rule->action == ACT_ACTION_DENY) {
+ _HA_ATOMIC_INC(&sess->fe->fe_counters.denied_sess);
+ if (sess->listener && sess->listener->counters)
+ _HA_ATOMIC_INC(&sess->listener->counters->denied_sess);
+
+ result = 0;
+ goto end;
+ }
+ }
+ }
+
+ if (px != sess->fe) {
+ px = sess->fe;
+ goto restart;
+ }
+ end:
+ return result;
+}
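+
+/* Illustrative use of the layer5 rules evaluated above (hypothetical
+ * configuration): tracking the source address extracted from the PROXY
+ * protocol header once the handshake completed. "st_src" is an assumed
+ * stick-table name used only for this example:
+ *
+ *   frontend fe_main
+ *       tcp-request session track-sc0 src table st_src
+ *       tcp-request session reject if { sc0_conn_rate gt 100 }
+ */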
+
+/* Parse a tcp-response rule. Return a negative value in case of failure */
+static int tcp_parse_response_rule(char **args, int arg, int section_type,
+ struct proxy *curpx, const struct proxy *defpx,
+ struct act_rule *rule, char **err,
+ unsigned int where,
+ const char *file, int line)
+{
+ if ((curpx == defpx && strlen(defpx->id) == 0) || !(curpx->cap & PR_CAP_BE)) {
+ memprintf(err, "%s %s is only allowed in 'backend' sections or 'defaults' section with a name",
+ args[0], args[1]);
+ return -1;
+ }
+
+ if (strcmp(args[arg], "accept") == 0) {
+ arg++;
+ rule->action = ACT_ACTION_ALLOW;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else if (strcmp(args[arg], "reject") == 0) {
+ arg++;
+ rule->action = ACT_ACTION_DENY;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else if (strcmp(args[arg], "close") == 0) {
+ arg++;
+ rule->action = ACT_TCP_CLOSE;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else {
+ struct action_kw *kw;
+ kw = tcp_res_cont_action(args[arg]);
+ if (kw) {
+ arg++;
+ rule->kw = kw;
+ if (kw->parse((const char **)args, &arg, curpx, rule, err) == ACT_RET_PRS_ERR)
+ return -1;
+ } else {
+ const char *extra[] = { "accept", "reject", "close", NULL };
+ const char *best = action_suggest(args[arg], &tcp_res_cont_keywords, extra);
+
+ action_build_list(&tcp_res_cont_keywords, &trash);
+ memprintf(err,
+ "'%s %s' expects 'accept', 'close', 'reject', %s in %s '%s' (got '%s').%s%s%s",
+ args[0], args[1], trash.area,
+ proxy_type_str(curpx), curpx->id, args[arg],
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ return -1;
+ }
+ }
+
+ if (strcmp(args[arg], "if") == 0 || strcmp(args[arg], "unless") == 0) {
+ if ((rule->cond = build_acl_cond(file, line, &curpx->acl, curpx, (const char **)args+arg, err)) == NULL) {
+ memprintf(err,
+ "'%s %s %s' : error detected in %s '%s' while parsing '%s' condition : %s",
+ args[0], args[1], args[2], proxy_type_str(curpx), curpx->id, args[arg], *err);
+ return -1;
+ }
+ }
+ else if (*args[arg]) {
+ memprintf(err,
+ "'%s %s %s' only accepts 'if' or 'unless', in %s '%s' (got '%s')",
+ args[0], args[1], args[2], proxy_type_str(curpx), curpx->id, args[arg]);
+ return -1;
+ }
+ return 0;
+}
+
+
+/* This function executes a track-sc* action. On success, it returns
+ * ACT_RET_CONT. If it must yield, it returns ACT_RET_YIELD. Otherwise
+ * ACT_RET_ERR is returned.
+ */
+static enum act_return tcp_action_track_sc(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct stksess *ts;
+ struct stktable *t;
+ struct stktable_key *key;
+ struct sample smp;
+ int opt;
+
+ opt = SMP_OPT_DIR_REQ;
+ if (flags & ACT_FLAG_FINAL)
+ opt |= SMP_OPT_FINAL;
+
+ t = rule->arg.trk_ctr.table.t;
+ if (rule->from == ACT_F_TCP_REQ_CNT) { /* L7 rules: use the stream */
+ if (stkctr_entry(&s->stkctr[rule->action]))
+ goto end;
+
+ key = stktable_fetch_key(t, s->be, sess, s, opt, rule->arg.trk_ctr.expr, &smp);
+
+ if ((smp.flags & SMP_F_MAY_CHANGE) && !(flags & ACT_FLAG_FINAL))
+ return ACT_RET_YIELD; /* key might appear later */
+
+ if (key && (ts = stktable_get_entry(t, key))) {
+ stream_track_stkctr(&s->stkctr[rule->action], t, ts);
+ stkctr_set_flags(&s->stkctr[rule->action], STKCTR_TRACK_CONTENT);
+ if (sess->fe != s->be)
+ stkctr_set_flags(&s->stkctr[rule->action], STKCTR_TRACK_BACKEND);
+ }
+ }
+ else { /* L4/L5 rules: use the session */
+ if (stkctr_entry(&sess->stkctr[rule->action]))
+ goto end;
+
+ key = stktable_fetch_key(t, sess->fe, sess, NULL, opt, rule->arg.trk_ctr.expr, NULL);
+ if (key && (ts = stktable_get_entry(t, key)))
+ stream_track_stkctr(&sess->stkctr[rule->action], t, ts);
+ }
+
+ end:
+ return ACT_RET_CONT;
+}
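+
+/* Illustrative configuration reaching the action above (hypothetical
+ * example). For L7 rules the key may depend on content which was not
+ * received yet, which is why the function may yield while SMP_F_MAY_CHANGE
+ * is set. "st_src" is an assumed stick-table name:
+ *
+ *   tcp-request inspect-delay 5s
+ *   tcp-request content track-sc1 src table st_src
+ */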
+
+/* This function executes a capture action. It executes a fetch expression,
+ * turns the result into a string and puts it in a capture slot. On success, it
+ * returns ACT_RET_CONT. If it must yield, it returns ACT_RET_YIELD. Otherwise
+ * ACT_RET_ERR is returned.
+ */
+static enum act_return tcp_action_capture(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample *key;
+ struct cap_hdr *h = rule->arg.cap.hdr;
+ char **cap = s->req_cap;
+ int len, opt;
+
+ opt = ((rule->from == ACT_F_TCP_REQ_CNT) ? SMP_OPT_DIR_REQ : SMP_OPT_DIR_RES);
+ if (flags & ACT_FLAG_FINAL)
+ opt |= SMP_OPT_FINAL;
+
+ key = sample_fetch_as_type(s->be, sess, s, opt, rule->arg.cap.expr, SMP_T_STR);
+ if (!key)
+ goto end;
+
+ if ((key->flags & SMP_F_MAY_CHANGE) && !(flags & ACT_FLAG_FINAL))
+ return ACT_RET_YIELD; /* key might appear later */
+
+ if (cap[h->index] == NULL) {
+ cap[h->index] = pool_alloc(h->pool);
+ if (cap[h->index] == NULL) /* no more capture memory, ignore error */
+ goto end;
+ }
+
+ len = key->data.u.str.data;
+ if (len > h->len)
+ len = h->len;
+
+ memcpy(cap[h->index], key->data.u.str.area, len);
+ cap[h->index][len] = 0;
+
+ end:
+ return ACT_RET_CONT;
+}
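+
+/* Illustrative configuration reaching the capture action above (hypothetical
+ * example): storing the first request bytes into a capture slot, which then
+ * becomes available to log formats:
+ *
+ *   frontend fe_main
+ *       tcp-request inspect-delay 5s
+ *       tcp-request content capture req.payload(0,8) len 8
+ */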
+
+static void release_tcp_capture(struct act_rule * rule)
+{
+ release_sample_expr(rule->arg.cap.expr);
+}
+
+
+static void release_tcp_track_sc(struct act_rule * rule)
+{
+ release_sample_expr(rule->arg.trk_ctr.expr);
+}
+
+/* Parse a tcp-request rule. Return a negative value in case of failure */
+static int tcp_parse_request_rule(char **args, int arg, int section_type,
+ struct proxy *curpx, const struct proxy *defpx,
+ struct act_rule *rule, char **err,
+ unsigned int where, const char *file, int line)
+{
+ if (curpx == defpx && strlen(defpx->id) == 0) {
+ memprintf(err, "%s %s is not allowed in anonymous 'defaults' sections",
+ args[0], args[1]);
+ return -1;
+ }
+
+ if (strcmp(args[arg], "accept") == 0) {
+ arg++;
+ rule->action = ACT_ACTION_ALLOW;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else if (strcmp(args[arg], "reject") == 0) {
+ arg++;
+ rule->action = ACT_ACTION_DENY;
+ rule->flags |= ACT_FLAG_FINAL;
+ }
+ else if (strcmp(args[arg], "capture") == 0) {
+ struct sample_expr *expr;
+ struct cap_hdr *hdr;
+ int kw = arg;
+ int len = 0;
+
+ if (!(curpx->cap & PR_CAP_FE)) {
+ memprintf(err,
+ "'%s %s %s' : proxy '%s' has no frontend capability",
+ args[0], args[1], args[kw], curpx->id);
+ return -1;
+ }
+
+ if (!(where & SMP_VAL_FE_REQ_CNT)) {
+ memprintf(err,
+ "'%s %s' is not allowed in '%s %s' rules in %s '%s'",
+ args[arg], args[arg+1], args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ arg++;
+
+ curpx->conf.args.ctx = ARGC_CAP;
+ expr = sample_parse_expr(args, &arg, file, line, err, &curpx->conf.args, NULL);
+ if (!expr) {
+ memprintf(err,
+ "'%s %s %s' : %s",
+ args[0], args[1], args[kw], *err);
+ return -1;
+ }
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err,
+ "'%s %s %s' : fetch method '%s' extracts information from '%s', none of which is available here",
+ args[0], args[1], args[kw], args[arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return -1;
+ }
+
+ if (strcmp(args[arg], "len") == 0) {
+ arg++;
+ if (!args[arg]) {
+ memprintf(err,
+ "'%s %s %s' : missing length value",
+ args[0], args[1], args[kw]);
+ release_sample_expr(expr);
+ return -1;
+ }
+			/* parse and check the capture length */
+ len = atoi(args[arg]);
+ if (len <= 0) {
+ memprintf(err,
+ "'%s %s %s' : length must be > 0",
+ args[0], args[1], args[kw]);
+ release_sample_expr(expr);
+ return -1;
+ }
+ arg++;
+ }
+
+ if (!len) {
+ memprintf(err,
+ "'%s %s %s' : a positive 'len' argument is mandatory",
+ args[0], args[1], args[kw]);
+			release_sample_expr(expr);
+ return -1;
+ }
+
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(err, "parsing [%s:%d] : out of memory", file, line);
+ release_sample_expr(expr);
+ return -1;
+ }
+ hdr->next = curpx->req_cap;
+ hdr->name = NULL; /* not a header capture */
+ hdr->namelen = 0;
+ hdr->len = len;
+ hdr->pool = create_pool("caphdr", hdr->len + 1, MEM_F_SHARED);
+ hdr->index = curpx->nb_req_cap++;
+
+ curpx->req_cap = hdr;
+ curpx->to_log |= LW_REQHDR;
+
+ /* check if we need to allocate an http_txn struct for HTTP parsing */
+ curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+
+ rule->arg.cap.expr = expr;
+ rule->arg.cap.hdr = hdr;
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = tcp_action_capture;
+ rule->check_ptr = check_capture;
+ rule->release_ptr = release_tcp_capture;
+ }
+ else if (strncmp(args[arg], "track-sc", 8) == 0) {
+ struct sample_expr *expr;
+ int kw = arg;
+ unsigned int tsc_num;
+ const char *tsc_num_str;
+
+ arg++;
+
+ tsc_num_str = &args[kw][8];
+ if (cfg_parse_track_sc_num(&tsc_num, tsc_num_str, tsc_num_str + strlen(tsc_num_str), err) == -1) {
+ memprintf(err, "'%s %s %s' : %s", args[0], args[1], args[kw], *err);
+ return -1;
+ }
+
+ curpx->conf.args.ctx = ARGC_TRK;
+ expr = sample_parse_expr(args, &arg, file, line, err, &curpx->conf.args, NULL);
+ if (!expr) {
+ memprintf(err,
+ "'%s %s %s' : %s",
+ args[0], args[1], args[kw], *err);
+ return -1;
+ }
+
+ if (!(expr->fetch->val & where)) {
+ memprintf(err,
+ "'%s %s %s' : fetch method '%s' extracts information from '%s', none of which is available here",
+ args[0], args[1], args[kw], args[arg-1], sample_src_names(expr->fetch->use));
+ release_sample_expr(expr);
+ return -1;
+ }
+
+ /* check if we need to allocate an http_txn struct for HTTP parsing */
+ curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY);
+
+ if (strcmp(args[arg], "table") == 0) {
+ arg++;
+ if (!args[arg]) {
+ memprintf(err,
+ "'%s %s %s' : missing table name",
+ args[0], args[1], args[kw]);
+ release_sample_expr(expr);
+ return -1;
+ }
+ /* we copy the table name for now, it will be resolved later */
+ rule->arg.trk_ctr.table.n = strdup(args[arg]);
+ arg++;
+ }
+ rule->action = tsc_num;
+ rule->arg.trk_ctr.expr = expr;
+ rule->action_ptr = tcp_action_track_sc;
+ rule->check_ptr = check_trk_action;
+ rule->release_ptr = release_tcp_track_sc;
+ }
+ else if (strcmp(args[arg], "expect-proxy") == 0) {
+ if (strcmp(args[arg+1], "layer4") != 0) {
+ memprintf(err,
+ "'%s %s %s' only supports 'layer4' in %s '%s' (got '%s')",
+ args[0], args[1], args[arg], proxy_type_str(curpx), curpx->id, args[arg+1]);
+ return -1;
+ }
+
+ if (!(where & SMP_VAL_FE_CON_ACC)) {
+ memprintf(err,
+ "'%s %s' is not allowed in '%s %s' rules in %s '%s'",
+ args[arg], args[arg+1], args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ arg += 2;
+ rule->action = ACT_TCP_EXPECT_PX;
+ }
+ else if (strcmp(args[arg], "expect-netscaler-cip") == 0) {
+ if (strcmp(args[arg+1], "layer4") != 0) {
+ memprintf(err,
+ "'%s %s %s' only supports 'layer4' in %s '%s' (got '%s')",
+ args[0], args[1], args[arg], proxy_type_str(curpx), curpx->id, args[arg+1]);
+ return -1;
+ }
+
+ if (!(where & SMP_VAL_FE_CON_ACC)) {
+ memprintf(err,
+ "'%s %s' is not allowed in '%s %s' rules in %s '%s'",
+ args[arg], args[arg+1], args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ arg += 2;
+ rule->action = ACT_TCP_EXPECT_CIP;
+ }
+ else {
+ struct action_kw *kw;
+ if (where & SMP_VAL_FE_CON_ACC) {
+ /* L4 */
+ kw = tcp_req_conn_action(args[arg]);
+ rule->kw = kw;
+ } else if (where & SMP_VAL_FE_SES_ACC) {
+ /* L5 */
+ kw = tcp_req_sess_action(args[arg]);
+ rule->kw = kw;
+ } else {
+ /* L6 */
+ kw = tcp_req_cont_action(args[arg]);
+ rule->kw = kw;
+ }
+ if (kw) {
+ arg++;
+ if (kw->parse((const char **)args, &arg, curpx, rule, err) == ACT_RET_PRS_ERR)
+ return -1;
+ } else {
+ const char *extra[] = { "accept", "reject", "capture", "track-sc", "expect-proxy", "expect-netscaler-cip", NULL };
+ const char *best = NULL;
+
+ if (where & SMP_VAL_FE_CON_ACC) {
+ action_build_list(&tcp_req_conn_keywords, &trash);
+ best = action_suggest(args[arg], &tcp_req_conn_keywords, extra);
+ }
+ else if (where & SMP_VAL_FE_SES_ACC) {
+ action_build_list(&tcp_req_sess_keywords, &trash);
+ best = action_suggest(args[arg], &tcp_req_sess_keywords, extra);
+ }
+ else {
+ action_build_list(&tcp_req_cont_keywords, &trash);
+ best = action_suggest(args[arg], &tcp_req_cont_keywords, extra);
+ }
+
+ memprintf(err,
+ "'%s %s' expects 'accept', 'reject', 'capture', 'expect-proxy', 'expect-netscaler-cip', 'track-sc0' ... 'track-sc%d', %s "
+ "in %s '%s' (got '%s').%s%s%s\n",
+ args[0], args[1], global.tune.nb_stk_ctr-1,
+ trash.area, proxy_type_str(curpx),
+ curpx->id, args[arg],
+ best ? " Did you mean '" : "",
+ best ? best : "",
+ best ? "' maybe ?" : "");
+ return -1;
+ }
+ }
+
+ if (strcmp(args[arg], "if") == 0 || strcmp(args[arg], "unless") == 0) {
+ if ((rule->cond = build_acl_cond(file, line, &curpx->acl, curpx, (const char **)args+arg, err)) == NULL) {
+ memprintf(err,
+ "'%s %s %s' : error detected in %s '%s' while parsing '%s' condition : %s",
+ args[0], args[1], args[2], proxy_type_str(curpx), curpx->id, args[arg], *err);
+ return -1;
+ }
+ }
+ else if (*args[arg]) {
+ memprintf(err,
+ "'%s %s %s' only accepts 'if' or 'unless', in %s '%s' (got '%s')",
+ args[0], args[1], args[2], proxy_type_str(curpx), curpx->id, args[arg]);
+ return -1;
+ }
+ return 0;
+}
+
+/* This function should be called to parse a line starting with the "tcp-response"
+ * keyword.
+ */
+static int tcp_parse_tcp_rep(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *ptr = NULL;
+ unsigned int val;
+ int warn = 0;
+ int arg;
+ struct act_rule *rule;
+ unsigned int where;
+ const struct acl *acl;
+ const char *kw;
+
+ if (!*args[1]) {
+ memprintf(err, "missing argument for '%s' in %s '%s'",
+ args[0], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ if (strcmp(args[1], "inspect-delay") == 0) {
+ if ((curpx == defpx && strlen(defpx->id) == 0) || !(curpx->cap & PR_CAP_BE)) {
+ memprintf(err, "%s %s is only allowed in 'backend' sections or 'defaults' section with a name",
+ args[0], args[1]);
+ return -1;
+ }
+
+ if (!*args[2] || (ptr = parse_time_err(args[2], &val, TIME_UNIT_MS))) {
+ memprintf(err,
+ "'%s %s' expects a positive delay in milliseconds, in %s '%s'",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+
+ if (ptr == PARSE_TIME_OVER)
+ memprintf(err, "%s (timer overflow in '%s', maximum value is 2147483647 ms or ~24.8 days)", *err, args[2]);
+ else if (ptr == PARSE_TIME_UNDER)
+ memprintf(err, "%s (timer underflow in '%s', minimum non-null value is 1 ms)", *err, args[2]);
+ else if (ptr)
+ memprintf(err, "%s (unexpected character '%c')", *err, *ptr);
+ return -1;
+ }
+
+ if (curpx->tcp_rep.inspect_delay) {
+ memprintf(err, "ignoring %s %s (was already defined) in %s '%s'",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return 1;
+ }
+ curpx->tcp_rep.inspect_delay = val;
+ return 0;
+ }
+
+ rule = new_act_rule(ACT_F_TCP_RES_CNT, file, line);
+ if (!rule) {
+ memprintf(err, "parsing [%s:%d] : out of memory", file, line);
+ return -1;
+ }
+ LIST_INIT(&rule->list);
+ arg = 1;
+ where = 0;
+
+ if (strcmp(args[1], "content") == 0) {
+ arg++;
+
+ if (curpx->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_RES_CNT;
+ if (curpx->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_RES_CNT;
+ if (tcp_parse_response_rule(args, arg, section_type, curpx, defpx, rule, err, where, file, line) < 0)
+ goto error;
+
+ acl = rule->cond ? acl_cond_conflicts(rule->cond, where) : NULL;
+ if (acl) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' will never match in '%s %s' because it only involves keywords that are incompatible with '%s'",
+ acl->name, args[0], args[1], sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl will never match in '%s %s' because it uses keyword '%s' which is incompatible with '%s'",
+ args[0], args[1],
+ LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw,
+ sample_ckp_names(where));
+
+ warn++;
+ }
+ else if (rule->cond && acl_cond_kw_conflicts(rule->cond, where, &acl, &kw)) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' involves keyword '%s' which is incompatible with '%s'",
+ acl->name, kw, sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl involves keyword '%s' which is incompatible with '%s'",
+ kw, sample_ckp_names(where));
+ warn++;
+ }
+
+ LIST_APPEND(&curpx->tcp_rep.inspect_rules, &rule->list);
+ }
+ else {
+ memprintf(err,
+ "'%s' expects 'inspect-delay' or 'content' in %s '%s' (got '%s')",
+ args[0], proxy_type_str(curpx), curpx->id, args[1]);
+ goto error;
+ }
+
+ return warn;
+ error:
+ free_act_rule(rule);
+ return -1;
+}
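+
+/* Summary of the grammar accepted by the parser above (illustrative only):
+ *
+ *   tcp-response inspect-delay <time>
+ *   tcp-response content accept|reject|close|<action-kw> [if|unless <cond>]
+ *
+ * Note that "inspect-delay" may only be set once per proxy; a second
+ * occurrence is reported as a warning and ignored.
+ */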
+
+
+/* This function should be called to parse a line starting with the "tcp-request"
+ * keyword.
+ */
+static int tcp_parse_tcp_req(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ const char *ptr = NULL;
+ unsigned int val;
+ int warn = 0;
+ int arg;
+ struct act_rule *rule;
+ unsigned int where;
+ const struct acl *acl;
+ const char *kw;
+
+ if (!*args[1]) {
+ if (curpx == defpx)
+ memprintf(err, "missing argument for '%s' in defaults section", args[0]);
+ else
+ memprintf(err, "missing argument for '%s' in %s '%s'",
+ args[0], proxy_type_str(curpx), curpx->id);
+ return -1;
+ }
+
+ if (strcmp(args[1], "inspect-delay") == 0) {
+ if (curpx == defpx && strlen(defpx->id) == 0) {
+ memprintf(err, "%s %s is not allowed in anonymous 'defaults' sections",
+ args[0], args[1]);
+ return -1;
+ }
+
+ if (!*args[2] || (ptr = parse_time_err(args[2], &val, TIME_UNIT_MS))) {
+ memprintf(err,
+ "'%s %s' expects a positive delay in milliseconds, in %s '%s'",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+
+ if (ptr == PARSE_TIME_OVER)
+ memprintf(err, "%s (timer overflow in '%s', maximum value is 2147483647 ms or ~24.8 days)", *err, args[2]);
+ else if (ptr == PARSE_TIME_UNDER)
+ memprintf(err, "%s (timer underflow in '%s', minimum non-null value is 1 ms)", *err, args[2]);
+ else if (ptr)
+ memprintf(err, "%s (unexpected character '%c')", *err, *ptr);
+ return -1;
+ }
+
+ if (curpx->tcp_req.inspect_delay) {
+ memprintf(err, "ignoring %s %s (was already defined) in %s '%s'",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+ return 1;
+ }
+ curpx->tcp_req.inspect_delay = val;
+ return 0;
+ }
+
+ rule = new_act_rule(0, file, line);
+ if (!rule) {
+ memprintf(err, "parsing [%s:%d] : out of memory", file, line);
+ return -1;
+ }
+ LIST_INIT(&rule->list);
+ arg = 1;
+ where = 0;
+
+ if (strcmp(args[1], "content") == 0) {
+ arg++;
+
+ if (curpx->cap & PR_CAP_FE)
+ where |= SMP_VAL_FE_REQ_CNT;
+ if (curpx->cap & PR_CAP_BE)
+ where |= SMP_VAL_BE_REQ_CNT;
+ rule->from = ACT_F_TCP_REQ_CNT;
+ if (tcp_parse_request_rule(args, arg, section_type, curpx, defpx, rule, err, where, file, line) < 0)
+ goto error;
+
+ acl = rule->cond ? acl_cond_conflicts(rule->cond, where) : NULL;
+ if (acl) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' will never match in '%s %s' because it only involves keywords that are incompatible with '%s'",
+ acl->name, args[0], args[1], sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl will never match in '%s %s' because it uses keyword '%s' which is incompatible with '%s'",
+ args[0], args[1],
+ LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw,
+ sample_ckp_names(where));
+
+ warn++;
+ }
+ else if (rule->cond && acl_cond_kw_conflicts(rule->cond, where, &acl, &kw)) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' involves keyword '%s' which is incompatible with '%s'",
+ acl->name, kw, sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl involves keyword '%s' which is incompatible with '%s'",
+ kw, sample_ckp_names(where));
+ warn++;
+ }
+
+ /* the following function directly emits the warning */
+ warnif_misplaced_tcp_cont(curpx, file, line, args[0]);
+ LIST_APPEND(&curpx->tcp_req.inspect_rules, &rule->list);
+ }
+ else if (strcmp(args[1], "connection") == 0) {
+ arg++;
+
+ if (!(curpx->cap & PR_CAP_FE)) {
+ memprintf(err, "%s %s is not allowed because %s %s is not a frontend",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+ goto error;
+ }
+
+ where |= SMP_VAL_FE_CON_ACC;
+ rule->from = ACT_F_TCP_REQ_CON;
+ if (tcp_parse_request_rule(args, arg, section_type, curpx, defpx, rule, err, where, file, line) < 0)
+ goto error;
+
+ acl = rule->cond ? acl_cond_conflicts(rule->cond, where) : NULL;
+ if (acl) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' will never match in '%s %s' because it only involves keywords that are incompatible with '%s'",
+ acl->name, args[0], args[1], sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl will never match in '%s %s' because it uses keyword '%s' which is incompatible with '%s'",
+ args[0], args[1],
+ LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw,
+ sample_ckp_names(where));
+
+ warn++;
+ }
+ else if (rule->cond && acl_cond_kw_conflicts(rule->cond, where, &acl, &kw)) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' involves keyword '%s' which is incompatible with '%s'",
+ acl->name, kw, sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl involves keyword '%s' which is incompatible with '%s'",
+ kw, sample_ckp_names(where));
+ warn++;
+ }
+
+ /* the following function directly emits the warning */
+ warnif_misplaced_tcp_conn(curpx, file, line, args[0]);
+ LIST_APPEND(&curpx->tcp_req.l4_rules, &rule->list);
+ }
+ else if (strcmp(args[1], "session") == 0) {
+ arg++;
+
+ if (!(curpx->cap & PR_CAP_FE)) {
+ memprintf(err, "%s %s is not allowed because %s %s is not a frontend",
+ args[0], args[1], proxy_type_str(curpx), curpx->id);
+ goto error;
+ }
+
+ where |= SMP_VAL_FE_SES_ACC;
+ rule->from = ACT_F_TCP_REQ_SES;
+ if (tcp_parse_request_rule(args, arg, section_type, curpx, defpx, rule, err, where, file, line) < 0)
+ goto error;
+
+ acl = rule->cond ? acl_cond_conflicts(rule->cond, where) : NULL;
+ if (acl) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' will never match in '%s %s' because it only involves keywords that are incompatible with '%s'",
+ acl->name, args[0], args[1], sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl will never match in '%s %s' because it uses keyword '%s' which is incompatible with '%s'",
+ args[0], args[1],
+ LIST_ELEM(acl->expr.n, struct acl_expr *, list)->kw,
+ sample_ckp_names(where));
+ warn++;
+ }
+ else if (rule->cond && acl_cond_kw_conflicts(rule->cond, where, &acl, &kw)) {
+ if (acl->name && *acl->name)
+ memprintf(err,
+ "acl '%s' involves keyword '%s' which is incompatible with '%s'",
+ acl->name, kw, sample_ckp_names(where));
+ else
+ memprintf(err,
+ "anonymous acl involves keyword '%s' which is incompatible with '%s'",
+ kw, sample_ckp_names(where));
+ warn++;
+ }
+
+ /* the following function directly emits the warning */
+ warnif_misplaced_tcp_sess(curpx, file, line, args[0]);
+ LIST_APPEND(&curpx->tcp_req.l5_rules, &rule->list);
+ }
+ else {
+ if (curpx == defpx)
+ memprintf(err,
+ "'%s' expects 'inspect-delay', 'connection', or 'content' in defaults section (got '%s')",
+ args[0], args[1]);
+ else
+ memprintf(err,
+ "'%s' expects 'inspect-delay', 'connection', or 'content' in %s '%s' (got '%s')",
+ args[0], proxy_type_str(curpx), curpx->id, args[1]);
+ goto error;
+ }
+
+ return warn;
+ error:
+ free_act_rule(rule);
+ return -1;
+}
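+
+/* Summary of the grammar accepted by the parser above (illustrative only):
+ *
+ *   tcp-request inspect-delay <time>
+ *   tcp-request connection <action> [if|unless <cond>]   (L4, frontends only)
+ *   tcp-request session    <action> [if|unless <cond>]   (L5, frontends only)
+ *   tcp-request content    <action> [if|unless <cond>]   (L6/L7)
+ */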
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "tcp-request", tcp_parse_tcp_req },
+ { CFG_LISTEN, "tcp-response", tcp_parse_tcp_rep },
+ { 0, NULL, NULL },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tcp_sample.c b/src/tcp_sample.c
new file mode 100644
index 0000000..9fbf920
--- /dev/null
+++ b/src/tcp_sample.c
@@ -0,0 +1,641 @@
+/*
+ * AF_INET/AF_INET6 SOCK_STREAM protocol layer (tcp)
+ *
+ * Copyright 2000-2013 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* this is to have tcp_info defined on systems using musl
+ * library, such as Alpine Linux.
+ */
+#define _GNU_SOURCE
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/listener-t.h>
+#include <haproxy/namespace.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/sample.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/session.h>
+#include <haproxy/tools.h>
+
+/* Fetch the connection's source IPv4/IPv6 address. Depending on the keyword, it
+ * may be the frontend or the backend connection.
+ */
+static int
+smp_fetch_src(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *src = NULL;
+
+ if (kw[0] == 'b') { /* bc_src */
+ struct connection *conn = ((obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ ? sc_conn(__objt_check(smp->sess->origin)->sc)
+ : (smp->strm ? sc_conn(smp->strm->scb): NULL));
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else if (kw[0] == 'f') { /* fc_src */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else /* src */
+ src = (smp->strm ? sc_src(smp->strm->scf) : sess_src(smp->sess));
+
+ if (!src)
+ return 0;
+
+ switch (src->ss_family) {
+ case AF_INET:
+ smp->data.u.ipv4 = ((struct sockaddr_in *)src)->sin_addr;
+ smp->data.type = SMP_T_IPV4;
+ break;
+ case AF_INET6:
+ smp->data.u.ipv6 = ((struct sockaddr_in6 *)src)->sin6_addr;
+ smp->data.type = SMP_T_IPV6;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->flags = 0;
+ return 1;
+}
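+
+/* Illustrative use of the source-address fetches above (hypothetical
+ * configuration): "src" reflects possible address changes on the stream
+ * (e.g. "set-src") while "fc_src" always reports the frontend connection's
+ * original source:
+ *
+ *   acl internal src 10.0.0.0/8
+ *   use_backend be_internal if internal
+ *   http-request set-header X-Client-IP %[fc_src]
+ */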
+
+/* set temp integer to the connection's source port. Depending on the
+ * keyword, it may be the frontend or the backend connection.
+ */
+static int
+smp_fetch_sport(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *src = NULL;
+
+ if (kw[0] == 'b') { /* bc_src_port */
+ struct connection *conn = ((obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ ? sc_conn(__objt_check(smp->sess->origin)->sc)
+ : (smp->strm ? sc_conn(smp->strm->scb): NULL));
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else if (kw[0] == 'f') { /* fc_src_port */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else /* src_port */
+ src = (smp->strm ? sc_src(smp->strm->scf) : sess_src(smp->sess));
+
+ if (!src)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ if (!(smp->data.u.sint = get_host_port(src)))
+ return 0;
+
+ smp->flags = 0;
+ return 1;
+}
+
+/* fetch the connection's destination IPv4/IPv6 address. Depending on the
+ * keyword, it may be the frontend or the backend connection.
+ */
+static int
+smp_fetch_dst(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *dst = NULL;
+
+ if (kw[0] == 'b') { /* bc_dst */
+ struct connection *conn = ((obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ ? sc_conn(__objt_check(smp->sess->origin)->sc)
+ : (smp->strm ? sc_conn(smp->strm->scb): NULL));
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else if (kw[0] == 'f') { /* fc_dst */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else /* dst */
+ dst = (smp->strm ? sc_dst(smp->strm->scf) : sess_dst(smp->sess));
+
+ if (!dst)
+ return 0;
+
+ switch (dst->ss_family) {
+ case AF_INET:
+ smp->data.u.ipv4 = ((struct sockaddr_in *)dst)->sin_addr;
+ smp->data.type = SMP_T_IPV4;
+ break;
+ case AF_INET6:
+ smp->data.u.ipv6 = ((struct sockaddr_in6 *)dst)->sin6_addr;
+ smp->data.type = SMP_T_IPV6;
+ break;
+ default:
+ return 0;
+ }
+
+ smp->flags = 0;
+ return 1;
+}
+
+/* check if the destination address of the front connection is local to the
+ * system or if it was intercepted.
+ */
+int smp_fetch_dst_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct listener *li = smp->sess->listener;
+ const struct sockaddr_storage *dst = NULL;
+
+ if (kw[0] == 'f') { /* fc_dst_is_local */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else /* dst_is_local */
+ dst = (smp->strm ? sc_dst(smp->strm->scf) : sess_dst(smp->sess));
+
+ if (!dst)
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->flags = 0;
+ smp->data.u.sint = addr_is_local(li->rx.settings->netns, dst);
+ return smp->data.u.sint >= 0;
+}
+
+/* check if the source address of the front connection is local to the system
+ * or not.
+ */
+int smp_fetch_src_is_local(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct listener *li = smp->sess->listener;
+ const struct sockaddr_storage *src = NULL;
+
+ if (kw[0] == 'f') { /* fc_src_is_local */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_src(conn))
+ src = conn_src(conn);
+ }
+ else /* src_is_local */
+ src = (smp->strm ? sc_src(smp->strm->scf) : sess_src(smp->sess));
+
+ if (!src)
+ return 0;
+
+ smp->data.type = SMP_T_BOOL;
+ smp->flags = 0;
+ smp->data.u.sint = addr_is_local(li->rx.settings->netns, src);
+ return smp->data.u.sint >= 0;
+}
+
+/* set temp integer to the connection's destination port. Depending on the
+ * keyword, it may be the frontend or the backend connection.
+ */
+static int
+smp_fetch_dport(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct sockaddr_storage *dst = NULL;
+
+ if (kw[0] == 'b') { /* bc_dst_port */
+ struct connection *conn = ((obj_type(smp->sess->origin) == OBJ_TYPE_CHECK)
+ ? sc_conn(__objt_check(smp->sess->origin)->sc)
+ : (smp->strm ? sc_conn(smp->strm->scb): NULL));
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else if (kw[0] == 'f') { /* fc_dst_port */
+ struct connection *conn = objt_conn(smp->sess->origin);
+
+ if (conn && conn_get_dst(conn))
+ dst = conn_dst(conn);
+ }
+ else /* dst_port */
+ dst = (smp->strm ? sc_dst(smp->strm->scf) : sess_dst(smp->sess));
+
+ if (!dst)
+ return 0;
+
+ smp->data.type = SMP_T_SINT;
+ if (!(smp->data.u.sint = get_host_port(dst)))
+ return 0;
+
+ smp->flags = 0;
+ return 1;
+}
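+
+/* Illustrative use of the destination fetches above (hypothetical
+ * configuration): routing on the original destination of an intercepted
+ * connection:
+ *
+ *   use_backend be_ssh   if { dst_port 22 }
+ *   use_backend be_local if { dst_is_local }
+ */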
+
+#ifdef TCP_INFO
+
+
+/* Validates the arguments passed to "fc_*" fetch keywords returning a time
+ * value. These keywords support an optional string representing the unit of the
+ * result: "us" for microseconds and "ms" for milliseconds". Returns 0 on error
+ * and non-zero if OK.
+ */
+static int val_fc_time_value(struct arg *args, char **err)
+{
+ if (args[0].type == ARGT_STR) {
+ if (strcmp(args[0].data.str.area, "us") == 0) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = TIME_UNIT_US;
+ }
+ else if (strcmp(args[0].data.str.area, "ms") == 0) {
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = TIME_UNIT_MS;
+ }
+ else {
+ memprintf(err, "expects 'us' or 'ms', got '%s'",
+ args[0].data.str.area);
+ return 0;
+ }
+ }
+ else {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Validates the arguments passed to "fc_*" fetch keywords returning a
+ * counter. These keywords should be used without any argument, but because of
+ * a bug in previous versions, an optional string argument may be passed. In
+ * such a case, the argument is ignored and a warning is emitted. Returns 0 on error
+ * and non-zero if OK.
+ */
+static int var_fc_counter(struct arg *args, char **err)
+{
+ if (args[0].type != ARGT_STOP) {
+ ha_warning("no argument supported for 'fc_*' sample expressions returning counters.\n");
+ if (args[0].type == ARGT_STR)
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_STOP;
+ }
+
+ return 1;
+}
+
+/* Returns some tcp_info data if it's available. "dir" must be set to 0 if
+ * the client connection is required, otherwise it is set to 1. "val" represents
+ * the required value.
+ * If the function fails it returns 0, otherwise it returns 1 and <smp> is filled.
+ */
+static inline int get_tcp_info(const struct arg *args, struct sample *smp,
+ int dir, int val)
+{
+ struct connection *conn;
+ struct tcp_info info;
+ socklen_t optlen;
+
+ /* strm can be null. */
+ if (!smp->strm)
+ return 0;
+
+	/* get the object associated with the stream connector. The
+	 * object can be something other than a connection, for
+	 * example an appctx.
+ */
+ conn = (dir == 0 ? sc_conn(smp->strm->scf) : sc_conn(smp->strm->scb));
+ if (!conn)
+ return 0;
+
+ /* The fd may not be available for the tcp_info struct, and the
+	   syscall can fail. */
+ optlen = sizeof(info);
+ if ((conn->flags & CO_FL_FDLESS) ||
+ getsockopt(conn->handle.fd, IPPROTO_TCP, TCP_INFO, &info, &optlen) == -1)
+ return 0;
+
+ /* extract the value. */
+ smp->data.type = SMP_T_SINT;
+ switch (val) {
+#if defined(__APPLE__)
+ case 0: smp->data.u.sint = info.tcpi_rttcur; break;
+ case 1: smp->data.u.sint = info.tcpi_rttvar; break;
+ case 2: smp->data.u.sint = info.tcpi_tfo_syn_data_acked; break;
+ case 4: smp->data.u.sint = info.tcpi_tfo_syn_loss; break;
+ case 5: smp->data.u.sint = info.tcpi_rto; break;
+#else
+ /* all other platforms supporting TCP_INFO have these ones */
+ case 0: smp->data.u.sint = info.tcpi_rtt; break;
+ case 1: smp->data.u.sint = info.tcpi_rttvar; break;
+# if defined(__linux__)
+ /* these ones are common to all Linux versions */
+ case 2: smp->data.u.sint = info.tcpi_unacked; break;
+ case 3: smp->data.u.sint = info.tcpi_sacked; break;
+ case 4: smp->data.u.sint = info.tcpi_lost; break;
+ case 5: smp->data.u.sint = info.tcpi_retrans; break;
+ case 6: smp->data.u.sint = info.tcpi_fackets; break;
+ case 7: smp->data.u.sint = info.tcpi_reordering; break;
+# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ /* the ones are found on FreeBSD, NetBSD and OpenBSD featuring TCP_INFO */
+ case 2: smp->data.u.sint = info.__tcpi_unacked; break;
+ case 3: smp->data.u.sint = info.__tcpi_sacked; break;
+ case 4: smp->data.u.sint = info.__tcpi_lost; break;
+ case 5: smp->data.u.sint = info.__tcpi_retrans; break;
+ case 6: smp->data.u.sint = info.__tcpi_fackets; break;
+ case 7: smp->data.u.sint = info.__tcpi_reordering; break;
+# endif
+#endif // apple
+ default: return 0;
+ }
+
+ return 1;
+}
+
+/* get the mean rtt of a client connection */
+static int
+smp_fetch_fc_rtt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 0))
+ return 0;
+
+ /* By default or if explicitly specified, convert rtt to ms */
+ if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
+ smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
+
+ return 1;
+}
+
+/* get the variance of the mean rtt of a client connection */
+static int
+smp_fetch_fc_rttvar(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 1))
+ return 0;
+
+ /* By default or if explicitly specified, convert rttvar to ms */
+ if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
+ smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
+
+ return 1;
+}
+
+/* get the mean rtt of a backend connection */
+static int
+smp_fetch_bc_rtt(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 1, 0))
+ return 0;
+
+ /* By default or if explicitly specified, convert rtt to ms */
+ if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
+ smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
+
+ return 1;
+}
+
+/* get the variance of the mean rtt of a backend connection */
+static int
+smp_fetch_bc_rttvar(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 1, 1))
+ return 0;
+
+ /* By default or if explicitly specified, convert rttvar to ms */
+ if (!args || args[0].type == ARGT_STOP || args[0].data.sint == TIME_UNIT_MS)
+ smp->data.u.sint = (smp->data.u.sint + 500) / 1000;
+
+ return 1;
+}
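+
+/* Illustrative use of the RTT fetches above (hypothetical configuration):
+ * the result is reported in milliseconds by default, or in microseconds
+ * when the optional unit argument says so:
+ *
+ *   http-response set-header X-RTT-ms %[fc_rtt]
+ *   http-response set-header X-RTT-us %[fc_rtt(us)]
+ */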
+
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+/* get the unacked counter on a client connection */
+static int
+smp_fetch_fc_unacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 2))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+/* get the sacked counter on a client connection */
+static int
+smp_fetch_fc_sacked(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 3))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+/* get the lost counter on a client connection */
+static int
+smp_fetch_fc_lost(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 4))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+/* get the retrans counter on a client connection */
+static int
+smp_fetch_fc_retrans(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 5))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+/* get the fackets counter on a client connection */
+static int
+smp_fetch_fc_fackets(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 6))
+ return 0;
+ return 1;
+}
+#endif
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+/* get the reordering counter on a client connection */
+static int
+smp_fetch_fc_reordering(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ if (!get_tcp_info(args, smp, 0, 7))
+ return 0;
+ return 1;
+}
+#endif
+#endif // TCP_INFO
+
+/* Validates the data unit argument passed to the "accept_date" fetch. Argument 0 supports an
+ * optional string representing the unit of the result: "s" for seconds, "ms" for
+ * milliseconds and "us" for microseconds.
+ * Returns 0 on error and non-zero if OK.
+ */
+int smp_check_accept_date_unit(struct arg *args, char **err)
+{
+ if (args[0].type == ARGT_STR) {
+ long long int unit;
+
+ if (strcmp(args[0].data.str.area, "s") == 0) {
+ unit = TIME_UNIT_S;
+ }
+ else if (strcmp(args[0].data.str.area, "ms") == 0) {
+ unit = TIME_UNIT_MS;
+ }
+ else if (strcmp(args[0].data.str.area, "us") == 0) {
+ unit = TIME_UNIT_US;
+ }
+ else {
+ memprintf(err, "expects 's', 'ms' or 'us', got '%s'",
+ args[0].data.str.area);
+ return 0;
+ }
+
+ chunk_destroy(&args[0].data.str);
+ args[0].type = ARGT_SINT;
+ args[0].data.sint = unit;
+ }
+ else if (args[0].type != ARGT_STOP) {
+ memprintf(err, "Unexpected arg type");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Retrieves the accept or request date in epoch time and converts it to
+ * milliseconds or microseconds when requested via the optional args[0] unit
+ * parameter. */
+static int
+smp_fetch_accept_date(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct strm_logs *logs;
+ struct timeval tv;
+
+ if (!smp->strm)
+ return 0;
+
+ logs = &smp->strm->logs;
+
+ if (kw[0] == 'r') { /* request_date */
+ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0);
+ } else { /* accept_date */
+ tv.tv_sec = logs->accept_date.tv_sec;
+ tv.tv_usec = logs->accept_date.tv_usec;
+ }
+
+ smp->data.u.sint = tv.tv_sec;
+
+ /* report in milliseconds */
+ if (args[0].type == ARGT_SINT && args[0].data.sint == TIME_UNIT_MS) {
+ smp->data.u.sint *= 1000;
+ smp->data.u.sint += tv.tv_usec / 1000;
+ }
+ /* report in microseconds */
+ else if (args[0].type == ARGT_SINT && args[0].data.sint == TIME_UNIT_US) {
+ smp->data.u.sint *= 1000000;
+ smp->data.u.sint += tv.tv_usec;
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->flags |= SMP_F_VOL_TEST | SMP_F_MAY_CHANGE;
+ return 1;
+}
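+
+/* Illustrative use of the date fetches above (hypothetical configuration).
+ * Without a unit argument, both fetches report whole seconds since the epoch:
+ *
+ *   http-request set-var(txn.t_accept) accept_date(ms)
+ *   http-request set-header X-Request-Date %[request_date(us)]
+ */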
+
+/* Note: must not be declared <const> as its list will be overwritten.
+ * Note: fetches that may return multiple types should be declared using the
+ * appropriate pseudo-type. If not available it must be declared as the lowest
+ * common denominator, the type that can be casted into all other ones.
+ */
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+ /* timestamps */
+ { "accept_date", smp_fetch_accept_date, ARG1(0,STR), smp_check_accept_date_unit, SMP_T_SINT, SMP_USE_L4CLI },
+ { "request_date", smp_fetch_accept_date, ARG1(0,STR), smp_check_accept_date_unit, SMP_T_SINT, SMP_USE_HRQHP },
+
+ { "bc_dst", smp_fetch_dst, 0, NULL, SMP_T_ADDR, SMP_USE_L4SRV },
+ { "bc_dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
+ { "bc_src", smp_fetch_src, 0, NULL, SMP_T_ADDR, SMP_USE_L4SRV },
+ { "bc_src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
+
+ { "dst", smp_fetch_dst, 0, NULL, SMP_T_ADDR, SMP_USE_L4CLI },
+ { "dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+
+ { "fc_dst", smp_fetch_dst, 0, NULL, SMP_T_ADDR, SMP_USE_L4CLI },
+ { "fc_dst_is_local", smp_fetch_dst_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "fc_dst_port", smp_fetch_dport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+
+ { "fc_src", smp_fetch_src, 0, NULL, SMP_T_ADDR, SMP_USE_L4CLI },
+ { "fc_src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "fc_src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+
+ { "src", smp_fetch_src, 0, NULL, SMP_T_ADDR, SMP_USE_L4CLI },
+ { "src_is_local", smp_fetch_src_is_local, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
+ { "src_port", smp_fetch_sport, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
+#ifdef TCP_INFO
+ { "fc_rtt", smp_fetch_fc_rtt, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
+ { "fc_rttvar", smp_fetch_fc_rttvar, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
+ { "bc_rtt", smp_fetch_bc_rtt, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
+ { "bc_rttvar", smp_fetch_bc_rttvar, ARG1(0,STR), val_fc_time_value, SMP_T_SINT, SMP_USE_L4CLI },
+
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+ { "fc_unacked", smp_fetch_fc_unacked, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ { "fc_sacked", smp_fetch_fc_sacked, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+ { "fc_retrans", smp_fetch_fc_retrans, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ { "fc_fackets", smp_fetch_fc_fackets, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__APPLE__)
+ { "fc_lost", smp_fetch_fc_lost, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+ { "fc_reordering", smp_fetch_fc_reordering, ARG1(0,STR), var_fc_counter, SMP_T_SINT, SMP_USE_L4CLI },
+#endif
+#endif // TCP_INFO
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tcpcheck.c b/src/tcpcheck.c
new file mode 100644
index 0000000..d30ecb5
--- /dev/null
+++ b/src/tcpcheck.c
@@ -0,0 +1,5150 @@
+/*
+ * Health-checks functions.
+ *
+ * Copyright 2000-2009,2020 Willy Tarreau <w@1wt.eu>
+ * Copyright 2007-2010 Krzysztof Piotr Oledzki <ole@ans.pl>
+ * Copyright 2013 Baptiste Assmann <bedis9@gmail.com>
+ * Copyright 2020 Gaetan Rivet <grive@u256.net>
+ * Copyright 2020 Christopher Faulet <cfaulet@haproxy.com>
+ * Crown Copyright 2022 Defence Science and Technology Laboratory <dstlipgroup@dstl.gov.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <haproxy/action.h>
+#include <haproxy/api.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/chunk.h>
+#include <haproxy/connection.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/h1.h>
+#include <haproxy/http.h>
+#include <haproxy/http_htx.h>
+#include <haproxy/htx.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/protocol.h>
+#include <haproxy/proxy-t.h>
+#include <haproxy/regex.h>
+#include <haproxy/sample.h>
+#include <haproxy/server.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/stconn.h>
+#include <haproxy/task.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/ticks.h>
+#include <haproxy/tools.h>
+#include <haproxy/trace.h>
+#include <haproxy/vars.h>
+
+
+#define TRACE_SOURCE &trace_check
+
+/* Global tree to share all tcp-checks */
+struct eb_root shared_tcpchecks = EB_ROOT;
+
+
+DECLARE_POOL(pool_head_tcpcheck_rule, "tcpcheck_rule", sizeof(struct tcpcheck_rule));
+
+/**************************************************************************/
+/*************** Init/deinit tcp-check rules and ruleset ******************/
+/**************************************************************************/
+/* Releases memory allocated for a log-format string */
+static void free_tcpcheck_fmt(struct list *fmt)
+{
+ struct logformat_node *lf, *lfb;
+
+ list_for_each_entry_safe(lf, lfb, fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+}
+
+/* Releases memory allocated for an HTTP header used in a tcp-check send rule */
+void free_tcpcheck_http_hdr(struct tcpcheck_http_hdr *hdr)
+{
+ if (!hdr)
+ return;
+
+ free_tcpcheck_fmt(&hdr->value);
+ istfree(&hdr->name);
+ free(hdr);
+}
+
+/* Releases memory allocated for an HTTP header list used in a tcp-check send
+ * rule
+ */
+static void free_tcpcheck_http_hdrs(struct list *hdrs)
+{
+ struct tcpcheck_http_hdr *hdr, *bhdr;
+
+ list_for_each_entry_safe(hdr, bhdr, hdrs, list) {
+ LIST_DELETE(&hdr->list);
+ free_tcpcheck_http_hdr(hdr);
+ }
+}
+
+/* Releases memory allocated for a tcp-check. If in_pool is set, it means the
+ * tcp-check was allocated using a memory pool (it is used to instantiate email
+ * alerts).
+ */
+void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool)
+{
+ if (!rule)
+ return;
+
+ free(rule->comment);
+ switch (rule->action) {
+ case TCPCHK_ACT_SEND:
+ switch (rule->send.type) {
+ case TCPCHK_SEND_STRING:
+ case TCPCHK_SEND_BINARY:
+ istfree(&rule->send.data);
+ break;
+ case TCPCHK_SEND_STRING_LF:
+ case TCPCHK_SEND_BINARY_LF:
+ free_tcpcheck_fmt(&rule->send.fmt);
+ break;
+ case TCPCHK_SEND_HTTP:
+ free(rule->send.http.meth.str.area);
+ if (!(rule->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT))
+ istfree(&rule->send.http.uri);
+ else
+ free_tcpcheck_fmt(&rule->send.http.uri_fmt);
+ istfree(&rule->send.http.vsn);
+ free_tcpcheck_http_hdrs(&rule->send.http.hdrs);
+ if (!(rule->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT))
+ istfree(&rule->send.http.body);
+ else
+ free_tcpcheck_fmt(&rule->send.http.body_fmt);
+ break;
+ case TCPCHK_SEND_UNDEF:
+ break;
+ }
+ break;
+ case TCPCHK_ACT_EXPECT:
+ free_tcpcheck_fmt(&rule->expect.onerror_fmt);
+ free_tcpcheck_fmt(&rule->expect.onsuccess_fmt);
+ release_sample_expr(rule->expect.status_expr);
+ switch (rule->expect.type) {
+ case TCPCHK_EXPECT_HTTP_STATUS:
+ free(rule->expect.codes.codes);
+ break;
+ case TCPCHK_EXPECT_STRING:
+ case TCPCHK_EXPECT_BINARY:
+ case TCPCHK_EXPECT_HTTP_BODY:
+ istfree(&rule->expect.data);
+ break;
+ case TCPCHK_EXPECT_STRING_REGEX:
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ regex_free(rule->expect.regex);
+ break;
+ case TCPCHK_EXPECT_STRING_LF:
+ case TCPCHK_EXPECT_BINARY_LF:
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ free_tcpcheck_fmt(&rule->expect.fmt);
+ break;
+ case TCPCHK_EXPECT_HTTP_HEADER:
+ if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_REG)
+ regex_free(rule->expect.hdr.name_re);
+ else if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT)
+ free_tcpcheck_fmt(&rule->expect.hdr.name_fmt);
+ else
+ istfree(&rule->expect.hdr.name);
+
+ if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_REG)
+ regex_free(rule->expect.hdr.value_re);
+ else if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT)
+ free_tcpcheck_fmt(&rule->expect.hdr.value_fmt);
+ else if (!(rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_NONE))
+ istfree(&rule->expect.hdr.value);
+ break;
+ case TCPCHK_EXPECT_CUSTOM:
+ case TCPCHK_EXPECT_UNDEF:
+ break;
+ }
+ break;
+ case TCPCHK_ACT_CONNECT:
+ free(rule->connect.sni);
+ free(rule->connect.alpn);
+ release_sample_expr(rule->connect.port_expr);
+ break;
+ case TCPCHK_ACT_COMMENT:
+ break;
+ case TCPCHK_ACT_ACTION_KW:
+ free(rule->action_kw.rule);
+ break;
+ }
+
+ if (in_pool)
+ pool_free(pool_head_tcpcheck_rule, rule);
+ else
+ free(rule);
+}
+
+/* Creates a tcp-check variable, to be used among the preset variables before
+ * executing a tcp-check ruleset.
+ */
+struct tcpcheck_var *create_tcpcheck_var(const struct ist name)
+{
+ struct tcpcheck_var *var = NULL;
+
+ var = calloc(1, sizeof(*var));
+ if (var == NULL)
+ return NULL;
+
+ var->name = istdup(name);
+ if (!isttest(var->name)) {
+ free(var);
+ return NULL;
+ }
+
+ LIST_INIT(&var->list);
+ return var;
+}
+
+/* Releases memory allocated for a preset tcp-check variable */
+void free_tcpcheck_var(struct tcpcheck_var *var)
+{
+ if (!var)
+ return;
+
+ istfree(&var->name);
+ if (var->data.type == SMP_T_STR || var->data.type == SMP_T_BIN)
+ free(var->data.u.str.area);
+ else if (var->data.type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER)
+ free(var->data.u.meth.str.area);
+ free(var);
+}
+
+/* Releases a list of preset tcp-check variables */
+void free_tcpcheck_vars(struct list *vars)
+{
+ struct tcpcheck_var *var, *back;
+
+ list_for_each_entry_safe(var, back, vars, list) {
+ LIST_DELETE(&var->list);
+ free_tcpcheck_var(var);
+ }
+}
+
+/* Duplicate a list of preset tcp-check variables */
+int dup_tcpcheck_vars(struct list *dst, const struct list *src)
+{
+ const struct tcpcheck_var *var;
+ struct tcpcheck_var *new = NULL;
+
+ list_for_each_entry(var, src, list) {
+ new = create_tcpcheck_var(var->name);
+ if (!new)
+ goto error;
+ new->data.type = var->data.type;
+ if (var->data.type == SMP_T_STR || var->data.type == SMP_T_BIN) {
+ if (chunk_dup(&new->data.u.str, &var->data.u.str) == NULL)
+ goto error;
+ if (var->data.type == SMP_T_STR)
+ new->data.u.str.area[new->data.u.str.data] = 0;
+ }
+ else if (var->data.type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) {
+ if (chunk_dup(&new->data.u.str, &var->data.u.str) == NULL)
+ goto error;
+ new->data.u.str.area[new->data.u.str.data] = 0;
+ new->data.u.meth.meth = var->data.u.meth.meth;
+ }
+ else
+ new->data.u = var->data.u;
+ LIST_APPEND(dst, &new->list);
+ }
+ return 1;
+
+ error:
+ free(new);
+ return 0;
+}
+
+/* Looks for a shared tcp-check ruleset given its name. */
+struct tcpcheck_ruleset *find_tcpcheck_ruleset(const char *name)
+{
+ struct tcpcheck_ruleset *rs;
+ struct ebpt_node *node;
+
+ node = ebis_lookup_len(&shared_tcpchecks, name, strlen(name));
+ if (node) {
+ rs = container_of(node, typeof(*rs), node);
+ return rs;
+ }
+ return NULL;
+}
+
+/* Creates a new shared tcp-check ruleset and insert it in shared_tcpchecks
+ * tree.
+ */
+struct tcpcheck_ruleset *create_tcpcheck_ruleset(const char *name)
+{
+ struct tcpcheck_ruleset *rs;
+
+ rs = calloc(1, sizeof(*rs));
+ if (rs == NULL)
+ return NULL;
+
+ rs->node.key = strdup(name);
+ if (rs->node.key == NULL) {
+ free(rs);
+ return NULL;
+ }
+
+ LIST_INIT(&rs->rules);
+ ebis_insert(&shared_tcpchecks, &rs->node);
+ return rs;
+}
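+
+/* Editor's note: illustrative sketch only, not part of the upstream patch. It
+ * shows the typical find-or-create pattern combining the two helpers above,
+ * as a config parser might use it for a named ruleset. The function name is
+ * hypothetical.
+ */
+#if 0
+static struct tcpcheck_ruleset *get_or_create_tcpcheck_ruleset(const char *name)
+{
+ struct tcpcheck_ruleset *rs;
+
+ rs = find_tcpcheck_ruleset(name);
+ if (!rs)
+ rs = create_tcpcheck_ruleset(name);
+ return rs; /* NULL only on allocation failure */
+}
+#endif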
+
+/* Releases memory allocated by a tcp-check ruleset. */
+void free_tcpcheck_ruleset(struct tcpcheck_ruleset *rs)
+{
+ struct tcpcheck_rule *r, *rb;
+
+ if (!rs)
+ return;
+
+ ebpt_delete(&rs->node);
+ free(rs->node.key);
+ list_for_each_entry_safe(r, rb, &rs->rules, list) {
+ LIST_DELETE(&r->list);
+ free_tcpcheck(r, 0);
+ }
+ free(rs);
+}
+
+
+/**************************************************************************/
+/**************** Everything about tcp-checks execution *******************/
+/**************************************************************************/
+/* Returns the id of a step in a tcp-check ruleset */
+int tcpcheck_get_step_id(const struct check *check, const struct tcpcheck_rule *rule)
+{
+ if (!rule)
+ rule = check->current_step;
+
+ /* no last started step => first step */
+ if (!rule)
+ return 1;
+
+ /* last step is the first implicit connect */
+ if (rule->index == 0 &&
+ rule->action == TCPCHK_ACT_CONNECT &&
+ (rule->connect.options & TCPCHK_OPT_IMPLICIT))
+ return 0;
+
+ return rule->index + 1;
+}
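+
+/* Editor's note (illustrative, not upstream): assuming rule indexes are
+ * assigned sequentially from 0 when the ruleset is parsed, a ruleset made of
+ * an implicit connect followed by a send and an expect reports step ids 0, 2
+ * and 3: the implicit connect reports 0 and every other rule reports
+ * index + 1, so id 1 never appears in that case.
+ */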
+
+/* Returns the first non COMMENT/ACTION_KW tcp-check rule from list <list> or
+ * NULL if none was found.
+ */
+struct tcpcheck_rule *get_first_tcpcheck_rule(const struct tcpcheck_rules *rules)
+{
+ struct tcpcheck_rule *r;
+
+ list_for_each_entry(r, rules->list, list) {
+ if (r->action != TCPCHK_ACT_COMMENT && r->action != TCPCHK_ACT_ACTION_KW)
+ return r;
+ }
+ return NULL;
+}
+
+/* Returns the last non COMMENT/ACTION_KW tcp-check rule from list <list> or
+ * NULL if none was found.
+ */
+static struct tcpcheck_rule *get_last_tcpcheck_rule(struct tcpcheck_rules *rules)
+{
+ struct tcpcheck_rule *r;
+
+ list_for_each_entry_rev(r, rules->list, list) {
+ if (r->action != TCPCHK_ACT_COMMENT && r->action != TCPCHK_ACT_ACTION_KW)
+ return r;
+ }
+ return NULL;
+}
+
+/* Returns the non COMMENT/ACTION_KW tcp-check rule from list <list> following
+ * <start> or NULL if none was found. If <start> is NULL, it relies on
+ * get_first_tcpcheck_rule().
+ */
+static struct tcpcheck_rule *get_next_tcpcheck_rule(struct tcpcheck_rules *rules, struct tcpcheck_rule *start)
+{
+ struct tcpcheck_rule *r;
+
+ if (!start)
+ return get_first_tcpcheck_rule(rules);
+
+ r = LIST_NEXT(&start->list, typeof(r), list);
+ list_for_each_entry_from(r, rules->list, list) {
+ if (r->action != TCPCHK_ACT_COMMENT && r->action != TCPCHK_ACT_ACTION_KW)
+ return r;
+ }
+ return NULL;
+}
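+
+/* Editor's note: illustrative sketch only, not compiled. A hypothetical walk
+ * over every effective rule (comments and action keywords skipped), built on
+ * the two helpers above.
+ */
+#if 0
+static void walk_effective_rules(struct tcpcheck_rules *rules)
+{
+ struct tcpcheck_rule *r;
+
+ for (r = get_first_tcpcheck_rule(rules); r; r = get_next_tcpcheck_rule(rules, r)) {
+ /* only CONNECT/SEND/EXPECT rules are seen here */
+ }
+}
+#endif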
+
+
+/* Creates info message when a tcp-check healthcheck fails on an expect rule */
+static void tcpcheck_expect_onerror_message(struct buffer *msg, struct check *check, struct tcpcheck_rule *rule,
+ int match, struct ist info)
+{
+ struct sample *smp;
+ int is_empty;
+
+ /* Follow these steps to produce the info message:
+ * 1. if the info field is already provided, copy it
+ * 2. if the expect rule provides an onerror log-format string,
+ * use it to produce the message
+ * 3. if the expect rule is part of a protocol check (http, redis, mysql...), do nothing
+ * 4. otherwise produce the generic tcp-check info message
+ */
+ if (istlen(info)) {
+ chunk_istcat(msg, info);
+ goto comment;
+ }
+ else if (!LIST_ISEMPTY(&rule->expect.onerror_fmt)) {
+ msg->data += sess_build_logline(check->sess, NULL, b_tail(msg), b_room(msg), &rule->expect.onerror_fmt);
+ goto comment;
+ }
+
+ is_empty = (IS_HTX_SC(check->sc) ? htx_is_empty(htxbuf(&check->bi)) : !b_data(&check->bi));
+ if (is_empty) {
+ TRACE_ERROR("empty response", CHK_EV_RX_DATA|CHK_EV_RX_ERR, check);
+ chunk_printf(msg, "TCPCHK got an empty response at step %d",
+ tcpcheck_get_step_id(check, rule));
+ goto comment;
+ }
+
+ if (check->type == PR_O2_TCPCHK_CHK &&
+ (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) != TCPCHK_RULES_TCP_CHK) {
+ goto comment;
+ }
+
+ chunk_strcat(msg, (match ? "TCPCHK matched unwanted content" : "TCPCHK did not match content"));
+ switch (rule->expect.type) {
+ case TCPCHK_EXPECT_HTTP_STATUS:
+ chunk_appendf(msg, "(status codes) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_STRING:
+ case TCPCHK_EXPECT_HTTP_BODY:
+ chunk_appendf(msg, " '%.*s' at step %d", (unsigned int)istlen(rule->expect.data), istptr(rule->expect.data),
+ tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_BINARY:
+ chunk_appendf(msg, " (binary) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_STRING_REGEX:
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ chunk_appendf(msg, " (regex) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ chunk_appendf(msg, " (binary regex) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_STRING_LF:
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ chunk_appendf(msg, " (log-format string) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_BINARY_LF:
+ chunk_appendf(msg, " (log-format binary) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_CUSTOM:
+ chunk_appendf(msg, " (custom function) at step %d", tcpcheck_get_step_id(check, rule));
+ break;
+ case TCPCHK_EXPECT_HTTP_HEADER:
+ chunk_appendf(msg, " (header pattern) at step %d", tcpcheck_get_step_id(check, rule));
+ case TCPCHK_EXPECT_UNDEF:
+ /* Should never happen. */
+ return;
+ }
+
+ comment:
+ /* If the failing expect rule provides a comment, it is concatenated to
+ * the info message.
+ */
+ if (rule->comment) {
+ chunk_strcat(msg, " comment: ");
+ chunk_strcat(msg, rule->comment);
+ }
+
+ /* Finally, the check status code is set if the failing expect rule
+ * defines a status expression.
+ */
+ if (rule->expect.status_expr) {
+ smp = sample_fetch_as_type(check->proxy, check->sess, NULL, SMP_OPT_DIR_RES | SMP_OPT_FINAL,
+ rule->expect.status_expr, SMP_T_STR);
+
+ if (smp && sample_casts[smp->data.type][SMP_T_SINT] &&
+ sample_casts[smp->data.type][SMP_T_SINT](smp))
+ check->code = smp->data.u.sint;
+ }
+
+ *(b_tail(msg)) = '\0';
+}
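+
+/* Editor's note (illustrative, not upstream): for a failing
+ * TCPCHK_EXPECT_STRING rule with the hypothetical pattern "+PONG" as second
+ * step and a "comment ping reply" clause, the function above would typically
+ * produce:
+ *   TCPCHK did not match content '+PONG' at step 2 comment: ping reply
+ */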
+
+/* Creates info message when a tcp-check healthcheck succeeds on an expect rule */
+static void tcpcheck_expect_onsuccess_message(struct buffer *msg, struct check *check, struct tcpcheck_rule *rule,
+ struct ist info)
+{
+ struct sample *smp;
+
+ /* Follow these steps to produce the info message:
+ * 1. if the info field is already provided, copy it
+ * 2. if the expect rule provides an onsuccess log-format string,
+ * use it to produce the message
+ * 3. if the expect rule is part of a protocol check (http, redis, mysql...), do nothing
+ * 4. otherwise produce the generic tcp-check info message
+ */
+ if (istlen(info))
+ chunk_istcat(msg, info);
+ if (!LIST_ISEMPTY(&rule->expect.onsuccess_fmt))
+ msg->data += sess_build_logline(check->sess, NULL, b_tail(msg), b_room(msg),
+ &rule->expect.onsuccess_fmt);
+ else if (check->type == PR_O2_TCPCHK_CHK &&
+ (check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK)
+ chunk_strcat(msg, "(tcp-check)");
+
+ /* Finally, the check status code is set if the expect rule defines a
+ * status expression.
+ */
+ if (rule->expect.status_expr) {
+ smp = sample_fetch_as_type(check->proxy, check->sess, NULL, SMP_OPT_DIR_RES | SMP_OPT_FINAL,
+ rule->expect.status_expr, SMP_T_STR);
+
+ if (smp && sample_casts[smp->data.type][SMP_T_SINT] &&
+ sample_casts[smp->data.type][SMP_T_SINT](smp))
+ check->code = smp->data.u.sint;
+ }
+
+ *(b_tail(msg)) = '\0';
+}
+
+/* Internal function to parse and validate a MySQL packet in the context of an
+ * expect rule. It starts parsing the input buffer at offset <offset>. If
+ * <last_read> is set, no more data are expected.
+ */
+static enum tcpcheck_eval_ret tcpcheck_mysql_expect_packet(struct check *check, struct tcpcheck_rule *rule,
+ unsigned int offset, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ enum healthcheck_status status;
+ struct buffer *msg = NULL;
+ struct ist desc = IST_NULL;
+ unsigned int err = 0, plen = 0;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ /* 3 Bytes for the packet length and 1 byte for the sequence id */
+ if (b_data(&check->bi) < offset+4) {
+ if (!last_read)
+ goto wait_more_data;
+
+ /* invalid length or truncated response */
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+ }
+
+ plen = ((unsigned char) *b_peek(&check->bi, offset)) +
+ (((unsigned char) *(b_peek(&check->bi, offset+1))) << 8) +
+ (((unsigned char) *(b_peek(&check->bi, offset+2))) << 16);
+
+ if (b_data(&check->bi) < offset+plen+4) {
+ if (!last_read)
+ goto wait_more_data;
+
+ /* invalid length or truncated response */
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+ }
+
+ if (*b_peek(&check->bi, offset+4) == '\xff') {
+ /* MySQL Error packet always begins with field_count = 0xff */
+ status = HCHK_STATUS_L7STS;
+ err = ((unsigned char) *b_peek(&check->bi, offset+5)) +
+ (((unsigned char) *(b_peek(&check->bi, offset+6))) << 8);
+ desc = ist2(b_peek(&check->bi, offset+7), b_data(&check->bi) - offset - 7);
+ goto error;
+ }
+
+ if (get_next_tcpcheck_rule(check->tcpcheck_rules, rule) != NULL) {
+ /* Not the last rule, continue */
+ goto out;
+ }
+
+ /* We set the MySQL version in the description for information purposes.
+ * FIXME: it could be useful to rely on the MySQL version for other
+ * purposes, such as marking old MySQL servers as down.
+ */
+ status = ((rule->expect.ok_status != HCHK_STATUS_UNKNOWN) ? rule->expect.ok_status : HCHK_STATUS_L7OKD);
+ set_server_check_status(check, status, b_peek(&check->bi, 5));
+
+ out:
+ free_trash_chunk(msg);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error:
+ ret = TCPCHK_EVAL_STOP;
+ check->code = err;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, 0, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ goto out;
+
+ wait_more_data:
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
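+
+/* Editor's note: standalone, compilable sketch (not upstream) restating the
+ * MySQL packet header layout parsed above: a 3-byte little-endian payload
+ * length followed by a 1-byte sequence id, then <len> payload bytes. An error
+ * packet starts its payload with 0xff followed by a 2-byte LE error code.
+ */
+#if 0
+#include <stdint.h>
+
+/* <hdr> must point at the 4 header bytes: len (3 bytes, LE) + sequence id */
+static uint32_t mysql_packet_len(const unsigned char *hdr)
+{
+ return (uint32_t)hdr[0] | ((uint32_t)hdr[1] << 8) | ((uint32_t)hdr[2] << 16);
+}
+#endif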
+
+/* Custom tcp-check expect function to parse and validate the MySQL initial
+ * handshake packet. Returns TCPCHK_EVAL_WAIT to wait for more data,
+ * TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP if an
+ * error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_mysql_expect_iniths(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ return tcpcheck_mysql_expect_packet(check, rule, 0, last_read);
+}
+
+/* Custom tcp-check expect function to parse and validate the MySQL OK packet
+ * following the initial handshake. Returns TCPCHK_EVAL_WAIT to wait for more
+ * data, TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP if
+ * an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_mysql_expect_ok(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ unsigned int hslen = 0;
+
+ hslen = 4 + ((unsigned char) *b_head(&check->bi)) +
+ (((unsigned char) *(b_peek(&check->bi, 1))) << 8) +
+ (((unsigned char) *(b_peek(&check->bi, 2))) << 16);
+
+ return tcpcheck_mysql_expect_packet(check, rule, hslen, last_read);
+}
+
+/* Custom tcp-check expect function to parse and validate the LDAP bind
+ * response packet. Returns TCPCHK_EVAL_WAIT to wait for more data,
+ * TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP if an
+ * error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_ldap_expect_bindrsp(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ enum healthcheck_status status;
+ struct buffer *msg = NULL;
+ struct ist desc = IST_NULL;
+ char *ptr;
+ unsigned short nbytes = 0;
+ size_t msglen = 0;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ /* Check if the server speaks LDAP (ASN.1/BER)
+ * http://en.wikipedia.org/wiki/Basic_Encoding_Rules
+ * http://tools.ietf.org/html/rfc4511
+ */
+ ptr = b_head(&check->bi) + 1;
+
+ /* size of LDAPMessage */
+ if (*ptr & 0x80) {
+ /* For a message size encoded on several bytes, we only handle
+ * sizes encoded on 2 or 4 bytes. There is no reason to make this
+ * part too complex because only Active Directory is known to
+ * encode the BindResponse length on 4 bytes.
+ */
+ nbytes = (*ptr & 0x7f);
+ if (b_data(&check->bi) < 1 + nbytes)
+ goto too_short;
+ switch (nbytes) {
+ case 4: msglen = read_n32(ptr+1); break;
+ case 2: msglen = read_n16(ptr+1); break;
+ default:
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Not LDAPv3 protocol");
+ goto error;
+ }
+ }
+ else
+ msglen = *ptr;
+ ptr += 1 + nbytes;
+
+ if (b_data(&check->bi) < 2 + nbytes + msglen)
+ goto too_short;
+
+ /* http://tools.ietf.org/html/rfc4511#section-4.2.2
+ * messageID: 0x02 0x01 0x01: INTEGER 1
+ * protocolOp: 0x61: bindResponse
+ */
+ if (memcmp(ptr, "\x02\x01\x01\x61", 4) != 0) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Not LDAPv3 protocol");
+ goto error;
+ }
+ ptr += 4;
+
+ /* skip size of bindResponse */
+ nbytes = 0;
+ if (*ptr & 0x80)
+ nbytes = (*ptr & 0x7f);
+ ptr += 1 + nbytes;
+
+ /* http://tools.ietf.org/html/rfc4511#section-4.1.9
+ * ldapResult: 0x0a 0x01: ENUMERATION
+ */
+ if (memcmp(ptr, "\x0a\x01", 2) != 0) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Not LDAPv3 protocol");
+ goto error;
+ }
+ ptr += 2;
+
+ /* http://tools.ietf.org/html/rfc4511#section-4.1.9
+ * resultCode
+ */
+ check->code = *ptr;
+ if (check->code) {
+ status = HCHK_STATUS_L7STS;
+ desc = ist("See RFC: http://tools.ietf.org/html/rfc4511#section-4.1.9");
+ goto error;
+ }
+
+ status = ((rule->expect.ok_status != HCHK_STATUS_UNKNOWN) ? rule->expect.ok_status : HCHK_STATUS_L7OKD);
+ set_server_check_status(check, status, "Success");
+
+ out:
+ free_trash_chunk(msg);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error:
+ ret = TCPCHK_EVAL_STOP;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, 0, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ goto out;
+
+ too_short:
+ if (!last_read)
+ goto wait_more_data;
+ /* invalid length or truncated response */
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+
+ wait_more_data:
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
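+
+/* Editor's note: standalone sketch (not upstream) of the BER length decoding
+ * used above. Short form: a single byte below 0x80 is the length itself.
+ * Long form: the low 7 bits give the number of following big-endian length
+ * bytes; as in the function above, only 2- and 4-byte forms are handled.
+ */
+#if 0
+#include <stddef.h>
+
+/* Returns the decoded length and stores the header size in <consumed>, or
+ * (size_t)-1 on unsupported or truncated input.
+ */
+static size_t ber_len(const unsigned char *p, size_t avail, size_t *consumed)
+{
+ size_t nbytes;
+
+ if (avail < 1)
+ return (size_t)-1;
+ if (!(p[0] & 0x80)) { /* short form */
+ *consumed = 1;
+ return p[0];
+ }
+ nbytes = p[0] & 0x7f; /* long form */
+ if ((nbytes != 2 && nbytes != 4) || avail < 1 + nbytes)
+ return (size_t)-1;
+ *consumed = 1 + nbytes;
+ if (nbytes == 2)
+ return ((size_t)p[1] << 8) | p[2];
+ return ((size_t)p[1] << 24) | ((size_t)p[2] << 16) | ((size_t)p[3] << 8) | p[4];
+}
+#endif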
+
+/* Custom tcp-check expect function to parse and validate the SPOP hello agent
+ * frame. Returns TCPCHK_EVAL_WAIT to wait for more data, TCPCHK_EVAL_CONTINUE
+ * to evaluate the next rule or TCPCHK_EVAL_STOP if an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_spop_expect_agenthello(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ enum healthcheck_status status;
+ struct buffer *msg = NULL;
+ struct ist desc = IST_NULL;
+ unsigned int framesz;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ memcpy(&framesz, b_head(&check->bi), 4);
+ framesz = ntohl(framesz);
+
+ if (!last_read && b_data(&check->bi) < (4+framesz))
+ goto wait_more_data;
+
+ memset(b_orig(&trash), 0, b_size(&trash));
+ if (spoe_handle_healthcheck_response(b_peek(&check->bi, 4), framesz, b_orig(&trash), HCHK_DESC_LEN) == -1) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist2(b_orig(&trash), strlen(b_orig(&trash)));
+ goto error;
+ }
+
+ status = ((rule->expect.ok_status != HCHK_STATUS_UNKNOWN) ? rule->expect.ok_status : HCHK_STATUS_L7OKD);
+ set_server_check_status(check, status, "SPOA server is ok");
+
+ out:
+ free_trash_chunk(msg);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error:
+ ret = TCPCHK_EVAL_STOP;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, 0, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ goto out;
+
+ wait_more_data:
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
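+
+/* Editor's note: standalone sketch (not upstream) of the SPOP framing used
+ * above: a 4-byte network-order length followed by <len> payload bytes. This
+ * is a guarded variant that also checks the 4 header bytes are present.
+ */
+#if 0
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Returns 1 when <avail> buffered bytes contain at least one full frame */
+static int spop_frame_complete(const unsigned char *buf, size_t avail)
+{
+ uint32_t framesz;
+
+ if (avail < 4)
+ return 0;
+ memcpy(&framesz, buf, 4);
+ return avail >= 4 + (size_t)ntohl(framesz);
+}
+#endif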
+
+/* Custom tcp-check expect function to parse and validate the agent-check
+ * reply. Returns TCPCHK_EVAL_WAIT to wait for more data, TCPCHK_EVAL_CONTINUE
+ * to evaluate the next rule or TCPCHK_EVAL_STOP if an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_agent_expect_reply(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_STOP;
+ enum healthcheck_status status = HCHK_STATUS_CHECKED;
+ const char *hs = NULL; /* health status */
+ const char *as = NULL; /* admin status */
+ const char *ps = NULL; /* performance status */
+ const char *sc = NULL; /* maxconn */
+ const char *err = NULL; /* first error to report */
+ const char *wrn = NULL; /* first warning to report */
+ char *cmd, *p;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ /* We're getting an agent check response. The agent could
+ * have been disabled in the meantime with a long check
+ * still pending. It is important that we ignore the whole
+ * response.
+ */
+ if (!(check->state & CHK_ST_ENABLED))
+ goto out;
+
+ /* The agent supports strings made of a single line ended by the
+ * first CR ('\r') or LF ('\n'). This line is composed of words
+ * delimited by spaces (' '), tabs ('\t'), or commas (','). The
+ * line may optionally contain a description of a state change
+ * after a sharp ('#'), which is only considered if a health state
+ * is announced.
+ *
+ * Words may be composed of :
+ * - a numeric weight suffixed by the percent character ('%').
+ * - a health status among "up", "down", "stopped", and "fail".
+ * - an admin status among "ready", "drain", "maint".
+ *
+ * These words may appear in any order. If multiple words of the
+ * same category appear, the last one wins.
+ */
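+ /* Editor's note (illustrative, not upstream): a well-formed reply could
+ * look like:
+ *   up 75% maxconn:30 ready # back from maintenance
+ * which sets the health to UP, the weight to 75%, maxconn to 30, the
+ * admin state to READY, and keeps the rest as the description.
+ */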
+
+ p = b_head(&check->bi);
+ while (*p && *p != '\n' && *p != '\r')
+ p++;
+
+ if (!*p) {
+ if (!last_read)
+ goto wait_more_data;
+
+ /* at least inform the admin that the agent is mis-behaving */
+ set_server_check_status(check, check->status, "Ignoring incomplete line from agent");
+ goto out;
+ }
+
+ *p = 0;
+ cmd = b_head(&check->bi);
+
+ while (*cmd) {
+ /* look for next word */
+ if (*cmd == ' ' || *cmd == '\t' || *cmd == ',') {
+ cmd++;
+ continue;
+ }
+
+ if (*cmd == '#') {
+ /* this is the beginning of a health status description,
+ * skip the sharp and blanks.
+ */
+ cmd++;
+ while (*cmd == '\t' || *cmd == ' ')
+ cmd++;
+ break;
+ }
+
+ /* find the end of the word so that we have a null-terminated
+ * word between <cmd> and <p>.
+ */
+ p = cmd + 1;
+ while (*p && *p != '\t' && *p != ' ' && *p != '\n' && *p != ',')
+ p++;
+ if (*p)
+ *p++ = 0;
+
+ /* first, health statuses */
+ if (strcasecmp(cmd, "up") == 0) {
+ check->health = check->rise + check->fall - 1;
+ status = HCHK_STATUS_L7OKD;
+ hs = cmd;
+ }
+ else if (strcasecmp(cmd, "down") == 0) {
+ check->health = 0;
+ status = HCHK_STATUS_L7STS;
+ hs = cmd;
+ }
+ else if (strcasecmp(cmd, "stopped") == 0) {
+ check->health = 0;
+ status = HCHK_STATUS_L7STS;
+ hs = cmd;
+ }
+ else if (strcasecmp(cmd, "fail") == 0) {
+ check->health = 0;
+ status = HCHK_STATUS_L7STS;
+ hs = cmd;
+ }
+ /* admin statuses */
+ else if (strcasecmp(cmd, "ready") == 0) {
+ as = cmd;
+ }
+ else if (strcasecmp(cmd, "drain") == 0) {
+ as = cmd;
+ }
+ else if (strcasecmp(cmd, "maint") == 0) {
+ as = cmd;
+ }
+ /* try to parse a weight here and keep the last one */
+ else if (isdigit((unsigned char)*cmd) && strchr(cmd, '%') != NULL) {
+ ps = cmd;
+ }
+ /* try to parse a maxconn here */
+ else if (strncasecmp(cmd, "maxconn:", strlen("maxconn:")) == 0) {
+ sc = cmd;
+ }
+ else {
+ /* keep a copy of the first error */
+ if (!err)
+ err = cmd;
+ }
+ /* skip to next word */
+ cmd = p;
+ }
+ /* here, cmd points either to \0 or to the beginning of a
+ * description. Skip possible leading spaces.
+ */
+ while (*cmd == ' ' || *cmd == '\n')
+ cmd++;
+
+ /* First, update the admin status so that we avoid sending other
+ * possibly useless warnings and can also update the health if
+ * present after going back up.
+ */
+ if (as) {
+ if (strcasecmp(as, "drain") == 0) {
+ TRACE_DEVEL("set server into DRAIN mode", CHK_EV_TCPCHK_EXP, check);
+ srv_adm_set_drain(check->server);
+ }
+ else if (strcasecmp(as, "maint") == 0) {
+ TRACE_DEVEL("set server into MAINT mode", CHK_EV_TCPCHK_EXP, check);
+ srv_adm_set_maint(check->server);
+ }
+ else {
+ TRACE_DEVEL("set server into READY mode", CHK_EV_TCPCHK_EXP, check);
+ srv_adm_set_ready(check->server);
+ }
+ }
+
+ /* now change weights */
+ if (ps) {
+ const char *msg;
+
+ TRACE_DEVEL("change server weight", CHK_EV_TCPCHK_EXP, check);
+ msg = server_parse_weight_change_request(check->server, ps);
+ if (!wrn || !*wrn)
+ wrn = msg;
+ }
+
+ if (sc) {
+ const char *msg;
+
+ sc += strlen("maxconn:");
+
+ TRACE_DEVEL("change server maxconn", CHK_EV_TCPCHK_EXP, check);
+ /* This is safe to call server_parse_maxconn_change_request
+ * because the server lock is held during the check.
+ */
+ msg = server_parse_maxconn_change_request(check->server, sc);
+ if (!wrn || !*wrn)
+ wrn = msg;
+ }
+
+ /* and finally health status */
+ if (hs) {
+ /* We'll report some of the warnings and errors we have
+ * here. Down reports are critical, we leave them untouched.
+ * Lack of report, or report of 'UP' leaves the room for
+ * ERR first, then WARN.
+ */
+ const char *msg = cmd;
+ struct buffer *t;
+
+ if (!*msg || status == HCHK_STATUS_L7OKD) {
+ if (err && *err)
+ msg = err;
+ else if (wrn && *wrn)
+ msg = wrn;
+ }
+
+ t = get_trash_chunk();
+ chunk_printf(t, "via agent : %s%s%s%s",
+ hs, *msg ? " (" : "",
+ msg, *msg ? ")" : "");
+ TRACE_DEVEL("update server health status", CHK_EV_TCPCHK_EXP, check);
+ set_server_check_status(check, status, t->area);
+ }
+ else if (err && *err) {
+ /* No status change but we'd like to report something odd.
+ * Just report the current state and copy the message.
+ */
+ TRACE_DEVEL("agent reports an error", CHK_EV_TCPCHK_EXP, check);
+ chunk_printf(&trash, "agent reports an error : %s", err);
+ set_server_check_status(check, status/*check->status*/, trash.area);
+ }
+ else if (wrn && *wrn) {
+ /* No status change but we'd like to report something odd.
+ * Just report the current state and copy the message.
+ */
+ TRACE_DEVEL("agent reports a warning", CHK_EV_TCPCHK_EXP, check);
+ chunk_printf(&trash, "agent warns : %s", wrn);
+ set_server_check_status(check, status/*check->status*/, trash.area);
+ }
+ else {
+ TRACE_DEVEL("update server health status", CHK_EV_TCPCHK_EXP, check);
+ set_server_check_status(check, status, NULL);
+ }
+
+ out:
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ wait_more_data:
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Evaluates a TCPCHK_ACT_CONNECT rule. Returns TCPCHK_EVAL_WAIT to wait for
+ * the connection establishment, TCPCHK_EVAL_CONTINUE to evaluate the next rule
+ * TCPCHK_EVAL_STOP if an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_eval_connect(struct check *check, struct tcpcheck_rule *rule)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct tcpcheck_connect *connect = &rule->connect;
+ struct proxy *proxy = check->proxy;
+ struct server *s = check->server;
+ struct task *t = check->task;
+ struct connection *conn = sc_conn(check->sc);
+ struct protocol *proto;
+ struct xprt_ops *xprt;
+ struct tcpcheck_rule *next;
+ int status, port;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_CONN, check);
+
+ next = get_next_tcpcheck_rule(check->tcpcheck_rules, rule);
+
+ /* current connection already created, check if it is established or not */
+ if (conn) {
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ /* We are still waiting for the connection establishment */
+ if (next && next->action == TCPCHK_ACT_SEND) {
+ if (!(check->sc->wait_event.events & SUB_RETRY_SEND))
+ conn->mux->subscribe(check->sc, SUB_RETRY_SEND, &check->sc->wait_event);
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("not connected yet", CHK_EV_TCPCHK_CONN, check);
+ }
+ else
+ ret = tcpcheck_eval_recv(check, rule);
+ }
+ goto out;
+ }
+
+ /* Note: here check->sc = sc = conn = NULL */
+
+ /* Always release input and output buffer when a new connect is evaluated */
+ check_release_buf(check, &check->bi);
+ check_release_buf(check, &check->bo);
+
+ /* No connection, prepare a new one */
+ conn = conn_new((s ? &s->obj_type : &proxy->obj_type));
+ if (!conn) {
+ chunk_printf(&trash, "TCPCHK error allocating connection at step %d",
+ tcpcheck_get_step_id(check, rule));
+ if (rule->comment)
+ chunk_appendf(&trash, " comment: '%s'", rule->comment);
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ TRACE_ERROR("stconn allocation error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ goto out;
+ }
+ if (sc_attach_mux(check->sc, NULL, conn) < 0) {
+ TRACE_ERROR("mux attach error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ conn_free(conn);
+ conn = NULL;
+ status = SF_ERR_RESOURCE;
+ goto fail_check;
+ }
+ conn->ctx = check->sc;
+ conn_set_owner(conn, check->sess, NULL);
+
+ /* no client address */
+ if (!sockaddr_alloc(&conn->dst, NULL, 0)) {
+ TRACE_ERROR("sockaddr allocation error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ status = SF_ERR_RESOURCE;
+ goto fail_check;
+ }
+
+ /* Connect to the addr from the connect rule if specified, otherwise to
+ * the check addr if one is specified on the server. Otherwise, use the
+ * server addr (it MUST exist at this step).
+ */
+ *conn->dst = (is_addr(&connect->addr)
+ ? connect->addr
+ : (is_addr(&check->addr) ? check->addr : s->addr));
+ proto = protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, 0);
+
+ port = 0;
+ if (connect->port)
+ port = connect->port;
+ if (!port && connect->port_expr) {
+ struct sample *smp;
+
+ smp = sample_fetch_as_type(check->proxy, check->sess, NULL,
+ SMP_OPT_DIR_REQ | SMP_OPT_FINAL,
+ connect->port_expr, SMP_T_SINT);
+ if (smp)
+ port = smp->data.u.sint;
+ }
+ if (!port && is_inet_addr(&connect->addr))
+ port = get_host_port(&connect->addr);
+ if (!port && check->port)
+ port = check->port;
+ if (!port && is_inet_addr(&check->addr))
+ port = get_host_port(&check->addr);
+ if (!port) {
+ /* The server MUST exist here */
+ port = s->svc_port;
+ }
+ set_host_port(conn->dst, port);
+ TRACE_DEVEL("set port", CHK_EV_TCPCHK_CONN, check, 0, 0, (size_t[]){port});
+
+ xprt = ((connect->options & TCPCHK_OPT_SSL)
+ ? xprt_get(XPRT_SSL)
+ : ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) ? check->xprt : xprt_get(XPRT_RAW)));
+
+ if (conn_prepare(conn, proto, xprt) < 0) {
+ TRACE_ERROR("xprt allocation error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ status = SF_ERR_RESOURCE;
+ goto fail_check;
+ }
+
+ if ((connect->options & TCPCHK_OPT_SOCKS4) && s && (s->flags & SRV_F_SOCKS4_PROXY)) {
+ conn->send_proxy_ofs = 1;
+ conn->flags |= CO_FL_SOCKS4;
+ TRACE_DEVEL("configure SOCKS4 proxy", CHK_EV_TCPCHK_CONN);
+ }
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && s && s->check.via_socks4 && (s->flags & SRV_F_SOCKS4_PROXY)) {
+ conn->send_proxy_ofs = 1;
+ conn->flags |= CO_FL_SOCKS4;
+ TRACE_DEVEL("configure SOCKS4 proxy", CHK_EV_TCPCHK_CONN);
+ }
+
+ if (connect->options & TCPCHK_OPT_SEND_PROXY) {
+ conn->send_proxy_ofs = 1;
+ conn->flags |= CO_FL_SEND_PROXY;
+ TRACE_DEVEL("configure PROXY protocol", CHK_EV_TCPCHK_CONN, check);
+ }
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && s && s->check.send_proxy && !(check->state & CHK_ST_AGENT)) {
+ conn->send_proxy_ofs = 1;
+ conn->flags |= CO_FL_SEND_PROXY;
+ TRACE_DEVEL("configure PROXY protocol", CHK_EV_TCPCHK_CONN, check);
+ }
+
+ status = SF_ERR_INTERNAL;
+ if (proto && proto->connect) {
+ int flags = 0;
+
+ if (!next)
+ flags |= CONNECT_DELACK_ALWAYS;
+ if (connect->options & TCPCHK_OPT_HAS_DATA)
+ flags |= (CONNECT_HAS_DATA|CONNECT_DELACK_ALWAYS);
+ status = proto->connect(conn, flags);
+ }
+
+ if (status != SF_ERR_NONE)
+ goto fail_check;
+
+ conn_set_private(conn);
+ conn->ctx = check->sc;
+
+#ifdef USE_OPENSSL
+ if (connect->sni)
+ ssl_sock_set_servername(conn, connect->sni);
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && s && s->check.sni)
+ ssl_sock_set_servername(conn, s->check.sni);
+
+ if (connect->alpn)
+ ssl_sock_set_alpn(conn, (unsigned char *)connect->alpn, connect->alpn_len);
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && s && s->check.alpn_str)
+ ssl_sock_set_alpn(conn, (unsigned char *)s->check.alpn_str, s->check.alpn_len);
+#endif
+
+ if (conn_ctrl_ready(conn) && (connect->options & TCPCHK_OPT_LINGER) && !(conn->flags & CO_FL_FDLESS)) {
+ /* Some servers don't like reset on close */
+ HA_ATOMIC_AND(&fdtab[conn->handle.fd].state, ~FD_LINGER_RISK);
+ }
+
+ if (conn_ctrl_ready(conn) && (conn->flags & (CO_FL_SEND_PROXY | CO_FL_SOCKS4))) {
+ if (xprt_add_hs(conn) < 0)
+ status = SF_ERR_RESOURCE;
+ }
+
+ if (conn_xprt_start(conn) < 0) {
+ status = SF_ERR_RESOURCE;
+ goto fail_check;
+ }
+
+ /* The mux may be initialized now if there is no server attached to the
+ * check (email alerts), if a mux proto is specified, or if there is no
+ * ALPN.
+ */
+ if (!s || ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && check->mux_proto) ||
+ connect->mux_proto || (!connect->alpn && !check->alpn_str)) {
+ const struct mux_ops *mux_ops;
+
+ TRACE_DEVEL("try to install mux now", CHK_EV_TCPCHK_CONN, check);
+ if (connect->mux_proto)
+ mux_ops = connect->mux_proto->mux;
+ else if ((connect->options & TCPCHK_OPT_DEFAULT_CONNECT) && check->mux_proto)
+ mux_ops = check->mux_proto->mux;
+ else {
+ int mode = ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK
+ ? PROTO_MODE_HTTP
+ : PROTO_MODE_TCP);
+
+ mux_ops = conn_get_best_mux(conn, IST_NULL, PROTO_SIDE_BE, mode);
+ }
+ if (mux_ops && conn_install_mux(conn, mux_ops, check->sc, proxy, check->sess) < 0) {
+ TRACE_ERROR("failed to install mux", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ status = SF_ERR_INTERNAL;
+ goto fail_check;
+ }
+ }
+
+ fail_check:
+ /* It can return one of :
+ * - SF_ERR_NONE if everything's OK
+ * - SF_ERR_SRVTO if there are no more servers
+ * - SF_ERR_SRVCL if the connection was refused by the server
+ * - SF_ERR_PRXCOND if the connection has been limited by the proxy (maxconn)
+ * - SF_ERR_RESOURCE if a system resource is lacking (eg: fd limits, ports, ...)
+ * - SF_ERR_INTERNAL for any other purely internal errors
+ * Additionally, in the case of SF_ERR_RESOURCE, an emergency log will be emitted.
+ * Note that we try to prevent the network stack from sending the ACK during the
+ * connect() when a pure TCP check is used (without PROXY protocol).
+ */
+ switch (status) {
+ case SF_ERR_NONE:
+ /* we allow up to min(inter, timeout.connect) for a connection
+ * to establish but only when timeout.check is set, as it may be
+ * too short for a full check otherwise
+ */
+ t->expire = tick_add(now_ms, MS_TO_TICKS(check->inter));
+
+ if (proxy->timeout.check && proxy->timeout.connect) {
+ int t_con = tick_add(now_ms, proxy->timeout.connect);
+ t->expire = tick_first(t->expire, t_con);
+ }
+ break;
+ case SF_ERR_SRVTO: /* ETIMEDOUT */
+ case SF_ERR_SRVCL: /* ECONNREFUSED, ENETUNREACH, ... */
+ case SF_ERR_PRXCOND:
+ case SF_ERR_RESOURCE:
+ case SF_ERR_INTERNAL:
+ TRACE_ERROR("report connection error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check, 0, 0, (size_t[]){status});
+ chk_report_conn_err(check, errno, 0);
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+ }
+
+ /* don't do anything until the connection is established */
+ if (conn->flags & CO_FL_WAIT_XPRT) {
+ if (conn->mux) {
+ if (next && next->action == TCPCHK_ACT_SEND)
+ conn->mux->subscribe(check->sc, SUB_RETRY_SEND, &check->sc->wait_event);
+ else
+ conn->mux->subscribe(check->sc, SUB_RETRY_RECV, &check->sc->wait_event);
+ }
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("not connected yet", CHK_EV_TCPCHK_CONN, check);
+ goto out;
+ }
+
+ out:
+ if (conn && check->result == CHK_RES_FAILED) {
+ conn->flags |= CO_FL_ERROR;
+ TRACE_ERROR("connect failed, report connection error", CHK_EV_TCPCHK_CONN|CHK_EV_TCPCHK_ERR, check);
+ }
+
+ if (ret == TCPCHK_EVAL_CONTINUE && check->proxy->timeout.check)
+ check->task->expire = tick_add_ifset(now_ms, check->proxy->timeout.check);
+
+ TRACE_LEAVE(CHK_EV_TCPCHK_CONN, check, 0, 0, (size_t[]){ret});
+ return ret;
+}
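+
+/* Editor's note: illustrative sketch only, not upstream. It restates the port
+ * selection precedence implemented above with hypothetical scalar inputs,
+ * where 0 means "unset": rule port, then the port sample expression, then the
+ * port embedded in the rule address, then the check port, then the port
+ * embedded in the check address, and finally the server's svc_port.
+ */
+#if 0
+static int resolve_check_port(int rule_port, int expr_port, int rule_addr_port,
+ int check_port, int check_addr_port, int svc_port)
+{
+ if (rule_port)
+ return rule_port;
+ if (expr_port)
+ return expr_port;
+ if (rule_addr_port)
+ return rule_addr_port;
+ if (check_port)
+ return check_port;
+ if (check_addr_port)
+ return check_addr_port;
+ return svc_port; /* the server MUST exist in this last case */
+}
+#endif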
+
+/* Evaluates a TCPCHK_ACT_SEND rule. Returns TCPCHK_EVAL_WAIT if outgoing data
+ * were not fully sent, TCPCHK_EVAL_CONTINUE to evaluate the next rule or
+ * TCPCHK_EVAL_STOP if an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_eval_send(struct check *check, struct tcpcheck_rule *rule)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct tcpcheck_send *send = &rule->send;
+ struct stconn *sc = check->sc;
+ struct connection *conn = __sc_conn(sc);
+ struct buffer *tmp = NULL;
+ struct htx *htx = NULL;
+ int connection_hdr = 0;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA, check);
+
+ if (check->state & CHK_ST_OUT_ALLOC) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_STATE("waiting for output buffer allocation", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TX_BLK, check);
+ goto out;
+ }
+
+ if (!check_get_buf(check, &check->bo)) {
+ check->state |= CHK_ST_OUT_ALLOC;
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_STATE("waiting for output buffer allocation", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TX_BLK, check);
+ goto out;
+ }
+
+ /* Data already pending in the output buffer, send them now */
+ if ((IS_HTX_CONN(conn) && !htx_is_empty(htxbuf(&check->bo))) || (!IS_HTX_CONN(conn) && b_data(&check->bo))) {
+ TRACE_DEVEL("Data still pending, try to send it now", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA, check);
+ goto do_send;
+ }
+
+ /* Always release input buffer when a new send is evaluated */
+ check_release_buf(check, &check->bi);
+
+ switch (send->type) {
+ case TCPCHK_SEND_STRING:
+ case TCPCHK_SEND_BINARY:
+ if (istlen(send->data) >= b_size(&check->bo)) {
+ chunk_printf(&trash, "tcp-check send : string too large (%u) for buffer size (%u) at step %d",
+ (unsigned int)istlen(send->data), (unsigned int)b_size(&check->bo),
+ tcpcheck_get_step_id(check, rule));
+ set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+ }
+ b_putist(&check->bo, send->data);
+ break;
+ case TCPCHK_SEND_STRING_LF:
+ check->bo.data = sess_build_logline(check->sess, NULL, b_orig(&check->bo), b_size(&check->bo), &rule->send.fmt);
+ if (!b_data(&check->bo))
+ goto out;
+ break;
+ case TCPCHK_SEND_BINARY_LF: {
+ int len = b_size(&check->bo);
+
+ tmp = alloc_trash_chunk();
+ if (!tmp)
+ goto error_lf;
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &rule->send.fmt);
+ if (!b_data(tmp))
+ goto out;
+ tmp->area[tmp->data] = '\0';
+ if (parse_binary(b_orig(tmp), &check->bo.area, &len, NULL) == 0)
+ goto error_lf;
+ check->bo.data = len;
+ break;
+ }
+ case TCPCHK_SEND_HTTP: {
+ struct htx_sl *sl;
+ struct ist meth, uri, vsn, clen, body;
+ unsigned int slflags = 0;
+
+ tmp = alloc_trash_chunk();
+ if (!tmp)
+ goto error_htx;
+
+ meth = ((send->http.meth.meth == HTTP_METH_OTHER)
+ ? ist2(send->http.meth.str.area, send->http.meth.str.data)
+ : http_known_methods[send->http.meth.meth]);
+ if (send->http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) {
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &send->http.uri_fmt);
+ uri = (b_data(tmp) ? ist2(b_orig(tmp), b_data(tmp)) : ist("/"));
+ }
+ else
+ uri = (isttest(send->http.uri) ? send->http.uri : ist("/"));
+ vsn = (isttest(send->http.vsn) ? send->http.vsn : ist("HTTP/1.0"));
+
+ if ((istlen(vsn) == 6 && *(vsn.ptr+5) == '2') ||
+ (istlen(vsn) == 8 && (*(vsn.ptr+5) > '1' || (*(vsn.ptr+5) == '1' && *(vsn.ptr+7) >= '1'))))
+ slflags |= HTX_SL_F_VER_11;
+ slflags |= (HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN);
+ if (!(send->http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && !isttest(send->http.body))
+ slflags |= HTX_SL_F_BODYLESS;
+
+ htx = htx_from_buf(&check->bo);
+ sl = htx_add_stline(htx, HTX_BLK_REQ_SL, slflags, meth, uri, vsn);
+ if (!sl)
+ goto error_htx;
+ sl->info.req.meth = send->http.meth.meth;
+ if (!http_update_host(htx, sl, uri))
+ goto error_htx;
+
+ if (!LIST_ISEMPTY(&send->http.hdrs)) {
+ struct tcpcheck_http_hdr *hdr;
+ struct ist hdr_value;
+
+ list_for_each_entry(hdr, &send->http.hdrs, list) {
+ chunk_reset(tmp);
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &hdr->value);
+ if (!b_data(tmp))
+ continue;
+ hdr_value = ist2(b_orig(tmp), b_data(tmp));
+ if (!htx_add_header(htx, hdr->name, hdr_value))
+ goto error_htx;
+ if ((sl->flags & HTX_SL_F_HAS_AUTHORITY) && isteqi(hdr->name, ist("host"))) {
+ if (!http_update_authority(htx, sl, hdr_value))
+ goto error_htx;
+ }
+ if (isteqi(hdr->name, ist("connection")))
+ connection_hdr = 1;
+ }
+
+ }
+ if (check->proxy->options2 & PR_O2_CHK_SNDST) {
+ chunk_reset(tmp);
+ httpchk_build_status_header(check->server, tmp);
+ if (!htx_add_header(htx, ist("X-Haproxy-Server-State"), ist2(b_orig(tmp), b_data(tmp))))
+ goto error_htx;
+ }
+
+ if (send->http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) {
+ chunk_reset(tmp);
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &send->http.body_fmt);
+ body = ist2(b_orig(tmp), b_data(tmp));
+ }
+ else
+ body = send->http.body;
+
+ if (!connection_hdr && !htx_add_header(htx, ist("Connection"), ist("close")))
+ goto error_htx;
+
+ if ((send->http.meth.meth != HTTP_METH_OPTIONS &&
+ send->http.meth.meth != HTTP_METH_GET &&
+ send->http.meth.meth != HTTP_METH_HEAD &&
+ send->http.meth.meth != HTTP_METH_DELETE) || istlen(body)) {
+ clen = ist((!istlen(body) ? "0" : ultoa(istlen(body))));
+ if (!htx_add_header(htx, ist("Content-length"), clen))
+ goto error_htx;
+ }
+
+ if (!htx_add_endof(htx, HTX_BLK_EOH) ||
+ (istlen(body) && !htx_add_data_atonce(htx, body)))
+ goto error_htx;
+
+ /* no more data are expected */
+ htx->flags |= HTX_FL_EOM;
+ htx_to_buf(htx, &check->bo);
+ break;
+ }
+ case TCPCHK_SEND_UNDEF:
+ /* Should never happen. */
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+ }
+
+ do_send:
+ TRACE_DATA("send data", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA, check);
+ if (conn->mux->snd_buf(sc, &check->bo,
+ (IS_HTX_CONN(conn) ? (htxbuf(&check->bo))->data: b_data(&check->bo)), 0) <= 0) {
+ if ((conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR)) {
+ ret = TCPCHK_EVAL_STOP;
+ TRACE_DEVEL("connection error during send", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TX_ERR, check);
+ goto out;
+ }
+ }
+ if ((IS_HTX_CONN(conn) && !htx_is_empty(htxbuf(&check->bo))) || (!IS_HTX_CONN(conn) && b_data(&check->bo))) {
+ conn->mux->subscribe(sc, SUB_RETRY_SEND, &sc->wait_event);
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("data not fully sent, wait", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA, check);
+ goto out;
+ }
+
+ out:
+ free_trash_chunk(tmp);
+ if (!b_data(&check->bo) || ret == TCPCHK_EVAL_STOP)
+ check_release_buf(check, &check->bo);
+
+ TRACE_LEAVE(CHK_EV_TCPCHK_SND, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error_htx:
+ if (htx) {
+ htx_reset(htx);
+ htx_to_buf(htx, &check->bo);
+ }
+ chunk_printf(&trash, "tcp-check send : failed to build HTTP request at step %d",
+ tcpcheck_get_step_id(check, rule));
+ TRACE_ERROR("failed to build HTTP request", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TCPCHK_ERR, check);
+ set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+
+ error_lf:
+ chunk_printf(&trash, "tcp-check send : failed to build log-format string at step %d",
+ tcpcheck_get_step_id(check, rule));
+ TRACE_ERROR("failed to build log-format string", CHK_EV_TCPCHK_SND|CHK_EV_TX_DATA|CHK_EV_TCPCHK_ERR, check);
+ set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+}
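+
+/* Editor's note: standalone sketch (not upstream) of the version test used
+ * above when building the request: HTX_SL_F_VER_11 is set for "HTTP/2"
+ * (length 6) and for any "HTTP/x.y" (length 8) with x.y >= 1.1.
+ */
+#if 0
+#include <stddef.h>
+
+static int vsn_is_11_or_above(const char *v, size_t len)
+{
+ return (len == 6 && v[5] == '2') ||
+ (len == 8 && (v[5] > '1' || (v[5] == '1' && v[7] >= '1')));
+}
+#endif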
+
+/* Try to receive data before evaluating a tcp-check expect rule. Returns
+ * TCPCHK_EVAL_WAIT if it is already subscribed to receive events or if nothing
+ * was received, TCPCHK_EVAL_CONTINUE to evaluate the expect rule or
+ * TCPCHK_EVAL_STOP if an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_eval_recv(struct check *check, struct tcpcheck_rule *rule)
+{
+ struct stconn *sc = check->sc;
+ struct connection *conn = __sc_conn(sc);
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ size_t max, read, cur_read = 0;
+ int is_empty;
+ int read_poll = MAX_READ_POLL_LOOPS;
+
+ TRACE_ENTER(CHK_EV_RX_DATA, check);
+
+ if (sc->wait_event.events & SUB_RETRY_RECV) {
+ TRACE_DEVEL("waiting for response", CHK_EV_RX_DATA, check);
+ goto wait_more_data;
+ }
+
+ if (sc_ep_test(sc, SE_FL_EOS))
+ goto end_recv;
+
+ if (check->state & CHK_ST_IN_ALLOC) {
+ TRACE_STATE("waiting for input buffer allocation", CHK_EV_RX_DATA|CHK_EV_RX_BLK, check);
+ goto wait_more_data;
+ }
+
+ if (!check_get_buf(check, &check->bi)) {
+ check->state |= CHK_ST_IN_ALLOC;
+ TRACE_STATE("waiting for input buffer allocation", CHK_EV_RX_DATA|CHK_EV_RX_BLK, check);
+ goto wait_more_data;
+ }
+
+ /* errors on the connection and the stream connector were already checked */
+
+ /* prepare to detect if the mux needs more room */
+ sc_ep_clr(sc, SE_FL_WANT_ROOM);
+
+ while (sc_ep_test(sc, SE_FL_RCV_MORE) ||
+ (!(conn->flags & CO_FL_ERROR) && !sc_ep_test(sc, SE_FL_ERROR | SE_FL_EOS))) {
+ max = (IS_HTX_SC(sc) ? htx_free_space(htxbuf(&check->bi)) : b_room(&check->bi));
+ read = conn->mux->rcv_buf(sc, &check->bi, max, 0);
+ cur_read += read;
+ if (!read ||
+ sc_ep_test(sc, SE_FL_WANT_ROOM) ||
+ (--read_poll <= 0) ||
+ (read < max && read >= global.tune.recv_enough))
+ break;
+ }
+
+ end_recv:
+ is_empty = (IS_HTX_SC(sc) ? htx_is_empty(htxbuf(&check->bi)) : !b_data(&check->bi));
+ if (is_empty && ((conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR))) {
+ /* Report network errors only if we got no other data. Otherwise
+ * we'll let the upper layers decide whether the response is OK
+ * or not. It is very common that an RST sent by the server is
+ * reported as an error just after the last data chunk.
+ */
+ TRACE_ERROR("connection error during recv", CHK_EV_RX_DATA|CHK_EV_RX_ERR, check);
+ goto stop;
+ }
+ else if (!cur_read && !sc_ep_test(sc, SE_FL_WANT_ROOM | SE_FL_ERROR | SE_FL_EOS)) {
+ conn->mux->subscribe(sc, SUB_RETRY_RECV, &sc->wait_event);
+ TRACE_DEVEL("waiting for response", CHK_EV_RX_DATA, check);
+ goto wait_more_data;
+ }
+ TRACE_DATA("data received", CHK_EV_RX_DATA, check, 0, 0, (size_t[]){cur_read});
+
+ out:
+ if (!b_data(&check->bi) || ret == TCPCHK_EVAL_STOP)
+ check_release_buf(check, &check->bi);
+
+ TRACE_LEAVE(CHK_EV_RX_DATA, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ stop:
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+
+ wait_more_data:
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Evaluates an HTTP TCPCHK_ACT_EXPECT rule. If <last_read> is set, no more data
+ * are expected. Returns TCPCHK_EVAL_WAIT to wait for more data,
+ * TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP if an
+ * error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ struct htx *htx = htxbuf(&check->bi);
+ struct htx_sl *sl;
+ struct htx_blk *blk;
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct tcpcheck_expect *expect = &rule->expect;
+ struct buffer *msg = NULL, *tmp = NULL, *nbuf = NULL, *vbuf = NULL;
+ enum healthcheck_status status = HCHK_STATUS_L7RSP;
+ struct ist desc = IST_NULL;
+ int i, match, inverse;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ last_read |= (!htx_free_data_space(htx) || (htx->flags & HTX_FL_EOM));
+
+ if (htx->flags & HTX_FL_PARSING_ERROR) {
+ TRACE_ERROR("invalid response", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+ }
+
+ if (htx_is_empty(htx)) {
+ if (last_read) {
+ TRACE_ERROR("empty response received", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ status = HCHK_STATUS_L7RSP;
+ goto error;
+ }
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto wait_more_data;
+ }
+
+ sl = http_get_stline(htx);
+ check->code = sl->info.res.status;
+
+ if (check->server &&
+ (check->server->proxy->options & PR_O_DISABLE404) &&
+ (check->server->next_state != SRV_ST_STOPPED) &&
+ (check->code == 404)) {
+ /* 404 may be accepted as "stopping" only if the server was up */
+ TRACE_STATE("404 response & disable-404", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+
+ inverse = !!(expect->flags & TCPCHK_EXPT_FL_INV);
+ /* Make GCC happy ; initialize match to a failure state. */
+ match = inverse;
+ status = expect->err_status;
+
+ switch (expect->type) {
+ case TCPCHK_EXPECT_HTTP_STATUS:
+ match = 0;
+ for (i = 0; i < expect->codes.num; i++) {
+ if (sl->info.res.status >= expect->codes.codes[i][0] &&
+ sl->info.res.status <= expect->codes.codes[i][1]) {
+ match = 1;
+ break;
+ }
+ }
+
+ /* Set status and description in case of error */
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = htx_sl_res_reason(sl);
+ break;
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ match = regex_exec2(expect->regex, HTX_SL_RES_CPTR(sl), HTX_SL_RES_CLEN(sl));
+
+ /* Set status and description in case of error */
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = htx_sl_res_reason(sl);
+ break;
+
+ case TCPCHK_EXPECT_HTTP_HEADER: {
+ struct http_hdr_ctx ctx;
+ struct ist npat, vpat, value;
+ int full = (expect->flags & (TCPCHK_EXPT_FL_HTTP_HVAL_NONE|TCPCHK_EXPT_FL_HTTP_HVAL_FULL));
+
+ if (expect->flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) {
+ nbuf = alloc_trash_chunk();
+ if (!nbuf) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Failed to allocate buffer to eval log-format string");
+ TRACE_ERROR("buffer allocation failure", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ nbuf->data = sess_build_logline(check->sess, NULL, b_orig(nbuf), b_size(nbuf), &expect->hdr.name_fmt);
+ if (!b_data(nbuf)) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("log-format string evaluated to an empty string");
+ TRACE_ERROR("invalid log-format string (hdr name)", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ npat = ist2(b_orig(nbuf), b_data(nbuf));
+ }
+ else if (!(expect->flags & TCPCHK_EXPT_FL_HTTP_HNAME_REG))
+ npat = expect->hdr.name;
+
+ if (expect->flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) {
+ vbuf = alloc_trash_chunk();
+ if (!vbuf) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Failed to allocate buffer to eval log-format string");
+ TRACE_ERROR("buffer allocation failure", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ vbuf->data = sess_build_logline(check->sess, NULL, b_orig(vbuf), b_size(vbuf), &expect->hdr.value_fmt);
+ if (!b_data(vbuf)) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("log-format string evaluated to an empty string");
+ TRACE_ERROR("invalid log-format string (hdr value)", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ vpat = ist2(b_orig(vbuf), b_data(vbuf));
+ }
+ else if (!(expect->flags & TCPCHK_EXPT_FL_HTTP_HVAL_REG))
+ vpat = expect->hdr.value;
+
+ match = 0;
+ ctx.blk = NULL;
+ while (1) {
+ switch (expect->flags & TCPCHK_EXPT_FL_HTTP_HNAME_TYPE) {
+ case TCPCHK_EXPT_FL_HTTP_HNAME_STR:
+ if (!http_find_str_header(htx, npat, &ctx, full))
+ goto end_of_match;
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HNAME_BEG:
+ if (!http_find_pfx_header(htx, npat, &ctx, full))
+ goto end_of_match;
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HNAME_END:
+ if (!http_find_sfx_header(htx, npat, &ctx, full))
+ goto end_of_match;
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HNAME_SUB:
+ if (!http_find_sub_header(htx, npat, &ctx, full))
+ goto end_of_match;
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HNAME_REG:
+ if (!http_match_header(htx, expect->hdr.name_re, &ctx, full))
+ goto end_of_match;
+ break;
+ default:
+ /* should never happen */
+ goto end_of_match;
+ }
+
+ /* A header has matched the name pattern, let's test its
+ * value now (always defined from there). If there is no
+ * value pattern, it is a good match.
+ */
+
+ if (expect->flags & TCPCHK_EXPT_FL_HTTP_HVAL_NONE) {
+ match = 1;
+ goto end_of_match;
+ }
+
+ value = ctx.value;
+ switch (expect->flags & TCPCHK_EXPT_FL_HTTP_HVAL_TYPE) {
+ case TCPCHK_EXPT_FL_HTTP_HVAL_STR:
+ if (isteq(value, vpat)) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HVAL_BEG:
+ if (istlen(value) < istlen(vpat))
+ break;
+ value = ist2(istptr(value), istlen(vpat));
+ if (isteq(value, vpat)) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HVAL_END:
+ if (istlen(value) < istlen(vpat))
+ break;
+ value = ist2(istend(value) - istlen(vpat), istlen(vpat));
+ if (isteq(value, vpat)) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HVAL_SUB:
+ if (isttest(istist(value, vpat))) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ case TCPCHK_EXPT_FL_HTTP_HVAL_REG:
+ if (regex_exec2(expect->hdr.value_re, istptr(value), istlen(value))) {
+ match = 1;
+ goto end_of_match;
+ }
+ break;
+ }
+ }
+
+ end_of_match:
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = htx_sl_res_reason(sl);
+ break;
+ }
+
+ case TCPCHK_EXPECT_HTTP_BODY:
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ match = 0;
+ chunk_reset(&trash);
+ for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) {
+ enum htx_blk_type type = htx_get_blk_type(blk);
+
+ if (type == HTX_BLK_TLR || type == HTX_BLK_EOT)
+ break;
+ if (type == HTX_BLK_DATA) {
+ if (!chunk_istcat(&trash, htx_get_blk_value(htx, blk)))
+ break;
+ }
+ }
+
+ if (!b_data(&trash)) {
+ if (!last_read) {
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto wait_more_data;
+ }
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7RSP);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = ist("HTTP content check could not find a response body");
+ TRACE_ERROR("no response boduy found while expected", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+
+ if (expect->type == TCPCHK_EXPECT_HTTP_BODY_LF) {
+ tmp = alloc_trash_chunk();
+ if (!tmp) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Failed to allocate buffer to eval log-format string");
+ TRACE_ERROR("buffer allocation failure", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &expect->fmt);
+ if (!b_data(tmp)) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("log-format string evaluated to an empty string");
+ TRACE_ERROR("invalid log-format string", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ }
+
+ if (!last_read &&
+ ((expect->type == TCPCHK_EXPECT_HTTP_BODY && b_data(&trash) < istlen(expect->data)) ||
+ ((expect->type == TCPCHK_EXPECT_HTTP_BODY_LF && b_data(&trash) < b_data(tmp))) ||
+ (expect->min_recv > 0 && b_data(&trash) < expect->min_recv))) {
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+ }
+
+ if (expect->type == TCPCHK_EXPECT_HTTP_BODY)
+ match = my_memmem(b_orig(&trash), b_data(&trash), istptr(expect->data), istlen(expect->data)) != NULL;
+ else if (expect->type == TCPCHK_EXPECT_HTTP_BODY_LF)
+ match = my_memmem(b_orig(&trash), b_data(&trash), b_orig(tmp), b_data(tmp)) != NULL;
+ else
+ match = regex_exec2(expect->regex, b_orig(&trash), b_data(&trash));
+
+ /* Wait for more data on mismatch only if no minimum is defined (-1),
+ * otherwise the absence of match is already conclusive.
+ */
+ if (!match && !last_read && (expect->min_recv == -1)) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+
+ /* Set status and description in case of error */
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7RSP);
+ if (LIST_ISEMPTY(&expect->onerror_fmt))
+ desc = (inverse
+ ? ist("HTTP check matched unwanted content")
+ : ist("HTTP content check did not match"));
+ break;
+
+
+ default:
+ /* should never happen */
+ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7RSP);
+ goto error;
+ }
+
+ if (!(match ^ inverse)) {
+ TRACE_STATE("expect rule failed", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+
+ TRACE_STATE("expect rule succeeded", CHK_EV_TCPCHK_EXP, check);
+
+ out:
+ free_trash_chunk(tmp);
+ free_trash_chunk(nbuf);
+ free_trash_chunk(vbuf);
+ free_trash_chunk(msg);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+
+ error:
+ TRACE_STATE("expect rule failed", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ ret = TCPCHK_EVAL_STOP;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, 0, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ goto out;
+
+ wait_more_data:
+ ret = TCPCHK_EVAL_WAIT;
+ goto out;
+}
+
+/* Evaluates a TCP TCPCHK_ACT_EXPECT rule. Returns TCPCHK_EVAL_WAIT to wait for
+ * more data, TCPCHK_EVAL_CONTINUE to evaluate the next rule or TCPCHK_EVAL_STOP
+ * if an error occurred.
+ */
+enum tcpcheck_eval_ret tcpcheck_eval_expect(struct check *check, struct tcpcheck_rule *rule, int last_read)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct tcpcheck_expect *expect = &rule->expect;
+ struct buffer *msg = NULL, *tmp = NULL;
+ struct ist desc = IST_NULL;
+ enum healthcheck_status status;
+ int match, inverse;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EXP, check);
+
+ last_read |= b_full(&check->bi);
+
+ /* The current expect might need more data than the previous one, so check
+ * again that the minimum amount of data required to match is respected.
+ */
+ if (!last_read) {
+ if ((expect->type == TCPCHK_EXPECT_STRING || expect->type == TCPCHK_EXPECT_BINARY) &&
+ (b_data(&check->bi) < istlen(expect->data))) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+ if (expect->min_recv > 0 && (b_data(&check->bi) < expect->min_recv)) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+ }
+
+ inverse = !!(expect->flags & TCPCHK_EXPT_FL_INV);
+ /* Make GCC happy ; initialize match to a failure state. */
+ match = inverse;
+ status = ((expect->err_status != HCHK_STATUS_UNKNOWN) ? expect->err_status : HCHK_STATUS_L7RSP);
+
+ switch (expect->type) {
+ case TCPCHK_EXPECT_STRING:
+ case TCPCHK_EXPECT_BINARY:
+ match = my_memmem(b_head(&check->bi), b_data(&check->bi), istptr(expect->data), istlen(expect->data)) != NULL;
+ break;
+ case TCPCHK_EXPECT_STRING_REGEX:
+ match = regex_exec2(expect->regex, b_head(&check->bi), MIN(b_data(&check->bi), b_size(&check->bi)-1));
+ break;
+
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ chunk_reset(&trash);
+ dump_binary(&trash, b_head(&check->bi), b_data(&check->bi));
+ match = regex_exec2(expect->regex, b_head(&trash), MIN(b_data(&trash), b_size(&trash)-1));
+ break;
+
+ case TCPCHK_EXPECT_STRING_LF:
+ case TCPCHK_EXPECT_BINARY_LF:
+ match = 0;
+ tmp = alloc_trash_chunk();
+ if (!tmp) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Failed to allocate buffer to eval format string");
+ TRACE_ERROR("buffer allocation failure", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ tmp->data = sess_build_logline(check->sess, NULL, b_orig(tmp), b_size(tmp), &expect->fmt);
+ if (!b_data(tmp)) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("log-format string evaluated to an empty string");
+ TRACE_ERROR("invalid log-format string", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ if (expect->type == TCPCHK_EXPECT_BINARY_LF) {
+ int len = tmp->data;
+ if (parse_binary(b_orig(tmp), &tmp->area, &len, NULL) == 0) {
+ status = HCHK_STATUS_L7RSP;
+ desc = ist("Failed to parse hexastring resulting of eval of a log-format string");
+ TRACE_ERROR("invalid binary log-format string", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ goto error;
+ }
+ tmp->data = len;
+ }
+ if (b_data(&check->bi) < tmp->data) {
+ if (!last_read) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+ break;
+ }
+ match = my_memmem(b_head(&check->bi), b_data(&check->bi), b_orig(tmp), b_data(tmp)) != NULL;
+ break;
+
+ case TCPCHK_EXPECT_CUSTOM:
+		/* Don't eval the custom function if the buffer is empty. It means
+		 * custom functions can't expect an empty response. If this
+		 * changes, don't forget to update this test and all custom
+		 * functions.
+		 */
+ if (!b_data(&check->bi))
+ break;
+ if (expect->custom)
+ ret = expect->custom(check, rule, last_read);
+ goto out;
+ default:
+ /* Should never happen. */
+ ret = TCPCHK_EVAL_STOP;
+ goto out;
+ }
+
+ /* Wait for more data on mismatch only if no minimum is defined (-1),
+ * otherwise the absence of match is already conclusive.
+ */
+ if (!match && !last_read && (expect->min_recv == -1)) {
+ ret = TCPCHK_EVAL_WAIT;
+ TRACE_DEVEL("waiting for more data", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+
+ /* Result as expected, next rule. */
+ if (match ^ inverse) {
+ TRACE_STATE("expect rule succeeded", CHK_EV_TCPCHK_EXP, check);
+ goto out;
+ }
+
+ error:
+ /* From this point on, we matched something we did not want, this is an error state. */
+ TRACE_STATE("expect rule failed", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check);
+ ret = TCPCHK_EVAL_STOP;
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onerror_message(msg, check, rule, match, desc);
+ set_server_check_status(check, status, (msg ? b_head(msg) : NULL));
+ free_trash_chunk(msg);
+
+ out:
+ free_trash_chunk(tmp);
+ TRACE_LEAVE(CHK_EV_TCPCHK_EXP, check, 0, 0, (size_t[]){ret});
+ return ret;
+}
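+
+/* Illustrative rules exercising the expect types evaluated above (example
+ * only, not part of this patch; the patterns are placeholders):
+ *
+ *   tcp-check expect string +OK
+ *   tcp-check expect rbinary ^2a
+ *   tcp-check expect !rstring ^-ERR
+ *
+ * A '!' prefix sets TCPCHK_EXPT_FL_INV, so the rule fails as soon as the
+ * pattern matches (the "match ^ inverse" test above).
+ */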
+
+/* Evaluates a TCPCHK_ACT_ACTION_KW rule. Returns TCPCHK_EVAL_CONTINUE to
+ * evaluate the next rule or TCPCHK_EVAL_STOP if an error occurred. It never
+ * waits.
+ */
+enum tcpcheck_eval_ret tcpcheck_eval_action_kw(struct check *check, struct tcpcheck_rule *rule)
+{
+ enum tcpcheck_eval_ret ret = TCPCHK_EVAL_CONTINUE;
+ struct act_rule *act_rule;
+ enum act_return act_ret;
+
+	act_rule = rule->action_kw.rule;
+ act_ret = act_rule->action_ptr(act_rule, check->proxy, check->sess, NULL, 0);
+ if (act_ret != ACT_RET_CONT) {
+ chunk_printf(&trash, "TCPCHK ACTION unexpected result at step %d\n",
+ tcpcheck_get_step_id(check, rule));
+ set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
+ ret = TCPCHK_EVAL_STOP;
+ }
+
+ return ret;
+}
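+
+/* Illustrative action-kw rule evaluated above (example only; "set-var" is one
+ * of the registered tcp-check action keywords):
+ *
+ *   tcp-check set-var(check.port) int(8888)
+ *
+ * Any result other than ACT_RET_CONT aborts the check with an L7RSP status.
+ */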
+
+/* Executes a tcp-check ruleset. Note that this is called both from the
+ * connection's wake() callback and from the check scheduling task. It returns
+ * 0 in normal cases, or <0 if a close() has happened on an existing connection,
+ * presenting the risk of an fd replacement.
+ *
+ * Please do NOT place any return statement in this function and only leave
+ * via the out_end_tcpcheck label after setting retcode.
+ */
+int tcpcheck_main(struct check *check)
+{
+ struct tcpcheck_rule *rule;
+ struct stconn *sc = check->sc;
+ struct connection *conn = sc_conn(sc);
+ int must_read = 1, last_read = 0;
+ int retcode = 0;
+ enum tcpcheck_eval_ret eval_ret;
+
+	/* if the result is already known, the check is finished and there is nothing left to do */
+ if (check->result != CHK_RES_UNKNOWN)
+ goto out;
+
+ TRACE_ENTER(CHK_EV_TCPCHK_EVAL, check);
+
+ /* Note: the stream connector and the connection may only be undefined before
+ * the first rule evaluation (it is always a connect rule) or when the
+	 * stream connector allocation failed on a connect rule.
+ */
+
+ /* 1- check for connection error, if any */
+ if ((conn && conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR))
+ goto out_end_tcpcheck;
+
+	/* 2- check if a rule must be resumed. This is the case when
+	 *    check->current_step is defined. */
+ else if (check->current_step) {
+ rule = check->current_step;
+ TRACE_PROTO("resume rule evaluation", CHK_EV_TCPCHK_EVAL, check, 0, 0, (size_t[]){ tcpcheck_get_step_id(check, rule)});
+ }
+
+ /* 3- It is the first evaluation. We must create a session and preset
+ * tcp-check variables */
+ else {
+ struct tcpcheck_var *var;
+
+ /* First evaluation, create a session */
+ check->sess = session_new(&checks_fe, NULL, &check->obj_type);
+ if (!check->sess) {
+ chunk_printf(&trash, "TCPCHK error allocating check session");
+ TRACE_ERROR("session allocation failure", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_ERR, check);
+ set_server_check_status(check, HCHK_STATUS_SOCKERR, trash.area);
+ goto out_end_tcpcheck;
+ }
+ vars_init_head(&check->vars, SCOPE_CHECK);
+ rule = LIST_NEXT(check->tcpcheck_rules->list, typeof(rule), list);
+
+ /* Preset tcp-check variables */
+ list_for_each_entry(var, &check->tcpcheck_rules->preset_vars, list) {
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, check->proxy, check->sess, NULL, SMP_OPT_FINAL);
+ smp.data = var->data;
+ vars_set_by_name_ifexist(istptr(var->name), istlen(var->name), &smp);
+ }
+ TRACE_PROTO("start rules evaluation", CHK_EV_TCPCHK_EVAL, check);
+ }
+
+ /* Now evaluate the tcp-check rules */
+
+ list_for_each_entry_from(rule, check->tcpcheck_rules->list, list) {
+ check->code = 0;
+ switch (rule->action) {
+ case TCPCHK_ACT_CONNECT:
+ /* Not the first connection, release it first */
+ if (sc_conn(sc) && check->current_step != rule) {
+ check->state |= CHK_ST_CLOSE_CONN;
+ retcode = -1;
+ }
+
+ check->current_step = rule;
+
+			/* We are still waiting for the previous connection to be closed */
+ if (check->state & CHK_ST_CLOSE_CONN) {
+ TRACE_DEVEL("wait previous connection closure", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_CONN, check);
+ eval_ret = TCPCHK_EVAL_WAIT;
+ break;
+ }
+
+ TRACE_PROTO("eval connect rule", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_CONN, check);
+ eval_ret = tcpcheck_eval_connect(check, rule);
+
+ /* Refresh connection */
+ conn = sc_conn(sc);
+ last_read = 0;
+ must_read = (IS_HTX_SC(sc) ? htx_is_empty(htxbuf(&check->bi)) : !b_data(&check->bi));
+ break;
+ case TCPCHK_ACT_SEND:
+ check->current_step = rule;
+ TRACE_PROTO("eval send rule", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_SND, check);
+ eval_ret = tcpcheck_eval_send(check, rule);
+ must_read = 1;
+ break;
+ case TCPCHK_ACT_EXPECT:
+ check->current_step = rule;
+ TRACE_PROTO("eval expect rule", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_EXP, check);
+ if (must_read) {
+ eval_ret = tcpcheck_eval_recv(check, rule);
+ if (eval_ret == TCPCHK_EVAL_STOP)
+ goto out_end_tcpcheck;
+ else if (eval_ret == TCPCHK_EVAL_WAIT)
+ goto out;
+ last_read = ((conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR | SE_FL_EOS));
+ must_read = 0;
+ }
+
+ eval_ret = ((check->tcpcheck_rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK
+ ? tcpcheck_eval_expect_http(check, rule, last_read)
+ : tcpcheck_eval_expect(check, rule, last_read));
+
+ if (eval_ret == TCPCHK_EVAL_WAIT) {
+ check->current_step = rule->expect.head;
+ if (!(sc->wait_event.events & SUB_RETRY_RECV))
+ conn->mux->subscribe(sc, SUB_RETRY_RECV, &sc->wait_event);
+ }
+ break;
+ case TCPCHK_ACT_ACTION_KW:
+ /* Don't update the current step */
+ TRACE_PROTO("eval action kw rule", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_ACT, check);
+ eval_ret = tcpcheck_eval_action_kw(check, rule);
+ break;
+ default:
+ /* Otherwise, just go to the next one and don't update
+ * the current step
+ */
+ eval_ret = TCPCHK_EVAL_CONTINUE;
+ break;
+ }
+
+ switch (eval_ret) {
+ case TCPCHK_EVAL_CONTINUE:
+ break;
+ case TCPCHK_EVAL_WAIT:
+ goto out;
+ case TCPCHK_EVAL_STOP:
+ goto out_end_tcpcheck;
+ }
+ }
+
+	/* All rules were evaluated */
+ if (check->current_step) {
+ rule = check->current_step;
+
+ TRACE_DEVEL("eval tcp-check result", CHK_EV_TCPCHK_EVAL, check);
+
+ if (rule->action == TCPCHK_ACT_EXPECT) {
+ struct buffer *msg;
+ enum healthcheck_status status;
+
+ if (check->server &&
+ (check->server->proxy->options & PR_O_DISABLE404) &&
+ (check->server->next_state != SRV_ST_STOPPED) &&
+ (check->code == 404)) {
+ set_server_check_status(check, HCHK_STATUS_L7OKCD, NULL);
+ TRACE_PROTO("tcp-check conditionally passed (disable-404)", CHK_EV_TCPCHK_EVAL, check);
+ goto out_end_tcpcheck;
+ }
+
+ msg = alloc_trash_chunk();
+ if (msg)
+ tcpcheck_expect_onsuccess_message(msg, check, rule, IST_NULL);
+ status = ((rule->expect.ok_status != HCHK_STATUS_UNKNOWN) ? rule->expect.ok_status : HCHK_STATUS_L7OKD);
+ set_server_check_status(check, status, (msg ? b_head(msg) : "(tcp-check)"));
+ free_trash_chunk(msg);
+ }
+ else if (rule->action == TCPCHK_ACT_CONNECT) {
+ const char *msg = ((rule->connect.options & TCPCHK_OPT_IMPLICIT) ? NULL : "(tcp-check)");
+ enum healthcheck_status status = HCHK_STATUS_L4OK;
+#ifdef USE_OPENSSL
+ if (conn_is_ssl(conn))
+ status = HCHK_STATUS_L6OK;
+#endif
+ set_server_check_status(check, status, msg);
+ }
+ else
+ set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
+ }
+ else {
+ set_server_check_status(check, HCHK_STATUS_L7OKD, "(tcp-check)");
+ }
+ TRACE_PROTO("tcp-check passed", CHK_EV_TCPCHK_EVAL, check);
+
+ out_end_tcpcheck:
+ if ((conn && conn->flags & CO_FL_ERROR) || sc_ep_test(sc, SE_FL_ERROR)) {
+ TRACE_ERROR("report connection error", CHK_EV_TCPCHK_EVAL|CHK_EV_TCPCHK_ERR, check);
+ chk_report_conn_err(check, errno, 0);
+ }
+
+ /* the tcpcheck is finished, release in/out buffer now */
+ check_release_buf(check, &check->bi);
+ check_release_buf(check, &check->bo);
+
+ out:
+ TRACE_LEAVE(CHK_EV_HCHK_RUN, check);
+ return retcode;
+}
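+
+/* Illustrative ruleset driven by tcpcheck_main() above (example only):
+ *
+ *   tcp-check connect
+ *   tcp-check send "PING\r\n"
+ *   tcp-check expect string +PONG
+ *   tcp-check connect port 443 ssl
+ *
+ * Each line yields one TCPCHK_ACT_* rule. A TCPCHK_EVAL_WAIT result leaves
+ * the loop and check->current_step records where to resume on the next
+ * wakeup.
+ */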
+
+void tcp_check_keywords_register(struct action_kw_list *kw_list)
+{
+ LIST_APPEND(&tcp_check_keywords.list, &kw_list->list);
+}
+
+/**************************************************************************/
+/******************* Internals to parse tcp-check rules *******************/
+/**************************************************************************/
+struct action_kw_list tcp_check_keywords = {
+ .list = LIST_HEAD_INIT(tcp_check_keywords.list),
+};
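+
+/* Minimal registration sketch for a custom action keyword handled by
+ * parse_tcpcheck_action() below (illustrative; "my-act" and parse_my_act are
+ * hypothetical, the INITCALL pattern mirrors the other keyword lists):
+ *
+ *   static struct action_kw_list my_kws = { .kw = {
+ *           { "my-act", parse_my_act, 0 },
+ *           { NULL, NULL }
+ *   }};
+ *   INITCALL1(STG_REGISTER, tcp_check_keywords_register, &my_kws);
+ */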
+
+/* Creates a tcp-check rule resulting from parsing a custom keyword. NULL is
+ * returned on error.
+ */
+struct tcpcheck_rule *parse_tcpcheck_action(char **args, int cur_arg, struct proxy *px,
+ struct list *rules, struct action_kw *kw,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ struct act_rule *actrule = NULL;
+
+ actrule = new_act_rule(ACT_F_TCP_CHK, file, line);
+ if (!actrule) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ actrule->kw = kw;
+
+ cur_arg++;
+ if (kw->parse((const char **)args, &cur_arg, px, actrule, errmsg) == ACT_RET_PRS_ERR) {
+ memprintf(errmsg, "'%s' : %s", kw->kw, *errmsg);
+ goto error;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_ACTION_KW;
+ chk->action_kw.rule = actrule;
+ return chk;
+
+ error:
+ free(actrule);
+ return NULL;
+}
+
+/* Parses and creates a tcp-check connect or an http-check connect rule. NULL is
+ * returned on error.
+ */
+struct tcpcheck_rule *parse_tcpcheck_connect(char **args, int cur_arg, struct proxy *px, struct list *rules,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ struct sockaddr_storage *sk = NULL;
+ char *comment = NULL, *sni = NULL, *alpn = NULL;
+ struct sample_expr *port_expr = NULL;
+ const struct mux_proto_list *mux_proto = NULL;
+ unsigned short conn_opts = 0;
+ long port = 0;
+ int alpn_len = 0;
+
+ list_for_each_entry(chk, rules, list) {
+ if (chk->action == TCPCHK_ACT_CONNECT)
+ break;
+ if (chk->action == TCPCHK_ACT_COMMENT ||
+ chk->action == TCPCHK_ACT_ACTION_KW ||
+ (chk->action == TCPCHK_ACT_SEND && (chk->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)))
+ continue;
+
+ memprintf(errmsg, "first step MUST also be a 'connect', "
+ "optionally preceded by a 'set-var', an 'unset-var' or a 'comment', "
+ "when there is a 'connect' step in the tcp-check ruleset");
+ goto error;
+ }
+
+ cur_arg++;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "default") == 0)
+ conn_opts |= TCPCHK_OPT_DEFAULT_CONNECT;
+ else if (strcmp(args[cur_arg], "addr") == 0) {
+ int port1, port2;
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects <ipv4|ipv6> as argument.", args[cur_arg]);
+ goto error;
+ }
+
+ sk = str2sa_range(args[cur_arg+1], NULL, &port1, &port2, NULL, NULL, NULL,
+ errmsg, NULL, NULL, PA_O_RESOLVE | PA_O_PORT_OK | PA_O_STREAM | PA_O_CONNECT);
+ if (!sk) {
+ memprintf(errmsg, "'%s' : %s.", args[cur_arg], *errmsg);
+ goto error;
+ }
+
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "port") == 0) {
+ const char *p, *end;
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a port number or a sample expression as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+
+ port = 0;
+ release_sample_expr(port_expr);
+ p = args[cur_arg]; end = p + strlen(p);
+ port = read_uint(&p, end);
+ if (p != end) {
+ int idx = 0;
+
+ px->conf.args.ctx = ARGC_SRV;
+ port_expr = sample_parse_expr((char *[]){args[cur_arg], NULL}, &idx,
+ file, line, errmsg, &px->conf.args, NULL);
+
+ if (!port_expr) {
+ memprintf(errmsg, "error detected while parsing port expression : %s", *errmsg);
+ goto error;
+ }
+ if (!(port_expr->fetch->val & SMP_VAL_BE_CHK_RUL)) {
+ memprintf(errmsg, "error detected while parsing port expression : "
+ " fetch method '%s' extracts information from '%s', "
+ "none of which is available here.\n",
+ args[cur_arg], sample_src_names(port_expr->fetch->use));
+ goto error;
+ }
+ px->http_needed |= !!(port_expr->fetch->use & SMP_USE_HTTP_ANY);
+ }
+ else if (port > 65535 || port < 1) {
+ memprintf(errmsg, "expects a valid TCP port (from range 1 to 65535) or a sample expression, got %s.",
+ args[cur_arg]);
+ goto error;
+ }
+ }
+ else if (strcmp(args[cur_arg], "proto") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a MUX protocol as argument.", args[cur_arg]);
+ goto error;
+ }
+ mux_proto = get_mux_proto(ist(args[cur_arg + 1]));
+ if (!mux_proto) {
+ memprintf(errmsg, "'%s' : unknown MUX protocol '%s'.", args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+
+ if (strcmp(args[0], "tcp-check") == 0 && mux_proto->mode != PROTO_MODE_TCP) {
+ memprintf(errmsg, "'%s' : invalid MUX protocol '%s' for tcp-check", args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ else if (strcmp(args[0], "http-check") == 0 && mux_proto->mode != PROTO_MODE_HTTP) {
+ memprintf(errmsg, "'%s' : invalid MUX protocol '%s' for http-check", args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "comment") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(comment);
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else if (strcmp(args[cur_arg], "send-proxy") == 0)
+ conn_opts |= TCPCHK_OPT_SEND_PROXY;
+ else if (strcmp(args[cur_arg], "via-socks4") == 0)
+ conn_opts |= TCPCHK_OPT_SOCKS4;
+ else if (strcmp(args[cur_arg], "linger") == 0)
+ conn_opts |= TCPCHK_OPT_LINGER;
+#ifdef USE_OPENSSL
+ else if (strcmp(args[cur_arg], "ssl") == 0) {
+ px->options |= PR_O_TCPCHK_SSL;
+ conn_opts |= TCPCHK_OPT_SSL;
+ }
+ else if (strcmp(args[cur_arg], "sni") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(sni);
+ sni = strdup(args[cur_arg]);
+ if (!sni) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else if (strcmp(args[cur_arg], "alpn") == 0) {
+#ifdef TLSEXT_TYPE_application_layer_protocol_negotiation
+ free(alpn);
+ if (ssl_sock_parse_alpn(args[cur_arg + 1], &alpn, &alpn_len, errmsg)) {
+ memprintf(errmsg, "'%s' : %s", args[cur_arg], *errmsg);
+ goto error;
+ }
+ cur_arg++;
+#else
+ memprintf(errmsg, "'%s' : library does not support TLS ALPN extension.", args[cur_arg]);
+ goto error;
+#endif
+ }
+#endif /* USE_OPENSSL */
+
+ else {
+ memprintf(errmsg, "expects 'comment', 'port', 'addr', 'send-proxy'"
+#ifdef USE_OPENSSL
+ ", 'ssl', 'sni', 'alpn'"
+#endif /* USE_OPENSSL */
+ " or 'via-socks4', 'linger', 'default' but got '%s' as argument.",
+ args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_CONNECT;
+ chk->comment = comment;
+ chk->connect.port = port;
+ chk->connect.options = conn_opts;
+ chk->connect.sni = sni;
+ chk->connect.alpn = alpn;
+	chk->connect.alpn_len = alpn_len;
+	chk->connect.port_expr = port_expr;
+	chk->connect.mux_proto = mux_proto;
+ if (sk)
+ chk->connect.addr = *sk;
+ return chk;
+
+ error:
+ free(alpn);
+ free(sni);
+ free(comment);
+ release_sample_expr(port_expr);
+ return NULL;
+}
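+
+/* Illustrative "connect" lines accepted by the parser above (example only;
+ * the address and SNI are placeholders):
+ *
+ *   tcp-check connect
+ *   tcp-check connect port 443 ssl sni srv.example.com
+ *   tcp-check connect addr 192.168.0.1 port 8080 send-proxy
+ */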
+
+/* Parses and creates a tcp-check send rule. NULL is returned on error */
+struct tcpcheck_rule *parse_tcpcheck_send(char **args, int cur_arg, struct proxy *px, struct list *rules,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ char *comment = NULL, *data = NULL;
+ enum tcpcheck_send_type type = TCPCHK_SEND_UNDEF;
+
+ if (strcmp(args[cur_arg], "send-binary-lf") == 0)
+ type = TCPCHK_SEND_BINARY_LF;
+ else if (strcmp(args[cur_arg], "send-binary") == 0)
+ type = TCPCHK_SEND_BINARY;
+ else if (strcmp(args[cur_arg], "send-lf") == 0)
+ type = TCPCHK_SEND_STRING_LF;
+ else if (strcmp(args[cur_arg], "send") == 0)
+ type = TCPCHK_SEND_STRING;
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a %s as argument",
+ (type == TCPCHK_SEND_BINARY ? "binary string": "string"), args[cur_arg]);
+ goto error;
+ }
+
+ data = args[cur_arg+1];
+
+ cur_arg += 2;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "comment") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(comment);
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else {
+ memprintf(errmsg, "expects 'comment' but got '%s' as argument.",
+ args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_SEND;
+ chk->comment = comment;
+ chk->send.type = type;
+
+ switch (chk->send.type) {
+ case TCPCHK_SEND_STRING:
+ chk->send.data = ist(strdup(data));
+ if (!isttest(chk->send.data)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ break;
+ case TCPCHK_SEND_BINARY: {
+ int len = chk->send.data.len;
+ if (parse_binary(data, &chk->send.data.ptr, &len, errmsg) == 0) {
+ memprintf(errmsg, "'%s' invalid binary string (%s).\n", data, *errmsg);
+ goto error;
+ }
+ chk->send.data.len = len;
+ break;
+ }
+ case TCPCHK_SEND_STRING_LF:
+ case TCPCHK_SEND_BINARY_LF:
+ LIST_INIT(&chk->send.fmt);
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(data, px, &chk->send.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", data, *errmsg);
+ goto error;
+ }
+ break;
+ case TCPCHK_SEND_HTTP:
+ case TCPCHK_SEND_UNDEF:
+ goto error;
+ }
+
+ return chk;
+
+ error:
+ free(chk);
+ free(comment);
+ return NULL;
+}
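+
+/* Illustrative "send" lines accepted by the parser above (example only; the
+ * variable name is a placeholder):
+ *
+ *   tcp-check send "GET / HTTP/1.0\r\n\r\n"
+ *   tcp-check send-binary 50494E470D0A
+ *   tcp-check send-lf "%[var(check.payload)]"
+ */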
+
+/* Parses and creates a http-check send rule. NULL is returned on error */
+struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct proxy *px, struct list *rules,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ struct tcpcheck_http_hdr *hdr = NULL;
+ struct http_hdr hdrs[global.tune.max_http_hdr];
+ char *meth = NULL, *uri = NULL, *vsn = NULL;
+ char *body = NULL, *comment = NULL;
+ unsigned int flags = 0;
+ int i = 0, host_hdr = -1;
+
+ cur_arg++;
+ while (*(args[cur_arg])) {
+ if (strcmp(args[cur_arg], "meth") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ meth = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "uri") == 0 || strcmp(args[cur_arg], "uri-lf") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ flags &= ~TCPCHK_SND_HTTP_FL_URI_FMT;
+ if (strcmp(args[cur_arg], "uri-lf") == 0)
+ flags |= TCPCHK_SND_HTTP_FL_URI_FMT;
+ cur_arg++;
+ uri = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "ver") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ vsn = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "hdr") == 0) {
+ if (!*args[cur_arg+1] || !*args[cur_arg+2]) {
+ memprintf(errmsg, "'%s' expects <name> and <value> as arguments", args[cur_arg]);
+ goto error;
+ }
+
+ if (strcasecmp(args[cur_arg+1], "host") == 0) {
+ if (host_hdr >= 0) {
+ memprintf(errmsg, "'%s' header already defined (previous value is '%s')",
+ args[cur_arg+1], istptr(hdrs[host_hdr].v));
+ goto error;
+ }
+ host_hdr = i;
+ }
+ else if (strcasecmp(args[cur_arg+1], "content-length") == 0 ||
+ strcasecmp(args[cur_arg+1], "transfer-encoding") == 0)
+ goto skip_hdr;
+
+ hdrs[i].n = ist(args[cur_arg + 1]);
+ hdrs[i].v = ist(args[cur_arg + 2]);
+ i++;
+ skip_hdr:
+ cur_arg += 2;
+ }
+ else if (strcmp(args[cur_arg], "body") == 0 || strcmp(args[cur_arg], "body-lf") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ flags &= ~TCPCHK_SND_HTTP_FL_BODY_FMT;
+ if (strcmp(args[cur_arg], "body-lf") == 0)
+ flags |= TCPCHK_SND_HTTP_FL_BODY_FMT;
+ cur_arg++;
+ body = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "comment") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(comment);
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else {
+ memprintf(errmsg, "expects 'comment', 'meth', 'uri', 'uri-lf', 'ver', 'hdr', 'body' or 'body-lf'"
+ " but got '%s' as argument.", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ }
+
+ hdrs[i].n = hdrs[i].v = IST_NULL;
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_SEND;
+ chk->comment = comment; comment = NULL;
+ chk->send.type = TCPCHK_SEND_HTTP;
+ chk->send.http.flags = flags;
+ LIST_INIT(&chk->send.http.hdrs);
+
+ if (meth) {
+ chk->send.http.meth.meth = find_http_meth(meth, strlen(meth));
+ chk->send.http.meth.str.area = strdup(meth);
+ chk->send.http.meth.str.data = strlen(meth);
+ if (!chk->send.http.meth.str.area) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ if (uri) {
+ if (chk->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) {
+ LIST_INIT(&chk->send.http.uri_fmt);
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(uri, px, &chk->send.http.uri_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", uri, *errmsg);
+ goto error;
+ }
+ }
+ else {
+ chk->send.http.uri = ist(strdup(uri));
+ if (!isttest(chk->send.http.uri)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ }
+ if (vsn) {
+ chk->send.http.vsn = ist(strdup(vsn));
+ if (!isttest(chk->send.http.vsn)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ for (i = 0; istlen(hdrs[i].n); i++) {
+ hdr = calloc(1, sizeof(*hdr));
+ if (!hdr) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ LIST_INIT(&hdr->value);
+ hdr->name = istdup(hdrs[i].n);
+ if (!isttest(hdr->name)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+
+ ist0(hdrs[i].v);
+ if (!parse_logformat_string(istptr(hdrs[i].v), px, &hdr->value, 0, SMP_VAL_BE_CHK_RUL, errmsg))
+ goto error;
+ LIST_APPEND(&chk->send.http.hdrs, &hdr->list);
+ hdr = NULL;
+ }
+
+ if (body) {
+ if (chk->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) {
+ LIST_INIT(&chk->send.http.body_fmt);
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(body, px, &chk->send.http.body_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", body, *errmsg);
+ goto error;
+ }
+ }
+ else {
+ chk->send.http.body = ist(strdup(body));
+ if (!isttest(chk->send.http.body)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ }
+
+ return chk;
+
+ error:
+ free_tcpcheck_http_hdr(hdr);
+ free_tcpcheck(chk, 0);
+ free(comment);
+ return NULL;
+}
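+
+/* Illustrative "http-check send" line accepted by the parser above (example
+ * only; host and URI are placeholders):
+ *
+ *   http-check send meth GET uri /health ver HTTP/1.1 hdr Host www.example.com
+ */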
+
+/* Parses and creates a http-check comment rule. NULL is returned on error */
+struct tcpcheck_rule *parse_tcpcheck_comment(char **args, int cur_arg, struct proxy *px, struct list *rules,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ char *comment = NULL;
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "expects a string as argument");
+ goto error;
+ }
+ cur_arg++;
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_COMMENT;
+ chk->comment = comment;
+ return chk;
+
+ error:
+ free(comment);
+ return NULL;
+}
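+
+/* Example (illustrative): 'tcp-check comment "POP3 banner check"'. The
+ * comment is later attached to the following rule(s) and the comment rule
+ * itself is removed, see check_proxy_tcpcheck() below.
+ */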
+
+/* Parses and creates a tcp-check or an http-check expect rule. NULL is returned
+ * on error. <proto> is set to the right protocol flags (covered by the
+ * TCPCHK_RULES_PROTO_CHK mask).
+ */
+struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct proxy *px,
+ struct list *rules, unsigned int proto,
+ const char *file, int line, char **errmsg)
+{
+ struct tcpcheck_rule *prev_check, *chk = NULL;
+ struct sample_expr *status_expr = NULL;
+ char *on_success_msg, *on_error_msg, *comment, *pattern, *npat, *vpat;
+ enum tcpcheck_expect_type type = TCPCHK_EXPECT_UNDEF;
+ enum healthcheck_status ok_st = HCHK_STATUS_UNKNOWN;
+ enum healthcheck_status err_st = HCHK_STATUS_UNKNOWN;
+ enum healthcheck_status tout_st = HCHK_STATUS_UNKNOWN;
+ unsigned int flags = 0;
+ long min_recv = -1;
+ int inverse = 0;
+
+ on_success_msg = on_error_msg = comment = pattern = npat = vpat = NULL;
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "expects at least a matching pattern as arguments");
+ goto error;
+ }
+
+ cur_arg++;
+ while (*(args[cur_arg])) {
+ int in_pattern = 0;
+
+ rescan:
+ if (strcmp(args[cur_arg], "min-recv") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a integer as argument", args[cur_arg]);
+ goto error;
+ }
+			/* Use a signed integer here because of bufsize */
+ cur_arg++;
+ min_recv = atol(args[cur_arg]);
+ if (min_recv < -1 || min_recv > INT_MAX) {
+ memprintf(errmsg, "'%s' expects -1 or an integer from 0 to INT_MAX" , args[cur_arg-1]);
+ goto error;
+ }
+ }
+ else if (*(args[cur_arg]) == '!') {
+ in_pattern = 1;
+ while (*(args[cur_arg]) == '!') {
+ inverse = !inverse;
+ args[cur_arg]++;
+ }
+ if (!*(args[cur_arg]))
+ cur_arg++;
+ goto rescan;
+ }
+ else if (strcmp(args[cur_arg], "string") == 0 || strcmp(args[cur_arg], "rstring") == 0) {
+ if (type != TCPCHK_EXPECT_UNDEF) {
+ memprintf(errmsg, "only on pattern expected");
+ goto error;
+ }
+ if (proto != TCPCHK_RULES_HTTP_CHK)
+ type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_STRING : TCPCHK_EXPECT_STRING_REGEX);
+ else
+ type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_HTTP_BODY : TCPCHK_EXPECT_HTTP_BODY_REGEX);
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ pattern = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "binary") == 0 || strcmp(args[cur_arg], "rbinary") == 0) {
+ if (proto == TCPCHK_RULES_HTTP_CHK)
+ goto bad_http_kw;
+ if (type != TCPCHK_EXPECT_UNDEF) {
+ memprintf(errmsg, "only on pattern expected");
+ goto error;
+ }
+ type = ((*(args[cur_arg]) == 'b') ? TCPCHK_EXPECT_BINARY : TCPCHK_EXPECT_BINARY_REGEX);
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ pattern = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "string-lf") == 0 || strcmp(args[cur_arg], "binary-lf") == 0) {
+ if (type != TCPCHK_EXPECT_UNDEF) {
+ memprintf(errmsg, "only on pattern expected");
+ goto error;
+ }
+ if (proto != TCPCHK_RULES_HTTP_CHK)
+ type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_STRING_LF : TCPCHK_EXPECT_BINARY_LF);
+ else {
+ if (*(args[cur_arg]) != 's')
+ goto bad_http_kw;
+ type = TCPCHK_EXPECT_HTTP_BODY_LF;
+ }
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ pattern = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "status") == 0 || strcmp(args[cur_arg], "rstatus") == 0) {
+ if (proto != TCPCHK_RULES_HTTP_CHK)
+ goto bad_tcp_kw;
+ if (type != TCPCHK_EXPECT_UNDEF) {
+ memprintf(errmsg, "only on pattern expected");
+ goto error;
+ }
+ type = ((*(args[cur_arg]) == 's') ? TCPCHK_EXPECT_HTTP_STATUS : TCPCHK_EXPECT_HTTP_STATUS_REGEX);
+
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a <pattern> as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ pattern = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "custom") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (type != TCPCHK_EXPECT_UNDEF) {
+ memprintf(errmsg, "only on pattern expected");
+ goto error;
+ }
+ type = TCPCHK_EXPECT_CUSTOM;
+ }
+ else if (strcmp(args[cur_arg], "hdr") == 0 || strcmp(args[cur_arg], "fhdr") == 0) {
+ int orig_arg = cur_arg;
+
+ if (proto != TCPCHK_RULES_HTTP_CHK)
+ goto bad_tcp_kw;
+ if (type != TCPCHK_EXPECT_UNDEF) {
+ memprintf(errmsg, "only on pattern expected");
+ goto error;
+ }
+ type = TCPCHK_EXPECT_HTTP_HEADER;
+
+ if (strcmp(args[cur_arg], "fhdr") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_FULL;
+
+ /* Parse the name pattern, mandatory */
+ if (!*(args[cur_arg+1]) || !*(args[cur_arg+2]) ||
+ (strcmp(args[cur_arg+1], "name") != 0 && strcmp(args[cur_arg+1], "name-lf") != 0)) {
+ memprintf(errmsg, "'%s' expects at the name keyword as first argument followed by a pattern",
+ args[orig_arg]);
+ goto error;
+ }
+
+ if (strcmp(args[cur_arg+1], "name-lf") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_FMT;
+
+ cur_arg += 2;
+ if (strcmp(args[cur_arg], "-m") == 0) {
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' : '%s' expects at a matching pattern ('str', 'beg', 'end', 'sub' or 'reg')",
+ args[orig_arg], args[cur_arg]);
+ goto error;
+ }
+ if (strcmp(args[cur_arg+1], "str") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_STR;
+ else if (strcmp(args[cur_arg+1], "beg") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_BEG;
+ else if (strcmp(args[cur_arg+1], "end") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_END;
+ else if (strcmp(args[cur_arg+1], "sub") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_SUB;
+ else if (strcmp(args[cur_arg+1], "reg") == 0) {
+ if (flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) {
+ memprintf(errmsg, "'%s': log-format string is not supported with a regex matching method",
+ args[orig_arg]);
+ goto error;
+ }
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_REG;
+ }
+ else {
+ memprintf(errmsg, "'%s' : '%s' only supports 'str', 'beg', 'end', 'sub' or 'reg' (got '%s')",
+ args[orig_arg], args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg += 2;
+ }
+ else
+ flags |= TCPCHK_EXPT_FL_HTTP_HNAME_STR;
+ npat = args[cur_arg];
+
+ if (!*(args[cur_arg+1]) ||
+ (strcmp(args[cur_arg+1], "value") != 0 && strcmp(args[cur_arg+1], "value-lf") != 0)) {
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_NONE;
+ goto next;
+ }
+ if (strcmp(args[cur_arg+1], "value-lf") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_FMT;
+
+ /* Parse the value pattern, optional */
+ if (strcmp(args[cur_arg+2], "-m") == 0) {
+ cur_arg += 2;
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' : '%s' expects at a matching pattern ('str', 'beg', 'end', 'sub' or 'reg')",
+ args[orig_arg], args[cur_arg]);
+ goto error;
+ }
+ if (strcmp(args[cur_arg+1], "str") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_STR;
+ else if (strcmp(args[cur_arg+1], "beg") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_BEG;
+ else if (strcmp(args[cur_arg+1], "end") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_END;
+ else if (strcmp(args[cur_arg+1], "sub") == 0)
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_SUB;
+ else if (strcmp(args[cur_arg+1], "reg") == 0) {
+ if (flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) {
+ memprintf(errmsg, "'%s': log-format string is not supported with a regex matching method",
+ args[orig_arg]);
+ goto error;
+ }
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_REG;
+ }
+ else {
+ memprintf(errmsg, "'%s' : '%s' only supports 'str', 'beg', 'end', 'sub' or 'reg' (got '%s')",
+ args[orig_arg], args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ }
+ else
+ flags |= TCPCHK_EXPT_FL_HTTP_HVAL_STR;
+
+ if (!*(args[cur_arg+2])) {
+ memprintf(errmsg, "'%s' expect a pattern with the value keyword", args[orig_arg]);
+ goto error;
+ }
+ vpat = args[cur_arg+2];
+ cur_arg += 2;
+ }
+ else if (strcmp(args[cur_arg], "comment") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ free(comment);
+ comment = strdup(args[cur_arg]);
+ if (!comment) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ else if (strcmp(args[cur_arg], "on-success") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ on_success_msg = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "on-error") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ cur_arg++;
+ on_error_msg = args[cur_arg];
+ }
+ else if (strcmp(args[cur_arg], "ok-status") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ if (strcasecmp(args[cur_arg+1], "L7OK") == 0)
+ ok_st = HCHK_STATUS_L7OKD;
+ else if (strcasecmp(args[cur_arg+1], "L7OKC") == 0)
+ ok_st = HCHK_STATUS_L7OKCD;
+ else if (strcasecmp(args[cur_arg+1], "L6OK") == 0)
+ ok_st = HCHK_STATUS_L6OK;
+ else if (strcasecmp(args[cur_arg+1], "L4OK") == 0)
+ ok_st = HCHK_STATUS_L4OK;
+ else {
+ memprintf(errmsg, "'%s' only supports 'L4OK', 'L6OK', 'L7OK' or 'L7OKC' status (got '%s').",
+ args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "error-status") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ if (strcasecmp(args[cur_arg+1], "L7RSP") == 0)
+ err_st = HCHK_STATUS_L7RSP;
+ else if (strcasecmp(args[cur_arg+1], "L7STS") == 0)
+ err_st = HCHK_STATUS_L7STS;
+ else if (strcasecmp(args[cur_arg+1], "L7OKC") == 0)
+ err_st = HCHK_STATUS_L7OKCD;
+ else if (strcasecmp(args[cur_arg+1], "L6RSP") == 0)
+ err_st = HCHK_STATUS_L6RSP;
+ else if (strcasecmp(args[cur_arg+1], "L4CON") == 0)
+ err_st = HCHK_STATUS_L4CON;
+ else {
+ memprintf(errmsg, "'%s' only supports 'L4CON', 'L6RSP', 'L7RSP' or 'L7STS' status (got '%s').",
+ args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg++;
+ }
+ else if (strcmp(args[cur_arg], "status-code") == 0) {
+ int idx = 0;
+
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects an expression as argument", args[cur_arg]);
+ goto error;
+ }
+
+ cur_arg++;
+ release_sample_expr(status_expr);
+ px->conf.args.ctx = ARGC_SRV;
+ status_expr = sample_parse_expr((char *[]){args[cur_arg], NULL}, &idx,
+ file, line, errmsg, &px->conf.args, NULL);
+ if (!status_expr) {
+ memprintf(errmsg, "error detected while parsing status-code expression : %s", *errmsg);
+ goto error;
+ }
+ if (!(status_expr->fetch->val & SMP_VAL_BE_CHK_RUL)) {
+ memprintf(errmsg, "error detected while parsing status-code expression : "
+ " fetch method '%s' extracts information from '%s', "
+ "none of which is available here.\n",
+ args[cur_arg], sample_src_names(status_expr->fetch->use));
+ goto error;
+ }
+ px->http_needed |= !!(status_expr->fetch->use & SMP_USE_HTTP_ANY);
+ }
+ else if (strcmp(args[cur_arg], "tout-status") == 0) {
+ if (in_pattern) {
+ memprintf(errmsg, "[!] not supported with '%s'", args[cur_arg]);
+ goto error;
+ }
+ if (!*(args[cur_arg+1])) {
+ memprintf(errmsg, "'%s' expects a string as argument", args[cur_arg]);
+ goto error;
+ }
+ if (strcasecmp(args[cur_arg+1], "L7TOUT") == 0)
+ tout_st = HCHK_STATUS_L7TOUT;
+ else if (strcasecmp(args[cur_arg+1], "L6TOUT") == 0)
+ tout_st = HCHK_STATUS_L6TOUT;
+ else if (strcasecmp(args[cur_arg+1], "L4TOUT") == 0)
+ tout_st = HCHK_STATUS_L4TOUT;
+ else {
+ memprintf(errmsg, "'%s' only supports 'L4TOUT', 'L6TOUT' or 'L7TOUT' status (got '%s').",
+ args[cur_arg], args[cur_arg+1]);
+ goto error;
+ }
+ cur_arg++;
+ }
+ else {
+ if (proto == TCPCHK_RULES_HTTP_CHK) {
+ bad_http_kw:
+ memprintf(errmsg, "'only supports min-recv, [!]string', '[!]rstring', '[!]string-lf', '[!]status', "
+ "'[!]rstatus', [!]hdr, [!]fhdr or comment but got '%s' as argument.", args[cur_arg]);
+ }
+ else {
+ bad_tcp_kw:
+ memprintf(errmsg, "'only supports min-recv, '[!]binary', '[!]string', '[!]rstring', '[!]string-lf'"
+ "'[!]rbinary', '[!]binary-lf' or comment but got '%s' as argument.", args[cur_arg]);
+ }
+ goto error;
+ }
+ next:
+ cur_arg++;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_EXPECT;
+ LIST_INIT(&chk->expect.onerror_fmt);
+ LIST_INIT(&chk->expect.onsuccess_fmt);
+ chk->comment = comment; comment = NULL;
+ chk->expect.type = type;
+ chk->expect.min_recv = min_recv;
+ chk->expect.flags = flags | (inverse ? TCPCHK_EXPT_FL_INV : 0);
+ chk->expect.ok_status = ok_st;
+ chk->expect.err_status = err_st;
+ chk->expect.tout_status = tout_st;
+ chk->expect.status_expr = status_expr; status_expr = NULL;
+
+ if (on_success_msg) {
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(on_success_msg, px, &chk->expect.onsuccess_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", on_success_msg, *errmsg);
+ goto error;
+ }
+ }
+ if (on_error_msg) {
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(on_error_msg, px, &chk->expect.onerror_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", on_error_msg, *errmsg);
+ goto error;
+ }
+ }
+
+ switch (chk->expect.type) {
+ case TCPCHK_EXPECT_HTTP_STATUS: {
+ const char *p = pattern;
+ unsigned int c1,c2;
+
+ chk->expect.codes.codes = NULL;
+ chk->expect.codes.num = 0;
+ while (1) {
+ c1 = c2 = read_uint(&p, pattern + strlen(pattern));
+ if (*p == '-') {
+ p++;
+ c2 = read_uint(&p, pattern + strlen(pattern));
+ }
+ if (c1 > c2) {
+ memprintf(errmsg, "invalid range of status codes '%s'", pattern);
+ goto error;
+ }
+
+ chk->expect.codes.num++;
+ chk->expect.codes.codes = my_realloc2(chk->expect.codes.codes,
+ chk->expect.codes.num * sizeof(*chk->expect.codes.codes));
+ if (!chk->expect.codes.codes) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->expect.codes.codes[chk->expect.codes.num-1][0] = c1;
+ chk->expect.codes.codes[chk->expect.codes.num-1][1] = c2;
+
+ if (*p == '\0')
+ break;
+ if (*p != ',') {
+ memprintf(errmsg, "invalid character '%c' in the list of status codes", *p);
+ goto error;
+ }
+ p++;
+ }
+ break;
+ }
+ case TCPCHK_EXPECT_STRING:
+ case TCPCHK_EXPECT_HTTP_BODY:
+ chk->expect.data = ist(strdup(pattern));
+ if (!isttest(chk->expect.data)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ break;
+ case TCPCHK_EXPECT_BINARY: {
+ int len = chk->expect.data.len;
+
+ if (parse_binary(pattern, &chk->expect.data.ptr, &len, errmsg) == 0) {
+ memprintf(errmsg, "invalid binary string (%s)", *errmsg);
+ goto error;
+ }
+ chk->expect.data.len = len;
+ break;
+ }
+ case TCPCHK_EXPECT_STRING_REGEX:
+ case TCPCHK_EXPECT_BINARY_REGEX:
+ case TCPCHK_EXPECT_HTTP_STATUS_REGEX:
+ case TCPCHK_EXPECT_HTTP_BODY_REGEX:
+ chk->expect.regex = regex_comp(pattern, 1, 0, errmsg);
+ if (!chk->expect.regex)
+ goto error;
+ break;
+
+ case TCPCHK_EXPECT_STRING_LF:
+ case TCPCHK_EXPECT_BINARY_LF:
+ case TCPCHK_EXPECT_HTTP_BODY_LF:
+ LIST_INIT(&chk->expect.fmt);
+ px->conf.args.ctx = ARGC_SRV;
+ if (!parse_logformat_string(pattern, px, &chk->expect.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", pattern, *errmsg);
+ goto error;
+ }
+ break;
+
+ case TCPCHK_EXPECT_HTTP_HEADER:
+ if (!npat) {
+ memprintf(errmsg, "unexpected error, undefined header name pattern");
+ goto error;
+ }
+ if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_REG) {
+ chk->expect.hdr.name_re = regex_comp(npat, 0, 0, errmsg);
+ if (!chk->expect.hdr.name_re)
+ goto error;
+ }
+ else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) {
+ px->conf.args.ctx = ARGC_SRV;
+ LIST_INIT(&chk->expect.hdr.name_fmt);
+ if (!parse_logformat_string(npat, px, &chk->expect.hdr.name_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", npat, *errmsg);
+ goto error;
+ }
+ }
+ else {
+ chk->expect.hdr.name = ist(strdup(npat));
+ if (!isttest(chk->expect.hdr.name)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+
+ if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_NONE) {
+ chk->expect.hdr.value = IST_NULL;
+ break;
+ }
+
+ if (!vpat) {
+ memprintf(errmsg, "unexpected error, undefined header value pattern");
+ goto error;
+ }
+ else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_REG) {
+ chk->expect.hdr.value_re = regex_comp(vpat, 1, 0, errmsg);
+ if (!chk->expect.hdr.value_re)
+ goto error;
+ }
+ else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) {
+ px->conf.args.ctx = ARGC_SRV;
+ LIST_INIT(&chk->expect.hdr.value_fmt);
+ if (!parse_logformat_string(vpat, px, &chk->expect.hdr.value_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) {
+ memprintf(errmsg, "'%s' invalid log-format string (%s).\n", npat, *errmsg);
+ goto error;
+ }
+ }
+ else {
+ chk->expect.hdr.value = ist(strdup(vpat));
+ if (!isttest(chk->expect.hdr.value)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+
+ break;
+ case TCPCHK_EXPECT_CUSTOM:
+ chk->expect.custom = NULL; /* Must be defined by the caller ! */
+ break;
+ case TCPCHK_EXPECT_UNDEF:
+ memprintf(errmsg, "pattern not found");
+ goto error;
+ }
+
+	/* Each tcp-check expect rule points back to the first inverse expect
+	 * rule in a chain of one or more expect rules, potentially itself.
+ */
+ chk->expect.head = chk;
+ list_for_each_entry_rev(prev_check, rules, list) {
+ if (prev_check->action == TCPCHK_ACT_EXPECT) {
+ if (prev_check->expect.flags & TCPCHK_EXPT_FL_INV)
+ chk->expect.head = prev_check;
+ continue;
+ }
+ if (prev_check->action != TCPCHK_ACT_COMMENT && prev_check->action != TCPCHK_ACT_ACTION_KW)
+ break;
+ }
+ return chk;
+
+ error:
+ free_tcpcheck(chk, 0);
+ free(comment);
+ release_sample_expr(status_expr);
+ return NULL;
+}
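+
+/* Illustrative "expect" lines covering the pattern types parsed above
+ * (example only; patterns and variable names are placeholders):
+ *
+ *   tcp-check  expect min-recv 4 !rstring ^-ERR
+ *   tcp-check  expect binary-lf "%[var(check.magic)]"
+ *   http-check expect status 200-399,404
+ *   http-check expect hdr name content-type value -m sub json
+ */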
+
+/* Overwrites fields of the old http send rule with those of the new one. Old
+ * values are freed and the new ones are transferred, not copied. At the end,
+ * <new> should be empty and can be safely released. This function never
+ * fails.
+ */
+void tcpcheck_overwrite_send_http_rule(struct tcpcheck_rule *old, struct tcpcheck_rule *new)
+{
+ struct logformat_node *lf, *lfb;
+ struct tcpcheck_http_hdr *hdr, *bhdr;
+
+ if (new->send.http.meth.str.area) {
+ free(old->send.http.meth.str.area);
+ old->send.http.meth.meth = new->send.http.meth.meth;
+ old->send.http.meth.str.area = new->send.http.meth.str.area;
+ old->send.http.meth.str.data = new->send.http.meth.str.data;
+ new->send.http.meth.str = BUF_NULL;
+ }
+
+ if (!(new->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) && isttest(new->send.http.uri)) {
+ if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT))
+ istfree(&old->send.http.uri);
+ else
+ free_tcpcheck_fmt(&old->send.http.uri_fmt);
+ old->send.http.flags &= ~TCPCHK_SND_HTTP_FL_URI_FMT;
+ old->send.http.uri = new->send.http.uri;
+ new->send.http.uri = IST_NULL;
+ }
+ else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) && !LIST_ISEMPTY(&new->send.http.uri_fmt)) {
+ if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT))
+ istfree(&old->send.http.uri);
+ else
+ free_tcpcheck_fmt(&old->send.http.uri_fmt);
+ old->send.http.flags |= TCPCHK_SND_HTTP_FL_URI_FMT;
+ LIST_INIT(&old->send.http.uri_fmt);
+ list_for_each_entry_safe(lf, lfb, &new->send.http.uri_fmt, list) {
+ LIST_DELETE(&lf->list);
+ LIST_APPEND(&old->send.http.uri_fmt, &lf->list);
+ }
+ }
+
+ if (isttest(new->send.http.vsn)) {
+ istfree(&old->send.http.vsn);
+ old->send.http.vsn = new->send.http.vsn;
+ new->send.http.vsn = IST_NULL;
+ }
+
+ if (!LIST_ISEMPTY(&new->send.http.hdrs)) {
+ free_tcpcheck_http_hdrs(&old->send.http.hdrs);
+ list_for_each_entry_safe(hdr, bhdr, &new->send.http.hdrs, list) {
+ LIST_DELETE(&hdr->list);
+ LIST_APPEND(&old->send.http.hdrs, &hdr->list);
+ }
+ }
+
+ if (!(new->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && isttest(new->send.http.body)) {
+ if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT))
+ istfree(&old->send.http.body);
+ else
+ free_tcpcheck_fmt(&old->send.http.body_fmt);
+ old->send.http.flags &= ~TCPCHK_SND_HTTP_FL_BODY_FMT;
+ old->send.http.body = new->send.http.body;
+ new->send.http.body = IST_NULL;
+ }
+ else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && !LIST_ISEMPTY(&new->send.http.body_fmt)) {
+ if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT))
+ istfree(&old->send.http.body);
+ else
+ free_tcpcheck_fmt(&old->send.http.body_fmt);
+ old->send.http.flags |= TCPCHK_SND_HTTP_FL_BODY_FMT;
+ LIST_INIT(&old->send.http.body_fmt);
+ list_for_each_entry_safe(lf, lfb, &new->send.http.body_fmt, list) {
+ LIST_DELETE(&lf->list);
+ LIST_APPEND(&old->send.http.body_fmt, &lf->list);
+ }
+ }
+}
+
+/* Internal function used to add an http-check rule in a list during the config
+ * parsing step. Depending on its type, and the previously inserted rules, a
+ * specific action may be performed or an error may be reported. This function
+ * returns 1 on success and 0 on error, in which case <errmsg> is filled with
+ * the error message.
+ */
+int tcpcheck_add_http_rule(struct tcpcheck_rule *chk, struct tcpcheck_rules *rules, char **errmsg)
+{
+ struct tcpcheck_rule *r;
+
+ /* the implicit send rule coming from an "option httpchk" line must be
+	 * merged with the first explicit http-check send rule, if
+	 * any. Depending on the declaration order, some tests are required.
+ *
+ * Some tests are also required for other kinds of http-check rules to be
+ * sure the ruleset remains valid.
+ */
+
+ if (chk->action == TCPCHK_ACT_SEND && (chk->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)) {
+ /* Tries to add an implicit http-check send rule from an "option httpchk" line.
+ * First, the first rule is retrieved, skipping the first CONNECT, if any, and
+ * following tests are performed :
+ *
+ * 1- If there is no such rule or if it is not a send rule, the implicit send
+ * rule is pushed in front of the ruleset
+ *
+ * 2- If it is another implicit send rule, it is replaced with the new one.
+ *
+ * 3- Otherwise, it means it is an explicit send rule. In this case we merge
+ * both, overwriting the old send rule (the explicit one) with info of the
+ * new send rule (the implicit one).
+ */
+ r = get_first_tcpcheck_rule(rules);
+ if (r && r->action == TCPCHK_ACT_CONNECT)
+ r = get_next_tcpcheck_rule(rules, r);
+ if (!r || r->action != TCPCHK_ACT_SEND)
+ LIST_INSERT(rules->list, &chk->list);
+ else if (r->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT) {
+ LIST_DELETE(&r->list);
+ free_tcpcheck(r, 0);
+ LIST_INSERT(rules->list, &chk->list);
+ }
+ else {
+ tcpcheck_overwrite_send_http_rule(r, chk);
+ free_tcpcheck(chk, 0);
+ }
+ }
+ else {
+		/* Tries to add an explicit http-check rule. First of all, we check the type of the
+		 * last inserted rule to be sure it is valid. Then, for a send rule, we try to merge it
+ * with an existing implicit send rule, if any. At the end, if there is no error,
+ * the rule is appended to the list.
+ */
+
+ r = get_last_tcpcheck_rule(rules);
+ if (!r || (r->action == TCPCHK_ACT_SEND && (r->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)))
+ /* no error */;
+ else if (r->action != TCPCHK_ACT_CONNECT && chk->action == TCPCHK_ACT_SEND) {
+ memprintf(errmsg, "unable to add http-check send rule at step %d (missing connect rule).",
+ chk->index+1);
+ return 0;
+ }
+ else if (r->action != TCPCHK_ACT_SEND && r->action != TCPCHK_ACT_EXPECT && chk->action == TCPCHK_ACT_EXPECT) {
+ memprintf(errmsg, "unable to add http-check expect rule at step %d (missing send rule).",
+ chk->index+1);
+ return 0;
+ }
+ else if (r->action != TCPCHK_ACT_EXPECT && chk->action == TCPCHK_ACT_CONNECT) {
+ memprintf(errmsg, "unable to add http-check connect rule at step %d (missing expect rule).",
+ chk->index+1);
+ return 0;
+ }
+
+ if (chk->action == TCPCHK_ACT_SEND) {
+ r = get_first_tcpcheck_rule(rules);
+ if (r && r->action == TCPCHK_ACT_SEND && (r->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)) {
+ tcpcheck_overwrite_send_http_rule(r, chk);
+ free_tcpcheck(chk, 0);
+ LIST_DELETE(&r->list);
+ r->send.http.flags &= ~TCPCHK_SND_HTTP_FROM_OPT;
+ chk = r;
+ }
+ }
+ LIST_APPEND(rules->list, &chk->list);
+ }
+ return 1;
+}
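+
+/* Illustrative merge handled above (example only): with
+ *
+ *   option httpchk GET /health
+ *   http-check send hdr Host www.example.com
+ *
+ * the implicit send rule built from "option httpchk" is merged with the
+ * explicit "http-check send" rule, so a single send rule carries the method,
+ * the URI and the extra header.
+ */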
+
+/* Check tcp-check health-check configuration for the proxy <px>. */
+static int check_proxy_tcpcheck(struct proxy *px)
+{
+ struct tcpcheck_rule *chk, *back;
+ char *comment = NULL, *errmsg = NULL;
+ enum tcpcheck_rule_type prev_action = TCPCHK_ACT_COMMENT;
+ int ret = ERR_NONE;
+
+ if (!(px->cap & PR_CAP_BE) || (px->options2 & PR_O2_CHK_ANY) != PR_O2_TCPCHK_CHK) {
+ deinit_proxy_tcpcheck(px);
+ goto out;
+ }
+
+ ha_free(&px->check_command);
+ ha_free(&px->check_path);
+
+ if (!px->tcpcheck_rules.list) {
+ ha_alert("proxy '%s' : tcp-check configured but no ruleset defined.\n", px->id);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+
+ /* HTTP ruleset only : */
+ if ((px->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK) {
+ struct tcpcheck_rule *next;
+
+ /* move remaining implicit send rule from "option httpchk" line to the right place.
+		 * If such a rule exists, it must be the first one. In this case, the rule is moved
+ * after the first connect rule, if any. Otherwise, nothing is done.
+ */
+ chk = get_first_tcpcheck_rule(&px->tcpcheck_rules);
+ if (chk && chk->action == TCPCHK_ACT_SEND && (chk->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT)) {
+ next = get_next_tcpcheck_rule(&px->tcpcheck_rules, chk);
+ if (next && next->action == TCPCHK_ACT_CONNECT) {
+ LIST_DELETE(&chk->list);
+ LIST_INSERT(&next->list, &chk->list);
+ chk->index = next->index + 1;
+ }
+ }
+
+		/* add an implicit expect rule if the last one is a send. This is inherited from previous
+		 * versions where the http expect rule was optional. Now it is possible to chain
+		 * send/expect rules, but the last expect may still be implicit.
+ */
+ chk = get_last_tcpcheck_rule(&px->tcpcheck_rules);
+ if (chk && chk->action == TCPCHK_ACT_SEND) {
+ next = parse_tcpcheck_expect((char *[]){"http-check", "expect", "status", "200-399", ""},
+ 1, px, px->tcpcheck_rules.list, TCPCHK_RULES_HTTP_CHK,
+ px->conf.file, px->conf.line, &errmsg);
+ if (!next) {
+ ha_alert("proxy '%s': unable to add implicit http-check expect rule "
+ "(%s).\n", px->id, errmsg);
+ free(errmsg);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ LIST_APPEND(px->tcpcheck_rules.list, &next->list);
+ next->index = chk->index + 1;
+ }
+ }
+
+ /* For all ruleset: */
+
+ /* If there is no connect rule preceding all send / expect rules, an
+ * implicit one is inserted before all others.
+ */
+ chk = get_first_tcpcheck_rule(&px->tcpcheck_rules);
+ if (!chk || chk->action != TCPCHK_ACT_CONNECT) {
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ ha_alert("proxy '%s': unable to add implicit tcp-check connect rule "
+ "(out of memory).\n", px->id);
+ ret |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ chk->action = TCPCHK_ACT_CONNECT;
+ chk->connect.options = (TCPCHK_OPT_DEFAULT_CONNECT|TCPCHK_OPT_IMPLICIT);
+ LIST_INSERT(px->tcpcheck_rules.list, &chk->list);
+ }
+
+	/* Remove all comment rules. To do so, when such a rule is found, the
+ * comment is assigned to the following rule(s).
+ */
+ list_for_each_entry_safe(chk, back, px->tcpcheck_rules.list, list) {
+ struct tcpcheck_rule *next;
+
+ if (chk->action != prev_action && prev_action != TCPCHK_ACT_COMMENT)
+ ha_free(&comment);
+
+ prev_action = chk->action;
+ switch (chk->action) {
+ case TCPCHK_ACT_COMMENT:
+ free(comment);
+ comment = chk->comment;
+ LIST_DELETE(&chk->list);
+ free(chk);
+ break;
+ case TCPCHK_ACT_CONNECT:
+ if (!chk->comment && comment)
+ chk->comment = strdup(comment);
+ next = get_next_tcpcheck_rule(&px->tcpcheck_rules, chk);
+ if (next && next->action == TCPCHK_ACT_SEND)
+ chk->connect.options |= TCPCHK_OPT_HAS_DATA;
+ __fallthrough;
+ case TCPCHK_ACT_ACTION_KW:
+ ha_free(&comment);
+ break;
+ case TCPCHK_ACT_SEND:
+ case TCPCHK_ACT_EXPECT:
+ if (!chk->comment && comment)
+ chk->comment = strdup(comment);
+ break;
+ }
+ }
+ ha_free(&comment);
+
+ out:
+ return ret;
+}
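+
+/* Post-processing example (illustrative): after check_proxy_tcpcheck(), a
+ * bare HTTP ruleset such as
+ *
+ *   option httpchk GET /
+ *
+ * becomes: implicit connect -> send "GET /" -> implicit
+ * "http-check expect status 200-399".
+ */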
+
+void deinit_proxy_tcpcheck(struct proxy *px)
+{
+ free_tcpcheck_vars(&px->tcpcheck_rules.preset_vars);
+ px->tcpcheck_rules.flags = 0;
+ px->tcpcheck_rules.list = NULL;
+}
+
+static void deinit_tcpchecks()
+{
+ struct tcpcheck_ruleset *rs;
+ struct tcpcheck_rule *r, *rb;
+ struct ebpt_node *node, *next;
+
+ node = ebpt_first(&shared_tcpchecks);
+ while (node) {
+ next = ebpt_next(node);
+ ebpt_delete(node);
+ free(node->key);
+ rs = container_of(node, typeof(*rs), node);
+ list_for_each_entry_safe(r, rb, &rs->rules, list) {
+ LIST_DELETE(&r->list);
+ free_tcpcheck(r, 0);
+ }
+ free(rs);
+ node = next;
+ }
+}
+
+int add_tcpcheck_expect_str(struct tcpcheck_rules *rules, const char *str)
+{
+ struct tcpcheck_rule *tcpcheck, *prev_check;
+ struct tcpcheck_expect *expect;
+
+ if ((tcpcheck = pool_zalloc(pool_head_tcpcheck_rule)) == NULL)
+ return 0;
+ tcpcheck->action = TCPCHK_ACT_EXPECT;
+
+ expect = &tcpcheck->expect;
+ expect->type = TCPCHK_EXPECT_STRING;
+ LIST_INIT(&expect->onerror_fmt);
+ LIST_INIT(&expect->onsuccess_fmt);
+ expect->ok_status = HCHK_STATUS_L7OKD;
+ expect->err_status = HCHK_STATUS_L7RSP;
+ expect->tout_status = HCHK_STATUS_L7TOUT;
+ expect->data = ist(strdup(str));
+ if (!isttest(expect->data)) {
+ pool_free(pool_head_tcpcheck_rule, tcpcheck);
+ return 0;
+ }
+
+	/* Each tcp-check expect rule points back to the first inverse expect
+	 * rule in a chain of one or more expect rules, potentially itself.
+ */
+ tcpcheck->expect.head = tcpcheck;
+ list_for_each_entry_rev(prev_check, rules->list, list) {
+ if (prev_check->action == TCPCHK_ACT_EXPECT) {
+ if (prev_check->expect.flags & TCPCHK_EXPT_FL_INV)
+ tcpcheck->expect.head = prev_check;
+ continue;
+ }
+ if (prev_check->action != TCPCHK_ACT_COMMENT && prev_check->action != TCPCHK_ACT_ACTION_KW)
+ break;
+ }
+ LIST_APPEND(rules->list, &tcpcheck->list);
+ return 1;
+}
+
+int add_tcpcheck_send_strs(struct tcpcheck_rules *rules, const char * const *strs)
+{
+ struct tcpcheck_rule *tcpcheck;
+ struct tcpcheck_send *send;
+ const char *in;
+ char *dst;
+ int i;
+
+ if ((tcpcheck = pool_zalloc(pool_head_tcpcheck_rule)) == NULL)
+ return 0;
+ tcpcheck->action = TCPCHK_ACT_SEND;
+
+ send = &tcpcheck->send;
+ send->type = TCPCHK_SEND_STRING;
+
+ for (i = 0; strs[i]; i++)
+ send->data.len += strlen(strs[i]);
+
+ send->data.ptr = malloc(istlen(send->data) + 1);
+ if (!isttest(send->data)) {
+ pool_free(pool_head_tcpcheck_rule, tcpcheck);
+ return 0;
+ }
+
+ dst = istptr(send->data);
+ for (i = 0; strs[i]; i++)
+ for (in = strs[i]; (*dst = *in++); dst++);
+ *dst = 0;
+
+ LIST_APPEND(rules->list, &tcpcheck->list);
+ return 1;
+}
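+
+/* Usage sketch for the two helpers above (illustrative, not from this patch;
+ * <rules> stands for a pre-initialized ruleset):
+ *
+ *   const char *ping[] = { "PING", "\r\n", NULL };
+ *   if (!add_tcpcheck_send_strs(rules, ping) ||
+ *       !add_tcpcheck_expect_str(rules, "+PONG"))
+ *       return 0;
+ */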
+
+/* Parses the "tcp-check" proxy keyword */
+static int proxy_parse_tcpcheck(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rule *chk = NULL;
+ int index, cur_arg, ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[0], NULL))
+ ret = 1;
+
+ /* Deduce the ruleset name from the proxy info */
+ chunk_printf(&trash, "*tcp-check-%s_%s-%d",
+ ((curpx == defpx) ? "defaults" : curpx->id),
+ curpx->conf.file, curpx->conf.line);
+
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ rs = create_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ memprintf(errmsg, "out of memory.\n");
+ goto error;
+ }
+ }
+
+ index = 0;
+ if (!LIST_ISEMPTY(&rs->rules)) {
+ chk = LIST_PREV(&rs->rules, typeof(chk), list);
+ index = chk->index + 1;
+ chk = NULL;
+ }
+
+ cur_arg = 1;
+ if (strcmp(args[cur_arg], "connect") == 0)
+ chk = parse_tcpcheck_connect(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "send") == 0 || strcmp(args[cur_arg], "send-binary") == 0 ||
+ strcmp(args[cur_arg], "send-lf") == 0 || strcmp(args[cur_arg], "send-binary-lf") == 0)
+ chk = parse_tcpcheck_send(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "expect") == 0)
+ chk = parse_tcpcheck_expect(args, cur_arg, curpx, &rs->rules, 0, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "comment") == 0)
+ chk = parse_tcpcheck_comment(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else {
+ struct action_kw *kw = action_kw_tcp_check_lookup(args[cur_arg]);
+
+ if (!kw) {
+ action_kw_tcp_check_build_list(&trash);
+ memprintf(errmsg, "'%s' only supports 'comment', 'connect', 'send', 'send-binary', 'expect'"
+				  "%s%s, but got '%s'",
+ args[0], (*trash.area ? ", " : ""), trash.area, args[1]);
+ goto error;
+ }
+ chk = parse_tcpcheck_action(args, cur_arg, curpx, &rs->rules, kw, file, line, errmsg);
+ }
+
+ if (!chk) {
+ memprintf(errmsg, "'%s %s' : %s.", args[0], args[1], *errmsg);
+ goto error;
+ }
+ ret = (ret || (*errmsg != NULL)); /* Handle warning */
+
+ /* No error: add the tcp-check rule in the list */
+ chk->index = index;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ if ((curpx->options2 & PR_O2_CHK_ANY) == PR_O2_TCPCHK_CHK &&
+ (curpx->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK) {
+ /* Use this ruleset if the proxy already has tcp-check enabled */
+ curpx->tcpcheck_rules.list = &rs->rules;
+ curpx->tcpcheck_rules.flags &= ~TCPCHK_RULES_UNUSED_TCP_RS;
+ }
+ else {
+ /* mark this ruleset as unused for now */
+ curpx->tcpcheck_rules.flags |= TCPCHK_RULES_UNUSED_TCP_RS;
+ }
+
+ return ret;
+
+ error:
+ free_tcpcheck(chk, 0);
+ free_tcpcheck_ruleset(rs);
+ return -1;
+}
+
+/* Parses the "http-check" proxy keyword */
+static int proxy_parse_httpcheck(char **args, int section, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **errmsg)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rule *chk = NULL;
+ int index, cur_arg, ret = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[0], NULL))
+ ret = 1;
+
+ cur_arg = 1;
+ if (strcmp(args[cur_arg], "disable-on-404") == 0) {
+ /* enable a graceful server shutdown on an HTTP 404 response */
+ curpx->options |= PR_O_DISABLE404;
+ if (too_many_args(1, args, errmsg, NULL))
+ goto error;
+ goto out;
+ }
+ else if (strcmp(args[cur_arg], "send-state") == 0) {
+ /* enable emission of the apparent state of a server in HTTP checks */
+ curpx->options2 |= PR_O2_CHK_SNDST;
+ if (too_many_args(1, args, errmsg, NULL))
+ goto error;
+ goto out;
+ }
+
+ /* Deduce the ruleset name from the proxy info */
+ chunk_printf(&trash, "*http-check-%s_%s-%d",
+ ((curpx == defpx) ? "defaults" : curpx->id),
+ curpx->conf.file, curpx->conf.line);
+
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ rs = create_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ memprintf(errmsg, "out of memory.\n");
+ goto error;
+ }
+ }
+
+ index = 0;
+ if (!LIST_ISEMPTY(&rs->rules)) {
+ chk = LIST_PREV(&rs->rules, typeof(chk), list);
+ if (chk->action != TCPCHK_ACT_SEND || !(chk->send.http.flags & TCPCHK_SND_HTTP_FROM_OPT))
+ index = chk->index + 1;
+ chk = NULL;
+ }
+
+ if (strcmp(args[cur_arg], "connect") == 0)
+ chk = parse_tcpcheck_connect(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "send") == 0)
+ chk = parse_tcpcheck_send_http(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else if (strcmp(args[cur_arg], "expect") == 0)
+ chk = parse_tcpcheck_expect(args, cur_arg, curpx, &rs->rules, TCPCHK_RULES_HTTP_CHK,
+ file, line, errmsg);
+ else if (strcmp(args[cur_arg], "comment") == 0)
+ chk = parse_tcpcheck_comment(args, cur_arg, curpx, &rs->rules, file, line, errmsg);
+ else {
+ struct action_kw *kw = action_kw_tcp_check_lookup(args[cur_arg]);
+
+ if (!kw) {
+ action_kw_tcp_check_build_list(&trash);
+ memprintf(errmsg, "'%s' only supports 'disable-on-404', 'send-state', 'comment', 'connect',"
+				  " 'send', 'expect'%s%s, but got '%s'",
+ args[0], (*trash.area ? ", " : ""), trash.area, args[1]);
+ goto error;
+ }
+ chk = parse_tcpcheck_action(args, cur_arg, curpx, &rs->rules, kw, file, line, errmsg);
+ }
+
+ if (!chk) {
+ memprintf(errmsg, "'%s %s' : %s.", args[0], args[1], *errmsg);
+ goto error;
+ }
+	ret = (ret || (*errmsg != NULL)); /* Handle warning */
+
+ chk->index = index;
+ if ((curpx->options2 & PR_O2_CHK_ANY) == PR_O2_TCPCHK_CHK &&
+ (curpx->tcpcheck_rules.flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_HTTP_CHK) {
+ /* Use this ruleset if the proxy already has http-check enabled */
+ curpx->tcpcheck_rules.list = &rs->rules;
+ curpx->tcpcheck_rules.flags &= ~TCPCHK_RULES_UNUSED_HTTP_RS;
+ if (!tcpcheck_add_http_rule(chk, &curpx->tcpcheck_rules, errmsg)) {
+ memprintf(errmsg, "'%s %s' : %s.", args[0], args[1], *errmsg);
+ curpx->tcpcheck_rules.list = NULL;
+ goto error;
+ }
+ }
+ else {
+ /* mark this ruleset as unused for now */
+ curpx->tcpcheck_rules.flags |= TCPCHK_RULES_UNUSED_HTTP_RS;
+ LIST_APPEND(&rs->rules, &chk->list);
+ }
+
+ out:
+ return ret;
+
+ error:
+ free_tcpcheck(chk, 0);
+ free_tcpcheck_ruleset(rs);
+ return -1;
+}
+
+/* Parses the "option redis-check" proxy keyword */
+int proxy_parse_redis_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ static char *redis_req = "*1\r\n$4\r\nPING\r\n";
+ static char *redis_res = "+PONG\r\n";
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*redis-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*redis-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send", redis_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "string", redis_res,
+ "error-status", "L7STS",
+ "on-error", "%[res.payload(0,0),cut_crlf]",
+ "on-success", "Redis server is ok",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_REDIS_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_REDIS_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parses the "option ssl-hello-chk" proxy keyword */
+int proxy_parse_ssl_hello_chk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ /* This is the SSLv3 CLIENT HELLO packet used in conjunction with the
+ * ssl-hello-chk option to ensure that the remote server speaks SSL.
+ *
+ * Check RFC 2246 (TLSv1.0) sections A.3 and A.4 for details.
+ */
+ static char sslv3_client_hello[] = {
+ "16" /* ContentType : 0x16 = Handshake */
+ "0300" /* ProtocolVersion : 0x0300 = SSLv3 */
+ "0079" /* ContentLength : 0x79 bytes after this one */
+		"01"                    /* HandshakeType : 0x01 = CLIENT HELLO */
+ "000075" /* HandshakeLength : 0x75 bytes after this one */
+ "0300" /* Hello Version : 0x0300 = v3 */
+ "%[date(),htonl,hex]" /* Unix GMT Time (s) : filled with <now> (@0x0B) */
+ "%[str(HAPROXYSSLCHK\nHAPROXYSSLCHK\n),hex]" /* Random : must be exactly 28 bytes */
+ "00" /* Session ID length : empty (no session ID) */
+ "004E" /* Cipher Suite Length : 78 bytes after this one */
+ "0001" "0002" "0003" "0004" /* 39 most common ciphers : */
+ "0005" "0006" "0007" "0008" /* 0x01...0x1B, 0x2F...0x3A */
+ "0009" "000A" "000B" "000C" /* This covers RSA/DH, */
+ "000D" "000E" "000F" "0010" /* various bit lengths, */
+ "0011" "0012" "0013" "0014" /* SHA1/MD5, DES/3DES/AES... */
+ "0015" "0016" "0017" "0018"
+ "0019" "001A" "001B" "002F"
+ "0030" "0031" "0032" "0033"
+ "0034" "0035" "0036" "0037"
+ "0038" "0039" "003A"
+ "01" /* Compression Length : 0x01 = 1 byte for types */
+ "00" /* Compression Type : 0x00 = NULL compression */
+ };
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*ssl-hello-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*ssl-hello-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary-lf", sslv3_client_hello, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rbinary", "^1[56]",
+ "min-recv", "5", "ok-status", "L6OK",
+ "error-status", "L6RSP", "tout-status", "L6TOUT",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SSL3_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_SSL3_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parses the "option smtpchk" proxy keyword */
+int proxy_parse_smtpchk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ static char *smtp_req = "%[var(check.smtp_cmd)]\r\n";
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ struct tcpcheck_var *var = NULL;
+ char *cmd = NULL, *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(2, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ cur_arg += 2;
+ if (*args[cur_arg] && *args[cur_arg+1] &&
+ (strcmp(args[cur_arg], "EHLO") == 0 || strcmp(args[cur_arg], "HELO") == 0)) {
+ /* <EHLO|HELO> + space (1) + <host> + null byte (1) */
+ size_t len = strlen(args[cur_arg]) + 1 + strlen(args[cur_arg+1]) + 1;
+ cmd = calloc(1, len);
+ if (cmd)
+ snprintf(cmd, len, "%s %s", args[cur_arg], args[cur_arg+1]);
+ }
+ else {
+		/* Fall back to the default command. This could be extended to
+		 * support other commands, but anything other than an EHLO or
+		 * HELO is unlikely to be useful here.
+		 */
+ cmd = strdup("HELO localhost");
+ }
+
+ var = create_tcpcheck_var(ist("check.smtp_cmd"));
+ if (cmd == NULL || var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = cmd;
+ var->data.u.str.data = strlen(cmd);
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ cmd = NULL;
+ var = NULL;
+
+ rs = find_tcpcheck_ruleset("*smtp-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*smtp-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_connect((char *[]){"tcp-check", "connect", "default", "linger", ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rstring", "^[0-9]{3}[ \r]",
+ "min-recv", "4",
+ "error-status", "L7RSP",
+ "on-error", "%[res.payload(0,0),cut_crlf]",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SMTP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rstring", "^2[0-9]{2}[ \r]",
+ "min-recv", "4",
+ "error-status", "L7STS",
+ "on-error", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "status-code", "res.payload(0,3)",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SMTP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 2;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-lf", smtp_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 3;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rstring", "^(2[0-9]{2}-[^\r]*\r\n)*2[0-9]{2}[ \r]",
+ "error-status", "L7STS",
+ "on-error", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "on-success", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "status-code", "res.payload(0,3)",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SMTP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 4;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ /* Send an SMTP QUIT to ensure clean disconnect (issue 1812), and expect a 2xx response code */
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send", "QUIT\r\n", ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 5;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rstring", "^2[0-9]{2}[- \r]",
+ "min-recv", "4",
+ "error-status", "L7STS",
+ "on-error", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "on-success", "%[res.payload(4,0),ltrim(' '),cut_crlf]",
+ "status-code", "res.payload(0,3)",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SMTP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 6;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_SMTP_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free(cmd);
+ free(var);
+ free_tcpcheck_vars(&rules->preset_vars);
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parses the "option pgsql-check" proxy keyword */
+int proxy_parse_pgsql_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ static char pgsql_req[] = {
+		"%[var(check.plen),htonl,hex]" /* the packet length */
+ "00030000" /* the version 3.0 */
+ "7573657200" /* "user" key */
+ "%[var(check.username),hex]00" /* the username */
+ "00"
+ };
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ struct tcpcheck_var *var = NULL;
+ char *user = NULL, *errmsg = NULL;
+ size_t packetlen = 0;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(2, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ cur_arg += 2;
+ if (!*args[cur_arg] || !*args[cur_arg+1]) {
+ ha_alert("parsing [%s:%d] : '%s %s' expects 'user <username>' as argument.\n",
+ file, line, args[0], args[1]);
+ goto error;
+ }
+ if (strcmp(args[cur_arg], "user") == 0) {
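+		/* 15 = 4 (length field) + 4 (protocol version) + 5 ("user\0")
+		 *    + 1 (username terminator) + 1 (final null byte)
+		 */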
+ packetlen = 15 + strlen(args[cur_arg+1]);
+ user = strdup(args[cur_arg+1]);
+
+ var = create_tcpcheck_var(ist("check.username"));
+ if (user == NULL || var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = user;
+ var->data.u.str.data = strlen(user);
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ user = NULL;
+ var = NULL;
+
+ var = create_tcpcheck_var(ist("check.plen"));
+ if (var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_SINT;
+ var->data.u.sint = packetlen;
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ var = NULL;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'user'.\n",
+ file, line, args[0], args[1]);
+ goto error;
+ }
+
+ rs = find_tcpcheck_ruleset("*pgsql-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*pgsql-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_connect((char *[]){"tcp-check", "connect", "default", "linger", ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary-lf", pgsql_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "!rstring", "^E",
+ "min-recv", "5",
+ "error-status", "L7RSP",
+ "on-error", "%[res.payload(6,0)]",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_PGSQL_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 2;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rbinary", "^52000000[A-Z0-9]{2}000000(00|02|03|04|05|06|07|09|0A)",
+ "min-recv", "9",
+ "error-status", "L7STS",
+ "on-success", "PostgreSQL server is ok",
+ "on-error", "PostgreSQL unknown error",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_PGSQL_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 3;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_PGSQL_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free(user);
+ free(var);
+ free_tcpcheck_vars(&rules->preset_vars);
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
+/* Parses the "option mysql-check" proxy keyword */
+int proxy_parse_mysql_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ /* This is an example of a MySQL >=4.0 client Authentication packet kindly provided by Cyril Bonte.
+ * const char mysql40_client_auth_pkt[] = {
+ * "\x0e\x00\x00" // packet length
+ * "\x01" // packet number
+ * "\x00\x00" // client capabilities
+ * "\x00\x00\x01" // max packet
+ * "haproxy\x00" // username (null terminated string)
+ * "\x00" // filler (always 0x00)
+ * "\x01\x00\x00" // packet length
+ * "\x00" // packet number
+ * "\x01" // COM_QUIT command
+ * };
+ */
+ static char mysql40_rsname[] = "*mysql40-check";
+ static char mysql40_req[] = {
+ "%[var(check.header),hex]" /* 3 bytes for the packet length and 1 byte for the sequence ID */
+ "0080" /* client capabilities */
+ "000001" /* max packet */
+ "%[var(check.username),hex]00" /* the username */
+ "00" /* filler (always 0x00) */
+		"010000" /* packet length */
+ "00" /* sequence ID */
+ "01" /* COM_QUIT command */
+ };
+
+ /* This is an example of a MySQL >=4.1 client Authentication packet provided by Nenad Merdanovic.
+ * const char mysql41_client_auth_pkt[] = {
+	 *	"\x0e\x00\x00"		// packet length
+ * "\x01" // packet number
+ * "\x00\x00\x00\x00" // client capabilities
+ * "\x00\x00\x00\x01" // max packet
+ * "\x21" // character set (UTF-8)
+ * char[23] // All zeroes
+ * "haproxy\x00" // username (null terminated string)
+ * "\x00" // filler (always 0x00)
+ * "\x01\x00\x00" // packet length
+ * "\x00" // packet number
+ * "\x01" // COM_QUIT command
+ * };
+ */
+ static char mysql41_rsname[] = "*mysql41-check";
+ static char mysql41_req[] = {
+ "%[var(check.header),hex]" /* 3 bytes for the packet length and 1 byte for the sequence ID */
+ "00820000" /* client capabilities */
+ "00800001" /* max packet */
+ "21" /* character set (UTF-8) */
+		"000000000000000000000000" /* 23 bytes, all zeroes */
+ "0000000000000000000000"
+ "%[var(check.username),hex]00" /* the username */
+ "00" /* filler (always 0x00) */
+		"010000" /* packet length */
+ "00" /* sequence ID */
+ "01" /* COM_QUIT command */
+ };
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ struct tcpcheck_var *var = NULL;
+ char *mysql_rsname = "*mysql-check";
+ char *mysql_req = NULL, *hdr = NULL, *user = NULL, *errmsg = NULL;
+ int index = 0, err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(3, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ cur_arg += 2;
+ if (*args[cur_arg]) {
+ int packetlen, userlen;
+
+ if (strcmp(args[cur_arg], "user") != 0) {
+ ha_alert("parsing [%s:%d] : '%s %s' only supports optional values: 'user' (got '%s').\n",
+ file, line, args[0], args[1], args[cur_arg]);
+ goto error;
+ }
+
+ if (*(args[cur_arg+1]) == 0) {
+ ha_alert("parsing [%s:%d] : '%s %s %s' expects <username> as argument.\n",
+ file, line, args[0], args[1], args[cur_arg]);
+ goto error;
+ }
+
+ hdr = calloc(4, sizeof(*hdr));
+ user = strdup(args[cur_arg+1]);
+ userlen = strlen(args[cur_arg+1]);
+
+ if (hdr == NULL || user == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ if (!*args[cur_arg+2] || strcmp(args[cur_arg+2], "post-41") == 0) {
+ packetlen = userlen + 7 + 27;
+ mysql_req = mysql41_req;
+ mysql_rsname = mysql41_rsname;
+ }
+ else if (strcmp(args[cur_arg+2], "pre-41") == 0) {
+ packetlen = userlen + 7;
+ mysql_req = mysql40_req;
+ mysql_rsname = mysql40_rsname;
+ }
+ else {
+ ha_alert("parsing [%s:%d] : keyword '%s' only supports 'post-41' and 'pre-41' (got '%s').\n",
+ file, line, args[cur_arg], args[cur_arg+2]);
+ goto error;
+ }
+
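+		/* build the 4-byte MySQL packet header: 3-byte little-endian
+		 * payload length followed by the sequence ID (1)
+		 */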
+ hdr[0] = (unsigned char)(packetlen & 0xff);
+ hdr[1] = (unsigned char)((packetlen >> 8) & 0xff);
+ hdr[2] = (unsigned char)((packetlen >> 16) & 0xff);
+ hdr[3] = 1;
+
+ var = create_tcpcheck_var(ist("check.header"));
+ if (var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = hdr;
+ var->data.u.str.data = 4;
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ hdr = NULL;
+ var = NULL;
+
+ var = create_tcpcheck_var(ist("check.username"));
+ if (var == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ var->data.type = SMP_T_STR;
+ var->data.u.str.area = user;
+ var->data.u.str.data = strlen(user);
+ LIST_INIT(&var->list);
+ LIST_APPEND(&rules->preset_vars, &var->list);
+ user = NULL;
+ var = NULL;
+ }
+
+ rs = find_tcpcheck_ruleset(mysql_rsname);
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset(mysql_rsname);
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_connect((char *[]){"tcp-check", "connect", "default", "linger", ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = index++;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ if (mysql_req) {
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary-lf", mysql_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = index++;
+ LIST_APPEND(&rs->rules, &chk->list);
+ }
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_MYSQL_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_mysql_expect_iniths;
+ chk->index = index++;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ if (mysql_req) {
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_MYSQL_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_mysql_expect_ok;
+ chk->index = index++;
+ LIST_APPEND(&rs->rules, &chk->list);
+ }
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_MYSQL_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free(hdr);
+ free(user);
+ free(var);
+ free_tcpcheck_vars(&rules->preset_vars);
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
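+/* Parses the "option ldap-check" proxy keyword */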
+int proxy_parse_ldap_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
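+	/* LDAPv3 anonymous BindRequest in hex: SEQUENCE { messageID 1,
+	 * bindRequest { version 3, name "", authentication simple "" } }
+	 */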
+ static char *ldap_req = "300C020101600702010304008000";
+
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+ rs = find_tcpcheck_ruleset("*ldap-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*ldap-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary", ldap_req, ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "rbinary", "^30",
+ "min-recv", "14",
+ "on-error", "Not LDAPv3 protocol",
+ ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_LDAP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_LDAP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_ldap_expect_bindrsp;
+ chk->index = 2;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_LDAP_CHK;
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
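+/* Parses the "option spop-check" proxy keyword */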
+int proxy_parse_spop_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *spop_req = NULL;
+ char *errmsg = NULL;
+ int spop_len = 0, err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags = 0;
+
+
+ rs = find_tcpcheck_ruleset("*spop-check");
+ if (rs)
+ goto ruleset_found;
+
+ rs = create_tcpcheck_ruleset("*spop-check");
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+
+ if (spoe_prepare_healthcheck_request(&spop_req, &spop_len) == -1) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ chunk_reset(&trash);
+ dump_binary(&trash, spop_req, spop_len);
+ trash.area[trash.data] = '\0';
+
+ chk = parse_tcpcheck_send((char *[]){"tcp-check", "send-binary", b_head(&trash), ""},
+ 1, curpx, &rs->rules, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->index = 0;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ chk = parse_tcpcheck_expect((char *[]){"tcp-check", "expect", "custom", "min-recv", "4", ""},
+ 1, curpx, &rs->rules, TCPCHK_RULES_SPOP_CHK, file, line, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : %s\n", file, line, errmsg);
+ goto error;
+ }
+ chk->expect.custom = tcpcheck_spop_expect_agenthello;
+ chk->index = 1;
+ LIST_APPEND(&rs->rules, &chk->list);
+
+ ruleset_found:
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_SPOP_CHK;
+
+ out:
+ free(spop_req);
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+
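+/* Builds the HTTP send rule used by "option httpchk" from its optional
+ * <method>, <uri> and <version> arguments. Returns the rule on success and
+ * NULL on error, with <errmsg> filled.
+ */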
+static struct tcpcheck_rule *proxy_parse_httpchk_req(char **args, int cur_arg, struct proxy *px, char **errmsg)
+{
+ struct tcpcheck_rule *chk = NULL;
+ struct tcpcheck_http_hdr *hdr = NULL;
+ char *meth = NULL, *uri = NULL, *vsn = NULL;
+ char *hdrs, *body;
+
+ hdrs = (*args[cur_arg+2] ? strstr(args[cur_arg+2], "\r\n") : NULL);
+ body = (*args[cur_arg+2] ? strstr(args[cur_arg+2], "\r\n\r\n") : NULL);
+ if (hdrs || body) {
+ memprintf(errmsg, "hiding headers or body at the end of the version string is unsupported."
+			  " Use the 'http-check send' directive instead.");
+ goto error;
+ }
+
+ chk = calloc(1, sizeof(*chk));
+ if (!chk) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ chk->action = TCPCHK_ACT_SEND;
+ chk->send.type = TCPCHK_SEND_HTTP;
+ chk->send.http.flags |= TCPCHK_SND_HTTP_FROM_OPT;
+ chk->send.http.meth.meth = HTTP_METH_OPTIONS;
+ LIST_INIT(&chk->send.http.hdrs);
+
+ /* Copy the method, uri and version */
+ if (*args[cur_arg]) {
+ if (!*args[cur_arg+1])
+ uri = args[cur_arg];
+ else
+ meth = args[cur_arg];
+ }
+ if (*args[cur_arg+1])
+ uri = args[cur_arg+1];
+ if (*args[cur_arg+2])
+ vsn = args[cur_arg+2];
+
+ if (meth) {
+ chk->send.http.meth.meth = find_http_meth(meth, strlen(meth));
+ chk->send.http.meth.str.area = strdup(meth);
+ chk->send.http.meth.str.data = strlen(meth);
+ if (!chk->send.http.meth.str.area) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ if (uri) {
+ chk->send.http.uri = ist(strdup(uri));
+ if (!isttest(chk->send.http.uri)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+ if (vsn) {
+ chk->send.http.vsn = ist(strdup(vsn));
+ if (!isttest(chk->send.http.vsn)) {
+ memprintf(errmsg, "out of memory");
+ goto error;
+ }
+ }
+
+ return chk;
+
+ error:
+ free_tcpcheck_http_hdr(hdr);
+ free_tcpcheck(chk, 0);
+ return NULL;
+}
+
+/* Parses the "option httpchk" proxy keyword */
+int proxy_parse_httpchk_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ struct tcpcheck_rule *chk;
+ char *errmsg = NULL;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(3, 1, file, line, args, &err_code))
+ goto out;
+
+ chk = proxy_parse_httpchk_req(args, cur_arg+2, curpx, &errmsg);
+ if (!chk) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s.\n", file, line, args[0], args[1], errmsg);
+ goto error;
+ }
+ if (errmsg) {
+ ha_warning("parsing [%s:%d]: '%s %s' : %s\n", file, line, args[0], args[1], errmsg);
+ err_code |= ERR_WARN;
+ ha_free(&errmsg);
+ }
+
+ no_request:
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = NULL;
+ rules->flags |= TCPCHK_SND_HTTP_FROM_OPT;
+
+ /* Deduce the ruleset name from the proxy info */
+ chunk_printf(&trash, "*http-check-%s_%s-%d",
+ ((curpx == defpx) ? "defaults" : curpx->id),
+ curpx->conf.file, curpx->conf.line);
+
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ rs = create_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ }
+
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_HTTP_CHK;
+ if (!tcpcheck_add_http_rule(chk, rules, &errmsg)) {
+ ha_alert("parsing [%s:%d] : '%s %s' : %s.\n", file, line, args[0], args[1], errmsg);
+ rules->list = NULL;
+ goto error;
+ }
+
+ out:
+ free(errmsg);
+ return err_code;
+
+ error:
+ free_tcpcheck_ruleset(rs);
+ free_tcpcheck(chk, 0);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+/* Parses the "option tcp-check" proxy keyword */
+int proxy_parse_tcp_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx,
+ const char *file, int line)
+{
+ struct tcpcheck_ruleset *rs = NULL;
+ struct tcpcheck_rules *rules = &curpx->tcpcheck_rules;
+ int err_code = 0;
+
+ if (warnifnotcap(curpx, PR_CAP_BE, file, line, args[cur_arg+1], NULL))
+ err_code |= ERR_WARN;
+
+ if (alertif_too_many_args_idx(0, 1, file, line, args, &err_code))
+ goto out;
+
+ curpx->options2 &= ~PR_O2_CHK_ANY;
+ curpx->options2 |= PR_O2_TCPCHK_CHK;
+
+ if ((rules->flags & TCPCHK_RULES_PROTO_CHK) == TCPCHK_RULES_TCP_CHK) {
+		/* If a tcp-check ruleset is already set, do nothing */
+ if (rules->list)
+ goto out;
+
+ /* If a tcp-check ruleset is waiting to be used for the current proxy,
+ * get it.
+ */
+ if (rules->flags & TCPCHK_RULES_UNUSED_TCP_RS)
+ goto curpx_ruleset;
+
+ /* Otherwise, try to get the tcp-check ruleset of the default proxy */
+ chunk_printf(&trash, "*tcp-check-defaults_%s-%d", defpx->conf.file, defpx->conf.line);
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs)
+ goto ruleset_found;
+ }
+
+ curpx_ruleset:
+ /* Deduce the ruleset name from the proxy info */
+ chunk_printf(&trash, "*tcp-check-%s_%s-%d",
+ ((curpx == defpx) ? "defaults" : curpx->id),
+ curpx->conf.file, curpx->conf.line);
+
+ rs = find_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ rs = create_tcpcheck_ruleset(b_orig(&trash));
+ if (rs == NULL) {
+ ha_alert("parsing [%s:%d] : out of memory.\n", file, line);
+ goto error;
+ }
+ }
+
+ ruleset_found:
+ free_tcpcheck_vars(&rules->preset_vars);
+ rules->list = &rs->rules;
+ rules->flags &= ~(TCPCHK_RULES_PROTO_CHK|TCPCHK_RULES_UNUSED_RS);
+ rules->flags |= TCPCHK_RULES_TCP_CHK;
+
+ out:
+ return err_code;
+
+ error:
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+}
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_LISTEN, "http-check", proxy_parse_httpcheck },
+ { CFG_LISTEN, "tcp-check", proxy_parse_tcpcheck },
+ { 0, NULL, NULL },
+}};
+
+REGISTER_POST_PROXY_CHECK(check_proxy_tcpcheck);
+REGISTER_PROXY_DEINIT(deinit_proxy_tcpcheck);
+REGISTER_POST_DEINIT(deinit_tcpchecks);
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
diff --git a/src/thread.c b/src/thread.c
new file mode 100644
index 0000000..ab4342d
--- /dev/null
+++ b/src/thread.c
@@ -0,0 +1,1864 @@
+/*
+ * functions about threads.
+ *
+ * Copyright (C) 2017 Christopher Faulet - cfaulet@haproxy.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdlib.h>
+
+#include <signal.h>
+#include <unistd.h>
+#ifdef _POSIX_PRIORITY_SCHEDULING
+#include <sched.h>
+#endif
+
+#ifdef USE_THREAD
+# include <pthread.h>
+#endif
+
+#ifdef USE_CPU_AFFINITY
+# include <sched.h>
+# if defined(__FreeBSD__) || defined(__DragonFly__)
+# include <sys/param.h>
+# ifdef __FreeBSD__
+# include <sys/cpuset.h>
+# endif
+# include <pthread_np.h>
+# endif
+# ifdef __APPLE__
+# include <mach/mach_types.h>
+# include <mach/thread_act.h>
+# include <mach/thread_policy.h>
+# endif
+# include <haproxy/cpuset.h>
+#endif
+
+#include <haproxy/cfgparse.h>
+#include <haproxy/clock.h>
+#include <haproxy/fd.h>
+#include <haproxy/global.h>
+#include <haproxy/log.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+struct tgroup_info ha_tgroup_info[MAX_TGROUPS] = { };
+THREAD_LOCAL const struct tgroup_info *tg = &ha_tgroup_info[0];
+
+struct thread_info ha_thread_info[MAX_THREADS] = { };
+THREAD_LOCAL const struct thread_info *ti = &ha_thread_info[0];
+
+struct tgroup_ctx ha_tgroup_ctx[MAX_TGROUPS] = { };
+THREAD_LOCAL struct tgroup_ctx *tg_ctx = &ha_tgroup_ctx[0];
+
+struct thread_ctx ha_thread_ctx[MAX_THREADS] = { };
+THREAD_LOCAL struct thread_ctx *th_ctx = &ha_thread_ctx[0];
+
+#ifdef USE_THREAD
+
+volatile unsigned long all_tgroups_mask __read_mostly = 1; // nbtgroup 1 assumed by default
+volatile unsigned int rdv_requests = 0; // total number of threads requesting RDV
+volatile unsigned int isolated_thread = ~0; // ID of the isolated thread, or ~0 when none
+THREAD_LOCAL unsigned int tgid = 1; // thread ID starts at 1
+THREAD_LOCAL unsigned int tid = 0;
+int thread_cpus_enabled_at_boot = 1;
+static pthread_t ha_pthread[MAX_THREADS] = { };
+
+/* Marks the thread as harmless until the last thread using the rendez-vous
+ * point quits. Given that we can wait for a long time, sched_yield() is
+ * used when available to offer the CPU resources to competing threads if
+ * needed.
+ */
+void thread_harmless_till_end()
+{
+ _HA_ATOMIC_OR(&tg_ctx->threads_harmless, ti->ltid_bit);
+ while (_HA_ATOMIC_LOAD(&rdv_requests) != 0) {
+ ha_thread_relax();
+ }
+}
+
+/* Isolates the current thread : request the ability to work while all other
+ * threads are harmless, as defined by thread_harmless_now() (i.e. they're not
+ * going to touch any visible memory area). Only returns once all of them are
+ * harmless, with the current thread's bit in &tg_ctx->threads_harmless cleared.
+ * Needs to be completed using thread_release().
+ */
+void thread_isolate()
+{
+ uint tgrp, thr;
+
+ _HA_ATOMIC_OR(&tg_ctx->threads_harmless, ti->ltid_bit);
+ __ha_barrier_atomic_store();
+ _HA_ATOMIC_INC(&rdv_requests);
+
+ /* wait for all threads to become harmless. They cannot change their
+ * mind once seen thanks to rdv_requests above, unless they pass in
+ * front of us. For this reason we proceed in 4 steps:
+ * 1) wait for all threads to declare themselves harmless
+ * 2) try to grab the isolated_thread exclusivity
+ * 3) verify again that all threads are harmless, since another one
+ * that was isolating between 1 and 2 could have dropped its
+ * harmless state there.
+	 * 4) drop the harmless flag (which also has the benefit of leaving
+	 *    all other threads waiting on reads instead of writes).
+ */
+ while (1) {
+ for (tgrp = 0; tgrp < global.nbtgroups; tgrp++) {
+ do {
+ ulong te = _HA_ATOMIC_LOAD(&ha_tgroup_info[tgrp].threads_enabled);
+ ulong th = _HA_ATOMIC_LOAD(&ha_tgroup_ctx[tgrp].threads_harmless);
+
+ if ((th & te) == te)
+ break;
+ ha_thread_relax();
+ } while (1);
+ }
+
+ /* all other ones are harmless. isolated_thread will contain
+ * ~0U if no other one competes, !=tid if another one got it,
+ * tid if the current thread already grabbed it on the previous
+ * round.
+ */
+ thr = _HA_ATOMIC_LOAD(&isolated_thread);
+ if (thr == tid)
+ break; // we won and we're certain everyone is harmless
+
+ /* try to win the race against others */
+ if (thr != ~0U || !_HA_ATOMIC_CAS(&isolated_thread, &thr, tid))
+ ha_thread_relax();
+ }
+
+ /* the thread is no longer harmless as it runs */
+ _HA_ATOMIC_AND(&tg_ctx->threads_harmless, ~ti->ltid_bit);
+
+ /* the thread is isolated until it calls thread_release() which will
+ * 1) reset isolated_thread to ~0;
+ * 2) decrement rdv_requests.
+ */
+}
+
+/* Isolates the current thread : request the ability to work while all other
+ * threads are idle, as defined by thread_idle_now(). It only returns once
+ * all of them are both harmless and idle, with the current thread's bit in
+ * &tg_ctx->threads_harmless and &tg_ctx->threads_idle cleared. Needs to be completed using
+ * thread_release(). By doing so the thread also engages in being safe against
+ * any actions that other threads might be about to start under the same
+ * conditions. This specifically targets destruction of any internal structure,
+ * which implies that the current thread may not hold references to any object.
+ *
+ * Note that a concurrent thread_isolate() will usually win against
+ * thread_isolate_full() as it doesn't consider the threads_idle mask, allowing it to
+ * get back to the poller or any other fully idle location, that will
+ * ultimately release this one.
+ */
+void thread_isolate_full()
+{
+ uint tgrp, thr;
+
+ _HA_ATOMIC_OR(&tg_ctx->threads_idle, ti->ltid_bit);
+ _HA_ATOMIC_OR(&tg_ctx->threads_harmless, ti->ltid_bit);
+ __ha_barrier_atomic_store();
+ _HA_ATOMIC_INC(&rdv_requests);
+
+ /* wait for all threads to become harmless. They cannot change their
+ * mind once seen thanks to rdv_requests above, unless they pass in
+ * front of us. For this reason we proceed in 4 steps:
+ * 1) wait for all threads to declare themselves harmless
+ * 2) try to grab the isolated_thread exclusivity
+ * 3) verify again that all threads are harmless, since another one
+ * that was isolating between 1 and 2 could have dropped its
+ * harmless state there.
+	 * 4) drop the harmless flag (which also has the benefit of leaving
+	 *    all other threads waiting on reads instead of writes).
+ */
+ while (1) {
+ for (tgrp = 0; tgrp < global.nbtgroups; tgrp++) {
+ do {
+ ulong te = _HA_ATOMIC_LOAD(&ha_tgroup_info[tgrp].threads_enabled);
+ ulong th = _HA_ATOMIC_LOAD(&ha_tgroup_ctx[tgrp].threads_harmless);
+ ulong id = _HA_ATOMIC_LOAD(&ha_tgroup_ctx[tgrp].threads_idle);
+
+ if ((th & id & te) == te)
+ break;
+ ha_thread_relax();
+ } while (1);
+ }
+
+ /* all other ones are harmless and idle. isolated_thread will
+ * contain ~0U if no other one competes, !=tid if another one
+ * got it, tid if the current thread already grabbed it on the
+ * previous round.
+ */
+ thr = _HA_ATOMIC_LOAD(&isolated_thread);
+ if (thr == tid)
+ break; // we won and we're certain everyone is harmless
+
+ if (thr != ~0U || !_HA_ATOMIC_CAS(&isolated_thread, &thr, tid))
+ ha_thread_relax();
+ }
+
+	/* we're neither idle nor harmless anymore at this point. Other threads
+	 * waiting on this condition will need to wait until our next pass to
+ * the poller, or our next call to thread_isolate_full().
+ */
+ _HA_ATOMIC_AND(&tg_ctx->threads_idle, ~ti->ltid_bit);
+ _HA_ATOMIC_AND(&tg_ctx->threads_harmless, ~ti->ltid_bit);
+
+ /* the thread is isolated until it calls thread_release() which will
+ * 1) reset isolated_thread to ~0;
+ * 2) decrement rdv_requests.
+ */
+}
+
+/* Cancels the effect of thread_isolate() by resetting the ID of the isolated
+ * thread and decrementing the number of RDV requesters. This immediately allows
+ * other threads to expect to be executed, though they will first have to wait
+ * for this thread to become harmless again (possibly by reaching the poller
+ * again).
+ */
+void thread_release()
+{
+ HA_ATOMIC_STORE(&isolated_thread, ~0U);
+ HA_ATOMIC_DEC(&rdv_requests);
+}
+
+/* Sets up threads, signals and masks, and starts threads 2 and above.
+ * Does nothing when threads are disabled.
+ */
+void setup_extra_threads(void *(*handler)(void *))
+{
+ sigset_t blocked_sig, old_sig;
+ int i;
+
+ /* ensure the signals will be blocked in every thread */
+ sigfillset(&blocked_sig);
+ sigdelset(&blocked_sig, SIGPROF);
+ sigdelset(&blocked_sig, SIGBUS);
+ sigdelset(&blocked_sig, SIGFPE);
+ sigdelset(&blocked_sig, SIGILL);
+ sigdelset(&blocked_sig, SIGSEGV);
+ pthread_sigmask(SIG_SETMASK, &blocked_sig, &old_sig);
+
+	/* Create the nbthread-1 remaining threads. The first thread is the current process */
+ ha_pthread[0] = pthread_self();
+ for (i = 1; i < global.nbthread; i++)
+ pthread_create(&ha_pthread[i], NULL, handler, &ha_thread_info[i]);
+}
+
+/* waits for all threads to terminate. Does nothing when threads are
+ * disabled.
+ */
+void wait_for_threads_completion()
+{
+ int i;
+
+ /* Wait the end of other threads */
+ for (i = 1; i < global.nbthread; i++)
+ pthread_join(ha_pthread[i], NULL);
+
+#if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+ show_lock_stats();
+#endif
+}
+
+/* Tries to set the current thread's CPU affinity according to the cpu_map */
+void set_thread_cpu_affinity()
+{
+#if defined(USE_CPU_AFFINITY)
+ /* no affinity setting for the master process */
+ if (master)
+ return;
+
+ /* Now the CPU affinity for all threads */
+ if (ha_cpuset_count(&cpu_map[tgid - 1].thread[ti->ltid])) {/* only do this if the thread has a THREAD map */
+# if defined(__APPLE__)
+ /* Note: this API is limited to the first 32/64 CPUs */
+ unsigned long set = cpu_map[tgid - 1].thread[ti->ltid].cpuset;
+ int j;
+
+ while ((j = ffsl(set)) > 0) {
+ thread_affinity_policy_data_t cpu_set = { j - 1 };
+ thread_port_t mthread;
+
+ mthread = pthread_mach_thread_np(ha_pthread[tid]);
+ thread_policy_set(mthread, THREAD_AFFINITY_POLICY, (thread_policy_t)&cpu_set, 1);
+ set &= ~(1UL << (j - 1));
+ }
+# else
+ struct hap_cpuset *set = &cpu_map[tgid - 1].thread[ti->ltid];
+
+ pthread_setaffinity_np(ha_pthread[tid], sizeof(set->cpuset), &set->cpuset);
+# endif
+ }
+#endif /* USE_CPU_AFFINITY */
+}
+
+/* Retrieves the opaque pthread_t of thread <thr> cast to an unsigned long long
+ * since POSIX took great care of not specifying its representation, making it
+ * hard to export for post-mortem analysis. For this reason we copy it into a
+ * union and will use the smallest scalar type at least as large as its size,
+ * which will keep endianness and alignment for all regular sizes. As a last
+ * resort we end up with a long long aligned to the first bytes in memory, which
+ * will be endian-dependent if pthread_t is larger than a long long (not seen
+ * yet).
+ */
+unsigned long long ha_get_pthread_id(unsigned int thr)
+{
+ union {
+ pthread_t t;
+ unsigned long long ll;
+ unsigned int i;
+ unsigned short s;
+ unsigned char c;
+ } u = { 0 };
+
+ u.t = ha_pthread[thr];
+
+ if (sizeof(u.t) <= sizeof(u.c))
+ return u.c;
+ else if (sizeof(u.t) <= sizeof(u.s))
+ return u.s;
+ else if (sizeof(u.t) <= sizeof(u.i))
+ return u.i;
+ return u.ll;
+}
+
+/* send signal <sig> to thread <thr> */
+void ha_tkill(unsigned int thr, int sig)
+{
+ pthread_kill(ha_pthread[thr], sig);
+}
+
+/* send signal <sig> to all threads. The calling thread is signaled last in
+ * order to allow all threads to synchronize in the handler.
+ */
+void ha_tkillall(int sig)
+{
+ unsigned int thr;
+
+ for (thr = 0; thr < global.nbthread; thr++) {
+ if (!(ha_thread_info[thr].tg->threads_enabled & ha_thread_info[thr].ltid_bit))
+ continue;
+ if (thr == tid)
+ continue;
+ pthread_kill(ha_pthread[thr], sig);
+ }
+ raise(sig);
+}
+
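+/* Relaxes the calling thread, yielding the CPU to other threads when the OS
+ * supports it, or issuing a CPU relax instruction otherwise.
+ */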
+void ha_thread_relax(void)
+{
+#ifdef _POSIX_PRIORITY_SCHEDULING
+ sched_yield();
+#else
+ pl_cpu_relax();
+#endif
+}
+
+/* these calls are used as callbacks at init time when debugging is on */
+void ha_spin_init(HA_SPINLOCK_T *l)
+{
+ HA_SPIN_INIT(l);
+}
+
+/* these calls are used as callbacks at init time when debugging is on */
+void ha_rwlock_init(HA_RWLOCK_T *l)
+{
+ HA_RWLOCK_INIT(l);
+}
+
+/* returns the number of CPUs the current process is enabled to run on,
+ * regardless of any MAX_THREADS limitation.
+ */
+static int thread_cpus_enabled()
+{
+ int ret = 1;
+
+#ifdef USE_CPU_AFFINITY
+#if defined(__linux__) && defined(CPU_COUNT)
+ cpu_set_t mask;
+
+ if (sched_getaffinity(0, sizeof(mask), &mask) == 0)
+ ret = CPU_COUNT(&mask);
+#elif defined(__FreeBSD__) && defined(USE_CPU_AFFINITY)
+ cpuset_t cpuset;
+ if (cpuset_getaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1,
+ sizeof(cpuset), &cpuset) == 0)
+ ret = CPU_COUNT(&cpuset);
+#elif defined(__APPLE__)
+ ret = (int)sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+#endif
+ ret = MAX(ret, 1);
+ return ret;
+}
+
+/* Returns 1 if the cpu set is currently restricted for the process else 0.
+ * Currently only implemented for the Linux platform.
+ */
+int thread_cpu_mask_forced()
+{
+#if defined(__linux__)
+ const int cpus_avail = sysconf(_SC_NPROCESSORS_ONLN);
+ return cpus_avail != thread_cpus_enabled();
+#else
+ return 0;
+#endif
+}
+
+/* Below come the lock-debugging functions */
+
+#if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+
+struct lock_stat lock_stats[LOCK_LABELS];
+
+/* this is only used below */
+static const char *lock_label(enum lock_label label)
+{
+ switch (label) {
+ case TASK_RQ_LOCK: return "TASK_RQ";
+ case TASK_WQ_LOCK: return "TASK_WQ";
+ case LISTENER_LOCK: return "LISTENER";
+ case PROXY_LOCK: return "PROXY";
+ case SERVER_LOCK: return "SERVER";
+ case LBPRM_LOCK: return "LBPRM";
+ case SIGNALS_LOCK: return "SIGNALS";
+ case STK_TABLE_LOCK: return "STK_TABLE";
+ case STK_SESS_LOCK: return "STK_SESS";
+ case APPLETS_LOCK: return "APPLETS";
+ case PEER_LOCK: return "PEER";
+ case SHCTX_LOCK: return "SHCTX";
+ case SSL_LOCK: return "SSL";
+ case SSL_GEN_CERTS_LOCK: return "SSL_GEN_CERTS";
+ case PATREF_LOCK: return "PATREF";
+ case PATEXP_LOCK: return "PATEXP";
+ case VARS_LOCK: return "VARS";
+ case COMP_POOL_LOCK: return "COMP_POOL";
+ case LUA_LOCK: return "LUA";
+ case NOTIF_LOCK: return "NOTIF";
+ case SPOE_APPLET_LOCK: return "SPOE_APPLET";
+ case DNS_LOCK: return "DNS";
+ case PID_LIST_LOCK: return "PID_LIST";
+ case EMAIL_ALERTS_LOCK: return "EMAIL_ALERTS";
+ case PIPES_LOCK: return "PIPES";
+ case TLSKEYS_REF_LOCK: return "TLSKEYS_REF";
+ case AUTH_LOCK: return "AUTH";
+ case RING_LOCK: return "RING";
+ case DICT_LOCK: return "DICT";
+ case PROTO_LOCK: return "PROTO";
+ case QUEUE_LOCK: return "QUEUE";
+ case CKCH_LOCK: return "CKCH";
+ case SNI_LOCK: return "SNI";
+ case SSL_SERVER_LOCK: return "SSL_SERVER";
+ case SFT_LOCK: return "SFT";
+ case IDLE_CONNS_LOCK: return "IDLE_CONNS";
+ case OCSP_LOCK: return "OCSP";
+ case QC_CID_LOCK: return "QC_CID";
+ case CACHE_LOCK: return "CACHE";
+ case OTHER_LOCK: return "OTHER";
+ case DEBUG1_LOCK: return "DEBUG1";
+ case DEBUG2_LOCK: return "DEBUG2";
+ case DEBUG3_LOCK: return "DEBUG3";
+ case DEBUG4_LOCK: return "DEBUG4";
+ case DEBUG5_LOCK: return "DEBUG5";
+ case LOCK_LABELS: break; /* keep compiler happy */
+ };
+	/* the only way to get here is through an internal bug */
+ abort();
+}
+
+void show_lock_stats()
+{
+ int lbl;
+
+ for (lbl = 0; lbl < LOCK_LABELS; lbl++) {
+ if (!lock_stats[lbl].num_write_locked &&
+ !lock_stats[lbl].num_seek_locked &&
+ !lock_stats[lbl].num_read_locked) {
+ fprintf(stderr,
+ "Stats about Lock %s: not used\n",
+ lock_label(lbl));
+ continue;
+ }
+
+ fprintf(stderr,
+ "Stats about Lock %s: \n",
+ lock_label(lbl));
+
+ if (lock_stats[lbl].num_write_locked)
+ fprintf(stderr,
+ "\t # write lock : %llu\n"
+ "\t # write unlock: %llu (%lld)\n"
+ "\t # wait time for write : %.3f msec\n"
+ "\t # wait time for write/lock: %.3f nsec\n",
+ (ullong)lock_stats[lbl].num_write_locked,
+ (ullong)lock_stats[lbl].num_write_unlocked,
+ (llong)(lock_stats[lbl].num_write_unlocked - lock_stats[lbl].num_write_locked),
+ (double)lock_stats[lbl].nsec_wait_for_write / 1000000.0,
+ lock_stats[lbl].num_write_locked ? ((double)lock_stats[lbl].nsec_wait_for_write / (double)lock_stats[lbl].num_write_locked) : 0);
+
+ if (lock_stats[lbl].num_seek_locked)
+ fprintf(stderr,
+ "\t # seek lock : %llu\n"
+ "\t # seek unlock : %llu (%lld)\n"
+ "\t # wait time for seek : %.3f msec\n"
+ "\t # wait time for seek/lock : %.3f nsec\n",
+ (ullong)lock_stats[lbl].num_seek_locked,
+ (ullong)lock_stats[lbl].num_seek_unlocked,
+ (llong)(lock_stats[lbl].num_seek_unlocked - lock_stats[lbl].num_seek_locked),
+ (double)lock_stats[lbl].nsec_wait_for_seek / 1000000.0,
+ lock_stats[lbl].num_seek_locked ? ((double)lock_stats[lbl].nsec_wait_for_seek / (double)lock_stats[lbl].num_seek_locked) : 0);
+
+ if (lock_stats[lbl].num_read_locked)
+ fprintf(stderr,
+ "\t # read lock : %llu\n"
+ "\t # read unlock : %llu (%lld)\n"
+ "\t # wait time for read : %.3f msec\n"
+ "\t # wait time for read/lock : %.3f nsec\n",
+ (ullong)lock_stats[lbl].num_read_locked,
+ (ullong)lock_stats[lbl].num_read_unlocked,
+ (llong)(lock_stats[lbl].num_read_unlocked - lock_stats[lbl].num_read_locked),
+ (double)lock_stats[lbl].nsec_wait_for_read / 1000000.0,
+ lock_stats[lbl].num_read_locked ? ((double)lock_stats[lbl].nsec_wait_for_read / (double)lock_stats[lbl].num_read_locked) : 0);
+ }
+}
+
+void __ha_rwlock_init(struct ha_rwlock *l)
+{
+ memset(l, 0, sizeof(struct ha_rwlock));
+ __RWLOCK_INIT(&l->lock);
+}
+
+void __ha_rwlock_destroy(struct ha_rwlock *l)
+{
+ __RWLOCK_DESTROY(&l->lock);
+ memset(l, 0, sizeof(struct ha_rwlock));
+}
+
+
+void __ha_rwlock_wrlock(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+
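+	/* the thread would deadlock by taking the lock again in any mode:
+	 * better crash early than hang
+	 */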
+ if ((st->cur_readers | st->cur_seeker | st->cur_writer) & tbit)
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_writers, tbit);
+
+ start_time = now_mono_time();
+ __RWLOCK_WRLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_write, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+ st->cur_writer = tbit;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&st->wait_writers, ~tbit);
+}
+
+int __ha_rwlock_trywrlock(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+ int r;
+
+ if ((st->cur_readers | st->cur_seeker | st->cur_writer) & tbit)
+ abort();
+
+	/* We set the waiting-writer flag because trywrlock could wait for readers to quit */
+ HA_ATOMIC_OR(&st->wait_writers, tbit);
+
+ start_time = now_mono_time();
+ r = __RWLOCK_TRYWRLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_write, (now_mono_time() - start_time));
+ if (unlikely(r)) {
+ HA_ATOMIC_AND(&st->wait_writers, ~tbit);
+ return r;
+ }
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+ st->cur_writer = tbit;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&st->wait_writers, ~tbit);
+
+ return 0;
+}
+
+void __ha_rwlock_wrunlock(enum lock_label lbl,struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+
+ if (unlikely(!(st->cur_writer & tbit))) {
+ /* the thread is not owning the lock for write */
+ abort();
+ }
+
+ st->cur_writer = 0;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ __RWLOCK_WRUNLOCK(&l->lock);
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_unlocked);
+}
+
+void __ha_rwlock_rdlock(enum lock_label lbl,struct ha_rwlock *l)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+
+ if ((st->cur_readers | st->cur_seeker | st->cur_writer) & tbit)
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_readers, tbit);
+
+ start_time = now_mono_time();
+ __RWLOCK_RDLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_read, (now_mono_time() - start_time));
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_locked);
+
+ HA_ATOMIC_OR(&st->cur_readers, tbit);
+
+ HA_ATOMIC_AND(&st->wait_readers, ~tbit);
+}
+
+int __ha_rwlock_tryrdlock(enum lock_label lbl,struct ha_rwlock *l)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ int r;
+
+ if ((st->cur_readers | st->cur_seeker | st->cur_writer) & tbit)
+ abort();
+
+ /* try read should never wait */
+ r = __RWLOCK_TRYRDLOCK(&l->lock);
+ if (unlikely(r))
+ return r;
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_locked);
+
+ HA_ATOMIC_OR(&st->cur_readers, tbit);
+
+ return 0;
+}
+
+void __ha_rwlock_rdunlock(enum lock_label lbl,struct ha_rwlock *l)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+
+ if (unlikely(!(st->cur_readers & tbit))) {
+ /* the thread is not owning the lock for read */
+ abort();
+ }
+
+ HA_ATOMIC_AND(&st->cur_readers, ~tbit);
+
+ __RWLOCK_RDUNLOCK(&l->lock);
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_unlocked);
+}
+
+void __ha_rwlock_wrtord(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+
+ if ((st->cur_readers | st->cur_seeker) & tbit)
+ abort();
+
+ if (!(st->cur_writer & tbit))
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_readers, tbit);
+
+ start_time = now_mono_time();
+ __RWLOCK_WRTORD(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_read, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_locked);
+
+ HA_ATOMIC_OR(&st->cur_readers, tbit);
+ HA_ATOMIC_AND(&st->cur_writer, ~tbit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&st->wait_readers, ~tbit);
+}
+
+void __ha_rwlock_wrtosk(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+
+ if ((st->cur_readers | st->cur_seeker) & tbit)
+ abort();
+
+ if (!(st->cur_writer & tbit))
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_seekers, tbit);
+
+ start_time = now_mono_time();
+ __RWLOCK_WRTOSK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_seek, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_locked);
+
+ HA_ATOMIC_OR(&st->cur_seeker, tbit);
+ HA_ATOMIC_AND(&st->cur_writer, ~tbit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&st->wait_seekers, ~tbit);
+}
+
+void __ha_rwlock_sklock(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+
+ if ((st->cur_readers | st->cur_seeker | st->cur_writer) & tbit)
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_seekers, tbit);
+
+ start_time = now_mono_time();
+ __RWLOCK_SKLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_seek, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_locked);
+
+ HA_ATOMIC_OR(&st->cur_seeker, tbit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&st->wait_seekers, ~tbit);
+}
+
+void __ha_rwlock_sktowr(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+
+ if ((st->cur_readers | st->cur_writer) & tbit)
+ abort();
+
+ if (!(st->cur_seeker & tbit))
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_writers, tbit);
+
+ start_time = now_mono_time();
+ __RWLOCK_SKTOWR(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_write, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+ HA_ATOMIC_OR(&st->cur_writer, tbit);
+ HA_ATOMIC_AND(&st->cur_seeker, ~tbit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&st->wait_writers, ~tbit);
+}
+
+void __ha_rwlock_sktord(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+
+ if ((st->cur_readers | st->cur_writer) & tbit)
+ abort();
+
+ if (!(st->cur_seeker & tbit))
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_readers, tbit);
+
+ start_time = now_mono_time();
+ __RWLOCK_SKTORD(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_read, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_read_locked);
+
+ HA_ATOMIC_OR(&st->cur_readers, tbit);
+ HA_ATOMIC_AND(&st->cur_seeker, ~tbit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&st->wait_readers, ~tbit);
+}
+
+void __ha_rwlock_skunlock(enum lock_label lbl,struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ if (!(st->cur_seeker & tbit))
+ abort();
+
+ HA_ATOMIC_AND(&st->cur_seeker, ~tbit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ __RWLOCK_SKUNLOCK(&l->lock);
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_unlocked);
+}
+
+int __ha_rwlock_trysklock(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+ int r;
+
+ if ((st->cur_readers | st->cur_seeker | st->cur_writer) & tbit)
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_seekers, tbit);
+
+ start_time = now_mono_time();
+ r = __RWLOCK_TRYSKLOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_seek, (now_mono_time() - start_time));
+
+ if (likely(!r)) {
+ /* got the lock ! */
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_locked);
+ HA_ATOMIC_OR(&st->cur_seeker, tbit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+ }
+
+ HA_ATOMIC_AND(&st->wait_seekers, ~tbit);
+ return r;
+}
+
+int __ha_rwlock_tryrdtosk(enum lock_label lbl, struct ha_rwlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_rwlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+ int r;
+
+ if ((st->cur_writer | st->cur_seeker) & tbit)
+ abort();
+
+ if (!(st->cur_readers & tbit))
+ abort();
+
+ HA_ATOMIC_OR(&st->wait_seekers, tbit);
+
+ start_time = now_mono_time();
+ r = __RWLOCK_TRYRDTOSK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_seek, (now_mono_time() - start_time));
+
+ if (likely(!r)) {
+ /* got the lock ! */
+ HA_ATOMIC_INC(&lock_stats[lbl].num_seek_locked);
+ HA_ATOMIC_OR(&st->cur_seeker, tbit);
+ HA_ATOMIC_AND(&st->cur_readers, ~tbit);
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+ }
+
+ HA_ATOMIC_AND(&st->wait_seekers, ~tbit);
+ return r;
+}
+
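+/* Editor's note: illustrative sketch, not part of the upstream sources. It
+ * shows a typical use of the seek primitives above: perform the lookup under
+ * a seek lock (other readers still run, other seekers wait), then upgrade to
+ * a write lock only when a modification is really needed. DEMO_LOCK_LABEL is
+ * a hypothetical lock label.
+ */
+static void demo_seek_then_write(struct ha_rwlock *l)
+{
+ __ha_rwlock_sklock(DEMO_LOCK_LABEL, l, __func__, __FILE__, __LINE__);
+ /* ... lookup phase: concurrent readers may proceed ... */
+ __ha_rwlock_sktowr(DEMO_LOCK_LABEL, l, __func__, __FILE__, __LINE__);
+ /* ... modification phase: fully exclusive ... */
+ __ha_rwlock_wrunlock(DEMO_LOCK_LABEL, l, __func__, __FILE__, __LINE__);
+}
+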
+void __spin_init(struct ha_spinlock *l)
+{
+ memset(l, 0, sizeof(struct ha_spinlock));
+ __SPIN_INIT(&l->lock);
+}
+
+void __spin_destroy(struct ha_spinlock *l)
+{
+ __SPIN_DESTROY(&l->lock);
+ memset(l, 0, sizeof(struct ha_spinlock));
+}
+
+void __spin_lock(enum lock_label lbl, struct ha_spinlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_spinlock_state *st = &l->info.st[tgid-1];
+ uint64_t start_time;
+
+ if (unlikely(st->owner & tbit)) {
+ /* the thread is already owning the lock */
+ abort();
+ }
+
+ HA_ATOMIC_OR(&st->waiters, tbit);
+
+ start_time = now_mono_time();
+ __SPIN_LOCK(&l->lock);
+ HA_ATOMIC_ADD(&lock_stats[lbl].nsec_wait_for_write, (now_mono_time() - start_time));
+
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+
+ st->owner = tbit;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ HA_ATOMIC_AND(&st->waiters, ~tbit);
+}
+
+int __spin_trylock(enum lock_label lbl, struct ha_spinlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_spinlock_state *st = &l->info.st[tgid-1];
+ int r;
+
+ if (unlikely(st->owner & tbit)) {
+ /* the thread is already owning the lock */
+ abort();
+ }
+
+ /* trylock should never wait */
+ r = __SPIN_TRYLOCK(&l->lock);
+ if (unlikely(r))
+ return r;
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_locked);
+
+ st->owner = tbit;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ return 0;
+}
+
+void __spin_unlock(enum lock_label lbl, struct ha_spinlock *l,
+ const char *func, const char *file, int line)
+{
+ ulong tbit = (ti && ti->ltid_bit) ? ti->ltid_bit : 1;
+ struct ha_spinlock_state *st = &l->info.st[tgid-1];
+
+ if (unlikely(!(st->owner & tbit))) {
+ /* the thread is not owning the lock */
+ abort();
+ }
+
+ st->owner = 0;
+ l->info.last_location.function = func;
+ l->info.last_location.file = file;
+ l->info.last_location.line = line;
+
+ __SPIN_UNLOCK(&l->lock);
+ HA_ATOMIC_INC(&lock_stats[lbl].num_write_unlocked);
+}
+
+#endif // defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+
+
+#if defined(USE_PTHREAD_EMULATION)
+
+/* pthread rwlock emulation using plocks (to avoid expensive futexes).
+ * These are a direct mapping onto Progressive Locks, with the exception that,
+ * since pthreads uses a single common unlock operation, we need to know
+ * whether to unlock for reads or for writes, so we set the topmost bit to 1
+ * when a write lock is acquired to indicate that a write unlock needs to be
+ * performed. It's not a problem since this bit will never be used, given that
+ * haproxy won't support as many threads as the plocks can count.
+ *
+ * The storage is the pthread_rwlock_t cast as an ulong.
+ */
+
+int pthread_rwlock_init(pthread_rwlock_t *restrict rwlock, const pthread_rwlockattr_t *restrict attr)
+{
+ ulong *lock = (ulong *)rwlock;
+
+ *lock = 0;
+ return 0;
+}
+
+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
+{
+ ulong *lock = (ulong *)rwlock;
+
+ *lock = 0;
+ return 0;
+}
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
+{
+ pl_lorw_rdlock((unsigned long *)rwlock);
+ return 0;
+}
+
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
+{
+ return !!pl_cmpxchg((unsigned long *)rwlock, 0, PLOCK_LORW_SHR_BASE);
+}
+
+int pthread_rwlock_timedrdlock(pthread_rwlock_t *restrict rwlock, const struct timespec *restrict abstime)
+{
+ return pthread_rwlock_tryrdlock(rwlock);
+}
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
+{
+ pl_lorw_wrlock((unsigned long *)rwlock);
+ return 0;
+}
+
+int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
+{
+ return !!pl_cmpxchg((unsigned long *)rwlock, 0, PLOCK_LORW_EXC_BASE);
+}
+
+int pthread_rwlock_timedwrlock(pthread_rwlock_t *restrict rwlock, const struct timespec *restrict abstime)
+{
+ return pthread_rwlock_trywrlock(rwlock);
+}
+
+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
+{
+ pl_lorw_unlock((unsigned long *)rwlock);
+ return 0;
+}
+#endif // defined(USE_PTHREAD_EMULATION)
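+
+/* Editor's note: minimal illustrative sketch, not part of upstream. When
+ * USE_PTHREAD_EMULATION is set, the standard calls below resolve to the
+ * plock-based implementations above, and the common unlock relies on the
+ * write-ownership bit described earlier to tell read and write unlocks
+ * apart. Disabled with #if 0 since it is only an illustration.
+ */
+#if 0
+static pthread_rwlock_t demo_rwlock;
+
+static void demo_emulated_rwlock(void)
+{
+ pthread_rwlock_init(&demo_rwlock, NULL);
+ pthread_rwlock_rdlock(&demo_rwlock); /* shared section */
+ pthread_rwlock_unlock(&demo_rwlock);
+ pthread_rwlock_wrlock(&demo_rwlock); /* exclusive section */
+ pthread_rwlock_unlock(&demo_rwlock);
+ pthread_rwlock_destroy(&demo_rwlock);
+}
+#endif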
+
+/* Depending on the platform and how libpthread was built, pthread_exit() may
+ * involve some code in libgcc_s that would be loaded on exit for the first
+ * time, causing aborts if the process is chrooted. It's harmless but very
+ * dirty. There isn't much we can do to make sure libgcc_s is loaded only if
+ * needed, so what we do here is that during early boot we create a dummy
+ * thread that immediately exits. This will lead to libgcc_s being loaded
+ * during boot on the platforms where it's required.
+ */
+static void *dummy_thread_function(void *data)
+{
+ pthread_exit(NULL);
+ return NULL;
+}
+
+static inline void preload_libgcc_s(void)
+{
+ pthread_t dummy_thread;
+ if (pthread_create(&dummy_thread, NULL, dummy_thread_function, NULL) == 0)
+ pthread_join(dummy_thread, NULL);
+}
+
+static void __thread_init(void)
+{
+ char *ptr = NULL;
+
+ preload_libgcc_s();
+
+ thread_cpus_enabled_at_boot = thread_cpus_enabled();
+ thread_cpus_enabled_at_boot = MIN(thread_cpus_enabled_at_boot, MAX_THREADS);
+
+ memprintf(&ptr, "Built with multi-threading support (MAX_TGROUPS=%d, MAX_THREADS=%d, default=%d).",
+ MAX_TGROUPS, MAX_THREADS, thread_cpus_enabled_at_boot);
+ hap_register_build_opts(ptr, 1);
+
+#if defined(DEBUG_THREAD) || defined(DEBUG_FULL)
+ memset(lock_stats, 0, sizeof(lock_stats));
+#endif
+}
+INITCALL0(STG_PREPARE, __thread_init);
+
+#else
+
+/* send signal <sig> to thread <thr> (send to process in fact) */
+void ha_tkill(unsigned int thr, int sig)
+{
+ raise(sig);
+}
+
+/* send signal <sig> to all threads (send to process in fact) */
+void ha_tkillall(int sig)
+{
+ raise(sig);
+}
+
+void ha_thread_relax(void)
+{
+#ifdef _POSIX_PRIORITY_SCHEDULING
+ sched_yield();
+#endif
+}
+
+REGISTER_BUILD_OPTS("Built without multi-threading support (USE_THREAD not set).");
+
+#endif // USE_THREAD
+
+
+/* Returns non-zero on anomaly (bound vs unbound), and emits a warning in this
+ * case.
+ */
+int thread_detect_binding_discrepancies(void)
+{
+#if defined(USE_CPU_AFFINITY)
+ uint th, tg, id;
+ uint tot_b = 0, tot_u = 0;
+ int first_b = -1;
+ int first_u = -1;
+
+ for (th = 0; th < global.nbthread; th++) {
+ tg = ha_thread_info[th].tgid;
+ id = ha_thread_info[th].ltid;
+
+ if (ha_cpuset_count(&cpu_map[tg - 1].thread[id]) == 0) {
+ tot_u++;
+ if (first_u < 0)
+ first_u = th;
+ } else {
+ tot_b++;
+ if (first_b < 0)
+ first_b = th;
+ }
+ }
+
+ if (tot_u > 0 && tot_b > 0) {
+ ha_warning("Found %u thread(s) mapped to a CPU and %u thread(s) not mapped to any CPU. "
+ "This will result in some threads being randomly assigned to the same CPU, "
+ "which will occasionally cause severe performance degradation. First thread "
+ "bound is %d and first thread not bound is %d. Please either bind all threads "
+ "or none (maybe some cpu-map directives are missing?).\n",
+ tot_b, tot_u, first_b, first_u);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+/* Returns non-zero on anomaly (more threads than CPUs), and emits a warning in
+ * this case. It checks against configured cpu-map if any, otherwise against
+ * the number of CPUs at boot if known. It's better to run it only after
+ * thread_detect_binding_discrepancies() so that mixed cases can be eliminated.
+ */
+int thread_detect_more_than_cpus(void)
+{
+#if defined(USE_CPU_AFFINITY)
+ struct hap_cpuset cpuset_map, cpuset_boot, cpuset_all;
+ uint th, tg, id;
+ int bound;
+ int tot_map, tot_all;
+
+ ha_cpuset_zero(&cpuset_boot);
+ ha_cpuset_zero(&cpuset_map);
+ ha_cpuset_zero(&cpuset_all);
+ bound = 0;
+ for (th = 0; th < global.nbthread; th++) {
+ tg = ha_thread_info[th].tgid;
+ id = ha_thread_info[th].ltid;
+ if (ha_cpuset_count(&cpu_map[tg - 1].thread[id])) {
+ ha_cpuset_or(&cpuset_map, &cpu_map[tg - 1].thread[id]);
+ bound++;
+ }
+ }
+
+ ha_cpuset_assign(&cpuset_all, &cpuset_map);
+ if (bound != global.nbthread) {
+ if (ha_cpuset_detect_bound(&cpuset_boot))
+ ha_cpuset_or(&cpuset_all, &cpuset_boot);
+ }
+
+ tot_map = ha_cpuset_count(&cpuset_map);
+ tot_all = ha_cpuset_count(&cpuset_all);
+
+ if (tot_map && bound > tot_map) {
+ ha_warning("This configuration binds %d threads to a total of %d CPUs via cpu-map "
+ "directives. This means that some threads will compete for the same CPU, "
+ "which will cause severe performance degradation. Please fix either the "
+ "'cpu-map' directives or set the global 'nbthread' value accordingly.\n",
+ bound, tot_map);
+ return 1;
+ }
+ else if (tot_all && global.nbthread > tot_all) {
+ ha_warning("This configuration enables %d threads running on a total of %d CPUs. "
+ "This means that some threads will compete for the same CPU, which will cause "
+ "severe performance degradation. Please either the 'cpu-map' directives to "
+ "adjust the CPUs to use, or fix the global 'nbthread' value.\n",
+ global.nbthread, tot_all);
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+
+/* scans the configured thread mapping and establishes the final one. Returns <0
+ * on failure, >=0 on success.
+ */
+int thread_map_to_groups()
+{
+ int t, g, ut, ug;
+ int q, r;
+ ulong m __maybe_unused;
+
+ ut = ug = 0; // unassigned threads & groups
+
+ for (t = 0; t < global.nbthread; t++) {
+ if (!ha_thread_info[t].tg)
+ ut++;
+ }
+
+ for (g = 0; g < global.nbtgroups; g++) {
+ if (!ha_tgroup_info[g].count)
+ ug++;
+ ha_tgroup_info[g].tgid_bit = 1UL << g;
+ }
+
+ if (ug > ut) {
+ ha_alert("More unassigned thread-groups (%d) than threads (%d). Please reduce thread-groups\n", ug, ut);
+ return -1;
+ }
+
+ /* look for first unassigned thread */
+ for (t = 0; t < global.nbthread && ha_thread_info[t].tg; t++)
+ ;
+
+ /* assign threads to empty groups */
+ for (g = 0; ug && ut; ) {
+ /* due to sparse thread assignment we can end up with more threads
+ * per group on last assigned groups than former ones, so we must
+ * always try to pack the maximum remaining ones together first.
+ */
+ q = ut / ug;
+ r = ut % ug;
+ if ((q + !!r) > MAX_THREADS_PER_GROUP) {
+ ha_alert("Too many remaining unassigned threads (%d) for thread groups (%d). Please increase thread-groups or make sure to keep thread numbers contiguous\n", ut, ug);
+ return -1;
+ }
+
+ /* thread <t> is the next unassigned one. Let's look for next
+ * unassigned group, we know there are some left
+ */
+ while (ut >= ug && ha_tgroup_info[g].count)
+ g++;
+
+ /* group g is unassigned, try to fill it with consecutive threads */
+ while (ut && ut >= ug && ha_tgroup_info[g].count < q + !!r &&
+ (!ha_tgroup_info[g].count || t == ha_tgroup_info[g].base + ha_tgroup_info[g].count)) {
+
+ if (!ha_tgroup_info[g].count) {
+ /* assign new group */
+ ha_tgroup_info[g].base = t;
+ ug--;
+ }
+
+ ha_tgroup_info[g].count++;
+ ha_thread_info[t].tgid = g + 1;
+ ha_thread_info[t].tg = &ha_tgroup_info[g];
+ ha_thread_info[t].tg_ctx = &ha_tgroup_ctx[g];
+
+ ut--;
+ /* switch to next unassigned thread */
+ while (++t < global.nbthread && ha_thread_info[t].tg)
+ ;
+ }
+ }
+
+ if (ut) {
+ ha_alert("Remaining unassigned threads found (%d) because all groups are in use. Please increase 'thread-groups', reduce 'nbthreads' or remove or extend 'thread-group' enumerations.\n", ut);
+ return -1;
+ }
+
+ for (t = 0; t < global.nbthread; t++) {
+ ha_thread_info[t].tid = t;
+ ha_thread_info[t].ltid = t - ha_thread_info[t].tg->base;
+ ha_thread_info[t].ltid_bit = 1UL << ha_thread_info[t].ltid;
+ }
+
+ m = 0;
+ for (g = 0; g < global.nbtgroups; g++) {
+ ha_tgroup_info[g].threads_enabled = nbits(ha_tgroup_info[g].count);
+ /* for now, additional threads are not started, so we should
+ * consider them as harmless and idle.
+ * This will get automatically updated when such threads are
+ * started in run_thread_poll_loop()
+ * Without this, thread_isolate() and thread_isolate_full()
+ * will fail to work as long as secondary threads did not enter
+ * the polling loop at least once.
+ */
+ ha_tgroup_ctx[g].threads_harmless = ha_tgroup_info[g].threads_enabled;
+ ha_tgroup_ctx[g].threads_idle = ha_tgroup_info[g].threads_enabled;
+ if (!ha_tgroup_info[g].count)
+ continue;
+ m |= 1UL << g;
+
+ }
+
+#ifdef USE_THREAD
+ all_tgroups_mask = m;
+#endif
+ return 0;
+}
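+
+/* Editor's note: illustrative sketch, not upstream code. It demonstrates the
+ * packing rule used above: with <ut> unassigned threads and <ug> empty
+ * groups, the next group takes at most q + !!r threads where q = ut/ug and
+ * r = ut%ug, recomputed after each group. E.g. 7 threads over 3 groups
+ * yields 3, then 2, then 2.
+ */
+static void demo_thread_packing(int ut, int ug)
+{
+ while (ut && ug) {
+  int q = ut / ug, r = ut % ug;
+  int take = q + !!r; /* pack the maximum remaining ones first */
+
+  fprintf(stderr, "group takes %d thread(s)\n", take);
+  ut -= take;
+  ug--;
+ }
+}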
+
+/* Converts a configuration thread set based on either absolute or relative
+ * thread numbers into a global group+mask. This is essentially for use with
+ * the "thread" directive on "bind" lines, where "thread 4-6,10-12" might be
+ * turned to "2/1-3,4/1-3". It cannot be used before the thread mapping above
+ * was completed and the thread group numbers configured. The thread_set is
+ * replaced by the resolved group-based one. It is possible to force a single
+ * default group for unspecified sets instead of enabling all groups by passing
+ * this group's non-zero value to defgrp.
+ *
+ * Returns <0 on failure, >=0 on success.
+ */
+int thread_resolve_group_mask(struct thread_set *ts, int defgrp, char **err)
+{
+ struct thread_set new_ts = { };
+ ulong mask, imask;
+ uint g;
+
+ if (!ts->grps) {
+ /* unspecified group, IDs are global */
+ if (thread_set_is_empty(ts)) {
+ /* all threads of all groups, unless defgrp is set and
+ * we then set it as the only group.
+ */
+ for (g = defgrp ? defgrp-1 : 0; g < (defgrp ? defgrp : global.nbtgroups); g++) {
+ new_ts.rel[g] = ha_tgroup_info[g].threads_enabled;
+ if (new_ts.rel[g])
+ new_ts.grps |= 1UL << g;
+ }
+ } else {
+ /* some absolute threads are set, we must remap them to
+ * relative ones. Each group cannot have more than
+ * LONGBITS threads, thus it spans at most two absolute
+ * blocks.
+ */
+ for (g = 0; g < global.nbtgroups; g++) {
+ uint block = ha_tgroup_info[g].base / LONGBITS;
+ uint base = ha_tgroup_info[g].base % LONGBITS;
+
+ mask = ts->abs[block] >> base;
+ if (base &&
+ (block + 1) < sizeof(ts->abs) / sizeof(ts->abs[0]) &&
+ ha_tgroup_info[g].count > (LONGBITS - base))
+ mask |= ts->abs[block + 1] << (LONGBITS - base);
+ mask &= nbits(ha_tgroup_info[g].count);
+ mask &= ha_tgroup_info[g].threads_enabled;
+
+ /* now the mask exactly matches the threads to be enabled
+ * in this group.
+ */
+ new_ts.rel[g] |= mask;
+ if (new_ts.rel[g])
+ new_ts.grps |= 1UL << g;
+ }
+ }
+ } else {
+ /* groups were specified */
+ for (g = 0; g < MAX_TGROUPS; g++) {
+ imask = ts->rel[g];
+ if (!imask)
+ continue;
+
+ if (g >= global.nbtgroups) {
+ memprintf(err, "'thread' directive references non-existing thread group %u", g+1);
+ return -1;
+ }
+
+ /* some relative threads are set. Keep only existing ones for this group */
+ mask = nbits(ha_tgroup_info[g].count);
+
+ if (!(mask & imask)) {
+ /* no intersection between the thread group's
+ * threads and the bind line's.
+ */
+#ifdef THREAD_AUTO_ADJUST_GROUPS
+ unsigned long new_mask = 0;
+
+ while (imask) {
+ new_mask |= imask & mask;
+ imask >>= ha_tgroup_info[g].count;
+ }
+ imask = new_mask;
+#else
+ memprintf(err, "'thread' directive only references threads not belonging to group %u", g+1);
+ return -1;
+#endif
+ }
+
+ new_ts.rel[g] = imask & mask;
+ if (new_ts.rel[g])
+ new_ts.grps |= 1UL << g;
+ }
+ }
+
+ /* update the thread_set */
+ if (!thread_set_nth_group(&new_ts, 0)) {
+ memprintf(err, "'thread' directive only references non-existing threads");
+ return -1;
+ }
+
+ *ts = new_ts;
+ return 0;
+}
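+
+/* Editor's note: hedged numeric illustration, not upstream code. Assume two
+ * 4-thread groups with bases 0 and 4: the absolute set "thread 3-6" gives
+ * abs[0] = 0x3c, which the remapping above turns into rel[0] = 0xc (threads
+ * 3-4 of group 1) and rel[1] = 0x3 (threads 1-2 of group 2, i.e. "2/1-2").
+ */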
+
+/* Parse a string representing a thread set in one of the following forms:
+ *
+ * - { "all" | "odd" | "even" | <abs_num> [ "-" <abs_num> ] }[,...]
+ * => these are (lists of) absolute thread numbers
+ *
+ * - <tgnum> "/" { "all" | "odd" | "even" | <rel_num> [ "-" <rel_num> ][,...]
+ * => these are (lists of) per-group relative thread numbers. All numbers
+ * must be lower than or equal to LONGBITS. When multiple list elements
+ * are provided, each of them must contain the thread group number.
+ *
+ * Minimum value for a thread or group number is always 1. Maximum value for an
+ * absolute thread number is MAX_THREADS, maximum value for a relative thread
+ * number is MAX_THREADS_PER_GROUP, and the maximum value for a thread group
+ * is MAX_TGROUPS. "all", "even" and "odd" will be bound by MAX_THREADS and/or
+ * MAX_THREADS_PER_GROUP in any case. In ranges, a missing digit before "-"
+ * is implicitly 1, and a missing digit after "-" is implicitly the highest of
+ * its class. As such "-" is equivalent to "all", which makes it possible to
+ * build strings such as "${MIN}-${MAX}" where both MIN and MAX are optional.
+ *
+ * It is not valid to mix absolute and relative numbers. As such:
+ * - all valid (all absolute threads)
+ * - 12-19,24-31 valid (abs threads 12 to 19 and 24 to 31)
+ * - 1/all valid (all 32 or 64 threads of group 1)
+ * - 1/1-4,1/8-10,2/1 valid
+ * - 1/1-4,8-10 invalid (mixes relative "1/1-4" with absolute "8-10")
+ * - 1-4,8-10,2/1 invalid (mixes absolute "1-4,8-10" with relative "2/1")
+ * - 1/odd-4 invalid (mixes range with boundary)
+ *
+ * The target thread set is *completed* with the supported threads, which means
+ * that it's the caller's responsibility to pre-initialize it. If the target
+ * thread set is NULL, it's not updated and the function only verifies that the
+ * input parses.
+ *
+ * On success, it returns 0, otherwise it returns non-zero with an error
+ * message in <err>.
+ */
+int parse_thread_set(const char *arg, struct thread_set *ts, char **err)
+{
+ const char *set;
+ const char *sep;
+ int v, min, max, tg;
+ int is_rel;
+
+ /* search for the first delimiter (',', '-' or '/') to decide whether
+ * we're facing an absolute or relative form. The relative form always
+ * starts with a number followed by a slash.
+ */
+ for (sep = arg; isdigit((uchar)*sep); sep++)
+ ;
+
+ is_rel = (/*sep > arg &&*/ *sep == '/'); /* relative form */
+
+ /* from there we have to cut the thread spec around commas */
+
+ set = arg;
+ tg = 0;
+ while (*set) {
+ /* note: we can't use strtol() here because "-3" would parse as
+ * (-3) while we want to stop before the "-", so we find the
+ * separator ourselves and rely on atoi() whose value we may
+ * ignore depending where the separator is.
+ */
+ for (sep = set; isdigit((uchar)*sep); sep++)
+ ;
+
+ if (sep != set && *sep && *sep != '/' && *sep != '-' && *sep != ',') {
+ memprintf(err, "invalid character '%c' in thread set specification: '%s'.", *sep, set);
+ return -1;
+ }
+
+ v = (sep != set) ? atoi(set) : 0;
+
+ /* Now we know that the string is made of an optional series of digits
+ * optionally followed by one of the delimiters above, or that it
+ * starts with a different character.
+ */
+
+ /* first, let's search for the thread group (digits before '/') */
+
+ if (tg || !is_rel) {
+ /* thread group already specified or not expected if absolute spec */
+ if (*sep == '/') {
+ if (tg)
+ memprintf(err, "redundant thread group specification '%s' for group %d", set, tg);
+ else
+ memprintf(err, "group-relative thread specification '%s' is not permitted after a absolute thread range.", set);
+ return -1;
+ }
+ } else {
+ /* this is a group-relative spec, first field is the group number */
+ if (sep == set && *sep == '/') {
+ memprintf(err, "thread group number expected before '%s'.", set);
+ return -1;
+ }
+
+ if (*sep != '/') {
+ memprintf(err, "absolute thread specification '%s' is not permitted after a group-relative thread range.", set);
+ return -1;
+ }
+
+ if (v < 1 || v > MAX_TGROUPS) {
+ memprintf(err, "invalid thread group number '%d', permitted range is 1..%d in '%s'.", v, MAX_TGROUPS, set);
+ return -1;
+ }
+
+ tg = v;
+
+ /* skip group number and go on with set,sep,v as if
+ * there was no group number.
+ */
+ set = sep + 1;
+ continue;
+ }
+
+ /* Now 'set' starts at the min thread number, whose value is in v if any,
+ * and preset the max to it, unless the range is filled at once via "all"
+ * (stored as 1:0), "odd" (stored as) 1:-1, or "even" (stored as 1:-2).
+ * 'sep' points to the next non-digit which may be set itself e.g. for
+ * "all" etc or "-xx".
+ */
+
+ if (!*set) {
+ /* empty set sets no restriction */
+ min = 1;
+ max = is_rel ? MAX_THREADS_PER_GROUP : MAX_THREADS;
+ }
+ else {
+ if (sep != set && *sep && *sep != '-' && *sep != ',') {
+ // Only delimiters are permitted around digits.
+ memprintf(err, "invalid character '%c' in thread set specification: '%s'.", *sep, set);
+ return -1;
+ }
+
+ /* for non-digits, find next delim */
+ for (; *sep && *sep != '-' && *sep != ','; sep++)
+ ;
+
+ min = max = 1;
+ if (sep != set) {
+ /* non-empty first thread */
+ if (isteq(ist2(set, sep-set), ist("all")))
+ max = 0;
+ else if (isteq(ist2(set, sep-set), ist("odd")))
+ max = -1;
+ else if (isteq(ist2(set, sep-set), ist("even")))
+ max = -2;
+ else if (v)
+ min = max = v;
+ else
+ max = min = 0; // throw an error below
+ }
+
+ if (min < 1 || min > MAX_THREADS || (is_rel && min > MAX_THREADS_PER_GROUP)) {
+ memprintf(err, "invalid first thread number '%s', permitted range is 1..%d, or 'all', 'odd', 'even'.",
+ set, is_rel ? MAX_THREADS_PER_GROUP : MAX_THREADS);
+ return -1;
+ }
+
+ /* is this a range ? */
+ if (*sep == '-') {
+ if (min != max) {
+ memprintf(err, "extraneous range after 'all', 'odd' or 'even': '%s'.", set);
+ return -1;
+ }
+
+ /* this is a seemingly valid range, there may be another number */
+ for (set = ++sep; isdigit((uchar)*sep); sep++)
+ ;
+ v = atoi(set);
+
+ if (sep == set) { // no digit: to the max
+ max = is_rel ? MAX_THREADS_PER_GROUP : MAX_THREADS;
+ if (*sep && *sep != ',')
+ max = 0; // throw an error below
+ } else
+ max = v;
+
+ if (max < 1 || max > MAX_THREADS || (is_rel && max > MAX_THREADS_PER_GROUP)) {
+ memprintf(err, "invalid last thread number '%s', permitted range is 1..%d.",
+ set, is_rel ? MAX_THREADS_PER_GROUP : MAX_THREADS);
+ return -1;
+ }
+ }
+
+ /* here sep points to the first non-digit after the thread spec,
+ * must be a valid delimiter.
+ */
+ if (*sep && *sep != ',') {
+ memprintf(err, "invalid character '%c' after thread set specification: '%s'.", *sep, set);
+ return -1;
+ }
+ }
+
+ /* store values */
+ if (ts) {
+ if (is_rel) {
+ /* group-relative thread numbers */
+ ts->grps |= 1UL << (tg - 1);
+
+ if (max >= min) {
+ for (v = min; v <= max; v++)
+ ts->rel[tg - 1] |= 1UL << (v - 1);
+ } else {
+ memset(&ts->rel[tg - 1],
+ (max == 0) ? 0xff /* all */ : (max == -1) ? 0x55 /* odd */: 0xaa /* even */,
+ sizeof(ts->rel[tg - 1]));
+ }
+ } else {
+ /* absolute thread numbers */
+ if (max >= min) {
+ for (v = min; v <= max; v++)
+ ts->abs[(v - 1) / LONGBITS] |= 1UL << ((v - 1) % LONGBITS);
+ } else {
+ memset(&ts->abs,
+ (max == 0) ? 0xff /* all */ : (max == -1) ? 0x55 /* odd */: 0xaa /* even */,
+ sizeof(ts->abs));
+ }
+ }
+ }
+
+ set = *sep ? sep + 1 : sep;
+ tg = 0;
+ }
+ return 0;
+}
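+
+/* Editor's note: illustrative usage sketch, not part of upstream. The bit
+ * values below follow from the storage rules above (bit v-1 set for thread
+ * number v).
+ */
+static void demo_parse_thread_set(void)
+{
+ struct thread_set ts = { };
+ char *err = NULL;
+
+ if (parse_thread_set("1/1-4,1/8-10", &ts, &err) == 0) {
+  /* ts.grps == 0x1 and ts.rel[0] == 0x38f: threads 1-4 and 8-10 of group 1 */
+ }
+ else {
+  /* parsing failed, <err> was allocated by memprintf() */
+  free(err);
+ }
+}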
+
+/* Parse the "nbthread" global directive, which takes an integer argument that
+ * contains the desired number of threads.
+ */
+static int cfg_parse_nbthread(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ long nbthread;
+ char *errptr;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (non_global_section_parsed == 1) {
+ memprintf(err, "'%s' not allowed if a non-global section was previously defined. This parameter must be declared in the first global section", args[0]);
+ return -1;
+ }
+
+ nbthread = strtol(args[1], &errptr, 10);
+ if (!*args[1] || *errptr) {
+ memprintf(err, "'%s' passed a missing or unparsable integer value in '%s'", args[0], args[1]);
+ return -1;
+ }
+
+#ifndef USE_THREAD
+ if (nbthread != 1) {
+ memprintf(err, "'%s' specified with a value other than 1 while HAProxy is not compiled with threads support. Please check build options for USE_THREAD", args[0]);
+ return -1;
+ }
+#else
+ if (nbthread < 1 || nbthread > MAX_THREADS) {
+ memprintf(err, "'%s' value must be between 1 and %d (was %ld)", args[0], MAX_THREADS, nbthread);
+ return -1;
+ }
+#endif
+
+ HA_DIAG_WARNING_COND(global.nbthread,
+ "parsing [%s:%d] : '%s' is already defined and will be overridden.\n",
+ file, line, args[0]);
+
+ global.nbthread = nbthread;
+ return 0;
+}
+
+/* Parse the "thread-group" global directive, which takes an integer argument
+ * that designates a thread group, and a list of threads to put into that group.
+ */
+static int cfg_parse_thread_group(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *errptr;
+ long tnum, tend, tgroup;
+ int arg, tot;
+
+ if (non_global_section_parsed == 1) {
+ memprintf(err, "'%s' not allowed if a non-global section was previously defined. This parameter must be declared in the first global section", args[0]);
+ return -1;
+ }
+
+ tgroup = strtol(args[1], &errptr, 10);
+ if (!*args[1] || *errptr) {
+ memprintf(err, "'%s' passed a missing or unparsable integer value in '%s'", args[0], args[1]);
+ return -1;
+ }
+
+ if (tgroup < 1 || tgroup > MAX_TGROUPS) {
+ memprintf(err, "'%s' thread-group number must be between 1 and %d (was %ld)", args[0], MAX_TGROUPS, tgroup);
+ return -1;
+ }
+
+ /* look for a preliminary definition of any thread pointing to this
+ * group, and remove them.
+ */
+ if (ha_tgroup_info[tgroup-1].count) {
+ ha_warning("parsing [%s:%d] : '%s %ld' was already defined and will be overridden.\n",
+ file, line, args[0], tgroup);
+
+ for (tnum = ha_tgroup_info[tgroup-1].base;
+ tnum < ha_tgroup_info[tgroup-1].base + ha_tgroup_info[tgroup-1].count;
+ tnum++) {
+ if (ha_thread_info[tnum-1].tg == &ha_tgroup_info[tgroup-1]) {
+ ha_thread_info[tnum-1].tg = NULL;
+ ha_thread_info[tnum-1].tgid = 0;
+ ha_thread_info[tnum-1].tg_ctx = NULL;
+ }
+ }
+ ha_tgroup_info[tgroup-1].count = ha_tgroup_info[tgroup-1].base = 0;
+ }
+
+ tot = 0;
+ for (arg = 2; args[arg] && *args[arg]; arg++) {
+ tend = tnum = strtol(args[arg], &errptr, 10);
+
+ if (*errptr == '-')
+ tend = strtol(errptr + 1, &errptr, 10);
+
+ if (*errptr || tnum < 1 || tend < 1 || tnum > MAX_THREADS || tend > MAX_THREADS) {
+ memprintf(err, "'%s %ld' passed an unparsable or invalid thread number '%s' (valid range is 1 to %d)", args[0], tgroup, args[arg], MAX_THREADS);
+ return -1;
+ }
+
+ for(; tnum <= tend; tnum++) {
+ if (ha_thread_info[tnum-1].tg == &ha_tgroup_info[tgroup-1]) {
+ ha_warning("parsing [%s:%d] : '%s %ld': thread %ld assigned more than once on the same line.\n",
+ file, line, args[0], tgroup, tnum);
+ } else if (ha_thread_info[tnum-1].tg) {
+ ha_warning("parsing [%s:%d] : '%s %ld': thread %ld was previously assigned to thread group %ld and will be overridden.\n",
+ file, line, args[0], tgroup, tnum,
+ (long)(ha_thread_info[tnum-1].tg - &ha_tgroup_info[0] + 1));
+ }
+
+ if (!ha_tgroup_info[tgroup-1].count) {
+ ha_tgroup_info[tgroup-1].base = tnum-1;
+ ha_tgroup_info[tgroup-1].count = 1;
+ }
+ else if (tnum >= ha_tgroup_info[tgroup-1].base + ha_tgroup_info[tgroup-1].count) {
+ ha_tgroup_info[tgroup-1].count = tnum - ha_tgroup_info[tgroup-1].base;
+ }
+ else if (tnum < ha_tgroup_info[tgroup-1].base) {
+ ha_tgroup_info[tgroup-1].count += ha_tgroup_info[tgroup-1].base - tnum-1;
+ ha_tgroup_info[tgroup-1].base = tnum - 1;
+ }
+
+ ha_thread_info[tnum-1].tgid = tgroup;
+ ha_thread_info[tnum-1].tg = &ha_tgroup_info[tgroup-1];
+ ha_thread_info[tnum-1].tg_ctx = &ha_tgroup_ctx[tgroup-1];
+ tot++;
+ }
+ }
+
+ if (ha_tgroup_info[tgroup-1].count > tot) {
+ memprintf(err, "'%s %ld' assigned sparse threads, only contiguous supported", args[0], tgroup);
+ return -1;
+ }
+
+ if (ha_tgroup_info[tgroup-1].count > MAX_THREADS_PER_GROUP) {
+ memprintf(err, "'%s %ld' assigned too many threads (%d, max=%d)", args[0], tgroup, tot, MAX_THREADS_PER_GROUP);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Parse the "thread-groups" global directive, which takes an integer argument
+ * that contains the desired number of thread groups.
+ */
+static int cfg_parse_thread_groups(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ long nbtgroups;
+ char *errptr;
+
+ if (too_many_args(1, args, err, NULL))
+ return -1;
+
+ if (non_global_section_parsed == 1) {
+ memprintf(err, "'%s' not allowed if a non-global section was previously defined. This parameter must be declared in the first global section", args[0]);
+ return -1;
+ }
+
+ nbtgroups = strtol(args[1], &errptr, 10);
+ if (!*args[1] || *errptr) {
+ memprintf(err, "'%s' passed a missing or unparsable integer value in '%s'", args[0], args[1]);
+ return -1;
+ }
+
+#ifndef USE_THREAD
+ if (nbtgroups != 1) {
+ memprintf(err, "'%s' specified with a value other than 1 while HAProxy is not compiled with threads support. Please check build options for USE_THREAD", args[0]);
+ return -1;
+ }
+#else
+ if (nbtgroups < 1 || nbtgroups > MAX_TGROUPS) {
+ memprintf(err, "'%s' value must be between 1 and %d (was %ld)", args[0], MAX_TGROUPS, nbtgroups);
+ return -1;
+ }
+#endif
+
+ HA_DIAG_WARNING_COND(global.nbtgroups,
+ "parsing [%s:%d] : '%s' is already defined and will be overridden.\n",
+ file, line, args[0]);
+
+ global.nbtgroups = nbtgroups;
+ return 0;
+}
+
+/* config keyword parsers */
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "nbthread", cfg_parse_nbthread, 0 },
+ { CFG_GLOBAL, "thread-group", cfg_parse_thread_group, 0 },
+ { CFG_GLOBAL, "thread-groups", cfg_parse_thread_groups, 0 },
+ { 0, NULL, NULL }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
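+
+/* Editor's note: illustrative configuration snippet, not upstream code; the
+ * values are hypothetical. It shows the three directives registered above:
+ *
+ *     global
+ *         nbthread 8
+ *         thread-groups 2
+ *         thread-group 1 1-4
+ *         thread-group 2 5-8
+ */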
diff --git a/src/time.c b/src/time.c
new file mode 100644
index 0000000..280b522
--- /dev/null
+++ b/src/time.c
@@ -0,0 +1,147 @@
+/*
+ * Time calculation functions.
+ *
+ * Copyright 2000-2011 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <sys/time.h>
+
+#include <haproxy/api.h>
+#include <haproxy/time.h>
+
+
+/*
+ * adds <ms> ms to <from>, sets the result in <tv> and returns a pointer to <tv>
+ */
+struct timeval *_tv_ms_add(struct timeval *tv, const struct timeval *from, int ms)
+{
+ tv->tv_usec = from->tv_usec + (ms % 1000) * 1000;
+ tv->tv_sec = from->tv_sec + (ms / 1000);
+ while (tv->tv_usec >= 1000000) {
+ tv->tv_usec -= 1000000;
+ tv->tv_sec++;
+ }
+ return tv;
+}
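+
+/* Editor's note: worked example, not upstream. Adding 1500 ms to
+ * { .tv_sec = 1, .tv_usec = 999000 } first gives tv_usec = 1499000 and
+ * tv_sec = 2; the carry loop then normalizes this to { 3, 499000 }.
+ */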
+
+/*
+ * compares <tv1> and <tv2> modulo 1ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2
+ * Must not be used when either argument is eternity. Use tv_ms_cmp2() for that.
+ */
+int _tv_ms_cmp(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_cmp(tv1, tv2);
+}
+
+/*
+ * compares <tv1> and <tv2> modulo 1 ms: returns 0 if equal, -1 if tv1 < tv2, 1 if tv1 > tv2,
+ * assuming that TV_ETERNITY is greater than everything.
+ */
+int _tv_ms_cmp2(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_cmp2(tv1, tv2);
+}
+
+/*
+ * compares <tv1> and <tv2> modulo 1 ms: returns 1 if tv1 <= tv2, 0 if tv1 > tv2,
+ * assuming that TV_ETERNITY is greater than everything. Returns 0 if tv1 is
+ * TV_ETERNITY, and always assumes that tv2 != TV_ETERNITY. Designed to replace
+ * occurrences of (tv_ms_cmp2(tv,now) <= 0).
+ */
+int _tv_ms_le2(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_le2(tv1, tv2);
+}
+
+/*
+ * returns the remaining time between tv1=now and event=tv2.
+ * If tv2 is already in the past, 0 is returned.
+ * Must not be used when either argument is eternity.
+ */
+unsigned long _tv_ms_remain(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_remain(tv1, tv2);
+}
+
+/*
+ * returns the remaining time between tv1=now and event=tv2.
+ * If tv2 is already in the past, 0 is returned.
+ * Returns TIME_ETERNITY if tv2 is eternity.
+ */
+unsigned long _tv_ms_remain2(const struct timeval *tv1, const struct timeval *tv2)
+{
+ if (tv_iseternity(tv2))
+ return TIME_ETERNITY;
+
+ return __tv_ms_remain(tv1, tv2);
+}
+
+/*
+ * Returns the time in ms elapsed between tv1 and tv2, assuming that tv1<=tv2.
+ * Must not be used when either argument is eternity.
+ */
+unsigned long _tv_ms_elapsed(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_ms_elapsed(tv1, tv2);
+}
+
+/*
+ * adds <inc> to <from>, sets the result in <tv> and returns a pointer to <tv>
+ */
+struct timeval *_tv_add(struct timeval *tv, const struct timeval *from, const struct timeval *inc)
+{
+ return __tv_add(tv, from, inc);
+}
+
+/*
+ * If <inc> is set, then add it to <from> and set the result to <tv>, then
+ * return 1, otherwise return 0. It is meant to be used in if conditions.
+ */
+int _tv_add_ifset(struct timeval *tv, const struct timeval *from, const struct timeval *inc)
+{
+ return __tv_add_ifset(tv, from, inc);
+}
+
+/*
+ * Computes the remaining time between tv1=now and event=tv2. If tv2 is
+ * already in the past, 0 is returned. The result is stored into tv.
+ */
+struct timeval *_tv_remain(const struct timeval *tv1, const struct timeval *tv2, struct timeval *tv)
+{
+ return __tv_remain(tv1, tv2, tv);
+}
+
+/*
+ * Computes the remaining time between tv1=now and event=tv2. If tv2 is
+ * already in the past, 0 is returned. The result is stored into tv. Returns
+ * ETERNITY if tv2 is eternity.
+ */
+struct timeval *_tv_remain2(const struct timeval *tv1, const struct timeval *tv2, struct timeval *tv)
+{
+ return __tv_remain2(tv1, tv2, tv);
+}
+
+/* tv_isle: compares <tv1> and <tv2> : returns 1 if tv1 <= tv2, otherwise 0 */
+int _tv_isle(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_isle(tv1, tv2);
+}
+
+/* tv_isgt: compares <tv1> and <tv2> : returns 1 if tv1 > tv2, otherwise 0 */
+int _tv_isgt(const struct timeval *tv1, const struct timeval *tv2)
+{
+ return __tv_isgt(tv1, tv2);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/tools.c b/src/tools.c
new file mode 100644
index 0000000..b2814b5
--- /dev/null
+++ b/src/tools.c
@@ -0,0 +1,6348 @@
+/*
+ * General purpose functions.
+ *
+ * Copyright 2000-2010 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#if (defined(__ELF__) && !defined(__linux__)) || defined(USE_DL)
+#define _GNU_SOURCE
+#include <dlfcn.h>
+#include <link.h>
+#endif
+
+#if defined(__FreeBSD__)
+#include <elf.h>
+#include <dlfcn.h>
+extern void *__elf_aux_vector;
+#endif
+
+#if defined(__NetBSD__)
+#include <sys/exec_elf.h>
+#include <dlfcn.h>
+#endif
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#if defined(__linux__) && defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 16))
+#include <sys/auxv.h>
+#endif
+
+#include <import/eb32sctree.h>
+#include <import/eb32tree.h>
+#include <import/ebmbtree.h>
+
+#include <haproxy/api.h>
+#include <haproxy/applet.h>
+#include <haproxy/chunk.h>
+#include <haproxy/dgram.h>
+#include <haproxy/global.h>
+#include <haproxy/hlua.h>
+#include <haproxy/listener.h>
+#include <haproxy/namespace.h>
+#include <haproxy/net_helper.h>
+#include <haproxy/protocol.h>
+#include <haproxy/quic_sock.h>
+#include <haproxy/resolvers.h>
+#include <haproxy/sc_strm.h>
+#include <haproxy/sock.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/ssl_utils.h>
+#include <haproxy/stconn.h>
+#include <haproxy/task.h>
+#include <haproxy/tools.h>
+#include <haproxy/xxhash.h>
+
+/* This macro returns false if the test __x is false. Many
+ * of the following parsing functions must abort the processing
+ * if it returns 0, so this macro is useful for writing light code.
+ */
+#define RET0_UNLESS(__x) do { if (!(__x)) return 0; } while (0)
+
+/* Define the number of entries in hash_word */
+#define NB_L_HASH_WORD 15
+
+/* return the hash of a string and length for a given key. All keys are valid. */
+#define HA_ANON(key, str, len) (XXH32(str, len, key) & 0xFFFFFF)
+
+/* enough to store NB_ITOA_STR integers of :
+ * 2^64-1 = 18446744073709551615 or
+ * -2^63 = -9223372036854775808
+ *
+ * The HTML version needs room for adding the 25 characters
+ * '<span class="rls"></span>' around digits at positions 3N+1 in order
+ * to add spacing at up to 6 positions : 18 446 744 073 709 551 615
+ */
+THREAD_LOCAL char itoa_str[NB_ITOA_STR][171];
+THREAD_LOCAL int itoa_idx = 0; /* index of next itoa_str to use */
+
+/* sometimes we'll need to quote strings (eg: in stats), and we don't expect
+ * to quote strings larger than a max configuration line.
+ */
+THREAD_LOCAL char quoted_str[NB_QSTR][QSTR_SIZE + 1];
+THREAD_LOCAL int quoted_idx = 0;
+
+/* thread-local PRNG state. It's modified to start from a different sequence
+ * on all threads upon startup. It must not be used for anything beyond getting
+ * statistical values as it's 100% predictable.
+ */
+THREAD_LOCAL unsigned int statistical_prng_state = 2463534242U;
+
+/* set to true if this is a static build */
+int build_is_static = 0;
+
+/* A global static table to store hashed words */
+static THREAD_LOCAL char hash_word[NB_L_HASH_WORD][20];
+static THREAD_LOCAL int index_hash = 0;
+
+/*
+ * unsigned long long ASCII representation
+ *
+ * returns a pointer to the trailing '\0', or NULL if there is
+ * not enough space in dst
+ */
+char *ulltoa(unsigned long long n, char *dst, size_t size)
+{
+ int i = 0;
+ char *res;
+
+ switch(n) {
+ case 1ULL ... 9ULL:
+ i = 0;
+ break;
+
+ case 10ULL ... 99ULL:
+ i = 1;
+ break;
+
+ case 100ULL ... 999ULL:
+ i = 2;
+ break;
+
+ case 1000ULL ... 9999ULL:
+ i = 3;
+ break;
+
+ case 10000ULL ... 99999ULL:
+ i = 4;
+ break;
+
+ case 100000ULL ... 999999ULL:
+ i = 5;
+ break;
+
+ case 1000000ULL ... 9999999ULL:
+ i = 6;
+ break;
+
+ case 10000000ULL ... 99999999ULL:
+ i = 7;
+ break;
+
+ case 100000000ULL ... 999999999ULL:
+ i = 8;
+ break;
+
+ case 1000000000ULL ... 9999999999ULL:
+ i = 9;
+ break;
+
+ case 10000000000ULL ... 99999999999ULL:
+ i = 10;
+ break;
+
+ case 100000000000ULL ... 999999999999ULL:
+ i = 11;
+ break;
+
+ case 1000000000000ULL ... 9999999999999ULL:
+ i = 12;
+ break;
+
+ case 10000000000000ULL ... 99999999999999ULL:
+ i = 13;
+ break;
+
+ case 100000000000000ULL ... 999999999999999ULL:
+ i = 14;
+ break;
+
+ case 1000000000000000ULL ... 9999999999999999ULL:
+ i = 15;
+ break;
+
+ case 10000000000000000ULL ... 99999999999999999ULL:
+ i = 16;
+ break;
+
+ case 100000000000000000ULL ... 999999999999999999ULL:
+ i = 17;
+ break;
+
+ case 1000000000000000000ULL ... 9999999999999999999ULL:
+ i = 18;
+ break;
+
+ case 10000000000000000000ULL ... ULLONG_MAX:
+ i = 19;
+ break;
+ }
+ if (i + 2 > size) // (i + 1) + '\0'
+ return NULL; // too long
+ res = dst + i + 1;
+ *res = '\0';
+ for (; i >= 0; i--) {
+ dst[i] = n % 10ULL + '0';
+ n /= 10ULL;
+ }
+ return res;
+}
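+
+/* Editor's note: illustrative usage, not upstream code. The buffer must be
+ * large enough for all digits plus the trailing '\0'.
+ */
+static void demo_ulltoa(void)
+{
+ char buf[21]; /* 20 digits for 2^64-1, plus '\0' */
+ char *end = ulltoa(18446744073709551615ULL, buf, sizeof(buf));
+
+ /* on success <end> points to the trailing '\0', NULL means "too small" */
+ if (end)
+  fprintf(stderr, "%s\n", buf);
+}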
+
+/*
+ * unsigned long ASCII representation
+ *
+ * returns a pointer to the trailing '\0', or NULL if there is
+ * not enough space in dst
+ */
+char *ultoa_o(unsigned long n, char *dst, size_t size)
+{
+ int i = 0;
+ char *res;
+
+ switch (n) {
+ case 0U ... 9UL:
+ i = 0;
+ break;
+
+ case 10U ... 99UL:
+ i = 1;
+ break;
+
+ case 100U ... 999UL:
+ i = 2;
+ break;
+
+ case 1000U ... 9999UL:
+ i = 3;
+ break;
+
+ case 10000U ... 99999UL:
+ i = 4;
+ break;
+
+ case 100000U ... 999999UL:
+ i = 5;
+ break;
+
+ case 1000000U ... 9999999UL:
+ i = 6;
+ break;
+
+ case 10000000U ... 99999999UL:
+ i = 7;
+ break;
+
+ case 100000000U ... 999999999UL:
+ i = 8;
+ break;
+#if __WORDSIZE == 32
+
+ case 1000000000ULL ... ULONG_MAX:
+ i = 9;
+ break;
+
+#elif __WORDSIZE == 64
+
+ case 1000000000ULL ... 9999999999UL:
+ i = 9;
+ break;
+
+ case 10000000000ULL ... 99999999999UL:
+ i = 10;
+ break;
+
+ case 100000000000ULL ... 999999999999UL:
+ i = 11;
+ break;
+
+ case 1000000000000ULL ... 9999999999999UL:
+ i = 12;
+ break;
+
+ case 10000000000000ULL ... 99999999999999UL:
+ i = 13;
+ break;
+
+ case 100000000000000ULL ... 999999999999999UL:
+ i = 14;
+ break;
+
+ case 1000000000000000ULL ... 9999999999999999UL:
+ i = 15;
+ break;
+
+ case 10000000000000000ULL ... 99999999999999999UL:
+ i = 16;
+ break;
+
+ case 100000000000000000ULL ... 999999999999999999UL:
+ i = 17;
+ break;
+
+ case 1000000000000000000ULL ... 9999999999999999999UL:
+ i = 18;
+ break;
+
+ case 10000000000000000000ULL ... ULONG_MAX:
+ i = 19;
+ break;
+
+#endif
+ }
+ if (i + 2 > size) // (i + 1) + '\0'
+ return NULL; // too long
+ res = dst + i + 1;
+ *res = '\0';
+ for (; i >= 0; i--) {
+ dst[i] = n % 10U + '0';
+ n /= 10U;
+ }
+ return res;
+}
+
+/*
+ * signed long ASCII representation
+ *
+ * returns a pointer to the trailing '\0', or NULL if there is
+ * not enough space in dst
+ */
+char *ltoa_o(long int n, char *dst, size_t size)
+{
+ char *pos = dst;
+
+ if (n < 0) {
+ if (size < 3)
+ return NULL; // min size is '-' + digit + '\0' but another test in ultoa
+ *pos = '-';
+ pos++;
+ dst = ultoa_o(-n, pos, size - 1);
+ } else {
+ dst = ultoa_o(n, dst, size);
+ }
+ return dst;
+}
+
+/*
+ * signed long long ASCII representation
+ *
+ * returns a pointer to the trailing '\0', or NULL if there is
+ * not enough space in dst
+ */
+char *lltoa(long long n, char *dst, size_t size)
+{
+ char *pos = dst;
+
+ if (n < 0) {
+ if (size < 3)
+ return NULL; // min size is '-' + digit + '\0' but another test in ulltoa
+ *pos = '-';
+ pos++;
+ dst = ulltoa(-n, pos, size - 1);
+ } else {
+ dst = ulltoa(n, dst, size);
+ }
+ return dst;
+}
+
+/*
+ * writes an ascii representation of an unsigned int into dst, and
+ * returns a pointer to the trailing '\0'.
+ * Pads the ascii representation with leading '0's to fill <size>-1 chars.
+ */
+char *utoa_pad(unsigned int n, char *dst, size_t size)
+{
+ int i = 0;
+ char *ret;
+
+ switch(n) {
+ case 0U ... 9U:
+ i = 0;
+ break;
+
+ case 10U ... 99U:
+ i = 1;
+ break;
+
+ case 100U ... 999U:
+ i = 2;
+ break;
+
+ case 1000U ... 9999U:
+ i = 3;
+ break;
+
+ case 10000U ... 99999U:
+ i = 4;
+ break;
+
+ case 100000U ... 999999U:
+ i = 5;
+ break;
+
+ case 1000000U ... 9999999U:
+ i = 6;
+ break;
+
+ case 10000000U ... 99999999U:
+ i = 7;
+ break;
+
+ case 100000000U ... 999999999U:
+ i = 8;
+ break;
+
+ case 1000000000U ... 4294967295U:
+ i = 9;
+ break;
+ }
+ if (i + 2 > size) // (i + 1) + '\0'
+ return NULL; // too long
+ i = size - 2; // padding - '\0'
+
+ ret = dst + i + 1;
+ *ret = '\0';
+ for (; i >= 0; i--) {
+ dst[i] = n % 10U + '0';
+ n /= 10U;
+ }
+ return ret;
+}
+
+/*
+ * copies at most <size-1> chars from <src> to <dst>. Last char is always
+ * set to 0, unless <size> is 0. The number of chars copied is returned
+ * (excluding the terminating zero).
+ * This code has been optimized for size and speed : on x86, it's 45 bytes
+ * long, uses only registers, and consumes only 4 cycles per char.
+ */
+int strlcpy2(char *dst, const char *src, int size)
+{
+ char *orig = dst;
+ if (size) {
+ while (--size && (*dst = *src)) {
+ src++; dst++;
+ }
+ *dst = 0;
+ }
+ return dst - orig;
+}
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for number 'n' in decimal.
+ */
+char *ultoa_r(unsigned long n, char *buffer, int size)
+{
+ char *pos;
+
+ pos = buffer + size - 1;
+ *pos-- = '\0';
+
+ do {
+ *pos-- = '0' + n % 10;
+ n /= 10;
+ } while (n && pos >= buffer);
+ return pos + 1;
+}
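+
+/* Editor's note: illustrative usage, not upstream code. The string is built
+ * backwards from the end of the buffer, so the returned pointer, not the
+ * buffer itself, must be used.
+ */
+static void demo_ultoa_r(void)
+{
+ char buf[22];
+ const char *s = ultoa_r(12345UL, buf, sizeof(buf));
+
+ /* s points inside buf and reads "12345"; bytes before s are untouched */
+ fprintf(stderr, "%s\n", s);
+}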
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for number 'n' in decimal.
+ */
+char *lltoa_r(long long int in, char *buffer, int size)
+{
+ char *pos;
+ int neg = 0;
+ unsigned long long int n;
+
+ pos = buffer + size - 1;
+ *pos-- = '\0';
+
+ if (in < 0) {
+ neg = 1;
+ n = -in;
+ }
+ else
+ n = in;
+
+ do {
+ *pos-- = '0' + n % 10;
+ n /= 10;
+ } while (n && pos >= buffer);
+ if (neg && pos > buffer)
+ *pos-- = '-';
+ return pos + 1;
+}
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for signed number 'n' in decimal.
+ */
+char *sltoa_r(long n, char *buffer, int size)
+{
+ char *pos;
+
+ if (n >= 0)
+ return ultoa_r(n, buffer, size);
+
+ pos = ultoa_r(-n, buffer + 1, size - 1) - 1;
+ *pos = '-';
+ return pos;
+}
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for number 'n' in decimal, formatted for
+ * HTML output with tags to create visual grouping by 3 digits. The
+ * output buffer must be able to hold at least 171 characters.
+ */
+const char *ulltoh_r(unsigned long long n, char *buffer, int size)
+{
+ char *start;
+ int digit = 0;
+
+ start = buffer + size;
+ *--start = '\0';
+
+ do {
+ if (digit == 3 && start >= buffer + 7)
+ memcpy(start -= 7, "</span>", 7);
+
+ if (start >= buffer + 1) {
+ *--start = '0' + n % 10;
+ n /= 10;
+ }
+
+ if (digit == 3 && start >= buffer + 18)
+ memcpy(start -= 18, "<span class=\"rls\">", 18);
+
+ if (digit++ == 3)
+ digit = 1;
+ } while (n && start > buffer);
+ return start;
+}
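+
+/* Editor's note: worked example, not upstream. For n = 1234567 the function
+ * produces:
+ *   <span class="rls">1</span>23<span class="rls">4</span>567
+ * i.e. each digit at position 3N+1 from the right is wrapped individually so
+ * that the "rls" class can render the visual grouping "1 234 567".
+ */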
+
+/*
+ * This function simply returns a locally allocated string containing the ascii
+ * representation for number 'n' in decimal, unless n is 0 in which case it
+ * returns the alternate string (or an empty string if the alternate string is
+ * NULL). Its use is intended for limits shown in reports, where it's
+ * desirable not to display anything if there is no limit. Warning! it shares
+ * the same vector as ultoa_r().
+ */
+const char *limit_r(unsigned long n, char *buffer, int size, const char *alt)
+{
+ return (n) ? ultoa_r(n, buffer, size) : (alt ? alt : "");
+}
+
+/* Trims the first "%f" float in a string to its minimum number of digits after
+ * the decimal point by trimming trailing zeroes, even dropping the decimal
+ * point if not needed. The string is in <buffer> of length <len>, and the
+ * number is expected to start at or after position <num_start> (the first
+ * point appearing there is considered). A NUL character is always placed at
+ * the end if some trimming occurs. The new buffer length is returned.
+ */
+size_t flt_trim(char *buffer, size_t num_start, size_t len)
+{
+ char *end = buffer + len;
+ char *p = buffer + num_start;
+ char *trim;
+
+ do {
+ if (p >= end)
+ return len;
+ trim = p++;
+ } while (*trim != '.');
+
+ /* For now <trim> is on the decimal point. Let's look for any other
+ * meaningful digit after it.
+ */
+ while (p < end) {
+ if (*p++ != '0')
+ trim = p;
+ }
+
+ if (trim < end)
+ *trim = 0;
+
+ return trim - buffer;
+}
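+
+/* Editor's note: worked examples, not upstream: "3.140000" becomes "3.14",
+ * "2.000000" becomes "2" (the decimal point itself is dropped), and "100" is
+ * returned unchanged since it contains no decimal point.
+ */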
+
+/*
+ * This function simply returns a locally allocated string containing
+ * the ascii representation for number 'n' in decimal with useless trailing
+ * zeroes trimmed.
+ */
+char *ftoa_r(double n, char *buffer, int size)
+{
+ flt_trim(buffer, 0, snprintf(buffer, size, "%f", n));
+ return buffer;
+}
+
+/* returns a locally allocated string containing the quoted encoding of the
+ * input string. The output may be truncated to QSTR_SIZE chars, but it is
+ * guaranteed that the string will always be properly terminated. Quotes are
+ * encoded by doubling them as is commonly done in CSV files. QSTR_SIZE must
+ * always be at least 4 chars.
+ */
+const char *qstr(const char *str)
+{
+ char *ret = quoted_str[quoted_idx];
+ char *p, *end;
+
+ if (++quoted_idx >= NB_QSTR)
+ quoted_idx = 0;
+
+ p = ret;
+ end = ret + QSTR_SIZE;
+
+ *p++ = '"';
+
+ /* always keep 3 chars to support passing "" and the ending " */
+ while (*str && p < end - 3) {
+ if (*str == '"') {
+ *p++ = '"';
+ *p++ = '"';
+ }
+ else
+ *p++ = *str;
+ str++;
+ }
+ *p++ = '"';
+ return ret;
+}
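+
+/* Editor's note: worked example, not upstream. Passing the 8-char string
+ * say "hi" to qstr() yields "say ""hi""" : surrounding double quotes are
+ * added and the inner ones are doubled, CSV style.
+ */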
+
+/*
+ * Returns non-zero if character <s> is a hex digit (0-9, a-f, A-F), else zero.
+ *
+ * It looks like this one would be a good candidate for inlining, but this is
+ * not interesting because it is around 35 bytes long and often called multiple
+ * times within the same function.
+ */
+int ishex(char s)
+{
+ s -= '0';
+ if ((unsigned char)s <= 9)
+ return 1;
+ s -= 'A' - '0';
+ if ((unsigned char)s <= 5)
+ return 1;
+ s -= 'a' - 'A';
+ if ((unsigned char)s <= 5)
+ return 1;
+ return 0;
+}
+
+/* rounds <i> down to the closest value having max 2 digits */
+unsigned int round_2dig(unsigned int i)
+{
+ unsigned int mul = 1;
+
+ while (i >= 100) {
+ i /= 10;
+ mul *= 10;
+ }
+ return i * mul;
+}
+
+/*
+ * Checks <name> for invalid characters. Valid chars are [A-Za-z0-9_:.-]. If an
+ * invalid character is found, a pointer to it is returned. If everything is
+ * fine, NULL is returned.
+ */
+const char *invalid_char(const char *name)
+{
+ if (!*name)
+ return name;
+
+ while (*name) {
+ if (!isalnum((unsigned char)*name) && *name != '.' && *name != ':' &&
+ *name != '_' && *name != '-')
+ return name;
+ name++;
+ }
+ return NULL;
+}
+
+/*
+ * Checks <name> for invalid characters. Valid chars are [_.-] and those
+ * accepted by <f> function.
+ * If an invalid character is found, a pointer to it is returned.
+ * If everything is fine, NULL is returned.
+ */
+static inline const char *__invalid_char(const char *name, int (*f)(int)) {
+
+ if (!*name)
+ return name;
+
+ while (*name) {
+ if (!f((unsigned char)*name) && *name != '.' &&
+ *name != '_' && *name != '-')
+ return name;
+
+ name++;
+ }
+
+ return NULL;
+}
+
+/*
+ * Checks <name> for invalid characters. Valid chars are [A-Za-z0-9_.-].
+ * If an invalid character is found, a pointer to it is returned.
+ * If everything is fine, NULL is returned.
+ */
+const char *invalid_domainchar(const char *name) {
+ return __invalid_char(name, isalnum);
+}
+
+/*
+ * Checks <name> for invalid characters. Valid chars are [A-Za-z0-9_.-].
+ * If an invalid character is found, a pointer to it is returned.
+ * If everything is fine, NULL is returned.
+ */
+const char *invalid_prefix_char(const char *name) {
+ return __invalid_char(name, isalnum);
+}
+
+/*
+ * converts <str> to a struct sockaddr_storage* provided by the caller. The
+ * caller must have zeroed <sa> first, and may have set sa->ss_family to force
+ * parse a specific address format. If the ss_family is 0 or AF_UNSPEC, then
+ * the function tries to guess the address family from the syntax. If the
+ * family is forced and the format doesn't match, an error is returned. The
+ * string is assumed to contain only an address, no port. The address can be a
+ * dotted IPv4 address, an IPv6 address, a host name, or empty or "*" to
+ * indicate INADDR_ANY. NULL is returned if the host part cannot be resolved.
+ * The return address will only have the address family and the address set,
+ * all other fields remain zero. The string is not supposed to be modified.
+ * The IPv6 '::' address is IN6ADDR_ANY. If <resolve> is non-zero, the hostname
+ * is resolved, otherwise only IP addresses are resolved, and anything else
+ * returns NULL. If <sa> already contains a port, it is preserved.
+ */
+struct sockaddr_storage *str2ip2(const char *str, struct sockaddr_storage *sa, int resolve)
+{
+ struct hostent *he;
+ /* max IPv6 length, including brackets and terminating NUL */
+ char tmpip[48];
+ int port = get_host_port(sa);
+
+ /* check IPv6 with square brackets */
+ if (str[0] == '[') {
+ size_t iplength = strlen(str);
+
+ if (iplength < 4) {
+ /* minimal size is 4 when using brackets "[::]" */
+ goto fail;
+ }
+ else if (iplength >= sizeof(tmpip)) {
+ /* IPv6 literal cannot be larger than tmpip */
+ goto fail;
+ }
+ else {
+ if (str[iplength - 1] != ']') {
+ /* if address started with bracket, it should end with bracket */
+ goto fail;
+ }
+ else {
+ memcpy(tmpip, str + 1, iplength - 2);
+ tmpip[iplength - 2] = '\0';
+ str = tmpip;
+ }
+ }
+ }
+
+ /* Any IPv6 address */
+ if (str[0] == ':' && str[1] == ':' && !str[2]) {
+ if (!sa->ss_family || sa->ss_family == AF_UNSPEC)
+ sa->ss_family = AF_INET6;
+ else if (sa->ss_family != AF_INET6)
+ goto fail;
+ set_host_port(sa, port);
+ return sa;
+ }
+
+ /* Any address for the family, defaults to IPv4 */
+ if (!str[0] || (str[0] == '*' && !str[1])) {
+ if (!sa->ss_family || sa->ss_family == AF_UNSPEC)
+ sa->ss_family = AF_INET;
+ set_host_port(sa, port);
+ return sa;
+ }
+
+ /* check for IPv6 first */
+ if ((!sa->ss_family || sa->ss_family == AF_UNSPEC || sa->ss_family == AF_INET6) &&
+ inet_pton(AF_INET6, str, &((struct sockaddr_in6 *)sa)->sin6_addr)) {
+ sa->ss_family = AF_INET6;
+ set_host_port(sa, port);
+ return sa;
+ }
+
+ /* then check for IPv4 */
+ if ((!sa->ss_family || sa->ss_family == AF_UNSPEC || sa->ss_family == AF_INET) &&
+ inet_pton(AF_INET, str, &((struct sockaddr_in *)sa)->sin_addr)) {
+ sa->ss_family = AF_INET;
+ set_host_port(sa, port);
+ return sa;
+ }
+
+ if (!resolve)
+ return NULL;
+
+ if (!resolv_hostname_validation(str, NULL))
+ return NULL;
+
+#ifdef USE_GETADDRINFO
+ if (global.tune.options & GTUNE_USE_GAI) {
+ struct addrinfo hints, *result;
+ int success = 0;
+
+ result = NULL; /* so the freeaddrinfo() below stays safe if getaddrinfo() fails */
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = sa->ss_family ? sa->ss_family : AF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_flags = 0;
+ hints.ai_protocol = 0;
+
+ if (getaddrinfo(str, NULL, &hints, &result) == 0) {
+ if (!sa->ss_family || sa->ss_family == AF_UNSPEC)
+ sa->ss_family = result->ai_family;
+ else if (sa->ss_family != result->ai_family) {
+ freeaddrinfo(result);
+ goto fail;
+ }
+
+ switch (result->ai_family) {
+ case AF_INET:
+ memcpy((struct sockaddr_in *)sa, result->ai_addr, result->ai_addrlen);
+ set_host_port(sa, port);
+ success = 1;
+ break;
+ case AF_INET6:
+ memcpy((struct sockaddr_in6 *)sa, result->ai_addr, result->ai_addrlen);
+ set_host_port(sa, port);
+ success = 1;
+ break;
+ }
+ }
+
+ if (result)
+ freeaddrinfo(result);
+
+ if (success)
+ return sa;
+ }
+#endif
+ /* try to resolve an IPv4/IPv6 hostname */
+ he = gethostbyname(str);
+ if (he) {
+ if (!sa->ss_family || sa->ss_family == AF_UNSPEC)
+ sa->ss_family = he->h_addrtype;
+ else if (sa->ss_family != he->h_addrtype)
+ goto fail;
+
+ switch (sa->ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)sa)->sin_addr = *(struct in_addr *) *(he->h_addr_list);
+ set_host_port(sa, port);
+ return sa;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)sa)->sin6_addr = *(struct in6_addr *) *(he->h_addr_list);
+ set_host_port(sa, port);
+ return sa;
+ }
+ }
+
+ /* unsupported address family */
+ fail:
+ return NULL;
+}
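+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * parsing a literal IPv6 address without resolution. The caller zeroes
+ * <sa> first as required by the comment above.
+ *
+ *   struct sockaddr_storage sa;
+ *
+ *   memset(&sa, 0, sizeof(sa));
+ *   if (str2ip2("[2001:db8::1]", &sa, 0)) {
+ *           // sa.ss_family == AF_INET6, address set, port still 0
+ *   }
+ */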
+
+/*
+ * Converts <str> to a locally allocated struct sockaddr_storage *, and a port
+ * range or offset consisting of two integers that the caller will have to
+ * check to find the relevant input format. The following formats are supported:
+ *
+ * String format | address | port | low | high
+ * addr | <addr> | 0 | 0 | 0
+ * addr: | <addr> | 0 | 0 | 0
+ * addr:port | <addr> | <port> | <port> | <port>
+ * addr:pl-ph | <addr> | <pl> | <pl> | <ph>
+ * addr:+port | <addr> | <port> | 0 | <port>
+ * addr:-port | <addr> |-<port> | <port> | 0
+ *
+ * The detection of a port range or increment by the caller is made by
+ * comparing <low> and <high>. If both are equal, then port 0 means no port
+ * was specified. The caller may pass NULL for <low> and <high> if it is not
+ * interested in retrieving port ranges.
+ *
+ * Note that <addr> above may also be :
+ * - empty ("") => family will be AF_INET and address will be INADDR_ANY
+ * - "*" => family will be AF_INET and address will be INADDR_ANY
+ * - "::" => family will be AF_INET6 and address will be IN6ADDR_ANY
+ * - a host name => family and address will depend on host name resolving.
+ *
+ * A prefix may be passed in before the address above to force the family :
+ * - "ipv4@" => force address to resolve as IPv4 and fail if not possible.
+ * - "ipv6@" => force address to resolve as IPv6 and fail if not possible.
+ * - "unix@" => force address to be a path to a UNIX socket even if the
+ * path does not start with a '/'
+ * - "abns@" => force address to belong to the abstract namespace (Linux
+ * only). These sockets are just like Unix sockets but without
+ * the need for an underlying file system. The address is a
+ * string. Technically it's like a Unix socket with a zero in
+ * the first byte of the address.
+ * - "fd@" => an integer must follow, and is a file descriptor number.
+ *
+ * IPv6 addresses can be declared with or without square brackets. When using
+ * square brackets for IPv6 addresses, the port separator (colon) is optional.
+ * If not using square brackets, and in order to avoid any ambiguity with
+ * IPv6 addresses, the last colon ':' is mandatory even when no port is specified.
+ * NULL is returned if the address cannot be parsed. The <low> and <high> ports
+ * are always initialized if non-null, even for non-IP families.
+ *
+ * If <pfx> is non-null, it is used as a string prefix before any path-based
+ * address (typically the path to a unix socket).
+ *
+ * if <fqdn> is non-null, it will be filled with :
+ * - a pointer to the FQDN of the server name to resolve if there's one, and
+ * that the caller will have to free(),
+ * - NULL if there was an explicit address that doesn't require resolution.
+ *
+ * Hostnames are only resolved if <opts> has PA_O_RESOLVE. Otherwise <fqdn> is
+ * still honored so it is possible for the caller to know whether a resolution
+ * failed by clearing this flag and checking if <fqdn> was filled, indicating
+ * the need for a resolution.
+ *
+ * When a file descriptor is passed, its value is put into the s_addr part of
+ * the address when cast to sockaddr_in and the address family is
+ * AF_CUST_EXISTING_FD.
+ *
+ * The matching protocol will be set into <proto> if non-null.
+ * Hints about the address protocol and transport types which are directly
+ * resolved will be set into <sa_type> if not NULL.
+ *
+ * Any known file descriptor is also assigned to <fd> if non-null, otherwise it
+ * is forced to -1.
+ */
+struct sockaddr_storage *str2sa_range(const char *str, int *port, int *low, int *high, int *fd,
+ struct protocol **proto, struct net_addr_type *sa_type,
+ char **err, const char *pfx, char **fqdn, unsigned int opts)
+{
+ static THREAD_LOCAL struct sockaddr_storage ss;
+ struct sockaddr_storage *ret = NULL;
+ struct protocol *new_proto = NULL;
+ char *back, *str2;
+ char *port1, *port2;
+ int portl, porth, porta;
+ int abstract = 0;
+ int new_fd = -1;
+ enum proto_type proto_type = 0; // to shut gcc warning
+ int ctrl_type = 0; // to shut gcc warning
+
+ portl = porth = porta = 0;
+ if (fqdn)
+ *fqdn = NULL;
+
+ str2 = back = env_expand(strdup(str));
+ if (str2 == NULL) {
+ memprintf(err, "out of memory in '%s'", __FUNCTION__);
+ goto out;
+ }
+
+ if (!*str2) {
+ memprintf(err, "'%s' resolves to an empty address (environment variable missing?)", str);
+ goto out;
+ }
+
+ memset(&ss, 0, sizeof(ss));
+
+ /* prepare the default socket types */
+ if ((opts & (PA_O_STREAM|PA_O_DGRAM)) == PA_O_DGRAM ||
+ ((opts & (PA_O_STREAM|PA_O_DGRAM)) == (PA_O_DGRAM|PA_O_STREAM) && (opts & PA_O_DEFAULT_DGRAM))) {
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ } else {
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+
+ if (strncmp(str2, "stream+", 7) == 0) {
+ str2 += 7;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "dgram+", 6) == 0) {
+ str2 += 6;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "quic+", 5) == 0) {
+ str2 += 5;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_STREAM;
+ }
+
+ if (strncmp(str2, "unix@", 5) == 0) {
+ str2 += 5;
+ abstract = 0;
+ ss.ss_family = AF_UNIX;
+ }
+ else if (strncmp(str2, "uxdg@", 5) == 0) {
+ str2 += 5;
+ abstract = 0;
+ ss.ss_family = AF_UNIX;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "uxst@", 5) == 0) {
+ str2 += 5;
+ abstract = 0;
+ ss.ss_family = AF_UNIX;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "abns@", 5) == 0) {
+ str2 += 5;
+ abstract = 1;
+ ss.ss_family = AF_UNIX;
+ }
+ else if (strncmp(str2, "ip@", 3) == 0) {
+ str2 += 3;
+ ss.ss_family = AF_UNSPEC;
+ }
+ else if (strncmp(str2, "ipv4@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET;
+ }
+ else if (strncmp(str2, "ipv6@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET6;
+ }
+ else if (strncmp(str2, "tcp4@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "udp4@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "tcp6@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET6;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "udp6@", 5) == 0) {
+ str2 += 5;
+ ss.ss_family = AF_INET6;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "tcp@", 4) == 0) {
+ str2 += 4;
+ ss.ss_family = AF_UNSPEC;
+ proto_type = PROTO_TYPE_STREAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "udp@", 4) == 0) {
+ str2 += 4;
+ ss.ss_family = AF_UNSPEC;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_DGRAM;
+ }
+ else if (strncmp(str2, "quic4@", 6) == 0) {
+ str2 += 6;
+ ss.ss_family = AF_INET;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "quic6@", 6) == 0) {
+ str2 += 6;
+ ss.ss_family = AF_INET6;
+ proto_type = PROTO_TYPE_DGRAM;
+ ctrl_type = SOCK_STREAM;
+ }
+ else if (strncmp(str2, "fd@", 3) == 0) {
+ str2 += 3;
+ ss.ss_family = AF_CUST_EXISTING_FD;
+ }
+ else if (strncmp(str2, "sockpair@", 9) == 0) {
+ str2 += 9;
+ ss.ss_family = AF_CUST_SOCKPAIR;
+ }
+ else if (strncmp(str2, "rhttp@", 3) == 0) {
+ /* TODO duplicated code from check_kw_experimental() */
+ if (!experimental_directives_allowed) {
+ memprintf(err, "Address '%s' is experimental, must be allowed via a global 'expose-experimental-directives'", str2);
+ goto out;
+ }
+ mark_tainted(TAINTED_CONFIG_EXP_KW_DECLARED);
+
+ str2 += 6; /* skip the full "rhttp@" prefix */
+ ss.ss_family = AF_CUST_RHTTP_SRV;
+ }
+ else if (*str2 == '/') {
+ ss.ss_family = AF_UNIX;
+ }
+ else
+ ss.ss_family = AF_UNSPEC;
+
+ if (ss.ss_family == AF_CUST_SOCKPAIR) {
+ struct sockaddr_storage ss2;
+ socklen_t addr_len;
+ char *endptr;
+
+ new_fd = strtol(str2, &endptr, 10);
+ if (!*str2 || new_fd < 0 || *endptr) {
+ memprintf(err, "file descriptor '%s' is not a valid integer in '%s'", str2, str);
+ goto out;
+ }
+
+ /* just verify that it's a socket */
+ addr_len = sizeof(ss2);
+ if (getsockname(new_fd, (struct sockaddr *)&ss2, &addr_len) == -1) {
+ memprintf(err, "cannot use file descriptor '%d' : %s.", new_fd, strerror(errno));
+ goto out;
+ }
+
+ ((struct sockaddr_in *)&ss)->sin_addr.s_addr = new_fd;
+ ((struct sockaddr_in *)&ss)->sin_port = 0;
+ }
+ else if (ss.ss_family == AF_CUST_EXISTING_FD) {
+ char *endptr;
+
+ new_fd = strtol(str2, &endptr, 10);
+ if (!*str2 || new_fd < 0 || *endptr) {
+ memprintf(err, "file descriptor '%s' is not a valid integer in '%s'", str2, str);
+ goto out;
+ }
+
+ if (opts & PA_O_SOCKET_FD) {
+ socklen_t addr_len;
+ int type;
+
+ addr_len = sizeof(ss);
+ if (getsockname(new_fd, (struct sockaddr *)&ss, &addr_len) == -1) {
+ memprintf(err, "cannot use file descriptor '%d' : %s.", new_fd, strerror(errno));
+ goto out;
+ }
+
+ addr_len = sizeof(type);
+ if (getsockopt(new_fd, SOL_SOCKET, SO_TYPE, &type, &addr_len) != 0 ||
+ (type == SOCK_STREAM) != (proto_type == PROTO_TYPE_STREAM)) {
+ memprintf(err, "socket on file descriptor '%d' is of the wrong type.", new_fd);
+ goto out;
+ }
+
+ porta = portl = porth = get_host_port(&ss);
+ } else if (opts & PA_O_RAW_FD) {
+ ((struct sockaddr_in *)&ss)->sin_addr.s_addr = new_fd;
+ ((struct sockaddr_in *)&ss)->sin_port = 0;
+ } else {
+ memprintf(err, "a file descriptor is not acceptable here in '%s'", str);
+ goto out;
+ }
+ }
+ else if (ss.ss_family == AF_UNIX) {
+ struct sockaddr_un *un = (struct sockaddr_un *)&ss;
+ int prefix_path_len;
+ int max_path_len;
+ int adr_len;
+
+ /* complete unix socket path name during startup or soft-restart is
+ * <unix_bind_prefix><path>.<pid>.<bak|tmp>
+ */
+ prefix_path_len = (pfx && !abstract) ? strlen(pfx) : 0;
+ max_path_len = (sizeof(un->sun_path) - 1) -
+ (abstract ? 0 : prefix_path_len + 1 + 5 + 1 + 3);
+
+ adr_len = strlen(str2);
+ if (adr_len > max_path_len) {
+ memprintf(err, "socket path '%s' too long (max %d)", str, max_path_len);
+ goto out;
+ }
+
+ /* when abstract==1, we skip the first zero and copy all bytes except the trailing zero */
+ memset(un->sun_path, 0, sizeof(un->sun_path));
+ if (prefix_path_len)
+ memcpy(un->sun_path, pfx, prefix_path_len);
+ memcpy(un->sun_path + prefix_path_len + abstract, str2, adr_len + 1 - abstract);
+ }
+ else if (ss.ss_family == AF_CUST_RHTTP_SRV) {
+ /* Nothing to do here. */
+ }
+ else { /* IPv4 and IPv6 */
+ char *end = str2 + strlen(str2);
+ char *chr;
+
+ /* search for : or ] whatever comes first */
+ for (chr = end-1; chr > str2; chr--) {
+ if (*chr == ']' || *chr == ':')
+ break;
+ }
+
+ if (*chr == ':') {
+ /* Found a colon before a closing-bracket, must be a port separator.
+ * This guarantees backward compatibility.
+ */
+ if (!(opts & PA_O_PORT_OK)) {
+ memprintf(err, "port specification not permitted here in '%s'", str);
+ goto out;
+ }
+ *chr++ = '\0';
+ port1 = chr;
+ }
+ else {
+ /* Either no colon and no closing-bracket
+ * or directly ending with a closing-bracket.
+ * However, no port.
+ */
+ if (opts & PA_O_PORT_MAND) {
+ memprintf(err, "missing port specification in '%s'", str);
+ goto out;
+ }
+ port1 = "";
+ }
+
+ if (isdigit((unsigned char)*port1)) { /* single port or range */
+ char *endptr;
+
+ port2 = strchr(port1, '-');
+ if (port2) {
+ if (!(opts & PA_O_PORT_RANGE)) {
+ memprintf(err, "port range not permitted here in '%s'", str);
+ goto out;
+ }
+ *port2++ = '\0';
+ }
+ else
+ port2 = port1;
+ portl = strtol(port1, &endptr, 10);
+ if (*endptr != '\0') {
+ memprintf(err, "invalid character '%c' in port number '%s' in '%s'", *endptr, port1, str);
+ goto out;
+ }
+ porth = strtol(port2, &endptr, 10);
+ if (*endptr != '\0') {
+ memprintf(err, "invalid character '%c' in port number '%s' in '%s'", *endptr, port2, str);
+ goto out;
+ }
+
+ if (portl < !!(opts & PA_O_PORT_MAND) || portl > 65535) {
+ memprintf(err, "invalid port '%s'", port1);
+ goto out;
+ }
+
+ if (porth < !!(opts & PA_O_PORT_MAND) || porth > 65535) {
+ memprintf(err, "invalid port '%s'", port2);
+ goto out;
+ }
+
+ if (portl > porth) {
+ memprintf(err, "invalid port range '%d-%d'", portl, porth);
+ goto out;
+ }
+
+ porta = portl;
+ }
+ else if (*port1 == '-') { /* negative offset */
+ char *endptr;
+
+ if (!(opts & PA_O_PORT_OFS)) {
+ memprintf(err, "port offset not permitted here in '%s'", str);
+ goto out;
+ }
+ portl = strtol(port1 + 1, &endptr, 10);
+ if (*endptr != '\0') {
+ memprintf(err, "invalid character '%c' in port number '%s' in '%s'", *endptr, port1 + 1, str);
+ goto out;
+ }
+ porta = -portl;
+ }
+ else if (*port1 == '+') { /* positive offset */
+ char *endptr;
+
+ if (!(opts & PA_O_PORT_OFS)) {
+ memprintf(err, "port offset not permitted here in '%s'", str);
+ goto out;
+ }
+ porth = strtol(port1 + 1, &endptr, 10);
+ if (*endptr != '\0') {
+ memprintf(err, "invalid character '%c' in port number '%s' in '%s'", *endptr, port1 + 1, str);
+ goto out;
+ }
+ porta = porth;
+ }
+ else if (*port1) { /* any other unexpected char */
+ memprintf(err, "invalid character '%c' in port number '%s' in '%s'", *port1, port1, str);
+ goto out;
+ }
+ else if (opts & PA_O_PORT_MAND) {
+ memprintf(err, "missing port specification in '%s'", str);
+ goto out;
+ }
+
+ /* first try to parse the IP without resolving. If it fails, it
+ * tells us we need to keep a copy of the FQDN to resolve later
+ * and to enable DNS. In this case we can proceed if <fqdn> is
+ * set or if PA_O_RESOLVE is set, otherwise it's an error.
+ */
+ if (str2ip2(str2, &ss, 0) == NULL) {
+ if ((!(opts & PA_O_RESOLVE) && !fqdn) ||
+ ((opts & PA_O_RESOLVE) && str2ip2(str2, &ss, 1) == NULL)) {
+ memprintf(err, "invalid address: '%s' in '%s'", str2, str);
+ goto out;
+ }
+
+ if (fqdn) {
+ if (str2 != back)
+ memmove(back, str2, strlen(str2) + 1);
+ *fqdn = back;
+ back = NULL;
+ }
+ }
+ set_host_port(&ss, porta);
+ }
+
+ if (ctrl_type == SOCK_STREAM && !(opts & PA_O_STREAM)) {
+ memprintf(err, "stream-type address not acceptable in '%s'", str);
+ goto out;
+ }
+ else if (ctrl_type == SOCK_DGRAM && !(opts & PA_O_DGRAM)) {
+ memprintf(err, "dgram-type address not acceptable in '%s'", str);
+ goto out;
+ }
+
+ if (proto || (opts & PA_O_CONNECT)) {
+ /* Note: if the caller asks for a proto, we must find one,
+ * except if we inherit from a raw FD (family == AF_CUST_EXISTING_FD)
+ * or if we return with an fqdn that will resolve later,
+ * in which case the address is not known yet (this is only
+ * for servers actually).
+ */
+ new_proto = protocol_lookup(ss.ss_family,
+ proto_type,
+ ctrl_type == SOCK_DGRAM);
+
+ if (!new_proto && (!fqdn || !*fqdn) && (ss.ss_family != AF_CUST_EXISTING_FD)) {
+ memprintf(err, "unsupported %s protocol for %s family %d address '%s'%s",
+ (ctrl_type == SOCK_DGRAM) ? "datagram" : "stream",
+ (proto_type == PROTO_TYPE_DGRAM) ? "datagram" : "stream",
+ ss.ss_family,
+ str,
+#ifndef USE_QUIC
+ (ctrl_type == SOCK_STREAM && proto_type == PROTO_TYPE_DGRAM)
+ ? "; QUIC is not compiled in if this is what you were looking for."
+ : ""
+#else
+ ""
+#endif
+ );
+ goto out;
+ }
+
+ if ((opts & PA_O_CONNECT) && new_proto && !new_proto->connect) {
+ memprintf(err, "connect() not supported for this protocol family %d used by address '%s'", ss.ss_family, str);
+ goto out;
+ }
+ }
+
+ ret = &ss;
+ out:
+ if (port)
+ *port = porta;
+ if (low)
+ *low = portl;
+ if (high)
+ *high = porth;
+ if (fd)
+ *fd = new_fd;
+ if (proto)
+ *proto = new_proto;
+ if (sa_type) {
+ sa_type->proto_type = proto_type;
+ sa_type->xprt_type = (ctrl_type == SOCK_DGRAM) ? PROTO_TYPE_DGRAM : PROTO_TYPE_STREAM;
+ }
+ free(back);
+ return ret;
+}
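+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * parsing a listener-style address with a port range; error handling is
+ * reduced to a bare minimum.
+ *
+ *   int port, low, high, fd;
+ *   char *err = NULL;
+ *   struct sockaddr_storage *ss;
+ *
+ *   ss = str2sa_range("ipv4@0.0.0.0:8000-8009", &port, &low, &high, &fd,
+ *                     NULL, NULL, &err, NULL, NULL,
+ *                     PA_O_PORT_OK | PA_O_PORT_RANGE | PA_O_STREAM);
+ *   // on success: ss != NULL, low == 8000, high == 8009, port == 8000
+ */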
+
+/* converts <addr> and <port> into a string representation of the address and port. This is sort
+ * of an inverse of str2sa_range, with some restrictions. The supported families are AF_INET,
+ * AF_INET6, AF_UNIX, and AF_CUST_SOCKPAIR. If the family is unsupported, NULL is returned.
+ * If map_ports is true, then the sign of the port is included in the output, to indicate it is
+ * relative to the incoming port. AF_INET and AF_INET6 will be in the form "<addr>:<port>".
+ * AF_UNIX will either be just the path (if using a pathname) or "abns@<path>" if it is abstract.
+ * AF_CUST_SOCKPAIR will be of the form "sockpair@<fd>".
+ *
+ * The returned char* is allocated, and it is the responsibility of the caller to free it.
+ */
+char * sa2str(const struct sockaddr_storage *addr, int port, int map_ports)
+{
+ char buffer[INET6_ADDRSTRLEN];
+ char *out = NULL;
+ const void *ptr;
+ const char *path;
+
+ switch (addr->ss_family) {
+ case AF_INET:
+ ptr = &((struct sockaddr_in *)addr)->sin_addr;
+ break;
+ case AF_INET6:
+ ptr = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ break;
+ case AF_UNIX:
+ path = ((struct sockaddr_un *)addr)->sun_path;
+ if (path[0] == '\0') {
+ const int max_length = sizeof(struct sockaddr_un) - offsetof(struct sockaddr_un, sun_path) - 1;
+ return memprintf(&out, "abns@%.*s", max_length, path+1);
+ } else {
+ return strdup(path);
+ }
+ case AF_CUST_SOCKPAIR:
+ return memprintf(&out, "sockpair@%d", ((struct sockaddr_in *)addr)->sin_addr.s_addr);
+ default:
+ return NULL;
+ }
+ if (inet_ntop(addr->ss_family, ptr, buffer, sizeof(buffer)) == NULL) {
+ BUG_ON(errno == ENOSPC);
+ return NULL;
+ }
+ if (map_ports)
+ return memprintf(&out, "%s:%+d", buffer, port);
+ else
+ return memprintf(&out, "%s:%d", buffer, port);
+}
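+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * the returned string is heap-allocated and must be freed by the caller.
+ *
+ *   char *s = sa2str(&ss, 8080, 0);  // e.g. "192.168.0.1:8080"
+ *
+ *   if (s)
+ *           free(s);
+ */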
+
+
+/* converts <str> to a struct in_addr containing a network mask. It can be
+ * passed in dotted form (255.255.255.0) or in CIDR form (24). It returns 1
+ * if the conversion succeeds otherwise zero.
+ */
+int str2mask(const char *str, struct in_addr *mask)
+{
+ if (strchr(str, '.') != NULL) { /* dotted notation */
+ if (!inet_pton(AF_INET, str, mask))
+ return 0;
+ }
+ else { /* mask length */
+ char *err;
+ unsigned long len = strtol(str, &err, 10);
+
+ if (!*str || (err && *err) || (unsigned)len > 32)
+ return 0;
+
+ len2mask4(len, mask);
+ }
+ return 1;
+}
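+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * both notations yield the same mask.
+ *
+ *   struct in_addr m1, m2;
+ *
+ *   str2mask("255.255.255.0", &m1);  // dotted form
+ *   str2mask("24", &m2);             // CIDR form; m1.s_addr == m2.s_addr
+ */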
+
+/* converts <str> to a struct in6_addr containing a network mask. It can be
+ * passed in quadruplet form (ffff:ffff::) or in CIDR form (64). It returns 1
+ * if the conversion succeeds otherwise zero.
+ */
+int str2mask6(const char *str, struct in6_addr *mask)
+{
+ if (strchr(str, ':') != NULL) { /* quadruplet notation */
+ if (!inet_pton(AF_INET6, str, mask))
+ return 0;
+ }
+ else { /* mask length */
+ char *err;
+ unsigned long len = strtol(str, &err, 10);
+
+ if (!*str || (err && *err) || (unsigned)len > 128)
+ return 0;
+
+ len2mask6(len, mask);
+ }
+ return 1;
+}
+
+/* convert <cidr> to struct in_addr <mask>. It returns 1 if the conversion
+ * succeeds otherwise zero.
+ */
+int cidr2dotted(int cidr, struct in_addr *mask) {
+
+ if (cidr < 0 || cidr > 32)
+ return 0;
+
+ mask->s_addr = cidr ? htonl(~0UL << (32 - cidr)) : 0;
+ return 1;
+}
+
+/* Convert mask from bit length form to in_addr form.
+ * This function never fails.
+ */
+void len2mask4(int len, struct in_addr *addr)
+{
+ if (len >= 32) {
+ addr->s_addr = 0xffffffff;
+ return;
+ }
+ if (len <= 0) {
+ addr->s_addr = 0x00000000;
+ return;
+ }
+ addr->s_addr = 0xffffffff << (32 - len);
+ addr->s_addr = htonl(addr->s_addr);
+}
+
+/* Convert mask from bit length form to in6_addr form.
+ * This function never fails.
+ */
+void len2mask6(int len, struct in6_addr *addr)
+{
+ len2mask4(len, (struct in_addr *)&addr->s6_addr[0]); /* msb */
+ len -= 32;
+ len2mask4(len, (struct in_addr *)&addr->s6_addr[4]);
+ len -= 32;
+ len2mask4(len, (struct in_addr *)&addr->s6_addr[8]);
+ len -= 32;
+ len2mask4(len, (struct in_addr *)&addr->s6_addr[12]); /* lsb */
+}
+
+/*
+ * converts <str> to two struct in_addr* which must be pre-allocated.
+ * The format is "addr[/mask]", where "addr" cannot be empty, and mask
+ * is optional and either in the dotted or CIDR notation.
+ * Note: "addr" can also be a hostname. Returns 1 if OK, 0 if error.
+ */
+int str2net(const char *str, int resolve, struct in_addr *addr, struct in_addr *mask)
+{
+ __label__ out_free, out_err;
+ char *c, *s;
+ int ret_val;
+
+ s = strdup(str);
+ if (!s)
+ return 0;
+
+ memset(mask, 0, sizeof(*mask));
+ memset(addr, 0, sizeof(*addr));
+
+ if ((c = strrchr(s, '/')) != NULL) {
+ *c++ = '\0';
+ /* c points to the mask */
+ if (!str2mask(c, mask))
+ goto out_err;
+ }
+ else {
+ mask->s_addr = ~0U;
+ }
+ if (!inet_pton(AF_INET, s, addr)) {
+ struct hostent *he;
+
+ if (!resolve)
+ goto out_err;
+
+ if ((he = gethostbyname(s)) == NULL) {
+ goto out_err;
+ }
+ else
+ *addr = *(struct in_addr *) *(he->h_addr_list);
+ }
+
+ ret_val = 1;
+ out_free:
+ free(s);
+ return ret_val;
+ out_err:
+ ret_val = 0;
+ goto out_free;
+}
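+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * parsing a network in CIDR form without resolving hostnames.
+ *
+ *   struct in_addr addr, mask;
+ *
+ *   if (str2net("10.0.0.0/8", 0, &addr, &mask)) {
+ *           // addr == 10.0.0.0, mask == 255.0.0.0
+ *   }
+ */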
+
+
+/*
+ * converts <str> to two struct in6_addr* which must be pre-allocated.
+ * The format is "addr[/mask]", where "addr" cannot be empty, and mask
+ * is an optional number of bits (128 being the default).
+ * Returns 1 if OK, 0 if error.
+ */
+int str62net(const char *str, struct in6_addr *addr, unsigned char *mask)
+{
+ char *c, *s;
+ int ret_val = 0;
+ char *err;
+ unsigned long len = 128;
+
+ s = strdup(str);
+ if (!s)
+ return 0;
+
+ memset(mask, 0, sizeof(*mask));
+ memset(addr, 0, sizeof(*addr));
+
+ if ((c = strrchr(s, '/')) != NULL) {
+ *c++ = '\0'; /* c points to the mask */
+ if (!*c)
+ goto out_free;
+
+ len = strtoul(c, &err, 10);
+ if ((err && *err) || (unsigned)len > 128)
+ goto out_free;
+ }
+ *mask = len; /* OK we have a valid mask in <len> */
+
+ if (!inet_pton(AF_INET6, s, addr))
+ goto out_free;
+
+ ret_val = 1;
+ out_free:
+ free(s);
+ return ret_val;
+}
+
+
+/*
+ * Parse IPv4 address found in url. Return the number of bytes parsed. It
+ * expects exactly 4 numbers between 0 and 255 delimited by dots, and returns
+ * zero in case of mismatch.
+ */
+int url2ipv4(const char *addr, struct in_addr *dst)
+{
+ int saw_digit, octets, ch;
+ u_char tmp[4], *tp;
+ const char *cp = addr;
+
+ saw_digit = 0;
+ octets = 0;
+ *(tp = tmp) = 0;
+
+ while (*addr) {
+ unsigned char digit = (ch = *addr) - '0';
+ if (digit > 9 && ch != '.')
+ break;
+ addr++;
+ if (digit <= 9) {
+ u_int new = *tp * 10 + digit;
+ if (new > 255)
+ return 0;
+ *tp = new;
+ if (!saw_digit) {
+ if (++octets > 4)
+ return 0;
+ saw_digit = 1;
+ }
+ } else if (ch == '.' && saw_digit) {
+ if (octets == 4)
+ return 0;
+ *++tp = 0;
+ saw_digit = 0;
+ } else
+ return 0;
+ }
+
+ if (octets < 4)
+ return 0;
+
+ memcpy(&dst->s_addr, tmp, 4);
+ return addr - cp;
+}
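+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * the return value is the number of bytes consumed, so parsing can resume
+ * right after the address (e.g. on a ':' or '/').
+ *
+ *   struct in_addr ip;
+ *   int n = url2ipv4("192.168.0.1:8080", &ip);  // n == 11
+ */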
+
+/*
+ * Resolve destination server from URL. Convert <str> to a sockaddr_storage.
+ * <out> contain the code of the detected scheme, the start and length of
+ * the hostname. Actually only http and https are supported. <out> can be NULL.
+ * This function returns the consumed length. It is useful if you parse complete
+ * url like http://host:port/path, because the consumed length corresponds to
+ * the first character of the path. If the conversion fails, it returns -1.
+ *
+ * This function tries to resolve the DNS name if haproxy is in starting mode.
+ * So, this function may be used during the configuration parsing.
+ */
+int url2sa(const char *url, int ulen, struct sockaddr_storage *addr, struct split_url *out)
+{
+ const char *curr = url, *cp = url;
+ const char *end;
+ int ret, url_code = 0;
+ unsigned long long int http_code = 0;
+ int default_port;
+ struct hostent *he;
+ char *p;
+
+ /* Firstly, try to find :// pattern */
+ while (curr < url+ulen && url_code != 0x3a2f2f) {
+ url_code = ((url_code & 0xffff) << 8);
+ url_code += (unsigned char)*curr++;
+ }
+
+ /* Secondly, if the :// pattern is found, verify that the scheme
+ * parsed before it matches our http pattern.
+ * If so, parse the ip address and port in the uri.
+ *
+ * WARNING: Current code doesn't support a dynamic async dns resolver.
+ */
+ if (url_code != 0x3a2f2f)
+ return -1;
+
+ /* Copy the scheme and turn it to lower case. */
+ while (cp < curr - 3)
+ http_code = (http_code << 8) + *cp++;
+ http_code |= 0x2020202020202020ULL; /* Turn everything to lower case */
+
+ /* HTTP or HTTPS url matching */
+ if (http_code == 0x2020202068747470ULL) {
+ default_port = 80;
+ if (out)
+ out->scheme = SCH_HTTP;
+ }
+ else if (http_code == 0x2020206874747073ULL) {
+ default_port = 443;
+ if (out)
+ out->scheme = SCH_HTTPS;
+ }
+ else
+ return -1;
+
+ /* If the next char is '[', the host address is IPv6. */
+ if (*curr == '[') {
+ curr++;
+
+ /* Check trash size */
+ if (trash.size < ulen)
+ return -1;
+
+ /* Look for ']' and copy the address in a trash buffer. */
+ p = trash.area;
+ for (end = curr;
+ end < url + ulen && *end != ']';
+ end++, p++)
+ *p = *end;
+ if (*end != ']')
+ return -1;
+ *p = '\0';
+
+ /* Update out. */
+ if (out) {
+ out->host = curr;
+ out->host_len = end - curr;
+ }
+
+ /* Try IPv6 decoding. */
+ if (!inet_pton(AF_INET6, trash.area, &((struct sockaddr_in6 *)addr)->sin6_addr))
+ return -1;
+ end++;
+
+ /* Decode port. */
+ if (end < url + ulen && *end == ':') {
+ end++;
+ default_port = read_uint(&end, url + ulen);
+ }
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(default_port);
+ ((struct sockaddr_in6 *)addr)->sin6_family = AF_INET6;
+ return end - url;
+ }
+ else {
+ /* we need to copy the string into the trash because url2ipv4
+ * needs a \0 at the end of the string */
+ if (trash.size < ulen)
+ return -1;
+
+ memcpy(trash.area, curr, ulen - (curr - url));
+ trash.area[ulen - (curr - url)] = '\0';
+
+ /* We are looking for an IP address. If you want to parse and
+ * resolve a hostname found in the url, you can use str2sa_range(),
+ * but be warned this can slow down global daemon performance
+ * while handling lagging dns responses.
+ */
+ ret = url2ipv4(trash.area, &((struct sockaddr_in *)addr)->sin_addr);
+ if (ret) {
+ /* Update out. */
+ if (out) {
+ out->host = curr;
+ out->host_len = ret;
+ }
+
+ curr += ret;
+
+ /* Decode port. */
+ if (curr < url + ulen && *curr == ':') {
+ curr++;
+ default_port = read_uint(&curr, url + ulen);
+ }
+ ((struct sockaddr_in *)addr)->sin_port = htons(default_port);
+
+ /* Set family. */
+ ((struct sockaddr_in *)addr)->sin_family = AF_INET;
+ return curr - url;
+ }
+ else if (global.mode & MODE_STARTING) {
+ /* The IPv4 and IPv6 decoding failed: maybe the url contains a
+ * hostname. Try a synchronous DNS request only while HAProxy is
+ * in the starting state.
+ */
+
+ /* look for : or / or end */
+ for (end = curr;
+ end < url + ulen && *end != '/' && *end != ':';
+ end++);
+ memcpy(trash.area, curr, end - curr);
+ trash.area[end - curr] = '\0';
+
+ /* try to resolve an IPv4/IPv6 hostname */
+ he = gethostbyname(trash.area);
+ if (!he)
+ return -1;
+
+ /* Update out. */
+ if (out) {
+ out->host = curr;
+ out->host_len = end - curr;
+ }
+
+ /* Decode port. */
+ if (end < url + ulen && *end == ':') {
+ end++;
+ default_port = read_uint(&end, url + ulen);
+ }
+
+ /* Copy IP address, set port and family. */
+ switch (he->h_addrtype) {
+ case AF_INET:
+ ((struct sockaddr_in *)addr)->sin_addr = *(struct in_addr *) *(he->h_addr_list);
+ ((struct sockaddr_in *)addr)->sin_port = htons(default_port);
+ ((struct sockaddr_in *)addr)->sin_family = AF_INET;
+ return end - url;
+
+ case AF_INET6:
+ ((struct sockaddr_in6 *)addr)->sin6_addr = *(struct in6_addr *) *(he->h_addr_list);
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(default_port);
+ ((struct sockaddr_in6 *)addr)->sin6_family = AF_INET6;
+ return end - url;
+ }
+ }
+ }
+ return -1;
+}
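+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ *
+ *   struct sockaddr_storage addr;
+ *   struct split_url out;
+ *   const char *u = "http://192.168.0.1:8080/path";
+ *   int n = url2sa(u, strlen(u), &addr, &out);
+ *   // on success, u + n points to "/path", out.scheme == SCH_HTTP,
+ *   // and addr holds 192.168.0.1:8080
+ */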
+
+/* Tries to convert a sockaddr_storage address to text form. Upon success, the
+ * address family is returned so that it's easy for the caller to adapt to the
+ * output format. Zero is returned if the address family is not supported. -1
+ * is returned upon error, with errno set. AF_INET, AF_INET6 and AF_UNIX are
+ * supported.
+ */
+int addr_to_str(const struct sockaddr_storage *addr, char *str, int size)
+{
+
+ const void *ptr;
+
+ if (size < 5)
+ return 0;
+ *str = '\0';
+
+ switch (addr->ss_family) {
+ case AF_INET:
+ ptr = &((struct sockaddr_in *)addr)->sin_addr;
+ break;
+ case AF_INET6:
+ ptr = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ break;
+ case AF_UNIX:
+ memcpy(str, "unix", 5);
+ return addr->ss_family;
+ default:
+ return 0;
+ }
+
+ if (inet_ntop(addr->ss_family, ptr, str, size))
+ return addr->ss_family;
+
+ /* failed */
+ return -1;
+}
+
+/* Tries to convert a sockaddr_storage port to text form. Upon success, the
+ * address family is returned so that it's easy for the caller to adapt to the
+ * output format. Zero is returned if the address family is not supported. -1
+ * is returned upon error, with errno set. AF_INET, AF_INET6 and AF_UNIX are
+ * supported.
+ */
+int port_to_str(const struct sockaddr_storage *addr, char *str, int size)
+{
+
+ uint16_t port;
+
+
+ if (size < 6)
+ return 0;
+ *str = '\0';
+
+ switch (addr->ss_family) {
+ case AF_INET:
+ port = ((struct sockaddr_in *)addr)->sin_port;
+ break;
+ case AF_INET6:
+ port = ((struct sockaddr_in6 *)addr)->sin6_port;
+ break;
+ case AF_UNIX:
+ memcpy(str, "unix", 5);
+ return addr->ss_family;
+ default:
+ return 0;
+ }
+
+ snprintf(str, size, "%u", ntohs(port));
+ return addr->ss_family;
+}
+
+/* check if the given address is local to the system or not. It will return
+ * -1 when it's not possible to know, 0 when the address is not local, 1 when
+ * it is. We don't want to iterate over all interfaces for this (and it is not
+ * portable). So instead we try to bind in UDP to this address on a free non
+ * privileged port and to connect to the same address, port 0 (connect doesn't
+ * care). If it succeeds, we own the address. Note that non-inet addresses are
+ * considered local since they're most likely AF_UNIX.
+ */
+int addr_is_local(const struct netns_entry *ns,
+ const struct sockaddr_storage *orig)
+{
+ struct sockaddr_storage addr;
+ int result;
+ int fd;
+
+ if (!is_inet_addr(orig))
+ return 1;
+
+ memcpy(&addr, orig, sizeof(addr));
+ set_host_port(&addr, 0);
+
+ fd = my_socketat(ns, addr.ss_family, SOCK_DGRAM, IPPROTO_UDP);
+ if (fd < 0)
+ return -1;
+
+ result = -1;
+ if (bind(fd, (struct sockaddr *)&addr, get_addr_len(&addr)) == 0) {
+ if (connect(fd, (struct sockaddr *)&addr, get_addr_len(&addr)) == -1)
+ result = 0; // fail, non-local address
+ else
+ result = 1; // success, local address
+ }
+ else {
+ if (errno == EADDRNOTAVAIL)
+ result = 0; // definitely not local :-)
+ }
+ close(fd);
+
+ return result;
+}
+
+/* will try to encode the string <string> replacing all characters tagged in
+ * <map> with the hexadecimal representation of their ASCII-code (2 digits)
+ * prefixed by <escape>, and will store the result between <start> (included)
+ * and <stop> (excluded), and will always terminate the string with a '\0'
+ * before <stop>. The position of the '\0' is returned if the conversion
+ * completes. If bytes are missing between <start> and <stop>, then the
+ * conversion will be incomplete and truncated. If <stop> <= <start>, the '\0'
+ * cannot even be stored so we return <start> without writing the 0.
+ * The input string must also be zero-terminated.
+ */
+const char hextab[16] = "0123456789ABCDEF";
+char *encode_string(char *start, char *stop,
+ const char escape, const long *map,
+ const char *string)
+{
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && *string != '\0') {
+ if (!ha_bit_test((unsigned char)(*string), map))
+ *start++ = *string;
+ else {
+ if (start + 3 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = hextab[(*string >> 4) & 15];
+ *start++ = hextab[*string & 15];
+ }
+ string++;
+ }
+ *start = '\0';
+ }
+ return start;
+}
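+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * <my_map> is a hypothetical 256-bit map; every byte whose bit is set in
+ * the map gets encoded as <escape> followed by two hex digits.
+ *
+ *   long my_map[(256 / 8) / sizeof(long)] = { 0 };
+ *   char out[64];
+ *
+ *   ha_bit_set(' ', my_map);  // request encoding of spaces
+ *   encode_string(out, out + sizeof(out), '%', my_map, "a b");
+ *   // out = "a%20b"
+ */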
+
+/*
+ * Same behavior as encode_string() above, except that it encodes chunk
+ * <chunk> instead of a string.
+ */
+char *encode_chunk(char *start, char *stop,
+ const char escape, const long *map,
+ const struct buffer *chunk)
+{
+ char *str = chunk->area;
+ char *end = chunk->area + chunk->data;
+
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && str < end) {
+ if (!ha_bit_test((unsigned char)(*str), map))
+ *start++ = *str;
+ else {
+ if (start + 3 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = hextab[(*str >> 4) & 15];
+ *start++ = hextab[*str & 15];
+ }
+ str++;
+ }
+ *start = '\0';
+ }
+ return start;
+}
+
+/*
+ * Tries to prefix characters tagged in the <map> with the <escape>
+ * character. The input <string> is processed until <string_stop>
+ * is reached or a NUL byte is encountered. The result will
+ * be stored between <start> (included) and <stop> (excluded). This
+ * function will always try to terminate the resulting string with a '\0'
+ * before <stop>, and will return its position if the conversion
+ * completes.
+ */
+char *escape_string(char *start, char *stop,
+ const char escape, const long *map,
+ const char *string, const char *string_stop)
+{
+ if (start < stop) {
+ stop--; /* reserve one byte for the final '\0' */
+ while (start < stop && string < string_stop && *string != '\0') {
+ if (!ha_bit_test((unsigned char)(*string), map))
+ *start++ = *string;
+ else {
+ if (start + 2 >= stop)
+ break;
+ *start++ = escape;
+ *start++ = *string;
+ }
+ string++;
+ }
+ *start = '\0';
+ }
+ return start;
+}
+
+/* Check a string for using it in a CSV output format. If the string contains
+ * one of the following four chars <">, <,>, CR or LF, the string is
+ * encapsulated between <"> and the <"> are escaped by a <""> sequence.
+ * <str> is the input string to be escaped. The function assumes that
+ * the input string is null-terminated.
+ *
+ * If <quote> is 0, the result is returned escaped but without double quote.
+ * It is useful if the escaped string is used between double quotes in the
+ * format.
+ *
+ * printf("..., \"%s\", ...\r\n", csv_enc(str, 0, 0, &trash));
+ *
+ * If <quote> is 1, the converter puts the quotes only if any reserved character
+ * is present. If <quote> is 2, the converter always puts the quotes.
+ *
+ * If <oneline> is not 0, CRs are skipped and LFs are replaced by spaces.
+ * This re-formats multi-line strings to a single line. The purpose is to
+ * allow line-by-line parsing but also to keep the output compliant with
+ * the CLI which uses LF to define the end of the response.
+ *
+ * If <oneline> is 2, in addition to the previous actions, trailing spaces
+ * are removed.
+ *
+ * <output> is a struct buffer used for storing the output string.
+ *
+ * The function returns the converted string on its output. If an error
+ * occurs, the function returns an empty string. This type of output is useful
+ * for using the function directly as printf() argument.
+ *
+ * If the output buffer is too short to contain the input string, the result
+ * is truncated.
+ *
+ * This function appends the encoding to the existing output chunk, and it
+ * guarantees that it starts immediately at the first available character of
+ * the chunk. Please use csv_enc() instead if you want to replace the output
+ * chunk.
+ */
+const char *csv_enc_append(const char *str, int quote, int oneline, struct buffer *output)
+{
+ char *end = output->area + output->size;
+ char *out = output->area + output->data;
+ char *ptr = out;
+
+ if (quote == 1) {
+ /* automatic quoting: first verify if we'll have to quote the string */
+ if (!strpbrk(str, "\n\r,\""))
+ quote = 0;
+ }
+
+ if (quote)
+ *ptr++ = '"';
+
+ while (*str && ptr < end - 2) { /* -2 for reserving space for <"> and \0. */
+ if (oneline) {
+ if (*str == '\n' ) {
+ /* replace LF by a space */
+ *ptr++ = ' ';
+ str++;
+ continue;
+ }
+ else if (*str == '\r' ) {
+ /* skip CR */
+ str++;
+ continue;
+ }
+ }
+ *ptr = *str;
+ if (*str == '"') {
+ ptr++;
+ if (ptr >= end - 2) {
+ ptr--;
+ break;
+ }
+ *ptr = '"';
+ }
+ ptr++;
+ str++;
+ }
+
+ if (oneline == 2) {
+ /* remove trailing spaces */
+ while (ptr > out && *(ptr - 1) == ' ')
+ ptr--;
+ }
+
+ if (quote)
+ *ptr++ = '"';
+
+ *ptr = '\0';
+ output->data = ptr - output->area;
+ return out;
+}
+
+/* Decode an URL-encoded string in-place. The resulting string might
+ * be shorter. If some forbidden characters are found, the conversion is
+ * aborted, the string is truncated before the issue and a negative value is
+ * returned, otherwise the operation returns the length of the decoded string.
+ * If the 'in_form' argument is non-zero, the string is assumed to be part of
+ * an "application/x-www-form-urlencoded" encoded string, and the '+' will be
+ * turned to a space. If it's zero, this will only be done after a question
+ * mark ('?').
+ */
+int url_decode(char *string, int in_form)
+{
+ char *in, *out;
+ int ret = -1;
+
+ in = string;
+ out = string;
+ while (*in) {
+ switch (*in) {
+ case '+' :
+ *out++ = in_form ? ' ' : *in;
+ break;
+ case '%' :
+ if (!ishex(in[1]) || !ishex(in[2]))
+ goto end;
+ *out++ = (hex2i(in[1]) << 4) + hex2i(in[2]);
+ in += 2;
+ break;
+ case '?':
+ in_form = 1;
+ __fallthrough;
+ default:
+ *out++ = *in;
+ break;
+ }
+ in++;
+ }
+ ret = out - string; /* success */
+ end:
+ *out = 0;
+ return ret;
+}
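+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * decoding happens in place and the new length is returned.
+ *
+ *   char q[] = "a=b%20c+d";
+ *   int n = url_decode(q, 1);  // q = "a=b c d", n == 7
+ */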
+
+unsigned int str2ui(const char *s)
+{
+ return __str2ui(s);
+}
+
+unsigned int str2uic(const char *s)
+{
+ return __str2uic(s);
+}
+
+unsigned int strl2ui(const char *s, int len)
+{
+ return __strl2ui(s, len);
+}
+
+unsigned int strl2uic(const char *s, int len)
+{
+ return __strl2uic(s, len);
+}
+
+unsigned int read_uint(const char **s, const char *end)
+{
+ return __read_uint(s, end);
+}
+
+/* This function reads an unsigned integer from the string pointed to by <s> and
+ * returns it. The <s> pointer is adjusted to point to the first unread char. The
+ * function automatically stops at <end>. If the number overflows, the 2^64-1
+ * value is returned.
+ */
+unsigned long long int read_uint64(const char **s, const char *end)
+{
+ const char *ptr = *s;
+ unsigned long long int i = 0, tmp;
+ unsigned int j;
+
+ while (ptr < end) {
+
+ /* read next char */
+ j = *ptr - '0';
+ if (j > 9)
+ goto read_uint64_end;
+
+ /* add char to the number and check overflow. */
+ tmp = i * 10;
+ if (tmp / 10 != i) {
+ i = ULLONG_MAX;
+ goto read_uint64_eat;
+ }
+ if (ULLONG_MAX - tmp < j) {
+ i = ULLONG_MAX;
+ goto read_uint64_eat;
+ }
+ i = tmp + j;
+ ptr++;
+ }
+read_uint64_eat:
+ /* eat each numeric char */
+ while (ptr < end) {
+ if ((unsigned int)(*ptr - '0') > 9)
+ break;
+ ptr++;
+ }
+read_uint64_end:
+ *s = ptr;
+ return i;
+}
+
+/* This function reads an integer from the string pointed to by <s> and returns
+ * it. The <s> pointer is adjusted to point to the first unread char. The function
+ * automatically stops at <end>. If the number is bigger than 2^63-1, the 2^63-1
+ * value is returned. If the number is lower than -2^63, the -2^63 value is
+ * returned.
+ */
+long long int read_int64(const char **s, const char *end)
+{
+ unsigned long long int i = 0;
+ int neg = 0;
+
+ /* Look for minus char. */
+ if (**s == '-') {
+ neg = 1;
+ (*s)++;
+ }
+ else if (**s == '+')
+ (*s)++;
+
+ /* convert as positive number. */
+ i = read_uint64(s, end);
+
+ if (neg) {
+ if (i > 0x8000000000000000ULL)
+ return LLONG_MIN;
+ return -i;
+ }
+ if (i > 0x7fffffffffffffffULL)
+ return LLONG_MAX;
+ return i;
+}
+
+/* This one is 7 times faster than strtol() on athlon with checks.
+ * It returns the value of the number composed of all valid digits read,
+ * and can process negative numbers too.
+ */
+int strl2ic(const char *s, int len)
+{
+ int i = 0;
+ int j, k;
+
+ if (len > 0) {
+ if (*s != '-') {
+ /* positive number */
+ while (len-- > 0) {
+ j = (*s++) - '0';
+ k = i * 10;
+ if (j > 9)
+ break;
+ i = k + j;
+ }
+ } else {
+ /* negative number */
+ s++;
+ while (--len > 0) {
+ j = (*s++) - '0';
+ k = i * 10;
+ if (j > 9)
+ break;
+ i = k - j;
+ }
+ }
+ }
+ return i;
+}
+
+
+/* This function reads exactly <len> chars from <s> and converts them to a
+ * signed integer which it stores into <ret>. It accurately detects any error
+ * (truncated string, invalid chars, overflows). It is meant to be used in
+ * applications designed for hostile environments. It returns zero when the
+ * number has successfully been converted, non-zero otherwise. When an error
+ * is returned, the <ret> value is left untouched. It is still 5 to 40 times
+ * faster than strtol().
+ */
+int strl2irc(const char *s, int len, int *ret)
+{
+ int i = 0;
+ int j;
+
+ if (!len)
+ return 1;
+
+ if (*s != '-') {
+ /* positive number */
+ while (len-- > 0) {
+ j = (*s++) - '0';
+ if (j > 9) return 1; /* invalid char */
+ if (i > INT_MAX / 10) return 1; /* check for multiply overflow */
+ i = i * 10;
+ if (i + j < i) return 1; /* check for addition overflow */
+ i = i + j;
+ }
+ } else {
+ /* negative number */
+ s++;
+ while (--len > 0) {
+ j = (*s++) - '0';
+ if (j > 9) return 1; /* invalid char */
+ if (i < INT_MIN / 10) return 1; /* check for multiply overflow */
+ i = i * 10;
+ if (i - j > i) return 1; /* check for subtract overflow */
+ i = i - j;
+ }
+ }
+ *ret = i;
+ return 0;
+}
+
+
+/* This function reads exactly <len> chars from <s> and converts them to a
+ * signed integer which it stores into <ret>. It accurately detects any error
+ * (truncated string, invalid chars, overflows). It is meant to be used in
+ * applications designed for hostile environments. It returns zero when the
+ * number has successfully been converted, non-zero otherwise. When an error
+ * is returned, the <ret> value is left untouched. It is about 3 times slower
+ * than strl2irc().
+ */
+
+int strl2llrc(const char *s, int len, long long *ret)
+{
+ long long i = 0;
+ int j;
+
+ if (!len)
+ return 1;
+
+ if (*s != '-') {
+ /* positive number */
+ while (len-- > 0) {
+ j = (*s++) - '0';
+ if (j > 9) return 1; /* invalid char */
+ if (i > LLONG_MAX / 10LL) return 1; /* check for multiply overflow */
+ i = i * 10LL;
+ if (i + j < i) return 1; /* check for addition overflow */
+ i = i + j;
+ }
+ } else {
+ /* negative number */
+ s++;
+ while (--len > 0) {
+ j = (*s++) - '0';
+ if (j > 9) return 1; /* invalid char */
+ if (i < LLONG_MIN / 10LL) return 1; /* check for multiply overflow */
+ i = i * 10LL;
+ if (i - j > i) return 1; /* check for subtract overflow */
+ i = i - j;
+ }
+ }
+ *ret = i;
+ return 0;
+}
+
+/* This function is used with pat_parse_dotted_ver(). It converts a string
+ * composed of two numbers separated by a dot. Each part must fit in 16 bits
+ * because internally they will be represented as a 32-bit quantity stored in
+ * a 64-bit integer. It returns zero when the number has successfully been
+ * converted, non-zero otherwise. When an error is returned, the <ret> value
+ * is left untouched.
+ *
+ * "1.3" -> 0x0000000000010003
+ * "65535.65535" -> 0x00000000ffffffff
+ */
+int strl2llrc_dotted(const char *text, int len, long long *ret)
+{
+ const char *end = &text[len];
+ const char *p;
+ long long major, minor;
+
+ /* Look for dot. */
+ for (p = text; p < end; p++)
+ if (*p == '.')
+ break;
+
+ /* Convert major. */
+ if (strl2llrc(text, p - text, &major) != 0)
+ return 1;
+
+ /* Check major. */
+ if (major >= 65536)
+ return 1;
+
+ /* Convert minor. */
+ minor = 0;
+ if (p < end)
+ if (strl2llrc(p + 1, end - (p + 1), &minor) != 0)
+ return 1;
+
+ /* Check minor. */
+ if (minor >= 65536)
+ return 1;
+
+ /* Compose value. */
+ *ret = (major << 16) | (minor & 0xffff);
+ return 0;
+}
+
+/* This function parses a time value optionally followed by a unit suffix among
+ * "d", "h", "m", "s", "ms" or "us". It converts the value into the unit
+ * expected by the caller. The computation does its best to avoid overflows.
+ * The value is returned in <ret> if everything is fine, and NULL is returned
+ * by the function. In case of error, a pointer to the offending character is
+ * returned and <ret> is left untouched. Values are automatically rounded up
+ * when needed. Values larger than or equal to 2^31 after conversion are
+ * reported as an overflow with the value PARSE_TIME_OVER. Non-zero values
+ * resulting in an underflow are reported with the value PARSE_TIME_UNDER.
+ */
+const char *parse_time_err(const char *text, unsigned *ret, unsigned unit_flags)
+{
+ unsigned long long imult, idiv;
+ unsigned long long omult, odiv;
+ unsigned long long value, result;
+ const char *str = text;
+
+ if (!isdigit((unsigned char)*text))
+ return text;
+
+ omult = odiv = 1;
+
+ switch (unit_flags & TIME_UNIT_MASK) {
+ case TIME_UNIT_US: omult = 1000000; break;
+ case TIME_UNIT_MS: omult = 1000; break;
+ case TIME_UNIT_S: break;
+ case TIME_UNIT_MIN: odiv = 60; break;
+ case TIME_UNIT_HOUR: odiv = 3600; break;
+ case TIME_UNIT_DAY: odiv = 86400; break;
+ default: break;
+ }
+
+ value = 0;
+
+ while (1) {
+ unsigned int j;
+
+ j = *text - '0';
+ if (j > 9)
+ break;
+ text++;
+ value *= 10;
+ value += j;
+ }
+
+ imult = idiv = 1;
+ switch (*text) {
+ case '\0': /* no unit = default unit */
+ imult = omult = idiv = odiv = 1;
+ goto end;
+ case 's': /* second = unscaled unit */
+ break;
+ case 'u': /* microsecond : "us" */
+ if (text[1] == 's') {
+ idiv = 1000000;
+ text++;
+ break;
+ }
+ return text;
+ case 'm': /* millisecond : "ms" or minute: "m" */
+ if (text[1] == 's') {
+ idiv = 1000;
+ text++;
+ } else
+ imult = 60;
+ break;
+ case 'h': /* hour : "h" */
+ imult = 3600;
+ break;
+ case 'd': /* day : "d" */
+ imult = 86400;
+ break;
+ default:
+ return text;
+ }
+ if (*(++text) != '\0') {
+ ha_warning("unexpected character '%c' after the timer value '%s', only "
+ "(us=microseconds,ms=milliseconds,s=seconds,m=minutes,h=hours,d=days) are supported."
+ " This will be reported as an error in next versions.\n", *text, str);
+ }
+
+ end:
+ if (omult % idiv == 0) { omult /= idiv; idiv = 1; }
+ if (idiv % omult == 0) { idiv /= omult; omult = 1; }
+ if (imult % odiv == 0) { imult /= odiv; odiv = 1; }
+ if (odiv % imult == 0) { odiv /= imult; imult = 1; }
+
+ result = (value * (imult * omult) + (idiv * odiv - 1)) / (idiv * odiv);
+ if (result >= 0x80000000)
+ return PARSE_TIME_OVER;
+ if (!result && value)
+ return PARSE_TIME_UNDER;
+ *ret = result;
+ return NULL;
+}
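+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * parsing "5s" into a millisecond-based setting.
+ *
+ *   unsigned timeout;
+ *   const char *err = parse_time_err("5s", &timeout, TIME_UNIT_MS);
+ *   // err == NULL and timeout == 5000 on success; err may also be
+ *   // PARSE_TIME_OVER or PARSE_TIME_UNDER on range errors
+ */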
+
+/* this function converts the string starting at <text> to an unsigned int
+ * stored in <ret>. If an error is detected, the pointer to the unexpected
+ * character is returned. If the conversion is successful, NULL is returned.
+ */
+const char *parse_size_err(const char *text, unsigned *ret) {
+ unsigned value = 0;
+
+ if (!isdigit((unsigned char)*text))
+ return text;
+
+ while (1) {
+ unsigned int j;
+
+ j = *text - '0';
+ if (j > 9)
+ break;
+ if (value > ~0U / 10)
+ return text;
+ value *= 10;
+ if (value > (value + j))
+ return text;
+ value += j;
+ text++;
+ }
+
+ switch (*text) {
+ case '\0':
+ break;
+ case 'K':
+ case 'k':
+ if (value > ~0U >> 10)
+ return text;
+ value = value << 10;
+ break;
+ case 'M':
+ case 'm':
+ if (value > ~0U >> 20)
+ return text;
+ value = value << 20;
+ break;
+ case 'G':
+ case 'g':
+ if (value > ~0U >> 30)
+ return text;
+ value = value << 30;
+ break;
+ default:
+ return text;
+ }
+
+ if (*text != '\0' && *++text != '\0')
+ return text;
+
+ *ret = value;
+ return NULL;
+}
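+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ *
+ *   unsigned size;
+ *
+ *   if (parse_size_err("64k", &size) == NULL) {
+ *           // size == 65536
+ *   }
+ */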
+
+/*
+ * Parse binary string written in hexadecimal (source) and store the decoded
+ * result into binstr and set binstrlen to the length of binstr. Memory for
+ * binstr is allocated by the function. In case of error, returns 0 with an
+ * error message in err. In success case, it returns the consumed length.
+ */
+int parse_binary(const char *source, char **binstr, int *binstrlen, char **err)
+{
+ int len;
+ const char *p = source;
+ int i,j;
+ int alloc;
+
+ len = strlen(source);
+ if (len % 2) {
+ memprintf(err, "an even number of hex digit is expected");
+ return 0;
+ }
+
+ len = len >> 1;
+
+ if (!*binstr) {
+ *binstr = calloc(len, sizeof(**binstr));
+ if (!*binstr) {
+ memprintf(err, "out of memory while loading string pattern");
+ return 0;
+ }
+ alloc = 1;
+ }
+ else {
+ if (*binstrlen < len) {
+ memprintf(err, "no space available in the buffer. expect %d, provides %d",
+ len, *binstrlen);
+ return 0;
+ }
+ alloc = 0;
+ }
+ *binstrlen = len;
+
+ i = j = 0;
+ while (j < len) {
+ if (!ishex(p[i++]))
+ goto bad_input;
+ if (!ishex(p[i++]))
+ goto bad_input;
+ (*binstr)[j++] = (hex2i(p[i-2]) << 4) + hex2i(p[i-1]);
+ }
+ return len << 1;
+
+bad_input:
+ memprintf(err, "an hex digit is expected (found '%c')", p[i-1]);
+ if (alloc)
+ ha_free(binstr);
+ return 0;
+}
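+
+/* Minimal usage sketch (illustrative, not part of the upstream sources):
+ * letting the function allocate the output buffer.
+ *
+ *   char *bin = NULL;
+ *   int binlen = 0;
+ *   char *err = NULL;
+ *
+ *   if (parse_binary("dead01", &bin, &binlen, &err)) {
+ *           // binlen == 3, bin = { 0xde, 0xad, 0x01 }; caller frees bin
+ *   }
+ */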
+
+/* copies at most <n> characters from <src> and always terminates with '\0' */
+char *my_strndup(const char *src, int n)
+{
+ int len = 0;
+ char *ret;
+
+ while (len < n && src[len])
+ len++;
+
+ ret = malloc(len + 1);
+ if (!ret)
+ return ret;
+ memcpy(ret, src, len);
+ ret[len] = '\0';
+ return ret;
+}
+
+/*
+ * searches for <needle> in <haystack>;
+ * returns a pointer to the first occurrence, or NULL if not found
+ */
+const void *my_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen)
+{
+ const void *c = NULL;
+ unsigned char f;
+
+ if ((haystack == NULL) || (needle == NULL) || (haystacklen < needlelen))
+ return NULL;
+
+ f = *(char *)needle;
+ c = haystack;
+ while ((c = memchr(c, f, haystacklen - (c - haystack))) != NULL) {
+ if ((haystacklen - (c - haystack)) < needlelen)
+ return NULL;
+
+ if (memcmp(c, needle, needlelen) == 0)
+ return c;
+ ++c;
+ }
+ return NULL;
+}
+
+/* get length of the initial segment consisting entirely of bytes in <accept> */
+size_t my_memspn(const void *str, size_t len, const void *accept, size_t acceptlen)
+{
+ size_t ret = 0;
+
+ while (ret < len && memchr(accept, *(const unsigned char *)str, acceptlen)) {
+ str++;
+ ret++;
+ }
+ return ret;
+}
+
+/* get length of the initial segment consisting entirely of bytes not in <reject> */
+size_t my_memcspn(const void *str, size_t len, const void *reject, size_t rejectlen)
+{
+ size_t ret = 0;
+
+ while (ret < len) {
+ if (memchr(reject, *(const unsigned char *)str, rejectlen))
+ return ret;
+ str++;
+ ret++;
+ }
+ return ret;
+}
+
+/* This function returns the first unused key greater than or equal to <key> in
+ * ID tree <root>. Zero is returned if no place is found.
+ */
+unsigned int get_next_id(struct eb_root *root, unsigned int key)
+{
+ struct eb32_node *used;
+
+ do {
+ used = eb32_lookup_ge(root, key);
+ if (!used || used->key > key)
+ return key; /* key is available */
+ key++;
+ } while (key);
+ return key;
+}
+
+/* dump the full tree to <file> in DOT format for debugging purposes. Will
+ * optionally highlight node <subj> if found, depending on operation <op> :
+ * 0 : nothing
+ * >0 : insertion, node/leaf are surrounded in red
+ * <0 : removal, node/leaf are dashed with no background
+ * Will optionally add "desc" as a label on the graph if set and non-null.
+ */
+void eb32sc_to_file(FILE *file, struct eb_root *root, const struct eb32sc_node *subj, int op, const char *desc)
+{
+ struct eb32sc_node *node;
+ unsigned long scope = -1;
+
+ fprintf(file, "digraph ebtree {\n");
+
+ if (desc && *desc) {
+ fprintf(file,
+ " fontname=\"fixed\";\n"
+ " fontsize=8;\n"
+ " label=\"%s\";\n", desc);
+ }
+
+ fprintf(file,
+ " node [fontname=\"fixed\" fontsize=8 shape=\"box\" style=\"filled\" color=\"black\" fillcolor=\"white\"];\n"
+ " edge [fontname=\"fixed\" fontsize=8 style=\"solid\" color=\"magenta\" dir=\"forward\"];\n"
+ " \"%lx_n\" [label=\"root\\n%lx\"]\n", (long)eb_root_to_node(root), (long)root
+ );
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"L\"];\n",
+ (long)eb_root_to_node(root),
+ (long)eb_root_to_node(eb_clrtag(root->b[0])),
+ eb_gettag(root->b[0]) == EB_LEAF ? 'l' : 'n');
+
+ node = eb32sc_first(root, scope);
+ while (node) {
+ if (node->node.node_p) {
+ /* node part is used */
+ fprintf(file, " \"%lx_n\" [label=\"%lx\\nkey=%u\\nscope=%lx\\nbit=%d\" fillcolor=\"lightskyblue1\" %s];\n",
+ (long)node, (long)node, node->key, node->node_s, node->node.bit,
+ (node == subj) ? (op < 0 ? "color=\"red\" style=\"dashed\"" : op > 0 ? "color=\"red\"" : "") : "");
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_n\" [taillabel=\"%c\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.node_p)),
+ eb_gettag(node->node.node_p) ? 'R' : 'L');
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"L\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.branches.b[0])),
+ eb_gettag(node->node.branches.b[0]) == EB_LEAF ? 'l' : 'n');
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"R\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.branches.b[1])),
+ eb_gettag(node->node.branches.b[1]) == EB_LEAF ? 'l' : 'n');
+ }
+
+ fprintf(file, " \"%lx_l\" [label=\"%lx\\nkey=%u\\nscope=%lx\\npfx=%u\" fillcolor=\"yellow\" %s];\n",
+ (long)node, (long)node, node->key, node->leaf_s, node->node.pfx,
+ (node == subj) ? (op < 0 ? "color=\"red\" style=\"dashed\"" : op > 0 ? "color=\"red\"" : "") : "");
+
+ fprintf(file, " \"%lx_l\" -> \"%lx_n\" [taillabel=\"%c\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.leaf_p)),
+ eb_gettag(node->node.leaf_p) ? 'R' : 'L');
+ node = eb32sc_next(node, scope);
+ }
+ fprintf(file, "}\n");
+}
+
+/* dump the full tree to <file> in DOT format for debugging purposes. Will
+ * optionally highlight node <subj> if found, depending on operation <op> :
+ * 0 : nothing
+ * >0 : insertion, node/leaf are surrounded in red
+ * <0 : removal, node/leaf are dashed with no background
+ * Will optionally add "desc" as a label on the graph if set and non-null. The
+ * key is printed as a u32 hex value. A full-sized hex dump would be better but
+ * is left to be implemented.
+ */
+void ebmb_to_file(FILE *file, struct eb_root *root, const struct ebmb_node *subj, int op, const char *desc)
+{
+ struct ebmb_node *node;
+
+ fprintf(file, "digraph ebtree {\n");
+
+ if (desc && *desc) {
+ fprintf(file,
+ " fontname=\"fixed\";\n"
+ " fontsize=8;\n"
+ " label=\"%s\";\n", desc);
+ }
+
+ fprintf(file,
+ " node [fontname=\"fixed\" fontsize=8 shape=\"box\" style=\"filled\" color=\"black\" fillcolor=\"white\"];\n"
+ " edge [fontname=\"fixed\" fontsize=8 style=\"solid\" color=\"magenta\" dir=\"forward\"];\n"
+ " \"%lx_n\" [label=\"root\\n%lx\"]\n", (long)eb_root_to_node(root), (long)root
+ );
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"L\"];\n",
+ (long)eb_root_to_node(root),
+ (long)eb_root_to_node(eb_clrtag(root->b[0])),
+ eb_gettag(root->b[0]) == EB_LEAF ? 'l' : 'n');
+
+ node = ebmb_first(root);
+ while (node) {
+ if (node->node.node_p) {
+ /* node part is used */
+ fprintf(file, " \"%lx_n\" [label=\"%lx\\nkey=%#x\\nbit=%d\" fillcolor=\"lightskyblue1\" %s];\n",
+ (long)node, (long)node, read_u32(node->key), node->node.bit,
+ (node == subj) ? (op < 0 ? "color=\"red\" style=\"dashed\"" : op > 0 ? "color=\"red\"" : "") : "");
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_n\" [taillabel=\"%c\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.node_p)),
+ eb_gettag(node->node.node_p) ? 'R' : 'L');
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"L\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.branches.b[0])),
+ eb_gettag(node->node.branches.b[0]) == EB_LEAF ? 'l' : 'n');
+
+ fprintf(file, " \"%lx_n\" -> \"%lx_%c\" [taillabel=\"R\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.branches.b[1])),
+ eb_gettag(node->node.branches.b[1]) == EB_LEAF ? 'l' : 'n');
+ }
+
+ fprintf(file, " \"%lx_l\" [label=\"%lx\\nkey=%#x\\npfx=%u\" fillcolor=\"yellow\" %s];\n",
+ (long)node, (long)node, read_u32(node->key), node->node.pfx,
+ (node == subj) ? (op < 0 ? "color=\"red\" style=\"dashed\"" : op > 0 ? "color=\"red\"" : "") : "");
+
+ fprintf(file, " \"%lx_l\" -> \"%lx_n\" [taillabel=\"%c\"];\n",
+ (long)node,
+ (long)eb_root_to_node(eb_clrtag(node->node.leaf_p)),
+ eb_gettag(node->node.leaf_p) ? 'R' : 'L');
+ node = ebmb_next(node);
+ }
+ fprintf(file, "}\n");
+}
+
+/* This function compares a sample word possibly followed by blanks to another
+ * clean word. The compare is case-insensitive. 1 is returned if both are equal,
+ * otherwise zero. This intends to be used when checking HTTP headers for some
+ * values. Note that it validates a word followed only by blanks but does not
+ * validate a word followed by blanks then other chars.
+ */
+int word_match(const char *sample, int slen, const char *word, int wlen)
+{
+ if (slen < wlen)
+ return 0;
+
+ while (wlen) {
+ char c = *sample ^ *word;
+ if (c && c != ('A' ^ 'a'))
+ return 0;
+ sample++;
+ word++;
+ slen--;
+ wlen--;
+ }
+
+ while (slen) {
+ if (*sample != ' ' && *sample != '\t')
+ return 0;
+ sample++;
+ slen--;
+ }
+ return 1;
+}
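+
+/* Illustrative sketch: word_match("close \t", 7, "CLOSE", 5) returns 1
+ * (case-insensitive match, trailing blanks accepted), while
+ * word_match("closed", 6, "close", 5) returns 0 because 'd' follows the word.
+ */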
+
+/* Converts any text-formatted IPv4 address to a host-order IPv4 address. It
+ * is particularly fast because it avoids expensive operations such as
+ * multiplies, which are optimized away at the end. It requires a properly
+ * formatted address though (3 dots).
+ */
+unsigned int inetaddr_host(const char *text)
+{
+ const unsigned int ascii_zero = ('0' << 24) | ('0' << 16) | ('0' << 8) | '0';
+ register unsigned int dig100, dig10, dig1;
+ int s;
+ const char *p, *d;
+
+ dig1 = dig10 = dig100 = ascii_zero;
+ s = 24;
+
+ p = text;
+ while (1) {
+ if (((unsigned)(*p - '0')) <= 9) {
+ p++;
+ continue;
+ }
+
+ /* here, we have a complete byte between <text> and <p> (exclusive) */
+ if (p == text)
+ goto end;
+
+ d = p - 1;
+ dig1 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig10 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig100 |= (unsigned int)(*d << s);
+ end:
+ if (!s || *p != '.')
+ break;
+
+ s -= 8;
+ text = ++p;
+ }
+
+ dig100 -= ascii_zero;
+ dig10 -= ascii_zero;
+ dig1 -= ascii_zero;
+ return ((dig100 * 10) + dig10) * 10 + dig1;
+}
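+
+/* Illustrative sketch: inetaddr_host("127.0.0.1") returns 0x7f000001 in
+ * host byte order; a caller storing it into a struct in_addr would
+ * typically apply htonl() first.
+ */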
+
+/*
+ * Idem except that parsing stops at <stop>, which must point to the first
+ * character not to be parsed.
+ */
+unsigned int inetaddr_host_lim(const char *text, const char *stop)
+{
+ const unsigned int ascii_zero = ('0' << 24) | ('0' << 16) | ('0' << 8) | '0';
+ register unsigned int dig100, dig10, dig1;
+ int s;
+ const char *p, *d;
+
+ dig1 = dig10 = dig100 = ascii_zero;
+ s = 24;
+
+ p = text;
+ while (1) {
+ if (((unsigned)(*p - '0')) <= 9 && p < stop) {
+ p++;
+ continue;
+ }
+
+ /* here, we have a complete byte between <text> and <p> (exclusive) */
+ if (p == text)
+ goto end;
+
+ d = p - 1;
+ dig1 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig10 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig100 |= (unsigned int)(*d << s);
+ end:
+ if (!s || p == stop || *p != '.')
+ break;
+
+ s -= 8;
+ text = ++p;
+ }
+
+ dig100 -= ascii_zero;
+ dig10 -= ascii_zero;
+ dig1 -= ascii_zero;
+ return ((dig100 * 10) + dig10) * 10 + dig1;
+}
+
+/*
+ * Idem except that a pointer to the first unparsed byte is returned into <ret>, which
+ * must not be NULL.
+ */
+unsigned int inetaddr_host_lim_ret(char *text, char *stop, char **ret)
+{
+ const unsigned int ascii_zero = ('0' << 24) | ('0' << 16) | ('0' << 8) | '0';
+ register unsigned int dig100, dig10, dig1;
+ int s;
+ char *p, *d;
+
+ dig1 = dig10 = dig100 = ascii_zero;
+ s = 24;
+
+ p = text;
+ while (1) {
+ if (((unsigned)(*p - '0')) <= 9 && p < stop) {
+ p++;
+ continue;
+ }
+
+ /* here, we have a complete byte between <text> and <p> (exclusive) */
+ if (p == text)
+ goto end;
+
+ d = p - 1;
+ dig1 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig10 |= (unsigned int)(*d << s);
+ if (d == text)
+ goto end;
+
+ d--;
+ dig100 |= (unsigned int)(*d << s);
+ end:
+ if (!s || p == stop || *p != '.')
+ break;
+
+ s -= 8;
+ text = ++p;
+ }
+
+ *ret = p;
+ dig100 -= ascii_zero;
+ dig10 -= ascii_zero;
+ dig1 -= ascii_zero;
+ return ((dig100 * 10) + dig10) * 10 + dig1;
+}
+
+/* Convert a fixed-length string to an IP address. Returns 0 in case of error,
+ * or the number of chars read in case of success. Maybe this could be replaced
+ * by one of the functions above. Also, this function rejects octet values
+ * above 255 and requires exactly 4 octets.
+ * The destination is only modified on success.
+ */
+int buf2ip(const char *buf, size_t len, struct in_addr *dst)
+{
+ const char *addr;
+ int saw_digit, octets, ch;
+ u_char tmp[4], *tp;
+ const char *cp = buf;
+
+ saw_digit = 0;
+ octets = 0;
+ *(tp = tmp) = 0;
+
+ for (addr = buf; addr - buf < len; addr++) {
+ unsigned char digit = (ch = *addr) - '0';
+
+ if (digit > 9 && ch != '.')
+ break;
+
+ if (digit <= 9) {
+ u_int new = *tp * 10 + digit;
+
+ if (new > 255)
+ return 0;
+
+ *tp = new;
+
+ if (!saw_digit) {
+ if (++octets > 4)
+ return 0;
+ saw_digit = 1;
+ }
+ } else if (ch == '.' && saw_digit) {
+ if (octets == 4)
+ return 0;
+
+ *++tp = 0;
+ saw_digit = 0;
+ } else
+ return 0;
+ }
+
+ if (octets < 4)
+ return 0;
+
+ memcpy(&dst->s_addr, tmp, 4);
+ return addr - cp;
+}
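+
+/* Illustrative sketch: buf2ip("192.168.0.1", 11, &ip) returns 11 and fills
+ * <ip> with 192.168.0.1, while buf2ip("300.1.1.1", 9, &ip) returns 0 and
+ * leaves <ip> untouched since 300 exceeds 255.
+ */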
+
+/* This function converts the string in <buf> of the len <len> to
+ * struct in6_addr <dst> which must be allocated by the caller.
+ * This function returns 1 in success case, otherwise zero.
+ * The destination is only modified on success.
+ */
+int buf2ip6(const char *buf, size_t len, struct in6_addr *dst)
+{
+ char null_term_ip6[INET6_ADDRSTRLEN + 1];
+ struct in6_addr out;
+
+ if (len > INET6_ADDRSTRLEN)
+ return 0;
+
+ memcpy(null_term_ip6, buf, len);
+ null_term_ip6[len] = '\0';
+
+ if (!inet_pton(AF_INET6, null_term_ip6, &out))
+ return 0;
+
+ *dst = out;
+ return 1;
+}
+
+/* To be used to quote config arg positions. Returns the short string at <ptr>
+ * surrounded by single quotes if <ptr> is valid and non-empty, or "end of line"
+ * if ptr is NULL or empty. The string is locally allocated.
+ */
+const char *quote_arg(const char *ptr)
+{
+ static THREAD_LOCAL char val[32];
+ int i;
+
+ if (!ptr || !*ptr)
+ return "end of line";
+ val[0] = '\'';
+ for (i = 1; i < sizeof(val) - 2 && *ptr; i++)
+ val[i] = *ptr++;
+ val[i++] = '\'';
+ val[i] = '\0';
+ return val;
+}
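+
+/* Illustrative sketch: quote_arg("foo") returns "'foo'" while quote_arg(NULL)
+ * and quote_arg("") both return "end of line", which makes messages such as
+ * "unexpected token at 'foo'" easy to build. Long strings are truncated to
+ * fit the local 32-byte buffer.
+ */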
+
+/* returns an operator among STD_OP_* for string <str> or < 0 if unknown */
+int get_std_op(const char *str)
+{
+ int ret = -1;
+
+ if (*str == 'e' && str[1] == 'q')
+ ret = STD_OP_EQ;
+ else if (*str == 'n' && str[1] == 'e')
+ ret = STD_OP_NE;
+ else if (*str == 'l') {
+ if (str[1] == 'e') ret = STD_OP_LE;
+ else if (str[1] == 't') ret = STD_OP_LT;
+ }
+ else if (*str == 'g') {
+ if (str[1] == 'e') ret = STD_OP_GE;
+ else if (str[1] == 't') ret = STD_OP_GT;
+ }
+
+ if (ret == -1 || str[2] != '\0')
+ return -1;
+ return ret;
+}
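+
+/* Illustrative sketch: get_std_op("ge") returns STD_OP_GE, while both
+ * get_std_op("geq") and get_std_op("==") return -1 (unknown or too long).
+ */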
+
+/* hash a 32-bit integer to another 32-bit integer */
+unsigned int full_hash(unsigned int a)
+{
+ return __full_hash(a);
+}
+
+/* Return the bit position in mask <m> of the nth bit set of rank <r>, between
+ * 0 and LONGBITS-1 included, starting from the left. For example ranks 0,1,2,3
+ * for mask 0x55 will be 6, 4, 2 and 0 respectively. This algorithm is based on
+ * a popcount variant and is described here :
+ * https://graphics.stanford.edu/~seander/bithacks.html
+ */
+unsigned int mask_find_rank_bit(unsigned int r, unsigned long m)
+{
+ unsigned long a, b, c, d;
+ unsigned int s;
+ unsigned int t;
+
+ a = m - ((m >> 1) & ~0UL/3);
+ b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
+ c = (b + (b >> 4)) & ~0UL/0x11;
+ d = (c + (c >> 8)) & ~0UL/0x101;
+
+ r++; // make r be 1..64
+
+ t = 0;
+ s = LONGBITS;
+ if (s > 32) {
+ unsigned long d2 = (d >> 16) >> 16;
+ t = d2 + (d2 >> 16);
+ s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
+ }
+
+ t = (d >> (s - 16)) & 0xff;
+ s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
+ t = (c >> (s - 8)) & 0xf;
+ s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
+ t = (b >> (s - 4)) & 0x7;
+ s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
+ t = (a >> (s - 2)) & 0x3;
+ s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
+ t = (m >> (s - 1)) & 0x1;
+ s -= ((t - r) & 256) >> 8;
+
+ return s - 1;
+}
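+
+/* Illustrative sketch, reusing the example above: with m == 0x55 (bits
+ * 0, 2, 4 and 6 set), mask_find_rank_bit(1, 0x55) returns 4, i.e. the
+ * rank-1 set bit counted from the left (the highest set bit has rank 0).
+ */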
+
+/* Same as mask_find_rank_bit() above but makes use of pre-computed bitmaps
+ * based on <m>, in <a..d>. These ones must be updated whenever <m> changes
+ * using mask_prep_rank_map() below.
+ */
+unsigned int mask_find_rank_bit_fast(unsigned int r, unsigned long m,
+ unsigned long a, unsigned long b,
+ unsigned long c, unsigned long d)
+{
+ unsigned int s;
+ unsigned int t;
+
+ r++; // make r be 1..64
+
+ t = 0;
+ s = LONGBITS;
+ if (s > 32) {
+ unsigned long d2 = (d >> 16) >> 16;
+ t = d2 + (d2 >> 16);
+ s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
+ }
+
+ t = (d >> (s - 16)) & 0xff;
+ s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
+ t = (c >> (s - 8)) & 0xf;
+ s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
+ t = (b >> (s - 4)) & 0x7;
+ s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
+ t = (a >> (s - 2)) & 0x3;
+ s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
+ t = (m >> (s - 1)) & 0x1;
+ s -= ((t - r) & 256) >> 8;
+
+ return s - 1;
+}
+
+/* Prepare the bitmaps used by the fast implementation of the find_rank_bit()
+ * above.
+ */
+void mask_prep_rank_map(unsigned long m,
+ unsigned long *a, unsigned long *b,
+ unsigned long *c, unsigned long *d)
+{
+ *a = m - ((m >> 1) & ~0UL/3);
+ *b = (*a & ~0UL/5) + ((*a >> 2) & ~0UL/5);
+ *c = (*b + (*b >> 4)) & ~0UL/0x11;
+ *d = (*c + (*c >> 8)) & ~0UL/0x101;
+}
+
+/* Returns the position of one bit set in <v>, starting at position <bit>, and
+ * searching in other halves if not found. This is intended to be used to
+ * report the position of one bit set among several based on a counter or a
+ * random generator while preserving a relatively good distribution so that
+ * values made of holes in the middle do not see one of the bits around the
+ * hole being returned much more often than the other one. It can be seen as a
+ * disturbed ffsl() where the initial search starts at bit <bit>. The lookup
+ * is performed in O(logN) time for N bit words, yielding a bit among 64 in
+ * about 16 cycles. Its usage differs from the rank find function in that the
+ * bit passed doesn't need to be limited to the value's popcount, making the
+ * function easier to use for random picking, and twice as fast. Passing value
+ * 0 for <v> makes no sense and -1 is returned in this case.
+ */
+int one_among_mask(unsigned long v, int bit)
+{
+ /* note, these masks may be produced by ~0UL/((1UL<<scale)+1) but
+ * that's more expensive.
+ */
+ static const unsigned long halves[] = {
+ (unsigned long)0x5555555555555555ULL,
+ (unsigned long)0x3333333333333333ULL,
+ (unsigned long)0x0F0F0F0F0F0F0F0FULL,
+ (unsigned long)0x00FF00FF00FF00FFULL,
+ (unsigned long)0x0000FFFF0000FFFFULL,
+ (unsigned long)0x00000000FFFFFFFFULL
+ };
+ unsigned long halfword = ~0UL;
+ int scope = 0;
+ int mirror;
+ int scale;
+
+ if (!v)
+ return -1;
+
+ /* we check if the exact bit is set or if it's present in a mirror
+ * position based on the current scale we're checking, in which case
+ * it's returned with its current (or mirrored) value. Otherwise we'll
+ * make sure there's at least one bit in the half we're in, and will
+ * scale down to a smaller scope and try again, until we find the
+ * closest bit.
+ */
+ for (scale = (sizeof(long) > 4) ? 5 : 4; scale >= 0; scale--) {
+ halfword >>= (1UL << scale);
+ scope |= (1UL << scale);
+ mirror = bit ^ (1UL << scale);
+ if (v & ((1UL << bit) | (1UL << mirror)))
+ return (v & (1UL << bit)) ? bit : mirror;
+
+ if (!((v >> (bit & scope)) & halves[scale] & halfword))
+ bit = mirror;
+ }
+ return bit;
+}
+
+/* Return non-zero if IPv4 address is part of the network,
+ * otherwise zero. Note that <addr> may not necessarily be aligned
+ * while the two other ones must be.
+ */
+int in_net_ipv4(const void *addr, const struct in_addr *mask, const struct in_addr *net)
+{
+ struct in_addr addr_copy;
+
+ memcpy(&addr_copy, addr, sizeof(addr_copy));
+ return((addr_copy.s_addr & mask->s_addr) == (net->s_addr & mask->s_addr));
+}
+
+/* Return non-zero if IPv6 address is part of the network,
+ * otherwise zero. Note that <addr> may not necessarily be aligned
+ * while the two other ones must be.
+ */
+int in_net_ipv6(const void *addr, const struct in6_addr *mask, const struct in6_addr *net)
+{
+ int i;
+ struct in6_addr addr_copy;
+
+ memcpy(&addr_copy, addr, sizeof(addr_copy));
+ for (i = 0; i < sizeof(struct in6_addr) / sizeof(int); i++)
+ if (((((int *)&addr_copy)[i] & ((int *)mask)[i])) !=
+ (((int *)net)[i] & ((int *)mask)[i]))
+ return 0;
+ return 1;
+}
+
+/* Map IPv4 address on IPv6 address, as specified in RFC4291
+ * "IPv4-Mapped IPv6 Address" (using the :ffff: prefix)
+ *
+ * Input and output may overlap.
+ */
+void v4tov6(struct in6_addr *sin6_addr, struct in_addr *sin_addr)
+{
+ uint32_t ip4_addr;
+
+ ip4_addr = sin_addr->s_addr;
+ memset(&sin6_addr->s6_addr, 0, 10);
+ write_u16(&sin6_addr->s6_addr[10], htons(0xFFFF));
+ write_u32(&sin6_addr->s6_addr[12], ip4_addr);
+}
+
+/* Try to convert IPv6 address to IPv4 address thanks to the
+ * following mapping methods:
+ * - RFC4291 IPv4-Mapped IPv6 Address (preferred method)
+ * -> ::ffff:ip:v4
+ * - RFC4291 IPv4-Compatible IPv6 Address (deprecated, RFC3513 legacy for
+ * "IPv6 Addresses with Embedded IPv4 Addresses)
+ * -> ::0000:ip:v4
+ * - 6to4 (defined in RFC3056 proposal, seems deprecated nowadays)
+ * -> 2002:ip:v4::
+ * Return true if conversion is possible and false otherwise.
+ */
+int v6tov4(struct in_addr *sin_addr, struct in6_addr *sin6_addr)
+{
+ if (read_u64(&sin6_addr->s6_addr[0]) == 0 &&
+ (read_u32(&sin6_addr->s6_addr[8]) == htonl(0xFFFF) ||
+ read_u32(&sin6_addr->s6_addr[8]) == 0)) {
+ // RFC4291 ipv4 mapped or compatible ipv6 address
+ sin_addr->s_addr = read_u32(&sin6_addr->s6_addr[12]);
+ } else if (read_u16(&sin6_addr->s6_addr[0]) == htons(0x2002)) {
+ // RFC3056 6to4 address
+ sin_addr->s_addr = htonl((ntohs(read_u16(&sin6_addr->s6_addr[2])) << 16) +
+ ntohs(read_u16(&sin6_addr->s6_addr[4])));
+ }
+ else
+ return 0; /* unrecognized input */
+ return 1; /* mapping completed */
+}
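+
+/* Illustrative sketch: both "::ffff:192.0.2.1" (IPv4-mapped) and
+ * "2002:c000:0201::" (6to4) convert to 192.0.2.1, while "2001:db8::1"
+ * makes v6tov4() return 0 and leaves <sin_addr> untouched.
+ */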
+
+/* compare two struct sockaddr_storage, including port if <check_port> is true,
+ * and return:
+ * 0 (true) if the addr is the same in both
+ * 1 (false) if the addr is not the same in both
+ * -1 (unable) if one of the addr is not AF_INET*
+ */
+int ipcmp(const struct sockaddr_storage *ss1, const struct sockaddr_storage *ss2, int check_port)
+{
+ if ((ss1->ss_family != AF_INET) && (ss1->ss_family != AF_INET6))
+ return -1;
+
+ if ((ss2->ss_family != AF_INET) && (ss2->ss_family != AF_INET6))
+ return -1;
+
+ if (ss1->ss_family != ss2->ss_family)
+ return 1;
+
+ switch (ss1->ss_family) {
+ case AF_INET:
+ return (memcmp(&((struct sockaddr_in *)ss1)->sin_addr,
+ &((struct sockaddr_in *)ss2)->sin_addr,
+ sizeof(struct in_addr)) != 0) ||
+ (check_port && get_net_port(ss1) != get_net_port(ss2));
+ case AF_INET6:
+ return (memcmp(&((struct sockaddr_in6 *)ss1)->sin6_addr,
+ &((struct sockaddr_in6 *)ss2)->sin6_addr,
+ sizeof(struct in6_addr)) != 0) ||
+ (check_port && get_net_port(ss1) != get_net_port(ss2));
+ }
+
+ return 1;
+}
+
+/* compare a struct sockaddr_storage to a struct net_addr and return :
+ * 0 (true) if <addr> is matching <net>
+ * 1 (false) if <addr> is not matching <net>
+ * -1 (unable) if <addr> or <net> is not AF_INET*
+ */
+int ipcmp2net(const struct sockaddr_storage *addr, const struct net_addr *net)
+{
+ if ((addr->ss_family != AF_INET) && (addr->ss_family != AF_INET6))
+ return -1;
+
+ if ((net->family != AF_INET) && (net->family != AF_INET6))
+ return -1;
+
+ if (addr->ss_family != net->family)
+ return 1;
+
+ if (addr->ss_family == AF_INET &&
+ (((struct sockaddr_in *)addr)->sin_addr.s_addr & net->addr.v4.mask.s_addr) == net->addr.v4.ip.s_addr)
+ return 0;
+ else {
+ const struct in6_addr *addr6 = &(((const struct sockaddr_in6*)addr)->sin6_addr);
+ const struct in6_addr *nip6 = &net->addr.v6.ip;
+ const struct in6_addr *nmask6 = &net->addr.v6.mask;
+
+ if ((read_u32(&addr6->s6_addr[0]) & read_u32(&nmask6->s6_addr[0])) == read_u32(&nip6->s6_addr[0]) &&
+ (read_u32(&addr6->s6_addr[4]) & read_u32(&nmask6->s6_addr[4])) == read_u32(&nip6->s6_addr[4]) &&
+ (read_u32(&addr6->s6_addr[8]) & read_u32(&nmask6->s6_addr[8])) == read_u32(&nip6->s6_addr[8]) &&
+ (read_u32(&addr6->s6_addr[12]) & read_u32(&nmask6->s6_addr[12])) == read_u32(&nip6->s6_addr[12]))
+ return 0;
+ }
+
+ return 1;
+}
+
+/* copy IP address from <source> into <dest>
+ * The caller must allocate and clear <dest> before calling.
+ * The source must be in either AF_INET or AF_INET6 family, or the destination
+ * address will be undefined. If the destination address used to hold a port,
+ * it is preserved, so that this function can be used to switch to another
+ * address family with no risk. Returns a pointer to the destination.
+ */
+struct sockaddr_storage *ipcpy(const struct sockaddr_storage *source, struct sockaddr_storage *dest)
+{
+ int prev_port;
+
+ prev_port = get_net_port(dest);
+ memset(dest, 0, sizeof(*dest));
+ dest->ss_family = source->ss_family;
+
+ /* copy new addr and apply it */
+ switch (source->ss_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)dest)->sin_addr.s_addr = ((struct sockaddr_in *)source)->sin_addr.s_addr;
+ ((struct sockaddr_in *)dest)->sin_port = prev_port;
+ break;
+ case AF_INET6:
+ memcpy(((struct sockaddr_in6 *)dest)->sin6_addr.s6_addr, ((struct sockaddr_in6 *)source)->sin6_addr.s6_addr, sizeof(struct in6_addr));
+ ((struct sockaddr_in6 *)dest)->sin6_port = prev_port;
+ break;
+ }
+
+ return dest;
+}
+
+char *human_time(int t, short hz_div) {
+ static char rv[sizeof("24855d23h")+1]; // large enough for INT_MAX seconds ("24855d23h")
+ char *p = rv;
+ char *end = rv + sizeof(rv);
+ int cnt=2; // print two numbers
+
+ if (unlikely(t < 0 || hz_div <= 0)) {
+ snprintf(p, end - p, "?");
+ return rv;
+ }
+
+ if (unlikely(hz_div > 1))
+ t /= hz_div;
+
+ if (t >= DAY) {
+ p += snprintf(p, end - p, "%dd", t / DAY);
+ cnt--;
+ }
+
+ if (cnt && t % DAY / HOUR) {
+ p += snprintf(p, end - p, "%dh", t % DAY / HOUR);
+ cnt--;
+ }
+
+ if (cnt && t % HOUR / MINUTE) {
+ p += snprintf(p, end - p, "%dm", t % HOUR / MINUTE);
+ cnt--;
+ }
+
+ if ((cnt && t % MINUTE) || !t) // also display '0s'
+ p += snprintf(p, end - p, "%ds", t % MINUTE / SEC);
+
+ return rv;
+}
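+
+/* Illustrative sketch: human_time(93784, 1) returns "1d2h" (93784 s is
+ * 1 day, 2 hours, 3 minutes and 4 seconds, and only the two most
+ * significant units are printed), while human_time(59, 1) returns "59s".
+ */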
+
+const char *monthname[12] = {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+};
+
+/* date2str_log: write a date in the format :
+ * sprintf(str, "%02d/%s/%04d:%02d:%02d:%02d.%03d",
+ * tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900,
+ * tm.tm_hour, tm.tm_min, tm.tm_sec, (int)date.tv_usec/1000);
+ *
+ * without using sprintf. return a pointer to the last char written (\0) or
+ * NULL if there isn't enough space.
+ */
+char *date2str_log(char *dst, const struct tm *tm, const struct timeval *date, size_t size)
+{
+
+ if (size < 25) /* the size is fixed: 24 chars + \0 */
+ return NULL;
+
+ dst = utoa_pad((unsigned int)tm->tm_mday, dst, 3); // day
+ if (!dst)
+ return NULL;
+ *dst++ = '/';
+
+ memcpy(dst, monthname[tm->tm_mon], 3); // month
+ dst += 3;
+ *dst++ = '/';
+
+ dst = utoa_pad((unsigned int)tm->tm_year+1900, dst, 5); // year
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_hour, dst, 3); // hour
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_min, dst, 3); // minutes
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_sec, dst, 3); // seconds
+ if (!dst)
+ return NULL;
+ *dst++ = '.';
+
+ dst = utoa_pad((unsigned int)(date->tv_usec/1000)%1000, dst, 4); // milliseconds
+ if (!dst)
+ return NULL;
+ *dst = '\0';
+
+ return dst;
+}
+
+/* Base year used to compute leap years */
+#define TM_YEAR_BASE 1900
+
+/* Return the difference in seconds between two times (leap seconds are ignored).
+ * Retrieved from glibc 2.18 source code.
+ */
+static int my_tm_diff(const struct tm *a, const struct tm *b)
+{
+ /* Compute intervening leap days correctly even if year is negative.
+ * Take care to avoid int overflow in leap day calculations,
+ * but it's OK to assume that A and B are close to each other.
+ */
+ int a4 = (a->tm_year >> 2) + (TM_YEAR_BASE >> 2) - ! (a->tm_year & 3);
+ int b4 = (b->tm_year >> 2) + (TM_YEAR_BASE >> 2) - ! (b->tm_year & 3);
+ int a100 = a4 / 25 - (a4 % 25 < 0);
+ int b100 = b4 / 25 - (b4 % 25 < 0);
+ int a400 = a100 >> 2;
+ int b400 = b100 >> 2;
+ int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400);
+ int years = a->tm_year - b->tm_year;
+ int days = (365 * years + intervening_leap_days
+ + (a->tm_yday - b->tm_yday));
+ return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour))
+ + (a->tm_min - b->tm_min))
+ + (a->tm_sec - b->tm_sec));
+}
+
+/* Return the GMT offset for a specific local time.
+ * Both t and tm must represent the same time.
+ * The string returned has the same format as returned by strftime(... "%z", tm).
+ * Offsets are kept in an internal cache for better performance.
+ */
+const char *get_gmt_offset(time_t t, struct tm *tm)
+{
+ /* Cache offsets from GMT (depending on whether DST is active or not) */
+ static THREAD_LOCAL char gmt_offsets[2][5+1] = { "", "" };
+
+ char *gmt_offset;
+ struct tm tm_gmt;
+ int diff;
+ int isdst = tm->tm_isdst;
+
+ /* Pretend DST not active if its status is unknown */
+ if (isdst < 0)
+ isdst = 0;
+
+ /* Fetch the offset and initialize it if needed */
+ gmt_offset = gmt_offsets[isdst & 0x01];
+ if (unlikely(!*gmt_offset)) {
+ get_gmtime(t, &tm_gmt);
+ diff = my_tm_diff(tm, &tm_gmt);
+ if (diff < 0) {
+ diff = -diff;
+ *gmt_offset = '-';
+ } else {
+ *gmt_offset = '+';
+ }
+ diff %= 86400U;
+ diff /= 60; /* Convert to minutes */
+ snprintf(gmt_offset+1, 4+1, "%02d%02d", diff/60, diff%60);
+ }
+
+ return gmt_offset;
+}
+
+/* gmt2str_log: write a date in the format :
+ * "%02d/%s/%04d:%02d:%02d:%02d +0000" without using snprintf
+ * return a pointer to the last char written (\0) or
+ * NULL if there isn't enough space.
+ */
+char *gmt2str_log(char *dst, struct tm *tm, size_t size)
+{
+ if (size < 27) /* the size is fixed: 26 chars + \0 */
+ return NULL;
+
+ dst = utoa_pad((unsigned int)tm->tm_mday, dst, 3); // day
+ if (!dst)
+ return NULL;
+ *dst++ = '/';
+
+ memcpy(dst, monthname[tm->tm_mon], 3); // month
+ dst += 3;
+ *dst++ = '/';
+
+ dst = utoa_pad((unsigned int)tm->tm_year+1900, dst, 5); // year
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_hour, dst, 3); // hour
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_min, dst, 3); // minutes
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_sec, dst, 3); // seconds
+ if (!dst)
+ return NULL;
+ *dst++ = ' ';
+ *dst++ = '+';
+ *dst++ = '0';
+ *dst++ = '0';
+ *dst++ = '0';
+ *dst++ = '0';
+ *dst = '\0';
+
+ return dst;
+}
+
+/* localdate2str_log: write a date in the format :
+ * "%02d/%s/%04d:%02d:%02d:%02d +0000(local timezone)" without using snprintf
+ * Both t and tm must represent the same time.
+ * return a pointer to the last char written (\0) or
+ * NULL if there isn't enough space.
+ */
+char *localdate2str_log(char *dst, time_t t, struct tm *tm, size_t size)
+{
+ const char *gmt_offset;
+ if (size < 27) /* the size is fixed: 26 chars + \0 */
+ return NULL;
+
+ gmt_offset = get_gmt_offset(t, tm);
+
+ dst = utoa_pad((unsigned int)tm->tm_mday, dst, 3); // day
+ if (!dst)
+ return NULL;
+ *dst++ = '/';
+
+ memcpy(dst, monthname[tm->tm_mon], 3); // month
+ dst += 3;
+ *dst++ = '/';
+
+ dst = utoa_pad((unsigned int)tm->tm_year+1900, dst, 5); // year
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_hour, dst, 3); // hour
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_min, dst, 3); // minutes
+ if (!dst)
+ return NULL;
+ *dst++ = ':';
+
+ dst = utoa_pad((unsigned int)tm->tm_sec, dst, 3); // seconds
+ if (!dst)
+ return NULL;
+ *dst++ = ' ';
+
+ memcpy(dst, gmt_offset, 5); // Offset from local time to GMT
+ dst += 5;
+ *dst = '\0';
+
+ return dst;
+}
+
+/* Returns the number of seconds since 01/01/1970 0:0:0 GMT for GMT date <tm>.
+ * It is meant as a portable replacement for timegm() for use with valid inputs.
+ * Returns undefined results for invalid dates (eg: months out of range 0..11).
+ */
+time_t my_timegm(const struct tm *tm)
+{
+ /* Each month has 28, 29, 30 or 31 days, or 28+N. The date in the year
+ * is thus (current month - 1)*28 + cumulated_N[month] to count the
+ * sum of the extra N days for elapsed months. The sum of all these N
+ * days doesn't exceed 30 for a complete year (366-12*28) so it fits
+ * in a 5-bit word. This means that with 60 bits we can represent a
+ * matrix of all these values at once, which is fast and efficient to
+ * access. The extra February day for leap years is not counted here.
+ *
+ * Jan : none = 0 (0)
+ * Feb : Jan = 3 (3)
+ * Mar : Jan..Feb = 3 (3 + 0)
+ * Apr : Jan..Mar = 6 (3 + 0 + 3)
+ * May : Jan..Apr = 8 (3 + 0 + 3 + 2)
+ * Jun : Jan..May = 11 (3 + 0 + 3 + 2 + 3)
+ * Jul : Jan..Jun = 13 (3 + 0 + 3 + 2 + 3 + 2)
+ * Aug : Jan..Jul = 16 (3 + 0 + 3 + 2 + 3 + 2 + 3)
+ * Sep : Jan..Aug = 19 (3 + 0 + 3 + 2 + 3 + 2 + 3 + 3)
+ * Oct : Jan..Sep = 21 (3 + 0 + 3 + 2 + 3 + 2 + 3 + 3 + 2)
+ * Nov : Jan..Oct = 24 (3 + 0 + 3 + 2 + 3 + 2 + 3 + 3 + 2 + 3)
+ * Dec : Jan..Nov = 26 (3 + 0 + 3 + 2 + 3 + 2 + 3 + 3 + 2 + 3 + 2)
+ */
+ uint64_t extra =
+ ( 0ULL << 0*5) + ( 3ULL << 1*5) + ( 3ULL << 2*5) + /* Jan, Feb, Mar, */
+ ( 6ULL << 3*5) + ( 8ULL << 4*5) + (11ULL << 5*5) + /* Apr, May, Jun, */
+ (13ULL << 6*5) + (16ULL << 7*5) + (19ULL << 8*5) + /* Jul, Aug, Sep, */
+ (21ULL << 9*5) + (24ULL << 10*5) + (26ULL << 11*5); /* Oct, Nov, Dec, */
+
+ unsigned int y = tm->tm_year + 1900;
+ unsigned int m = tm->tm_mon;
+ unsigned long days = 0;
+
+ /* days since 1/1/1970 for full years */
+ days += days_since_zero(y) - days_since_zero(1970);
+
+ /* days for full months in the current year */
+ days += 28 * m + ((extra >> (m * 5)) & 0x1f);
+
+ /* count + 1 after March for leap years. A leap year is a year multiple
+ * of 4, unless it's multiple of 100 without being multiple of 400. 2000
+ * is leap, 1900 isn't, 1904 is.
+ */
+ if ((m > 1) && !(y & 3) && ((y % 100) || !(y % 400)))
+ days++;
+
+ days += tm->tm_mday - 1;
+ return days * 86400ULL + tm->tm_hour * 3600 + tm->tm_min * 60 + tm->tm_sec;
+}
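+
+/* Illustrative sketch: with tm_year=70, tm_mon=0, tm_mday=1 and all other
+ * fields zero, my_timegm() returns 0 (the Unix epoch); bumping tm_mday to
+ * 2 yields 86400.
+ */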
+
+/* This function checks for character <c>. It returns true and updates
+ * the <date> and <len> pointers to the new position if the
+ * character is found.
+ */
+static inline int parse_expect_char(const char **date, int *len, char c)
+{
+ if (*len < 1 || **date != c)
+ return 0;
+ (*len)--;
+ (*date)++;
+ return 1;
+}
+
+/* This function expects a string <str> of length <l>. It returns true and
+ * updates <date> and <len> if the string matches, otherwise it returns false.
+ */
+static inline int parse_strcmp(const char **date, int *len, char *str, int l)
+{
+ if (*len < l || strncmp(*date, str, l) != 0)
+ return 0;
+ (*len) -= l;
+ (*date) += l;
+ return 1;
+}
+
+/* This macro converts a 3-character name into an integer. */
+#define STR2I3(__a, __b, __c) ((__a) * 65536 + (__b) * 256 + (__c))
+
+/* day-name = %x4D.6F.6E ; "Mon", case-sensitive
+ * / %x54.75.65 ; "Tue", case-sensitive
+ * / %x57.65.64 ; "Wed", case-sensitive
+ * / %x54.68.75 ; "Thu", case-sensitive
+ * / %x46.72.69 ; "Fri", case-sensitive
+ * / %x53.61.74 ; "Sat", case-sensitive
+ * / %x53.75.6E ; "Sun", case-sensitive
+ *
+ * Names are matched case-sensitively via the STR2I3() switch below.
+ */
+static inline int parse_http_dayname(const char **date, int *len, struct tm *tm)
+{
+ if (*len < 3)
+ return 0;
+ switch (STR2I3((*date)[0], (*date)[1], (*date)[2])) {
+ case STR2I3('M','o','n'): tm->tm_wday = 1; break;
+ case STR2I3('T','u','e'): tm->tm_wday = 2; break;
+ case STR2I3('W','e','d'): tm->tm_wday = 3; break;
+ case STR2I3('T','h','u'): tm->tm_wday = 4; break;
+ case STR2I3('F','r','i'): tm->tm_wday = 5; break;
+ case STR2I3('S','a','t'): tm->tm_wday = 6; break;
+ case STR2I3('S','u','n'): tm->tm_wday = 7; break;
+ default: return 0;
+ }
+ *len -= 3;
+ *date += 3;
+ return 1;
+}
+
+/* month = %x4A.61.6E ; "Jan", case-sensitive
+ * / %x46.65.62 ; "Feb", case-sensitive
+ * / %x4D.61.72 ; "Mar", case-sensitive
+ * / %x41.70.72 ; "Apr", case-sensitive
+ * / %x4D.61.79 ; "May", case-sensitive
+ * / %x4A.75.6E ; "Jun", case-sensitive
+ * / %x4A.75.6C ; "Jul", case-sensitive
+ * / %x41.75.67 ; "Aug", case-sensitive
+ * / %x53.65.70 ; "Sep", case-sensitive
+ * / %x4F.63.74 ; "Oct", case-sensitive
+ * / %x4E.6F.76 ; "Nov", case-sensitive
+ * / %x44.65.63 ; "Dec", case-sensitive
+ *
+ * Names are matched case-sensitively via the STR2I3() switch below.
+ */
+static inline int parse_http_monthname(const char **date, int *len, struct tm *tm)
+{
+ if (*len < 3)
+ return 0;
+ switch (STR2I3((*date)[0], (*date)[1], (*date)[2])) {
+ case STR2I3('J','a','n'): tm->tm_mon = 0; break;
+ case STR2I3('F','e','b'): tm->tm_mon = 1; break;
+ case STR2I3('M','a','r'): tm->tm_mon = 2; break;
+ case STR2I3('A','p','r'): tm->tm_mon = 3; break;
+ case STR2I3('M','a','y'): tm->tm_mon = 4; break;
+ case STR2I3('J','u','n'): tm->tm_mon = 5; break;
+ case STR2I3('J','u','l'): tm->tm_mon = 6; break;
+ case STR2I3('A','u','g'): tm->tm_mon = 7; break;
+ case STR2I3('S','e','p'): tm->tm_mon = 8; break;
+ case STR2I3('O','c','t'): tm->tm_mon = 9; break;
+ case STR2I3('N','o','v'): tm->tm_mon = 10; break;
+ case STR2I3('D','e','c'): tm->tm_mon = 11; break;
+ default: return 0;
+ }
+ *len -= 3;
+ *date += 3;
+ return 1;
+}
+
+/* day-name-l = %x4D.6F.6E.64.61.79 ; "Monday", case-sensitive
+ * / %x54.75.65.73.64.61.79 ; "Tuesday", case-sensitive
+ * / %x57.65.64.6E.65.73.64.61.79 ; "Wednesday", case-sensitive
+ * / %x54.68.75.72.73.64.61.79 ; "Thursday", case-sensitive
+ * / %x46.72.69.64.61.79 ; "Friday", case-sensitive
+ * / %x53.61.74.75.72.64.61.79 ; "Saturday", case-sensitive
+ * / %x53.75.6E.64.61.79 ; "Sunday", case-sensitive
+ *
+ * Names are matched case-sensitively via the STR2I3() switch below.
+ */
+static inline int parse_http_ldayname(const char **date, int *len, struct tm *tm)
+{
+ if (*len < 6) /* Minimum length. */
+ return 0;
+ switch (STR2I3((*date)[0], (*date)[1], (*date)[2])) {
+ case STR2I3('M','o','n'):
+ RET0_UNLESS(parse_strcmp(date, len, "Monday", 6));
+ tm->tm_wday = 1;
+ return 1;
+ case STR2I3('T','u','e'):
+ RET0_UNLESS(parse_strcmp(date, len, "Tuesday", 7));
+ tm->tm_wday = 2;
+ return 1;
+ case STR2I3('W','e','d'):
+ RET0_UNLESS(parse_strcmp(date, len, "Wednesday", 9));
+ tm->tm_wday = 3;
+ return 1;
+ case STR2I3('T','h','u'):
+ RET0_UNLESS(parse_strcmp(date, len, "Thursday", 8));
+ tm->tm_wday = 4;
+ return 1;
+ case STR2I3('F','r','i'):
+ RET0_UNLESS(parse_strcmp(date, len, "Friday", 6));
+ tm->tm_wday = 5;
+ return 1;
+ case STR2I3('S','a','t'):
+ RET0_UNLESS(parse_strcmp(date, len, "Saturday", 8));
+ tm->tm_wday = 6;
+ return 1;
+ case STR2I3('S','u','n'):
+ RET0_UNLESS(parse_strcmp(date, len, "Sunday", 6));
+ tm->tm_wday = 7;
+ return 1;
+ }
+ return 0;
+}
+
+/* This function parses exactly 1 digit and returns the numeric value in "digit". */
+static inline int parse_digit(const char **date, int *len, int *digit)
+{
+ if (*len < 1 || **date < '0' || **date > '9')
+ return 0;
+ *digit = (**date - '0');
+ (*date)++;
+ (*len)--;
+ return 1;
+}
+
+/* This function parses exactly 2 digits and returns the numeric value in "digit". */
+static inline int parse_2digit(const char **date, int *len, int *digit)
+{
+ int value;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) = value * 10;
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) += value;
+
+ return 1;
+}
+
+/* This function parses exactly 4 digits and returns the numeric value in "digit". */
+static inline int parse_4digit(const char **date, int *len, int *digit)
+{
+ int value;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) = value * 1000;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) += value * 100;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) += value * 10;
+
+ RET0_UNLESS(parse_digit(date, len, &value));
+ (*digit) += value;
+
+ return 1;
+}
+
+/* time-of-day = hour ":" minute ":" second
+ * ; 00:00:00 - 23:59:60 (leap second)
+ *
+ * hour = 2DIGIT
+ * minute = 2DIGIT
+ * second = 2DIGIT
+ */
+static inline int parse_http_time(const char **date, int *len, struct tm *tm)
+{
+ RET0_UNLESS(parse_2digit(date, len, &tm->tm_hour)); /* hour 2DIGIT */
+ RET0_UNLESS(parse_expect_char(date, len, ':')); /* expect ":" */
+ RET0_UNLESS(parse_2digit(date, len, &tm->tm_min)); /* min 2DIGIT */
+ RET0_UNLESS(parse_expect_char(date, len, ':')); /* expect ":" */
+ RET0_UNLESS(parse_2digit(date, len, &tm->tm_sec)); /* sec 2DIGIT */
+ return 1;
+}
+
+/* From RFC7231
+ * https://tools.ietf.org/html/rfc7231#section-7.1.1.1
+ *
+ * IMF-fixdate = day-name "," SP date1 SP time-of-day SP GMT
+ * ; fixed length/zone/capitalization subset of the format
+ * ; see Section 3.3 of [RFC5322]
+ *
+ *
+ * date1 = day SP month SP year
+ * ; e.g., 02 Jun 1982
+ *
+ * day = 2DIGIT
+ * year = 4DIGIT
+ *
+ * GMT = %x47.4D.54 ; "GMT", case-sensitive
+ *
+ * time-of-day = hour ":" minute ":" second
+ * ; 00:00:00 - 23:59:60 (leap second)
+ *
+ * hour = 2DIGIT
+ * minute = 2DIGIT
+ * second = 2DIGIT
+ *
+ * DIGIT = decimal 0-9
+ */
+int parse_imf_date(const char *date, int len, struct tm *tm)
+{
+ /* tm_gmtoff, if present, ought to be zero'ed */
+ memset(tm, 0, sizeof(*tm));
+
+ RET0_UNLESS(parse_http_dayname(&date, &len, tm)); /* day-name */
+ RET0_UNLESS(parse_expect_char(&date, &len, ',')); /* expect "," */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_2digit(&date, &len, &tm->tm_mday)); /* day 2DIGIT */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_http_monthname(&date, &len, tm)); /* Month */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_4digit(&date, &len, &tm->tm_year)); /* year = 4DIGIT */
+ tm->tm_year -= 1900;
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_http_time(&date, &len, tm)); /* Parse time. */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_strcmp(&date, &len, "GMT", 3)); /* GMT = %x47.4D.54 ; "GMT", case-sensitive */
+ tm->tm_isdst = -1;
+ return 1;
+}
+
+/* From RFC7231
+ * https://tools.ietf.org/html/rfc7231#section-7.1.1.1
+ *
+ * rfc850-date = day-name-l "," SP date2 SP time-of-day SP GMT
+ * date2 = day "-" month "-" 2DIGIT
+ * ; e.g., 02-Jun-82
+ *
+ * day = 2DIGIT
+ */
+int parse_rfc850_date(const char *date, int len, struct tm *tm)
+{
+ int year;
+
+ /* tm_gmtoff, if present, ought to be zero'ed */
+ memset(tm, 0, sizeof(*tm));
+
+ RET0_UNLESS(parse_http_ldayname(&date, &len, tm)); /* Read the day name */
+ RET0_UNLESS(parse_expect_char(&date, &len, ',')); /* expect "," */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_2digit(&date, &len, &tm->tm_mday)); /* day 2DIGIT */
+ RET0_UNLESS(parse_expect_char(&date, &len, '-')); /* expect "-" */
+ RET0_UNLESS(parse_http_monthname(&date, &len, tm)); /* Month */
+ RET0_UNLESS(parse_expect_char(&date, &len, '-')); /* expect "-" */
+
+ /* year = 2DIGIT
+ *
+ * Recipients of a timestamp value in rfc850-date format, which uses a
+ * two-digit year, MUST interpret a timestamp that appears to be more
+ * than 50 years in the future as representing the most recent year in
+ * the past that had the same last two digits.
+ */
+ RET0_UNLESS(parse_2digit(&date, &len, &tm->tm_year));
+
+ /* expect SP */
+ if (!parse_expect_char(&date, &len, ' ')) {
+ /* Maybe we have the date with 4 digits. */
+ RET0_UNLESS(parse_2digit(&date, &len, &year));
+ tm->tm_year = (tm->tm_year * 100 + year) - 1900;
+ /* expect SP */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' '));
+ } else {
+ /* Use 60 as the pivot: >60: +1900, <=60: +2000. Note that
+ * tm_year is the number of years since 1900, so for +1900 we
+ * do nothing, and for +2000 we add 100.
+ */
+ if (tm->tm_year <= 60)
+ tm->tm_year += 100;
+ }
+
+ RET0_UNLESS(parse_http_time(&date, &len, tm)); /* Parse time. */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_strcmp(&date, &len, "GMT", 3)); /* GMT = %x47.4D.54 ; "GMT", case-sensitive */
+ tm->tm_isdst = -1;
+
+ return 1;
+}
+
+/* From RFC7231
+ * https://tools.ietf.org/html/rfc7231#section-7.1.1.1
+ *
+ * asctime-date = day-name SP date3 SP time-of-day SP year
+ * date3 = month SP ( 2DIGIT / ( SP 1DIGIT ))
+ * ; e.g., Jun 2
+ *
+ * HTTP-date is case sensitive. A sender MUST NOT generate additional
+ * whitespace in an HTTP-date beyond that specifically included as SP in
+ * the grammar.
+ */
+int parse_asctime_date(const char *date, int len, struct tm *tm)
+{
+ /* tm_gmtoff, if present, ought to be zero'ed */
+ memset(tm, 0, sizeof(*tm));
+
+ RET0_UNLESS(parse_http_dayname(&date, &len, tm)); /* day-name */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_http_monthname(&date, &len, tm)); /* expect month */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+
+ /* expect SP and 1DIGIT or 2DIGIT */
+ if (parse_expect_char(&date, &len, ' '))
+ RET0_UNLESS(parse_digit(&date, &len, &tm->tm_mday));
+ else
+ RET0_UNLESS(parse_2digit(&date, &len, &tm->tm_mday));
+
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_http_time(&date, &len, tm)); /* Parse time. */
+ RET0_UNLESS(parse_expect_char(&date, &len, ' ')); /* expect SP */
+ RET0_UNLESS(parse_4digit(&date, &len, &tm->tm_year)); /* year = 4DIGIT */
+ tm->tm_year -= 1900;
+ tm->tm_isdst = -1;
+ return 1;
+}
+
+/* From RFC7231
+ * https://tools.ietf.org/html/rfc7231#section-7.1.1.1
+ *
+ * HTTP-date = IMF-fixdate / obs-date
+ * obs-date = rfc850-date / asctime-date
+ *
+ * Parses an HTTP date in the RFC format and its accepted
+ * alternatives. <date> is the string containing the date,
+ * <len> is the length of the string. <tm> is filled with the
+ * parsed time. This time must be considered as GMT.
+ */
+int parse_http_date(const char *date, int len, struct tm *tm)
+{
+ if (parse_imf_date(date, len, tm))
+ return 1;
+
+ if (parse_rfc850_date(date, len, tm))
+ return 1;
+
+ if (parse_asctime_date(date, len, tm))
+ return 1;
+
+ return 0;
+}
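+
+/* Illustrative sketch: the three accepted formats decode the same moment :
+ *
+ *    parse_http_date("Sun, 06 Nov 1994 08:49:37 GMT", 29, &tm);  // IMF-fixdate
+ *    parse_http_date("Sunday, 06-Nov-94 08:49:37 GMT", 30, &tm); // rfc850-date
+ *    parse_http_date("Sun Nov  6 08:49:37 1994", 24, &tm);       // asctime-date
+ *
+ * Each call returns 1 and fills <tm> with the same GMT time.
+ */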
+
+/* print the time <ns> in a short form (exactly 7 chars) at the end of buffer
+ * <out>. "-" is printed if the value is zero, "inf" if larger than 1000 years.
+ * It returns the new buffer length, or 0 if it doesn't fit. The value will be
+ * surrounded by <pfx> and <sfx> respectively if not NULL.
+ */
+int print_time_short(struct buffer *out, const char *pfx, uint64_t ns, const char *sfx)
+{
+ double val = ns; // 52 bits of mantissa keep ns accuracy over 52 days
+ const char *unit;
+
+ if (!pfx)
+ pfx = "";
+ if (!sfx)
+ sfx = "";
+
+ do {
+ unit = " - "; if (val <= 0.0) break;
+ unit = "ns"; if (val < 1000.0) break;
+ unit = "us"; val /= 1000.0; if (val < 1000.0) break;
+ unit = "ms"; val /= 1000.0; if (val < 1000.0) break;
+ unit = "s "; val /= 1000.0; if (val < 60.0) break;
+ unit = "m "; val /= 60.0; if (val < 60.0) break;
+ unit = "h "; val /= 60.0; if (val < 24.0) break;
+ unit = "d "; val /= 24.0; if (val < 365.0) break;
+ unit = "yr"; val /= 365.0; if (val < 1000.0) break;
+ unit = " inf "; val = 0.0; break;
+ } while (0);
+
+ if (val <= 0.0)
+ return chunk_appendf(out, "%s%7s%s", pfx, unit, sfx);
+ else if (val < 10.0)
+ return chunk_appendf(out, "%s%1.3f%s%s", pfx, val, unit, sfx);
+ else if (val < 100.0)
+ return chunk_appendf(out, "%s%2.2f%s%s", pfx, val, unit, sfx);
+ else
+ return chunk_appendf(out, "%s%3.1f%s%s", pfx, val, unit, sfx);
+}
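+
+/* Illustrative sketch: print_time_short(&out, "(", 1500000ULL, ")") appends
+ * "(1.500ms)" to <out> : 1500000 ns scales to 1.5 ms, printed with three
+ * decimals since the value is below 10.
+ */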
+
+/* Dynamically allocates a string of the proper length to hold the formatted
+ * output. NULL is returned on error. The caller is responsible for freeing the
+ * memory area using free(). The resulting string is returned in <out> if the
+ * pointer is not NULL. A previous version of <out> might be used to build the
+ * new string, and it will be freed before returning if it is not NULL, which
+ * makes it possible to build complex strings from iterative calls without
+ * having to care about freeing intermediate values, as in the example below :
+ *
+ * memprintf(&err, "invalid argument: '%s'", arg);
+ * ...
+ * memprintf(&err, "parser said : <%s>\n", *err);
+ * ...
+ * free(*err);
+ *
+ * This means that <err> must be initialized to NULL before first invocation.
+ * The return value also holds the allocated string, which eases error checking
+ * and immediate consumption. If the output pointer is not used, NULL must be
+ * passed instead and it will be ignored. The returned message will then also
+ * be NULL so that the caller does not have to bother with freeing anything.
+ *
+ * It is also convenient to use it without any free except the last one :
+ * err = NULL;
+ * if (!fct1(&err)) report(err);
+ * if (!fct2(&err)) report(err);
+ * if (!fct3(&err)) report(err);
+ * free(err);
+ *
+ * memprintf relies on memvprintf. This last version can be called from any
+ * function with variadic arguments.
+ */
+char *memvprintf(char **out, const char *format, va_list orig_args)
+{
+ va_list args;
+ char *ret = NULL;
+ int allocated = 0;
+ int needed = 0;
+
+ if (!out)
+ return NULL;
+
+ do {
+ char buf1;
+
+ /* vsnprintf() will return the required length even when the
+ * target buffer is NULL. We do this in a loop just in case
+ * an intermediate evaluation goes wrong.
+ */
+ va_copy(args, orig_args);
+ needed = vsnprintf(ret ? ret : &buf1, allocated, format, args);
+ va_end(args);
+ if (needed < allocated) {
+ /* Note: on Solaris 8, the first iteration always
+ * returns -1 if allocated is zero, so we force a
+ * retry.
+ */
+ if (!allocated)
+ needed = 0;
+ else
+ break;
+ }
+
+ allocated = needed + 1;
+ ret = my_realloc2(ret, allocated);
+ } while (ret);
+
+ if (needed < 0) {
+ /* an error was encountered */
+ ha_free(&ret);
+ }
+
+ if (out) {
+ free(*out);
+ *out = ret;
+ }
+
+ return ret;
+}
+
+char *memprintf(char **out, const char *format, ...)
+{
+ va_list args;
+ char *ret = NULL;
+
+ va_start(args, format);
+ ret = memvprintf(out, format, args);
+ va_end(args);
+
+ return ret;
+}
+
+/* Used to add <level> spaces before each line of <out>, unless there is only one line.
+ * The input argument is automatically freed and reassigned. The result will have to be
+ * freed by the caller. It also supports being passed a NULL which results in the same
+ * output.
+ * Example of use :
+ * parse(cmd, &err); (callee: memprintf(&err, ...))
+ * fprintf(stderr, "Parser said: %s\n", indent_error(&err));
+ * free(err);
+ */
+char *indent_msg(char **out, int level)
+{
+ char *ret, *in, *p;
+ int needed = 0;
+ int lf = 0;
+ int lastlf = 0;
+ int len;
+
+ if (!out || !*out)
+ return NULL;
+
+ in = *out - 1;
+ while ((in = strchr(in + 1, '\n')) != NULL) {
+ lastlf = in - *out;
+ lf++;
+ }
+
+ if (!lf) /* single line, no LF, return it as-is */
+ return *out;
+
+ len = strlen(*out);
+
+ if (lf == 1 && lastlf == len - 1) {
+ /* single line, LF at end, strip it and return as-is */
+ (*out)[lastlf] = 0;
+ return *out;
+ }
+
+ /* OK now we have at least one LF, we need to process the whole string
+ * as a multi-line string. What we'll do :
+ * - prefix with an LF if there is none
+ * - add <level> spaces before each line
+ * This means at most ( 1 + level + (len-lf) + lf*(1+level) ) =
+ * 1 + level + len + lf * level = 1 + level * (lf + 1) + len.
+ */
+
+ needed = 1 + level * (lf + 1) + len + 1;
+ p = ret = malloc(needed);
+ if (!ret)
+ return *out;
+ in = *out;
+
+ /* skip initial LFs */
+ while (*in == '\n')
+ in++;
+
+ /* copy each line, prefixed with LF and <level> spaces, and without the trailing LF */
+ while (*in) {
+ *p++ = '\n';
+ memset(p, ' ', level);
+ p += level;
+ do {
+ *p++ = *in++;
+ } while (*in && *in != '\n');
+ if (*in)
+ in++;
+ }
+ *p = 0;
+
+ free(*out);
+ *out = ret;
+
+ return ret;
+}
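+
+/* Illustrative sketch: with err pointing to a copy of "line1\nline2\n",
+ * indent_msg(&err, 4) reassigns err to "\n    line1\n    line2" : an LF is
+ * prepended and each line is prefixed with four spaces. A single-line
+ * message would be returned as-is.
+ */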
+
+/* makes a copy of message <in> into <out>, with each line prefixed with <pfx>
+ * and end of lines replaced with <eol> if not 0. The first line to indent has
+ * to be indicated in <first> (starts at zero), so that it is possible to skip
+ * indenting the first line if it has to be appended after an existing message.
+ * Empty strings are never indented, and NULL strings are considered empty both
+ * for <in> and <pfx>. It returns non-zero if an EOL was appended as the last
+ * character, zero otherwise.
+ */
+int append_prefixed_str(struct buffer *out, const char *in, const char *pfx, char eol, int first)
+{
+ int bol, lf;
+ int pfxlen = pfx ? strlen(pfx) : 0;
+
+ if (!in)
+ return 0;
+
+ bol = 1;
+ lf = 0;
+ while (*in) {
+ if (bol && pfxlen) {
+ if (first > 0)
+ first--;
+ else
+ b_putblk(out, pfx, pfxlen);
+ bol = 0;
+ }
+
+ lf = (*in == '\n');
+ bol |= lf;
+ b_putchr(out, (lf && eol) ? eol : *in);
+ in++;
+ }
+ return lf;
+}
+
+/* removes environment variable <name> from the environment as found in
+ * environ. This is only provided as an alternative for systems without
+ * unsetenv() (old Solaris and AIX versions). THIS IS NOT THREAD SAFE.
+ * The principle is to scan environ for each occurrence of variable name
+ * <name> and to replace the matching pointers with the last pointer of
+ * the array (since variables are not ordered).
+ * It always returns 0 (success).
+ */
+int my_unsetenv(const char *name)
+{
+ extern char **environ;
+ char **p = environ;
+ int vars;
+ int next;
+ int len;
+
+ len = strlen(name);
+ for (vars = 0; p[vars]; vars++)
+ ;
+ next = 0;
+ while (next < vars) {
+ if (strncmp(p[next], name, len) != 0 || p[next][len] != '=') {
+ next++;
+ continue;
+ }
+ if (next < vars - 1)
+ p[next] = p[vars - 1];
+ p[--vars] = NULL;
+ }
+ return 0;
+}
+
+/* Convert occurrences of environment variables in the input string to their
+ * corresponding value. A variable is identified as a series of alphanumeric
+ * characters or underscores following a '$' sign. The <in> string must be
+ * free()able. NULL returns NULL. The resulting string might be reallocated if
+ * some expansion is made. Variable names may also be enclosed into braces if
+ * needed (eg: to concatenate alphanum characters).
+ */
+char *env_expand(char *in)
+{
+ char *txt_beg;
+ char *out;
+ char *txt_end;
+ char *var_beg;
+ char *var_end;
+ char *value;
+ char *next;
+ int out_len;
+ int val_len;
+
+ if (!in)
+ return in;
+
+ value = out = NULL;
+ out_len = 0;
+
+ txt_beg = in;
+ do {
+ /* look for next '$' sign in <in> */
+ for (txt_end = txt_beg; *txt_end && *txt_end != '$'; txt_end++);
+
+ if (!*txt_end && !out) /* end and no expansion performed */
+ return in;
+
+ val_len = 0;
+ next = txt_end;
+ if (*txt_end == '$') {
+ char save;
+
+ var_beg = txt_end + 1;
+ if (*var_beg == '{')
+ var_beg++;
+
+ var_end = var_beg;
+ while (isalnum((unsigned char)*var_end) || *var_end == '_') {
+ var_end++;
+ }
+
+ next = var_end;
+ if (*var_end == '}' && (var_beg > txt_end + 1))
+ next++;
+
+ /* get value of the variable name at this location */
+ save = *var_end;
+ *var_end = '\0';
+ value = getenv(var_beg);
+ *var_end = save;
+ val_len = value ? strlen(value) : 0;
+ }
+
+ out = my_realloc2(out, out_len + (txt_end - txt_beg) + val_len + 1);
+ if (txt_end > txt_beg) {
+ memcpy(out + out_len, txt_beg, txt_end - txt_beg);
+ out_len += txt_end - txt_beg;
+ }
+ if (val_len) {
+ memcpy(out + out_len, value, val_len);
+ out_len += val_len;
+ }
+ out[out_len] = 0;
+ txt_beg = next;
+ } while (*txt_beg);
+
+ /* here we know that <out> was allocated and that we don't need <in> anymore */
+ free(in);
+ return out;
+}
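+
+/* Illustrative sketch (assuming HOME=/root in the environment) :
+ *
+ *    char *s = env_expand(strdup("path=$HOME/cfg"));
+ *    // s == "path=/root/cfg" ; the input string was freed
+ *
+ * Braces work too: "${HOME}dir" expands to "/rootdir". Unknown variables
+ * expand to the empty string.
+ */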
+
+
+/* same as strstr() but case-insensitive and with limit length */
+const char *strnistr(const char *str1, int len_str1, const char *str2, int len_str2)
+{
+ char *pptr, *sptr, *start;
+ unsigned int slen, plen;
+ unsigned int tmp1, tmp2;
+
+ if (str1 == NULL || len_str1 == 0) // searching a pattern in an empty string => not found
+ return NULL;
+
+ if (str2 == NULL || len_str2 == 0) // pattern is empty => any str1 matches
+ return str1;
+
+ if (len_str1 < len_str2) // pattern is longer than string => search is not found
+ return NULL;
+
+ for (tmp1 = 0, start = (char *)str1, pptr = (char *)str2, slen = len_str1, plen = len_str2; slen >= plen; start++, slen--) {
+ while (toupper((unsigned char)*start) != toupper((unsigned char)*str2)) {
+ start++;
+ slen--;
+ tmp1++;
+
+ if (tmp1 >= len_str1)
+ return NULL;
+
+ /* if pattern longer than string */
+ if (slen < plen)
+ return NULL;
+ }
+
+ sptr = start;
+ pptr = (char *)str2;
+
+ tmp2 = 0;
+ while (toupper((unsigned char)*sptr) == toupper((unsigned char)*pptr)) {
+ sptr++;
+ pptr++;
+ tmp2++;
+
+ if (*pptr == '\0' || tmp2 == len_str2) /* end of pattern found */
+ return start;
+ if (*sptr == '\0' || tmp2 == len_str1) /* end of string found and the pattern is not fully found */
+ return NULL;
+ }
+ }
+ return NULL;
+}
+
+/* Returns true if s1 < s2 < s3, otherwise zero. Both s1 and s3 may be NULL and
+ * in this case only non-null strings are compared. This makes it possible to
+ * pass initial values in iterators and in sort functions.
+ */
+int strordered(const char *s1, const char *s2, const char *s3)
+{
+ return (!s1 || strcmp(s1, s2) < 0) && (!s3 || strcmp(s2, s3) < 0);
+}
+
+/* This function reads the next valid utf8 char.
+ * <s> is the byte array to be decoded, <len> is its length.
+ * The function returns the decoded char encoded like this:
+ * the 4 msb are the return code (UTF8_CODE_*), the 4 lsb
+ * are the length read. The decoded character is stored in <c>.
+ */
+unsigned char utf8_next(const char *s, int len, unsigned int *c)
+{
+ const unsigned char *p = (unsigned char *)s;
+ int dec;
+ unsigned char code = UTF8_CODE_OK;
+
+ if (len < 1)
+ return UTF8_CODE_OK;
+
+ /* Check the type of UTF8 sequence
+ *
+ * 0... .... 0x00 <= x <= 0x7f : 1 byte: ascii char
+ * 10.. .... 0x80 <= x <= 0xbf : invalid sequence
+ * 110. .... 0xc0 <= x <= 0xdf : 2 bytes
+ * 1110 .... 0xe0 <= x <= 0xef : 3 bytes
+ * 1111 0... 0xf0 <= x <= 0xf7 : 4 bytes
+ * 1111 10.. 0xf8 <= x <= 0xfb : 5 bytes
+ * 1111 110. 0xfc <= x <= 0xfd : 6 bytes
+ * 1111 111. 0xfe <= x <= 0xff : invalid sequence
+ */
+ switch (*p) {
+ case 0x00 ... 0x7f:
+ *c = *p;
+ return UTF8_CODE_OK | 1;
+
+ case 0x80 ... 0xbf:
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+
+ case 0xc0 ... 0xdf:
+ if (len < 2) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x1f;
+ dec = 1;
+ break;
+
+ case 0xe0 ... 0xef:
+ if (len < 3) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x0f;
+ dec = 2;
+ break;
+
+ case 0xf0 ... 0xf7:
+ if (len < 4) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x07;
+ dec = 3;
+ break;
+
+ case 0xf8 ... 0xfb:
+ if (len < 5) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x03;
+ dec = 4;
+ break;
+
+ case 0xfc ... 0xfd:
+ if (len < 6) {
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+ *c = *p & 0x01;
+ dec = 5;
+ break;
+
+ case 0xfe ... 0xff:
+ default:
+ *c = *p;
+ return UTF8_CODE_BADSEQ | 1;
+ }
+
+ p++;
+
+ while (dec > 0) {
+
+ /* the 2 msb of a continuation byte must be 10 */
+ if ( ( *p & 0xc0 ) != 0x80 )
+ return UTF8_CODE_BADSEQ | ((p-(unsigned char *)s)&0xffff);
+
+ /* add data at char */
+ *c = ( *c << 6 ) | ( *p & 0x3f );
+
+ dec--;
+ p++;
+ }
+
+ /* Check overlong encoding: each range must have used the
+ * shortest possible sequence length.
+ * 2 bytes : 5 + 6 : 11 : 0x80 ... 0x7ff
+ * 3 bytes : 4 + 6 + 6 : 16 : 0x800 ... 0xffff
+ * 4 bytes : 3 + 6 + 6 + 6 : 21 : 0x10000 ... 0x1fffff
+ */
+ if (( *c <= 0x7f && (p-(unsigned char *)s) > 1) ||
+ (*c >= 0x80 && *c <= 0x7ff && (p-(unsigned char *)s) > 2) ||
+ (*c >= 0x800 && *c <= 0xffff && (p-(unsigned char *)s) > 3) ||
+ (*c >= 0x10000 && *c <= 0x1fffff && (p-(unsigned char *)s) > 4))
+ code |= UTF8_CODE_OVERLONG;
+
+ /* Check invalid UTF8 range. */
+ if ((*c >= 0xd800 && *c <= 0xdfff) ||
+ (*c >= 0xfffe && *c <= 0xffff))
+ code |= UTF8_CODE_INVRANGE;
+
+ return code | ((p-(unsigned char *)s)&0x0f);
+}
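+
+/* Illustrative sketch: utf8_next("\xc3\xa9", 2, &c) returns UTF8_CODE_OK | 2
+ * and sets c to 0xe9 (U+00E9), while a lone continuation byte such as
+ * "\x80" yields UTF8_CODE_BADSEQ | 1.
+ */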
+
+/* Appends a copy of string <str> (in a wordlist) at the end of the list <li>.
+ * On failure, returns 0 with an error message in <err>.
+ * The caller is responsible for freeing <err> and the copy of <str>
+ * using free().
+ */
+int list_append_word(struct list *li, const char *str, char **err)
+{
+ struct wordlist *wl;
+
+ wl = calloc(1, sizeof(*wl));
+ if (!wl) {
+ memprintf(err, "out of memory");
+ goto fail_wl;
+ }
+
+ wl->s = strdup(str);
+ if (!wl->s) {
+ memprintf(err, "out of memory");
+ goto fail_wl_s;
+ }
+
+ LIST_APPEND(li, &wl->list);
+
+ return 1;
+
+fail_wl_s:
+ free(wl->s);
+fail_wl:
+ free(wl);
+ return 0;
+}
+
+/* indicates if a memory location may safely be read or not. The trick consists
+ * in performing a harmless syscall using this location as an input and letting
+ * the operating system report whether it's OK or not. For this we have the
+ * stat() syscall, which will return EFAULT when the memory location supposed
+ * to contain the file name is not readable. If it is readable it will then
+ * either return 0 if the area contains an existing file name, or -1 with
+ * another code. This must not be abused, and some audit systems might detect
+ * this as abnormal activity. It's used only for unsafe dumps.
+ */
+int may_access(const void *ptr)
+{
+ struct stat buf;
+
+ if (stat(ptr, &buf) == 0)
+ return 1;
+ if (errno == EFAULT)
+ return 0;
+ return 1;
+}
+
+/* print a string of a text buffer to <out>. The format is as follows:
+ * non-printable chars \t, \n, \r and \e are encoded in C format; other
+ * non-printable chars are encoded as "\xHH". Space, '\' and '=' are also
+ * escaped. Printing stops on a NUL char, when <bsize> is reached, or when
+ * no more room is left in the chunk.
+ */
+int dump_text(struct buffer *out, const char *buf, int bsize)
+{
+ unsigned char c;
+ size_t ptr = 0;
+
+ while (ptr < bsize && buf[ptr]) {
+ c = buf[ptr];
+ if (isprint((unsigned char)c) && isascii((unsigned char)c) && c != '\\' && c != ' ' && c != '=') {
+ if (out->data > out->size - 1)
+ break;
+ out->area[out->data++] = c;
+ }
+ else if (c == '\t' || c == '\n' || c == '\r' || c == '\e' || c == '\\' || c == ' ' || c == '=') {
+ if (out->data > out->size - 2)
+ break;
+ out->area[out->data++] = '\\';
+ switch (c) {
+ case ' ': c = ' '; break;
+ case '\t': c = 't'; break;
+ case '\n': c = 'n'; break;
+ case '\r': c = 'r'; break;
+ case '\e': c = 'e'; break;
+ case '\\': c = '\\'; break;
+ case '=': c = '='; break;
+ }
+ out->area[out->data++] = c;
+ }
+ else {
+ if (out->data > out->size - 4)
+ break;
+ out->area[out->data++] = '\\';
+ out->area[out->data++] = 'x';
+ out->area[out->data++] = hextab[(c >> 4) & 0xF];
+ out->area[out->data++] = hextab[c & 0xF];
+ }
+ ptr++;
+ }
+
+ return ptr;
+}
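+
+/* For example, dumping the 5-byte input "a =1\n" emits "a\ \=1\n" with
+ * literal backslashes: the space and '=' are escaped, and the newline
+ * becomes the two chars '\' and 'n'.
+ */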
+
+/* print a buffer in hexadecimal.
+ * Printing stops when <bsize> is reached or when no more room is left in the chunk.
+ */
+int dump_binary(struct buffer *out, const char *buf, int bsize)
+{
+ unsigned char c;
+ int ptr = 0;
+
+ while (ptr < bsize) {
+ c = buf[ptr];
+
+ if (out->data > out->size - 2)
+ break;
+ out->area[out->data++] = hextab[(c >> 4) & 0xF];
+ out->area[out->data++] = hextab[c & 0xF];
+
+ ptr++;
+ }
+ return ptr;
+}
+
+/* Appends into buffer <out> a hex dump of memory area <buf> for <len> bytes,
+ * prepending each line with prefix <pfx>. The output is *not* initialized.
+ * The output will not wrap past the buffer's end, so it is more optimal if the
+ * caller makes sure the buffer is aligned first. A trailing zero will always
+ * be appended (and not counted) if there is room for it. The caller must make
+ * sure that the area is dumpable first. If <unsafe> is non-null, the memory
+ * locations are checked first for being readable.
+ */
+void dump_hex(struct buffer *out, const char *pfx, const void *buf, int len, int unsafe)
+{
+ const unsigned char *d = buf;
+ int i, j, start;
+
+ d = (const unsigned char *)(((unsigned long)buf) & -16);
+ start = ((unsigned long)buf) & 15;
+
+ for (i = 0; i < start + len; i += 16) {
+ chunk_appendf(out, (sizeof(void *) == 4) ? "%s%8p: " : "%s%16p: ", pfx, d + i);
+
+ // 0: unchecked, 1: checked safe, 2: danger
+ unsafe = !!unsafe;
+ if (unsafe && !may_access(d + i))
+ unsafe = 2;
+
+ for (j = 0; j < 16; j++) {
+ if ((i + j < start) || (i + j >= start + len))
+ chunk_strcat(out, "'' ");
+ else if (unsafe > 1)
+ chunk_strcat(out, "** ");
+ else
+ chunk_appendf(out, "%02x ", d[i + j]);
+
+ if (j == 7)
+ chunk_strcat(out, "- ");
+ }
+ chunk_strcat(out, " ");
+ for (j = 0; j < 16; j++) {
+ if ((i + j < start) || (i + j >= start + len))
+ chunk_strcat(out, "'");
+ else if (unsafe > 1)
+ chunk_strcat(out, "*");
+ else if (isprint((unsigned char)d[i + j]))
+ chunk_appendf(out, "%c", d[i + j]);
+ else
+ chunk_strcat(out, ".");
+ }
+ chunk_strcat(out, "\n");
+ }
+}
+
+/* dumps <pfx> followed by <n> bytes from <addr> in hex form into buffer <buf>
+ * enclosed in brackets after the address itself, formatted on 14 chars
+ * including the "0x" prefix. This is meant to be used as a prefix for code
+ * areas. For example:
+ * "0x7f10b6557690 [48 c7 c0 0f 00 00 00 0f]"
+ * It relies on may_access() to know if the bytes are dumpable, otherwise "--"
+ * is emitted. A NULL <pfx> will be considered empty.
+ */
+void dump_addr_and_bytes(struct buffer *buf, const char *pfx, const void *addr, int n)
+{
+ int ok = 0;
+ int i;
+
+ chunk_appendf(buf, "%s%#14lx [", pfx ? pfx : "", (long)addr);
+
+ for (i = 0; i < n; i++) {
+ if (i == 0 || (((long)(addr + i) ^ (long)(addr)) & 4096))
+ ok = may_access(addr + i);
+ if (ok)
+ chunk_appendf(buf, "%02x%s", ((uint8_t*)addr)[i], (i<n-1) ? " " : "]");
+ else
+ chunk_appendf(buf, "--%s", (i<n-1) ? " " : "]");
+ }
+}
+
+/* print a line of a text buffer (limited to 70 bytes) to <out>. The format is:
+ * <2 spaces> <offset=5 digits> <space or plus> <space> <70 chars max> <\n>
+ * which is at most 80 chars per line. Non-printable chars \t, \n, \r and \e
+ * are encoded in C format. Other non-printable chars are encoded as "\xHH".
+ * Original lines are respected within the limit of 70 output chars. Lines that
+ * are the continuation of a previous truncated line begin with "+" instead of
+ * " " after the offset. The new offset is returned.
+ */
+int dump_text_line(struct buffer *out, const char *buf, int bsize, int len,
+ int *line, int ptr)
+{
+ int end;
+ unsigned char c;
+
+ end = out->data + 80;
+ if (end > out->size)
+ return ptr;
+
+ chunk_appendf(out, " %05d%c ", ptr, (ptr == *line) ? ' ' : '+');
+
+ while (ptr < len && ptr < bsize) {
+ c = buf[ptr];
+ if (isprint((unsigned char)c) && isascii((unsigned char)c) && c != '\\') {
+ if (out->data > end - 2)
+ break;
+ out->area[out->data++] = c;
+ } else if (c == '\t' || c == '\n' || c == '\r' || c == '\e' || c == '\\') {
+ if (out->data > end - 3)
+ break;
+ out->area[out->data++] = '\\';
+ switch (c) {
+ case '\t': c = 't'; break;
+ case '\n': c = 'n'; break;
+ case '\r': c = 'r'; break;
+ case '\e': c = 'e'; break;
+ case '\\': c = '\\'; break;
+ }
+ out->area[out->data++] = c;
+ } else {
+ if (out->data > end - 5)
+ break;
+ out->area[out->data++] = '\\';
+ out->area[out->data++] = 'x';
+ out->area[out->data++] = hextab[(c >> 4) & 0xF];
+ out->area[out->data++] = hextab[c & 0xF];
+ }
+ if (buf[ptr++] == '\n') {
+ /* we had a line break, let's return now */
+ out->area[out->data++] = '\n';
+ *line = ptr;
+ return ptr;
+ }
+ }
+ /* we have an incomplete line, we return it as-is */
+ out->area[out->data++] = '\n';
+ return ptr;
+}
+
+/* displays a <len> long memory block at <buf>, assuming first byte of <buf>
+ * has address <baseaddr>. String <pfx> may be placed as a prefix in front of
+ * each line. It may be NULL if unused. The output is emitted to file <out>.
+ */
+void debug_hexdump(FILE *out, const char *pfx, const char *buf,
+ unsigned int baseaddr, int len)
+{
+ unsigned int i;
+ int b, j;
+
+ for (i = 0; i < (len + (baseaddr & 15)); i += 16) {
+ b = i - (baseaddr & 15);
+ fprintf(out, "%s%08x: ", pfx ? pfx : "", i + (baseaddr & ~15));
+ for (j = 0; j < 8; j++) {
+ if (b + j >= 0 && b + j < len)
+ fprintf(out, "%02x ", (unsigned char)buf[b + j]);
+ else
+ fprintf(out, " ");
+ }
+
+ if (b + j >= 0 && b + j < len)
+ fputc('-', out);
+ else
+ fputc(' ', out);
+
+ for (j = 8; j < 16; j++) {
+ if (b + j >= 0 && b + j < len)
+ fprintf(out, " %02x", (unsigned char)buf[b + j]);
+ else
+ fprintf(out, " ");
+ }
+
+ fprintf(out, " ");
+ for (j = 0; j < 16; j++) {
+ if (b + j >= 0 && b + j < len) {
+ if (isprint((unsigned char)buf[b + j]))
+ fputc((unsigned char)buf[b + j], out);
+ else
+ fputc('.', out);
+ }
+ else
+ fputc(' ', out);
+ }
+ fputc('\n', out);
+ }
+}
+
+/* Tries to report the executable path name on platforms supporting this. If
+ * not found or not possible, returns NULL.
+ */
+const char *get_exec_path()
+{
+ const char *ret = NULL;
+
+#if defined(__linux__) && defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 16))
+ long execfn = getauxval(AT_EXECFN);
+
+ if (execfn && execfn != ENOENT)
+ ret = (const char *)execfn;
+#elif defined(__FreeBSD__)
+ Elf_Auxinfo *auxv;
+ for (auxv = __elf_aux_vector; auxv->a_type != AT_NULL; ++auxv) {
+ if (auxv->a_type == AT_EXECPATH) {
+ ret = (const char *)auxv->a_un.a_ptr;
+ break;
+ }
+ }
+#elif defined(__NetBSD__)
+ AuxInfo *auxv;
+ for (auxv = _dlauxinfo(); auxv->a_type != AT_NULL; ++auxv) {
+ if (auxv->a_type == AT_SUN_EXECNAME) {
+ ret = (const char *)auxv->a_v;
+ break;
+ }
+ }
+#elif defined(__sun)
+ ret = getexecname();
+#endif
+ return ret;
+}
+
+#if (defined(__ELF__) && !defined(__linux__)) || defined(USE_DL)
+/* calls dladdr() or dladdr1() on <addr> and <dli>. If dladdr1 is available,
+ * also returns the symbol size in <size>, otherwise returns 0 there.
+ */
+static int dladdr_and_size(const void *addr, Dl_info *dli, size_t *size)
+{
+ int ret;
+#if defined(__GLIBC__) && (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 3)) // most detailed one
+ const ElfW(Sym) *sym __attribute__((may_alias));
+
+ ret = dladdr1(addr, dli, (void **)&sym, RTLD_DL_SYMENT);
+ if (ret)
+ *size = sym ? sym->st_size : 0;
+#else
+#if defined(__sun)
+ ret = dladdr((void *)addr, dli);
+#else
+ ret = dladdr(addr, dli);
+#endif
+ *size = 0;
+#endif
+ return ret;
+}
+
+/* Sets build_is_static to true if we detect a static build. Some older glibcs
+ * tend to crash inside dlsym() in static builds, but tests show that at least
+ * dladdr() still works (and will fail to resolve anything of course). Thus we
+ * try to determine if we're on a static build to avoid calling dlsym() in this
+ * case.
+ */
+void check_if_static_build()
+{
+ Dl_info dli = { };
+ size_t size = 0;
+
+ /* Now let's try to be smarter */
+ if (!dladdr_and_size(&main, &dli, &size))
+ build_is_static = 1;
+ else
+ build_is_static = 0;
+}
+
+INITCALL0(STG_PREPARE, check_if_static_build);
+
+/* Tries to retrieve the address of the first occurrence of symbol <name>.
+ * Note that NULL in return is not always an error as a symbol may have that
+ * address in special situations.
+ */
+void *get_sym_curr_addr(const char *name)
+{
+ void *ptr = NULL;
+
+#ifdef RTLD_DEFAULT
+ if (!build_is_static)
+ ptr = dlsym(RTLD_DEFAULT, name);
+#endif
+ return ptr;
+}
+
+
+/* Tries to retrieve the address of the next occurrence of symbol <name>.
+ * Note that NULL in return is not always an error as a symbol may have that
+ * address in special situations.
+ */
+void *get_sym_next_addr(const char *name)
+{
+ void *ptr = NULL;
+
+#ifdef RTLD_NEXT
+ if (!build_is_static)
+ ptr = dlsym(RTLD_NEXT, name);
+#endif
+ return ptr;
+}
+
+#else /* elf & linux & dl */
+
+/* no possible resolving on other platforms at the moment */
+void *get_sym_curr_addr(const char *name)
+{
+ return NULL;
+}
+
+void *get_sym_next_addr(const char *name)
+{
+ return NULL;
+}
+
+#endif /* elf & linux & dl */
+
+/* Tries to append to buffer <buf> some indications about the symbol at address
+ * <addr> using the following form:
+ * lib:+0xoffset (unresolvable address from lib's base)
+ * main+0xoffset (unresolvable address from main (+/-))
+ * lib:main+0xoffset (unresolvable lib address from main (+/-))
+ * name (resolved exact exec address)
+ * lib:name (resolved exact lib address)
+ * name+0xoffset/0xsize (resolved address within exec symbol)
+ * lib:name+0xoffset/0xsize (resolved address within lib symbol)
+ *
+ * The file name (lib or executable) is limited to what lies between the last
+ * '/' and the first following '.'. An optional prefix <pfx> is prepended before
+ * the output if not null. The file is not dumped when it's the same as the one
+ * that contains the "main" symbol, or when __ELF__ && USE_DL are not set.
+ *
+ * The symbol's base address is returned, or NULL when unresolved, in order to
+ * allow the caller to match it against known ones.
+ */
+const void *resolve_sym_name(struct buffer *buf, const char *pfx, const void *addr)
+{
+ const struct {
+ const void *func;
+ const char *name;
+ } fcts[] = {
+ { .func = process_stream, .name = "process_stream" },
+ { .func = task_run_applet, .name = "task_run_applet" },
+ { .func = sc_conn_io_cb, .name = "sc_conn_io_cb" },
+ { .func = sock_conn_iocb, .name = "sock_conn_iocb" },
+ { .func = dgram_fd_handler, .name = "dgram_fd_handler" },
+ { .func = listener_accept, .name = "listener_accept" },
+ { .func = manage_global_listener_queue, .name = "manage_global_listener_queue" },
+ { .func = poller_pipe_io_handler, .name = "poller_pipe_io_handler" },
+ { .func = mworker_accept_wrapper, .name = "mworker_accept_wrapper" },
+ { .func = session_expire_embryonic, .name = "session_expire_embryonic" },
+#ifdef USE_THREAD
+ { .func = accept_queue_process, .name = "accept_queue_process" },
+#endif
+#ifdef USE_LUA
+ { .func = hlua_process_task, .name = "hlua_process_task" },
+#endif
+#ifdef SSL_MODE_ASYNC
+ { .func = ssl_async_fd_free, .name = "ssl_async_fd_free" },
+ { .func = ssl_async_fd_handler, .name = "ssl_async_fd_handler" },
+#endif
+#ifdef USE_QUIC
+ { .func = quic_conn_sock_fd_iocb, .name = "quic_conn_sock_fd_iocb" },
+#endif
+ };
+
+#if (defined(__ELF__) && !defined(__linux__)) || defined(USE_DL)
+ Dl_info dli, dli_main;
+ size_t size;
+ const char *fname, *p;
+#endif
+ int i;
+
+ if (pfx)
+ chunk_appendf(buf, "%s", pfx);
+
+ for (i = 0; i < sizeof(fcts) / sizeof(fcts[0]); i++) {
+ if (addr == fcts[i].func) {
+ chunk_appendf(buf, "%s", fcts[i].name);
+ return addr;
+ }
+ }
+
+#if (defined(__ELF__) && !defined(__linux__)) || defined(USE_DL)
+ /* Now let's try to be smarter */
+ if (!dladdr_and_size(addr, &dli, &size))
+ goto unknown;
+
+ /* 1. prefix the library name if it's not the same object as the one
+ * that contains the main function. The name is picked between last '/'
+ * and first following '.'.
+ */
+ if (!dladdr(main, &dli_main))
+ dli_main.dli_fbase = NULL;
+
+ if (dli_main.dli_fbase != dli.dli_fbase) {
+ fname = dli.dli_fname;
+ p = strrchr(fname, '/');
+ if (p++)
+ fname = p;
+ p = strchr(fname, '.');
+ if (!p)
+ p = fname + strlen(fname);
+
+ chunk_appendf(buf, "%.*s:", (int)(long)(p - fname), fname);
+ }
+
+ /* 2. symbol name */
+ if (dli.dli_sname) {
+ /* known, dump it and return symbol's address (exact or relative) */
+ chunk_appendf(buf, "%s", dli.dli_sname);
+ if (addr != dli.dli_saddr) {
+ chunk_appendf(buf, "+%#lx", (long)(addr - dli.dli_saddr));
+ if (size)
+ chunk_appendf(buf, "/%#lx", (long)size);
+ }
+ return dli.dli_saddr;
+ }
+ else if (dli_main.dli_fbase != dli.dli_fbase) {
+ /* unresolved symbol from a known library, report relative offset */
+ chunk_appendf(buf, "+%#lx", (long)(addr - dli.dli_fbase));
+ return NULL;
+ }
+#endif /* __ELF__ && !__linux__ || USE_DL */
+ unknown:
+ /* unresolved symbol from the main file, report relative offset to main */
+ if ((void*)addr < (void*)main)
+ chunk_appendf(buf, "main-%#lx", (long)((void*)main - addr));
+ else
+ chunk_appendf(buf, "main+%#lx", (long)(addr - (void*)main));
+ return NULL;
+}
+
+/* On systems where this is supported, let's provide a possibility to enumerate
+ * the list of object files. The output is appended to a buffer initialized by
+ * the caller, with one name per line. A trailing zero is always emitted if data
+ * are written. Only real objects are dumped (executable and .so libs). The
+ * function returns non-zero if it dumps anything. These functions do not make
+ * use of the trash so that it is possible for the caller to call them with the
+ * trash on input. The output format may be platform-specific but at least one
+ * version must emit raw object file names when argument is zero.
+ */
+#if defined(HA_HAVE_DUMP_LIBS)
+# if defined(HA_HAVE_DL_ITERATE_PHDR)
+/* the private <data> we pass below is a dump context initialized like this */
+struct dl_dump_ctx {
+ struct buffer *buf;
+ int with_addr;
+};
+
+static int dl_dump_libs_cb(struct dl_phdr_info *info, size_t size, void *data)
+{
+ struct dl_dump_ctx *ctx = data;
+ const char *fname;
+ size_t p1, p2, beg, end;
+ int idx;
+
+ if (!info || !info->dlpi_name)
+ goto leave;
+
+ if (!*info->dlpi_name)
+ fname = get_exec_path();
+ else if (strchr(info->dlpi_name, '/'))
+ fname = info->dlpi_name;
+ else
+ /* else it's a VDSO or similar and we're not interested */
+ goto leave;
+
+ if (!ctx->with_addr)
+ goto dump_name;
+
+ /* virtual addresses are relative to the load address and are per
+ * pseudo-header, so we have to scan them all to find the furthest
+ * one from the beginning. In this case we only dump entries if
+ * they have at least one section.
+ */
+ beg = ~0; end = 0;
+ for (idx = 0; idx < info->dlpi_phnum; idx++) {
+ if (!info->dlpi_phdr[idx].p_memsz)
+ continue;
+ p1 = info->dlpi_phdr[idx].p_vaddr;
+ if (p1 < beg)
+ beg = p1;
+ p2 = p1 + info->dlpi_phdr[idx].p_memsz - 1;
+ if (p2 > end)
+ end = p2;
+ }
+
+ if (!idx)
+ goto leave;
+
+ chunk_appendf(ctx->buf, "0x%012llx-0x%012llx (0x%07llx) ",
+ (ullong)info->dlpi_addr + beg,
+ (ullong)info->dlpi_addr + end,
+ (ullong)(end - beg + 1));
+ dump_name:
+ chunk_appendf(ctx->buf, "%s\n", fname);
+ leave:
+ return 0;
+}
+
+/* dumps lib names and optionally address ranges */
+int dump_libs(struct buffer *output, int with_addr)
+{
+ struct dl_dump_ctx ctx = { .buf = output, .with_addr = with_addr };
+ size_t old_data = output->data;
+
+ dl_iterate_phdr(dl_dump_libs_cb, &ctx);
+ return output->data != old_data;
+}
+# else // no DL_ITERATE_PHDR
+# error "No dump_libs() function for this platform"
+# endif
+#else // no HA_HAVE_DUMP_LIBS
+
+/* unsupported platform: do not dump anything */
+int dump_libs(struct buffer *output, int with_addr)
+{
+ return 0;
+}
+
+#endif // HA_HAVE_DUMP_LIBS
+
+/*
+ * Allocate an array of unsigned int whose address is stored in <nums>, from
+ * the <str> string made of integers separated by dot characters.
+ *
+ * First, the value pointed to by <sz> is initialized to 0 and the array
+ * pointed to by <nums> to NULL. Then the array is allocated at <nums>,
+ * updating the value pointed to by <sz> to the size of this array.
+ *
+ * Returns 1 if it succeeded, 0 if not.
+ */
+int parse_dotted_uints(const char *str, unsigned int **nums, size_t *sz)
+{
+ unsigned int *n;
+ const char *s, *end;
+
+ s = str;
+ *sz = 0;
+ end = str + strlen(str);
+ *nums = n = NULL;
+
+ while (1) {
+ unsigned int r;
+
+ if (s >= end)
+ break;
+
+ r = read_uint(&s, end);
+ /* Expected characters after having read a uint: '\0' or '.';
+ * if '.', it must not be terminal.
+ */
+ if (*s != '\0'&& (*s++ != '.' || s == end)) {
+ free(n);
+ return 0;
+ }
+
+ n = my_realloc2(n, (*sz + 1) * sizeof *n);
+ if (!n)
+ return 0;
+
+ n[(*sz)++] = r;
+ }
+ *nums = n;
+
+ return 1;
+}
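+
+/* Usage sketch: parsing "1.2.3" is expected to yield a 3-entry array:
+ *
+ *   unsigned int *nums = NULL;
+ *   size_t sz = 0;
+ *
+ *   if (parse_dotted_uints("1.2.3", &nums, &sz)) {
+ *       // here sz == 3 and nums[0..2] == {1, 2, 3}
+ *       free(nums);
+ *   }
+ */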
+
+
+/* returns the number of bytes needed to encode <v> as a varint. An inline
+ * version exists for use with constants (__varint_bytes()).
+ */
+int varint_bytes(uint64_t v)
+{
+ int len = 1;
+
+ if (v >= 240) {
+ v = (v - 240) >> 4;
+ while (1) {
+ len++;
+ if (v < 128)
+ break;
+ v = (v - 128) >> 7;
+ }
+ }
+ return len;
+}
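+
+/* From the code above, values 0..239 need one byte, and the two-byte range
+ * ends where (v - 240) >> 4 reaches 128, e.g.:
+ *
+ *   varint_bytes(239);    // -> 1
+ *   varint_bytes(240);    // -> 2
+ *   varint_bytes(2288);   // -> 3  (2288 == 240 + 128 * 16)
+ */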
+
+
+/* Random number generator state, see below */
+static uint64_t ha_random_state[2] ALIGNED(2*sizeof(uint64_t));
+
+/* This is a thread-safe implementation of xoroshiro128** described below:
+ * http://prng.di.unimi.it/
+ * It features a 2^128 long sequence, returns 64 high-quality bits on each call,
+ * supports fast jumps and passes all common quality tests. It is thread-safe,
+ * uses a double-cas on 64-bit architectures supporting it, and falls back to a
+ * local lock on other ones.
+ */
+uint64_t ha_random64()
+{
+ uint64_t old[2] ALIGNED(2*sizeof(uint64_t));
+ uint64_t new[2] ALIGNED(2*sizeof(uint64_t));
+
+#if defined(USE_THREAD) && (!defined(HA_CAS_IS_8B) || !defined(HA_HAVE_CAS_DW))
+ static HA_SPINLOCK_T rand_lock;
+
+ HA_SPIN_LOCK(OTHER_LOCK, &rand_lock);
+#endif
+
+ old[0] = ha_random_state[0];
+ old[1] = ha_random_state[1];
+
+#if defined(USE_THREAD) && defined(HA_CAS_IS_8B) && defined(HA_HAVE_CAS_DW)
+ do {
+#endif
+ new[1] = old[0] ^ old[1];
+ new[0] = rotl64(old[0], 24) ^ new[1] ^ (new[1] << 16); // a, b
+ new[1] = rotl64(new[1], 37); // c
+
+#if defined(USE_THREAD) && defined(HA_CAS_IS_8B) && defined(HA_HAVE_CAS_DW)
+ } while (unlikely(!_HA_ATOMIC_DWCAS(ha_random_state, old, new)));
+#else
+ ha_random_state[0] = new[0];
+ ha_random_state[1] = new[1];
+#if defined(USE_THREAD)
+ HA_SPIN_UNLOCK(OTHER_LOCK, &rand_lock);
+#endif
+#endif
+ return rotl64(old[0] * 5, 7) * 9;
+}
+
+/* seeds the random state using up to <len> bytes from <seed>, starting with
+ * the first non-zero byte.
+ */
+void ha_random_seed(const unsigned char *seed, size_t len)
+{
+ size_t pos;
+
+ /* the seed must not be all zeroes, so we pre-fill it with alternating
+ * bits and overwrite part of them with the block starting at the first
+ * non-zero byte from the seed.
+ */
+ memset(ha_random_state, 0x55, sizeof(ha_random_state));
+
+ for (pos = 0; pos < len; pos++)
+ if (seed[pos] != 0)
+ break;
+
+ if (pos == len)
+ return;
+
+ seed += pos;
+ len -= pos;
+
+ if (len > sizeof(ha_random_state))
+ len = sizeof(ha_random_state);
+
+ memcpy(ha_random_state, seed, len);
+}
+
+/* This causes a jump to (dist * 2^96) places in the pseudo-random sequence,
+ * and is equivalent to calling ha_random64() as many times. It is used to
+ * provide non-overlapping sequences of 2^96 numbers (~7*10^28) to up to 2^32
+ * different generators (i.e. different processes after a fork). The <dist>
+ * argument is the distance to jump to and is used in a loop so it had better
+ * not be too large if the processing time is a concern.
+ *
+ * BEWARE: this function is NOT thread-safe and must not be called during
+ * concurrent accesses to ha_random64().
+ */
+void ha_random_jump96(uint32_t dist)
+{
+ while (dist--) {
+ uint64_t s0 = 0;
+ uint64_t s1 = 0;
+ int b;
+
+ for (b = 0; b < 64; b++) {
+ if ((0xd2a98b26625eee7bULL >> b) & 1) {
+ s0 ^= ha_random_state[0];
+ s1 ^= ha_random_state[1];
+ }
+ ha_random64();
+ }
+
+ for (b = 0; b < 64; b++) {
+ if ((0xdddf9b1090aa7ac1ULL >> b) & 1) {
+ s0 ^= ha_random_state[0];
+ s1 ^= ha_random_state[1];
+ }
+ ha_random64();
+ }
+ ha_random_state[0] = s0;
+ ha_random_state[1] = s1;
+ }
+}
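+
+/* Seeding sketch: a typical startup sequence seeds the state once, then each
+ * forked process jumps to its own non-overlapping subsequence before any
+ * concurrent use of ha_random64(). <boot_seed> and <proc_id> are placeholders:
+ *
+ *   ha_random_seed(boot_seed, sizeof(boot_seed));
+ *   ha_random_jump96(proc_id);   // NOT thread-safe, do it before threads run
+ *   r = ha_random64();
+ */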
+
+/* Generates an RFC 4122 version 4 (random) UUID into chunk <output>, which
+ * must be at least 37 bytes large.
+ */
+void ha_generate_uuid(struct buffer *output)
+{
+ uint32_t rnd[4];
+ uint64_t last;
+
+ last = ha_random64();
+ rnd[0] = last;
+ rnd[1] = last >> 32;
+
+ last = ha_random64();
+ rnd[2] = last;
+ rnd[3] = last >> 32;
+
+ chunk_printf(output, "%8.8x-%4.4x-%4.4x-%4.4x-%12.12llx",
+ rnd[0],
+ rnd[1] & 0xFFFF,
+ ((rnd[1] >> 16u) & 0xFFF) | 0x4000, // highest 4 bits indicate the uuid version
+ (rnd[2] & 0x3FFF) | 0x8000, // the highest 2 bits indicate the UUID variant (10),
+ (long long)((rnd[2] >> 14u) | ((uint64_t) rnd[3] << 18u)) & 0xFFFFFFFFFFFFull);
+}
+
+
+/* only used by parse_line() below. It supports writing in place provided that
+ * <in> is updated to the next location before calling it. In that case, the
+ * char at <in> may be overwritten.
+ */
+#define EMIT_CHAR(x) \
+ do { \
+ char __c = (char)(x); \
+ if ((opts & PARSE_OPT_INPLACE) && out+outpos > in) \
+ err |= PARSE_ERR_OVERLAP; \
+ if (outpos >= outmax) \
+ err |= PARSE_ERR_TOOLARGE; \
+ if (!err) \
+ out[outpos] = __c; \
+ outpos++; \
+ } while (0)
+
+/* Parse <in>, copy it into <out> split into isolated words whose pointers
+ * are put in <args>. If more than <outlen> bytes have to be emitted, the
+ * extraneous ones are not emitted but <outlen> is updated so that the caller
+ * knows how much to realloc. Similarly, <args> are not updated beyond <nbargs>
+ * but the returned <nbargs> indicates how many were found. All trailing args
+ * up to <nbargs> point to the trailing zero, and as long as <nbargs> is > 0,
+ * it is guaranteed that at least one arg will point to the zero. It is safe
+ * to call it with a NULL <args> if <nbargs> is 0.
+ *
+ * <out> may overlap with <in> provided that it never goes further, in which
+ * case the parser will accept to perform in-place parsing and unquoting/
+ * unescaping but only if environment variables do not lead to expansion that
+ * causes overlapping, otherwise the error will not be recoverable since the
+ * input string will have been destroyed. Note that even during out-of-place
+ * parsing, <in> will experience temporary in-place modifications for variable
+ * resolution and must be writable, and will also receive zeroes to delimit
+ * words when using in-place copy. Parsing options <opts> are taken from
+ * PARSE_OPT_*. The return value
+ * is zero on success, otherwise a bitwise-or of PARSE_ERR_*. Upon error, the
+ * starting point of the first invalid character sequence or unmatched
+ * quote/brace is reported in <errptr> if not NULL. When using in-place parsing
+ * error reporting might be difficult since zeroes will have been inserted into
+ * the string. One solution for the caller may consist in replacing all args
+ * delimiters with spaces in this case.
+ */
+uint32_t parse_line(char *in, char *out, size_t *outlen, char **args, int *nbargs, uint32_t opts, const char **errptr)
+{
+ char *quote = NULL;
+ char *brace = NULL;
+ char *word_expand = NULL;
+ unsigned char hex1, hex2;
+ size_t outmax = *outlen;
+ int argsmax = *nbargs - 1;
+ size_t outpos = 0;
+ int squote = 0;
+ int dquote = 0;
+ int arg = 0;
+ uint32_t err = 0;
+
+ *nbargs = 0;
+ *outlen = 0;
+
+ /* argsmax may be -1 here, protecting args[] from any write */
+ if (arg < argsmax)
+ args[arg] = out;
+
+ while (1) {
+ if (*in >= '-' && *in != '\\') {
+ /* speedup: directly send all regular chars starting
+ * with '-', '.', '/', alnum etc...
+ */
+ EMIT_CHAR(*in++);
+ continue;
+ }
+ else if (*in == '\0' || *in == '\n' || *in == '\r') {
+ /* end of line */
+ break;
+ }
+ else if (*in == '#' && (opts & PARSE_OPT_SHARP) && !squote && !dquote) {
+ /* comment */
+ break;
+ }
+ else if (*in == '"' && !squote && (opts & PARSE_OPT_DQUOTE)) { /* double quote outside single quotes */
+ if (dquote) {
+ dquote = 0;
+ quote = NULL;
+ }
+ else {
+ dquote = 1;
+ quote = in;
+ }
+ in++;
+ continue;
+ }
+ else if (*in == '\'' && !dquote && (opts & PARSE_OPT_SQUOTE)) { /* single quote outside double quotes */
+ if (squote) {
+ squote = 0;
+ quote = NULL;
+ }
+ else {
+ squote = 1;
+ quote = in;
+ }
+ in++;
+ continue;
+ }
+ else if (*in == '\\' && !squote && (opts & PARSE_OPT_BKSLASH)) {
+ /* first, we'll replace \\, \<space>, \#, \r, \n, \t, \xXX with their
+ * C equivalent value but only when they have a special meaning and within
+ * double quotes for some of them. Other combinations left unchanged (eg: \1).
+ */
+ char tosend = *in;
+
+ switch (in[1]) {
+ case ' ':
+ case '\\':
+ tosend = in[1];
+ in++;
+ break;
+
+ case 't':
+ tosend = '\t';
+ in++;
+ break;
+
+ case 'n':
+ tosend = '\n';
+ in++;
+ break;
+
+ case 'r':
+ tosend = '\r';
+ in++;
+ break;
+
+ case '#':
+ /* escaping of "#" only if comments are supported */
+ if (opts & PARSE_OPT_SHARP)
+ in++;
+ tosend = *in;
+ break;
+
+ case '\'':
+ /* escaping of "'" only outside single quotes and only if single quotes are supported */
+ if (opts & PARSE_OPT_SQUOTE && !squote)
+ in++;
+ tosend = *in;
+ break;
+
+ case '"':
+ /* escaping of '"' only outside single quotes and only if double quotes are supported */
+ if (opts & PARSE_OPT_DQUOTE && !squote)
+ in++;
+ tosend = *in;
+ break;
+
+ case '$':
+ /* escaping of '$' only inside double quotes and only if env supported */
+ if (opts & PARSE_OPT_ENV && dquote)
+ in++;
+ tosend = *in;
+ break;
+
+ case 'x':
+ if (!ishex(in[2]) || !ishex(in[3])) {
+ /* invalid or incomplete hex sequence */
+ err |= PARSE_ERR_HEX;
+ if (errptr)
+ *errptr = in;
+ goto leave;
+ }
+ hex1 = toupper((unsigned char)in[2]) - '0';
+ hex2 = toupper((unsigned char)in[3]) - '0';
+ if (hex1 > 9) hex1 -= 'A' - '9' - 1;
+ if (hex2 > 9) hex2 -= 'A' - '9' - 1;
+ tosend = (hex1 << 4) + hex2;
+ in += 3;
+ break;
+
+ default:
+ /* other combinations are not escape sequences */
+ break;
+ }
+
+ in++;
+ EMIT_CHAR(tosend);
+ }
+ else if (isspace((unsigned char)*in) && !squote && !dquote) {
+ /* a non-escaped space is an argument separator */
+ while (isspace((unsigned char)*in))
+ in++;
+ EMIT_CHAR(0);
+ arg++;
+ if (arg < argsmax)
+ args[arg] = out + outpos;
+ else
+ err |= PARSE_ERR_TOOMANY;
+ }
+ else if (*in == '$' && (opts & PARSE_OPT_ENV) && (dquote || !(opts & PARSE_OPT_DQUOTE))) {
+ /* environment variables are evaluated anywhere, or only
+ * inside double quotes if they are supported.
+ */
+ char *var_name;
+ char save_char;
+ const char *value;
+
+ in++;
+
+ if (*in == '{')
+ brace = in++;
+
+ if (!isalpha((unsigned char)*in) && *in != '_' && *in != '.') {
+ /* unacceptable character in variable name */
+ err |= PARSE_ERR_VARNAME;
+ if (errptr)
+ *errptr = in;
+ goto leave;
+ }
+
+ var_name = in;
+ if (*in == '.')
+ in++;
+ while (isalnum((unsigned char)*in) || *in == '_')
+ in++;
+
+ save_char = *in;
+ *in = '\0';
+ if (unlikely(*var_name == '.')) {
+ /* internal pseudo-variables */
+ if (strcmp(var_name, ".LINE") == 0)
+ value = ultoa(global.cfg_curr_line);
+ else if (strcmp(var_name, ".FILE") == 0)
+ value = global.cfg_curr_file;
+ else if (strcmp(var_name, ".SECTION") == 0)
+ value = global.cfg_curr_section;
+ else {
+ /* unsupported internal variable name */
+ err |= PARSE_ERR_VARNAME;
+ if (errptr)
+ *errptr = var_name;
+ goto leave;
+ }
+ } else {
+ value = getenv(var_name);
+ }
+ *in = save_char;
+
+ /* support for '[*]' sequence to force word expansion,
+ * only available inside braces */
+ if (*in == '[' && brace && (opts & PARSE_OPT_WORD_EXPAND)) {
+ word_expand = in++;
+
+ if (*in++ != '*' || *in++ != ']') {
+ err |= PARSE_ERR_WRONG_EXPAND;
+ if (errptr)
+ *errptr = word_expand;
+ goto leave;
+ }
+ }
+
+ if (brace) {
+ if (*in == '-') {
+ /* default value starts just after the '-' */
+ if (!value)
+ value = in + 1;
+
+ while (*in && *in != '}')
+ in++;
+ if (!*in)
+ goto no_brace;
+ *in = 0; // terminate the default value
+ }
+ else if (*in != '}') {
+ no_brace:
+ /* unmatched brace */
+ err |= PARSE_ERR_BRACE;
+ if (errptr)
+ *errptr = brace;
+ goto leave;
+ }
+
+ /* brace found, skip it */
+ in++;
+ brace = NULL;
+ }
+
+ if (value) {
+ while (*value) {
+ /* expand as individual parameters on a space character */
+ if (word_expand && isspace((unsigned char)*value)) {
+ EMIT_CHAR(0);
+ ++arg;
+ if (arg < argsmax)
+ args[arg] = out + outpos;
+ else
+ err |= PARSE_ERR_TOOMANY;
+
+ /* skip consecutive spaces */
+ while (isspace((unsigned char)*++value))
+ ;
+ } else {
+ EMIT_CHAR(*value++);
+ }
+ }
+ }
+ else {
+ /* An unmatched environment variable was parsed.
+ * Let's skip the trailing double-quote character
+ * and spaces.
+ */
+ if (likely(*var_name != '.') && *in == '"') {
+ in++;
+ while (isspace((unsigned char)*in))
+ in++;
+ if (dquote) {
+ dquote = 0;
+ quote = NULL;
+ }
+ }
+ }
+ word_expand = NULL;
+ }
+ else {
+ /* any other regular char */
+ EMIT_CHAR(*in++);
+ }
+ }
+
+ /* end of output string */
+ EMIT_CHAR(0);
+
+ /* Don't add an empty arg after trailing spaces. Note that args[arg]
+ * may contain some distances relative to NULL if <out> was NULL, or
+ * pointers beyond the end of <out> in case <outlen> is too short, thus
+ * we must not dereference it.
+ */
+ if (arg < argsmax && args[arg] != out + outpos - 1)
+ arg++;
+
+ if (quote) {
+ /* unmatched quote */
+ err |= PARSE_ERR_QUOTE;
+ if (errptr)
+ *errptr = quote;
+ goto leave;
+ }
+ leave:
+ *nbargs = arg;
+ *outlen = outpos;
+
+ /* empty all trailing args by making them point to the trailing zero,
+ * at least the last one in any case.
+ */
+ if (arg > argsmax)
+ arg = argsmax;
+
+ while (arg >= 0 && arg <= argsmax)
+ args[arg++] = out + outpos - 1;
+
+ return err;
+}
+#undef EMIT_CHAR
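+
+/* Usage sketch: tokenizing a configuration-style line <line> (a writable
+ * char * here) into a separate output buffer with the usual quoting,
+ * escaping and environment options:
+ *
+ *   char out[256];
+ *   char *args[8];
+ *   size_t outlen = sizeof(out);
+ *   int nbargs = sizeof(args) / sizeof(args[0]);
+ *   uint32_t err;
+ *
+ *   err = parse_line(line, out, &outlen, args, &nbargs,
+ *                    PARSE_OPT_ENV | PARSE_OPT_DQUOTE | PARSE_OPT_SQUOTE |
+ *                    PARSE_OPT_BKSLASH | PARSE_OPT_SHARP, NULL);
+ *   // on success err == 0 and args[0..nbargs-1] point into <out>
+ */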
+
+/* Use <path_fmt> and following arguments as a printf format to build up the
+ * name of a file, whose first line will be read into the trash buffer. The
+ * trailing CR and LF if any are stripped. On success, it sets trash.data to
+ * the number of resulting bytes in the trash and returns this value. Otherwise
+ * on failure it returns -1 if it could not build the path, -2 on file
+ * access error (e.g. permissions), or -3 on file read error. The trash is
+ * always reset before proceeding. Too large lines are truncated to the size
+ * of the trash.
+ */
+ssize_t read_line_to_trash(const char *path_fmt, ...)
+{
+ va_list args;
+ FILE *file;
+ ssize_t ret;
+
+ chunk_reset(&trash);
+
+ va_start(args, path_fmt);
+ ret = vsnprintf(trash.area, trash.size, path_fmt, args);
+ va_end(args);
+
+ if (ret >= trash.size)
+ return -1;
+
+ file = fopen(trash.area, "r");
+ if (!file)
+ return -2;
+
+ ret = -3;
+ chunk_reset(&trash);
+ if (fgets(trash.area, trash.size, file)) {
+ trash.data = strlen(trash.area);
+ while (trash.data &&
+ (trash.area[trash.data - 1] == '\r' ||
+ trash.area[trash.data - 1] == '\n'))
+ trash.data--;
+ trash.area[trash.data] = 0;
+ ret = trash.data; // success
+ }
+
+ fclose(file);
+ return ret;
+}
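+
+/* Usage sketch (with a hypothetical <ifname>): reading a one-line sysfs file:
+ *
+ *   if (read_line_to_trash("/sys/class/net/%s/mtu", ifname) >= 0)
+ *       mtu = atoi(trash.area);
+ */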
+
+/* This is used to sanitize an input line that's about to be used for error reporting.
+ * It will adjust <line> to print approximately <width> chars around <pos>, trying to
+ * preserve the beginning, with leading or trailing "..." when the line is truncated.
+ * Non-printable characters are replaced with '?'. It returns the new offset of
+ * <pos> in the modified line. <width> must be at least 6 to support two "...",
+ * otherwise the result is undefined. The line
+ * itself must have at least 7 chars allocated for the same reason.
+ */
+size_t sanitize_for_printing(char *line, size_t pos, size_t width)
+{
+ size_t shift = 0;
+ char *out = line;
+ char *in = line;
+ char *end = line + width;
+
+ if (pos >= width) {
+ /* if we have to shift, we'll be out of context, so let's
+ * try to put <pos> at the center of width.
+ */
+ shift = pos - width / 2;
+ in += shift + 3;
+ end = out + width - 3;
+ out[0] = out[1] = out[2] = '.';
+ out += 3;
+ }
+
+ while (out < end && *in) {
+ if (isspace((unsigned char)*in))
+ *out++ = ' ';
+ else if (isprint((unsigned char)*in))
+ *out++ = *in;
+ else
+ *out++ = '?';
+ in++;
+ }
+
+ if (end < line + width) {
+ out[0] = out[1] = out[2] = '.';
+ out += 3;
+ }
+
+ *out++ = 0;
+ return pos - shift;
+}
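+
+/* For instance, with width=40 and pos=60 in a 100-char line, the output
+ * becomes "..." + 34 chars centered around <pos> + "...", and the returned
+ * value is the new position of the offending char inside that shortened
+ * string, suitable for placing a caret under it.
+ */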
+
+/* Update array <fp> with the fingerprint of word <word> by counting the
+ * transitions between characters. <fp> is a 1024-entry array indexed as
+ * 32*from+to. Positions for 'from' and 'to' are:
+ *   1..26=letter, 27=digit, 28=other/begin/end.
+ * Row "from=0" is used to mark each character's presence. Other rows are unused.
+ */
+void update_word_fingerprint(uint8_t *fp, const char *word)
+{
+ const char *p;
+ int from, to;
+ int c;
+
+ from = 28; // begin
+ for (p = word; *p; p++) {
+ c = tolower(*p);
+ switch(c) {
+ case 'a'...'z': to = c - 'a' + 1; break;
+ case 'A'...'Z': to = tolower(c) - 'a' + 1; break;
+ case '0'...'9': to = 27; break;
+ default: to = 28; break;
+ }
+ fp[to] = 1;
+ fp[32 * from + to]++;
+ from = to;
+ }
+ to = 28; // end
+ fp[32 * from + to]++;
+}
+
+/* This function hashes a word. <scramble> is the anonymizing key; the function
+ * returns the hashed word when the key (scramble) is non-zero, else it returns
+ * the word itself. Results are stored in a ring of NB_L_HASH_WORD static
+ * buffers, so no more than NB_L_HASH_WORD results may be in use at once.
+ */
+const char *hash_anon(uint32_t scramble, const char *string2hash, const char *prefix, const char *suffix)
+{
+ index_hash++;
+ if (index_hash == NB_L_HASH_WORD)
+ index_hash = 0;
+
+ /* don't hash empty strings */
+ if (!string2hash[0] || (string2hash[0] == ' ' && string2hash[1] == 0))
+ return string2hash;
+
+ if (scramble != 0) {
+ snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "%s%06x%s",
+ prefix, HA_ANON(scramble, string2hash, strlen(string2hash)), suffix);
+ return hash_word[index_hash];
+ }
+ else
+ return string2hash;
+}
+
+/* This function hashes an IP address <ipstring> when needed. <scramble> is the
+ * anonymizing key; it returns the hashed IP with its port, or <ipstring> itself
+ * when there is nothing to hash. Pass 0 in <hasport> to indicate that
+ * <ipstring> carries no port, any other value otherwise. Without a port, a
+ * simple hash of <ipstring>, or <ipstring> itself, is returned.
+ */
+const char *hash_ipanon(uint32_t scramble, char *ipstring, int hasport)
+{
+ char *errmsg = NULL;
+ struct sockaddr_storage *sa;
+ struct sockaddr_storage ss;
+ char addr[46];
+ int port;
+
+ index_hash++;
+ if (index_hash == NB_L_HASH_WORD) {
+ index_hash = 0;
+ }
+
+ if (scramble == 0) {
+ return ipstring;
+ }
+ if (strcmp(ipstring, "localhost") == 0 ||
+ strcmp(ipstring, "stdout") == 0 ||
+ strcmp(ipstring, "stderr") == 0 ||
+ strncmp(ipstring, "fd@", 3) == 0 ||
+ strncmp(ipstring, "sockpair@", 9) == 0) {
+ return ipstring;
+ }
+ else {
+ if (hasport == 0) {
+ memset(&ss, 0, sizeof(ss));
+ if (str2ip2(ipstring, &ss, 1) == NULL) {
+ return HA_ANON_STR(scramble, ipstring);
+ }
+ sa = &ss;
+ }
+ else {
+ sa = str2sa_range(ipstring, NULL, NULL, NULL, NULL, NULL, NULL, &errmsg, NULL, NULL,
+ PA_O_PORT_OK | PA_O_STREAM | PA_O_DGRAM | PA_O_XPRT | PA_O_CONNECT |
+ PA_O_PORT_RANGE | PA_O_PORT_OFS | PA_O_RESOLVE);
+ if (sa == NULL) {
+ return HA_ANON_STR(scramble, ipstring);
+ }
+ }
+ addr_to_str(sa, addr, sizeof(addr));
+ port = get_host_port(sa);
+
+ switch(sa->ss_family) {
+ case AF_INET:
+ if (strncmp(addr, "127", 3) == 0 || strncmp(addr, "255", 3) == 0 || strncmp(addr, "0", 1) == 0) {
+ return ipstring;
+ }
+ else {
+ if (port != 0) {
+ snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "IPV4(%06x):%d", HA_ANON(scramble, addr, strlen(addr)), port);
+ return hash_word[index_hash];
+ }
+ else {
+ snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "IPV4(%06x)", HA_ANON(scramble, addr, strlen(addr)));
+ return hash_word[index_hash];
+ }
+ }
+ break;
+
+ case AF_INET6:
+ if (strcmp(addr, "::1") == 0) {
+ return ipstring;
+ }
+ else {
+ if (port != 0) {
+ snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "IPV6(%06x):%d", HA_ANON(scramble, addr, strlen(addr)), port);
+ return hash_word[index_hash];
+ }
+ else {
+ snprintf(hash_word[index_hash], sizeof(hash_word[index_hash]), "IPV6(%06x)", HA_ANON(scramble, addr, strlen(addr)));
+ return hash_word[index_hash];
+ }
+ }
+ break;
+
+ case AF_UNIX:
+ return HA_ANON_STR(scramble, ipstring);
+ break;
+
+ default:
+ return ipstring;
+ break;
+ };
+ }
+ return ipstring;
+}
+
+/* Initialize array <fp> with the fingerprint of word <word> by counting the
+ * transitions between characters. <fp> is a 1024-entry array indexed as
+ * 32*from+to, with the same positions as in update_word_fingerprint():
+ *   1..26=letter, 27=digit, 28=other/begin/end, other rows unused.
+ */
+void make_word_fingerprint(uint8_t *fp, const char *word)
+{
+ memset(fp, 0, 1024);
+ update_word_fingerprint(fp, word);
+}
+
+/* Return the distance between two word fingerprints created by function
+ * make_word_fingerprint(). It's a positive integer calculated as the sum of
+ * the differences between each location.
+ */
+int word_fingerprint_distance(const uint8_t *fp1, const uint8_t *fp2)
+{
+ int i, k, dist = 0;
+
+ for (i = 0; i < 1024; i++) {
+ k = (int)fp1[i] - (int)fp2[i];
+ dist += abs(k);
+ }
+ return dist;
+}
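+
+/* Usage sketch: measuring spelling proximity, e.g. to suggest the closest
+ * known keyword for a mistyped one:
+ *
+ *   uint8_t fp1[1024], fp2[1024];
+ *   int dist;
+ *
+ *   make_word_fingerprint(fp1, "timeout");
+ *   make_word_fingerprint(fp2, "timeuot");
+ *   dist = word_fingerprint_distance(fp1, fp2);
+ *   // only the transitions around the swapped letters differ, so a small
+ *   // distance hints at a likely typo of the same word
+ */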
+
+/*
+ * This function compares the loaded OpenSSL version with a string <version>.
+ * It uses the same return codes as compare_current_version:
+ *
+ * -1 : the version in argument is older than the current openssl version
+ * 0 : the version in argument is the same as the current openssl version
+ * 1 : the version in argument is newer than the current openssl version
+ *
+ * Or some errors:
+ * -2 : openssl is not available on this process
+ * -3 : the version in argument is not parsable
+ */
+int openssl_compare_current_version(const char *version)
+{
+#ifdef USE_OPENSSL
+ int numversion;
+
+ numversion = openssl_version_parser(version);
+ if (numversion == 0)
+ return -3;
+
+ if (numversion < OPENSSL_VERSION_NUMBER)
+ return -1;
+ else if (numversion > OPENSSL_VERSION_NUMBER)
+ return 1;
+ else
+ return 0;
+#else
+ return -2;
+#endif
+}
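+
+/* For example, when running on OpenSSL 3.1:
+ *   openssl_compare_current_version("3.0.0") -> -1  (older than current)
+ *   openssl_compare_current_version("3.2.0") ->  1  (newer than current)
+ * and it returns -2 when the process was built without OpenSSL.
+ */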
+
+/*
+ * This function compares the loaded OpenSSL name with a string <name>.
+ * It returns 0 if the OpenSSL name starts with the passed parameter,
+ * 1 otherwise.
+ */
+int openssl_compare_current_name(const char *name)
+{
+#ifdef USE_OPENSSL
+ int name_len = 0;
+ const char *openssl_version = OpenSSL_version(OPENSSL_VERSION);
+
+ if (name) {
+ name_len = strlen(name);
+ if (strlen(name) <= strlen(openssl_version))
+ return strncmp(openssl_version, name, name_len);
+ }
+#endif
+ return 1;
+}
+
+#if defined(RTLD_DEFAULT) || defined(RTLD_NEXT)
+/* redefine dlopen() so that we can detect unexpected replacement of some
+ * critical symbols, typically init/alloc/free functions coming from alternate
+ * libraries. When called, a tainted flag is set (TAINTED_SHARED_LIBS).
+ * It's important to understand that the dynamic linker will present the
+ * first loaded of each symbol to all libs, so that if haproxy is linked
+ * with a new lib that uses a static inline or a #define to replace an old
+ * function, and a dependency was linked against an older version of that
+ * lib that had a function there, that lib would use all of the newer
+ * versions of the functions that are already loaded in haproxy, except
+ * for that unique function which would continue to be the old one. This
+ * creates all sort of problems when init code allocates smaller structs
+ * than required for example but uses new functions on them, etc. Thus what
+ * we do here is to try to detect API consistency: we take a fingerprint of
+ * a number of known functions, and verify that if they change in a loaded
+ * library, either they all appeared or all disappeared, but not partially.
+ * We can check up to 64 symbols that belong to individual groups that are
+ * checked together.
+ */
+void *dlopen(const char *filename, int flags)
+{
+ static void *(*_dlopen)(const char *filename, int flags);
+ struct {
+ const char *name;
+ uint64_t bit, grp;
+ void *curr, *next;
+ } check_syms[] = {
+ /* openssl's libcrypto checks: group bits 0x1f */
+ { .name="OPENSSL_init", .bit = 0x0000000000000001, .grp = 0x000000000000001f, }, // openssl 1.0 / 1.1 / 3.0
+ { .name="OPENSSL_init_crypto", .bit = 0x0000000000000002, .grp = 0x000000000000001f, }, // openssl 1.1 / 3.0
+ { .name="ENGINE_init", .bit = 0x0000000000000004, .grp = 0x000000000000001f, }, // openssl 1.x / 3.x with engine
+ { .name="EVP_CIPHER_CTX_init", .bit = 0x0000000000000008, .grp = 0x000000000000001f, }, // openssl 1.0
+ { .name="HMAC_Init", .bit = 0x0000000000000010, .grp = 0x000000000000001f, }, // openssl 1.x
+
+ /* openssl's libssl checks: group bits 0x3e0 */
+ { .name="OPENSSL_init_ssl", .bit = 0x0000000000000020, .grp = 0x00000000000003e0, }, // openssl 1.1 / 3.0
+ { .name="SSL_library_init", .bit = 0x0000000000000040, .grp = 0x00000000000003e0, }, // openssl 1.x
+ { .name="SSL_is_quic", .bit = 0x0000000000000080, .grp = 0x00000000000003e0, }, // quictls
+ { .name="SSL_CTX_new_ex", .bit = 0x0000000000000100, .grp = 0x00000000000003e0, }, // openssl 3.x
+ { .name="SSL_CTX_get0_security_ex_data", .bit = 0x0000000000000200, .grp = 0x00000000000003e0, }, // openssl 1.x / 3.x
+
+ /* insert only above, 0 must be the last one */
+ { 0 },
+ };
+ const char *trace;
+ uint64_t own_fp, lib_fp; // symbols fingerprints
+ void *addr;
+ void *ret;
+ int sym = 0;
+
+ if (!_dlopen) {
+ _dlopen = get_sym_next_addr("dlopen");
+ if (!_dlopen || _dlopen == dlopen) {
+ _dlopen = NULL;
+ return NULL;
+ }
+ }
+
+ /* save a few pointers to critical symbols. We keep a copy of both the
+ * current and the next value, because we might already have replaced
+ * some of them in an inconsistent way (i.e. not all), and we're only
+ * interested in verifying that a loaded library doesn't come with a
+ * completely different definition that would be incompatible. We'll
+ * keep a fingerprint of our own symbols.
+ */
+ own_fp = 0;
+ for (sym = 0; check_syms[sym].name; sym++) {
+ check_syms[sym].curr = get_sym_curr_addr(check_syms[sym].name);
+ check_syms[sym].next = get_sym_next_addr(check_syms[sym].name);
+ if (check_syms[sym].curr || check_syms[sym].next)
+ own_fp |= check_syms[sym].bit;
+ }
+
+ /* now open the requested lib */
+ ret = _dlopen(filename, flags);
+ if (!ret)
+ return ret;
+
+ mark_tainted(TAINTED_SHARED_LIBS);
+
+ /* and check that critical symbols didn't change */
+ lib_fp = 0;
+ for (sym = 0; check_syms[sym].name; sym++) {
+ addr = dlsym(ret, check_syms[sym].name);
+ if (addr)
+ lib_fp |= check_syms[sym].bit;
+ }
+
+ if (lib_fp != own_fp) {
+ /* let's check what changed: */
+ uint64_t mask = 0;
+
+ for (sym = 0; check_syms[sym].name; sym++) {
+ mask = check_syms[sym].grp;
+
+ /* new group of symbols. If they all appeared together
+ * their use will be consistent. If none appears, it's
+ * just that the lib doesn't use them. If some appear
+ * or disappear, it means the lib relies on a different
+ * dependency and will end up with a mix.
+ */
+ if (!(own_fp & mask) || !(lib_fp & mask) ||
+ (own_fp & mask) == (lib_fp & mask))
+ continue;
+
+ /* let's report a symbol that really changes */
+ if (!((own_fp ^ lib_fp) & check_syms[sym].bit))
+ continue;
+
+ /* OK it's clear that this symbol was redefined */
+ mark_tainted(TAINTED_REDEFINITION);
+
+ trace = hlua_show_current_location("\n ");
+ ha_warning("dlopen(): shared library '%s' brings a different and inconsistent definition of symbol '%s'. The process cannot be trusted anymore!%s%s\n",
+ filename, check_syms[sym].name,
+ trace ? " Suspected call location: \n " : "",
+ trace ? trace : "");
+ }
+ }
+
+ return ret;
+}
+#endif
+
+static int init_tools_per_thread()
+{
+ /* Let's make each thread start from a different position */
+ statistical_prng_state += tid * MAX_THREADS;
+ if (!statistical_prng_state)
+ statistical_prng_state++;
+ return 1;
+}
+REGISTER_PER_THREAD_INIT(init_tools_per_thread);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/trace.c b/src/trace.c
new file mode 100644
index 0000000..a233c0d
--- /dev/null
+++ b/src/trace.c
@@ -0,0 +1,997 @@
+/*
+ * Runtime tracing API
+ *
+ * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <import/ist.h>
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/cli.h>
+#include <haproxy/errors.h>
+#include <haproxy/istbuf.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/sink.h>
+#include <haproxy/trace.h>
+
+struct list trace_sources = LIST_HEAD_INIT(trace_sources);
+THREAD_LOCAL struct buffer trace_buf = { };
+
+/* allocates the trace buffers. Returns 0 in case of failure. It is safe to
+ * call this function multiple times if the size changes.
+ */
+static int alloc_trace_buffers_per_thread()
+{
+ chunk_init(&trace_buf, my_realloc2(trace_buf.area, global.tune.bufsize), global.tune.bufsize);
+ return !!trace_buf.area;
+}
+
+static void free_trace_buffers_per_thread()
+{
+ chunk_destroy(&trace_buf);
+}
+
+REGISTER_PER_THREAD_ALLOC(alloc_trace_buffers_per_thread);
+REGISTER_PER_THREAD_FREE(free_trace_buffers_per_thread);
+
+/* pick the lowest non-null argument with a non-null arg_def mask */
+static inline const void *trace_pick_arg(uint32_t arg_def, const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ if (arg_def & 0x0000FFFF) {
+ if ((arg_def & 0x000000FF) && a1)
+ return a1;
+ if ((arg_def & 0x0000FF00) && a2)
+ return a2;
+ }
+
+ if (arg_def & 0xFFFF0000) {
+ if ((arg_def & 0x00FF0000) && a3)
+ return a3;
+ if ((arg_def & 0xFF000000) && a4)
+ return a4;
+ }
+
+ return NULL;
+}
+
+/* Reports whether the trace is enabled for the specified arguments, and whether
+ * tracking needs to be enabled or disabled. It takes the same API as __trace()
+ * except for <cb> and <msg> which are not used and were dropped, and plockptr
+ * which is an optional pointer to the lockptr to be updated (or NULL) for
+ * tracking. The function returns:
+ *   0 if the trace is not enabled for the module or these values
+ *  <0 if the trace matches some locking criteria but doesn't have the proper
+ *     level. In this case the interested caller might have to consider
+ *     disabling tracking.
+ *  >0 if the trace is enabled for the given criteria.
+ * In all cases, <plockptr> will only be set if non-null and if a locking
+ * criterion matched. It will be up to the caller to enable tracking if
+ * desired. A casual tester not interested in adjusting tracking (i.e. calling
+ * the function before deciding to prepare a buffer to be dumped) will only
+ * need to pass 0 for plockptr and check if the result is >0.
+ */
+int __trace_enabled(enum trace_level level, uint64_t mask, struct trace_source *src,
+ const struct ist where, const char *func,
+ const void *a1, const void *a2, const void *a3, const void *a4,
+ const void **plockptr)
+{
+ const struct listener *li = NULL;
+ const struct proxy *fe = NULL;
+ const struct proxy *be = NULL;
+ const struct server *srv = NULL;
+ const struct session *sess = NULL;
+ const struct stream *strm = NULL;
+ const struct connection *conn = NULL;
+ const struct check *check = NULL;
+ const struct quic_conn *qc = NULL;
+ const struct appctx *appctx = NULL;
+ const void *lockon_ptr = NULL;
+
+ if (likely(src->state == TRACE_STATE_STOPPED))
+ return 0;
+
+ /* check that at least one action is interested by this event */
+ if (((src->report_events | src->start_events | src->pause_events | src->stop_events) & mask) == 0)
+ return 0;
+
+ /* retrieve available information from the caller's arguments */
+ if (src->arg_def & TRC_ARGS_CONN)
+ conn = trace_pick_arg(src->arg_def & TRC_ARGS_CONN, a1, a2, a3, a4);
+
+ if (src->arg_def & TRC_ARGS_SESS)
+ sess = trace_pick_arg(src->arg_def & TRC_ARGS_SESS, a1, a2, a3, a4);
+
+ if (src->arg_def & TRC_ARGS_STRM)
+ strm = trace_pick_arg(src->arg_def & TRC_ARGS_STRM, a1, a2, a3, a4);
+
+ if (src->arg_def & TRC_ARGS_CHK)
+ check = trace_pick_arg(src->arg_def & TRC_ARGS_CHK, a1, a2, a3, a4);
+
+ if (src->arg_def & TRC_ARGS_QCON)
+ qc = trace_pick_arg(src->arg_def & TRC_ARGS_QCON, a1, a2, a3, a4);
+
+ if (src->arg_def & TRC_ARGS_APPCTX)
+ appctx = trace_pick_arg(src->arg_def & TRC_ARGS_APPCTX, a1, a2, a3, a4);
+
+ if (!sess && strm)
+ sess = strm->sess;
+ else if (!sess && conn && LIST_INLIST(&conn->session_list))
+ sess = conn->owner;
+ else if (!sess && check)
+ sess = check->sess;
+ else if (!sess && appctx)
+ sess = appctx->sess;
+
+ if (sess) {
+ fe = sess->fe;
+ li = sess->listener;
+ }
+
+ if (!li && conn)
+ li = objt_listener(conn->target);
+
+ if (li && !fe)
+ fe = li->bind_conf->frontend;
+
+ if (strm) {
+ be = strm->be;
+ srv = strm->srv_conn;
+ }
+ if (check) {
+ srv = check->server;
+ be = (srv ? srv->proxy : NULL);
+ }
+
+ if (!srv && conn)
+ srv = objt_server(conn->target);
+
+ if (srv && !be)
+ be = srv->proxy;
+
+ if (!be && conn)
+ be = objt_proxy(conn->target);
+
+ /* TODO: add handling of filters here, return if no match (not even update states) */
+
+ /* check if we need to start the trace now */
+ if (src->state == TRACE_STATE_WAITING) {
+ if ((src->start_events & mask) == 0)
+ return 0;
+
+ /* TODO: add update of lockon+lockon_ptr here */
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_RUNNING);
+ }
+
+ /* we may want to lock on a particular object */
+ if (src->lockon != TRACE_LOCKON_NOTHING) {
+ switch (src->lockon) {
+ case TRACE_LOCKON_BACKEND: lockon_ptr = be; break;
+ case TRACE_LOCKON_CONNECTION: lockon_ptr = conn; break;
+ case TRACE_LOCKON_FRONTEND: lockon_ptr = fe; break;
+ case TRACE_LOCKON_LISTENER: lockon_ptr = li; break;
+ case TRACE_LOCKON_SERVER: lockon_ptr = srv; break;
+ case TRACE_LOCKON_SESSION: lockon_ptr = sess; break;
+ case TRACE_LOCKON_STREAM: lockon_ptr = strm; break;
+ case TRACE_LOCKON_CHECK: lockon_ptr = check; break;
+ case TRACE_LOCKON_THREAD: lockon_ptr = ti; break;
+ case TRACE_LOCKON_QCON: lockon_ptr = qc; break;
+ case TRACE_LOCKON_APPCTX: lockon_ptr = appctx; break;
+ case TRACE_LOCKON_ARG1: lockon_ptr = a1; break;
+ case TRACE_LOCKON_ARG2: lockon_ptr = a2; break;
+ case TRACE_LOCKON_ARG3: lockon_ptr = a3; break;
+ case TRACE_LOCKON_ARG4: lockon_ptr = a4; break;
+ default: break; // silence stupid gcc -Wswitch
+ }
+
+ if (src->lockon_ptr && src->lockon_ptr != lockon_ptr)
+ return 0;
+
+ if (*plockptr && !src->lockon_ptr && lockon_ptr && src->state == TRACE_STATE_RUNNING)
+ *plockptr = lockon_ptr;
+ }
+
+ /* here the trace is running and is tracking a desired item */
+ if ((src->report_events & mask) == 0 || level > src->level) {
+ /* tracking did match, and might have to be disabled */
+ return -1;
+ }
+
+ /* OK trace still enabled */
+ return 1;
+}
+
+/* write a message for the given trace source */
+void __trace(enum trace_level level, uint64_t mask, struct trace_source *src,
+ const struct ist where, const char *func,
+ const void *a1, const void *a2, const void *a3, const void *a4,
+ void (*cb)(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4),
+ const struct ist msg)
+{
+ const void *lockon_ptr;
+ struct ist ist_func = ist(func);
+ char tnum[4];
+ struct ist line[12];
+ int words = 0;
+ int ret;
+
+ lockon_ptr = NULL;
+ ret = __trace_enabled(level, mask, src, where, func, a1, a2, a3, a4, &lockon_ptr);
+ if (lockon_ptr)
+ HA_ATOMIC_STORE(&src->lockon_ptr, lockon_ptr);
+
+ if (ret <= 0) {
+ if (ret < 0) // may have to disable tracking
+ goto end;
+ return;
+ }
+
+ /* log the logging location truncated to 13 chars from the right so that
+ * the line number and the end of the file name are there.
+ */
+ line[words++] = ist("[");
+ tnum[0] = '0' + tid / 10;
+ tnum[1] = '0' + tid % 10;
+ tnum[2] = '|';
+ tnum[3] = 0;
+ line[words++] = ist(tnum);
+ line[words++] = src->name;
+ line[words++] = ist("|");
+ line[words++] = ist2("012345" + level, 1); // "0" to "5"
+ line[words++] = ist("|");
+ line[words] = where;
+ if (line[words].len > 13) {
+ line[words].ptr += (line[words].len - 13);
+ line[words].len = 13;
+ }
+ words++;
+ line[words++] = ist("] ");
+
+ if (isttest(ist_func)) {
+ line[words++] = ist_func;
+ line[words++] = ist("(): ");
+ }
+
+ if (!cb)
+ cb = src->default_cb;
+
+ if (cb && src->verbosity) {
+ /* decode function passed, we want to pre-fill the
+ * buffer with the message and let the decode function
+ * do its job, possibly even overwriting it.
+ */
+ b_reset(&trace_buf);
+ b_istput(&trace_buf, msg);
+ cb(level, mask, src, where, ist_func, a1, a2, a3, a4);
+ line[words] = ist2(trace_buf.area, trace_buf.data);
+ words++;
+ }
+ else {
+ /* Note that here we could decide to print some args whose type
+ * is known, when verbosity is above the quiet level, and even
+ * to print the name and values of those which are declared for
+ * lock-on.
+ */
+ line[words++] = msg;
+ }
+
+ if (src->sink)
+ sink_write(src->sink, LOG_HEADER_NONE, 0, line, words);
+
+ end:
+ /* check if we need to stop the trace now */
+ if ((src->stop_events & mask) != 0) {
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_STOPPED);
+ }
+ else if ((src->pause_events & mask) != 0) {
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_WAITING);
+ }
+}
+
+/* this callback may be used when no output modification is desired */
+void trace_no_cb(enum trace_level level, uint64_t mask, const struct trace_source *src,
+ const struct ist where, const struct ist func,
+ const void *a1, const void *a2, const void *a3, const void *a4)
+{
+ /* do nothing */
+}
+
+/* registers trace source <source>. Modifies the list element!
+ * The {start,pause,stop,report} events are not changed so the source may
+ * preset them.
+ */
+void trace_register_source(struct trace_source *source)
+{
+ source->lockon = TRACE_LOCKON_NOTHING;
+ source->level = TRACE_LEVEL_USER;
+ source->verbosity = 1;
+ source->sink = NULL;
+ source->state = TRACE_STATE_STOPPED;
+ source->lockon_ptr = NULL;
+ LIST_APPEND(&trace_sources, &source->source_link);
+}
+
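+/* Illustrative only: a subsystem typically declares its source statically and
+ * registers it at startup, roughly as below ("foo" and foo_trace_events are
+ * assumptions for this example, not upstream names):
+ *
+ *   static struct trace_source trace_foo = {
+ *           .name = IST("foo"),
+ *           .desc = "foo subsystem",
+ *           .known_events = foo_trace_events,
+ *   };
+ *   INITCALL1(STG_REGISTER, trace_register_source, &trace_foo);
+ */
+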
+struct trace_source *trace_find_source(const char *name)
+{
+ struct trace_source *src;
+ const struct ist iname = ist(name);
+
+ list_for_each_entry(src, &trace_sources, source_link)
+ if (isteq(src->name, iname))
+ return src;
+ return NULL;
+}
+
+const struct trace_event *trace_find_event(const struct trace_event *ev, const char *name)
+{
+ for (; ev && ev->mask; ev++)
+ if (strcmp(ev->name, name) == 0)
+ return ev;
+ return NULL;
+}
+
+/* Returns the level value or a negative error code. */
+static int trace_parse_level(const char *level)
+{
+ if (!level)
+ return -1;
+
+ if (strcmp(level, "error") == 0)
+ return TRACE_LEVEL_ERROR;
+ else if (strcmp(level, "user") == 0)
+ return TRACE_LEVEL_USER;
+ else if (strcmp(level, "proto") == 0)
+ return TRACE_LEVEL_PROTO;
+ else if (strcmp(level, "state") == 0)
+ return TRACE_LEVEL_STATE;
+ else if (strcmp(level, "data") == 0)
+ return TRACE_LEVEL_DATA;
+ else if (strcmp(level, "developer") == 0)
+ return TRACE_LEVEL_DEVELOPER;
+ else
+ return -1;
+}
+
+/* Returns the verbosity value or a negative error code. */
+static int trace_source_parse_verbosity(struct trace_source *src,
+ const char *verbosity)
+{
+ const struct name_desc *nd;
+ int ret;
+
+ if (strcmp(verbosity, "quiet") == 0) {
+ ret = 0;
+ goto end;
+ }
+
+ /* Only "quiet" is defined for all sources. Other identifiers are
+	 * specific to each trace source.
+ */
+ BUG_ON(!src);
+
+ if (!src->decoding || !src->decoding[0].name) {
+ if (strcmp(verbosity, "default") != 0)
+ return -1;
+
+ ret = 1;
+ }
+ else {
+ for (nd = src->decoding; nd->name && nd->desc; nd++)
+ if (strcmp(verbosity, nd->name) == 0)
+ break;
+
+ if (!nd->name || !nd->desc)
+ return -1;
+
+ ret = nd - src->decoding + 1;
+ }
+
+ end:
+ return ret;
+}
+
+/* Parse a "trace" statement. Returns a severity as a LOG_* level and a status
+ * message that may be delivered to the user, in <msg>. The message will be
+ * nulled first and msg must be an allocated pointer. A null status message output
+ * indicates no error. Be careful not to use the return value as a boolean, as
+ * LOG_* values are not ordered as one could imagine (LOG_EMERG is zero). The
+ * function may/will use the trash buffer as the storage for the response
+ * message so that the caller never needs to release anything.
+ */
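+/* A few illustrative statements as accepted by this parser, whether entered
+ * on the CLI or in the "global" section ("h2" is an example source name and
+ * "buf0" an example ring sink):
+ *
+ *   trace h2 sink buf0
+ *   trace h2 level developer
+ *   trace h2 start now
+ */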
+static int trace_parse_statement(char **args, char **msg)
+{
+ struct trace_source *src;
+ uint64_t *ev_ptr = NULL;
+
+ /* no error by default */
+ *msg = NULL;
+
+ if (!*args[1]) {
+ /* no arg => report the list of supported sources as a warning */
+ chunk_printf(&trash,
+ "Supported trace sources and states (.=stopped, w=waiting, R=running) :\n"
+ " [.] 0 : not a source, will immediately stop all traces\n"
+ );
+
+ list_for_each_entry(src, &trace_sources, source_link)
+ chunk_appendf(&trash, " [%c] %-10s : %s\n", trace_state_char(src->state), src->name.ptr, src->desc);
+
+ trash.area[trash.data] = 0;
+ *msg = strdup(trash.area);
+ return LOG_WARNING;
+ }
+
+ if (strcmp(args[1], "0") == 0) {
+ /* emergency stop of all traces */
+ list_for_each_entry(src, &trace_sources, source_link)
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_STOPPED);
+ *msg = strdup("All traces now stopped");
+ return LOG_NOTICE;
+ }
+
+ src = trace_find_source(args[1]);
+ if (!src) {
+ memprintf(msg, "No such trace source '%s'", args[1]);
+ return LOG_ERR;
+ }
+
+ if (!*args[2]) {
+ *msg = "Supported commands:\n"
+ " event : list/enable/disable source-specific event reporting\n"
+ //" filter : list/enable/disable generic filters\n"
+ " level : list/set trace reporting level\n"
+ " lock : automatic lock on thread/connection/stream/...\n"
+ " pause : pause and automatically restart after a specific event\n"
+ " sink : list/set event sinks\n"
+ " start : start immediately or after a specific event\n"
+ " stop : stop immediately or after a specific event\n"
+ " verbosity : list/set trace output verbosity\n";
+ *msg = strdup(*msg);
+ return LOG_WARNING;
+ }
+ else if ((strcmp(args[2], "event") == 0 && (ev_ptr = &src->report_events)) ||
+ (strcmp(args[2], "pause") == 0 && (ev_ptr = &src->pause_events)) ||
+ (strcmp(args[2], "start") == 0 && (ev_ptr = &src->start_events)) ||
+ (strcmp(args[2], "stop") == 0 && (ev_ptr = &src->stop_events))) {
+ const struct trace_event *ev;
+ const char *name = args[3];
+ int neg = 0;
+ int i;
+
+		/* skip prefix '!', '-', '+' and remember the negation */
+ while (*name) {
+ if (*name == '!' || *name == '-')
+ neg = 1;
+ else if (*name == '+')
+ neg = 0;
+ else
+ break;
+ name++;
+ }
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported events for source %s (+=enabled, -=disabled):\n", src->name.ptr);
+ if (ev_ptr != &src->report_events)
+ chunk_appendf(&trash, " - now : don't wait for events, immediately change the state\n");
+ chunk_appendf(&trash, " - none : disable all event types\n");
+ chunk_appendf(&trash, " - any : enable all event types\n");
+ for (i = 0; src->known_events && src->known_events[i].mask; i++) {
+ chunk_appendf(&trash, " %c %-12s : %s\n",
+ trace_event_char(*ev_ptr, src->known_events[i].mask),
+ src->known_events[i].name, src->known_events[i].desc);
+ }
+ trash.area[trash.data] = 0;
+ *msg = strdup(trash.area);
+ return LOG_WARNING;
+ }
+
+ if (strcmp(name, "now") == 0 && ev_ptr != &src->report_events) {
+ HA_ATOMIC_STORE(ev_ptr, 0);
+ if (ev_ptr == &src->pause_events) {
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_WAITING);
+ }
+ else if (ev_ptr == &src->start_events) {
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_RUNNING);
+ }
+ else if (ev_ptr == &src->stop_events) {
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ HA_ATOMIC_STORE(&src->state, TRACE_STATE_STOPPED);
+ }
+ return 0;
+ }
+
+ if (strcmp(name, "none") == 0)
+ HA_ATOMIC_STORE(ev_ptr, 0);
+ else if (strcmp(name, "any") == 0)
+ HA_ATOMIC_STORE(ev_ptr, ~0);
+ else {
+ ev = trace_find_event(src->known_events, name);
+ if (!ev) {
+ memprintf(msg, "No such trace event '%s'", name);
+ return LOG_ERR;
+ }
+
+ if (!neg)
+ HA_ATOMIC_OR(ev_ptr, ev->mask);
+ else
+ HA_ATOMIC_AND(ev_ptr, ~ev->mask);
+ }
+ }
+ else if (strcmp(args[2], "sink") == 0) {
+ const char *name = args[3];
+ struct sink *sink;
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported sinks for source %s (*=current):\n", src->name.ptr);
+ chunk_appendf(&trash, " %c none : no sink\n", src->sink ? ' ' : '*');
+ list_for_each_entry(sink, &sink_list, sink_list) {
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->sink == sink ? '*' : ' ',
+ sink->name, sink->desc);
+ }
+ trash.area[trash.data] = 0;
+ *msg = strdup(trash.area);
+ return LOG_WARNING;
+ }
+
+ if (strcmp(name, "none") == 0)
+ sink = NULL;
+ else {
+ sink = sink_find(name);
+ if (!sink) {
+ memprintf(msg, "No such trace sink '%s'", name);
+ return LOG_ERR;
+ }
+ }
+
+ HA_ATOMIC_STORE(&src->sink, sink);
+ }
+ else if (strcmp(args[2], "level") == 0) {
+ const char *name = args[3];
+ int level;
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported trace levels for source %s:\n", src->name.ptr);
+ chunk_appendf(&trash, " %c error : report errors\n",
+ src->level == TRACE_LEVEL_ERROR ? '*' : ' ');
+ chunk_appendf(&trash, " %c user : also information useful to the end user\n",
+ src->level == TRACE_LEVEL_USER ? '*' : ' ');
+ chunk_appendf(&trash, " %c proto : also protocol-level updates\n",
+ src->level == TRACE_LEVEL_PROTO ? '*' : ' ');
+ chunk_appendf(&trash, " %c state : also report internal state changes\n",
+ src->level == TRACE_LEVEL_STATE ? '*' : ' ');
+ chunk_appendf(&trash, " %c data : also report data transfers\n",
+ src->level == TRACE_LEVEL_DATA ? '*' : ' ');
+ chunk_appendf(&trash, " %c developer : also report information useful only to the developer\n",
+ src->level == TRACE_LEVEL_DEVELOPER ? '*' : ' ');
+ trash.area[trash.data] = 0;
+ *msg = strdup(trash.area);
+ return LOG_WARNING;
+ }
+
+ level = trace_parse_level(name);
+ if (level < 0) {
+ memprintf(msg, "No such trace level '%s'", name);
+ return LOG_ERR;
+ }
+
+ HA_ATOMIC_STORE(&src->level, level);
+ }
+ else if (strcmp(args[2], "lock") == 0) {
+ const char *name = args[3];
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported lock-on criteria for source %s:\n", src->name.ptr);
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c backend : lock on the backend that started the trace\n",
+ src->lockon == TRACE_LOCKON_BACKEND ? '*' : ' ');
+
+ if (src->arg_def & TRC_ARGS_CHK)
+ chunk_appendf(&trash, " %c check : lock on the check that started the trace\n",
+ src->lockon == TRACE_LOCKON_CHECK ? '*' : ' ');
+
+ if (src->arg_def & TRC_ARGS_CONN)
+ chunk_appendf(&trash, " %c connection : lock on the connection that started the trace\n",
+ src->lockon == TRACE_LOCKON_CONNECTION ? '*' : ' ');
+
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c frontend : lock on the frontend that started the trace\n",
+ src->lockon == TRACE_LOCKON_FRONTEND ? '*' : ' ');
+
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c listener : lock on the listener that started the trace\n",
+ src->lockon == TRACE_LOCKON_LISTENER ? '*' : ' ');
+
+ chunk_appendf(&trash, " %c nothing : do not lock on anything\n",
+ src->lockon == TRACE_LOCKON_NOTHING ? '*' : ' ');
+
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c server : lock on the server that started the trace\n",
+ src->lockon == TRACE_LOCKON_SERVER ? '*' : ' ');
+
+ if (src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM))
+ chunk_appendf(&trash, " %c session : lock on the session that started the trace\n",
+ src->lockon == TRACE_LOCKON_SESSION ? '*' : ' ');
+
+ if (src->arg_def & TRC_ARGS_STRM)
+ chunk_appendf(&trash, " %c stream : lock on the stream that started the trace\n",
+ src->lockon == TRACE_LOCKON_STREAM ? '*' : ' ');
+
+ if (src->arg_def & TRC_ARGS_APPCTX)
+				chunk_appendf(&trash, "  %c appctx     : lock on the appctx that started the trace\n",
+ src->lockon == TRACE_LOCKON_APPCTX ? '*' : ' ');
+
+ chunk_appendf(&trash, " %c thread : lock on the thread that started the trace\n",
+ src->lockon == TRACE_LOCKON_THREAD ? '*' : ' ');
+
+ if (src->lockon_args && src->lockon_args[0].name)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->lockon == TRACE_LOCKON_ARG1 ? '*' : ' ',
+ src->lockon_args[0].name, src->lockon_args[0].desc);
+
+ if (src->lockon_args && src->lockon_args[1].name)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->lockon == TRACE_LOCKON_ARG2 ? '*' : ' ',
+ src->lockon_args[1].name, src->lockon_args[1].desc);
+
+ if (src->lockon_args && src->lockon_args[2].name)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->lockon == TRACE_LOCKON_ARG3 ? '*' : ' ',
+ src->lockon_args[2].name, src->lockon_args[2].desc);
+
+ if (src->lockon_args && src->lockon_args[3].name)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ src->lockon == TRACE_LOCKON_ARG4 ? '*' : ' ',
+ src->lockon_args[3].name, src->lockon_args[3].desc);
+
+ trash.area[trash.data] = 0;
+ *msg = strdup(trash.area);
+ return LOG_WARNING;
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_STRM)) && strcmp(name, "backend") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_BACKEND);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & TRC_ARGS_CHK) && strcmp(name, "check") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_CHECK);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & TRC_ARGS_CONN) && strcmp(name, "connection") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_CONNECTION);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM)) && strcmp(name, "frontend") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_FRONTEND);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM)) && strcmp(name, "listener") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_LISTENER);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (strcmp(name, "nothing") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_NOTHING);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_STRM)) && strcmp(name, "server") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_SERVER);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & (TRC_ARGS_CONN|TRC_ARGS_SESS|TRC_ARGS_STRM)) && strcmp(name, "session") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_SESSION);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & TRC_ARGS_STRM) && strcmp(name, "stream") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_STREAM);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if ((src->arg_def & TRC_ARGS_APPCTX) && strcmp(name, "appctx") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_APPCTX);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (strcmp(name, "thread") == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_THREAD);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (src->lockon_args && src->lockon_args[0].name && strcmp(name, src->lockon_args[0].name) == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_ARG1);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (src->lockon_args && src->lockon_args[1].name && strcmp(name, src->lockon_args[1].name) == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_ARG2);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (src->lockon_args && src->lockon_args[2].name && strcmp(name, src->lockon_args[2].name) == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_ARG3);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else if (src->lockon_args && src->lockon_args[3].name && strcmp(name, src->lockon_args[3].name) == 0) {
+ HA_ATOMIC_STORE(&src->lockon, TRACE_LOCKON_ARG4);
+ HA_ATOMIC_STORE(&src->lockon_ptr, NULL);
+ }
+ else {
+ memprintf(msg, "Unsupported lock-on criterion '%s'", name);
+ return LOG_ERR;
+ }
+ }
+ else if (strcmp(args[2], "verbosity") == 0) {
+ const char *name = args[3];
+ const struct name_desc *nd;
+ int verbosity;
+
+ if (!*name) {
+ chunk_printf(&trash, "Supported trace verbosities for source %s:\n", src->name.ptr);
+ chunk_appendf(&trash, " %c quiet : only report basic information with no decoding\n",
+ src->verbosity == 0 ? '*' : ' ');
+ if (!src->decoding || !src->decoding[0].name) {
+ chunk_appendf(&trash, " %c default : report extra information when available\n",
+ src->verbosity > 0 ? '*' : ' ');
+ } else {
+ for (nd = src->decoding; nd->name && nd->desc; nd++)
+ chunk_appendf(&trash, " %c %-10s : %s\n",
+ nd == (src->decoding + src->verbosity - 1) ? '*' : ' ',
+ nd->name, nd->desc);
+ }
+ trash.area[trash.data] = 0;
+ *msg = strdup(trash.area);
+ return LOG_WARNING;
+ }
+
+ verbosity = trace_source_parse_verbosity(src, name);
+ if (verbosity < 0) {
+ memprintf(msg, "No such verbosity level '%s'", name);
+ return LOG_ERR;
+ }
+
+ HA_ATOMIC_STORE(&src->verbosity, verbosity);
+ }
+ else {
+ memprintf(msg, "Unknown trace keyword '%s'", args[2]);
+ return LOG_ERR;
+ }
+ return 0;
+}
+
+void _trace_parse_cmd(struct trace_source *src, int level, int verbosity)
+{
+ src->sink = sink_find("stderr");
+ src->level = level >= 0 ? level : TRACE_LEVEL_ERROR;
+ src->verbosity = verbosity >= 0 ? verbosity : 1;
+ src->state = TRACE_STATE_RUNNING;
+}
+
+/* Parse a process argument specified via "-dt".
+ *
+ * Returns 0 on success, non-zero otherwise.
+ */
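+/* Illustrative argument forms, matching the parsing below ("h2" and "h1" are
+ * example source names):
+ *
+ *   -dt                     enable all sources at the "error" level
+ *   -dt h2                  enable only the "h2" source
+ *   -dt h2:proto            "h2" at the "proto" level
+ *   -dt h2:proto:quiet,h1   several comma-delimited specifications
+ */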
+int trace_parse_cmd(char *arg, char **errmsg)
+{
+ char *str;
+
+ if (!arg) {
+		/* No trace specification, activate all sources at the error level. */
+ struct trace_source *src = NULL;
+
+ list_for_each_entry(src, &trace_sources, source_link)
+ _trace_parse_cmd(src, -1, -1);
+ return 0;
+ }
+
+ while ((str = strtok(arg, ","))) {
+ struct trace_source *src = NULL;
+ char *field, *name;
+ char *sep;
+ int level = -1, verbosity = -1;
+
+ /* 1. name */
+ name = str;
+ sep = strchr(str, ':');
+ if (sep) {
+ str = sep + 1;
+ *sep = '\0';
+ }
+ else {
+ str = NULL;
+ }
+
+ if (strlen(name)) {
+ src = trace_find_source(name);
+ if (!src) {
+ memprintf(errmsg, "unknown trace source '%s'", name);
+ return 1;
+ }
+ }
+
+ if (!str || !strlen(str))
+ goto parse;
+
+ /* 2. level */
+ field = str;
+ sep = strchr(str, ':');
+ if (sep) {
+ str = sep + 1;
+ *sep = '\0';
+ }
+ else {
+ str = NULL;
+ }
+
+ if (strlen(field)) {
+ level = trace_parse_level(field);
+ if (level < 0) {
+ memprintf(errmsg, "no such level '%s'", field);
+ return 1;
+ }
+ }
+
+ if (!str || !strlen(str))
+ goto parse;
+
+ /* 3. verbosity */
+ field = str;
+ if (strchr(field, ':')) {
+			memprintf(errmsg, "too many colon separators");
+ return 1;
+ }
+
+ if (!src && strcmp(field, "quiet") != 0) {
+ memprintf(errmsg, "trace source must be specified for verbosity other than 'quiet'");
+ return 1;
+ }
+
+ verbosity = trace_source_parse_verbosity(src, field);
+ if (verbosity < 0) {
+ memprintf(errmsg, "no such verbosity '%s' for source '%s'", field, name);
+ return 1;
+ }
+
+ parse:
+ if (src) {
+ _trace_parse_cmd(src, level, verbosity);
+ }
+ else {
+ list_for_each_entry(src, &trace_sources, source_link)
+ _trace_parse_cmd(src, level, verbosity);
+ }
+
+ /* Reset arg to NULL for strtok. */
+ arg = NULL;
+ }
+
+ return 0;
+}
+
+/* parse a "trace" statement in the "global" section, returns 1 if a message is returned, otherwise zero */
+static int cfg_parse_trace(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ char *msg;
+ int severity;
+
+ severity = trace_parse_statement(args, &msg);
+ if (msg) {
+ if (severity >= LOG_NOTICE)
+ ha_notice("parsing [%s:%d] : '%s': %s\n", file, line, args[0], msg);
+ else if (severity >= LOG_WARNING)
+ ha_warning("parsing [%s:%d] : '%s': %s\n", file, line, args[0], msg);
+ else {
+ /* let the caller free the message */
+ *err = msg;
+ return -1;
+ }
+ ha_free(&msg);
+ }
+ return 0;
+}
+
+/* parse the command, returns 1 if a message is returned, otherwise zero */
+static int cli_parse_trace(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ char *msg;
+ int severity;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ severity = trace_parse_statement(args, &msg);
+ if (msg)
+ return cli_dynmsg(appctx, severity, msg);
+
+ /* total success */
+ return 0;
+}
+
+/* parse the command, returns 1 if a message is returned, otherwise zero */
+static int cli_parse_show_trace(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct trace_source *src;
+ const struct sink *sink;
+ int i;
+
+ args++; // make args[1] the 1st arg
+
+ if (!*args[1]) {
+ /* no arg => report the list of supported sources */
+ chunk_printf(&trash,
+ "Supported trace sources and states (.=stopped, w=waiting, R=running) :\n"
+ );
+
+ list_for_each_entry(src, &trace_sources, source_link) {
+ sink = src->sink;
+ chunk_appendf(&trash, " [%c] %-10s -> %s [drp %u] [%s]\n",
+ trace_state_char(src->state), src->name.ptr,
+ sink ? sink->name : "none",
+ sink ? sink->ctx.dropped : 0,
+ src->desc);
+ }
+
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_INFO, trash.area);
+ }
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ src = trace_find_source(args[1]);
+ if (!src)
+ return cli_err(appctx, "No such trace source");
+
+ sink = src->sink;
+ chunk_printf(&trash, "Trace status for %s:\n", src->name.ptr);
+ chunk_appendf(&trash, " - sink: %s [%u dropped]\n",
+ sink ? sink->name : "none", sink ? sink->ctx.dropped : 0);
+
+ chunk_appendf(&trash, " - event name : report start stop pause\n");
+ for (i = 0; src->known_events && src->known_events[i].mask; i++) {
+ chunk_appendf(&trash, " %-12s : %c %c %c %c\n",
+ src->known_events[i].name,
+ trace_event_char(src->report_events, src->known_events[i].mask),
+ trace_event_char(src->start_events, src->known_events[i].mask),
+ trace_event_char(src->stop_events, src->known_events[i].mask),
+ trace_event_char(src->pause_events, src->known_events[i].mask));
+ }
+
+ trash.area[trash.data] = 0;
+ return cli_msg(appctx, LOG_WARNING, trash.area);
+}
+
+static struct cli_kw_list cli_kws = {{ },{
+ { { "trace", NULL }, "trace [<module>|0] [cmd [args...]] : manage live tracing (empty to list, 0 to stop all)", cli_parse_trace, NULL, NULL },
+ { { "show", "trace", NULL }, "show trace [<module>] : show live tracing state", cli_parse_show_trace, NULL, NULL },
+ {{},}
+}};
+
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
+
+static struct cfg_kw_list cfg_kws = {ILH, {
+ { CFG_GLOBAL, "trace", cfg_parse_trace, KWF_EXPERIMENTAL },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/uri_auth.c b/src/uri_auth.c
new file mode 100644
index 0000000..db7e6c6
--- /dev/null
+++ b/src/uri_auth.c
@@ -0,0 +1,318 @@
+/*
+ * URI-based user authentication using the HTTP basic method.
+ *
+ * Copyright 2006-2007 Willy Tarreau <w@1wt.eu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/base64.h>
+#include <haproxy/errors.h>
+#include <haproxy/list.h>
+#include <haproxy/stats-t.h>
+#include <haproxy/uri_auth.h>
+
+
+/*
+ * Initializes a basic uri_auth structure header and returns a pointer to it.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_check_init_uri_auth(struct uri_auth **root)
+{
+ struct uri_auth *u;
+
+ if (!root || !*root) {
+ if ((u = calloc(1, sizeof (*u))) == NULL)
+ goto out_u;
+
+ LIST_INIT(&u->http_req_rules);
+ LIST_INIT(&u->admin_rules);
+ } else
+ u = *root;
+
+ if (!u->uri_prefix) {
+ u->uri_len = strlen(STATS_DEFAULT_URI);
+ if ((u->uri_prefix = strdup(STATS_DEFAULT_URI)) == NULL)
+ goto out_uri;
+ }
+
+ if (root && !*root)
+ *root = u;
+
+ return u;
+
+ out_uri:
+ if (!root || !*root)
+ free(u);
+ out_u:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with <uri> set as the uri_prefix.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_uri(struct uri_auth **root, char *uri)
+{
+ struct uri_auth *u;
+ char *uri_copy;
+ int uri_len;
+
+ uri_len = strlen(uri);
+ if ((uri_copy = strdup(uri)) == NULL)
+ goto out_uri;
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out_u;
+
+ free(u->uri_prefix);
+ u->uri_prefix = uri_copy;
+ u->uri_len = uri_len;
+ return u;
+
+ out_u:
+ free(uri_copy);
+ out_uri:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with <realm> set as the realm.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_realm(struct uri_auth **root, char *realm)
+{
+ struct uri_auth *u;
+ char *realm_copy;
+
+ if ((realm_copy = strdup(realm)) == NULL)
+ goto out_realm;
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out_u;
+
+ free(u->auth_realm);
+ u->auth_realm = realm_copy;
+ return u;
+
+ out_u:
+ free(realm_copy);
+ out_realm:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with STAT_SHNODE flag enabled and
+ * <node> set as the name if it is not empty.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_node(struct uri_auth **root, char *name)
+{
+ struct uri_auth *u;
+ char *node_copy = NULL;
+
+ if (name && *name) {
+ node_copy = strdup(name);
+ if (node_copy == NULL)
+ goto out_realm;
+ }
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out_u;
+
+ if (!stats_set_flag(root, STAT_SHNODE))
+ goto out_u;
+
+ if (node_copy) {
+ free(u->node);
+ u->node = node_copy;
+ }
+
+ return u;
+
+ out_u:
+ free(node_copy);
+ out_realm:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with STAT_SHDESC flag enabled and
+ * <description> set as the desc if it is not empty.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_desc(struct uri_auth **root, char *desc)
+{
+ struct uri_auth *u;
+ char *desc_copy = NULL;
+
+ if (desc && *desc) {
+ desc_copy = strdup(desc);
+ if (desc_copy == NULL)
+ goto out_realm;
+ }
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out_u;
+
+ if (!stats_set_flag(root, STAT_SHDESC))
+ goto out_u;
+
+ if (desc_copy) {
+ free(u->desc);
+ u->desc = desc_copy;
+ }
+
+ return u;
+
+ out_u:
+ free(desc_copy);
+ out_realm:
+ return NULL;
+}
+
+/*
+ * Returns a default uri_auth with the <refresh> refresh interval.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_refresh(struct uri_auth **root, int interval)
+{
+ struct uri_auth *u;
+
+ if ((u = stats_check_init_uri_auth(root)) != NULL)
+ u->refresh = interval;
+ return u;
+}
+
+/*
+ * Returns a default uri_auth with the <flag> set.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_set_flag(struct uri_auth **root, int flag)
+{
+ struct uri_auth *u;
+
+ if ((u = stats_check_init_uri_auth(root)) != NULL)
+ u->flags |= flag;
+ return u;
+}
+
+/*
+ * Returns a default uri_auth with a <user:passwd> entry added to the list of
+ * authorized users. If a matching entry is found, no update will be performed.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
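+/* This is typically reached from a "stats auth <user>:<passwd>" statement in
+ * a proxy section, e.g. (illustrative credentials):
+ *
+ *   stats auth admin:mysecret
+ */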
+struct uri_auth *stats_add_auth(struct uri_auth **root, char *user)
+{
+ struct uri_auth *u;
+ struct auth_users *newuser;
+ char *pass;
+
+ pass = strchr(user, ':');
+ if (pass)
+ *pass++ = '\0';
+ else
+ pass = "";
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ return NULL;
+
+ if (!u->userlist)
+ u->userlist = calloc(1, sizeof(*u->userlist));
+
+ if (!u->userlist)
+ return NULL;
+
+ if (!u->userlist->name)
+ u->userlist->name = strdup(".internal-stats-userlist");
+
+ if (!u->userlist->name)
+ return NULL;
+
+ for (newuser = u->userlist->users; newuser; newuser = newuser->next)
+ if (strcmp(newuser->user, user) == 0) {
+ ha_warning("uri auth: ignoring duplicated user '%s'.\n",
+ user);
+ return u;
+ }
+
+ newuser = calloc(1, sizeof(*newuser));
+ if (!newuser)
+ return NULL;
+
+ newuser->user = strdup(user);
+ if (!newuser->user) {
+ free(newuser);
+ return NULL;
+ }
+
+ newuser->pass = strdup(pass);
+ if (!newuser->pass) {
+ free(newuser->user);
+ free(newuser);
+ return NULL;
+ }
+
+ newuser->flags |= AU_O_INSECURE;
+ newuser->next = u->userlist->users;
+ u->userlist->users = newuser;
+
+ return u;
+}
+
+/*
+ * Returns a default uri_auth with a <scope> entry added to the list of
+ * allowed scopes. If a matching entry is found, no update will be performed.
+ * Uses the pointer provided if not NULL and not initialized.
+ */
+struct uri_auth *stats_add_scope(struct uri_auth **root, char *scope)
+{
+ struct uri_auth *u;
+ char *new_name;
+ struct stat_scope *old_scope, **scope_list;
+
+ if ((u = stats_check_init_uri_auth(root)) == NULL)
+ goto out;
+
+ scope_list = &u->scope;
+ while ((old_scope = *scope_list)) {
+ if (strcmp(old_scope->px_id, scope) == 0)
+ break;
+ scope_list = &old_scope->next;
+ }
+
+ if (!old_scope) {
+ if ((new_name = strdup(scope)) == NULL)
+ goto out_u;
+
+ if ((old_scope = calloc(1, sizeof(*old_scope))) == NULL)
+ goto out_name;
+
+ old_scope->px_id = new_name;
+ old_scope->px_len = strlen(new_name);
+ *scope_list = old_scope;
+ }
+ return u;
+
+ out_name:
+ free(new_name);
+ out_u:
+ free(u);
+ out:
+ return NULL;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
new file mode 100644
index 0000000..bc793f2
--- /dev/null
+++ b/src/uri_normalizer.c
@@ -0,0 +1,467 @@
+/*
+ * HTTP request URI normalization.
+ *
+ * Copyright 2021 Tim Duesterhus <tim@bastelstu.be>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <import/ist.h>
+
+#include <haproxy/api.h>
+#include <haproxy/buf.h>
+#include <haproxy/chunk.h>
+#include <haproxy/tools.h>
+#include <haproxy/uri_normalizer.h>
+
+/* Encodes '#' as '%23'. */
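+/* e.g. "/app#frag" is rewritten to "/app%23frag" (illustrative). */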
+enum uri_normalizer_err uri_normalizer_fragment_encode(const struct ist input, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist output = *dst;
+
+ struct ist scanner = input;
+
+ while (istlen(scanner)) {
+ const struct ist before_hash = istsplit(&scanner, '#');
+
+ if (istcat(&output, before_hash, size) < 0) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ if (istend(before_hash) != istend(scanner)) {
+ if (istcat(&output, ist("%23"), size) < 0) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+ }
+ }
+
+ *dst = output;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
+
+/* Returns 1 if the given character is part of the 'unreserved' set in the
+ * RFC 3986 ABNF.
+ * Returns 0 if not.
+ */
+static int is_unreserved_character(unsigned char c)
+{
+ switch (c) {
+ case 'A'...'Z': /* ALPHA */
+ case 'a'...'z': /* ALPHA */
+ case '0'...'9': /* DIGIT */
+ case '-':
+ case '.':
+ case '_':
+ case '~':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* Decodes percent encoded characters that are part of the 'unreserved' set.
+ *
+ * RFC 3986, section 2.3:
+ * > URIs that differ in the replacement of an unreserved character with
+ * > its corresponding percent-encoded US-ASCII octet are equivalent [...]
+ * > when found in a URI, should be decoded to their corresponding unreserved
+ * > characters by URI normalizers.
+ *
+ * If `strict` is set to 0 then percent characters that are not followed by a
+ * hexadecimal digit are returned as-is without performing any decoding.
+ * If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
+ * for invalid sequences.
+ */
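+/* Illustrative: "%61" decodes to "a" (unreserved), while "%2F" ('/') is
+ * reserved and thus left encoded.
+ */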
+enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist output = *dst;
+
+ struct ist scanner = input;
+
+ /* The output will either be shortened or have the same length. */
+ if (size < istlen(input)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner)) {
+ const char current = istshift(&scanner);
+
+ if (current == '%') {
+ if (istlen(scanner) >= 2) {
+ if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
+ char hex1, hex2, c;
+
+ hex1 = istshift(&scanner);
+ hex2 = istshift(&scanner);
+ c = (hex2i(hex1) << 4) + hex2i(hex2);
+
+ if (is_unreserved_character(c)) {
+ output = __istappend(output, c);
+ }
+ else {
+ output = __istappend(output, current);
+ output = __istappend(output, hex1);
+ output = __istappend(output, hex2);
+ }
+
+ continue;
+ }
+ }
+
+ if (strict) {
+ err = URI_NORMALIZER_ERR_INVALID_INPUT;
+ goto fail;
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+
+ *dst = output;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
+
+/* Uppercases letters used in percent encoding.
+ *
+ * If `strict` is set to 0 then percent characters that are not followed by a
+ * hexadecimal digit are returned as-is without modifying the following letters.
+ * If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
+ * for invalid sequences.
+ */
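+/* Illustrative: "%2f" is rewritten to "%2F"; with strict=0 a lone "%zz"
+ * passes through unchanged.
+ */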
+enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist output = *dst;
+
+ struct ist scanner = input;
+
+ /* The output will have the same length. */
+ if (size < istlen(input)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner)) {
+ const char current = istshift(&scanner);
+
+ if (current == '%') {
+ if (istlen(scanner) >= 2) {
+ if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
+ output = __istappend(output, current);
+ output = __istappend(output, toupper(istshift(&scanner)));
+ output = __istappend(output, toupper(istshift(&scanner)));
+ continue;
+ }
+ }
+
+ if (strict) {
+ err = URI_NORMALIZER_ERR_INVALID_INPUT;
+ goto fail;
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+
+ *dst = output;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
+
+/* Removes `/./` from the given path. */
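+/* Illustrative: "/a/./b/" becomes "/a/b/". */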
+enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist newpath = *dst;
+
+ struct ist scanner = path;
+
+ /* The path will either be shortened or have the same length. */
+ if (size < istlen(path)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner) > 0) {
+ const struct ist segment = istsplit(&scanner, '/');
+
+ if (!isteq(segment, ist("."))) {
+ if (istcat(&newpath, segment, size) < 0) {
+ /* This is impossible, because we checked the size of the destination buffer. */
+ my_unreachable();
+ err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
+ goto fail;
+ }
+
+ if (istend(segment) != istend(scanner))
+ newpath = __istappend(newpath, '/');
+ }
+ }
+
+ *dst = newpath;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
+
+/* Merges `/../` with preceding path segments.
+ *
+ * If `full` is set to `0` then `/../` will be printed at the start of the resulting
+ * path if the number of `/../` exceeds the number of other segments. If `full` is
+ * set to `1` these will not be printed.
+ */
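+/* Illustrative: "/a/b/../c" becomes "/a/c"; "/../a" stays "/../a" with
+ * full=0 and becomes "/a" with full=1.
+ */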
+enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ char * const tail = istptr(*dst) + size;
+ char *head = tail;
+
+ ssize_t offset = istlen(path) - 1;
+
+ int up = 0;
+
+ /* The path will either be shortened or have the same length. */
+ if (size < istlen(path)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ /* Handle `/..` at the end of the path without a trailing slash. */
+ if (offset >= 2 && istmatch(istadv(path, offset - 2), ist("/.."))) {
+ up++;
+ offset -= 2;
+ }
+
+ while (offset >= 0) {
+ if (offset >= 3 && istmatch(istadv(path, offset - 3), ist("/../"))) {
+ up++;
+ offset -= 3;
+ continue;
+ }
+
+ if (up > 0) {
+ /* Skip the slash. */
+ offset--;
+
+ /* First check whether we already reached the start of the path,
+ * before popping the current `/../`.
+ */
+ if (offset >= 0) {
+ up--;
+
+ /* Skip the current path segment. */
+ while (offset >= 0 && istptr(path)[offset] != '/')
+ offset--;
+ }
+ }
+ else {
+ /* Prepend the slash. */
+ *(--head) = istptr(path)[offset];
+ offset--;
+
+ /* Prepend the current path segment. */
+ while (offset >= 0 && istptr(path)[offset] != '/') {
+ *(--head) = istptr(path)[offset];
+ offset--;
+ }
+ }
+ }
+
+ if (up > 0) {
+ /* Prepend a trailing slash. */
+ *(--head) = '/';
+
+ if (!full) {
+ /* Prepend unconsumed `/..`. */
+ do {
+ *(--head) = '.';
+ *(--head) = '.';
+ *(--head) = '/';
+ up--;
+ } while (up > 0);
+ }
+ }
+
+ *dst = ist2(head, tail - head);
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
+
+/* Merges adjacent slashes in the given path. */
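+/* Illustrative: "//a///b" becomes "/a/b". */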
+enum uri_normalizer_err uri_normalizer_path_merge_slashes(const struct ist path, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist newpath = *dst;
+
+ struct ist scanner = path;
+
+ /* The path will either be shortened or have the same length. */
+ if (size < istlen(path)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner) > 0) {
+ const char current = istshift(&scanner);
+
+ if (current == '/') {
+ while (istlen(scanner) > 0 && *istptr(scanner) == '/')
+ scanner = istnext(scanner);
+ }
+
+ newpath = __istappend(newpath, current);
+ }
+
+ *dst = newpath;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
+
+/* Compares two query parameters by name. Query parameters are ordered
+ * as with memcmp. Shorter parameter names are ordered lower. Identical
+ * parameter names are compared by their pointer to maintain a stable
+ * sort.
+ */
+static int query_param_cmp(const void *a, const void *b)
+{
+ const struct ist param_a = *(struct ist*)a;
+ const struct ist param_b = *(struct ist*)b;
+ const struct ist param_a_name = iststop(param_a, '=');
+ const struct ist param_b_name = iststop(param_b, '=');
+
+ int cmp = istdiff(param_a_name, param_b_name);
+
+ if (cmp != 0)
+ return cmp;
+
+ /* The contents are identical: Compare the pointer. */
+ if (istptr(param_a) < istptr(param_b))
+ return -1;
+
+ if (istptr(param_a) > istptr(param_b))
+ return 1;
+
+ return 0;
+}
+
+/* Sorts the parameters within the given query string. */
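+/* Illustrative with delim '&': "?b=2&a=1" becomes "?a=1&b=2". */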
+enum uri_normalizer_err uri_normalizer_query_sort(const struct ist query, const char delim, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist newquery = *dst;
+
+ struct ist scanner = query;
+
+ const struct buffer *trash = get_trash_chunk();
+ struct ist *params = (struct ist *)b_orig(trash);
+ const size_t max_param = b_size(trash) / sizeof(*params);
+ size_t param_count = 0;
+
+ size_t i;
+
+ /* The query will have the same length. */
+ if (size < istlen(query)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ /* Handle the leading '?'. */
+ newquery = __istappend(newquery, istshift(&scanner));
+
+ while (istlen(scanner) > 0) {
+ const struct ist param = istsplit(&scanner, delim);
+
+ if (param_count + 1 > max_param) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ params[param_count] = param;
+ param_count++;
+ }
+
+ qsort(params, param_count, sizeof(*params), query_param_cmp);
+
+ for (i = 0; i < param_count; i++) {
+ if (i > 0)
+ newquery = __istappend(newquery, delim);
+
+ if (istcat(&newquery, params[i], size) < 0) {
+ /* This is impossible, because we checked the size of the destination buffer. */
+ my_unreachable();
+ err = URI_NORMALIZER_ERR_INTERNAL_ERROR;
+ goto fail;
+ }
+ }
+
+ *dst = newquery;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/vars.c b/src/vars.c
new file mode 100644
index 0000000..7ec753e
--- /dev/null
+++ b/src/vars.c
@@ -0,0 +1,1454 @@
+#include <ctype.h>
+
+#include <haproxy/api.h>
+#include <haproxy/arg.h>
+#include <haproxy/buf.h>
+#include <haproxy/cfgparse.h>
+#include <haproxy/check.h>
+#include <haproxy/cli.h>
+#include <haproxy/global.h>
+#include <haproxy/http.h>
+#include <haproxy/http_rules.h>
+#include <haproxy/list.h>
+#include <haproxy/log.h>
+#include <haproxy/sample.h>
+#include <haproxy/session.h>
+#include <haproxy/stream-t.h>
+#include <haproxy/tcp_rules.h>
+#include <haproxy/tcpcheck.h>
+#include <haproxy/tools.h>
+#include <haproxy/vars.h>
+#include <haproxy/xxhash.h>
+
+
+/* This contains a pool of struct vars */
+DECLARE_STATIC_POOL(var_pool, "vars", sizeof(struct var));
+
+/* list of variables for the process scope. */
+struct vars proc_vars THREAD_ALIGNED(64);
+
+/* These counters hold the per-context system limits (zero means unlimited). */
+static unsigned int var_global_limit = 0;
+static unsigned int var_proc_limit = 0;
+static unsigned int var_sess_limit = 0;
+static unsigned int var_txn_limit = 0;
+static unsigned int var_reqres_limit = 0;
+static unsigned int var_check_limit = 0;
+static uint64_t var_name_hash_seed = 0;
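+
+/* These limits are expected to be set from the "global" section; the keyword
+ * names below are given from memory and only as an illustration:
+ *
+ *   tune.vars.sess-max-size 64
+ *   tune.vars.txn-max-size  64
+ */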
+
+/* Structure and array matching set-var conditions to their respective flag
+ * value.
+ */
+struct var_set_condition {
+ const char *cond_str;
+ uint flag;
+};
+
+static struct var_set_condition conditions_array[] = {
+ { "ifexists", VF_COND_IFEXISTS },
+ { "ifnotexists", VF_COND_IFNOTEXISTS },
+ { "ifempty", VF_COND_IFEMPTY },
+ { "ifnotempty", VF_COND_IFNOTEMPTY },
+ { "ifset", VF_COND_IFSET },
+ { "ifnotset", VF_COND_IFNOTSET },
+ { "ifgt", VF_COND_IFGT },
+ { "iflt", VF_COND_IFLT },
+ { NULL, 0 }
+};
+
+/* returns the struct vars pointer for a session, stream and scope, or NULL if
+ * it does not exist.
+ */
+static inline struct vars *get_vars(struct session *sess, struct stream *strm, enum vars_scope scope)
+{
+ switch (scope) {
+ case SCOPE_PROC:
+ return &proc_vars;
+ case SCOPE_SESS:
+ return sess ? &sess->vars : NULL;
+ case SCOPE_CHECK: {
+ struct check *check = sess ? objt_check(sess->origin) : NULL;
+
+ return check ? &check->vars : NULL;
+ }
+ case SCOPE_TXN:
+ return strm ? &strm->vars_txn : NULL;
+ case SCOPE_REQ:
+ case SCOPE_RES:
+ default:
+ return strm ? &strm->vars_reqres : NULL;
+ }
+}
+
+/* This function adds or removes memory size from the accounting. The inner
+ * pointers may be null when setting the outer ones only.
+ */
+void var_accounting_diff(struct vars *vars, struct session *sess, struct stream *strm, int size)
+{
+ switch (vars->scope) {
+ case SCOPE_REQ:
+ case SCOPE_RES:
+ if (var_reqres_limit && strm)
+ _HA_ATOMIC_ADD(&strm->vars_reqres.size, size);
+ __fallthrough;
+ case SCOPE_TXN:
+ if (var_txn_limit && strm)
+ _HA_ATOMIC_ADD(&strm->vars_txn.size, size);
+ goto scope_sess;
+ case SCOPE_CHECK:
+ if (var_check_limit) {
+ struct check *check = objt_check(sess->origin);
+
+ if (check)
+ _HA_ATOMIC_ADD(&check->vars.size, size);
+ }
+scope_sess:
+ __fallthrough;
+ case SCOPE_SESS:
+ if (var_sess_limit)
+ _HA_ATOMIC_ADD(&sess->vars.size, size);
+ __fallthrough;
+ case SCOPE_PROC:
+ if (var_proc_limit || var_global_limit)
+ _HA_ATOMIC_ADD(&proc_vars.size, size);
+ }
+}
+
+/* This function returns 1 if the <size> is available in the var
+ * pool <vars>, otherwise returns 0. If the space is available,
+ * the size is reserved. The inner pointers may be null when setting
+ * the outer ones only. The accounting uses either <sess> or <strm>
+ * depending on the scope. <strm> may be NULL when no stream is known
+ * and only the session exists (eg: tcp-request connection).
+ */
+static int var_accounting_add(struct vars *vars, struct session *sess, struct stream *strm, int size)
+{
+ switch (vars->scope) {
+ case SCOPE_REQ:
+ case SCOPE_RES:
+ if (var_reqres_limit && strm && strm->vars_reqres.size + size > var_reqres_limit)
+ return 0;
+ __fallthrough;
+ case SCOPE_TXN:
+ if (var_txn_limit && strm && strm->vars_txn.size + size > var_txn_limit)
+ return 0;
+ goto scope_sess;
+ case SCOPE_CHECK: {
+ struct check *check = objt_check(sess->origin);
+
+ if (var_check_limit && check && check->vars.size + size > var_check_limit)
+ return 0;
+ }
+scope_sess:
+ __fallthrough;
+ case SCOPE_SESS:
+ if (var_sess_limit && sess->vars.size + size > var_sess_limit)
+ return 0;
+ __fallthrough;
+ case SCOPE_PROC:
+ /* note: scope proc collects all others and is currently identical to the
+ * global limit.
+ */
+ if (var_proc_limit && proc_vars.size + size > var_proc_limit)
+ return 0;
+ if (var_global_limit && proc_vars.size + size > var_global_limit)
+ return 0;
+ }
+ var_accounting_diff(vars, sess, strm, size);
+ return 1;
+}
+
+/* This function removes a variable from the list and frees the memory it was
+ * using. If the variable is marked "VF_PERMANENT" and <force> is zero, only
+ * the sample data is reset to SMP_T_ANY. Returns the freed size.
+ */
+unsigned int var_clear(struct var *var, int force)
+{
+ unsigned int size = 0;
+
+ if (var->data.type == SMP_T_STR || var->data.type == SMP_T_BIN) {
+ ha_free(&var->data.u.str.area);
+ size += var->data.u.str.data;
+ }
+ else if (var->data.type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) {
+ ha_free(&var->data.u.meth.str.area);
+ size += var->data.u.meth.str.data;
+ }
+ /* wipe the sample */
+ var->data.type = SMP_T_ANY;
+
+ if (!(var->flags & VF_PERMANENT) || force) {
+ LIST_DELETE(&var->l);
+ pool_free(var_pool, var);
+ size += sizeof(struct var);
+ }
+ return size;
+}
+
+/* This function frees all the memory used by all the variables
+ * in the list.
+ */
+void vars_prune(struct vars *vars, struct session *sess, struct stream *strm)
+{
+ struct var *var, *tmp;
+ unsigned int size = 0;
+
+ vars_wrlock(vars);
+ list_for_each_entry_safe(var, tmp, &vars->head, l) {
+ size += var_clear(var, 1);
+ }
+ vars_wrunlock(vars);
+ var_accounting_diff(vars, sess, strm, -size);
+}
+
+/* This function frees all the memory used by all the session variables in the
+ * list starting at <vars>.
+ */
+void vars_prune_per_sess(struct vars *vars)
+{
+ struct var *var, *tmp;
+ unsigned int size = 0;
+
+ vars_wrlock(vars);
+ list_for_each_entry_safe(var, tmp, &vars->head, l) {
+ size += var_clear(var, 1);
+ }
+ vars_wrunlock(vars);
+
+ if (var_sess_limit)
+ _HA_ATOMIC_SUB(&vars->size, size);
+ if (var_proc_limit || var_global_limit)
+ _HA_ATOMIC_SUB(&proc_vars.size, size);
+}
+
+/* This function initializes a variables list head */
+void vars_init_head(struct vars *vars, enum vars_scope scope)
+{
+ LIST_INIT(&vars->head);
+ vars->scope = scope;
+ vars->size = 0;
+ HA_RWLOCK_INIT(&vars->rwlock);
+}
+
+/* This function returns a hash value and a scope for a variable name of a
+ * specified length. It makes sure that the scope is valid. It returns non-zero
+ * on success, 0 on failure. Neither hash nor scope may be NULL.
+ */
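+/* Illustrative: "txn.my_var" yields SCOPE_TXN and the hash of "my_var"; a
+ * bare "my_var" is rejected since the scope prefix is mandatory.
+ */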
+static int vars_hash_name(const char *name, int len, enum vars_scope *scope,
+ uint64_t *hash, char **err)
+{
+ const char *tmp;
+
+ /* Check length. */
+ if (len == 0) {
+ memprintf(err, "Empty variable name cannot be accepted");
+ return 0;
+ }
+
+ /* Check scope. */
+ if (len > 5 && strncmp(name, "proc.", 5) == 0) {
+ name += 5;
+ len -= 5;
+ *scope = SCOPE_PROC;
+ }
+ else if (len > 5 && strncmp(name, "sess.", 5) == 0) {
+ name += 5;
+ len -= 5;
+ *scope = SCOPE_SESS;
+ }
+ else if (len > 4 && strncmp(name, "txn.", 4) == 0) {
+ name += 4;
+ len -= 4;
+ *scope = SCOPE_TXN;
+ }
+ else if (len > 4 && strncmp(name, "req.", 4) == 0) {
+ name += 4;
+ len -= 4;
+ *scope = SCOPE_REQ;
+ }
+ else if (len > 4 && strncmp(name, "res.", 4) == 0) {
+ name += 4;
+ len -= 4;
+ *scope = SCOPE_RES;
+ }
+ else if (len > 6 && strncmp(name, "check.", 6) == 0) {
+ name += 6;
+ len -= 6;
+ *scope = SCOPE_CHECK;
+ }
+ else {
+		memprintf(err, "invalid variable name '%.*s'. A variable name must start with its scope. "
+ "The scope can be 'proc', 'sess', 'txn', 'req', 'res' or 'check'", len, name);
+ return 0;
+ }
+
+ /* Check variable name syntax. */
+ for (tmp = name; tmp < name + len; tmp++) {
+ if (!isalnum((unsigned char)*tmp) && *tmp != '_' && *tmp != '.') {
+ memprintf(err, "invalid syntax at char '%s'", tmp);
+ return 0;
+ }
+ }
+
+ *hash = XXH3(name, len, var_name_hash_seed);
+ return 1;
+}
+
+/* This function returns the variable from the given list that matches
+ * <name_hash> or returns NULL if not found. It's only a linked list since it
+ * is not expected to have many variables per scope (a few tens at best).
+ * The caller is responsible for ensuring that <vars> is properly locked.
+ */
+static struct var *var_get(struct vars *vars, uint64_t name_hash)
+{
+ struct var *var;
+
+ list_for_each_entry(var, &vars->head, l)
+ if (var->name_hash == name_hash)
+ return var;
+ return NULL;
+}
+
+/* Returns 0 on failure, otherwise returns 1. */
+static int smp_fetch_var(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ const struct var_desc *var_desc = &args[0].data.var;
+ const struct buffer *def = NULL;
+
+ if (args[1].type == ARGT_STR)
+ def = &args[1].data.str;
+
+ return vars_get_by_desc(var_desc, smp, def);
+}
+
+/*
+ * Clear the contents of a variable so that it can be reset directly.
+ * This function is used just before a variable is filled out of a sample's
+ * content.
+ */
+static inline void var_clear_buffer(struct sample *smp, struct vars *vars, struct var *var, int var_type)
+{
+ if (var_type == SMP_T_STR || var_type == SMP_T_BIN) {
+ ha_free(&var->data.u.str.area);
+ var_accounting_diff(vars, smp->sess, smp->strm,
+ -var->data.u.str.data);
+ }
+ else if (var_type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) {
+ ha_free(&var->data.u.meth.str.area);
+ var_accounting_diff(vars, smp->sess, smp->strm,
+ -var->data.u.meth.str.data);
+ }
+}
+
+/* This function tries to create a variable whose name hash is <name_hash> in
+ * scope <scope> and store sample <smp> as its value.
+ *
+ * The stream and session are extracted from <smp>, whose stream may be NULL
+ * when scope is SCOPE_SESS. In case there wouldn't be enough memory to store
+ * the sample while the variable was already created, it would be changed to
+ * a bool (which is memory-less).
+ *
+ * Flags is a bitfield that may contain one of the following flags:
+ * - VF_CREATEONLY: do nothing if the variable already exists (success).
+ * - VF_PERMANENT: this flag will be passed to the variable upon creation
+ *
+ * - VF_COND_IFEXISTS: only set variable if it already exists
+ * - VF_COND_IFNOTEXISTS: only set variable if it did not exist yet
+ * - VF_COND_IFEMPTY: only set variable if sample is empty
+ * - VF_COND_IFNOTEMPTY: only set variable if sample is not empty
+ * - VF_COND_IFSET: only set variable if its type is not SMP_TYPE_ANY
+ * - VF_COND_IFNOTSET: only set variable if its type is ANY
+ * - VF_COND_IFGT: only set variable if its value is greater than the sample's
+ * - VF_COND_IFLT: only set variable if its value is less than the sample's
+ *
+ * It returns 0 on failure, non-zero on success.
+ */
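+/* Illustrative configuration usage of these conditions (the names are mapped
+ * to the VF_COND_* flags by vars_parse_cond_param()):
+ *
+ *   http-request set-var(txn.src_ip,ifnotset) src
+ */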
+static int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags)
+{
+ struct vars *vars;
+ struct var *var;
+ int ret = 0;
+ int previous_type = SMP_T_ANY;
+
+ vars = get_vars(smp->sess, smp->strm, scope);
+ if (!vars || vars->scope != scope)
+ return 0;
+
+ vars_wrlock(vars);
+
+ /* Look for existing variable name. */
+ var = var_get(vars, name_hash);
+
+ if (var) {
+ if (flags & VF_CREATEONLY) {
+ ret = 1;
+ goto unlock;
+ }
+
+ if (flags & VF_COND_IFNOTEXISTS)
+ goto unlock;
+ } else {
+ if (flags & VF_COND_IFEXISTS)
+ goto unlock;
+
+ /* Check memory available. */
+ if (!var_accounting_add(vars, smp->sess, smp->strm, sizeof(struct var)))
+ goto unlock;
+
+ /* Create new entry. */
+ var = pool_alloc(var_pool);
+ if (!var)
+ goto unlock;
+ LIST_APPEND(&vars->head, &var->l);
+ var->name_hash = name_hash;
+ var->flags = flags & VF_PERMANENT;
+ var->data.type = SMP_T_ANY;
+ }
+
+ /* A variable of type SMP_T_ANY is considered as unset (either created
+ * and never set or unset-var was called on it).
+ */
+ if ((flags & VF_COND_IFSET && var->data.type == SMP_T_ANY) ||
+ (flags & VF_COND_IFNOTSET && var->data.type != SMP_T_ANY))
+ goto unlock;
+
+ /* Set type. */
+ previous_type = var->data.type;
+ var->data.type = smp->data.type;
+
+ if (flags & VF_COND_IFEMPTY) {
+ switch(smp->data.type) {
+ case SMP_T_ANY:
+ case SMP_T_STR:
+ case SMP_T_BIN:
+ /* The actual test on the contents of the sample will be
+ * performed later.
+ */
+ break;
+ default:
+ /* The sample cannot be empty since it has a scalar type. */
+ var->data.type = previous_type;
+ goto unlock;
+ }
+ }
+
+ /* Copy data. If the data needs memory, the function can fail. */
+ switch (var->data.type) {
+ case SMP_T_BOOL:
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.sint = smp->data.u.sint;
+ break;
+ case SMP_T_SINT:
+ if (previous_type == var->data.type) {
+ if (((flags & VF_COND_IFGT) && !(var->data.u.sint > smp->data.u.sint)) ||
+ ((flags & VF_COND_IFLT) && !(var->data.u.sint < smp->data.u.sint)))
+ goto unlock;
+ }
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.sint = smp->data.u.sint;
+ break;
+ case SMP_T_IPV4:
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.ipv4 = smp->data.u.ipv4;
+ break;
+ case SMP_T_IPV6:
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.ipv6 = smp->data.u.ipv6;
+ break;
+ case SMP_T_STR:
+ case SMP_T_BIN:
+ if ((flags & VF_COND_IFNOTEMPTY && !smp->data.u.str.data) ||
+ (flags & VF_COND_IFEMPTY && smp->data.u.str.data)) {
+ var->data.type = previous_type;
+ goto unlock;
+ }
+ var_clear_buffer(smp, vars, var, previous_type);
+ if (!var_accounting_add(vars, smp->sess, smp->strm, smp->data.u.str.data)) {
+ var->data.type = SMP_T_BOOL; /* This type doesn't use additional memory. */
+ goto unlock;
+ }
+
+ var->data.u.str.area = malloc(smp->data.u.str.data);
+ if (!var->data.u.str.area) {
+ var_accounting_diff(vars, smp->sess, smp->strm,
+ -smp->data.u.str.data);
+ var->data.type = SMP_T_BOOL; /* This type doesn't use additional memory. */
+ goto unlock;
+ }
+ var->data.u.str.data = smp->data.u.str.data;
+ memcpy(var->data.u.str.area, smp->data.u.str.area,
+ var->data.u.str.data);
+ break;
+ case SMP_T_METH:
+ var_clear_buffer(smp, vars, var, previous_type);
+ var->data.u.meth.meth = smp->data.u.meth.meth;
+ if (smp->data.u.meth.meth != HTTP_METH_OTHER)
+ break;
+
+ if (!var_accounting_add(vars, smp->sess, smp->strm, smp->data.u.meth.str.data)) {
+ var->data.type = SMP_T_BOOL; /* This type doesn't use additional memory. */
+ goto unlock;
+ }
+
+ var->data.u.meth.str.area = malloc(smp->data.u.meth.str.data);
+ if (!var->data.u.meth.str.area) {
+ var_accounting_diff(vars, smp->sess, smp->strm,
+ -smp->data.u.meth.str.data);
+ var->data.type = SMP_T_BOOL; /* This type doesn't use additional memory. */
+ goto unlock;
+ }
+ var->data.u.meth.str.data = smp->data.u.meth.str.data;
+ var->data.u.meth.str.size = smp->data.u.meth.str.data;
+ memcpy(var->data.u.meth.str.area, smp->data.u.meth.str.area,
+ var->data.u.meth.str.data);
+ break;
+ }
+
+ /* OK, now done */
+ ret = 1;
+ unlock:
+ vars_wrunlock(vars);
+ return ret;
+}
+
+/* Deletes a variable matching name hash <name_hash> and scope <scope> for the
+ * session and stream found in <smp>. Note that stream may be null for
+ * SCOPE_SESS. Returns 0 if the scope was not found otherwise 1.
+ */
+static int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp)
+{
+ struct vars *vars;
+ struct var *var;
+ unsigned int size = 0;
+
+ vars = get_vars(smp->sess, smp->strm, scope);
+ if (!vars || vars->scope != scope)
+ return 0;
+
+ /* Look for existing variable name. */
+ vars_wrlock(vars);
+ var = var_get(vars, name_hash);
+ if (var) {
+ size = var_clear(var, 0);
+ var_accounting_diff(vars, smp->sess, smp->strm, -size);
+ }
+ vars_wrunlock(vars);
+ return 1;
+}
+
+
+/*
+ * Convert a string set-var condition into its numerical value.
+ * The corresponding bit is set in the <cond_bitmap> parameter if the
+ * <cond> is known.
+ * Returns 1 in case of success.
+ */
+static int vars_parse_cond_param(const struct buffer *cond, uint *cond_bitmap, char **err)
+{
+ struct var_set_condition *cond_elt = &conditions_array[0];
+
+ /* The conditions array is NULL terminated. */
+ while (cond_elt->cond_str) {
+ if (chunk_strcmp(cond, cond_elt->cond_str) == 0) {
+ *cond_bitmap |= cond_elt->flag;
+ break;
+ }
+ ++cond_elt;
+ }
+
+ if (cond_elt->cond_str == NULL && err)
+ memprintf(err, "unknown condition \"%.*s\"", (int)cond->data, cond->area);
+
+ return cond_elt->cond_str != NULL;
+}
+
+/* Returns 0 on failure, otherwise returns 1. */
+static int smp_conv_store(const struct arg *args, struct sample *smp, void *private)
+{
+ uint conditions = 0;
+ int cond_idx = 1;
+
+ while (args[cond_idx].type == ARGT_STR) {
+ if (vars_parse_cond_param(&args[cond_idx++].data.str, &conditions, NULL) == 0)
+ break;
+ }
+
+ return var_set(args[0].data.var.name_hash, args[0].data.var.scope, smp, conditions);
+}
+
+/* Returns 0 on failure, otherwise returns 1. */
+static int smp_conv_clear(const struct arg *args, struct sample *smp, void *private)
+{
+ return var_unset(args[0].data.var.name_hash, args[0].data.var.scope, smp);
+}
+
+/* This function checks an argument entry and fills it with a variable
+ * type. The argument must be a string. If the variable lookup fails,
+ * the function returns 0 and fills <err>, otherwise it returns 1.
+ */
+int vars_check_arg(struct arg *arg, char **err)
+{
+ enum vars_scope scope;
+ struct sample empty_smp = { };
+ uint64_t hash;
+
+ /* Check arg type. */
+ if (arg->type != ARGT_STR) {
+ memprintf(err, "unexpected argument type");
+ return 0;
+ }
+
+ /* Register new variable name. */
+ if (!vars_hash_name(arg->data.str.area, arg->data.str.data, &scope, &hash, err))
+ return 0;
+
+ if (scope == SCOPE_PROC && !var_set(hash, scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT))
+ return 0;
+
+ /* properly destroy the chunk */
+ chunk_destroy(&arg->data.str);
+
+ /* Store the variable's name hash and scope into the argument. */
+ arg->type = ARGT_VAR;
+ arg->data.var.name_hash = hash;
+ arg->data.var.scope = scope;
+ return 1;
+}
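+
+/* Illustrative effect of the function above (hedged: "txn.foo" is an assumed
+ * example name): after a successful call on a string argument, the argument
+ * is rewritten in place so that:
+ *
+ *   arg->type               == ARGT_VAR
+ *   arg->data.var.scope     == SCOPE_TXN
+ *   arg->data.var.name_hash == the hash computed by vars_hash_name()
+ *
+ * and the original string chunk has been released by chunk_destroy().
+ */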
+
+/* This function stores a sample in a variable unless it is of type "proc" and
+ * not defined yet.
+ * Returns zero on failure and non-zero otherwise. The variable not being
+ * defined is treated as a failure.
+ */
+int vars_set_by_name_ifexist(const char *name, size_t len, struct sample *smp)
+{
+ enum vars_scope scope;
+ uint64_t hash;
+
+ /* Resolve name and scope. */
+ if (!vars_hash_name(name, len, &scope, &hash, NULL))
+ return 0;
+
+ /* Variable creation is allowed for all scopes apart from the PROC one. */
+ return var_set(hash, scope, smp, (scope == SCOPE_PROC) ? VF_COND_IFEXISTS : 0);
+}
+
+
+/* This function stores a sample in a variable.
+ * Returns zero on failure and non-zero otherwise.
+ */
+int vars_set_by_name(const char *name, size_t len, struct sample *smp)
+{
+ enum vars_scope scope;
+ uint64_t hash;
+
+ /* Resolve name and scope. */
+ if (!vars_hash_name(name, len, &scope, &hash, NULL))
+ return 0;
+
+ return var_set(hash, scope, smp, 0);
+}
+
+/* This function unsets a variable if it was already defined.
+ * Returns zero on failure and non-zero otherwise.
+ */
+int vars_unset_by_name_ifexist(const char *name, size_t len, struct sample *smp)
+{
+ enum vars_scope scope;
+ uint64_t hash;
+
+ /* Resolve name and scope. */
+ if (!vars_hash_name(name, len, &scope, &hash, NULL))
+ return 0;
+
+ return var_unset(hash, scope, smp);
+}
+
+
+/* This retrieves the variable whose hash matches <name_hash> from variables
+ * <vars>, and if found and not empty, duplicates the result into sample
+ * <smp>. smp_dup() is used in order to release the variables lock ASAP (so a
+ * pre-allocated chunk is obtained via get_trash_chunk()). The variables' lock
+ * is used for reads.
+ *
+ * The function returns 0 if the variable was not found and no default
+ * value was provided in <def>, otherwise 1 with the sample filled.
+ * Default values are always returned as strings.
+ */
+static int var_to_smp(struct vars *vars, uint64_t name_hash, struct sample *smp, const struct buffer *def)
+{
+ struct var *var;
+
+ /* Get the variable entry. */
+ vars_rdlock(vars);
+ var = var_get(vars, name_hash);
+ if (!var || !var->data.type) {
+ if (!def) {
+ vars_rdunlock(vars);
+ return 0;
+ }
+
+ /* not found but we have a default value */
+ smp->data.type = SMP_T_STR;
+ smp->data.u.str = *def;
+ }
+ else
+ smp->data = var->data;
+
+ /* Copy sample. */
+ smp_dup(smp);
+
+ vars_rdunlock(vars);
+ return 1;
+}
+
+/* This function fills a sample with the variable content.
+ *
+ * Keep in mind that a sample content is duplicated by using smp_dup()
+ * and it therefore uses a pre-allocated trash chunk as returned by
+ * get_trash_chunk().
+ *
+ * If the variable is not valid in this scope, 0 is always returned.
+ * If the variable is valid but not found, either the default value
+ * <def> is returned if not NULL, or zero is returned.
+ *
+ * Returns 1 if the sample is filled, otherwise it returns 0.
+ */
+int vars_get_by_name(const char *name, size_t len, struct sample *smp, const struct buffer *def)
+{
+ struct vars *vars;
+ enum vars_scope scope;
+ uint64_t hash;
+
+ /* Resolve name and scope. */
+ if (!vars_hash_name(name, len, &scope, &hash, NULL))
+ return 0;
+
+ /* Select "vars" pool according with the scope. */
+ vars = get_vars(smp->sess, smp->strm, scope);
+ if (!vars || vars->scope != scope)
+ return 0;
+
+ return var_to_smp(vars, hash, smp, def);
+}
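+
+/* Minimal usage sketch (illustrative: "txn.foo" and the <px>, <sess>, <strm>
+ * pointers are assumptions of the caller's context), fetching a variable with
+ * an empty-string default when it is unset:
+ *
+ *   struct sample smp = { };
+ *   struct buffer def = { .area = "", .data = 0 };
+ *
+ *   smp_set_owner(&smp, px, sess, strm, 0);
+ *   if (vars_get_by_name("txn.foo", 7, &smp, &def))
+ *       ... smp was smp_dup()'ed into a trash chunk and is safe to use ...
+ */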
+
+/* This function fills a sample with the content of the variable described
+ * by <var_desc>.
+ *
+ * Keep in mind that a sample content is duplicated by using smp_dup()
+ * and it therefore uses a pre-allocated trash chunk as returned by
+ * get_trash_chunk().
+ *
+ * If the variable is not valid in this scope, 0 is always returned.
+ * If the variable is valid but not found, either the default value
+ * <def> is returned if not NULL, or zero is returned.
+ *
+ * Returns 1 if the sample is filled, otherwise it returns 0.
+ */
+int vars_get_by_desc(const struct var_desc *var_desc, struct sample *smp, const struct buffer *def)
+{
+ struct vars *vars;
+
+ /* Select "vars" pool according with the scope. */
+ vars = get_vars(smp->sess, smp->strm, var_desc->scope);
+
+ /* Check if the scope is available at this point of processing. */
+ if (!vars || vars->scope != var_desc->scope)
+ return 0;
+
+ return var_to_smp(vars, var_desc->name_hash, smp, def);
+}
+
+/* Always returns ACT_RET_CONT even if an error occurs. */
+static enum act_return action_store(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct buffer *fmtstr = NULL;
+ struct sample smp;
+ int dir;
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_SES: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_REQ_CNT: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_TCP_RES_CNT: dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_HTTP_REQ: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_HTTP_RES: dir = SMP_OPT_DIR_RES; break;
+ case ACT_F_TCP_CHK: dir = SMP_OPT_DIR_REQ; break;
+ case ACT_F_CFG_PARSER: dir = SMP_OPT_DIR_REQ; break; /* not used anyway */
+ case ACT_F_CLI_PARSER: dir = SMP_OPT_DIR_REQ; break; /* not used anyway */
+ default:
+ send_log(px, LOG_ERR, "Vars: internal error while executing the store action.");
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("Vars: internal error while executing the store action.\n");
+ return ACT_RET_CONT;
+ }
+
+ /* Process the expression. */
+ memset(&smp, 0, sizeof(smp));
+
+ if (!LIST_ISEMPTY(&rule->arg.vars.fmt)) {
+ /* a format-string is used */
+
+ fmtstr = alloc_trash_chunk();
+ if (!fmtstr) {
+ send_log(px, LOG_ERR, "Vars: memory allocation failure while processing store rule.");
+ if (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE))
+ ha_alert("Vars: memory allocation failure while processing store rule.\n");
+ return ACT_RET_CONT;
+ }
+
+ /* execute the log-format expression */
+ fmtstr->data = sess_build_logline(sess, s, fmtstr->area, fmtstr->size, &rule->arg.vars.fmt);
+
+ /* convert it to a sample of type string as it's what the vars
+ * API consumes, and store it.
+ */
+ smp_set_owner(&smp, px, sess, s, 0);
+ smp.data.type = SMP_T_STR;
+ smp.data.u.str = *fmtstr;
+ var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp, rule->arg.vars.conditions);
+ }
+ else {
+ /* an expression is used */
+ if (!sample_process(px, sess, s, dir|SMP_OPT_FINAL,
+ rule->arg.vars.expr, &smp))
+ return ACT_RET_CONT;
+ }
+
+ /* Store the sample, and ignore errors. */
+ var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp, rule->arg.vars.conditions);
+ free_trash_chunk(fmtstr);
+ return ACT_RET_CONT;
+}
+
+/* Always returns ACT_RET_CONT even if an error occurs. */
+static enum act_return action_clear(struct act_rule *rule, struct proxy *px,
+ struct session *sess, struct stream *s, int flags)
+{
+ struct sample smp;
+
+ memset(&smp, 0, sizeof(smp));
+ smp_set_owner(&smp, px, sess, s, SMP_OPT_FINAL);
+
+ /* Clear the variable using the sample context, and ignore errors. */
+ var_unset(rule->arg.vars.name_hash, rule->arg.vars.scope, &smp);
+ return ACT_RET_CONT;
+}
+
+static void release_store_rule(struct act_rule *rule)
+{
+ struct logformat_node *lf, *lfb;
+
+ list_for_each_entry_safe(lf, lfb, &rule->arg.vars.fmt, list) {
+ LIST_DELETE(&lf->list);
+ release_sample_expr(lf->expr);
+ free(lf->arg);
+ free(lf);
+ }
+
+ release_sample_expr(rule->arg.vars.expr);
+}
+
+/* These two functions check the variable name and replace the
+ * configuration string name with the global string name. It is
+ * the same string, but the global pointer is easier to compare.
+ * They return non-zero on success, zero on failure.
+ *
+ * The first function checks a sample-fetch and the second one
+ * checks a converter.
+ */
+static int smp_check_var(struct arg *args, char **err)
+{
+ return vars_check_arg(&args[0], err);
+}
+
+static int conv_check_var(struct arg *args, struct sample_conv *conv,
+ const char *file, int line, char **err_msg)
+{
+ int cond_idx = 1;
+ uint conditions = 0;
+ int retval = vars_check_arg(&args[0], err_msg);
+
+ while (retval && args[cond_idx].type == ARGT_STR)
+ retval = vars_parse_cond_param(&args[cond_idx++].data.str, &conditions, err_msg);
+
+ return retval;
+}
+
+/* This function is a common parser for using variables. It understands
+ * the format:
+ *
+ * set-var-fmt(<variable-name>[,<cond> ...]) <format-string>
+ * set-var(<variable-name>[,<cond> ...]) <expression>
+ * unset-var(<variable-name>)
+ *
+ * It returns ACT_RET_PRS_ERR if it fails and <err> is filled with an error
+ * message. Otherwise, it returns ACT_RET_PRS_OK and the variable <expr>
+ * is filled with the pointer to the expression to execute. The proxy is
+ * only used to retrieve the ->conf entries.
+ */
+static enum act_parse_ret parse_store(const char **args, int *arg, struct proxy *px,
+ struct act_rule *rule, char **err)
+{
+ const char *var_name = args[*arg-1];
+ int var_len;
+ const char *kw_name;
+ int flags = 0, set_var = 0; /* 0=unset-var, 1=set-var, 2=set-var-fmt */
+ struct sample empty_smp = { };
+ struct ist condition = IST_NULL;
+ struct ist var = IST_NULL;
+ struct ist varname_ist = IST_NULL;
+
+ if (strncmp(var_name, "set-var-fmt", 11) == 0) {
+ var_name += 11;
+ set_var = 2;
+ }
+ else if (strncmp(var_name, "set-var", 7) == 0) {
+ var_name += 7;
+ set_var = 1;
+ }
+ else if (strncmp(var_name, "unset-var", 9) == 0) {
+ var_name += 9;
+ set_var = 0;
+ }
+
+ if (*var_name != '(') {
+ memprintf(err, "invalid or incomplete action '%s'. Expects 'set-var(<var-name>)', 'set-var-fmt(<var-name>)' or 'unset-var(<var-name>)'",
+ args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+ var_name++; /* jump the '(' */
+ var_len = strlen(var_name);
+ var_len--; /* remove the ')' */
+ if (var_name[var_len] != ')') {
+ memprintf(err, "incomplete argument after action '%s'. Expects 'set-var(<var-name>)', 'set-var-fmt(<var-name>)' or 'unset-var(<var-name>)'",
+ args[*arg-1]);
+ return ACT_RET_PRS_ERR;
+ }
+
+ /* Parse the optional conditions. */
+ var = ist2(var_name, var_len);
+ varname_ist = istsplit(&var, ',');
+ var_len = istlen(varname_ist);
+
+ condition = istsplit(&var, ',');
+
+ if (istlen(condition) && set_var == 0) {
+ memprintf(err, "unset-var does not expect parameters after the variable name. Only \"set-var\" and \"set-var-fmt\" manage conditions");
+ return ACT_RET_PRS_ERR;
+ }
+
+ while (istlen(condition)) {
+ struct buffer cond = {};
+
+ chunk_initlen(&cond, istptr(condition), 0, istlen(condition));
+ if (vars_parse_cond_param(&cond, &rule->arg.vars.conditions, err) == 0)
+ return ACT_RET_PRS_ERR;
+
+ condition = istsplit(&var, ',');
+ }
+
+ LIST_INIT(&rule->arg.vars.fmt);
+ if (!vars_hash_name(var_name, var_len, &rule->arg.vars.scope, &rule->arg.vars.name_hash, err))
+ return ACT_RET_PRS_ERR;
+
+ if (rule->arg.vars.scope == SCOPE_PROC &&
+ !var_set(rule->arg.vars.name_hash, rule->arg.vars.scope, &empty_smp, VF_CREATEONLY|VF_PERMANENT))
+ return 0;
+
+ /* There is no fetch method when the variable is unset. Just set the
+ * right action and return. */
+ if (!set_var) {
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_clear;
+ rule->release_ptr = release_store_rule;
+ return ACT_RET_PRS_OK;
+ }
+
+ kw_name = args[*arg-1];
+
+ switch (rule->from) {
+ case ACT_F_TCP_REQ_CON:
+ flags = SMP_VAL_FE_CON_ACC;
+ px->conf.args.ctx = ARGC_TCO;
+ break;
+ case ACT_F_TCP_REQ_SES:
+ flags = SMP_VAL_FE_SES_ACC;
+ px->conf.args.ctx = ARGC_TSE;
+ break;
+ case ACT_F_TCP_REQ_CNT:
+ if (px->cap & PR_CAP_FE)
+ flags |= SMP_VAL_FE_REQ_CNT;
+ if (px->cap & PR_CAP_BE)
+ flags |= SMP_VAL_BE_REQ_CNT;
+ px->conf.args.ctx = ARGC_TRQ;
+ break;
+ case ACT_F_TCP_RES_CNT:
+ if (px->cap & PR_CAP_FE)
+ flags |= SMP_VAL_FE_RES_CNT;
+ if (px->cap & PR_CAP_BE)
+ flags |= SMP_VAL_BE_RES_CNT;
+ px->conf.args.ctx = ARGC_TRS;
+ break;
+ case ACT_F_HTTP_REQ:
+ if (px->cap & PR_CAP_FE)
+ flags |= SMP_VAL_FE_HRQ_HDR;
+ if (px->cap & PR_CAP_BE)
+ flags |= SMP_VAL_BE_HRQ_HDR;
+ px->conf.args.ctx = ARGC_HRQ;
+ break;
+ case ACT_F_HTTP_RES:
+ if (px->cap & PR_CAP_FE)
+ flags |= SMP_VAL_FE_HRS_HDR;
+ if (px->cap & PR_CAP_BE)
+ flags |= SMP_VAL_BE_HRS_HDR;
+ px->conf.args.ctx = ARGC_HRS;
+ break;
+ case ACT_F_TCP_CHK:
+ flags = SMP_VAL_BE_CHK_RUL;
+ px->conf.args.ctx = ARGC_TCK;
+ break;
+ case ACT_F_CFG_PARSER:
+ flags = SMP_VAL_CFG_PARSER;
+ px->conf.args.ctx = ARGC_CFG;
+ break;
+ case ACT_F_CLI_PARSER:
+ flags = SMP_VAL_CLI_PARSER;
+ px->conf.args.ctx = ARGC_CLI;
+ break;
+ default:
+ memprintf(err,
+ "internal error, unexpected rule->from=%d, please report this bug!",
+ rule->from);
+ return ACT_RET_PRS_ERR;
+ }
+
+ if (set_var == 2) { /* set-var-fmt */
+ if (!parse_logformat_string(args[*arg], px, &rule->arg.vars.fmt, 0, flags, err))
+ return ACT_RET_PRS_ERR;
+
+ (*arg)++;
+
+ /* for late error reporting */
+ free(px->conf.lfs_file);
+ px->conf.lfs_file = strdup(px->conf.args.file);
+ px->conf.lfs_line = px->conf.args.line;
+ } else {
+ /* set-var */
+ rule->arg.vars.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
+ px->conf.args.line, err, &px->conf.args, NULL);
+ if (!rule->arg.vars.expr)
+ return ACT_RET_PRS_ERR;
+
+ if (!(rule->arg.vars.expr->fetch->val & flags)) {
+ memprintf(err,
+ "fetch method '%s' extracts information from '%s', none of which is available here",
+ kw_name, sample_src_names(rule->arg.vars.expr->fetch->use));
+ free(rule->arg.vars.expr);
+ return ACT_RET_PRS_ERR;
+ }
+ }
+
+ rule->action = ACT_CUSTOM;
+ rule->action_ptr = action_store;
+ rule->release_ptr = release_store_rule;
+ return ACT_RET_PRS_OK;
+}
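+
+/* Illustrative configuration lines accepted by the parser above (variable
+ * names and fetch expressions are examples only; condition names come from
+ * conditions_array):
+ *
+ *   http-request set-var(txn.host) req.hdr(host)
+ *   http-request set-var(txn.host,ifnotset) req.hdr(host)
+ *   http-request set-var-fmt(txn.from) %[src]:%[src_port]
+ *   http-request unset-var(txn.host)
+ */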
+
+
+/* parses a global "set-var" directive. It will create a temporary rule and
+ * expression that are parsed, processed, and released on the fly so that we
+ * respect the real set-var syntax. These directives take the following format:
+ * set-var <name> <expression>
+ * set-var-fmt <name> <fmt>
+ * Note that parse_store() expects "set-var(name) <expression>" so we have to
+ * temporarily replace the keyword here.
+ */
+static int vars_parse_global_set_var(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ struct proxy px = {
+ .id = "CFG",
+ .conf.args = { .file = file, .line = line, },
+ };
+ struct act_rule rule = {
+ .arg.vars.scope = SCOPE_PROC,
+ .from = ACT_F_CFG_PARSER,
+ .conf = { .file = (char *)file, .line = line, },
+ };
+ enum obj_type objt = OBJ_TYPE_NONE;
+ struct session *sess = NULL;
+ enum act_parse_ret p_ret;
+ char *old_arg1;
+ char *tmp_arg1;
+ int arg = 2; // variable name
+ int ret = -1;
+ int use_fmt = 0;
+
+ LIST_INIT(&px.conf.args.list);
+
+ use_fmt = strcmp(args[0], "set-var-fmt") == 0;
+
+ if (!*args[1] || !*args[2]) {
+ if (use_fmt)
+ memprintf(err, "'%s' requires a process-wide variable name ('proc.<name>') and a format string.", args[0]);
+ else
+ memprintf(err, "'%s' requires a process-wide variable name ('proc.<name>') and a sample expression.", args[0]);
+ goto end;
+ }
+
+ tmp_arg1 = NULL;
+ if (!memprintf(&tmp_arg1, "set-var%s(%s)", use_fmt ? "-fmt" : "", args[1]))
+ goto end;
+
+ /* parse_store() will always return a message in <err> on error */
+ old_arg1 = args[1]; args[1] = tmp_arg1;
+ p_ret = parse_store((const char **)args, &arg, &px, &rule, err);
+ free(args[1]); args[1] = old_arg1;
+
+ if (p_ret != ACT_RET_PRS_OK)
+ goto end;
+
+ if (rule.arg.vars.scope != SCOPE_PROC) {
+ memprintf(err, "'%s': cannot set variable '%s', only scope 'proc' is permitted in the global section.", args[0], args[1]);
+ goto end;
+ }
+
+ if (smp_resolve_args(&px, err) != 0) {
+ release_sample_expr(rule.arg.vars.expr);
+ indent_msg(err, 2);
+ goto end;
+ }
+
+ if (use_fmt && !(sess = session_new(&px, NULL, &objt))) {
+ release_sample_expr(rule.arg.vars.expr);
+ memprintf(err, "'%s': out of memory when trying to set variable '%s' in the global section.", args[0], args[1]);
+ goto end;
+ }
+
+ action_store(&rule, &px, sess, NULL, 0);
+ release_sample_expr(rule.arg.vars.expr);
+ if (sess)
+ session_free(sess);
+
+ ret = 0;
+ end:
+ return ret;
+}
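+
+/* Example global-section directives handled by the parser above (names and
+ * values are illustrative):
+ *
+ *   global
+ *       set-var     proc.prio int(100)
+ *       set-var-fmt proc.current_state "primary"
+ */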
+
+/* parse CLI's "get var <name>" */
+static int vars_parse_cli_get_var(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct vars *vars;
+ struct sample smp = { };
+ int i;
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (!*args[2])
+ return cli_err(appctx, "Missing process-wide variable identifier.\n");
+
+ vars = get_vars(NULL, NULL, SCOPE_PROC);
+ if (!vars || vars->scope != SCOPE_PROC)
+ return 0;
+
+ if (!vars_get_by_name(args[2], strlen(args[2]), &smp, NULL))
+ return cli_err(appctx, "Variable not found.\n");
+
+ /* the sample returned by vars_get_by_name() is allocated into a trash
+ * chunk so we are free to manipulate it.
+ */
+ chunk_printf(&trash, "%s: type=%s value=", args[2], smp_to_type[smp.data.type]);
+
+ if (!sample_casts[smp.data.type][SMP_T_STR] ||
+ !sample_casts[smp.data.type][SMP_T_STR](&smp)) {
+ chunk_appendf(&trash, "(undisplayable)\n");
+ } else {
+ /* Display only printable chars; others are replaced with '.'. */
+ b_putchr(&trash, '<');
+ for (i = 0; i < smp.data.u.str.data; i++) {
+ if (isprint((unsigned char)smp.data.u.str.area[i]))
+ b_putchr(&trash, smp.data.u.str.area[i]);
+ else
+ b_putchr(&trash, '.');
+ }
+ b_putchr(&trash, '>');
+ b_putchr(&trash, '\n');
+ b_putchr(&trash, 0);
+ }
+ return cli_msg(appctx, LOG_INFO, trash.area);
+}
+
+/* parse CLI's "set var <name>". It accepts:
+ * - set var <name> <expression>
+ * - set var <name> expr <expression>
+ * - set var <name> fmt <format>
+ */
+static int vars_parse_cli_set_var(char **args, char *payload, struct appctx *appctx, void *private)
+{
+ struct proxy px = {
+ .id = "CLI",
+ .conf.args = { .file = "CLI", .line = 0, },
+ };
+ struct act_rule rule = {
+ .arg.vars.scope = SCOPE_PROC,
+ .from = ACT_F_CLI_PARSER,
+ .conf = { .file = "CLI", .line = 0, },
+ };
+ enum obj_type objt = OBJ_TYPE_NONE;
+ struct session *sess = NULL;
+ enum act_parse_ret p_ret;
+ const char *tmp_args[3];
+ int tmp_arg;
+ char *tmp_act;
+ char *err = NULL;
+ int nberr;
+ int use_fmt = 0;
+
+ LIST_INIT(&px.conf.args.list);
+
+ if (!cli_has_level(appctx, ACCESS_LVL_OPER))
+ return 1;
+
+ if (!*args[2])
+ return cli_err(appctx, "Missing process-wide variable identifier.\n");
+
+ if (!*args[3])
+ return cli_err(appctx, "Missing either 'expr', 'fmt' or expression.\n");
+
+ if (*args[4]) {
+ /* this is the long format */
+ if (strcmp(args[3], "fmt") == 0)
+ use_fmt = 1;
+ else if (strcmp(args[3], "expr") != 0) {
+ memprintf(&err, "'%s %s': arg type must be either 'expr' or 'fmt' but got '%s'.", args[0], args[1], args[3]);
+ goto fail;
+ }
+ }
+
+ tmp_act = NULL;
+ if (!memprintf(&tmp_act, "set-var%s(%s)", use_fmt ? "-fmt" : "", args[2])) {
+ memprintf(&err, "memory allocation error.");
+ goto fail;
+ }
+
+ /* parse_store() will always return a message in <err> on error */
+ tmp_args[0] = tmp_act;
+ tmp_args[1] = (*args[4]) ? args[4] : args[3];
+ tmp_args[2] = "";
+ tmp_arg = 1; // must point to the first arg after the action
+ p_ret = parse_store(tmp_args, &tmp_arg, &px, &rule, &err);
+ free(tmp_act);
+
+ if (p_ret != ACT_RET_PRS_OK)
+ goto fail;
+
+ if (rule.arg.vars.scope != SCOPE_PROC) {
+ memprintf(&err, "'%s %s': cannot set variable '%s', only scope 'proc' is permitted here.", args[0], args[1], args[2]);
+ goto fail;
+ }
+
+ err = NULL;
+ nberr = smp_resolve_args(&px, &err);
+ if (nberr) {
+ release_sample_expr(rule.arg.vars.expr);
+ indent_msg(&err, 2);
+ goto fail;
+ }
+
+ if (use_fmt && !(sess = session_new(&px, NULL, &objt))) {
+ release_sample_expr(rule.arg.vars.expr);
+ memprintf(&err, "memory allocation error.");
+ goto fail;
+ }
+
+ action_store(&rule, &px, sess, NULL, 0);
+ release_sample_expr(rule.arg.vars.expr);
+ if (sess)
+ session_free(sess);
+
+ appctx->st0 = CLI_ST_PROMPT;
+ return 0;
+ fail:
+ return cli_dynerr(appctx, err);
+}
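+
+/* Illustrative CLI session for the two handlers above (the socket path is an
+ * assumption; "set var" is registered with ACCESS_EXPERIMENTAL below so it
+ * first requires "experimental-mode on"):
+ *
+ *   $ echo "experimental-mode on; set var proc.x expr int(42)" | \
+ *         socat stdio /var/run/haproxy.sock
+ *   $ echo "get var proc.x" | socat stdio /var/run/haproxy.sock
+ *   proc.x: type=sint value=<42>
+ */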
+
+static int vars_max_size(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err, unsigned int *limit)
+{
+ char *error;
+
+ *limit = strtol(args[1], &error, 10);
+ if (*error != 0) {
+ memprintf(err, "%s: '%s' is an invalid size", args[0], args[1]);
+ return -1;
+ }
+ return 0;
+}
+
+static int vars_max_size_global(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_global_limit);
+}
+
+static int vars_max_size_proc(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_proc_limit);
+}
+
+static int vars_max_size_sess(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_sess_limit);
+}
+
+static int vars_max_size_txn(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_txn_limit);
+}
+
+static int vars_max_size_reqres(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_reqres_limit);
+}
+
+static int vars_max_size_check(char **args, int section_type, struct proxy *curpx,
+ const struct proxy *defpx, const char *file, int line,
+ char **err)
+{
+ return vars_max_size(args, section_type, curpx, defpx, file, line, err, &var_check_limit);
+}
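+
+/* Example global tuning directives wired to the handlers above (sizes are
+ * illustrative, expressed in bytes per scope):
+ *
+ *   tune.vars.global-max-size 1048576
+ *   tune.vars.proc-max-size   1048576
+ *   tune.vars.txn-max-size    256
+ */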
+
+/* early boot initialization */
+static void vars_init()
+{
+ var_name_hash_seed = ha_random64();
+ /* Initialize process vars */
+ vars_init_head(&proc_vars, SCOPE_PROC);
+}
+
+INITCALL0(STG_PREPARE, vars_init);
+
+static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
+
+ { "var", smp_fetch_var, ARG2(1,STR,STR), smp_check_var, SMP_T_ANY, SMP_USE_CONST },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);
+
+static struct sample_conv_kw_list sample_conv_kws = {ILH, {
+ { "set-var", smp_conv_store, ARG5(1,STR,STR,STR,STR,STR), conv_check_var, SMP_T_ANY, SMP_T_ANY },
+ { "unset-var", smp_conv_clear, ARG1(1,STR), conv_check_var, SMP_T_ANY, SMP_T_ANY },
+ { /* END */ },
+}};
+
+INITCALL1(STG_REGISTER, sample_register_convs, &sample_conv_kws);
+
+static struct action_kw_list tcp_req_conn_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_conn_keywords_register, &tcp_req_conn_kws);
+
+static struct action_kw_list tcp_req_sess_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_req_sess_kws);
+
+static struct action_kw_list tcp_req_cont_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_kws);
+
+static struct action_kw_list tcp_res_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_kws);
+
+static struct action_kw_list tcp_check_kws = {ILH, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, tcp_check_keywords_register, &tcp_check_kws);
+
+static struct action_kw_list http_req_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
+
+static struct action_kw_list http_res_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_res_keywords_register, &http_res_kws);
+
+static struct action_kw_list http_after_res_kws = { { }, {
+ { "set-var-fmt", parse_store, KWF_MATCH_PREFIX },
+ { "set-var", parse_store, KWF_MATCH_PREFIX },
+ { "unset-var", parse_store, KWF_MATCH_PREFIX },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, http_after_res_keywords_register, &http_after_res_kws);
+
+static struct cfg_kw_list cfg_kws = {{ },{
+ { CFG_GLOBAL, "set-var", vars_parse_global_set_var },
+ { CFG_GLOBAL, "set-var-fmt", vars_parse_global_set_var },
+ { CFG_GLOBAL, "tune.vars.global-max-size", vars_max_size_global },
+ { CFG_GLOBAL, "tune.vars.proc-max-size", vars_max_size_proc },
+ { CFG_GLOBAL, "tune.vars.sess-max-size", vars_max_size_sess },
+ { CFG_GLOBAL, "tune.vars.txn-max-size", vars_max_size_txn },
+ { CFG_GLOBAL, "tune.vars.reqres-max-size", vars_max_size_reqres },
+ { CFG_GLOBAL, "tune.vars.check-max-size", vars_max_size_check },
+ { /* END */ }
+}};
+
+INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws);
+
+
+/* register cli keywords */
+static struct cli_kw_list cli_kws = {{ },{
+ { { "get", "var", NULL }, "get var <name> : retrieve contents of a process-wide variable", vars_parse_cli_get_var, NULL },
+ { { "set", "var", NULL }, "set var <name> [fmt|expr] {<fmt>|<expr>}: set variable from an expression or a format", vars_parse_cli_set_var, NULL, NULL, NULL, ACCESS_EXPERIMENTAL },
+ { { NULL }, NULL, NULL, NULL }
+}};
+INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws);
diff --git a/src/version.c b/src/version.c
new file mode 100644
index 0000000..e7bb748
--- /dev/null
+++ b/src/version.c
@@ -0,0 +1,28 @@
+/*
+ * Version reporting: all user-visible version information should come from
+ * this file so that rebuilding only this one is enough to report the latest
+ * code version.
+ */
+
+#include <haproxy/global.h>
+#include <haproxy/version.h>
+
+/* These ones are made variables and not constants so that they are stored into
+ * the data region and prominently appear in core files.
+ */
+char haproxy_version_here[] = "HAProxy version follows";
+char haproxy_version[] = HAPROXY_VERSION;
+char haproxy_date[] = HAPROXY_DATE;
+char stats_version_string[] = STATS_VERSION_STRING;
+
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+#define SANITIZE_STRING " with address sanitizer"
+#else
+#define SANITIZE_STRING ""
+#endif
+
+#if defined(__clang_version__)
+REGISTER_BUILD_OPTS("Built with clang compiler version " __clang_version__ "" SANITIZE_STRING);
+#elif defined(__VERSION__)
+REGISTER_BUILD_OPTS("Built with gcc compiler version " __VERSION__ "" SANITIZE_STRING);
+#endif
diff --git a/src/wdt.c b/src/wdt.c
new file mode 100644
index 0000000..865bb7b
--- /dev/null
+++ b/src/wdt.c
@@ -0,0 +1,193 @@
+/*
+ * Thread lockup detection
+ *
+ * Copyright 2000-2019 Willy Tarreau <willy@haproxy.org>.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <signal.h>
+#include <time.h>
+
+#include <haproxy/api.h>
+#include <haproxy/clock.h>
+#include <haproxy/debug.h>
+#include <haproxy/errors.h>
+#include <haproxy/global.h>
+#include <haproxy/signal-t.h>
+#include <haproxy/thread.h>
+#include <haproxy/tools.h>
+
+
+/*
+ * The watchdog relies on timer_create() and timer_settime(), which are only
+ * available when POSIX timers and per-thread CPU clocks are supported (see
+ * the #if condition below).
+ */
+#if defined(USE_RT) && defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME)
+
+/* define a dummy value to designate "no timer". Use only 32 bits. */
+#ifndef TIMER_INVALID
+#define TIMER_INVALID ((timer_t)(unsigned long)(0xfffffffful))
+#endif
+
+static timer_t per_thread_wd_timer[MAX_THREADS];
+
+/* Set up (or ping) the watchdog timer for thread <thr>. Returns non-zero on
+ * success, zero on failure. It interrupts once per second of CPU time. It
+ * happens that timers based on CPU time are not automatically re-armed,
+ * so we only set the value and leave the interval unset.
+ */
+int wdt_ping(int thr)
+{
+ struct itimerspec its;
+
+ its.it_value.tv_sec = 1; its.it_value.tv_nsec = 0;
+ its.it_interval.tv_sec = 0; its.it_interval.tv_nsec = 0;
+ return timer_settime(per_thread_wd_timer[thr], 0, &its, NULL) == 0;
+}
+
+/* This is the WDTSIG signal handler */
+void wdt_handler(int sig, siginfo_t *si, void *arg)
+{
+ unsigned long long n, p;
+ ulong thr_bit;
+ int thr, tgrp;
+
+ switch (si->si_code) {
+ case SI_TIMER:
+ /* A thread's timer fired, the thread ID is in si_int. We have
+ * no guarantee that the thread handling this signal is in any
+ * way related to the one triggering it, so we need to retrieve
+ * the thread number from there. Note: this thread might
+ * continue to execute in parallel.
+ */
+ thr = si->si_value.sival_int;
+
+ /* cannot happen unless an unknown timer tries to play with our
+ * nerves. Let's die for now if this happens.
+ */
+ if (thr < 0 || thr >= global.nbthread)
+ break;
+
+ tgrp = ha_thread_info[thr].tgid;
+ thr_bit = ha_thread_info[thr].ltid_bit;
+ p = ha_thread_ctx[thr].prev_cpu_time;
+ n = now_cpu_time_thread(thr);
+
+ /* not yet reached the deadline of 1 sec,
+ * or p wasn't initialized yet
+ */
+ if (!p || n - p < 1000000000UL)
+ goto update_and_leave;
+
+ if ((_HA_ATOMIC_LOAD(&ha_thread_ctx[thr].flags) & TH_FL_SLEEPING) ||
+ (_HA_ATOMIC_LOAD(&ha_tgroup_ctx[tgrp-1].threads_harmless) & thr_bit)) {
+ /* This thread is currently doing exactly nothing
+ * waiting in the poll loop (unlikely but possible),
+ * waiting for all other threads to join the rendez-vous
+ * point (common), or waiting for another thread to
+ * finish an isolated operation (unlikely but possible).
+ */
+ goto update_and_leave;
+ }
+
+ /* So the thread indeed appears locked up. In order to be
+ * certain that we're not witnessing an exceptional spike of
+ * CPU usage due to a configuration issue (like running tens
+ * of thousands of tasks in a single loop), we'll check if the
+ * scheduler is still alive by setting the TH_FL_STUCK flag
+ * that the scheduler clears when switching to the next task.
+ * If it's already set, then it's our second call with no
+ * progress and the thread is dead.
+ */
+ if (!(_HA_ATOMIC_LOAD(&ha_thread_ctx[thr].flags) & TH_FL_STUCK)) {
+ _HA_ATOMIC_OR(&ha_thread_ctx[thr].flags, TH_FL_STUCK);
+ goto update_and_leave;
+ }
+
+ /* No doubt now, there's no hope to recover, die loudly! */
+ break;
+
+#if defined(USE_THREAD) && defined(SI_TKILL) /* Linux uses this */
+
+ case SI_TKILL:
+ /* we got a pthread_kill, stop on it */
+ thr = tid;
+ break;
+
+#elif defined(USE_THREAD) && defined(SI_LWP) /* FreeBSD uses this */
+
+ case SI_LWP:
+ /* we got a pthread_kill, stop on it */
+ thr = tid;
+ break;
+
+#endif
+ default:
+ /* unhandled other conditions */
+ return;
+ }
+
+ /* By default we terminate. If we're not on the victim thread, better
+ * bounce the signal there so that we produce a cleaner stack trace
+ * with the other thread interrupted exactly where it was running and
+ * the current one not involved in this.
+ */
+#ifdef USE_THREAD
+ if (thr != tid)
+ ha_tkill(thr, sig);
+ else
+#endif
+ ha_panic();
+ return;
+
+ update_and_leave:
+ wdt_ping(thr);
+}
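+
+/* Illustrative timeline of the two-strike logic above: wdt_ping() arms a one
+ * second CPU-time timer, so a stuck thread is first flagged TH_FL_STUCK after
+ * about one second of consumed CPU, and the panic only triggers on the next
+ * firing if the scheduler never cleared the flag in between.
+ */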
+
+int init_wdt_per_thread()
+{
+ if (!clock_setup_signal_timer(&per_thread_wd_timer[tid], WDTSIG, tid))
+ goto fail1;
+
+ if (!wdt_ping(tid))
+ goto fail2;
+
+ return 1;
+
+ fail2:
+ timer_delete(per_thread_wd_timer[tid]);
+ fail1:
+ per_thread_wd_timer[tid] = TIMER_INVALID;
+ ha_warning("Failed to setup watchdog timer for thread %u, disabling lockup detection.\n", tid);
+ return 1;
+}
+
+void deinit_wdt_per_thread()
+{
+ if (per_thread_wd_timer[tid] != TIMER_INVALID)
+ timer_delete(per_thread_wd_timer[tid]);
+}
+
+/* registers the watchdog signal handler and returns 0. This sets up the signal
+ * handler for WDTSIG, so it must be called once per process.
+ */
+int init_wdt()
+{
+ struct sigaction sa;
+
+ sa.sa_handler = NULL;
+ sa.sa_sigaction = wdt_handler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ sigaction(WDTSIG, &sa, NULL);
+ return ERR_NONE;
+}
+
+REGISTER_POST_CHECK(init_wdt);
+REGISTER_PER_THREAD_INIT(init_wdt_per_thread);
+REGISTER_PER_THREAD_DEINIT(deinit_wdt_per_thread);
+#endif
diff --git a/src/xprt_handshake.c b/src/xprt_handshake.c
new file mode 100644
index 0000000..33f7750
--- /dev/null
+++ b/src/xprt_handshake.c
@@ -0,0 +1,299 @@
+/*
+ * Pseudo-xprt to handle any handshake except the SSL handshake
+ *
+ * Copyright 2019 HAProxy Technologies, Olivier Houchard <ohouchard@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/connection.h>
+
+struct xprt_handshake_ctx {
+ struct connection *conn;
+ struct wait_event *subs;
+ struct wait_event wait_event;
+ const struct xprt_ops *xprt;
+ void *xprt_ctx;
+};
+
+DECLARE_STATIC_POOL(xprt_handshake_ctx_pool, "xprt_handshake_ctx", sizeof(struct xprt_handshake_ctx));
+
+/* This XPRT doesn't take care of sending or receiving data; once its handshake
+ * is done, it just removes itself.
+ */
+static size_t xprt_handshake_from_buf(struct connection *conn, void *xprt_ctx, const struct buffer *buf, size_t count, int flags)
+{
+ return 0;
+}
+
+static size_t xprt_handshake_to_buf(struct connection *conn, void *xprt_ctx, struct buffer *buf, size_t count, int flags)
+{
+ return 0;
+}
+
+/* xprt_handshake_io_cb is exported to see it resolved in "show fd" */
+struct task *xprt_handshake_io_cb(struct task *t, void *bctx, unsigned int state)
+{
+ struct xprt_handshake_ctx *ctx = bctx;
+ struct connection *conn = ctx->conn;
+
+ if (conn->flags & CO_FL_SOCKS4_SEND)
+ if (!conn_send_socks4_proxy_request(conn)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND,
+ &ctx->wait_event);
+
+ goto out;
+ }
+
+ if (conn->flags & CO_FL_SOCKS4_RECV)
+ if (!conn_recv_socks4_proxy_response(conn)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_RECV,
+ &ctx->wait_event);
+ goto out;
+ }
+
+ if (conn->flags & CO_FL_ACCEPT_CIP)
+ if (!conn_recv_netscaler_cip(conn, CO_FL_ACCEPT_CIP)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_RECV,
+ &ctx->wait_event);
+ goto out;
+ }
+
+ if (conn->flags & CO_FL_ACCEPT_PROXY)
+ if (!conn_recv_proxy(conn, CO_FL_ACCEPT_PROXY)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_RECV,
+ &ctx->wait_event);
+ goto out;
+ }
+
+ if (conn->flags & CO_FL_SEND_PROXY)
+ if (!conn_send_proxy(conn, CO_FL_SEND_PROXY)) {
+ ctx->xprt->subscribe(conn, ctx->xprt_ctx, SUB_RETRY_SEND,
+ &ctx->wait_event);
+ goto out;
+ }
+
+out:
+ /* Wake the stream if we're done with the handshake, or we have a
+ * connection error.
+ */
+ if ((conn->flags & CO_FL_ERROR) ||
+ !(conn->flags & CO_FL_HANDSHAKE)) {
+ int ret = 0;
+ int woke = 0;
+ int was_conn_ctx = 0;
+
+ /* On error, wake any waiter */
+ if (ctx->subs) {
+ tasklet_wakeup(ctx->subs->tasklet);
+ ctx->subs->events = 0;
+ woke = 1;
+ ctx->subs = NULL;
+ }
+
+ /* Remove ourself from the xprt chain */
+ if (ctx->wait_event.events != 0)
+ ctx->xprt->unsubscribe(ctx->conn,
+ ctx->xprt_ctx,
+ ctx->wait_event.events,
+ &ctx->wait_event);
+ if (conn->xprt_ctx == ctx) {
+ conn->xprt_ctx = ctx->xprt_ctx;
+ conn->xprt = ctx->xprt;
+ was_conn_ctx = 1;
+ } else
+ conn->xprt->remove_xprt(conn, conn->xprt_ctx, ctx,
+ ctx->xprt, ctx->xprt_ctx);
+ /* If we're the first xprt for the connection, let the
+ * upper layers know. If no mux was set up yet, then call
+ * conn_create_mux, and if we have a mux, and it has a wake
+ * method, call it too.
+ */
+ if (was_conn_ctx) {
+ if (!ctx->conn->mux)
+ ret = conn_create_mux(ctx->conn);
+ if (ret >= 0 && !woke && ctx->conn->mux && ctx->conn->mux->wake)
+ ret = ctx->conn->mux->wake(ctx->conn);
+ }
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(xprt_handshake_ctx_pool, ctx);
+ t = NULL;
+ }
+ return t;
+}
+
+static int xprt_handshake_start(struct connection *conn, void *xprt_ctx)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ if (ctx->xprt->start) {
+ int ret;
+
+ ret = ctx->xprt->start(conn, ctx->xprt_ctx);
+ if (ret < 0)
+ return ret;
+ }
+ tasklet_wakeup(ctx->wait_event.tasklet);
+
+ return 0;
+}
+
+static int xprt_handshake_init(struct connection *conn, void **xprt_ctx)
+{
+ struct xprt_handshake_ctx *ctx;
+ /* already initialized */
+ if (*xprt_ctx)
+ return 0;
+
+ ctx = pool_alloc(xprt_handshake_ctx_pool);
+ if (!ctx) {
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ return -1;
+ }
+ ctx->conn = conn;
+ ctx->wait_event.tasklet = tasklet_new();
+ if (!ctx->wait_event.tasklet) {
+ conn->err_code = CO_ER_SSL_NO_MEM;
+ pool_free(xprt_handshake_ctx_pool, ctx);
+ return -1;
+ }
+ ctx->wait_event.tasklet->process = xprt_handshake_io_cb;
+ ctx->wait_event.tasklet->context = ctx;
+ ctx->wait_event.events = 0;
+
+ ctx->xprt = NULL;
+ ctx->xprt_ctx = NULL;
+ ctx->subs = NULL;
+ *xprt_ctx = ctx;
+
+ return 0;
+}
+
+static void xprt_handshake_close(struct connection *conn, void *xprt_ctx)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ if (ctx) {
+ if (ctx->wait_event.events != 0)
+ ctx->xprt->unsubscribe(ctx->conn, ctx->xprt_ctx,
+ ctx->wait_event.events,
+ &ctx->wait_event);
+ if (ctx->subs) {
+ ctx->subs->events = 0;
+ tasklet_wakeup(ctx->subs->tasklet);
+ }
+
+ if (ctx->xprt && ctx->xprt->close)
+ ctx->xprt->close(conn, ctx->xprt_ctx);
+ /* Remove any handshake flag, and if we were the connection
+ * xprt, get back to XPRT_RAW. If we're here because an
+ * outgoing connection failed, it will be retried using
+ * the same struct connection, and as xprt_handshake is a
+ * bit magic (it requires a call to add_xprt()), it's better
+ * to fall back to the original XPRT to re-initiate the
+ * connection.
+ */
+ conn->flags &= ~CO_FL_HANDSHAKE;
+ if (conn->xprt == xprt_get(XPRT_HANDSHAKE))
+ conn->xprt = xprt_get(XPRT_RAW);
+ tasklet_free(ctx->wait_event.tasklet);
+ pool_free(xprt_handshake_ctx_pool, ctx);
+ }
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int xprt_handshake_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(ctx->subs && ctx->subs != es);
+
+ ctx->subs = es;
+ es->events |= event_type;
+ return 0;
+
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int xprt_handshake_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(ctx->subs && ctx->subs != es);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ ctx->subs = NULL;
+
+ return 0;
+}
+
+/* Use the provided XPRT as an underlying XPRT, and provide the old one.
+ * Returns 0 on success, and non-zero on failure.
+ */
+static int xprt_handshake_add_xprt(struct connection *conn, void *xprt_ctx, void *toadd_ctx, const struct xprt_ops *toadd_ops, void **oldxprt_ctx, const struct xprt_ops **oldxprt_ops)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ if (oldxprt_ops)
+ *oldxprt_ops = ctx->xprt;
+ if (oldxprt_ctx)
+ *oldxprt_ctx = ctx->xprt_ctx;
+ ctx->xprt = toadd_ops;
+ ctx->xprt_ctx = toadd_ctx;
+
+ return 0;
+}
+
+/* Remove the specified xprt. If it is our underlying XPRT, remove it and
+ * return 0, otherwise just call the remove_xprt method of the underlying
+ * XPRT.
+ */
+static int xprt_handshake_remove_xprt(struct connection *conn, void *xprt_ctx, void *toremove_ctx, const struct xprt_ops *newops, void *newctx)
+{
+ struct xprt_handshake_ctx *ctx = xprt_ctx;
+
+ if (ctx->xprt_ctx == toremove_ctx) {
+ ctx->xprt_ctx = newctx;
+ ctx->xprt = newops;
+ return 0;
+ }
+ return (ctx->xprt->remove_xprt(conn, ctx->xprt_ctx, toremove_ctx, newops, newctx));
+}
+
+struct xprt_ops xprt_handshake = {
+ .snd_buf = xprt_handshake_from_buf,
+ .rcv_buf = xprt_handshake_to_buf,
+ .subscribe = xprt_handshake_subscribe,
+ .unsubscribe = xprt_handshake_unsubscribe,
+ .remove_xprt = xprt_handshake_remove_xprt,
+ .add_xprt = xprt_handshake_add_xprt,
+ .init = xprt_handshake_init,
+ .start = xprt_handshake_start,
+ .close = xprt_handshake_close,
+ .rcv_pipe = NULL,
+ .snd_pipe = NULL,
+ .shutr = NULL,
+ .shutw = NULL,
+ .name = "HS",
+};
+
+static void __xprt_handshake_init(void)
+{
+ xprt_register(XPRT_HANDSHAKE, &xprt_handshake);
+}
+
+INITCALL0(STG_REGISTER, __xprt_handshake_init);
diff --git a/src/xprt_quic.c b/src/xprt_quic.c
new file mode 100644
index 0000000..eda113c
--- /dev/null
+++ b/src/xprt_quic.c
@@ -0,0 +1,175 @@
+/*
+ * QUIC xprt layer. Act as an abstraction between quic_conn and MUX layers.
+ *
+ * Copyright 2020 HAProxy Technologies, Frederic Lecaille <flecaille@haproxy.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/api.h>
+#include <haproxy/connection.h>
+#include <haproxy/quic_conn.h>
+#include <haproxy/ssl_sock.h>
+#include <haproxy/quic_trace.h>
+#include <haproxy/trace.h>
+
+static void quic_close(struct connection *conn, void *xprt_ctx)
+{
+ struct ssl_sock_ctx *conn_ctx = xprt_ctx;
+ struct quic_conn *qc = conn_ctx->qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_CLOSE, qc);
+
+ /* Any further application data can be dropped. */
+ qc->mux_state = QC_MUX_RELEASED;
+
+ /* If the quic-conn timer has already expired or if already in "connection close"
+ * state, free the quic-conn.
+ */
+ if (qc->flags & (QUIC_FL_CONN_EXP_TIMER|QUIC_FL_CONN_CLOSING)) {
+ quic_conn_release(qc);
+ qc = NULL;
+ goto leave;
+ }
+
+ /* Schedule a CONNECTION_CLOSE emission. If process stopping is in
+ * progress, quic-conn idle-timer will be scheduled immediately after
+ * its emission to ensure an immediate connection closing.
+ */
+ qc_check_close_on_released_mux(qc);
+ leave:
+ TRACE_LEAVE(QUIC_EV_CONN_CLOSE, qc);
+}
+
+/* Called from the upper layer, to subscribe <es> to events <event_type>. The
+ * event subscriber <es> is not allowed to change from a previous call as long
+ * as at least one event is still subscribed. The <event_type> must only be a
+ * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
+ */
+static int quic_conn_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct quic_conn *qc = conn->handle.qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_SUB, qc);
+
+ BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
+ BUG_ON(qc->subs && qc->subs != es);
+
+ es->events |= event_type;
+ qc->subs = es;
+
+ /* TODO implement a check_events to detect if subscriber should be
+ * woken up immediately ?
+ */
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("subscribe(recv)", QUIC_EV_CONN_XPRTRECV, qc);
+
+ if (event_type & SUB_RETRY_SEND)
+ TRACE_DEVEL("subscribe(send)", QUIC_EV_CONN_XPRTSEND, qc);
+
+ TRACE_LEAVE(QUIC_EV_CONN_SUB, qc);
+
+ return 0;
+}
+
+/* Called from the upper layer, to unsubscribe <es> from events <event_type>.
+ * The <es> pointer is not allowed to differ from the one passed to the
+ * subscribe() call. It always returns zero.
+ */
+static int quic_conn_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
+{
+ struct quic_conn *qc = conn->handle.qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_SUB, qc);
+
+ if (event_type & SUB_RETRY_RECV)
+ TRACE_DEVEL("unsubscribe(recv)", QUIC_EV_CONN_XPRTRECV, qc);
+ if (event_type & SUB_RETRY_SEND)
+ TRACE_DEVEL("unsubscribe(send)", QUIC_EV_CONN_XPRTSEND, qc);
+
+ es->events &= ~event_type;
+ if (!es->events)
+ qc->subs = NULL;
+
+ /* TODO implement ignore_events similar to conn_unsubscribe() ? */
+
+ TRACE_LEAVE(QUIC_EV_CONN_SUB, qc);
+
+ return 0;
+}
+
+/* Store in <xprt_ctx> the context attached to <conn>.
+ * Always returns 0.
+ */
+static int qc_conn_init(struct connection *conn, void **xprt_ctx)
+{
+ struct quic_conn *qc = conn->handle.qc;
+
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ /* Ensure thread connection migration is finalized ASAP. */
+ if (qc->flags & QUIC_FL_CONN_AFFINITY_CHANGED)
+ qc_finalize_affinity_rebind(qc);
+
+ /* do not store the context if already set */
+ if (*xprt_ctx)
+ goto out;
+
+ *xprt_ctx = qc->xprt_ctx;
+
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+
+ return 0;
+}
+
+/* Start the QUIC transport layer */
+static int qc_xprt_start(struct connection *conn, void *ctx)
+{
+ int ret = 0;
+ struct quic_conn *qc;
+
+ qc = conn->handle.qc;
+ TRACE_ENTER(QUIC_EV_CONN_NEW, qc);
+
+ /* mux-quic can now be considered ready. */
+ qc->mux_state = QC_MUX_READY;
+
+ ret = 1;
+ out:
+ TRACE_LEAVE(QUIC_EV_CONN_NEW, qc);
+ return ret;
+}
+
+static struct ssl_sock_ctx *qc_get_ssl_sock_ctx(struct connection *conn)
+{
+ if (!conn || conn->xprt != xprt_get(XPRT_QUIC) || !conn->handle.qc || !conn->xprt_ctx)
+ return NULL;
+
+ return conn->handle.qc->xprt_ctx;
+}
+
+/* transport-layer operations for QUIC connections. */
+static struct xprt_ops ssl_quic = {
+ .close = quic_close,
+ .subscribe = quic_conn_subscribe,
+ .unsubscribe = quic_conn_unsubscribe,
+ .init = qc_conn_init,
+ .start = qc_xprt_start,
+ .prepare_bind_conf = ssl_sock_prepare_bind_conf,
+ .destroy_bind_conf = ssl_sock_destroy_bind_conf,
+ .get_alpn = ssl_sock_get_alpn,
+ .get_ssl_sock_ctx = qc_get_ssl_sock_ctx,
+ .name = "QUIC",
+};
+
+static void __quic_conn_init(void)
+{
+ xprt_register(XPRT_QUIC, &ssl_quic);
+}
+INITCALL0(STG_REGISTER, __quic_conn_init);